Diffstat (limited to 'third_party/eigen3/Eigen/src/Core')
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Array.h  338
-rw-r--r--  third_party/eigen3/Eigen/src/Core/ArrayBase.h  238
-rw-r--r--  third_party/eigen3/Eigen/src/Core/ArrayWrapper.h  287
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Assign.h  622
-rw-r--r--  third_party/eigen3/Eigen/src/Core/AssignEvaluator.h  842
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Assign_MKL.h  225
-rw-r--r--  third_party/eigen3/Eigen/src/Core/BandMatrix.h  334
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Block.h  432
-rw-r--r--  third_party/eigen3/Eigen/src/Core/BooleanRedux.h  154
-rw-r--r--  third_party/eigen3/Eigen/src/Core/CommaInitializer.h  161
-rw-r--r--  third_party/eigen3/Eigen/src/Core/CoreEvaluators.h  1121
-rw-r--r--  third_party/eigen3/Eigen/src/Core/CoreIterators.h  61
-rw-r--r--  third_party/eigen3/Eigen/src/Core/CwiseBinaryOp.h  238
-rw-r--r--  third_party/eigen3/Eigen/src/Core/CwiseNullaryOp.h  875
-rw-r--r--  third_party/eigen3/Eigen/src/Core/CwiseUnaryOp.h  135
-rw-r--r--  third_party/eigen3/Eigen/src/Core/CwiseUnaryView.h  139
-rw-r--r--  third_party/eigen3/Eigen/src/Core/DenseBase.h  561
-rw-r--r--  third_party/eigen3/Eigen/src/Core/DenseCoeffsBase.h  787
-rw-r--r--  third_party/eigen3/Eigen/src/Core/DenseStorage.h  480
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Diagonal.h  258
-rw-r--r--  third_party/eigen3/Eigen/src/Core/DiagonalMatrix.h  346
-rw-r--r--  third_party/eigen3/Eigen/src/Core/DiagonalProduct.h  130
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Dot.h  270
-rw-r--r--  third_party/eigen3/Eigen/src/Core/EigenBase.h  146
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Flagged.h  140
-rw-r--r--  third_party/eigen3/Eigen/src/Core/ForceAlignedAccess.h  146
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Functors.h  1020
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Fuzzy.h  155
-rw-r--r--  third_party/eigen3/Eigen/src/Core/GeneralProduct.h  674
-rw-r--r--  third_party/eigen3/Eigen/src/Core/GenericPacketMath.h  584
-rw-r--r--  third_party/eigen3/Eigen/src/Core/GlobalFunctions.h  94
-rw-r--r--  third_party/eigen3/Eigen/src/Core/IO.h  257
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Map.h  185
-rw-r--r--  third_party/eigen3/Eigen/src/Core/MapBase.h  257
-rw-r--r--  third_party/eigen3/Eigen/src/Core/MathFunctions.h  1089
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Matrix.h  443
-rw-r--r--  third_party/eigen3/Eigen/src/Core/MatrixBase.h  614
-rw-r--r--  third_party/eigen3/Eigen/src/Core/NestByValue.h  112
-rw-r--r--  third_party/eigen3/Eigen/src/Core/NoAlias.h  141
-rw-r--r--  third_party/eigen3/Eigen/src/Core/NumTraits.h  177
-rw-r--r--  third_party/eigen3/Eigen/src/Core/PermutationMatrix.h  689
-rw-r--r--  third_party/eigen3/Eigen/src/Core/PlainObjectBase.h  895
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Product.h  107
-rw-r--r--  third_party/eigen3/Eigen/src/Core/ProductBase.h  280
-rw-r--r--  third_party/eigen3/Eigen/src/Core/ProductEvaluators.h  411
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Random.h  193
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Redux.h  417
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Ref.h  260
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Replicate.h  177
-rw-r--r--  third_party/eigen3/Eigen/src/Core/ReturnByValue.h  89
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Reverse.h  224
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Select.h  162
-rw-r--r--  third_party/eigen3/Eigen/src/Core/SelfAdjointView.h  338
-rw-r--r--  third_party/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h  226
-rw-r--r--  third_party/eigen3/Eigen/src/Core/SolveTriangular.h  260
-rw-r--r--  third_party/eigen3/Eigen/src/Core/StableNorm.h  200
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Stride.h  113
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Swap.h  140
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Transpose.h  428
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Transpositions.h  436
-rw-r--r--  third_party/eigen3/Eigen/src/Core/TriangularMatrix.h  900
-rw-r--r--  third_party/eigen3/Eigen/src/Core/VectorBlock.h  97
-rw-r--r--  third_party/eigen3/Eigen/src/Core/VectorwiseOp.h  651
-rw-r--r--  third_party/eigen3/Eigen/src/Core/Visitor.h  237
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/AVX/Complex.h  463
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h  495
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h  650
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h  51
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h  439
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h  299
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h  943
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h  75
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h  336
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/Default/Settings.h  49
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/NEON/Complex.h  467
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h  91
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h  745
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/SSE/Complex.h  486
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h  529
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h  883
-rw-r--r--  third_party/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h  77
-rw-r--r--  third_party/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h  167
-rw-r--r--  third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h  498
-rw-r--r--  third_party/eigen3/Eigen/src/Core/functors/NullaryFunctors.h  158
-rw-r--r--  third_party/eigen3/Eigen/src/Core/functors/StlFunctors.h  129
-rw-r--r--  third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h  493
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/CoeffBasedProduct.h  454
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h  2197
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h  465
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h  285
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h  146
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h  118
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h  618
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h  131
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/Parallelizer.h  158
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h  523
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h  295
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h  281
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h  114
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/SelfadjointProduct.h  123
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h  93
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h  434
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h  309
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h  354
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h  247
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h  331
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h  155
-rw-r--r--  third_party/eigen3/Eigen/src/Core/products/TriangularSolverVector.h  145
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/BlasUtil.h  237
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/Constants.h  453
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h  40
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h  301
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/MKL_support.h  126
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/Macros.h  740
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/MatrixMapper.h  155
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/Memory.h  984
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/Meta.h  334
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h  14
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/StaticAssert.h  206
-rw-r--r--  third_party/eigen3/Eigen/src/Core/util/XprHelper.h  481
120 files changed, 43768 insertions, 0 deletions
diff --git a/third_party/eigen3/Eigen/src/Core/Array.h b/third_party/eigen3/Eigen/src/Core/Array.h
new file mode 100644
index 0000000000..28d6f14434
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Array.h
@@ -0,0 +1,338 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ARRAY_H
+#define EIGEN_ARRAY_H
+
+namespace Eigen {
+
+/** \class Array
+ * \ingroup Core_Module
+ *
+ * \brief General-purpose arrays with easy API for coefficient-wise operations
+ *
+ * The %Array class is very similar to the Matrix class. It provides
+ * general-purpose one- and two-dimensional arrays. The difference between the
+ * %Array and the %Matrix class is primarily in the API: the API for the
+ * %Array class provides easy access to coefficient-wise operations, while the
+ * API for the %Matrix class provides easy access to linear-algebra
+ * operations.
+ *
+ * This class can be extended with the help of the plugin mechanism described on the page
+ * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
+ *
+ * \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
+ */
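A minimal sketch of the API split described in the comment above, assuming only the public Array/Matrix interface: the same operator* is coefficient-wise on arrays but a matrix product on matrices, and .array()/.matrix() switch between the two views without copying.

    #include <Eigen/Dense>

    int main() {
      Eigen::Array22f a;
      a << 1, 2,
           3, 4;
      Eigen::Matrix2f m = a.matrix();           // same coefficients, matrix view

      Eigen::Array22f cwise = a * a;            // coefficient-wise product: 1, 4, 9, 16
      Eigen::Matrix2f prod  = m * m;            // matrix product: 7, 10, 15, 22
      Eigen::Matrix2f mixed = (m.array() * m.array()).matrix();  // same values as cwise
      return 0;
    }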
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+ typedef ArrayXpr XprKind;
+ typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
+};
+}
+
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+class Array
+ : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+ public:
+
+ typedef PlainObjectBase<Array> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Array)
+
+ enum { Options = _Options };
+ typedef typename Base::PlainObject PlainObject;
+
+ protected:
+ template <typename Derived, typename OtherDerived, bool IsVector>
+ friend struct internal::conservative_resize_like_impl;
+
+ using Base::m_storage;
+
+ public:
+
+ using Base::base;
+ using Base::coeff;
+ using Base::coeffRef;
+
+ /**
+ * The usage of
+ * using Base::operator=;
+ * fails on MSVC. Since the code below works with both GCC and MSVC, we skip
+ * the use of 'using'. This should be done only for operator=.
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
+ {
+ return Base::operator=(other);
+ }
+
+ /** Copies the value of the expression \a other into \c *this with automatic resizing.
+ *
+ * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
+ * it will be initialized.
+ *
+ * Note that copying a row-vector into a vector (and conversely) is allowed.
+ * The resizing, if any, is then done in the appropriate way so that row-vectors
+ * remain row-vectors and vectors remain vectors.
+ */
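A minimal sketch of this resizing behaviour, assuming only the dynamic-size array types from the public API:

    #include <Eigen/Dense>
    #include <cassert>

    int main() {
      Eigen::ArrayXf a(2), b(5);
      b.setConstant(1.0f);
      a = b;                       // a is automatically resized to b's dimensions
      assert(a.size() == 5);
      return 0;
    }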
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other)
+ {
+ return Base::_set(other);
+ }
+
+ /** This is a special case of the templated operator=. Its purpose is to
+ * prevent a default operator= from hiding the templated operator=.
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const Array& other)
+ {
+ return Base::_set(other);
+ }
+
+ /** Default constructor.
+ *
+ * For fixed-size matrices, does nothing.
+ *
+ * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix
+ * is called a null matrix. This constructor is the unique way to create null matrices: resizing
+ * a matrix to 0 is not supported.
+ *
+ * \sa resize(Index,Index)
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array() : Base()
+ {
+ Base::_check_template_params();
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ // FIXME is it still needed ??
+ /** \internal */
+ EIGEN_DEVICE_FUNC
+ Array(internal::constructor_without_unaligned_array_assert)
+ : Base(internal::constructor_without_unaligned_array_assert())
+ {
+ Base::_check_template_params();
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+#endif
+
+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+ Array(Array&& other)
+ : Base(std::move(other))
+ {
+ Base::_check_template_params();
+ if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
+ Base::_set_noalias(other);
+ }
+ Array& operator=(Array&& other)
+ {
+ other.swap(*this);
+ return *this;
+ }
+#endif
+
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE explicit Array(const T& x)
+ {
+ Base::_check_template_params();
+ Base::template _init1<T>(x);
+ }
+
+ template<typename T0, typename T1>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
+ {
+ Base::_check_template_params();
+ this->template _init2<T0,T1>(val0, val1);
+ }
+ #else
+ /** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */
+ EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
+ /** Constructs a vector or row-vector with given dimension. \only_for_vectors
+ *
+ * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
+ * it is redundant to pass the dimension here, so it makes more sense to use the default
+ * constructor Array() instead.
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE explicit Array(Index dim);
+ /** constructs an initialized 1x1 Array with the given coefficient */
+ Array(const Scalar& value);
+ /** constructs an uninitialized array with \a rows rows and \a cols columns.
+ *
+ * This is useful for dynamic-size arrays. For fixed-size arrays,
+ * it is redundant to pass these parameters, so one should use the default constructor
+ * Array() instead. */
+ Array(Index rows, Index cols);
+ /** constructs an initialized 2D vector with given coefficients */
+ Array(const Scalar& val0, const Scalar& val1);
+ #endif
+
+ /** constructs an initialized 3D vector with given coefficients */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3)
+ m_storage.data()[0] = val0;
+ m_storage.data()[1] = val1;
+ m_storage.data()[2] = val2;
+ }
+ /** constructs an initialized 4D vector with given coefficients */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4)
+ m_storage.data()[0] = val0;
+ m_storage.data()[1] = val1;
+ m_storage.data()[2] = val2;
+ m_storage.data()[3] = val3;
+ }
+
+ /** Constructor copying the value of the expression \a other */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other)
+ : Base(other.rows() * other.cols(), other.rows(), other.cols())
+ {
+ Base::_check_template_params();
+ Base::_set_noalias(other);
+ }
+ /** Copy constructor */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const Array& other)
+ : Base(other.rows() * other.cols(), other.rows(), other.cols())
+ {
+ Base::_check_template_params();
+ Base::_set_noalias(other);
+ }
+ /** Copy constructor with in-place evaluation */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other)
+ {
+ Base::_check_template_params();
+ Base::resize(other.rows(), other.cols());
+ other.evalTo(*this);
+ }
+
+ /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
+ : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
+ {
+ Base::_check_template_params();
+ Base::_resize_to_match(other);
+ *this = other;
+ }
+
+ /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the
+ * data pointers.
+ */
+ template<typename OtherDerived>
+ void swap(ArrayBase<OtherDerived> const & other)
+ { this->_swap(other.derived()); }
+
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
+
+ #ifdef EIGEN_ARRAY_PLUGIN
+ #include EIGEN_ARRAY_PLUGIN
+ #endif
+
+ private:
+
+ template<typename MatrixType, typename OtherDerived, bool SwapPointers>
+ friend struct internal::matrix_swap_impl;
+};
+
+/** \defgroup arraytypedefs Global array typedefs
+ * \ingroup Core_Module
+ *
+ * Eigen defines several typedef shortcuts for most common 1D and 2D array types.
+ *
+ * The general patterns are the following:
+ *
+ * \c ArrayRowsColsType where \c Rows and \c Cols can be \c 2,\c 3,\c 4 for fixed-size square arrays or \c X for dynamic size,
+ * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd
+ * for complex double.
+ *
+ * For example, \c Array33d is a fixed-size 3x3 array type of doubles, and \c ArrayXXf is a dynamic-size array of floats.
+ *
+ * There are also \c ArraySizeType which are self-explanatory. For example, \c Array4cf is
+ * a fixed-size 1D array of 4 complex floats.
+ *
+ * \sa class Array
+ */
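A few of the names produced by the macros that follow, spelled out as a sketch of the naming scheme:

    #include <Eigen/Dense>

    int main() {
      Eigen::Array33d a33;          // Array<double, 3, 3>: fixed-size 3x3 array
      Eigen::Array4cf a4;           // Array<std::complex<float>, 4, 1>: 1D array of 4 complex floats
      Eigen::ArrayXXf axx(2, 5);    // Array<float, Dynamic, Dynamic>: dynamic-size 2D array
      Eigen::Array3Xi a3x(3, 7);    // Array<int, 3, Dynamic>: 3 rows, dynamic number of columns
      a33.setZero(); a4.setZero(); axx.setZero(); a3x.setZero();
      return 0;
    }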
+
+#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
+/** \ingroup arraytypedefs */ \
+typedef Array<Type, Size, Size> Array##SizeSuffix##SizeSuffix##TypeSuffix; \
+/** \ingroup arraytypedefs */ \
+typedef Array<Type, Size, 1> Array##SizeSuffix##TypeSuffix;
+
+#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
+/** \ingroup arraytypedefs */ \
+typedef Array<Type, Size, Dynamic> Array##Size##X##TypeSuffix; \
+/** \ingroup arraytypedefs */ \
+typedef Array<Type, Dynamic, Size> Array##X##Size##TypeSuffix;
+
+#define EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 2, 2) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 3, 3) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 4, 4) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
+
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(int, i)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(float, f)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(double, d)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<float>, cf)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
+
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS
+
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS_LARGE
+
+#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
+using Eigen::Matrix##SizeSuffix##TypeSuffix; \
+using Eigen::Vector##SizeSuffix##TypeSuffix; \
+using Eigen::RowVector##SizeSuffix##TypeSuffix;
+
+#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(TypeSuffix) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
+
+#define EIGEN_USING_ARRAY_TYPEDEFS \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(i) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(f) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(d) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cf) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cd)
+
+} // end namespace Eigen
+
+#endif // EIGEN_ARRAY_H
diff --git a/third_party/eigen3/Eigen/src/Core/ArrayBase.h b/third_party/eigen3/Eigen/src/Core/ArrayBase.h
new file mode 100644
index 0000000000..2c9ace4a77
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/ArrayBase.h
@@ -0,0 +1,238 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ARRAYBASE_H
+#define EIGEN_ARRAYBASE_H
+
+namespace Eigen {
+
+template<typename ExpressionType> class MatrixWrapper;
+
+/** \class ArrayBase
+ * \ingroup Core_Module
+ *
+ * \brief Base class for all 1D and 2D arrays, and related expressions
+ *
+ * An array is similar to a dense vector or matrix. While matrices are mathematical
+ * objects with well-defined linear algebra operators, an array is just a collection
+ * of scalar values arranged in a one- or two-dimensional fashion. As the main consequence,
+ * all operations applied to an array are performed coefficient-wise. Furthermore,
+ * arrays support the scalar math functions of the C++ standard library (e.g., std::sin(x)), and convenient
+ * constructors that make it easy to write generic code working for both scalar values
+ * and arrays.
+ *
+ * This class is the base that is inherited by all array expression types.
+ *
+ * \tparam Derived is the derived type, e.g., an array or an expression type.
+ *
+ * This class can be extended with the help of the plugin mechanism described on the page
+ * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
+ *
+ * \sa class MatrixBase, \ref TopicClassHierarchy
+ */
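A minimal sketch of the coefficient-wise semantics described above; the abs(), sqrt() and exp() members used here are the coefficient-wise operations pulled in through the ArrayCwiseUnaryOps plugin included further down in this class.

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(5, -2.0, 2.0);
      Eigen::ArrayXd b = (a.abs() + 1.0).sqrt() * a.exp();   // every operation is coefficient-wise
      std::cout << b.transpose() << std::endl;

      // Linear-algebra semantics are reached by going through .matrix():
      double dot = (a.matrix().transpose() * a.matrix()).value();
      std::cout << dot << std::endl;
      return 0;
    }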
+template<typename Derived> class ArrayBase
+ : public DenseBase<Derived>
+{
+ public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** The base class for a given storage type. */
+ typedef ArrayBase StorageBaseType;
+
+ typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
+
+ using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
+ typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
+
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Index Index;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ typedef DenseBase<Derived> Base;
+ using Base::RowsAtCompileTime;
+ using Base::ColsAtCompileTime;
+ using Base::SizeAtCompileTime;
+ using Base::MaxRowsAtCompileTime;
+ using Base::MaxColsAtCompileTime;
+ using Base::MaxSizeAtCompileTime;
+ using Base::IsVectorAtCompileTime;
+ using Base::Flags;
+ using Base::CoeffReadCost;
+
+ using Base::derived;
+ using Base::const_cast_derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+ using Base::lazyAssign;
+ using Base::operator=;
+ using Base::operator+=;
+ using Base::operator-=;
+ using Base::operator*=;
+ using Base::operator/=;
+
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** \internal the plain matrix type corresponding to this expression. Note that it is not necessarily
+ * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
+ * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either
+ * PlainObject or const PlainObject&.
+ */
+ typedef Array<typename internal::traits<Derived>::Scalar,
+ internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime,
+ AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+ internal::traits<Derived>::MaxRowsAtCompileTime,
+ internal::traits<Derived>::MaxColsAtCompileTime
+ > PlainObject;
+
+
+ /** \internal Represents a matrix with all coefficients equal to one another*/
+ typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
+# include "../plugins/CommonCwiseUnaryOps.h"
+# include "../plugins/MatrixCwiseUnaryOps.h"
+# include "../plugins/ArrayCwiseUnaryOps.h"
+# include "../plugins/CommonCwiseBinaryOps.h"
+# include "../plugins/MatrixCwiseBinaryOps.h"
+# include "../plugins/ArrayCwiseBinaryOps.h"
+# ifdef EIGEN_ARRAYBASE_PLUGIN
+# include EIGEN_ARRAYBASE_PLUGIN
+# endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+
+ /** Special case of the template operator=, in order to prevent the compiler
+ * from generating a default operator= (issue hit with g++ 4.1)
+ */
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const ArrayBase& other)
+ {
+ return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+ }
+
+ EIGEN_DEVICE_FUNC
+ Derived& operator+=(const Scalar& scalar);
+ EIGEN_DEVICE_FUNC
+ Derived& operator-=(const Scalar& scalar);
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator+=(const ArrayBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator-=(const ArrayBase<OtherDerived>& other);
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator*=(const ArrayBase<OtherDerived>& other);
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator/=(const ArrayBase<OtherDerived>& other);
+
+ public:
+ EIGEN_DEVICE_FUNC
+ ArrayBase<Derived>& array() { return *this; }
+ EIGEN_DEVICE_FUNC
+ const ArrayBase<Derived>& array() const { return *this; }
+
+ /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
+ * \sa MatrixBase::array() */
+ EIGEN_DEVICE_FUNC
+ MatrixWrapper<Derived> matrix() { return derived(); }
+ EIGEN_DEVICE_FUNC
+ const MatrixWrapper<const Derived> matrix() const { return derived(); }
+
+// template<typename Dest>
+// inline void evalTo(Dest& dst) const { dst = matrix(); }
+
+ protected:
+ EIGEN_DEVICE_FUNC
+ ArrayBase() : Base() {}
+
+ private:
+ explicit ArrayBase(Index);
+ ArrayBase(Index,Index);
+ template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&);
+ protected:
+ // mixing arrays and matrices is not legal
+ template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+ // mixing arrays and matrices is not legal
+ template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+};
+
+/** replaces \c *this by \c *this - \a other.
+ *
+ * \returns a reference to \c *this
+ */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
+{
+ SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
+ tmp = other.derived();
+ return derived();
+}
+
+/** replaces \c *this by \c *this + \a other.
+ *
+ * \returns a reference to \c *this
+ */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
+{
+ SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
+ tmp = other.derived();
+ return derived();
+}
+
+/** replaces \c *this by \c *this * \a other coefficient wise.
+ *
+ * \returns a reference to \c *this
+ */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
+{
+ SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived());
+ tmp = other.derived();
+ return derived();
+}
+
+/** replaces \c *this by \c *this / \a other coefficient wise.
+ *
+ * \returns a reference to \c *this
+ */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
+{
+ SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived());
+ tmp = other.derived();
+ return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_ARRAYBASE_H
diff --git a/third_party/eigen3/Eigen/src/Core/ArrayWrapper.h b/third_party/eigen3/Eigen/src/Core/ArrayWrapper.h
new file mode 100644
index 0000000000..4bb6480243
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/ArrayWrapper.h
@@ -0,0 +1,287 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ARRAYWRAPPER_H
+#define EIGEN_ARRAYWRAPPER_H
+
+namespace Eigen {
+
+/** \class ArrayWrapper
+ * \ingroup Core_Module
+ *
+ * \brief Expression of a mathematical vector or matrix as an array object
+ *
+ * This class is the return type of MatrixBase::array(), and most of the time
+ * this is the only way it is used.
+ *
+ * \sa MatrixBase::array(), class MatrixWrapper
+ */
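A minimal sketch of the wrapper in use, assuming only the public .array()/.matrix() API: the wrapper references the matrix's coefficients, so writes go through to the wrapped object.

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXd m(2, 2);
      m << 1.0, 2.0,
           3.0, 4.0;

      // m.array() returns an ArrayWrapper referencing m's coefficients.
      Eigen::MatrixXd squared = (m.array() * m.array()).matrix();  // coefficient-wise square
      m.array() += 10.0;                                           // writes through the wrapper into m
      std::cout << squared << "\n\n" << m << std::endl;
      return 0;
    }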
+
+namespace internal {
+template<typename ExpressionType>
+struct traits<ArrayWrapper<ExpressionType> >
+ : public traits<typename remove_all<typename ExpressionType::Nested>::type >
+{
+ typedef ArrayXpr XprKind;
+};
+}
+
+template<typename ExpressionType>
+class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
+{
+ public:
+ typedef ArrayBase<ArrayWrapper> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
+
+ typedef typename internal::conditional<
+ internal::is_lvalue<ExpressionType>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+
+ typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return m_expression.rows(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return m_expression.cols(); }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const { return m_expression.outerStride(); }
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const { return m_expression.innerStride(); }
+
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar* data() const { return m_expression.data(); }
+
+ EIGEN_DEVICE_FUNC
+ inline CoeffReturnType coeff(Index rowId, Index colId) const
+ {
+ return m_expression.coeff(rowId, colId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index rowId, Index colId)
+ {
+ return m_expression.const_cast_derived().coeffRef(rowId, colId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return m_expression.const_cast_derived().coeffRef(rowId, colId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline CoeffReturnType coeff(Index index) const
+ {
+ return m_expression.coeff(index);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index index)
+ {
+ return m_expression.const_cast_derived().coeffRef(index);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return m_expression.const_cast_derived().coeffRef(index);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index rowId, Index colId) const
+ {
+ return m_expression.template packet<LoadMode>(rowId, colId);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
+ {
+ m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index index) const
+ {
+ return m_expression.template packet<LoadMode>(index);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index index, const PacketScalar& val)
+ {
+ m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+ }
+
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void evalTo(Dest& dst) const { dst = m_expression; }
+
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<NestedExpressionType>::type&
+ nestedExpression() const
+ {
+ return m_expression;
+ }
+
+ /** Forwards the resizing request to the nested expression
+ * \sa DenseBase::resize(Index) */
+ EIGEN_DEVICE_FUNC
+ void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+ /** Forwards the resizing request to the nested expression
+ * \sa DenseBase::resize(Index,Index)*/
+ EIGEN_DEVICE_FUNC
+ void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
+
+ protected:
+ NestedExpressionType m_expression;
+};
+
+/** \class MatrixWrapper
+ * \ingroup Core_Module
+ *
+ * \brief Expression of an array as a mathematical vector or matrix
+ *
+ * This class is the return type of ArrayBase::matrix(), and most of the time
+ * this is the only way it is used.
+ *
+ * \sa MatrixBase::matrix(), class ArrayWrapper
+ */
+
+namespace internal {
+template<typename ExpressionType>
+struct traits<MatrixWrapper<ExpressionType> >
+ : public traits<typename remove_all<typename ExpressionType::Nested>::type >
+{
+ typedef MatrixXpr XprKind;
+};
+}
+
+template<typename ExpressionType>
+class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
+{
+ public:
+ typedef MatrixBase<MatrixWrapper<ExpressionType> > Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
+
+ typedef typename internal::conditional<
+ internal::is_lvalue<ExpressionType>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+
+ typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
+
+ EIGEN_DEVICE_FUNC
+ inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {}
+
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return m_expression.rows(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return m_expression.cols(); }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const { return m_expression.outerStride(); }
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const { return m_expression.innerStride(); }
+
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar* data() const { return m_expression.data(); }
+
+ EIGEN_DEVICE_FUNC
+ inline CoeffReturnType coeff(Index rowId, Index colId) const
+ {
+ return m_expression.coeff(rowId, colId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index rowId, Index colId)
+ {
+ return m_expression.const_cast_derived().coeffRef(rowId, colId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return m_expression.derived().coeffRef(rowId, colId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline CoeffReturnType coeff(Index index) const
+ {
+ return m_expression.coeff(index);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index index)
+ {
+ return m_expression.const_cast_derived().coeffRef(index);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return m_expression.const_cast_derived().coeffRef(index);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index rowId, Index colId) const
+ {
+ return m_expression.template packet<LoadMode>(rowId, colId);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
+ {
+ m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index index) const
+ {
+ return m_expression.template packet<LoadMode>(index);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index index, const PacketScalar& val)
+ {
+ m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+ }
+
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<NestedExpressionType>::type&
+ nestedExpression() const
+ {
+ return m_expression;
+ }
+
+ /** Forwards the resizing request to the nested expression
+ * \sa DenseBase::resize(Index) */
+ EIGEN_DEVICE_FUNC
+ void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+ /** Forwards the resizing request to the nested expression
+ * \sa DenseBase::resize(Index,Index)*/
+ EIGEN_DEVICE_FUNC
+ void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
+
+ protected:
+ NestedExpressionType m_expression;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_ARRAYWRAPPER_H
diff --git a/third_party/eigen3/Eigen/src/Core/Assign.h b/third_party/eigen3/Eigen/src/Core/Assign.h
new file mode 100644
index 0000000000..07da2fe31d
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Assign.h
@@ -0,0 +1,622 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007 Michael Olbrich <michael.olbrich@gmx.net>
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ASSIGN_H
+#define EIGEN_ASSIGN_H
+
+namespace Eigen {
+
+namespace internal {
+
+/***************************************************************************
+* Part 1 : the logic deciding a strategy for traversal and unrolling *
+***************************************************************************/
+
+template <typename Derived, typename OtherDerived>
+struct assign_traits
+{
+public:
+ enum {
+ DstIsAligned = Derived::Flags & AlignedBit,
+ DstHasDirectAccess = Derived::Flags & DirectAccessBit,
+ SrcIsAligned = OtherDerived::Flags & AlignedBit,
+ JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
+ };
+
+private:
+ enum {
+ InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
+ : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
+ : int(Derived::RowsAtCompileTime),
+ InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
+ : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
+ : int(Derived::MaxRowsAtCompileTime),
+ MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
+ PacketSize = packet_traits<typename Derived::Scalar>::size
+ };
+
+ enum {
+ StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
+ MightVectorize = StorageOrdersAgree
+ && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
+ MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
+ && int(DstIsAligned) && int(SrcIsAligned),
+ MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
+ MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
+ && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
+ /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
+ so it's only good for large enough sizes. */
+ MaySliceVectorize = MightVectorize && DstHasDirectAccess
+ && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
+ /* slice vectorization can be slow, so we only want it if the slices are big, which is
+ indicated by InnerMaxSize rather than InnerSize; think of the case of a dynamic block
+ in a fixed-size matrix */
+ };
+
+public:
+ enum {
+ Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
+ : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+ : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
+ : int(MayLinearize) ? int(LinearTraversal)
+ : int(DefaultTraversal),
+ Vectorized = int(Traversal) == InnerVectorizedTraversal
+ || int(Traversal) == LinearVectorizedTraversal
+ || int(Traversal) == SliceVectorizedTraversal
+ };
+
+private:
+ enum {
+ UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
+ MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
+ && int(OtherDerived::CoeffReadCost) != Dynamic
+ && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
+ MayUnrollInner = int(InnerSize) != Dynamic
+ && int(OtherDerived::CoeffReadCost) != Dynamic
+ && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
+ };
+
+public:
+ enum {
+ Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
+ ? (
+ int(MayUnrollCompletely) ? int(CompleteUnrolling)
+ : int(MayUnrollInner) ? int(InnerUnrolling)
+ : int(NoUnrolling)
+ )
+ : int(Traversal) == int(LinearVectorizedTraversal)
+ ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
+ : int(Traversal) == int(LinearTraversal)
+ ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
+ : int(NoUnrolling)
+ };
+
+#ifdef EIGEN_DEBUG_ASSIGN
+ static void debug()
+ {
+ EIGEN_DEBUG_VAR(DstIsAligned)
+ EIGEN_DEBUG_VAR(SrcIsAligned)
+ EIGEN_DEBUG_VAR(JointAlignment)
+ EIGEN_DEBUG_VAR(Derived::SizeAtCompileTime)
+ EIGEN_DEBUG_VAR(OtherDerived::CoeffReadCost)
+ EIGEN_DEBUG_VAR(InnerSize)
+ EIGEN_DEBUG_VAR(InnerMaxSize)
+ EIGEN_DEBUG_VAR(PacketSize)
+ EIGEN_DEBUG_VAR(StorageOrdersAgree)
+ EIGEN_DEBUG_VAR(MightVectorize)
+ EIGEN_DEBUG_VAR(MayLinearize)
+ EIGEN_DEBUG_VAR(MayInnerVectorize)
+ EIGEN_DEBUG_VAR(MayLinearVectorize)
+ EIGEN_DEBUG_VAR(MaySliceVectorize)
+ EIGEN_DEBUG_VAR(Traversal)
+ EIGEN_DEBUG_VAR(UnrollingLimit)
+ EIGEN_DEBUG_VAR(MayUnrollCompletely)
+ EIGEN_DEBUG_VAR(MayUnrollInner)
+ EIGEN_DEBUG_VAR(Unrolling)
+ }
+#endif
+};
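The strategy selected by assign_traits can be printed with the debug() member above. A minimal sketch, assuming EIGEN_DEBUG_ASSIGN is defined before the Eigen headers are included and that this pre-evaluator code path is the one compiled in; it reaches into internal:: machinery, so it is only a debugging aid, not a supported API.

    #define EIGEN_DEBUG_ASSIGN
    #include <iostream>
    #include <Eigen/Dense>

    int main() {
      // Prints DstIsAligned, Traversal, Unrolling, ... for each pair of types.
      Eigen::internal::assign_traits<Eigen::Matrix4f, Eigen::Matrix4f>::debug();
      Eigen::internal::assign_traits<Eigen::MatrixXf, Eigen::MatrixXf>::debug();
      return 0;
    }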
+
+/***************************************************************************
+* Part 2 : meta-unrollers
+***************************************************************************/
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_DefaultTraversal_CompleteUnrolling
+{
+ enum {
+ outer = Index / Derived1::InnerSizeAtCompileTime,
+ inner = Index % Derived1::InnerSizeAtCompileTime
+ };
+
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+ {
+ dst.copyCoeffByOuterInner(outer, inner, src);
+ assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+};
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_DefaultTraversal_InnerUnrolling
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
+ {
+ dst.copyCoeffByOuterInner(outer, Index, src);
+ assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_LinearTraversal_CompleteUnrolling
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+ {
+ dst.copyCoeff(Index, src);
+ assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+};
+
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_innervec_CompleteUnrolling
+{
+ enum {
+ outer = Index / Derived1::InnerSizeAtCompileTime,
+ inner = Index % Derived1::InnerSizeAtCompileTime,
+ JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
+ };
+
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+ {
+ dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
+ assign_innervec_CompleteUnrolling<Derived1, Derived2,
+ Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
+{
+ static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+};
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_innervec_InnerUnrolling
+{
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
+ {
+ dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
+ assign_innervec_InnerUnrolling<Derived1, Derived2,
+ Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
+{
+ static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
+};
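All of the unrollers above follow the same compile-time recursion: a primary template handles element Index and recurses on Index+1 (or Index+PacketSize), and a partial specialization with Index equal to Stop terminates the recursion with an empty run(). A stripped-down, Eigen-free sketch of that pattern:

    // Copy N elements from src to dst, fully unrolled at compile time.
    template <typename T, int Index, int Stop>
    struct copy_unroller {
      static inline void run(T* dst, const T* src) {
        dst[Index] = src[Index];
        copy_unroller<T, Index + 1, Stop>::run(dst, src);
      }
    };

    // Termination: when Index reaches Stop, do nothing.
    template <typename T, int Stop>
    struct copy_unroller<T, Stop, Stop> {
      static inline void run(T*, const T*) {}
    };

    int main() {
      float a[4] = {1.f, 2.f, 3.f, 4.f};
      float b[4] = {};
      copy_unroller<float, 0, 4>::run(b, a);   // expands to four scalar copies
      return 0;
    }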
+
+/***************************************************************************
+* Part 3 : implementation of all cases
+***************************************************************************/
+
+template<typename Derived1, typename Derived2,
+ int Traversal = assign_traits<Derived1, Derived2>::Traversal,
+ int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
+ int Version = Specialized>
+struct assign_impl;
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename Derived1, typename Derived2, int Unrolling, int Version>
+struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
+{
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &, const Derived2 &) { }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
+{
+ typedef typename Derived1::Index Index;
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ const Index innerSize = dst.innerSize();
+ const Index outerSize = dst.outerSize();
+ for(Index outer = 0; outer < outerSize; ++outer)
+ for(Index inner = 0; inner < innerSize; ++inner)
+ dst.copyCoeffByOuterInner(outer, inner, src);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+ {
+ assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
+ ::run(dst, src);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
+{
+ typedef typename Derived1::Index Index;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+ {
+ const Index outerSize = dst.outerSize();
+ for(Index outer = 0; outer < outerSize; ++outer)
+ assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
+ ::run(dst, src, outer);
+ }
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
+{
+ typedef typename Derived1::Index Index;
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ const Index size = dst.size();
+ for(Index i = 0; i < size; ++i)
+ dst.copyCoeff(i, src);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+ {
+ assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
+ ::run(dst, src);
+ }
+};
+
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
+{
+ typedef typename Derived1::Index Index;
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ const Index innerSize = dst.innerSize();
+ const Index outerSize = dst.outerSize();
+ const Index packetSize = packet_traits<typename Derived1::Scalar>::size;
+ for(Index outer = 0; outer < outerSize; ++outer)
+ for(Index inner = 0; inner < innerSize; inner+=packetSize)
+ dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
+{
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+ {
+ assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
+ ::run(dst, src);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
+{
+ typedef typename Derived1::Index Index;
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+ {
+ const Index outerSize = dst.outerSize();
+ for(Index outer = 0; outer < outerSize; ++outer)
+ assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
+ ::run(dst, src, outer);
+ }
+};
+
+/***************************
+*** Linear vectorization ***
+***************************/
+
+template <bool IsAligned = false>
+struct unaligned_assign_impl
+{
+ template <typename Derived, typename OtherDerived>
+ static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {}
+};
+
+template <>
+struct unaligned_assign_impl<false>
+{
+ // MSVC must not inline this function. If it does, it fails to optimize the
+ // packet access path.
+#ifdef _MSC_VER
+ template <typename Derived, typename OtherDerived>
+ static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
+#else
+ template <typename Derived, typename OtherDerived>
+ static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
+#endif
+ {
+ for (typename Derived::Index index = start; index < end; ++index)
+ dst.copyCoeff(index, src);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
+{
+ typedef typename Derived1::Index Index;
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+ {
+ const Index size = dst.size();
+ typedef packet_traits<typename Derived1::Scalar> PacketTraits;
+ enum {
+ packetSize = PacketTraits::size,
+ dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
+ srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
+ };
+ const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
+ : internal::first_aligned(&dst.coeffRef(0), size);
+ const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
+
+ unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
+
+ for(Index index = alignedStart; index < alignedEnd; index += packetSize)
+ {
+ dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src);
+ }
+
+ unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
+{
+ typedef typename Derived1::Index Index;
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+ {
+ enum { size = Derived1::SizeAtCompileTime,
+ packetSize = packet_traits<typename Derived1::Scalar>::size,
+ alignedSize = (size/packetSize)*packetSize };
+
+ assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
+ assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
+ }
+};
+
+/**************************
+*** Slice vectorization ***
+***************************/
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
+{
+ typedef typename Derived1::Index Index;
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ typedef packet_traits<typename Derived1::Scalar> PacketTraits;
+ enum {
+ packetSize = PacketTraits::size,
+ alignable = PacketTraits::AlignedOnScalar,
+ dstAlignment = alignable ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
+ srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
+ };
+ const Index packetAlignedMask = packetSize - 1;
+ const Index innerSize = dst.innerSize();
+ const Index outerSize = dst.outerSize();
+ const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
+ Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
+ : internal::first_aligned(&dst.coeffRef(0,0), innerSize);
+
+ for(Index outer = 0; outer < outerSize; ++outer)
+ {
+ const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+ // do the non-vectorizable part of the assignment
+ for(Index inner = 0; inner<alignedStart ; ++inner)
+ dst.copyCoeffByOuterInner(outer, inner, src);
+
+ // do the vectorizable part of the assignment
+ for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
+ dst.template copyPacketByOuterInner<Derived2, dstAlignment, Unaligned>(outer, inner, src);
+
+ // do the non-vectorizable part of the assignment
+ for(Index inner = alignedEnd; inner<innerSize ; ++inner)
+ dst.copyCoeffByOuterInner(outer, inner, src);
+
+ alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+ }
+ }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* Part 4 : implementation of DenseBase methods
+***************************************************************************/
+
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
+ ::lazyAssign(const DenseBase<OtherDerived>& other)
+{
+ enum{
+ SameType = internal::is_same<typename Derived::Scalar,typename OtherDerived::Scalar>::value
+ };
+
+ EIGEN_STATIC_ASSERT_LVALUE(Derived)
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
+ EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+#ifdef EIGEN_TEST_EVALUATORS
+
+#ifdef EIGEN_DEBUG_ASSIGN
+ internal::copy_using_evaluator_traits<Derived, OtherDerived>::debug();
+#endif
+ eigen_assert(rows() == other.rows() && cols() == other.cols());
+ internal::call_dense_assignment_loop(derived(),other.derived());
+
+#else // EIGEN_TEST_EVALUATORS
+
+#ifdef EIGEN_DEBUG_ASSIGN
+ internal::assign_traits<Derived, OtherDerived>::debug();
+#endif
+ eigen_assert(rows() == other.rows() && cols() == other.cols());
+ internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal)
+ : int(InvalidTraversal)>::run(derived(),other.derived());
+
+#endif // EIGEN_TEST_EVALUATORS
+
+#ifndef EIGEN_NO_DEBUG
+ checkTransposeAliasing(other.derived());
+#endif
+ return derived();
+}
+
+namespace internal {
+
+template<typename Derived, typename OtherDerived,
+ bool EvalBeforeAssigning = (int(internal::traits<OtherDerived>::Flags) & EvalBeforeAssigningBit) != 0,
+ bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1)
+ | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
+ // revert to || as soon as not needed anymore.
+ (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1))
+ && int(Derived::SizeAtCompileTime) != 1>
+struct assign_selector;
+
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,false,false> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
+ template<typename ActualDerived, typename ActualOtherDerived>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; }
+};
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,true,false> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
+};
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,false,true> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
+ template<typename ActualDerived, typename ActualOtherDerived>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose<ActualDerived> dstTrans(dst); other.evalTo(dstTrans); return dst; }
+};
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,true,true> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
+};
+
+} // end namespace internal
+
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
+{
+ return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
+}
+
+template<typename Derived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
+{
+ return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+}
+
+template<typename Derived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
+{
+ return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+}
+
+template<typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
+{
+ return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
+}
+
+template<typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
+{
+ return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
+{
+ return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_ASSIGN_H
diff --git a/third_party/eigen3/Eigen/src/Core/AssignEvaluator.h b/third_party/eigen3/Eigen/src/Core/AssignEvaluator.h
new file mode 100644
index 0000000000..b1e304e2b1
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/AssignEvaluator.h
@@ -0,0 +1,842 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2011-2013 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ASSIGN_EVALUATOR_H
+#define EIGEN_ASSIGN_EVALUATOR_H
+
+namespace Eigen {
+
+// This implementation is based on Assign.h
+
+namespace internal {
+
+/***************************************************************************
+* Part 1 : the logic deciding a strategy for traversal and unrolling *
+***************************************************************************/
+
+// copy_using_evaluator_traits is based on assign_traits
+
+template <typename Derived, typename OtherDerived>
+struct copy_using_evaluator_traits
+{
+public:
+ enum {
+ DstIsAligned = Derived::Flags & AlignedBit,
+ DstHasDirectAccess = Derived::Flags & DirectAccessBit,
+ SrcIsAligned = OtherDerived::Flags & AlignedBit,
+ JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned,
+ SrcEvalBeforeAssign = (evaluator_traits<OtherDerived>::HasEvalTo == 1)
+ };
+
+private:
+ enum {
+ InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
+ : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
+ : int(Derived::RowsAtCompileTime),
+ InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
+ : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
+ : int(Derived::MaxRowsAtCompileTime),
+ MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
+ PacketSize = packet_traits<typename Derived::Scalar>::size
+ };
+
+ enum {
+ StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
+ MightVectorize = StorageOrdersAgree
+ && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
+ MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
+ && int(DstIsAligned) && int(SrcIsAligned),
+ MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
+ MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
+ && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
+ /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
+ so it's only good for large enough sizes. */
+ MaySliceVectorize = MightVectorize && DstHasDirectAccess
+ && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
+ /* slice vectorization can be slow, so we only want it if the slices are big, which is
+ indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
+ in a fixed-size matrix */
+ };
+
+public:
+ enum {
+ Traversal = int(SrcEvalBeforeAssign) ? int(AllAtOnceTraversal)
+ : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
+ : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+ : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
+ : int(MayLinearize) ? int(LinearTraversal)
+ : int(DefaultTraversal),
+ Vectorized = int(Traversal) == InnerVectorizedTraversal
+ || int(Traversal) == LinearVectorizedTraversal
+ || int(Traversal) == SliceVectorizedTraversal
+ };
+
+private:
+ enum {
+ UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
+ MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
+ && int(OtherDerived::CoeffReadCost) != Dynamic
+ && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
+ MayUnrollInner = int(InnerSize) != Dynamic
+ && int(OtherDerived::CoeffReadCost) != Dynamic
+ && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
+ };
+
+public:
+ enum {
+ Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
+ ? (
+ int(MayUnrollCompletely) ? int(CompleteUnrolling)
+ : int(MayUnrollInner) ? int(InnerUnrolling)
+ : int(NoUnrolling)
+ )
+ : int(Traversal) == int(LinearVectorizedTraversal)
+ ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling)
+ : int(NoUnrolling) )
+ : int(Traversal) == int(LinearTraversal)
+ ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
+ : int(NoUnrolling) )
+ : int(NoUnrolling)
+ };
+
+#ifdef EIGEN_DEBUG_ASSIGN
+ static void debug()
+ {
+ EIGEN_DEBUG_VAR(DstIsAligned)
+ EIGEN_DEBUG_VAR(SrcIsAligned)
+ EIGEN_DEBUG_VAR(JointAlignment)
+ EIGEN_DEBUG_VAR(InnerSize)
+ EIGEN_DEBUG_VAR(InnerMaxSize)
+ EIGEN_DEBUG_VAR(PacketSize)
+ EIGEN_DEBUG_VAR(StorageOrdersAgree)
+ EIGEN_DEBUG_VAR(MightVectorize)
+ EIGEN_DEBUG_VAR(MayLinearize)
+ EIGEN_DEBUG_VAR(MayInnerVectorize)
+ EIGEN_DEBUG_VAR(MayLinearVectorize)
+ EIGEN_DEBUG_VAR(MaySliceVectorize)
+ EIGEN_DEBUG_VAR(Traversal)
+ EIGEN_DEBUG_VAR(UnrollingLimit)
+ EIGEN_DEBUG_VAR(MayUnrollCompletely)
+ EIGEN_DEBUG_VAR(MayUnrollInner)
+ EIGEN_DEBUG_VAR(Unrolling)
+ }
+#endif
+};
+
+/***************************************************************************
+* Part 2 : meta-unrollers
+***************************************************************************/
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
+{
+ typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
+ typedef typename DstEvaluatorType::XprType DstXprType;
+
+ enum {
+ outer = Index / DstXprType::InnerSizeAtCompileTime,
+ inner = Index % DstXprType::InnerSizeAtCompileTime
+ };
+
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ kernel.assignCoeffByOuterInner(outer, inner);
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
+ }
+};
+
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
+{
+ static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
+{
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer)
+ {
+ kernel.assignCoeffByOuterInner(outer, Index);
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index+1, Stop>::run(kernel, outer);
+ }
+};
+
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
+{
+ static EIGEN_STRONG_INLINE void run(Kernel&, int) { }
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
+{
+ static EIGEN_STRONG_INLINE void run(Kernel& kernel)
+ {
+ kernel.assignCoeff(Index);
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
+ }
+};
+
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
+{
+ static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling
+{
+ typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
+ typedef typename DstEvaluatorType::XprType DstXprType;
+
+ enum {
+ outer = Index / DstXprType::InnerSizeAtCompileTime,
+ inner = Index % DstXprType::InnerSizeAtCompileTime,
+ JointAlignment = Kernel::AssignmentTraits::JointAlignment
+ };
+
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner);
+ enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
+ }
+};
+
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
+{
+ static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_innervec_InnerUnrolling
+{
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer)
+ {
+ kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index);
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
+ }
+};
+
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
+{
+ static EIGEN_STRONG_INLINE void run(Kernel &, int) { }
+};
+
+/***************************************************************************
+* Part 3 : implementation of all cases
+***************************************************************************/
+
+// dense_assignment_loop is based on assign_impl
+
+template<typename Kernel,
+ int Traversal = Kernel::AssignmentTraits::Traversal,
+ int Unrolling = Kernel::AssignmentTraits::Unrolling>
+struct dense_assignment_loop;
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
+{
+ static void run(Kernel &kernel)
+ {
+ typedef typename Kernel::Index Index;
+
+ for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
+ for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
+ kernel.assignCoeffByOuterInner(outer, inner);
+ }
+ }
+ }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
+{
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
+{
+ typedef typename Kernel::Index Index;
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+
+ const Index outerSize = kernel.outerSize();
+ for(Index outer = 0; outer < outerSize; ++outer)
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
+ }
+};
+
+/***************************
+*** Linear vectorization ***
+***************************/
+
+
+// The goal of unaligned_dense_assignment_loop is simply to factor out the handling
+// of the non-vectorizable beginning and ending parts
+
+template <bool IsAligned = false>
+struct unaligned_dense_assignment_loop
+{
+ // if IsAligned = true, then do nothing
+ template <typename Kernel>
+ static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {}
+};
+
+template <>
+struct unaligned_dense_assignment_loop<false>
+{
+ // MSVC must not inline these functions. If it does, it fails to optimize the
+ // packet access path.
+ // FIXME check which version exhibits this issue
+#if EIGEN_COMP_MSVC
+ template <typename Kernel>
+ static EIGEN_DONT_INLINE void run(Kernel &kernel,
+ typename Kernel::Index start,
+ typename Kernel::Index end)
+#else
+ template <typename Kernel>
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel,
+ typename Kernel::Index start,
+ typename Kernel::Index end)
+#endif
+ {
+ for (typename Kernel::Index index = start; index < end; ++index)
+ kernel.assignCoeff(index);
+ }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
+{
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::Index Index;
+
+ const Index size = kernel.size();
+ typedef packet_traits<typename Kernel::Scalar> PacketTraits;
+ enum {
+ packetSize = PacketTraits::size,
+ dstIsAligned = int(Kernel::AssignmentTraits::DstIsAligned),
+ dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
+ srcAlignment = Kernel::AssignmentTraits::JointAlignment
+ };
+ const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size);
+ const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
+
+ unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
+
+ for(Index index = alignedStart; index < alignedEnd; index += packetSize)
+ kernel.template assignPacket<dstAlignment, srcAlignment>(index);
+
+ unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
+ }
+};
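+// Illustrative note (added for clarity, not part of the original sources): with
+// size==11, packetSize==4 and alignedStart==2, the loop above assigns coefficients
+// 0..1 one by one, then packets at indices 2 and 6 (alignedEnd == 2 + ((11-2)/4)*4 == 10),
+// and finally coefficient 10 in the scalar tail.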
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
+{
+ typedef typename Kernel::Index Index;
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+
+ enum { size = DstXprType::SizeAtCompileTime,
+ packetSize = packet_traits<typename Kernel::Scalar>::size,
+ alignedSize = (size/packetSize)*packetSize };
+
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
+ }
+};
+
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
+{
+ static inline void run(Kernel &kernel)
+ {
+ typedef typename Kernel::Index Index;
+
+ const Index innerSize = kernel.innerSize();
+ const Index outerSize = kernel.outerSize();
+ const Index packetSize = packet_traits<typename Kernel::Scalar>::size;
+ for(Index outer = 0; outer < outerSize; ++outer)
+ for(Index inner = 0; inner < innerSize; inner+=packetSize)
+ kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, inner);
+ }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
+{
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
+{
+ typedef typename Kernel::Index Index;
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ const Index outerSize = kernel.outerSize();
+ for(Index outer = 0; outer < outerSize; ++outer)
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
+ }
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
+{
+ static inline void run(Kernel &kernel)
+ {
+ typedef typename Kernel::Index Index;
+ const Index size = kernel.size();
+ for(Index i = 0; i < size; ++i)
+ kernel.assignCoeff(i);
+ }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
+{
+ static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ }
+};
+
+/**************************
+*** Slice vectorization ***
+***************************/
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
+{
+ static inline void run(Kernel &kernel)
+ {
+ typedef typename Kernel::Index Index;
+ typedef packet_traits<typename Kernel::Scalar> PacketTraits;
+ enum {
+ packetSize = PacketTraits::size,
+ alignable = PacketTraits::AlignedOnScalar,
+ dstAlignment = alignable ? Aligned : int(Kernel::AssignmentTraits::DstIsAligned)
+ };
+ const Index packetAlignedMask = packetSize - 1;
+ const Index innerSize = kernel.innerSize();
+ const Index outerSize = kernel.outerSize();
+ const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
+ Index alignedStart = ((!alignable) || Kernel::AssignmentTraits::DstIsAligned) ? 0
+ : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0,0), innerSize);
+
+ for(Index outer = 0; outer < outerSize; ++outer)
+ {
+ const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+ // do the non-vectorizable part of the assignment
+ for(Index inner = 0; inner<alignedStart ; ++inner)
+ kernel.assignCoeffByOuterInner(outer, inner);
+
+ // do the vectorizable part of the assignment
+ for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
+ kernel.template assignPacketByOuterInner<dstAlignment, Unaligned>(outer, inner);
+
+ // do the non-vectorizable part of the assignment
+ for(Index inner = alignedEnd; inner<innerSize ; ++inner)
+ kernel.assignCoeffByOuterInner(outer, inner);
+
+ alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+ }
+ }
+};
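+// Illustrative note (added for clarity, not part of the original sources): assuming
+// packetSize==4 and an outer stride of 6, alignedStep == (4 - 6%4) & 3 == 2, so if the
+// first inner vector happens to start aligned (alignedStart==0) the aligned offset
+// alternates between 0 and 2 from one outer index to the next, which is what the
+// per-outer update of alignedStart above reproduces.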
+
+/****************************
+*** All-at-once traversal ***
+****************************/
+
+// TODO: this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael)
+// Indeed, what to do with the kernel's functor??
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, AllAtOnceTraversal, NoUnrolling>
+{
+ static inline void run(Kernel & kernel)
+ {
+ // Evaluate rhs in a temporary to prevent aliasing problems in a = a * a;
+ // TODO: Do not pass the xpr object to evalTo() (Jitse)
+ kernel.srcEvaluator().evalTo(kernel.dstEvaluator(), kernel.dstExpression());
+ }
+};
+
+/***************************************************************************
+* Part 4 : Generic Assignment routine
+***************************************************************************/
+
+// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
+// to another dense writable evaluator.
+// It is parametrized by the two evaluators and the actual assignment functor.
+// This abstraction level keeps the evaluation loops as simple and as generic as possible.
+// One can customize the assignment using this generic dense_assignment_kernel with different
+// functors, or by overloading it completely, bypassing the functor.
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
+class generic_dense_assignment_kernel
+{
+protected:
+ typedef typename DstEvaluatorTypeT::XprType DstXprType;
+ typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
+public:
+
+ typedef DstEvaluatorTypeT DstEvaluatorType;
+ typedef SrcEvaluatorTypeT SrcEvaluatorType;
+ typedef typename DstEvaluatorType::Scalar Scalar;
+ typedef typename DstEvaluatorType::Index Index;
+ typedef copy_using_evaluator_traits<DstXprType, SrcXprType> AssignmentTraits;
+
+
+ generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+ : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
+ {}
+
+ Index size() const { return m_dstExpr.size(); }
+ Index innerSize() const { return m_dstExpr.innerSize(); }
+ Index outerSize() const { return m_dstExpr.outerSize(); }
+ Index outerStride() const { return m_dstExpr.outerStride(); }
+
+ // TODO get rid of this one:
+ DstXprType& dstExpression() const { return m_dstExpr; }
+
+ DstEvaluatorType& dstEvaluator() { return m_dst; }
+ const SrcEvaluatorType& srcEvaluator() const { return m_src; }
+
+ void assignCoeff(Index row, Index col)
+ {
+ m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
+ }
+
+ void assignCoeff(Index index)
+ {
+ m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
+ }
+
+ void assignCoeffByOuterInner(Index outer, Index inner)
+ {
+ Index row = rowIndexByOuterInner(outer, inner);
+ Index col = colIndexByOuterInner(outer, inner);
+ assignCoeff(row, col);
+ }
+
+
+ template<int StoreMode, int LoadMode>
+ void assignPacket(Index row, Index col)
+ {
+ m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode>(row,col));
+ }
+
+ template<int StoreMode, int LoadMode>
+ void assignPacket(Index index)
+ {
+ m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode>(index));
+ }
+
+ template<int StoreMode, int LoadMode>
+ void assignPacketByOuterInner(Index outer, Index inner)
+ {
+ Index row = rowIndexByOuterInner(outer, inner);
+ Index col = colIndexByOuterInner(outer, inner);
+ assignPacket<StoreMode,LoadMode>(row, col);
+ }
+
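+ // Note (added for clarity): for a column-major expression "outer" indexes columns and
+ // "inner" indexes rows; for a row-major expression the roles are swapped. The two
+ // helpers below implement exactly that mapping, with the vector cases short-circuited.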
+ static Index rowIndexByOuterInner(Index outer, Index inner)
+ {
+ typedef typename DstEvaluatorType::ExpressionTraits Traits;
+ return int(Traits::RowsAtCompileTime) == 1 ? 0
+ : int(Traits::ColsAtCompileTime) == 1 ? inner
+ : int(Traits::Flags)&RowMajorBit ? outer
+ : inner;
+ }
+
+ static Index colIndexByOuterInner(Index outer, Index inner)
+ {
+ typedef typename DstEvaluatorType::ExpressionTraits Traits;
+ return int(Traits::ColsAtCompileTime) == 1 ? 0
+ : int(Traits::RowsAtCompileTime) == 1 ? inner
+ : int(Traits::Flags)&RowMajorBit ? inner
+ : outer;
+ }
+
+protected:
+ DstEvaluatorType& m_dst;
+ const SrcEvaluatorType& m_src;
+ const Functor &m_functor;
+ // TODO find a way to avoid the need for the original expression
+ DstXprType& m_dstExpr;
+};
+
+template<typename DstXprType, typename SrcXprType, typename Functor>
+void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+{
+#ifdef EIGEN_DEBUG_ASSIGN
+ // TODO these traits should be computed from information provided by the evaluators
+ internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
+#endif
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+
+ typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+ typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+ DstEvaluatorType dstEvaluator(dst);
+ SrcEvaluatorType srcEvaluator(src);
+
+ typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
+ Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
+
+ dense_assignment_loop<Kernel>::run(kernel);
+}
+
+template<typename DstXprType, typename SrcXprType>
+void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+{
+ call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
+}
+
+/***************************************************************************
+* Part 5 : Entry points
+***************************************************************************/
+
+// Based on DenseBase::lazyAssign()
+// The following functions are just for testing; they are meant to be moved to operator= and the like.
+
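+// A minimal usage sketch (illustrative only, not from the original sources): for two
+// equally-sized MatrixXf objects A and B, internal::copy_using_evaluator(A, B + B)
+// performs A = B + B through the evaluator-based path below instead of the classic
+// Assign.h implementation.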
+template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
+EIGEN_STRONG_INLINE
+const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst,
+ const EigenBase<SrcXprType>& src)
+{
+ return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
+}
+
+template<typename XprType, int AssumeAliasing = evaluator_traits<XprType>::AssumeAliasing>
+struct AddEvalIfAssumingAliasing;
+
+template<typename XprType>
+struct AddEvalIfAssumingAliasing<XprType, 0>
+{
+ static const XprType& run(const XprType& xpr)
+ {
+ return xpr;
+ }
+};
+
+template<typename XprType>
+struct AddEvalIfAssumingAliasing<XprType, 1>
+{
+ static const EvalToTemp<XprType> run(const XprType& xpr)
+ {
+ return EvalToTemp<XprType>(xpr);
+ }
+};
+
+template<typename DstXprType, typename SrcXprType, typename Functor>
+EIGEN_STRONG_INLINE
+const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
+{
+ return noalias_copy_using_evaluator(dst.const_cast_derived(),
+ AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()),
+ func
+ );
+}
+
+// this mimics operator=
+template<typename DstXprType, typename SrcXprType>
+EIGEN_STRONG_INLINE
+const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
+{
+ return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
+}
+
+template<typename DstXprType, typename SrcXprType, typename Functor>
+EIGEN_STRONG_INLINE
+const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
+{
+#ifdef EIGEN_DEBUG_ASSIGN
+ internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
+#endif
+#ifdef EIGEN_NO_AUTOMATIC_RESIZING
+ eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size())
+ : (dst.rows() == src.rows() && dst.cols() == src.cols())))
+ && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
+#else
+ dst.const_cast_derived().resizeLike(src.derived());
+#endif
+ call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
+ return dst.derived();
+}
+
+template<typename DstXprType, typename SrcXprType, typename Functor>
+EIGEN_STRONG_INLINE
+const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
+{
+ call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
+ return dst.derived();
+}
+
+// Based on DenseBase::swap()
+// TODO: Check whether we need to do something special for swapping two
+// Arrays or Matrices. (Jitse)
+
+// Overload default assignPacket behavior for swapping them
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
+class swap_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> >
+{
+ typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> > Base;
+ typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar;
+ using Base::m_dst;
+ using Base::m_src;
+ using Base::m_functor;
+
+public:
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::Index Index;
+ typedef typename Base::DstXprType DstXprType;
+
+ swap_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, DstXprType& dstExpr)
+ : Base(dst, src, swap_assign_op<Scalar>(), dstExpr)
+ {}
+
+ template<int StoreMode, int LoadMode>
+ void assignPacket(Index row, Index col)
+ {
+ m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
+ }
+
+ template<int StoreMode, int LoadMode>
+ void assignPacket(Index index)
+ {
+ m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
+ }
+
+ // TODO find a simple way to avoid having to copy/paste this function from generic_dense_assignment_kernel; by simple I mean no CRTP (Gael)
+ template<int StoreMode, int LoadMode>
+ void assignPacketByOuterInner(Index outer, Index inner)
+ {
+ Index row = Base::rowIndexByOuterInner(outer, inner);
+ Index col = Base::colIndexByOuterInner(outer, inner);
+ assignPacket<StoreMode,LoadMode>(row, col);
+ }
+};
+
+template<typename DstXprType, typename SrcXprType>
+void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+{
+ // TODO there is too much redundancy with call_dense_assignment_loop
+
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+
+ typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+ typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+ DstEvaluatorType dstEvaluator(dst);
+ SrcEvaluatorType srcEvaluator(src);
+
+ typedef swap_kernel<DstEvaluatorType,SrcEvaluatorType> Kernel;
+ Kernel kernel(dstEvaluator, srcEvaluator, dst.const_cast_derived());
+
+ dense_assignment_loop<Kernel>::run(kernel);
+}
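+// Usage sketch (illustrative only, not from the original sources): for two same-sized
+// MatrixXd objects A and B, internal::swap_using_evaluator(A, B) exchanges their
+// coefficients in place, using the packet-level swapPacket path whenever the chosen
+// traversal is vectorized.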
+
+// Based on MatrixBase::operator+= (in CwiseBinaryOp.h)
+template<typename DstXprType, typename SrcXprType>
+void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
+{
+ typedef typename DstXprType::Scalar Scalar;
+ copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
+}
+
+// Based on ArrayBase::operator+=
+template<typename DstXprType, typename SrcXprType>
+void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+ typedef typename DstXprType::Scalar Scalar;
+ copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
+}
+
+// TODO: Add add_assign_using_evaluator for EigenBase ? (Jitse)
+
+template<typename DstXprType, typename SrcXprType>
+void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
+{
+ typedef typename DstXprType::Scalar Scalar;
+ copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
+}
+
+template<typename DstXprType, typename SrcXprType>
+void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+ typedef typename DstXprType::Scalar Scalar;
+ copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
+}
+
+template<typename DstXprType, typename SrcXprType>
+void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+ typedef typename DstXprType::Scalar Scalar;
+ copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op<Scalar>());
+}
+
+template<typename DstXprType, typename SrcXprType>
+void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+ typedef typename DstXprType::Scalar Scalar;
+ copy_using_evaluator(dst.derived(), src.derived(), div_assign_op<Scalar>());
+}
+
+
+} // namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_ASSIGN_EVALUATOR_H
diff --git a/third_party/eigen3/Eigen/src/Core/Assign_MKL.h b/third_party/eigen3/Eigen/src/Core/Assign_MKL.h
new file mode 100644
index 0000000000..97134ffd72
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Assign_MKL.h
@@ -0,0 +1,225 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ * Content : Eigen bindings to Intel(R) MKL
+ * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_ASSIGN_VML_H
+#define EIGEN_ASSIGN_VML_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename Op> struct vml_call
+{ enum { IsSupported = 0 }; };
+
+template<typename Dst, typename Src, typename UnaryOp>
+class vml_assign_traits
+{
+ private:
+ enum {
+ DstHasDirectAccess = Dst::Flags & DirectAccessBit,
+ SrcHasDirectAccess = Src::Flags & DirectAccessBit,
+
+ StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
+ InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
+ : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
+ : int(Dst::RowsAtCompileTime),
+ InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
+ : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
+ : int(Dst::MaxRowsAtCompileTime),
+ MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
+
+ MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
+ && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
+ MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
+ VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
+ LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
+ MayEnableVml = MightEnableVml && LargeEnough,
+ MayLinearize = MayEnableVml && MightLinearize
+ };
+ public:
+ enum {
+ Traversal = MayLinearize ? LinearVectorizedTraversal
+ : MayEnableVml ? InnerVectorizedTraversal
+ : DefaultTraversal
+ };
+};
+
+template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
+ int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
+struct vml_assign_impl
+ : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
+{
+};
+
+template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
+struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
+{
+ typedef typename Derived1::Scalar Scalar;
+ typedef typename Derived1::Index Index;
+ static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
+ {
+ // in case we want to (or have to) skip VML at runtime we can call:
+ // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
+ const Index innerSize = dst.innerSize();
+ const Index outerSize = dst.outerSize();
+ for(Index outer = 0; outer < outerSize; ++outer) {
+ const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
+ &(src.nestedExpression().coeffRef(0, outer));
+ Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
+ vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
+ }
+ }
+};
+
+template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
+struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
+{
+ static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
+ {
+ // in case we want to (or have to) skip VML at runtime we can call:
+ // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
+ vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
+ }
+};
+
+// Macros
+
+#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
+ template<typename Derived1, typename Derived2, typename UnaryOp> \
+ struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
+ static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
+ vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
+ } \
+ };
+
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
+
+
+#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
+#define EIGEN_MKL_VML_MODE VML_HA
+#else
+#define EIGEN_MKL_VML_MODE VML_LA
+#endif
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
+ template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
+ enum { IsSupported = 1 }; \
+ static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
+ int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
+ VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \
+ } \
+ };
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
+ template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
+ enum { IsSupported = 1 }; \
+ static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
+ int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
+ MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
+ VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \
+ } \
+ };
+
+#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
+ template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
+ enum { IsSupported = 1 }; \
+ static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
+ int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
+ EIGENTYPE exponent = func.m_exponent; \
+ MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
+ VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \
+ (VMLTYPE*)dst, &vmlMode); \
+ } \
+ };
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
+
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
+ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
+
+
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(atan, Atan)
+//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
+
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
+
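+// Illustrative note (added for clarity, not part of the original sources): with the
+// declarations above, an assignment such as a = b.sin() on float or double arrays is
+// dispatched to the matching VML kernel (e.g. vmsSin / vmdSin) whenever vml_assign_traits
+// selects a vectorized traversal, instead of going through Eigen's generic loops.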
+// The vm*powx functions are not available in the Windows version of MKL.
+#ifndef _WIN32
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_ASSIGN_VML_H
diff --git a/third_party/eigen3/Eigen/src/Core/BandMatrix.h b/third_party/eigen3/Eigen/src/Core/BandMatrix.h
new file mode 100644
index 0000000000..ffd7fe8b30
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/BandMatrix.h
@@ -0,0 +1,334 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BANDMATRIX_H
+#define EIGEN_BANDMATRIX_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename Derived>
+class BandMatrixBase : public EigenBase<Derived>
+{
+ public:
+
+ enum {
+ Flags = internal::traits<Derived>::Flags,
+ CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
+ RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+ ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+ MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+ Supers = internal::traits<Derived>::Supers,
+ Subs = internal::traits<Derived>::Subs,
+ Options = internal::traits<Derived>::Options
+ };
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType;
+ typedef typename DenseMatrixType::Index Index;
+ typedef typename internal::traits<Derived>::CoefficientsType CoefficientsType;
+ typedef EigenBase<Derived> Base;
+
+ protected:
+ enum {
+ DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic))
+ ? 1 + Supers + Subs
+ : Dynamic,
+ SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime)
+ };
+
+ public:
+
+ using Base::derived;
+ using Base::rows;
+ using Base::cols;
+
+ /** \returns the number of super diagonals */
+ inline Index supers() const { return derived().supers(); }
+
+ /** \returns the number of sub diagonals */
+ inline Index subs() const { return derived().subs(); }
+
+ /** \returns an expression of the underlying coefficient matrix */
+ inline const CoefficientsType& coeffs() const { return derived().coeffs(); }
+
+ /** \returns an expression of the underlying coefficient matrix */
+ inline CoefficientsType& coeffs() { return derived().coeffs(); }
+
+ /** \returns a vector expression of the \a i -th column;
+ * only the meaningful part is returned.
+ * \warning the internal storage must be column-major. */
+ inline Block<CoefficientsType,Dynamic,1> col(Index i)
+ {
+ EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
+ Index start = 0;
+ Index len = coeffs().rows();
+ if (i<=supers())
+ {
+ start = supers()-i;
+ len = (std::min)(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
+ }
+ else if (i>=rows()-subs())
+ len = std::max<Index>(0,coeffs().rows() - (i + 1 - rows() + subs()));
+ return Block<CoefficientsType,Dynamic,1>(coeffs(), start, i, len, 1);
+ }
+
+ /** \returns a vector expression of the main diagonal */
+ inline Block<CoefficientsType,1,SizeAtCompileTime> diagonal()
+ { return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
+
+ /** \returns a vector expression of the main diagonal (const version) */
+ inline const Block<const CoefficientsType,1,SizeAtCompileTime> diagonal() const
+ { return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
+
+ template<int Index> struct DiagonalIntReturnType {
+ enum {
+ ReturnOpposite = (Options&SelfAdjoint) && (((Index)>0 && Supers==0) || ((Index)<0 && Subs==0)),
+ Conjugate = ReturnOpposite && NumTraits<Scalar>::IsComplex,
+ ActualIndex = ReturnOpposite ? -Index : Index,
+ DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic)
+ ? Dynamic
+ : (ActualIndex<0
+ ? EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex)
+ : EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex))
+ };
+ typedef Block<CoefficientsType,1, DiagonalSize> BuildType;
+ typedef typename internal::conditional<Conjugate,
+ CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>,BuildType >,
+ BuildType>::type Type;
+ };
+
+ /** \returns a vector expression of the \a N -th sub or super diagonal */
+ template<int N> inline typename DiagonalIntReturnType<N>::Type diagonal()
+ {
+ return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
+ }
+
+ /** \returns a vector expression of the \a N -th sub or super diagonal */
+ template<int N> inline const typename DiagonalIntReturnType<N>::Type diagonal() const
+ {
+ return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
+ }
+
+ /** \returns a vector expression of the \a i -th sub or super diagonal */
+ inline Block<CoefficientsType,1,Dynamic> diagonal(Index i)
+ {
+ eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
+ return Block<CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
+ }
+
+ /** \returns a vector expression of the \a i -th sub or super diagonal */
+ inline const Block<const CoefficientsType,1,Dynamic> diagonal(Index i) const
+ {
+ eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
+ return Block<const CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
+ }
+
+ template<typename Dest> inline void evalTo(Dest& dst) const
+ {
+ dst.resize(rows(),cols());
+ dst.setZero();
+ dst.diagonal() = diagonal();
+ for (Index i=1; i<=supers();++i)
+ dst.diagonal(i) = diagonal(i);
+ for (Index i=1; i<=subs();++i)
+ dst.diagonal(-i) = diagonal(-i);
+ }
+
+ DenseMatrixType toDenseMatrix() const
+ {
+ DenseMatrixType res(rows(),cols());
+ evalTo(res);
+ return res;
+ }
+
+ protected:
+
+ inline Index diagonalLength(Index i) const
+ { return i<0 ? (std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); }
+};
+
+/**
+ * \class BandMatrix
+ * \ingroup Core_Module
+ *
+ * \brief Represents a rectangular matrix with a banded storage
+ *
+ * \param _Scalar Numeric type, e.g. float, double, int
+ * \param Rows Number of rows, or \b Dynamic
+ * \param Cols Number of columns, or \b Dynamic
+ * \param Supers Number of super diagonals
+ * \param Subs Number of sub diagonals
+ * \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint.
+ * The former controls \ref TopicStorageOrders "storage order", and defaults to
+ * column-major. The latter controls whether the matrix represents a selfadjoint
+ * matrix, in which case either Supers or Subs has to be null.
+ *
+ * \sa class TridiagonalMatrix
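+ *
+ * A minimal usage sketch (illustrative only; note that this class lives in the
+ * internal namespace):
+ * \code
+ * internal::BandMatrix<float,Dynamic,Dynamic,1,2> bm(5,5,1,2); // 5x5 with 1 super- and 2 sub-diagonals
+ * bm.diagonal().setOnes();
+ * MatrixXf dense = bm.toDenseMatrix();
+ * \endcode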
+ */
+
+template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
+struct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+ typedef _Scalar Scalar;
+ typedef Dense StorageKind;
+ typedef DenseIndex Index;
+ enum {
+ CoeffReadCost = NumTraits<Scalar>::ReadCost,
+ RowsAtCompileTime = _Rows,
+ ColsAtCompileTime = _Cols,
+ MaxRowsAtCompileTime = _Rows,
+ MaxColsAtCompileTime = _Cols,
+ Flags = LvalueBit,
+ Supers = _Supers,
+ Subs = _Subs,
+ Options = _Options,
+ DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
+ };
+ typedef Matrix<Scalar,DataRowsAtCompileTime,ColsAtCompileTime,Options&RowMajor?RowMajor:ColMajor> CoefficientsType;
+};
+
+template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
+class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
+{
+ public:
+
+ typedef typename internal::traits<BandMatrix>::Scalar Scalar;
+ typedef typename internal::traits<BandMatrix>::Index Index;
+ typedef typename internal::traits<BandMatrix>::CoefficientsType CoefficientsType;
+
+ inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
+ : m_coeffs(1+supers+subs,cols),
+ m_rows(rows), m_supers(supers), m_subs(subs)
+ {
+ }
+
+ /** \returns the number of rows */
+ inline Index rows() const { return m_rows.value(); }
+
+ /** \returns the number of columns */
+ inline Index cols() const { return m_coeffs.cols(); }
+
+ /** \returns the number of super diagonals */
+ inline Index supers() const { return m_supers.value(); }
+
+ /** \returns the number of sub diagonals */
+ inline Index subs() const { return m_subs.value(); }
+
+ inline const CoefficientsType& coeffs() const { return m_coeffs; }
+ inline CoefficientsType& coeffs() { return m_coeffs; }
+
+ protected:
+
+ CoefficientsType m_coeffs;
+ internal::variable_if_dynamic<Index, Rows> m_rows;
+ internal::variable_if_dynamic<Index, Supers> m_supers;
+ internal::variable_if_dynamic<Index, Subs> m_subs;
+};
+
+template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
+class BandMatrixWrapper;
+
+template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
+struct traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+ typedef typename _CoefficientsType::Scalar Scalar;
+ typedef typename _CoefficientsType::StorageKind StorageKind;
+ typedef typename _CoefficientsType::Index Index;
+ enum {
+ CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost,
+ RowsAtCompileTime = _Rows,
+ ColsAtCompileTime = _Cols,
+ MaxRowsAtCompileTime = _Rows,
+ MaxColsAtCompileTime = _Cols,
+ Flags = LvalueBit,
+ Supers = _Supers,
+ Subs = _Subs,
+ Options = _Options,
+ DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
+ };
+ typedef _CoefficientsType CoefficientsType;
+};
+
+template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
+class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+ public:
+
+ typedef typename internal::traits<BandMatrixWrapper>::Scalar Scalar;
+ typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType;
+ typedef typename internal::traits<BandMatrixWrapper>::Index Index;
+
+ inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
+ : m_coeffs(coeffs),
+ m_rows(rows), m_supers(supers), m_subs(subs)
+ {
+ EIGEN_UNUSED_VARIABLE(cols);
+ //internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
+ }
+
+ /** \returns the number of rows */
+ inline Index rows() const { return m_rows.value(); }
+
+ /** \returns the number of columns */
+ inline Index cols() const { return m_coeffs.cols(); }
+
+ /** \returns the number of super diagonals */
+ inline Index supers() const { return m_supers.value(); }
+
+ /** \returns the number of sub diagonals */
+ inline Index subs() const { return m_subs.value(); }
+
+ inline const CoefficientsType& coeffs() const { return m_coeffs; }
+
+ protected:
+
+ const CoefficientsType& m_coeffs;
+ internal::variable_if_dynamic<Index, _Rows> m_rows;
+ internal::variable_if_dynamic<Index, _Supers> m_supers;
+ internal::variable_if_dynamic<Index, _Subs> m_subs;
+};
+
+/**
+ * \class TridiagonalMatrix
+ * \ingroup Core_Module
+ *
+ * \brief Represents a tridiagonal matrix with a compact banded storage
+ *
+ * \param _Scalar Numeric type, e.g. float, double, int
+ * \param Size Number of rows and cols, or \b Dynamic
+ * \param _Options Can be 0 or \b SelfAdjoint
+ *
+ * \sa class BandMatrix
+ */
+template<typename Scalar, int Size, int Options>
+class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor>
+{
+ typedef BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> Base;
+ typedef typename Base::Index Index;
+ public:
+ TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}
+
+ inline typename Base::template DiagonalIntReturnType<1>::Type super()
+ { return Base::template diagonal<1>(); }
+ inline const typename Base::template DiagonalIntReturnType<1>::Type super() const
+ { return Base::template diagonal<1>(); }
+ inline typename Base::template DiagonalIntReturnType<-1>::Type sub()
+ { return Base::template diagonal<-1>(); }
+ inline const typename Base::template DiagonalIntReturnType<-1>::Type sub() const
+ { return Base::template diagonal<-1>(); }
+ protected:
+};
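+// A minimal usage sketch for the tridiagonal specialization above (the scalar type,
+// size and explicit Options argument are assumptions for exposition; this is an
+// internal class):
+//
+//   Eigen::internal::TridiagonalMatrix<double, Eigen::Dynamic, 0> T(5);
+//   T.diagonal().setOnes();    // main diagonal
+//   T.super().setConstant(2);  // first super-diagonal
+//   T.sub().setConstant(3);    // first sub-diagonal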
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BANDMATRIX_H
diff --git a/third_party/eigen3/Eigen/src/Core/Block.h b/third_party/eigen3/Eigen/src/Core/Block.h
new file mode 100644
index 0000000000..da193d1a22
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Block.h
@@ -0,0 +1,432 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BLOCK_H
+#define EIGEN_BLOCK_H
+
+namespace Eigen {
+
+/** \class Block
+ * \ingroup Core_Module
+ *
+ * \brief Expression of a fixed-size or dynamic-size block
+ *
+ * \param XprType the type of the expression in which we are taking a block
+ * \param BlockRows the number of rows of the block we are taking at compile time (optional)
+ * \param BlockCols the number of columns of the block we are taking at compile time (optional)
+ * \param InnerPanel is true if the block maps to a set of rows of a row-major matrix or
+ * to a set of columns of a column-major matrix (optional). The parameter makes it possible
+ * to determine at compile time whether aligned access is possible on the block expression.
+ *
+ * This class represents an expression of either a fixed-size or dynamic-size block. It is the return
+ * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
+ * most of the time this is the only way it is used.
+ *
+ * However, if you want to directly manipulate block expressions,
+ * for instance if you want to write a function returning such an expression, you
+ * will need to use this class.
+ *
+ * Here is an example illustrating the dynamic case:
+ * \include class_Block.cpp
+ * Output: \verbinclude class_Block.out
+ *
+ * \note Even though this expression has dynamic size, in the case where \a XprType
+ * has fixed size, this expression inherits a fixed maximal size which means that evaluating
+ * it does not cause a dynamic memory allocation.
+ *
+ * Here is an example illustrating the fixed-size case:
+ * \include class_FixedBlock.cpp
+ * Output: \verbinclude class_FixedBlock.out
+ *
+ * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
+ */
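+// A minimal usage sketch (not part of this header; the matrix type and sizes are
+// assumptions chosen for exposition):
+//
+//   Eigen::MatrixXd m(4, 4);
+//   m.setRandom();
+//   Eigen::Block<Eigen::MatrixXd> b = m.block(1, 1, 2, 2);        // dynamic-size block expression
+//   Eigen::Block<Eigen::MatrixXd, 2, 2> f = m.block<2, 2>(0, 0);  // fixed-size block expression
+//   b.setZero();  // writes through to the corresponding coefficients of m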
+
+namespace internal {
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
+{
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef typename traits<XprType>::StorageKind StorageKind;
+ typedef typename traits<XprType>::XprKind XprKind;
+ typedef typename nested<XprType>::type XprTypeNested;
+ typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
+ enum{
+ MatrixRows = traits<XprType>::RowsAtCompileTime,
+ MatrixCols = traits<XprType>::ColsAtCompileTime,
+ RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows,
+ ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols,
+ MaxRowsAtCompileTime = BlockRows==0 ? 0
+ : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime)
+ : int(traits<XprType>::MaxRowsAtCompileTime),
+ MaxColsAtCompileTime = BlockCols==0 ? 0
+ : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
+ : int(traits<XprType>::MaxColsAtCompileTime),
+ XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
+ IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
+ : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
+ : XprTypeIsRowMajor,
+ HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
+ InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+ InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
+ ? int(inner_stride_at_compile_time<XprType>::ret)
+ : int(outer_stride_at_compile_time<XprType>::ret),
+ OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
+ ? int(outer_stride_at_compile_time<XprType>::ret)
+ : int(inner_stride_at_compile_time<XprType>::ret),
+ MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
+ && (InnerStrideAtCompileTime == 1)
+ ? PacketAccessBit : 0,
+ MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
+ FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits<XprType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
+ FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
+ FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
+ Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
+ DirectAccessBit |
+ MaskPacketAccessBit |
+ MaskAlignedBit),
+ Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit
+ };
+};
+
+template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
+ bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class BlockImpl_dense;
+
+} // end namespace internal
+
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
+
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
+ : public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
+{
+ typedef BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind> Impl;
+ public:
+ //typedef typename Impl::Base Base;
+ typedef Impl Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
+
+ /** Column or Row constructor
+ */
+ EIGEN_DEVICE_FUNC
+ inline Block(XprType& xpr, Index i) : Impl(xpr,i)
+ {
+ eigen_assert( (i>=0) && (
+ ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
+ ||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
+ }
+
+ /** Fixed-size constructor
+ */
+ EIGEN_DEVICE_FUNC
+ inline Block(XprType& xpr, Index a_startRow, Index a_startCol)
+ : Impl(xpr, a_startRow, a_startCol)
+ {
+ EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
+ eigen_assert(a_startRow >= 0 && BlockRows >= 1 && a_startRow + BlockRows <= xpr.rows()
+ && a_startCol >= 0 && BlockCols >= 1 && a_startCol + BlockCols <= xpr.cols());
+ }
+
+ /** Dynamic-size constructor
+ */
+ EIGEN_DEVICE_FUNC
+ inline Block(XprType& xpr,
+ Index a_startRow, Index a_startCol,
+ Index blockRows, Index blockCols)
+ : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols)
+ {
+ eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
+ && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
+ eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow <= xpr.rows() - blockRows
+ && a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols);
+ }
+};
+
+// The generic default implementation for dense blocks simply forwards to the internal::BlockImpl_dense
+// that must be specialized for direct and non-direct access...
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
+ : public internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel>
+{
+ typedef internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel> Impl;
+ typedef typename XprType::Index Index;
+ public:
+ typedef Impl Base;
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
+ EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
+ EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols)
+ : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {}
+};
+
+namespace internal {
+
+/** \internal Internal implementation of dense Blocks in the general case. */
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class BlockImpl_dense
+ : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
+{
+ typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+ public:
+
+ typedef typename internal::dense_xpr_base<BlockType>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
+
+ class InnerIterator;
+
+ /** Column or Row constructor
+ */
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index i)
+ : m_xpr(xpr),
+ // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
+ // and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1,
+ // all other cases are invalid.
+ // The case of a 1x1 matrix seems ambiguous, but the result is the same anyway.
+ m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
+ m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
+ m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
+ m_blockCols(BlockCols==1 ? 1 : xpr.cols())
+ {}
+
+ /** Fixed-size constructor
+ */
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index a_startRow, Index a_startCol)
+ : m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
+ m_blockRows(BlockRows), m_blockCols(BlockCols)
+ {}
+
+ /** Dynamic-size constructor
+ */
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr,
+ Index a_startRow, Index a_startCol,
+ Index blockRows, Index blockCols)
+ : m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
+ m_blockRows(blockRows), m_blockCols(blockCols)
+ {}
+
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index rowId, Index colId)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(XprType)
+ return m_xpr.const_cast_derived()
+ .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return m_xpr.derived()
+ .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
+ {
+ return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index index)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(XprType)
+ return m_xpr.const_cast_derived()
+ .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return m_xpr.const_cast_derived()
+ .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const CoeffReturnType coeff(Index index) const
+ {
+ return m_xpr
+ .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+
+ template<int LoadMode>
+ inline PacketScalar packet(Index rowId, Index colId) const
+ {
+ return m_xpr.template packet<Unaligned>
+ (rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
+ {
+ m_xpr.const_cast_derived().template writePacket<Unaligned>
+ (rowId + m_startRow.value(), colId + m_startCol.value(), val);
+ }
+
+ template<int LoadMode>
+ inline PacketScalar packet(Index index) const
+ {
+ return m_xpr.template packet<Unaligned>
+ (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index index, const PacketScalar& val)
+ {
+ m_xpr.const_cast_derived().template writePacket<Unaligned>
+ (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
+ }
+
+ #ifdef EIGEN_PARSED_BY_DOXYGEN
+ /** \sa MapBase::data() */
+ EIGEN_DEVICE_FUNC inline const Scalar* data() const;
+ EIGEN_DEVICE_FUNC inline Index innerStride() const;
+ EIGEN_DEVICE_FUNC inline Index outerStride() const;
+ #endif
+
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
+ {
+ return m_xpr;
+ }
+
+ EIGEN_DEVICE_FUNC
+ Index startRow() const
+ {
+ return m_startRow.value();
+ }
+
+ EIGEN_DEVICE_FUNC
+ Index startCol() const
+ {
+ return m_startCol.value();
+ }
+
+ protected:
+
+ const typename XprType::Nested m_xpr;
+ const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
+ const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
+ const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
+ const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;
+};
+
+/** \internal Internal implementation of dense Blocks in the direct access case.*/
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
+ : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
+{
+ typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+ public:
+
+ typedef MapBase<BlockType> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
+
+ /** Column or Row constructor
+ */
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index i)
+ : Base(internal::const_cast_ptr(&xpr.coeffRef(
+ (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0,
+ (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)),
+ BlockRows==1 ? 1 : xpr.rows(),
+ BlockCols==1 ? 1 : xpr.cols()),
+ m_xpr(xpr)
+ {
+ init();
+ }
+
+ /** Fixed-size constructor
+ */
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
+ : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr)
+ {
+ init();
+ }
+
+ /** Dynamic-size constructor
+ */
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr,
+ Index startRow, Index startCol,
+ Index blockRows, Index blockCols)
+ : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols),
+ m_xpr(xpr)
+ {
+ init();
+ }
+
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
+ {
+ return m_xpr;
+ }
+
+ /** \sa MapBase::innerStride() */
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return internal::traits<BlockType>::HasSameStorageOrderAsXprType
+ ? m_xpr.innerStride()
+ : m_xpr.outerStride();
+ }
+
+ /** \sa MapBase::outerStride() */
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return m_outerStride;
+ }
+
+ #ifndef __SUNPRO_CC
+ // FIXME sunstudio is not friendly with the above friend...
+ // META-FIXME there is no 'friend' keyword around here. Is this obsolete?
+ protected:
+ #endif
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** \internal used by allowAligned() */
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
+ : Base(data, blockRows, blockCols), m_xpr(xpr)
+ {
+ init();
+ }
+ #endif
+
+ protected:
+ EIGEN_DEVICE_FUNC
+ void init()
+ {
+ m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
+ ? m_xpr.outerStride()
+ : m_xpr.innerStride();
+ }
+
+ typename XprType::Nested m_xpr;
+ Index m_outerStride;
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BLOCK_H
diff --git a/third_party/eigen3/Eigen/src/Core/BooleanRedux.h b/third_party/eigen3/Eigen/src/Core/BooleanRedux.h
new file mode 100644
index 0000000000..be9f48a8c7
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/BooleanRedux.h
@@ -0,0 +1,154 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ALLANDANY_H
+#define EIGEN_ALLANDANY_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename Derived, int UnrollCount>
+struct all_unroller
+{
+ enum {
+ col = (UnrollCount-1) / Derived::RowsAtCompileTime,
+ row = (UnrollCount-1) % Derived::RowsAtCompileTime
+ };
+
+ static inline bool run(const Derived &mat)
+ {
+ return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
+ }
+};
+
+template<typename Derived>
+struct all_unroller<Derived, 0>
+{
+ static inline bool run(const Derived &/*mat*/) { return true; }
+};
+
+template<typename Derived>
+struct all_unroller<Derived, Dynamic>
+{
+ static inline bool run(const Derived &) { return false; }
+};
+
+template<typename Derived, int UnrollCount>
+struct any_unroller
+{
+ enum {
+ col = (UnrollCount-1) / Derived::RowsAtCompileTime,
+ row = (UnrollCount-1) % Derived::RowsAtCompileTime
+ };
+
+ static inline bool run(const Derived &mat)
+ {
+ return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
+ }
+};
+
+template<typename Derived>
+struct any_unroller<Derived, 0>
+{
+ static inline bool run(const Derived & /*mat*/) { return false; }
+};
+
+template<typename Derived>
+struct any_unroller<Derived, Dynamic>
+{
+ static inline bool run(const Derived &) { return false; }
+};
+
+} // end namespace internal
+
+/** \returns true if all coefficients are true
+ *
+ * Example: \include MatrixBase_all.cpp
+ * Output: \verbinclude MatrixBase_all.out
+ *
+ * \sa any(), Cwise::operator<()
+ */
+template<typename Derived>
+inline bool DenseBase<Derived>::all() const
+{
+ enum {
+ unroll = SizeAtCompileTime != Dynamic
+ && CoeffReadCost != Dynamic
+ && NumTraits<Scalar>::AddCost != Dynamic
+ && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+ };
+ if(unroll)
+ return internal::all_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
+ else
+ {
+ for(Index j = 0; j < cols(); ++j)
+ for(Index i = 0; i < rows(); ++i)
+ if (!coeff(i, j)) return false;
+ return true;
+ }
+}
+
+/** \returns true if at least one coefficient is true
+ *
+ * \sa all()
+ */
+template<typename Derived>
+inline bool DenseBase<Derived>::any() const
+{
+ enum {
+ unroll = SizeAtCompileTime != Dynamic
+ && CoeffReadCost != Dynamic
+ && NumTraits<Scalar>::AddCost != Dynamic
+ && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+ };
+ if(unroll)
+ return internal::any_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
+ else
+ {
+ for(Index j = 0; j < cols(); ++j)
+ for(Index i = 0; i < rows(); ++i)
+ if (coeff(i, j)) return true;
+ return false;
+ }
+}
+
+/** \returns the number of coefficients which evaluate to true
+ *
+ * \sa all(), any()
+ */
+template<typename Derived>
+inline typename DenseBase<Derived>::Index DenseBase<Derived>::count() const
+{
+ return derived().template cast<bool>().template cast<Index>().sum();
+}
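+// A hedged usage sketch for all(), any() and count() (values are illustrative):
+//
+//   Eigen::Array3d v(0.5, 1.5, 2.5);
+//   bool a = (v > 0.0).all();              // true: every coefficient is positive
+//   bool b = (v > 2.0).any();              // true: at least one coefficient exceeds 2
+//   Eigen::DenseIndex n = (v > 1.0).count();  // 2 coefficients are greater than 1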
+
+/** \returns true if \c *this contains at least one Not A Number (NaN).
+ *
+ * \sa allFinite()
+ */
+template<typename Derived>
+inline bool DenseBase<Derived>::hasNaN() const
+{
+ return !((derived().array()==derived().array()).all());
+}
+
+/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
+ *
+ * \sa hasNaN()
+ */
+template<typename Derived>
+inline bool DenseBase<Derived>::allFinite() const
+{
+ return !((derived()-derived()).hasNaN());
+}
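+// A hedged sketch for the NaN / finiteness checks (assumes <limits> is available):
+//
+//   const double nan = std::numeric_limits<double>::quiet_NaN();
+//   Eigen::Vector3d w(1.0, nan, 3.0);
+//   bool hasNan = w.hasNaN();     // true, the second coefficient is NaN
+//   bool finite = w.allFinite();  // false, because of the NaN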
+
+} // end namespace Eigen
+
+#endif // EIGEN_ALLANDANY_H
diff --git a/third_party/eigen3/Eigen/src/Core/CommaInitializer.h b/third_party/eigen3/Eigen/src/Core/CommaInitializer.h
new file mode 100644
index 0000000000..70cbfeff55
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/CommaInitializer.h
@@ -0,0 +1,161 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMMAINITIALIZER_H
+#define EIGEN_COMMAINITIALIZER_H
+
+namespace Eigen {
+
+/** \class CommaInitializer
+ * \ingroup Core_Module
+ *
+ * \brief Helper class used by the comma initializer operator
+ *
+ * This class is internally used to implement the comma initializer feature. It is
+ * the return type of MatrixBase::operator<<, and most of the time this is the only
+ * way it is used.
+ *
+ * \sa \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
+ */
+template<typename XprType>
+struct CommaInitializer
+{
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::Index Index;
+
+ EIGEN_DEVICE_FUNC
+ inline CommaInitializer(XprType& xpr, const Scalar& s)
+ : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
+ {
+ m_xpr.coeffRef(0,0) = s;
+ }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
+ : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
+ {
+ m_xpr.block(0, 0, other.rows(), other.cols()) = other;
+ }
+
+ /* Copy/Move constructor which transfers ownership. This is crucial in the
+ * absence of return value optimization to avoid assertions during destruction. */
+ // FIXME in C++11 mode this could be replaced by a proper RValue constructor
+ EIGEN_DEVICE_FUNC
+ inline CommaInitializer(const CommaInitializer& o)
+ : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
+ // Mark the original object as finished. In the absence of R-value references we need to const_cast:
+ const_cast<CommaInitializer&>(o).m_row = m_xpr.rows();
+ const_cast<CommaInitializer&>(o).m_col = m_xpr.cols();
+ const_cast<CommaInitializer&>(o).m_currentBlockRows = 0;
+ }
+
+ /* inserts a scalar value in the target matrix */
+ EIGEN_DEVICE_FUNC
+ CommaInitializer& operator,(const Scalar& s)
+ {
+ if (m_col==m_xpr.cols())
+ {
+ m_row+=m_currentBlockRows;
+ m_col = 0;
+ m_currentBlockRows = 1;
+ eigen_assert(m_row<m_xpr.rows()
+ && "Too many rows passed to comma initializer (operator<<)");
+ }
+ eigen_assert(m_col<m_xpr.cols()
+ && "Too many coefficients passed to comma initializer (operator<<)");
+ eigen_assert(m_currentBlockRows==1);
+ m_xpr.coeffRef(m_row, m_col++) = s;
+ return *this;
+ }
+
+ /* inserts a matrix expression in the target matrix */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
+ {
+ if(other.cols()==0 || other.rows()==0)
+ return *this;
+ if (m_col==m_xpr.cols())
+ {
+ m_row+=m_currentBlockRows;
+ m_col = 0;
+ m_currentBlockRows = other.rows();
+ eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()
+ && "Too many rows passed to comma initializer (operator<<)");
+ }
+ eigen_assert(m_col<m_xpr.cols()
+ && "Too many coefficients passed to comma initializer (operator<<)");
+ eigen_assert(m_currentBlockRows==other.rows());
+ if (OtherDerived::SizeAtCompileTime != Dynamic)
+ m_xpr.template block<OtherDerived::RowsAtCompileTime != Dynamic ? OtherDerived::RowsAtCompileTime : 1,
+ OtherDerived::ColsAtCompileTime != Dynamic ? OtherDerived::ColsAtCompileTime : 1>
+ (m_row, m_col) = other;
+ else
+ m_xpr.block(m_row, m_col, other.rows(), other.cols()) = other;
+ m_col += other.cols();
+ return *this;
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline ~CommaInitializer()
+ {
+ eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
+ && m_col == m_xpr.cols()
+ && "Too few coefficients passed to comma initializer (operator<<)");
+ }
+
+ /** \returns the built matrix once all its coefficients have been set.
+ * Calling finished is 100% optional. Its purpose is to write expressions
+ * like this:
+ * \code
+ * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());
+ * \endcode
+ */
+ EIGEN_DEVICE_FUNC
+ inline XprType& finished() { return m_xpr; }
+
+ XprType& m_xpr; // target expression
+ Index m_row; // current row id
+ Index m_col; // current col id
+ Index m_currentBlockRows; // current block height
+};
+
+/** \anchor MatrixBaseCommaInitRef
+ * Convenient operator to set the coefficients of a matrix.
+ *
+ * The coefficients must be provided in row-major order and exactly match
+ * the size of the matrix. Otherwise an assertion is raised.
+ *
+ * Example: \include MatrixBase_set.cpp
+ * Output: \verbinclude MatrixBase_set.out
+ *
+ * \note According to the C++ standard, the argument expressions of this comma initializer are evaluated in arbitrary order.
+ *
+ * \sa CommaInitializer::finished(), class CommaInitializer
+ */
+template<typename Derived>
+inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s)
+{
+ return CommaInitializer<Derived>(*static_cast<Derived*>(this), s);
+}
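+// A hedged usage sketch of the comma initializer (the matrix size is an assumption;
+// the coefficient count must match the matrix size exactly or an assertion fires):
+//
+//   Eigen::Matrix3d m;
+//   m << 1, 2, 3,
+//        4, 5, 6,
+//        7, 8, 9;   // filled row by row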
+
+/** \sa operator<<(const Scalar&) */
+template<typename Derived>
+template<typename OtherDerived>
+inline CommaInitializer<Derived>
+DenseBase<Derived>::operator<<(const DenseBase<OtherDerived>& other)
+{
+ return CommaInitializer<Derived>(*static_cast<Derived *>(this), other);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMMAINITIALIZER_H
diff --git a/third_party/eigen3/Eigen/src/Core/CoreEvaluators.h b/third_party/eigen3/Eigen/src/Core/CoreEvaluators.h
new file mode 100644
index 0000000000..3568cb85f9
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/CoreEvaluators.h
@@ -0,0 +1,1121 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_COREEVALUATORS_H
+#define EIGEN_COREEVALUATORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+// evaluator_traits<T> contains traits for evaluator_impl<T>
+
+template<typename T>
+struct evaluator_traits
+{
+ // 1 if evaluator_impl<T>::evalTo() exists
+ // 0 if evaluator_impl<T> allows coefficient-based access
+ static const int HasEvalTo = 0;
+
+ // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
+ // temporary; 0 if not.
+ static const int AssumeAliasing = 0;
+};
+
+// expression class for evaluating nested expression to a temporary
+
+template<typename ArgType>
+class EvalToTemp;
+
+// evaluator<T>::type is the type of the evaluator for T
+// evaluator<T>::nestedType is the type of the evaluator when T is nested inside another evaluator
+
+template<typename T>
+struct evaluator_impl
+{ };
+
+template<typename T, int Nested = evaluator_traits<T>::HasEvalTo>
+struct evaluator_nested_type;
+
+template<typename T>
+struct evaluator_nested_type<T, 0>
+{
+ typedef evaluator_impl<T> type;
+};
+
+template<typename T>
+struct evaluator_nested_type<T, 1>
+{
+ typedef evaluator_impl<EvalToTemp<T> > type;
+};
+
+template<typename T>
+struct evaluator
+{
+ typedef evaluator_impl<T> type;
+ typedef typename evaluator_nested_type<T>::type nestedType;
+};
+
+// TODO: Think about const-correctness
+
+template<typename T>
+struct evaluator<const T>
+ : evaluator<T>
+{ };
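+// Illustrative sketch of how these evaluators are typically used by the assignment
+// machinery (internal API; the matrix type below is an assumption for exposition):
+//
+//   Eigen::MatrixXd m(2, 2);
+//   m << 1, 2, 3, 4;
+//   Eigen::internal::evaluator<Eigen::MatrixXd>::type eval(m);
+//   double x = eval.coeff(1, 0);  // reads m(1, 0) through the evaluator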
+
+// ---------- base class for all writable evaluators ----------
+
+// TODO this class does not seem to be necessary anymore
+template<typename ExpressionType>
+struct evaluator_impl_base
+{
+ typedef typename ExpressionType::Index Index;
+ // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
+ typedef traits<ExpressionType> ExpressionTraits;
+
+ evaluator_impl<ExpressionType>& derived()
+ {
+ return *static_cast<evaluator_impl<ExpressionType>*>(this);
+ }
+};
+
+// -------------------- Matrix and Array --------------------
+//
+// evaluator_impl<PlainObjectBase> is a common base class for the
+// Matrix and Array evaluators.
+
+template<typename Derived>
+struct evaluator_impl<PlainObjectBase<Derived> >
+ : evaluator_impl_base<Derived>
+{
+ typedef PlainObjectBase<Derived> PlainObjectType;
+
+ enum {
+ IsRowMajor = PlainObjectType::IsRowMajor,
+ IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime,
+ RowsAtCompileTime = PlainObjectType::RowsAtCompileTime,
+ ColsAtCompileTime = PlainObjectType::ColsAtCompileTime
+ };
+
+ evaluator_impl(const PlainObjectType& m)
+ : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride())
+ { }
+
+ typedef typename PlainObjectType::Index Index;
+ typedef typename PlainObjectType::Scalar Scalar;
+ typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
+ typedef typename PlainObjectType::PacketScalar PacketScalar;
+ typedef typename PlainObjectType::PacketReturnType PacketReturnType;
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ if (IsRowMajor)
+ return m_data[row * m_outerStride.value() + col];
+ else
+ return m_data[row + col * m_outerStride.value()];
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_data[index];
+ }
+
+ Scalar& coeffRef(Index row, Index col)
+ {
+ if (IsRowMajor)
+ return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];
+ else
+ return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
+ }
+
+ Scalar& coeffRef(Index index)
+ {
+ return const_cast<Scalar*>(m_data)[index];
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index row, Index col) const
+ {
+ if (IsRowMajor)
+ return ploadt<PacketScalar, LoadMode>(m_data + row * m_outerStride.value() + col);
+ else
+ return ploadt<PacketScalar, LoadMode>(m_data + row + col * m_outerStride.value());
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index index) const
+ {
+ return ploadt<PacketScalar, LoadMode>(m_data + index);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ if (IsRowMajor)
+ return pstoret<Scalar, PacketScalar, StoreMode>
+ (const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x);
+ else
+ return pstoret<Scalar, PacketScalar, StoreMode>
+ (const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index index, const PacketScalar& x)
+ {
+ return pstoret<Scalar, PacketScalar, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
+ }
+
+protected:
+ const Scalar *m_data;
+
+ // We do not need to know the outer stride for vectors
+ variable_if_dynamic<Index, IsVectorAtCompileTime ? 0
+ : int(IsRowMajor) ? ColsAtCompileTime
+ : RowsAtCompileTime> m_outerStride;
+};
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct evaluator_impl<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+ : evaluator_impl<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+{
+ typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
+
+ evaluator_impl(const XprType& m)
+ : evaluator_impl<PlainObjectBase<XprType> >(m)
+ { }
+};
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct evaluator_impl<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+ : evaluator_impl<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+{
+ typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
+
+ evaluator_impl(const XprType& m)
+ : evaluator_impl<PlainObjectBase<XprType> >(m)
+ { }
+};
+
+// -------------------- EvalToTemp --------------------
+
+template<typename ArgType>
+struct traits<EvalToTemp<ArgType> >
+ : public traits<ArgType>
+{ };
+
+template<typename ArgType>
+class EvalToTemp
+ : public dense_xpr_base<EvalToTemp<ArgType> >::type
+{
+ public:
+
+ typedef typename dense_xpr_base<EvalToTemp>::type Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
+
+ EvalToTemp(const ArgType& arg)
+ : m_arg(arg)
+ { }
+
+ const ArgType& arg() const
+ {
+ return m_arg;
+ }
+
+ Index rows() const
+ {
+ return m_arg.rows();
+ }
+
+ Index cols() const
+ {
+ return m_arg.cols();
+ }
+
+ private:
+ const ArgType& m_arg;
+};
+
+template<typename ArgType>
+struct evaluator_impl<EvalToTemp<ArgType> >
+{
+ typedef EvalToTemp<ArgType> XprType;
+ typedef typename ArgType::PlainObject PlainObject;
+
+ evaluator_impl(const XprType& xpr)
+ : m_result(xpr.rows(), xpr.cols()), m_resultImpl(m_result)
+ {
+ // TODO we should simply do m_result(xpr.arg());
+ call_dense_assignment_loop(m_result, xpr.arg());
+ }
+
+ // This constructor is used when nesting an EvalTo evaluator in another evaluator
+ evaluator_impl(const ArgType& arg)
+ : m_result(arg.rows(), arg.cols()), m_resultImpl(m_result)
+ {
+ // TODO we should simply do m_result(xpr.arg());
+ call_dense_assignment_loop(m_result, arg);
+ }
+
+ typedef typename PlainObject::Index Index;
+ typedef typename PlainObject::Scalar Scalar;
+ typedef typename PlainObject::CoeffReturnType CoeffReturnType;
+ typedef typename PlainObject::PacketScalar PacketScalar;
+ typedef typename PlainObject::PacketReturnType PacketReturnType;
+
+ // All other functions are forwarded to m_resultImpl
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_resultImpl.coeff(row, col);
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_resultImpl.coeff(index);
+ }
+
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_resultImpl.coeffRef(row, col);
+ }
+
+ Scalar& coeffRef(Index index)
+ {
+ return m_resultImpl.coeffRef(index);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index row, Index col) const
+ {
+ return m_resultImpl.template packet<LoadMode>(row, col);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index index) const
+ {
+ return m_resultImpl.template packet<LoadMode>(index);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ m_resultImpl.template writePacket<StoreMode>(row, col, x);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index index, const PacketScalar& x)
+ {
+ m_resultImpl.template writePacket<StoreMode>(index, x);
+ }
+
+protected:
+ PlainObject m_result;
+ typename evaluator<PlainObject>::nestedType m_resultImpl;
+};
+
+// -------------------- Transpose --------------------
+
+template<typename ArgType>
+struct evaluator_impl<Transpose<ArgType> >
+ : evaluator_impl_base<Transpose<ArgType> >
+{
+ typedef Transpose<ArgType> XprType;
+
+ evaluator_impl(const XprType& t) : m_argImpl(t.nestedExpression()) {}
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketScalar PacketScalar;
+ typedef typename XprType::PacketReturnType PacketReturnType;
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_argImpl.coeff(col, row);
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_argImpl.coeff(index);
+ }
+
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_argImpl.coeffRef(col, row);
+ }
+
+ typename XprType::Scalar& coeffRef(Index index)
+ {
+ return m_argImpl.coeffRef(index);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index row, Index col) const
+ {
+ return m_argImpl.template packet<LoadMode>(col, row);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index index) const
+ {
+ return m_argImpl.template packet<LoadMode>(index);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ m_argImpl.template writePacket<StoreMode>(col, row, x);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index index, const PacketScalar& x)
+ {
+ m_argImpl.template writePacket<StoreMode>(index, x);
+ }
+
+protected:
+ typename evaluator<ArgType>::nestedType m_argImpl;
+};
+
+// -------------------- CwiseNullaryOp --------------------
+
+template<typename NullaryOp, typename PlainObjectType>
+struct evaluator_impl<CwiseNullaryOp<NullaryOp,PlainObjectType> >
+{
+ typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;
+
+ evaluator_impl(const XprType& n)
+ : m_functor(n.functor())
+ { }
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketScalar PacketScalar;
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_functor(row, col);
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_functor(index);
+ }
+
+ template<int LoadMode>
+ PacketScalar packet(Index row, Index col) const
+ {
+ return m_functor.packetOp(row, col);
+ }
+
+ template<int LoadMode>
+ PacketScalar packet(Index index) const
+ {
+ return m_functor.packetOp(index);
+ }
+
+protected:
+ const NullaryOp m_functor;
+};
+
+// -------------------- CwiseUnaryOp --------------------
+
+template<typename UnaryOp, typename ArgType>
+struct evaluator_impl<CwiseUnaryOp<UnaryOp, ArgType> >
+{
+ typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
+
+ evaluator_impl(const XprType& op)
+ : m_functor(op.functor()),
+ m_argImpl(op.nestedExpression())
+ { }
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketScalar PacketScalar;
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_functor(m_argImpl.coeff(row, col));
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_functor(m_argImpl.coeff(index));
+ }
+
+ template<int LoadMode>
+ PacketScalar packet(Index row, Index col) const
+ {
+ return m_functor.packetOp(m_argImpl.template packet<LoadMode>(row, col));
+ }
+
+ template<int LoadMode>
+ PacketScalar packet(Index index) const
+ {
+ return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
+ }
+
+protected:
+ const UnaryOp m_functor;
+ typename evaluator<ArgType>::nestedType m_argImpl;
+};
+
+// -------------------- CwiseBinaryOp --------------------
+
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+ typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+
+ evaluator_impl(const XprType& xpr)
+ : m_functor(xpr.functor()),
+ m_lhsImpl(xpr.lhs()),
+ m_rhsImpl(xpr.rhs())
+ { }
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketScalar PacketScalar;
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
+ }
+
+ template<int LoadMode>
+ PacketScalar packet(Index row, Index col) const
+ {
+ return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(row, col),
+ m_rhsImpl.template packet<LoadMode>(row, col));
+ }
+
+ template<int LoadMode>
+ PacketScalar packet(Index index) const
+ {
+ return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(index),
+ m_rhsImpl.template packet<LoadMode>(index));
+ }
+
+protected:
+ const BinaryOp m_functor;
+ typename evaluator<Lhs>::nestedType m_lhsImpl;
+ typename evaluator<Rhs>::nestedType m_rhsImpl;
+};
+
+// -------------------- CwiseUnaryView --------------------
+
+template<typename UnaryOp, typename ArgType>
+struct evaluator_impl<CwiseUnaryView<UnaryOp, ArgType> >
+ : evaluator_impl_base<CwiseUnaryView<UnaryOp, ArgType> >
+{
+ typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
+
+ evaluator_impl(const XprType& op)
+ : m_unaryOp(op.functor()),
+ m_argImpl(op.nestedExpression())
+ { }
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_unaryOp(m_argImpl.coeff(row, col));
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_unaryOp(m_argImpl.coeff(index));
+ }
+
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_unaryOp(m_argImpl.coeffRef(row, col));
+ }
+
+ Scalar& coeffRef(Index index)
+ {
+ return m_unaryOp(m_argImpl.coeffRef(index));
+ }
+
+protected:
+ const UnaryOp m_unaryOp;
+ typename evaluator<ArgType>::nestedType m_argImpl;
+};
+
+// -------------------- Map --------------------
+
+template<typename Derived, int AccessorsType>
+struct evaluator_impl<MapBase<Derived, AccessorsType> >
+ : evaluator_impl_base<Derived>
+{
+ typedef MapBase<Derived, AccessorsType> MapType;
+ typedef Derived XprType;
+
+ typedef typename XprType::PointerType PointerType;
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketScalar PacketScalar;
+ typedef typename XprType::PacketReturnType PacketReturnType;
+
+ evaluator_impl(const XprType& map)
+ : m_data(const_cast<PointerType>(map.data())),
+ m_rowStride(map.rowStride()),
+ m_colStride(map.colStride())
+ { }
+
+ enum {
+ RowsAtCompileTime = XprType::RowsAtCompileTime
+ };
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_data[col * m_colStride + row * m_rowStride];
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return coeff(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0);
+ }
+
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_data[col * m_colStride + row * m_rowStride];
+ }
+
+ Scalar& coeffRef(Index index)
+ {
+ return coeffRef(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index row, Index col) const
+ {
+ PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
+ return internal::ploadt<PacketScalar, LoadMode>(ptr);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index index) const
+ {
+ return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
+ return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index index, const PacketScalar& x)
+ {
+ return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0,
+ x);
+ }
+
+protected:
+ PointerType m_data;
+ int m_rowStride;
+ int m_colStride;
+};
+
+template<typename PlainObjectType, int MapOptions, typename StrideType>
+struct evaluator_impl<Map<PlainObjectType, MapOptions, StrideType> >
+ : public evaluator_impl<MapBase<Map<PlainObjectType, MapOptions, StrideType> > >
+{
+ typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
+
+ evaluator_impl(const XprType& map)
+ : evaluator_impl<MapBase<XprType> >(map)
+ { }
+};
+
+// -------------------- Block --------------------
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,
+ bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator;
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct evaluator_impl<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+ : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
+{
+ typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+ typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
+ evaluator_impl(const XprType& block) : block_evaluator_type(block) {}
+};
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAccess*/ false>
+ : evaluator_impl_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+{
+ typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+
+ block_evaluator(const XprType& block)
+ : m_argImpl(block.nestedExpression()),
+ m_startRow(block.startRow()),
+ m_startCol(block.startCol())
+ { }
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketScalar PacketScalar;
+ typedef typename XprType::PacketReturnType PacketReturnType;
+
+ enum {
+ RowsAtCompileTime = XprType::RowsAtCompileTime
+ };
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return coeff(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0);
+ }
+
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
+ }
+
+ Scalar& coeffRef(Index index)
+ {
+ return coeffRef(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index row, Index col) const
+ {
+ return m_argImpl.template packet<LoadMode>(m_startRow.value() + row, m_startCol.value() + col);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index index) const
+ {
+ return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ return m_argImpl.template writePacket<StoreMode>(m_startRow.value() + row, m_startCol.value() + col, x);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index index, const PacketScalar& x)
+ {
+ return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0,
+ x);
+ }
+
+protected:
+ typename evaluator<ArgType>::nestedType m_argImpl;
+ const variable_if_dynamic<Index, ArgType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
+ const variable_if_dynamic<Index, ArgType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
+};
+
+// TODO: This evaluator does not actually use the child evaluator;
+// all action is via the data() as returned by the Block expression.
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true>
+ : evaluator_impl<MapBase<Block<ArgType, BlockRows, BlockCols, InnerPanel> > >
+{
+ typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+
+ block_evaluator(const XprType& block)
+ : evaluator_impl<MapBase<XprType> >(block)
+ { }
+};
+
+
+// -------------------- Select --------------------
+
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
+struct evaluator_impl<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+{
+ typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
+
+ evaluator_impl(const XprType& select)
+ : m_conditionImpl(select.conditionMatrix()),
+ m_thenImpl(select.thenMatrix()),
+ m_elseImpl(select.elseMatrix())
+ { }
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ if (m_conditionImpl.coeff(row, col))
+ return m_thenImpl.coeff(row, col);
+ else
+ return m_elseImpl.coeff(row, col);
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ if (m_conditionImpl.coeff(index))
+ return m_thenImpl.coeff(index);
+ else
+ return m_elseImpl.coeff(index);
+ }
+
+protected:
+ typename evaluator<ConditionMatrixType>::nestedType m_conditionImpl;
+ typename evaluator<ThenMatrixType>::nestedType m_thenImpl;
+ typename evaluator<ElseMatrixType>::nestedType m_elseImpl;
+};
+
+
+// -------------------- Replicate --------------------
+
+template<typename ArgType, int RowFactor, int ColFactor>
+struct evaluator_impl<Replicate<ArgType, RowFactor, ColFactor> >
+{
+ typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
+
+ evaluator_impl(const XprType& replicate)
+ : m_argImpl(replicate.nestedExpression()),
+ m_rows(replicate.nestedExpression().rows()),
+ m_cols(replicate.nestedExpression().cols())
+ { }
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketReturnType PacketReturnType;
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ // try to avoid using modulo; this is a pure optimization strategy
+ const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
+ : RowFactor==1 ? row
+ : row % m_rows.value();
+ const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0
+ : ColFactor==1 ? col
+ : col % m_cols.value();
+
+ return m_argImpl.coeff(actual_row, actual_col);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index row, Index col) const
+ {
+ const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
+ : RowFactor==1 ? row
+ : row % m_rows.value();
+ const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0
+ : ColFactor==1 ? col
+ : col % m_cols.value();
+
+ return m_argImpl.template packet<LoadMode>(actual_row, actual_col);
+ }
+
+protected:
+ typename evaluator<ArgType>::nestedType m_argImpl;
+ const variable_if_dynamic<Index, XprType::RowsAtCompileTime> m_rows;
+ const variable_if_dynamic<Index, XprType::ColsAtCompileTime> m_cols;
+};
+
+
+// -------------------- PartialReduxExpr --------------------
+//
+// This is a wrapper around the expression object.
+// TODO: Find out how to write a proper evaluator without duplicating
+// the row() and col() member functions.
+
+template< typename ArgType, typename MemberOp, int Direction>
+struct evaluator_impl<PartialReduxExpr<ArgType, MemberOp, Direction> >
+{
+ typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
+
+ evaluator_impl(const XprType expr)
+ : m_expr(expr)
+ { }
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_expr.coeff(row, col);
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_expr.coeff(index);
+ }
+
+protected:
+ const XprType m_expr;
+};
+
+
+// -------------------- MatrixWrapper and ArrayWrapper --------------------
+//
+// evaluator_impl_wrapper_base<T> is a common base class for the
+// MatrixWrapper and ArrayWrapper evaluators.
+
+template<typename XprType>
+struct evaluator_impl_wrapper_base
+ : evaluator_impl_base<XprType>
+{
+ typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
+
+ evaluator_impl_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
+
+ typedef typename ArgType::Index Index;
+ typedef typename ArgType::Scalar Scalar;
+ typedef typename ArgType::CoeffReturnType CoeffReturnType;
+ typedef typename ArgType::PacketScalar PacketScalar;
+ typedef typename ArgType::PacketReturnType PacketReturnType;
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_argImpl.coeff(row, col);
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_argImpl.coeff(index);
+ }
+
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_argImpl.coeffRef(row, col);
+ }
+
+ Scalar& coeffRef(Index index)
+ {
+ return m_argImpl.coeffRef(index);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index row, Index col) const
+ {
+ return m_argImpl.template packet<LoadMode>(row, col);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index index) const
+ {
+ return m_argImpl.template packet<LoadMode>(index);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ m_argImpl.template writePacket<StoreMode>(row, col, x);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index index, const PacketScalar& x)
+ {
+ m_argImpl.template writePacket<StoreMode>(index, x);
+ }
+
+protected:
+ typename evaluator<ArgType>::nestedType m_argImpl;
+};
+
+template<typename TArgType>
+struct evaluator_impl<MatrixWrapper<TArgType> >
+ : evaluator_impl_wrapper_base<MatrixWrapper<TArgType> >
+{
+ typedef MatrixWrapper<TArgType> XprType;
+
+ evaluator_impl(const XprType& wrapper)
+ : evaluator_impl_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression())
+ { }
+};
+
+template<typename TArgType>
+struct evaluator_impl<ArrayWrapper<TArgType> >
+ : evaluator_impl_wrapper_base<ArrayWrapper<TArgType> >
+{
+ typedef ArrayWrapper<TArgType> XprType;
+
+ evaluator_impl(const XprType& wrapper)
+ : evaluator_impl_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())
+ { }
+};
+
+
+// -------------------- Reverse --------------------
+
+// defined in Reverse.h:
+template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond;
+
+template<typename ArgType, int Direction>
+struct evaluator_impl<Reverse<ArgType, Direction> >
+ : evaluator_impl_base<Reverse<ArgType, Direction> >
+{
+ typedef Reverse<ArgType, Direction> XprType;
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketScalar PacketScalar;
+ typedef typename XprType::PacketReturnType PacketReturnType;
+
+ enum {
+ PacketSize = internal::packet_traits<Scalar>::size,
+ IsRowMajor = XprType::IsRowMajor,
+ IsColMajor = !IsRowMajor,
+ ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
+ ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
+ OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
+ OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1,
+ ReversePacket = (Direction == BothDirections)
+ || ((Direction == Vertical) && IsColMajor)
+ || ((Direction == Horizontal) && IsRowMajor)
+ };
+ typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
+
+ evaluator_impl(const XprType& reverse)
+ : m_argImpl(reverse.nestedExpression()),
+ m_rows(ReverseRow ? reverse.nestedExpression().rows() : 0),
+ m_cols(ReverseCol ? reverse.nestedExpression().cols() : 0)
+ { }
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
+ ReverseCol ? m_cols.value() - col - 1 : col);
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);
+ }
+
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
+ ReverseCol ? m_cols.value() - col - 1 : col);
+ }
+
+ Scalar& coeffRef(Index index)
+ {
+ return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
+ }
+
+ template<int LoadMode>
+ PacketScalar packet(Index row, Index col) const
+ {
+ return reverse_packet::run(m_argImpl.template packet<LoadMode>(
+ ReverseRow ? m_rows.value() - row - OffsetRow : row,
+ ReverseCol ? m_cols.value() - col - OffsetCol : col));
+ }
+
+ template<int LoadMode>
+ PacketScalar packet(Index index) const
+ {
+ return preverse(m_argImpl.template packet<LoadMode>(m_rows.value() * m_cols.value() - index - PacketSize));
+ }
+
+ template<int LoadMode>
+ void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ m_argImpl.template writePacket<LoadMode>(
+ ReverseRow ? m_rows.value() - row - OffsetRow : row,
+ ReverseCol ? m_cols.value() - col - OffsetCol : col,
+ reverse_packet::run(x));
+ }
+
+ template<int LoadMode>
+ void writePacket(Index index, const PacketScalar& x)
+ {
+ m_argImpl.template writePacket<LoadMode>
+ (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));
+ }
+
+protected:
+ typename evaluator<ArgType>::nestedType m_argImpl;
+
+ // If we do not reverse rows, then we do not need to know the number of rows; same for columns
+ const variable_if_dynamic<Index, ReverseRow ? ArgType::RowsAtCompileTime : 0> m_rows;
+ const variable_if_dynamic<Index, ReverseCol ? ArgType::ColsAtCompileTime : 0> m_cols;
+};
+
+
+// -------------------- Diagonal --------------------
+
+template<typename ArgType, int DiagIndex>
+struct evaluator_impl<Diagonal<ArgType, DiagIndex> >
+ : evaluator_impl_base<Diagonal<ArgType, DiagIndex> >
+{
+ typedef Diagonal<ArgType, DiagIndex> XprType;
+
+ evaluator_impl(const XprType& diagonal)
+ : m_argImpl(diagonal.nestedExpression()),
+ m_index(diagonal.index())
+ { }
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+ CoeffReturnType coeff(Index row, Index) const
+ {
+ return m_argImpl.coeff(row + rowOffset(), row + colOffset());
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_argImpl.coeff(index + rowOffset(), index + colOffset());
+ }
+
+ Scalar& coeffRef(Index row, Index)
+ {
+ return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());
+ }
+
+ Scalar& coeffRef(Index index)
+ {
+ return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());
+ }
+
+protected:
+ typename evaluator<ArgType>::nestedType m_argImpl;
+ const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
+
+private:
+ EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }
+ EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
+};
+
+} // namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COREEVALUATORS_H
diff --git a/third_party/eigen3/Eigen/src/Core/CoreIterators.h b/third_party/eigen3/Eigen/src/Core/CoreIterators.h
new file mode 100644
index 0000000000..6da4683d2c
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/CoreIterators.h
@@ -0,0 +1,61 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COREITERATORS_H
+#define EIGEN_COREITERATORS_H
+
+namespace Eigen {
+
+/* This file contains the respective InnerIterator definitions for the expressions defined in Eigen/Core
+ */
+
+/** \ingroup SparseCore_Module
+ * \class InnerIterator
+ * \brief An InnerIterator allows looping over the elements of a sparse (or dense) matrix or expression
+ *
+ * todo
+ */
+
+// generic version for dense matrix and expressions
+template<typename Derived> class DenseBase<Derived>::InnerIterator
+{
+ protected:
+ typedef typename Derived::Scalar Scalar;
+ typedef typename Derived::Index Index;
+
+ enum { IsRowMajor = (Derived::Flags&RowMajorBit)==RowMajorBit };
+ public:
+ EIGEN_STRONG_INLINE InnerIterator(const Derived& expr, Index outer)
+ : m_expression(expr), m_inner(0), m_outer(outer), m_end(expr.innerSize())
+ {}
+
+ EIGEN_STRONG_INLINE Scalar value() const
+ {
+ return (IsRowMajor) ? m_expression.coeff(m_outer, m_inner)
+ : m_expression.coeff(m_inner, m_outer);
+ }
+
+ EIGEN_STRONG_INLINE InnerIterator& operator++() { m_inner++; return *this; }
+
+ EIGEN_STRONG_INLINE Index index() const { return m_inner; }
+ inline Index row() const { return IsRowMajor ? m_outer : index(); }
+ inline Index col() const { return IsRowMajor ? index() : m_outer; }
+
+ EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }
+
+ protected:
+ const Derived& m_expression;
+ Index m_inner;
+ const Index m_outer;
+ const Index m_end;
+};
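+
+// For example, summing the coefficients of one column of a dense matrix with
+// this iterator (a minimal sketch; for a column-major matrix the outer index
+// selects the column):
+//
+//   Eigen::MatrixXd m = Eigen::MatrixXd::Random(4, 3);
+//   double sum = 0;
+//   for (Eigen::MatrixXd::InnerIterator it(m, 1); it; ++it)   // column 1
+//     sum += it.value();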
+
+} // end namespace Eigen
+
+#endif // EIGEN_COREITERATORS_H
diff --git a/third_party/eigen3/Eigen/src/Core/CwiseBinaryOp.h b/third_party/eigen3/Eigen/src/Core/CwiseBinaryOp.h
new file mode 100644
index 0000000000..e20daacc8c
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/CwiseBinaryOp.h
@@ -0,0 +1,238 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CWISE_BINARY_OP_H
+#define EIGEN_CWISE_BINARY_OP_H
+
+namespace Eigen {
+
+/** \class CwiseBinaryOp
+ * \ingroup Core_Module
+ *
+ * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
+ *
+ * \param BinaryOp template functor implementing the operator
+ * \param Lhs the type of the left-hand side
+ * \param Rhs the type of the right-hand side
+ *
+ * This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
+ * It is the return type of binary operators, by which we mean only those binary operators where
+ * both the left-hand side and the right-hand side are Eigen expressions.
+ * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
+ *
+ * Most of the time, this is the only way that it is used, so you typically don't have to name
+ * CwiseBinaryOp types explicitly.
+ *
+ * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
+ */
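+
+// For example, with two 3x3 float matrices the sum below is a CwiseBinaryOp
+// expression; no coefficient is computed until it is assigned to a plain
+// matrix (a minimal usage sketch):
+//
+//   Eigen::Matrix3f a = Eigen::Matrix3f::Random();
+//   Eigen::Matrix3f b = Eigen::Matrix3f::Random();
+//   Eigen::Matrix3f c = a + b;   // the CwiseBinaryOp is evaluated here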
+
+namespace internal {
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+ // we must not inherit from traits<Lhs> since it has
+ // the potential to cause problems with MSVC
+ typedef typename remove_all<Lhs>::type Ancestor;
+ typedef typename traits<Ancestor>::XprKind XprKind;
+ enum {
+ RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
+ ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
+ MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
+ };
+
+ // even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor),
+ // we still want to handle the case when the result type is different.
+ typedef typename result_of<
+ BinaryOp(
+ typename Lhs::Scalar,
+ typename Rhs::Scalar
+ )
+ >::type Scalar;
+ typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
+ typename traits<Rhs>::StorageKind>::ret StorageKind;
+ typedef typename promote_index_type<typename traits<Lhs>::Index,
+ typename traits<Rhs>::Index>::type Index;
+ typedef typename Lhs::Nested LhsNested;
+ typedef typename Rhs::Nested RhsNested;
+ typedef typename remove_reference<LhsNested>::type _LhsNested;
+ typedef typename remove_reference<RhsNested>::type _RhsNested;
+ enum {
+ LhsCoeffReadCost = _LhsNested::CoeffReadCost,
+ RhsCoeffReadCost = _RhsNested::CoeffReadCost,
+ LhsFlags = _LhsNested::Flags,
+ RhsFlags = _RhsNested::Flags,
+ SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
+ StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
+ Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
+ HereditaryBits
+ | (int(LhsFlags) & int(RhsFlags) &
+ ( AlignedBit
+ | (StorageOrdersAgree ? LinearAccessBit : 0)
+ | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
+ )
+ )
+ ),
+ Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
+ CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
+ };
+};
+} // end namespace internal
+
+// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
+// that would take two operands of different types. If there were such an example, then this check should be
+// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
+// currently they take only one typename Scalar template parameter.
+// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
+// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
+// add together a float matrix and a double matrix.
+#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
+ EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
+ ? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
+ : int(internal::is_same<LHS, RHS>::value)), \
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
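+
+// For example, adding a float matrix to a double matrix trips the static
+// assertion above; an explicit cast is required (sketch):
+//
+//   Eigen::MatrixXf f = Eigen::MatrixXf::Ones(2, 2);
+//   Eigen::MatrixXd d = Eigen::MatrixXd::Ones(2, 2);
+//   // Eigen::MatrixXd bad = f + d;                // does not compile
+//   Eigen::MatrixXd ok = f.cast<double>() + d;     // cast one operand first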
+
+template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
+class CwiseBinaryOpImpl;
+
+template<typename BinaryOp, typename Lhs, typename Rhs>
+class CwiseBinaryOp : internal::no_assignment_operator,
+ public CwiseBinaryOpImpl<
+ BinaryOp, Lhs, Rhs,
+ typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+ typename internal::traits<Rhs>::StorageKind>::ret>
+{
+ public:
+
+ typedef typename CwiseBinaryOpImpl<
+ BinaryOp, Lhs, Rhs,
+ typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+ typename internal::traits<Rhs>::StorageKind>::ret>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
+
+ typedef typename internal::nested<Lhs>::type LhsNested;
+ typedef typename internal::nested<Rhs>::type RhsNested;
+ typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
+ typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
+ : m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
+ {
+ EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
+ // require the sizes to match
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
+ eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rows() const {
+ // return the fixed size type if available to enable compile time optimizations
+ if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
+ return m_rhs.rows();
+ else
+ return m_lhs.rows();
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index cols() const {
+ // return the fixed size type if available to enable compile time optimizations
+ if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
+ return m_rhs.cols();
+ else
+ return m_lhs.cols();
+ }
+
+ /** \returns the left hand side nested expression */
+ EIGEN_DEVICE_FUNC
+ const _LhsNested& lhs() const { return m_lhs; }
+ /** \returns the right hand side nested expression */
+ EIGEN_DEVICE_FUNC
+ const _RhsNested& rhs() const { return m_rhs; }
+ /** \returns the functor representing the binary operation */
+ EIGEN_DEVICE_FUNC
+ const BinaryOp& functor() const { return m_functor; }
+
+ protected:
+ LhsNested m_lhs;
+ RhsNested m_rhs;
+ const BinaryOp m_functor;
+};
+
+template<typename BinaryOp, typename Lhs, typename Rhs>
+class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
+ : public internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
+{
+ typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived;
+ public:
+
+ typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE( Derived )
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
+ {
+ return derived().functor()(derived().lhs().coeff(rowId, colId),
+ derived().rhs().coeff(rowId, colId));
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
+ {
+ return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(rowId, colId),
+ derived().rhs().template packet<LoadMode>(rowId, colId));
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
+ {
+ return derived().functor()(derived().lhs().coeff(index),
+ derived().rhs().coeff(index));
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
+ {
+ return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(index),
+ derived().rhs().template packet<LoadMode>(index));
+ }
+};
+
+/** replaces \c *this by \c *this - \a other.
+ *
+ * \returns a reference to \c *this
+ */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
+{
+ SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
+ tmp = other.derived();
+ return derived();
+}
+
+/** replaces \c *this by \c *this + \a other.
+ *
+ * \returns a reference to \c *this
+ */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
+{
+ SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
+ tmp = other.derived();
+ return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_CWISE_BINARY_OP_H
+
diff --git a/third_party/eigen3/Eigen/src/Core/CwiseNullaryOp.h b/third_party/eigen3/Eigen/src/Core/CwiseNullaryOp.h
new file mode 100644
index 0000000000..1243831142
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/CwiseNullaryOp.h
@@ -0,0 +1,875 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CWISE_NULLARY_OP_H
+#define EIGEN_CWISE_NULLARY_OP_H
+
+namespace Eigen {
+
+/** \class CwiseNullaryOp
+ * \ingroup Core_Module
+ *
+ * \brief Generic expression of a matrix where all coefficients are defined by a functor
+ *
+ * \param NullaryOp template functor implementing the operator
+ * \param PlainObjectType the underlying plain matrix/array type
+ *
+ * This class represents an expression of a generic nullary operator.
+ * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
+ * and most of the time this is the only way it is used.
+ *
+ * However, if you want to write a function returning such an expression, you
+ * will need to use this class.
+ *
+ * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
+ */
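+
+// For example, the factory methods below all return CwiseNullaryOp
+// expressions; the coefficients are only produced when the expressions are
+// assigned to plain objects:
+//
+//   Eigen::MatrixXd z = Eigen::MatrixXd::Zero(3, 3);
+//   Eigen::MatrixXd c = Eigen::MatrixXd::Constant(2, 4, 3.14);
+//   Eigen::VectorXd r = Eigen::VectorXd::Random(5);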
+
+namespace internal {
+template<typename NullaryOp, typename PlainObjectType>
+struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
+{
+ enum {
+ Flags = (traits<PlainObjectType>::Flags
+ & ( HereditaryBits
+ | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
+ | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
+ | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
+ CoeffReadCost = functor_traits<NullaryOp>::Cost
+ };
+};
+}
+
+template<typename NullaryOp, typename PlainObjectType>
+class CwiseNullaryOp : internal::no_assignment_operator,
+ public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type
+{
+ public:
+
+ typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
+
+ EIGEN_DEVICE_FUNC
+ CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp())
+ : m_rows(nbRows), m_cols(nbCols), m_functor(func)
+ {
+ eigen_assert(nbRows >= 0
+ && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows)
+ && nbCols >= 0
+ && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols));
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
+ {
+ return m_functor(rowId, colId);
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
+ {
+ return m_functor.packetOp(rowId, colId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
+ {
+ return m_functor(index);
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
+ {
+ return m_functor.packetOp(index);
+ }
+
+ /** \returns the functor representing the nullary operation */
+ EIGEN_DEVICE_FUNC
+ const NullaryOp& functor() const { return m_functor; }
+
+ protected:
+ const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
+ const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
+ const NullaryOp m_functor;
+};
+
+
+/** \returns an expression of a matrix defined by a custom functor \a func
+ *
+ * The parameters \a rows and \a cols are the number of rows and of columns of
+ * the returned matrix. Must be compatible with this MatrixBase type.
+ *
+ * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+ * it is redundant to pass \a rows and \a cols as arguments, so the variant taking
+ * no size arguments should be used instead.
+ *
+ * The template parameter \a CustomNullaryOp is the type of the functor.
+ *
+ * \sa class CwiseNullaryOp
+ */
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
+DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
+{
+ return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
+}
+
+/** \returns an expression of a matrix defined by a custom functor \a func
+ *
+ * The parameter \a size is the size of the returned vector.
+ * Must be compatible with this MatrixBase type.
+ *
+ * \only_for_vectors
+ *
+ * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+ * it is redundant to pass \a size as argument, so the variant taking no size argument
+ * should be used instead.
+ *
+ * The template parameter \a CustomNullaryOp is the type of the functor.
+ *
+ * Here is an example with C++11 random generators: \include random_cpp11.cpp
+ * Output: \verbinclude random_cpp11.out
+ *
+ * \sa class CwiseNullaryOp
+ */
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
+DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, Derived>(1, size, func);
+ else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
+}
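+
+// A minimal sketch of a custom functor used with the vector variant above;
+// both call signatures are provided so that either coefficient-access path
+// compiles (for a column vector they agree):
+//
+//   struct SquaresOp {
+//     double operator()(Eigen::DenseIndex i) const { return double(i) * double(i); }
+//     double operator()(Eigen::DenseIndex r, Eigen::DenseIndex c) const { return double(r + c) * double(r + c); }
+//   };
+//   Eigen::VectorXd v = Eigen::VectorXd::NullaryExpr(5, SquaresOp());   // 0 1 4 9 16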
+
+/** \returns an expression of a matrix defined by a custom functor \a func
+ *
+ * This variant is only for fixed-size DenseBase types. For dynamic-size types, you
+ * need to use the variants taking size arguments.
+ *
+ * The template parameter \a CustomNullaryOp is the type of the functor.
+ *
+ * \sa class CwiseNullaryOp
+ */
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
+DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
+{
+ return CwiseNullaryOp<CustomNullaryOp, Derived>(RowsAtCompileTime, ColsAtCompileTime, func);
+}
+
+/** \returns an expression of a constant matrix of value \a value
+ *
+ * The parameters \a nbRows and \a nbCols are the number of rows and of columns of
+ * the returned matrix. Must be compatible with this DenseBase type.
+ *
+ * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+ * it is redundant to pass \a nbRows and \a nbCols as arguments, so Constant(const Scalar&)
+ * should be used instead.
+ *
+ * \sa class CwiseNullaryOp
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(Index nbRows, Index nbCols, const Scalar& value)
+{
+ return DenseBase<Derived>::NullaryExpr(nbRows, nbCols, internal::scalar_constant_op<Scalar>(value));
+}
+
+/** \returns an expression of a constant matrix of value \a value
+ *
+ * The parameter \a size is the size of the returned vector.
+ * Must be compatible with this DenseBase type.
+ *
+ * \only_for_vectors
+ *
+ * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+ * it is redundant to pass \a size as argument, so Constant(const Scalar&) should be
+ * used instead.
+ *
+ * \sa class CwiseNullaryOp
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(Index size, const Scalar& value)
+{
+ return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
+}
+
+/** \returns an expression of a constant matrix of value \a value
+ *
+ * This variant is only for fixed-size DenseBase types. For dynamic-size types, you
+ * need to use the variants taking size arguments.
+ *
+ * \sa class CwiseNullaryOp
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(const Scalar& value)
+{
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
+}
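+
+// For example:
+//
+//   Eigen::MatrixXf a = Eigen::MatrixXf::Constant(2, 3, 0.5f);   // 2x3, all coefficients 0.5
+//   Eigen::Vector4d b = Eigen::Vector4d::Constant(-1.0);         // fixed-size variant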
+
+/**
+ * \brief Sets a linearly spaced vector.
+ *
+ * The function generates 'size' equally spaced values in the closed interval [low,high].
+ * This particular version of LinSpaced() uses sequential access, i.e. vector access is
+ * assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization
+ * and yields faster code than the random access version.
+ *
+ * When size is set to 1, a vector of length 1 containing 'high' is returned.
+ *
+ * \only_for_vectors
+ *
+ * Example: \include DenseBase_LinSpaced_seq.cpp
+ * Output: \verbinclude DenseBase_LinSpaced_seq.out
+ *
+ * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,false>(low,high,size));
+}
+
+/**
+ * \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&)
+ * Special version for fixed size types which does not require the size parameter.
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,false>(low,high,Derived::SizeAtCompileTime));
+}
+
+/**
+ * \brief Sets a linearly spaced vector.
+ *
+ * The function generates 'size' equally spaced values in the closed interval [low,high].
+ * When size is set to 1, a vector of length 1 containing 'high' is returned.
+ *
+ * \only_for_vectors
+ *
+ * Example: \include DenseBase_LinSpaced.cpp
+ * Output: \verbinclude DenseBase_LinSpaced.out
+ *
+ * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&), CwiseNullaryOp
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,true>(low,high,size));
+}
+
+/**
+ * \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&)
+ * Special version for fixed size types which does not require the size parameter.
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,true>(low,high,Derived::SizeAtCompileTime));
+}
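+
+// For example:
+//
+//   Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(5, 0.0, 1.0);   // 0 0.25 0.5 0.75 1
+//   Eigen::Vector3f w = Eigen::Vector3f::LinSpaced(0.f, 1.f);      // 0 0.5 1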
+
+/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
+template<typename Derived>
+bool DenseBase<Derived>::isApproxToConstant
+(const Scalar& val, const RealScalar& prec) const
+{
+ for(Index j = 0; j < cols(); ++j)
+ for(Index i = 0; i < rows(); ++i)
+ if(!internal::isApprox(this->coeff(i, j), val, prec))
+ return false;
+ return true;
+}
+
+/** This is just an alias for isApproxToConstant().
+ *
+ * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
+template<typename Derived>
+bool DenseBase<Derived>::isConstant
+(const Scalar& val, const RealScalar& prec) const
+{
+ return isApproxToConstant(val, prec);
+}
+
+/** Alias for setConstant(): sets all coefficients in this expression to \a val.
+ *
+ * \sa setConstant(), Constant(), class CwiseNullaryOp
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
+{
+ setConstant(val);
+}
+
+/** Sets all coefficients in this expression to \a val.
+ *
+ * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
+{
+ return derived() = Constant(rows(), cols(), val);
+}
+
+/** Resizes to the given \a size, and sets all coefficients in this expression to the given \a val.
+ *
+ * \only_for_vectors
+ *
+ * Example: \include Matrix_setConstant_int.cpp
+ * Output: \verbinclude Matrix_setConstant_int.out
+ *
+ * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
+{
+ resize(size);
+ return setConstant(val);
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to the given \a val.
+ *
+ * \param nbRows the new number of rows
+ * \param nbCols the new number of columns
+ * \param val the value to which all coefficients are set
+ *
+ * Example: \include Matrix_setConstant_int_int.cpp
+ * Output: \verbinclude Matrix_setConstant_int_int.out
+ *
+ * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setConstant(Index nbRows, Index nbCols, const Scalar& val)
+{
+ resize(nbRows, nbCols);
+ return setConstant(val);
+}
+
+/**
+ * \brief Sets a linearly spaced vector.
+ *
+ * The function generates 'size' equally spaced values in the closed interval [low,high].
+ * When size is set to 1, a vector of length 1 containing 'high' is returned.
+ *
+ * \only_for_vectors
+ *
+ * Example: \include DenseBase_setLinSpaced.cpp
+ * Output: \verbinclude DenseBase_setLinSpaced.out
+ *
+ * \sa CwiseNullaryOp
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,false>(low,high,newSize));
+}
+
+/**
+ * \brief Sets a linearly spaced vector.
+ *
+ * The function fills *this with equally spaced values in the closed interval [low,high].
+ * When size is set to 1, a vector of length 1 containing 'high' is returned.
+ *
+ * \only_for_vectors
+ *
+ * \sa setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return setLinSpaced(size(), low, high);
+}
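+
+// For example:
+//
+//   Eigen::VectorXf v(4);
+//   v.setLinSpaced(4, 0.f, 3.f);   // 0 1 2 3
+//   v.setLinSpaced(1.f, 4.f);      // keeps the current size: 1 2 3 4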
+
+// zero:
+
+/** \returns an expression of a zero matrix.
+ *
+ * The parameters \a nbRows and \a nbCols are the number of rows and of columns of
+ * the returned matrix. Must be compatible with this MatrixBase type.
+ *
+ * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+ * it is redundant to pass \a nbRows and \a nbCols as arguments, so Zero() should be used
+ * instead.
+ *
+ * Example: \include MatrixBase_zero_int_int.cpp
+ * Output: \verbinclude MatrixBase_zero_int_int.out
+ *
+ * \sa Zero(), Zero(Index)
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero(Index nbRows, Index nbCols)
+{
+ return Constant(nbRows, nbCols, Scalar(0));
+}
+
+/** \returns an expression of a zero vector.
+ *
+ * The parameter \a size is the size of the returned vector.
+ * Must be compatible with this MatrixBase type.
+ *
+ * \only_for_vectors
+ *
+ * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+ * it is redundant to pass \a size as argument, so Zero() should be used
+ * instead.
+ *
+ * Example: \include MatrixBase_zero_int.cpp
+ * Output: \verbinclude MatrixBase_zero_int.out
+ *
+ * \sa Zero(), Zero(Index,Index)
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero(Index size)
+{
+ return Constant(size, Scalar(0));
+}
+
+/** \returns an expression of a fixed-size zero matrix or vector.
+ *
+ * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
+ * need to use the variants taking size arguments.
+ *
+ * Example: \include MatrixBase_zero.cpp
+ * Output: \verbinclude MatrixBase_zero.out
+ *
+ * \sa Zero(Index), Zero(Index,Index)
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero()
+{
+ return Constant(Scalar(0));
+}
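+
+// For example:
+//
+//   Eigen::MatrixXd a = Eigen::MatrixXd::Zero(3, 4);   // dynamic size
+//   Eigen::Vector3f b = Eigen::Vector3f::Zero();       // fixed size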
+
+/** \returns true if *this is approximately equal to the zero matrix,
+ * within the precision given by \a prec.
+ *
+ * Example: \include MatrixBase_isZero.cpp
+ * Output: \verbinclude MatrixBase_isZero.out
+ *
+ * \sa class CwiseNullaryOp, Zero()
+ */
+template<typename Derived>
+bool DenseBase<Derived>::isZero(const RealScalar& prec) const
+{
+ for(Index j = 0; j < cols(); ++j)
+ for(Index i = 0; i < rows(); ++i)
+ if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec))
+ return false;
+ return true;
+}
+
+/** Sets all coefficients in this expression to zero.
+ *
+ * Example: \include MatrixBase_setZero.cpp
+ * Output: \verbinclude MatrixBase_setZero.out
+ *
+ * \sa class CwiseNullaryOp, Zero()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
+{
+ return setConstant(Scalar(0));
+}
+
+/** Resizes to the given \a size, and sets all coefficients in this expression to zero.
+ *
+ * \only_for_vectors
+ *
+ * Example: \include Matrix_setZero_int.cpp
+ * Output: \verbinclude Matrix_setZero_int.out
+ *
+ * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setZero(Index newSize)
+{
+ resize(newSize);
+ return setConstant(Scalar(0));
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to zero.
+ *
+ * \param nbRows the new number of rows
+ * \param nbCols the new number of columns
+ *
+ * Example: \include Matrix_setZero_int_int.cpp
+ * Output: \verbinclude Matrix_setZero_int_int.out
+ *
+ * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setZero(Index nbRows, Index nbCols)
+{
+ resize(nbRows, nbCols);
+ return setConstant(Scalar(0));
+}
+
+// ones:
+
+/** \returns an expression of a matrix where all coefficients equal one.
+ *
+ * The parameters \a nbRows and \a nbCols are the number of rows and of columns of
+ * the returned matrix. Must be compatible with this MatrixBase type.
+ *
+ * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+ * it is redundant to pass \a nbRows and \a nbCols as arguments, so Ones() should be used
+ * instead.
+ *
+ * Example: \include MatrixBase_ones_int_int.cpp
+ * Output: \verbinclude MatrixBase_ones_int_int.out
+ *
+ * \sa Ones(), Ones(Index), isOnes(), class Ones
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones(Index nbRows, Index nbCols)
+{
+ return Constant(nbRows, nbCols, Scalar(1));
+}
+
+/** \returns an expression of a vector where all coefficients equal one.
+ *
+ * The parameter \a newSize is the size of the returned vector.
+ * Must be compatible with this MatrixBase type.
+ *
+ * \only_for_vectors
+ *
+ * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+ * it is redundant to pass \a newSize as argument, so Ones() should be used
+ * instead.
+ *
+ * Example: \include MatrixBase_ones_int.cpp
+ * Output: \verbinclude MatrixBase_ones_int.out
+ *
+ * \sa Ones(), Ones(Index,Index), isOnes(), class Ones
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones(Index newSize)
+{
+ return Constant(newSize, Scalar(1));
+}
+
+/** \returns an expression of a fixed-size matrix or vector where all coefficients equal one.
+ *
+ * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
+ * need to use the variants taking size arguments.
+ *
+ * Example: \include MatrixBase_ones.cpp
+ * Output: \verbinclude MatrixBase_ones.out
+ *
+ * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones()
+{
+ return Constant(Scalar(1));
+}
+
+/** \returns true if *this is approximately equal to the matrix where all coefficients
+ * are equal to 1, within the precision given by \a prec.
+ *
+ * Example: \include MatrixBase_isOnes.cpp
+ * Output: \verbinclude MatrixBase_isOnes.out
+ *
+ * \sa class CwiseNullaryOp, Ones()
+ */
+template<typename Derived>
+bool DenseBase<Derived>::isOnes
+(const RealScalar& prec) const
+{
+ return isApproxToConstant(Scalar(1), prec);
+}
+
+/** Sets all coefficients in this expression to one.
+ *
+ * Example: \include MatrixBase_setOnes.cpp
+ * Output: \verbinclude MatrixBase_setOnes.out
+ *
+ * \sa class CwiseNullaryOp, Ones()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
+{
+ return setConstant(Scalar(1));
+}
+
+/** Resizes to the given \a newSize, and sets all coefficients in this expression to one.
+ *
+ * \only_for_vectors
+ *
+ * Example: \include Matrix_setOnes_int.cpp
+ * Output: \verbinclude Matrix_setOnes_int.out
+ *
+ * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setOnes(Index newSize)
+{
+ resize(newSize);
+ return setConstant(Scalar(1));
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to one.
+ *
+ * \param nbRows the new number of rows
+ * \param nbCols the new number of columns
+ *
+ * Example: \include Matrix_setOnes_int_int.cpp
+ * Output: \verbinclude Matrix_setOnes_int_int.out
+ *
+ * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setOnes(Index nbRows, Index nbCols)
+{
+ resize(nbRows, nbCols);
+ return setConstant(Scalar(1));
+}
+
+// Identity:
+
+/** \returns an expression of the identity matrix (not necessarily square).
+ *
+ * The parameters \a nbRows and \a nbCols are the number of rows and of columns of
+ * the returned matrix. Must be compatible with this MatrixBase type.
+ *
+ * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+ * it is redundant to pass \a nbRows and \a nbCols as arguments, so Identity() should be used
+ * instead.
+ *
+ * Example: \include MatrixBase_identity_int_int.cpp
+ * Output: \verbinclude MatrixBase_identity_int_int.out
+ *
+ * \sa Identity(), setIdentity(), isIdentity()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+MatrixBase<Derived>::Identity(Index nbRows, Index nbCols)
+{
+ return DenseBase<Derived>::NullaryExpr(nbRows, nbCols, internal::scalar_identity_op<Scalar>());
+}
+
+/** \returns an expression of the identity matrix (not necessarily square).
+ *
+ * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
+ * need to use the variant taking size arguments.
+ *
+ * Example: \include MatrixBase_identity.cpp
+ * Output: \verbinclude MatrixBase_identity.out
+ *
+ * \sa Identity(Index,Index), setIdentity(), isIdentity()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+MatrixBase<Derived>::Identity()
+{
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return MatrixBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op<Scalar>());
+}
+
+/** \returns true if *this is approximately equal to the identity matrix
+ * (not necessarily square),
+ * within the precision given by \a prec.
+ *
+ * Example: \include MatrixBase_isIdentity.cpp
+ * Output: \verbinclude MatrixBase_isIdentity.out
+ *
+ * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), setIdentity()
+ */
+template<typename Derived>
+bool MatrixBase<Derived>::isIdentity
+(const RealScalar& prec) const
+{
+ for(Index j = 0; j < cols(); ++j)
+ {
+ for(Index i = 0; i < rows(); ++i)
+ {
+ if(i == j)
+ {
+ if(!internal::isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec))
+ return false;
+ }
+ else
+ {
+ if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+namespace internal {
+
+template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
+struct setIdentity_impl
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& run(Derived& m)
+ {
+ return m = Derived::Identity(m.rows(), m.cols());
+ }
+};
+
+template<typename Derived>
+struct setIdentity_impl<Derived, true>
+{
+ typedef typename Derived::Index Index;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& run(Derived& m)
+ {
+ m.setZero();
+ const Index size = (std::min)(m.rows(), m.cols());
+ for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
+ return m;
+ }
+};
+
+} // end namespace internal
+
+/** Writes the identity expression (not necessarily square) into *this.
+ *
+ * Example: \include MatrixBase_setIdentity.cpp
+ * Output: \verbinclude MatrixBase_setIdentity.out
+ *
+ * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
+{
+ return internal::setIdentity_impl<Derived>::run(derived());
+}
+
+/** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this.
+ *
+ * \param nbRows the new number of rows
+ * \param nbCols the new number of columns
+ *
+ * Example: \include Matrix_setIdentity_int_int.cpp
+ * Output: \verbinclude Matrix_setIdentity_int_int.out
+ *
+ * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index nbRows, Index nbCols)
+{
+ derived().resize(nbRows, nbCols);
+ return setIdentity();
+}
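+
+// For example:
+//
+//   Eigen::Matrix3d r;
+//   r.setIdentity();            // fixed size, no resize needed
+//   Eigen::MatrixXd m;
+//   m.setIdentity(4, 4);        // resizes, then writes the identity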
+
+/** \returns an expression of the i-th unit (basis) vector.
+ *
+ * \only_for_vectors
+ *
+ * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
+}
+
+/** \returns an expression of the i-th unit (basis) vector.
+ *
+ * \only_for_vectors
+ *
+ * This variant is for fixed-size vector only.
+ *
+ * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return BasisReturnType(SquareMatrixType::Identity(),i);
+}
+
+/** \returns an expression of the X axis unit vector (1{,0}^*)
+ *
+ * \only_for_vectors
+ *
+ * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
+{ return Derived::Unit(0); }
+
+/** \returns an expression of the Y axis unit vector (0,1{,0}^*)
+ *
+ * \only_for_vectors
+ *
+ * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
+{ return Derived::Unit(1); }
+
+/** \returns an expression of the Z axis unit vector (0,0,1{,0}^*)
+ *
+ * \only_for_vectors
+ *
+ * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
+{ return Derived::Unit(2); }
+
+/** \returns an expression of the W axis unit vector (0,0,0,1)
+ *
+ * \only_for_vectors
+ *
+ * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
+{ return Derived::Unit(3); }
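+
+// For example:
+//
+//   Eigen::Vector4f e2 = Eigen::Vector4f::Unit(2);      // (0, 0, 1, 0)
+//   Eigen::VectorXd e0 = Eigen::VectorXd::Unit(5, 0);   // size 5, first basis vector
+//   Eigen::Vector3f x  = Eigen::Vector3f::UnitX();      // (1, 0, 0)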
+
+} // end namespace Eigen
+
+#endif // EIGEN_CWISE_NULLARY_OP_H
diff --git a/third_party/eigen3/Eigen/src/Core/CwiseUnaryOp.h b/third_party/eigen3/Eigen/src/Core/CwiseUnaryOp.h
new file mode 100644
index 0000000000..aa7df197f9
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/CwiseUnaryOp.h
@@ -0,0 +1,135 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CWISE_UNARY_OP_H
+#define EIGEN_CWISE_UNARY_OP_H
+
+namespace Eigen {
+
+/** \class CwiseUnaryOp
+ * \ingroup Core_Module
+ *
+ * \brief Generic expression where a coefficient-wise unary operator is applied to an expression
+ *
+ * \param UnaryOp template functor implementing the operator
+ * \param XprType the type of the expression to which we are applying the unary operator
+ *
+ * This class represents an expression where a unary operator is applied to an expression.
+ * It is the return type of all operations taking exactly 1 input expression, regardless of the
+ * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
+ * is considered unary, because only the right-hand side is an expression, and its
+ * return type is a specialization of CwiseUnaryOp.
+ *
+ * Most of the time, this is the only way that it is used, so you typically don't have to name
+ * CwiseUnaryOp types explicitly.
+ *
+ * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
+ */
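+
+// For example, both expressions below are CwiseUnaryOp specializations; the
+// second one uses MatrixBase::unaryExpr with a small custom functor (a
+// minimal sketch):
+//
+//   struct HalfOp { float operator()(float x) const { return 0.5f * x; } };
+//   Eigen::Matrix2f m = Eigen::Matrix2f::Random();
+//   Eigen::Matrix2f a = 3.0f * m;                 // scalar multiple, still unary
+//   Eigen::Matrix2f b = m.unaryExpr(HalfOp());    // custom coefficient-wise op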
+
+namespace internal {
+template<typename UnaryOp, typename XprType>
+struct traits<CwiseUnaryOp<UnaryOp, XprType> >
+ : traits<XprType>
+{
+ typedef typename result_of<
+ UnaryOp(typename XprType::Scalar)
+ >::type Scalar;
+ typedef typename XprType::Nested XprTypeNested;
+ typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
+ enum {
+ Flags = _XprTypeNested::Flags & (
+ HereditaryBits | LinearAccessBit | AlignedBit
+ | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
+ CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
+ };
+};
+}
+
+template<typename UnaryOp, typename XprType, typename StorageKind>
+class CwiseUnaryOpImpl;
+
+template<typename UnaryOp, typename XprType>
+class CwiseUnaryOp : internal::no_assignment_operator,
+ public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>
+{
+ public:
+
+ typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
+
+ EIGEN_DEVICE_FUNC
+ inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+ : m_xpr(xpr), m_functor(func) {}
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); }
+
+ /** \returns the functor representing the unary operation */
+ EIGEN_DEVICE_FUNC
+ const UnaryOp& functor() const { return m_functor; }
+
+ /** \returns the nested expression */
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<typename XprType::Nested>::type&
+ nestedExpression() const { return m_xpr; }
+
+ /** \returns the nested expression */
+ EIGEN_DEVICE_FUNC
+ typename internal::remove_all<typename XprType::Nested>::type&
+ nestedExpression() { return m_xpr.const_cast_derived(); }
+
+ protected:
+ typename XprType::Nested m_xpr;
+ const UnaryOp m_functor;
+};
+
+// This is the generic implementation for dense storage.
+// It can be used for any expression types implementing the dense concept.
+template<typename UnaryOp, typename XprType>
+class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
+ : public internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
+{
+ public:
+
+ typedef CwiseUnaryOp<UnaryOp, XprType> Derived;
+ typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
+ {
+ return derived().functor()(derived().nestedExpression().coeff(rowId, colId));
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
+ {
+ return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(rowId, colId));
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
+ {
+ return derived().functor()(derived().nestedExpression().coeff(index));
+ }
+
+ template<int LoadMode>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
+ {
+ return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(index));
+ }
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_CWISE_UNARY_OP_H
diff --git a/third_party/eigen3/Eigen/src/Core/CwiseUnaryView.h b/third_party/eigen3/Eigen/src/Core/CwiseUnaryView.h
new file mode 100644
index 0000000000..b2638d3265
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/CwiseUnaryView.h
@@ -0,0 +1,139 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CWISE_UNARY_VIEW_H
+#define EIGEN_CWISE_UNARY_VIEW_H
+
+namespace Eigen {
+
+/** \class CwiseUnaryView
+ * \ingroup Core_Module
+ *
+ * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
+ *
+ * \param ViewOp template functor implementing the view
+ * \param MatrixType the type of the matrix to which we are applying the unary operator
+ *
+ * This class represents an lvalue expression of a generic unary view operator of a matrix or a vector.
+ * It is the return type of real() and imag(), and most of the time this is the only way it is used.
+ *
+ * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
+ */
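+
+// For example, real() and imag() of a complex matrix are writable views:
+//
+//   Eigen::MatrixXcf m = Eigen::MatrixXcf::Zero(2, 2);
+//   m.real().setOnes();        // writes the real parts in place
+//   m.imag()(0, 0) = 1.0f;     // writes a single imaginary coefficient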
+
+namespace internal {
+template<typename ViewOp, typename MatrixType>
+struct traits<CwiseUnaryView<ViewOp, MatrixType> >
+ : traits<MatrixType>
+{
+ typedef typename result_of<
+ ViewOp(typename traits<MatrixType>::Scalar)
+ >::type Scalar;
+ typedef typename MatrixType::Nested MatrixTypeNested;
+ typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
+ enum {
+ Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
+ CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost,
+ MatrixTypeInnerStride = inner_stride_at_compile_time<MatrixType>::ret,
+ // need to cast the sizeof's from size_t to int explicitly, otherwise:
+ // "error: no integral type can represent all of the enumerator values
+ InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic
+ ? int(Dynamic)
+ : int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
+ OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret == Dynamic
+ ? int(Dynamic)
+ : outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar))
+ };
+};
+}
+
+template<typename ViewOp, typename MatrixType, typename StorageKind>
+class CwiseUnaryViewImpl;
+
+template<typename ViewOp, typename MatrixType>
+class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
+{
+ public:
+
+ typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
+
+ inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp())
+ : m_matrix(mat), m_functor(func) {}
+
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)
+
+ EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); }
+ EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); }
+
+ /** \returns the functor representing the unary operation */
+ const ViewOp& functor() const { return m_functor; }
+
+ /** \returns the nested expression */
+ const typename internal::remove_all<typename MatrixType::Nested>::type&
+ nestedExpression() const { return m_matrix; }
+
+ /** \returns the nested expression */
+ typename internal::remove_all<typename MatrixType::Nested>::type&
+ nestedExpression() { return m_matrix.const_cast_derived(); }
+
+ protected:
+ // FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
+ typename internal::nested<MatrixType>::type m_matrix;
+ ViewOp m_functor;
+};
+
+template<typename ViewOp, typename MatrixType>
+class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
+ : public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
+{
+ public:
+
+ typedef CwiseUnaryView<ViewOp, MatrixType> Derived;
+ typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;
+
+ EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
+
+ inline Scalar* data() { return &coeffRef(0); }
+ inline const Scalar* data() const { return &coeff(0); }
+
+ inline Index innerStride() const
+ {
+ return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
+ }
+
+ inline Index outerStride() const
+ {
+ return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
+ }
+
+ EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
+ {
+ return derived().functor()(derived().nestedExpression().coeff(row, col));
+ }
+
+ EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
+ {
+ return derived().functor()(derived().nestedExpression().coeff(index));
+ }
+
+ EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
+ {
+ return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col));
+ }
+
+ EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
+ {
+ return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index));
+ }
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_CWISE_UNARY_VIEW_H
diff --git a/third_party/eigen3/Eigen/src/Core/DenseBase.h b/third_party/eigen3/Eigen/src/Core/DenseBase.h
new file mode 100644
index 0000000000..55cec0bc26
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/DenseBase.h
@@ -0,0 +1,561 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DENSEBASE_H
+#define EIGEN_DENSEBASE_H
+
+namespace Eigen {
+
+namespace internal {
+
+// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type.
+// This dummy function simply aims at checking that at compile time.
+static inline void check_DenseIndex_is_signed() {
+ EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE);
+}
+
+} // end namespace internal
+
+/** \class DenseBase
+ * \ingroup Core_Module
+ *
+ * \brief Base class for all dense matrices, vectors, and arrays
+ *
+ * This class is the base that is inherited by all dense objects (matrix, vector, arrays,
+ * and related expression types). The common Eigen API for dense objects is contained in this class.
+ *
+ * \tparam Derived is the derived type, e.g., a matrix type or an expression.
+ *
+ * This class can be extended with the help of the plugin mechanism described on the page
+ * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
+ *
+ * \sa \ref TopicClassHierarchy
+ */
+template<typename Derived> class DenseBase
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ : public internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
+ typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>
+#else
+ : public DenseCoeffsBase<Derived>
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+{
+ public:
+ using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
+ typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
+
+ class InnerIterator;
+
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+
+ /** \brief The type of indices
+ * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
+ * \sa \ref TopicPreprocessorDirectives.
+ */
+ typedef typename internal::traits<Derived>::Index Index;
+
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ typedef DenseCoeffsBase<Derived> Base;
+ using Base::derived;
+ using Base::const_cast_derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::rowIndexByOuterInner;
+ using Base::colIndexByOuterInner;
+ using Base::coeff;
+ using Base::coeffByOuterInner;
+ using Base::packet;
+ using Base::packetByOuterInner;
+ using Base::writePacket;
+ using Base::writePacketByOuterInner;
+ using Base::coeffRef;
+ using Base::coeffRefByOuterInner;
+ using Base::copyCoeff;
+ using Base::copyCoeffByOuterInner;
+ using Base::copyPacket;
+ using Base::copyPacketByOuterInner;
+ using Base::operator();
+ using Base::operator[];
+ using Base::x;
+ using Base::y;
+ using Base::z;
+ using Base::w;
+ using Base::stride;
+ using Base::innerStride;
+ using Base::outerStride;
+ using Base::rowStride;
+ using Base::colStride;
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+
+ enum {
+
+ RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+ /**< The number of rows at compile-time. This is just a copy of the value provided
+ * by the \a Derived type. If a value is not known at compile-time,
+ * it is set to the \a Dynamic constant.
+ * \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */
+
+ ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+ /**< The number of columns at compile-time. This is just a copy of the value provided
+ * by the \a Derived type. If a value is not known at compile-time,
+ * it is set to the \a Dynamic constant.
+ * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */
+
+
+ SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime>::ret),
+ /**< This is equal to the number of coefficients, i.e. the number of
+ * rows times the number of columns, or to \a Dynamic if this is not
+ * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
+
+ MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
+ /**< This value is equal to the maximum possible number of rows that this expression
+ * might have. If this expression might have an arbitrarily high number of rows,
+ * this value is set to \a Dynamic.
+ *
+ * This value is useful to know when evaluating an expression, in order to determine
+ * whether it is possible to avoid doing a dynamic memory allocation.
+ *
+ * \sa RowsAtCompileTime, MaxColsAtCompileTime, MaxSizeAtCompileTime
+ */
+
+ MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+ /**< This value is equal to the maximum possible number of columns that this expression
+ * might have. If this expression might have an arbitrarily high number of columns,
+ * this value is set to \a Dynamic.
+ *
+ * This value is useful to know when evaluating an expression, in order to determine
+ * whether it is possible to avoid doing a dynamic memory allocation.
+ *
+ * \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime
+ */
+
+ MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
+ internal::traits<Derived>::MaxColsAtCompileTime>::ret),
+ /**< This value is equal to the maximum possible number of coefficients that this expression
+ * might have. If this expression might have an arbitrarily high number of coefficients,
+ * this value is set to \a Dynamic.
+ *
+ * This value is useful to know when evaluating an expression, in order to determine
+ * whether it is possible to avoid doing a dynamic memory allocation.
+ *
+ * \sa SizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime
+ */
+
+ IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
+ || internal::traits<Derived>::MaxColsAtCompileTime == 1,
+ /**< This is set to true if either the number of rows or the number of
+ * columns is known at compile-time to be equal to 1. Indeed, in that case,
+ * we are dealing with a column-vector (if there is only one column) or with
+ * a row-vector (if there is only one row). */
+
+ Flags = internal::traits<Derived>::Flags,
+ /**< This stores expression \ref flags flags which may or may not be inherited by new expressions
+ * constructed from this one. See the \ref flags "list of flags".
+ */
+
+ IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression has row-major storage order. */
+
+ InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
+ : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+
+ CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
+ /**< This is a rough measure of how expensive it is to read one coefficient from
+ * this expression.
+ */
+
+ InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
+ OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
+ };
+
+ enum { ThisConstantIsPrivateInPlainObjectBase };
+
+ /** \returns the number of nonzero coefficients which is in practice the number
+ * of stored coefficients. */
+ EIGEN_DEVICE_FUNC
+ inline Index nonZeros() const { return size(); }
+ /** \returns true if either the number of rows or the number of columns is equal to 1.
+ * In other words, this function returns
+ * \code rows()==1 || cols()==1 \endcode
+ * \sa rows(), cols(), IsVectorAtCompileTime. */
+
+ /** \returns the outer size.
+ *
+ * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
+ * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
+ * column-major matrix, and the number of rows for a row-major matrix. */
+ EIGEN_DEVICE_FUNC
+ Index outerSize() const
+ {
+ return IsVectorAtCompileTime ? 1
+ : int(IsRowMajor) ? this->rows() : this->cols();
+ }
+
+ /** \returns the inner size.
+ *
+ * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
+ * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a
+ * column-major matrix, and the number of columns for a row-major matrix. */
+ EIGEN_DEVICE_FUNC
+ Index innerSize() const
+ {
+ return IsVectorAtCompileTime ? this->size()
+ : int(IsRowMajor) ? this->cols() : this->rows();
+ }
+
+ /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
+ * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
+ * nothing else.
+ */
+ EIGEN_DEVICE_FUNC
+ void resize(Index newSize)
+ {
+ EIGEN_ONLY_USED_FOR_DEBUG(newSize);
+ eigen_assert(newSize == this->size()
+ && "DenseBase::resize() does not actually allow to resize.");
+ }
+ /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
+ * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
+ * nothing else.
+ */
+ EIGEN_DEVICE_FUNC
+ void resize(Index nbRows, Index nbCols)
+ {
+ EIGEN_ONLY_USED_FOR_DEBUG(nbRows);
+ EIGEN_ONLY_USED_FOR_DEBUG(nbCols);
+ eigen_assert(nbRows == this->rows() && nbCols == this->cols()
+ && "DenseBase::resize() does not actually allow to resize.");
+ }
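+ // An illustrative sketch of the distinction above (assumes a dynamic MatrixXd):
+ //   Eigen::MatrixXd m(2,2);
+ //   m.resize(3,3);                  // Matrix::resize(): really reallocates
+ //   m.block(0,0,2,2).resize(2,2);   // DenseBase::resize(): only asserts 2x2 == 2x2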
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+
+ /** \internal Represents a matrix with all coefficients equal to one another*/
+ typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
+ /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
+ typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
+ /** \internal Represents a vector with linearly spaced coefficients that allows random access. */
+ typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
+ /** \internal the return type of MatrixBase::eigenvalues() */
+ typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
+
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+ /** Copies \a other into *this. \returns a reference to *this. */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const DenseBase<OtherDerived>& other);
+
+ /** Special case of the template operator=, in order to prevent the compiler
+ * from generating a default operator= (issue hit with g++ 4.1)
+ */
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const DenseBase& other);
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const EigenBase<OtherDerived> &other);
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator+=(const EigenBase<OtherDerived> &other);
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator-=(const EigenBase<OtherDerived> &other);
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const ReturnByValue<OtherDerived>& func);
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** Copies \a other into *this without evaluating other. \returns a reference to *this. */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& lazyAssign(const DenseBase<OtherDerived>& other);
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+ EIGEN_DEVICE_FUNC
+ CommaInitializer<Derived> operator<< (const Scalar& s);
+
+ template<unsigned int Added,unsigned int Removed>
+ const Flagged<Derived, Added, Removed> flagged() const;
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
+
+ EIGEN_DEVICE_FUNC
+ Eigen::Transpose<Derived> transpose();
+ typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+ EIGEN_DEVICE_FUNC
+ ConstTransposeReturnType transpose() const;
+ EIGEN_DEVICE_FUNC
+ void transposeInPlace();
+#ifndef EIGEN_NO_DEBUG
+ protected:
+ template<typename OtherDerived>
+ void checkTransposeAliasing(const OtherDerived& other) const;
+ public:
+#endif
+
+
+ EIGEN_DEVICE_FUNC static const ConstantReturnType
+ Constant(Index rows, Index cols, const Scalar& value);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType
+ Constant(Index size, const Scalar& value);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType
+ Constant(const Scalar& value);
+
+ EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
+ LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
+ LinSpaced(Index size, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
+ LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
+ LinSpaced(const Scalar& low, const Scalar& high);
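+ // An illustrative sketch of LinSpaced (assumes a dynamic VectorXd):
+ //   Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(5, 0.0, 1.0);
+ //   // v holds 0, 0.25, 0.5, 0.75, 1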
+
+ template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ static const CwiseNullaryOp<CustomNullaryOp, Derived>
+ NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
+ template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ static const CwiseNullaryOp<CustomNullaryOp, Derived>
+ NullaryExpr(Index size, const CustomNullaryOp& func);
+ template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ static const CwiseNullaryOp<CustomNullaryOp, Derived>
+ NullaryExpr(const CustomNullaryOp& func);
+
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
+
+ EIGEN_DEVICE_FUNC void fill(const Scalar& value);
+ EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
+ EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC Derived& setZero();
+ EIGEN_DEVICE_FUNC Derived& setOnes();
+ EIGEN_DEVICE_FUNC Derived& setRandom();
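+ // An illustrative sketch of the initializers above (assumes dynamic types):
+ //   Eigen::MatrixXd m = Eigen::MatrixXd::Constant(2, 3, 1.5);  // 2x3, all coefficients 1.5
+ //   m.setZero();                                               // all coefficients 0
+ //   Eigen::VectorXd v = Eigen::VectorXd::Ones(4);              // 1 1 1 1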
+
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC
+ bool isApprox(const DenseBase<OtherDerived>& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC
+ bool isMuchSmallerThan(const RealScalar& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC
+ bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+ EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+ inline bool hasNaN() const;
+ inline bool allFinite() const;
+
+ EIGEN_DEVICE_FUNC
+ inline Derived& operator*=(const Scalar& other);
+ EIGEN_DEVICE_FUNC
+ inline Derived& operator/=(const Scalar& other);
+
+ typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
+ /** \returns the matrix or vector obtained by evaluating this expression.
+ *
+ * Notice that in the case of a plain matrix or vector (not an expression) this function just returns
+ * a const reference, in order to avoid a useless copy.
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE EvalReturnType eval() const
+ {
+ // Even though MSVC does not honor strong inlining when the return type
+ // is a dynamic matrix, we desperately need strong inlining for fixed
+ // size types on MSVC.
+ return typename internal::eval<Derived>::type(derived());
+ }
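+ // An illustrative sketch (assumes a dynamic MatrixXd): eval() forces the
+ // expression into a temporary, which is one way to avoid aliasing:
+ //   Eigen::MatrixXd a(3,3);
+ //   a.setRandom();
+ //   a = a.transpose().eval();   // evaluate into a temporary first, then assign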
+
+ /** swaps *this with the expression \a other.
+ *
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(const DenseBase<OtherDerived>& other,
+ int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
+ {
+ SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
+ }
+
+ /** swaps *this with the matrix or array \a other.
+ *
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(PlainObjectBase<OtherDerived>& other)
+ {
+ SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
+ }
+
+
+ EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
+ EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+ EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
+ template<bool Enable> EIGEN_DEVICE_FUNC
+ inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
+ template<bool Enable> EIGEN_DEVICE_FUNC
+ inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
+
+ EIGEN_DEVICE_FUNC Scalar sum() const;
+ EIGEN_DEVICE_FUNC Scalar mean() const;
+ EIGEN_DEVICE_FUNC Scalar trace() const;
+
+ EIGEN_DEVICE_FUNC Scalar prod() const;
+
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
+
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
+
+ template<typename BinaryOp>
+ EIGEN_DEVICE_FUNC
+ typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
+ redux(const BinaryOp& func) const;
+
+ template<typename Visitor>
+ EIGEN_DEVICE_FUNC
+ void visit(Visitor& func) const;
+
+ inline const WithFormat<Derived> format(const IOFormat& fmt) const;
+
+ /** \returns the unique coefficient of a 1x1 expression */
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType value() const
+ {
+ EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+ eigen_assert(this->rows() == 1 && this->cols() == 1);
+ return derived().coeff(0,0);
+ }
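+ // An illustrative sketch: value() extracts the single coefficient of a 1x1
+ // expression, e.g. an inner product written as a matrix product:
+ //   Eigen::RowVector3d r(1,2,3);
+ //   Eigen::Vector3d    c(4,5,6);
+ //   double d = (r * c).value();   // 1*4 + 2*5 + 3*6 = 32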
+
+ bool all() const;
+ bool any() const;
+ Index count() const;
+
+ typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
+ typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
+ typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
+ typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;
+
+ ConstRowwiseReturnType rowwise() const;
+ RowwiseReturnType rowwise();
+ ConstColwiseReturnType colwise() const;
+ ColwiseReturnType colwise();
+
+ static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols);
+ static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index size);
+ static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random();
+
+ template<typename ThenDerived,typename ElseDerived>
+ const Select<Derived,ThenDerived,ElseDerived>
+ select(const DenseBase<ThenDerived>& thenMatrix,
+ const DenseBase<ElseDerived>& elseMatrix) const;
+
+ template<typename ThenDerived>
+ inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
+ select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
+
+ template<typename ElseDerived>
+ inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
+ select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
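+ // A minimal illustrative sketch of select(), clamping negative entries to zero
+ // (assumes a dynamic ArrayXd):
+ //   Eigen::ArrayXd x(4); x << -1, 2, -3, 4;
+ //   Eigen::ArrayXd y = (x > 0).select(x, 0.0);   // y is 0, 2, 0, 4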
+
+ template<int p> RealScalar lpNorm() const;
+
+ template<int RowFactor, int ColFactor>
+ const Replicate<Derived,RowFactor,ColFactor> replicate() const;
+ const Replicate<Derived,Dynamic,Dynamic> replicate(Index rowFactor,Index colFactor) const;
+
+ typedef Reverse<Derived, BothDirections> ReverseReturnType;
+ typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
+ ReverseReturnType reverse();
+ ConstReverseReturnType reverse() const;
+ void reverseInPlace();
+
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
+# include "../plugins/BlockMethods.h"
+# ifdef EIGEN_DENSEBASE_PLUGIN
+# include EIGEN_DENSEBASE_PLUGIN
+# endif
+// Because of an intra-Google include scanner limitation,
+// third_party/stan cannot define the EIGEN_DENSEBASE_PLUGIN macro
+// as "stan/math/matrix/EigenDenseBaseAddons.hpp". According to
+// ambrose@google.com, this is a known limitation: the include
+// scanner doesn't maintain any preprocessor state about macros,
+// previously visited files, etc. See also //base/stacktrace.cc.
+# ifdef STAN_MATH_MATRIX_EIGEN_DENSEBASE_PLUGIN
+# include "stan/math/matrix/EigenDenseBaseAddons.hpp"
+# endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
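+// An illustrative, hypothetical use of the plugin hook above (the file and member
+// names are made up): defining
+//   #define EIGEN_DENSEBASE_PLUGIN "MyDenseBaseAddons.h"
+// before including any Eigen header splices that file into this class, so a plugin
+// containing e.g.
+//   inline Scalar firstCoeff() const { return derived().coeff(0); }
+// adds a firstCoeff() member to every dense expression.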
+
+#ifdef EIGEN2_SUPPORT
+
+ Block<Derived> corner(CornerType type, Index cRows, Index cCols);
+ const Block<Derived> corner(CornerType type, Index cRows, Index cCols) const;
+ template<int CRows, int CCols>
+ Block<Derived, CRows, CCols> corner(CornerType type);
+ template<int CRows, int CCols>
+ const Block<Derived, CRows, CCols> corner(CornerType type) const;
+
+#endif // EIGEN2_SUPPORT
+
+
+ // disable the use of evalTo for dense objects with a nice compilation error
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void evalTo(Dest& ) const
+ {
+ EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
+ }
+
+ protected:
+ /** Default constructor. Do nothing. */
+ EIGEN_DEVICE_FUNC DenseBase()
+ {
+ /* Just checks for self-consistency of the flags.
+ * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
+ */
+#ifdef EIGEN_INTERNAL_DEBUGGING
+ EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
+ && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))),
+ INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)
+#endif
+ }
+
+ private:
+ EIGEN_DEVICE_FUNC explicit DenseBase(int);
+ EIGEN_DEVICE_FUNC DenseBase(int,int);
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_DENSEBASE_H
diff --git a/third_party/eigen3/Eigen/src/Core/DenseCoeffsBase.h b/third_party/eigen3/Eigen/src/Core/DenseCoeffsBase.h
new file mode 100644
index 0000000000..efabb5e675
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/DenseCoeffsBase.h
@@ -0,0 +1,787 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DENSECOEFFSBASE_H
+#define EIGEN_DENSECOEFFSBASE_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename T> struct add_const_on_value_type_if_arithmetic
+{
+ typedef typename conditional<is_arithmetic<T>::value, T, typename add_const_on_value_type<T>::type>::type type;
+};
+}
+
+/** \brief Base class providing read-only coefficient access to matrices and arrays.
+ * \ingroup Core_Module
+ * \tparam Derived Type of the derived class
+ * \tparam #ReadOnlyAccessors Constant indicating read-only access
+ *
+ * This class defines the \c operator() \c const function and friends, which can be used to read specific
+ * entries of a matrix or array.
+ *
+ * \sa DenseCoeffsBase<Derived, WriteAccessors>, DenseCoeffsBase<Derived, DirectAccessors>,
+ * \ref TopicClassHierarchy
+ */
+template<typename Derived>
+class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
+{
+ public:
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Index Index;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+
+ // Explanation for this CoeffReturnType typedef.
+ // - This is the return type of the coeff() method.
+ // - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references
+ // to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value).
+ // - The is_arithmetic check is required since "const int", "const double", etc. will cause warnings on some systems,
+ // while the declaration of "const T", where T is a non-arithmetic type, does not. Always returning "const Scalar&" is
+ // not possible, since the underlying expressions might not offer a valid address for the reference to refer to.
+ typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
+ const Scalar&,
+ typename internal::conditional<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>::type
+ >::type CoeffReturnType;
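+ // For example (informal): a MatrixXd carries the LvalueBit, so its coeff() returns
+ // "const double&", whereas an expression such as (a + b) does not, so its coeff()
+ // returns a plain "double" by value.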
+
+ typedef typename internal::add_const_on_value_type_if_arithmetic<
+ typename internal::packet_traits<Scalar>::type
+ >::type PacketReturnType;
+
+ typedef EigenBase<Derived> Base;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
+ {
+ return int(Derived::RowsAtCompileTime) == 1 ? 0
+ : int(Derived::ColsAtCompileTime) == 1 ? inner
+ : int(Derived::Flags)&RowMajorBit ? outer
+ : inner;
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
+ {
+ return int(Derived::ColsAtCompileTime) == 1 ? 0
+ : int(Derived::RowsAtCompileTime) == 1 ? inner
+ : int(Derived::Flags)&RowMajorBit ? inner
+ : outer;
+ }
+
+ /** Short version: don't use this function, use
+ * \link operator()(Index,Index) const \endlink instead.
+ *
+ * Long version: this function is similar to
+ * \link operator()(Index,Index) const \endlink, but without the assertion.
+ * Use this for limiting the performance cost of debugging code when doing
+ * repeated coefficient access. Only use this when it is guaranteed that the
+ * parameters \a row and \a col are in range.
+ *
+ * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
+ * function equivalent to \link operator()(Index,Index) const \endlink.
+ *
+ * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
+ {
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return derived().coeff(row, col);
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
+ {
+ return coeff(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+ }
+
+ /** \returns the coefficient at the given row and column.
+ *
+ * \sa operator()(Index,Index), operator[](Index)
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
+ {
+ eigen_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return derived().coeff(row, col);
+ }
+
+ /** Short version: don't use this function, use
+ * \link operator[](Index) const \endlink instead.
+ *
+ * Long version: this function is similar to
+ * \link operator[](Index) const \endlink, but without the assertion.
+ * Use this for limiting the performance cost of debugging code when doing
+ * repeated coefficient access. Only use this when it is guaranteed that the
+ * parameter \a index is in range.
+ *
+ * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
+ * function equivalent to \link operator[](Index) const \endlink.
+ *
+ * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
+ */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ coeff(Index index) const
+ {
+ eigen_internal_assert(index >= 0 && index < size());
+ return derived().coeff(index);
+ }
+
+
+ /** \returns the coefficient at the given index.
+ *
+ * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
+ *
+ * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const,
+ * z() const, w() const
+ */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ operator[](Index index) const
+ {
+ #ifndef EIGEN2_SUPPORT
+ EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
+ THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
+ #endif
+ eigen_assert(index >= 0 && index < size());
+ return derived().coeff(index);
+ }
+
+ /** \returns the coefficient at the given index.
+ *
+ * This is synonymous to operator[](Index) const.
+ *
+ * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
+ *
+ * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const,
+ * z() const, w() const
+ */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ operator()(Index index) const
+ {
+ eigen_assert(index >= 0 && index < size());
+ return derived().coeff(index);
+ }
+
+ /** equivalent to operator[](0). */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ x() const { return (*this)[0]; }
+
+ /** equivalent to operator[](1). */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ y() const { return (*this)[1]; }
+
+ /** equivalent to operator[](2). */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ z() const { return (*this)[2]; }
+
+ /** equivalent to operator[](3). */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ w() const { return (*this)[3]; }
+
+ /** \internal
+ * \returns the packet of coefficients starting at the given row and column. It is your responsibility
+ * to ensure that a packet really starts there. This method is only available on expressions having the
+ * PacketAccessBit.
+ *
+ * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
+ * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
+ * starting at an address which is a multiple of the packet size.
+ */
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
+ {
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return derived().template packet<LoadMode>(row,col);
+ }
+
+
+ /** \internal */
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const
+ {
+ return packet<LoadMode>(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+ }
+
+ /** \internal
+ * \returns the packet of coefficients starting at the given index. It is your responsibility
+ * to ensure that a packet really starts there. This method is only available on expressions having the
+ * PacketAccessBit and the LinearAccessBit.
+ *
+ * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
+ * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
+ * starting at an address which is a multiple of the packet size.
+ */
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
+ {
+ eigen_internal_assert(index >= 0 && index < size());
+ return derived().template packet<LoadMode>(index);
+ }
+
+ protected:
+ // explanation: DenseBase is doing "using ..." on the methods from DenseCoeffsBase.
+ // But some methods are only available in the DirectAccess case.
+ // So we add dummy methods here with these names, so that "using... " doesn't fail.
+ // They're not private so that the child class DenseBase can access them, and they're not public
+ // either since they're an implementation detail, so they have to be protected.
+ void coeffRef();
+ void coeffRefByOuterInner();
+ void writePacket();
+ void writePacketByOuterInner();
+ void copyCoeff();
+ void copyCoeffByOuterInner();
+ void copyPacket();
+ void copyPacketByOuterInner();
+ void stride();
+ void innerStride();
+ void outerStride();
+ void rowStride();
+ void colStride();
+};
+
+/** \brief Base class providing read/write coefficient access to matrices and arrays.
+ * \ingroup Core_Module
+ * \tparam Derived Type of the derived class
+ * \tparam #WriteAccessors Constant indicating read/write access
+ *
+ * This class defines the non-const \c operator() function and friends, which can be used to write specific
+ * entries of a matrix or array. This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which
+ * defines the const variant for reading specific entries.
+ *
+ * \sa DenseCoeffsBase<Derived, DirectAccessors>, \ref TopicClassHierarchy
+ */
+template<typename Derived>
+class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
+{
+ public:
+
+ typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
+
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Index Index;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ using Base::coeff;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+ using Base::rowIndexByOuterInner;
+ using Base::colIndexByOuterInner;
+ using Base::operator[];
+ using Base::operator();
+ using Base::x;
+ using Base::y;
+ using Base::z;
+ using Base::w;
+
+ /** Short version: don't use this function, use
+ * \link operator()(Index,Index) \endlink instead.
+ *
+ * Long version: this function is similar to
+ * \link operator()(Index,Index) \endlink, but without the assertion.
+ * Use this for limiting the performance cost of debugging code when doing
+ * repeated coefficient access. Only use this when it is guaranteed that the
+ * parameters \a row and \a col are in range.
+ *
+ * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
+ * function equivalent to \link operator()(Index,Index) \endlink.
+ *
+ * \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
+ {
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return derived().coeffRef(row, col);
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ coeffRefByOuterInner(Index outer, Index inner)
+ {
+ return coeffRef(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+ }
+
+ /** \returns a reference to the coefficient at the given row and column.
+ *
+ * \sa operator[](Index)
+ */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ operator()(Index row, Index col)
+ {
+ eigen_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return derived().coeffRef(row, col);
+ }
+
+
+ /** Short version: don't use this function, use
+ * \link operator[](Index) \endlink instead.
+ *
+ * Long version: this function is similar to
+ * \link operator[](Index) \endlink, but without the assertion.
+ * Use this for limiting the performance cost of debugging code when doing
+ * repeated coefficient access. Only use this when it is guaranteed that the
+ * parameter \a index is in range.
+ *
+ * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
+ * function equivalent to \link operator[](Index) \endlink.
+ *
+ * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
+ */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ coeffRef(Index index)
+ {
+ eigen_internal_assert(index >= 0 && index < size());
+ return derived().coeffRef(index);
+ }
+
+ /** \returns a reference to the coefficient at the given index.
+ *
+ * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
+ *
+ * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
+ */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ operator[](Index index)
+ {
+ #ifndef EIGEN2_SUPPORT
+ EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
+ THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
+ #endif
+ eigen_assert(index >= 0 && index < size());
+ return derived().coeffRef(index);
+ }
+
+ /** \returns a reference to the coefficient at the given index.
+ *
+ * This is synonymous to operator[](Index).
+ *
+ * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
+ *
+ * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
+ */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ operator()(Index index)
+ {
+ eigen_assert(index >= 0 && index < size());
+ return derived().coeffRef(index);
+ }
+
+ /** equivalent to operator[](0). */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ x() { return (*this)[0]; }
+
+ /** equivalent to operator[](1). */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ y() { return (*this)[1]; }
+
+ /** equivalent to operator[](2). */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ z() { return (*this)[2]; }
+
+ /** equivalent to operator[](3). */
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ w() { return (*this)[3]; }
+
+ /** \internal
+ * Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility
+ * to ensure that a packet really starts there. This method is only available on expressions having the
+ * PacketAccessBit.
+ *
+ * The \a StoreMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
+ * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
+ * starting at an address which is a multiple of the packet size.
+ */
+
+ template<int StoreMode>
+ EIGEN_STRONG_INLINE void writePacket
+ (Index row, Index col, const typename internal::packet_traits<Scalar>::type& val)
+ {
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ derived().template writePacket<StoreMode>(row,col,val);
+ }
+
+
+ /** \internal */
+ template<int StoreMode>
+ EIGEN_STRONG_INLINE void writePacketByOuterInner
+ (Index outer, Index inner, const typename internal::packet_traits<Scalar>::type& val)
+ {
+ writePacket<StoreMode>(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner),
+ val);
+ }
+
+ /** \internal
+ * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility
+ * to ensure that a packet really starts there. This method is only available on expressions having the
+ * PacketAccessBit and the LinearAccessBit.
+ *
+ * The \a StoreMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
+ * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
+ * starting at an address which is a multiple of the packet size.
+ */
+ template<int StoreMode>
+ EIGEN_STRONG_INLINE void writePacket
+ (Index index, const typename internal::packet_traits<Scalar>::type& val)
+ {
+ eigen_internal_assert(index >= 0 && index < size());
+ derived().template writePacket<StoreMode>(index,val);
+ }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+
+ /** \internal Copies the coefficient at position (row,col) of other into *this.
+ *
+ * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+ * with usual assignments.
+ *
+ * Outside of this internal usage, this method probably has no use. It is hidden in the public API dox.
+ */
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
+ {
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ derived().coeffRef(row, col) = other.derived().coeff(row, col);
+ }
+
+ /** \internal Copies the coefficient at the given index of other into *this.
+ *
+ * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+ * with usual assignments.
+ *
+ * Outside of this internal usage, this method probably has no use. It is hidden in the public API dox.
+ */
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
+ {
+ eigen_internal_assert(index >= 0 && index < size());
+ derived().coeffRef(index) = other.derived().coeff(index);
+ }
+
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
+ {
+ const Index row = rowIndexByOuterInner(outer,inner);
+ const Index col = colIndexByOuterInner(outer,inner);
+ // derived() is important here: copyCoeff() may be reimplemented in Derived!
+ derived().copyCoeff(row, col, other);
+ }
+
+ /** \internal Copies the packet at position (row,col) of other into *this.
+ *
+ * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+ * with usual assignments.
+ *
+ * Outside of this internal usage, this method probably has no use. It is hidden in the public API dox.
+ */
+
+ template<typename OtherDerived, int StoreMode, int LoadMode>
+ EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
+ {
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ derived().template writePacket<StoreMode>(row, col,
+ other.derived().template packet<LoadMode>(row, col));
+ }
+
+ /** \internal Copies the packet at the given index of other into *this.
+ *
+ * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+ * with usual assignments.
+ *
+ * Outside of this internal usage, this method probably has no use. It is hidden in the public API dox.
+ */
+
+ template<typename OtherDerived, int StoreMode, int LoadMode>
+ EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase<OtherDerived>& other)
+ {
+ eigen_internal_assert(index >= 0 && index < size());
+ derived().template writePacket<StoreMode>(index,
+ other.derived().template packet<LoadMode>(index));
+ }
+
+ /** \internal */
+ template<typename OtherDerived, int StoreMode, int LoadMode>
+ EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
+ {
+ const Index row = rowIndexByOuterInner(outer,inner);
+ const Index col = colIndexByOuterInner(outer,inner);
+ // derived() is important here: copyCoeff() may be reimplemented in Derived!
+ derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other);
+ }
+#endif
+
+};
+
+/** \brief Base class providing direct read-only coefficient access to matrices and arrays.
+ * \ingroup Core_Module
+ * \tparam Derived Type of the derived class
+ * \tparam #DirectAccessors Constant indicating direct access
+ *
+ * This class defines functions to work with strides which can be used to access entries directly. This class
+ * inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
+ * \c operator() .
+ *
+ * \sa \ref TopicClassHierarchy
+ */
+template<typename Derived>
+class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
+{
+ public:
+
+ typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
+ typedef typename internal::traits<Derived>::Index Index;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+
+ /** \returns the pointer increment between two consecutive elements within a slice in the inner direction.
+ *
+ * \sa outerStride(), rowStride(), colStride()
+ */
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return derived().innerStride();
+ }
+
+ /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns
+ * in a column-major matrix).
+ *
+ * \sa innerStride(), rowStride(), colStride()
+ */
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return derived().outerStride();
+ }
+
+ // FIXME shall we remove it ?
+ inline Index stride() const
+ {
+ return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
+ }
+
+ /** \returns the pointer increment between two consecutive rows.
+ *
+ * \sa innerStride(), outerStride(), colStride()
+ */
+ EIGEN_DEVICE_FUNC
+ inline Index rowStride() const
+ {
+ return Derived::IsRowMajor ? outerStride() : innerStride();
+ }
+
+ /** \returns the pointer increment between two consecutive columns.
+ *
+ * \sa innerStride(), outerStride(), rowStride()
+ */
+ EIGEN_DEVICE_FUNC
+ inline Index colStride() const
+ {
+ return Derived::IsRowMajor ? innerStride() : outerStride();
+ }
+};
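+// An illustrative sketch of the strides above, assuming the default column-major
+// storage of a dynamic matrix:
+//   Eigen::MatrixXd m(5,4);
+//   m.innerStride();  // 1: consecutive entries of a column are adjacent in memory
+//   m.outerStride();  // 5: offset between the starts of two consecutive columns
+//   m.rowStride();    // 1, and m.colStride() == 5 for this column-major matrix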
+
+/** \brief Base class providing direct read/write coefficient access to matrices and arrays.
+ * \ingroup Core_Module
+ * \tparam Derived Type of the derived class
+ * \tparam #DirectWriteAccessors Constant indicating direct access
+ *
+ * This class defines functions to work with strides which can be used to access entries directly. This class
+ * inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
+ * \c operator().
+ *
+ * \sa \ref TopicClassHierarchy
+ */
+template<typename Derived>
+class DenseCoeffsBase<Derived, DirectWriteAccessors>
+ : public DenseCoeffsBase<Derived, WriteAccessors>
+{
+ public:
+
+ typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
+ typedef typename internal::traits<Derived>::Index Index;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+
+ /** \returns the pointer increment between two consecutive elements within a slice in the inner direction.
+ *
+ * \sa outerStride(), rowStride(), colStride()
+ */
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return derived().innerStride();
+ }
+
+ /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns
+ * in a column-major matrix).
+ *
+ * \sa innerStride(), rowStride(), colStride()
+ */
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return derived().outerStride();
+ }
+
+ // FIXME shall we remove it ?
+ inline Index stride() const
+ {
+ return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
+ }
+
+ /** \returns the pointer increment between two consecutive rows.
+ *
+ * \sa innerStride(), outerStride(), colStride()
+ */
+ EIGEN_DEVICE_FUNC
+ inline Index rowStride() const
+ {
+ return Derived::IsRowMajor ? outerStride() : innerStride();
+ }
+
+ /** \returns the pointer increment between two consecutive columns.
+ *
+ * \sa innerStride(), outerStride(), rowStride()
+ */
+ EIGEN_DEVICE_FUNC
+ inline Index colStride() const
+ {
+ return Derived::IsRowMajor ? innerStride() : outerStride();
+ }
+};
+
+namespace internal {
+
+template<typename Derived, bool JustReturnZero>
+struct first_aligned_impl
+{
+ static inline typename Derived::Index run(const Derived&)
+ { return 0; }
+};
+
+template<typename Derived>
+struct first_aligned_impl<Derived, false>
+{
+ static inline typename Derived::Index run(const Derived& m)
+ {
+ return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
+ }
+};
+
+/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
+ *
+ * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
+ * documentation.
+ */
+template<typename Derived>
+static inline typename Derived::Index first_aligned(const Derived& m)
+{
+ return first_aligned_impl
+ <Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
+ ::run(m);
+}
+
+template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
+struct inner_stride_at_compile_time
+{
+ enum { ret = traits<Derived>::InnerStrideAtCompileTime };
+};
+
+template<typename Derived>
+struct inner_stride_at_compile_time<Derived, false>
+{
+ enum { ret = 0 };
+};
+
+template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
+struct outer_stride_at_compile_time
+{
+ enum { ret = traits<Derived>::OuterStrideAtCompileTime };
+};
+
+template<typename Derived>
+struct outer_stride_at_compile_time<Derived, false>
+{
+ enum { ret = 0 };
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_DENSECOEFFSBASE_H
diff --git a/third_party/eigen3/Eigen/src/Core/DenseStorage.h b/third_party/eigen3/Eigen/src/Core/DenseStorage.h
new file mode 100644
index 0000000000..59f5154956
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/DenseStorage.h
@@ -0,0 +1,480 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2010-2013 Hauke Heibel <hauke.heibel@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIXSTORAGE_H
+#define EIGEN_MATRIXSTORAGE_H
+
+#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
+#else
+ #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+#endif
+
+namespace Eigen {
+
+namespace internal {
+
+struct constructor_without_unaligned_array_assert {};
+
+template<typename T, int Size>
+EIGEN_DEVICE_FUNC
+void check_static_allocation_size()
+{
+ // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
+ #if EIGEN_STACK_ALLOCATION_LIMIT
+ EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
+ #endif
+}
+
+/** \internal
+ * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned
+ * to a 16-byte boundary if the total size is a multiple of 16 bytes.
+ */
+template <typename T, int Size, int MatrixOrArrayOptions,
+ int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
+ : (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
+ : 0 >
+struct plain_array
+{
+ T array[Size];
+
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ check_static_allocation_size<T,Size>();
+ }
+
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
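+// An illustrative consequence of the Alignment computation above, assuming the
+// default EIGEN_ALIGN_BYTES of 16: plain_array<float,16,0> (64 bytes) selects the
+// aligned specialization below, while plain_array<float,3,0> (12 bytes) keeps this
+// unaligned primary template.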
+
+#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
+ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
+#elif EIGEN_GNUC_AT_LEAST(4,7)
+ // GCC 4.7 is too aggressive in its optimizations and removes the alignment test based on the fact that the array is declared to be aligned.
+ // See this bug report: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53900
+ // Hiding the origin of the array pointer behind a function argument seems to do the trick even if the function is inlined:
+ template<typename PtrType>
+ EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
+ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
+ eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
+ && "this assertion is explained here: " \
+ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
+ " **** READ THIS WEB PAGE !!! ****");
+#else
+ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
+ eigen_assert((reinterpret_cast<size_t>(array) & (sizemask)) == 0 \
+ && "this assertion is explained here: " \
+ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
+ " **** READ THIS WEB PAGE !!! ****");
+#endif
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
+{
+ EIGEN_USER_ALIGN_DEFAULT T array[Size];
+
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1);
+ check_static_allocation_size<T,Size>();
+ }
+
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+
+template <typename T, int MatrixOrArrayOptions, int Alignment>
+struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
+{
+ EIGEN_USER_ALIGN_DEFAULT T array[1];
+ EIGEN_DEVICE_FUNC plain_array() {}
+ EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
+};
+
+} // end namespace internal
+
+/** \internal
+ *
+ * \class DenseStorage
+ * \ingroup Core_Module
+ *
+ * \brief Stores the data of a matrix
+ *
+ * This class stores the data of fixed-size, dynamic-size or mixed matrices
+ * in a way as compact as possible.
+ *
+ * \sa Matrix
+ */
+template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage;
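+// An illustrative mapping from matrix types to the specializations below (informal):
+//   Matrix<float,4,4>                   -> purely fixed-size storage (no heap allocation),
+//   Matrix<float,Dynamic,Dynamic>       -> purely dynamic storage (heap allocated),
+//   Matrix<float,Dynamic,Dynamic,0,4,4> -> fixed-size storage with run-time rows and cols.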
+
+// purely fixed-size matrix
+template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() {}
+ EIGEN_DEVICE_FUNC
+ DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()) {}
+ EIGEN_DEVICE_FUNC
+ DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
+ EIGEN_DEVICE_FUNC
+ DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other) m_data = other.m_data;
+ return *this;
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
+ EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
+ EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
+ EIGEN_DEVICE_FUNC void resize(DenseIndex,DenseIndex,DenseIndex) {}
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+
+// null matrix
+template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
+{
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() {}
+ EIGEN_DEVICE_FUNC DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
+ EIGEN_DEVICE_FUNC DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
+ EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
+ EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
+ EIGEN_DEVICE_FUNC void resize(DenseIndex,DenseIndex,DenseIndex) {}
+ EIGEN_DEVICE_FUNC const T *data() const { return 0; }
+ EIGEN_DEVICE_FUNC T *data() { return 0; }
+};
+
+// more specializations for null matrices; these are necessary to resolve ambiguities
+template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+
+template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+
+template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+
+// dynamic-size matrix with fixed-size storage
+template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ DenseIndex m_rows;
+ DenseIndex m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
+ DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
+ DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
+ DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ m_data = other.m_data;
+ m_rows = other.m_rows;
+ m_cols = other.m_cols;
+ }
+ return *this;
+ }
+ DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {}
+ void swap(DenseStorage& other)
+ { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC DenseIndex rows() const {return m_rows;}
+ EIGEN_DEVICE_FUNC DenseIndex cols() const {return m_cols;}
+ void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
+ void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+
+// dynamic-size matrix with fixed-size storage and fixed width
+template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ DenseIndex m_rows;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
+ DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
+ DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
+ DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ m_data = other.m_data;
+ m_rows = other.m_rows;
+ }
+ return *this;
+ }
+ DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {}
+ void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+ EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
+ EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return _Cols;}
+ void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
+ void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+
+// dynamic-size matrix with fixed-size storage and fixed height
+template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ DenseIndex m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
+ DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
+ DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
+ DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ m_data = other.m_data;
+ m_cols = other.m_cols;
+ }
+ return *this;
+ }
+ DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {}
+ void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return _Rows;}
+ EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
+ void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
+ void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+
+// purely dynamic matrix.
+template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
+{
+ T *m_data;
+ DenseIndex m_rows;
+ DenseIndex m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
+ DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(0), m_rows(0), m_cols(0) {}
+ DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols)
+ { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+ DenseStorage(const DenseStorage& other)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
+ , m_rows(other.m_rows)
+ , m_cols(other.m_cols)
+ {
+ internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
+ }
+ DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ DenseStorage tmp(other);
+ this->swap(tmp);
+ }
+ return *this;
+ }
+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+ DenseStorage(DenseStorage&& other)
+ : m_data(std::move(other.m_data))
+ , m_rows(std::move(other.m_rows))
+ , m_cols(std::move(other.m_cols))
+ {
+ other.m_data = nullptr;
+ }
+ DenseStorage& operator=(DenseStorage&& other)
+ {
+ using std::swap;
+ swap(m_data, other.m_data);
+ swap(m_rows, other.m_rows);
+ swap(m_cols, other.m_cols);
+ return *this;
+ }
+#endif
+ ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
+ void swap(DenseStorage& other)
+ { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
+ EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
+ void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
+ {
+ m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
+ m_rows = nbRows;
+ m_cols = nbCols;
+ }
+ void resize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
+ {
+ if(size != m_rows*m_cols)
+ {
+ internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
+ if (size)
+ m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+ else
+ m_data = 0;
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+ }
+ m_rows = nbRows;
+ m_cols = nbCols;
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
+
+// matrix with dynamic width and fixed height (so that matrix has dynamic size).
+template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
+{
+ T *m_data;
+ DenseIndex m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
+ DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
+ DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
+ { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+ DenseStorage(const DenseStorage& other)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
+ , m_cols(other.m_cols)
+ {
+ internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
+ }
+ DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ DenseStorage tmp(other);
+ this->swap(tmp);
+ }
+ return *this;
+ }
+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+ DenseStorage(DenseStorage&& other)
+ : m_data(std::move(other.m_data))
+ , m_cols(std::move(other.m_cols))
+ {
+ other.m_data = nullptr;
+ }
+ DenseStorage& operator=(DenseStorage&& other)
+ {
+ using std::swap;
+ swap(m_data, other.m_data);
+ swap(m_cols, other.m_cols);
+ return *this;
+ }
+#endif
+ ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
+ void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
+ EIGEN_DEVICE_FUNC DenseIndex cols(void) const {return m_cols;}
+ void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols)
+ {
+ m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
+ m_cols = nbCols;
+ }
+ EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex nbCols)
+ {
+ if(size != _Rows*m_cols)
+ {
+ internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
+ if (size)
+ m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+ else
+ m_data = 0;
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+ }
+ m_cols = nbCols;
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
+
+// matrix with dynamic height and fixed width (so that matrix has dynamic size).
+template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
+{
+ T *m_data;
+ DenseIndex m_rows;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
+ DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
+ DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
+ { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+ DenseStorage(const DenseStorage& other)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
+ , m_rows(other.m_rows)
+ {
+ internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
+ }
+ DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ DenseStorage tmp(other);
+ this->swap(tmp);
+ }
+ return *this;
+ }
+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+ DenseStorage(DenseStorage&& other)
+ : m_data(std::move(other.m_data))
+ , m_rows(std::move(other.m_rows))
+ {
+ other.m_data = nullptr;
+ }
+ DenseStorage& operator=(DenseStorage&& other)
+ {
+ using std::swap;
+ swap(m_data, other.m_data);
+ swap(m_rows, other.m_rows);
+ return *this;
+ }
+#endif
+ ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
+ void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+ EIGEN_DEVICE_FUNC DenseIndex rows(void) const {return m_rows;}
+ EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
+ void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex)
+ {
+ m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
+ m_rows = nbRows;
+ }
+ EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex nbRows, DenseIndex)
+ {
+ if(size != m_rows*_Cols)
+ {
+ internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
+ if (size)
+ m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+ else
+ m_data = 0;
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+ }
+ m_rows = nbRows;
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATRIX_H
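
The DenseStorage specializations above choose, purely from the compile-time Size/Rows/Cols parameters, between an inline plain_array and a heap pointer plus run-time dimensions. A minimal sketch of how this surfaces for user-visible matrix types; the sizeof comparisons are only illustrative, since exact values depend on alignment and padding:

#include <Eigen/Dense>
#include <iostream>

int main() {
  // Fully fixed size: inline plain_array<float,4>, no heap allocation.
  std::cout << sizeof(Eigen::Matrix<float, 2, 2>) << "\n";
  // Fully dynamic: a float* plus two DenseIndex members; the data lives on the heap.
  std::cout << sizeof(Eigen::MatrixXf) << "\n";
  // Fixed rows, dynamic cols: a float* plus a single DenseIndex for the column count.
  std::cout << sizeof(Eigen::Matrix<float, 3, Eigen::Dynamic>) << "\n";
  return 0;
}
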
diff --git a/third_party/eigen3/Eigen/src/Core/Diagonal.h b/third_party/eigen3/Eigen/src/Core/Diagonal.h
new file mode 100644
index 0000000000..d760762cc2
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Diagonal.h
@@ -0,0 +1,258 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DIAGONAL_H
+#define EIGEN_DIAGONAL_H
+
+namespace Eigen {
+
+/** \class Diagonal
+ * \ingroup Core_Module
+ *
+ * \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix
+ *
+ * \param MatrixType the type of the object in which we are taking a sub/main/super diagonal
+ * \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
+ * A positive value means a superdiagonal, a negative value means a subdiagonal.
+ * You can also use Dynamic so the index can be set at runtime.
+ *
+ * The matrix is not required to be square.
+ *
+ * This class represents an expression of the main diagonal, or any sub/super diagonal,
+ * of a matrix. It is the return type of MatrixBase::diagonal() and MatrixBase::diagonal(Index) and most of the
+ * time this is the only way it is used.
+ *
+ * \sa MatrixBase::diagonal(), MatrixBase::diagonal(Index)
+ */
+
+namespace internal {
+template<typename MatrixType, int DiagIndex>
+struct traits<Diagonal<MatrixType,DiagIndex> >
+ : traits<MatrixType>
+{
+ typedef typename nested<MatrixType>::type MatrixTypeNested;
+ typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+ typedef typename MatrixType::StorageKind StorageKind;
+ enum {
+ RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic
+ : (EIGEN_PLAIN_ENUM_MIN(MatrixType::RowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
+ MatrixType::ColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
+ ColsAtCompileTime = 1,
+ MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic
+ : DiagIndex == DynamicIndex ? EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime,
+ MatrixType::MaxColsAtCompileTime)
+ : (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
+ MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
+ MaxColsAtCompileTime = 1,
+ MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+ Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit,
+ CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
+ MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
+ InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
+ OuterStrideAtCompileTime = 0
+ };
+};
+}
+
+template<typename MatrixType, int _DiagIndex> class Diagonal
+ : public internal::dense_xpr_base< Diagonal<MatrixType,_DiagIndex> >::type
+{
+ public:
+
+ enum { DiagIndex = _DiagIndex };
+ typedef typename internal::dense_xpr_base<Diagonal>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
+
+ EIGEN_DEVICE_FUNC
+ inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
+
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
+
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const
+ {
+ return m_index.value()<0 ? numext::mini(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value()))
+ : numext::mini(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value()));
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return 1; }
+
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return m_matrix.outerStride() + 1;
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return 0;
+ }
+
+ typedef typename internal::conditional<
+ internal::is_lvalue<MatrixType>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index row, Index)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+ return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index row, Index) const
+ {
+ return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline CoeffReturnType coeff(Index row, Index) const
+ {
+ return m_matrix.coeff(row+rowOffset(), row+colOffset());
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index idx)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+ return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index idx) const
+ {
+ return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline CoeffReturnType coeff(Index idx) const
+ {
+ return m_matrix.coeff(idx+rowOffset(), idx+colOffset());
+ }
+
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<typename MatrixType::Nested>::type&
+ nestedExpression() const
+ {
+ return m_matrix;
+ }
+
+ EIGEN_DEVICE_FUNC
+ int index() const
+ {
+ return m_index.value();
+ }
+
+ protected:
+ typename MatrixType::Nested m_matrix;
+ const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;
+
+ private:
+ // some compilers may fail to optimize std::max etc in case of compile-time constants...
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
+ // trigger a compile-time error if someone tries to call packet
+ template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
+ template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
+};
+
+/** \returns an expression of the main diagonal of the matrix \c *this
+ *
+ * \c *this is not required to be square.
+ *
+ * Example: \include MatrixBase_diagonal.cpp
+ * Output: \verbinclude MatrixBase_diagonal.out
+ *
+ * \sa class Diagonal */
+template<typename Derived>
+inline typename MatrixBase<Derived>::DiagonalReturnType
+MatrixBase<Derived>::diagonal()
+{
+ return derived();
+}
+
+/** This is the const version of diagonal(). */
+template<typename Derived>
+inline typename MatrixBase<Derived>::ConstDiagonalReturnType
+MatrixBase<Derived>::diagonal() const
+{
+ return ConstDiagonalReturnType(derived());
+}
+
+/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
+ *
+ * \c *this is not required to be square.
+ *
+ * The template parameter \a DiagIndex represents a super diagonal if \a DiagIndex > 0
+ * and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal.
+ *
+ * Example: \include MatrixBase_diagonal_int.cpp
+ * Output: \verbinclude MatrixBase_diagonal_int.out
+ *
+ * \sa MatrixBase::diagonal(), class Diagonal */
+template<typename Derived>
+inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<DynamicIndex>::Type
+MatrixBase<Derived>::diagonal(Index index)
+{
+ return typename DiagonalIndexReturnType<DynamicIndex>::Type(derived(), index);
+}
+
+/** This is the const version of diagonal(Index). */
+template<typename Derived>
+inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<DynamicIndex>::Type
+MatrixBase<Derived>::diagonal(Index index) const
+{
+ return typename ConstDiagonalIndexReturnType<DynamicIndex>::Type(derived(), index);
+}
+
+/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
+ *
+ * \c *this is not required to be square.
+ *
+ * The template parameter \a DiagIndex represents a super diagonal if \a DiagIndex > 0
+ * and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal.
+ *
+ * Example: \include MatrixBase_diagonal_template_int.cpp
+ * Output: \verbinclude MatrixBase_diagonal_template_int.out
+ *
+ * \sa MatrixBase::diagonal(), class Diagonal */
+template<typename Derived>
+template<int Index>
+inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index>::Type
+MatrixBase<Derived>::diagonal()
+{
+ return derived();
+}
+
+/** This is the const version of diagonal<int>(). */
+template<typename Derived>
+template<int Index>
+inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index>::Type
+MatrixBase<Derived>::diagonal() const
+{
+ return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_DIAGONAL_H
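
A minimal usage sketch of the diagonal accessors implemented above; the Diagonal expression is writable here because the wrapped matrix is an lvalue:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Matrix3d m;
  m << 1, 2, 3,
       4, 5, 6,
       7, 8, 9;
  std::cout << m.diagonal().transpose() << "\n";     // main diagonal: 1 5 9
  std::cout << m.diagonal(1).transpose() << "\n";    // first superdiagonal: 2 6
  std::cout << m.diagonal<-1>().transpose() << "\n"; // first subdiagonal: 4 8
  m.diagonal().setZero();                            // Diagonal keeps LvalueBit, so it can be assigned to
  return 0;
}
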
diff --git a/third_party/eigen3/Eigen/src/Core/DiagonalMatrix.h b/third_party/eigen3/Eigen/src/Core/DiagonalMatrix.h
new file mode 100644
index 0000000000..f7ac22f8b0
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/DiagonalMatrix.h
@@ -0,0 +1,346 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DIAGONALMATRIX_H
+#define EIGEN_DIAGONALMATRIX_H
+
+namespace Eigen {
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename Derived>
+class DiagonalBase : public EigenBase<Derived>
+{
+ public:
+ typedef typename internal::traits<Derived>::DiagonalVectorType DiagonalVectorType;
+ typedef typename DiagonalVectorType::Scalar Scalar;
+ typedef typename DiagonalVectorType::RealScalar RealScalar;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Index Index;
+
+ enum {
+ RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+ ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+ MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+ MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+ IsVectorAtCompileTime = 0,
+ Flags = 0
+ };
+
+ typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;
+ typedef DenseMatrixType DenseType;
+ typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;
+
+ EIGEN_DEVICE_FUNC
+ inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+ EIGEN_DEVICE_FUNC
+ inline Derived& derived() { return *static_cast<Derived*>(this); }
+
+ EIGEN_DEVICE_FUNC
+ DenseMatrixType toDenseMatrix() const { return derived(); }
+ template<typename DenseDerived>
+ EIGEN_DEVICE_FUNC
+ void evalTo(MatrixBase<DenseDerived> &other) const;
+ template<typename DenseDerived>
+ EIGEN_DEVICE_FUNC
+ void addTo(MatrixBase<DenseDerived> &other) const
+ { other.diagonal() += diagonal(); }
+ template<typename DenseDerived>
+ EIGEN_DEVICE_FUNC
+ void subTo(MatrixBase<DenseDerived> &other) const
+ { other.diagonal() -= diagonal(); }
+
+ EIGEN_DEVICE_FUNC
+ inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
+ EIGEN_DEVICE_FUNC
+ inline DiagonalVectorType& diagonal() { return derived().diagonal(); }
+
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return diagonal().size(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return diagonal().size(); }
+
+ /** \returns the diagonal matrix product of \c *this by the matrix \a matrix.
+ */
+ template<typename MatrixDerived>
+ EIGEN_DEVICE_FUNC
+ const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
+ operator*(const MatrixBase<MatrixDerived> &matrix) const
+ {
+ return DiagonalProduct<MatrixDerived, Derived, OnTheLeft>(matrix.derived(), derived());
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
+ inverse() const
+ {
+ return diagonal().cwiseInverse();
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
+ operator*(const Scalar& scalar) const
+ {
+ return diagonal() * scalar;
+ }
+ EIGEN_DEVICE_FUNC
+ friend inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
+ operator*(const Scalar& scalar, const DiagonalBase& other)
+ {
+ return other.diagonal() * scalar;
+ }
+
+ #ifdef EIGEN2_SUPPORT
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ bool isApprox(const DiagonalBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
+ {
+ return diagonal().isApprox(other.diagonal(), precision);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
+ {
+ return toDenseMatrix().isApprox(other, precision);
+ }
+ #endif
+};
+
+template<typename Derived>
+template<typename DenseDerived>
+void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
+{
+ other.setZero();
+ other.diagonal() = diagonal();
+}
+#endif
+
+/** \class DiagonalMatrix
+ * \ingroup Core_Module
+ *
+ * \brief Represents a diagonal matrix with its storage
+ *
+ * \param _Scalar the type of coefficients
+ * \param SizeAtCompileTime the dimension of the matrix, or Dynamic
+ * \param MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults
+ * to SizeAtCompileTime. Most of the time, you do not need to specify it.
+ *
+ * \sa class DiagonalWrapper
+ */
+
+namespace internal {
+template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
+struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
+ : traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+{
+ typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
+ typedef Dense StorageKind;
+ typedef DenseIndex Index;
+ enum {
+ Flags = LvalueBit
+ };
+};
+}
+template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
+class DiagonalMatrix
+ : public DiagonalBase<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
+{
+ public:
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef typename internal::traits<DiagonalMatrix>::DiagonalVectorType DiagonalVectorType;
+ typedef const DiagonalMatrix& Nested;
+ typedef _Scalar Scalar;
+ typedef typename internal::traits<DiagonalMatrix>::StorageKind StorageKind;
+ typedef typename internal::traits<DiagonalMatrix>::Index Index;
+ #endif
+
+ protected:
+
+ DiagonalVectorType m_diagonal;
+
+ public:
+
+ /** const version of diagonal(). */
+ EIGEN_DEVICE_FUNC
+ inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
+ /** \returns a reference to the stored vector of diagonal coefficients. */
+ EIGEN_DEVICE_FUNC
+ inline DiagonalVectorType& diagonal() { return m_diagonal; }
+
+ /** Default constructor without initialization */
+ EIGEN_DEVICE_FUNC
+ inline DiagonalMatrix() {}
+
+ /** Constructs a diagonal matrix with given dimension */
+ EIGEN_DEVICE_FUNC
+ inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
+
+ /** 2D constructor. */
+ EIGEN_DEVICE_FUNC
+ inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {}
+
+ /** 3D constructor. */
+ EIGEN_DEVICE_FUNC
+ inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}
+
+ /** Copy constructor. */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** Copy constructor. Prevents a default copy constructor from hiding the other templated constructor. */
+ inline DiagonalMatrix(const DiagonalMatrix& other) : m_diagonal(other.diagonal()) {}
+ #endif
+
+ /** generic constructor from expression of the diagonal coefficients */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other)
+ {}
+
+ /** Copy operator. */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other)
+ {
+ m_diagonal = other.diagonal();
+ return *this;
+ }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** This is a special case of the templated operator=. Its purpose is to
+ * prevent a default operator= from hiding the templated operator=.
+ */
+ EIGEN_DEVICE_FUNC
+ DiagonalMatrix& operator=(const DiagonalMatrix& other)
+ {
+ m_diagonal = other.diagonal();
+ return *this;
+ }
+ #endif
+
+ /** Resizes to given size. */
+ EIGEN_DEVICE_FUNC
+ inline void resize(Index size) { m_diagonal.resize(size); }
+ /** Sets all coefficients to zero. */
+ EIGEN_DEVICE_FUNC
+ inline void setZero() { m_diagonal.setZero(); }
+ /** Resizes and sets all coefficients to zero. */
+ EIGEN_DEVICE_FUNC
+ inline void setZero(Index size) { m_diagonal.setZero(size); }
+ /** Sets this matrix to be the identity matrix of the current size. */
+ EIGEN_DEVICE_FUNC
+ inline void setIdentity() { m_diagonal.setOnes(); }
+ /** Sets this matrix to be the identity matrix of the given size. */
+ EIGEN_DEVICE_FUNC
+ inline void setIdentity(Index size) { m_diagonal.setOnes(size); }
+};
+
+/** \class DiagonalWrapper
+ * \ingroup Core_Module
+ *
+ * \brief Expression of a diagonal matrix
+ *
+ * \param _DiagonalVectorType the type of the vector of diagonal coefficients
+ *
+ * This class is an expression of a diagonal matrix; it does not store its own vector of diagonal
+ * coefficients, but instead wraps an existing vector expression. It is the return type of
+ * MatrixBase::asDiagonal() and most of the time this is the only way that it is used.
+ *
+ * \sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal()
+ */
+
+namespace internal {
+template<typename _DiagonalVectorType>
+struct traits<DiagonalWrapper<_DiagonalVectorType> >
+{
+ typedef _DiagonalVectorType DiagonalVectorType;
+ typedef typename DiagonalVectorType::Scalar Scalar;
+ typedef typename DiagonalVectorType::Index Index;
+ typedef typename DiagonalVectorType::StorageKind StorageKind;
+ enum {
+ RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+ ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+ MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+ MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+ Flags = traits<DiagonalVectorType>::Flags & LvalueBit
+ };
+};
+}
+
+template<typename _DiagonalVectorType>
+class DiagonalWrapper
+ : public DiagonalBase<DiagonalWrapper<_DiagonalVectorType> >, internal::no_assignment_operator
+{
+ public:
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef _DiagonalVectorType DiagonalVectorType;
+ typedef DiagonalWrapper Nested;
+ #endif
+
+ /** Constructor from expression of diagonal coefficients to wrap. */
+ EIGEN_DEVICE_FUNC
+ inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
+
+ /** \returns a const reference to the wrapped expression of diagonal coefficients. */
+ EIGEN_DEVICE_FUNC
+ const DiagonalVectorType& diagonal() const { return m_diagonal; }
+
+ protected:
+ typename DiagonalVectorType::Nested m_diagonal;
+};
+
+/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients
+ *
+ * \only_for_vectors
+ *
+ * Example: \include MatrixBase_asDiagonal.cpp
+ * Output: \verbinclude MatrixBase_asDiagonal.out
+ *
+ * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal()
+ **/
+template<typename Derived>
+inline const DiagonalWrapper<const Derived>
+MatrixBase<Derived>::asDiagonal() const
+{
+ return derived();
+}
+
+/** \returns true if *this is approximately equal to a diagonal matrix,
+ * within the precision given by \a prec.
+ *
+ * Example: \include MatrixBase_isDiagonal.cpp
+ * Output: \verbinclude MatrixBase_isDiagonal.out
+ *
+ * \sa asDiagonal()
+ */
+template<typename Derived>
+bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
+{
+ using std::abs;
+ if(cols() != rows()) return false;
+ RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
+ for(Index j = 0; j < cols(); ++j)
+ {
+ RealScalar absOnDiagonal = abs(coeff(j,j));
+ if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
+ }
+ for(Index j = 0; j < cols(); ++j)
+ for(Index i = 0; i < j; ++i)
+ {
+ if(!internal::isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false;
+ if(!internal::isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false;
+ }
+ return true;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_DIAGONALMATRIX_H
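
A minimal sketch of the DiagonalMatrix / DiagonalWrapper API declared above, using only calls shown in this file; the values are illustrative:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::DiagonalMatrix<double, 3> D(1.0, 2.0, 3.0);     // 3D constructor
  std::cout << D.toDenseMatrix() << "\n\n";              // dense 3x3 with 1, 2, 3 on the diagonal

  Eigen::Vector3d v(4.0, 5.0, 6.0);
  std::cout << v.asDiagonal().toDenseMatrix() << "\n\n"; // DiagonalWrapper: no copy of v is made

  D.setIdentity();                                       // sets the stored diagonal vector to ones
  std::cout << D.toDenseMatrix().isDiagonal() << "\n";   // prints 1
  return 0;
}
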
diff --git a/third_party/eigen3/Eigen/src/Core/DiagonalProduct.h b/third_party/eigen3/Eigen/src/Core/DiagonalProduct.h
new file mode 100644
index 0000000000..c03a0c2e12
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/DiagonalProduct.h
@@ -0,0 +1,130 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DIAGONALPRODUCT_H
+#define EIGEN_DIAGONALPRODUCT_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename MatrixType, typename DiagonalType, int ProductOrder>
+struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
+ : traits<MatrixType>
+{
+ typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
+ enum {
+ RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+ MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+
+ _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
+ _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
+ ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
+ _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
+ // FIXME currently we need same types, but in the future the next rule should be the one
+ //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
+ _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
+ _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0,
+
+ Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit,//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
+ CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
+ };
+};
+}
+
+template<typename MatrixType, typename DiagonalType, int ProductOrder>
+class DiagonalProduct : internal::no_assignment_operator,
+ public MatrixBase<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
+{
+ public:
+
+ typedef MatrixBase<DiagonalProduct> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct)
+
+ inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal)
+ : m_matrix(matrix), m_diagonal(diagonal)
+ {
+ eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols()));
+ }
+
+ EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); }
+ EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); }
+
+ EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+ {
+ return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col);
+ }
+
+ EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
+ {
+ enum {
+ StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor
+ };
+ return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
+ {
+ enum {
+ StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor
+ };
+ const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col;
+ return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename internal::conditional<
+ ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
+ ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type());
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
+ {
+ enum {
+ StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor
+ };
+ return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+ }
+
+ protected:
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
+ {
+ return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
+ internal::pset1<PacketScalar>(m_diagonal.diagonal().coeff(id)));
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
+ {
+ enum {
+ InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
+ DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
+ };
+ return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
+ m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
+ }
+
+ typename MatrixType::Nested m_matrix;
+ typename DiagonalType::Nested m_diagonal;
+};
+
+/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
+ */
+template<typename Derived>
+template<typename DiagonalDerived>
+inline const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
+MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const
+{
+ return DiagonalProduct<Derived, DiagonalDerived, OnTheRight>(derived(), a_diagonal.derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_DIAGONALPRODUCT_H
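
A minimal sketch of the two product orders handled above: a diagonal factor on the left scales rows, on the right it scales columns. The numbers are illustrative only:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Matrix2d m;
  m << 1, 2,
       3, 4;
  Eigen::Vector2d d(10, 100);

  // OnTheLeft: row i of m is scaled by d(i).
  std::cout << d.asDiagonal() * m << "\n\n";  // [ 10  20; 300 400]
  // OnTheRight: column j of m is scaled by d(j).
  std::cout << m * d.asDiagonal() << "\n";    // [ 10 200;  30 400]
  return 0;
}
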
diff --git a/third_party/eigen3/Eigen/src/Core/Dot.h b/third_party/eigen3/Eigen/src/Core/Dot.h
new file mode 100644
index 0000000000..718de5d1af
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Dot.h
@@ -0,0 +1,270 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008, 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DOT_H
+#define EIGEN_DOT_H
+
+namespace Eigen {
+
+namespace internal {
+
+// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot
+// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
+// looking at the static assertions. Thus this is a trick to get better compile errors.
+template<typename T, typename U,
+// the NeedToTranspose condition here is taken straight from Assign.h
+ bool NeedToTranspose = T::IsVectorAtCompileTime
+ && U::IsVectorAtCompileTime
+ && ((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1)
+ | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
+ // revert to || as soon as not needed anymore.
+ (int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))
+>
+struct dot_nocheck
+{
+ typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
+ EIGEN_DEVICE_FUNC
+ static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+ {
+ return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
+ }
+};
+
+template<typename T, typename U>
+struct dot_nocheck<T, U, true>
+{
+ typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
+ EIGEN_DEVICE_FUNC
+ static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+ {
+ return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
+ }
+};
+
+} // end namespace internal
+
+/** \returns the dot product of *this with other.
+ *
+ * \only_for_vectors
+ *
+ * \note If the scalar type is complex, then this function returns the hermitian
+ * (sesquilinear) dot product, which is conjugate-linear in the first variable and linear in the
+ * second variable.
+ *
+ * \sa squaredNorm(), norm()
+ */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
+MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
+ typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
+ EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
+
+ eigen_assert(size() == other.size());
+
+ return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
+}
+
+#ifdef EIGEN2_SUPPORT
+/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable
+ * (conjugating the second variable). Of course this only makes a difference in the complex case.
+ *
+ * This method is only available in EIGEN2_SUPPORT mode.
+ *
+ * \only_for_vectors
+ *
+ * \sa dot()
+ */
+template<typename Derived>
+template<typename OtherDerived>
+typename internal::traits<Derived>::Scalar
+MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+ eigen_assert(size() == other.size());
+
+ return internal::dot_nocheck<OtherDerived,Derived>::run(other,*this);
+}
+#endif
+
+
+//---------- implementation of L2 norm and related functions ----------
+
+/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
+ * In both cases, it is the sum of the squares of all the matrix entries.
+ * For vectors, this is also equal to the dot product of \c *this with itself.
+ *
+ * \sa dot(), norm()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
+{
+ return numext::real((*this).cwiseAbs2().sum());
+}
+
+/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
+ * In both cases, it is the square root of the sum of the squares of all the matrix entries.
+ * For vectors, this is also equal to the square root of the dot product of \c *this with itself.
+ *
+ * \sa dot(), squaredNorm()
+ */
+template<typename Derived>
+inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
+{
+ using std::sqrt;
+ return sqrt(squaredNorm());
+}
+
+/** \returns an expression of the quotient of *this by its own norm.
+ *
+ * \only_for_vectors
+ *
+ * \sa norm(), normalize()
+ */
+template<typename Derived>
+inline const typename MatrixBase<Derived>::PlainObject
+MatrixBase<Derived>::normalized() const
+{
+ typedef typename internal::nested<Derived>::type Nested;
+ typedef typename internal::remove_reference<Nested>::type _Nested;
+ _Nested n(derived());
+ return n / n.norm();
+}
+
+/** Normalizes the vector, i.e. divides it by its own norm.
+ *
+ * \only_for_vectors
+ *
+ * \sa norm(), normalized()
+ */
+template<typename Derived>
+inline void MatrixBase<Derived>::normalize()
+{
+ *this /= norm();
+}
+
+//---------- implementation of other norms ----------
+
+namespace internal {
+
+template<typename Derived, int p>
+struct lpNorm_selector
+{
+ typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const MatrixBase<Derived>& m)
+ {
+ using std::pow;
+ return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
+ }
+};
+
+template<typename Derived>
+struct lpNorm_selector<Derived, 1>
+{
+ EIGEN_DEVICE_FUNC
+ static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+ {
+ return m.cwiseAbs().sum();
+ }
+};
+
+template<typename Derived>
+struct lpNorm_selector<Derived, 2>
+{
+ EIGEN_DEVICE_FUNC
+ static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+ {
+ return m.norm();
+ }
+};
+
+template<typename Derived>
+struct lpNorm_selector<Derived, Infinity>
+{
+ EIGEN_DEVICE_FUNC
+ static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+ {
+ return m.cwiseAbs().maxCoeff();
+ }
+};
+
+} // end namespace internal
+
+/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
+ * of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
+ * norm, that is the maximum of the absolute values of the coefficients of *this.
+ *
+ * \sa norm()
+ */
+template<typename Derived>
+template<int p>
+inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+MatrixBase<Derived>::lpNorm() const
+{
+ return internal::lpNorm_selector<Derived, p>::run(*this);
+}
+
+//---------- implementation of isOrthogonal / isUnitary ----------
+
+/** \returns true if *this is approximately orthogonal to \a other,
+ * within the precision given by \a prec.
+ *
+ * Example: \include MatrixBase_isOrthogonal.cpp
+ * Output: \verbinclude MatrixBase_isOrthogonal.out
+ */
+template<typename Derived>
+template<typename OtherDerived>
+bool MatrixBase<Derived>::isOrthogonal
+(const MatrixBase<OtherDerived>& other, const RealScalar& prec) const
+{
+ typename internal::nested<Derived,2>::type nested(derived());
+ typename internal::nested<OtherDerived,2>::type otherNested(other.derived());
+ return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
+}
+
+/** \returns true if *this is approximately a unitary matrix,
+ * within the precision given by \a prec. When the \a Scalar
+ * type is real, a unitary matrix is an orthogonal matrix, whence the name.
+ *
+ * \note This can be used to check whether a family of vectors forms an orthonormal basis.
+ * Indeed, \c m.isUnitary() returns true if and only if the columns (equivalently, the rows) of m form an
+ * orthonormal basis.
+ *
+ * Example: \include MatrixBase_isUnitary.cpp
+ * Output: \verbinclude MatrixBase_isUnitary.out
+ */
+template<typename Derived>
+bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const
+{
+ typename Derived::Nested nested(derived());
+ for(Index i = 0; i < cols(); ++i)
+ {
+ if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
+ return false;
+ for(Index j = 0; j < i; ++j)
+ if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
+ return false;
+ }
+ return true;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_DOT_H
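
A minimal sketch exercising the dot product and the norm-related functions implemented above; the expected outputs noted in the comments assume exact arithmetic:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Vector3d a(1.0, 2.0, 2.0);
  Eigen::Vector3d b(0.0, 1.0, 0.0);

  std::cout << a.dot(b) << "\n";                     // 2
  std::cout << a.squaredNorm() << "\n";              // 9
  std::cout << a.norm() << "\n";                     // 3
  std::cout << a.normalized().transpose() << "\n";   // a divided by its norm
  std::cout << a.lpNorm<1>() << "\n";                // 5
  std::cout << a.lpNorm<Eigen::Infinity>() << "\n";  // 2

  std::cout << Eigen::Vector3d::UnitX().isOrthogonal(Eigen::Vector3d::UnitY()) << "\n"; // 1
  std::cout << Eigen::Matrix3d::Identity().isUnitary() << "\n";                         // 1
  return 0;
}
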
diff --git a/third_party/eigen3/Eigen/src/Core/EigenBase.h b/third_party/eigen3/Eigen/src/Core/EigenBase.h
new file mode 100644
index 0000000000..1a577c2dce
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/EigenBase.h
@@ -0,0 +1,146 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_EIGENBASE_H
+#define EIGEN_EIGENBASE_H
+
+namespace Eigen {
+
+/** Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
+ *
+ * In other words, an EigenBase object is an object that can be copied into a MatrixBase.
+ *
+ * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc.
+ *
+ * Notice that this class is trivial; it is only used to disambiguate overloaded functions.
+ *
+ * \sa \ref TopicClassHierarchy
+ */
+template<typename Derived> struct EigenBase
+{
+// typedef typename internal::plain_matrix_type<Derived>::type PlainObject;
+
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Index Index;
+
+ /** \returns a reference to the derived object */
+ EIGEN_DEVICE_FUNC
+ Derived& derived() { return *static_cast<Derived*>(this); }
+ /** \returns a const reference to the derived object */
+ EIGEN_DEVICE_FUNC
+ const Derived& derived() const { return *static_cast<const Derived*>(this); }
+
+ EIGEN_DEVICE_FUNC
+ inline Derived& const_cast_derived() const
+ { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
+ EIGEN_DEVICE_FUNC
+ inline const Derived& const_derived() const
+ { return *static_cast<const Derived*>(this); }
+
+ /** \returns the number of rows. \sa cols(), RowsAtCompileTime */
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return derived().rows(); }
+ /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return derived().cols(); }
+ /** \returns the number of coefficients, which is rows()*cols().
+ * \sa rows(), cols(), SizeAtCompileTime. */
+ EIGEN_DEVICE_FUNC
+ inline Index size() const { return rows() * cols(); }
+
+ /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void evalTo(Dest& dst) const
+ { derived().evalTo(dst); }
+
+ /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void addTo(Dest& dst) const
+ {
+ // This is the default implementation,
+ // derived class can reimplement it in a more optimized way.
+ typename Dest::PlainObject res(rows(),cols());
+ evalTo(res);
+ dst += res;
+ }
+
+ /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void subTo(Dest& dst) const
+ {
+ // This is the default implementation,
+ // derived class can reimplement it in a more optimized way.
+ typename Dest::PlainObject res(rows(),cols());
+ evalTo(res);
+ dst -= res;
+ }
+
+ /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
+ {
+ // This is the default implementation,
+ // derived class can reimplement it in a more optimized way.
+ dst = dst * this->derived();
+ }
+
+ /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
+ {
+ // This is the default implementation,
+ // derived class can reimplement it in a more optimized way.
+ dst = this->derived() * dst;
+ }
+
+};
+
+/***************************************************************************
+* Implementation of matrix base methods
+***************************************************************************/
+
+/** \brief Copies the generic expression \a other into *this.
+ *
+ * \details The expression must provide a (templated) evalTo(Derived& dst) const
+ * function which does the actual job. In practice, this allows any user to write
+ * their own special matrix class without having to modify MatrixBase.
+ *
+ * \returns a reference to *this.
+ */
+template<typename Derived>
+template<typename OtherDerived>
+Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
+{
+ other.derived().evalTo(derived());
+ return derived();
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
+{
+ other.derived().addTo(derived());
+ return derived();
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
+{
+ other.derived().subTo(derived());
+ return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_EIGENBASE_H
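
A minimal sketch of the EigenBase hooks above: DiagonalMatrix derives from EigenBase but not from MatrixBase, so assigning or adding it to a dense matrix goes through the evalTo()/addTo() machinery:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::DiagonalMatrix<double, 3> D(1.0, 2.0, 3.0);

  Eigen::Matrix3d m;
  m = D;                                      // operator=(const EigenBase&) -> evalTo()
  m += Eigen::Vector3d::Ones().asDiagonal();  // DenseBase::operator+=(const EigenBase&) -> addTo()
  std::cout << m << "\n";                     // diag(2, 3, 4)
  return 0;
}
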
diff --git a/third_party/eigen3/Eigen/src/Core/Flagged.h b/third_party/eigen3/Eigen/src/Core/Flagged.h
new file mode 100644
index 0000000000..1f2955fc1d
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Flagged.h
@@ -0,0 +1,140 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FLAGGED_H
+#define EIGEN_FLAGGED_H
+
+namespace Eigen {
+
+/** \class Flagged
+ * \ingroup Core_Module
+ *
+ * \brief Expression with modified flags
+ *
+ * \param ExpressionType the type of the object of which we are modifying the flags
+ * \param Added the flags added to the expression
+ * \param Removed the flags removed from the expression (has priority over Added).
+ *
+ * This class represents an expression whose flags have been modified.
+ * It is the return type of MatrixBase::flagged()
+ * and most of the time this is the only way it is used.
+ *
+ * \sa MatrixBase::flagged()
+ */
+
+namespace internal {
+template<typename ExpressionType, unsigned int Added, unsigned int Removed>
+struct traits<Flagged<ExpressionType, Added, Removed> > : traits<ExpressionType>
+{
+ enum { Flags = (ExpressionType::Flags | Added) & ~Removed };
+};
+}
+
+template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged
+ : public MatrixBase<Flagged<ExpressionType, Added, Removed> >
+{
+ public:
+
+ typedef MatrixBase<Flagged> Base;
+
+ EIGEN_DENSE_PUBLIC_INTERFACE(Flagged)
+ typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
+ ExpressionType, const ExpressionType&>::type ExpressionTypeNested;
+ typedef typename ExpressionType::InnerIterator InnerIterator;
+
+ inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
+
+ inline Index rows() const { return m_matrix.rows(); }
+ inline Index cols() const { return m_matrix.cols(); }
+ inline Index outerStride() const { return m_matrix.outerStride(); }
+ inline Index innerStride() const { return m_matrix.innerStride(); }
+
+ inline CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_matrix.coeff(row, col);
+ }
+
+ inline CoeffReturnType coeff(Index index) const
+ {
+ return m_matrix.coeff(index);
+ }
+
+ inline const Scalar& coeffRef(Index row, Index col) const
+ {
+ return m_matrix.const_cast_derived().coeffRef(row, col);
+ }
+
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return m_matrix.const_cast_derived().coeffRef(index);
+ }
+
+ inline Scalar& coeffRef(Index row, Index col)
+ {
+ return m_matrix.const_cast_derived().coeffRef(row, col);
+ }
+
+ inline Scalar& coeffRef(Index index)
+ {
+ return m_matrix.const_cast_derived().coeffRef(index);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index row, Index col) const
+ {
+ return m_matrix.template packet<LoadMode>(row, col);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ m_matrix.const_cast_derived().template writePacket<LoadMode>(row, col, x);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index index) const
+ {
+ return m_matrix.template packet<LoadMode>(index);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index index, const PacketScalar& x)
+ {
+ m_matrix.const_cast_derived().template writePacket<LoadMode>(index, x);
+ }
+
+ const ExpressionType& _expression() const { return m_matrix; }
+
+ template<typename OtherDerived>
+ typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const;
+
+ template<typename OtherDerived>
+ void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const;
+
+ protected:
+ ExpressionTypeNested m_matrix;
+};
+
+/** \returns an expression of *this with added and removed flags
+ *
+ * This is mostly for internal use.
+ *
+ * \sa class Flagged
+ */
+template<typename Derived>
+template<unsigned int Added,unsigned int Removed>
+inline const Flagged<Derived, Added, Removed>
+DenseBase<Derived>::flagged() const
+{
+ return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_FLAGGED_H
diff --git a/third_party/eigen3/Eigen/src/Core/ForceAlignedAccess.h b/third_party/eigen3/Eigen/src/Core/ForceAlignedAccess.h
new file mode 100644
index 0000000000..807c7a2934
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/ForceAlignedAccess.h
@@ -0,0 +1,146 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FORCEALIGNEDACCESS_H
+#define EIGEN_FORCEALIGNEDACCESS_H
+
+namespace Eigen {
+
+/** \class ForceAlignedAccess
+ * \ingroup Core_Module
+ *
+ * \brief Enforce aligned packet loads and stores regardless of what is requested
+ *
+ * \param ExpressionType the type of the object of which we are forcing aligned packet access
+ *
+ * This class is the return type of MatrixBase::forceAlignedAccess()
+ * and most of the time this is the only way it is used.
+ *
+ * \sa MatrixBase::forceAlignedAccess()
+ */
+
+namespace internal {
+template<typename ExpressionType>
+struct traits<ForceAlignedAccess<ExpressionType> > : public traits<ExpressionType>
+{};
+}
+
+template<typename ExpressionType> class ForceAlignedAccess
+ : public internal::dense_xpr_base< ForceAlignedAccess<ExpressionType> >::type
+{
+ public:
+
+ typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)
+
+ inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
+
+ inline Index rows() const { return m_expression.rows(); }
+ inline Index cols() const { return m_expression.cols(); }
+ inline Index outerStride() const { return m_expression.outerStride(); }
+ inline Index innerStride() const { return m_expression.innerStride(); }
+
+ inline const CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_expression.coeff(row, col);
+ }
+
+ inline Scalar& coeffRef(Index row, Index col)
+ {
+ return m_expression.const_cast_derived().coeffRef(row, col);
+ }
+
+ inline const CoeffReturnType coeff(Index index) const
+ {
+ return m_expression.coeff(index);
+ }
+
+ inline Scalar& coeffRef(Index index)
+ {
+ return m_expression.const_cast_derived().coeffRef(index);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index row, Index col) const
+ {
+ return m_expression.template packet<Aligned>(row, col);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ m_expression.const_cast_derived().template writePacket<Aligned>(row, col, x);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index index) const
+ {
+ return m_expression.template packet<Aligned>(index);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index index, const PacketScalar& x)
+ {
+ m_expression.const_cast_derived().template writePacket<Aligned>(index, x);
+ }
+
+ operator const ExpressionType&() const { return m_expression; }
+
+ protected:
+ const ExpressionType& m_expression;
+
+ private:
+ ForceAlignedAccess& operator=(const ForceAlignedAccess&);
+};
+
+/** \returns an expression of *this with forced aligned access
+ * \sa forceAlignedAccessIf(), class ForceAlignedAccess
+ */
+template<typename Derived>
+inline const ForceAlignedAccess<Derived>
+MatrixBase<Derived>::forceAlignedAccess() const
+{
+ return ForceAlignedAccess<Derived>(derived());
+}
+
+/** \returns an expression of *this with forced aligned access
+ * \sa forceAlignedAccessIf(), class ForceAlignedAccess
+ */
+template<typename Derived>
+inline ForceAlignedAccess<Derived>
+MatrixBase<Derived>::forceAlignedAccess()
+{
+ return ForceAlignedAccess<Derived>(derived());
+}
+
+/** \returns an expression of *this with forced aligned access if \a Enable is true.
+ * \sa forceAlignedAccess(), class ForceAlignedAccess
+ */
+template<typename Derived>
+template<bool Enable>
+inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type
+MatrixBase<Derived>::forceAlignedAccessIf() const
+{
+ return derived();
+}
+
+/** \returns an expression of *this with forced aligned access if \a Enable is true.
+ * \sa forceAlignedAccess(), class ForceAlignedAccess
+ */
+template<typename Derived>
+template<bool Enable>
+inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type
+MatrixBase<Derived>::forceAlignedAccessIf()
+{
+ return derived();
+}
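+// Illustrative usage (a sketch, not a definition from this header): forceAlignedAccess()
+// and forceAlignedAccessIf<Enable>() are meant for code that knows its operands are
+// aligned, e.g.:
+//
+//   Eigen::VectorXf v = Eigen::VectorXf::Random(64);
+//   float s = v.forceAlignedAccessIf<true>().squaredNorm();   // aligned packet loads
+//   float t = v.forceAlignedAccessIf<false>().squaredNorm();  // plain reference to v
+//
+// Enabling it on data that is not actually aligned is likely to crash.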
+
+} // end namespace Eigen
+
+#endif // EIGEN_FORCEALIGNEDACCESS_H
diff --git a/third_party/eigen3/Eigen/src/Core/Functors.h b/third_party/eigen3/Eigen/src/Core/Functors.h
new file mode 100644
index 0000000000..0a45fa31a9
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Functors.h
@@ -0,0 +1,1020 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FUNCTORS_H
+#define EIGEN_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+// associative functors:
+
+/** \internal
+ * \brief Template functor to compute the sum of two scalars
+ *
+ * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, MatrixBase::sum()
+ */
+template<typename Scalar> struct scalar_sum_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
+ EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::padd(a,b); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+ { return internal::predux(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sum_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasAdd
+ };
+};
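+// For illustration (a sketch, not a definition from this header): this functor is what
+// expressions such as A + B are built on; the sum is applied coefficient-wise during
+// assignment rather than materialized into a temporary:
+//
+//   Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4), B = Eigen::MatrixXd::Random(4, 4);
+//   Eigen::MatrixXd C = A + B;   // a CwiseBinaryOp<internal::scalar_sum_op<double>, ...>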
+
+/** \internal
+ * \brief Template functor to compute the product of two scalars
+ *
+ * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
+ */
+template<typename LhsScalar,typename RhsScalar> struct scalar_product_op {
+ enum {
+ // TODO vectorize mixed product
+ Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
+ };
+ typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
+ EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmul(a,b); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+ { return internal::predux_mul(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
+ PacketAccess = scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the conjugate product of two scalars
+ *
+ * This is a shortcut for conj(x) * y, which is needed for optimization purposes; in Eigen2 support mode, this becomes x * conj(y)
+ */
+template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op {
+
+ enum {
+ Conj = NumTraits<LhsScalar>::IsComplex
+ };
+
+ typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
+
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
+ EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
+ { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
+
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = NumTraits<LhsScalar>::MulCost,
+ PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the min of two scalars
+ *
+ * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
+ */
+template<typename Scalar> struct scalar_min_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
+ EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmin(a,b); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+ { return internal::predux_min(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_min_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasMin
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the max of two scalars
+ *
+ * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
+ */
+template<typename Scalar> struct scalar_max_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
+ EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::max; return (max)(a, b); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmax(a,b); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+ { return internal::predux_max(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_max_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasMax
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the hypot of two scalars
+ *
+ * \sa MatrixBase::stableNorm(), class Redux
+ */
+template<typename Scalar> struct scalar_hypot_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
+// typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
+ {
+ using std::max;
+ using std::min;
+ using std::sqrt;
+ Scalar p = (max)(_x, _y);
+ Scalar q = (min)(_x, _y);
+ Scalar qp = q/p;
+ return p * sqrt(Scalar(1) + qp*qp);
+ }
+};
+template<typename Scalar>
+struct functor_traits<scalar_hypot_op<Scalar> > {
+ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
+};
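+// Worked example of the rescaling above: for _x = 3 and _y = 4 we get p = 4, q = 3,
+// qp = 0.75, and the result is 4 * sqrt(1 + 0.5625) = 5. Dividing by the larger value
+// first avoids the overflow that a naive sqrt(x*x + y*y) could hit when x or y is
+// close to the representable maximum.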
+
+/** \internal
+ * \brief Template functor to compute the pow of two scalars
+ */
+template<typename Scalar, typename OtherScalar> struct scalar_binary_pow_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op)
+ inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); }
+};
+template<typename Scalar, typename OtherScalar>
+struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
+ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
+};
+
+// other binary functors:
+
+/** \internal
+ * \brief Template functor to compute the difference of two scalars
+ *
+ * \sa class CwiseBinaryOp, MatrixBase::operator-
+ */
+template<typename Scalar> struct scalar_difference_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
+ EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::psub(a,b); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_difference_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasSub
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the quotient of two scalars
+ *
+ * \sa class CwiseBinaryOp, Cwise::operator/()
+ */
+template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
+ enum {
+ // TODO vectorize mixed product
+ Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv
+ };
+ typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
+ EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pdiv(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost), // rough estimate!
+ PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable
+ };
+};
+
+
+
+/** \internal
+ * \brief Template functor to compute the and of two booleans
+ *
+ * \sa class CwiseBinaryOp, ArrayBase::operator&&
+ */
+struct scalar_boolean_and_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
+ EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
+};
+template<> struct functor_traits<scalar_boolean_and_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the or of two booleans
+ *
+ * \sa class CwiseBinaryOp, ArrayBase::operator||
+ */
+struct scalar_boolean_or_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
+ EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
+};
+template<> struct functor_traits<scalar_boolean_or_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the xor of two booleans
+ *
+ * \sa class CwiseBinaryOp, ArrayBase::operator^
+ */
+struct scalar_boolean_xor_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op)
+ EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; }
+};
+template<> struct functor_traits<scalar_boolean_xor_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+
+// unary functors:
+
+/** \internal
+ * \brief Template functor to compute the opposite of a scalar
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::operator-
+ */
+template<typename Scalar> struct scalar_opposite_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
+ EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pnegate(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_opposite_op<Scalar> >
+{ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasNegate };
+};
+
+/** \internal
+ * \brief Template functor to compute the absolute value of a scalar
+ *
+ * \sa class CwiseUnaryOp, Cwise::abs
+ */
+template<typename Scalar> struct scalar_abs_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pabs(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasAbs
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the squared absolute value of a scalar
+ *
+ * \sa class CwiseUnaryOp, Cwise::abs2
+ */
+template<typename Scalar> struct scalar_abs2_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs2_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
+
+/** \internal
+ * \brief Template functor to compute the conjugate of a complex value
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::conjugate()
+ */
+template<typename Scalar> struct scalar_conjugate_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
+ EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_conjugate_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
+ PacketAccess = packet_traits<Scalar>::HasConj
+ };
+};
+
+/** \internal
+ * \brief Template functor to cast a scalar to another type
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::cast()
+ */
+template<typename Scalar, typename NewType>
+struct scalar_cast_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+ typedef NewType result_type;
+ EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
+};
+template<typename Scalar, typename NewType>
+struct functor_traits<scalar_cast_op<Scalar,NewType> >
+{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
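+// For illustration (a sketch): this is the functor behind MatrixBase::cast(), e.g.
+//
+//   Eigen::MatrixXd A = Eigen::MatrixXd::Random(2, 2);
+//   Eigen::MatrixXf Af = A.cast<float>();   // applies scalar_cast_op<double, float>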
+
+/** \internal
+ * \brief Template functor to convert a scalar to another type using a custom functor.
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::convert()
+ */
+template<typename Scalar, typename NewType, typename ConvertOp>
+struct scalar_convert_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_convert_op)
+ typedef NewType result_type;
+ EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return ConvertOp()(a); }
+};
+template<typename Scalar, typename NewType, typename ConvertOp>
+struct functor_traits<scalar_convert_op<Scalar,NewType,ConvertOp> >
+{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
+
+/** \internal
+ * \brief Template functor to extract the real part of a complex
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::real()
+ */
+template<typename Scalar>
+struct scalar_real_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+ * \brief Template functor to extract the imaginary part of a complex
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::imag()
+ */
+template<typename Scalar>
+struct scalar_imag_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+ * \brief Template functor to extract the real part of a complex as a reference
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::real()
+ */
+template<typename Scalar>
+struct scalar_real_ref_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+ * \brief Template functor to extract the imaginary part of a complex as a reference
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::imag()
+ */
+template<typename Scalar>
+struct scalar_imag_ref_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+ *
+ * \brief Template functor to compute the exponential of a scalar
+ *
+ * \sa class CwiseUnaryOp, Cwise::exp()
+ */
+template<typename Scalar> struct scalar_exp_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
+ inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_exp_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasExp }; };
+
+/** \internal
+ *
+ * \brief Template functor to compute the logarithm of a scalar
+ *
+ * \sa class CwiseUnaryOp, Cwise::log()
+ */
+template<typename Scalar> struct scalar_log_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
+ inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_log_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };
+
+/** \internal
+ * \brief Template functor to multiply a scalar by a fixed other one
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
+ */
+/* NOTE why is doing the pset1() in packetOp() an optimization?
+ * At first sight it seems better to declare m_other as a Packet and do the pset1() once
+ * in the constructor. However, in practice:
+ * - GCC does not like m_other as a Packet and generates a load every time it needs it
+ * - on the other hand, GCC is able to move the pset1() outside the loop :)
+ * - simpler code ;)
+ * (ICC and gcc 4.4 seem to perform well in both cases; the issue is visible with y = a*x + b*y)
+ */
+template<typename Scalar>
+struct scalar_multiple_op {
+ typedef typename packet_traits<Scalar>::type Packet;
+ // FIXME the default copy constructors seem to be buggy with std::complex<>
+ EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
+ EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
+ EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a, pset1<Packet>(m_other)); }
+ typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_multiple_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
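+// For illustration (a sketch): scalar_multiple_op implements scalar scaling such as
+//
+//   Eigen::VectorXd x = Eigen::VectorXd::Ones(8);
+//   Eigen::VectorXd y = 2.0 * x;   // a CwiseUnaryOp<internal::scalar_multiple_op<double>, ...>
+//
+// and, per the note above, pset1(m_other) is redone in each packetOp() call on purpose.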
+
+template<typename Scalar1, typename Scalar2>
+struct scalar_multiple2_op {
+ typedef typename packet_traits<Scalar1>::type Packet1;
+ typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
+ typedef typename packet_traits<result_type>::type packet_result_type;
+ EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
+ EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
+ EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const packet_result_type packetOp(const Packet1& a) const
+ { eigen_assert(false && "packetOp is not defined"); }
+ typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
+};
+template<typename Scalar1,typename Scalar2>
+struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
+{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
+
+/** \internal
+ * \brief Template functor to divide a scalar by a fixed other one
+ *
+ * This functor is used to implement the quotient of a matrix by
+ * a scalar where the scalar type is not necessarily a floating point type.
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::operator/
+ */
+template<typename Scalar>
+struct scalar_quotient1_op {
+ typedef typename packet_traits<Scalar>::type Packet;
+ // FIXME the default copy constructors seem to be buggy with std::complex<>
+ EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
+ EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
+ EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pdiv(a, pset1<Packet>(m_other)); }
+ typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_quotient1_op<Scalar> >
+{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
+
+// nullary functors
+
+template<typename Scalar>
+struct scalar_constant_op {
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
+ const Scalar m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_constant_op<Scalar> >
+// FIXME replace this packet test by a safe one
+{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
+
+template<typename Scalar> struct scalar_identity_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_identity_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
+
+template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
+
+// linear access for packet ops:
+// 1) initialization
+// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
+// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
+// base += [size*step, ..., size*step]
+//
+// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
+// in order to avoid the padd() in operator() ?
+template <typename Scalar>
+struct linspaced_op_impl<Scalar,false>
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+
+ linspaced_op_impl(const Scalar& low, const Scalar& step) :
+ m_low(low), m_step(step),
+ m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
+ m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
+
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
+ {
+ m_base = padd(m_base, pset1<Packet>(m_step));
+ return m_low+Scalar(i)*m_step;
+ }
+
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
+
+ const Scalar m_low;
+ const Scalar m_step;
+ const Packet m_packetStep;
+ mutable Packet m_base;
+};
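+// Worked example of the sequential scheme above, assuming low = 0, step = 1 and a
+// packet size of 4: m_base starts at [-4,-3,-2,-1] and m_packetStep is [4,4,4,4],
+// so successive packetOp() calls return [0,1,2,3], [4,5,6,7], [8,9,10,11], ...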
+
+// random access for packet ops:
+// 1) each step
+// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
+template <typename Scalar>
+struct linspaced_op_impl<Scalar,true>
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+
+ linspaced_op_impl(const Scalar& low, const Scalar& step) :
+ m_low(low), m_step(step),
+ m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
+
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
+
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
+ { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(i),m_interPacket))); }
+
+ const Scalar m_low;
+ const Scalar m_step;
+ const Packet m_lowPacket;
+ const Packet m_stepPacket;
+ const Packet m_interPacket;
+};
+
+// ----- Linspace functor ----------------------------------------------------------------
+
+// Forward declaration (we default to random access, which does not really give
+// us a speed gain when using packet access, but it allows the functor to be used
+// in nested expressions).
+template <typename Scalar, bool RandomAccess = true> struct linspaced_op;
+template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,RandomAccess> >
+{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
+template <typename Scalar, bool RandomAccess> struct linspaced_op
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
+
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
+
+ // We need this function when assigning e.g. a RowVectorXd to a MatrixXd, since
+ // in that case row==0 and col is used for the actual iteration.
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const
+ {
+ eigen_assert(col==0 || row==0);
+ return impl(col + row);
+ }
+
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
+
+ // We need this function when assigning e.g. a RowVectorXd to a MatrixXd, since
+ // in that case row==0 and col is used for the actual iteration.
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
+ {
+ eigen_assert(col==0 || row==0);
+ return impl.packetOp(col + row);
+ }
+
+ // This proxy object handles the actual required temporaries, the different
+ // implementations (random vs. sequential access) as well as the
+ // correct piping to size 2/4 packet operations.
+ const linspaced_op_impl<Scalar,RandomAccess> impl;
+};
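+// For illustration (a sketch): this functor backs DenseBase::LinSpaced(), e.g.
+//
+//   Eigen::VectorXf v = Eigen::VectorXf::LinSpaced(5, 0.f, 1.f);
+//   // v == [0, 0.25, 0.5, 0.75, 1], i.e. low + i*step with step = (high-low)/(num_steps-1)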
+
+// All functors allow linear access, except scalar_identity_op. So we define here a quick meta
+// to indicate whether a functor allows linear access, always answering 'yes' except for
+// scalar_identity_op.
+// FIXME move this to functor_traits by adding a functor_default
+template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
+template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
+
+// In Eigen, any binary op (Product, CwiseBinaryOp) requires the Lhs and Rhs to have the same scalar type, except for multiplication,
+// where the mixing of different types is handled by scalar_product_traits.
+// In particular, real * complex<real> is allowed.
+// FIXME move this to functor_traits by adding a functor_default
+template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+
+
+/** \internal
+ * \brief Template functor to add a scalar to a fixed other one
+ * \sa class CwiseUnaryOp, Array::operator+
+ */
+/* If you wonder why doing the pset1() in packetOp() is an optimization, see the note above scalar_multiple_op */
+template<typename Scalar>
+struct scalar_add_op {
+ typedef typename packet_traits<Scalar>::type Packet;
+ // FIXME the default copy constructors seem to be buggy with std::complex<>
+ inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
+ inline scalar_add_op(const Scalar& other) : m_other(other) { }
+ inline Scalar operator() (const Scalar& a) const { return a + m_other; }
+ inline const Packet packetOp(const Packet& a) const
+ { return internal::padd(a, pset1<Packet>(m_other)); }
+ const Scalar m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_add_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
+
+/** \internal
+ * \brief Template functor to compute the square root of a scalar
+ * \sa class CwiseUnaryOp, Cwise::sqrt()
+ */
+template<typename Scalar> struct scalar_sqrt_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
+ inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sqrt_op<Scalar> >
+{ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasSqrt
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the cosine of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::cos()
+ */
+template<typename Scalar> struct scalar_cos_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
+ inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cos_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasCos
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the sine of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::sin()
+ */
+template<typename Scalar> struct scalar_sin_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
+ inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sin_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasSin
+ };
+};
+
+
+/** \internal
+ * \brief Template functor to compute the tan of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::tan()
+ */
+template<typename Scalar> struct scalar_tan_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
+ inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_tan_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasTan
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the arc cosine of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::acos()
+ */
+template<typename Scalar> struct scalar_acos_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
+ inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_acos_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasACos
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the arc sine of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::asin()
+ */
+template<typename Scalar> struct scalar_asin_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
+ inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_asin_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasASin
+ };
+};
+
+/** \internal
+ * \brief Template functor to raise a scalar to a power
+ * \sa class CwiseUnaryOp, Cwise::pow
+ */
+template<typename Scalar>
+struct scalar_pow_op {
+ // FIXME the default copy constructors seem to be buggy with std::complex<>
+ inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
+ inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
+ inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); }
+ const Scalar m_exponent;
+};
+template<typename Scalar>
+struct functor_traits<scalar_pow_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
+
+/** \internal
+ * \brief Template functor to compute the quotient between a scalar and array entries.
+ * \sa class CwiseUnaryOp, Cwise::inverse()
+ */
+template<typename Scalar>
+struct scalar_inverse_mult_op {
+ scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
+ inline Scalar operator() (const Scalar& a) const { return m_other / a; }
+ template<typename Packet>
+ inline const Packet packetOp(const Packet& a) const
+ { return internal::pdiv(pset1<Packet>(m_other),a); }
+ Scalar m_other;
+};
+
+/** \internal
+ * \brief Template functor to compute the inverse of a scalar
+ * \sa class CwiseUnaryOp, Cwise::inverse()
+ */
+template<typename Scalar>
+struct scalar_inverse_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
+ inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
+ template<typename Packet>
+ inline const Packet packetOp(const Packet& a) const
+ { return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_inverse_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
+
+/** \internal
+ * \brief Template functor to compute the square of a scalar
+ * \sa class CwiseUnaryOp, Cwise::square()
+ */
+template<typename Scalar>
+struct scalar_square_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
+ inline Scalar operator() (const Scalar& a) const { return a*a; }
+ template<typename Packet>
+ inline const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_square_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+
+/** \internal
+ * \brief Template functor to compute the cube of a scalar
+ * \sa class CwiseUnaryOp, Cwise::cube()
+ */
+template<typename Scalar>
+struct scalar_cube_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
+ inline Scalar operator() (const Scalar& a) const { return a*a*a; }
+ template<typename Packet>
+ inline const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,pmul(a,a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cube_op<Scalar> >
+{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+
+// default functor traits for STL functors:
+
+template<typename T>
+struct functor_traits<std::multiplies<T> >
+{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::divides<T> >
+{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::plus<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::minus<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::negate<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_or<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_and<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_not<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::greater<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::less<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::greater_equal<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::less_equal<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::equal_to<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::not_equal_to<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::binder2nd<T> >
+{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::binder1st<T> >
+{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::unary_negate<T> >
+{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::binary_negate<T> >
+{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+
+#ifdef EIGEN_STDEXT_SUPPORT
+
+template<typename T0,typename T1>
+struct functor_traits<std::project1st<T0,T1> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::project2nd<T0,T1> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::select2nd<std::pair<T0,T1> > >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::select1st<std::pair<T0,T1> > >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::unary_compose<T0,T1> >
+{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost, PacketAccess = false }; };
+
+template<typename T0,typename T1,typename T2>
+struct functor_traits<std::binary_compose<T0,T1,T2> >
+{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost + functor_traits<T2>::Cost, PacketAccess = false }; };
+
+#endif // EIGEN_STDEXT_SUPPORT
+
+// Allow adding new functors and specializations of functor_traits from outside Eigen.
+// This macro is really needed because functor_traits must be specialized after it is declared but before it is used...
+#ifdef EIGEN_FUNCTORS_PLUGIN
+#include EIGEN_FUNCTORS_PLUGIN
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_FUNCTORS_H
diff --git a/third_party/eigen3/Eigen/src/Core/Fuzzy.h b/third_party/eigen3/Eigen/src/Core/Fuzzy.h
new file mode 100644
index 0000000000..0ff1b96f56
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Fuzzy.h
@@ -0,0 +1,155 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FUZZY_H
+#define EIGEN_FUZZY_H
+
+namespace Eigen {
+
+namespace internal
+{
+
+template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
+struct isApprox_selector
+{
+ EIGEN_DEVICE_FUNC
+ static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
+ {
+ typename internal::nested<Derived,2>::type nested(x);
+ typename internal::nested<OtherDerived,2>::type otherNested(y);
+ return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
+ }
+};
+
+template<typename Derived, typename OtherDerived>
+struct isApprox_selector<Derived, OtherDerived, true>
+{
+ EIGEN_DEVICE_FUNC
+ static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&)
+ {
+ return x.matrix() == y.matrix();
+ }
+};
+
+template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
+struct isMuchSmallerThan_object_selector
+{
+ EIGEN_DEVICE_FUNC
+ static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
+ {
+ return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();
+ }
+};
+
+template<typename Derived, typename OtherDerived>
+struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
+{
+ EIGEN_DEVICE_FUNC
+ static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&)
+ {
+ return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
+ }
+};
+
+template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
+struct isMuchSmallerThan_scalar_selector
+{
+ EIGEN_DEVICE_FUNC
+ static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec)
+ {
+ return x.cwiseAbs2().sum() <= numext::abs2(prec * y);
+ }
+};
+
+template<typename Derived>
+struct isMuchSmallerThan_scalar_selector<Derived, true>
+{
+ EIGEN_DEVICE_FUNC
+ static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&)
+ {
+ return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
+ }
+};
+
+} // end namespace internal
+
+
+/** \returns \c true if \c *this is approximately equal to \a other, within the precision
+ * determined by \a prec.
+ *
+ * \note The fuzzy compares are done multiplicatively. Two vectors \f$ v \f$ and \f$ w \f$
+ * are considered to be approximately equal within precision \f$ p \f$ if
+ * \f[ \Vert v - w \Vert \leqslant p\,\min(\Vert v\Vert, \Vert w\Vert). \f]
+ * For matrices, the comparison is done using the Hilbert-Schmidt norm (aka Frobenius norm,
+ * i.e. the L2 norm of the matrix regarded as a vector).
+ *
+ * \note Because this comparison is multiplicative, one can't use this function
+ * to check whether \c *this is approximately equal to the zero matrix or vector.
+ * Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix
+ * or vector. If you want to test whether \c *this is zero, use internal::isMuchSmallerThan(const
+ * RealScalar&, RealScalar) instead.
+ *
+ * \sa internal::isMuchSmallerThan(const RealScalar&, RealScalar) const
+ */
+template<typename Derived>
+template<typename OtherDerived>
+bool DenseBase<Derived>::isApprox(
+ const DenseBase<OtherDerived>& other,
+ const RealScalar& prec
+) const
+{
+ return internal::isApprox_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
+}
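+// Illustrative usage (a sketch): with the default precision this tolerates tiny
+// relative differences but, as noted above, cannot be used to compare against zero:
+//
+//   Eigen::Vector2d a(1.0, 2.0), b(1.0 + 1e-15, 2.0);
+//   bool close = a.isApprox(b);                         // true
+//   bool zero  = a.isApprox(Eigen::Vector2d::Zero());   // false unless a is exactly zero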
+
+/** \returns \c true if the norm of \c *this is much smaller than \a other,
+ * within the precision determined by \a prec.
+ *
+ * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is
+ * considered to be much smaller than \f$ x \f$ within precision \f$ p \f$ if
+ * \f[ \Vert v \Vert \leqslant p\,\vert x\vert. \f]
+ *
+ * For matrices, the comparison is done using the Hilbert-Schmidt norm. For this reason,
+ * the value of the reference scalar \a other should come from the Hilbert-Schmidt norm
+ * of a reference matrix of the same dimensions.
+ *
+ * \sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const
+ */
+template<typename Derived>
+bool DenseBase<Derived>::isMuchSmallerThan(
+ const typename NumTraits<Scalar>::Real& other,
+ const RealScalar& prec
+) const
+{
+ return internal::isMuchSmallerThan_scalar_selector<Derived>::run(derived(), other, prec);
+}
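+// Illustrative usage (a sketch): this is the recommended way to test a vector or
+// matrix against zero relative to a reference magnitude, e.g.
+//
+//   Eigen::VectorXd residual = b - A * x;   // assuming A, x, b come from some solve
+//   bool converged = residual.isMuchSmallerThan(b.norm());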
+
+/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other,
+ * within the precision determined by \a prec.
+ *
+ * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is
+ * considered to be much smaller than a vector \f$ w \f$ within precision \f$ p \f$ if
+ * \f[ \Vert v \Vert \leqslant p\,\Vert w\Vert. \f]
+ * For matrices, the comparison is done using the Hilbert-Schmidt norm.
+ *
+ * \sa isApprox(), isMuchSmallerThan(const RealScalar&, RealScalar) const
+ */
+template<typename Derived>
+template<typename OtherDerived>
+bool DenseBase<Derived>::isMuchSmallerThan(
+ const DenseBase<OtherDerived>& other,
+ const RealScalar& prec
+) const
+{
+ return internal::isMuchSmallerThan_object_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_FUZZY_H
diff --git a/third_party/eigen3/Eigen/src/Core/GeneralProduct.h b/third_party/eigen3/Eigen/src/Core/GeneralProduct.h
new file mode 100644
index 0000000000..d2618ba25b
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/GeneralProduct.h
@@ -0,0 +1,674 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERAL_PRODUCT_H
+#define EIGEN_GENERAL_PRODUCT_H
+
+namespace Eigen {
+
+/** \class GeneralProduct
+ * \ingroup Core_Module
+ *
+ * \brief Expression of the product of two general matrices or vectors
+ *
+ * \param LhsNested the type used to store the left-hand side
+ * \param RhsNested the type used to store the right-hand side
+ * \param ProductMode the type of the product
+ *
+ * This class represents an expression of the product of two general matrices.
+ * By a general matrix we mean a dense matrix with full storage; for instance,
+ * this excludes triangular, selfadjoint, and sparse matrices.
+ * It is the return type of the operator* between general matrices. Its template
+ * arguments are determined automatically by ProductReturnType. Therefore,
+ * GeneralProduct should never be used directly. To determine the result type of a
+ * function which involves a matrix product, use ProductReturnType::Type.
+ *
+ * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
+ */
+template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
+class GeneralProduct;
+
+enum {
+ Large = 2,
+ Small = 3
+};
+
+namespace internal {
+
+template<int Rows, int Cols, int Depth> struct product_type_selector;
+
+template<int Size, int MaxSize> struct product_size_category
+{
+ enum { is_large = MaxSize == Dynamic ||
+ Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
+ value = is_large ? Large
+ : Size == 1 ? 1
+ : Small
+ };
+};
+
+template<typename Lhs, typename Rhs> struct product_type
+{
+ typedef typename remove_all<Lhs>::type _Lhs;
+ typedef typename remove_all<Rhs>::type _Rhs;
+ enum {
+ MaxRows = _Lhs::MaxRowsAtCompileTime,
+ Rows = _Lhs::RowsAtCompileTime,
+ MaxCols = _Rhs::MaxColsAtCompileTime,
+ Cols = _Rhs::ColsAtCompileTime,
+ MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
+ _Rhs::MaxRowsAtCompileTime),
+ Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
+ _Rhs::RowsAtCompileTime)
+ };
+
+ // the splitting into different lines of code here, introducing the _select enums and the typedef below,
+ // is to work around an internal compiler error with gcc 4.1 and 4.2.
+private:
+ enum {
+ rows_select = product_size_category<Rows,MaxRows>::value,
+ cols_select = product_size_category<Cols,MaxCols>::value,
+ depth_select = product_size_category<Depth,MaxDepth>::value
+ };
+ typedef product_type_selector<rows_select, cols_select, depth_select> selector;
+
+public:
+ enum {
+ value = selector::ret
+ };
+#ifdef EIGEN_DEBUG_PRODUCT
+ static void debug()
+ {
+ EIGEN_DEBUG_VAR(Rows);
+ EIGEN_DEBUG_VAR(Cols);
+ EIGEN_DEBUG_VAR(Depth);
+ EIGEN_DEBUG_VAR(rows_select);
+ EIGEN_DEBUG_VAR(cols_select);
+ EIGEN_DEBUG_VAR(depth_select);
+ EIGEN_DEBUG_VAR(value);
+ }
+#endif
+};
+
+
+/* The following allows selecting the kind of product at compile time
+ * based on the three dimensions of the product.
+ * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
+// FIXME I'm not sure the current mapping is the ideal one.
+template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
+template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
+template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
+template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
+template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
+template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
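+// For illustration, a few instances of the mapping above (assuming dynamic sizes
+// large enough to be classified as Large):
+//   MatrixXf * MatrixXf  -> <Large,Large,Large> -> GemmProduct  (cache-friendly kernel)
+//   MatrixXf * VectorXf  -> <Large,1,Large>     -> GemvProduct  (matrix-vector kernel)
+//   Matrix2f * Vector2f  -> <Small,1,Small>     -> CoeffBasedProductMode (unrolled)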
+
+} // end namespace internal
+
+/** \class ProductReturnType
+ * \ingroup Core_Module
+ *
+ * \brief Helper class to get the correct and optimized returned type of operator*
+ *
+ * \param Lhs the type of the left-hand side
+ * \param Rhs the type of the right-hand side
+ * \param ProductMode the type of the product (determined automatically by internal::product_mode)
+ *
+ * This class defines the typename Type representing the optimized product expression
+ * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
+ * is the recommended way to define the result type of a function returning an expression
+ * which involves a matrix product. The class Product should never be
+ * used directly.
+ *
+ * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
+ */
+template<typename Lhs, typename Rhs, int ProductType>
+struct ProductReturnType
+{
+ // TODO use the nested type to reduce instantiations ????
+// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+
+ typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
+};
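+// Illustrative sketch (helper name arbitrary): the nested Type is typically used to spell
+// out the return type of a function that forwards to operator*:
+// \code
+// template<typename Lhs, typename Rhs>
+// const typename Eigen::ProductReturnType<Lhs,Rhs>::Type
+// multiply(const Eigen::MatrixBase<Lhs>& a, const Eigen::MatrixBase<Rhs>& b)
+// { return a.derived() * b.derived(); }
+// \endcode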
+
+template<typename Lhs, typename Rhs>
+struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
+{
+ typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
+ typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
+ typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
+};
+
+template<typename Lhs, typename Rhs>
+struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
+{
+ typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
+ typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
+ typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
+};
+
+// this is a workaround for sun CC
+template<typename Lhs, typename Rhs>
+struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
+{};
+
+/***********************************************************************
+* Implementation of Inner Vector Vector Product
+***********************************************************************/
+
+// FIXME : maybe the "inner product" could return a Scalar
+// instead of a 1x1 matrix ??
+// Pro: more natural for the user
+// Con: this could be a problem if, in a meta-unrolled algorithm, a matrix-matrix
+// product ends up being a row-vector times col-vector product... To tackle this use
+// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
+
+namespace internal {
+
+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
+ : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
+{};
+
+}
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, InnerProduct>
+ : internal::no_assignment_operator,
+ public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
+{
+ typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
+ public:
+ GeneralProduct(const Lhs& lhs, const Rhs& rhs)
+ {
+ EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+ Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
+ }
+
+  /** Conversion to scalar */
+ operator const typename Base::Scalar() const {
+ return Base::coeff(0,0);
+ }
+};
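+// Illustrative sketch: because this specialization both derives from a 1x1 Matrix and is
+// convertible to its Scalar, a row-vector times column-vector product can be used as a
+// 1x1 expression or assigned directly to a scalar, e.g.:
+// \code
+// Eigen::Vector3d u(1,2,3), v(4,5,6);
+// double d = u.transpose() * v;   // inner product, implicitly converted to double (here 32)
+// \endcode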
+
+/***********************************************************************
+* Implementation of Outer Vector Vector Product
+***********************************************************************/
+
+namespace internal {
+
+// Column major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&)
+{
+ typedef typename Dest::Index Index;
+ // FIXME make sure lhs is sequentially stored
+ // FIXME not very good if rhs is real and lhs complex while alpha is real too
+ const Index cols = dest.cols();
+ for (Index j=0; j<cols; ++j)
+ func(dest.col(j), prod.rhs().coeff(j) * prod.lhs());
+}
+
+// Row major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) {
+ typedef typename Dest::Index Index;
+ // FIXME make sure rhs is sequentially stored
+ // FIXME not very good if lhs is real and rhs complex while alpha is real too
+ const Index rows = dest.rows();
+ for (Index i=0; i<rows; ++i)
+ func(dest.row(i), prod.lhs().coeff(i) * prod.rhs());
+}
+
+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
+ : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
+{};
+
+}
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, OuterProduct>
+ : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
+{
+ template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+
+ public:
+ EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
+
+ GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
+ {
+ EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+ }
+
+ struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
+ struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
+ struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
+ struct adds {
+ Scalar m_scale;
+ adds(const Scalar& s) : m_scale(s) {}
+ template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
+ dst.const_cast_derived() += m_scale * src;
+ }
+ };
+
+ template<typename Dest>
+ inline void evalTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
+ }
+
+ template<typename Dest>
+ inline void addTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
+ }
+
+ template<typename Dest>
+ inline void subTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
+ }
+
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+ {
+ internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
+ }
+};
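+// Illustrative sketch of expressions that reach this outer-product path:
+// \code
+// Eigen::Vector3d u(1,2,3);
+// Eigen::RowVector3d v(4,5,6);
+// Eigen::Matrix3d M = u * v;   // evalTo(): M(i,j) = u(i)*v(j)
+// M.noalias() += u * v;        // addTo():  in-place rank-1 update
+// \endcode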
+
+/***********************************************************************
+* Implementation of General Matrix Vector Product
+***********************************************************************/
+
+/* According to the shape/flags of the matrix we have to distinguish 3 different cases:
+ * 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
+ * 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
+ * 3 - all other cases are handled using a simple loop along the outer-storage direction.
+ * Therefore we need a lower level meta selector.
+ * Furthermore, if the matrix is the rhs, then the product has to be transposed.
+ */
+namespace internal {
+
+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
+ : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
+{};
+
+template<int Side, int StorageOrder, bool BlasCompatible>
+struct gemv_selector;
+
+} // end namespace internal
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, GemvProduct>
+ : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
+{
+ public:
+ EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
+
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+
+ GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs)
+ {
+// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
+// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+ }
+
+ enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
+ typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
+
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+ {
+ eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
+ internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
+ bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
+ }
+};
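+// Illustrative sketch: a plain matrix*vector product takes this GemvProduct path; a typical
+// GEMV-like update that ends up in scaleAndAddTo() looks like
+// \code
+// Eigen::MatrixXd A = Eigen::MatrixXd::Random(100,100);
+// Eigen::VectorXd x = Eigen::VectorXd::Random(100), y = Eigen::VectorXd::Zero(100);
+// y.noalias() += 2.0 * (A * x);   // y += 2*A*x without an intermediate temporary
+// \endcode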
+
+namespace internal {
+
+// The vector is on the left => transposition
+template<int StorageOrder, bool BlasCompatible>
+struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
+{
+ template<typename ProductType, typename Dest>
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+ {
+ Transpose<Dest> destT(dest);
+ enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
+ gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
+ ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
+ (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
+ }
+};
+
+template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
+
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
+{
+ EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
+};
+
+template<typename Scalar,int Size>
+struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
+{
+ EIGEN_STRONG_INLINE Scalar* data() { return 0; }
+};
+
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
+{
+ #if EIGEN_ALIGN_STATICALLY
+ internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
+ EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
+ #else
+ // Some architectures cannot align on the stack,
+  // => let's manually enforce alignment by allocating more data and returning the address of the first aligned element.
+ enum {
+ ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
+ PacketSize = internal::packet_traits<Scalar>::size
+ };
+ internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
+ EIGEN_STRONG_INLINE Scalar* data() {
+ return ForceAlignment
+ ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
+ : m_data.array;
+ }
+ #endif
+};
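+// Illustrative arithmetic for the manual-alignment branch above, assuming EIGEN_ALIGN_BYTES is 16:
+// if m_data.array starts at address 0x1008, data() returns (0x1008 & ~0xF) + 0x10 = 0x1010, the
+// first 16-byte aligned address strictly above the start; the PacketSize extra elements allocated
+// in m_data ensure the shifted pointer still spans Size usable elements.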
+
+template<> struct gemv_selector<OnTheRight,ColMajor,true>
+{
+ template<typename ProductType, typename Dest>
+ static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+ {
+ typedef typename ProductType::Index Index;
+ typedef typename ProductType::LhsScalar LhsScalar;
+ typedef typename ProductType::RhsScalar RhsScalar;
+ typedef typename ProductType::Scalar ResScalar;
+ typedef typename ProductType::RealScalar RealScalar;
+ typedef typename ProductType::ActualLhsType ActualLhsType;
+ typedef typename ProductType::ActualRhsType ActualRhsType;
+ typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
+ typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+ typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
+
+ ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
+ ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+ * RhsBlasTraits::extractScalarFactor(prod.rhs());
+
+ enum {
+ // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // on the other hand, it is good for the cache to pack the vector anyway...
+ EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
+ ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
+ MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
+ };
+
+ gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+
+ bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
+ bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
+
+ RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
+
+ ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+ evalToDest ? dest.data() : static_dest.data());
+
+ if(!evalToDest)
+ {
+ #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ int size = dest.size();
+ EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #endif
+ if(!alphaIsCompatible)
+ {
+ MappedDest(actualDestPtr, dest.size()).setZero();
+ compatibleAlpha = RhsScalar(1);
+ }
+ else
+ MappedDest(actualDestPtr, dest.size()) = dest;
+ }
+
+ typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
+ general_matrix_vector_product
+ <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+ actualLhs.rows(), actualLhs.cols(),
+ LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+ RhsMapper(actualRhs.data(), actualRhs.innerStride()),
+ actualDestPtr, 1,
+ compatibleAlpha);
+
+ if (!evalToDest)
+ {
+ if(!alphaIsCompatible)
+ dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
+ else
+ dest = MappedDest(actualDestPtr, dest.size());
+ }
+ }
+};
+
+template<> struct gemv_selector<OnTheRight,RowMajor,true>
+{
+ template<typename ProductType, typename Dest>
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+ {
+ typedef typename ProductType::LhsScalar LhsScalar;
+ typedef typename ProductType::RhsScalar RhsScalar;
+ typedef typename ProductType::Scalar ResScalar;
+ typedef typename ProductType::Index Index;
+ typedef typename ProductType::ActualLhsType ActualLhsType;
+ typedef typename ProductType::ActualRhsType ActualRhsType;
+ typedef typename ProductType::_ActualRhsType _ActualRhsType;
+ typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
+ typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+
+ typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+ typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+ * RhsBlasTraits::extractScalarFactor(prod.rhs());
+
+ enum {
+ // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // on the other hand, it is good for the cache to pack the vector anyway...
+ DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
+ };
+
+ gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+
+ ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
+ DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+
+ if(!DirectlyUseRhs)
+ {
+ #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ int size = actualRhs.size();
+ EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #endif
+ Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+ }
+
+ typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
+ general_matrix_vector_product
+ <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+ actualLhs.rows(), actualLhs.cols(),
+ LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+ RhsMapper(actualRhsPtr, 1),
+ dest.data(), dest.innerStride(),
+ actualAlpha);
+ }
+};
+
+template<> struct gemv_selector<OnTheRight,ColMajor,false>
+{
+ template<typename ProductType, typename Dest>
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+ {
+ typedef typename Dest::Index Index;
+    // TODO make sure dest is sequentially stored in memory, otherwise use a temp
+ const Index size = prod.rhs().rows();
+ for(Index k=0; k<size; ++k)
+ dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
+ }
+};
+
+template<> struct gemv_selector<OnTheRight,RowMajor,false>
+{
+ template<typename ProductType, typename Dest>
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+ {
+ typedef typename Dest::Index Index;
+    // TODO make sure rhs is sequentially stored in memory, otherwise use a temp
+ const Index rows = prod.rows();
+ for(Index i=0; i<rows; ++i)
+ dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
+ }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* Implementation of matrix base methods
+***************************************************************************/
+
+/** \returns the matrix product of \c *this and \a other.
+ *
+ * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
+ *
+ * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
+ */
+#ifndef __CUDACC__
+
+#ifdef EIGEN_TEST_EVALUATORS
+template<typename Derived>
+template<typename OtherDerived>
+inline const Product<Derived, OtherDerived>
+MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
+{
+ // A note regarding the function declaration: In MSVC, this function will sometimes
+ // not be inlined since DenseStorage is an unwindable object for dynamic
+ // matrices and product types are holding a member to store the result.
+ // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
+ enum {
+ ProductIsValid = Derived::ColsAtCompileTime==Dynamic
+ || OtherDerived::RowsAtCompileTime==Dynamic
+ || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+ AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+ SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+ };
+ // note to the lost user:
+ // * for a dot product use: v1.dot(v2)
+ // * for a coeff-wise product use: v1.cwiseProduct(v2)
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+ EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+#ifdef EIGEN_DEBUG_PRODUCT
+ internal::product_type<Derived,OtherDerived>::debug();
+#endif
+
+ return Product<Derived, OtherDerived>(derived(), other.derived());
+}
+#else
+template<typename Derived>
+template<typename OtherDerived>
+inline const typename ProductReturnType<Derived, OtherDerived>::Type
+MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
+{
+ // A note regarding the function declaration: In MSVC, this function will sometimes
+ // not be inlined since DenseStorage is an unwindable object for dynamic
+ // matrices and product types are holding a member to store the result.
+ // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
+ enum {
+ ProductIsValid = Derived::ColsAtCompileTime==Dynamic
+ || OtherDerived::RowsAtCompileTime==Dynamic
+ || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+ AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+ SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+ };
+ // note to the lost user:
+ // * for a dot product use: v1.dot(v2)
+ // * for a coeff-wise product use: v1.cwiseProduct(v2)
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+ EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+#ifdef EIGEN_DEBUG_PRODUCT
+ internal::product_type<Derived,OtherDerived>::debug();
+#endif
+ return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+}
+#endif
+
+#endif
+/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
+ *
+ * The returned product will behave like any other expression: the coefficients of the product will be
+ * computed one at a time, as requested. This might be useful in some extremely rare cases when only
+ * a small and non-coherent fraction of the result's coefficients have to be computed.
+ *
+ * \warning This version of the matrix product can be much, much slower. So use it only if you know
+ * what you are doing and you have measured a true speed improvement.
+ *
+ * \sa operator*(const MatrixBase&)
+ */
+template<typename Derived>
+template<typename OtherDerived>
+const typename LazyProductReturnType<Derived,OtherDerived>::Type
+MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
+{
+ enum {
+ ProductIsValid = Derived::ColsAtCompileTime==Dynamic
+ || OtherDerived::RowsAtCompileTime==Dynamic
+ || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+ AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+ SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+ };
+ // note to the lost user:
+ // * for a dot product use: v1.dot(v2)
+ // * for a coeff-wise product use: v1.cwiseProduct(v2)
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+ EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+
+ return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+}
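+// Illustrative sketch: lazyProduct() yields a coefficient-based expression whose entries are
+// computed on demand, e.g.
+// \code
+// Eigen::Matrix3d A = Eigen::Matrix3d::Random(), B = Eigen::Matrix3d::Random();
+// double c01 = A.lazyProduct(B)(0,1);   // only the (0,1) coefficient of the product is computed
+// Eigen::Matrix3d C = A.lazyProduct(B); // same result as C = A*B, skipping the cache-friendly kernels
+// \endcode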
+
+} // end namespace Eigen
+
+#endif // EIGEN_PRODUCT_H
diff --git a/third_party/eigen3/Eigen/src/Core/GenericPacketMath.h b/third_party/eigen3/Eigen/src/Core/GenericPacketMath.h
new file mode 100644
index 0000000000..bf9d6f9c33
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/GenericPacketMath.h
@@ -0,0 +1,584 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERIC_PACKET_MATH_H
+#define EIGEN_GENERIC_PACKET_MATH_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal
+ * \file GenericPacketMath.h
+ *
+ * Default implementation for types not supported by the vectorization.
+ * In practice these functions are provided to make it easier to write
+ * generic vectorized code.
+ */
+
+#ifndef EIGEN_DEBUG_ALIGNED_LOAD
+#define EIGEN_DEBUG_ALIGNED_LOAD
+#endif
+
+#ifndef EIGEN_DEBUG_UNALIGNED_LOAD
+#define EIGEN_DEBUG_UNALIGNED_LOAD
+#endif
+
+#ifndef EIGEN_DEBUG_ALIGNED_STORE
+#define EIGEN_DEBUG_ALIGNED_STORE
+#endif
+
+#ifndef EIGEN_DEBUG_UNALIGNED_STORE
+#define EIGEN_DEBUG_UNALIGNED_STORE
+#endif
+
+struct default_packet_traits
+{
+ enum {
+ HasHalfPacket = 0,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasNegate = 1,
+ HasAbs = 1,
+ HasAbs2 = 1,
+ HasMin = 1,
+ HasMax = 1,
+ HasConj = 1,
+ HasSetLinear = 1,
+ HasBlend = 0,
+
+ HasDiv = 0,
+ HasSqrt = 0,
+ HasRsqrt = 0,
+ HasExp = 0,
+ HasLog = 0,
+ HasPow = 0,
+
+ HasSin = 0,
+ HasCos = 0,
+ HasTan = 0,
+ HasASin = 0,
+ HasACos = 0,
+ HasATan = 0,
+ HasTanH = 0
+ };
+};
+
+template<typename T> struct packet_traits : default_packet_traits
+{
+ typedef T type;
+ typedef T half;
+ enum {
+ Vectorizable = 0,
+ size = 1,
+ AlignedOnScalar = 0,
+ HasHalfPacket = 0
+ };
+ enum {
+ HasAdd = 0,
+ HasSub = 0,
+ HasMul = 0,
+ HasNegate = 0,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasConj = 0,
+ HasSetLinear = 0
+ };
+};
+
+template<typename T> struct packet_traits<const T> : packet_traits<T> { };
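+// A hypothetical sketch of how an architecture-specific backend would specialize
+// packet_traits for its SIMD type; the unspecialized version above is the scalar fallback:
+// \code
+// template<> struct packet_traits<float> : default_packet_traits
+// {
+//   typedef Packet4f type;   // hypothetical 4-float SIMD register type
+//   typedef Packet4f half;
+//   enum { Vectorizable = 1, AlignedOnScalar = 1, size = 4, HasHalfPacket = 0,
+//          HasDiv = 1, HasSqrt = 1 };
+// };
+// \endcode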
+
+
+template <typename Src, typename Tgt> struct type_casting_traits {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+template <typename T> struct type_casting_traits<T, T> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+
+/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a) {
+ return static_cast<TgtPacket>(a);
+}
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
+ return static_cast<TgtPacket>(a);
+}
+
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
+ return static_cast<TgtPacket>(a);
+}
+
+/** \internal \returns a + b (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+padd(const Packet& a,
+ const Packet& b) { return a+b; }
+
+/** \internal \returns a - b (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+psub(const Packet& a,
+ const Packet& b) { return a-b; }
+
+/** \internal \returns the coeff-wise result of a == b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+peq(const Packet& a, const Packet& b) { return a == b; }
+
+/** \internal \returns the coeff-wise result of a < b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+plt(const Packet& a, const Packet& b) { return a < b; }
+
+/** \internal \returns the coeff-wise result of a <= b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ple(const Packet& a, const Packet& b) { return a <= b; }
+
+/** \internal \returns b if false_mask is set, else a */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pselect(const Packet& a,
+ const Packet& b,
+ const Packet& false_mask) {
+ return false_mask ? b : a;
+}
+
+/** \internal \returns -a (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pnegate(const Packet& a) { return -a; }
+
+/** \internal \returns conj(a) (coeff-wise) */
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pconj(const Packet& a) { return numext::conj(a); }
+
+/** \internal \returns a * b (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmul(const Packet& a,
+ const Packet& b) { return a*b; }
+
+/** \internal \returns a / b (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pdiv(const Packet& a,
+ const Packet& b) { return a/b; }
+
+/** \internal \returns the min of \a a and \a b (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmin(const Packet& a,
+ const Packet& b) { return numext::mini(a, b); }
+
+/** \internal \returns the max of \a a and \a b (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmax(const Packet& a,
+ const Packet& b) { return numext::maxi(a, b); }
+
+/** \internal \returns the absolute value of \a a */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pabs(const Packet& a) { using std::abs; return abs(a); }
+
+/** \internal \returns the bitwise and of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pand(const Packet& a, const Packet& b) { return a & b; }
+
+/** \internal \returns the bitwise or of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+por(const Packet& a, const Packet& b) { return a | b; }
+
+/** \internal \returns the bitwise xor of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pxor(const Packet& a, const Packet& b) { return a ^ b; }
+
+/** \internal \returns the bitwise andnot of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pandnot(const Packet& a, const Packet& b) { return a & (!b); }
+
+/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
+
+/** \internal \returns a packet version of \a *from, (un-aligned load) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
+
+/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
+
+/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
+
+/** \internal \returns a packet with elements of \a *from duplicated.
+ * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
+ * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
+ * Currently, this function is only used for scalar * complex products.
+ */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
+
+/** \internal \returns a packet with elements of \a *from quadrupled.
+ * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
+ * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
+ * Currently, this function is only used in matrix products.
+ * For packet sizes smaller than or equal to 4, this function is equivalent to pload1
+ */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ploadquad(const typename unpacket_traits<Packet>::type* from)
+{ return pload1<Packet>(from); }
+
+/** \internal equivalent to
+ * \code
+ * a0 = pload1(a+0);
+ * a1 = pload1(a+1);
+ * a2 = pload1(a+2);
+ * a3 = pload1(a+3);
+ * \endcode
+ * \sa pset1, pload1, ploaddup, pbroadcast2
+ */
+template<typename Packet> EIGEN_DEVICE_FUNC
+inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
+ Packet& a0, Packet& a1, Packet& a2, Packet& a3)
+{
+ a0 = pload1<Packet>(a+0);
+ a1 = pload1<Packet>(a+1);
+ a2 = pload1<Packet>(a+2);
+ a3 = pload1<Packet>(a+3);
+}
+
+/** \internal equivalent to
+ * \code
+ * a0 = pload1(a+0);
+ * a1 = pload1(a+1);
+ * \endcode
+ * \sa pset1, pload1, ploaddup, pbroadcast4
+ */
+template<typename Packet> EIGEN_DEVICE_FUNC
+inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
+ Packet& a0, Packet& a1)
+{
+ a0 = pload1<Packet>(a+0);
+ a1 = pload1<Packet>(a+1);
+}
+
+/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
+template<typename Scalar> inline typename packet_traits<Scalar>::type
+plset(const Scalar& a) { return a; }
+
+/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
+{ (*to) = from; }
+
+/** \internal copy the packet \a from to \a *to, (un-aligned store) */
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
+{ (*to) = from; }
+
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, int /*stride*/)
+ { return ploadu<Packet>(from); }
+
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, int /*stride*/)
+ { pstore(to, from); }
+
+/** \internal tries to do cache prefetching of \a addr */
+template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
+{
+#ifdef __CUDA_ARCH__
+#if defined(__LP64__)
+ // 64-bit pointer operand constraint for inlined asm
+ asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
+#else
+ // 32-bit pointer operand constraint for inlined asm
+ asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
+#endif
+#elif !defined(_MSC_VER)
+ __builtin_prefetch(addr);
+#endif
+}
+
+/** \internal \returns the first element of a packet */
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
+{ return a; }
+
+/** \internal \returns a packet where element i contains the sum of the elements of \a vecs[i] */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+preduxp(const Packet* vecs) { return vecs[0]; }
+
+/** \internal \returns the sum of the elements of \a a*/
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
+{ return a; }
+
+/** \internal \returns the sum of the elements of \a a by block of 4 elements.
+ * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
+ * For packet sizes smaller than or equal to 4, this boils down to a no-op.
+ */
+template<typename Packet> EIGEN_DEVICE_FUNC inline
+typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
+predux4(const Packet& a)
+{ return a; }
+
+/** \internal \returns the product of the elements of \a a*/
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
+{ return a; }
+
+/** \internal \returns the min of the elements of \a a*/
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
+{ return a; }
+
+/** \internal \returns the max of the elements of \a a*/
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
+{ return a; }
+
+/** \internal \returns the reversed elements of \a a*/
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
+{ return a; }
+
+template<size_t offset, typename Packet>
+struct protate_impl
+{
+ // Empty so attempts to use this unimplemented path will fail to compile.
+ // Only specializations of this template should be used.
+};
+
+/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention,
+ * by the given offset, e.g. for offset == 1:
+ * (packet[3], packet[2], packet[1], packet[0]) becomes (packet[0], packet[3], packet[2], packet[1])
+ */
+template<size_t offset, typename Packet> EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a)
+{
+ return offset ? protate_impl<offset, Packet>::run(a) : a;
+}
+
+/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
+{
+ // FIXME: uncomment the following in case we drop the internal imag and real functions.
+// using std::imag;
+// using std::real;
+ return Packet(imag(a),real(a));
+}
+
+/**************************
+* Special math functions
+***************************/
+
+/** \internal \returns the sine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psin(const Packet& a) { using std::sin; return sin(a); }
+
+/** \internal \returns the cosine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pcos(const Packet& a) { using std::cos; return cos(a); }
+
+/** \internal \returns the tan of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet ptan(const Packet& a) { using std::tan; return tan(a); }
+
+/** \internal \returns the arc sine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pasin(const Packet& a) { using std::asin; return asin(a); }
+
+/** \internal \returns the arc cosine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pacos(const Packet& a) { using std::acos; return acos(a); }
+
+/** \internal \returns the atan of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patan(const Packet& a) { using std::atan; return atan(a); }
+
+/** \internal \returns the exp of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pexp(const Packet& a) { using std::exp; return exp(a); }
+
+/** \internal \returns the log of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog(const Packet& a) { using std::log; return log(a); }
+
+/** \internal \returns the square-root of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
+
+/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet prsqrt(const Packet& a) {
+ using std::sqrt;
+ const Packet one(1);
+ return one/sqrt(a);
+}
+
+// Default ptanh approximation threshold, assumes single precision
+// floating point.
+template<typename Packet> Packet ptanh_approx_threshold() {
+ return pset1<Packet>(0.01);
+}
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet ptanh(const Packet& x)
+{
+ const Packet one = pset1<Packet>(1);
+ const Packet two = pset1<Packet>(2);
+ const Packet three = pset1<Packet>(3);
+ const Packet thresh = ptanh_approx_threshold<Packet>();
+ const Packet x2 = pmul(x, x);
+ const Packet small_approx = pmul(x, psub(one, pdiv(x2, three)));
+ const Packet med_approx = psub(one, pdiv(two, padd(pexp(pmul(two, x)), one)));
+
+  // For |x| <= thresh, use the truncated series tanh(x) ~= x*(1 - x^2/3), which follows
+  // from tanh(x) = x*(1 - x^2/3 + ...) for |x| < pi/2; thresh is chosen so that the
+  // truncation error is O(eps). For |x| > thresh, use the exact identity
+  // tanh(x) = 1 - 2/(exp(2*x) + 1). The default thresh of 0.01 was chosen to match the
+  // observed float32 approximation threshold.
+ return pselect(med_approx, small_approx, ple(pabs(x), thresh));
+}
+
+/***************************************************************************
+* The following functions might not have to be overwritten for vectorized types
+***************************************************************************/
+
+/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
+// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
+template<typename Packet>
+inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
+{
+ pstore(to, pset1<Packet>(a));
+}
+
+/** \internal \returns a * b + c (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmadd(const Packet& a,
+ const Packet& b,
+ const Packet& c)
+{ return padd(pmul(a, b),c); }
+
+/** \internal \returns a packet version of \a *from.
+ * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
+template<typename Packet, int LoadMode>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
+{
+ if(LoadMode == Aligned)
+ return pload<Packet>(from);
+ else
+ return ploadu<Packet>(from);
+}
+
+/** \internal copy the packet \a from to \a *to.
+ * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
+template<typename Scalar, typename Packet, int LoadMode>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
+{
+ if(LoadMode == Aligned)
+ pstore(to, from);
+ else
+ pstoreu(to, from);
+}
+
+/** \internal \returns a packet version of \a *from.
+ * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
+ * hardware, if available, to speed up the loading of data that won't be modified
+ * by the current computation.
+ */
+template<typename Packet, int LoadMode>
+inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
+{
+ return ploadt<Packet, LoadMode>(from);
+}
+
+/** \internal default implementation of palign() allowing partial specialization */
+template<int Offset,typename PacketType>
+struct palign_impl
+{
+ // by default data are aligned, so there is nothing to be done :)
+ static inline void run(PacketType&, const PacketType&) {}
+};
+
+/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
+ * of \a first and \a Offset first elements of \a second.
+ *
+ * This function is currently only used to optimize matrix-vector products on unaligned matrices.
+ * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
+ * at the position \a Offset. For instance, for packets of 4 elements, we have:
+ * Input:
+ * - first = {f0,f1,f2,f3}
+ * - second = {s0,s1,s2,s3}
+ * Output:
+ * - if Offset==0 then {f0,f1,f2,f3}
+ * - if Offset==1 then {f1,f2,f3,s0}
+ * - if Offset==2 then {f2,f3,s0,s1}
+ *  - if Offset==3 then {f3,s0,s1,s2}
+ */
+template<int Offset,typename PacketType>
+inline void palign(PacketType& first, const PacketType& second)
+{
+ palign_impl<Offset,PacketType>::run(first,second);
+}
+
+/***************************************************************************
+* Fast complex products (GCC generates a function call which is very slow)
+***************************************************************************/
+
+// Eigen+CUDA does not support complexes.
+#ifndef __CUDACC__
+
+template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
+{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
+
+template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
+{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
+
+#endif
+
+
+/***************************************************************************
+ * PacketBlock, that is a collection of N packets where the number of words
+ * in the packet is a multiple of N.
+***************************************************************************/
+template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
+ Packet packet[N];
+};
+
+template<typename SquarePacketBlock> EIGEN_DEVICE_FUNC inline void
+ptranspose(SquarePacketBlock& /*kernel*/) {
+ // Nothing to do in the scalar case, i.e. a 1x1 matrix.
+}
+
+
+/***************************************************************************
+ * Selector, i.e. vector of N boolean values used to select (i.e. blend)
+ * words from 2 packets.
+***************************************************************************/
+template <size_t N> struct Selector {
+ bool select[N];
+};
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
+ return ifPacket.select[0] ? thenPacket : elsePacket;
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERIC_PACKET_MATH_H
diff --git a/third_party/eigen3/Eigen/src/Core/GlobalFunctions.h b/third_party/eigen3/Eigen/src/Core/GlobalFunctions.h
new file mode 100644
index 0000000000..0b1ce46ba2
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/GlobalFunctions.h
@@ -0,0 +1,94 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010-2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GLOBAL_FUNCTIONS_H
+#define EIGEN_GLOBAL_FUNCTIONS_H
+
+#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \
+ template<typename Derived> \
+ inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
+ NAME(const Eigen::ArrayBase<Derived>& x) { \
+ return x.derived(); \
+ }
+
+#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \
+ \
+ template<typename Derived> \
+ struct NAME##_retval<ArrayBase<Derived> > \
+ { \
+ typedef const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> type; \
+ }; \
+ template<typename Derived> \
+ struct NAME##_impl<ArrayBase<Derived> > \
+ { \
+ static inline typename NAME##_retval<ArrayBase<Derived> >::type run(const Eigen::ArrayBase<Derived>& x) \
+ { \
+ return x.derived(); \
+ } \
+ };
+
+
+namespace Eigen
+{
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op)
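+  // Illustrative sketch: each of the functions declared above applies coefficient-wise to any
+  // array expression, mirroring the member version, e.g.
+  // \code
+  // Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(5, 0.0, 1.0);
+  // Eigen::ArrayXd b = Eigen::exp(a);   // same result as a.exp()
+  // \endcode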
+
+ template<typename Derived>
+ inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived>
+ pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) {
+ return x.derived().pow(exponent);
+ }
+
+ template<typename Derived>
+ inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>
+ pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<Derived>& exponents)
+ {
+ return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>(
+ x.derived(),
+ exponents.derived()
+ );
+ }
+
+ /**
+ * \brief Component-wise division of a scalar by array elements.
+ **/
+ template <typename Derived>
+ inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>
+ operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase<Derived>& a)
+ {
+ return Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>(
+ a.derived(),
+ Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>(s)
+ );
+ }
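+  // Illustrative sketch of the scalar/array division defined just above:
+  // \code
+  // Eigen::ArrayXf a(3); a << 1, 2, 4;
+  // Eigen::ArrayXf b = 2.0f / a;   // coefficient-wise: {2, 1, 0.5}
+  // \endcode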
+
+ namespace internal
+ {
+ EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag,scalar_imag_op)
+ EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op)
+ }
+}
+
+// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random, internal::isApprox...)
+
+#endif // EIGEN_GLOBAL_FUNCTIONS_H
diff --git a/third_party/eigen3/Eigen/src/Core/IO.h b/third_party/eigen3/Eigen/src/Core/IO.h
new file mode 100644
index 0000000000..a1a90c119d
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/IO.h
@@ -0,0 +1,257 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_IO_H
+#define EIGEN_IO_H
+
+namespace Eigen {
+
+enum { DontAlignCols = 1 };
+enum { StreamPrecision = -1,
+ FullPrecision = -2 };
+
+namespace internal {
+template<typename Derived>
+std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt);
+}
+
+/** \class IOFormat
+ * \ingroup Core_Module
+ *
+ * \brief Stores a set of parameters controlling the way matrices are printed
+ *
+ * List of available parameters:
+ * - \b precision number of digits for floating point values, or one of the special constants \c StreamPrecision and \c FullPrecision.
+ * The default is the special value \c StreamPrecision which means to use the
+ * stream's own precision setting, as set for instance using \c cout.precision(3). The other special value
+ * \c FullPrecision means that the number of digits will be computed to match the full precision of each floating-point
+ * type.
+ * - \b flags an OR-ed combination of flags, the default value is 0, the only currently available flag is \c DontAlignCols which
+ *    allows disabling the alignment of columns, resulting in faster code.
+ * - \b coeffSeparator string printed between two coefficients of the same row
+ * - \b rowSeparator string printed between two rows
+ * - \b rowPrefix string printed at the beginning of each row
+ * - \b rowSuffix string printed at the end of each row
+ * - \b matPrefix string printed at the beginning of the matrix
+ * - \b matSuffix string printed at the end of the matrix
+ *
+ * Example: \include IOFormat.cpp
+ * Output: \verbinclude IOFormat.out
+ *
+ * \sa DenseBase::format(), class WithFormat
+ */
+struct IOFormat
+{
+ /** Default constructor, see class IOFormat for the meaning of the parameters */
+ IOFormat(int _precision = StreamPrecision, int _flags = 0,
+ const std::string& _coeffSeparator = " ",
+ const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="",
+ const std::string& _matPrefix="", const std::string& _matSuffix="")
+ : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),
+ rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
+ {
+ // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
+ // don't add rowSpacer if columns are not to be aligned
+ if((flags & DontAlignCols))
+ return;
+ int i = int(matSuffix.length())-1;
+ while (i>=0 && matSuffix[i]!='\n')
+ {
+ rowSpacer += ' ';
+ i--;
+ }
+ }
+ std::string matPrefix, matSuffix;
+ std::string rowPrefix, rowSuffix, rowSeparator, rowSpacer;
+ std::string coeffSeparator;
+ int precision;
+ int flags;
+};
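+// Illustrative sketch (a common "clean" format): precision 4, no flags, coefficients separated
+// by ", ", rows separated by "\n" and wrapped in "[" ... "]":
+// \code
+// Eigen::IOFormat CleanFmt(4, 0, ", ", "\n", "[", "]");
+// Eigen::Matrix2d m; m << 1, 2, 3, 4;
+// std::cout << m.format(CleanFmt) << std::endl;   // prints "[1, 2]" and "[3, 4]" on two lines
+// \endcode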
+
+/** \class WithFormat
+ * \ingroup Core_Module
+ *
+ * \brief Pseudo expression providing matrix output with given format
+ *
+ * \param ExpressionType the type of the object on which IO stream operations are performed
+ *
+ * This class represents an expression with stream operators controlled by a given IOFormat.
+ * It is the return type of DenseBase::format()
+ * and most of the time this is the only way it is used.
+ *
+ * See class IOFormat for some examples.
+ *
+ * \sa DenseBase::format(), class IOFormat
+ */
+template<typename ExpressionType>
+class WithFormat
+{
+ public:
+
+ WithFormat(const ExpressionType& matrix, const IOFormat& format)
+ : m_matrix(matrix), m_format(format)
+ {}
+
+ friend std::ostream & operator << (std::ostream & s, const WithFormat& wf)
+ {
+ return internal::print_matrix(s, wf.m_matrix.eval(), wf.m_format);
+ }
+
+ protected:
+ const typename ExpressionType::Nested m_matrix;
+ IOFormat m_format;
+};
+
+/** \returns a WithFormat proxy object allowing one to print a matrix with the given
+ * format \a fmt.
+ *
+ * See class IOFormat for some examples.
+ *
+ * \sa class IOFormat, class WithFormat
+ */
+template<typename Derived>
+inline const WithFormat<Derived>
+DenseBase<Derived>::format(const IOFormat& fmt) const
+{
+ return WithFormat<Derived>(derived(), fmt);
+}
+
+namespace internal {
+
+template<typename Scalar, bool IsInteger>
+struct significant_decimals_default_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ static inline int run()
+ {
+ using std::ceil;
+ using std::log;
+ return cast<RealScalar,int>(ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
+ }
+};
+
+template<typename Scalar>
+struct significant_decimals_default_impl<Scalar, true>
+{
+ static inline int run()
+ {
+ return 0;
+ }
+};
+
+template<typename Scalar>
+struct significant_decimals_impl
+ : significant_decimals_default_impl<Scalar, NumTraits<Scalar>::IsInteger>
+{};
+
+/** \internal
+ * print the matrix \a _m to the output stream \a s using the output format \a fmt */
+template<typename Derived>
+std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt)
+{
+ if(_m.size() == 0)
+ {
+ s << fmt.matPrefix << fmt.matSuffix;
+ return s;
+ }
+
+ typename Derived::Nested m = _m;
+ typedef typename Derived::Scalar Scalar;
+ typedef typename Derived::Index Index;
+
+ Index width = 0;
+
+ std::streamsize explicit_precision;
+ if(fmt.precision == StreamPrecision)
+ {
+ explicit_precision = 0;
+ }
+ else if(fmt.precision == FullPrecision)
+ {
+ if (NumTraits<Scalar>::IsInteger)
+ {
+ explicit_precision = 0;
+ }
+ else
+ {
+ explicit_precision = significant_decimals_impl<Scalar>::run();
+ }
+ }
+ else
+ {
+ explicit_precision = fmt.precision;
+ }
+
+ std::streamsize old_precision = 0;
+ if(explicit_precision) old_precision = s.precision(explicit_precision);
+
+ bool align_cols = !(fmt.flags & DontAlignCols);
+ if(align_cols)
+ {
+ // compute the largest width
+ for(Index j = 0; j < m.cols(); ++j)
+ for(Index i = 0; i < m.rows(); ++i)
+ {
+ std::stringstream sstr;
+ sstr.copyfmt(s);
+ sstr << m.coeff(i,j);
+ width = std::max<Index>(width, Index(sstr.str().length()));
+ }
+ }
+ s << fmt.matPrefix;
+ const char old_fill = s.fill();
+ s.fill(' ');
+ for(Index i = 0; i < m.rows(); ++i)
+ {
+ if (i)
+ s << fmt.rowSpacer;
+ s << fmt.rowPrefix;
+ if(width) s.width(width);
+ s << m.coeff(i, 0);
+ for(Index j = 1; j < m.cols(); ++j)
+ {
+ s << fmt.coeffSeparator;
+ if (width) s.width(width);
+ s << m.coeff(i, j);
+ }
+ s << fmt.rowSuffix;
+ if( i < m.rows() - 1)
+ s << fmt.rowSeparator;
+ }
+ s.fill(old_fill);
+ s << fmt.matSuffix;
+ if(explicit_precision) s.precision(old_precision);
+ return s;
+}
+
+} // end namespace internal
+
+/** \relates DenseBase
+ *
+ * Outputs the matrix to the given stream.
+ *
+ * If you wish to print the matrix with a format different than the default, use DenseBase::format().
+ *
+ * It is also possible to change the default format by defining EIGEN_DEFAULT_IO_FORMAT before including Eigen headers.
+ * If not defined, this will automatically be defined to Eigen::IOFormat(), that is, the Eigen::IOFormat with default parameters.
+ *
+ * \sa DenseBase::format()
+ */
+template<typename Derived>
+std::ostream & operator <<
+(std::ostream & s,
+ const DenseBase<Derived> & m)
+{
+ return internal::print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_IO_H
diff --git a/third_party/eigen3/Eigen/src/Core/Map.h b/third_party/eigen3/Eigen/src/Core/Map.h
new file mode 100644
index 0000000000..0838d69e37
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Map.h
@@ -0,0 +1,185 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MAP_H
+#define EIGEN_MAP_H
+
+namespace Eigen {
+
+/** \class Map
+ * \ingroup Core_Module
+ *
+ * \brief A matrix or vector expression mapping an existing array of data.
+ *
+ * \tparam PlainObjectType the equivalent matrix type of the mapped data
+ * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
+ * The default is \c #Unaligned.
+ * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
+ * of an ordinary, contiguous array. This can be overridden by specifying strides.
+ * The type passed here must be a specialization of the Stride template, see examples below.
+ *
+ * This class represents a matrix or vector expression mapping an existing array of data.
+ * It can be used to let Eigen interface without any overhead with non-Eigen data structures,
+ * such as plain C arrays or structures from other libraries. By default, it assumes that the
+ * data is laid out contiguously in memory. You can however override this by explicitly specifying
+ * inner and outer strides.
+ *
+ * Here's an example of simply mapping a contiguous array as a \ref TopicStorageOrders "column-major" matrix:
+ * \include Map_simple.cpp
+ * Output: \verbinclude Map_simple.out
+ *
+ * If you need to map non-contiguous arrays, you can do so by specifying strides:
+ *
+ * Here's an example of mapping an array as a vector, specifying an inner stride, that is, the pointer
+ * increment between two consecutive coefficients. Here, we're specifying the inner stride as a compile-time
+ * fixed value.
+ * \include Map_inner_stride.cpp
+ * Output: \verbinclude Map_inner_stride.out
+ *
+ * Here's an example of mapping an array while specifying an outer stride. Here, since we're mapping
+ * as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns.
+ * Here, we're specifying the outer stride as a runtime parameter. Note that here \c OuterStride<> is
+ * a short version of \c OuterStride<Dynamic> because the default template parameter of OuterStride
+ * is \c Dynamic.
+ * \include Map_outer_stride.cpp
+ * Output: \verbinclude Map_outer_stride.out
+ *
+ * For more details and for an example of specifying both an inner and an outer stride, see class Stride.
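+ *
+ * As an additional illustrative sketch (not one of the shipped snippet files), here is a Map combining a
+ * fixed inner and outer stride:
+ * \code
+ * int array[24];
+ * for(int i = 0; i < 24; ++i) array[i] = i;
+ * // view every third entry of 'array' as a 4x2 column-major matrix,
+ * // with consecutive columns starting 12 entries apart
+ * Eigen::Map<Eigen::MatrixXi, Eigen::Unaligned, Eigen::Stride<12, 3> > m(array, 4, 2);
+ * // m(1, 1) == array[12 + 3] == 15
+ * \endcode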
+ *
+ * \b Tip: to change the array of data mapped by a Map object, you can use the C++
+ * placement new syntax:
+ *
+ * Example: \include Map_placement_new.cpp
+ * Output: \verbinclude Map_placement_new.out
+ *
+ * This class is the return type of PlainObjectBase::Map() but can also be used directly.
+ *
+ * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
+ */
+
+namespace internal {
+template<typename PlainObjectType, int MapOptions, typename StrideType>
+struct traits<Map<PlainObjectType, MapOptions, StrideType> >
+ : public traits<PlainObjectType>
+{
+ typedef traits<PlainObjectType> TraitsBase;
+ typedef typename PlainObjectType::Index Index;
+ typedef typename PlainObjectType::Scalar Scalar;
+ enum {
+ InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+ ? int(PlainObjectType::InnerStrideAtCompileTime)
+ : int(StrideType::InnerStrideAtCompileTime),
+ OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+ ? int(PlainObjectType::OuterStrideAtCompileTime)
+ : int(StrideType::OuterStrideAtCompileTime),
+ HasNoInnerStride = InnerStrideAtCompileTime == 1,
+ HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
+ HasNoStride = HasNoInnerStride && HasNoOuterStride,
+ IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
+ IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
+ KeepsPacketAccess = bool(HasNoInnerStride)
+ && ( bool(IsDynamicSize)
+ || HasNoOuterStride
+ || ( OuterStrideAtCompileTime!=Dynamic
+ && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
+ Flags0 = TraitsBase::Flags & (~NestByRefBit),
+ Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
+ Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
+ ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
+ Flags3 = is_lvalue<PlainObjectType>::value ? int(Flags2) : (int(Flags2) & ~LvalueBit),
+ Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit)
+ };
+private:
+ enum { Options }; // Expressions don't have Options
+};
+}
+
+template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
+ : public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
+{
+ public:
+
+ typedef MapBase<Map> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Map)
+
+ typedef typename Base::PointerType PointerType;
+#if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API
+ typedef const Scalar* PointerArgType;
+ inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast<PointerType>(ptr); }
+#else
+ typedef PointerType PointerArgType;
+ EIGEN_DEVICE_FUNC
+ inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
+#endif
+
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
+ : IsVectorAtCompileTime ? this->size()
+ : int(Flags)&RowMajorBit ? this->cols()
+ : this->rows();
+ }
+
+ /** Constructor in the fixed-size case.
+ *
+ * \param dataPtr pointer to the array to map
+ * \param a_stride optional Stride object, passing the strides.
+ */
+ EIGEN_DEVICE_FUNC
+ inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType())
+ : Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride)
+ {
+ PlainObjectType::Base::_check_template_params();
+ }
+
+ /** Constructor in the dynamic-size vector case.
+ *
+ * \param dataPtr pointer to the array to map
+ * \param a_size the size of the vector expression
+ * \param a_stride optional Stride object, passing the strides.
+ */
+ EIGEN_DEVICE_FUNC
+ inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType())
+ : Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride)
+ {
+ PlainObjectType::Base::_check_template_params();
+ }
+
+ /** Constructor in the dynamic-size matrix case.
+ *
+ * \param dataPtr pointer to the array to map
+ * \param nbRows the number of rows of the matrix expression
+ * \param nbCols the number of columns of the matrix expression
+ * \param a_stride optional Stride object, passing the strides.
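+    *
+    * A minimal usage sketch (illustrative only):
+    * \code
+    * double buffer[6] = {1, 2, 3, 4, 5, 6};
+    * Eigen::Map<Eigen::MatrixXd> m(buffer, 2, 3);  // 2x3 column-major view, no copy
+    * m(0, 0) = 10.0;                               // writes through to buffer[0]
+    * \endcode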
+ */
+ EIGEN_DEVICE_FUNC
+ inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType())
+ : Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride)
+ {
+ PlainObjectType::Base::_check_template_params();
+ }
+
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
+
+ protected:
+ StrideType m_stride;
+};
+
+
+} // end namespace Eigen
+
+#endif // EIGEN_MAP_H
diff --git a/third_party/eigen3/Eigen/src/Core/MapBase.h b/third_party/eigen3/Eigen/src/Core/MapBase.h
new file mode 100644
index 0000000000..e8ecb175bf
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/MapBase.h
@@ -0,0 +1,257 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MAPBASE_H
+#define EIGEN_MAPBASE_H
+
+#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
+ EIGEN_STATIC_ASSERT((int(internal::traits<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
+ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
+
+namespace Eigen {
+
+/** \class MapBase
+ * \ingroup Core_Module
+ *
+ * \brief Base class for Map and Block expression with direct access
+ *
+ * \sa class Map, class Block
+ */
+template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
+ : public internal::dense_xpr_base<Derived>::type
+{
+ public:
+
+ typedef typename internal::dense_xpr_base<Derived>::type Base;
+ enum {
+ RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+ ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+ SizeAtCompileTime = Base::SizeAtCompileTime
+ };
+
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Index Index;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef typename internal::conditional<
+ bool(internal::is_lvalue<Derived>::value),
+ Scalar *,
+ const Scalar *>::type
+ PointerType;
+
+ using Base::derived;
+// using Base::RowsAtCompileTime;
+// using Base::ColsAtCompileTime;
+// using Base::SizeAtCompileTime;
+ using Base::MaxRowsAtCompileTime;
+ using Base::MaxColsAtCompileTime;
+ using Base::MaxSizeAtCompileTime;
+ using Base::IsVectorAtCompileTime;
+ using Base::Flags;
+ using Base::IsRowMajor;
+
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+ using Base::lazyAssign;
+ using Base::eval;
+
+ using Base::innerStride;
+ using Base::outerStride;
+ using Base::rowStride;
+ using Base::colStride;
+
+ // bug 217 - compile error on ICC 11.1
+ using Base::operator=;
+
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
+
+ /** Returns a pointer to the first coefficient of the matrix or vector.
+ *
+ * \note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride().
+ *
+ * \sa innerStride(), outerStride()
+ */
+ inline const Scalar* data() const { return m_data; }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeff(Index rowId, Index colId) const
+ {
+ return m_data[colId * colStride() + rowId * rowStride()];
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeff(Index index) const
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return m_data[index * innerStride()];
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return this->m_data[colId * colStride() + rowId * rowStride()];
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return this->m_data[index * innerStride()];
+ }
+
+ template<int LoadMode>
+ inline PacketScalar packet(Index rowId, Index colId) const
+ {
+ return internal::ploadt<PacketScalar, LoadMode>
+ (m_data + (colId * colStride() + rowId * rowStride()));
+ }
+
+ template<int LoadMode>
+ inline PacketScalar packet(Index index) const
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
+ {
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ checkSanity();
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline MapBase(PointerType dataPtr, Index vecSize)
+ : m_data(dataPtr),
+ m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
+ m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime))
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ eigen_assert(vecSize >= 0);
+ eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
+ checkSanity();
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols)
+ : m_data(dataPtr), m_rows(nbRows), m_cols(nbCols)
+ {
+ eigen_assert( (dataPtr == 0)
+ || ( nbRows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows)
+ && nbCols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols)));
+ checkSanity();
+ }
+
+ protected:
+
+ EIGEN_DEVICE_FUNC
+ void checkSanity() const
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
+ internal::inner_stride_at_compile_time<Derived>::ret==1),
+ PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
+ eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0)
+ && "data is not aligned");
+ }
+
+ PointerType m_data;
+ const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
+ const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
+};
+
+template<typename Derived> class MapBase<Derived, WriteAccessors>
+ : public MapBase<Derived, ReadOnlyAccessors>
+{
+ public:
+
+ typedef MapBase<Derived, ReadOnlyAccessors> Base;
+
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::PacketScalar PacketScalar;
+ typedef typename Base::Index Index;
+ typedef typename Base::PointerType PointerType;
+
+ using Base::derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+
+ using Base::innerStride;
+ using Base::outerStride;
+ using Base::rowStride;
+ using Base::colStride;
+
+ typedef typename internal::conditional<
+ internal::is_lvalue<Derived>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar* data() const { return this->m_data; }
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error
+
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
+ {
+ return this->m_data[col * colStride() + row * rowStride()];
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return this->m_data[index * innerStride()];
+ }
+
+ template<int StoreMode>
+ inline void writePacket(Index row, Index col, const PacketScalar& val)
+ {
+ internal::pstoret<Scalar, PacketScalar, StoreMode>
+ (this->m_data + (col * colStride() + row * rowStride()), val);
+ }
+
+ template<int StoreMode>
+ inline void writePacket(Index index, const PacketScalar& val)
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ internal::pstoret<Scalar, PacketScalar, StoreMode>
+ (this->m_data + index * innerStride(), val);
+ }
+
+ EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
+ EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
+ EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
+
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const MapBase& other)
+ {
+ Base::Base::operator=(other);
+ return derived();
+ }
+
+ using Base::Base::operator=;
+};
+
+#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
+
+} // end namespace Eigen
+
+#endif // EIGEN_MAPBASE_H
diff --git a/third_party/eigen3/Eigen/src/Core/MathFunctions.h b/third_party/eigen3/Eigen/src/Core/MathFunctions.h
new file mode 100644
index 0000000000..941f72d224
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/MathFunctions.h
@@ -0,0 +1,1089 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATHFUNCTIONS_H
+#define EIGEN_MATHFUNCTIONS_H
+
+// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html
+#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406
+
+namespace Eigen {
+
+// On WINCE, std::abs is defined for int only, so let's define our own overloads:
+// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too.
+#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500
+long abs(long x) { return (labs(x)); }
+double abs(double x) { return (fabs(x)); }
+float abs(float x) { return (fabsf(x)); }
+long double abs(long double x) { return (fabsl(x)); }
+#endif
+
+namespace internal {
+
+/** \internal \struct global_math_functions_filtering_base
+ *
+ * What it does:
+ * Defines a typedef 'type' as follows:
+ * - if type T has a member typedef Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl, then
+ * global_math_functions_filtering_base<T>::type is a typedef for it.
+ * - otherwise, global_math_functions_filtering_base<T>::type is a typedef for T.
+ *
+ * How it's used:
+ * To allow defining the global math functions (like sin...) in certain cases, like the Array expressions.
+ * When you do sin(array1+array2), the object array1+array2 has a complicated expression type; all you want to know
+ * is that it inherits ArrayBase. So we implement a partial specialization of sin_impl for ArrayBase<Derived>.
+ * So we must make sure to use sin_impl<ArrayBase<Derived> > and not sin_impl<Derived>, otherwise our partial specialization
+ * won't be used. How does sin know that? That's exactly what global_math_functions_filtering_base tells it.
+ *
+ * How it's implemented:
+ * SFINAE in the style of enable_if. Highly susceptible to breaking compilers. With GCC it does work, but if you replace
+ * the typename dummy with an integer template parameter, it doesn't work anymore!
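+ *
+ * A minimal illustration of the resulting typedef (simplified, standalone types, not part of Eigen):
+ * \code
+ * struct Tagged { typedef int Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; };
+ * struct Plain  {};
+ * // global_math_functions_filtering_base<Tagged>::type is int
+ * // global_math_functions_filtering_base<Plain>::type  is Plain
+ * \endcode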
+ */
+
+template<typename T, typename dummy = void>
+struct global_math_functions_filtering_base
+{
+ typedef T type;
+};
+
+template<typename T> struct always_void { typedef void type; };
+
+template<typename T>
+struct global_math_functions_filtering_base
+ <T,
+ typename always_void<typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl>::type
+ >
+{
+ typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type;
+};
+
+#define EIGEN_MATHFUNC_IMPL(func, scalar) Eigen::internal::func##_impl<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>
+#define EIGEN_MATHFUNC_RETVAL(func, scalar) typename Eigen::internal::func##_retval<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>::type
+
+/****************************************************************************
+* Implementation of real *
+****************************************************************************/
+
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct real_default_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ return x;
+ }
+};
+
+template<typename Scalar>
+struct real_default_impl<Scalar,true>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ using std::real;
+ return real(x);
+ }
+};
+
+template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
+
+template<typename Scalar>
+struct real_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of imag *
+****************************************************************************/
+
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct imag_default_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar&)
+ {
+ return RealScalar(0);
+ }
+};
+
+template<typename Scalar>
+struct imag_default_impl<Scalar,true>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ using std::imag;
+ return imag(x);
+ }
+};
+
+template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
+
+template<typename Scalar>
+struct imag_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of real_ref *
+****************************************************************************/
+
+template<typename Scalar>
+struct real_ref_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar& run(Scalar& x)
+ {
+ return reinterpret_cast<RealScalar*>(&x)[0];
+ }
+ EIGEN_DEVICE_FUNC
+ static inline const RealScalar& run(const Scalar& x)
+ {
+ return reinterpret_cast<const RealScalar*>(&x)[0];
+ }
+};
+
+template<typename Scalar>
+struct real_ref_retval
+{
+ typedef typename NumTraits<Scalar>::Real & type;
+};
+
+/****************************************************************************
+* Implementation of imag_ref *
+****************************************************************************/
+
+template<typename Scalar, bool IsComplex>
+struct imag_ref_default_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar& run(Scalar& x)
+ {
+ return reinterpret_cast<RealScalar*>(&x)[1];
+ }
+ EIGEN_DEVICE_FUNC
+ static inline const RealScalar& run(const Scalar& x)
+ {
+    return reinterpret_cast<const RealScalar*>(&x)[1];
+ }
+};
+
+template<typename Scalar>
+struct imag_ref_default_impl<Scalar, false>
+{
+ EIGEN_DEVICE_FUNC
+ static inline Scalar run(Scalar&)
+ {
+ return Scalar(0);
+ }
+ EIGEN_DEVICE_FUNC
+ static inline const Scalar run(const Scalar&)
+ {
+ return Scalar(0);
+ }
+};
+
+template<typename Scalar>
+struct imag_ref_impl : imag_ref_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
+
+template<typename Scalar>
+struct imag_ref_retval
+{
+ typedef typename NumTraits<Scalar>::Real & type;
+};
+
+/****************************************************************************
+* Implementation of conj *
+****************************************************************************/
+
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct conj_impl
+{
+ EIGEN_DEVICE_FUNC
+ static inline Scalar run(const Scalar& x)
+ {
+ return x;
+ }
+};
+
+template<typename Scalar>
+struct conj_impl<Scalar,true>
+{
+ EIGEN_DEVICE_FUNC
+ static inline Scalar run(const Scalar& x)
+ {
+ using std::conj;
+ return conj(x);
+ }
+};
+
+template<typename Scalar>
+struct conj_retval
+{
+ typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of abs2 *
+****************************************************************************/
+
+template<typename Scalar>
+struct abs2_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ return x*x;
+ }
+};
+
+template<typename RealScalar>
+struct abs2_impl<std::complex<RealScalar> >
+{
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const std::complex<RealScalar>& x)
+ {
+ return real(x)*real(x) + imag(x)*imag(x);
+ }
+};
+
+template<typename Scalar>
+struct abs2_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of norm1 *
+****************************************************************************/
+
+template<typename Scalar, bool IsComplex>
+struct norm1_default_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ using std::abs;
+ return abs(real(x)) + abs(imag(x));
+ }
+};
+
+template<typename Scalar>
+struct norm1_default_impl<Scalar, false>
+{
+ EIGEN_DEVICE_FUNC
+ static inline Scalar run(const Scalar& x)
+ {
+ using std::abs;
+ return abs(x);
+ }
+};
+
+template<typename Scalar>
+struct norm1_impl : norm1_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
+
+template<typename Scalar>
+struct norm1_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of hypot *
+****************************************************************************/
+
+template<typename Scalar>
+struct hypot_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ static inline RealScalar run(const Scalar& x, const Scalar& y)
+ {
+ using std::abs;
+ using std::sqrt;
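+    // Compute sqrt(x^2 + y^2) as p * sqrt(1 + (q/p)^2) with p = max(|x|,|y|),
+    // so that the squared ratio cannot overflow or underflow.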
+ RealScalar _x = abs(x);
+ RealScalar _y = abs(y);
+    RealScalar p, qp;
+ if(_x>_y)
+ {
+ p = _x;
+ qp = _y / p;
+ }
+ else
+ {
+ p = _y;
+ qp = _x / p;
+ }
+ if(p==RealScalar(0)) return RealScalar(0);
+ return p * sqrt(RealScalar(1) + qp*qp);
+ }
+};
+
+template<typename Scalar>
+struct hypot_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of cast *
+****************************************************************************/
+
+template<typename OldType, typename NewType>
+struct cast_impl
+{
+ EIGEN_DEVICE_FUNC static inline NewType run(const OldType& x)
+ {
+ return static_cast<NewType>(x);
+ }
+};
+
+// here, for once, we're plainly returning NewType: we don't want cast to do weird things.
+
+template<typename OldType, typename NewType>
+EIGEN_DEVICE_FUNC inline NewType cast(const OldType& x)
+{
+ return cast_impl<OldType, NewType>::run(x);
+}
+
+/****************************************************************************
+* Implementation of atanh2 *
+****************************************************************************/
+
+template<typename Scalar>
+struct atanh2_impl
+{
+ static inline Scalar run(const Scalar& x, const Scalar& r)
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+ using std::abs;
+ using std::log;
+ using std::sqrt;
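+    // atanh2(x,r) = atanh(x/r): use the closed-form log expression unless x/r is tiny,
+    // where the two-term Taylor series z + z^3/3 is more accurate.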
+ Scalar z = x / r;
+ if (r == 0 || abs(z) > sqrt(NumTraits<Scalar>::epsilon()))
+ return log((r + x) / (r - x)) / 2;
+ else
+ return z + z*z*z / 3;
+ }
+};
+
+template<typename RealScalar>
+struct atanh2_impl<std::complex<RealScalar> >
+{
+ typedef std::complex<RealScalar> Scalar;
+ static inline Scalar run(const Scalar& x, const Scalar& r)
+ {
+ using std::log;
+ using std::norm;
+ using std::sqrt;
+ Scalar z = x / r;
+ if (r == Scalar(0) || norm(z) > NumTraits<RealScalar>::epsilon())
+ return RealScalar(0.5) * log((r + x) / (r - x));
+ else
+ return z + z*z*z / RealScalar(3);
+ }
+};
+
+template<typename Scalar>
+struct atanh2_retval
+{
+ typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of round *
+****************************************************************************/
+
+#if EIGEN_HAS_CXX11_MATH
+ template<typename Scalar>
+ struct round_impl {
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
+ using std::round;
+ return round(x);
+ }
+ };
+#else
+ template<typename Scalar>
+ struct round_impl
+ {
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
+ using std::floor;
+ using std::ceil;
+ return (x > 0.0) ? floor(x + 0.5) : ceil(x - 0.5);
+ }
+ };
+#endif
+
+template<typename Scalar>
+struct round_retval
+{
+ typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of arg *
+****************************************************************************/
+
+#if EIGEN_HAS_CXX11_MATH
+ template<typename Scalar>
+ struct arg_impl {
+ static inline Scalar run(const Scalar& x)
+ {
+ using std::arg;
+ return arg(x);
+ }
+ };
+#else
+ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+ struct arg_default_impl
+ {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+      return (x < 0.0) ? EIGEN_PI : 0.0;
+    }
+ };
+
+ template<typename Scalar>
+ struct arg_default_impl<Scalar,true>
+ {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ using std::arg;
+ return arg(x);
+ }
+ };
+
+ template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};
+#endif
+
+template<typename Scalar>
+struct arg_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of log1p *
+****************************************************************************/
+template<typename Scalar, bool isComplex = NumTraits<Scalar>::IsComplex >
+struct log1p_impl
+{
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ using std::log;
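+    // Compensation trick: since log(1+x)/x is close to log(x1p)/(x1p - 1), multiplying by x
+    // corrects the rounding error made when forming 1 + x.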
+ Scalar x1p = RealScalar(1) + x;
+ return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
+ }
+};
+
+#if EIGEN_HAS_CXX11_MATH
+template<typename Scalar>
+struct log1p_impl<Scalar, false> {
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+ using std::log1p;
+ return log1p(x);
+ }
+};
+#endif
+
+template<typename Scalar>
+struct log1p_retval
+{
+ typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of pow *
+****************************************************************************/
+
+template<typename Scalar, bool IsInteger>
+struct pow_default_impl
+{
+ typedef Scalar retval;
+ static inline Scalar run(const Scalar& x, const Scalar& y)
+ {
+ using std::pow;
+ return pow(x, y);
+ }
+};
+
+template<typename Scalar>
+struct pow_default_impl<Scalar, true>
+{
+ static inline Scalar run(Scalar x, Scalar y)
+ {
+ Scalar res(1);
+ eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0);
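+    // Exponentiation by squaring: consume the exponent one bit at a time.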
+ if(y & 1) res *= x;
+ y >>= 1;
+ while(y)
+ {
+ x *= x;
+ if(y&1) res *= x;
+ y >>= 1;
+ }
+ return res;
+ }
+};
+
+template<typename Scalar>
+struct pow_impl : pow_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
+
+template<typename Scalar>
+struct pow_retval
+{
+ typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of random *
+****************************************************************************/
+
+template<typename Scalar,
+ bool IsComplex,
+ bool IsInteger>
+struct random_default_impl {};
+
+template<typename Scalar>
+struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+
+template<typename Scalar>
+struct random_retval
+{
+ typedef Scalar type;
+};
+
+template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);
+template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();
+
+template<typename Scalar>
+struct random_default_impl<Scalar, false, false>
+{
+ static inline Scalar run(const Scalar& x, const Scalar& y)
+ {
+ return x + (y-x) * Scalar(std::rand()) / Scalar(RAND_MAX);
+ }
+ static inline Scalar run()
+ {
+ return run(Scalar(NumTraits<Scalar>::IsSigned ? -1 : 0), Scalar(1));
+ }
+};
+
+enum {
+ meta_floor_log2_terminate,
+ meta_floor_log2_move_up,
+ meta_floor_log2_move_down,
+ meta_floor_log2_bogus
+};
+
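+// Compile-time binary search that computes floor(log2(n)) for an unsigned int n > 0
+// (n == 0 selects the bogus case and triggers a compile-time error).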
+template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
+{
+ enum { middle = (lower + upper) / 2,
+ value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
+ : (n < (1 << middle)) ? int(meta_floor_log2_move_down)
+ : (n==0) ? int(meta_floor_log2_bogus)
+ : int(meta_floor_log2_move_up)
+ };
+};
+
+template<unsigned int n,
+ int lower = 0,
+ int upper = sizeof(unsigned int) * CHAR_BIT - 1,
+ int selector = meta_floor_log2_selector<n, lower, upper>::value>
+struct meta_floor_log2 {};
+
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
+{
+ enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
+};
+
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
+{
+ enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
+};
+
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
+{
+ enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
+};
+
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
+{
+ // no value, error at compile time
+};
+
+template<typename Scalar>
+struct random_default_impl<Scalar, false, true>
+{
+ static inline Scalar run(const Scalar& x, const Scalar& y)
+ {
+ typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
+ if(y<x)
+ return x;
+ std::size_t range = ScalarX(y)-ScalarX(x);
+ std::size_t offset = 0;
+ // rejection sampling
+ std::size_t divisor = (range+RAND_MAX-1)/(range+1);
+ std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX);
+
+ do {
+ offset = ( (std::size_t(std::rand()) * multiplier) / divisor );
+ } while (offset > range);
+
+ return Scalar(ScalarX(x) + offset);
+ }
+
+ static inline Scalar run()
+ {
+#ifdef EIGEN_MAKING_DOCS
+ return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
+#else
+ enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
+ scalar_bits = sizeof(Scalar) * CHAR_BIT,
+ shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
+ offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
+ };
+ return Scalar((std::rand() >> shift) - offset);
+#endif
+ }
+};
+
+template<typename Scalar>
+struct random_default_impl<Scalar, true, false>
+{
+ static inline Scalar run(const Scalar& x, const Scalar& y)
+ {
+ return Scalar(random(real(x), real(y)),
+ random(imag(x), imag(y)));
+ }
+ static inline Scalar run()
+ {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ return Scalar(random<RealScalar>(), random<RealScalar>());
+ }
+};
+
+template<typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y)
+{
+ return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y);
+}
+
+template<typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
+{
+ return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
+}
+
+} // end namespace internal
+
+/****************************************************************************
+* Generic math functions *
+****************************************************************************/
+
+namespace numext {
+
+#ifndef __CUDA_ARCH__
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
+{
+ EIGEN_USING_STD_MATH(min);
+ return min EIGEN_NOT_A_MACRO (x,y);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
+{
+ EIGEN_USING_STD_MATH(max);
+ return max EIGEN_NOT_A_MACRO (x,y);
+}
+#else
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
+{
+ return y < x ? y : x;
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
+{
+ return fmin(x, y);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
+{
+ return x < y ? y : x;
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
+{
+ return fmax(x, y);
+}
+#endif
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)
+{
+ return internal::real_ref_impl<Scalar>::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
+{
+ return internal::imag_ref_impl<Scalar>::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y)
+{
+ return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y)
+{
+ return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
+{
+ return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+bool (isfinite)(const T& x)
+{
+ #if EIGEN_HAS_CXX11_MATH
+ using std::isfinite;
+ return isfinite(x);
+ #else
+  return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
+ #endif
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+bool (isfinite)(const std::complex<T>& x)
+{
+ return numext::isfinite(numext::real(x)) && numext::isfinite(numext::imag(x));
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+bool (isnan)(const T& x)
+{
+ #if EIGEN_HAS_CXX11_MATH
+ using std::isnan;
+ return isnan(x);
+ #else
+ return x != x;
+ #endif
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+bool (isnan)(const std::complex<T>& x)
+{
+ return numext::isnan(numext::real(x)) || numext::isnan(numext::imag(x));
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+bool (isinf)(const T& x)
+{
+ #if EIGEN_HAS_CXX11_MATH
+ using std::isinf;
+ return isinf(x);
+ #else
+ return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest();
+ #endif
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+bool (isinf)(const std::complex<T>& x)
+{
+ return (numext::isinf(numext::real(x)) || numext::isinf(numext::imag(x))) && (!numext::isnan(x));
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+T (floor)(const T& x)
+{
+ using std::floor;
+ return floor(x);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+T (ceil)(const T& x)
+{
+ using std::ceil;
+ return ceil(x);
+}
+
+// Log base 2 for 32-bit positive integers.
+// Conveniently returns 0 for x==0.
+inline int log2(int x)
+{
+ eigen_assert(x>=0);
+ unsigned int v(x);
+ static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
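+  // Smear the highest set bit into every lower position, then recover its index
+  // with the multiply-and-lookup below.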
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ return table[(v * 0x07C4ACDDU) >> 27];
+}
+
+} // end namespace numext
+
+namespace internal {
+
+/****************************************************************************
+* Implementation of fuzzy comparisons *
+****************************************************************************/
+
+template<typename Scalar,
+ bool IsComplex,
+ bool IsInteger>
+struct scalar_fuzzy_default_impl {};
+
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, false, false>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
+ static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
+ {
+ using std::abs;
+ return abs(x) <= abs(y) * prec;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
+ {
+ using std::abs;
+ return abs(x - y) <= numext::mini(abs(x), abs(y)) * prec;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
+ {
+ return x <= y || isApprox(x, y, prec);
+ }
+};
+
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, false, true>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
+ static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&)
+ {
+ return x == Scalar(0);
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&)
+ {
+ return x == y;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&)
+ {
+ return x <= y;
+ }
+};
+
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, true, false>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ template<typename OtherScalar>
+ static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
+ {
+ return numext::abs2(x) <= numext::abs2(y) * prec * prec;
+ }
+ static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
+ {
+ return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec;
+ }
+};
+
+template<typename Scalar>
+struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+
+template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
+inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
+ typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+{
+ return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
+}
+
+template<typename Scalar> EIGEN_DEVICE_FUNC
+inline bool isApprox(const Scalar& x, const Scalar& y,
+ typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+{
+ return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
+}
+
+template<typename Scalar> EIGEN_DEVICE_FUNC
+inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
+ typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+{
+ return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
+}
+
+/******************************************
+*** The special case of the bool type ***
+******************************************/
+
+template<> struct random_impl<bool>
+{
+ static inline bool run()
+ {
+ return random<int>(0,1)==0 ? false : true;
+ }
+};
+
+template<> struct scalar_fuzzy_impl<bool>
+{
+ typedef bool RealScalar;
+
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
+ static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
+ {
+ return !x;
+ }
+
+ EIGEN_DEVICE_FUNC
+ static inline bool isApprox(bool x, bool y, bool)
+ {
+ return x == y;
+ }
+
+ EIGEN_DEVICE_FUNC
+ static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&)
+ {
+ return (!x) || y;
+ }
+
+};
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATHFUNCTIONS_H
diff --git a/third_party/eigen3/Eigen/src/Core/Matrix.h b/third_party/eigen3/Eigen/src/Core/Matrix.h
new file mode 100644
index 0000000000..782d67f54f
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Matrix.h
@@ -0,0 +1,443 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIX_H
+#define EIGEN_MATRIX_H
+
+namespace Eigen {
+
+/** \class Matrix
+ * \ingroup Core_Module
+ *
+ * \brief The matrix class, also used for vectors and row-vectors
+ *
+ * The %Matrix class is the work-horse for all \em dense (\ref dense "note") matrices and vectors within Eigen.
+ * Vectors are matrices with one column, and row-vectors are matrices with one row.
+ *
+ * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note").
+ *
+ * The first three template parameters are required:
+ * \tparam _Scalar \anchor matrix_tparam_scalar Numeric type, e.g. float, double, int or std::complex<float>.
+ *                 User-defined scalar types are supported as well (see \ref user_defined_scalars "here").
+ * \tparam _Rows Number of rows, or \b Dynamic
+ * \tparam _Cols Number of columns, or \b Dynamic
+ *
+ * The remaining template parameters are optional -- in most cases you don't have to worry about them.
+ * \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
+ * \b #AutoAlign or \b #DontAlign.
+ * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
+ * for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
+ * \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note").
+ * \tparam _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note").
+ *
+ * Eigen provides a number of typedefs covering the usual cases. Here are some examples:
+ *
+ * \li \c Matrix2d is a 2x2 square matrix of doubles (\c Matrix<double, 2, 2>)
+ * \li \c Vector4f is a vector of 4 floats (\c Matrix<float, 4, 1>)
+ * \li \c RowVector3i is a row-vector of 3 ints (\c Matrix<int, 1, 3>)
+ *
+ * \li \c MatrixXf is a dynamic-size matrix of floats (\c Matrix<float, Dynamic, Dynamic>)
+ * \li \c VectorXf is a dynamic-size vector of floats (\c Matrix<float, Dynamic, 1>)
+ *
+ * \li \c Matrix2Xf is a matrix of floats with 2 rows and a dynamic number of columns (\c Matrix<float, 2, Dynamic>)
+ * \li \c MatrixX3d is a matrix of doubles with a dynamic number of rows and 3 columns (\c Matrix<double, Dynamic, 3>)
+ *
+ * See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs.
+ *
+ * You can access elements of vectors and matrices using normal subscripting:
+ *
+ * \code
+ * Eigen::VectorXd v(10);
+ * v[0] = 0.1;
+ * v[1] = 0.2;
+ * v(0) = 0.3;
+ * v(1) = 0.4;
+ *
+ * Eigen::MatrixXi m(10, 10);
+ * m(0, 1) = 1;
+ * m(0, 2) = 2;
+ * m(0, 3) = 3;
+ * \endcode
+ *
+ * This class can be extended with the help of the plugin mechanism described on the page
+ * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
+ *
+ * <i><b>Some notes:</b></i>
+ *
+ * <dl>
+ * <dt><b>\anchor dense Dense versus sparse:</b></dt>
+ * <dd>This %Matrix class handles dense, not sparse matrices and vectors. For sparse matrices and vectors, see the Sparse module.
+ *
+ * Dense matrices and vectors are plain usual arrays of coefficients. All the coefficients are stored, in an ordinary contiguous array.
+ * This is unlike Sparse matrices and vectors where the coefficients are stored as a list of nonzero coefficients.</dd>
+ *
+ * <dt><b>\anchor fixedsize Fixed-size versus dynamic-size:</b></dt>
+ * <dd>Fixed-size means that the numbers of rows and columns are known at compile-time. In this case, Eigen allocates the array
+ * of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices, typically up to 4x4, sometimes up
+ * to 16x16. Larger matrices should be declared as dynamic-size even if one happens to know their size at compile-time.
+ *
+ * Dynamic-size means that the numbers of rows or columns are not necessarily known at compile-time. In this case they are runtime
+ * variables, and the array of coefficients is allocated dynamically on the heap.
+ *
+ * Note that \em dense matrices, be they Fixed-size or Dynamic-size, <em>do not</em> expand dynamically in the sense of a std::map.
+ * If you want this behavior, see the Sparse module.</dd>
+ *
+ * <dt><b>\anchor maxrows _MaxRows and _MaxCols:</b></dt>
+ * <dd>In most cases, one just leaves these parameters to the default values.
+ * These parameters mean the maximum numbers of rows and columns that the matrix may have. They are useful in cases
+ * when the exact numbers of rows and columns are not known at compile-time, but it is known at compile-time that they cannot
+ * exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case _MaxRows and _MaxCols
+ * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
+ * </dl>
+ *
+ * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
+ * \ref TopicStorageOrders
+ */
+
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+ typedef _Scalar Scalar;
+ typedef Dense StorageKind;
+ typedef DenseIndex Index;
+ typedef MatrixXpr XprKind;
+ enum {
+ RowsAtCompileTime = _Rows,
+ ColsAtCompileTime = _Cols,
+ MaxRowsAtCompileTime = _MaxRows,
+ MaxColsAtCompileTime = _MaxCols,
+ Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
+ CoeffReadCost = NumTraits<Scalar>::ReadCost,
+ Options = _Options,
+ InnerStrideAtCompileTime = 1,
+ OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime
+ };
+};
+}
+
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+class Matrix
+ : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+ public:
+
+ /** \brief Base class typedef.
+ * \sa PlainObjectBase
+ */
+ typedef PlainObjectBase<Matrix> Base;
+
+ enum { Options = _Options };
+
+ EIGEN_DENSE_PUBLIC_INTERFACE(Matrix)
+
+ typedef typename Base::PlainObject PlainObject;
+
+ using Base::base;
+ using Base::coeffRef;
+
+ /**
+ * \brief Assigns matrices to each other.
+ *
+ * \note This is a special case of the templated operator=. Its purpose is
+ * to prevent a default operator= from hiding the templated operator=.
+ *
+ * \callgraph
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
+ {
+ return Base::_set(other);
+ }
+
+ /** \internal
+ * \brief Copies the value of the expression \a other into \c *this with automatic resizing.
+ *
+ * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
+ * it will be initialized.
+ *
+ * Note that copying a row-vector into a vector (and conversely) is allowed.
+ * The resizing, if any, is then done in the appropriate way so that row-vectors
+ * remain row-vectors and vectors remain vectors.
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other)
+ {
+ return Base::_set(other);
+ }
+
+ /* Here, doxygen failed to copy the brief information when using \copydoc */
+
+ /**
+ * \brief Copies the generic expression \a other into *this.
+ * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
+ {
+ return Base::operator=(other);
+ }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
+ {
+ return Base::operator=(func);
+ }
+
+ /** \brief Default constructor.
+ *
+ * For fixed-size matrices, does nothing.
+ *
+ * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix
+ * is called a null matrix. This constructor is the unique way to create null matrices: resizing
+ * a matrix to 0 is not supported.
+ *
+ * \sa resize(Index,Index)
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix() : Base()
+ {
+ Base::_check_template_params();
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+
+ // FIXME is it still needed
+ EIGEN_DEVICE_FUNC
+ Matrix(internal::constructor_without_unaligned_array_assert)
+ : Base(internal::constructor_without_unaligned_array_assert())
+ { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
+
+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+ Matrix(Matrix&& other)
+ : Base(std::move(other))
+ {
+ Base::_check_template_params();
+ if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
+ Base::_set_noalias(other);
+ }
+ Matrix& operator=(Matrix&& other)
+ {
+ other.swap(*this);
+ return *this;
+ }
+#endif
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+
+ // This constructor is for both 1x1 matrices and dynamic vectors
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE explicit Matrix(const T& x)
+ {
+ Base::_check_template_params();
+ Base::template _init1<T>(x);
+ }
+
+ template<typename T0, typename T1>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
+ {
+ Base::_check_template_params();
+ Base::template _init2<T0,T1>(x, y);
+ }
+ #else
+ /** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */
+ EIGEN_DEVICE_FUNC
+ explicit Matrix(const Scalar *data);
+
+ /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
+ *
+ * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
+ * it is redundant to pass the dimension here, so it makes more sense to use the default
+ * constructor Matrix() instead.
+ */
+ EIGEN_STRONG_INLINE explicit Matrix(Index dim);
+ /** \brief Constructs an initialized 1x1 matrix with the given coefficient */
+ Matrix(const Scalar& x);
+ /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns.
+ *
+ * This is useful for dynamic-size matrices. For fixed-size matrices,
+ * it is redundant to pass these parameters, so one should use the default constructor
+ * Matrix() instead. */
+ EIGEN_DEVICE_FUNC
+ Matrix(Index rows, Index cols);
+ /** \brief Constructs an initialized 2D vector with given coefficients */
+ Matrix(const Scalar& x, const Scalar& y);
+ #endif
+
+ /** \brief Constructs an initialized 3D vector with given coefficients */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3)
+ m_storage.data()[0] = x;
+ m_storage.data()[1] = y;
+ m_storage.data()[2] = z;
+ }
+ /** \brief Constructs an initialized 4D vector with given coefficients */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4)
+ m_storage.data()[0] = x;
+ m_storage.data()[1] = y;
+ m_storage.data()[2] = z;
+ m_storage.data()[3] = w;
+ }
+
+
+ /** \brief Constructor copying the value of the expression \a other */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
+ : Base(other.rows() * other.cols(), other.rows(), other.cols())
+ {
+ // This test resides here, to bring the error messages closer to the user. Normally, these checks
+ // are performed deeply within the library, thus causing long and scary error traces.
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+ Base::_check_template_params();
+ Base::_set_noalias(other);
+ }
+ /** \brief Copy constructor */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const Matrix& other)
+ : Base(other.rows() * other.cols(), other.rows(), other.cols())
+ {
+ Base::_check_template_params();
+ Base::_set_noalias(other);
+ }
+ /** \brief Copy constructor with in-place evaluation */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived>& other)
+ {
+ Base::_check_template_params();
+ Base::resize(other.rows(), other.cols());
+ other.evalTo(*this);
+ }
+
+ /** \brief Copy constructor for generic expressions.
+ * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
+ : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
+ {
+ Base::_check_template_params();
+ Base::_resize_to_match(other);
+ // FIXME/CHECK: wouldn't *this = other.derived() be more efficient? It would allow
+ // going for pure _set() implementations, right?
+ *this = other;
+ }
+
+ /** \internal
+ * \brief Override MatrixBase::swap() since for dynamic-sized matrices
+ * of the same type it is enough to swap the data pointers.
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(MatrixBase<OtherDerived> const & other)
+ { this->_swap(other.derived()); }
+
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
+
+ /////////// Geometry module ///////////
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
+
+ #ifdef EIGEN2_SUPPORT
+ template<typename OtherDerived>
+ explicit Matrix(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
+ template<typename OtherDerived>
+ Matrix& operator=(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
+ #endif
+
+ // allow Matrix to be extended outside Eigen
+ #ifdef EIGEN_MATRIX_PLUGIN
+ #include EIGEN_MATRIX_PLUGIN
+ #endif
+
+ protected:
+ template <typename Derived, typename OtherDerived, bool IsVector>
+ friend struct internal::conservative_resize_like_impl;
+
+ using Base::m_storage;
+};
+
+/** \defgroup matrixtypedefs Global matrix typedefs
+ *
+ * \ingroup Core_Module
+ *
+ * Eigen defines several typedef shortcuts for the most common matrix and vector types.
+ *
+ * The general patterns are the following:
+ *
+ * \c MatrixSizeType where \c Size can be \c 2, \c 3, \c 4 for fixed-size square matrices or \c X for dynamic size,
+ * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd
+ * for complex double.
+ *
+ * For example, \c Matrix3d is a fixed-size 3x3 matrix type of doubles, and \c MatrixXf is a dynamic-size matrix of floats.
+ *
+ * There are also \c VectorSizeType and \c RowVectorSizeType which are self-explanatory. For example, \c Vector4cf is
+ * a fixed-size vector of 4 complex floats.
+ *
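+ * For illustration, here is a small usage sketch of typedefs following these patterns:
+ * \code
+ * Eigen::Matrix3d R = Eigen::Matrix3d::Identity(); // fixed-size 3x3 matrix of double
+ * Eigen::VectorXf v(10);                           // dynamic-size vector of float
+ * Eigen::RowVector4cf r;                           // fixed-size row vector of 4 complex<float>
+ * \endcode
+ *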
+ * \sa class Matrix
+ */
+
+#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
+/** \ingroup matrixtypedefs */ \
+typedef Matrix<Type, Size, Size> Matrix##SizeSuffix##TypeSuffix; \
+/** \ingroup matrixtypedefs */ \
+typedef Matrix<Type, Size, 1> Vector##SizeSuffix##TypeSuffix; \
+/** \ingroup matrixtypedefs */ \
+typedef Matrix<Type, 1, Size> RowVector##SizeSuffix##TypeSuffix;
+
+#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
+/** \ingroup matrixtypedefs */ \
+typedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix; \
+/** \ingroup matrixtypedefs */ \
+typedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;
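+
+// For example, EIGEN_MAKE_TYPEDEFS(float, f, 3, 3) defines Matrix3f, Vector3f and RowVector3f,
+// while EIGEN_MAKE_FIXED_TYPEDEFS(float, f, 3) defines Matrix3Xf and MatrixX3f.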
+
+#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
+
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double, d)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<float>, cf)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
+
+#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
+#undef EIGEN_MAKE_TYPEDEFS
+#undef EIGEN_MAKE_FIXED_TYPEDEFS
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATRIX_H
diff --git a/third_party/eigen3/Eigen/src/Core/MatrixBase.h b/third_party/eigen3/Eigen/src/Core/MatrixBase.h
new file mode 100644
index 0000000000..598b38ed47
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/MatrixBase.h
@@ -0,0 +1,614 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIXBASE_H
+#define EIGEN_MATRIXBASE_H
+
+namespace Eigen {
+
+/** \class MatrixBase
+ * \ingroup Core_Module
+ *
+ * \brief Base class for all dense matrices, vectors, and expressions
+ *
+ * This class is the base that is inherited by all matrix, vector, and related expression
+ * types. Most of the Eigen API is contained in this class and its base classes. Other important
+ * classes for the Eigen API are Matrix, and VectorwiseOp.
+ *
+ * Note that some methods are defined in other modules such as the \ref LU_Module LU module
+ * for all functions related to matrix inversions.
+ *
+ * \tparam Derived is the derived type, e.g. a matrix type, or an expression, etc.
+ *
+ * When writing a function taking Eigen objects as arguments, if you want your function
+ * to accept any matrix, vector, or expression, just let it take a
+ * MatrixBase argument. As an example, here is a function printFirstRow which, given
+ * a matrix, vector, or expression \a x, prints the first row of \a x.
+ *
+ * \code
+ template<typename Derived>
+ void printFirstRow(const Eigen::MatrixBase<Derived>& x)
+ {
+ cout << x.row(0) << endl;
+ }
+ * \endcode
+ *
+ * This class can be extended with the help of the plugin mechanism described on the page
+ * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
+ *
+ * \sa \ref TopicClassHierarchy
+ */
+template<typename Derived> class MatrixBase
+ : public DenseBase<Derived>
+{
+ public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef MatrixBase StorageBaseType;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Index Index;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ typedef DenseBase<Derived> Base;
+ using Base::RowsAtCompileTime;
+ using Base::ColsAtCompileTime;
+ using Base::SizeAtCompileTime;
+ using Base::MaxRowsAtCompileTime;
+ using Base::MaxColsAtCompileTime;
+ using Base::MaxSizeAtCompileTime;
+ using Base::IsVectorAtCompileTime;
+ using Base::Flags;
+ using Base::CoeffReadCost;
+
+ using Base::derived;
+ using Base::const_cast_derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+ using Base::lazyAssign;
+ using Base::eval;
+ using Base::operator+=;
+ using Base::operator-=;
+ using Base::operator*=;
+ using Base::operator/=;
+
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+ typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
+ typedef typename Base::RowXpr RowXpr;
+ typedef typename Base::ColXpr ColXpr;
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** type of the equivalent square matrix */
+ typedef Matrix<Scalar,EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime),
+ EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType;
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+ /** \returns the size of the main diagonal, which is min(rows(),cols()).
+ * \sa rows(), cols(), SizeAtCompileTime. */
+ EIGEN_DEVICE_FUNC
+ inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
+
+ /** \brief The plain matrix type corresponding to this expression.
+ *
+ * This is not necessarily exactly the return type of eval(). In the case of plain matrices,
+ * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
+ * that the return type of eval() is either PlainObject or const PlainObject&.
+ */
+ typedef Matrix<typename internal::traits<Derived>::Scalar,
+ internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime,
+ AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+ internal::traits<Derived>::MaxRowsAtCompileTime,
+ internal::traits<Derived>::MaxColsAtCompileTime
+ > PlainObject;
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** \internal Represents a matrix with all coefficients equal to one another*/
+ typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
+ /** \internal the return type of MatrixBase::adjoint() */
+ typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
+ ConstTransposeReturnType
+ >::type AdjointReturnType;
+ /** \internal Return type of eigenvalues() */
+ typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
+ /** \internal the return type of identity */
+ typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,Derived> IdentityReturnType;
+ /** \internal the return type of unit vectors */
+ typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
+ internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime> BasisReturnType;
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
+# include "../plugins/CommonCwiseUnaryOps.h"
+# include "../plugins/CommonCwiseBinaryOps.h"
+# include "../plugins/MatrixCwiseUnaryOps.h"
+# include "../plugins/MatrixCwiseBinaryOps.h"
+# ifdef EIGEN_MATRIXBASE_PLUGIN
+# include EIGEN_MATRIXBASE_PLUGIN
+# endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+
+ /** Special case of the template operator=, in order to prevent the compiler
+ * from generating a default operator= (issue hit with g++ 4.1)
+ */
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const MatrixBase& other);
+
+ // We cannot inherit here via Base::operator= since it is causing
+ // trouble with MSVC.
+
+ template <typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const DenseBase<OtherDerived>& other);
+
+ template <typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const EigenBase<OtherDerived>& other);
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const ReturnByValue<OtherDerived>& other);
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename ProductDerived, typename Lhs, typename Rhs>
+ EIGEN_DEVICE_FUNC
+ Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other);
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator+=(const MatrixBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator-=(const MatrixBase<OtherDerived>& other);
+
+#ifdef __CUDACC__
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ const typename LazyProductReturnType<Derived,OtherDerived>::Type
+ operator*(const MatrixBase<OtherDerived> &other) const
+ { return this->lazyProduct(other); }
+#else
+
+#ifdef EIGEN_TEST_EVALUATORS
+ template<typename OtherDerived>
+ const Product<Derived,OtherDerived>
+ operator*(const MatrixBase<OtherDerived> &other) const;
+#else
+ template<typename OtherDerived>
+ const typename ProductReturnType<Derived,OtherDerived>::Type
+ operator*(const MatrixBase<OtherDerived> &other) const;
+#endif
+
+#endif
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ const typename LazyProductReturnType<Derived,OtherDerived>::Type
+ lazyProduct(const MatrixBase<OtherDerived> &other) const;
+
+ template<typename OtherDerived>
+ Derived& operator*=(const EigenBase<OtherDerived>& other);
+
+ template<typename OtherDerived>
+ void applyOnTheLeft(const EigenBase<OtherDerived>& other);
+
+ template<typename OtherDerived>
+ void applyOnTheRight(const EigenBase<OtherDerived>& other);
+
+ template<typename DiagonalDerived>
+ EIGEN_DEVICE_FUNC
+ const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
+ operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
+ dot(const MatrixBase<OtherDerived>& other) const;
+
+ #ifdef EIGEN2_SUPPORT
+ template<typename OtherDerived>
+ Scalar eigen2_dot(const MatrixBase<OtherDerived>& other) const;
+ #endif
+
+ EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
+ EIGEN_DEVICE_FUNC RealScalar norm() const;
+ RealScalar stableNorm() const;
+ RealScalar blueNorm() const;
+ RealScalar hypotNorm() const;
+ EIGEN_DEVICE_FUNC const PlainObject normalized() const;
+ EIGEN_DEVICE_FUNC void normalize();
+
+ EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
+ EIGEN_DEVICE_FUNC void adjointInPlace();
+
+ typedef Diagonal<Derived> DiagonalReturnType;
+ EIGEN_DEVICE_FUNC
+ DiagonalReturnType diagonal();
+
+ typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
+ EIGEN_DEVICE_FUNC
+ ConstDiagonalReturnType diagonal() const;
+
+ template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
+ template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
+
+ template<int Index>
+ EIGEN_DEVICE_FUNC
+ typename DiagonalIndexReturnType<Index>::Type diagonal();
+
+ template<int Index>
+ EIGEN_DEVICE_FUNC
+ typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
+
+ // Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
+ // On the other hand they confuse MSVC8...
+ #if EIGEN_COMP_MSVC >= 1500 // 2008 or later
+ typename MatrixBase::template DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
+ typename MatrixBase::template ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
+ #else
+ EIGEN_DEVICE_FUNC
+ typename DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
+
+ EIGEN_DEVICE_FUNC
+ typename ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
+ #endif
+
+ #ifdef EIGEN2_SUPPORT
+ template<unsigned int Mode> typename internal::eigen2_part_return_type<Derived, Mode>::type part();
+ template<unsigned int Mode> const typename internal::eigen2_part_return_type<Derived, Mode>::type part() const;
+
+ // huuuge hack. make Eigen2's matrix.part<Diagonal>() work in eigen3. Problem: Diagonal is now a class template instead
+ // of an integer constant. Solution: overload the part() method template wrt template parameters list.
+ template<template<typename T, int N> class U>
+ const DiagonalWrapper<ConstDiagonalReturnType> part() const
+ { return diagonal().asDiagonal(); }
+ #endif // EIGEN2_SUPPORT
+
+ template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
+ template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
+
+ template<unsigned int Mode>
+ EIGEN_DEVICE_FUNC
+ typename TriangularViewReturnType<Mode>::Type triangularView();
+ template<unsigned int Mode>
+ EIGEN_DEVICE_FUNC
+ typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
+
+ template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
+ template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
+
+ template<unsigned int UpLo>
+ EIGEN_DEVICE_FUNC
+ typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
+ template<unsigned int UpLo>
+ EIGEN_DEVICE_FUNC
+ typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
+
+ const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
+ const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
+ EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
+ EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
+ EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
+
+ EIGEN_DEVICE_FUNC
+ const DiagonalWrapper<const Derived> asDiagonal() const;
+ const PermutationWrapper<const Derived> asPermutation() const;
+
+ EIGEN_DEVICE_FUNC
+ Derived& setIdentity();
+ EIGEN_DEVICE_FUNC
+ Derived& setIdentity(Index rows, Index cols);
+
+ bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isDiagonal(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+ bool isUpperTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isLowerTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+ template<typename OtherDerived>
+ bool isOrthogonal(const MatrixBase<OtherDerived>& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isUnitary(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+ /** \returns true if all coefficients of \c *this and \a other are exactly equal.
+ * \warning When using floating-point scalar values you should probably use a fuzzy
+ * comparison such as isApprox() instead
+ * \sa isApprox(), operator!= */
+ template<typename OtherDerived>
+ inline bool operator==(const MatrixBase<OtherDerived>& other) const
+ { return cwiseEqual(other).all(); }
+
+ /** \returns true if at least one pair of corresponding coefficients of \c *this and \a other is not exactly equal.
+ * \warning When using floating-point scalar values you should probably use a fuzzy
+ * comparison such as isApprox() instead
+ * \sa isApprox(), operator== */
+ template<typename OtherDerived>
+ inline bool operator!=(const MatrixBase<OtherDerived>& other) const
+ { return cwiseNotEqual(other).any(); }
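+
+ // For floating-point matrices a fuzzy comparison is usually preferable (illustrative only):
+ //   bool nearlyEqual = a.isApprox(b);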
+
+ NoAlias<Derived,Eigen::MatrixBase > noalias();
+
+ inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+ inline ForceAlignedAccess<Derived> forceAlignedAccess();
+ template<bool Enable> inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type forceAlignedAccessIf() const;
+ template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
+
+ Scalar trace() const;
+
+ template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
+
+ EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
+ EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
+
+ /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
+ * \sa ArrayBase::matrix() */
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return derived(); }
+ /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix
+ * \sa ArrayBase::matrix() */
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return derived(); }
+
+/////////// LU module ///////////
+
+ EIGEN_DEVICE_FUNC const FullPivLU<PlainObject> fullPivLu() const;
+ EIGEN_DEVICE_FUNC const PartialPivLU<PlainObject> partialPivLu() const;
+
+ #if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
+ const LU<PlainObject> lu() const;
+ #endif
+
+ #ifdef EIGEN2_SUPPORT
+ const LU<PlainObject> eigen2_lu() const;
+ #endif
+
+ #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
+ const PartialPivLU<PlainObject> lu() const;
+ #endif
+
+ #ifdef EIGEN2_SUPPORT
+ template<typename ResultType>
+ void computeInverse(MatrixBase<ResultType> *result) const {
+ *result = this->inverse();
+ }
+ #endif
+
+ EIGEN_DEVICE_FUNC
+ const internal::inverse_impl<Derived> inverse() const;
+ template<typename ResultType>
+ void computeInverseAndDetWithCheck(
+ ResultType& inverse,
+ typename ResultType::Scalar& determinant,
+ bool& invertible,
+ const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
+ ) const;
+ template<typename ResultType>
+ void computeInverseWithCheck(
+ ResultType& inverse,
+ bool& invertible,
+ const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
+ ) const;
+ Scalar determinant() const;
+
+/////////// Cholesky module ///////////
+
+ const LLT<PlainObject> llt() const;
+ const LDLT<PlainObject> ldlt() const;
+
+/////////// QR module ///////////
+
+ const HouseholderQR<PlainObject> householderQr() const;
+ const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
+ const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
+
+ #ifdef EIGEN2_SUPPORT
+ const QR<PlainObject> qr() const;
+ #endif
+
+ EigenvaluesReturnType eigenvalues() const;
+ RealScalar operatorNorm() const;
+
+/////////// SVD module ///////////
+
+ JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
+
+ #ifdef EIGEN2_SUPPORT
+ SVD<PlainObject> svd() const;
+ #endif
+
+/////////// Geometry module ///////////
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /// \internal helper struct to form the return type of the cross product
+ template<typename OtherDerived> struct cross_product_return_type {
+ typedef typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
+ typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
+ };
+ #endif // EIGEN_PARSED_BY_DOXYGEN
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ typename cross_product_return_type<OtherDerived>::type
+ cross(const MatrixBase<OtherDerived>& other) const;
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
+
+ EIGEN_DEVICE_FUNC
+ PlainObject unitOrthogonal(void) const;
+
+ Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
+
+ #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
+ ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
+ // put this as separate enum value to work around possible GCC 4.3 bug (?)
+ enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
+ typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
+ HomogeneousReturnType homogeneous() const;
+ #endif
+
+ enum {
+ SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
+ };
+ typedef Block<const Derived,
+ internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
+ internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
+ typedef CwiseUnaryOp<internal::scalar_quotient1_op<typename internal::traits<Derived>::Scalar>,
+ const ConstStartMinusOne > HNormalizedReturnType;
+
+ const HNormalizedReturnType hnormalized() const;
+
+////////// Householder module ///////////
+
+ void makeHouseholderInPlace(Scalar& tau, RealScalar& beta);
+ template<typename EssentialPart>
+ void makeHouseholder(EssentialPart& essential,
+ Scalar& tau, RealScalar& beta) const;
+ template<typename EssentialPart>
+ void applyHouseholderOnTheLeft(const EssentialPart& essential,
+ const Scalar& tau,
+ Scalar* workspace);
+ template<typename EssentialPart>
+ void applyHouseholderOnTheRight(const EssentialPart& essential,
+ const Scalar& tau,
+ Scalar* workspace);
+
+///////// Jacobi module /////////
+
+ template<typename OtherScalar>
+ void applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j);
+ template<typename OtherScalar>
+ void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
+
+///////// MatrixFunctions module /////////
+
+ typedef typename internal::stem_function<Scalar>::type StemFunction;
+ const MatrixExponentialReturnValue<Derived> exp() const;
+ const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
+ const MatrixFunctionReturnValue<Derived> cosh() const;
+ const MatrixFunctionReturnValue<Derived> sinh() const;
+ const MatrixFunctionReturnValue<Derived> cos() const;
+ const MatrixFunctionReturnValue<Derived> sin() const;
+ const MatrixSquareRootReturnValue<Derived> sqrt() const;
+ const MatrixLogarithmReturnValue<Derived> log() const;
+ const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
+ const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
+
+#ifdef EIGEN2_SUPPORT
+ template<typename ProductDerived, typename Lhs, typename Rhs>
+ Derived& operator+=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
+ EvalBeforeAssigningBit>& other);
+
+ template<typename ProductDerived, typename Lhs, typename Rhs>
+ Derived& operator-=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
+ EvalBeforeAssigningBit>& other);
+
+ /** \deprecated because .lazy() is deprecated
+ * Overloaded for cache friendly product evaluation */
+ template<typename OtherDerived>
+ Derived& lazyAssign(const Flagged<OtherDerived, 0, EvalBeforeAssigningBit>& other)
+ { return lazyAssign(other._expression()); }
+
+ template<unsigned int Added>
+ const Flagged<Derived, Added, 0> marked() const;
+ const Flagged<Derived, 0, EvalBeforeAssigningBit> lazy() const;
+
+ inline const Cwise<Derived> cwise() const;
+ inline Cwise<Derived> cwise();
+
+ VectorBlock<Derived> start(Index size);
+ const VectorBlock<const Derived> start(Index size) const;
+ VectorBlock<Derived> end(Index size);
+ const VectorBlock<const Derived> end(Index size) const;
+ template<int Size> VectorBlock<Derived,Size> start();
+ template<int Size> const VectorBlock<const Derived,Size> start() const;
+ template<int Size> VectorBlock<Derived,Size> end();
+ template<int Size> const VectorBlock<const Derived,Size> end() const;
+
+ Minor<Derived> minor(Index row, Index col);
+ const Minor<Derived> minor(Index row, Index col) const;
+#endif
+
+ protected:
+ EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
+
+ private:
+ EIGEN_DEVICE_FUNC explicit MatrixBase(int);
+ EIGEN_DEVICE_FUNC MatrixBase(int,int);
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
+ protected:
+ // mixing arrays and matrices is not legal
+ template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+ // mixing arrays and matrices is not legal
+ template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+};
+
+
+/***************************************************************************
+* Implementation of matrix base methods
+***************************************************************************/
+
+/** replaces \c *this by \c *this * \a other.
+ *
+ * \returns a reference to \c *this
+ *
+ * Example: \include MatrixBase_applyOnTheRight.cpp
+ * Output: \verbinclude MatrixBase_applyOnTheRight.out
+ */
+template<typename Derived>
+template<typename OtherDerived>
+inline Derived&
+MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
+{
+ other.derived().applyThisOnTheRight(derived());
+ return derived();
+}
+
+/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
+ *
+ * Example: \include MatrixBase_applyOnTheRight.cpp
+ * Output: \verbinclude MatrixBase_applyOnTheRight.out
+ */
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
+{
+ other.derived().applyThisOnTheRight(derived());
+}
+
+/** replaces \c *this by \a other * \c *this.
+ *
+ * Example: \include MatrixBase_applyOnTheLeft.cpp
+ * Output: \verbinclude MatrixBase_applyOnTheLeft.out
+ */
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
+{
+ other.derived().applyThisOnTheLeft(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATRIXBASE_H
diff --git a/third_party/eigen3/Eigen/src/Core/NestByValue.h b/third_party/eigen3/Eigen/src/Core/NestByValue.h
new file mode 100644
index 0000000000..1944bd7858
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/NestByValue.h
@@ -0,0 +1,112 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NESTBYVALUE_H
+#define EIGEN_NESTBYVALUE_H
+
+namespace Eigen {
+
+/** \class NestByValue
+ * \ingroup Core_Module
+ *
+ * \brief Expression which must be nested by value
+ *
+ * \param ExpressionType the type of the object of which we are requiring nesting-by-value
+ *
+ * This class is the return type of MatrixBase::nestByValue()
+ * and most of the time this is the only way it is used.
+ *
+ * \sa MatrixBase::nestByValue()
+ */
+
+namespace internal {
+template <typename ExpressionType>
+struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType> {
+ enum { Flags = traits<ExpressionType>::Flags & ~NestByRefBit };
+};
+}
+
+template<typename ExpressionType> class NestByValue
+ : public internal::dense_xpr_base< NestByValue<ExpressionType> >::type
+{
+ public:
+
+ typedef typename internal::dense_xpr_base<NestByValue>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)
+
+ inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
+
+ inline Index rows() const { return m_expression.rows(); }
+ inline Index cols() const { return m_expression.cols(); }
+ inline Index outerStride() const { return m_expression.outerStride(); }
+ inline Index innerStride() const { return m_expression.innerStride(); }
+
+ inline const CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_expression.coeff(row, col);
+ }
+
+ inline Scalar& coeffRef(Index row, Index col)
+ {
+ return m_expression.const_cast_derived().coeffRef(row, col);
+ }
+
+ inline const CoeffReturnType coeff(Index index) const
+ {
+ return m_expression.coeff(index);
+ }
+
+ inline Scalar& coeffRef(Index index)
+ {
+ return m_expression.const_cast_derived().coeffRef(index);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index row, Index col) const
+ {
+ return m_expression.template packet<LoadMode>(row, col);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ m_expression.const_cast_derived().template writePacket<LoadMode>(row, col, x);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index index) const
+ {
+ return m_expression.template packet<LoadMode>(index);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index index, const PacketScalar& x)
+ {
+ m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
+ }
+
+ operator const ExpressionType&() const { return m_expression; }
+
+ protected:
+ const ExpressionType m_expression;
+};
+
+/** \returns an expression of the temporary version of *this.
+ */
+template<typename Derived>
+inline const NestByValue<Derived>
+DenseBase<Derived>::nestByValue() const
+{
+ return NestByValue<Derived>(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_NESTBYVALUE_H
diff --git a/third_party/eigen3/Eigen/src/Core/NoAlias.h b/third_party/eigen3/Eigen/src/Core/NoAlias.h
new file mode 100644
index 0000000000..0a1c327433
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/NoAlias.h
@@ -0,0 +1,141 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NOALIAS_H
+#define EIGEN_NOALIAS_H
+
+namespace Eigen {
+
+/** \class NoAlias
+ * \ingroup Core_Module
+ *
+ * \brief Pseudo expression providing an operator = assuming no aliasing
+ *
+ * \param ExpressionType the type of the object on which to do the lazy assignment
+ *
+ * This class represents an expression with special assignment operators
+ * assuming no aliasing between the target expression and the source expression.
+ * More precisely, it allows bypassing the EvalBeforeAssignBit flag of the source expression.
+ * It is the return type of MatrixBase::noalias()
+ * and most of the time this is the only way it is used.
+ *
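+ * A typical use is (see also MatrixBase::noalias(); the matrices below are placeholders):
+ * \code
+ * C.noalias() = A * B; // assumes C does not alias A or B, so no temporary is needed
+ * \endcode
+ *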
+ * \sa MatrixBase::noalias()
+ */
+template<typename ExpressionType, template <typename> class StorageBase>
+class NoAlias
+{
+ typedef typename ExpressionType::Scalar Scalar;
+ public:
+ NoAlias(ExpressionType& expression) : m_expression(expression) {}
+
+ /** Behaves like MatrixBase::lazyAssign(other)
+ * \sa MatrixBase::lazyAssign() */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
+ { return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
+
+ /** \sa MatrixBase::operator+= */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
+ {
+ typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
+ SelfAdder tmp(m_expression);
+ typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
+ typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
+ internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
+ return m_expression;
+ }
+
+ /** \sa MatrixBase::operator-= */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
+ {
+ typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
+ SelfAdder tmp(m_expression);
+ typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
+ typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
+ internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
+ return m_expression;
+ }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename ProductDerived, typename Lhs, typename Rhs>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
+ { other.derived().addTo(m_expression); return m_expression; }
+
+ template<typename ProductDerived, typename Lhs, typename Rhs>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
+ { other.derived().subTo(m_expression); return m_expression; }
+
+ template<typename Lhs, typename Rhs, int NestingFlags>
+ EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
+ { return m_expression.derived() += CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
+
+ template<typename Lhs, typename Rhs, int NestingFlags>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
+ { return m_expression.derived() -= CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
+
+ template<typename OtherDerived>
+ ExpressionType& operator=(const ReturnByValue<OtherDerived>& func)
+ { return m_expression = func; }
+#endif
+
+ EIGEN_DEVICE_FUNC
+ ExpressionType& expression() const
+ {
+ return m_expression;
+ }
+
+ protected:
+ ExpressionType& m_expression;
+};
+
+/** \returns a pseudo expression of \c *this with an operator= assuming
+ * no aliasing between \c *this and the source expression.
+ *
+ * More precisely, noalias() allows bypassing the EvalBeforeAssignBit flag.
+ * Currently, even though several expressions may alias, only product
+ * expressions have this flag. Therefore, noalias() is only useful when
+ * the source expression contains a matrix product.
+ *
+ * Here are some examples where noalias() is useful:
+ * \code
+ * D.noalias() = A * B;
+ * D.noalias() += A.transpose() * B;
+ * D.noalias() -= 2 * A * B.adjoint();
+ * \endcode
+ *
+ * On the other hand the following example will lead to a \b wrong result:
+ * \code
+ * A.noalias() = A * B;
+ * \endcode
+ * because the result matrix A is also an operand of the matrix product. Therefore,
+ * there is no alternative to evaluating A * B in a temporary, which is the default
+ * behavior when you write:
+ * \code
+ * A = A * B;
+ * \endcode
+ *
+ * \sa class NoAlias
+ */
+template<typename Derived>
+NoAlias<Derived,MatrixBase> MatrixBase<Derived>::noalias()
+{
+ return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_NOALIAS_H
diff --git a/third_party/eigen3/Eigen/src/Core/NumTraits.h b/third_party/eigen3/Eigen/src/Core/NumTraits.h
new file mode 100644
index 0000000000..dee9159517
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/NumTraits.h
@@ -0,0 +1,177 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NUMTRAITS_H
+#define EIGEN_NUMTRAITS_H
+
+namespace Eigen {
+
+/** \class NumTraits
+ * \ingroup Core_Module
+ *
+ * \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
+ *
+ * \param T the numeric type at hand
+ *
+ * This class stores enums, typedefs and static methods giving information about a numeric type.
+ *
+ * The provided data consists of:
+ * \li A typedef \a Real, giving the "real part" type of \a T. If \a T is already real,
+ * then \a Real is just a typedef to \a T. If \a T is \c std::complex<U> then \a Real
+ * is a typedef to \a U.
+ * \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values,
+ * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
+ * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to
+ * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
+ * only intended as a helper for code that needs to explicitly promote types.
+ * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
+ * this means, just use \a T here.
+ * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex
+ * type, and to 0 otherwise.
+ * \li An enum value \a IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int,
+ * and to \c 0 otherwise.
+ * \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed
+ * by move / add / mul instructions respectively, assuming the data is already stored in CPU registers.
+ * Stay vague here. No need to do architecture-specific stuff.
+ * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
+ * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
+ * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise.
+ * \li An epsilon() function which, unlike std::numeric_limits::epsilon(), returns a \a Real instead of a \a T.
+ * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default
+ * value by the fuzzy comparison operators.
+ * \li highest() and lowest() functions returning the highest and lowest possible values respectively.
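+ *
+ * As a sketch of how a user-defined scalar type can be made known to Eigen (the type \c MyScalar
+ * below is purely hypothetical, and the exact set of required members may vary):
+ * \code
+ * namespace Eigen {
+ *   template<> struct NumTraits<MyScalar> : GenericNumTraits<MyScalar>
+ *   {
+ *     typedef MyScalar Real;
+ *     typedef MyScalar NonInteger;
+ *     typedef MyScalar Nested;
+ *     enum { IsComplex = 0, IsInteger = 0, IsSigned = 1, RequireInitialization = 1,
+ *            ReadCost = 1, AddCost = 3, MulCost = 3 };
+ *     static inline Real epsilon() { return MyScalar(1e-10); }
+ *     static inline Real dummy_precision() { return MyScalar(1e-8); }
+ *   };
+ * }
+ * \endcode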
+ */
+
+template<typename T> struct GenericNumTraits
+{
+ enum {
+ IsInteger = std::numeric_limits<T>::is_integer,
+ IsSigned = std::numeric_limits<T>::is_signed,
+ IsComplex = 0,
+ RequireInitialization = internal::is_arithmetic<T>::value ? 0 : 1,
+ ReadCost = 1,
+ AddCost = 1,
+ MulCost = 1
+ };
+
+ typedef T Real;
+ typedef typename internal::conditional<
+ IsInteger,
+ typename internal::conditional<sizeof(T)<=2, float, double>::type,
+ T
+ >::type NonInteger;
+ typedef T Nested;
+
+ EIGEN_DEVICE_FUNC
+ static inline Real epsilon()
+ {
+#if defined(__CUDA_ARCH__) && !defined(__GCUDACC__)
+ return internal::device::numeric_limits<T>::epsilon();
+#else
+ return std::numeric_limits<T>::epsilon();
+#endif
+ }
+ EIGEN_DEVICE_FUNC
+ static inline Real dummy_precision()
+ {
+ // make sure to override this for floating-point types
+ return Real(0);
+ }
+
+ EIGEN_DEVICE_FUNC
+ static inline T highest() {
+#if defined(__CUDA_ARCH__) && !defined(__GCUDACC__)
+ return internal::device::numeric_limits<T>::max();
+#else
+ return (std::numeric_limits<T>::max)();
+#endif
+ }
+
+ EIGEN_DEVICE_FUNC
+ static inline T lowest() {
+#if defined(__CUDA_ARCH__) && !defined(__GCUDACC__)
+ return internal::device::numeric_limits<T>::lowest();
+#else
+ return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)());
+#endif
+ }
+
+#ifdef EIGEN2_SUPPORT
+ enum {
+ HasFloatingPoint = !IsInteger
+ };
+ typedef NonInteger FloatingPoint;
+#endif
+};
+
+template<typename T> struct NumTraits : GenericNumTraits<T>
+{};
+
+template<> struct NumTraits<float>
+ : GenericNumTraits<float>
+{
+ EIGEN_DEVICE_FUNC
+ static inline float dummy_precision() { return 1e-5f; }
+};
+
+template<> struct NumTraits<double> : GenericNumTraits<double>
+{
+ EIGEN_DEVICE_FUNC
+ static inline double dummy_precision() { return 1e-12; }
+};
+
+template<> struct NumTraits<long double>
+ : GenericNumTraits<long double>
+{
+ static inline long double dummy_precision() { return 1e-15l; }
+};
+
+template<typename _Real> struct NumTraits<std::complex<_Real> >
+ : GenericNumTraits<std::complex<_Real> >
+{
+ typedef _Real Real;
+ enum {
+ IsComplex = 1,
+ RequireInitialization = NumTraits<_Real>::RequireInitialization,
+ ReadCost = 2 * NumTraits<_Real>::ReadCost,
+ AddCost = 2 * NumTraits<Real>::AddCost,
+ MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
+ };
+
+ static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
+ static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
+};
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+{
+ typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> ArrayType;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef Array<RealScalar, Rows, Cols, Options, MaxRows, MaxCols> Real;
+ typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
+ typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
+ typedef ArrayType & Nested;
+
+ enum {
+ IsComplex = NumTraits<Scalar>::IsComplex,
+ IsInteger = NumTraits<Scalar>::IsInteger,
+ IsSigned = NumTraits<Scalar>::IsSigned,
+ RequireInitialization = 1,
+ ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
+ AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
+ MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
+ };
+
+ static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
+ static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_NUMTRAITS_H
diff --git a/third_party/eigen3/Eigen/src/Core/PermutationMatrix.h b/third_party/eigen3/Eigen/src/Core/PermutationMatrix.h
new file mode 100644
index 0000000000..1297b8413f
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/PermutationMatrix.h
@@ -0,0 +1,689 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PERMUTATIONMATRIX_H
+#define EIGEN_PERMUTATIONMATRIX_H
+
+namespace Eigen {
+
+template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl;
+
+/** \class PermutationBase
+ * \ingroup Core_Module
+ *
+ * \brief Base class for permutations
+ *
+ * \param Derived the derived class
+ *
+ * This class is the base class for all expressions representing a permutation matrix,
+ * internally stored as a vector of integers.
+ * The convention followed here is that if \f$ \sigma \f$ is a permutation, the corresponding permutation matrix
+ * \f$ P_\sigma \f$ is such that if \f$ (e_1,\ldots,e_p) \f$ is the canonical basis, we have:
+ * \f[ P_\sigma(e_i) = e_{\sigma(i)}. \f]
+ * This convention ensures that for any two permutations \f$ \sigma, \tau \f$, we have:
+ * \f[ P_{\sigma\circ\tau} = P_\sigma P_\tau. \f]
+ *
+ * Permutation matrices are square and invertible.
+ *
+ * Notice that in addition to the member functions and operators listed here, there are also non-member
+ * operator* overloads to multiply any kind of permutation object with any kind of matrix expression (MatrixBase)
+ * on either side.
+ *
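+ * For example (a small usage sketch):
+ * \code
+ * Eigen::PermutationMatrix<3> P;
+ * P.indices() << 1, 2, 0;                        // the permutation sending 0->1, 1->2, 2->0
+ * Eigen::Matrix3d A = Eigen::Matrix3d::Random();
+ * Eigen::Matrix3d PA = P * A;                    // applies P to the rows of A
+ * Eigen::Matrix3d AP = A * P;                    // applies P to the columns of A
+ * \endcode
+ *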
+ * \sa class PermutationMatrix, class PermutationWrapper
+ */
+
+namespace internal {
+
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
+struct permut_matrix_product_retval;
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
+struct permut_sparsematrix_product_retval;
+enum PermPermProduct_t {PermPermProduct};
+
+} // end namespace internal
+
+template<typename Derived>
+class PermutationBase : public EigenBase<Derived>
+{
+ typedef internal::traits<Derived> Traits;
+ typedef EigenBase<Derived> Base;
+ public:
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef typename Traits::IndicesType IndicesType;
+ enum {
+ Flags = Traits::Flags,
+ CoeffReadCost = Traits::CoeffReadCost,
+ RowsAtCompileTime = Traits::RowsAtCompileTime,
+ ColsAtCompileTime = Traits::ColsAtCompileTime,
+ MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
+ };
+ typedef typename Traits::Scalar Scalar;
+ typedef typename Traits::Index Index;
+ typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
+ DenseMatrixType;
+ typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,Index>
+ PlainPermutationType;
+ using Base::derived;
+ #endif
+
+ /** Copies the other permutation into *this */
+ template<typename OtherDerived>
+ Derived& operator=(const PermutationBase<OtherDerived>& other)
+ {
+ indices() = other.indices();
+ return derived();
+ }
+
+ /** Assignment from the Transpositions \a tr */
+ template<typename OtherDerived>
+ Derived& operator=(const TranspositionsBase<OtherDerived>& tr)
+ {
+ setIdentity(tr.size());
+ for(Index k=size()-1; k>=0; --k)
+ applyTranspositionOnTheRight(k,tr.coeff(k));
+ return derived();
+ }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** This is a special case of the templated operator=. Its purpose is to
+ * prevent a default operator= from hiding the templated operator=.
+ */
+ Derived& operator=(const PermutationBase& other)
+ {
+ indices() = other.indices();
+ return derived();
+ }
+ #endif
+
+ /** \returns the number of rows */
+ inline Index rows() const { return Index(indices().size()); }
+
+ /** \returns the number of columns */
+ inline Index cols() const { return Index(indices().size()); }
+
+ /** \returns the size of a side of the respective square matrix, i.e., the number of indices */
+ inline Index size() const { return Index(indices().size()); }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename DenseDerived>
+ void evalTo(MatrixBase<DenseDerived>& other) const
+ {
+ other.setZero();
+ for (int i=0; i<rows();++i)
+ other.coeffRef(indices().coeff(i),i) = typename DenseDerived::Scalar(1);
+ }
+ #endif
+
+ /** \returns a Matrix object initialized from this permutation matrix. Notice that it
+ * is inefficient to return this Matrix object by value. For efficiency, favor using
+ * the Matrix constructor taking EigenBase objects.
+ */
+ DenseMatrixType toDenseMatrix() const
+ {
+ return derived();
+ }
+
+ /** const version of indices(). */
+ const IndicesType& indices() const { return derived().indices(); }
+ /** \returns a reference to the stored array representing the permutation. */
+ IndicesType& indices() { return derived().indices(); }
+
+ /** Resizes to given size.
+ */
+ inline void resize(Index newSize)
+ {
+ indices().resize(newSize);
+ }
+
+ /** Sets *this to be the identity permutation matrix */
+ void setIdentity()
+ {
+ for(Index i = 0; i < size(); ++i)
+ indices().coeffRef(i) = i;
+ }
+
+ /** Sets *this to be the identity permutation matrix of given size.
+ */
+ void setIdentity(Index newSize)
+ {
+ resize(newSize);
+ setIdentity();
+ }
+
+ /** Multiplies *this by the transposition \f$(ij)\f$ on the left.
+ *
+ * \returns a reference to *this.
+ *
+ * \warning This is much slower than applyTranspositionOnTheRight(int,int):
+ * this has linear complexity and requires a lot of branching.
+ *
+ * \sa applyTranspositionOnTheRight(int,int)
+ */
+ Derived& applyTranspositionOnTheLeft(Index i, Index j)
+ {
+ eigen_assert(i>=0 && j>=0 && i<size() && j<size());
+ for(Index k = 0; k < size(); ++k)
+ {
+ if(indices().coeff(k) == i) indices().coeffRef(k) = j;
+ else if(indices().coeff(k) == j) indices().coeffRef(k) = i;
+ }
+ return derived();
+ }
+
+ /** Multiplies *this by the transposition \f$(ij)\f$ on the right.
+ *
+ * \returns a reference to *this.
+ *
+ * This is a fast operation; it only consists of swapping two indices.
+ *
+ * \sa applyTranspositionOnTheLeft(int,int)
+ */
+ Derived& applyTranspositionOnTheRight(Index i, Index j)
+ {
+ eigen_assert(i>=0 && j>=0 && i<size() && j<size());
+ std::swap(indices().coeffRef(i), indices().coeffRef(j));
+ return derived();
+ }
+
+ /** \returns the inverse permutation matrix.
+ *
+ * \note \note_try_to_help_rvo
+ */
+ inline Transpose<PermutationBase> inverse() const
+ { return derived(); }
+ /** \returns the transpose permutation matrix.
+ *
+ * \note \note_try_to_help_rvo
+ */
+ inline Transpose<PermutationBase> transpose() const
+ { return derived(); }
+
+ /**** multiplication helpers to hopefully get RVO ****/
+
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ protected:
+ template<typename OtherDerived>
+ void assignTranspose(const PermutationBase<OtherDerived>& other)
+ {
+ for (int i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;
+ }
+ template<typename Lhs,typename Rhs>
+ void assignProduct(const Lhs& lhs, const Rhs& rhs)
+ {
+ eigen_assert(lhs.cols() == rhs.rows());
+ for (int i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));
+ }
+#endif
+
+ public:
+
+ /** \returns the product permutation matrix.
+ *
+ * \note \note_try_to_help_rvo
+ */
+ template<typename Other>
+ inline PlainPermutationType operator*(const PermutationBase<Other>& other) const
+ { return PlainPermutationType(internal::PermPermProduct, derived(), other.derived()); }
+
+ /** \returns the product of a permutation with another inverse permutation.
+ *
+ * \note \note_try_to_help_rvo
+ */
+ template<typename Other>
+ inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other) const
+ { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); }
+
+ /** \returns the product of an inverse permutation with another permutation.
+ *
+ * \note \note_try_to_help_rvo
+ */
+ template<typename Other> friend
+ inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other, const PermutationBase& perm)
+ { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }
+
+ protected:
+
+};
+
+/** \class PermutationMatrix
+ * \ingroup Core_Module
+ *
+ * \brief Permutation matrix
+ *
+ * \param SizeAtCompileTime the number of rows/cols, or Dynamic
+ * \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
+ * \param IndexType the integer type of the indices
+ *
+ * This class represents a permutation matrix, internally stored as a vector of integers.
+ *
+ * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
+ */
+
+namespace internal {
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
+struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
+ : traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+{
+ typedef IndexType Index;
+ typedef Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
+};
+}
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
+class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
+{
+ typedef PermutationBase<PermutationMatrix> Base;
+ typedef internal::traits<PermutationMatrix> Traits;
+ public:
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef typename Traits::IndicesType IndicesType;
+ #endif
+
+ inline PermutationMatrix()
+ {}
+
+ /** Constructs an uninitialized permutation matrix of given size.
+ */
+ inline PermutationMatrix(int size) : m_indices(size)
+ {}
+
+ /** Copy constructor. */
+ template<typename OtherDerived>
+ inline PermutationMatrix(const PermutationBase<OtherDerived>& other)
+ : m_indices(other.indices()) {}
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** Standard copy constructor. Defined only to prevent a default copy constructor
+ * from hiding the other templated constructor */
+ inline PermutationMatrix(const PermutationMatrix& other) : m_indices(other.indices()) {}
+ #endif
+
+ /** Generic constructor from expression of the indices. The indices
+ * array has the meaning that the permutation sends each integer i to indices[i].
+ *
+ * \warning It is your responsibility to check that the indices array that you pass actually
+ * describes a permutation, i.e., each value between 0 and n-1 occurs exactly once, where n is the
+ * array's size.
+ */
+ template<typename Other>
+ explicit inline PermutationMatrix(const MatrixBase<Other>& a_indices) : m_indices(a_indices)
+ {}
+
+ /** Convert the Transpositions \a tr to a permutation matrix */
+ template<typename Other>
+ explicit PermutationMatrix(const TranspositionsBase<Other>& tr)
+ : m_indices(tr.size())
+ {
+ *this = tr;
+ }
+
+ /** Copies the other permutation into *this */
+ template<typename Other>
+ PermutationMatrix& operator=(const PermutationBase<Other>& other)
+ {
+ m_indices = other.indices();
+ return *this;
+ }
+
+ /** Assignment from the Transpositions \a tr */
+ template<typename Other>
+ PermutationMatrix& operator=(const TranspositionsBase<Other>& tr)
+ {
+ return Base::operator=(tr.derived());
+ }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** This is a special case of the templated operator=. Its purpose is to
+ * prevent a default operator= from hiding the templated operator=.
+ */
+ PermutationMatrix& operator=(const PermutationMatrix& other)
+ {
+ m_indices = other.m_indices;
+ return *this;
+ }
+ #endif
+
+ /** const version of indices(). */
+ const IndicesType& indices() const { return m_indices; }
+ /** \returns a reference to the stored array representing the permutation. */
+ IndicesType& indices() { return m_indices; }
+
+
+ /**** multiplication helpers to hopefully get RVO ****/
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename Other>
+ PermutationMatrix(const Transpose<PermutationBase<Other> >& other)
+ : m_indices(other.nestedPermutation().size())
+ {
+ for (int i=0; i<m_indices.size();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
+ }
+ template<typename Lhs,typename Rhs>
+ PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)
+ : m_indices(lhs.indices().size())
+ {
+ Base::assignProduct(lhs,rhs);
+ }
+#endif
+
+ protected:
+
+ IndicesType m_indices;
+};
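A minimal usage sketch for the PermutationMatrix class above (illustrative only, not part of the patched Eigen sources): it builds a permutation from an explicit index vector, following the documented convention that entry i of the vector tells where i is sent, and assumes the <Eigen/Dense> umbrella header.

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::Vector3i indices(2, 0, 1);          // i -> indices[i]
      Eigen::PermutationMatrix<3> P(indices);    // fixed-size 3x3 permutation

      Eigen::Matrix3d A;
      A << 1, 2, 3,
           4, 5, 6,
           7, 8, 9;

      Eigen::Matrix3d PA = P * A;                // rows of A permuted
      std::cout << P.toDenseMatrix() << "\n\n"   // the permutation as a dense 0/1 matrix
                << PA << "\n";
      return 0;
    }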
+
+
+namespace internal {
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
+struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
+ : traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+{
+ typedef IndexType Index;
+ typedef Map<const Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
+};
+}
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
+class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess>
+ : public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
+{
+ typedef PermutationBase<Map> Base;
+ typedef internal::traits<Map> Traits;
+ public:
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef typename Traits::IndicesType IndicesType;
+ typedef typename IndicesType::Scalar Index;
+ #endif
+
+ inline Map(const Index* indicesPtr)
+ : m_indices(indicesPtr)
+ {}
+
+ inline Map(const Index* indicesPtr, Index size)
+ : m_indices(indicesPtr,size)
+ {}
+
+ /** Copies the other permutation into *this */
+ template<typename Other>
+ Map& operator=(const PermutationBase<Other>& other)
+ { return Base::operator=(other.derived()); }
+
+ /** Assignment from the Transpositions \a tr */
+ template<typename Other>
+ Map& operator=(const TranspositionsBase<Other>& tr)
+ { return Base::operator=(tr.derived()); }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** This is a special case of the templated operator=. Its purpose is to
+ * prevent a default operator= from hiding the templated operator=.
+ */
+ Map& operator=(const Map& other)
+ {
+ m_indices = other.m_indices;
+ return *this;
+ }
+ #endif
+
+ /** const version of indices(). */
+ const IndicesType& indices() const { return m_indices; }
+ /** \returns a reference to the stored array representing the permutation. */
+ IndicesType& indices() { return m_indices; }
+
+ protected:
+
+ IndicesType m_indices;
+};
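A hedged sketch of the Map specialization above: it wraps an externally owned index buffer as a permutation without copying it, so the mapped indices are used directly by the product operators. Illustrative only; assumes <Eigen/Dense>.

    #include <Eigen/Dense>

    int main() {
      int raw[3] = {2, 0, 1};  // externally owned index array describing a permutation

      Eigen::Map<Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> > P(raw, 3);

      Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);
      Eigen::MatrixXd B = P * A;  // applies the permutation read from 'raw'
      return B.size() == 9 ? 0 : 1;
    }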
+
+/** \class PermutationWrapper
+ * \ingroup Core_Module
+ *
+ * \brief Class to view a vector of integers as a permutation matrix
+ *
+ * \param _IndicesType the type of the vector of integers (can be any compatible expression)
+ *
+ * This class makes it possible to view any vector expression of integers as a permutation matrix.
+ *
+ * \sa class PermutationBase, class PermutationMatrix
+ */
+
+struct PermutationStorage {};
+
+template<typename _IndicesType> class TranspositionsWrapper;
+namespace internal {
+template<typename _IndicesType>
+struct traits<PermutationWrapper<_IndicesType> >
+{
+ typedef PermutationStorage StorageKind;
+ typedef typename _IndicesType::Scalar Scalar;
+ typedef typename _IndicesType::Scalar Index;
+ typedef _IndicesType IndicesType;
+ enum {
+ RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
+ ColsAtCompileTime = _IndicesType::SizeAtCompileTime,
+ MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime,
+ Flags = 0,
+ CoeffReadCost = _IndicesType::CoeffReadCost
+ };
+};
+}
+
+template<typename _IndicesType>
+class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
+{
+ typedef PermutationBase<PermutationWrapper> Base;
+ typedef internal::traits<PermutationWrapper> Traits;
+ public:
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef typename Traits::IndicesType IndicesType;
+ #endif
+
+ inline PermutationWrapper(const IndicesType& a_indices)
+ : m_indices(a_indices)
+ {}
+
+ /** const version of indices(). */
+ const typename internal::remove_all<typename IndicesType::Nested>::type&
+ indices() const { return m_indices; }
+
+ protected:
+
+ typename IndicesType::Nested m_indices;
+};
+
+/** \returns the matrix with the permutation applied to the columns.
+ */
+template<typename Derived, typename PermutationDerived>
+inline const internal::permut_matrix_product_retval<PermutationDerived, Derived, OnTheRight>
+operator*(const MatrixBase<Derived>& matrix,
+ const PermutationBase<PermutationDerived> &permutation)
+{
+ return internal::permut_matrix_product_retval
+ <PermutationDerived, Derived, OnTheRight>
+ (permutation.derived(), matrix.derived());
+}
+
+/** \returns the matrix with the permutation applied to the rows.
+ */
+template<typename Derived, typename PermutationDerived>
+inline const internal::permut_matrix_product_retval
+ <PermutationDerived, Derived, OnTheLeft>
+operator*(const PermutationBase<PermutationDerived> &permutation,
+ const MatrixBase<Derived>& matrix)
+{
+ return internal::permut_matrix_product_retval
+ <PermutationDerived, Derived, OnTheLeft>
+ (permutation.derived(), matrix.derived());
+}
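The two free operator* overloads above choose, from the side on which the permutation appears, whether rows or columns get permuted. A small hedged illustration (assumes <Eigen/Dense>):

    #include <Eigen/Dense>

    int main() {
      Eigen::Vector3i idx(1, 0, 2);            // swap the first two positions
      Eigen::PermutationMatrix<3> P(idx);

      Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);
      Eigen::MatrixXd rowsPermuted = P * A;    // permutation on the left acts on the rows
      Eigen::MatrixXd colsPermuted = A * P;    // permutation on the right acts on the columns
      (void)rowsPermuted;
      (void)colsPermuted;
      return 0;
    }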
+
+namespace internal {
+
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
+struct traits<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
+{
+ typedef typename MatrixType::PlainObject ReturnType;
+};
+
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
+struct permut_matrix_product_retval
+ : public ReturnByValue<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
+{
+ typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
+ typedef typename MatrixType::Index Index;
+
+ permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
+ : m_permutation(perm), m_matrix(matrix)
+ {}
+
+ inline Index rows() const { return m_matrix.rows(); }
+ inline Index cols() const { return m_matrix.cols(); }
+
+ template<typename Dest> inline void evalTo(Dest& dst) const
+ {
+ const Index n = Side==OnTheLeft ? rows() : cols();
+ // FIXME we need an is_same for expression that is not sensitive to constness. For instance
+ // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
+ if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
+ {
+ // apply the permutation inplace
+ Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
+ mask.fill(false);
+ Index r = 0;
+ while(r < m_permutation.size())
+ {
+ // search for the next seed
+ while(r<m_permutation.size() && mask[r]) r++;
+ if(r>=m_permutation.size())
+ break;
+ // we got one, let's follow it until we are back to the seed
+ Index k0 = r++;
+ Index kPrev = k0;
+ mask.coeffRef(k0) = true;
+ for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
+ {
+ Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
+ .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+ (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
+
+ mask.coeffRef(k) = true;
+ kPrev = k;
+ }
+ }
+ }
+ else
+ {
+ for(int i = 0; i < n; ++i)
+ {
+ Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+ (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
+
+ =
+
+ Block<const MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
+ (m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
+ }
+ }
+ }
+
+ protected:
+ const PermutationType& m_permutation;
+ typename MatrixType::Nested m_matrix;
+};
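The aliased branch of evalTo() above applies the permutation in place by following each cycle exactly once, using a boolean mask to remember the positions already placed. Below is a standalone sketch of the same idea on a plain std::vector; applyPermutationInPlace is a hypothetical helper, with the convention that the element at position i moves to position p[i].

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Moves the element at position i to position p[i], following each
    // cycle of the permutation exactly once (O(n) time, plus an O(n) mask).
    void applyPermutationInPlace(std::vector<double>& v, const std::vector<std::size_t>& p)
    {
      std::vector<bool> done(v.size(), false);
      for (std::size_t i = 0; i < v.size(); ++i)
      {
        if (done[i]) continue;            // this cycle was already handled
        std::size_t cur = i;
        double carried = v[i];            // the value still waiting to be placed
        do
        {
          std::size_t dest = p[cur];      // where the carried value belongs
          std::swap(carried, v[dest]);    // drop it off, pick up the displaced value
          done[dest] = true;
          cur = dest;
        } while (cur != i);               // back at the seed: cycle closed
      }
    }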
+
+/* Template partial specialization for transposed/inverse permutations */
+
+template<typename Derived>
+struct traits<Transpose<PermutationBase<Derived> > >
+ : traits<Derived>
+{};
+
+} // end namespace internal
+
+template<typename Derived>
+class Transpose<PermutationBase<Derived> >
+ : public EigenBase<Transpose<PermutationBase<Derived> > >
+{
+ typedef Derived PermutationType;
+ typedef typename PermutationType::IndicesType IndicesType;
+ typedef typename PermutationType::PlainPermutationType PlainPermutationType;
+ public:
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef internal::traits<PermutationType> Traits;
+ typedef typename Derived::DenseMatrixType DenseMatrixType;
+ enum {
+ Flags = Traits::Flags,
+ CoeffReadCost = Traits::CoeffReadCost,
+ RowsAtCompileTime = Traits::RowsAtCompileTime,
+ ColsAtCompileTime = Traits::ColsAtCompileTime,
+ MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
+ };
+ typedef typename Traits::Scalar Scalar;
+ #endif
+
+ Transpose(const PermutationType& p) : m_permutation(p) {}
+
+ inline int rows() const { return m_permutation.rows(); }
+ inline int cols() const { return m_permutation.cols(); }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename DenseDerived>
+ void evalTo(MatrixBase<DenseDerived>& other) const
+ {
+ other.setZero();
+ for (int i=0; i<rows();++i)
+ other.coeffRef(i, m_permutation.indices().coeff(i)) = typename DenseDerived::Scalar(1);
+ }
+ #endif
+
+ /** \return the equivalent permutation matrix */
+ PlainPermutationType eval() const { return *this; }
+
+ DenseMatrixType toDenseMatrix() const { return *this; }
+
+ /** \returns the matrix with the inverse permutation applied to the columns.
+ */
+ template<typename OtherDerived> friend
+ inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>
+ operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trPerm)
+ {
+ return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
+ }
+
+ /** \returns the matrix with the inverse permutation applied to the rows.
+ */
+ template<typename OtherDerived>
+ inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>
+ operator*(const MatrixBase<OtherDerived>& matrix) const
+ {
+ return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>(m_permutation, matrix.derived());
+ }
+
+ const PermutationType& nestedPermutation() const { return m_permutation; }
+
+ protected:
+ const PermutationType& m_permutation;
+};
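The Transpose specialization above is what expressions such as p.inverse() and p.transpose() evaluate to; for a permutation matrix the two coincide. A hedged round-trip sketch (assumes <Eigen/Dense>):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::Vector3i idx(2, 0, 1);
      Eigen::PermutationMatrix<3> P(idx);

      Eigen::Matrix3d A = Eigen::Matrix3d::Random();
      Eigen::Matrix3d B = P * A;            // permute the rows
      Eigen::Matrix3d C = P.inverse() * B;  // and undo it again
      std::cout << (C - A).norm() << "\n";  // prints 0: entries are only moved, never changed
      return 0;
    }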
+
+template<typename Derived>
+const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() const
+{
+ return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_PERMUTATIONMATRIX_H
diff --git a/third_party/eigen3/Eigen/src/Core/PlainObjectBase.h b/third_party/eigen3/Eigen/src/Core/PlainObjectBase.h
new file mode 100644
index 0000000000..50c3656a98
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/PlainObjectBase.h
@@ -0,0 +1,895 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DENSESTORAGEBASE_H
+#define EIGEN_DENSESTORAGEBASE_H
+
+#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
+#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
+#else
+# undef EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+#endif
+
+namespace Eigen {
+
+namespace internal {
+
+template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
+ template<typename Index>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_ALWAYS_INLINE void run(Index, Index)
+ {
+ }
+};
+
+template<> struct check_rows_cols_for_overflow<Dynamic> {
+ template<typename Index>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
+ {
+ // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
+ // we assume Index is signed
+ Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
+ bool error = (rows == 0 || cols == 0) ? false
+ : (rows > max_index / cols);
+ if (error)
+ throw_std_bad_alloc();
+ }
+};
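The Dynamic specialization above guards the rows*cols product against overflowing the (signed) Index type without actually performing the multiplication: it only errors out when cols is nonzero and rows exceeds max_index / cols. The same guard in isolation, as a hedged sketch (checkedSize is a hypothetical helper, not part of the sources):

    #include <limits>
    #include <new>

    // Returns rows * cols, or throws std::bad_alloc if the product would overflow.
    // Mirrors the check above; rows and cols are assumed to be non-negative.
    long long checkedSize(long long rows, long long cols)
    {
      const long long max_index = std::numeric_limits<long long>::max();
      const bool error = (rows == 0 || cols == 0) ? false : (rows > max_index / cols);
      if (error)
        throw std::bad_alloc();
      return rows * cols;
    }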
+
+template <typename Derived,
+ typename OtherDerived = Derived,
+ bool IsVector = bool(Derived::IsVectorAtCompileTime) && bool(OtherDerived::IsVectorAtCompileTime)>
+struct conservative_resize_like_impl;
+
+template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
+
+} // end namespace internal
+
+/** \class PlainObjectBase
+ * \brief %Dense storage base class for matrices and arrays.
+ *
+ * This class can be extended with the help of the plugin mechanism described on the page
+ * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
+ *
+ * \sa \ref TopicClassHierarchy
+ */
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+namespace internal {
+
+// this is a workaround for doxygen not being able to understand the inheritance logic
+// when it is hidden by the dense_xpr_base helper struct.
+template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
+/** This class is just a workaround for Doxygen and it does not actually exist. */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct dense_xpr_base_dispatcher_for_doxygen<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+ : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
+/** This class is just a workaround for Doxygen and it does not actually exist. */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct dense_xpr_base_dispatcher_for_doxygen<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+ : public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
+
+} // namespace internal
+
+template<typename Derived>
+class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen<Derived>
+#else
+template<typename Derived>
+class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
+#endif
+{
+ public:
+ enum { Options = internal::traits<Derived>::Options };
+ typedef typename internal::dense_xpr_base<Derived>::type Base;
+
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Index Index;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef Derived DenseType;
+
+ using Base::RowsAtCompileTime;
+ using Base::ColsAtCompileTime;
+ using Base::SizeAtCompileTime;
+ using Base::MaxRowsAtCompileTime;
+ using Base::MaxColsAtCompileTime;
+ using Base::MaxSizeAtCompileTime;
+ using Base::IsVectorAtCompileTime;
+ using Base::Flags;
+
+ template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
+ friend class Eigen::Map<Derived, Unaligned>;
+ typedef Eigen::Map<Derived, Unaligned> MapType;
+ friend class Eigen::Map<const Derived, Unaligned>;
+ typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
+ friend class Eigen::Map<Derived, Aligned>;
+ typedef Eigen::Map<Derived, Aligned> AlignedMapType;
+ friend class Eigen::Map<const Derived, Aligned>;
+ typedef const Eigen::Map<const Derived, Aligned> ConstAlignedMapType;
+ template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
+ template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
+ template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
+ template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
+
+ protected:
+ DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
+
+ public:
+ enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+
+ EIGEN_DEVICE_FUNC
+ Base& base() { return *static_cast<Base*>(this); }
+ EIGEN_DEVICE_FUNC
+ const Base& base() const { return *static_cast<const Base*>(this); }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
+ {
+ if(Flags & RowMajorBit)
+ return m_storage.data()[colId + rowId * m_storage.cols()];
+ else // column-major
+ return m_storage.data()[rowId + colId * m_storage.rows()];
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
+ {
+ return m_storage.data()[index];
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
+ {
+ if(Flags & RowMajorBit)
+ return m_storage.data()[colId + rowId * m_storage.cols()];
+ else // column-major
+ return m_storage.data()[rowId + colId * m_storage.rows()];
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
+ {
+ return m_storage.data()[index];
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ if(Flags & RowMajorBit)
+ return m_storage.data()[colId + rowId * m_storage.cols()];
+ else // column-major
+ return m_storage.data()[rowId + colId * m_storage.rows()];
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
+ {
+ return m_storage.data()[index];
+ }
+
+ /** \internal */
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
+ {
+ return internal::ploadt<PacketScalar, LoadMode>
+ (m_storage.data() + (Flags & RowMajorBit
+ ? colId + rowId * m_storage.cols()
+ : rowId + colId * m_storage.rows()));
+ }
+
+ /** \internal */
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
+ {
+ return internal::ploadt<PacketScalar, LoadMode>(m_storage.data() + index);
+ }
+
+ /** \internal */
+ template<int StoreMode>
+ EIGEN_STRONG_INLINE void writePacket(Index rowId, Index colId, const PacketScalar& val)
+ {
+ internal::pstoret<Scalar, PacketScalar, StoreMode>
+ (m_storage.data() + (Flags & RowMajorBit
+ ? colId + rowId * m_storage.cols()
+ : rowId + colId * m_storage.rows()), val);
+ }
+
+ /** \internal */
+ template<int StoreMode>
+ EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& val)
+ {
+ internal::pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, val);
+ }
+
+ /** \returns a const pointer to the data array of this matrix */
+ EIGEN_STRONG_INLINE const Scalar *data() const
+ { return m_storage.data(); }
+
+ /** \returns a pointer to the data array of this matrix */
+ EIGEN_STRONG_INLINE Scalar *data()
+ { return m_storage.data(); }
+
+ /** Resizes \c *this to a \a rows x \a cols matrix.
+ *
+ * This method is intended for dynamic-size matrices, although it is legal to call it on any
+ * matrix as long as fixed dimensions are left unchanged. If you only want to change the number
+ * of rows and/or of columns, you can use resize(NoChange_t, Index), resize(Index, NoChange_t).
+ *
+ * If the current number of coefficients of \c *this exactly matches the
+ * product \a rows * \a cols, then no memory allocation is performed and
+ * the current values are left unchanged. In all other cases, including
+ * shrinking, the data is reallocated and all previous values are lost.
+ *
+ * Example: \include Matrix_resize_int_int.cpp
+ * Output: \verbinclude Matrix_resize_int_int.out
+ *
+ * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols)
+ {
+ eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime)
+ && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,nbCols==ColsAtCompileTime)
+ && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,nbRows<=MaxRowsAtCompileTime)
+ && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime)
+ && nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array.");
+ internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
+ #ifdef EIGEN_INITIALIZE_COEFFS
+ Index size = nbRows*nbCols;
+ bool size_changed = size != this->size();
+ m_storage.resize(size, nbRows, nbCols);
+ if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ #else
+ m_storage.resize(nbRows*nbCols, nbRows, nbCols);
+ #endif
+ }
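A short hedged illustration of the resize() semantics documented above: when the total number of coefficients stays the same no reallocation happens, otherwise the old values are lost (assumes <Eigen/Dense>).

    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXd m(2, 3);
      m.setConstant(1.0);

      m.resize(3, 2);   // still 6 coefficients: no reallocation, values kept
                        // (but now interpreted in the new 3x2 shape)
      m.resize(4, 4);   // size changes: reallocates, previous values are lost
      return 0;
    }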
+
+ /** Resizes \c *this to a vector of length \a size
+ *
+ * \only_for_vectors. This method does not work for
+ * partially dynamic matrices when the static dimension is anything other
+ * than 1. For example it will not work with Matrix<double, 2, Dynamic>.
+ *
+ * Example: \include Matrix_resize_int.cpp
+ * Output: \verbinclude Matrix_resize_int.out
+ *
+ * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t)
+ */
+ EIGEN_DEVICE_FUNC
+ inline void resize(Index size)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
+ eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
+ #ifdef EIGEN_INITIALIZE_COEFFS
+ bool size_changed = size != this->size();
+ #endif
+ if(RowsAtCompileTime == 1)
+ m_storage.resize(size, 1, size);
+ else
+ m_storage.resize(size, size, 1);
+ #ifdef EIGEN_INITIALIZE_COEFFS
+ if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ #endif
+ }
+
+ /** Resizes the matrix, changing only the number of columns. For the parameter of type NoChange_t, just pass the special value \c NoChange
+ * as in the example below.
+ *
+ * Example: \include Matrix_resize_NoChange_int.cpp
+ * Output: \verbinclude Matrix_resize_NoChange_int.out
+ *
+ * \sa resize(Index,Index)
+ */
+ EIGEN_DEVICE_FUNC
+ inline void resize(NoChange_t, Index nbCols)
+ {
+ resize(rows(), nbCols);
+ }
+
+ /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange
+ * as in the example below.
+ *
+ * Example: \include Matrix_resize_int_NoChange.cpp
+ * Output: \verbinclude Matrix_resize_int_NoChange.out
+ *
+ * \sa resize(Index,Index)
+ */
+ EIGEN_DEVICE_FUNC
+ inline void resize(Index nbRows, NoChange_t)
+ {
+ resize(nbRows, cols());
+ }
+
+ /** Resizes \c *this to have the same dimensions as \a other.
+ * Takes care of doing all the checking that's needed.
+ *
+ * Note that copying a row-vector into a vector (and conversely) is allowed.
+ * The resizing, if any, is then done in the appropriate way so that row-vectors
+ * remain row-vectors and vectors remain vectors.
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
+ {
+ const OtherDerived& other = _other.derived();
+ internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.rows(), other.cols());
+ const Index othersize = other.rows()*other.cols();
+ if(RowsAtCompileTime == 1)
+ {
+ eigen_assert(other.rows() == 1 || other.cols() == 1);
+ resize(1, othersize);
+ }
+ else if(ColsAtCompileTime == 1)
+ {
+ eigen_assert(other.rows() == 1 || other.cols() == 1);
+ resize(othersize, 1);
+ }
+ else resize(other.rows(), other.cols());
+ }
+
+ /** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
+ *
+ * The method is intended for matrices of dynamic size. If you only want to change the number
+ * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or
+ * conservativeResize(Index, NoChange_t).
+ *
+ * Matrices are resized relative to the top-left element. In case values need to be
+ * appended to the matrix they will be uninitialized.
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols)
+ {
+ internal::conservative_resize_like_impl<Derived>::run(*this, nbRows, nbCols);
+ }
+
+ /** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
+ *
+ * As opposed to conservativeResize(Index rows, Index cols), this version leaves
+ * the number of columns unchanged.
+ *
+ * In case the matrix is growing, new rows will be uninitialized.
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t)
+ {
+ // Note: see the comment in conservativeResize(Index,Index)
+ conservativeResize(nbRows, cols());
+ }
+
+ /** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
+ *
+ * As opposed to conservativeResize(Index rows, Index cols), this version leaves
+ * the number of rows unchanged.
+ *
+ * In case the matrix is growing, new columns will be uninitialized.
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols)
+ {
+ // Note: see the comment in conservativeResize(Index,Index)
+ conservativeResize(rows(), nbCols);
+ }
+
+ /** Resizes the vector to \a size while retaining old values.
+ *
+ * \only_for_vectors. This method does not work for
+ * partially dynamic matrices when the static dimension is anything other
+ * than 1. For example it will not work with Matrix<double, 2, Dynamic>.
+ *
+ * When values are appended, they will be uninitialized.
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void conservativeResize(Index size)
+ {
+ internal::conservative_resize_like_impl<Derived>::run(*this, size);
+ }
+
+ /** Resizes the matrix to \a rows x \a cols of \c other, while leaving old values untouched.
+ *
+ * The method is intended for matrices of dynamic size. If you only want to change the number
+ * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or
+ * conservativeResize(Index, NoChange_t).
+ *
+ * Matrices are resized relative to the top-left element. In case values need to be
+ * appended to the matrix they will be copied from \c other.
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
+ {
+ internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
+ }
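In contrast to resize(), the conservativeResize() family above keeps the overlapping top-left block; a hedged sketch (assumes <Eigen/Dense>):

    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXd m(2, 2);
      m << 1, 2,
           3, 4;

      m.conservativeResize(3, 3);                 // old 2x2 block kept, new row/column uninitialized
      m.conservativeResize(Eigen::NoChange, 2);   // drop back to 2 columns, rows untouched
      return 0;
    }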
+
+ /** This is a special case of the templated operator=. Its purpose is to
+ * prevent a default operator= from hiding the templated operator=.
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
+ {
+ return _set(other);
+ }
+
+ /** \sa MatrixBase::lazyAssign() */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)
+ {
+ _resize_to_match(other);
+ return Base::lazyAssign(other.derived());
+ }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)
+ {
+ resize(func.rows(), func.cols());
+ return Base::operator=(func);
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
+ {
+// _check_template_params();
+// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ // FIXME is it still needed ?
+ /** \internal */
+ EIGEN_DEVICE_FUNC
+ PlainObjectBase(internal::constructor_without_unaligned_array_assert)
+ : m_storage(internal::constructor_without_unaligned_array_assert())
+ {
+// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+#endif
+
+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+ EIGEN_DEVICE_FUNC
+ PlainObjectBase(PlainObjectBase&& other)
+ : m_storage( std::move(other.m_storage) )
+ {
+ }
+
+ EIGEN_DEVICE_FUNC
+ PlainObjectBase& operator=(PlainObjectBase&& other)
+ {
+ using std::swap;
+ swap(m_storage, other.m_storage);
+ return *this;
+ }
+#endif
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols)
+ : m_storage(a_size, nbRows, nbCols)
+ {
+// _check_template_params();
+// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+
+ /** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
+ {
+ _resize_to_match(other);
+ Base::operator=(other.derived());
+ return this->derived();
+ }
+
+ /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
+ : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
+ {
+ _check_template_params();
+ internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.derived().rows(), other.derived().cols());
+ Base::operator=(other.derived());
+ }
+
+ /** \name Map
+ * These are convenience functions returning Map objects. The Map() static functions return unaligned Map objects,
+ * while the MapAligned() functions return aligned Map objects and thus should be called only with 16-byte-aligned
+ * \a data pointers.
+ *
+ * \see class Map
+ */
+ //@{
+ static inline ConstMapType Map(const Scalar* data)
+ { return ConstMapType(data); }
+ static inline MapType Map(Scalar* data)
+ { return MapType(data); }
+ static inline ConstMapType Map(const Scalar* data, Index size)
+ { return ConstMapType(data, size); }
+ static inline MapType Map(Scalar* data, Index size)
+ { return MapType(data, size); }
+ static inline ConstMapType Map(const Scalar* data, Index rows, Index cols)
+ { return ConstMapType(data, rows, cols); }
+ static inline MapType Map(Scalar* data, Index rows, Index cols)
+ { return MapType(data, rows, cols); }
+
+ static inline ConstAlignedMapType MapAligned(const Scalar* data)
+ { return ConstAlignedMapType(data); }
+ static inline AlignedMapType MapAligned(Scalar* data)
+ { return AlignedMapType(data); }
+ static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size)
+ { return ConstAlignedMapType(data, size); }
+ static inline AlignedMapType MapAligned(Scalar* data, Index size)
+ { return AlignedMapType(data, size); }
+ static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
+ { return ConstAlignedMapType(data, rows, cols); }
+ static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
+ { return AlignedMapType(data, rows, cols); }
+
+ template<int Outer, int Inner>
+ static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
+ { return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+ { return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+ { return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+
+ template<int Outer, int Inner>
+ static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
+ { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+ { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+ { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+ //@}
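The static Map()/MapAligned() helpers above are convenience wrappers around the Map class; a hedged sketch viewing an external buffer without copying it (assumes <Eigen/Dense>):

    #include <Eigen/Dense>

    int main() {
      double buffer[6] = {1, 2, 3, 4, 5, 6};

      // View the buffer as a 2x3 matrix (column-major by default); no copy is made.
      Eigen::Map<Eigen::MatrixXd> view = Eigen::MatrixXd::Map(buffer, 2, 3);
      view(0, 0) = 42.0;   // writes straight through to buffer[0]
      return 0;
    }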
+
+ using Base::setConstant;
+ EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value);
+ EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value);
+
+ using Base::setZero;
+ EIGEN_DEVICE_FUNC Derived& setZero(Index size);
+ EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
+
+ using Base::setOnes;
+ EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
+ EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
+
+ using Base::setRandom;
+ Derived& setRandom(Index size);
+ Derived& setRandom(Index rows, Index cols);
+
+ #ifdef EIGEN_PLAINOBJECTBASE_PLUGIN
+ #include EIGEN_PLAINOBJECTBASE_PLUGIN
+ #endif
+
+ protected:
+ /** \internal Resizes *this in preparation for assigning \a other to it.
+ * Takes care of doing all the checking that's needed.
+ *
+ * Note that copying a row-vector into a vector (and conversely) is allowed.
+ * The resizing, if any, is then done in the appropriate way so that row-vectors
+ * remain row-vectors and vectors remain vectors.
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
+ {
+ #ifdef EIGEN_NO_AUTOMATIC_RESIZING
+ eigen_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
+ : (rows() == other.rows() && cols() == other.cols())))
+ && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
+ EIGEN_ONLY_USED_FOR_DEBUG(other);
+ #else
+ resizeLike(other);
+ #endif
+ }
+
+ /**
+ * \brief Copies the value of the expression \a other into \c *this with automatic resizing.
+ *
+ * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
+ * it will be initialized.
+ *
+ * Note that copying a row-vector into a vector (and conversely) is allowed.
+ * The resizing, if any, is then done in the appropriate way so that row-vectors
+ * remain row-vectors and vectors remain vectors.
+ *
+ * \sa operator=(const MatrixBase<OtherDerived>&), _set_noalias()
+ *
+ * \internal
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
+ {
+ _set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type());
+ return this->derived();
+ }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); }
+
+ /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
+ * is the case when creating a new matrix) so one can enforce lazy evaluation.
+ *
+ * \sa operator=(const MatrixBase<OtherDerived>&), _set()
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)
+ {
+ // I don't think we need this resize call since lazyAssign will resize anyway,
+ // and lazyAssign will be called by the assign selector.
+ //_resize_to_match(other);
+ // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
+ // it wouldn't allow copying a row-vector into a column-vector.
+ return internal::assign_selector<Derived,OtherDerived,false>::run(this->derived(), other.derived());
+ }
+
+ template<typename T0, typename T1>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
+ {
+ EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
+ bool(NumTraits<T1>::IsInteger),
+ FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
+ resize(nbRows,nbCols);
+ }
+ template<typename T0, typename T1>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
+ m_storage.data()[0] = val0;
+ m_storage.data()[1] = val1;
+ }
+
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<Base::SizeAtCompileTime!=1,T>::type* = 0)
+ {
+ EIGEN_STATIC_ASSERT(bool(NumTraits<T>::IsInteger),
+ FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
+ resize(size);
+ }
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1,T>::type* = 0)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
+ m_storage.data()[0] = val0;
+ }
+
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const Scalar* data){
+ this->_set_noalias(ConstMapType(data));
+ }
+
+ template<typename T, typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
+ this->_set_noalias(other);
+ }
+
+ template<typename T, typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
+ this->derived() = other;
+ }
+
+ template<typename T, typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other)
+ {
+ resize(other.rows(), other.cols());
+ other.evalTo(this->derived());
+ }
+
+ template<typename T, typename OtherDerived, int ColsAtCompileTime>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
+ {
+ this->derived() = r;
+ }
+
+ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
+ friend struct internal::matrix_swap_impl;
+
+ /** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of the same type it is enough to swap the
+ * data pointers.
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void _swap(DenseBase<OtherDerived> const & other)
+ {
+ enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
+ internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.const_cast_derived());
+ }
+
+ public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void _check_template_params()
+ {
+ EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
+ && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)
+ && ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0))
+ && ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0))
+ && ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0))
+ && ((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0))
+ && (MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic)
+ && (MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic)
+ && (Options & (DontAlign|RowMajor)) == Options),
+ INVALID_MATRIX_TEMPLATE_PARAMETERS)
+ }
+#endif
+
+private:
+ enum { ThisConstantIsPrivateInPlainObjectBase };
+};
+
+namespace internal {
+
+template <typename Derived, typename OtherDerived, bool IsVector>
+struct conservative_resize_like_impl
+{
+ typedef typename Derived::Index Index;
+ static void run(DenseBase<Derived>& _this, Index rows, Index cols)
+ {
+ if (_this.rows() == rows && _this.cols() == cols) return;
+ EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
+
+ if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows
+ (!Derived::IsRowMajor && _this.rows() == rows) ) // column-major and we change only the number of columns
+ {
+ internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime>::run(rows, cols);
+ _this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
+ }
+ else
+ {
+ // The storage order does not allow us to use reallocation.
+ typename Derived::PlainObject tmp(rows,cols);
+ const Index common_rows = (std::min)(rows, _this.rows());
+ const Index common_cols = (std::min)(cols, _this.cols());
+ tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
+ _this.derived().swap(tmp);
+ }
+ }
+
+ static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
+ {
+ if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
+
+ // Note: Here is space for improvement. Basically, for conservativeResize(Index,Index),
+ // neither RowsAtCompileTime nor ColsAtCompileTime has to be Dynamic. If only one of the
+ // dimensions is dynamic, one could use either conservativeResize(Index rows, NoChange_t) or
+ // conservativeResize(NoChange_t, Index cols). For these methods new static asserts like
+ // EIGEN_STATIC_ASSERT_DYNAMIC_ROWS and EIGEN_STATIC_ASSERT_DYNAMIC_COLS would be good.
+ EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
+ EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived)
+
+ if ( ( Derived::IsRowMajor && _this.cols() == other.cols()) || // row-major and we change only the number of rows
+ (!Derived::IsRowMajor && _this.rows() == other.rows()) ) // column-major and we change only the number of columns
+ {
+ const Index new_rows = other.rows() - _this.rows();
+ const Index new_cols = other.cols() - _this.cols();
+ _this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols());
+ if (new_rows>0)
+ _this.bottomRightCorner(new_rows, other.cols()) = other.bottomRows(new_rows);
+ else if (new_cols>0)
+ _this.bottomRightCorner(other.rows(), new_cols) = other.rightCols(new_cols);
+ }
+ else
+ {
+ // The storage order does not allow us to use reallocation.
+ typename Derived::PlainObject tmp(other);
+ const Index common_rows = (std::min)(tmp.rows(), _this.rows());
+ const Index common_cols = (std::min)(tmp.cols(), _this.cols());
+ tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
+ _this.derived().swap(tmp);
+ }
+ }
+};
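When the storage order rules out growing the allocation in place, the fallback branch above builds a temporary of the target size, copies the common top-left block, and swaps it in. The same idea as a free function (a hedged sketch; conservativeResizedCopy is hypothetical and assumes <Eigen/Dense>):

    #include <algorithm>
    #include <Eigen/Dense>

    Eigen::MatrixXd conservativeResizedCopy(const Eigen::MatrixXd& m,
                                            Eigen::MatrixXd::Index rows,
                                            Eigen::MatrixXd::Index cols)
    {
      Eigen::MatrixXd tmp(rows, cols);                       // target-size temporary
      const Eigen::MatrixXd::Index commonRows = std::min(rows, m.rows());
      const Eigen::MatrixXd::Index commonCols = std::min(cols, m.cols());
      tmp.block(0, 0, commonRows, commonCols) = m.block(0, 0, commonRows, commonCols);
      return tmp;                                            // the caller swaps this into place
    }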
+
+// Here, the specialization for vectors inherits from the general matrix case
+// to allow calling .conservativeResize(rows,cols) on vectors.
+template <typename Derived, typename OtherDerived>
+struct conservative_resize_like_impl<Derived,OtherDerived,true>
+ : conservative_resize_like_impl<Derived,OtherDerived,false>
+{
+ using conservative_resize_like_impl<Derived,OtherDerived,false>::run;
+
+ typedef typename Derived::Index Index;
+ static void run(DenseBase<Derived>& _this, Index size)
+ {
+ const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size;
+ const Index new_cols = Derived::RowsAtCompileTime==1 ? size : 1;
+ _this.derived().m_storage.conservativeResize(size,new_rows,new_cols);
+ }
+
+ static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
+ {
+ if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
+
+ const Index num_new_elements = other.size() - _this.size();
+
+ const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows();
+ const Index new_cols = Derived::RowsAtCompileTime==1 ? other.cols() : 1;
+ _this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols);
+
+ if (num_new_elements > 0)
+ _this.tail(num_new_elements) = other.tail(num_new_elements);
+ }
+};
+
+template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
+struct matrix_swap_impl
+{
+ EIGEN_DEVICE_FUNC
+ static inline void run(MatrixTypeA& a, MatrixTypeB& b)
+ {
+ a.base().swap(b);
+ }
+};
+
+template<typename MatrixTypeA, typename MatrixTypeB>
+struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
+{
+ EIGEN_DEVICE_FUNC
+ static inline void run(MatrixTypeA& a, MatrixTypeB& b)
+ {
+ static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);
+ }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_DENSESTORAGEBASE_H
diff --git a/third_party/eigen3/Eigen/src/Core/Product.h b/third_party/eigen3/Eigen/src/Core/Product.h
new file mode 100644
index 0000000000..5d3789be74
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Product.h
@@ -0,0 +1,107 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PRODUCT_H
+#define EIGEN_PRODUCT_H
+
+namespace Eigen {
+
+template<typename Lhs, typename Rhs> class Product;
+template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
+
+/** \class Product
+ * \ingroup Core_Module
+ *
+ * \brief Expression of the product of two arbitrary matrices or vectors
+ *
+ * \param Lhs the type of the left-hand side expression
+ * \param Rhs the type of the right-hand side expression
+ *
+ * This class represents an expression of the product of two arbitrary matrices.
+ *
+ */
+
+// Use ProductReturnType to get correct traits, in particular vectorization flags
+namespace internal {
+template<typename Lhs, typename Rhs>
+struct traits<Product<Lhs, Rhs> >
+ : traits<typename ProductReturnType<Lhs, Rhs>::Type>
+{
+ // We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix>
+ // TODO: This flag should eventually go in a separate evaluator traits class
+ enum {
+ Flags = traits<typename ProductReturnType<Lhs, Rhs>::Type>::Flags & ~(EvalBeforeNestingBit | DirectAccessBit)
+ };
+};
+} // end namespace internal
+
+
+template<typename Lhs, typename Rhs>
+class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+ typename internal::traits<Rhs>::StorageKind>::ret>
+{
+ public:
+
+ typedef typename ProductImpl<
+ Lhs, Rhs,
+ typename internal::promote_storage_type<typename Lhs::StorageKind,
+ typename Rhs::StorageKind>::ret>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
+
+ typedef typename Lhs::Nested LhsNested;
+ typedef typename Rhs::Nested RhsNested;
+ typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+ typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+
+ Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
+ {
+ eigen_assert(lhs.cols() == rhs.rows()
+ && "invalid matrix product"
+ && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
+ }
+
+ inline Index rows() const { return m_lhs.rows(); }
+ inline Index cols() const { return m_rhs.cols(); }
+
+ const LhsNestedCleaned& lhs() const { return m_lhs; }
+ const RhsNestedCleaned& rhs() const { return m_rhs; }
+
+ protected:
+
+ LhsNested m_lhs;
+ RhsNested m_rhs;
+};
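The constructor above only checks that the inner dimensions agree (lhs.cols() == rhs.rows()); the resulting expression then reports lhs.rows() x rhs.cols(). The same shape rule shown with the ordinary product operator (hedged sketch, assumes <Eigen/Dense>):

    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(2, 3);   // 2x3
      Eigen::MatrixXd B = Eigen::MatrixXd::Random(3, 4);   // 3x4

      Eigen::MatrixXd C = A * B;    // inner dimensions match (3 == 3); C is 2x4
      // Eigen::MatrixXd D = B * A; // 4 != 2: would trip the "invalid matrix product" assert
      return (C.rows() == 2 && C.cols() == 4) ? 0 : 1;
    }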
+
+template<typename Lhs, typename Rhs>
+class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
+{
+ typedef Product<Lhs, Rhs> Derived;
+ public:
+
+ typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+};
+
+/***************************************************************************
+* Implementation of matrix base methods
+***************************************************************************/
+
+
+/** \internal used to test the evaluator only
+ */
+template<typename Lhs,typename Rhs>
+const Product<Lhs,Rhs>
+prod(const Lhs& lhs, const Rhs& rhs)
+{
+ return Product<Lhs,Rhs>(lhs,rhs);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_PRODUCT_H
diff --git a/third_party/eigen3/Eigen/src/Core/ProductBase.h b/third_party/eigen3/Eigen/src/Core/ProductBase.h
new file mode 100644
index 0000000000..b6152cb8ca
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/ProductBase.h
@@ -0,0 +1,280 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PRODUCTBASE_H
+#define EIGEN_PRODUCTBASE_H
+
+namespace Eigen {
+
+/** \class ProductBase
+ * \ingroup Core_Module
+ *
+ */
+
+namespace internal {
+template<typename Derived, typename _Lhs, typename _Rhs>
+struct traits<ProductBase<Derived,_Lhs,_Rhs> >
+{
+ typedef MatrixXpr XprKind;
+ typedef typename remove_all<_Lhs>::type Lhs;
+ typedef typename remove_all<_Rhs>::type Rhs;
+ typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
+ typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
+ typename traits<Rhs>::StorageKind>::ret StorageKind;
+ typedef typename promote_index_type<typename traits<Lhs>::Index,
+ typename traits<Rhs>::Index>::type Index;
+ enum {
+ RowsAtCompileTime = traits<Lhs>::RowsAtCompileTime,
+ ColsAtCompileTime = traits<Rhs>::ColsAtCompileTime,
+ MaxRowsAtCompileTime = traits<Lhs>::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = traits<Rhs>::MaxColsAtCompileTime,
+ Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0)
+ | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit,
+ // Note that EvalBeforeNestingBit and NestByRefBit
+ // are not used in practice because nested is overloaded for products
+ CoeffReadCost = 0 // FIXME why is it needed ?
+ };
+};
+}
+
+#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \
+ typedef ProductBase<Derived, Lhs, Rhs > Base; \
+ EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
+ typedef typename Base::LhsNested LhsNested; \
+ typedef typename Base::_LhsNested _LhsNested; \
+ typedef typename Base::LhsBlasTraits LhsBlasTraits; \
+ typedef typename Base::ActualLhsType ActualLhsType; \
+ typedef typename Base::_ActualLhsType _ActualLhsType; \
+ typedef typename Base::RhsNested RhsNested; \
+ typedef typename Base::_RhsNested _RhsNested; \
+ typedef typename Base::RhsBlasTraits RhsBlasTraits; \
+ typedef typename Base::ActualRhsType ActualRhsType; \
+ typedef typename Base::_ActualRhsType _ActualRhsType; \
+ using Base::m_lhs; \
+ using Base::m_rhs;
+
+template<typename Derived, typename Lhs, typename Rhs>
+class ProductBase : public MatrixBase<Derived>
+{
+ public:
+ typedef MatrixBase<Derived> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase)
+
+ typedef typename Lhs::Nested LhsNested;
+ typedef typename internal::remove_all<LhsNested>::type _LhsNested;
+ typedef internal::blas_traits<_LhsNested> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef typename internal::remove_all<ActualLhsType>::type _ActualLhsType;
+ typedef typename internal::traits<Lhs>::Scalar LhsScalar;
+
+ typedef typename Rhs::Nested RhsNested;
+ typedef typename internal::remove_all<RhsNested>::type _RhsNested;
+ typedef internal::blas_traits<_RhsNested> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef typename internal::remove_all<ActualRhsType>::type _ActualRhsType;
+ typedef typename internal::traits<Rhs>::Scalar RhsScalar;
+
+ // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once
+ typedef CoeffBasedProduct<LhsNested, RhsNested, 0> FullyLazyCoeffBaseProductType;
+
+ public:
+
+ typedef typename Base::PlainObject PlainObject;
+
+ ProductBase(const Lhs& a_lhs, const Rhs& a_rhs)
+ : m_lhs(a_lhs), m_rhs(a_rhs)
+ {
+ eigen_assert(a_lhs.cols() == a_rhs.rows()
+ && "invalid matrix product"
+ && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
+ }
+
+ inline Index rows() const { return m_lhs.rows(); }
+ inline Index cols() const { return m_rhs.cols(); }
+
+ template<typename Dest>
+ inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); }
+
+ template<typename Dest>
+ inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); }
+
+ template<typename Dest>
+ inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
+
+ template<typename Dest>
+ inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); }
+
+ const _LhsNested& lhs() const { return m_lhs; }
+ const _RhsNested& rhs() const { return m_rhs; }
+
+ // Implicit conversion to the nested type (trigger the evaluation of the product)
+ operator const PlainObject& () const
+ {
+ m_result.resize(m_lhs.rows(), m_rhs.cols());
+ derived().evalTo(m_result);
+ return m_result;
+ }
+
+ const Diagonal<const FullyLazyCoeffBaseProductType,0> diagonal() const
+ { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
+
+ template<int Index>
+ const Diagonal<const FullyLazyCoeffBaseProductType,Index> diagonal() const
+ { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
+
+ const Diagonal<const FullyLazyCoeffBaseProductType, DynamicIndex> diagonal(Index index) const {
+ return Diagonal<const FullyLazyCoeffBaseProductType, DynamicIndex>(
+ FullyLazyCoeffBaseProductType(m_lhs, m_rhs), index);
+ }
+
+ // restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isn't an lvalue expression
+ typename Base::CoeffReturnType coeff(Index row, Index col) const
+ {
+#ifdef EIGEN2_SUPPORT
+ return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum();
+#else
+ EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+ eigen_assert(this->rows() == 1 && this->cols() == 1);
+ Matrix<Scalar,1,1> result = *this;
+ return result.coeff(row,col);
+#endif
+ }
+
+ typename Base::CoeffReturnType coeff(Index i) const
+ {
+ EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+ eigen_assert(this->rows() == 1 && this->cols() == 1);
+ Matrix<Scalar,1,1> result = *this;
+ return result.coeff(i);
+ }
+
+ const Scalar& coeffRef(Index row, Index col) const
+ {
+ EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+ eigen_assert(this->rows() == 1 && this->cols() == 1);
+ return derived().coeffRef(row,col);
+ }
+
+ const Scalar& coeffRef(Index i) const
+ {
+ EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+ eigen_assert(this->rows() == 1 && this->cols() == 1);
+ return derived().coeffRef(i);
+ }
+
+ protected:
+
+ LhsNested m_lhs;
+ RhsNested m_rhs;
+
+ mutable PlainObject m_result;
+};
+
+// here we need to overload the nested rule for products
+// such that the nested type is a const reference to a plain matrix
+namespace internal {
+template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
+struct nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
+{
+ typedef PlainObject const& type;
+};
+}
+
+template<typename NestedProduct>
+class ScaledProduct;
+
+// Note that these operator* functions are not defined as member
+// functions of ProductBase because, otherwise, we would have to
+// redefine all the overloads defined in MatrixBase. Furthermore, using
+// "using Base::operator*" would not work with MSVC.
+//
+// Also note that here we accept any compatible scalar types
+template<typename Derived,typename Lhs,typename Rhs>
+const ScaledProduct<Derived>
+operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Scalar& x)
+{ return ScaledProduct<Derived>(prod.derived(), x); }
+
+template<typename Derived,typename Lhs,typename Rhs>
+typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
+ const ScaledProduct<Derived> >::type
+operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::RealScalar& x)
+{ return ScaledProduct<Derived>(prod.derived(), x); }
+
+
+template<typename Derived,typename Lhs,typename Rhs>
+const ScaledProduct<Derived>
+operator*(const typename Derived::Scalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
+{ return ScaledProduct<Derived>(prod.derived(), x); }
+
+template<typename Derived,typename Lhs,typename Rhs>
+typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
+ const ScaledProduct<Derived> >::type
+operator*(const typename Derived::RealScalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
+{ return ScaledProduct<Derived>(prod.derived(), x); }
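+
+// Editor's note (illustrative sketch, not part of the original header): with these overloads,
+// scaling a not-yet-evaluated product does not force the product into a temporary; the product
+// is wrapped in a ScaledProduct so the factor is folded into the underlying scaleAndAddTo()
+// call (e.g. the gemm alpha). Assuming MatrixXd A, B:
+//
+//   MatrixXd C = 2.0 * (A * B);   // evaluated as C = alpha*A*B with alpha = 2.0
+//   MatrixXd D = (A * B) * 0.5;   // same mechanism, scalar on the right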
+
+namespace internal {
+template<typename NestedProduct>
+struct traits<ScaledProduct<NestedProduct> >
+ : traits<ProductBase<ScaledProduct<NestedProduct>,
+ typename NestedProduct::_LhsNested,
+ typename NestedProduct::_RhsNested> >
+{
+ typedef typename traits<NestedProduct>::StorageKind StorageKind;
+};
+}
+
+template<typename NestedProduct>
+class ScaledProduct
+ : public ProductBase<ScaledProduct<NestedProduct>,
+ typename NestedProduct::_LhsNested,
+ typename NestedProduct::_RhsNested>
+{
+ public:
+ typedef ProductBase<ScaledProduct<NestedProduct>,
+ typename NestedProduct::_LhsNested,
+ typename NestedProduct::_RhsNested> Base;
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::PlainObject PlainObject;
+// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)
+
+ ScaledProduct(const NestedProduct& prod, const Scalar& x)
+ : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}
+
+ template<typename Dest>
+ inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); }
+
+ template<typename Dest>
+ inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); }
+
+ template<typename Dest>
+ inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
+
+ template<typename Dest>
+ inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }
+
+ const Scalar& alpha() const { return m_alpha; }
+
+ protected:
+ const NestedProduct& m_prod;
+ Scalar m_alpha;
+};
+
+/** \internal
+ * Overloaded to perform an efficient C = (A*B).lazy() */
+template<typename Derived>
+template<typename ProductDerived, typename Lhs, typename Rhs>
+Derived& MatrixBase<Derived>::lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other)
+{
+ other.derived().evalTo(derived());
+ return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_PRODUCTBASE_H
diff --git a/third_party/eigen3/Eigen/src/Core/ProductEvaluators.h b/third_party/eigen3/Eigen/src/Core/ProductEvaluators.h
new file mode 100644
index 0000000000..855914f2eb
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/ProductEvaluators.h
@@ -0,0 +1,411 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_PRODUCTEVALUATORS_H
+#define EIGEN_PRODUCTEVALUATORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or
+// traverse the matrix coefficient by coefficient, like CoeffBasedProduct. Use the existing logic
+// in ProductReturnType to decide.
+
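+// Editor's note (illustrative, not part of the original header): for instance, with
+// MatrixXf A(100,100), B(100,100) and VectorXf v(100):
+//   A * B  -> GeneralProduct<...,GemmProduct>       -> Case 1 below (evaluate all at once),
+//   A * v  -> GeneralProduct<...,GemvProduct>       -> Case 1,
+//   small fixed-size products -> CoeffBasedProduct  -> Case 2 (coefficient by coefficient).
+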
+template<typename XprType, typename ProductType>
+struct product_evaluator_dispatcher;
+
+template<typename Lhs, typename Rhs>
+struct evaluator_impl<Product<Lhs, Rhs> >
+ : product_evaluator_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type>
+{
+ typedef Product<Lhs, Rhs> XprType;
+ typedef product_evaluator_dispatcher<XprType, typename ProductReturnType<Lhs, Rhs>::Type> Base;
+
+ evaluator_impl(const XprType& xpr) : Base(xpr)
+ { }
+};
+
+template<typename XprType, typename ProductType>
+struct product_evaluator_traits_dispatcher;
+
+template<typename Lhs, typename Rhs>
+struct evaluator_traits<Product<Lhs, Rhs> >
+ : product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type>
+{
+ static const int AssumeAliasing = 1;
+};
+
+// Case 1: Evaluate all at once
+//
+// We can view the GeneralProduct class as a part of the product evaluator.
+// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct.
+// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case.
+
+template<typename Lhs, typename Rhs>
+struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> >
+{
+ static const int HasEvalTo = 0;
+};
+
+template<typename Lhs, typename Rhs>
+struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> >
+ : public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type
+{
+ typedef Product<Lhs, Rhs> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ typedef typename evaluator<PlainObject>::type evaluator_base;
+
+ // TODO: Computation is too early (?)
+ product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result)
+ {
+ m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum();
+ }
+
+protected:
+ PlainObject m_result;
+};
+
+// For the other three subcases, simply call the evalTo() method of GeneralProduct
+// TODO: GeneralProduct should take evaluators, not expression objects.
+
+template<typename Lhs, typename Rhs, int ProductType>
+struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> >
+{
+ static const int HasEvalTo = 1;
+};
+
+template<typename Lhs, typename Rhs, int ProductType>
+struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> >
+{
+ typedef Product<Lhs, Rhs> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ typedef typename evaluator<PlainObject>::type evaluator_base;
+
+ product_evaluator_dispatcher(const XprType& xpr) : m_xpr(xpr)
+ { }
+
+ template<typename DstEvaluatorType, typename DstXprType>
+ void evalTo(DstEvaluatorType /* not used */, DstXprType& dst) const
+ {
+ dst.resize(m_xpr.rows(), m_xpr.cols());
+ GeneralProduct<Lhs, Rhs, ProductType>(m_xpr.lhs(), m_xpr.rhs()).evalTo(dst);
+ }
+
+protected:
+ const XprType& m_xpr;
+};
+
+// Case 2: Evaluate coeff by coeff
+//
+// This is mostly taken from CoeffBasedProduct.h
+// The main difference is that we add an extra argument to the etor_product_*_impl::run() function
+// for the inner dimension of the product, because evaluator objects do not know their size.
+
+template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
+struct etor_product_coeff_impl;
+
+template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl;
+
+template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
+struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
+{
+ static const int HasEvalTo = 0;
+};
+
+template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
+struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
+ : evaluator_impl_base<Product<Lhs, Rhs> >
+{
+ typedef Product<Lhs, Rhs> XprType;
+ typedef CoeffBasedProduct<LhsNested, RhsNested, Flags> CoeffBasedProductType;
+
+ product_evaluator_dispatcher(const XprType& xpr)
+ : m_lhsImpl(xpr.lhs()),
+ m_rhsImpl(xpr.rhs()),
+ m_innerDim(xpr.lhs().cols())
+ { }
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketScalar PacketScalar;
+ typedef typename XprType::PacketReturnType PacketReturnType;
+
+ // Everything below here is taken from CoeffBasedProduct.h
+
+ enum {
+ RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime,
+ PacketSize = packet_traits<Scalar>::size,
+ InnerSize = traits<CoeffBasedProductType>::InnerSize,
+ CoeffReadCost = traits<CoeffBasedProductType>::CoeffReadCost,
+ Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
+ CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner
+ };
+
+ typedef typename evaluator<Lhs>::type LhsEtorType;
+ typedef typename evaluator<Rhs>::type RhsEtorType;
+ typedef etor_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
+ Unroll ? InnerSize-1 : Dynamic,
+ LhsEtorType, RhsEtorType, Scalar> CoeffImpl;
+
+ const CoeffReturnType coeff(Index row, Index col) const
+ {
+ Scalar res;
+ CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
+ return res;
+ }
+
+ /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
+ * which is why we don't set the LinearAccessBit.
+ */
+ const CoeffReturnType coeff(Index index) const
+ {
+ Scalar res;
+ const Index row = RowsAtCompileTime == 1 ? 0 : index;
+ const Index col = RowsAtCompileTime == 1 ? index : 0;
+ CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
+ return res;
+ }
+
+ template<int LoadMode>
+ const PacketReturnType packet(Index row, Index col) const
+ {
+ PacketScalar res;
+ typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
+ Unroll ? InnerSize-1 : Dynamic,
+ LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
+ PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
+ return res;
+ }
+
+protected:
+ typename evaluator<Lhs>::type m_lhsImpl;
+ typename evaluator<Rhs>::type m_rhsImpl;
+
+ // TODO: Get rid of m_innerDim if known at compile time
+ Index m_innerDim;
+};
+
+/***************************************************************************
+* Normal product .coeff() implementation (with meta-unrolling)
+***************************************************************************/
+
+/**************************************
+*** Scalar path - no vectorization ***
+**************************************/
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
+struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+ {
+ etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
+ res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename RetScalar>
+struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
+ {
+ res = lhs.coeff(row, 0) * rhs.coeff(0, col);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename RetScalar>
+struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
+ {
+ eigen_assert(innerDim>0 && "you are using a non initialized matrix");
+ res = lhs.coeff(row, 0) * rhs.coeff(0, col);
+ for(Index i = 1; i < innerDim; ++i)
+ res += lhs.coeff(row, i) * rhs.coeff(i, col);
+ }
+};
+
+/*******************************************
+*** Scalar path with inner vectorization ***
+*******************************************/
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
+struct etor_product_coeff_vectorized_unroller
+{
+ typedef typename Lhs::Index Index;
+ enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
+ {
+ etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
+ pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet>
+struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
+ {
+ pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
+ }
+};
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
+struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
+{
+ typedef typename Lhs::PacketScalar Packet;
+ typedef typename Lhs::Index Index;
+ enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+ {
+ Packet pres;
+ etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
+ etor_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, innerDim, res);
+ res = predux(pres);
+ }
+};
+
+template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
+struct etor_product_coeff_vectorized_dyn_selector
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ {
+ res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
+ }
+};
+
+// NOTE the following three specializations exist because taking .col(0) on a vector is a bit slower
+// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
+template<typename Lhs, typename Rhs, int RhsCols>
+struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ {
+ res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
+ }
+};
+
+template<typename Lhs, typename Rhs, int LhsRows>
+struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ {
+ res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
+ }
+};
+
+template<typename Lhs, typename Rhs>
+struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
+{
+ typedef typename Lhs::Index Index;
+ EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ {
+ res = lhs.transpose().cwiseProduct(rhs).sum();
+ }
+};
+
+template<typename Lhs, typename Rhs, typename RetScalar>
+struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
+ {
+ etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
+ }
+};
+
+/*******************
+*** Packet path ***
+*******************/
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ {
+ etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+ res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
+ }
+};
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ {
+ etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+ res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+ {
+ res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+ {
+ res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ {
+ eigen_assert(innerDim>0 && "you are using a non initialized matrix");
+ res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
+ for(Index i = 1; i < innerDim; ++i)
+ res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ {
+ eigen_assert(innerDim>0 && "you are using a non initialized matrix");
+ res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
+ for(Index i = 1; i < innerDim; ++i)
+ res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
+ }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PRODUCTEVALUATORS_H
diff --git a/third_party/eigen3/Eigen/src/Core/Random.h b/third_party/eigen3/Eigen/src/Core/Random.h
new file mode 100644
index 0000000000..2d3a7243bc
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Random.h
@@ -0,0 +1,193 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_RANDOM_H
+#define EIGEN_RANDOM_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename Scalar> struct scalar_random_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op)
+
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const {
+#ifndef __CUDA_ARCH__
+ // We're not compiling a cuda kernel
+ return random<Scalar>();
+#else
+ // We're trying to generate a random number from a cuda kernel.
+ assert(false && "Generating random numbers on gpu isn't supported yet");
+ return Scalar(0);
+#endif
+ }
+};
+
+template<typename Scalar>
+struct functor_traits<scalar_random_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false, IsRepeatable = false }; };
+
+} // end namespace internal
+
+/** \returns a random matrix expression
+ *
+ * Numbers are uniformly spread through their whole definition range for integer types,
+ * and in the [-1:1] range for floating point scalar types.
+ *
+ * The parameters \a rows and \a cols are the number of rows and of columns of
+ * the returned matrix. Must be compatible with this MatrixBase type.
+ *
+ * \not_reentrant
+ *
+ * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+ * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used
+ * instead.
+ *
+ *
+ * Example: \include MatrixBase_random_int_int.cpp
+ * Output: \verbinclude MatrixBase_random_int_int.out
+ *
+ * This expression has the "evaluate before nesting" flag so that it will be evaluated into
+ * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
+ * behavior with expressions involving random matrices.
+ *
+ * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.
+ *
+ * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
+ */
+template<typename Derived>
+inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
+DenseBase<Derived>::Random(Index rows, Index cols)
+{
+ return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());
+}
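+
+// Editor's note (illustrative, not part of the original header): typical usage of the
+// dynamic-size variant, assuming the usual Eigen typedefs:
+//
+//   Eigen::MatrixXd m = Eigen::MatrixXd::Random(3, 4);   // 3x4 matrix, entries in [-1,1]
+//   Eigen::VectorXi v = Eigen::VectorXi::Random(5);      // uses the size-only overload below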
+
+/** \returns a random vector expression
+ *
+ * Numbers are uniformly spread through their whole definition range for integer types,
+ * and in the [-1:1] range for floating point scalar types.
+ *
+ * The parameter \a size is the size of the returned vector.
+ * Must be compatible with this MatrixBase type.
+ *
+ * \only_for_vectors
+ * \not_reentrant
+ *
+ * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+ * it is redundant to pass \a size as argument, so Random() should be used
+ * instead.
+ *
+ * Example: \include MatrixBase_random_int.cpp
+ * Output: \verbinclude MatrixBase_random_int.out
+ *
+ * This expression has the "evaluate before nesting" flag so that it will be evaluated into
+ * a temporary vector whenever it is nested in a larger expression. This prevents unexpected
+ * behavior with expressions involving random matrices.
+ *
+ * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
+ */
+template<typename Derived>
+inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
+DenseBase<Derived>::Random(Index size)
+{
+ return NullaryExpr(size, internal::scalar_random_op<Scalar>());
+}
+
+/** \returns a fixed-size random matrix or vector expression
+ *
+ * Numbers are uniformly spread through their whole definition range for integer types,
+ * and in the [-1:1] range for floating point scalar types.
+ *
+ * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
+ * need to use the variants taking size arguments.
+ *
+ * Example: \include MatrixBase_random.cpp
+ * Output: \verbinclude MatrixBase_random.out
+ *
+ * This expression has the "evaluate before nesting" flag so that it will be evaluated into
+ * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
+ * behavior with expressions involving random matrices.
+ *
+ * \not_reentrant
+ *
+ * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
+ */
+template<typename Derived>
+inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
+DenseBase<Derived>::Random()
+{
+ return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());
+}
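+
+// Editor's note (illustrative, not part of the original header): for fixed-size types the
+// argument-less variant is the one to use, e.g.:
+//
+//   Eigen::Matrix3f r = Eigen::Matrix3f::Random();   // 3x3, entries in [-1,1]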
+
+/** Sets all coefficients in this expression to random values.
+ *
+ * Numbers are uniformly spread through their whole definition range for integer types,
+ * and in the [-1:1] range for floating point scalar types.
+ *
+ * \not_reentrant
+ *
+ * Example: \include MatrixBase_setRandom.cpp
+ * Output: \verbinclude MatrixBase_setRandom.out
+ *
+ * \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index)
+ */
+template<typename Derived>
+inline Derived& DenseBase<Derived>::setRandom()
+{
+ return *this = Random(rows(), cols());
+}
+
+/** Resizes to the given \a newSize, and sets all coefficients in this expression to random values.
+ *
+ * Numbers are uniformly spread through their whole definition range for integer types,
+ * and in the [-1:1] range for floating point scalar types.
+ *
+ * \only_for_vectors
+ * \not_reentrant
+ *
+ * Example: \include Matrix_setRandom_int.cpp
+ * Output: \verbinclude Matrix_setRandom_int.out
+ *
+ * \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setRandom(Index newSize)
+{
+ resize(newSize);
+ return setRandom();
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to random values.
+ *
+ * Numbers are uniformly spread through their whole definition range for integer types,
+ * and in the [-1:1] range for floating point scalar types.
+ *
+ * \not_reentrant
+ *
+ * \param nbRows the new number of rows
+ * \param nbCols the new number of columns
+ *
+ * Example: \include Matrix_setRandom_int_int.cpp
+ * Output: \verbinclude Matrix_setRandom_int_int.out
+ *
+ * \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setRandom(Index nbRows, Index nbCols)
+{
+ resize(nbRows, nbCols);
+ return setRandom();
+}
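+
+// Editor's note (illustrative, not part of the original header): the resizing variants are
+// handy when the destination has not been sized yet, e.g.:
+//
+//   Eigen::MatrixXf m;
+//   m.setRandom(2, 6);   // resizes to 2x6, then fills with random values
+//   Eigen::VectorXd v;
+//   v.setRandom(10);     // vector-only overload: resizes to 10, then fills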
+
+} // end namespace Eigen
+
+#endif // EIGEN_RANDOM_H
diff --git a/third_party/eigen3/Eigen/src/Core/Redux.h b/third_party/eigen3/Eigen/src/Core/Redux.h
new file mode 100644
index 0000000000..5b82c9a654
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Redux.h
@@ -0,0 +1,417 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REDUX_H
+#define EIGEN_REDUX_H
+
+namespace Eigen {
+
+namespace internal {
+
+// TODO
+// * implement other kind of vectorization
+// * factorize code
+
+/***************************************************************************
+* Part 1 : the logic deciding a strategy for vectorization and unrolling
+***************************************************************************/
+
+template<typename Func, typename Derived>
+struct redux_traits
+{
+public:
+ enum {
+ PacketSize = packet_traits<typename Derived::Scalar>::size,
+ InnerMaxSize = int(Derived::IsRowMajor)
+ ? Derived::MaxColsAtCompileTime
+ : Derived::MaxRowsAtCompileTime
+ };
+
+ enum {
+ MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
+ && (functor_traits<Func>::PacketAccess),
+ MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
+ MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
+ };
+
+public:
+ enum {
+ Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+ : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
+ : int(DefaultTraversal)
+ };
+
+public:
+ enum {
+ Cost = ( Derived::SizeAtCompileTime == Dynamic
+ || Derived::CoeffReadCost == Dynamic
+ || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
+ ) ? Dynamic
+ : Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
+ UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
+ };
+
+public:
+ enum {
+ Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
+ ? CompleteUnrolling
+ : NoUnrolling
+ };
+};
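+
+// Editor's note (illustrative walk-through, not part of the original header): for a Vector4f
+// reduced with scalar_sum_op<float>, the packet flags together with LinearAccessBit make
+// MayLinearVectorize true, so Traversal = LinearVectorizedTraversal; the estimated Cost
+// (4 coefficient reads + 3 additions) is far below UnrollingLimit, so Unrolling resolves to
+// CompleteUnrolling and the whole reduction is unrolled at compile time.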
+
+/***************************************************************************
+* Part 2 : unrollers
+***************************************************************************/
+
+/*** no vectorization ***/
+
+template<typename Func, typename Derived, int Start, int Length>
+struct redux_novec_unroller
+{
+ enum {
+ HalfLength = Length/2
+ };
+
+ typedef typename Derived::Scalar Scalar;
+
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+ {
+ return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+ redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
+ }
+};
+
+template<typename Func, typename Derived, int Start>
+struct redux_novec_unroller<Func, Derived, Start, 1>
+{
+ enum {
+ outer = Start / Derived::InnerSizeAtCompileTime,
+ inner = Start % Derived::InnerSizeAtCompileTime
+ };
+
+ typedef typename Derived::Scalar Scalar;
+
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
+ {
+ return mat.coeffByOuterInner(outer, inner);
+ }
+};
+
+// This is actually dead code and will never be called. It is required only
+// to prevent false warnings about failed inlining, since for a length of 0
+// run() will never be called at all.
+template<typename Func, typename Derived, int Start>
+struct redux_novec_unroller<Func, Derived, Start, 0>
+{
+ typedef typename Derived::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
+};
+
+/*** vectorization ***/
+
+template<typename Func, typename Derived, int Start, int Length>
+struct redux_vec_unroller
+{
+ enum {
+ PacketSize = packet_traits<typename Derived::Scalar>::size,
+ HalfLength = Length/2
+ };
+
+ typedef typename Derived::Scalar Scalar;
+ typedef typename packet_traits<Scalar>::type PacketScalar;
+
+ static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
+ {
+ return func.packetOp(
+ redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+ redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
+ }
+};
+
+template<typename Func, typename Derived, int Start>
+struct redux_vec_unroller<Func, Derived, Start, 1>
+{
+ enum {
+ index = Start * packet_traits<typename Derived::Scalar>::size,
+ outer = index / int(Derived::InnerSizeAtCompileTime),
+ inner = index % int(Derived::InnerSizeAtCompileTime),
+ alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
+ };
+
+ typedef typename Derived::Scalar Scalar;
+ typedef typename packet_traits<Scalar>::type PacketScalar;
+
+ static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
+ {
+ return mat.template packetByOuterInner<alignment>(outer, inner);
+ }
+};
+
+/***************************************************************************
+* Part 3 : implementation of all cases
+***************************************************************************/
+
+template<typename Func, typename Derived,
+ int Traversal = redux_traits<Func, Derived>::Traversal,
+ int Unrolling = redux_traits<Func, Derived>::Unrolling
+>
+struct redux_impl;
+
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename Derived::Index Index;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
+ {
+ eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+ Scalar res;
+ res = mat.coeffByOuterInner(0, 0);
+ for(Index i = 1; i < mat.innerSize(); ++i)
+ res = func(res, mat.coeffByOuterInner(0, i));
+ for(Index i = 1; i < mat.outerSize(); ++i)
+ for(Index j = 0; j < mat.innerSize(); ++j)
+ res = func(res, mat.coeffByOuterInner(i, j));
+ return res;
+ }
+};
+
+template<typename Func, typename Derived>
+struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
+ : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
+{};
+
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename packet_traits<Scalar>::type PacketScalar;
+ typedef typename Derived::Index Index;
+
+ static Scalar run(const Derived& mat, const Func& func)
+ {
+ const Index size = mat.size();
+ eigen_assert(size && "you are using an empty matrix");
+ const Index packetSize = packet_traits<Scalar>::size;
+ const Index alignedStart = internal::first_aligned(mat);
+ enum {
+ alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
+ ? Aligned : Unaligned
+ };
+ const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
+ const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
+ const Index alignedEnd2 = alignedStart + alignedSize2;
+ const Index alignedEnd = alignedStart + alignedSize;
+ Scalar res;
+ if(alignedSize)
+ {
+ PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
+ if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
+ {
+ PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
+ for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
+ {
+ packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
+ packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
+ }
+
+ packet_res0 = func.packetOp(packet_res0,packet_res1);
+ if(alignedEnd>alignedEnd2)
+ packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
+ }
+ res = func.predux(packet_res0);
+
+ for(Index index = 0; index < alignedStart; ++index)
+ res = func(res,mat.coeff(index));
+
+ for(Index index = alignedEnd; index < size; ++index)
+ res = func(res,mat.coeff(index));
+ }
+ else // too small to vectorize anything.
+ // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
+ {
+ res = mat.coeff(0);
+ for(Index index = 1; index < size; ++index)
+ res = func(res,mat.coeff(index));
+ }
+
+ return res;
+ }
+};
+
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename packet_traits<Scalar>::type PacketScalar;
+ typedef typename Derived::Index Index;
+
+ static Scalar run(const Derived& mat, const Func& func)
+ {
+ eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+ const Index innerSize = mat.innerSize();
+ const Index outerSize = mat.outerSize();
+ enum {
+ packetSize = packet_traits<Scalar>::size
+ };
+ const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
+ Scalar res;
+ if(packetedInnerSize)
+ {
+ PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
+ for(Index j=0; j<outerSize; ++j)
+ for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
+ packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
+
+ res = func.predux(packet_res);
+ for(Index j=0; j<outerSize; ++j)
+ for(Index i=packetedInnerSize; i<innerSize; ++i)
+ res = func(res, mat.coeffByOuterInner(j,i));
+ }
+ else // too small to vectorize anything.
+ // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
+ {
+ res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
+ }
+
+ return res;
+ }
+};
+
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename packet_traits<Scalar>::type PacketScalar;
+ enum {
+ PacketSize = packet_traits<Scalar>::size,
+ Size = Derived::SizeAtCompileTime,
+ VectorizedSize = (Size / PacketSize) * PacketSize
+ };
+ static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
+ {
+ eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+ if (VectorizedSize > 0) {
+ Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
+ if (VectorizedSize != Size)
+ res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
+ return res;
+ }
+ else {
+ return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
+ }
+ }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* Part 4 : public API
+***************************************************************************/
+
+
+/** \returns the result of a full redux operation on the whole matrix or vector using \a func
+ *
+ * The template parameter \a Func is the type of the functor \a func, which must be
+ * an associative binary operator. Both current STL and TR1 functor styles are handled.
+ *
+ * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
+ */
+template<typename Derived>
+template<typename Func>
+EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
+DenseBase<Derived>::redux(const Func& func) const
+{
+ typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
+ return internal::redux_impl<Func, ThisNested>
+ ::run(derived(), func);
+}
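+
+// Editor's note (illustrative, not part of the original header): redux() accepts any
+// associative binary functor; with a plain STL-style functor (from <functional>) this
+// computes the same result as mat.sum(), assuming Eigen::MatrixXf mat:
+//
+//   float total = mat.redux(std::plus<float>());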
+
+/** \returns the minimum of all coefficients of \c *this.
+ * \warning the result is undefined if \c *this contains NaN.
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::minCoeff() const
+{
+ return this->redux(Eigen::internal::scalar_min_op<Scalar>());
+}
+
+/** \returns the maximum of all coefficients of \c *this.
+ * \warning the result is undefined if \c *this contains NaN.
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::maxCoeff() const
+{
+ return this->redux(Eigen::internal::scalar_max_op<Scalar>());
+}
+
+/** \returns the sum of all coefficients of *this
+ *
+ * \sa trace(), prod(), mean()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::sum() const
+{
+ if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
+ return Scalar(0);
+ return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
+}
+
+/** \returns the mean of all coefficients of *this
+*
+* \sa trace(), prod(), sum()
+*/
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::mean() const
+{
+ return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
+}
+
+/** \returns the product of all coefficients of *this
+ *
+ * Example: \include MatrixBase_prod.cpp
+ * Output: \verbinclude MatrixBase_prod.out
+ *
+ * \sa sum(), mean(), trace()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::prod() const
+{
+ if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
+ return Scalar(1);
+ return this->redux(Eigen::internal::scalar_product_op<Scalar>());
+}
+
+/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
+ *
+ * \c *this can be any matrix, not necessarily square.
+ *
+ * \sa diagonal(), sum()
+ */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+MatrixBase<Derived>::trace() const
+{
+ return derived().diagonal().sum();
+}
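+
+// Editor's note (illustrative, not part of the original header): the reductions above are
+// thin wrappers around redux(); e.g. assuming Eigen::Matrix2d m:
+//
+//   m << 1, 2,
+//        3, 4;
+//   double s = m.sum();     // 10
+//   double p = m.prod();    // 24
+//   double a = m.mean();    // 2.5
+//   double t = m.trace();   // 5 (1 + 4), sum of the main diagonal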
+
+} // end namespace Eigen
+
+#endif // EIGEN_REDUX_H
diff --git a/third_party/eigen3/Eigen/src/Core/Ref.h b/third_party/eigen3/Eigen/src/Core/Ref.h
new file mode 100644
index 0000000000..cd6d949c4c
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Ref.h
@@ -0,0 +1,260 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REF_H
+#define EIGEN_REF_H
+
+namespace Eigen {
+
+template<typename Derived> class RefBase;
+template<typename PlainObjectType, int Options = 0,
+ typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
+
+/** \class Ref
+ * \ingroup Core_Module
+ *
+ * \brief A matrix or vector expression mapping an existing expression
+ *
+ * \tparam PlainObjectType the equivalent matrix type of the mapped data
+ * \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned.
+ * The default is \c #Unaligned.
+ * \tparam StrideType optionally specifies strides. By default, Ref implies contiguous storage along the inner dimension (inner stride==1),
+ * but accepts a variable outer stride (leading dimension).
+ * This can be overridden by specifying strides.
+ * The type passed here must be a specialization of the Stride template, see examples below.
+ *
+ * This class makes it possible to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
+ * A Ref<> object can represent either a const expression or an l-value:
+ * \code
+ * // in-out argument:
+ * void foo1(Ref<VectorXf> x);
+ *
+ * // read-only const argument:
+ * void foo2(const Ref<const VectorXf>& x);
+ * \endcode
+ *
+ * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation error will be triggered.
+ * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
+ * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose columns' elements are stored contiguously,
+ * with the possibility of a constant gap between columns, i.e.: the inner stride must be equal to 1, but the outer stride (or leading dimension)
+ * can be greater than the number of rows.
+ *
+ * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
+ * Here are some examples:
+ * \code
+ * MatrixXf A;
+ * VectorXf a;
+ * foo1(a.head()); // OK
+ * foo1(A.col()); // OK
+ * foo1(A.row()); // compilation error because here innerstride!=1
+ * foo2(A.row()); // The row is copied into a contiguous temporary
+ * foo2(2*a); // The expression is evaluated into a temporary
+ * foo2(A.col().segment(2,4)); // No temporary
+ * \endcode
+ *
+ * The range of inputs that can be referenced without a temporary can be enlarged using the last two template parameters.
+ * Here is an example accepting an innerstride!=1:
+ * \code
+ * // in-out argument:
+ * void foo3(Ref<VectorXf,0,InnerStride<> > x);
+ * foo3(A.row()); // OK
+ * \endcode
+ * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
+ * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose overloads internally calling a
+ * template function, e.g.:
+ * \code
+ * // in the .h:
+ * void foo(const Ref<MatrixXf>& A);
+ * void foo(const Ref<MatrixXf,0,Stride<> >& A);
+ *
+ * // in the .cpp:
+ * template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
+ * ... // crazy code goes here
+ * }
+ * void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
+ * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
+ * \endcode
+ *
+ *
+ * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
+ */
+
+namespace internal {
+
+template<typename _PlainObjectType, int _Options, typename _StrideType>
+struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
+ : public traits<Map<_PlainObjectType, _Options, _StrideType> >
+{
+ typedef _PlainObjectType PlainObjectType;
+ typedef _StrideType StrideType;
+ enum {
+ Options = _Options,
+ Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit
+ };
+
+ template<typename Derived> struct match {
+ enum {
+ HasDirectAccess = internal::has_direct_access<Derived>::ret,
+ StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),
+ InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic)
+ || int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime)
+ || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
+ OuterStrideMatch = Derived::IsVectorAtCompileTime
+ || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
+ AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
+ MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch
+ };
+ typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
+ };
+
+};
+
+template<typename Derived>
+struct traits<RefBase<Derived> > : public traits<Derived> {};
+
+}
+
+template<typename Derived> class RefBase
+ : public MapBase<Derived>
+{
+ typedef typename internal::traits<Derived>::PlainObjectType PlainObjectType;
+ typedef typename internal::traits<Derived>::StrideType StrideType;
+
+public:
+
+ typedef MapBase<Derived> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(RefBase)
+
+ inline Index innerStride() const
+ {
+ return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
+ }
+
+ inline Index outerStride() const
+ {
+ return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
+ : IsVectorAtCompileTime ? this->size()
+ : int(Flags)&RowMajorBit ? this->cols()
+ : this->rows();
+ }
+
+ RefBase()
+ : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime),
+ // Stride<> does not allow a default ctor for Dynamic strides, so let's initialize it with dummy values:
+ m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,
+ StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime)
+ {}
+
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase)
+
+protected:
+
+ typedef Stride<StrideType::OuterStrideAtCompileTime,StrideType::InnerStrideAtCompileTime> StrideBase;
+
+ template<typename Expression>
+ void construct(Expression& expr)
+ {
+ if(PlainObjectType::RowsAtCompileTime==1)
+ {
+ eigen_assert(expr.rows()==1 || expr.cols()==1);
+ ::new (static_cast<Base*>(this)) Base(expr.data(), 1, expr.size());
+ }
+ else if(PlainObjectType::ColsAtCompileTime==1)
+ {
+ eigen_assert(expr.rows()==1 || expr.cols()==1);
+ ::new (static_cast<Base*>(this)) Base(expr.data(), expr.size(), 1);
+ }
+ else
+ ::new (static_cast<Base*>(this)) Base(expr.data(), expr.rows(), expr.cols());
+
+ if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit)))
+ ::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1);
+ else
+ ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
+ StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
+ }
+
+ StrideBase m_stride;
+};
+
+
+template<typename PlainObjectType, int Options, typename StrideType> class Ref
+ : public RefBase<Ref<PlainObjectType, Options, StrideType> >
+{
+ typedef internal::traits<Ref> Traits;
+ public:
+
+ typedef RefBase<Ref> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
+
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename Derived>
+ inline Ref(PlainObjectBase<Derived>& expr,
+ typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
+ {
+ Base::construct(expr);
+ }
+ template<typename Derived>
+ inline Ref(const DenseBase<Derived>& expr,
+ typename internal::enable_if<bool(internal::is_lvalue<Derived>::value&&bool(Traits::template match<Derived>::MatchAtCompileTime)),Derived>::type* = 0,
+ int = Derived::ThisConstantIsPrivateInPlainObjectBase)
+ #else
+ template<typename Derived>
+ inline Ref(DenseBase<Derived>& expr)
+ #endif
+ {
+ Base::construct(expr.const_cast_derived());
+ }
+
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Ref)
+
+};
+
+// this is the const ref version
+template<typename TPlainObjectType, int Options, typename StrideType> class Ref<const TPlainObjectType, Options, StrideType>
+ : public RefBase<Ref<const TPlainObjectType, Options, StrideType> >
+{
+ typedef internal::traits<Ref> Traits;
+ public:
+
+ typedef RefBase<Ref> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
+
+ template<typename Derived>
+ inline Ref(const DenseBase<Derived>& expr)
+ {
+// std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
+// std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
+// std::cout << int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n";
+ construct(expr.derived(), typename Traits::template match<Derived>::type());
+ }
+
+ protected:
+
+ template<typename Expression>
+ void construct(const Expression& expr,internal::true_type)
+ {
+ Base::construct(expr);
+ }
+
+ template<typename Expression>
+ void construct(const Expression& expr, internal::false_type)
+ {
+ m_object.lazyAssign(expr);
+ Base::construct(m_object);
+ }
+
+ protected:
+ TPlainObjectType m_object;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_REF_H
diff --git a/third_party/eigen3/Eigen/src/Core/Replicate.h b/third_party/eigen3/Eigen/src/Core/Replicate.h
new file mode 100644
index 0000000000..dde86a8349
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Replicate.h
@@ -0,0 +1,177 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REPLICATE_H
+#define EIGEN_REPLICATE_H
+
+namespace Eigen {
+
+/**
+ * \class Replicate
+ * \ingroup Core_Module
+ *
+ * \brief Expression of the multiple replication of a matrix or vector
+ *
+ * \param MatrixType the type of the object we are replicating
+ *
+ * This class represents an expression of the multiple replication of a matrix or vector.
+ * It is the return type of DenseBase::replicate() and most of the time
+ * this is the only way it is used.
+ *
+ * \sa DenseBase::replicate()
+ */
+
+namespace internal {
+template<typename MatrixType,int RowFactor,int ColFactor>
+struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
+ : traits<MatrixType>
+{
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename traits<MatrixType>::StorageKind StorageKind;
+ typedef typename traits<MatrixType>::XprKind XprKind;
+ enum {
+ Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
+ };
+ typedef typename nested<MatrixType,Factor>::type MatrixTypeNested;
+ typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+ enum {
+ RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
+ ? Dynamic
+ : RowFactor * MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic
+ ? Dynamic
+ : ColFactor * MatrixType::ColsAtCompileTime,
+ //FIXME we don't propagate the max sizes !!!
+ MaxRowsAtCompileTime = RowsAtCompileTime,
+ MaxColsAtCompileTime = ColsAtCompileTime,
+ IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
+ : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
+ : (MatrixType::Flags & RowMajorBit) ? 1 : 0,
+ Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0),
+ CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+ };
+};
+}
+
+template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
+ : public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
+{
+ typedef typename internal::traits<Replicate>::MatrixTypeNested MatrixTypeNested;
+ typedef typename internal::traits<Replicate>::_MatrixTypeNested _MatrixTypeNested;
+ public:
+
+ typedef typename internal::dense_xpr_base<Replicate>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
+
+ template<typename OriginalMatrixType>
+ inline explicit Replicate(const OriginalMatrixType& a_matrix)
+ : m_matrix(a_matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
+ {
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
+ eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
+ }
+
+ template<typename OriginalMatrixType>
+ inline Replicate(const OriginalMatrixType& a_matrix, Index rowFactor, Index colFactor)
+ : m_matrix(a_matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
+ {
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
+ }
+
+ inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
+ inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
+
+ inline Scalar coeff(Index rowId, Index colId) const
+ {
+ // try to avoid using modulo; this is a pure optimization strategy
+ const Index actual_row = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
+ : RowFactor==1 ? rowId
+ : rowId%m_matrix.rows();
+ const Index actual_col = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
+ : ColFactor==1 ? colId
+ : colId%m_matrix.cols();
+
+ return m_matrix.coeff(actual_row, actual_col);
+ }
+ template<int LoadMode>
+ inline PacketScalar packet(Index rowId, Index colId) const
+ {
+ const Index actual_row = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
+ : RowFactor==1 ? rowId
+ : rowId%m_matrix.rows();
+ const Index actual_col = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
+ : ColFactor==1 ? colId
+ : colId%m_matrix.cols();
+
+ return m_matrix.template packet<LoadMode>(actual_row, actual_col);
+ }
+
+ const _MatrixTypeNested& nestedExpression() const
+ {
+ return m_matrix;
+ }
+
+ protected:
+ MatrixTypeNested m_matrix;
+ const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
+ const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
+};
+
+/**
+ * \return an expression of the replication of \c *this
+ *
+ * Example: \include MatrixBase_replicate.cpp
+ * Output: \verbinclude MatrixBase_replicate.out
+ *
+ * \sa VectorwiseOp::replicate(), DenseBase::replicate(Index,Index), class Replicate
+ */
+template<typename Derived>
+template<int RowFactor, int ColFactor>
+inline const Replicate<Derived,RowFactor,ColFactor>
+DenseBase<Derived>::replicate() const
+{
+ return Replicate<Derived,RowFactor,ColFactor>(derived());
+}
+
+/**
+ * \return an expression of the replication of \c *this
+ *
+ * Example: \include MatrixBase_replicate_int_int.cpp
+ * Output: \verbinclude MatrixBase_replicate_int_int.out
+ *
+ * \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
+ */
+template<typename Derived>
+inline const Replicate<Derived,Dynamic,Dynamic>
+DenseBase<Derived>::replicate(Index rowFactor,Index colFactor) const
+{
+ return Replicate<Derived,Dynamic,Dynamic>(derived(),rowFactor,colFactor);
+}
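+
+// A minimal usage sketch (ordinary user code, assuming <Eigen/Dense> is included):
+// both the compile-time and the run-time factor overloads build the same tiling.
+//
+//   Eigen::RowVector3i v(1, 2, 3);
+//   Eigen::MatrixXi a = v.replicate(2, 3);    // 2x9: a 2-by-3 tiling of the 1x3 vector
+//   Eigen::MatrixXi b = v.replicate<2, 3>();  // same result, factors fixed at compile time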
+
+/**
+ * \return an expression of the replication of each column (or row) of \c *this
+ *
+ * Example: \include DirectionWise_replicate_int.cpp
+ * Output: \verbinclude DirectionWise_replicate_int.out
+ *
+ * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate
+ */
+template<typename ExpressionType, int Direction>
+const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
+VectorwiseOp<ExpressionType,Direction>::replicate(Index factor) const
+{
+ return typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
+ (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
+}
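+
+// Usage sketch for the partial-reduction flavour (user code, assuming <Eigen/Dense>):
+//
+//   Eigen::Vector3i v(1, 2, 3);
+//   Eigen::MatrixXi m = v.rowwise().replicate(4);  // 3x4: four copies of v placed side by side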
+
+} // end namespace Eigen
+
+#endif // EIGEN_REPLICATE_H
diff --git a/third_party/eigen3/Eigen/src/Core/ReturnByValue.h b/third_party/eigen3/Eigen/src/Core/ReturnByValue.h
new file mode 100644
index 0000000000..7834f6cbcd
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/ReturnByValue.h
@@ -0,0 +1,89 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_RETURNBYVALUE_H
+#define EIGEN_RETURNBYVALUE_H
+
+namespace Eigen {
+
+/** \class ReturnByValue
+ * \ingroup Core_Module
+ *
+ */
+
+namespace internal {
+
+template<typename Derived>
+struct traits<ReturnByValue<Derived> >
+ : public traits<typename traits<Derived>::ReturnType>
+{
+ enum {
+ // We're disabling the DirectAccess because e.g. the constructor of
+    // the Block-with-DirectAccess expression requires a coeffRef method.
+ // Also, we don't want to have to implement the stride stuff.
+ Flags = (traits<typename traits<Derived>::ReturnType>::Flags
+ | EvalBeforeNestingBit) & ~DirectAccessBit
+ };
+};
+
+/* The ReturnByValue object doesn't even have a coeff() method.
+ * So the only way that nesting it in an expression can work is by evaluating it into a plain matrix.
+ * So internal::nested always gives the plain return matrix type.
+ *
+ * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
+ */
+template<typename Derived,int n,typename PlainObject>
+struct nested<ReturnByValue<Derived>, n, PlainObject>
+{
+ typedef typename traits<Derived>::ReturnType type;
+};
+
+} // end namespace internal
+
+template<typename Derived> class ReturnByValue
+ : internal::no_assignment_operator, public internal::dense_xpr_base< ReturnByValue<Derived> >::type
+{
+ public:
+ typedef typename internal::traits<Derived>::ReturnType ReturnType;
+
+ typedef typename internal::dense_xpr_base<ReturnByValue>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
+
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void evalTo(Dest& dst) const
+ { static_cast<const Derived*>(this)->evalTo(dst); }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT
+ class Unusable{
+ Unusable(const Unusable&) {}
+ Unusable& operator=(const Unusable&) {return *this;}
+ };
+ const Unusable& coeff(Index) const { return *reinterpret_cast<const Unusable*>(this); }
+ const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); }
+ Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); }
+ Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); }
+#endif
+};
+
+template<typename Derived>
+template<typename OtherDerived>
+Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
+{
+ other.evalTo(derived());
+ return derived();
+}
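+
+// Hedged sketch of the usual extension pattern (the names below are illustrative,
+// not part of Eigen): a custom expression derives from ReturnByValue, specializes
+// internal::traits to announce its plain ReturnType, and only has to provide
+// rows(), cols() and evalTo(); the operator= above then drives the evaluation.
+//
+//   struct FilledWithOnes;
+//   namespace Eigen { namespace internal {
+//     template<> struct traits<FilledWithOnes> { typedef Eigen::MatrixXd ReturnType; };
+//   } }
+//   struct FilledWithOnes : public Eigen::ReturnByValue<FilledWithOnes> {
+//     FilledWithOnes(int r, int c) : m_rows(r), m_cols(c) {}
+//     int rows() const { return m_rows; }
+//     int cols() const { return m_cols; }
+//     template<typename Dest> void evalTo(Dest& dst) const { dst.setOnes(m_rows, m_cols); }
+//     int m_rows, m_cols;
+//   };
+//
+//   Eigen::MatrixXd m;
+//   m = FilledWithOnes(3, 4);  // operator= above calls FilledWithOnes::evalTo(m)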
+
+} // end namespace Eigen
+
+#endif // EIGEN_RETURNBYVALUE_H
diff --git a/third_party/eigen3/Eigen/src/Core/Reverse.h b/third_party/eigen3/Eigen/src/Core/Reverse.h
new file mode 100644
index 0000000000..e30ae3d281
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Reverse.h
@@ -0,0 +1,224 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Ricard Marxer <email@ricardmarxer.com>
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REVERSE_H
+#define EIGEN_REVERSE_H
+
+namespace Eigen {
+
+/** \class Reverse
+ * \ingroup Core_Module
+ *
+ * \brief Expression of the reverse of a vector or matrix
+ *
+ * \param MatrixType the type of the object of which we are taking the reverse
+ *
+ * This class represents an expression of the reverse of a vector.
+ * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
+ * and most of the time this is the only way it is used.
+ *
+ * \sa MatrixBase::reverse(), VectorwiseOp::reverse()
+ */
+
+namespace internal {
+
+template<typename MatrixType, int Direction>
+struct traits<Reverse<MatrixType, Direction> >
+ : traits<MatrixType>
+{
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename traits<MatrixType>::StorageKind StorageKind;
+ typedef typename traits<MatrixType>::XprKind XprKind;
+ typedef typename nested<MatrixType>::type MatrixTypeNested;
+ typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+ enum {
+ RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+ MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+
+ // let's enable LinearAccess only with vectorization because of the product overhead
+ LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
+ ? LinearAccessBit : 0,
+
+ Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess),
+
+ CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+ };
+};
+
+template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond
+{
+ static inline PacketScalar run(const PacketScalar& x) { return preverse(x); }
+};
+
+template<typename PacketScalar> struct reverse_packet_cond<PacketScalar,false>
+{
+ static inline PacketScalar run(const PacketScalar& x) { return x; }
+};
+
+} // end namespace internal
+
+template<typename MatrixType, int Direction> class Reverse
+ : public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
+{
+ public:
+
+ typedef typename internal::dense_xpr_base<Reverse>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
+ using Base::IsRowMajor;
+
+    // the next line is necessary because otherwise the const version of operator()
+    // would be hidden by the non-const version defined in this file
+ using Base::operator();
+
+ protected:
+ enum {
+ PacketSize = internal::packet_traits<Scalar>::size,
+ IsColMajor = !IsRowMajor,
+ ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
+ ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
+ OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
+ OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1,
+ ReversePacket = (Direction == BothDirections)
+ || ((Direction == Vertical) && IsColMajor)
+ || ((Direction == Horizontal) && IsRowMajor)
+ };
+ typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
+ public:
+
+ inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
+
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)
+
+ inline Index rows() const { return m_matrix.rows(); }
+ inline Index cols() const { return m_matrix.cols(); }
+
+ inline Index innerStride() const
+ {
+ return -m_matrix.innerStride();
+ }
+
+ inline Scalar& operator()(Index row, Index col)
+ {
+ eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
+ return coeffRef(row, col);
+ }
+
+ inline Scalar& coeffRef(Index row, Index col)
+ {
+ return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row,
+ ReverseCol ? m_matrix.cols() - col - 1 : col);
+ }
+
+ inline CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
+ ReverseCol ? m_matrix.cols() - col - 1 : col);
+ }
+
+ inline CoeffReturnType coeff(Index index) const
+ {
+ return m_matrix.coeff(m_matrix.size() - index - 1);
+ }
+
+ inline Scalar& coeffRef(Index index)
+ {
+ return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1);
+ }
+
+ inline Scalar& operator()(Index index)
+ {
+ eigen_assert(index >= 0 && index < m_matrix.size());
+ return coeffRef(index);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index row, Index col) const
+ {
+ return reverse_packet::run(m_matrix.template packet<LoadMode>(
+ ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
+ ReverseCol ? m_matrix.cols() - col - OffsetCol : col));
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ m_matrix.const_cast_derived().template writePacket<LoadMode>(
+ ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
+ ReverseCol ? m_matrix.cols() - col - OffsetCol : col,
+ reverse_packet::run(x));
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index index) const
+ {
+ return internal::preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index index, const PacketScalar& x)
+ {
+ m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
+ }
+
+ const typename internal::remove_all<typename MatrixType::Nested>::type&
+ nestedExpression() const
+ {
+ return m_matrix;
+ }
+
+ protected:
+ typename MatrixType::Nested m_matrix;
+};
+
+/** \returns an expression of the reverse of *this.
+ *
+ * Example: \include MatrixBase_reverse.cpp
+ * Output: \verbinclude MatrixBase_reverse.out
+ *
+ */
+template<typename Derived>
+inline typename DenseBase<Derived>::ReverseReturnType
+DenseBase<Derived>::reverse()
+{
+ return derived();
+}
+
+/** This is the const version of reverse(). */
+template<typename Derived>
+inline const typename DenseBase<Derived>::ConstReverseReturnType
+DenseBase<Derived>::reverse() const
+{
+ return derived();
+}
+
+/** This is the "in place" version of reverse: it reverses \c *this.
+ *
+ * In most cases it is probably better to simply use the reversed expression
+ * of a matrix. However, when reversing the matrix data itself is really needed,
+ * then this "in-place" version is probably the right choice because it provides
+ * the following additional features:
+ * - less error prone: doing the same operation with .reverse() requires special care:
+ * \code m = m.reverse().eval(); \endcode
+ * - this API makes it possible to avoid creating a temporary (the current implementation does create one, but that could be avoided using swap)
+ * - it allows future optimizations (cache friendliness, etc.)
+ *
+ * \sa reverse() */
+template<typename Derived>
+inline void DenseBase<Derived>::reverseInPlace()
+{
+ derived() = derived().reverse().eval();
+}
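+
+// Usage sketch (user code, assuming <Eigen/Dense>):
+//
+//   Eigen::Vector4i v(1, 2, 3, 4);
+//   Eigen::Vector4i r = v.reverse();  // r is 4 3 2 1, v is left unchanged
+//   v.reverseInPlace();               // v itself becomes 4 3 2 1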
+
+} // end namespace Eigen
+
+#endif // EIGEN_REVERSE_H
diff --git a/third_party/eigen3/Eigen/src/Core/Select.h b/third_party/eigen3/Eigen/src/Core/Select.h
new file mode 100644
index 0000000000..87993bbb55
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Select.h
@@ -0,0 +1,162 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELECT_H
+#define EIGEN_SELECT_H
+
+namespace Eigen {
+
+/** \class Select
+ * \ingroup Core_Module
+ *
+ * \brief Expression of a coefficient-wise version of the C++ ternary operator ?:
+ *
+ * \param ConditionMatrixType the type of the \em condition expression which must be a boolean matrix
+ * \param ThenMatrixType the type of the \em then expression
+ * \param ElseMatrixType the type of the \em else expression
+ *
+ * This class represents an expression of a coefficient-wise version of the C++ ternary operator ?:.
+ * It is the return type of DenseBase::select() and most of the time this is the only way it is used.
+ *
+ * \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const
+ */
+
+namespace internal {
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
+struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+ : traits<ThenMatrixType>
+{
+ typedef typename traits<ThenMatrixType>::Scalar Scalar;
+ typedef Dense StorageKind;
+ typedef typename traits<ThenMatrixType>::XprKind XprKind;
+ typedef typename ConditionMatrixType::Nested ConditionMatrixNested;
+ typedef typename ThenMatrixType::Nested ThenMatrixNested;
+ typedef typename ElseMatrixType::Nested ElseMatrixNested;
+ enum {
+ RowsAtCompileTime = ConditionMatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,
+ MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
+ Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits,
+ CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost
+ + EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost,
+ traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost)
+ };
+};
+}
+
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
+class Select : internal::no_assignment_operator,
+ public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type
+{
+ public:
+
+ typedef typename internal::dense_xpr_base<Select>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Select)
+
+ Select(const ConditionMatrixType& a_conditionMatrix,
+ const ThenMatrixType& a_thenMatrix,
+ const ElseMatrixType& a_elseMatrix)
+ : m_condition(a_conditionMatrix), m_then(a_thenMatrix), m_else(a_elseMatrix)
+ {
+ eigen_assert(m_condition.rows() == m_then.rows() && m_condition.rows() == m_else.rows());
+ eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
+ }
+
+ Index rows() const { return m_condition.rows(); }
+ Index cols() const { return m_condition.cols(); }
+
+ const Scalar coeff(Index i, Index j) const
+ {
+ if (m_condition.coeff(i,j))
+ return m_then.coeff(i,j);
+ else
+ return m_else.coeff(i,j);
+ }
+
+ const Scalar coeff(Index i) const
+ {
+ if (m_condition.coeff(i))
+ return m_then.coeff(i);
+ else
+ return m_else.coeff(i);
+ }
+
+ const ConditionMatrixType& conditionMatrix() const
+ {
+ return m_condition;
+ }
+
+ const ThenMatrixType& thenMatrix() const
+ {
+ return m_then;
+ }
+
+ const ElseMatrixType& elseMatrix() const
+ {
+ return m_else;
+ }
+
+ protected:
+ typename ConditionMatrixType::Nested m_condition;
+ typename ThenMatrixType::Nested m_then;
+ typename ElseMatrixType::Nested m_else;
+};
+
+
+/** \returns a matrix where each coefficient (i,j) is equal to \a thenMatrix(i,j)
+ * if \c *this(i,j), and \a elseMatrix(i,j) otherwise.
+ *
+ * Example: \include MatrixBase_select.cpp
+ * Output: \verbinclude MatrixBase_select.out
+ *
+ * \sa class Select
+ */
+template<typename Derived>
+template<typename ThenDerived,typename ElseDerived>
+inline const Select<Derived,ThenDerived,ElseDerived>
+DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
+ const DenseBase<ElseDerived>& elseMatrix) const
+{
+ return Select<Derived,ThenDerived,ElseDerived>(derived(), thenMatrix.derived(), elseMatrix.derived());
+}
+
+/** Version of DenseBase::select(const DenseBase&, const DenseBase&) with
+ * the \em else expression being a scalar value.
+ *
+ * \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const, class Select
+ */
+template<typename Derived>
+template<typename ThenDerived>
+inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
+DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
+ const typename ThenDerived::Scalar& elseScalar) const
+{
+ return Select<Derived,ThenDerived,typename ThenDerived::ConstantReturnType>(
+ derived(), thenMatrix.derived(), ThenDerived::Constant(rows(),cols(),elseScalar));
+}
+
+/** Version of DenseBase::select(const DenseBase&, const DenseBase&) with
+ * the \em then expression being a scalar value.
+ *
+ * \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const, class Select
+ */
+template<typename Derived>
+template<typename ElseDerived>
+inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
+DenseBase<Derived>::select(const typename ElseDerived::Scalar& thenScalar,
+ const DenseBase<ElseDerived>& elseMatrix) const
+{
+ return Select<Derived,typename ElseDerived::ConstantReturnType,ElseDerived>(
+ derived(), ElseDerived::Constant(rows(),cols(),thenScalar), elseMatrix.derived());
+}
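+
+// Usage sketch (user code, assuming <Eigen/Dense>): the three overloads cover the
+// matrix/matrix, matrix/scalar and scalar/matrix branches.
+//
+//   Eigen::ArrayXd x(5);
+//   x << -2, -1, 0, 1, 2;
+//   Eigen::ArrayXd y = (x < 0).select(0, x);    // clamp negatives:  0 0 0 1 2
+//   Eigen::ArrayXd z = (x < 0).select(-x, x);   // absolute value:   2 1 0 1 2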
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELECT_H
diff --git a/third_party/eigen3/Eigen/src/Core/SelfAdjointView.h b/third_party/eigen3/Eigen/src/Core/SelfAdjointView.h
new file mode 100644
index 0000000000..8231e3f5cd
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/SelfAdjointView.h
@@ -0,0 +1,338 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFADJOINTMATRIX_H
+#define EIGEN_SELFADJOINTMATRIX_H
+
+namespace Eigen {
+
+/** \class SelfAdjointView
+ * \ingroup Core_Module
+ *
+ *
+ * \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
+ *
+ * \param MatrixType the type of the dense matrix storing the coefficients
+ * \param TriangularPart can be either \c #Lower or \c #Upper
+ *
+ * This class is an expression of a selfadjoint matrix from a triangular part of a matrix
+ * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()
+ * and most of the time this is the only way that it is used.
+ *
+ * \sa class TriangularBase, MatrixBase::selfadjointView()
+ */
+
+namespace internal {
+template<typename MatrixType, unsigned int UpLo>
+struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
+{
+ typedef typename nested<MatrixType>::type MatrixTypeNested;
+ typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
+ typedef MatrixType ExpressionType;
+ typedef typename MatrixType::PlainObject DenseMatrixType;
+ enum {
+ Mode = UpLo | SelfAdjoint,
+ Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits)
+ & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)), // FIXME these flags should be preserved
+ CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
+ };
+};
+}
+
+template <typename Lhs, int LhsMode, bool LhsIsVector,
+ typename Rhs, int RhsMode, bool RhsIsVector>
+struct SelfadjointProductMatrix;
+
+// FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ??
+template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
+ : public TriangularBase<SelfAdjointView<MatrixType, UpLo> >
+{
+ public:
+
+ typedef TriangularBase<SelfAdjointView> Base;
+ typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
+ typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
+
+ /** \brief The type of coefficients in this matrix */
+ typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
+
+ typedef typename MatrixType::Index Index;
+
+ enum {
+ Mode = internal::traits<SelfAdjointView>::Mode
+ };
+ typedef typename MatrixType::PlainObject PlainObject;
+
+ EIGEN_DEVICE_FUNC
+ inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
+ {}
+
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return m_matrix.rows(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return m_matrix.cols(); }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const { return m_matrix.outerStride(); }
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const { return m_matrix.innerStride(); }
+
+ /** \sa MatrixBase::coeff()
+ * \warning the coordinates must fit into the referenced triangular part
+ */
+ EIGEN_DEVICE_FUNC
+ inline Scalar coeff(Index row, Index col) const
+ {
+ Base::check_coordinates_internal(row, col);
+ return m_matrix.coeff(row, col);
+ }
+
+ /** \sa MatrixBase::coeffRef()
+ * \warning the coordinates must fit into the referenced triangular part
+ */
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index row, Index col)
+ {
+ Base::check_coordinates_internal(row, col);
+ return m_matrix.const_cast_derived().coeffRef(row, col);
+ }
+
+ /** \internal */
+ EIGEN_DEVICE_FUNC
+ const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }
+
+ EIGEN_DEVICE_FUNC
+ const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
+ EIGEN_DEVICE_FUNC
+ MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
+
+ /** Efficient self-adjoint matrix times vector/matrix product */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ SelfadjointProductMatrix<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
+ operator*(const MatrixBase<OtherDerived>& rhs) const
+ {
+ return SelfadjointProductMatrix
+ <MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
+ (m_matrix, rhs.derived());
+ }
+
+ /** Efficient vector/matrix times self-adjoint matrix product */
+ template<typename OtherDerived> friend
+ EIGEN_DEVICE_FUNC
+ SelfadjointProductMatrix<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
+ operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
+ {
+ return SelfadjointProductMatrix
+ <OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
+ (lhs.derived(),rhs.m_matrix);
+ }
+
+ /** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this:
+ * \f$ this = this + \alpha u v^* + conj(\alpha) v u^* \f$
+ * \returns a reference to \c *this
+ *
+      * The vectors \a u and \c v \b must be column vectors, but they can also be
+      * adjoint expressions without any overhead. Only the meaningful triangular
+ * part of the matrix is updated, the rest is left unchanged.
+ *
+ * \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
+ */
+ template<typename DerivedU, typename DerivedV>
+ EIGEN_DEVICE_FUNC
+ SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
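+
+    // Hedged usage sketch (user code): a rank 2 update touching only the stored triangle.
+    //
+    //   Eigen::MatrixXd s = Eigen::MatrixXd::Zero(3, 3);
+    //   Eigen::VectorXd u = Eigen::VectorXd::Random(3), w = Eigen::VectorXd::Random(3);
+    //   s.selfadjointView<Eigen::Upper>().rankUpdate(u, w);  // s += u*w.adjoint() + w*u.adjoint()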
+
+ /** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
+ * \f$ this = this + \alpha ( u u^* ) \f$ where \a u is a vector or matrix.
+ *
+ * \returns a reference to \c *this
+ *
+ * Note that to perform \f$ this = this + \alpha ( u^* u ) \f$ you can simply
+ * call this function with u.adjoint().
+ *
+ * \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
+ */
+ template<typename DerivedU>
+ EIGEN_DEVICE_FUNC
+ SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
+
+/////////// Cholesky module ///////////
+
+ const LLT<PlainObject, UpLo> llt() const;
+ const LDLT<PlainObject, UpLo> ldlt() const;
+
+/////////// Eigenvalue module ///////////
+
+ /** Real part of #Scalar */
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ /** Return type of eigenvalues() */
+ typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
+
+ EIGEN_DEVICE_FUNC
+ EigenvaluesReturnType eigenvalues() const;
+ EIGEN_DEVICE_FUNC
+ RealScalar operatorNorm() const;
+
+ #ifdef EIGEN2_SUPPORT
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ SelfAdjointView& operator=(const MatrixBase<OtherDerived>& other)
+ {
+ enum {
+ OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
+ };
+ m_matrix.const_cast_derived().template triangularView<UpLo>() = other;
+ m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.adjoint();
+ return *this;
+ }
+ template<typename OtherMatrixType, unsigned int OtherMode>
+ EIGEN_DEVICE_FUNC
+ SelfAdjointView& operator=(const TriangularView<OtherMatrixType, OtherMode>& other)
+ {
+ enum {
+ OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
+ };
+ m_matrix.const_cast_derived().template triangularView<UpLo>() = other.toDenseMatrix();
+ m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.toDenseMatrix().adjoint();
+ return *this;
+ }
+ #endif
+
+ protected:
+ MatrixTypeNested m_matrix;
+};
+
+
+// template<typename OtherDerived, typename MatrixType, unsigned int UpLo>
+// internal::selfadjoint_matrix_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >
+// operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView<MatrixType,UpLo>& rhs)
+// {
+// return internal::matrix_selfadjoint_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >(lhs.derived(),rhs);
+// }
+
+// selfadjoint to dense matrix
+
+namespace internal {
+
+template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount, ClearOpposite>
+{
+ enum {
+ col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
+ row = (UnrollCount-1) % Derived1::RowsAtCompileTime
+ };
+
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
+
+ if(row == col)
+ dst.coeffRef(row, col) = numext::real(src.coeff(row, col));
+ else if(row < col)
+ dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col));
+ }
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
+{
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &, const Derived2 &) {}
+};
+
+template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount, ClearOpposite>
+{
+ enum {
+ col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
+ row = (UnrollCount-1) % Derived1::RowsAtCompileTime
+ };
+
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
+
+ if(row == col)
+ dst.coeffRef(row, col) = numext::real(src.coeff(row, col));
+ else if(row > col)
+ dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col));
+ }
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
+{
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &, const Derived2 &) {}
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
+{
+ typedef typename Derived1::Index Index;
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ for(Index j = 0; j < dst.cols(); ++j)
+ {
+ for(Index i = 0; i < j; ++i)
+ {
+ dst.copyCoeff(i, j, src);
+ dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j));
+ }
+ dst.copyCoeff(j, j, src);
+ }
+ }
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
+{
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ typedef typename Derived1::Index Index;
+ for(Index i = 0; i < dst.rows(); ++i)
+ {
+ for(Index j = 0; j < i; ++j)
+ {
+ dst.copyCoeff(i, j, src);
+ dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j));
+ }
+ dst.copyCoeff(i, i, src);
+ }
+ }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* Implementation of MatrixBase methods
+***************************************************************************/
+
+template<typename Derived>
+template<unsigned int UpLo>
+typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
+MatrixBase<Derived>::selfadjointView() const
+{
+ return derived();
+}
+
+template<typename Derived>
+template<unsigned int UpLo>
+typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
+MatrixBase<Derived>::selfadjointView()
+{
+ return derived();
+}
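+
+// Usage sketch (user code, assuming <Eigen/Dense>): a self-adjoint view reads only
+// one triangle of the underlying matrix but behaves like the full symmetric matrix.
+//
+//   Eigen::MatrixXd a = Eigen::MatrixXd::Random(3, 3);
+//   Eigen::MatrixXd s = Eigen::MatrixXd::Zero(3, 3);
+//   s.selfadjointView<Eigen::Lower>().rankUpdate(a);             // s = a * a^T, lower part only
+//   Eigen::VectorXd x = Eigen::VectorXd::Random(3);
+//   Eigen::VectorXd y = s.selfadjointView<Eigen::Lower>() * x;   // symmetric matrix * vector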
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINTMATRIX_H
diff --git a/third_party/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h b/third_party/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h
new file mode 100644
index 0000000000..65864adf84
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -0,0 +1,226 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFCWISEBINARYOP_H
+#define EIGEN_SELFCWISEBINARYOP_H
+
+namespace Eigen {
+
+/** \class SelfCwiseBinaryOp
+ * \ingroup Core_Module
+ *
+ * \internal
+ *
+ * \brief Internal helper class for optimizing operators like +=, -=
+ *
+ * This is a pseudo expression class re-implementing the copyCoeff/copyPacket
+ * methods to perform +=/-= operations directly in an optimal way. In particular,
+ * this makes it possible to ensure that the input/output data are loaded only
+ * once, using aligned packet loads.
+ *
+ * \sa class SwapWrapper for a similar trick.
+ */
+
+namespace internal {
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
+ : traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >
+{
+ enum {
+ // Note that it is still a good idea to preserve the DirectAccessBit
+ // so that assign can correctly align the data.
+ Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&AlignedBit) | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
+ OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
+ InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
+ };
+};
+}
+
+template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
+ : public internal::dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
+{
+ public:
+
+ typedef typename internal::dense_xpr_base<SelfCwiseBinaryOp>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(SelfCwiseBinaryOp)
+
+ typedef typename internal::packet_traits<Scalar>::type Packet;
+
+ EIGEN_DEVICE_FUNC
+ inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
+
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); }
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); }
+ EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_matrix.data(); }
+
+ // note that this function is needed by assign to correctly align loads/stores
+ // TODO make Assign use .data()
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index row, Index col)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(Lhs)
+ return m_matrix.const_cast_derived().coeffRef(row, col);
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index row, Index col) const
+ {
+ return m_matrix.coeffRef(row, col);
+ }
+
+ // note that this function is needed by assign to correctly align loads/stores
+ // TODO make Assign use .data()
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index index)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(Lhs)
+ return m_matrix.const_cast_derived().coeffRef(index);
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return m_matrix.const_cast_derived().coeffRef(index);
+ }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
+ {
+ OtherDerived& _other = other.const_cast_derived();
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ Scalar& tmp = m_matrix.coeffRef(row,col);
+ tmp = m_functor(tmp, _other.coeff(row,col));
+ }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
+ {
+ OtherDerived& _other = other.const_cast_derived();
+ eigen_internal_assert(index >= 0 && index < m_matrix.size());
+ Scalar& tmp = m_matrix.coeffRef(index);
+ tmp = m_functor(tmp, _other.coeff(index));
+ }
+
+ template<typename OtherDerived, int StoreMode, int LoadMode>
+ void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
+ {
+ OtherDerived& _other = other.const_cast_derived();
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ m_matrix.template writePacket<StoreMode>(row, col,
+ m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) );
+ }
+
+ template<typename OtherDerived, int StoreMode, int LoadMode>
+ void copyPacket(Index index, const DenseBase<OtherDerived>& other)
+ {
+ OtherDerived& _other = other.const_cast_derived();
+ eigen_internal_assert(index >= 0 && index < m_matrix.size());
+ m_matrix.template writePacket<StoreMode>(index,
+ m_functor.packetOp(m_matrix.template packet<StoreMode>(index),_other.template packet<LoadMode>(index)) );
+ }
+
+ // reimplement lazyAssign to handle complex *= real
+ // see CwiseBinaryOp ctor for details
+ template<typename RhsDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
+ {
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
+ EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename RhsDerived::Scalar);
+
+ #ifdef EIGEN_DEBUG_ASSIGN
+ internal::assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug();
+ #endif
+ eigen_assert(rows() == rhs.rows() && cols() == rhs.cols());
+ internal::assign_impl<SelfCwiseBinaryOp, RhsDerived>::run(*this,rhs.derived());
+ #ifndef EIGEN_NO_DEBUG
+ this->checkTransposeAliasing(rhs.derived());
+ #endif
+ return *this;
+ }
+
+ // overloaded to honor evaluation of special matrices
+ // maybe another solution would be to not use SelfCwiseBinaryOp
+ // at first...
+ EIGEN_DEVICE_FUNC
+ SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
+ {
+ typename internal::nested<Rhs>::type rhs(_rhs);
+ return Base::operator=(rhs);
+ }
+
+ EIGEN_DEVICE_FUNC
+ Lhs& expression() const
+ {
+ return m_matrix;
+ }
+
+ EIGEN_DEVICE_FUNC
+ const BinaryOp& functor() const
+ {
+ return m_functor;
+ }
+
+ protected:
+ Lhs& m_matrix;
+ const BinaryOp& m_functor;
+
+ private:
+ SelfCwiseBinaryOp& operator=(const SelfCwiseBinaryOp&);
+};
+
+template<typename Derived>
+inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
+{
+ typedef typename Derived::PlainObject PlainObject;
+ SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
+ tmp = PlainObject::Constant(rows(),cols(),other);
+ return derived();
+}
+
+template<typename Derived>
+inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
+{
+ typedef typename Derived::PlainObject PlainObject;
+ SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
+ tmp = PlainObject::Constant(rows(),cols(),other);
+ return derived();
+}
+
+template<typename Derived>
+inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
+{
+ typedef typename Derived::PlainObject PlainObject;
+ SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
+ tmp = PlainObject::Constant(rows(),cols(),other);
+ return derived();
+}
+
+template<typename Derived>
+inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
+{
+ typedef typename internal::conditional<NumTraits<Scalar>::IsInteger,
+ internal::scalar_quotient_op<Scalar>,
+ internal::scalar_product_op<Scalar> >::type BinOp;
+ typedef typename Derived::PlainObject PlainObject;
+ SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
+ Scalar actual_other;
+ if(NumTraits<Scalar>::IsInteger) actual_other = other;
+ else actual_other = Scalar(1)/other;
+ tmp = PlainObject::Constant(rows(),cols(), actual_other);
+ return derived();
+}
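+
+// Usage sketch (user code): these overloads are what make in-place scalar updates work.
+// Note that for integer scalars operator/= performs a true division, while for
+// floating-point scalars it multiplies by the reciprocal, as selected just above.
+//
+//   Eigen::MatrixXd m = Eigen::MatrixXd::Ones(2, 2);
+//   m *= 3.0;                        // every coefficient becomes 3
+//   m /= 2.0;                        // every coefficient becomes 1.5
+//   Eigen::ArrayXXd a = m.array();
+//   a += 0.5;                        // ArrayBase only: coefficient-wise scalar addition
+//   a -= 1.0;                        // and subtraction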
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFCWISEBINARYOP_H
diff --git a/third_party/eigen3/Eigen/src/Core/SolveTriangular.h b/third_party/eigen3/Eigen/src/Core/SolveTriangular.h
new file mode 100644
index 0000000000..e158e31626
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/SolveTriangular.h
@@ -0,0 +1,260 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SOLVETRIANGULAR_H
+#define EIGEN_SOLVETRIANGULAR_H
+
+namespace Eigen {
+
+namespace internal {
+
+// Forward declarations:
+// The following two routines are implemented in the products/TriangularSolver*.h files
+template<typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder>
+struct triangular_solve_vector;
+
+template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder>
+struct triangular_solve_matrix;
+
+// small helper struct extracting some traits on the underlying solver operation
+template<typename Lhs, typename Rhs, int Side>
+class trsolve_traits
+{
+ private:
+ enum {
+ RhsIsVectorAtCompileTime = (Side==OnTheLeft ? Rhs::ColsAtCompileTime : Rhs::RowsAtCompileTime)==1
+ };
+ public:
+ enum {
+ Unrolling = (RhsIsVectorAtCompileTime && Rhs::SizeAtCompileTime != Dynamic && Rhs::SizeAtCompileTime <= 8)
+ ? CompleteUnrolling : NoUnrolling,
+ RhsVectors = RhsIsVectorAtCompileTime ? 1 : Dynamic
+ };
+};
+
+template<typename Lhs, typename Rhs,
+ int Side, // can be OnTheLeft/OnTheRight
+ int Mode, // can be Upper/Lower | UnitDiag
+ int Unrolling = trsolve_traits<Lhs,Rhs,Side>::Unrolling,
+ int RhsVectors = trsolve_traits<Lhs,Rhs,Side>::RhsVectors
+ >
+struct triangular_solver_selector;
+
+template<typename Lhs, typename Rhs, int Side, int Mode>
+struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
+{
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef blas_traits<Lhs> LhsProductTraits;
+ typedef typename LhsProductTraits::ExtractType ActualLhsType;
+ typedef Map<Matrix<RhsScalar,Dynamic,1>, Aligned> MappedRhs;
+ static void run(const Lhs& lhs, Rhs& rhs)
+ {
+ ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
+
+ // FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1
+
+ bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;
+
+ ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
+ (useRhsDirectly ? rhs.data() : 0));
+
+ if(!useRhsDirectly)
+ MappedRhs(actualRhs,rhs.size()) = rhs;
+
+ triangular_solve_vector<LhsScalar, RhsScalar, typename Lhs::Index, Side, Mode, LhsProductTraits::NeedToConjugate,
+ (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
+ ::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);
+
+ if(!useRhsDirectly)
+ rhs = MappedRhs(actualRhs, rhs.size());
+ }
+};
+
+// the rhs is a matrix
+template<typename Lhs, typename Rhs, int Side, int Mode>
+struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
+{
+ typedef typename Rhs::Scalar Scalar;
+ typedef typename Rhs::Index Index;
+ typedef blas_traits<Lhs> LhsProductTraits;
+ typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
+
+ static void run(const Lhs& lhs, Rhs& rhs)
+ {
+ typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
+
+ const Index size = lhs.rows();
+ const Index othersize = Side==OnTheLeft? rhs.cols() : rhs.rows();
+
+ typedef internal::gemm_blocking_space<(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
+ Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxRowsAtCompileTime,4> BlockingType;
+
+ BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false);
+
+ triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
+ (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
+ ::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride(), blocking);
+ }
+};
+
+/***************************************************************************
+* meta-unrolling implementation
+***************************************************************************/
+
+template<typename Lhs, typename Rhs, int Mode, int Index, int Size,
+ bool Stop = Index==Size>
+struct triangular_solver_unroller;
+
+template<typename Lhs, typename Rhs, int Mode, int Index, int Size>
+struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,false> {
+ enum {
+ IsLower = ((Mode&Lower)==Lower),
+ I = IsLower ? Index : Size - Index - 1,
+ S = IsLower ? 0 : I+1
+ };
+ static void run(const Lhs& lhs, Rhs& rhs)
+ {
+ if (Index>0)
+ rhs.coeffRef(I) -= lhs.row(I).template segment<Index>(S).transpose()
+ .cwiseProduct(rhs.template segment<Index>(S)).sum();
+
+ if(!(Mode & UnitDiag))
+ rhs.coeffRef(I) /= lhs.coeff(I,I);
+
+ triangular_solver_unroller<Lhs,Rhs,Mode,Index+1,Size>::run(lhs,rhs);
+ }
+};
+
+template<typename Lhs, typename Rhs, int Mode, int Index, int Size>
+struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,true> {
+ static void run(const Lhs&, Rhs&) {}
+};
+
+template<typename Lhs, typename Rhs, int Mode>
+struct triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,1> {
+ static void run(const Lhs& lhs, Rhs& rhs)
+ { triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); }
+};
+
+template<typename Lhs, typename Rhs, int Mode>
+struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
+ static void run(const Lhs& lhs, Rhs& rhs)
+ {
+ Transpose<const Lhs> trLhs(lhs);
+ Transpose<Rhs> trRhs(rhs);
+
+ triangular_solver_unroller<Transpose<const Lhs>,Transpose<Rhs>,
+ ((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag),
+ 0,Rhs::SizeAtCompileTime>::run(trLhs,trRhs);
+ }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* TriangularView methods
+***************************************************************************/
+
+/** "in-place" version of TriangularView::solve() where the result is written in \a other
+ *
+ * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
+ * This function will const_cast it, so constness isn't honored here.
+ *
+ * See TriangularView::solve() for the details.
+ */
+template<typename MatrixType, unsigned int Mode>
+template<int Side, typename OtherDerived>
+void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
+{
+ OtherDerived& other = _other.const_cast_derived();
+ eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) );
+ eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
+
+ enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime };
+ typedef typename internal::conditional<copy,
+ typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
+ OtherCopy otherCopy(other);
+
+ internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,
+ Side, Mode>::run(nestedExpression(), otherCopy);
+
+ if (copy)
+ other = otherCopy;
+}
+
+/** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
+ *
+ * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if
+ * \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if
+ * \a Side==OnTheRight.
+ *
+ * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
+ * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
+ * is an upper (resp. lower) triangular matrix.
+ *
+ * Example: \include MatrixBase_marked.cpp
+ * Output: \verbinclude MatrixBase_marked.out
+ *
+ * This function returns an expression of the inverse-multiply and can work in-place if it is assigned
+ * to the same matrix or vector \a other.
+ *
+ * For users coming from BLAS, this function (and more specifically solveInPlace()) offers
+ * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
+ *
+ * \sa TriangularView::solveInPlace()
+ */
+template<typename Derived, unsigned int Mode>
+template<int Side, typename Other>
+const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
+TriangularView<Derived,Mode>::solve(const MatrixBase<Other>& other) const
+{
+ return internal::triangular_solve_retval<Side,TriangularView,Other>(*this, other.derived());
+}
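+
+// Usage sketch (user code, assuming <Eigen/Dense>): solve() returns a ReturnByValue
+// expression, while solveInPlace() overwrites its argument.
+//
+//   Eigen::Matrix3d A = Eigen::Matrix3d::Random();
+//   Eigen::Vector3d b = Eigen::Vector3d::Random();
+//   Eigen::Vector3d x = A.triangularView<Eigen::Lower>().solve(b);  // solves  L x = b
+//   A.triangularView<Eigen::Upper>().solveInPlace(b);               // b <- U^{-1} b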
+
+namespace internal {
+
+
+template<int Side, typename TriangularType, typename Rhs>
+struct traits<triangular_solve_retval<Side, TriangularType, Rhs> >
+{
+ typedef typename internal::plain_matrix_type_column_major<Rhs>::type ReturnType;
+};
+
+template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval
+ : public ReturnByValue<triangular_solve_retval<Side, TriangularType, Rhs> >
+{
+ typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
+ typedef ReturnByValue<triangular_solve_retval> Base;
+ typedef typename Base::Index Index;
+
+ triangular_solve_retval(const TriangularType& tri, const Rhs& rhs)
+ : m_triangularMatrix(tri), m_rhs(rhs)
+ {}
+
+ inline Index rows() const { return m_rhs.rows(); }
+ inline Index cols() const { return m_rhs.cols(); }
+
+ template<typename Dest> inline void evalTo(Dest& dst) const
+ {
+ if(!(is_same<RhsNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_rhs)))
+ dst = m_rhs;
+ m_triangularMatrix.template solveInPlace<Side>(dst);
+ }
+
+ protected:
+ const TriangularType& m_triangularMatrix;
+ typename Rhs::Nested m_rhs;
+};
+
+} // namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SOLVETRIANGULAR_H
diff --git a/third_party/eigen3/Eigen/src/Core/StableNorm.h b/third_party/eigen3/Eigen/src/Core/StableNorm.h
new file mode 100644
index 0000000000..c862c0b63e
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/StableNorm.h
@@ -0,0 +1,200 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_STABLENORM_H
+#define EIGEN_STABLENORM_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename ExpressionType, typename Scalar>
+inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
+{
+ using std::max;
+ Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
+
+ if (maxCoeff>scale)
+ {
+ ssq = ssq * numext::abs2(scale/maxCoeff);
+ Scalar tmp = Scalar(1)/maxCoeff;
+ if(tmp > NumTraits<Scalar>::highest())
+ {
+ invScale = NumTraits<Scalar>::highest();
+ scale = Scalar(1)/invScale;
+ }
+ else
+ {
+ scale = maxCoeff;
+ invScale = tmp;
+ }
+ }
+
+ // TODO if the maxCoeff is much much smaller than the current scale,
+ // then we can neglect this sub vector
+ if(scale>Scalar(0)) // if scale==0, then bl is 0
+ ssq += (bl*invScale).squaredNorm();
+}
+
+template<typename Derived>
+inline typename NumTraits<typename traits<Derived>::Scalar>::Real
+blueNorm_impl(const EigenBase<Derived>& _vec)
+{
+ typedef typename Derived::RealScalar RealScalar;
+ typedef typename Derived::Index Index;
+ using std::pow;
+ using std::sqrt;
+ using std::abs;
+ const Derived& vec(_vec.derived());
+ static bool initialized = false;
+ static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
+ if(!initialized)
+ {
+ int ibeta, it, iemin, iemax, iexp;
+ RealScalar eps;
+    // This program calculates the machine-dependent constants
+    // b1, b2, s1m, s2m, relerr and overfl
+    // from the "basic" machine-dependent numbers
+    // nbig, ibeta, it, iemin, iemax, rbig.
+    // The following define the basic machine-dependent constants.
+    // For portability, the PORT subprograms "i1mach" and "r1mach"
+    // are used. For any specific computer, each of the assignment
+    // statements can be replaced by explicit constants.
+ ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
+ it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
+ iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
+ iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
+ rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
+
+ iexp = -((1-iemin)/2);
+ b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
+ iexp = (iemax + 1 - it)/2;
+ b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
+
+ iexp = (2-iemin)/2;
+ s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
+ iexp = - ((iemax+it)/2);
+ s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
+
+ overfl = rbig*s2m; // overflow boundary for abig
+ eps = RealScalar(pow(double(ibeta), 1-it));
+ relerr = sqrt(eps); // tolerance for neglecting asml
+ initialized = true;
+ }
+ Index n = vec.size();
+ RealScalar ab2 = b2 / RealScalar(n);
+ RealScalar asml = RealScalar(0);
+ RealScalar amed = RealScalar(0);
+ RealScalar abig = RealScalar(0);
+ for(typename Derived::InnerIterator it(vec, 0); it; ++it)
+ {
+ RealScalar ax = abs(it.value());
+ if(ax > ab2) abig += numext::abs2(ax*s2m);
+ else if(ax < b1) asml += numext::abs2(ax*s1m);
+ else amed += numext::abs2(ax);
+ }
+ if(abig > RealScalar(0))
+ {
+ abig = sqrt(abig);
+ if(abig > overfl)
+ {
+ return rbig;
+ }
+ if(amed > RealScalar(0))
+ {
+ abig = abig/s2m;
+ amed = sqrt(amed);
+ }
+ else
+ return abig/s2m;
+ }
+ else if(asml > RealScalar(0))
+ {
+ if (amed > RealScalar(0))
+ {
+ abig = sqrt(amed);
+ amed = sqrt(asml) / s1m;
+ }
+ else
+ return sqrt(asml)/s1m;
+ }
+ else
+ return sqrt(amed);
+ asml = numext::mini(abig, amed);
+ abig = numext::maxi(abig, amed);
+ if(asml <= abig*relerr)
+ return abig;
+ else
+ return abig * sqrt(RealScalar(1) + numext::abs2(asml/abig));
+}
+
+} // end namespace internal
+
+/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
+  * This version uses a blockwise two-pass algorithm:
+ * 1 - find the absolute largest coefficient \c s
+ * 2 - compute \f$ s \Vert \frac{*this}{s} \Vert \f$ in a standard way
+ *
+ * For architecture/scalar types supporting vectorization, this version
+ * is faster than blueNorm(). Otherwise the blueNorm() is much faster.
+ *
+ * \sa norm(), blueNorm(), hypotNorm()
+ */
+template<typename Derived>
+inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+MatrixBase<Derived>::stableNorm() const
+{
+ using std::sqrt;
+ const Index blockSize = 4096;
+ RealScalar scale(0);
+ RealScalar invScale(1);
+ RealScalar ssq(0); // sum of square
+ enum {
+ Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
+ };
+ Index n = size();
+ Index bi = internal::first_aligned(derived());
+ if (bi>0)
+ internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
+ for (; bi<n; bi+=blockSize)
+ internal::stable_norm_kernel(this->segment(bi,numext::mini(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
+ return scale * sqrt(ssq);
+}
+
+/** \returns the \em l2 norm of \c *this using Blue's algorithm.
+  * See "A Portable Fortran Program to Find the Euclidean Norm of a Vector",
+  * ACM TOMS, Vol 4, Issue 1, 1978.
+ *
+ * For architecture/scalar types without vectorization, this version
+ * is much faster than stableNorm(). Otherwise the stableNorm() is faster.
+ *
+ * \sa norm(), stableNorm(), hypotNorm()
+ */
+template<typename Derived>
+inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+MatrixBase<Derived>::blueNorm() const
+{
+ return internal::blueNorm_impl(*this);
+}
+
+/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
+  * This version uses a concatenation of hypot() calls, and it is very slow.
+ *
+ * \sa norm(), stableNorm()
+ */
+template<typename Derived>
+inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+MatrixBase<Derived>::hypotNorm() const
+{
+ return this->cwiseAbs().redux(internal::scalar_hypot_op<RealScalar>());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_STABLENORM_H
diff --git a/third_party/eigen3/Eigen/src/Core/Stride.h b/third_party/eigen3/Eigen/src/Core/Stride.h
new file mode 100644
index 0000000000..d3d454e4e2
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Stride.h
@@ -0,0 +1,113 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_STRIDE_H
+#define EIGEN_STRIDE_H
+
+namespace Eigen {
+
+/** \class Stride
+ * \ingroup Core_Module
+ *
+ * \brief Holds strides information for Map
+ *
+ * This class holds the stride information used by class Map for mapping arrays with strides.
+ *
+ * It holds two values: the inner stride and the outer stride.
+ *
+ * The inner stride is the pointer increment between two consecutive entries within a given row of a
+ * row-major matrix or within a given column of a column-major matrix.
+ *
+ * The outer stride is the pointer increment between two consecutive rows of a row-major matrix or
+ * between two consecutive columns of a column-major matrix.
+ *
+ * These two values can be passed either at compile-time as template parameters, or at runtime as
+ * arguments to the constructor.
+ *
+ * Indeed, this class takes two template parameters:
+ * \param _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
+ * \param _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
+ *
+ * Here is an example:
+ * \include Map_general_stride.cpp
+ * Output: \verbinclude Map_general_stride.out
+ *
+ * \sa class InnerStride, class OuterStride, \ref TopicStorageOrders
+ */
+template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
+class Stride
+{
+ public:
+ typedef DenseIndex Index;
+ enum {
+ InnerStrideAtCompileTime = _InnerStrideAtCompileTime,
+ OuterStrideAtCompileTime = _OuterStrideAtCompileTime
+ };
+
+ /** Default constructor, for use when strides are fixed at compile time */
+ EIGEN_DEVICE_FUNC
+ Stride()
+ : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
+ {
+ eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
+ }
+
+  /** Constructor that allows passing the strides at runtime */
+ EIGEN_DEVICE_FUNC
+ Stride(Index outerStride, Index innerStride)
+ : m_outer(outerStride), m_inner(innerStride)
+ {
+ eigen_assert(innerStride>=0 && outerStride>=0);
+ }
+
+ /** Copy constructor */
+ EIGEN_DEVICE_FUNC
+ Stride(const Stride& other)
+ : m_outer(other.outer()), m_inner(other.inner())
+ {}
+
+ /** \returns the outer stride */
+ EIGEN_DEVICE_FUNC
+ inline Index outer() const { return m_outer.value(); }
+ /** \returns the inner stride */
+ EIGEN_DEVICE_FUNC
+ inline Index inner() const { return m_inner.value(); }
+
+ protected:
+ internal::variable_if_dynamic<Index, OuterStrideAtCompileTime> m_outer;
+ internal::variable_if_dynamic<Index, InnerStrideAtCompileTime> m_inner;
+};
+
+/** \brief Convenience specialization of Stride to specify only an inner stride
+ * See class Map for some examples */
+template<int Value = Dynamic>
+class InnerStride : public Stride<0, Value>
+{
+ typedef Stride<0, Value> Base;
+ public:
+ typedef DenseIndex Index;
+ EIGEN_DEVICE_FUNC InnerStride() : Base() {}
+ EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {}
+};
+
+/** \brief Convenience specialization of Stride to specify only an outer stride
+ * See class Map for some examples */
+template<int Value = Dynamic>
+class OuterStride : public Stride<Value, 0>
+{
+ typedef Stride<Value, 0> Base;
+ public:
+ typedef DenseIndex Index;
+ EIGEN_DEVICE_FUNC OuterStride() : Base() {}
+ EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {}
+};
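+
+// A minimal usage sketch: mapping a 3x4 column-major matrix out of a raw buffer
+// whose columns are padded to 5 entries each (the buffer layout is illustrative):
+//   double data[20];
+//   Eigen::Map<Eigen::MatrixXd, 0, Eigen::OuterStride<> >
+//       m(data, 3, 4, Eigen::OuterStride<>(5));
+//   // m(i,j) reads data[j*5 + i]: inner stride 1 (default), outer stride 5.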
+
+} // end namespace Eigen
+
+#endif // EIGEN_STRIDE_H
diff --git a/third_party/eigen3/Eigen/src/Core/Swap.h b/third_party/eigen3/Eigen/src/Core/Swap.h
new file mode 100644
index 0000000000..d602fba653
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Swap.h
@@ -0,0 +1,140 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SWAP_H
+#define EIGEN_SWAP_H
+
+namespace Eigen {
+
+/** \class SwapWrapper
+ * \ingroup Core_Module
+ *
+ * \internal
+ *
+ * \brief Internal helper class for swapping two expressions
+ */
+namespace internal {
+template<typename ExpressionType>
+struct traits<SwapWrapper<ExpressionType> > : traits<ExpressionType> {};
+}
+
+template<typename ExpressionType> class SwapWrapper
+ : public internal::dense_xpr_base<SwapWrapper<ExpressionType> >::type
+{
+ public:
+
+ typedef typename internal::dense_xpr_base<SwapWrapper>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper)
+ typedef typename internal::packet_traits<Scalar>::type Packet;
+
+ EIGEN_DEVICE_FUNC
+ inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {}
+
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return m_expression.rows(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return m_expression.cols(); }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const { return m_expression.outerStride(); }
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const { return m_expression.innerStride(); }
+
+ typedef typename internal::conditional<
+ internal::is_lvalue<ExpressionType>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar* data() const { return m_expression.data(); }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index rowId, Index colId)
+ {
+ return m_expression.const_cast_derived().coeffRef(rowId, colId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index index)
+ {
+ return m_expression.const_cast_derived().coeffRef(index);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return m_expression.coeffRef(rowId, colId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index index) const
+ {
+ return m_expression.coeffRef(index);
+ }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void copyCoeff(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
+ {
+ OtherDerived& _other = other.const_cast_derived();
+ eigen_internal_assert(rowId >= 0 && rowId < rows()
+ && colId >= 0 && colId < cols());
+ Scalar tmp = m_expression.coeff(rowId, colId);
+ m_expression.coeffRef(rowId, colId) = _other.coeff(rowId, colId);
+ _other.coeffRef(rowId, colId) = tmp;
+ }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
+ {
+ OtherDerived& _other = other.const_cast_derived();
+ eigen_internal_assert(index >= 0 && index < m_expression.size());
+ Scalar tmp = m_expression.coeff(index);
+ m_expression.coeffRef(index) = _other.coeff(index);
+ _other.coeffRef(index) = tmp;
+ }
+
+ template<typename OtherDerived, int StoreMode, int LoadMode>
+ void copyPacket(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
+ {
+ OtherDerived& _other = other.const_cast_derived();
+ eigen_internal_assert(rowId >= 0 && rowId < rows()
+ && colId >= 0 && colId < cols());
+ Packet tmp = m_expression.template packet<StoreMode>(rowId, colId);
+ m_expression.template writePacket<StoreMode>(rowId, colId,
+ _other.template packet<LoadMode>(rowId, colId)
+ );
+ _other.template writePacket<LoadMode>(rowId, colId, tmp);
+ }
+
+ template<typename OtherDerived, int StoreMode, int LoadMode>
+ void copyPacket(Index index, const DenseBase<OtherDerived>& other)
+ {
+ OtherDerived& _other = other.const_cast_derived();
+ eigen_internal_assert(index >= 0 && index < m_expression.size());
+ Packet tmp = m_expression.template packet<StoreMode>(index);
+ m_expression.template writePacket<StoreMode>(index,
+ _other.template packet<LoadMode>(index)
+ );
+ _other.template writePacket<LoadMode>(index, tmp);
+ }
+
+ EIGEN_DEVICE_FUNC
+ ExpressionType& expression() const { return m_expression; }
+
+ protected:
+ ExpressionType& m_expression;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_SWAP_H
diff --git a/third_party/eigen3/Eigen/src/Core/Transpose.h b/third_party/eigen3/Eigen/src/Core/Transpose.h
new file mode 100644
index 0000000000..aba3f66704
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Transpose.h
@@ -0,0 +1,428 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRANSPOSE_H
+#define EIGEN_TRANSPOSE_H
+
+namespace Eigen {
+
+/** \class Transpose
+ * \ingroup Core_Module
+ *
+ * \brief Expression of the transpose of a matrix
+ *
+ * \param MatrixType the type of the object of which we are taking the transpose
+ *
+ * This class represents an expression of the transpose of a matrix.
+ * It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
+ * and most of the time this is the only way it is used.
+ *
+ * \sa MatrixBase::transpose(), MatrixBase::adjoint()
+ */
+
+namespace internal {
+template<typename MatrixType>
+struct traits<Transpose<MatrixType> > : traits<MatrixType>
+{
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename nested<MatrixType>::type MatrixTypeNested;
+ typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
+ typedef typename traits<MatrixType>::StorageKind StorageKind;
+ typedef typename traits<MatrixType>::XprKind XprKind;
+ enum {
+ RowsAtCompileTime = MatrixType::ColsAtCompileTime,
+ ColsAtCompileTime = MatrixType::RowsAtCompileTime,
+ MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+ MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+ FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+ Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit),
+ Flags1 = Flags0 | FlagsLvalueBit,
+ Flags = Flags1 ^ RowMajorBit,
+ CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost,
+ InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
+ OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
+ };
+};
+}
+
+template<typename MatrixType, typename StorageKind> class TransposeImpl;
+
+template<typename MatrixType> class Transpose
+ : public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
+{
+ public:
+
+ typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
+
+ EIGEN_DEVICE_FUNC
+ inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {}
+
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
+
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
+
+ /** \returns the nested expression */
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<typename MatrixType::Nested>::type&
+ nestedExpression() const { return m_matrix; }
+
+ /** \returns the nested expression */
+ EIGEN_DEVICE_FUNC
+ typename internal::remove_all<typename MatrixType::Nested>::type&
+ nestedExpression() { return m_matrix.const_cast_derived(); }
+
+ protected:
+ typename MatrixType::Nested m_matrix;
+};
+
+namespace internal {
+
+template<typename MatrixType, bool HasDirectAccess = has_direct_access<MatrixType>::ret>
+struct TransposeImpl_base
+{
+ typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
+};
+
+template<typename MatrixType>
+struct TransposeImpl_base<MatrixType, false>
+{
+ typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
+};
+
+} // end namespace internal
+
+template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
+ : public internal::TransposeImpl_base<MatrixType>::type
+{
+ public:
+
+ typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
+
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
+
+ typedef typename internal::conditional<
+ internal::is_lvalue<MatrixType>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+
+ inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
+ inline const Scalar* data() const { return derived().nestedExpression().data(); }
+
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue& coeffRef(Index rowId, Index colId)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+ return derived().nestedExpression().const_cast_derived().coeffRef(colId, rowId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+ return derived().nestedExpression().const_cast_derived().coeffRef(index);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return derived().nestedExpression().coeffRef(colId, rowId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return derived().nestedExpression().coeffRef(index);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline CoeffReturnType coeff(Index rowId, Index colId) const
+ {
+ return derived().nestedExpression().coeff(colId, rowId);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline CoeffReturnType coeff(Index index) const
+ {
+ return derived().nestedExpression().coeff(index);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index rowId, Index colId) const
+ {
+ return derived().nestedExpression().template packet<LoadMode>(colId, rowId);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index rowId, Index colId, const PacketScalar& x)
+ {
+ derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(colId, rowId, x);
+ }
+
+ template<int LoadMode>
+ inline const PacketScalar packet(Index index) const
+ {
+ return derived().nestedExpression().template packet<LoadMode>(index);
+ }
+
+ template<int LoadMode>
+ inline void writePacket(Index index, const PacketScalar& x)
+ {
+ derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(index, x);
+ }
+};
+
+/** \returns an expression of the transpose of *this.
+ *
+ * Example: \include MatrixBase_transpose.cpp
+ * Output: \verbinclude MatrixBase_transpose.out
+ *
+ * \warning If you want to replace a matrix by its own transpose, do \b NOT do this:
+ * \code
+ * m = m.transpose(); // bug!!! caused by aliasing effect
+ * \endcode
+ * Instead, use the transposeInPlace() method:
+ * \code
+ * m.transposeInPlace();
+ * \endcode
+ * which gives Eigen good opportunities for optimization, or alternatively you can also do:
+ * \code
+ * m = m.transpose().eval();
+ * \endcode
+ *
+ * \sa transposeInPlace(), adjoint() */
+template<typename Derived>
+inline Transpose<Derived>
+DenseBase<Derived>::transpose()
+{
+ return derived();
+}
+
+/** This is the const version of transpose().
+ *
+ * Make sure you read the warning for transpose()!
+ *
+ * \sa transposeInPlace(), adjoint() */
+template<typename Derived>
+inline typename DenseBase<Derived>::ConstTransposeReturnType
+DenseBase<Derived>::transpose() const
+{
+ return ConstTransposeReturnType(derived());
+}
+
+/** \returns an expression of the adjoint (i.e. conjugate transpose) of *this.
+ *
+ * Example: \include MatrixBase_adjoint.cpp
+ * Output: \verbinclude MatrixBase_adjoint.out
+ *
+ * \warning If you want to replace a matrix by its own adjoint, do \b NOT do this:
+ * \code
+ * m = m.adjoint(); // bug!!! caused by aliasing effect
+ * \endcode
+ * Instead, use the adjointInPlace() method:
+ * \code
+ * m.adjointInPlace();
+ * \endcode
+ * which gives Eigen good opportunities for optimization, or alternatively you can also do:
+ * \code
+ * m = m.adjoint().eval();
+ * \endcode
+ *
+ * \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */
+template<typename Derived>
+inline const typename MatrixBase<Derived>::AdjointReturnType
+MatrixBase<Derived>::adjoint() const
+{
+  return this->transpose(); // in the complex case, the .conjugate() is implicit here
+                            // due to the implicit conversion to the return type
+}
+
+/***************************************************************************
+* "in place" transpose implementation
+***************************************************************************/
+
+namespace internal {
+
+template<typename MatrixType,
+ bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic>
+struct inplace_transpose_selector;
+
+template<typename MatrixType>
+struct inplace_transpose_selector<MatrixType,true> { // square matrix
+ static void run(MatrixType& m) {
+ m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
+ }
+};
+
+template<typename MatrixType>
+struct inplace_transpose_selector<MatrixType,false> { // non-square matrix
+ static void run(MatrixType& m) {
+ if (m.rows()==m.cols())
+ m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
+ else
+ m = m.transpose().eval();
+ }
+};
+
+} // end namespace internal
+
+/** This is the "in place" version of transpose(): it replaces \c *this by its own transpose.
+ * Thus, doing
+ * \code
+ * m.transposeInPlace();
+ * \endcode
+ * has the same effect on m as doing
+ * \code
+ * m = m.transpose().eval();
+ * \endcode
+ * and is faster and also safer because in the latter line of code, forgetting the eval() results
+ * in a bug caused by \ref TopicAliasing "aliasing".
+ *
+ * Notice however that this method is only useful if you want to replace a matrix by its own transpose.
+ * If you just need the transpose of a matrix, use transpose().
+ *
+ * \note if the matrix is not square, then \c *this must be a resizable matrix.
+ * This excludes (non-square) fixed-size matrices, block-expressions and maps.
+ *
+ * \sa transpose(), adjoint(), adjointInPlace() */
+template<typename Derived>
+inline void DenseBase<Derived>::transposeInPlace()
+{
+ eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
+ && "transposeInPlace() called on a non-square non-resizable matrix");
+ internal::inplace_transpose_selector<Derived>::run(derived());
+}
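+
+// A minimal sketch of the non-square case mentioned above: MatrixXf is resizable,
+// so transposeInPlace() is allowed to change the dimensions of the object.
+//   Eigen::MatrixXf m(2, 3);
+//   m << 1, 2, 3,
+//        4, 5, 6;
+//   m.transposeInPlace();   // m is now 3x2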
+
+/***************************************************************************
+* "in place" adjoint implementation
+***************************************************************************/
+
+/** This is the "in place" version of adjoint(): it replaces \c *this by its own transpose.
+ * Thus, doing
+ * \code
+ * m.adjointInPlace();
+ * \endcode
+ * has the same effect on m as doing
+ * \code
+ * m = m.adjoint().eval();
+ * \endcode
+ * and is faster and also safer because in the latter line of code, forgetting the eval() results
+ * in a bug caused by aliasing.
+ *
+ * Notice however that this method is only useful if you want to replace a matrix by its own adjoint.
+ * If you just need the adjoint of a matrix, use adjoint().
+ *
+ * \note if the matrix is not square, then \c *this must be a resizable matrix.
+ * This excludes (non-square) fixed-size matrices, block-expressions and maps.
+ *
+ * \sa transpose(), adjoint(), transposeInPlace() */
+template<typename Derived>
+inline void MatrixBase<Derived>::adjointInPlace()
+{
+ derived() = adjoint().eval();
+}
+
+#ifndef EIGEN_NO_DEBUG
+
+// The following is to detect aliasing problems in most common cases.
+
+namespace internal {
+
+template<typename BinOp,typename NestedXpr,typename Rhs>
+struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
+ : blas_traits<NestedXpr>
+{
+ typedef SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> XprType;
+ static inline const XprType extract(const XprType& x) { return x; }
+};
+
+template<bool DestIsTransposed, typename OtherDerived>
+struct check_transpose_aliasing_compile_time_selector
+{
+ enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };
+};
+
+template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
+struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
+{
+ enum { ret = bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed
+ || bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed
+ };
+};
+
+template<typename Scalar, bool DestIsTransposed, typename OtherDerived>
+struct check_transpose_aliasing_run_time_selector
+{
+ static bool run(const Scalar* dest, const OtherDerived& src)
+ {
+ return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src));
+ }
+};
+
+template<typename Scalar, bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
+struct check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
+{
+ static bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src)
+ {
+ return ((blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.lhs())))
+ || ((blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.rhs())));
+ }
+};
+
+// The following selector, checkTransposeAliasing_impl, dispatches on MightHaveTransposeAliasing
+// because ICC emits a warning when the condition controlling the assert is known at compile time.
+// This is actually a good warning: in expressions that don't involve any transposition, the condition
+// is known at compile time to be false, and using that fact we can avoid generating the code of the
+// assert again and again for all the expressions that don't need it.
+
+template<typename Derived, typename OtherDerived,
+ bool MightHaveTransposeAliasing
+ = check_transpose_aliasing_compile_time_selector
+ <blas_traits<Derived>::IsTransposed,OtherDerived>::ret
+ >
+struct checkTransposeAliasing_impl
+{
+ static void run(const Derived& dst, const OtherDerived& other)
+ {
+ eigen_assert((!check_transpose_aliasing_run_time_selector
+ <typename Derived::Scalar,blas_traits<Derived>::IsTransposed,OtherDerived>
+ ::run(extract_data(dst), other))
+ && "aliasing detected during transposition, use transposeInPlace() "
+ "or evaluate the rhs into a temporary using .eval()");
+
+ }
+};
+
+template<typename Derived, typename OtherDerived>
+struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
+{
+ static void run(const Derived&, const OtherDerived&)
+ {
+ }
+};
+
+} // end namespace internal
+
+template<typename Derived>
+template<typename OtherDerived>
+void DenseBase<Derived>::checkTransposeAliasing(const OtherDerived& other) const
+{
+ internal::checkTransposeAliasing_impl<Derived, OtherDerived>::run(derived(), other);
+}
+#endif
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRANSPOSE_H
diff --git a/third_party/eigen3/Eigen/src/Core/Transpositions.h b/third_party/eigen3/Eigen/src/Core/Transpositions.h
new file mode 100644
index 0000000000..ac3aef5af5
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Transpositions.h
@@ -0,0 +1,436 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRANSPOSITIONS_H
+#define EIGEN_TRANSPOSITIONS_H
+
+namespace Eigen {
+
+/** \class Transpositions
+ * \ingroup Core_Module
+ *
+ * \brief Represents a sequence of transpositions (row/column interchange)
+ *
+ * \param SizeAtCompileTime the number of transpositions, or Dynamic
+ * \param MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
+ *
+ * This class represents a permutation transformation as a sequence of \em n transpositions
+ * \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
+ * Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
+ * the rows \c i and \c indices[i] of the matrix \c M.
+ * A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
+ *
+ * Compared to the class PermutationMatrix, such a sequence of transpositions is what is naturally
+ * computed during a decomposition with pivoting, and applying the permutation in-place is faster.
+ *
+ * To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
+ * \code
+ * Transpositions<Dynamic> tr;
+ * MatrixXf mat;
+ * mat = tr * mat;
+ * \endcode
+ * In this example, we detect that the matrix appears on both sides, and so the transpositions
+ * are applied in-place without any temporary or extra copy.
+ *
+ * \sa class PermutationMatrix
+ */
+
+namespace internal {
+template<typename TranspositionType, typename MatrixType, int Side, bool Transposed=false> struct transposition_matrix_product_retval;
+}
+
+template<typename Derived>
+class TranspositionsBase
+{
+ typedef internal::traits<Derived> Traits;
+
+ public:
+
+ typedef typename Traits::IndicesType IndicesType;
+ typedef typename IndicesType::Scalar Index;
+
+ Derived& derived() { return *static_cast<Derived*>(this); }
+ const Derived& derived() const { return *static_cast<const Derived*>(this); }
+
+ /** Copies the \a other transpositions into \c *this */
+ template<typename OtherDerived>
+ Derived& operator=(const TranspositionsBase<OtherDerived>& other)
+ {
+ indices() = other.indices();
+ return derived();
+ }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** This is a special case of the templated operator=. Its purpose is to
+ * prevent a default operator= from hiding the templated operator=.
+ */
+ Derived& operator=(const TranspositionsBase& other)
+ {
+ indices() = other.indices();
+ return derived();
+ }
+ #endif
+
+ /** \returns the number of transpositions */
+ inline Index size() const { return indices().size(); }
+
+ /** Direct access to the underlying index vector */
+ inline const Index& coeff(Index i) const { return indices().coeff(i); }
+ /** Direct access to the underlying index vector */
+ inline Index& coeffRef(Index i) { return indices().coeffRef(i); }
+ /** Direct access to the underlying index vector */
+ inline const Index& operator()(Index i) const { return indices()(i); }
+ /** Direct access to the underlying index vector */
+ inline Index& operator()(Index i) { return indices()(i); }
+ /** Direct access to the underlying index vector */
+ inline const Index& operator[](Index i) const { return indices()(i); }
+ /** Direct access to the underlying index vector */
+ inline Index& operator[](Index i) { return indices()(i); }
+
+ /** const version of indices(). */
+ const IndicesType& indices() const { return derived().indices(); }
+ /** \returns a reference to the stored array representing the transpositions. */
+ IndicesType& indices() { return derived().indices(); }
+
+ /** Resizes to given size. */
+ inline void resize(Index newSize)
+ {
+ indices().resize(newSize);
+ }
+
+  /** Sets \c *this to represent an identity transformation */
+ void setIdentity()
+ {
+ for(int i = 0; i < indices().size(); ++i)
+ coeffRef(i) = i;
+ }
+
+  // FIXME: do we want such methods?
+  // might be useful when the target matrix expression is complex, e.g.:
+ // object.matrix().block(..,..,..,..) = trans * object.matrix().block(..,..,..,..);
+ /*
+ template<typename MatrixType>
+ void applyForwardToRows(MatrixType& mat) const
+ {
+ for(Index k=0 ; k<size() ; ++k)
+ if(m_indices(k)!=k)
+ mat.row(k).swap(mat.row(m_indices(k)));
+ }
+
+ template<typename MatrixType>
+ void applyBackwardToRows(MatrixType& mat) const
+ {
+ for(Index k=size()-1 ; k>=0 ; --k)
+ if(m_indices(k)!=k)
+ mat.row(k).swap(mat.row(m_indices(k)));
+ }
+ */
+
+ /** \returns the inverse transformation */
+ inline Transpose<TranspositionsBase> inverse() const
+ { return Transpose<TranspositionsBase>(derived()); }
+
+  /** \returns the transpose transformation */
+ inline Transpose<TranspositionsBase> transpose() const
+ { return Transpose<TranspositionsBase>(derived()); }
+
+ protected:
+};
+
+namespace internal {
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
+struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
+{
+ typedef IndexType Index;
+ typedef Matrix<Index, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
+};
+}
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
+class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
+{
+ typedef internal::traits<Transpositions> Traits;
+ public:
+
+ typedef TranspositionsBase<Transpositions> Base;
+ typedef typename Traits::IndicesType IndicesType;
+ typedef typename IndicesType::Scalar Index;
+
+ inline Transpositions() {}
+
+ /** Copy constructor. */
+ template<typename OtherDerived>
+ inline Transpositions(const TranspositionsBase<OtherDerived>& other)
+ : m_indices(other.indices()) {}
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** Standard copy constructor. Defined only to prevent a default copy constructor
+ * from hiding the other templated constructor */
+ inline Transpositions(const Transpositions& other) : m_indices(other.indices()) {}
+ #endif
+
+ /** Generic constructor from expression of the transposition indices. */
+ template<typename Other>
+ explicit inline Transpositions(const MatrixBase<Other>& a_indices) : m_indices(a_indices)
+ {}
+
+ /** Copies the \a other transpositions into \c *this */
+ template<typename OtherDerived>
+ Transpositions& operator=(const TranspositionsBase<OtherDerived>& other)
+ {
+ return Base::operator=(other);
+ }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** This is a special case of the templated operator=. Its purpose is to
+ * prevent a default operator= from hiding the templated operator=.
+ */
+ Transpositions& operator=(const Transpositions& other)
+ {
+ m_indices = other.m_indices;
+ return *this;
+ }
+ #endif
+
+  /** Constructs an uninitialized sequence of transpositions of given size.
+ */
+ inline Transpositions(Index size) : m_indices(size)
+ {}
+
+ /** const version of indices(). */
+ const IndicesType& indices() const { return m_indices; }
+ /** \returns a reference to the stored array representing the transpositions. */
+ IndicesType& indices() { return m_indices; }
+
+ protected:
+
+ IndicesType m_indices;
+};
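+
+// A minimal usage sketch with illustrative indices, assuming a 3x3 matrix:
+//   Eigen::Transpositions<Eigen::Dynamic> tr(3);
+//   tr[0] = 1; tr[1] = 1; tr[2] = 2;       // first transposition swaps rows 0 and 1
+//   Eigen::Matrix3d A = Eigen::Matrix3d::Identity();
+//   Eigen::Matrix3d B = tr * A;            // rows 0 and 1 interchanged
+//   Eigen::Matrix3d C = A * tr;            // columns 0 and 1 interchanged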
+
+
+namespace internal {
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
+struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,_PacketAccess> >
+{
+ typedef IndexType Index;
+ typedef Map<const Matrix<Index,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
+};
+}
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int PacketAccess>
+class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess>
+ : public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess> >
+{
+ typedef internal::traits<Map> Traits;
+ public:
+
+ typedef TranspositionsBase<Map> Base;
+ typedef typename Traits::IndicesType IndicesType;
+ typedef typename IndicesType::Scalar Index;
+
+ inline Map(const Index* indicesPtr)
+ : m_indices(indicesPtr)
+ {}
+
+ inline Map(const Index* indicesPtr, Index size)
+ : m_indices(indicesPtr,size)
+ {}
+
+ /** Copies the \a other transpositions into \c *this */
+ template<typename OtherDerived>
+ Map& operator=(const TranspositionsBase<OtherDerived>& other)
+ {
+ return Base::operator=(other);
+ }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** This is a special case of the templated operator=. Its purpose is to
+ * prevent a default operator= from hiding the templated operator=.
+ */
+ Map& operator=(const Map& other)
+ {
+ m_indices = other.m_indices;
+ return *this;
+ }
+ #endif
+
+ /** const version of indices(). */
+ const IndicesType& indices() const { return m_indices; }
+
+ /** \returns a reference to the stored array representing the transpositions. */
+ IndicesType& indices() { return m_indices; }
+
+ protected:
+
+ IndicesType m_indices;
+};
+
+namespace internal {
+template<typename _IndicesType>
+struct traits<TranspositionsWrapper<_IndicesType> >
+{
+ typedef typename _IndicesType::Scalar Index;
+ typedef _IndicesType IndicesType;
+};
+}
+
+template<typename _IndicesType>
+class TranspositionsWrapper
+ : public TranspositionsBase<TranspositionsWrapper<_IndicesType> >
+{
+ typedef internal::traits<TranspositionsWrapper> Traits;
+ public:
+
+ typedef TranspositionsBase<TranspositionsWrapper> Base;
+ typedef typename Traits::IndicesType IndicesType;
+ typedef typename IndicesType::Scalar Index;
+
+ inline TranspositionsWrapper(IndicesType& a_indices)
+ : m_indices(a_indices)
+ {}
+
+ /** Copies the \a other transpositions into \c *this */
+ template<typename OtherDerived>
+ TranspositionsWrapper& operator=(const TranspositionsBase<OtherDerived>& other)
+ {
+ return Base::operator=(other);
+ }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** This is a special case of the templated operator=. Its purpose is to
+ * prevent a default operator= from hiding the templated operator=.
+ */
+ TranspositionsWrapper& operator=(const TranspositionsWrapper& other)
+ {
+ m_indices = other.m_indices;
+ return *this;
+ }
+ #endif
+
+ /** const version of indices(). */
+ const IndicesType& indices() const { return m_indices; }
+
+ /** \returns a reference to the stored array representing the transpositions. */
+ IndicesType& indices() { return m_indices; }
+
+ protected:
+
+ const typename IndicesType::Nested m_indices;
+};
+
+/** \returns the \a matrix with the \a transpositions applied to the columns.
+ */
+template<typename Derived, typename TranspositionsDerived>
+inline const internal::transposition_matrix_product_retval<TranspositionsDerived, Derived, OnTheRight>
+operator*(const MatrixBase<Derived>& matrix,
+ const TranspositionsBase<TranspositionsDerived> &transpositions)
+{
+ return internal::transposition_matrix_product_retval
+ <TranspositionsDerived, Derived, OnTheRight>
+ (transpositions.derived(), matrix.derived());
+}
+
+/** \returns the \a matrix with the \a transpositions applied to the rows.
+ */
+template<typename Derived, typename TranspositionDerived>
+inline const internal::transposition_matrix_product_retval
+ <TranspositionDerived, Derived, OnTheLeft>
+operator*(const TranspositionsBase<TranspositionDerived> &transpositions,
+ const MatrixBase<Derived>& matrix)
+{
+ return internal::transposition_matrix_product_retval
+ <TranspositionDerived, Derived, OnTheLeft>
+ (transpositions.derived(), matrix.derived());
+}
+
+namespace internal {
+
+template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
+struct traits<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
+{
+ typedef typename MatrixType::PlainObject ReturnType;
+};
+
+template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
+struct transposition_matrix_product_retval
+ : public ReturnByValue<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
+{
+ typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
+ typedef typename TranspositionType::Index Index;
+
+ transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix)
+ : m_transpositions(tr), m_matrix(matrix)
+ {}
+
+ inline Index rows() const { return m_matrix.rows(); }
+ inline Index cols() const { return m_matrix.cols(); }
+
+ template<typename Dest> inline void evalTo(Dest& dst) const
+ {
+ const Index size = m_transpositions.size();
+ Index j = 0;
+
+ if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix)))
+ dst = m_matrix;
+
+ for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
+ if((j=m_transpositions.coeff(k))!=k)
+ {
+ if(Side==OnTheLeft)
+ dst.row(k).swap(dst.row(j));
+ else if(Side==OnTheRight)
+ dst.col(k).swap(dst.col(j));
+ }
+ }
+
+ protected:
+ const TranspositionType& m_transpositions;
+ typename MatrixType::Nested m_matrix;
+};
+
+} // end namespace internal
+
+/* Template partial specialization for transposed/inverse transpositions */
+
+template<typename TranspositionsDerived>
+class Transpose<TranspositionsBase<TranspositionsDerived> >
+{
+ typedef TranspositionsDerived TranspositionType;
+ typedef typename TranspositionType::IndicesType IndicesType;
+ public:
+
+ Transpose(const TranspositionType& t) : m_transpositions(t) {}
+
+ inline int size() const { return m_transpositions.size(); }
+
+ /** \returns the \a matrix with the inverse transpositions applied to the columns.
+ */
+ template<typename Derived> friend
+ inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>
+ operator*(const MatrixBase<Derived>& matrix, const Transpose& trt)
+ {
+ return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>(trt.m_transpositions, matrix.derived());
+ }
+
+ /** \returns the \a matrix with the inverse transpositions applied to the rows.
+ */
+ template<typename Derived>
+ inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>
+ operator*(const MatrixBase<Derived>& matrix) const
+ {
+ return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>(m_transpositions, matrix.derived());
+ }
+
+ protected:
+ const TranspositionType& m_transpositions;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRANSPOSITIONS_H
diff --git a/third_party/eigen3/Eigen/src/Core/TriangularMatrix.h b/third_party/eigen3/Eigen/src/Core/TriangularMatrix.h
new file mode 100644
index 0000000000..1d6e346506
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/TriangularMatrix.h
@@ -0,0 +1,900 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRIANGULARMATRIX_H
+#define EIGEN_TRIANGULARMATRIX_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval;
+
+}
+
+/** \internal
+ *
+ * \class TriangularBase
+ * \ingroup Core_Module
+ *
+ * \brief Base class for triangular part in a matrix
+ */
+template<typename Derived> class TriangularBase : public EigenBase<Derived>
+{
+ public:
+
+ enum {
+ Mode = internal::traits<Derived>::Mode,
+ CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
+ RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+ ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+ MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime
+ };
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Index Index;
+ typedef typename internal::traits<Derived>::DenseMatrixType DenseMatrixType;
+ typedef DenseMatrixType DenseType;
+
+ EIGEN_DEVICE_FUNC
+ inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
+
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return derived().rows(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return derived().cols(); }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const { return derived().outerStride(); }
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const { return derived().innerStride(); }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar coeff(Index row, Index col) const { return derived().coeff(row,col); }
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index row, Index col) { return derived().coeffRef(row,col); }
+
+ /** \see MatrixBase::copyCoeff(row,col)
+ */
+ template<typename Other>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, Other& other)
+ {
+ derived().coeffRef(row, col) = other.coeff(row, col);
+ }
+
+ EIGEN_DEVICE_FUNC
+ inline Scalar operator()(Index row, Index col) const
+ {
+ check_coordinates(row, col);
+ return coeff(row,col);
+ }
+ EIGEN_DEVICE_FUNC
+ inline Scalar& operator()(Index row, Index col)
+ {
+ check_coordinates(row, col);
+ return coeffRef(row,col);
+ }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ EIGEN_DEVICE_FUNC
+ inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+ EIGEN_DEVICE_FUNC
+ inline Derived& derived() { return *static_cast<Derived*>(this); }
+ #endif // not EIGEN_PARSED_BY_DOXYGEN
+
+ template<typename DenseDerived>
+ EIGEN_DEVICE_FUNC
+ void evalTo(MatrixBase<DenseDerived> &other) const;
+ template<typename DenseDerived>
+ EIGEN_DEVICE_FUNC
+ void evalToLazy(MatrixBase<DenseDerived> &other) const;
+
+ EIGEN_DEVICE_FUNC
+ DenseMatrixType toDenseMatrix() const
+ {
+ DenseMatrixType res(rows(), cols());
+ evalToLazy(res);
+ return res;
+ }
+
+ protected:
+
+ void check_coordinates(Index row, Index col) const
+ {
+ EIGEN_ONLY_USED_FOR_DEBUG(row);
+ EIGEN_ONLY_USED_FOR_DEBUG(col);
+ eigen_assert(col>=0 && col<cols() && row>=0 && row<rows());
+ const int mode = int(Mode) & ~SelfAdjoint;
+ EIGEN_ONLY_USED_FOR_DEBUG(mode);
+ eigen_assert((mode==Upper && col>=row)
+ || (mode==Lower && col<=row)
+ || ((mode==StrictlyUpper || mode==UnitUpper) && col>row)
+ || ((mode==StrictlyLower || mode==UnitLower) && col<row));
+ }
+
+ #ifdef EIGEN_INTERNAL_DEBUGGING
+ void check_coordinates_internal(Index row, Index col) const
+ {
+ check_coordinates(row, col);
+ }
+ #else
+ void check_coordinates_internal(Index , Index ) const {}
+ #endif
+
+};
+
+/** \class TriangularView
+ * \ingroup Core_Module
+ *
+ * \brief Expression of a triangular part of a matrix
+ *
+ * \param MatrixType the type of the object in which we are taking the triangular part
+ * \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
+ * #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
+ * This is in fact a bit field; it must have either #Upper or #Lower,
+ * and additionally it may have #UnitDiag or #ZeroDiag or neither.
+ *
+ * This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
+ * matrices one should speak of "trapezoid" parts. This class is the return type
+ * of MatrixBase::triangularView() and most of the time this is the only way it is used.
+ *
+ * \sa MatrixBase::triangularView()
+ */
+namespace internal {
+template<typename MatrixType, unsigned int _Mode>
+struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
+{
+ typedef typename nested<MatrixType>::type MatrixTypeNested;
+ typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
+ typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
+ typedef MatrixType ExpressionType;
+ typedef typename MatrixType::PlainObject DenseMatrixType;
+ enum {
+ Mode = _Mode,
+ Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode,
+ CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
+ };
+};
+}
+
+template<int Mode, bool LhsIsTriangular,
+ typename Lhs, bool LhsIsVector,
+ typename Rhs, bool RhsIsVector>
+struct TriangularProduct;
+
+template<typename _MatrixType, unsigned int _Mode> class TriangularView
+ : public TriangularBase<TriangularView<_MatrixType, _Mode> >
+{
+ public:
+
+ typedef TriangularBase<TriangularView> Base;
+ typedef typename internal::traits<TriangularView>::Scalar Scalar;
+
+ typedef _MatrixType MatrixType;
+ typedef typename internal::traits<TriangularView>::DenseMatrixType DenseMatrixType;
+ typedef DenseMatrixType PlainObject;
+
+ protected:
+ typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested;
+ typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef;
+ typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
+
+ typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
+
+ public:
+ using Base::evalToLazy;
+
+
+ typedef typename internal::traits<TriangularView>::StorageKind StorageKind;
+ typedef typename internal::traits<TriangularView>::Index Index;
+
+ enum {
+ Mode = _Mode,
+ TransposeMode = (Mode & Upper ? Lower : 0)
+ | (Mode & Lower ? Upper : 0)
+ | (Mode & (UnitDiag))
+ | (Mode & (ZeroDiag))
+ };
+
+ EIGEN_DEVICE_FUNC
+ inline TriangularView(const MatrixType& matrix) : m_matrix(matrix)
+ {}
+
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return m_matrix.rows(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return m_matrix.cols(); }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const { return m_matrix.outerStride(); }
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const { return m_matrix.innerStride(); }
+
+ /** \sa MatrixBase::operator+=() */
+ template<typename Other>
+ EIGEN_DEVICE_FUNC
+ TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
+ /** \sa MatrixBase::operator-=() */
+ template<typename Other>
+ EIGEN_DEVICE_FUNC
+ TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
+ /** \sa MatrixBase::operator*=() */
+ EIGEN_DEVICE_FUNC
+ TriangularView& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; }
+ /** \sa MatrixBase::operator/=() */
+ EIGEN_DEVICE_FUNC
+ TriangularView& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; }
+
+ /** \sa MatrixBase::fill() */
+ EIGEN_DEVICE_FUNC
+ void fill(const Scalar& value) { setConstant(value); }
+ /** \sa MatrixBase::setConstant() */
+ EIGEN_DEVICE_FUNC
+ TriangularView& setConstant(const Scalar& value)
+ { return *this = MatrixType::Constant(rows(), cols(), value); }
+ /** \sa MatrixBase::setZero() */
+ EIGEN_DEVICE_FUNC
+ TriangularView& setZero() { return setConstant(Scalar(0)); }
+ /** \sa MatrixBase::setOnes() */
+ EIGEN_DEVICE_FUNC
+ TriangularView& setOnes() { return setConstant(Scalar(1)); }
+
+ /** \sa MatrixBase::coeff()
+ * \warning the coordinates must fit into the referenced triangular part
+ */
+ EIGEN_DEVICE_FUNC
+ inline Scalar coeff(Index row, Index col) const
+ {
+ Base::check_coordinates_internal(row, col);
+ return m_matrix.coeff(row, col);
+ }
+
+ /** \sa MatrixBase::coeffRef()
+ * \warning the coordinates must fit into the referenced triangular part
+ */
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index row, Index col)
+ {
+ Base::check_coordinates_internal(row, col);
+ return m_matrix.const_cast_derived().coeffRef(row, col);
+ }
+
+ EIGEN_DEVICE_FUNC
+ const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
+ EIGEN_DEVICE_FUNC
+ MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
+
+ /** Assigns a triangular matrix to a triangular part of a dense matrix */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ TriangularView& operator=(const TriangularBase<OtherDerived>& other);
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ TriangularView& operator=(const MatrixBase<OtherDerived>& other);
+
+ EIGEN_DEVICE_FUNC
+ TriangularView& operator=(const TriangularView& other)
+ { return *this = other.nestedExpression(); }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void lazyAssign(const TriangularBase<OtherDerived>& other);
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void lazyAssign(const MatrixBase<OtherDerived>& other);
+
+ /** \sa MatrixBase::conjugate() */
+ EIGEN_DEVICE_FUNC
+ inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
+ { return m_matrix.conjugate(); }
+ /** \sa MatrixBase::conjugate() const */
+ EIGEN_DEVICE_FUNC
+ inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
+ { return m_matrix.conjugate(); }
+
+ /** \sa MatrixBase::adjoint() const */
+ EIGEN_DEVICE_FUNC
+ inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
+ { return m_matrix.adjoint(); }
+
+ /** \sa MatrixBase::transpose() */
+ EIGEN_DEVICE_FUNC
+ inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose()
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+ return m_matrix.const_cast_derived().transpose();
+ }
+ /** \sa MatrixBase::transpose() const */
+ EIGEN_DEVICE_FUNC
+ inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
+ {
+ return m_matrix.transpose();
+ }
+
+ /** Efficient triangular matrix times vector/matrix product */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ TriangularProduct<Mode,true,MatrixType,false,OtherDerived, OtherDerived::IsVectorAtCompileTime>
+ operator*(const MatrixBase<OtherDerived>& rhs) const
+ {
+ return TriangularProduct
+ <Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime>
+ (m_matrix, rhs.derived());
+ }
+
+ /** Efficient vector/matrix times triangular matrix product */
+ template<typename OtherDerived> friend
+ EIGEN_DEVICE_FUNC
+ TriangularProduct<Mode,false,OtherDerived,OtherDerived::IsVectorAtCompileTime,MatrixType,false>
+ operator*(const MatrixBase<OtherDerived>& lhs, const TriangularView& rhs)
+ {
+ return TriangularProduct
+ <Mode,false,OtherDerived,OtherDerived::IsVectorAtCompileTime,MatrixType,false>
+ (lhs.derived(),rhs.m_matrix);
+ }
+
+ #ifdef EIGEN2_SUPPORT
+ template<typename OtherDerived>
+ struct eigen2_product_return_type
+ {
+ typedef typename TriangularView<MatrixType,Mode>::DenseMatrixType DenseMatrixType;
+ typedef typename OtherDerived::PlainObject::DenseType OtherPlainObject;
+ typedef typename ProductReturnType<DenseMatrixType, OtherPlainObject>::Type ProdRetType;
+ typedef typename ProdRetType::PlainObject type;
+ };
+ template<typename OtherDerived>
+ const typename eigen2_product_return_type<OtherDerived>::type
+ operator*(const EigenBase<OtherDerived>& rhs) const
+ {
+ typename OtherDerived::PlainObject::DenseType rhsPlainObject;
+ rhs.evalTo(rhsPlainObject);
+ return this->toDenseMatrix() * rhsPlainObject;
+ }
+ template<typename OtherMatrixType>
+ bool isApprox(const TriangularView<OtherMatrixType, Mode>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
+ {
+ return this->toDenseMatrix().isApprox(other.toDenseMatrix(), precision);
+ }
+ template<typename OtherDerived>
+ bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
+ {
+ return this->toDenseMatrix().isApprox(other, precision);
+ }
+ #endif // EIGEN2_SUPPORT
+
+ template<int Side, typename Other>
+ EIGEN_DEVICE_FUNC
+ inline const internal::triangular_solve_retval<Side,TriangularView, Other>
+ solve(const MatrixBase<Other>& other) const;
+
+ template<int Side, typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void solveInPlace(const MatrixBase<OtherDerived>& other) const;
+
+ template<typename Other>
+ EIGEN_DEVICE_FUNC
+ inline const internal::triangular_solve_retval<OnTheLeft,TriangularView, Other>
+ solve(const MatrixBase<Other>& other) const
+ { return solve<OnTheLeft>(other); }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void solveInPlace(const MatrixBase<OtherDerived>& other) const
+ { return solveInPlace<OnTheLeft>(other); }
+
+ EIGEN_DEVICE_FUNC
+ const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
+ {
+ EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
+ return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
+ }
+ EIGEN_DEVICE_FUNC
+ SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
+ {
+ EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
+ return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
+ }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(TriangularBase<OtherDerived> const & other)
+ {
+ TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
+ }
+
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(MatrixBase<OtherDerived> const & other)
+ {
+ SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix));
+ TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived());
+ }
+
+ EIGEN_DEVICE_FUNC
+ Scalar determinant() const
+ {
+ if (Mode & UnitDiag)
+ return 1;
+ else if (Mode & ZeroDiag)
+ return 0;
+ else
+ return m_matrix.diagonal().prod();
+ }
+
+ // TODO simplify the following:
+ template<typename ProductDerived, typename Lhs, typename Rhs>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
+ {
+ setZero();
+ return assignProduct(other,1);
+ }
+
+ template<typename ProductDerived, typename Lhs, typename Rhs>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
+ {
+ return assignProduct(other,1);
+ }
+
+ template<typename ProductDerived, typename Lhs, typename Rhs>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
+ {
+ return assignProduct(other,-1);
+ }
+
+
+ template<typename ProductDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
+ {
+ setZero();
+ return assignProduct(other,other.alpha());
+ }
+
+ template<typename ProductDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
+ {
+ return assignProduct(other,other.alpha());
+ }
+
+ template<typename ProductDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
+ {
+ return assignProduct(other,-other.alpha());
+ }
+
+ protected:
+
+ template<typename ProductDerived, typename Lhs, typename Rhs>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
+
+ MatrixTypeNested m_matrix;
+};
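+
+// A minimal usage sketch with an illustrative matrix; the triangular view gives
+// access to dense evaluation, triangular products and triangular solves:
+//   Eigen::Matrix3d A = Eigen::Matrix3d::Random();
+//   Eigen::Vector3d b(1.0, 2.0, 3.0);
+//   Eigen::Matrix3d U = A.triangularView<Eigen::Upper>();          // upper part, zeros below
+//   Eigen::Vector3d y = A.triangularView<Eigen::Upper>() * b;      // triangular product
+//   Eigen::Vector3d x = A.triangularView<Eigen::Upper>().solve(b); // back substitution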
+
+/***************************************************************************
+* Implementation of triangular evaluation/assignment
+***************************************************************************/
+
+namespace internal {
+
+template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount, bool ClearOpposite>
+struct triangular_assignment_selector
+{
+ enum {
+ col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
+ row = (UnrollCount-1) % Derived1::RowsAtCompileTime
+ };
+
+ typedef typename Derived1::Scalar Scalar;
+
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
+
+ eigen_assert( Mode == Upper || Mode == Lower
+ || Mode == StrictlyUpper || Mode == StrictlyLower
+ || Mode == UnitUpper || Mode == UnitLower);
+ if((Mode == Upper && row <= col)
+ || (Mode == Lower && row >= col)
+ || (Mode == StrictlyUpper && row < col)
+ || (Mode == StrictlyLower && row > col)
+ || (Mode == UnitUpper && row < col)
+ || (Mode == UnitLower && row > col))
+ dst.copyCoeff(row, col, src);
+ else if(ClearOpposite)
+ {
+ if (Mode&UnitDiag && row==col)
+ dst.coeffRef(row, col) = Scalar(1);
+ else
+ dst.coeffRef(row, col) = Scalar(0);
+ }
+ }
+};
+
+// prevent buggy user code from causing an infinite recursion
+template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
+{
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &, const Derived2 &) {}
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite>
+{
+ typedef typename Derived1::Index Index;
+ typedef typename Derived1::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ for(Index j = 0; j < dst.cols(); ++j)
+ {
+ Index maxi = (std::min)(j, dst.rows()-1);
+ for(Index i = 0; i <= maxi; ++i)
+ dst.copyCoeff(i, j, src);
+ if (ClearOpposite)
+ for(Index i = maxi+1; i < dst.rows(); ++i)
+ dst.coeffRef(i, j) = Scalar(0);
+ }
+ }
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
+{
+ typedef typename Derived1::Index Index;
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ for(Index j = 0; j < dst.cols(); ++j)
+ {
+ for(Index i = j; i < dst.rows(); ++i)
+ dst.copyCoeff(i, j, src);
+ Index maxi = (std::min)(j, dst.rows());
+ if (ClearOpposite)
+ for(Index i = 0; i < maxi; ++i)
+ dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
+ }
+ }
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
+{
+ typedef typename Derived1::Index Index;
+ typedef typename Derived1::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ for(Index j = 0; j < dst.cols(); ++j)
+ {
+ Index maxi = (std::min)(j, dst.rows());
+ for(Index i = 0; i < maxi; ++i)
+ dst.copyCoeff(i, j, src);
+ if (ClearOpposite)
+ for(Index i = maxi; i < dst.rows(); ++i)
+ dst.coeffRef(i, j) = Scalar(0);
+ }
+ }
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
+{
+ typedef typename Derived1::Index Index;
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ for(Index j = 0; j < dst.cols(); ++j)
+ {
+ for(Index i = j+1; i < dst.rows(); ++i)
+ dst.copyCoeff(i, j, src);
+ Index maxi = (std::min)(j, dst.rows()-1);
+ if (ClearOpposite)
+ for(Index i = 0; i <= maxi; ++i)
+ dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
+ }
+ }
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
+{
+ typedef typename Derived1::Index Index;
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ for(Index j = 0; j < dst.cols(); ++j)
+ {
+ Index maxi = (std::min)(j, dst.rows());
+ for(Index i = 0; i < maxi; ++i)
+ dst.copyCoeff(i, j, src);
+ if (ClearOpposite)
+ {
+ for(Index i = maxi+1; i < dst.rows(); ++i)
+ dst.coeffRef(i, j) = 0;
+ }
+ }
+ dst.diagonal().setOnes();
+ }
+};
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
+{
+ typedef typename Derived1::Index Index;
+ EIGEN_DEVICE_FUNC
+ static inline void run(Derived1 &dst, const Derived2 &src)
+ {
+ for(Index j = 0; j < dst.cols(); ++j)
+ {
+ Index maxi = (std::min)(j, dst.rows());
+ for(Index i = maxi+1; i < dst.rows(); ++i)
+ dst.copyCoeff(i, j, src);
+ if (ClearOpposite)
+ {
+ for(Index i = 0; i < maxi; ++i)
+ dst.coeffRef(i, j) = 0;
+ }
+ }
+ dst.diagonal().setOnes();
+ }
+};
+
+} // end namespace internal
+
+// FIXME should we keep that possibility
+template<typename MatrixType, unsigned int Mode>
+template<typename OtherDerived>
+inline TriangularView<MatrixType, Mode>&
+TriangularView<MatrixType, Mode>::operator=(const MatrixBase<OtherDerived>& other)
+{
+ if(OtherDerived::Flags & EvalBeforeAssigningBit)
+ {
+ typename internal::plain_matrix_type<OtherDerived>::type other_evaluated(other.rows(), other.cols());
+ other_evaluated.template triangularView<Mode>().lazyAssign(other.derived());
+ lazyAssign(other_evaluated);
+ }
+ else
+ lazyAssign(other.derived());
+ return *this;
+}
+
+// FIXME should we keep that possibility
+template<typename MatrixType, unsigned int Mode>
+template<typename OtherDerived>
+void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived>& other)
+{
+ enum {
+ unroll = MatrixType::SizeAtCompileTime != Dynamic
+ && internal::traits<OtherDerived>::CoeffReadCost != Dynamic
+ && MatrixType::SizeAtCompileTime*internal::traits<OtherDerived>::CoeffReadCost/2 <= EIGEN_UNROLLING_LIMIT
+ };
+ eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
+
+ internal::triangular_assignment_selector
+ <MatrixType, OtherDerived, int(Mode),
+ unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
+ false // do not change the opposite triangular part
+ >::run(m_matrix.const_cast_derived(), other.derived());
+}
+
+
+
+template<typename MatrixType, unsigned int Mode>
+template<typename OtherDerived>
+inline TriangularView<MatrixType, Mode>&
+TriangularView<MatrixType, Mode>::operator=(const TriangularBase<OtherDerived>& other)
+{
+ eigen_assert(Mode == int(OtherDerived::Mode));
+ if(internal::traits<OtherDerived>::Flags & EvalBeforeAssigningBit)
+ {
+ typename OtherDerived::DenseMatrixType other_evaluated(other.rows(), other.cols());
+ other_evaluated.template triangularView<Mode>().lazyAssign(other.derived().nestedExpression());
+ lazyAssign(other_evaluated);
+ }
+ else
+ lazyAssign(other.derived().nestedExpression());
+ return *this;
+}
+
+template<typename MatrixType, unsigned int Mode>
+template<typename OtherDerived>
+void TriangularView<MatrixType, Mode>::lazyAssign(const TriangularBase<OtherDerived>& other)
+{
+ enum {
+ unroll = MatrixType::SizeAtCompileTime != Dynamic
+ && internal::traits<OtherDerived>::CoeffReadCost != Dynamic
+ && MatrixType::SizeAtCompileTime * internal::traits<OtherDerived>::CoeffReadCost / 2
+ <= EIGEN_UNROLLING_LIMIT
+ };
+ eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
+
+ internal::triangular_assignment_selector
+ <MatrixType, OtherDerived, int(Mode),
+ unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
+ false // preserve the opposite triangular part
+ >::run(m_matrix.const_cast_derived(), other.derived().nestedExpression());
+}
+
+/***************************************************************************
+* Implementation of TriangularBase methods
+***************************************************************************/
+
+/** Assigns a triangular or selfadjoint matrix to a dense matrix.
+ * If the matrix is triangular, the opposite part is set to zero. */
+template<typename Derived>
+template<typename DenseDerived>
+void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
+{
+ if(internal::traits<Derived>::Flags & EvalBeforeAssigningBit)
+ {
+ typename internal::plain_matrix_type<Derived>::type other_evaluated(rows(), cols());
+ evalToLazy(other_evaluated);
+ other.derived().swap(other_evaluated);
+ }
+ else
+ evalToLazy(other.derived());
+}
+
+/** Assigns a triangular or selfadjoint matrix to a dense matrix.
+ * If the matrix is triangular, the opposite part is set to zero. */
+template<typename Derived>
+template<typename DenseDerived>
+void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
+{
+ enum {
+ unroll = DenseDerived::SizeAtCompileTime != Dynamic
+ && internal::traits<Derived>::CoeffReadCost != Dynamic
+ && DenseDerived::SizeAtCompileTime * internal::traits<Derived>::CoeffReadCost / 2
+ <= EIGEN_UNROLLING_LIMIT
+ };
+ other.derived().resize(this->rows(), this->cols());
+
+ internal::triangular_assignment_selector
+ <DenseDerived, typename internal::traits<Derived>::MatrixTypeNestedCleaned, Derived::Mode,
+ unroll ? int(DenseDerived::SizeAtCompileTime) : Dynamic,
+ true // clear the opposite triangular part
+ >::run(other.derived(), derived().nestedExpression());
+}
+
+/***************************************************************************
+* Implementation of TriangularView methods
+***************************************************************************/
+
+/***************************************************************************
+* Implementation of MatrixBase methods
+***************************************************************************/
+
+#ifdef EIGEN2_SUPPORT
+
+// implementation of part<>(), including the SelfAdjoint case.
+
+namespace internal {
+template<typename MatrixType, unsigned int Mode>
+struct eigen2_part_return_type
+{
+ typedef TriangularView<MatrixType, Mode> type;
+};
+
+template<typename MatrixType>
+struct eigen2_part_return_type<MatrixType, SelfAdjoint>
+{
+ typedef SelfAdjointView<MatrixType, Upper> type;
+};
+}
+
+/** \deprecated use MatrixBase::triangularView() */
+template<typename Derived>
+template<unsigned int Mode>
+const typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part() const
+{
+ return derived();
+}
+
+/** \deprecated use MatrixBase::triangularView() */
+template<typename Derived>
+template<unsigned int Mode>
+typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part()
+{
+ return derived();
+}
+#endif
+
+/**
+ * \returns an expression of a triangular view extracted from the current matrix
+ *
+ * The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
+ * \c #Lower, \c #StrictlyLower, \c #UnitLower.
+ *
+ * Example: \include MatrixBase_extract.cpp
+ * Output: \verbinclude MatrixBase_extract.out
+ *
+ * \sa class TriangularView
+ */
+template<typename Derived>
+template<unsigned int Mode>
+typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type
+MatrixBase<Derived>::triangularView()
+{
+ return derived();
+}
+
+/** This is the const version of MatrixBase::triangularView() */
+template<typename Derived>
+template<unsigned int Mode>
+typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type
+MatrixBase<Derived>::triangularView() const
+{
+ return derived();
+}
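+
+// Illustrative usage of triangularView() (added comment, not part of the upstream header);
+// the concrete matrix below is an assumption made for the example only:
+//   Eigen::Matrix3d m = Eigen::Matrix3d::Random();
+//   Eigen::Matrix3d u = m.triangularView<Eigen::Upper>();    // dense copy, lower part set to zero
+//   m.triangularView<Eigen::StrictlyLower>().setZero();      // writes only the strict lower part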
+
+/** \returns true if *this is approximately equal to an upper triangular matrix,
+ * within the precision given by \a prec.
+ *
+ * \sa isLowerTriangular()
+ */
+template<typename Derived>
+bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
+{
+ using std::abs;
+ RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
+ for(Index j = 0; j < cols(); ++j)
+ {
+ Index maxi = (std::min)(j, rows()-1);
+ for(Index i = 0; i <= maxi; ++i)
+ {
+ RealScalar absValue = abs(coeff(i,j));
+ if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
+ }
+ }
+ RealScalar threshold = maxAbsOnUpperPart * prec;
+ for(Index j = 0; j < cols(); ++j)
+ for(Index i = j+1; i < rows(); ++i)
+ if(abs(coeff(i, j)) > threshold) return false;
+ return true;
+}
+
+/** \returns true if *this is approximately equal to a lower triangular matrix,
+ * within the precision given by \a prec.
+ *
+ * \sa isUpperTriangular()
+ */
+template<typename Derived>
+bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
+{
+ using std::abs;
+ RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
+ for(Index j = 0; j < cols(); ++j)
+ for(Index i = j; i < rows(); ++i)
+ {
+ RealScalar absValue = abs(coeff(i,j));
+ if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
+ }
+ RealScalar threshold = maxAbsOnLowerPart * prec;
+ for(Index j = 1; j < cols(); ++j)
+ {
+ Index maxi = (std::min)(j, rows()-1);
+ for(Index i = 0; i < maxi; ++i)
+ if(abs(coeff(i, j)) > threshold) return false;
+ }
+ return true;
+}
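+
+// Small sketch (added comment, not upstream): on an exactly triangular input these predicates
+// behave as expected, e.g.
+//   Eigen::MatrixXd a = Eigen::MatrixXd::Identity(3,3);
+//   a.isUpperTriangular();  // true
+//   a.isLowerTriangular();  // true, the identity is both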
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULARMATRIX_H
diff --git a/third_party/eigen3/Eigen/src/Core/VectorBlock.h b/third_party/eigen3/Eigen/src/Core/VectorBlock.h
new file mode 100644
index 0000000000..216c568c4f
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/VectorBlock.h
@@ -0,0 +1,97 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_VECTORBLOCK_H
+#define EIGEN_VECTORBLOCK_H
+
+namespace Eigen {
+
+/** \class VectorBlock
+ * \ingroup Core_Module
+ *
+ * \brief Expression of a fixed-size or dynamic-size sub-vector
+ *
+ * \param VectorType the type of the object in which we are taking a sub-vector
+ * \param Size size of the sub-vector we are taking at compile time (optional)
+ *
+ * This class represents an expression of either a fixed-size or dynamic-size sub-vector.
+ * It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and
+ * most of the time this is the only way it is used.
+ *
+ * However, if you want to directly manipulate sub-vector expressions,
+ * for instance if you want to write a function returning such an expression, you
+ * will need to use this class.
+ *
+ * Here is an example illustrating the dynamic case:
+ * \include class_VectorBlock.cpp
+ * Output: \verbinclude class_VectorBlock.out
+ *
+ * \note Even though this expression has dynamic size, in the case where \a VectorType
+ * has fixed size, this expression inherits a fixed maximal size which means that evaluating
+ * it does not cause a dynamic memory allocation.
+ *
+ * Here is an example illustrating the fixed-size case:
+ * \include class_FixedVectorBlock.cpp
+ * Output: \verbinclude class_FixedVectorBlock.out
+ *
+ * \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)
+ */
+
+namespace internal {
+template<typename VectorType, int Size>
+struct traits<VectorBlock<VectorType, Size> >
+ : public traits<Block<VectorType,
+ traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
+ traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
+{
+};
+}
+
+template<typename VectorType, int Size> class VectorBlock
+ : public Block<VectorType,
+ internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
+ internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1>
+{
+ typedef Block<VectorType,
+ internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
+ internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1> Base;
+ enum {
+ IsColVector = !(internal::traits<VectorType>::Flags & RowMajorBit)
+ };
+ public:
+ EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock)
+
+ using Base::operator=;
+
+ /** Dynamic-size constructor
+ */
+ EIGEN_DEVICE_FUNC
+ inline VectorBlock(VectorType& vector, Index start, Index size)
+ : Base(vector,
+ IsColVector ? start : 0, IsColVector ? 0 : start,
+ IsColVector ? size : 1, IsColVector ? 1 : size)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
+ }
+
+ /** Fixed-size constructor
+ */
+ EIGEN_DEVICE_FUNC
+ inline VectorBlock(VectorType& vector, Index start)
+ : Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
+ }
+};
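+
+// Illustrative usage (added comment, not part of the upstream header): VectorBlock is normally
+// obtained through segment(); the vector below is an assumption made for the example only:
+//   Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(6, 0.0, 5.0);
+//   v.segment(1, 3).setZero();                 // dynamic-size sub-vector covering entries 1..3
+//   Eigen::Vector2d head2 = v.segment<2>(0);   // fixed-size sub-vector of length 2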
+
+
+} // end namespace Eigen
+
+#endif // EIGEN_VECTORBLOCK_H
diff --git a/third_party/eigen3/Eigen/src/Core/VectorwiseOp.h b/third_party/eigen3/Eigen/src/Core/VectorwiseOp.h
new file mode 100644
index 0000000000..f25ddca174
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/VectorwiseOp.h
@@ -0,0 +1,651 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PARTIAL_REDUX_H
+#define EIGEN_PARTIAL_REDUX_H
+
+namespace Eigen {
+
+/** \class PartialReduxExpr
+ * \ingroup Core_Module
+ *
+ * \brief Generic expression of a partially reduxed matrix
+ *
+ * \tparam MatrixType the type of the matrix to which the redux operation is applied
+ * \tparam MemberOp type of the member functor
+ * \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
+ *
+ * This class represents an expression of a partial redux operator of a matrix.
+ * It is the return type of some VectorwiseOp functions,
+ * and most of the time this is the only way it is used.
+ *
+ * \sa class VectorwiseOp
+ */
+
+template< typename MatrixType, typename MemberOp, int Direction>
+class PartialReduxExpr;
+
+namespace internal {
+template<typename MatrixType, typename MemberOp, int Direction>
+struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
+ : traits<MatrixType>
+{
+ typedef typename MemberOp::result_type Scalar;
+ typedef typename traits<MatrixType>::StorageKind StorageKind;
+ typedef typename traits<MatrixType>::XprKind XprKind;
+ typedef typename MatrixType::Scalar InputScalar;
+ typedef typename nested<MatrixType>::type MatrixTypeNested;
+ typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
+ enum {
+ RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
+ MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
+ Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits,
+ Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0),
+ TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
+ };
+ #if EIGEN_GNUC_AT_LEAST(3,4)
+ typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
+ #else
+ typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
+ #endif
+ enum {
+ CoeffReadCost = TraversalSize==Dynamic ? Dynamic
+ : TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
+ };
+};
+}
+
+template< typename MatrixType, typename MemberOp, int Direction>
+class PartialReduxExpr : internal::no_assignment_operator,
+ public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type
+{
+ public:
+
+ typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)
+ typedef typename internal::traits<PartialReduxExpr>::MatrixTypeNested MatrixTypeNested;
+ typedef typename internal::traits<PartialReduxExpr>::_MatrixTypeNested _MatrixTypeNested;
+
+ PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
+ : m_matrix(mat), m_functor(func) {}
+
+ Index rows() const { return (Direction==Vertical ? 1 : m_matrix.rows()); }
+ Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
+
+ EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
+ {
+ if (Direction==Vertical)
+ return m_functor(m_matrix.col(j));
+ else
+ return m_functor(m_matrix.row(i));
+ }
+
+ const Scalar coeff(Index index) const
+ {
+ if (Direction==Vertical)
+ return m_functor(m_matrix.col(index));
+ else
+ return m_functor(m_matrix.row(index));
+ }
+
+ protected:
+ MatrixTypeNested m_matrix;
+ const MemberOp m_functor;
+};
+
+#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
+ template <typename ResultType> \
+ struct member_##MEMBER { \
+ EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
+ typedef ResultType result_type; \
+ template<typename Scalar, int Size> struct Cost \
+ { enum { value = COST }; }; \
+ template<typename XprType> \
+ EIGEN_STRONG_INLINE ResultType operator()(const XprType& mat) const \
+ { return mat.MEMBER(); } \
+ }
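+
+// Added explanatory sketch (not upstream): each EIGEN_MEMBER_FUNCTOR(MEMBER, COST) invocation below
+// expands to a small functor forwarding to the corresponding member function; ignoring the
+// empty-struct-ctor and inlining macros, member_sum is roughly
+//   template <typename ResultType>
+//   struct member_sum {
+//     typedef ResultType result_type;
+//     template<typename Scalar, int Size> struct Cost { enum { value = (Size-1)*NumTraits<Scalar>::AddCost }; };
+//     template<typename XprType> ResultType operator()(const XprType& mat) const { return mat.sum(); }
+//   };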
+
+namespace internal {
+
+EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(stableNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(blueNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * functor_traits<scalar_hypot_op<Scalar> >::Cost );
+EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost);
+EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
+
+
+template <typename BinaryOp, typename Scalar>
+struct member_redux {
+ typedef typename result_of<
+ BinaryOp(Scalar)
+ >::type result_type;
+ template<typename _Scalar, int Size> struct Cost
+ { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
+ member_redux(const BinaryOp func) : m_functor(func) {}
+ template<typename Derived>
+ inline result_type operator()(const DenseBase<Derived>& mat) const
+ { return mat.redux(m_functor); }
+ const BinaryOp m_functor;
+};
+}
+
+/** \class VectorwiseOp
+ * \ingroup Core_Module
+ *
+ * \brief Pseudo expression providing partial reduction operations
+ *
+ * \param ExpressionType the type of the object on which to do partial reductions
+ * \param Direction indicates the direction of the redux (#Vertical or #Horizontal)
+ *
+ * This class represents a pseudo expression with partial reduction features.
+ * It is the return type of DenseBase::colwise() and DenseBase::rowwise()
+ * and most of the time this is the only way it is used.
+ *
+ * Example: \include MatrixBase_colwise.cpp
+ * Output: \verbinclude MatrixBase_colwise.out
+ *
+ * \sa DenseBase::colwise(), DenseBase::rowwise(), class PartialReduxExpr
+ */
+template<typename ExpressionType, int Direction> class VectorwiseOp
+{
+ public:
+
+ typedef typename ExpressionType::Scalar Scalar;
+ typedef typename ExpressionType::RealScalar RealScalar;
+ typedef typename ExpressionType::Index Index;
+ typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
+ ExpressionType, ExpressionType&>::type ExpressionTypeNested;
+ typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
+
+ template<template<typename _Scalar> class Functor,
+ typename Scalar=typename internal::traits<ExpressionType>::Scalar> struct ReturnType
+ {
+ typedef PartialReduxExpr<ExpressionType,
+ Functor<Scalar>,
+ Direction
+ > Type;
+ };
+
+ template<typename BinaryOp> struct ReduxReturnType
+ {
+ typedef PartialReduxExpr<ExpressionType,
+ internal::member_redux<BinaryOp,typename internal::traits<ExpressionType>::Scalar>,
+ Direction
+ > Type;
+ };
+
+ enum {
+ IsVertical = (Direction==Vertical) ? 1 : 0,
+ IsHorizontal = (Direction==Horizontal) ? 1 : 0
+ };
+
+ protected:
+
+ /** \internal
+ * \returns the i-th subvector according to the \c Direction */
+ typedef typename internal::conditional<Direction==Vertical,
+ typename ExpressionType::ColXpr,
+ typename ExpressionType::RowXpr>::type SubVector;
+ SubVector subVector(Index i)
+ {
+ return SubVector(m_matrix.derived(),i);
+ }
+
+ /** \internal
+ * \returns the number of subvectors in the direction \c Direction */
+ Index subVectors() const
+ { return Direction==Vertical?m_matrix.cols():m_matrix.rows(); }
+
+ template<typename OtherDerived> struct ExtendedType {
+ typedef Replicate<OtherDerived,
+ Direction==Vertical ? 1 : ExpressionType::RowsAtCompileTime,
+ Direction==Horizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;
+ };
+
+ /** \internal
+ * Replicates a vector to match the size of \c *this */
+ template<typename OtherDerived>
+ typename ExtendedType<OtherDerived>::Type
+ extendedTo(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxColsAtCompileTime==1),
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxRowsAtCompileTime==1),
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+ return typename ExtendedType<OtherDerived>::Type
+ (other.derived(),
+ Direction==Vertical ? 1 : m_matrix.rows(),
+ Direction==Horizontal ? 1 : m_matrix.cols());
+ }
+
+ template<typename OtherDerived> struct OppositeExtendedType {
+ typedef Replicate<OtherDerived,
+ Direction==Horizontal ? 1 : ExpressionType::RowsAtCompileTime,
+ Direction==Vertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
+ };
+
+ /** \internal
+ * Replicates a vector in the opposite direction to match the size of \c *this */
+ template<typename OtherDerived>
+ typename OppositeExtendedType<OtherDerived>::Type
+ extendedToOpposite(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxColsAtCompileTime==1),
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxRowsAtCompileTime==1),
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+ return typename OppositeExtendedType<OtherDerived>::Type
+ (other.derived(),
+ Direction==Horizontal ? 1 : m_matrix.rows(),
+ Direction==Vertical ? 1 : m_matrix.cols());
+ }
+
+ public:
+
+ inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
+
+ /** \internal */
+ inline const ExpressionType& _expression() const { return m_matrix; }
+
+ /** \returns a row or column vector expression of \c *this reduxed by \a func
+ *
+ * The template parameter \a BinaryOp is the type of the functor
+ * of the custom redux operator. Note that func must be an associative operator.
+ *
+ * \sa class VectorwiseOp, DenseBase::colwise(), DenseBase::rowwise()
+ */
+ template<typename BinaryOp>
+ const typename ReduxReturnType<BinaryOp>::Type
+ redux(const BinaryOp& func = BinaryOp()) const
+ { return typename ReduxReturnType<BinaryOp>::Type(_expression(), func); }
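+
+  // Hedged sketch (added comment, not upstream): a column-wise reduction with a user-supplied
+  // associative functor; Min2 is a hypothetical functor defined only for this example:
+  //   struct Min2 {
+  //     typedef float result_type;
+  //     float operator()(float a, float b) const { return a < b ? a : b; }
+  //   };
+  //   Eigen::MatrixXf m(2,3);
+  //   m << 1, 2, 3,
+  //        4, 5, 6;
+  //   Eigen::RowVector3f colMins = m.colwise().redux(Min2());  // same result as m.colwise().minCoeff()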
+
+ /** \returns a row (or column) vector expression of the smallest coefficient
+ * of each column (or row) of the referenced expression.
+ *
+ * \warning the result is undefined if \c *this contains NaN.
+ *
+ * Example: \include PartialRedux_minCoeff.cpp
+ * Output: \verbinclude PartialRedux_minCoeff.out
+ *
+ * \sa DenseBase::minCoeff() */
+ const typename ReturnType<internal::member_minCoeff>::Type minCoeff() const
+ { return _expression(); }
+
+ /** \returns a row (or column) vector expression of the largest coefficient
+ * of each column (or row) of the referenced expression.
+ *
+ * \warning the result is undefined if \c *this contains NaN.
+ *
+ * Example: \include PartialRedux_maxCoeff.cpp
+ * Output: \verbinclude PartialRedux_maxCoeff.out
+ *
+ * \sa DenseBase::maxCoeff() */
+ const typename ReturnType<internal::member_maxCoeff>::Type maxCoeff() const
+ { return _expression(); }
+
+ /** \returns a row (or column) vector expression of the squared norm
+ * of each column (or row) of the referenced expression.
+ * This is a vector with real entries, even if the original matrix has complex entries.
+ *
+ * Example: \include PartialRedux_squaredNorm.cpp
+ * Output: \verbinclude PartialRedux_squaredNorm.out
+ *
+ * \sa DenseBase::squaredNorm() */
+ const typename ReturnType<internal::member_squaredNorm,RealScalar>::Type squaredNorm() const
+ { return _expression(); }
+
+ /** \returns a row (or column) vector expression of the norm
+ * of each column (or row) of the referenced expression.
+ * This is a vector with real entries, even if the original matrix has complex entries.
+ *
+ * Example: \include PartialRedux_norm.cpp
+ * Output: \verbinclude PartialRedux_norm.out
+ *
+ * \sa DenseBase::norm() */
+ const typename ReturnType<internal::member_norm,RealScalar>::Type norm() const
+ { return _expression(); }
+
+
+ /** \returns a row (or column) vector expression of the norm
+ * of each column (or row) of the referenced expression, using
+ * Blue's algorithm.
+ * This is a vector with real entries, even if the original matrix has complex entries.
+ *
+ * \sa DenseBase::blueNorm() */
+ const typename ReturnType<internal::member_blueNorm,RealScalar>::Type blueNorm() const
+ { return _expression(); }
+
+
+ /** \returns a row (or column) vector expression of the norm
+ * of each column (or row) of the referenced expression, avoiding
+ * underflow and overflow.
+ * This is a vector with real entries, even if the original matrix has complex entries.
+ *
+ * \sa DenseBase::stableNorm() */
+ const typename ReturnType<internal::member_stableNorm,RealScalar>::Type stableNorm() const
+ { return _expression(); }
+
+
+ /** \returns a row (or column) vector expression of the norm
+ * of each column (or row) of the referenced expression, avoiding
+ * underflow and overflow using a concatenation of hypot() calls.
+ * This is a vector with real entries, even if the original matrix has complex entries.
+ *
+ * \sa DenseBase::hypotNorm() */
+ const typename ReturnType<internal::member_hypotNorm,RealScalar>::Type hypotNorm() const
+ { return _expression(); }
+
+ /** \returns a row (or column) vector expression of the sum
+ * of each column (or row) of the referenced expression.
+ *
+ * Example: \include PartialRedux_sum.cpp
+ * Output: \verbinclude PartialRedux_sum.out
+ *
+ * \sa DenseBase::sum() */
+ const typename ReturnType<internal::member_sum>::Type sum() const
+ { return _expression(); }
+
+ /** \returns a row (or column) vector expression of the mean
+ * of each column (or row) of the referenced expression.
+ *
+ * \sa DenseBase::mean() */
+ const typename ReturnType<internal::member_mean>::Type mean() const
+ { return _expression(); }
+
+ /** \returns a row (or column) vector expression representing
+ * whether \b all coefficients of each respective column (or row) are \c true.
+ * This expression can be assigned to a vector with entries of type \c bool.
+ *
+ * \sa DenseBase::all() */
+ const typename ReturnType<internal::member_all>::Type all() const
+ { return _expression(); }
+
+ /** \returns a row (or column) vector expression representing
+ * whether \b at \b least one coefficient of each respective column (or row) is \c true.
+ * This expression can be assigned to a vector with entries of type \c bool.
+ *
+ * \sa DenseBase::any() */
+ const typename ReturnType<internal::member_any>::Type any() const
+ { return _expression(); }
+
+ /** \returns a row (or column) vector expression representing
+ * the number of \c true coefficients of each respective column (or row).
+ * This expression can be assigned to a vector whose entries have the same type as is used to
+ * index entries of the original matrix; for dense matrices, this is \c std::ptrdiff_t .
+ *
+ * Example: \include PartialRedux_count.cpp
+ * Output: \verbinclude PartialRedux_count.out
+ *
+ * \sa DenseBase::count() */
+ const PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> count() const
+ { return _expression(); }
+
+ /** \returns a row (or column) vector expression of the product
+ * of each column (or row) of the referenced expression.
+ *
+ * Example: \include PartialRedux_prod.cpp
+ * Output: \verbinclude PartialRedux_prod.out
+ *
+ * \sa DenseBase::prod() */
+ const typename ReturnType<internal::member_prod>::Type prod() const
+ { return _expression(); }
+
+
+ /** \returns a matrix expression
+ * where each column (or row) are reversed.
+ *
+ * Example: \include Vectorwise_reverse.cpp
+ * Output: \verbinclude Vectorwise_reverse.out
+ *
+ * \sa DenseBase::reverse() */
+ const Reverse<ExpressionType, Direction> reverse() const
+ { return Reverse<ExpressionType, Direction>( _expression() ); }
+
+ typedef Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1> ReplicateReturnType;
+ const ReplicateReturnType replicate(Index factor) const;
+
+ /**
+ * \return an expression of the replication of each column (or row) of \c *this
+ *
+ * Example: \include DirectionWise_replicate.cpp
+ * Output: \verbinclude DirectionWise_replicate.out
+ *
+ * \sa VectorwiseOp::replicate(Index), DenseBase::replicate(), class Replicate
+ */
+ // NOTE implemented here because of sunstudio's compilation errors
+ template<int Factor> const Replicate<ExpressionType,(IsVertical?Factor:1),(IsHorizontal?Factor:1)>
+ replicate(Index factor = Factor) const
+ {
+ return Replicate<ExpressionType,Direction==Vertical?Factor:1,Direction==Horizontal?Factor:1>
+ (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
+ }
+
+/////////// Arithmetic operators ///////////
+
+ /** Copies the vector \a other to each subvector of \c *this */
+ template<typename OtherDerived>
+ ExpressionType& operator=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ //eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME
+ return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
+ }
+
+ /** Adds the vector \a other to each subvector of \c *this */
+ template<typename OtherDerived>
+ ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
+ }
+
+ /** Subtracts the vector \a other from each subvector of \c *this */
+ template<typename OtherDerived>
+ ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
+ }
+
+ /** Multiplies each subvector of \c *this by the vector \a other */
+ template<typename OtherDerived>
+ ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ m_matrix *= extendedTo(other.derived());
+ return const_cast<ExpressionType&>(m_matrix);
+ }
+
+ /** Divides each subvector of \c *this by the vector \a other */
+ template<typename OtherDerived>
+ ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ m_matrix /= extendedTo(other.derived());
+ return const_cast<ExpressionType&>(m_matrix);
+ }
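+
+  // Hedged broadcasting sketch (added comment, not upstream): the compound operators above apply a
+  // single vector to every column (or row); the matrix and vectors are assumptions for the example:
+  //   Eigen::MatrixXd m = Eigen::MatrixXd::Ones(3,4);
+  //   Eigen::Vector3d v(0.0, 1.0, 2.0);
+  //   m.colwise() += v;                                   // adds v to each of the 4 columns
+  //   m.rowwise() -= Eigen::RowVector4d::Constant(0.5);   // subtracts 0.5 from every entry, row by row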
+
+ /** Returns the expression of the sum of the vector \a other with each subvector of \c *this */
+ template<typename OtherDerived> EIGEN_STRONG_INLINE
+ CwiseBinaryOp<internal::scalar_sum_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ operator+(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix + extendedTo(other.derived());
+ }
+
+ /** Returns the expression of the difference between each subvector of \c *this and the vector \a other */
+ template<typename OtherDerived>
+ CwiseBinaryOp<internal::scalar_difference_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ operator-(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix - extendedTo(other.derived());
+ }
+
+ /** Returns the expression where each subvector is the product of the vector \a other
+ * by the corresponding subvector of \c *this */
+ template<typename OtherDerived> EIGEN_STRONG_INLINE
+ CwiseBinaryOp<internal::scalar_product_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ operator*(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix * extendedTo(other.derived());
+ }
+
+ /** Returns the expression where each subvector is the quotient of the corresponding
+ * subvector of \c *this by the vector \a other */
+ template<typename OtherDerived>
+ CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ operator/(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix / extendedTo(other.derived());
+ }
+
+ /** \returns an expression where each column (or row) of the referenced matrix is normalized.
+ * The referenced matrix is \b not modified.
+ * \sa MatrixBase::normalized(), normalize()
+ */
+ CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
+ normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }
+
+
+ /** Normalizes each column (or row) of the referenced matrix in place.
+ * \sa MatrixBase::normalize(), normalized()
+ */
+ void normalize() {
+ m_matrix = this->normalized();
+ }
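+
+  // Small usage sketch (added comment, not upstream), assuming a double matrix m with nonzero columns:
+  //   Eigen::MatrixXd q = m.colwise().normalized();   // each column scaled to unit norm
+  //   m.rowwise().normalize();                        // in place, each row scaled to unit norm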
+
+/////////// Geometry module ///////////
+
+ #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
+ Homogeneous<ExpressionType,Direction> homogeneous() const;
+ #endif
+
+ typedef typename ExpressionType::PlainObject CrossReturnType;
+ template<typename OtherDerived>
+ const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
+
+ enum {
+ HNormalized_Size = Direction==Vertical ? internal::traits<ExpressionType>::RowsAtCompileTime
+ : internal::traits<ExpressionType>::ColsAtCompileTime,
+ HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1
+ };
+ typedef Block<const ExpressionType,
+ Direction==Vertical ? int(HNormalized_SizeMinusOne)
+ : int(internal::traits<ExpressionType>::RowsAtCompileTime),
+ Direction==Horizontal ? int(HNormalized_SizeMinusOne)
+ : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
+ HNormalized_Block;
+ typedef Block<const ExpressionType,
+ Direction==Vertical ? 1 : int(internal::traits<ExpressionType>::RowsAtCompileTime),
+ Direction==Horizontal ? 1 : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
+ HNormalized_Factors;
+ typedef CwiseBinaryOp<internal::scalar_quotient_op<typename internal::traits<ExpressionType>::Scalar>,
+ const HNormalized_Block,
+ const Replicate<HNormalized_Factors,
+ Direction==Vertical ? HNormalized_SizeMinusOne : 1,
+ Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
+ HNormalizedReturnType;
+
+ const HNormalizedReturnType hnormalized() const;
+
+ protected:
+ ExpressionTypeNested m_matrix;
+};
+
+/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
+ *
+ * Example: \include MatrixBase_colwise.cpp
+ * Output: \verbinclude MatrixBase_colwise.out
+ *
+ * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+ */
+template<typename Derived>
+inline const typename DenseBase<Derived>::ConstColwiseReturnType
+DenseBase<Derived>::colwise() const
+{
+ return derived();
+}
+
+/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
+ *
+ * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+ */
+template<typename Derived>
+inline typename DenseBase<Derived>::ColwiseReturnType
+DenseBase<Derived>::colwise()
+{
+ return derived();
+}
+
+/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
+ *
+ * Example: \include MatrixBase_rowwise.cpp
+ * Output: \verbinclude MatrixBase_rowwise.out
+ *
+ * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+ */
+template<typename Derived>
+inline const typename DenseBase<Derived>::ConstRowwiseReturnType
+DenseBase<Derived>::rowwise() const
+{
+ return derived();
+}
+
+/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
+ *
+ * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+ */
+template<typename Derived>
+inline typename DenseBase<Derived>::RowwiseReturnType
+DenseBase<Derived>::rowwise()
+{
+ return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_PARTIAL_REDUX_H
diff --git a/third_party/eigen3/Eigen/src/Core/Visitor.h b/third_party/eigen3/Eigen/src/Core/Visitor.h
new file mode 100644
index 0000000000..64867b7a2c
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/Visitor.h
@@ -0,0 +1,237 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_VISITOR_H
+#define EIGEN_VISITOR_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename Visitor, typename Derived, int UnrollCount>
+struct visitor_impl
+{
+ enum {
+ col = (UnrollCount-1) / Derived::RowsAtCompileTime,
+ row = (UnrollCount-1) % Derived::RowsAtCompileTime
+ };
+
+ static inline void run(const Derived &mat, Visitor& visitor)
+ {
+ visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
+ visitor(mat.coeff(row, col), row, col);
+ }
+};
+
+template<typename Visitor, typename Derived>
+struct visitor_impl<Visitor, Derived, 1>
+{
+ static inline void run(const Derived &mat, Visitor& visitor)
+ {
+ return visitor.init(mat.coeff(0, 0), 0, 0);
+ }
+};
+
+template<typename Visitor, typename Derived>
+struct visitor_impl<Visitor, Derived, Dynamic>
+{
+ typedef typename Derived::Index Index;
+ static inline void run(const Derived& mat, Visitor& visitor)
+ {
+ visitor.init(mat.coeff(0,0), 0, 0);
+ for(Index i = 1; i < mat.rows(); ++i)
+ visitor(mat.coeff(i, 0), i, 0);
+ for(Index j = 1; j < mat.cols(); ++j)
+ for(Index i = 0; i < mat.rows(); ++i)
+ visitor(mat.coeff(i, j), i, j);
+ }
+};
+
+} // end namespace internal
+
+/** Applies the visitor \a visitor to all the coefficients of the matrix or vector.
+ *
+ * The template parameter \a Visitor is the type of the visitor and provides the following interface:
+ * \code
+ * struct MyVisitor {
+ * // called for the first coefficient
+ * void init(const Scalar& value, Index i, Index j);
+ * // called for all other coefficients
+ * void operator() (const Scalar& value, Index i, Index j);
+ * };
+ * \endcode
+ *
+ * \note compared to one or two \em for \em loops, visitors offer automatic
+ * unrolling for small fixed-size matrices.
+ *
+ * \sa minCoeff(Index*,Index*), maxCoeff(Index*,Index*), DenseBase::redux()
+ */
+template<typename Derived>
+template<typename Visitor>
+void DenseBase<Derived>::visit(Visitor& visitor) const
+{
+ enum { unroll = SizeAtCompileTime != Dynamic
+ && CoeffReadCost != Dynamic
+ && (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic)
+ && SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost
+ <= EIGEN_UNROLLING_LIMIT };
+ return internal::visitor_impl<Visitor, Derived,
+ unroll ? int(SizeAtCompileTime) : Dynamic
+ >::run(derived(), visitor);
+}
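+
+// Illustrative visitor (added comment, not part of the upstream header): it follows the interface
+// documented above and records the location of the first zero coefficient of a double matrix;
+// someMatrixXd is a hypothetical name used only for the example:
+//   struct FirstZeroVisitor {
+//     std::ptrdiff_t row, col; bool found;
+//     void init(const double& value, std::ptrdiff_t i, std::ptrdiff_t j)
+//     { found = (value == 0.0); row = i; col = j; }
+//     void operator()(const double& value, std::ptrdiff_t i, std::ptrdiff_t j)
+//     { if(!found && value == 0.0) { found = true; row = i; col = j; } }
+//   };
+//   FirstZeroVisitor vis;
+//   someMatrixXd.visit(vis);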
+
+namespace internal {
+
+/** \internal
+ * \brief Base class to implement min and max visitors
+ */
+template <typename Derived>
+struct coeff_visitor
+{
+ typedef typename Derived::Index Index;
+ typedef typename Derived::Scalar Scalar;
+ Index row, col;
+ Scalar res;
+ inline void init(const Scalar& value, Index i, Index j)
+ {
+ res = value;
+ row = i;
+ col = j;
+ }
+};
+
+/** \internal
+ * \brief Visitor computing the min coefficient with its value and coordinates
+ *
+ * \sa DenseBase::minCoeff(Index*, Index*)
+ */
+template <typename Derived>
+struct min_coeff_visitor : coeff_visitor<Derived>
+{
+ typedef typename Derived::Index Index;
+ typedef typename Derived::Scalar Scalar;
+ void operator() (const Scalar& value, Index i, Index j)
+ {
+ if(value < this->res)
+ {
+ this->res = value;
+ this->row = i;
+ this->col = j;
+ }
+ }
+};
+
+template<typename Scalar>
+struct functor_traits<min_coeff_visitor<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::AddCost
+ };
+};
+
+/** \internal
+ * \brief Visitor computing the max coefficient with its value and coordinates
+ *
+ * \sa DenseBase::maxCoeff(Index*, Index*)
+ */
+template <typename Derived>
+struct max_coeff_visitor : coeff_visitor<Derived>
+{
+ typedef typename Derived::Index Index;
+ typedef typename Derived::Scalar Scalar;
+ void operator() (const Scalar& value, Index i, Index j)
+ {
+ if(value > this->res)
+ {
+ this->res = value;
+ this->row = i;
+ this->col = j;
+ }
+ }
+};
+
+template<typename Scalar>
+struct functor_traits<max_coeff_visitor<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::AddCost
+ };
+};
+
+} // end namespace internal
+
+/** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
+ * \warning the result is undefined if \c *this contains NaN.
+ *
+ * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff()
+ */
+template<typename Derived>
+template<typename IndexType>
+typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
+{
+ internal::min_coeff_visitor<Derived> minVisitor;
+ this->visit(minVisitor);
+ *rowId = minVisitor.row;
+ if (colId) *colId = minVisitor.col;
+ return minVisitor.res;
+}
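+
+// Usage sketch (added comment, not upstream): retrieving an extremum together with its coordinates;
+// m is an assumed MatrixXf:
+//   Eigen::MatrixXf::Index i, j;
+//   float lo = m.minCoeff(&i, &j);   // value and position of the smallest coefficient
+//   float hi = m.maxCoeff(&i, &j);   // value and position of the largest coefficient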
+
+/** \returns the minimum of all coefficients of *this and puts in *index its location.
+ * \warning the result is undefined if \c *this contains NaN.
+ *
+ * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff()
+ */
+template<typename Derived>
+template<typename IndexType>
+typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::minCoeff(IndexType* index) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ internal::min_coeff_visitor<Derived> minVisitor;
+ this->visit(minVisitor);
+ *index = (RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row;
+ return minVisitor.res;
+}
+
+/** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
+ * \warning the result is undefined if \c *this contains NaN.
+ *
+ * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
+ */
+template<typename Derived>
+template<typename IndexType>
+typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
+{
+ internal::max_coeff_visitor<Derived> maxVisitor;
+ this->visit(maxVisitor);
+ *rowPtr = maxVisitor.row;
+ if (colPtr) *colPtr = maxVisitor.col;
+ return maxVisitor.res;
+}
+
+/** \returns the maximum of all coefficients of *this and puts in *index its location.
+ * \warning the result is undefined if \c *this contains NaN.
+ *
+ * \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
+ */
+template<typename Derived>
+template<typename IndexType>
+typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::maxCoeff(IndexType* index) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ internal::max_coeff_visitor<Derived> maxVisitor;
+ this->visit(maxVisitor);
+ *index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row;
+ return maxVisitor.res;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_VISITOR_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/AVX/Complex.h b/third_party/eigen3/Eigen/src/Core/arch/AVX/Complex.h
new file mode 100644
index 0000000000..e98c40e1f1
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/AVX/Complex.h
@@ -0,0 +1,463 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_AVX_H
+#define EIGEN_COMPLEX_AVX_H
+
+namespace Eigen {
+
+namespace internal {
+
+//---------- float ----------
+struct Packet4cf
+{
+ EIGEN_STRONG_INLINE Packet4cf() {}
+ EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
+ __m256 v;
+};
+
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet4cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 4,
+ HasHalfPacket = 1,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4}; typedef Packet2cf half; };
+
+template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a)
+{
+ return Packet4cf(pnegate(a.v));
+}
+template<> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a)
+{
+ const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet4cf(_mm256_xor_ps(a.v,mask));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
+{
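+  // Added explanatory comments (not upstream): this computes the complex product
+  // (ar + ai*i)*(br + bi*i) = (ar*br - ai*bi) + (ar*bi + ai*br)*i per lane.
+  // moveldup duplicates the real parts of a, movehdup its imaginary parts, so
+  // tmp1 = (ar*br, ar*bi, ...) and tmp2 = (ai*bi, ai*br, ...); addsub then
+  // subtracts in the even (real) slots and adds in the odd (imaginary) slots.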
+ __m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
+ __m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));
+ __m256 result = _mm256_addsub_ps(tmp1, tmp2);
+ return Packet4cf(result);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(a.v,b.v)); }
+
+template<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from))); }
+
+
+template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
+{
+ return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
+{
+ // FIXME The following might be optimized using _mm256_movedup_pd
+ Packet2cf a = ploaddup<Packet2cf>(from);
+ Packet2cf b = ploaddup<Packet2cf>(from+1);
+ return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
+}
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, int stride)
+{
+ return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]),
+ std::imag(from[2*stride]), std::real(from[2*stride]),
+ std::imag(from[1*stride]), std::real(from[1*stride]),
+ std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, int stride)
+{
+ __m128 low = _mm256_extractf128_ps(from.v, 0);
+ to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),
+ _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
+ to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)),
+ _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
+
+ __m128 high = _mm256_extractf128_ps(from.v, 1);
+ to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)),
+ _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
+ to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)),
+ _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
+
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a)
+{
+ return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
+ __m128 low = _mm256_extractf128_ps(a.v, 0);
+ __m128 high = _mm256_extractf128_ps(a.v, 1);
+ __m128d lowd = _mm_castps_pd(low);
+ __m128d highd = _mm_castps_pd(high);
+ low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
+ high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
+ __m256 result = _mm256_setzero_ps();
+ result = _mm256_insertf128_ps(result, low, 1);
+ result = _mm256_insertf128_ps(result, high, 0);
+ return Packet4cf(result);
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a)
+{
+ return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v,0)),
+ Packet2cf(_mm256_extractf128_ps(a.v,1))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
+{
+ Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ t0 = _mm256_hadd_ps(t0,t1);
+ Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ t2 = _mm256_hadd_ps(t2,t3);
+
+ t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
+ t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
+
+ return Packet4cf(_mm256_add_ps(t1,t3));
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
+{
+ return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
+ Packet2cf(_mm256_extractf128_ps(a.v, 1))));
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet4cf>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
+ {
+ if (Offset==0) return;
+ palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
+ }
+};
+
+template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+
+template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+
+template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+
+template<> struct conj_helper<Packet8f, Packet4cf, false,false>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(c, pmul(x,y)); }
+
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
+ { return Packet4cf(Eigen::internal::pmul(x, y.v)); }
+};
+
+template<> struct conj_helper<Packet4cf, Packet8f, false,false>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
+ { return padd(c, pmul(x,y)); }
+
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
+ { return Packet4cf(Eigen::internal::pmul(x.v, y)); }
+};
+
+template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
+{
+ Packet4cf num = pmul(a, pconj(b));
+ __m256 tmp = _mm256_mul_ps(b.v, b.v);
+ __m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
+ __m256 denom = _mm256_add_ps(tmp, tmp2);
+ return Packet4cf(_mm256_div_ps(num.v, denom));
+}
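+
+// [Editor's note] The division above uses the identity a/b = a*conj(b) / (re(b)^2 + im(b)^2),
+// evaluated lane by lane. A minimal scalar sketch of the same computation (illustrative only,
+// not part of Eigen; the name ref_cdiv is made up for this note):
+inline std::complex<float> ref_cdiv(const std::complex<float>& a, const std::complex<float>& b)
+{
+ std::complex<float> num = a * std::conj(b); // matches pmul(a, pconj(b))
+ float denom = b.real()*b.real() + b.imag()*b.imag(); // matches tmp + tmp2
+ return std::complex<float>(num.real()/denom, num.imag()/denom);
+}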
+
+template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
+{
+ return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1)));
+}
+
+//---------- double ----------
+struct Packet2cd
+{
+ EIGEN_STRONG_INLINE Packet2cd() {}
+ EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
+ __m256d v;
+};
+
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet2cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 2,
+ HasHalfPacket = 1,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2}; typedef Packet1cd half; };
+
+template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) { return Packet2cd(pnegate(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a)
+{
+ const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));
+ return Packet2cd(_mm256_xor_pd(a.v,mask));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
+{
+ __m256d tmp1 = _mm256_shuffle_pd(a.v,a.v,0x0);
+ __m256d even = _mm256_mul_pd(tmp1, b.v);
+ __m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);
+ __m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);
+ __m256d odd = _mm256_mul_pd(tmp2, tmp3);
+ return Packet2cd(_mm256_addsub_pd(even, odd));
+}
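+
+// [Editor's note] Per 128-bit lane holding (ar, ai) and (br, bi), the multiply above computes
+// even = (ar*br, ar*bi) and odd = (ai*bi, ai*br), then addsub(even, odd)
+// = (ar*br - ai*bi, ar*bi + ai*br), i.e. the usual complex product. Scalar sketch
+// (illustrative only, not part of Eigen; ref_cmul is a made-up name):
+inline std::complex<double> ref_cmul(const std::complex<double>& a, const std::complex<double>& b)
+{
+ double even_re = a.real()*b.real(), even_im = a.real()*b.imag(); // tmp1 * b
+ double odd_re = a.imag()*b.imag(), odd_im = a.imag()*b.real(); // tmp2 * tmp3
+ return std::complex<double>(even_re - odd_re, even_im + odd_im); // _mm256_addsub_pd
+}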
+
+template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(a.v,b.v)); }
+
+template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }
+
+template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
+{
+ // If casting to a __m128d* turns out not to be safe, we can still fall back to this (much slower) version:
+// return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
+ return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) { return pset1<Packet2cd>(*from); }
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, int stride)
+{
+ return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]),
+ std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, int stride)
+{
+ __m128d low = _mm256_extractf128_pd(from.v, 0);
+ to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
+ __m128d high = _mm256_extractf128_pd(from.v, 1);
+ to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a)
+{
+ __m128d low = _mm256_extractf128_pd(a.v, 0);
+ EIGEN_ALIGN16 double res[2];
+ _mm_store_pd(res, low);
+ return std::complex<double>(res[0],res[1]);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
+ __m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
+ return Packet2cd(result);
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a)
+{
+ return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v,0)),
+ Packet1cd(_mm256_extractf128_pd(a.v,1))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
+{
+ Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
+ Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
+
+ return Packet2cd(_mm256_add_pd(t0,t1));
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
+{
+ return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
+ Packet1cd(_mm256_extractf128_pd(a.v,1))));
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet2cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
+ {
+ if (Offset==0) return;
+ palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
+ }
+};
+
+template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+
+template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+
+template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+
+template<> struct conj_helper<Packet4d, Packet2cd, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(c, pmul(x,y)); }
+
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
+ { return Packet2cd(Eigen::internal::pmul(x, y.v)); }
+};
+
+template<> struct conj_helper<Packet2cd, Packet4d, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
+ { return padd(c, pmul(x,y)); }
+
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
+ { return Packet2cd(Eigen::internal::pmul(x.v, y)); }
+};
+
+template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
+{
+ Packet2cd num = pmul(a, pconj(b));
+ __m256d tmp = _mm256_mul_pd(b.v, b.v);
+ __m256d denom = _mm256_hadd_pd(tmp, tmp);
+ return Packet2cd(_mm256_div_pd(num.v, denom));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
+{
+ return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
+}
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4cf,4>& kernel) {
+ __m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
+ __m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
+ __m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
+ __m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
+
+ __m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
+ __m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
+ __m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
+ __m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
+
+ kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
+ kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
+ kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
+ kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
+}
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cd,2>& kernel) {
+ __m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));
+ kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));
+ kernel.packet[0].v = tmp;
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_AVX_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h b/third_party/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h
new file mode 100644
index 0000000000..faa5c79021
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -0,0 +1,495 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATH_FUNCTIONS_AVX_H
+#define EIGEN_MATH_FUNCTIONS_AVX_H
+
+// For some reason, this function didn't make it into the avxintrin.h
+// used by the compiler, so we just define it here ourselves.
+#define _mm256_setr_m128(lo, hi) \
+ _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)
+
+/* The sin, cos, exp, and log functions of this file are loosely derived from
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
+ */
+
+namespace Eigen {
+
+namespace internal {
+
+// Sine function
+// Computes sin(x) by wrapping x to the interval [-Pi/4,3*Pi/4] and
+// evaluating interpolants in [-Pi/4,Pi/4] or [Pi/4,3*Pi/4]. The interpolants
+// are (anti-)symmetric and thus have only odd/even coefficients.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+psin<Packet8f>(const Packet8f& _x) {
+ Packet8f x = _x;
+
+ // Some useful values.
+ _EIGEN_DECLARE_CONST_Packet8i(one, 1);
+ _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
+ _EIGEN_DECLARE_CONST_Packet8f(two, 2.0f);
+ _EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f);
+ _EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f);
+ _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00);
+ _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04);
+ _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07);
+ _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00);
+
+ // Map x from [-Pi/4,3*Pi/4] to z in [-1,3] and subtract the shifted period.
+ Packet8f z = pmul(x, p8f_one_over_pi);
+ Packet8f shift = _mm256_floor_ps(padd(z, p8f_one_over_four));
+ x = pmadd(shift, p8f_neg_pi_first, x);
+ x = pmadd(shift, p8f_neg_pi_second, x);
+ x = pmadd(shift, p8f_neg_pi_third, x);
+ z = pmul(x, p8f_four_over_pi);
+
+ // Make a mask for the entries that need flipping, i.e. wherever the shift
+ // is odd.
+ Packet8i shift_ints = _mm256_cvtps_epi32(shift);
+ Packet8i shift_isodd =
+ (__m256i)_mm256_and_ps((__m256)shift_ints, (__m256)p8i_one);
+#ifdef EIGEN_VECTORIZE_AVX2
+ Packet8i sign_flip_mask = _mm256_slli_epi32(shift_isodd, 31);
+#else
+ __m128i lo =
+ _mm_slli_epi32(_mm256_extractf128_si256((__m256i)shift_isodd, 0), 31);
+ __m128i hi =
+ _mm_slli_epi32(_mm256_extractf128_si256((__m256i)shift_isodd, 1), 31);
+ Packet8i sign_flip_mask = _mm256_setr_m128(lo, hi);
+#endif
+
+ // Create a mask for which interpolant to use, i.e. if z > 1, then the mask
+ // is set to ones for that entry.
+ Packet8f ival_mask = _mm256_cmp_ps(z, p8f_one, _CMP_GT_OQ);
+
+ // Evaluate the polynomial for the interval [1,3] in z.
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04);
+ Packet8f z_minus_two = psub(z, p8f_two);
+ Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two);
+ Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4);
+ right = pmadd(right, z_minus_two2, p8f_coeff_right_2);
+ right = pmadd(right, z_minus_two2, p8f_coeff_right_0);
+
+ // Evaluate the polynomial for the interval [-1,1] in z.
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05);
+ Packet8f z2 = pmul(z, z);
+ Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5);
+ left = pmadd(left, z2, p8f_coeff_left_3);
+ left = pmadd(left, z2, p8f_coeff_left_1);
+ left = pmul(left, z);
+
+ // Assemble the results, i.e. select the left and right polynomials.
+ left = _mm256_andnot_ps(ival_mask, left);
+ right = _mm256_and_ps(ival_mask, right);
+ Packet8f res = _mm256_or_ps(left, right);
+
+ // Flip the sign on the odd intervals and return the result.
+ res = _mm256_xor_ps(res, (__m256)sign_flip_mask);
+ return res;
+}
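+
+// [Editor's sketch] Scalar model of the argument reduction used above, assuming <cmath> is
+// available via Eigen/Core (ref_sin_reduced is a made-up name, illustrative only): remove an
+// integer number k of half periods so the remainder r lies in [-Pi/4, 3*Pi/4], approximate
+// sin(r) there (the "left" odd polynomial around 0 or the "right" even polynomial around Pi/2),
+// and flip the sign when k is odd, since sin(r + k*Pi) = (-1)^k * sin(r).
+inline float ref_sin_reduced(float x)
+{
+ const float pi = 3.14159265358979323846f;
+ float shift = std::floor(x / pi + 0.25f); // k, matches floor(x*one_over_pi + 1/4)
+ float r = x - shift * pi; // r in [-Pi/4, 3*Pi/4]
+ float s = std::sin(r); // stands in for the two interpolants
+ return (static_cast<int>(shift) & 1) ? -s : s; // equals std::sin(x)
+}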
+
+// Natural logarithm
+// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C = log(2)
+// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
+// be easily approximated by a polynomial centered on m=1 for stability.
+// TODO(gonnet): Further reduce the interval allowing for lower-degree
+// polynomial interpolants -> ... -> profit!
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+plog<Packet8f>(const Packet8f& _x) {
+ Packet8f x = _x;
+ _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
+ _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet8f(126f, 126.0f);
+
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inv_mant_mask, ~0x7f800000);
+
+ // The smallest non denormalized float number.
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(min_norm_pos, 0x00800000);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(minus_inf, 0xff800000);
+
+ // Polynomial coefficients.
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_SQRTHF, 0.707106781186547524f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p0, 7.0376836292E-2f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p1, -1.1514610310E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p2, 1.1676998740E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p3, -1.2420140846E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p4, +1.4249322787E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p5, -1.6668057665E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p6, +2.0000714765E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p7, -2.4999993993E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p8, +3.3333331174E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q1, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q2, 0.693359375f);
+
+ // invalid_mask is set to true when x is negative or NaN
+ Packet8f invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_NGE_UQ);
+ Packet8f iszero_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_EQ_OQ);
+
+ // Truncate input values to the minimum positive normal.
+ x = pmax(x, p8f_min_norm_pos);
+
+// Extract the shifted exponents (No bitwise shifting in regular AVX, so
+// convert to SSE and do it there).
+#ifdef EIGEN_VECTORIZE_AVX2
+ Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32((__m256i)x, 23));
+#else
+ __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256((__m256i)x, 0), 23);
+ __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256((__m256i)x, 1), 23);
+ Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_setr_m128(lo, hi));
+#endif
+ Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
+
+ // Set the exponents to -1, i.e. x is now in the range [0.5,1).
+ x = _mm256_and_ps(x, p8f_inv_mant_mask);
+ x = _mm256_or_ps(x, p8f_half);
+
+ // part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
+ // and shift by -1. The values are then centered around 0, which improves
+ // the stability of the polynomial evaluation.
+ // if( x < SQRTHF ) {
+ // e -= 1;
+ // x = x + x - 1.0;
+ // } else { x = x - 1.0; }
+ Packet8f mask = _mm256_cmp_ps(x, p8f_cephes_SQRTHF, _CMP_LT_OQ);
+ Packet8f tmp = _mm256_and_ps(x, mask);
+ x = psub(x, p8f_1);
+ e = psub(e, _mm256_and_ps(p8f_1, mask));
+ x = padd(x, tmp);
+
+ Packet8f x2 = pmul(x, x);
+ Packet8f x3 = pmul(x2, x);
+
+ // Evaluate the polynomial approximant of degree 8 in three parts, probably
+ // to improve instruction-level parallelism.
+ Packet8f y, y1, y2;
+ y = pmadd(p8f_cephes_log_p0, x, p8f_cephes_log_p1);
+ y1 = pmadd(p8f_cephes_log_p3, x, p8f_cephes_log_p4);
+ y2 = pmadd(p8f_cephes_log_p6, x, p8f_cephes_log_p7);
+ y = pmadd(y, x, p8f_cephes_log_p2);
+ y1 = pmadd(y1, x, p8f_cephes_log_p5);
+ y2 = pmadd(y2, x, p8f_cephes_log_p8);
+ y = pmadd(y, x3, y1);
+ y = pmadd(y, x3, y2);
+ y = pmul(y, x3);
+
+ // Add the logarithm of the exponent back to the result of the interpolation.
+ y1 = pmul(e, p8f_cephes_log_q1);
+ tmp = pmul(x2, p8f_half);
+ y = padd(y, y1);
+ x = psub(x, tmp);
+ y2 = pmul(e, p8f_cephes_log_q2);
+ x = padd(x, y);
+ x = padd(x, y2);
+
+ // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
+ return _mm256_or_ps(
+ _mm256_andnot_ps(iszero_mask, _mm256_or_ps(x, invalid_mask)),
+ _mm256_and_ps(iszero_mask, p8f_minus_inf));
+}
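+
+// [Editor's sketch] Scalar model of the decomposition used above, assuming <cmath> is available
+// via Eigen/Core (ref_log_decomposed is a made-up name, illustrative only): split x into mantissa
+// and exponent, fold the mantissa into [sqrt(1/2), sqrt(2)) so the polynomial is evaluated near 1,
+// and add e*log(2) back at the end.
+inline float ref_log_decomposed(float x)
+{
+ int e;
+ float m = std::frexp(x, &e); // x = m * 2^e with m in [0.5, 1)
+ if (m < 0.707106781186547524f) { // m < sqrt(1/2): matches the masked "x = x + x" branch
+ m += m;
+ e -= 1;
+ }
+ // std::log(m) stands in for the degree-8 polynomial in (m - 1).
+ return std::log(m) + static_cast<float>(e) * 0.693147180559945309f;
+}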
+
+// Exponential function. Works by writing "x = m*log(2) + r" where
+// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
+// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+pexp<Packet8f>(const Packet8f& _x) {
+ _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
+ _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet8f(127, 127.0f);
+
+ _EIGEN_DECLARE_CONST_Packet8f(exp_hi, 88.3762626647950f);
+ _EIGEN_DECLARE_CONST_Packet8f(exp_lo, -88.3762626647949f);
+
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_LOG2EF, 1.44269504088896341f);
+
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p0, 1.9875691500E-4f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p1, 1.3981999507E-3f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p2, 8.3334519073E-3f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p3, 4.1665795894E-2f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p4, 1.6666665459E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p5, 5.0000001201E-1f);
+
+ // Clamp x.
+ Packet8f x = pmax(pmin(_x, p8f_exp_hi), p8f_exp_lo);
+
+ // Express exp(x) as exp(m*ln(2) + r), start by extracting
+ // m = floor(x/ln(2) + 0.5).
+ Packet8f m = _mm256_floor_ps(pmadd(x, p8f_cephes_LOG2EF, p8f_half));
+
+// Get r = x - m*ln(2). If no FMA instructions are available, m*ln(2) is
+// subtracted out in two parts, m*C1+m*C2 = m*ln(2), to avoid accumulating
+// truncation errors. Note that we don't use the "pmadd" function here to
+// ensure that a precision-preserving FMA instruction is used.
+#ifdef EIGEN_VECTORIZE_FMA
+ _EIGEN_DECLARE_CONST_Packet8f(nln2, -0.6931471805599453f);
+ Packet8f r = _mm256_fmadd_ps(m, p8f_nln2, x);
+#else
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C1, 0.693359375f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C2, -2.12194440e-4f);
+ Packet8f r = psub(x, pmul(m, p8f_cephes_exp_C1));
+ r = psub(r, pmul(m, p8f_cephes_exp_C2));
+#endif
+
+ Packet8f r2 = pmul(r, r);
+
+ // TODO(gonnet): Split into odd/even polynomials and try to exploit
+ // instruction-level parallelism.
+ Packet8f y = p8f_cephes_exp_p0;
+ y = pmadd(y, r, p8f_cephes_exp_p1);
+ y = pmadd(y, r, p8f_cephes_exp_p2);
+ y = pmadd(y, r, p8f_cephes_exp_p3);
+ y = pmadd(y, r, p8f_cephes_exp_p4);
+ y = pmadd(y, r, p8f_cephes_exp_p5);
+ y = pmadd(y, r2, r);
+ y = padd(y, p8f_1);
+
+ // Build emm0 = 2^m.
+ Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
+#ifdef EIGEN_VECTORIZE_AVX2
+ emm0 = _mm256_slli_epi32(emm0, 23);
+#else
+ __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 0), 23);
+ __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 1), 23);
+ emm0 = _mm256_setr_m128(lo, hi);
+#endif
+
+ // Return 2^m * exp(r).
+ return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
+}
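+
+// [Editor's sketch] Scalar model of the decomposition used above, assuming <cmath> is available
+// via Eigen/Core (ref_exp_decomposed is a made-up name, illustrative only): write
+// x = m*log(2) + r with m = floor(x/log(2) + 1/2), approximate exp(r) on the small remainder,
+// and scale the result by 2^m.
+inline float ref_exp_decomposed(float x)
+{
+ const float ln2 = 0.693147180559945309f;
+ float m = std::floor(x / ln2 + 0.5f); // matches floor(x*LOG2EF + 0.5)
+ float r = x - m * ln2; // |r| <= log(2)/2
+ // std::exp(r) stands in for the degree-5 polynomial; the vector code builds 2^m by
+ // shifting (m + 127) into the exponent bits.
+ return std::ldexp(std::exp(r), static_cast<int>(m));
+}
+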
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
+pexp<Packet4d>(const Packet4d& _x) {
+ Packet4d x = _x;
+
+ _EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
+ _EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
+ _EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
+
+ _EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
+ _EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);
+
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);
+
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);
+
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);
+
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
+ _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
+
+ Packet4d tmp, fx;
+
+ // clamp x
+ x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
+ // Express exp(x) as exp(g + n*log(2)).
+ fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);
+
+ // Get the integer part of x/log(2), rounded to nearest, i.e. the "n" described above.
+ fx = _mm256_floor_pd(fx);
+
+ // Get the remainder modulo log(2), i.e. the "g" described above. Subtract
+ // n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
+ // digits right.
+ tmp = pmul(fx, p4d_cephes_exp_C1);
+ Packet4d z = pmul(fx, p4d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+
+ Packet4d x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial of the rational interpolant.
+ Packet4d px = p4d_cephes_exp_p0;
+ px = pmadd(px, x2, p4d_cephes_exp_p1);
+ px = pmadd(px, x2, p4d_cephes_exp_p2);
+ px = pmul(px, x);
+
+ // Evaluate the denominator polynomial of the rational interpolant.
+ Packet4d qx = p4d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p4d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p4d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p4d_cephes_exp_q3);
+
+ // This rational form is copied from the SSE2 routines; see the editor's note after
+ // this function for the identity it appears to implement.
+ // TODO(gonnet): Figure out what is going on here, perhaps find a better
+ // rational interpolant?
+ x = _mm256_div_pd(px, psub(qx, px));
+ x = pmadd(p4d_2, x, p4d_1);
+
+ // Build e=2^n by constructing the exponents in a 128-bit vector and
+ // shifting them to where they belong in double-precision values.
+ __m128i emm0 = _mm256_cvtpd_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, p4i_1023);
+ emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
+ __m128i lo = _mm_slli_epi64(emm0, 52);
+ __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
+ __m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
+ e = _mm256_insertf128_si256(e, hi, 1);
+
+ // Construct the result 2^n * exp(g) = e * x. The max is used to catch
+ // non-finite values in the input.
+ return pmax(pmul(x, Packet4d(e)), _x);
+}
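+
+// [Editor's note] The rational form above appears to be the classic Cephes/Pade construction:
+// with t = g*P(g^2)/Q(g^2) ~ tanh(g/2), exp(g) = (1 + t)/(1 - t) = (Q + g*P)/(Q - g*P)
+// = 1 + 2*g*P/(Q - g*P), which is exactly "x = px/(qx - px); x = 2*x + 1" as computed above.
+// Scalar check of that identity, assuming <cmath> (ref_exp_from_tanh_half is a made-up name,
+// illustrative only):
+inline double ref_exp_from_tanh_half(double g)
+{
+ double t = std::tanh(0.5 * g); // stands in for g*P(g^2)/Q(g^2)
+ return (1.0 + t) / (1.0 - t); // equals std::exp(g)
+}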
+
+// Functions for sqrt.
+// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
+// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
+// exact solution. The main advantage of this approach is not just speed, but
+// also the fact that it can be inlined and pipelined with other computations,
+// further reducing its effective latency.
+#if EIGEN_FAST_MATH
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+psqrt<Packet8f>(const Packet8f& _x) {
+ _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
+
+ Packet8f neg_half = pmul(_x, p8f_minus_half);
+
+ // select only the inverse sqrt of positive normal inputs (denormals are
+ // flushed to zero and cause infs as well).
+ Packet8f non_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_GE_OQ);
+ Packet8f x = _mm256_and_ps(non_zero_mask, _mm256_rsqrt_ps(_x));
+
+ // Do a single step of Newton's iteration.
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
+
+ // Multiply the original _x by its reciprocal square root to extract the
+ // square root.
+ return pmul(_x, x);
+}
+#else
+template <>
+EIGEN_STRONG_INLINE Packet8f psqrt<Packet8f>(const Packet8f& x) {
+ return _mm256_sqrt_ps(x);
+}
+#endif
+template <>
+EIGEN_STRONG_INLINE Packet4d psqrt<Packet4d>(const Packet4d& x) {
+ return _mm256_sqrt_pd(x);
+}
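+
+// [Editor's sketch] Scalar model of the fast-math sqrt above (ref_fast_sqrt is a made-up name,
+// illustrative only): one Newton-Raphson step on y ~ 1/sqrt(x), y' = y*(1.5 - 0.5*x*y*y),
+// roughly doubles the number of correct bits of the rsqrt estimate, and sqrt(x) is then
+// recovered as x*y'.
+inline float ref_fast_sqrt(float x, float rsqrt_estimate)
+{
+ float neg_half = x * -0.5f; // matches pmul(_x, p8f_minus_half)
+ float y = rsqrt_estimate; // stands in for _mm256_rsqrt_ps(_x)
+ y = y * (neg_half * (y * y) + 1.5f); // one Newton step
+ return x * y; // sqrt(x) = x * (1/sqrt(x))
+}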
+
+// Functions for rsqrt.
+// Almost identical to the sqrt routine, just leave out the last multiplication
+// and fill in NaN/Inf where needed. Note that this function only exists as an
+// iterative version since there is no instruction for directly computing the
+// reciprocal square root in AVX/AVX2 (there will be one in AVX-512).
+#ifdef EIGEN_FAST_MATH
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+prsqrt<Packet8f>(const Packet8f& _x) {
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
+ _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
+
+ Packet8f neg_half = pmul(_x, p8f_minus_half);
+
+ // select only the inverse sqrt of positive normal inputs (denormals are
+ // flushed to zero and cause infs as well).
+ Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
+ Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
+
+ // Fill in NaNs and Infs for the negative/zero entries.
+ Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
+ Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
+ Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
+ _mm256_and_ps(zero_mask, p8f_inf));
+
+ // Do a single step of Newton's iteration.
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
+
+ // Insert NaNs and Infs in all the right places.
+ return _mm256_or_ps(x, infs_and_nans);
+}
+#else
+template <>
+EIGEN_STRONG_INLINE Packet8f prsqrt<Packet8f>(const Packet8f& x) {
+ _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
+ return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x));
+}
+#endif
+template <>
+EIGEN_STRONG_INLINE Packet4d prsqrt<Packet4d>(const Packet4d& x) {
+ _EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
+ return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
+}
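+
+// [Editor's sketch] Scalar model of the special-value handling in the fast-math rsqrt above,
+// assuming <limits> is available via Eigen/Core (ref_rsqrt_specials is a made-up name,
+// illustrative only): negative inputs yield NaN, zeros and flushed denormals yield +Inf, and
+// positive normal inputs keep the Newton-refined estimate.
+inline float ref_rsqrt_specials(float x, float refined_estimate)
+{
+ const float flt_min = 1.17549435e-38f; // 0x00800000, smallest positive normal
+ if (x < 0.0f) return std::numeric_limits<float>::quiet_NaN(); // neg_mask
+ if (x < flt_min) return std::numeric_limits<float>::infinity(); // zero_mask
+ return refined_estimate; // result of the Newton iteration
+}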
+
+// Functions for division.
+// The EIGEN_FAST_MATH version uses the _mm_rcp_ps approximation and one step of
+// Newton's method, at a cost of 1-2 bits of precision as opposed to the exact
+// solution. The main advantage of this approach is not just speed, but also the
+// fact that it can be inlined and pipelined with other computations, further
+// reducing its effective latency.
+#if EIGEN_FAST_DIV
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) {
+ _EIGEN_DECLARE_CONST_Packet8f(two, 2.0f);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
+
+ Packet8f neg_b = pnegate(b);
+
+ /* select only the inverse of non-zero b */
+ Packet8f non_zero_mask = _mm256_cmp_ps(b, _mm256_setzero_ps(), _CMP_NEQ_OQ);
+ Packet8f x = _mm256_and_ps(non_zero_mask, _mm256_rcp_ps(b));
+
+ /* One step of Newton's method on b - x^-1 == 0. */
+ x = pmul(x, pmadd(neg_b, x, p8f_two));
+
+ /* Return Infs wherever there were zeros. */
+ return pmul(a, _mm256_or_ps(_mm256_and_ps(non_zero_mask, x),
+ _mm256_andnot_ps(non_zero_mask, p8f_inf)));
+}
+#else
+template <>
+EIGEN_STRONG_INLINE Packet8f
+pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) {
+ return _mm256_div_ps(a, b);
+}
+#endif
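+
+// [Editor's sketch] Scalar model of the fast-math division above (ref_fast_div is a made-up
+// name, illustrative only; assumes <limits>): one Newton step on y ~ 1/b, y' = y*(2 - b*y),
+// refines the rcp estimate, zeros in b map to an infinite reciprocal, and the quotient is a*y'.
+inline float ref_fast_div(float a, float b, float rcp_estimate)
+{
+ if (b == 0.0f) // non_zero_mask is false here
+ return a * std::numeric_limits<float>::infinity();
+ float y = rcp_estimate; // stands in for _mm256_rcp_ps(b)
+ y = y * (2.0f + (-b) * y); // matches pmul(x, pmadd(neg_b, x, p8f_two))
+ return a * y;
+}
+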
+template <>
+EIGEN_STRONG_INLINE Packet4d
+pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) {
+ return _mm256_div_pd(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8i
+pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/) {
+ eigen_assert(false && "packet integer division is not supported by AVX");
+ return pset1<Packet8i>(0);
+}
+
+// Identical to the ptanh in GenericPacketMath.h, but for doubles use
+// a small/medium approximation threshold of 0.001.
+template<> EIGEN_STRONG_INLINE Packet4d ptanh_approx_threshold() {
+ return pset1<Packet4d>(0.001);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATH_FUNCTIONS_AVX_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h b/third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
new file mode 100644
index 0000000000..6369a836ab
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -0,0 +1,650 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_AVX_H
+#define EIGEN_PACKET_MATH_AVX_H
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#endif
+
+typedef __m256 Packet8f;
+typedef __m256i Packet8i;
+typedef __m256d Packet4d;
+
+template<> struct is_arithmetic<__m256> { enum { value = true }; };
+template<> struct is_arithmetic<__m256i> { enum { value = true }; };
+template<> struct is_arithmetic<__m256d> { enum { value = true }; };
+
+#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
+ const Packet8f p8f_##NAME = pset1<Packet8f>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
+ const Packet8f p8f_##NAME = (__m256)pset1<Packet8i>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
+ const Packet8i p8i_##NAME = pset1<Packet8i>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
+ const Packet4d p4d_##NAME = pset1<Packet4d>(X)
+
+
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet8f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=8,
+ HasHalfPacket = 1,
+
+ HasDiv = 1,
+ HasSin = 1,
+ HasCos = 0,
+ HasTanH = 1,
+ HasBlend = 1,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasSelect = 1,
+ HasEq = 1,
+ };
+ };
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet4d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 4,
+ HasHalfPacket = 1,
+
+ HasDiv = 1,
+ HasBlend = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasSelect = 1,
+ HasEq = 1,
+ };
+};
+
+/* Proper support for integers is only provided by AVX2. In the meantime, we'll
+ use SSE instructions and packets to deal with integers.
+template<> struct packet_traits<int> : default_packet_traits
+{
+ typedef Packet8i type;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=8
+ };
+};
+*/
+
+template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8}; };
+template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4}; };
+template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8}; };
+
+template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
+
+template<> EIGEN_STRONG_INLINE Packet8f plset<float>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
+template<> EIGEN_STRONG_INLINE Packet4d plset<double>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
+
+template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f ple<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a,b,_CMP_NGT_UQ); }
+template<> EIGEN_STRONG_INLINE Packet4d ple<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a,b,_CMP_NGT_UQ); }
+
+template<> EIGEN_STRONG_INLINE Packet8f plt<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a,b,_CMP_NGE_UQ); }
+template<> EIGEN_STRONG_INLINE Packet4d plt<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a,b,_CMP_NGE_UQ); }
+
+template<> EIGEN_STRONG_INLINE Packet8f peq<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a,b,_CMP_EQ_UQ); }
+template<> EIGEN_STRONG_INLINE Packet4d peq<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a,b,_CMP_EQ_UQ); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pselect<Packet8f>(const Packet8f& a, const Packet8f& b, const Packet8f& false_mask) { return _mm256_blendv_ps(a,b,false_mask); }
+template<> EIGEN_STRONG_INLINE Packet4d pselect<Packet4d>(const Packet4d& a, const Packet4d& b, const Packet4d& false_mask) { return _mm256_blendv_pd(a,b,false_mask); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
+{
+ return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
+{
+ return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
+
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
+#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+ // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
+ // and gcc stupidly generates a vfmadd132ps instruction,
+ // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
+ // the result of the product.
+ Packet8f res = c;
+ asm("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+ return res;
+#else
+ return _mm256_fmadd_ps(a,b,c);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
+#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+ // see above
+ Packet4d res = c;
+ asm("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+ return res;
+#else
+ return _mm256_fmadd_pd(a,b,c);
+#endif
+}
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
+
+template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
+
+// Loads 4 floats from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, a3}
+// (see the scalar sketch after this function).
+template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
+{
+ // TODO try to find a way to avoid the need of a temporary register
+// Packet8f tmp = _mm256_castps128_ps256(_mm_loadu_ps(from));
+// tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1);
+// return _mm256_unpacklo_ps(tmp,tmp);
+
+ // _mm256_insertf128_ps is very slow on Haswell, thus:
+ Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
+ // mimic an "inplace" permutation of the lower 128bits using a blend
+ tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
+ // then we can perform a consistent permutation on the global register to get everything in shape:
+ return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
+}
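+
+// [Editor's sketch] Scalar reference for the duplication pattern above (ref_loaddup4 is a
+// made-up name, illustrative only): each source element appears twice, in order.
+inline void ref_loaddup4(const float* from, float out[8])
+{
+ for (int i = 0; i < 4; ++i) {
+ out[2*i + 0] = from[i];
+ out[2*i + 1] = from[i];
+ }
+}
+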
+// Loads 2 doubles from memory and returns the packet {a0, a0, a1, a1}
+template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
+{
+ Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
+ return _mm256_permute_pd(tmp, 3<<2);
+}
+
+// Loads 2 floats from memory and returns the packet {a0, a0, a0, a0, a1, a1, a1, a1}
+template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
+{
+ Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
+ return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
+
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
+
+// NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available
+template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, int stride)
+{
+#ifdef EIGEN_VECTORIZE_AVX2
+ // Offsets must be {0, stride, ..., 7*stride}; a broadcast stride would read one element 8 times.
+ return _mm256_i32gather_ps(from, _mm256_mullo_epi32(_mm256_set1_epi32(stride), _mm256_setr_epi32(0,1,2,3,4,5,6,7)), 4);
+#else
+ return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
+ from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+#endif
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, int stride)
+{
+#ifdef EIGEN_VECTORIZE_AVX2
+ // Offsets must be {0, stride, 2*stride, 3*stride}, not a broadcast stride.
+ return _mm256_i32gather_pd(from, _mm_mullo_epi32(_mm_set1_epi32(stride), _mm_setr_epi32(0,1,2,3)), 8);
+#else
+ return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+#endif
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, int stride)
+{
+ __m128 low = _mm256_extractf128_ps(from, 0);
+ to[stride*0] = _mm_cvtss_f32(low);
+ to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
+ to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
+ to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
+
+ __m128 high = _mm256_extractf128_ps(from, 1);
+ to[stride*4] = _mm_cvtss_f32(high);
+ to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
+ to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
+ to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, int stride)
+{
+ __m128d low = _mm256_extractf128_pd(from, 0);
+ to[stride*0] = _mm_cvtsd_f64(low);
+ to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
+ __m128d high = _mm256_extractf128_pd(from, 1);
+ to[stride*2] = _mm_cvtsd_f64(high);
+ to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
+}
+
+template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
+{
+ Packet8f pa = pset1<Packet8f>(a);
+ pstore(to, pa);
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
+{
+ Packet4d pa = pset1<Packet4d>(a);
+ pstore(to, pa);
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
+{
+ Packet8i pa = pset1<Packet8i>(a);
+ pstore(to, pa);
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+
+template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
+ return _mm_cvtss_f32(_mm256_castps256_ps128(a));
+}
+template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
+ return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
+}
+template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a) {
+ return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
+}
+
+
+template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
+{
+ __m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
+ return _mm256_permute2f128_ps(tmp, tmp, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
+{
+ __m256d tmp = _mm256_shuffle_pd(a,a,5);
+ return _mm256_permute2f128_pd(tmp, tmp, 1);
+
+ // Alternative (equivalent) formulation, unreachable as written; kept for reference:
+ // __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
+ // return _mm256_permute_pd(swap_halves,5);
+}
+
+// pabs should be ok
+template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
+{
+ const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+ return _mm256_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
+{
+ const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+ return _mm256_and_pd(a,mask);
+}
+
+// preduxp should be ok
+// FIXME: why is this ok? Why isn't the simple implementation working as expected?
+template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
+{
+ __m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
+ __m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
+ __m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
+ __m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
+
+ __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+ __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+ __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+ __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+
+ __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+ __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+ __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+ __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+
+ __m256 sum1 = _mm256_add_ps(perm1, hsum5);
+ __m256 sum2 = _mm256_add_ps(perm2, hsum6);
+ __m256 sum3 = _mm256_add_ps(perm3, hsum7);
+ __m256 sum4 = _mm256_add_ps(perm4, hsum8);
+
+ __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+ __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+
+ __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
+ return final;
+}
+template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
+{
+ Packet4d tmp0, tmp1;
+
+ tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
+ tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+
+ tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
+ tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+
+ return _mm256_blend_pd(tmp0, tmp1, 0xC);
+}
+
+template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
+{
+ Packet8f tmp0 = _mm256_hadd_ps(a,_mm256_permute2f128_ps(a,a,1));
+ tmp0 = _mm256_hadd_ps(tmp0,tmp0);
+ return pfirst(_mm256_hadd_ps(tmp0, tmp0));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
+{
+ Packet4d tmp0 = _mm256_hadd_pd(a,_mm256_permute2f128_pd(a,a,1));
+ return pfirst(_mm256_hadd_pd(tmp0,tmp0));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f predux4<Packet8f>(const Packet8f& a)
+{
+ return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
+}
+
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
+{
+ Packet8f tmp;
+ tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
+ tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+ return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
+{
+ Packet4d tmp;
+ tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
+ return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
+}
+
+template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
+{
+ Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
+ tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+ return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
+{
+ Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
+ return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
+}
+
+template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
+{
+ Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
+ tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+ return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+
+template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
+{
+ Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
+ return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
+}
+
+
+template<int Offset>
+struct palign_impl<Offset,Packet8f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm256_blend_ps(first, second, 1);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
+ first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0x88);
+ }
+ else if (Offset==2)
+ {
+ first = _mm256_blend_ps(first, second, 3);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
+ first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xcc);
+ }
+ else if (Offset==3)
+ {
+ first = _mm256_blend_ps(first, second, 7);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
+ first = _mm256_blend_ps(tmp, _mm256_permute2f128_ps (tmp, tmp, 1), 0xee);
+ }
+ else if (Offset==4)
+ {
+ first = _mm256_blend_ps(first, second, 15);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
+ first = _mm256_permute_ps(_mm256_permute2f128_ps (tmp, tmp, 1), _MM_SHUFFLE(3,2,1,0));
+ }
+ else if (Offset==5)
+ {
+ first = _mm256_blend_ps(first, second, 31);
+ first = _mm256_permute2f128_ps(first, first, 1);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
+ first = _mm256_permute2f128_ps(tmp, tmp, 1);
+ first = _mm256_blend_ps(tmp, first, 0x88);
+ }
+ else if (Offset==6)
+ {
+ first = _mm256_blend_ps(first, second, 63);
+ first = _mm256_permute2f128_ps(first, first, 1);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
+ first = _mm256_permute2f128_ps(tmp, tmp, 1);
+ first = _mm256_blend_ps(tmp, first, 0xcc);
+ }
+ else if (Offset==7)
+ {
+ first = _mm256_blend_ps(first, second, 127);
+ first = _mm256_permute2f128_ps(first, first, 1);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
+ first = _mm256_permute2f128_ps(tmp, tmp, 1);
+ first = _mm256_blend_ps(tmp, first, 0xee);
+ }
+ }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet4d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm256_blend_pd(first, second, 1);
+ __m256d tmp = _mm256_permute_pd(first, 5);
+ first = _mm256_permute2f128_pd(tmp, tmp, 1);
+ first = _mm256_blend_pd(tmp, first, 0xA);
+ }
+ else if (Offset==2)
+ {
+ first = _mm256_blend_pd(first, second, 3);
+ first = _mm256_permute2f128_pd(first, first, 1);
+ }
+ else if (Offset==3)
+ {
+ first = _mm256_blend_pd(first, second, 7);
+ __m256d tmp = _mm256_permute_pd(first, 5);
+ first = _mm256_permute2f128_pd(tmp, tmp, 1);
+ first = _mm256_blend_pd(tmp, first, 5);
+ }
+ }
+};
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet8f,8>& kernel) {
+ __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+ __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+ __m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
+ __m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
+ __m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
+ __m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
+ __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
+ __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
+ __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
+ __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
+ __m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
+ __m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
+ __m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
+ __m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
+ kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
+ kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
+ kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
+ kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
+ kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
+ kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
+ kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
+ kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
+}
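+
+// [Editor's sketch] Scalar reference of what the 8x8 kernel above computes (ref_transpose8x8
+// is a made-up name, illustrative only): after the call, element j of kernel.packet[i] holds
+// what was element i of kernel.packet[j].
+inline void ref_transpose8x8(float m[8][8])
+{
+ for (int i = 0; i < 8; ++i)
+ for (int j = i + 1; j < 8; ++j) {
+ float t = m[i][j];
+ m[i][j] = m[j][i];
+ m[j][i] = t;
+ }
+}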
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet8f,4>& kernel) {
+ __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+ __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+
+ __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
+ __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
+ __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
+ __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
+
+ kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
+ kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
+ kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
+ kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4d,4>& kernel) {
+ __m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
+ __m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
+ __m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);
+ __m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
+
+ kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
+ kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
+ kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
+ kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
+ const __m256 zero = _mm256_setzero_ps();
+ const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
+ return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
+ const __m256d zero = _mm256_setzero_pd();
+ const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
+ return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
+}
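+// For reference, both pblend overloads implement an element-wise select:
+//   result[i] = ifPacket.select[i] ? thenPacket[i] : elsePacket[i]
+// The selector is turned into a mask by comparing against zero (false_mask is
+// all-ones where select[i] == 0), so _mm256_blendv picks elsePacket in those
+// lanes. Schematically (hypothetical values, initialization abbreviated):
+//   Selector<8> sel = {/* 1,0,1,0,1,0,1,0 */};
+//   pblend(sel, pset1<Packet8f>(1.f), pset1<Packet8f>(2.f)) -> {1,2,1,2,1,2,1,2}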
+
+// Functions to print vectors of different types; they make debugging much easier.
+namespace {
+void print4f(const char* name, __m128 val) {
+ float temp[4] __attribute__((aligned(32)));
+ _mm_store_ps(temp, val);
+ printf("%s: ", name);
+ for (int k = 0; k < 4; k++) printf("%.8e ", temp[k]);
+ printf("\n");
+}
+void print8f(const char* name, __m256 val) {
+ float temp[8] __attribute__((aligned(32)));
+ _mm256_store_ps(temp, val);
+ printf("%s: ", name);
+ for (int k = 0; k < 8; k++) printf("%.8e ", temp[k]);
+ printf("\n");
+}
+void print4i(const char* name, __m128i val) {
+ int temp[4] __attribute__((aligned(32)));
+ _mm_store_si128((__m128i*)temp, val);
+ printf("%s: ", name);
+ for (int k = 0; k < 4; k++) printf("%i ", temp[k]);
+ printf("\n");
+}
+void print8i(const char* name, __m256i val) {
+ int temp[8] __attribute__((aligned(32)));
+ _mm256_store_si256((__m256i*)temp, val);
+ printf("%s: ", name);
+ for (int k = 0; k < 8; k++) printf("%i ", temp[k]);
+ printf("\n");
+}
+void print8b(const char* name, __m256i val) {
+ int temp[8] __attribute__((aligned(32)));
+ _mm256_store_si256((__m256i*)temp, val);
+ printf("%s: ", name);
+ for (int k = 0; k < 8; k++) printf("0x%08x ", temp[k]);
+ printf("\n");
+}
+void print4d(const char* name, __m256d val) {
+ double temp[4] __attribute__((aligned(32)));
+ _mm256_store_pd(temp, val);
+ printf("%s: ", name);
+ for (int k = 0; k < 4; k++) printf("%.16e ", temp[k]);
+ printf("\n");
+}
+} // end anonymous namespace
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_AVX_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h b/third_party/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h
new file mode 100644
index 0000000000..83bfdc604b
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h
@@ -0,0 +1,51 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TYPE_CASTING_AVX_H
+#define EIGEN_TYPE_CASTING_AVX_H
+
+namespace Eigen {
+
+namespace internal {
+
+// For now we use SSE to handle integers, so we can't use AVX instructions to cast
+// from int to float
+template <>
+struct type_casting_traits<float, int> {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+template <>
+struct type_casting_traits<int, float> {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+
+
+template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
+ return _mm256_cvtps_epi32(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
+ return _mm256_cvtepi32_ps(a);
+}
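+// Note: even though the traits above disable vectorized casts (the integer
+// path otherwise goes through SSE), these pcast specializations map directly
+// onto the AVX conversion instructions: _mm256_cvtps_epi32 rounds according to
+// the current rounding mode (round-to-nearest by default), and
+// _mm256_cvtepi32_ps is exact for integers of magnitude up to 2^24.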
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_AVX_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h b/third_party/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h
new file mode 100644
index 0000000000..57df9508b3
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -0,0 +1,439 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX32_ALTIVEC_H
+#define EIGEN_COMPLEX32_ALTIVEC_H
+
+
+namespace Eigen {
+
+namespace internal {
+
+static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
+#ifdef EIGEN_VECTORIZE_VSX
+#ifdef _BIG_ENDIAN
+static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+#else
+static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+#endif
+#endif // EIGEN_VECTORIZE_VSX
+
+//---------- float ----------
+struct Packet2cf
+{
+ EIGEN_STRONG_INLINE Packet2cf() {}
+ EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
+ Packet4f v;
+};
+
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet2cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
+
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
+{
+ Packet2cf res;
+ /* On AltiVec we cannot load 64-bit registers, so we have to take care of alignment */
+ if((ptrdiff_t(&from) % 16) == 0)
+ res.v = pload<Packet4f>((const float *)&from);
+ else
+ res.v = ploadu<Packet4f>((const float *)&from);
+ res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
+ return res;
+}
+
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride)
+{
+ std::complex<float> EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return Packet2cf(vec_ld(0, (const float*)af));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride)
+{
+ std::complex<float> EIGEN_ALIGN16 af[2];
+ vec_st(from.v, 0, (float*)af);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+
+
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR)); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet4f v1, v2;
+
+ // Permute and multiply the real parts of a and b
+ v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
+ // Get the imaginary parts of a
+ v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
+ // multiply a_re * b
+ v1 = vec_madd(v1, b.v, p4f_ZERO);
+ // multiply a_im * b and get the conjugate result
+ v2 = vec_madd(v2, b.v, p4f_ZERO);
+ v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR);
+ // permute back to a proper order
+ v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
+
+ return Packet2cf(vec_add(v1, v2));
+}
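+// The sequence above evaluates the usual complex product
+//   (ar + i*ai) * (br + i*bi) = (ar*br - ai*bi) + i*(ar*bi + ai*br)
+// in vector form: v1 = {ar*br, ar*bi, ...} comes from broadcasting the real
+// parts of a, v2 = {ai*br, ai*bi, ...} from its imaginary parts; the XOR with
+// p4ui_CONJ_XOR flips the sign of every second lane and the permute swaps the
+// members of each pair, so the final add yields {ar*br - ai*bi, ar*bi + ai*br, ...}.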
+
+template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
+{
+ return pset1<Packet2cf>(*from);
+}
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
+
+#ifndef __VSX__
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); }
+#endif
+
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
+{
+ std::complex<float> EIGEN_ALIGN16 res[2];
+ pstore((float *)&res, a.v);
+
+ return res[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
+{
+ Packet4f rev_a;
+ rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
+ return Packet2cf(rev_a);
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+ Packet4f b;
+ b = (Packet4f) vec_sld(a.v, a.v, 8);
+ b = padd(a.v, b);
+ return pfirst(Packet2cf(b));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ Packet4f b1, b2;
+#ifdef _BIG_ENDIAN
+ b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
+ b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
+#else
+ b1 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
+ b2 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
+#endif
+ b2 = (Packet4f) vec_sld(b2, b2, 8);
+ b2 = padd(b1, b2);
+
+ return Packet2cf(b2);
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ Packet4f b;
+ Packet2cf prod;
+ b = (Packet4f) vec_sld(a.v, a.v, 8);
+ prod = pmul(a, Packet2cf(b));
+
+ return pfirst(prod);
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset==1)
+ {
+#ifdef _BIG_ENDIAN
+ first.v = vec_sld(first.v, second.v, 8);
+#else
+ first.v = vec_sld(second.v, first.v, 8);
+#endif
+ }
+ }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ // TODO optimize it for AltiVec
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+ Packet4f s = vec_madd(b.v, b.v, p4f_ZERO);
+ return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV))));
+}
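+// The division above uses a/b = a*conj(b) / |b|^2: the numerator is the
+// conjugate product from conj_helper, s = {br*br, bi*bi, ...}, and
+// s + reverse(s) broadcasts |b|^2 = br^2 + bi^2 to both lanes of each pair
+// before the element-wise divide.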
+
+template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
+{
+ return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));
+}
+
+template<> EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
+{
+ Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+ kernel.packet[0].v = tmp;
+}
+
+//---------- double ----------
+#if defined(EIGEN_VECTORIZE_VSX)
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+ Packet2d v;
+};
+
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
+
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
+
+// Google-local: Change type from DenseIndex to int in patch.
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, int/*DenseIndex*/ stride)
+{
+ std::complex<double> EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet1cd>(af);
+}
+// Google-local: Change type from DenseIndex to int in patch.
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, int/*DenseIndex*/ stride)
+{
+ std::complex<double> EIGEN_ALIGN16 af[2];
+ pstore<std::complex<double> >(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_add(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_sub(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet2d a_re, a_im, v1, v2;
+
+ // Permute and multiply the real parts of a and b
+ a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
+ // Get the imaginary parts of a
+ a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
+ // multiply a_re * b
+ v1 = vec_madd(a_re, b.v, p2d_ZERO);
+ // multiply a_im * b and get the conjugate result
+ v2 = vec_madd(a_im, b.v, p2d_ZERO);
+ v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
+ v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
+
+ return Packet1cd(vec_add(v1, v2));
+}
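+// Same scheme as the Packet2cf product above: v1 = a_re * b, while v2 is
+// a_im * b with its two lanes swapped and one lane sign-flipped through
+// p2ul_CONJ_XOR1, so the final add produces (ar*br - ai*bi, ar*bi + ai*br).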
+
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
+{
+ return pset1<Packet1cd>(*from);
+}
+
+#ifndef __VSX__
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { vec_dstt((long *)addr, DST_CTRL(2,2,32), DST_CHAN); }
+#endif
+
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ std::complex<double> EIGEN_ALIGN16 res[2];
+ pstore<std::complex<double> >(res, a);
+
+ return res[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+ return vecs[0];
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
+ {
+ // FIXME Is it guaranteed that we never have to align a Packet1cd?
+ // Even though a std::complex<double> occupies 16 bytes, it is not necessarily aligned on a 16-byte boundary...
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ // TODO optimize it for AltiVec
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
+ return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_REVERSE64))));
+}
+
+EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+ Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+ kernel.packet[0].v = tmp;
+}
+#endif // EIGEN_VECTORIZE_VSX
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX32_ALTIVEC_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/third_party/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h
new file mode 100644
index 0000000000..e3545b4abc
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h
@@ -0,0 +1,299 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007 Julien Pommier
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/* The sin, cos, exp, and log functions of this file come from
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
+ */
+
+#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+
+#include <iostream>
+
+#define DUMP(v) do { std::cout << #v " = " << (v) << std::endl; } while(0)
+
+namespace Eigen {
+
+namespace internal {
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f plog<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+ _EIGEN_DECLARE_CONST_Packet4i(23, 23);
+
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+
+ /* the smallest non denormalized float number */
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);
+
+ /* natural logarithm computed for 4 simultaneous floats;
+    returns NaN for x <= 0
+ */
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+
+
+ Packet4i emm0;
+
+ /* isvalid_mask is 0 if x < 0 or x is NaN. */
+ Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
+ Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));
+
+ x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
+ emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
+ reinterpret_cast<Packet4ui>(p4i_23));
+
+ /* keep only the fractional part */
+ x = pand(x, p4f_inv_mant_mask);
+ x = por(x, p4f_half);
+
+ emm0 = psub(emm0, p4i_0x7f);
+ Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);
+
+ /* part2:
+ if( x < SQRTHF ) {
+ e -= 1;
+ x = x + x - 1.0;
+ } else { x = x - 1.0; }
+ */
+ Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
+ Packet4f tmp = pand(x, mask);
+ x = psub(x, p4f_1);
+ e = psub(e, pand(p4f_1, mask));
+ x = padd(x, tmp);
+
+ Packet4f x2 = pmul(x,x);
+ Packet4f x3 = pmul(x2,x);
+
+ Packet4f y, y1, y2;
+ y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
+ y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
+ y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
+ y = pmadd(y , x, p4f_cephes_log_p2);
+ y1 = pmadd(y1, x, p4f_cephes_log_p5);
+ y2 = pmadd(y2, x, p4f_cephes_log_p8);
+ y = pmadd(y, x3, y1);
+ y = pmadd(y, x3, y2);
+ y = pmul(y, x3);
+
+ y1 = pmul(e, p4f_cephes_log_q1);
+ tmp = pmul(x2, p4f_half);
+ y = padd(y, y1);
+ x = psub(x, tmp);
+ y2 = pmul(e, p4f_cephes_log_q2);
+ x = padd(x, y);
+ x = padd(x, y2);
+ // negative arg will be NAN, 0 will be -INF
+ x = vec_sel(x, p4f_minus_inf, iszero_mask);
+ x = vec_sel(p4f_minus_nan, x, isvalid_mask);
+ return x;
+}
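+// Outline of the cephes-style log above: the input is split as x = m * 2^e
+// with m in [0.5, 1); when m < 1/sqrt(2) the exponent is decremented and m is
+// doubled, so the reduced argument y = m - 1 stays small (roughly within
+// [-0.3, 0.42)). log(1 + y) is then evaluated as y - y^2/2 + y^3 * P(y), and
+// e*log(2) is added back in two pieces (cephes_log_q1 + cephes_log_q2) to
+// limit rounding error.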
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+ _EIGEN_DECLARE_CONST_Packet4i(23, 23);
+
+
+ _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+
+ Packet4f tmp, fx;
+ Packet4i emm0;
+
+ // clamp x
+ x = vec_max(vec_min(x, p4f_exp_hi), p4f_exp_lo);
+
+ /* express exp(x) as exp(g + n*log(2)) */
+ fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
+
+ fx = vec_floor(fx);
+
+ tmp = pmul(fx, p4f_cephes_exp_C1);
+ Packet4f z = pmul(fx, p4f_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+
+ z = pmul(x,x);
+
+ Packet4f y = p4f_cephes_exp_p0;
+ y = pmadd(y, x, p4f_cephes_exp_p1);
+ y = pmadd(y, x, p4f_cephes_exp_p2);
+ y = pmadd(y, x, p4f_cephes_exp_p3);
+ y = pmadd(y, x, p4f_cephes_exp_p4);
+ y = pmadd(y, x, p4f_cephes_exp_p5);
+ y = pmadd(y, z, x);
+ y = padd(y, p4f_1);
+
+ // build 2^n
+ emm0 = vec_cts(fx, 0);
+ emm0 = vec_add(emm0, p4i_0x7f);
+ emm0 = vec_sl(emm0, reinterpret_cast<Packet4ui>(p4i_23));
+
+ // AltiVec's max & min operators silently drop NaNs. Check for NaNs in the
+ // inputs and return them unmodified.
+ Packet4ui isnumber_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(_x, _x));
+ return vec_sel(_x, pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x),
+ isnumber_mask);
+}
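+// Outline of the exp above: the argument is reduced with n = floor(x/log(2) + 0.5)
+// and g = x - n*log(2), where log(2) is split into cephes_exp_C1 + C2 so the
+// subtraction stays accurate. exp(g) is approximated by a short polynomial
+// (1 + g + g^2 * P(g)), and the result is scaled by 2^n, built by adding the
+// bias 0x7f to n and shifting it into the float exponent field.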
+
+#ifdef __VSX__
+
+#undef GCC_VERSION
+#define GCC_VERSION (__GNUC__ * 10000 \
+ + __GNUC_MINOR__ * 100 \
+ + __GNUC_PATCHLEVEL__)
+
+// VSX support varies between different compilers and even different
+// versions of the same compiler. For gcc version >= 4.9.3, we can use
+// vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use
+// a slow version that works with older compilers.
+static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
+#if GCC_VERSION >= 40903 || defined(__clang__)
+ return vec_cts(x, 0);
+#else
+ double tmp[2];
+ memcpy(tmp, &x, sizeof(tmp));
+ Packet2l l = { static_cast<long long>(tmp[0]),
+ static_cast<long long>(tmp[1]) };
+ return l;
+#endif
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+ Packet2d x = _x;
+
+ _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+ _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+ _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+
+ _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
+ _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+
+ Packet2d tmp, fx;
+ Packet2l emm0;
+
+ // clamp x
+ x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+ /* express exp(x) as exp(g + n*log(2)) */
+ fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
+
+ fx = vec_floor(fx);
+
+ tmp = pmul(fx, p2d_cephes_exp_C1);
+ Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+
+ Packet2d x2 = pmul(x,x);
+
+ Packet2d px = p2d_cephes_exp_p0;
+ px = pmadd(px, x2, p2d_cephes_exp_p1);
+ px = pmadd(px, x2, p2d_cephes_exp_p2);
+ px = pmul (px, x);
+
+ Packet2d qx = p2d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+
+ x = pdiv(px,psub(qx,px));
+ x = pmadd(p2d_2,x,p2d_1);
+
+ // build 2^n
+ emm0 = ConvertToPacket2l(fx);
+
+#ifdef __POWER8_VECTOR__
+ static const Packet2l p2l_1023 = { 1023, 1023 };
+ static const Packet2ul p2ul_52 = { 52, 52 };
+
+ emm0 = vec_add(emm0, p2l_1023);
+ emm0 = vec_sl(emm0, p2ul_52);
+#else
+ // Code is a bit complex for POWER7. There is actually a
+ // vec_xxsldi intrinsic but it is not supported by some gcc versions.
+ // So we shift (52-32) bits and do a word swap with zeros.
+ _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
+ _EIGEN_DECLARE_CONST_Packet4i(20, 20); // 52 - 32
+
+ Packet4i emm04i = reinterpret_cast<Packet4i>(emm0);
+ emm04i = vec_add(emm04i, p4i_1023);
+ emm04i = vec_sl(emm04i, reinterpret_cast<Packet4ui>(p4i_20));
+ static const Packet16uc perm = {
+ 0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
+ 0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };
+#ifdef _BIG_ENDIAN
+ emm0 = reinterpret_cast<Packet2l>(vec_perm(p4i_ZERO, emm04i, perm));
+#else
+ emm0 = reinterpret_cast<Packet2l>(vec_perm(emm04i, p4i_ZERO, perm));
+#endif
+
+#endif
+
+ // AltiVec's max & min operators silently drop NaNs. Check for NaNs in the
+ // inputs and return them unmodified.
+ Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
+ return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
+ isnumber_mask);
+}
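+// The double-precision variant uses the same n*log(2) range reduction but
+// approximates exp(g) with a Pade-style rational: after the divide,
+// x = 1 + 2*px/(qx - px) ~ exp(g). The 2^n factor is assembled by adding the
+// exponent bias 1023 to n and shifting it 52 bits into the exponent field
+// (a single 64-bit shift on POWER8, a 32-bit shift plus word permute on POWER7).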
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATH_FUNCTIONS_ALTIVEC_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h b/third_party/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h
new file mode 100644
index 0000000000..640488e92b
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -0,0 +1,943 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Konstantinos Margaritis <markos@codex.gr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_ALTIVEC_H
+#define EIGEN_PACKET_MATH_ALTIVEC_H
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+
+// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
+#endif
+
+typedef __vector float Packet4f;
+typedef __vector int Packet4i;
+typedef __vector unsigned int Packet4ui;
+typedef __vector __bool int Packet4bi;
+typedef __vector short int Packet8i;
+typedef __vector unsigned char Packet16uc;
+
+// We don't want to write the same code all the time, but we need to reuse the constants
+// and it doesn't really work to declare them global, so we define macros instead
+
+#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
+ Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X)
+
+#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = vec_splat_s32(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ Packet4f p4f_##NAME = pset1<Packet4f>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = pset1<Packet4i>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ Packet2d p2d_##NAME = pset1<Packet2d>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
+ Packet2l p2l_##NAME = pset1<Packet2l>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))
+
+#define DST_CHAN 1
+#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
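+// DST_CTRL packs the three fields of the AltiVec data-stream prefetch control
+// word (block size in 16-byte vector units, block count, and stride in bytes)
+// into the layout expected by vec_dst/vec_dstt. For example, DST_CTRL(2,2,32),
+// used by the prefetch specializations below, requests two 32-byte blocks
+// spaced 32 bytes apart on channel DST_CHAN.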
+
+// These constants are endian-agnostic
+static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
+#ifndef __VSX__
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1);
+static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
+#endif
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16);
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1);
+static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1);
+
+static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
+static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
+
+static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
+static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
+
+// Mask alignment
+#ifdef __PPC64__
+#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
+#else
+#define _EIGEN_MASK_ALIGNMENT 0xfffffff0
+#endif
+
+#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
+
+// Handle endianness properly while loading constants
+// Define global static constants:
+#ifdef _BIG_ENDIAN
+static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
+static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
+#else
+static Packet16uc p16uc_FORWARD = p16uc_REVERSE32;
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
+static Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 0), (Packet16uc)p4i_ZERO, 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
+#endif // _BIG_ENDIAN
+
+static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
+static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
+static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
+
+static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
+
+#ifdef _BIG_ENDIAN
+static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+#else
+static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+#endif // _BIG_ENDIAN
+
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet4f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+
+ // FIXME check the Has*
+#if defined(__VSX__)
+ HasDiv = 1,
+#endif
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 0
+ };
+};
+template<> struct packet_traits<int> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half;
+ enum {
+ // FIXME check the Has*
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4
+ };
+};
+
+
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
+
+inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
+{
+ union {
+ Packet16uc v;
+ unsigned char n[16];
+ } vt;
+ vt.v = v;
+ for (int i=0; i< 16; i++)
+ s << (int)vt.n[i] << ", ";
+ return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
+{
+ union {
+ Packet4f v;
+ float n[4];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+ return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
+{
+ union {
+ Packet4i v;
+ int n[4];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+ return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
+{
+ union {
+ Packet4ui v;
+ unsigned int n[4];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+ return s;
+}
+/*
+inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
+{
+ union {
+ Packet4bi v;
+ unsigned int n[4];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+ return s;
+}*/
+
+
+// Need to define them first or we get specialization after instantiation errors
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
+
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
+ // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
+ float EIGEN_ALIGN16 af[4];
+ af[0] = from;
+ Packet4f vc = pload<Packet4f>(af);
+ vc = vec_splat(vc, 0);
+ return vc;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
+ int EIGEN_ALIGN16 ai[4];
+ ai[0] = from;
+ Packet4i vc = pload<Packet4i>(ai);
+ vc = vec_splat(vc, 0);
+ return vc;
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+ a3 = pload<Packet4f>(a);
+ a0 = vec_splat(a3, 0);
+ a1 = vec_splat(a3, 1);
+ a2 = vec_splat(a3, 2);
+ a3 = vec_splat(a3, 3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4i>(const int *a,
+ Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
+{
+ a3 = pload<Packet4i>(a);
+ a0 = vec_splat(a3, 0);
+ a1 = vec_splat(a3, 1);
+ a2 = vec_splat(a3, 2);
+ a3 = vec_splat(a3, 3);
+}
+
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride)
+{
+ float EIGEN_ALIGN16 af[4];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ af[2] = from[2*stride];
+ af[3] = from[3*stride];
+ return pload<Packet4f>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ ai[0] = from[0*stride];
+ ai[1] = from[1*stride];
+ ai[2] = from[2*stride];
+ ai[3] = from[3*stride];
+ return pload<Packet4i>(ai);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride)
+{
+ float EIGEN_ALIGN16 af[4];
+ pstore<float>(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+ to[2*stride] = af[2];
+ to[3*stride] = af[3];
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ pstore<int>((int *)ai, from);
+ to[0*stride] = ai[0];
+ to[1*stride] = ai[1];
+ to[2*stride] = ai[2];
+ to[3*stride] = ai[3];
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
+template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
+
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return psub<Packet4f>(p4f_ZERO, a); }
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return psub<Packet4i>(p4i_ZERO, a); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,p4f_ZERO); }
+/* Commented out: it's actually slower than scalar processing
+ *
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+ // Detailed in: http://freevec.org/content/32bit_signed_integer_multiplication_altivec
+ //Set up constants, variables
+ Packet4i a1, b1, bswap, low_prod, high_prod, prod, prod_, v1sel;
+
+ // Get the absolute values
+ a1 = vec_abs(a);
+ b1 = vec_abs(b);
+
+ // Get the signs using xor
+ Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), p4i_ZERO);
+
+ // Do the multiplication for the absolute values.
+ bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) p4i_MINUS16 );
+ low_prod = vec_mulo((Packet8i) a1, (Packet8i)b1);
+ high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, p4i_ZERO);
+ high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) p4i_MINUS16);
+ prod = vec_add( low_prod, high_prod );
+
+ // NOR the product and select only the negative elements according to the sign mask
+ prod_ = vec_nor(prod, prod);
+ prod_ = vec_sel(p4i_ZERO, prod_, sgn);
+
+ // Add 1 to the result to get the negative numbers
+ v1sel = vec_sel(p4i_ZERO, p4i_ONE, sgn);
+ prod_ = vec_add(prod_, v1sel);
+
+ // Merge the results back to the final vector.
+ prod = vec_sel(prod, prod_, sgn);
+
+ return prod;
+}
+*/
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+#if !defined(__VSX__) // VSX actually provides a div instruction
+ Packet4f t, y_0, y_1;
+
+ // AltiVec does not offer a divide instruction, so we have to use a reciprocal approximation
+ y_0 = vec_re(b);
+
+ // Do one Newton-Raphson iteration to get the needed accuracy
+ t = vec_nmsub(y_0, b, p4f_ONE);
+ y_1 = vec_madd(y_0, t, y_0);
+
+ return vec_madd(a, y_1, p4f_ZERO);
+#else
+ return vec_div(a, b);
+#endif
+}
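+// The non-VSX path above refines the hardware reciprocal estimate with one
+// Newton-Raphson step: given y0 ~ 1/b, t = 1 - b*y0 and y1 = y0 + y0*t, which
+// roughly doubles the number of correct bits; a*y1 then approximates a/b.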
+
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
+{ eigen_assert(false && "packet integer division is not supported by AltiVec");
+ return pset1<Packet4i>(0);
+}
+
+// For some weird reason, it has to be overloaded for packets of integers
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); }
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
+
+#ifdef _BIG_ENDIAN
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ Packet16uc MSQ, LSQ;
+ Packet16uc mask;
+ MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
+ LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword
+ mask = vec_lvsl(0, from); // create the permute mask
+ return (Packet4f) vec_perm(MSQ, LSQ, mask); // align the data
+
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
+ Packet16uc MSQ, LSQ;
+ Packet16uc mask;
+ MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
+ LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword
+ mask = vec_lvsl(0, from); // create the permute mask
+ return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data
+}
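+// Both unaligned loads above use the classic AltiVec idiom: two aligned
+// 16-byte loads straddling the target address (offsets 0 and 15), plus a
+// vec_lvsl permute mask derived from that address, so vec_perm can stitch the
+// sixteen wanted bytes together without any unaligned memory access.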
+#else
+// We also need to redefine little-endian loading of Packet4i/Packet4f using VSX
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ return (Packet4i) vec_vsx_ld((long)from & 15, (const Packet4i*) _EIGEN_ALIGNED_PTR(from));
+}
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ return (Packet4f) vec_vsx_ld((long)from & 15, (const Packet4f*) _EIGEN_ALIGNED_PTR(from));
+}
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ Packet4f p;
+ if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
+ else p = ploadu<Packet4f>(from);
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+ Packet4i p;
+ if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
+ else p = ploadu<Packet4i>(from);
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+
+#ifdef _BIG_ENDIAN
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
+{
+ EIGEN_DEBUG_UNALIGNED_STORE
+ // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
+ // Warning: not thread safe!
+ Packet16uc MSQ, LSQ, edges;
+ Packet16uc edgeAlign, align;
+
+ MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword
+ LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword
+ edgeAlign = vec_lvsl(0, to); // permute map to extract edges
+ edges=vec_perm(LSQ,MSQ,edgeAlign); // extract the edges
+ align = vec_lvsr( 0, to ); // permute map to misalign data
+ MSQ = vec_perm(edges,(Packet16uc)from,align); // misalign the data (MSQ)
+ LSQ = vec_perm((Packet16uc)from,edges,align); // misalign the data (LSQ)
+ vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
+ vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
+{
+ EIGEN_DEBUG_UNALIGNED_STORE
+ // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
+ // Warning: not thread safe!
+ Packet16uc MSQ, LSQ, edges;
+ Packet16uc edgeAlign, align;
+
+ MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword
+ LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword
+ edgeAlign = vec_lvsl(0, to); // permute map to extract edges
+ edges=vec_perm(LSQ, MSQ, edgeAlign); // extract the edges
+ align = vec_lvsr( 0, to ); // permute map to misalign data
+ MSQ = vec_perm(edges, (Packet16uc) from, align); // misalign the data (MSQ)
+ LSQ = vec_perm((Packet16uc) from, edges, align); // misalign the data (LSQ)
+ vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
+ vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
+}
+#else
+// We also need to redefine little-endian stores of Packet4i/Packet4f using VSX
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
+{
+ EIGEN_DEBUG_UNALIGNED_STORE
+ vec_vsx_st(from, (long)to & 15, (Packet4i*) _EIGEN_ALIGNED_PTR(to));
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
+{
+ EIGEN_DEBUG_UNALIGNED_STORE
+ vec_vsx_st(from, (long)to & 15, (Packet4f*) _EIGEN_ALIGNED_PTR(to));
+}
+#endif
+
+#ifndef __VSX__
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
+#endif
+
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
+
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
+
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ Packet4f b, sum;
+ b = (Packet4f) vec_sld(a, a, 8);
+ sum = vec_add(a, b);
+ b = (Packet4f) vec_sld(sum, sum, 4);
+ sum = vec_add(sum, b);
+ return pfirst(sum);
+}
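+// The horizontal sum above is a fold-and-add: rotating by 8 bytes and adding
+// gives {a0+a2, a1+a3, ...}, a further 4-byte rotate and add leaves the total
+// a0+a1+a2+a3 in every lane, and pfirst extracts lane 0.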
+
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ Packet4f v[4], sum[4];
+
+ // It's easier and faster to transpose then add as columns
+ // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
+ // Do the transpose, first set of moves
+ v[0] = vec_mergeh(vecs[0], vecs[2]);
+ v[1] = vec_mergel(vecs[0], vecs[2]);
+ v[2] = vec_mergeh(vecs[1], vecs[3]);
+ v[3] = vec_mergel(vecs[1], vecs[3]);
+ // Get the resulting vectors
+ sum[0] = vec_mergeh(v[0], v[2]);
+ sum[1] = vec_mergel(v[0], v[2]);
+ sum[2] = vec_mergeh(v[1], v[3]);
+ sum[3] = vec_mergel(v[1], v[3]);
+
+ // Now do the summation:
+ // Lines 0+1
+ sum[0] = vec_add(sum[0], sum[1]);
+ // Lines 2+3
+ sum[1] = vec_add(sum[2], sum[3]);
+ // Add the results
+ sum[0] = vec_add(sum[0], sum[1]);
+
+ return sum[0];
+}
+
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i sum;
+ sum = vec_sums(a, p4i_ZERO);
+#ifdef _BIG_ENDIAN
+ sum = vec_sld(sum, p4i_ZERO, 12);
+#else
+ sum = vec_sld(p4i_ZERO, sum, 4);
+#endif
+ return pfirst(sum);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ Packet4i v[4], sum[4];
+
+ // It's easier and faster to transpose then add as columns
+ // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
+ // Do the transpose, first set of moves
+ v[0] = vec_mergeh(vecs[0], vecs[2]);
+ v[1] = vec_mergel(vecs[0], vecs[2]);
+ v[2] = vec_mergeh(vecs[1], vecs[3]);
+ v[3] = vec_mergel(vecs[1], vecs[3]);
+ // Get the resulting vectors
+ sum[0] = vec_mergeh(v[0], v[2]);
+ sum[1] = vec_mergel(v[0], v[2]);
+ sum[2] = vec_mergeh(v[1], v[3]);
+ sum[3] = vec_mergel(v[1], v[3]);
+
+ // Now do the summation:
+ // Lines 0+1
+ sum[0] = vec_add(sum[0], sum[1]);
+ // Lines 2+3
+ sum[1] = vec_add(sum[2], sum[3]);
+ // Add the results
+ sum[0] = vec_add(sum[0], sum[1]);
+
+ return sum[0];
+}
+
+// Other reduction functions:
+// mul
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ Packet4f prod;
+ prod = pmul(a, (Packet4f)vec_sld(a, a, 8));
+ return pfirst(pmul(prod, (Packet4f)vec_sld(prod, prod, 4)));
+}
+
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ return aux[0] * aux[1] * aux[2] * aux[3];
+}
+
+// min
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ Packet4f b, res;
+ b = vec_min(a, vec_sld(a, a, 8));
+ res = vec_min(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, res;
+ b = vec_min(a, vec_sld(a, a, 8));
+ res = vec_min(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+
+// max
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ Packet4f b, res;
+ b = vec_max(a, vec_sld(a, a, 8));
+ res = vec_max(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, res;
+ b = vec_max(a, vec_sld(a, a, 8));
+ res = vec_max(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+#ifdef _BIG_ENDIAN
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(first, second, 4); break;
+ case 2:
+ first = vec_sld(first, second, 8); break;
+ case 3:
+ first = vec_sld(first, second, 12); break;
+ }
+#else
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(second, first, 12); break;
+ case 2:
+ first = vec_sld(second, first, 8); break;
+ case 3:
+ first = vec_sld(second, first, 4); break;
+ }
+#endif
+ }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+#ifdef _BIG_ENDIAN
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(first, second, 4); break;
+ case 2:
+ first = vec_sld(first, second, 8); break;
+ case 3:
+ first = vec_sld(first, second, 12); break;
+ }
+#else
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(second, first, 12); break;
+ case 2:
+ first = vec_sld(second, first, 8); break;
+ case 3:
+ first = vec_sld(second, first, 4); break;
+ }
+#endif
+ }
+};
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ Packet4f t0, t1, t2, t3;
+ t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+ t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+ t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+ t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+ kernel.packet[0] = vec_mergeh(t0, t2);
+ kernel.packet[1] = vec_mergel(t0, t2);
+ kernel.packet[2] = vec_mergeh(t1, t3);
+ kernel.packet[3] = vec_mergel(t1, t3);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ Packet4i t0, t1, t2, t3;
+ t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+ t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+ t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+ t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+ kernel.packet[0] = vec_mergeh(t0, t2);
+ kernel.packet[1] = vec_mergel(t0, t2);
+ kernel.packet[2] = vec_mergeh(t1, t3);
+ kernel.packet[3] = vec_mergel(t1, t3);
+}
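+// The two ptranspose overloads above perform a 4x4 in-register transpose in two merge
+// stages: vec_mergeh/vec_mergel on rows (0,2) and (1,3) interleave elements pairwise,
+// and a second merge pass produces the transposed rows. Illustrative use (not compiled,
+// hypothetical helper name):
+#if 0
+static inline void transpose4x4(float* data) // 16 floats, a row-major 4x4 block
+{
+ PacketBlock<Packet4f,4> block;
+ for (int i = 0; i < 4; ++i) block.packet[i] = pload<Packet4f>(data + 4*i);
+ ptranspose(block); // rows become columns in-register
+ for (int i = 0; i < 4; ++i) pstore<float>(data + 4*i, block.packet[i]);
+}
+#endif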
+
+
+//---------- double ----------
+#if defined(__VSX__)
+typedef __vector double Packet2d;
+typedef __vector unsigned long long Packet2ul;
+typedef __vector long long Packet2l;
+
+static Packet2l p2l_ZERO = (Packet2l) p4i_ZERO;
+static Packet2d p2d_ONE = { 1.0, 1.0 };
+static Packet2d p2d_ZERO = (Packet2d) p4f_ZERO;
+static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
+
+#ifdef _BIG_ENDIAN
+static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ZERO, (Packet16uc) p2d_ONE, 8);
+#else
+static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ONE, (Packet16uc) p2d_ZERO, 8);
+#endif
+
+static EIGEN_STRONG_INLINE Packet2d vec_splat_dbl(Packet2d& a, int index)
+{
+ switch (index) {
+ case 0:
+ return (Packet2d) vec_perm(a, a, p16uc_PSET64_HI);
+ case 1:
+ return (Packet2d) vec_perm(a, a, p16uc_PSET64_LO);
+ }
+ return a;
+}
+
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 0,
+
+ HasDiv = 1,
+ HasExp = 1,
+ HasSqrt = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
+
+
+inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
+{
+ union {
+ Packet2d v;
+ double n[2];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1];
+ return s;
+}
+
+// Need to define them first or we get specialization after instantiation errors
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return (Packet2d) vec_ld(0, (const float *) from); } //FIXME
+
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st((Packet4f)from, 0, (float *)to); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
+ double EIGEN_ALIGN16 af[2];
+ af[0] = from;
+ Packet2d vc = pload<Packet2d>(af);
+ vc = vec_splat_dbl(vc, 0);
+ return vc;
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+ a1 = pload<Packet2d>(a);
+ a0 = vec_splat_dbl(a1, 0);
+ a1 = vec_splat_dbl(a1, 1);
+ a3 = pload<Packet2d>(a+2);
+ a2 = vec_splat_dbl(a3, 0);
+ a3 = vec_splat_dbl(a3, 1);
+}
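+// Note: the postcondition of pbroadcast4 above, in scalar terms, is
+//   a0 = { a[0], a[0] }, a1 = { a[1], a[1] }, a2 = { a[2], a[2] }, a3 = { a[3], a[3] }.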
+// Google-local: Change type from DenseIndex to int in patch.
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, int/*DenseIndex*/ stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet2d>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, /*DenseIndex*/int stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ pstore<double>(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
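+// pgather/pscatter provide strided access, e.g. walking one column of a row-major
+// matrix. Illustrative sketch (not compiled, hypothetical helper name), assuming `mat`
+// is row-major with `cols` doubles per row:
+#if 0
+static inline void scale_two_rows_of_column(double* mat, int cols, int col, double factor)
+{
+ Packet2d v = pgather<double, Packet2d>(mat + col, cols); // mat[0][col], mat[1][col]
+ v = pmul<Packet2d>(v, pset1<Packet2d>(factor));
+ pscatter<double, Packet2d>(mat + col, v, cols);
+}
+#endif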
+template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return vec_add(pset1<Packet2d>(a), p2d_COUNTDOWN); }
+
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_add(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_sub(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return psub<Packet2d>(p2d_ZERO, a); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_ZERO); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); }
+
+// For some weird reason, it has to be overloaded for packets of integers
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
+
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ return (Packet2d) vec_vsx_ld((long)from & 15, (const Packet2d*) _EIGEN_ALIGNED_PTR(from));
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{
+ Packet2d p;
+ if((ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from);
+ else p = ploadu<Packet2d>(from);
+ return vec_perm(p, p, p16uc_PSET64_HI);
+}
+
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+ vec_vsx_st((Packet4f)from, (long)to & 15, (Packet4f*) _EIGEN_ALIGNED_PTR(to));
+}
+
+#ifndef __VSX__
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { vec_dstt((const float *) addr, DST_CTRL(2,2,32), DST_CHAN); }
+#endif
+
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
+
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return (Packet2d)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE64); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
+
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ Packet2d b, sum;
+ b = (Packet2d) vec_sld((Packet4ui) a, (Packet4ui)a, 8);
+ sum = vec_add(a, b);
+ return pfirst(sum);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ Packet2d v[2], sum;
+ v[0] = vec_add(vecs[0], (Packet2d) vec_sld((Packet4ui) vecs[0], (Packet4ui) vecs[0], 8));
+ v[1] = vec_add(vecs[1], (Packet2d) vec_sld((Packet4ui) vecs[1], (Packet4ui) vecs[1], 8));
+
+#ifdef _BIG_ENDIAN
+ sum = (Packet2d) vec_sld((Packet4ui) v[0], (Packet4ui) v[1], 8);
+#else
+ sum = (Packet2d) vec_sld((Packet4ui) v[1], (Packet4ui) v[0], 8);
+#endif
+
+ return sum;
+}
+// Other reduction functions:
+// mul
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmul(a, (Packet2d)vec_sld((Packet4ui) a, (Packet4ui) a, 8)));
+}
+
+// min
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+ return pfirst(vec_min(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8)));
+}
+
+// max
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+ return pfirst(vec_max(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8)));
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset == 1)
+#ifdef _BIG_ENDIAN
+ first = (Packet2d) vec_sld((Packet4ui) first, (Packet4ui) second, 8);
+#else
+ first = (Packet2d) vec_sld((Packet4ui) second, (Packet4ui) first, 8);
+#endif
+ }
+};
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ Packet2d t0, t1;
+ t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
+ t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
+ kernel.packet[0] = t0;
+ kernel.packet[1] = t1;
+}
+
+#endif // defined(__VSX__)
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_ALTIVEC_H
+
diff --git a/third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h b/third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h
new file mode 100644
index 0000000000..675daae8f0
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h
@@ -0,0 +1,75 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATH_FUNCTIONS_CUDA_H
+#define EIGEN_MATH_FUNCTIONS_CUDA_H
+
+namespace Eigen {
+
+namespace internal {
+
+// Make sure this is only available when targeting a GPU: we don't want to
+// introduce conflicts between these packet_traits definitions and the ones
+// we'll use on the host side (SSE, AVX, ...)
+#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plog<float4>(const float4& a)
+{
+ return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plog<double2>(const double2& a)
+{
+ return make_double2(log(a.x), log(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pexp<float4>(const float4& a)
+{
+ return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pexp<double2>(const double2& a)
+{
+ return make_double2(exp(a.x), exp(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 psqrt<float4>(const float4& a)
+{
+ return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 psqrt<double2>(const double2& a)
+{
+ return make_double2(sqrt(a.x), sqrt(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 prsqrt<float4>(const float4& a)
+{
+ return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 prsqrt<double2>(const double2& a)
+{
+ return make_double2(rsqrt(a.x), rsqrt(a.y));
+}
+
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATH_FUNCTIONS_CUDA_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h b/third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h
new file mode 100644
index 0000000000..d11f5ba411
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -0,0 +1,336 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_CUDA_H
+#define EIGEN_PACKET_MATH_CUDA_H
+
+namespace Eigen {
+
+namespace internal {
+// Make sure this is only available when targeting a GPU: we don't want to
+// introduce conflicts between these packet_traits definitions and the ones
+// we'll use on the host side (SSE, AVX, ...)
+#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+template<> struct is_arithmetic<float4> { enum { value = true }; };
+template<> struct is_arithmetic<double2> { enum { value = true }; };
+
+
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef float4 type;
+ typedef float4 half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 0,
+
+ HasDiv = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+
+ HasBlend = 0,
+ HasSelect = 1,
+ HasEq = 1,
+ };
+};
+
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef double2 type;
+ typedef double2 half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 0,
+
+ HasDiv = 1,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+
+ HasBlend = 0,
+ HasSelect = 1,
+ HasEq = 1,
+ };
+};
+
+
+template<> struct unpacket_traits<float4> { typedef float type; enum {size=4}; typedef float4 half; };
+template<> struct unpacket_traits<double2> { typedef double type; enum {size=2}; typedef double2 half; };
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
+ return make_float4(from, from, from, from);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
+ return make_double2(from, from);
+}
+
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float>(const float& a) {
+ return make_float4(a, a+1, a+2, a+3);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double>(const double& a) {
+ return make_double2(a, a+1);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x+b.x, a.y+b.y);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x-b.x, a.y-b.y);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 peq<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x == b.x ? 1.f : 0, a.y == b.y ? 1.f : 0, a.z == b.z ? 1.f : 0, a.w == b.w ? 1.f : 0);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 peq<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x == b.x ? 1. : 0, a.y == b.y ? 1. : 0);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ple<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x <= b.x ? 1.f : 0, a.y <= b.y ? 1.f : 0, a.z <= b.z ? 1.f : 0, a.w <= b.w ? 1.f : 0);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ple<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x <= b.x ? 1. : 0, a.y <= b.y ? 1. : 0);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plt<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x < b.x ? 1.f : 0, a.y < b.y ? 1.f : 0, a.z < b.z ? 1.f : 0, a.w < b.w ? 1.f : 0);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plt<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x < b.x ? 1. : 0, a.y < b.y ? 1. : 0);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pselect<float4>(const float4& a, const float4& b, const float4& c) {
+ return make_float4(c.x ? b.x : a.x, c.y ? b.y : a.y, c.z ? b.z : a.z, c.w ? b.w : a.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pselect<double2>(const double2& a, const double2& b, const double2& c) {
+ return make_double2(c.x ? b.x : a.x, c.y ? b.y : a.y);
+}
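+// peq/ple/plt above return per-lane 1/0 masks, and pselect(a, b, c) picks b in the lanes
+// where c is non-zero and a elsewhere. Together they form a branchless per-lane select;
+// e.g. a lane-wise max could be written as (illustrative sketch, not compiled,
+// hypothetical helper name):
+#if 0
+__device__ inline float4 lanewise_max(const float4& a, const float4& b)
+{
+ // mask lane = 1.f where a <= b, so pselect picks b there and keeps a elsewhere
+ return pselect<float4>(a, b, ple<float4>(a, b));
+}
+#endif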
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
+ return make_float4(-a.x, -a.y, -a.z, -a.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
+ return make_double2(-a.x, -a.y);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x*b.x, a.y*b.y);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x/b.x, a.y/b.y);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
+ return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
+ return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
+ return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
+ return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
+ return *reinterpret_cast<const float4*>(from);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
+ return *reinterpret_cast<const double2*>(from);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
+ return make_float4(from[0], from[1], from[2], from[3]);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
+ return make_double2(from[0], from[1]);
+}
+
+template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
+ return make_float4(from[0], from[0], from[1], from[1]);
+}
+template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
+ return make_double2(from[0], from[0]);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float* to, const float4& from) {
+ *reinterpret_cast<float4*>(to) = from;
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
+ *reinterpret_cast<double2*>(to) = from;
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const float4& from) {
+ to[0] = from.x;
+ to[1] = from.y;
+ to[2] = from.z;
+ to[3] = from.w;
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
+ to[0] = from.x;
+ to[1] = from.y;
+}
+
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
+ return __ldg((const float4*)from);
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
+ return __ldg((const double2*)from);
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
+ return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
+ return make_double2(__ldg(from+0), __ldg(from+1));
+}
+#endif
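+// Note: on sm_35 and above the ploadt_ro overloads above route loads through the
+// read-only data cache via __ldg, which can help for inputs that are only read
+// (e.g. the right-hand side operand of a product kernel). On older architectures the
+// generic ploadt_ro is used instead, which falls back to ordinary pload/ploadu.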
+
+template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, int stride) {
+ return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
+}
+
+template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, int stride) {
+ return make_double2(from[0*stride], from[1*stride]);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, int stride) {
+ to[stride*0] = from.x;
+ to[stride*1] = from.y;
+ to[stride*2] = from.z;
+ to[stride*3] = from.w;
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, int stride) {
+ to[stride*0] = from.x;
+ to[stride*1] = from.y;
+}
+
+template<> EIGEN_DEVICE_FUNC inline float pfirst<float4>(const float4& a) {
+ return a.x;
+}
+template<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
+ return a.x;
+}
+
+template<> EIGEN_DEVICE_FUNC inline float predux<float4>(const float4& a) {
+ return a.x + a.y + a.z + a.w;
+}
+template<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
+ return a.x + a.y;
+}
+
+template<> EIGEN_DEVICE_FUNC inline float predux_max<float4>(const float4& a) {
+ return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
+ return fmax(a.x, a.y);
+}
+
+template<> EIGEN_DEVICE_FUNC inline float predux_min<float4>(const float4& a) {
+ return fminf(fminf(a.x, a.y), fminf(a.z, a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
+ return fmin(a.x, a.y);
+}
+
+template <>
+EIGEN_DEVICE_FUNC inline float predux_mul<float4>(const float4& a) {
+ return a.x * a.y * a.z * a.w;
+}
+template <>
+EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
+ return a.x * a.y;
+}
+
+template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
+ return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
+ return make_double2(fabs(a.x), fabs(a.y));
+}
+
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<float4,4>& kernel) {
+ float tmp = kernel.packet[0].y;
+ kernel.packet[0].y = kernel.packet[1].x;
+ kernel.packet[1].x = tmp;
+
+ tmp = kernel.packet[0].z;
+ kernel.packet[0].z = kernel.packet[2].x;
+ kernel.packet[2].x = tmp;
+
+ tmp = kernel.packet[0].w;
+ kernel.packet[0].w = kernel.packet[3].x;
+ kernel.packet[3].x = tmp;
+
+ tmp = kernel.packet[1].z;
+ kernel.packet[1].z = kernel.packet[2].y;
+ kernel.packet[2].y = tmp;
+
+ tmp = kernel.packet[1].w;
+ kernel.packet[1].w = kernel.packet[3].y;
+ kernel.packet[3].y = tmp;
+
+ tmp = kernel.packet[2].w;
+ kernel.packet[2].w = kernel.packet[3].z;
+ kernel.packet[3].z = tmp;
+}
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<double2,2>& kernel) {
+ double tmp = kernel.packet[0].y;
+ kernel.packet[0].y = kernel.packet[1].x;
+ kernel.packet[1].x = tmp;
+}
+
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+
+#endif // EIGEN_PACKET_MATH_CUDA_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/Default/Settings.h b/third_party/eigen3/Eigen/src/Core/arch/Default/Settings.h
new file mode 100644
index 0000000000..097373c84d
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/Default/Settings.h
@@ -0,0 +1,49 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+/* All the parameters defined in this file can be specialized in the
+ * architecture specific files, and/or by the user.
+ * More to come... */
+
+#ifndef EIGEN_DEFAULT_SETTINGS_H
+#define EIGEN_DEFAULT_SETTINGS_H
+
+/** Defines the maximal loop size to enable meta unrolling of loops.
+ * Note that the value here is expressed in Eigen's own notion of "number of FLOPS";
+ * it does not correspond to the number of iterations or to the number of instructions.
+ */
+#ifndef EIGEN_UNROLLING_LIMIT
+#define EIGEN_UNROLLING_LIMIT 100
+#endif
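+// Like the other tunables in this file, the limit can be overridden by defining the
+// macro before any Eigen header is included, e.g.:
+//   #define EIGEN_UNROLLING_LIMIT 50   // less meta-unrolling, smaller generated code
+//   #include <Eigen/Core>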
+
+/** Defines the threshold between a "small" and a "large" matrix.
+ * This threshold is mainly used to select the proper product implementation.
+ */
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+
+/** Defines the maximal width of the blocks used in the triangular product and solver
+ * for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
+ */
+#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
+#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
+#endif
+
+
+/** Defines the default number of registers available for that architecture.
+ * Currently it must be 8 or 16. Other values will fail.
+ */
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
+#endif
+
+#endif // EIGEN_DEFAULT_SETTINGS_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/NEON/Complex.h b/third_party/eigen3/Eigen/src/Core/arch/NEON/Complex.h
new file mode 100644
index 0000000000..49e3fa1b02
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/NEON/Complex.h
@@ -0,0 +1,467 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_NEON_H
+#define EIGEN_COMPLEX_NEON_H
+
+namespace Eigen {
+
+namespace internal {
+
+static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
+static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
+
+//---------- float ----------
+struct Packet2cf
+{
+ EIGEN_STRONG_INLINE Packet2cf() {}
+ EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
+ Packet4f v;
+};
+
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet2cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket = 0,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
+
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
+{
+ float32x2_t r64;
+ r64 = vld1_f32((float *)&from);
+
+ return Packet2cf(vcombine_f32(r64, r64));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+ Packet4ui b = vreinterpretq_u32_f32(a.v);
+ return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet4f v1, v2;
+
+ // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
+ v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
+ // Get the imaginary values of a | a1_im | a1_im | a2_im | a2_im |
+ v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
+ // Multiply the real parts of a with b
+ v1 = vmulq_f32(v1, b.v);
+ // Multiply the imaginary parts of a with b
+ v2 = vmulq_f32(v2, b.v);
+ // Conjugate v2
+ v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR));
+ // Swap real/imag elements in v2.
+ v2 = vrev64q_f32(v2);
+ // Add and return the result
+ return Packet2cf(vaddq_f32(v1, v2));
+}
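+// The sequence above is the usual SIMD formulation of complex multiplication: with
+// a = ar + i*ai and b = br + i*bi, a*b = (ar*br - ai*bi) + i*(ar*bi + ai*br).
+// v1 = {ar,ar,..}*b yields {ar*br, ar*bi, ..}; v2 = {ai,ai,..}*b yields {ai*br, ai*bi, ..};
+// negating the imaginary lanes of v2 (the CONJ_XOR mask) and swapping its real/imag
+// lanes turns it into {-ai*bi, ai*br, ..}, so the final addition gives the product.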
+
+template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride)
+{
+ Packet4f res = pset1<Packet4f>(0.f);
+ res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
+ res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
+ res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
+ res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
+ return Packet2cf(res);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride)
+{
+ to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
+ to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
+
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
+{
+ std::complex<float> EIGEN_ALIGN16 x[2];
+ vst1q_f32((float *)x, a.v);
+ return x[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
+{
+ float32x2_t a_lo, a_hi;
+ Packet4f a_r128;
+
+ a_lo = vget_low_f32(a.v);
+ a_hi = vget_high_f32(a.v);
+ a_r128 = vcombine_f32(a_hi, a_lo);
+
+ return Packet2cf(a_r128);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
+{
+ return Packet2cf(vrev64q_f32(a.v));
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+ float32x2_t a1, a2;
+ std::complex<float> s;
+
+ a1 = vget_low_f32(a.v);
+ a2 = vget_high_f32(a.v);
+ a2 = vadd_f32(a1, a2);
+ vst1_f32((float *)&s, a2);
+
+ return s;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ Packet4f sum1, sum2, sum;
+
+ // Combine the low halves of vecs[0] and vecs[1], then the high halves, and add them
+ sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
+ sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
+ sum = vaddq_f32(sum1, sum2);
+
+ return Packet2cf(sum);
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ float32x2_t a1, a2, v1, v2, prod;
+ std::complex<float> s;
+
+ a1 = vget_low_f32(a.v);
+ a2 = vget_high_f32(a.v);
+ // Duplicate the real part of the first complex: | a1_re | a1_re |
+ v1 = vdup_lane_f32(a1, 0);
+ // Duplicate the imaginary part of the first complex: | a1_im | a1_im |
+ v2 = vdup_lane_f32(a1, 1);
+ // Multiply the real part with the second complex
+ v1 = vmul_f32(v1, a2);
+ // Multiply the imaginary part with the second complex
+ v2 = vmul_f32(v2, a2);
+ // Conjugate v2
+ v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR));
+ // Swap real/imag elements in v2.
+ v2 = vrev64_f32(v2);
+ // Add v1, v2
+ prod = vadd_f32(v1, v2);
+
+ vst1_f32((float *)&s, prod);
+
+ return s;
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+ EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset==1)
+ {
+ first.v = vextq_f32(first.v, second.v, 2);
+ }
+ }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ // TODO optimize it for NEON
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+ Packet4f s, rev_s;
+
+ // this computes the norm
+ s = vmulq_f32(b.v, b.v);
+ rev_s = vrev64q_f32(s);
+
+ return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
+}
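+// This is the standard identity a / b = a * conj(b) / |b|^2: the conj_helper call
+// computes a*conj(b), s holds {br*br, bi*bi} per complex, and adding s to its
+// 64-bit-swapped copy puts |b|^2 = br^2 + bi^2 into both lanes before the divide.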
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cf,2>& kernel) {
+ Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
+ kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
+ kernel.packet[1].v = tmp;
+}
+
+//---------- double ----------
+#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
+
+static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000);
+
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+ Packet2d v;
+};
+
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
+
+template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet2d v1, v2;
+
+ // Get the real value of a
+ v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
+ // Get the imaginary value of a
+ v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
+ // Multiply the real part of a with b
+ v1 = vmulq_f64(v1, b.v);
+ // Multiply the imaginary part of a with b
+ v2 = vmulq_f64(v2, b.v);
+ // Conjugate v2
+ v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
+ // Swap real/imag elements in v2.
+ v2 = preverse<Packet2d>(v2);
+ // Add and return the result
+ return Packet1cd(vaddq_f64(v1, v2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, int stride)
+{
+ Packet2d res = pset1<Packet2d>(0.0);
+ res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
+ res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
+ return Packet1cd(res);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, int stride)
+{
+ to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
+}
+
+
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ std::complex<double> EIGEN_ALIGN16 res;
+ pstore<std::complex<double> >(&res, a);
+
+ return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
+ {
+ // FIXME: is it certain that we never have to align a Packet1cd?
+ // Even though a std::complex<double> is 16 bytes, it is not necessarily aligned on a 16-byte boundary...
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ // TODO optimize it for NEON
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ Packet2d s = pmul<Packet2d>(b.v, b.v);
+ Packet2d rev_s = preverse<Packet2d>(s);
+
+ return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
+}
+
+EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+ Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
+ kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
+ kernel.packet[1].v = tmp;
+}
+#endif // EIGEN_ARCH_ARM64
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_NEON_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h b/third_party/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h
new file mode 100644
index 0000000000..6bb05bb922
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h
@@ -0,0 +1,91 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/* The sin, cos, exp, and log functions of this file come from
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
+ */
+
+#ifndef EIGEN_MATH_FUNCTIONS_NEON_H
+#define EIGEN_MATH_FUNCTIONS_NEON_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ Packet4f tmp, fx;
+
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+
+ x = vminq_f32(x, p4f_exp_hi);
+ x = vmaxq_f32(x, p4f_exp_lo);
+
+ /* express exp(x) as exp(g + n*log(2)) */
+ fx = vmlaq_f32(p4f_half, x, p4f_cephes_LOG2EF);
+
+ /* perform a floorf */
+ tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx));
+
+ /* if greater, subtract 1 */
+ Packet4ui mask = vcgtq_f32(tmp, fx);
+ mask = vandq_u32(mask, vreinterpretq_u32_f32(p4f_1));
+
+ fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
+
+ tmp = vmulq_f32(fx, p4f_cephes_exp_C1);
+ Packet4f z = vmulq_f32(fx, p4f_cephes_exp_C2);
+ x = vsubq_f32(x, tmp);
+ x = vsubq_f32(x, z);
+
+ Packet4f y = vmulq_f32(p4f_cephes_exp_p0, x);
+ z = vmulq_f32(x, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p1);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p2);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p3);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p4);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p5);
+
+ y = vmulq_f32(y, z);
+ y = vaddq_f32(y, x);
+ y = vaddq_f32(y, p4f_1);
+
+ /* build 2^n */
+ int32x4_t mm;
+ mm = vcvtq_s32_f32(fx);
+ mm = vaddq_s32(mm, p4i_0x7f);
+ mm = vshlq_n_s32(mm, 23);
+ Packet4f pow2n = vreinterpretq_f32_s32(mm);
+
+ y = vmulq_f32(y, pow2n);
+ return y;
+}
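+// The routine above follows the classic Cephes scheme: split x = n*ln(2) + g with |g|
+// bounded by roughly ln(2)/2, evaluate a degree-5 polynomial for exp(g), then scale by
+// 2^n. Illustrative scalar sketch of the same steps (not compiled, hypothetical helper
+// name, uses <cmath>):
+#if 0
+static inline float exp_reference(float x)
+{
+ float n = std::floor(x * 1.44269504088896341f + 0.5f);  // n = floor(x/ln(2) + 0.5)
+ float g = x - n * 0.693359375f - n * (-2.12194440e-4f); // g = x - n*ln(2), split constant
+ float p = 1.9875691500E-4f;                             // degree-5 polynomial in g
+ p = p * g + 1.3981999507E-3f;
+ p = p * g + 8.3334519073E-3f;
+ p = p * g + 4.1665795894E-2f;
+ p = p * g + 1.6666665459E-1f;
+ p = p * g + 5.0000001201E-1f;
+ float y = p * g * g + g + 1.f;
+ return std::ldexp(y, static_cast<int>(n));              // y * 2^n
+}
+#endif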
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATH_FUNCTIONS_NEON_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h b/third_party/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h
new file mode 100644
index 0000000000..856a65ad7b
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -0,0 +1,745 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Konstantinos Margaritis <markos@codex.gr>
+// Heavily based on Gael's SSE version.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_NEON_H
+#define EIGEN_PACKET_MATH_NEON_H
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 16
+#endif
+
+// FIXME NEON has 16 quad registers, but since the current register allocator
+// is so bad, it is much better to reduce it to 8
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+
+typedef float32x2_t Packet2f;
+typedef float32x4_t Packet4f;
+typedef int32x4_t Packet4i;
+typedef int32x2_t Packet2i;
+typedef uint32x4_t Packet4ui;
+
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ const Packet4f p4f_##NAME = pset1<Packet4f>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X))
+
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+
+#if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG
+ //Special treatment for Apple's llvm-gcc, its NEON packet types are unions
+ #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
+ #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
+#else
+ //Default initializer for packets
+ #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
+ #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
+#endif
+
+// arm64 does not have the 32-bit pld instruction. If available, let's trust the __builtin_prefetch
+// built-in function, which is available on LLVM and GCC (at least); the inline-asm pld fallback
+// below is only used on 32-bit ARM.
+#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
+ #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#elif defined __pld
+ #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
+#elif !EIGEN_ARCH_ARM64
+ #define EIGEN_ARM_PREFETCH(ADDR) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+#else
+ // by default no explicit prefetching
+ #define EIGEN_ARM_PREFETCH(ADDR)
+#endif
+
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet4f type;
+ typedef Packet4f half; // Packet2f intrinsics not implemented yet
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 4,
+ HasHalfPacket=0, // Packet2f intrinsics not implemented yet
+
+ HasDiv = 1,
+ // FIXME check the Has*
+ HasSin = 0,
+ HasCos = 0,
+ HasTanH = 1,
+ HasLog = 0,
+ HasExp = 1,
+ HasSqrt = 0
+ };
+};
+template<> struct packet_traits<int> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half; // Packet2i intrinsics not implemented yet
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket=0 // Packet2i intrinsics not implemented yet
+ // FIXME check the Has*
+ };
+};
+
+#if EIGEN_GNUC_AT_MOST(4,4) && !EIGEN_COMP_LLVM
+// work around a gcc 4.2, 4.3 and 4.4 compilation issue
+EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
+EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
+EIGEN_STRONG_INLINE float32x2_t vld1_dup_f32 (const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); }
+EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
+EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
+#endif
+
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
+
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
+
+template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
+{
+ Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
+ return vaddq_f32(pset1<Packet4f>(a), countdown);
+}
+template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
+{
+ Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
+ return vaddq_s32(pset1<Packet4i>(a), countdown);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pselect<Packet4f>(const Packet4f& a, const Packet4f& b, const Packet4f& false_mask) {
+ return vbslq_f32(vreinterpretq_u32_f32(false_mask), b, a);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pselect<Packet4i>(const Packet4i& a, const Packet4i& b, const Packet4i& false_mask) {
+ return vbslq_s32(vreinterpretq_u32_s32(false_mask), b, a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+#if EIGEN_ARCH_ARM64
+ return vdivq_f32(a,b);
+#else
+ Packet4f inv, restep, div;
+
+ // NEON does not offer a divide instruction; we have to use a reciprocal approximation.
+ // However, NEON, in contrast to other SIMD engines (AltiVec/SSE), offers
+ // a reciprocal estimate AND a reciprocal step, which saves a few instructions.
+ // vrecpeq_f32() returns an estimate of 1/b, which we fine-tune with
+ // Newton-Raphson and vrecpsq_f32().
+ inv = vrecpeq_f32(b);
+
+ // This returns a differential, by which we will have to multiply inv to get a better
+ // approximation of 1/b.
+ restep = vrecpsq_f32(b, inv);
+ inv = vmulq_f32(restep, inv);
+
+ // Finally, multiply a by 1/b and get the wanted result of the division.
+ div = vmulq_f32(a, inv);
+
+ return div;
+#endif
+}
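+// Note: vrecpsq_f32(b, inv) computes (2 - b*inv), so the "restep * inv" update above is
+// one Newton-Raphson step inv' = inv*(2 - b*inv) on the initial estimate. Repeating those
+// two instructions once more would roughly double the number of correct bits, at the cost
+// of slightly lower throughput.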
+
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
+{ eigen_assert(false && "packet integer division is not supported by NEON");
+ return pset1<Packet4i>(0);
+}
+
+#ifdef __ARM_FEATURE_FMA
+// See bug 936.
+// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4.
+// FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding.
+// MLA is not fused i.e. does 2 roundings.
+// In addition to giving better accuracy, FMA also gives better performance here on a Krait (Nexus 4):
+// MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); }
+#endif
+
+// No FMA instruction for int, so use MLA unconditionally.
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return vmlaq_s32(c,a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }
+
+// TODO(ebrevdo): add support for ple, plt, peq using vcle_f32/s32 or
+// vcleq_f32/s32, and their ilk, respectively, once it's clear which condition code to use.
+
+// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ float32x2_t lo, hi;
+ lo = vld1_dup_f32(from);
+ hi = vld1_dup_f32(from+1);
+ return vcombine_f32(lo, hi);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+ int32x2_t lo, hi;
+ lo = vld1_dup_s32(from);
+ hi = vld1_dup_s32(from+1);
+ return vcombine_s32(lo, hi);
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
+
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
+
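+// Strided gather/scatter: NEON has no gather/scatter instructions, so the lanes are filled and
+// extracted one at a time with vsetq_lane/vgetq_lane.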
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride)
+{
+ Packet4f res = pset1<Packet4f>(0);
+ res = vsetq_lane_f32(from[0*stride], res, 0);
+ res = vsetq_lane_f32(from[1*stride], res, 1);
+ res = vsetq_lane_f32(from[2*stride], res, 2);
+ res = vsetq_lane_f32(from[3*stride], res, 3);
+ return res;
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride)
+{
+ Packet4i res = pset1<Packet4i>(0);
+ res = vsetq_lane_s32(from[0*stride], res, 0);
+ res = vsetq_lane_s32(from[1*stride], res, 1);
+ res = vsetq_lane_s32(from[2*stride], res, 2);
+ res = vsetq_lane_s32(from[3*stride], res, 3);
+ return res;
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride)
+{
+ to[stride*0] = vgetq_lane_f32(from, 0);
+ to[stride*1] = vgetq_lane_f32(from, 1);
+ to[stride*2] = vgetq_lane_f32(from, 2);
+ to[stride*3] = vgetq_lane_f32(from, 3);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride)
+{
+ to[stride*0] = vgetq_lane_s32(from, 0);
+ to[stride*1] = vgetq_lane_s32(from, 1);
+ to[stride*2] = vgetq_lane_s32(from, 2);
+ to[stride*3] = vgetq_lane_s32(from, 3);
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); }
+
+// FIXME only store the first 2 elements?
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
+
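+// Reverse all four lanes: vrev64q swaps the two elements within each 64-bit half, and swapping
+// the two halves afterwards completes the full reversal.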
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
+ float32x2_t a_lo, a_hi;
+ Packet4f a_r64;
+
+ a_r64 = vrev64q_f32(a);
+ a_lo = vget_low_f32(a_r64);
+ a_hi = vget_high_f32(a_r64);
+ return vcombine_f32(a_hi, a_lo);
+}
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
+ int32x2_t a_lo, a_hi;
+ Packet4i a_r64;
+
+ a_r64 = vrev64q_s32(a);
+ a_lo = vget_low_s32(a_r64);
+ a_hi = vget_high_s32(a_r64);
+ return vcombine_s32(a_hi, a_lo);
+}
+
+template<size_t offset>
+struct protate_impl<offset, Packet4f>
+{
+ static Packet4f run(const Packet4f& a) {
+ return vextq_f32(a, a, offset);
+ }
+};
+
+template<size_t offset>
+struct protate_impl<offset, Packet4i>
+{
+ static Packet4i run(const Packet4i& a) {
+ return vextq_s32(a, a, offset);
+ }
+};
+
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
+
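+// Sum reduction: two successive pairwise adds (vpadd) collapse the four lanes into a single
+// scalar, which ends up in lane 0.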
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, sum;
+
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ sum = vpadd_f32(a_lo, a_hi);
+ sum = vpadd_f32(sum, sum);
+ return vget_lane_f32(sum, 0);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ float32x4x2_t vtrn1, vtrn2, res1, res2;
+ Packet4f sum1, sum2, sum;
+
+  // NEON zip interleaves the supplied vectors.
+  // Two successive interleaves yield the transposed vectors.
+ vtrn1 = vzipq_f32(vecs[0], vecs[2]);
+ vtrn2 = vzipq_f32(vecs[1], vecs[3]);
+ res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
+ res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
+
+ // Do the addition of the resulting vectors
+ sum1 = vaddq_f32(res1.val[0], res1.val[1]);
+ sum2 = vaddq_f32(res2.val[0], res2.val[1]);
+ sum = vaddq_f32(sum1, sum2);
+
+ return sum;
+}
+
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, sum;
+
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ sum = vpadd_s32(a_lo, a_hi);
+ sum = vpadd_s32(sum, sum);
+ return vget_lane_s32(sum, 0);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ int32x4x2_t vtrn1, vtrn2, res1, res2;
+ Packet4i sum1, sum2, sum;
+
+  // NEON zip interleaves the supplied vectors.
+  // Two successive interleaves yield the transposed vectors.
+ vtrn1 = vzipq_s32(vecs[0], vecs[2]);
+ vtrn2 = vzipq_s32(vecs[1], vecs[3]);
+ res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]);
+ res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]);
+
+ // Do the addition of the resulting vectors
+ sum1 = vaddq_s32(res1.val[0], res1.val[1]);
+ sum2 = vaddq_s32(res2.val[0], res2.val[1]);
+ sum = vaddq_s32(sum1, sum2);
+
+ return sum;
+}
+
+// Other reduction functions:
+// mul
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, prod;
+
+ // Get a_lo = |a1|a2| and a_hi = |a3|a4|
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ // Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
+ prod = vmul_f32(a_lo, a_hi);
+ // Multiply prod with its swapped value |a2*a4|a1*a3|
+ prod = vmul_f32(prod, vrev64_f32(prod));
+
+ return vget_lane_f32(prod, 0);
+}
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, prod;
+
+ // Get a_lo = |a1|a2| and a_hi = |a3|a4|
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ // Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
+ prod = vmul_s32(a_lo, a_hi);
+ // Multiply prod with its swapped value |a2*a4|a1*a3|
+ prod = vmul_s32(prod, vrev64_s32(prod));
+
+ return vget_lane_s32(prod, 0);
+}
+
+// min
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, min;
+
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ min = vpmin_f32(a_lo, a_hi);
+ min = vpmin_f32(min, min);
+
+ return vget_lane_f32(min, 0);
+}
+
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, min;
+
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ min = vpmin_s32(a_lo, a_hi);
+ min = vpmin_s32(min, min);
+
+ return vget_lane_s32(min, 0);
+}
+
+// max
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, max;
+
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ max = vpmax_f32(a_lo, a_hi);
+ max = vpmax_f32(max, max);
+
+ return vget_lane_f32(max, 0);
+}
+
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, max;
+
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ max = vpmax_s32(a_lo, a_hi);
+ max = vpmax_s32(max, max);
+
+ return vget_lane_s32(max, 0);
+}
+
+// This PALIGN_NEON business works around a bug in LLVM Clang 3.0 that causes spurious compilation errors;
+// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
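+// palign_impl<Offset>::run(first, second) replaces first with the 4-element window that starts at
+// position Offset in the concatenation [first, second]; vextq implements exactly this extraction.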
+#define PALIGN_NEON(Offset,Type,Command) \
+template<>\
+struct palign_impl<Offset,Type>\
+{\
+ EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
+ {\
+ if (Offset!=0)\
+ first = Command(first, second, Offset);\
+ }\
+};\
+
+PALIGN_NEON(0,Packet4f,vextq_f32)
+PALIGN_NEON(1,Packet4f,vextq_f32)
+PALIGN_NEON(2,Packet4f,vextq_f32)
+PALIGN_NEON(3,Packet4f,vextq_f32)
+PALIGN_NEON(0,Packet4i,vextq_s32)
+PALIGN_NEON(1,Packet4i,vextq_s32)
+PALIGN_NEON(2,Packet4i,vextq_s32)
+PALIGN_NEON(3,Packet4i,vextq_s32)
+
+#undef PALIGN_NEON
+
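+// 4x4 transpose: vzipq interleaves the rows pairwise; recombining the low and high halves of the
+// two interleaved results assembles the transposed columns.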
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ float32x4x2_t tmp1 = vzipq_f32(kernel.packet[0], kernel.packet[1]);
+ float32x4x2_t tmp2 = vzipq_f32(kernel.packet[2], kernel.packet[3]);
+
+ kernel.packet[0] = vcombine_f32(vget_low_f32(tmp1.val[0]), vget_low_f32(tmp2.val[0]));
+ kernel.packet[1] = vcombine_f32(vget_high_f32(tmp1.val[0]), vget_high_f32(tmp2.val[0]));
+ kernel.packet[2] = vcombine_f32(vget_low_f32(tmp1.val[1]), vget_low_f32(tmp2.val[1]));
+ kernel.packet[3] = vcombine_f32(vget_high_f32(tmp1.val[1]), vget_high_f32(tmp2.val[1]));
+}
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ int32x4x2_t tmp1 = vzipq_s32(kernel.packet[0], kernel.packet[1]);
+ int32x4x2_t tmp2 = vzipq_s32(kernel.packet[2], kernel.packet[3]);
+ kernel.packet[0] = vcombine_s32(vget_low_s32(tmp1.val[0]), vget_low_s32(tmp2.val[0]));
+ kernel.packet[1] = vcombine_s32(vget_high_s32(tmp1.val[0]), vget_high_s32(tmp2.val[0]));
+ kernel.packet[2] = vcombine_s32(vget_low_s32(tmp1.val[1]), vget_low_s32(tmp2.val[1]));
+ kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1]));
+}
+
+//---------- double ----------
+
+// Clang 3.5 in the iOS toolchain has an ICE triggered by NEON intrinsics for double.
+// Confirmed at least with __apple_build_version__ = 6000054.
+#ifdef __apple_build_version__
+// Let's hope that by the time __apple_build_version__ hits the 601* range, the bug will be fixed.
+// https://gist.github.com/yamaya/2924292 suggests that the first 3 digits are only updated with
+// major toolchain updates.
+#define EIGEN_APPLE_DOUBLE_NEON_BUG (__apple_build_version__ < 6010000)
+#else
+#define EIGEN_APPLE_DOUBLE_NEON_BUG 0
+#endif
+
+#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
+
+#if (EIGEN_COMP_GNUC_STRICT && defined(__ANDROID__)) || defined(__apple_build_version__)
+// Bug 907: work around missing declarations of the following two functions in the ADK
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_f64 (float64x2_t __a)
+{
+ return (uint64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_f64_u64 (uint64x2_t __a)
+{
+ return (float64x2_t) __a;
+}
+#endif
+
+typedef float64x2_t Packet2d;
+typedef float64x1_t Packet1d;
+
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket=0,
+
+ HasDiv = 1,
+ // FIXME check the Has*
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 0,
+ HasSqrt = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
+
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
+
+template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a)
+{
+ Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1);
+ return vaddq_f64(pset1<Packet2d>(a), countdown);
+}
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vsubq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return vnegq_f64(a); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+
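+// Bitwise select: vbslq picks bits from its second operand where the mask bit is set and from its
+// third operand otherwise, so lanes flagged by false_mask come from b and the rest from a.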
+template<> EIGEN_STRONG_INLINE Packet2d pselect<Packet2d>(const Packet2d& a, const Packet2d& b, const Packet2d& false_mask) {
+ return vbslq_f64(vreinterpretq_u64_f64(false_mask), b, a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmulq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vdivq_f64(a,b); }
+
+#ifdef __ARM_FEATURE_FMA
+// See bug 936. See above comment about FMA for float.
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vfmaq_f64(c,a,b); }
+#else
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vmlaq_f64(c,a,b); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vminq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); }
+
+// Logical operations are not supported for double, so we reinterpret the packets as integer vectors using NEON casts
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(from); }
+
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f64(from); }
+
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{
+ return vld1q_dup_f64(from);
+}
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(to, from); }
+
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f64(to, from); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, int stride)
+{
+ Packet2d res = pset1<Packet2d>(0.0);
+ res = vsetq_lane_f64(from[0*stride], res, 0);
+ res = vsetq_lane_f64(from[1*stride], res, 1);
+ return res;
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, int stride)
+{
+ to[stride*0] = vgetq_lane_f64(from, 0);
+ to[stride*1] = vgetq_lane_f64(from, 1);
+}
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ARM_PREFETCH(addr); }
+
+// FIXME only store the first 2 elements?
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); }
+
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
+
+template<size_t offset>
+struct protate_impl<offset, Packet2d>
+{
+ static Packet2d run(const Packet2d& a) {
+ return vextq_f64(a, a, offset);
+ }
+};
+
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
+
+#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
+// workaround ICE, see bug 907
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) + vget_high_f64(a))[0]; }
+#else
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) + vget_high_f64(a), 0); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ float64x2_t trn1, trn2;
+
+  // NEON zip interleaves the supplied vectors; for 2-element vectors a single zip1/zip2 pair
+  // already yields the transposed vectors.
+ trn1 = vzip1q_f64(vecs[0], vecs[1]);
+ trn2 = vzip2q_f64(vecs[0], vecs[1]);
+
+ // Do the addition of the resulting vectors
+ return vaddq_f64(trn1, trn2);
+}
+// Other reduction functions:
+// mul
+#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) * vget_high_f64(a))[0]; }
+#else
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) * vget_high_f64(a), 0); }
+#endif
+
+// min
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpminq_f64(a, a), 0); }
+
+// max
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpmaxq_f64(a, a), 0); }
+
+// This PALIGN_NEON business works around a bug in LLVM Clang 3.0 that causes spurious compilation errors;
+// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
+#define PALIGN_NEON(Offset,Type,Command) \
+template<>\
+struct palign_impl<Offset,Type>\
+{\
+ EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
+ {\
+ if (Offset!=0)\
+ first = Command(first, second, Offset);\
+ }\
+};\
+
+PALIGN_NEON(0,Packet2d,vextq_f64)
+PALIGN_NEON(1,Packet2d,vextq_f64)
+#undef PALIGN_NEON
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ float64x2_t trn1 = vzip1q_f64(kernel.packet[0], kernel.packet[1]);
+ float64x2_t trn2 = vzip2q_f64(kernel.packet[0], kernel.packet[1]);
+
+ kernel.packet[0] = trn1;
+ kernel.packet[1] = trn2;
+}
+#endif // EIGEN_ARCH_ARM64
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_NEON_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/SSE/Complex.h b/third_party/eigen3/Eigen/src/Core/arch/SSE/Complex.h
new file mode 100644
index 0000000000..2722893dcf
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/SSE/Complex.h
@@ -0,0 +1,486 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_SSE_H
+#define EIGEN_COMPLEX_SSE_H
+
+namespace Eigen {
+
+namespace internal {
+
+//---------- float ----------
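+// A Packet2cf holds two std::complex<float> values packed as [real0, imag0, real1, imag1]
+// in a single __m128 register.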
+struct Packet2cf
+{
+ EIGEN_STRONG_INLINE Packet2cf() {}
+ EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
+ __m128 v;
+};
+
+// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
+// to leverage AVX instructions.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet2cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket = 0,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0,
+ HasBlend = 1,
+ };
+};
+#endif
+
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
+
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
+{
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+ return Packet2cf(_mm_xor_ps(a.v,mask));
+}
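+// Conjugation only flips the sign of the imaginary parts, so it is a single XOR with a mask
+// whose sign bit is set in the imaginary lanes.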
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_xor_ps(a.v,mask));
+}
+
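+// Complex multiply: with a = (ar, ai) and b = (br, bi) per lane, the result is
+// (ar*br - ai*bi, ar*bi + ai*br). The swizzles broadcast ar and ai across each complex lane, and
+// addsub (or an XOR sign mask on pre-SSE3 targets) performs the subtraction in the real slots.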
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ // TODO optimize it for SSE3 and 4
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
+ _mm_mul_ps(_mm_movehdup_ps(a.v),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+// return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+// _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+// vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
+ return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+ _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+ #endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
+{
+ Packet2cf res;
+#if EIGEN_GNUC_AT_MOST(4,2)
+  // Work around the annoying "may be used uninitialized in this function" warning with gcc 4.2
+ res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
+#elif EIGEN_GNUC_AT_LEAST(4,6)
+ // Suppress annoying "may be used uninitialized in this function" warning with gcc >= 4.6
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wuninitialized"
+ res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+ #pragma GCC diagnostic pop
+#else
+ res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+#endif
+ return Packet2cf(_mm_movelh_ps(res.v,res.v));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
+
+
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride)
+{
+ return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
+ std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride)
+{
+ to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
+ _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
+ to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
+ _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
+{
+ #if EIGEN_GNUC_AT_MOST(4,3)
+  // Work around a gcc 4.2 ICE - this is not ideal performance-wise, but who cares...
+  // This workaround also fixes invalid code generation with gcc 4.3
+ EIGEN_ALIGN16 std::complex<float> res[2];
+ _mm_store_ps((float*)res, a.v);
+ return res[0];
+ #else
+ std::complex<float> res;
+ _mm_storel_pi((__m64*)&res, a.v);
+ return res;
+ #endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+ return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset==1)
+ {
+ first.v = _mm_movehl_ps(first.v, first.v);
+ first.v = _mm_movelh_ps(first.v, second.v);
+ }
+ }
+};
+
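+// The conj_helper specializations compute a*b (and the fused a*b+c) with either operand
+// conjugated; the two boolean template flags indicate which input gets conjugated.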
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(a, pconj(b));
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+ _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #endif
+ }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(pconj(a), b);
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+ _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+ #endif
+ }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return pconj(internal::pmul(a, b));
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+ _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #endif
+ }
+};
+
+template<> struct conj_helper<Packet4f, Packet2cf, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
+ { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
+};
+
+template<> struct conj_helper<Packet2cf, Packet4f, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
+ { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
+};
+
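+// Complex divide: multiply a by conj(b), then divide by |b|^2. The shuffle adds the squared real
+// and imaginary parts of b so that both slots of each lane hold |b|^2.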
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ // TODO optimize it for SSE3 and 4
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+ __m128 s = _mm_mul_ps(b.v,b.v);
+ return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
+}
+
+EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
+{
+ return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
+}
+
+
+//---------- double ----------
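+// A Packet1cd holds a single std::complex<double> packed as [real, imag] in one __m128d register.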
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
+ __m128d v;
+};
+
+// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
+// to leverage AVX instructions.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+#endif
+
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
+
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
+{
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_xor_pd(a.v,mask));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ // TODO optimize it for SSE3 and 4
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return Packet1cd(_mm_addsub_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
+ return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0)), mask)));
+ #endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
+
+// FIXME force unaligned load, this is a temporary fix
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+
+// FIXME force unaligned store, this is a temporary fix
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ EIGEN_ALIGN16 double res[2];
+ _mm_store_pd(res, a.v);
+ return std::complex<double>(res[0],res[1]);
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+ return vecs[0];
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
+ {
+    // FIXME are we sure we never have to align a Packet1cd?
+    // Even though a std::complex<double> is 16 bytes, it is not necessarily aligned on a 16-byte boundary...
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(a, pconj(b));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #endif
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(pconj(a), b);
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0)), mask)));
+ #endif
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return pconj(internal::pmul(a, b));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #endif
+ }
+};
+
+template<> struct conj_helper<Packet2d, Packet1cd, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
+ { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
+};
+
+template<> struct conj_helper<Packet1cd, Packet2d, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
+ { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
+};
+
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ // TODO optimize it for SSE3 and 4
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ __m128d s = _mm_mul_pd(b.v,b.v);
+ return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
+}
+
+EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cf,2>& kernel) {
+ __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
+ __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
+
+ __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
+ kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
+ kernel.packet[1].v = tmp;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+ __m128d result = pblend(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
+ return Packet2cf(_mm_castpd_ps(result));
+}
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_SSE_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h b/third_party/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h
new file mode 100644
index 0000000000..0baa7b4b58
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -0,0 +1,529 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007 Julien Pommier
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/* The sin, cos, exp, and log functions of this file come from
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
+ */
+
+#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
+#define EIGEN_MATH_FUNCTIONS_SSE_H
+
+namespace Eigen {
+
+namespace internal {
+
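+// plog: split x into mantissa and exponent, evaluate a cephes-style polynomial of the shifted
+// mantissa, and add back e*log(2); log(2) is split across two constants (q1, q2) for extra precision.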
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f plog<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+
+  /* the smallest non-denormalized float number */
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f
+
+  /* natural logarithm computed for 4 simultaneous floats;
+     returns NaN for x <= 0
+ */
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+
+
+ Packet4i emm0;
+
+  // invalid_mask is set when x is NaN or negative
+ Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps());
+ Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
+
+ x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
+ emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
+
+  /* keep only the mantissa, mapping x into [0.5, 1) */
+ x = _mm_and_ps(x, p4f_inv_mant_mask);
+ x = _mm_or_ps(x, p4f_half);
+
+ emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
+ Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
+
+ /* part2:
+ if( x < SQRTHF ) {
+ e -= 1;
+ x = x + x - 1.0;
+ } else { x = x - 1.0; }
+ */
+ Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
+ Packet4f tmp = pand(x, mask);
+ x = psub(x, p4f_1);
+ e = psub(e, pand(p4f_1, mask));
+ x = padd(x, tmp);
+
+ Packet4f x2 = pmul(x,x);
+ Packet4f x3 = pmul(x2,x);
+
+ Packet4f y, y1, y2;
+ y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
+ y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
+ y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
+ y = pmadd(y , x, p4f_cephes_log_p2);
+ y1 = pmadd(y1, x, p4f_cephes_log_p5);
+ y2 = pmadd(y2, x, p4f_cephes_log_p8);
+ y = pmadd(y, x3, y1);
+ y = pmadd(y, x3, y2);
+ y = pmul(y, x3);
+
+ y1 = pmul(e, p4f_cephes_log_q1);
+ tmp = pmul(x2, p4f_half);
+ y = padd(y, y1);
+ x = psub(x, tmp);
+ y2 = pmul(e, p4f_cephes_log_q2);
+ x = padd(x, y);
+ x = padd(x, y2);
+ // negative arg will be NAN, 0 will be -INF
+ return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
+ _mm_and_ps(iszero_mask, p4f_minus_inf));
+}
+
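+// pexp: range-reduce with exp(x) = 2^n * exp(g) where n = round(x * log2(e)), evaluate a degree-5
+// polynomial for exp(g), then rebuild 2^n by adding n to the exponent bits of the float result.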
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+
+
+ _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+
+ Packet4f tmp, fx;
+ Packet4i emm0;
+
+ // clamp x
+ x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
+
+ /* express exp(x) as exp(g + n*log(2)) */
+ fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
+
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ fx = _mm_floor_ps(fx);
+#else
+ emm0 = _mm_cvttps_epi32(fx);
+ tmp = _mm_cvtepi32_ps(emm0);
+  /* if greater, subtract 1 */
+ Packet4f mask = _mm_cmpgt_ps(tmp, fx);
+ mask = _mm_and_ps(mask, p4f_1);
+ fx = psub(tmp, mask);
+#endif
+
+ tmp = pmul(fx, p4f_cephes_exp_C1);
+ Packet4f z = pmul(fx, p4f_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+
+ z = pmul(x,x);
+
+ Packet4f y = p4f_cephes_exp_p0;
+ y = pmadd(y, x, p4f_cephes_exp_p1);
+ y = pmadd(y, x, p4f_cephes_exp_p2);
+ y = pmadd(y, x, p4f_cephes_exp_p3);
+ y = pmadd(y, x, p4f_cephes_exp_p4);
+ y = pmadd(y, x, p4f_cephes_exp_p5);
+ y = pmadd(y, z, x);
+ y = padd(y, p4f_1);
+
+ // build 2^n
+ emm0 = _mm_cvttps_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, p4i_0x7f);
+ emm0 = _mm_slli_epi32(emm0, 23);
+ return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
+}
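+// Double-precision variant: same range reduction, but exp(g) is evaluated as the cephes rational
+// approximation 1 + 2*px/(qx - px) before the 2^n scaling.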
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+ Packet2d x = _x;
+
+ _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+ _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+ _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+
+ _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
+ _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+ static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
+
+ Packet2d tmp, fx;
+ Packet4i emm0;
+
+ // clamp x
+ x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+ /* express exp(x) as exp(g + n*log(2)) */
+ fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
+
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ fx = _mm_floor_pd(fx);
+#else
+ emm0 = _mm_cvttpd_epi32(fx);
+ tmp = _mm_cvtepi32_pd(emm0);
+  /* if greater, subtract 1 */
+ Packet2d mask = _mm_cmpgt_pd(tmp, fx);
+ mask = _mm_and_pd(mask, p2d_1);
+ fx = psub(tmp, mask);
+#endif
+
+ tmp = pmul(fx, p2d_cephes_exp_C1);
+ Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+
+ Packet2d x2 = pmul(x,x);
+
+ Packet2d px = p2d_cephes_exp_p0;
+ px = pmadd(px, x2, p2d_cephes_exp_p1);
+ px = pmadd(px, x2, p2d_cephes_exp_p2);
+ px = pmul (px, x);
+
+ Packet2d qx = p2d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+
+ x = pdiv(px,psub(qx,px));
+ x = pmadd(p2d_2,x,p2d_1);
+
+ // build 2^n
+ emm0 = _mm_cvttpd_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, p4i_1023_0);
+ emm0 = _mm_slli_epi32(emm0, 20);
+ emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
+ return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
+}
+
+/* evaluation of 4 sines at once, using SSE2 intrinsics.
+
+   The code is an exact rewriting of the cephes sinf function.
+   Precision is excellent as long as x < 8192 (the special handling cephes
+   applies to larger arguments is not reproduced here -- arguments over 8192
+   do not return garbage, but the extra precision is missing).
+
+   Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
+   surprising but correct result.
+*/
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psin<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+
+ _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+ _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+ _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+ _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
+
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
+
+ Packet4f xmm1, xmm2, xmm3, sign_bit, y;
+
+ Packet4i emm0, emm2;
+ sign_bit = x;
+ /* take the absolute value */
+ x = pabs(x);
+
+ /* take the modulo */
+
+ /* extract the sign bit (upper one) */
+ sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
+
+ /* scale by 4/Pi */
+ y = pmul(x, p4f_cephes_FOPI);
+
+ /* store the integer part of y in mm0 */
+ emm2 = _mm_cvttps_epi32(y);
+ /* j=(j+1) & (~1) (see the cephes sources) */
+ emm2 = _mm_add_epi32(emm2, p4i_1);
+ emm2 = _mm_and_si128(emm2, p4i_not1);
+ y = _mm_cvtepi32_ps(emm2);
+ /* get the swap sign flag */
+ emm0 = _mm_and_si128(emm2, p4i_4);
+ emm0 = _mm_slli_epi32(emm0, 29);
+  /* get the polynomial selection mask:
+     there is one polynomial for 0 <= x <= Pi/4
+     and another one for Pi/4 < x <= Pi/2
+
+ Both branches will be computed.
+ */
+ emm2 = _mm_and_si128(emm2, p4i_2);
+ emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+
+ Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
+ Packet4f poly_mask = _mm_castsi128_ps(emm2);
+ sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+
+ /* The magic pass: "Extended precision modular arithmetic"
+ x = ((x - y * DP1) - y * DP2) - y * DP3; */
+ xmm1 = pmul(y, p4f_minus_cephes_DP1);
+ xmm2 = pmul(y, p4f_minus_cephes_DP2);
+ xmm3 = pmul(y, p4f_minus_cephes_DP3);
+ x = padd(x, xmm1);
+ x = padd(x, xmm2);
+ x = padd(x, xmm3);
+
+  /* Evaluate the first polynomial (0 <= x <= Pi/4) */
+ y = p4f_coscof_p0;
+ Packet4f z = _mm_mul_ps(x,x);
+
+ y = pmadd(y, z, p4f_coscof_p1);
+ y = pmadd(y, z, p4f_coscof_p2);
+ y = pmul(y, z);
+ y = pmul(y, z);
+ Packet4f tmp = pmul(z, p4f_half);
+ y = psub(y, tmp);
+ y = padd(y, p4f_1);
+
+  /* Evaluate the second polynomial (Pi/4 <= x <= Pi/2) */
+
+ Packet4f y2 = p4f_sincof_p0;
+ y2 = pmadd(y2, z, p4f_sincof_p1);
+ y2 = pmadd(y2, z, p4f_sincof_p2);
+ y2 = pmul(y2, z);
+ y2 = pmul(y2, x);
+ y2 = padd(y2, x);
+
+  /* select the correct result from the two polynomials */
+ y2 = _mm_and_ps(poly_mask, y2);
+ y = _mm_andnot_ps(poly_mask, y);
+ y = _mm_or_ps(y,y2);
+ /* update the sign */
+ return _mm_xor_ps(y, sign_bit);
+}
+
+/* almost the same as psin */
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pcos<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+
+ _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+ _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+ _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+ _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
+
+ Packet4f xmm1, xmm2, xmm3, y;
+ Packet4i emm0, emm2;
+
+ x = pabs(x);
+
+ /* scale by 4/Pi */
+ y = pmul(x, p4f_cephes_FOPI);
+
+ /* get the integer part of y */
+ emm2 = _mm_cvttps_epi32(y);
+ /* j=(j+1) & (~1) (see the cephes sources) */
+ emm2 = _mm_add_epi32(emm2, p4i_1);
+ emm2 = _mm_and_si128(emm2, p4i_not1);
+ y = _mm_cvtepi32_ps(emm2);
+
+ emm2 = _mm_sub_epi32(emm2, p4i_2);
+
+ /* get the swap sign flag */
+ emm0 = _mm_andnot_si128(emm2, p4i_4);
+ emm0 = _mm_slli_epi32(emm0, 29);
+  /* get the polynomial selection mask */
+ emm2 = _mm_and_si128(emm2, p4i_2);
+ emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+
+ Packet4f sign_bit = _mm_castsi128_ps(emm0);
+ Packet4f poly_mask = _mm_castsi128_ps(emm2);
+
+ /* The magic pass: "Extended precision modular arithmetic"
+ x = ((x - y * DP1) - y * DP2) - y * DP3; */
+ xmm1 = pmul(y, p4f_minus_cephes_DP1);
+ xmm2 = pmul(y, p4f_minus_cephes_DP2);
+ xmm3 = pmul(y, p4f_minus_cephes_DP3);
+ x = padd(x, xmm1);
+ x = padd(x, xmm2);
+ x = padd(x, xmm3);
+
+  /* Evaluate the first polynomial (0 <= x <= Pi/4) */
+ y = p4f_coscof_p0;
+ Packet4f z = pmul(x,x);
+
+ y = pmadd(y,z,p4f_coscof_p1);
+ y = pmadd(y,z,p4f_coscof_p2);
+ y = pmul(y, z);
+ y = pmul(y, z);
+ Packet4f tmp = _mm_mul_ps(z, p4f_half);
+ y = psub(y, tmp);
+ y = padd(y, p4f_1);
+
+  /* Evaluate the second polynomial (Pi/4 <= x <= Pi/2) */
+ Packet4f y2 = p4f_sincof_p0;
+ y2 = pmadd(y2, z, p4f_sincof_p1);
+ y2 = pmadd(y2, z, p4f_sincof_p2);
+ y2 = pmul(y2, z);
+ y2 = pmadd(y2, x, x);
+
+  /* select the correct result from the two polynomials */
+ y2 = _mm_and_ps(poly_mask, y2);
+ y = _mm_andnot_ps(poly_mask, y);
+ y = _mm_or_ps(y,y2);
+
+ /* update the sign */
+ return _mm_xor_ps(y, sign_bit);
+}
+
+#if EIGEN_FAST_MATH
+
+// This is based on Quake3's fast inverse square root.
+// For details see here: http://www.beyond3d.com/content/articles/8/
+// It lacks 1 bit (or 2 bits in some rare cases) of precision, and does not handle negative, +inf, or denormalized numbers correctly.
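+// The reciprocal square root estimate is refined with one Newton-Raphson step and then multiplied
+// by the input, since sqrt(x) = x * rsqrt(x); inputs below FLT_MIN are masked to zero so that
+// rsqrt(0) = inf does not leak into the result.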
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& _x)
+{
+ Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
+
+ /* select only the inverse sqrt of non-zero inputs */
+ Packet4f non_zero_mask = _mm_cmpge_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)()));
+ Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x));
+
+ x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
+ return pmul(_x,x);
+}
+
+#else
+
+template<> EIGEN_STRONG_INLINE Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
+
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
+
+
+#if EIGEN_FAST_MATH
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
+ _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
+
+ Packet4f neg_half = pmul(_x, p4f_minus_half);
+
+ // select only the inverse sqrt of positive normal inputs (denormals are
+ // flushed to zero and cause infs as well).
+ Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
+ Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
+
+ // Fill in NaNs and Infs for the negative/zero entries.
+ Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
+ Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
+ Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
+ _mm_and_ps(zero_mask, p4f_inf));
+
+ // Do a single step of Newton's iteration.
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
+
+ // Insert NaNs and Infs in all the right places.
+ return _mm_or_ps(x, infs_and_nans);
+}
+
+#else
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x) {
+  // Unfortunately we can't use the much faster _mm_rsqrt_ps since it only provides an approximation.
+ return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
+}
+
+#endif
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x) {
+  // SSE has no double-precision counterpart of the fast _mm_rsqrt_ps approximation, so fall back to a full-precision divide and sqrt.
+ return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
+}
+
+// Identical to the ptanh in GenericPacketMath.h, but for doubles it uses
+// a small/medium approximation threshold of 0.001.
+template<> EIGEN_STRONG_INLINE Packet2d ptanh_approx_threshold() {
+ return pset1<Packet2d>(0.001);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATH_FUNCTIONS_SSE_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h b/third_party/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h
new file mode 100644
index 0000000000..7f4274fd99
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -0,0 +1,883 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_SSE_H
+#define EIGEN_PACKET_MATH_SSE_H
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
+#endif
+#endif
+
+typedef __m128 Packet4f;
+typedef __m128i Packet4i;
+typedef __m128d Packet2d;
+
+template<> struct is_arithmetic<__m128> { enum { value = true }; };
+template<> struct is_arithmetic<__m128i> { enum { value = true }; };
+template<> struct is_arithmetic<__m128d> { enum { value = true }; };
+
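+// The swizzle macros build the shuffle immediate (s<<6 | r<<4 | q<<2 | p) so that lane i of the
+// result is lane p/q/r/s of the input; the float variants go through the integer domain to get a
+// single pshufd.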
+#define vec4f_swizzle1(v,p,q,r,s) \
+ (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
+
+#define vec4i_swizzle1(v,p,q,r,s) \
+ (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
+
+#define vec2d_swizzle1(v,p,q) \
+ (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
+
+#define vec4f_swizzle2(a,b,p,q,r,s) \
+ (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
+
+#define vec4i_swizzle2(a,b,p,q,r,s) \
+ (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
+
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ const Packet4f p4f_##NAME = pset1<Packet4f>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ const Packet2d p2d_##NAME = pset1<Packet2d>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
+
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+
+
+// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
+// to leverage AVX instructions.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet4f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 0,
+
+ HasDiv = 1,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = EIGEN_FAST_MATH,
+ HasTanH = 1,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+
+ HasBlend = 1,
+ HasSelect = 1,
+ HasEq = 1,
+ };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 0,
+
+ HasDiv = 1,
+ HasTanH = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+
+ HasBlend = 1,
+ HasSelect = 1,
+ HasEq = 1,
+ };
+};
+#endif
+template<> struct packet_traits<int> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half;
+ enum {
+ // FIXME check the Has*
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+
+ HasBlend = 1,
+ };
+};
+
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
+
+#if EIGEN_COMP_MSVC==1500
+// Workaround for an MSVC 9 internal compiler error.
+// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32-bit + SSE mode.
+// TODO: let's check whether a better fix exists, like adding a pset0() function (it crashed on pset1(0)).
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
+#endif
+
+// GCC generates a shufps instruction for _mm_set1_ps/_mm_load1_ps instead of the more efficient pshufd instruction.
+// However, using intrinsics for pset1 makes gcc generate suboptimal code in some cases (see bug 203).
+// Using inline assembly is also not an option because then gcc fails to properly reorder the instructions.
+// Therefore, we introduced the pload1 functions to be used in product kernels for which bug 203 does not apply.
+// Also note that with AVX, we want it to generate a vbroadcastss.
+#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
+template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
+ return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
+}
+#endif
+
+#ifndef EIGEN_VECTORIZE_AVX
+template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
+
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ple<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_cmple_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d ple<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_cmple_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f plt<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_cmplt_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d plt<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_cmplt_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f peq<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_cmpeq_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d peq<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pselect<Packet4f>(const Packet4f& a, const Packet4f& b, const Packet4f& false_mask) {
+#if defined(EIGEN_VECTORIZE_SSE4_1)
+ return _mm_blendv_ps(a, b, false_mask);
+#else
+ return _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pselect<Packet2d>(const Packet2d& a, const Packet2d& b, const Packet2d& false_mask) {
+#if defined(EIGEN_VECTORIZE_SSE4_1)
+ return _mm_blendv_pd(a, b, false_mask);
+#else
+ return _mm_or_pd(_mm_andnot_pd(false_mask, a), _mm_and_pd(false_mask, b));
+#endif
+}
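+
+// A minimal usage sketch (illustrative only; x is a hypothetical packet): combining the
+// comparison and selection primitives above to clamp negative lanes of x to zero.
+// Lanes where the mask is all-ones take the second argument, the others keep the first.
+//   Packet4f mask = plt(x, pset1<Packet4f>(0.0f));            // all-ones where x < 0
+//   Packet4f y    = pselect(x, pset1<Packet4f>(0.0f), mask);  // per lane: x < 0 ? 0 : x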
+
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
+{
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+ return _mm_xor_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
+{
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
+ return _mm_xor_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
+{
+ return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_mullo_epi32(a,b);
+#else
+ // this version is slightly faster than 4 scalar products
+ return vec4i_swizzle1(
+ vec4i_swizzle2(
+ _mm_mul_epu32(a,b),
+ _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
+ vec4i_swizzle1(b,1,0,3,2)),
+ 0,2,0,2),
+ 0,2,1,3);
+#endif
+}
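+
+// A brief sketch of the SSE2 fallback above (illustrative only): _mm_mul_epu32 only
+// multiplies lanes 0 and 2, producing 64-bit results. The first mul yields {a0*b0, a2*b2},
+// the second (on inputs swizzled as 1,0,3,2) yields {a1*b1, a3*b3}; the two swizzles then
+// keep the low 32 bits of each product and restore the order {a0*b0, a1*b1, a2*b2, a3*b3}.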
+
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
+{ eigen_assert(false && "packet integer division is not supported by SSE");
+ return pset1<Packet4i>(0);
+}
+
+// For some weird reason, pmadd has to be overloaded for packets of integers.
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_min_epi32(a,b);
+#else
+ // after some bench, this version *is* faster than a scalar implementation
+ Packet4i mask = _mm_cmplt_epi32(a,b);
+ return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_max_epi32(a,b);
+#else
+ // after some bench, this version *is* faster than a scalar implementation
+ Packet4i mask = _mm_cmpgt_epi32(a,b);
+ return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
+
+#if EIGEN_COMP_MSVC
+ template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ #if (EIGEN_COMP_MSVC==1600)
+    // NOTE Some versions of MSVC10 generate bad code when using _mm_loadu_ps
+    // (i.e., they do not emit an unaligned load!!).
+    // TODO On most architectures this version should also be faster than a single _mm_loadu_ps,
+    // so we could also enable it for MSVC08, but first we have to make sure this path does not generate bad code there...
+ __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
+ res = _mm_loadh_pi(res, (const __m64*)(from+2));
+ return res;
+ #else
+ return _mm_loadu_ps(from);
+ #endif
+ }
+ template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
+ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
+#else
+// Fast unaligned loads. Note that here we cannot directly use the intrinsics: this would
+// require casting to incompatible pointer types and leads to invalid code
+// because of the strict aliasing rule. The "dummy" stuff is required to enforce
+// a correct instruction dependency.
+// TODO: do the same for MSVC (ICC is compatible)
+// NOTE: with the code below, MSVC's compiler crashes!
+
+#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386 || (EIGEN_ARCH_x86_64 && EIGEN_GNUC_AT_LEAST(4, 8)))
+ // bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
+ #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
+ #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1
+#elif EIGEN_COMP_CLANG
+ // bug 201: Segfaults in __mm_loadh_pd with clang 2.8
+ #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
+ #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
+#else
+ #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
+ #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
+ return _mm_loadu_ps(from);
+#else
+ __m128d res;
+ res = _mm_load_sd((const double*)(from)) ;
+ res = _mm_loadh_pd(res, (const double*)(from+2)) ;
+ return _mm_castpd_ps(res);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
+ return _mm_loadu_pd(from);
+#else
+ __m128d res;
+ res = _mm_load_sd(from) ;
+ res = _mm_loadh_pd(res,from+1);
+ return res;
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
+ return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
+#else
+ __m128d res;
+ res = _mm_load_sd((const double*)(from)) ;
+ res = _mm_loadh_pd(res, (const double*)(from+2)) ;
+ return _mm_castpd_si128(res);
+#endif
+}
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{ return pset1<Packet2d>(from[0]); }
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+ Packet4i tmp;
+ tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
+ return vec4i_swizzle1(tmp, 0, 0, 1, 1);
+}
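+
+// A minimal usage sketch (illustrative only; buf is a hypothetical buffer): ploaddup
+// loads size/2 scalars and duplicates each of them, e.g. for floats
+//   Packet4f v = ploaddup<Packet4f>(buf);   // {buf[0], buf[0], buf[1], buf[1]}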
+
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
+
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
+ EIGEN_DEBUG_UNALIGNED_STORE
+#if EIGEN_AVOID_CUSTOM_UNALIGNED_STORES
+ _mm_storeu_pd(to, from);
+#else
+ _mm_storel_pd((to), from);
+ _mm_storeh_pd((to+1), from);
+#endif
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castps_pd(from))); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castsi128_pd(from))); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride)
+{
+ return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, int stride)
+{
+ return _mm_set_pd(from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride)
+{
+ return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+ }
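+
+// A minimal usage sketch (illustrative only; buf is a hypothetical buffer): pgather
+// collects strided elements into a single packet, e.g.
+//   Packet4f v = pgather<float, Packet4f>(buf, 3);   // {buf[0], buf[3], buf[6], buf[9]}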
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride)
+{
+ to[stride*0] = _mm_cvtss_f32(from);
+ to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
+ to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
+ to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, int stride)
+{
+ to[stride*0] = _mm_cvtsd_f64(from);
+ to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride)
+{
+ to[stride*0] = _mm_cvtsi128_si32(from);
+ to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
+ to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
+ to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
+}
+
+// some compilers might be tempted to perform multiple moves instead of using a vector path.
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
+{
+ Packet4f pa = _mm_set_ss(a);
+ pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
+}
+// some compilers might be tempted to perform multiple moves instead of using a vector path.
+template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
+{
+ Packet2d pa = _mm_set_sd(a);
+ pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
+}
+
+#ifndef EIGEN_VECTORIZE_AVX
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+#endif
+
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
+// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010.
+// Direct access to the struct members fixed bug #62.
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#elif EIGEN_COMP_MSVC_STRICT
+// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#else
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{ return _mm_shuffle_ps(a,a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{ return _mm_shuffle_pd(a,a,0x1); }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{ return _mm_shuffle_epi32(a,0x1B); }
+
+template<size_t offset>
+struct protate_impl<offset, Packet4f>
+{
+ static Packet4f run(const Packet4f& a) {
+ return vec4f_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4);
+ }
+};
+
+template<size_t offset>
+struct protate_impl<offset, Packet4i>
+{
+ static Packet4i run(const Packet4i& a) {
+ return vec4i_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4);
+ }
+};
+
+template<size_t offset>
+struct protate_impl<offset, Packet2d>
+{
+ static Packet2d run(const Packet2d& a) {
+ return vec2d_swizzle1(a, offset, (offset + 1) % 2);
+ }
+};
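+
+// A minimal usage sketch (illustrative only; a is a hypothetical packet): protate_impl
+// rotates the packet by `offset` lanes towards lane 0, e.g. for offset == 1 on
+// {a0, a1, a2, a3} the result is {a1, a2, a3, a0}.
+//   Packet4f r = protate_impl<1, Packet4f>::run(a);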
+
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
+{
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+ return _mm_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
+{
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+ return _mm_and_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
+{
+ #ifdef EIGEN_VECTORIZE_SSSE3
+ return _mm_abs_epi32(a);
+ #else
+ Packet4i aux = _mm_srai_epi32(a,31);
+ return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
+ #endif
+}
+
+// with AVX, the default implementations based on pload1 are faster
+#ifndef __AVX__
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+ a3 = pload<Packet4f>(a);
+ a0 = vec4f_swizzle1(a3, 0,0,0,0);
+ a1 = vec4f_swizzle1(a3, 1,1,1,1);
+ a2 = vec4f_swizzle1(a3, 2,2,2,2);
+ a3 = vec4f_swizzle1(a3, 3,3,3,3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+#ifdef EIGEN_VECTORIZE_SSE3
+ a0 = _mm_loaddup_pd(a+0);
+ a1 = _mm_loaddup_pd(a+1);
+ a2 = _mm_loaddup_pd(a+2);
+ a3 = _mm_loaddup_pd(a+3);
+#else
+ a1 = pload<Packet2d>(a);
+ a0 = vec2d_swizzle1(a1, 0,0);
+ a1 = vec2d_swizzle1(a1, 1,1);
+ a3 = pload<Packet2d>(a+2);
+ a2 = vec2d_swizzle1(a3, 0,0);
+ a3 = vec2d_swizzle1(a3, 1,1);
+#endif
+}
+#endif
+
+EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
+{
+ vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
+ vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
+ vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
+ vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
+}
+
+#ifdef EIGEN_VECTORIZE_SSE3
+// TODO implement SSE2 versions as well as integer versions
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ return _mm_hadd_pd(vecs[0], vecs[1]);
+}
+// SSSE3 version:
+// EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs)
+// {
+// return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
+// }
+
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp0 = _mm_hadd_ps(a,a);
+ return pfirst<Packet4f>(_mm_hadd_ps(tmp0, tmp0));
+}
+
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst<Packet2d>(_mm_hadd_pd(a, a)); }
+
+// SSSE3 version:
+// EIGEN_STRONG_INLINE float predux(const Packet4i& a)
+// {
+// Packet4i tmp0 = _mm_hadd_epi32(a,a);
+// return pfirst(_mm_hadd_epi32(tmp0, tmp0));
+// }
+#else
+// SSE2 versions
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
+ return pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ return pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ Packet4f tmp0, tmp1, tmp2;
+ tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
+ tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
+ tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
+ tmp0 = _mm_add_ps(tmp0, tmp1);
+ tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
+ tmp1 = _mm_add_ps(tmp1, tmp2);
+ tmp2 = _mm_movehl_ps(tmp1, tmp0);
+ tmp0 = _mm_movelh_ps(tmp0, tmp1);
+ return _mm_add_ps(tmp0, tmp2);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
+}
+#endif // SSE3
+
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
+ return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
+}
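+
+// A brief sketch of the integer reduction above (illustrative only): _mm_unpackhi_epi64
+// moves lanes {2,3} down, so tmp = {a0+a2, a1+a3, ...}; the final result is then
+// tmp[0] + tmp[1] = a0 + a1 + a2 + a3.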
+
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ Packet4i tmp0, tmp1, tmp2;
+ tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+ tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+ tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+ tmp0 = _mm_add_epi32(tmp0, tmp1);
+ tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+ tmp1 = _mm_add_epi32(tmp1, tmp2);
+ tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
+ tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
+ return _mm_add_epi32(tmp0, tmp2);
+}
+
+// Other reduction functions:
+
+// mul
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+  // after some experiments, it seems this is the fastest way to implement it
+  // for GCC (e.g., reusing pmul is very slow!)
+ // TODO try to call _mm_mul_epu32 directly
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+  return (aux[0] * aux[1]) * (aux[2] * aux[3]);
+}
+
+// min
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+ return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+  // after some experiments, it seems this is the fastest way to implement it
+  // for GCC (e.g., it does not like using std::min after the pstore!!)
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+ int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+ return aux0<aux2 ? aux0 : aux2;
+#endif // EIGEN_VECTORIZE_SSE4_1
+}
+
+// max
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+ return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+  // after some experiments, it seems this is the fastest way to implement it
+  // for GCC (e.g., it does not like using std::max after the pstore!!)
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+ int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+ return aux0>aux2 ? aux0 : aux2;
+#endif // EIGEN_VECTORIZE_SSE4_1
+}
+
+#if EIGEN_COMP_GNUC
+// template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
+// {
+// Packet4f res = b;
+// asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
+// return res;
+// }
+// EIGEN_STRONG_INLINE Packet4i _mm_alignr_epi8(const Packet4i& a, const Packet4i& b, const int i)
+// {
+// Packet4i res = a;
+// asm("palignr %[i], %[a], %[b] " : [b] "+x" (res) : [a] "x" (a), [i] "i" (i));
+// return res;
+// }
+#endif
+
+#ifdef EIGEN_VECTORIZE_SSSE3
+// SSSE3 versions
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+ if (Offset!=0)
+ first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
+ }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+ if (Offset!=0)
+ first = _mm_alignr_epi8(second,first, Offset*4);
+ }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset==1)
+ first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
+ }
+};
+#else
+// SSE2 versions
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_move_ss(first,second);
+ first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
+ }
+ else if (Offset==2)
+ {
+ first = _mm_movehl_ps(first,first);
+ first = _mm_movelh_ps(first,second);
+ }
+ else if (Offset==3)
+ {
+ first = _mm_move_ss(first,second);
+ first = _mm_shuffle_ps(first,second,0x93);
+ }
+ }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ first = _mm_shuffle_epi32(first,0x39);
+ }
+ else if (Offset==2)
+ {
+ first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
+ first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ }
+ else if (Offset==3)
+ {
+ first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
+ }
+ }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
+ first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
+ }
+ }
+};
+#endif
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
+ kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
+ kernel.packet[1] = tmp;
+}
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
+ __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
+ __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
+ __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
+
+ kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
+ kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
+ kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
+ kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
+}
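+
+// A minimal usage sketch (illustrative only; mat is a hypothetical 4x4 row-major matrix):
+// load one row per packet, transpose in registers, and each packet then holds a column.
+//   PacketBlock<Packet4f,4> block;
+//   for (int r = 0; r < 4; ++r) block.packet[r] = ploadu<Packet4f>(mat + 4*r);
+//   ptranspose(block);   // block.packet[c] now contains column c of mat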
+
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m128i false_mask = _mm_cmpeq_epi32(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+ const __m128 zero = _mm_setzero_ps();
+ const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m128 false_mask = _mm_cmpeq_ps(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+ const __m128d zero = _mm_setzero_pd();
+ const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
+ __m128d false_mask = _mm_cmpeq_pd(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
+#endif
+}
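+
+// A minimal usage sketch (illustrative only; t and e are hypothetical packets): pblend
+// keeps lanes of `thenPacket` where the selector entry is non-zero and takes lanes of
+// `elsePacket` elsewhere.
+//   Selector<4> sel = {{1, 0, 1, 0}};
+//   Packet4f r = pblend(sel, t, e);   // {t0, e1, t2, e3}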
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_SSE_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h b/third_party/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h
new file mode 100644
index 0000000000..c848932306
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h
@@ -0,0 +1,77 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TYPE_CASTING_SSE_H
+#define EIGEN_TYPE_CASTING_SSE_H
+
+namespace Eigen {
+
+namespace internal {
+
+template <>
+struct type_casting_traits<float, int> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
+ return _mm_cvttps_epi32(a);
+}
+
+
+template <>
+struct type_casting_traits<int, float> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
+ return _mm_cvtepi32_ps(a);
+}
+
+
+template <>
+struct type_casting_traits<double, float> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 2,
+ TgtCoeffRatio = 1
+ };
+};
+
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
+ return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
+}
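+
+// A brief sketch (illustrative only): since SrcCoeffRatio is 2, the caller feeds two
+// Packet2d's per Packet4f produced; the shuffle above simply concatenates the two
+// converted halves, so pcast<Packet2d, Packet4f>(a, b) == {a0, a1, b0, b1} as floats.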
+
+template <>
+struct type_casting_traits<float, double> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 2
+ };
+};
+
+template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
+ // Simply discard the second half of the input
+ return _mm_cvtps_pd(a);
+}
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_SSE_H
diff --git a/third_party/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h b/third_party/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h
new file mode 100644
index 0000000000..ae264aa640
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -0,0 +1,167 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H
+#define EIGEN_ASSIGNMENT_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment
+ *
+ */
+template<typename Scalar> struct assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; }
+
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
+ { internal::pstoret<Scalar,Packet,Alignment>(a,b); }
+};
+template<typename Scalar>
+struct functor_traits<assign_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::ReadCost,
+ PacketAccess = packet_traits<Scalar>::IsVectorized
+ };
+};
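+
+// A minimal usage sketch (illustrative only; dst, src, i and p are hypothetical): the
+// assignment evaluators typically call assignCoeff for the scalar tail and assignPacket
+// for the vectorized part of the loop.
+//   assign_op<float> func;
+//   func.assignCoeff(dst[i], src[i]);                          // dst[i] = src[i]
+//   func.template assignPacket<Aligned, Packet4f>(&dst[i], p); // dst[i..i+3] = p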
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment with addition
+ *
+ */
+template<typename Scalar> struct add_assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; }
+
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
+ { internal::pstoret<Scalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename Scalar>
+struct functor_traits<add_assign_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasAdd
+ };
+};
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment with subtraction
+ *
+ */
+template<typename Scalar> struct sub_assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; }
+
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
+ { internal::pstoret<Scalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename Scalar>
+struct functor_traits<sub_assign_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasAdd
+ };
+};
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment with multiplication
+ *
+ */
+template<typename Scalar> struct mul_assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a *= b; }
+
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
+ { internal::pstoret<Scalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename Scalar>
+struct functor_traits<mul_assign_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasMul
+ };
+};
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment with division
+ *
+ */
+template<typename Scalar> struct div_assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a /= b; }
+
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
+ { internal::pstoret<Scalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename Scalar>
+struct functor_traits<div_assign_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasMul
+ };
+};
+
+
+/** \internal
+ * \brief Template functor for scalar/packet assignment with swapping
+ *
+ * It works as follows. For a non-vectorized evaluation loop, we have:
+ *   for(i) func(A.coeffRef(i), B.coeff(i));
+ * where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behave like a non-const coeffRef.
+ * Actually, SwapWrapper might not even be needed: even if B is a plain expression, since it has to be writable,
+ * B.coeff already returns a const reference to the underlying scalar value, which assignCoeff then const_casts away.
+ *
+ * The case of a vectorized loop is more tricky:
+ * for(i,j) func.assignPacket<A_Align>(&A.coeffRef(i,j), B.packet<B_Align>(i,j));
+ * Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*,
+ * the actual alignment and Packet type.
+ *
+ */
+template<typename Scalar> struct swap_assign_op {
+
+ EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
+ {
+ using std::swap;
+ swap(a,const_cast<Scalar&>(b));
+ }
+
+ template<int LhsAlignment, int RhsAlignment, typename Packet>
+ EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
+ {
+ Packet tmp = internal::ploadt<Packet,RhsAlignment>(b);
+ internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a));
+ internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp);
+ }
+};
+template<typename Scalar>
+struct functor_traits<swap_assign_op<Scalar> > {
+ enum {
+ Cost = 3 * NumTraits<Scalar>::ReadCost,
+ PacketAccess = packet_traits<Scalar>::IsVectorized
+ };
+};
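+
+// A minimal usage sketch (illustrative only; x, y and i are hypothetical float arrays/indices):
+//   swap_assign_op<float> func;
+//   func.assignCoeff(x[i], y[i]);                                     // scalar swap
+//   func.template swapPacket<Aligned, Aligned, Packet4f>(x+i, y+i);   // swap 4 floats at once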
+
+} // namespace internal
+
+} // namespace Eigen
+
+#endif // EIGEN_ASSIGNMENT_FUNCTORS_H
diff --git a/third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h b/third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h
new file mode 100644
index 0000000000..d8ea058431
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h
@@ -0,0 +1,498 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BINARY_FUNCTORS_H
+#define EIGEN_BINARY_FUNCTORS_H
+
+// clang-format off
+
+namespace Eigen {
+
+namespace internal {
+
+//---------- associative binary functors ----------
+
+/** \internal
+ * \brief Template functor to compute the sum of two scalars
+ *
+ * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum()
+ */
+template<typename Scalar> struct scalar_sum_op {
+// typedef Scalar result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::padd(a,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+ { return internal::predux(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sum_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasAdd
+ };
+};
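+
+// A minimal usage sketch (illustrative only): scalar_sum_op backs both "a + b" expressions
+// and sum reductions; predux() folds a packet of partial sums into a single scalar.
+//   scalar_sum_op<float> op;
+//   float s = op(1.0f, 2.0f);                    // 3.0f
+//   float t = op.predux(pset1<Packet4f>(1.0f));  // 4.0f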
+
+/** \internal
+ * \brief Template specialization to deprecate the summation of boolean expressions.
+ * This is required to solve Bug 426.
+ * \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast()
+ */
+template<> struct scalar_sum_op<bool> : scalar_sum_op<int> {
+ EIGEN_DEPRECATED
+ scalar_sum_op() {}
+};
+
+
+/** \internal
+ * \brief Template functor to compute the product of two scalars
+ *
+ * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
+ */
+template<typename LhsScalar,typename RhsScalar> struct scalar_product_op {
+ enum {
+ // TODO vectorize mixed product
+ Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
+ };
+ typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmul(a,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+ { return internal::predux_mul(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
+ PacketAccess = scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the conjugate product of two scalars
+ *
+ * This is a shortcut for conj(x) * y, which is needed for optimization purposes; in Eigen2 support mode, this becomes x * conj(y)
+ */
+template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op {
+
+ enum {
+ Conj = NumTraits<LhsScalar>::IsComplex
+ };
+
+ typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
+
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
+ { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
+
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = NumTraits<LhsScalar>::MulCost,
+ PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the min of two scalars
+ *
+ * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
+ */
+template<typename Scalar> struct scalar_min_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmin(a,b); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+ { return internal::predux_min(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_min_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasMin
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the max of two scalars
+ *
+ * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
+ */
+template<typename Scalar> struct scalar_max_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmax(a,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+ { return internal::predux_max(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_max_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasMax
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the hypot of two scalars
+ *
+ * \sa MatrixBase::stableNorm(), class Redux
+ */
+template<typename Scalar> struct scalar_hypot_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
+// typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
+ {
+ using std::sqrt;
+ Scalar p = numext::maxi(_x, _y);
+ Scalar q = numext::mini(_x, _y);
+ Scalar qp = q/p;
+ return p * sqrt(Scalar(1) + qp*qp);
+ }
+};
+template<typename Scalar>
+struct functor_traits<scalar_hypot_op<Scalar> > {
+ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
+};
+
+/** \internal
+ * \brief Template functor to compute the pow of two scalars
+ */
+template<typename Scalar, typename OtherScalar> struct scalar_binary_pow_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); }
+};
+template<typename Scalar, typename OtherScalar>
+struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
+ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
+};
+
+
+
+//---------- non associative binary functors ----------
+
+/** \internal
+ * \brief Template functor to compute the difference of two scalars
+ *
+ * \sa class CwiseBinaryOp, MatrixBase::operator-
+ */
+template<typename Scalar> struct scalar_difference_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::psub(a,b); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_difference_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasSub
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the quotient of two scalars
+ *
+ * \sa class CwiseBinaryOp, Cwise::operator/()
+ */
+template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
+ enum {
+ // TODO vectorize mixed product
+ Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv
+ };
+ typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pdiv(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost), // rough estimate!
+ PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable
+ };
+};
+
+
+
+/** \internal
+ * \brief Template functor to compute the and of two booleans
+ *
+ * \sa class CwiseBinaryOp, ArrayBase::operator&&
+ */
+struct scalar_boolean_and_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
+};
+template<> struct functor_traits<scalar_boolean_and_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the or of two booleans
+ *
+ * \sa class CwiseBinaryOp, ArrayBase::operator||
+ */
+struct scalar_boolean_or_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
+};
+template<> struct functor_traits<scalar_boolean_or_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the xor of two booleans
+ *
+ * \sa class CwiseBinaryOp, ArrayBase::operator^
+ */
+struct scalar_boolean_xor_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; }
+};
+template<> struct functor_traits<scalar_boolean_xor_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+
+
+
+//---------- binary functors bound to a constant, thus appearing as a unary functor ----------
+
+/** \internal
+ * \brief Template functor to multiply a scalar by a fixed other one
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
+ */
+/* NOTE why doing the pset1() in packetOp *is* an optimization:
+ * indeed it seems better to declare m_other as a Packet and do the pset1() once
+ * in the constructor. However, in practice:
+ *  - GCC does not like m_other as a Packet and generates a load every time it needs it
+ *  - on the other hand GCC is able to move the pset1() outside the loop :)
+ *  - simpler code ;)
+ * (ICC and gcc 4.4 seem to perform well in both cases; the issue is visible with y = a*x + b*y)
+ */
+template<typename Scalar>
+struct scalar_multiple_op {
+ typedef typename packet_traits<Scalar>::type Packet;
+  // FIXME default copy constructors seem to be bugged with std::complex<>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a, pset1<Packet>(m_other)); }
+ typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_multiple_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
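+
+// A minimal usage sketch (illustrative only; v is a hypothetical packet expression):
+// scalar_multiple_op is the unary functor behind expressions such as 2.0f * v; per
+// packet it broadcasts m_other and multiplies.
+//   scalar_multiple_op<float> op(2.0f);
+//   float s    = op(3.0f);                             // 6.0f
+//   Packet4f p = op.packetOp(pset1<Packet4f>(3.0f));   // {6, 6, 6, 6}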
+
+template<typename Scalar1, typename Scalar2>
+struct scalar_multiple2_op {
+ typedef typename packet_traits<Scalar1>::type Packet1;
+ typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
+ typedef typename packet_traits<result_type>::type packet_result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const packet_result_type packetOp(const Packet1& a) const
+  { eigen_assert(false && "packetOp is not defined"); return packet_result_type(); }
+ typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
+};
+template<typename Scalar1,typename Scalar2>
+struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
+{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
+
+/** \internal
+ * \brief Template functor to divide a scalar by a fixed other one
+ *
+ * This functor is used to implement the quotient of a matrix by
+ * a scalar where the scalar type is not necessarily a floating point type.
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::operator/
+ */
+template<typename Scalar>
+struct scalar_quotient1_op {
+ typedef typename packet_traits<Scalar>::type Packet;
+  // FIXME default copy constructors seem to be bugged with std::complex<>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pdiv(a, pset1<Packet>(m_other)); }
+ typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_quotient1_op<Scalar> >
+{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
+
+// In Eigen, any binary op (Product, CwiseBinaryOp) requires the Lhs and Rhs to have the same scalar type, except for multiplication,
+// where the mixing of different types is handled by scalar_product_traits
+// In particular, real * complex<real> is allowed.
+// FIXME move this to functor_traits adding a functor_default
+template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+
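
A minimal sketch of what the rule above means in practice for user code (assuming the usual dense typedefs): a real-by-complex product compiles because scalar_product_traits supplies the mixed return type, while a mixed-type coefficient-wise sum requires an explicit cast.

    #include <Eigen/Dense>
    #include <complex>

    void mixed_scalar_sketch() {
      Eigen::MatrixXd  r = Eigen::MatrixXd::Random(3, 3);
      Eigen::MatrixXcd c = Eigen::MatrixXcd::Random(3, 3);
      Eigen::MatrixXcd p = r * c;  // ok: real * complex<real> handled by scalar_product_traits
      Eigen::MatrixXcd s = r.cast<std::complex<double> >() + c;  // cwise ops need identical scalar types
    }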
+
+/** \internal
+ * \brief Template functor to add a scalar to a fixed other one
+ * \sa class CwiseUnaryOp, Array::operator+
+ */
+/* If you wonder why doing the pset1() in packetOp() is an optimization, check scalar_multiple_op */
+template<typename Scalar>
+struct scalar_add_op {
+ typedef typename packet_traits<Scalar>::type Packet;
+ // FIXME default copy constructors seem to be buggy with std::complex<>
+ EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
+ EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { }
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; }
+ EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+ { return internal::padd(a, pset1<Packet>(m_other)); }
+ const Scalar m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_add_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
+
+/** \internal
+ * \brief Template functor to subtract a fixed scalar from another one
+ * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_rsub_op
+ */
+template<typename Scalar>
+struct scalar_sub_op {
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { }
+ EIGEN_DEVICE_FUNC inline scalar_sub_op(const Scalar& other) : m_other(other) { }
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a - m_other; }
+ EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+ { return internal::psub(a, pset1<Packet>(m_other)); }
+ const Scalar m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_sub_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
+
+/** \internal
+ * \brief Template functor to subtract a scalar from a fixed other one
+ * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_sub_op
+ */
+template<typename Scalar>
+struct scalar_rsub_op {
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { }
+ EIGEN_DEVICE_FUNC inline scalar_rsub_op(const Scalar& other) : m_other(other) { }
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other - a; }
+ EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+ { return internal::psub(pset1<Packet>(m_other), a); }
+ const Scalar m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_rsub_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
+
+/** \internal
+ * \brief Template functor to raise a scalar to a power
+ * \sa class CwiseUnaryOp, Cwise::pow
+ */
+template<typename Scalar>
+struct scalar_pow_op {
+ // FIXME default copy constructors seem to be buggy with std::complex<>
+ EIGEN_DEVICE_FUNC inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
+ EIGEN_DEVICE_FUNC inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); }
+ const Scalar m_exponent;
+};
+template<typename Scalar>
+struct functor_traits<scalar_pow_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
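
A short usage sketch, assuming the standard Array API: a fixed-exponent power on every coefficient is what this functor implements underneath.

    #include <Eigen/Dense>

    Eigen::ArrayXd powered(const Eigen::ArrayXd& a) {
      // Coefficient-wise pow with a fixed scalar exponent; numext::pow is applied per entry.
      return a.pow(2.5);
    }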
+
+/** \internal
+ * \brief Template functor to compute the quotient between a scalar and array entries.
+ * \sa class CwiseUnaryOp, Cwise::inverse()
+ */
+template<typename Scalar>
+struct scalar_inverse_mult_op {
+ EIGEN_DEVICE_FUNC scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+ { return internal::pdiv(pset1<Packet>(m_other),a); }
+ Scalar m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_inverse_mult_op<Scalar> >
+{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
+
+/** \internal
+ * \brief Template functor to compute the modulo between an array and a scalar.
+ */
+template <typename Scalar>
+struct scalar_mod_op {
+ EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {}
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; }
+ const Scalar m_divisor;
+};
+template <typename Scalar>
+struct functor_traits<scalar_mod_op<Scalar> >
+{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
+
+/** \internal
+ * \brief Template functor to compute the float modulo between an array and a scalar.
+ */
+template <typename Scalar>
+struct scalar_fmod_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar
+ operator()(const Scalar& a, const Scalar& b) const {
+ EIGEN_USING_STD_MATH(fmod);
+ return (fmod)(a, b);
+ }
+};
+
+template <typename Scalar>
+struct functor_traits<scalar_fmod_op<Scalar> > {
+ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = false };
+};
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BINARY_FUNCTORS_H
diff --git a/third_party/eigen3/Eigen/src/Core/functors/NullaryFunctors.h b/third_party/eigen3/Eigen/src/Core/functors/NullaryFunctors.h
new file mode 100644
index 0000000000..6e464b2b8a
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/functors/NullaryFunctors.h
@@ -0,0 +1,158 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NULLARY_FUNCTORS_H
+#define EIGEN_NULLARY_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename Scalar>
+struct scalar_constant_op {
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
+ const Scalar m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_constant_op<Scalar> >
+// FIXME replace this packet test by a safe one
+{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
+
+template<typename Scalar> struct scalar_identity_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_identity_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
+
+template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
+
+// linear access for packet ops:
+// 1) initialization
+// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
+// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
+// base += [size*step, ..., size*step]
+//
+// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
+// in order to avoid the padd() in operator() ?
+template <typename Scalar>
+struct linspaced_op_impl<Scalar,false>
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+
+ linspaced_op_impl(const Scalar& low, const Scalar& step) :
+ m_low(low), m_step(step),
+ m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
+ m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
+
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
+ {
+ m_base = padd(m_base, pset1<Packet>(m_step));
+ return m_low+Scalar(i)*m_step;
+ }
+
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
+
+ const Scalar m_low;
+ const Scalar m_step;
+ const Packet m_packetStep;
+ mutable Packet m_base;
+};
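
To make the initialization and per-step update above concrete, assume low = 0, step = 1 and a packet size of 4 (values chosen purely for illustration):

    // constructor: m_packetStep = [4,4,4,4]
    //              m_base       = [0,0,0,0] + [1,1,1,1] * [-4,-3,-2,-1] = [-4,-3,-2,-1]
    // packetOp(0): m_base += [4,4,4,4]  ->  [0,1,2,3]   (returned)
    // packetOp(4): m_base += [4,4,4,4]  ->  [4,5,6,7]   (returned)
    // i.e. once the base is set up, each packet of the sequence costs a single padd.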
+
+// random access for packet ops:
+// 1) each step
+// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
+template <typename Scalar>
+struct linspaced_op_impl<Scalar,true>
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+
+ linspaced_op_impl(const Scalar& low, const Scalar& step) :
+ m_low(low), m_step(step),
+ m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
+
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
+
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
+ { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(i),m_interPacket))); }
+
+ const Scalar m_low;
+ const Scalar m_step;
+ const Packet m_lowPacket;
+ const Packet m_stepPacket;
+ const Packet m_interPacket;
+};
+
+// ----- Linspace functor ----------------------------------------------------------------
+
+// Forward declaration (we default to random access, which does not really give
+// us a speed gain when using packet access, but it allows using the functor in
+// nested expressions).
+template <typename Scalar, bool RandomAccess = true> struct linspaced_op;
+template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,RandomAccess> >
+{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
+template <typename Scalar, bool RandomAccess> struct linspaced_op
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
+
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
+
+ // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since,
+ // in that case, row==0 and col is used for the actual iteration.
+ template<typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const
+ {
+ eigen_assert(col==0 || row==0);
+ return impl(col + row);
+ }
+
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
+
+ // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since,
+ // in that case, row==0 and col is used for the actual iteration.
+ template<typename Index>
+ EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
+ {
+ eigen_assert(col==0 || row==0);
+ return impl.packetOp(col + row);
+ }
+
+ // This proxy object handles the actual required temporaries, the different
+ // implementations (random vs. sequential access) as well as the
+ // correct piping to size 2/4 packet operations.
+ const linspaced_op_impl<Scalar,RandomAccess> impl;
+};
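
In user code this machinery is normally reached through the LinSpaced nullary expressions; a minimal sketch:

    #include <Eigen/Dense>

    void linspaced_sketch() {
      // Five evenly spaced values from 0 to 1: 0, 0.25, 0.5, 0.75, 1.
      Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(5, 0.0, 1.0);
      Eigen::RowVectorXd r(8);
      r.setLinSpaced(8, 0.0, 7.0);  // in-place variant
    }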
+
+// All functors allow linear access, except scalar_identity_op. So we define here a small meta-helper
+// that indicates whether a functor allows linear access; it always answers 'yes' except for
+// scalar_identity_op.
+// FIXME move this to functor_traits adding a functor_default
+template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
+template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_NULLARY_FUNCTORS_H
diff --git a/third_party/eigen3/Eigen/src/Core/functors/StlFunctors.h b/third_party/eigen3/Eigen/src/Core/functors/StlFunctors.h
new file mode 100644
index 0000000000..863fd451d3
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/functors/StlFunctors.h
@@ -0,0 +1,129 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_STL_FUNCTORS_H
+#define EIGEN_STL_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+// default functor traits for STL functors:
+
+template<typename T>
+struct functor_traits<std::multiplies<T> >
+{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::divides<T> >
+{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::plus<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::minus<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::negate<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_or<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_and<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_not<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::greater<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::less<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::greater_equal<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::less_equal<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::equal_to<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::not_equal_to<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::binder2nd<T> >
+{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::binder1st<T> >
+{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::unary_negate<T> >
+{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::binary_negate<T> >
+{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
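
These traits let plain STL functors be passed wherever Eigen expects a functor with a known cost; a small sketch using unaryExpr/binaryExpr (PacketAccess is false, so evaluation is scalar by scalar):

    #include <Eigen/Dense>
    #include <functional>

    void stl_functor_sketch(const Eigen::ArrayXd& a, const Eigen::ArrayXd& b) {
      Eigen::ArrayXd p = a.binaryExpr(b, std::multiplies<double>());  // coefficient-wise product
      Eigen::ArrayXd n = a.unaryExpr(std::negate<double>());          // coefficient-wise negation
    }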
+
+#ifdef EIGEN_STDEXT_SUPPORT
+
+template<typename T0,typename T1>
+struct functor_traits<std::project1st<T0,T1> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::project2nd<T0,T1> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::select2nd<std::pair<T0,T1> > >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::select1st<std::pair<T0,T1> > >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::unary_compose<T0,T1> >
+{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost, PacketAccess = false }; };
+
+template<typename T0,typename T1,typename T2>
+struct functor_traits<std::binary_compose<T0,T1,T2> >
+{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost + functor_traits<T2>::Cost, PacketAccess = false }; };
+
+#endif // EIGEN_STDEXT_SUPPORT
+
+// allow adding new functors and specializations of functor_traits from outside Eigen.
+// this macro is really needed because functor_traits must be specialized after it is declared but before it is used...
+#ifdef EIGEN_FUNCTORS_PLUGIN
+#include EIGEN_FUNCTORS_PLUGIN
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_STL_FUNCTORS_H
diff --git a/third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h b/third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h
new file mode 100644
index 0000000000..2a22e5bc19
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h
@@ -0,0 +1,493 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_UNARY_FUNCTORS_H
+#define EIGEN_UNARY_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+#if defined(__NVCC__) || !defined(__CUDA_ARCH__)
+using std::abs;
+using std::exp;
+using std::log;
+using std::min;
+using std::sqrt;
+using std::cos;
+using std::sin;
+using std::tan;
+using std::acos;
+using std::asin;
+using std::atan;
+#endif
+
+/** \internal
+ * \brief Template functor to compute the opposite of a scalar
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::operator-
+ */
+template<typename Scalar> struct scalar_opposite_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pnegate(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_opposite_op<Scalar> >
+{ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasNegate };
+};
+
+/** \internal
+ * \brief Template functor to compute the absolute value of a scalar
+ *
+ * \sa class CwiseUnaryOp, Cwise::abs
+ */
+template<typename Scalar> struct scalar_abs_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return abs(a); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pabs(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasAbs
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the squared absolute value of a scalar
+ *
+ * \sa class CwiseUnaryOp, Cwise::abs2
+ */
+template<typename Scalar> struct scalar_abs2_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs2_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
+
+/** \internal
+ * \brief Template functor to compute the conjugate of a complex value
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::conjugate()
+ */
+template<typename Scalar> struct scalar_conjugate_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
+ template<typename Packet>
+ EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_conjugate_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
+ PacketAccess = packet_traits<Scalar>::HasConj
+ };
+};
+
+/** \internal
+ * \brief Template functor to cast a scalar to another type
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::cast()
+ */
+template<typename Scalar, typename NewType>
+struct scalar_cast_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+ typedef NewType result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
+};
+template<typename Scalar, typename NewType>
+struct functor_traits<scalar_cast_op<Scalar,NewType> >
+{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
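
The cast functor is what backs the public cast<>() method; for instance:

    #include <Eigen/Dense>

    Eigen::MatrixXf to_float(const Eigen::MatrixXd& d) {
      // Each coefficient goes through scalar_cast_op<double,float>; PacketAccess is false,
      // so the conversion happens scalar by scalar.
      return d.cast<float>();
    }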
+
+/** \internal
+ * \brief Template functor to convert a scalar to another type using a custom functor.
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::convert()
+ */
+template<typename Scalar, typename NewType, typename ConvertOp>
+struct scalar_convert_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_convert_op)
+ typedef NewType result_type;
+ EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return ConvertOp()(a); }
+};
+template<typename Scalar, typename NewType, typename ConvertOp>
+struct functor_traits<scalar_convert_op<Scalar,NewType,ConvertOp> >
+{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
+
+
+/** \internal
+ * \brief Template functor to extract the real part of a complex
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::real()
+ */
+template<typename Scalar>
+struct scalar_real_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+ * \brief Template functor to extract the imaginary part of a complex
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::imag()
+ */
+template<typename Scalar>
+struct scalar_imag_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+ * \brief Template functor to extract the real part of a complex as a reference
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::real()
+ */
+template<typename Scalar>
+struct scalar_real_ref_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+ * \brief Template functor to extract the imaginary part of a complex as a reference
+ *
+ * \sa class CwiseUnaryOp, MatrixBase::imag()
+ */
+template<typename Scalar>
+struct scalar_imag_ref_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+ *
+ * \brief Template functor to compute the exponential of a scalar
+ *
+ * \sa class CwiseUnaryOp, Cwise::exp()
+ */
+template<typename Scalar> struct scalar_exp_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return exp(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_exp_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasExp }; };
+
+/** \internal
+ *
+ * \brief Template functor to compute the logarithm of a scalar
+ *
+ * \sa class CwiseUnaryOp, Cwise::log()
+ */
+template<typename Scalar> struct scalar_log_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return log(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_log_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };
+
+
+/** \internal
+ * \brief Template functor to compute the square root of a scalar
+ * \sa class CwiseUnaryOp, Cwise::sqrt()
+ */
+template<typename Scalar> struct scalar_sqrt_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return sqrt(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sqrt_op<Scalar> >
+{ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasSqrt
+ };
+};
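
A quick sketch of how these elementary math functors are reached from the Array API (each one lowers to the corresponding p* packet primitive when the Has* trait is set):

    #include <Eigen/Dense>

    Eigen::ArrayXf elementwise_math(const Eigen::ArrayXf& a) {
      // exp, log and sqrt are applied coefficient-wise; the +1 keeps the log argument positive.
      return (a.exp() + (a.abs() + 1.0f).log()).sqrt();
    }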
+
+/** \internal
+ * \brief Template functor to compute the reciprocal square root of a scalar
+ * \sa class CwiseUnaryOp, Cwise::rsqrt()
+ */
+template<typename Scalar> struct scalar_rsqrt_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return Scalar(1)/sqrt(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
+};
+
+template<typename Scalar>
+struct functor_traits<scalar_rsqrt_op<Scalar> >
+{ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasRsqrt
+ };
+};
+
+
+/** \internal
+ * \brief Template functor to compute the cosine of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::cos()
+ */
+template<typename Scalar> struct scalar_cos_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return cos(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cos_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasCos
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the sine of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::sin()
+ */
+template<typename Scalar> struct scalar_sin_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return sin(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sin_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasSin
+ };
+};
+
+
+/** \internal
+ * \brief Template functor to compute the tan of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::tan()
+ */
+template<typename Scalar> struct scalar_tan_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return tan(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_tan_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasTan
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the arc cosine of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::acos()
+ */
+template<typename Scalar> struct scalar_acos_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return acos(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_acos_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasACos
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the arc sine of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::asin()
+ */
+template<typename Scalar> struct scalar_asin_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return asin(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_asin_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasASin
+ };
+};
+
+
+/** \internal
+ * \brief Template functor to compute the atan of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::atan()
+ */
+template<typename Scalar> struct scalar_atan_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
+ inline const Scalar operator() (const Scalar& a) const { return atan(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_atan_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasATan
+ };
+};
+
+ /** \internal
+ * \brief Template functor to compute the tanh of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::tanh()
+ */
+template<typename Scalar> struct scalar_tanh_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); }
+ typedef typename packet_traits<Scalar>::type Packet;
+ inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_tanh_op<Scalar> >
+{
+ enum {
+ Cost = 6 * NumTraits<Scalar>::MulCost + 4 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasTanH
+ };
+};
+
+ /** \internal
+ * \brief Template functor to compute the sigmoid of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::sigmoid()
+ */
+template <typename T>
+struct scalar_sigmoid_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
+ const T one = T(1);
+ return one / (one + std::exp(-x));
+ }
+
+ template <typename Packet>
+ inline Packet packetOp(const Packet& x) const {
+ const Packet one = pset1<Packet>(1);
+ return pdiv(one, padd(one, pexp(pnegate(x))));
+ }
+};
+
+template <typename T>
+struct functor_traits<scalar_sigmoid_op<T> > {
+ enum {
+ Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
+ PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
+ packet_traits<T>::HasNegate && packet_traits<T>::HasExp
+ };
+};
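
Since scalar_sigmoid_op is defined just above in this tree (it is not necessarily part of stock Eigen), the most direct way to exercise it is through unaryExpr; a hedged sketch:

    #include <Eigen/Dense>

    Eigen::ArrayXf sigmoid_all(const Eigen::ArrayXf& x) {
      // 1 / (1 + exp(-x)) applied per coefficient.
      return x.unaryExpr(Eigen::internal::scalar_sigmoid_op<float>());
    }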
+
+/** \internal
+ * \brief Template functor to compute the inverse of a scalar
+ * \sa class CwiseUnaryOp, Cwise::inverse()
+ */
+template<typename Scalar>
+struct scalar_inverse_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
+ template<typename Packet>
+ inline const Packet packetOp(const Packet& a) const
+ { return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_inverse_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
+
+/** \internal
+ * \brief Template functor to compute the square of a scalar
+ * \sa class CwiseUnaryOp, Cwise::square()
+ */
+template<typename Scalar>
+struct scalar_square_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; }
+ template<typename Packet>
+ inline const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_square_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+
+/** \internal
+ * \brief Template functor to compute the cube of a scalar
+ * \sa class CwiseUnaryOp, Cwise::cube()
+ */
+template<typename Scalar>
+struct scalar_cube_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; }
+ template<typename Packet>
+ inline const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,pmul(a,a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cube_op<Scalar> >
+{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_UNARY_FUNCTORS_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/CoeffBasedProduct.h b/third_party/eigen3/Eigen/src/Core/products/CoeffBasedProduct.h
new file mode 100644
index 0000000000..35a6e36e81
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -0,0 +1,454 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COEFFBASED_PRODUCT_H
+#define EIGEN_COEFFBASED_PRODUCT_H
+
+namespace Eigen {
+
+namespace internal {
+
+/*********************************************************************************
+* Coefficient based product implementation.
+* It is designed for the following use cases:
+* - small fixed sizes
+* - lazy products
+*********************************************************************************/
+
+/* Since all the dimensions of the product are small, we can rely here
+ * on the generic Assign mechanism to evaluate the product per coeff (or packet).
+ *
+ * Note that here the inner loops should always be unrolled.
+ */
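
The typical trigger for this path is a small fixed-size product, where InnerSize is known at compile time and the per-coefficient loop is fully unrolled; for example:

    #include <Eigen/Dense>

    Eigen::Matrix3f small_product(const Eigen::Matrix3f& a, const Eigen::Matrix3f& b) {
      // 3x3 * 3x3: InnerSize is 3, so the coefficient-based product with unrolled
      // inner loops is selected instead of the blocked GEMM kernels.
      return a * b;
    }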
+
+template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
+struct product_coeff_impl;
+
+template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct product_packet_impl;
+
+template<typename LhsNested, typename RhsNested, int NestingFlags>
+struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
+{
+ typedef MatrixXpr XprKind;
+ typedef typename remove_all<LhsNested>::type _LhsNested;
+ typedef typename remove_all<RhsNested>::type _RhsNested;
+ typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
+ typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
+ typename traits<_RhsNested>::StorageKind>::ret StorageKind;
+ typedef typename promote_index_type<typename traits<_LhsNested>::Index,
+ typename traits<_RhsNested>::Index>::type Index;
+
+ enum {
+ LhsCoeffReadCost = _LhsNested::CoeffReadCost,
+ RhsCoeffReadCost = _RhsNested::CoeffReadCost,
+ LhsFlags = _LhsNested::Flags,
+ RhsFlags = _RhsNested::Flags,
+
+ RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
+ ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
+ InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
+
+ MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
+
+ LhsRowMajor = LhsFlags & RowMajorBit,
+ RhsRowMajor = RhsFlags & RowMajorBit,
+
+ SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
+
+ CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
+ && (ColsAtCompileTime == Dynamic
+ || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
+ && (RhsFlags&AlignedBit)
+ )
+ ),
+
+ CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
+ && (RowsAtCompileTime == Dynamic
+ || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
+ && (LhsFlags&AlignedBit)
+ )
+ ),
+
+ EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
+ : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
+ : (RhsRowMajor && !CanVectorizeLhs),
+
+ Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
+ | (EvalToRowMajor ? RowMajorBit : 0)
+ | NestingFlags
+ | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
+ | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
+ // TODO enable vectorization for mixed types
+ | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
+
+ CoeffReadCost = InnerSize == Dynamic ? Dynamic
+ : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
+
+ /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
+ * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
+ * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
+ * the Flags, it is safe to make this value depend on ActualPacketAccessBit, which does not affect the ABI.
+ */
+ CanVectorizeInner = SameType
+ && LhsRowMajor
+ && (!RhsRowMajor)
+ && (LhsFlags & RhsFlags & ActualPacketAccessBit)
+ && (LhsFlags & RhsFlags & AlignedBit)
+ && (InnerSize % packet_traits<Scalar>::size == 0)
+ };
+};
+
+} // end namespace internal
+
+template<typename LhsNested, typename RhsNested, int NestingFlags>
+class CoeffBasedProduct
+ : internal::no_assignment_operator,
+ public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
+{
+ public:
+
+ typedef MatrixBase<CoeffBasedProduct> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
+ typedef typename Base::PlainObject PlainObject;
+
+ private:
+
+ typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
+ typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
+
+ enum {
+ PacketSize = internal::packet_traits<Scalar>::size,
+ InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
+ Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
+ CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
+ };
+
+ typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
+ Unroll ? InnerSize-1 : Dynamic,
+ _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
+
+ typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
+
+ public:
+
+ EIGEN_DEVICE_FUNC
+ inline CoeffBasedProduct(const CoeffBasedProduct& other)
+ : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
+ {}
+
+ template<typename Lhs, typename Rhs>
+ EIGEN_DEVICE_FUNC
+ inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
+ : m_lhs(lhs), m_rhs(rhs)
+ {
+ // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
+ // We still allow mixing T and complex<T>.
+ EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+ eigen_assert(lhs.cols() == rhs.rows()
+ && "invalid matrix product"
+ && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+ {
+ Scalar res;
+ ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
+ return res;
+ }
+
+ /* Allow index-based non-packet access. It is impossible though to allow index-based packet access,
+ * which is why we don't set the LinearAccessBit.
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
+ {
+ Scalar res;
+ const Index row = RowsAtCompileTime == 1 ? 0 : index;
+ const Index col = RowsAtCompileTime == 1 ? index : 0;
+ ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
+ return res;
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
+ {
+ PacketScalar res;
+ internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
+ Unroll ? InnerSize-1 : Dynamic,
+ _LhsNested, _RhsNested, PacketScalar, LoadMode>
+ ::run(row, col, m_lhs, m_rhs, res);
+ return res;
+ }
+
+ // Implicit conversion to the nested type (trigger the evaluation of the product)
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE operator const PlainObject& () const
+ {
+ m_result.lazyAssign(*this);
+ return m_result;
+ }
+
+ EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; }
+ EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; }
+
+ EIGEN_DEVICE_FUNC
+ const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
+ { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
+
+ template<int DiagonalIndex>
+ EIGEN_DEVICE_FUNC
+ const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
+ { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
+
+ EIGEN_DEVICE_FUNC
+ const Diagonal<const LazyCoeffBasedProductType, DynamicIndex> diagonal(Index index) const {
+ return Diagonal<const LazyCoeffBasedProductType, DynamicIndex>(
+ reinterpret_cast<const LazyCoeffBasedProductType&>(*this), index);
+ }
+
+ protected:
+ typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
+ typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
+
+ mutable PlainObject m_result;
+};
+
+namespace internal {
+
+// here we need to overload the nested rule for products
+// such that the nested type is a const reference to a plain matrix
+template<typename Lhs, typename Rhs, int N, typename PlainObject>
+struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
+{
+ typedef PlainObject const& type;
+};
+
+/***************************************************************************
+* Normal product .coeff() implementation (with meta-unrolling)
+***************************************************************************/
+
+/**************************************
+*** Scalar path - no vectorization ***
+**************************************/
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
+struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
+{
+ typedef typename Lhs::Index Index;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
+ {
+ product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
+ res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename RetScalar>
+struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
+{
+ typedef typename Lhs::Index Index;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
+ {
+ res = lhs.coeff(row, 0) * rhs.coeff(0, col);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename RetScalar>
+struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
+{
+ typedef typename Lhs::Index Index;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
+ {
+ eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
+ res = lhs.coeff(row, 0) * rhs.coeff(0, col);
+ for(Index i = 1; i < lhs.cols(); ++i)
+ res += lhs.coeff(row, i) * rhs.coeff(i, col);
+ }
+};
+
+/*******************************************
+*** Scalar path with inner vectorization ***
+*******************************************/
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
+struct product_coeff_vectorized_unroller
+{
+ typedef typename Lhs::Index Index;
+ enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
+ {
+ product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
+ pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet>
+struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
+ {
+ pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
+ }
+};
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
+struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
+{
+ typedef typename Lhs::PacketScalar Packet;
+ typedef typename Lhs::Index Index;
+ enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
+ {
+ Packet pres;
+ product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
+ res = predux(pres);
+ }
+};
+
+template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
+struct product_coeff_vectorized_dyn_selector
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+ {
+ res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
+ }
+};
+
+// NOTE the 3 following specializations exist because taking .col(0) on a vector is a bit slower.
+// NOTE they may now be useless since we have a specialization for Block<Matrix>.
+template<typename Lhs, typename Rhs, int RhsCols>
+struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+ {
+ res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
+ }
+};
+
+template<typename Lhs, typename Rhs, int LhsRows>
+struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+ {
+ res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
+ }
+};
+
+template<typename Lhs, typename Rhs>
+struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+ {
+ res = lhs.transpose().cwiseProduct(rhs).sum();
+ }
+};
+
+template<typename Lhs, typename Rhs, typename RetScalar>
+struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+ {
+ product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
+ }
+};
+
+/*******************
+*** Packet path ***
+*******************/
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+ {
+ product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
+ res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
+ }
+};
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+ {
+ product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
+ res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+ {
+ res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+ {
+ res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
+ {
+ eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
+ res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
+ for(Index i = 1; i < lhs.cols(); ++i)
+ res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+{
+ typedef typename Lhs::Index Index;
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
+ {
+ eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
+ res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
+ for(Index i = 1; i < lhs.cols(); ++i)
+ res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
+ }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COEFFBASED_PRODUCT_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/third_party/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h
new file mode 100644
index 0000000000..80bd6aa0e6
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -0,0 +1,2197 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERAL_BLOCK_PANEL_H
+#define EIGEN_GENERAL_BLOCK_PANEL_H
+
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
+class gebp_traits;
+
+
+/** \internal \returns b if a<=0, and returns a otherwise. */
+inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
+{
+ return a<=0 ? b : a;
+}
+
+#if EIGEN_ARCH_i386_OR_x86_64
+const std::ptrdiff_t defaultL1CacheSize = 32*1024;
+const std::ptrdiff_t defaultL2CacheSize = 256*1024;
+const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024;
+#else
+const std::ptrdiff_t defaultL1CacheSize = 16*1024;
+const std::ptrdiff_t defaultL2CacheSize = 512*1024;
+const std::ptrdiff_t defaultL3CacheSize = 512*1024;
+#endif
+
+/** \internal */
+inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
+{
+ static bool m_cache_sizes_initialized = false;
+ static std::ptrdiff_t m_l1CacheSize = 0;
+ static std::ptrdiff_t m_l2CacheSize = 0;
+ static std::ptrdiff_t m_l3CacheSize = 0;
+
+ if(EIGEN_UNLIKELY(!m_cache_sizes_initialized))
+ {
+ int l1CacheSize, l2CacheSize, l3CacheSize;
+ queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
+ m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
+ m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
+ m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
+ m_cache_sizes_initialized = true;
+ }
+
+ if(EIGEN_UNLIKELY(action==SetAction))
+ {
+ // set the CPU cache sizes, in bytes; all block sizes are derived from these
+ eigen_internal_assert(l1!=0 && l2!=0);
+ m_l1CacheSize = *l1;
+ m_l2CacheSize = *l2;
+ m_l3CacheSize = *l3;
+ }
+ else if(EIGEN_LIKELY(action==GetAction))
+ {
+ eigen_internal_assert(l1!=0 && l2!=0);
+ *l1 = m_l1CacheSize;
+ *l2 = m_l2CacheSize;
+ *l3 = m_l3CacheSize;
+ }
+ else
+ {
+ eigen_internal_assert(false);
+ }
+}
+
+#define CEIL(a, b) (((a)+(b)-1)/(b))
+
+/* Helper for computeProductBlockingSizes.
+ *
+ * Given an m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
+ * this function computes the blocking size parameters along the respective dimensions
+ * for matrix products and related algorithms. The blocking sizes depend on various
+ * parameters:
+ * - the L1 and L2 cache sizes,
+ * - the register level blocking sizes defined by gebp_traits,
+ * - the number of scalars that fit into a packet (when vectorization is enabled).
+ *
+ * \sa setCpuCacheSizes */
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
+void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+ // Explanations:
+ // Let's recall that the product algorithm forms kc x nc horizontal panels B' on the rhs and
+ // mc x kc blocks A' on the lhs. A' has to fit into the L2 cache. Moreover, B' is processed
+ // per kc x nr small vertical panels, where nr is the blocking size along the n dimension
+ // at the register level. For vectorization purposes, these small vertical panels are unpacked,
+ // i.e., each coefficient is replicated to fit a packet. This small vertical panel has to
+ // stay in L1 cache.
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+ typedef typename Traits::ResScalar ResScalar;
+ enum {
+ kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
+ ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
+ k_mask = (0xffffffff/8)*8,
+
+ mr = Traits::mr,
+ mr_mask = (0xffffffff/mr)*mr,
+
+ nr = Traits::nr,
+ nr_mask = (0xffffffff/nr)*nr
+ };
+
+ std::ptrdiff_t l1, l2, l3;
+ manage_caching_sizes(GetAction, &l1, &l2, &l3);
+
+ // Increasing k gives us more time to prefetch the content of the "C"
+ // registers. However once the latency is hidden there is no point in
+ // increasing the value of k, so we'll cap it at 320 (value determined
+ // experimentally).
+ const Index k_cache = (std::min<Index>)((l1-ksub)/kdiv, 320);
+ if (k_cache < k) {
+ k = k_cache & k_mask;
+ eigen_assert(k > 0);
+ }
+
+ const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
+ Index n_per_thread = CEIL(n, num_threads);
+ if (n_cache <= n_per_thread) {
+ // Don't exceed the capacity of the l2 cache.
+ if (n_cache < nr) {
+ n = nr;
+ } else {
+ n = n_cache & nr_mask;
+ eigen_assert(n > 0);
+ }
+ } else {
+ n = (std::min<Index>)(n, (n_per_thread + nr - 1) & nr_mask);
+ }
+
+ if (l3 > l2) {
+ // l3 is shared between all cores, so we'll give each thread its own chunk of l3.
+ const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
+ const Index m_per_thread = CEIL(m, num_threads);
+ if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
+ m = m_cache & mr_mask;
+ eigen_assert(m > 0);
+ } else {
+ m = (std::min<Index>)(m, (m_per_thread + mr - 1) & mr_mask);
+ }
+ }
+}
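+
+// Rough numeric illustration of the heuristic above (assumptions, not measured values:
+// float scalars, SSE without FMA so Traits::mr==8 and Traits::nr==4, sizeof(float)==4,
+// a single thread, and the x86 default cache sizes defined earlier):
+//   kdiv = 8*4 + 4*4 = 48,          ksub = 8*4*4 = 128
+//   k_cache = min((32768 - 128)/48, 320) = min(680, 320) = 320
+//   n_cache = (262144 - 32768)/(4*4*320) = 44   (already a multiple of nr)
+// so a 1000 x 1000 x 1000 product would get blocked with k ~= 320 and n ~= 44 here.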
+
+template <typename Index>
+bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
+{
+#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+ if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
+ k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
+ m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
+ n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
+ return true;
+ }
+#else
+ EIGEN_UNUSED_VARIABLE(k)
+ EIGEN_UNUSED_VARIABLE(m)
+ EIGEN_UNUSED_VARIABLE(n)
+#endif
+ return false;
+}
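+
+// For testing, fixed blocking sizes can be forced at compile time; a hypothetical build
+// could define (values chosen arbitrarily for illustration):
+//   -DEIGEN_TEST_SPECIFIC_BLOCKING_SIZES=1
+//   -DEIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K=64
+//   -DEIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M=64
+//   -DEIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N=64
+// in which case the heuristic is bypassed and k, m, n are clamped to these values.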
+
+/** \brief Computes the blocking parameters for an m x k times k x n matrix product
+ *
+ * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
+ * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
+ * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
+ *
+ * Given an m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
+ * this function computes the blocking size parameters along the respective dimensions
+ * for matrix products and related algorithms.
+ *
+ * The blocking size parameters may be evaluated:
+ * - either by a heuristic based on cache sizes;
+ * - or using fixed prescribed values (for testing purposes).
+ *
+ * \sa setCpuCacheSizes */
+
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
+void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+ if (!k || !m || !n) {
+ return;
+ }
+
+ if (!useSpecificBlockingSizes(k, m, n)) {
+ evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
+ }
+
+#if !EIGEN_ARCH_i386_OR_x86_64
+ // The following code rounds k,m,n down to the nearest multiple of register-level blocking sizes.
+ // We should always do that, and in upstream Eigen we always do that.
+ // Unfortunately, we can't do that in Google3 on x86[-64] because this makes tiny differences in results and
+  // we have some unfortunate tests that require very specific relative errors and fail because of that,
+ // at least //learning/laser/algorithms/wals:wals_batch_solver_test.
+ // Note that this wouldn't make any difference if we had been using only correctly rounded values,
+ // but we've not! See how in evaluateProductBlockingSizesHeuristic, we do the rounding down by
+ // bit-masking, e.g. mr_mask = (0xffffffff/mr)*mr, implicitly assuming that mr is always a power of
+ // two, which is not the case with the 3px4 kernel.
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+ enum {
+ kr = 8,
+ mr = Traits::mr,
+ nr = Traits::nr
+ };
+ if (k > kr) k -= k % kr;
+ if (m > mr) m -= m % mr;
+ if (n > nr) n -= n % nr;
+#endif
+}
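+
+// Minimal usage sketch (illustrative only; kc/mc/nc/num_threads are hypothetical caller
+// variables and float scalars are assumed). The caller passes in the true problem
+// dimensions and gets back the cache-aware block sizes actually used for packing:
+//
+//   Index kc = depth, mc = rows, nc = cols;
+//   internal::computeProductBlockingSizes<float, float, 1>(kc, mc, nc, num_threads);
+//   // kc, mc, nc now hold the blocking sizes along the k, m and n dimensions.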
+
+template<typename LhsScalar, typename RhsScalar, typename Index>
+inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads)
+{
+ computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n, num_threads);
+}
+
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+ #define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
+#else
+
+ // FIXME (a bit overkill maybe ?)
+
+ template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
+ EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
+ {
+ c = cj.pmadd(a,b,c);
+ }
+ };
+
+ template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
+ EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
+ {
+ t = b; t = cj.pmul(a,t); c = padd(c,t);
+ }
+ };
+
+ template<typename CJ, typename A, typename B, typename C, typename T>
+ EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
+ {
+ gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
+ }
+
+ #define CJMADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
+// #define CJMADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T);
+#endif
+
+/* Vectorization logic
+ * real*real: unpack rhs to constant packets, ...
+ *
+ * cd*cd : unpack rhs to (b_r,b_r), (b_i,b_i), mul to get (a_r b_r,a_i b_r) (a_r b_i,a_i b_i),
+ * storing each res packet into two packets (2x2),
+ * at the end combine them: swap the second and addsub them
+ * cf*cf : same but with 2x4 blocks
+ * cplx*real : unpack rhs to constant packets, ...
+ * real*cplx : load lhs as (a0,a0,a1,a1), and mul as usual
+ */
+template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs, bool _ConjRhs>
+class gebp_traits
+{
+public:
+ typedef _LhsScalar LhsScalar;
+ typedef _RhsScalar RhsScalar;
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+
+ enum {
+ ConjLhs = _ConjLhs,
+ ConjRhs = _ConjRhs,
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
+
+ NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
+
+ // register block size along the N direction must be 1 or 4
+ nr = 4,
+
+ // register block size along the M direction (currently, this one cannot be modified)
+ default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
+#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
+ // we assume 16 registers
+ mr = Vectorizable ? 3*LhsPacketSize : default_mr,
+#else
+ mr = default_mr,
+#endif
+
+ LhsProgress = LhsPacketSize,
+ RhsProgress = 1
+ };
+
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
+
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+
+ typedef ResPacket AccPacket;
+
+ EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
+ {
+ p = pset1<ResPacket>(ResScalar(0));
+ }
+
+ EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+ {
+ pbroadcast4(b, b0, b1, b2, b3);
+ }
+
+// EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1)
+// {
+// pbroadcast2(b, b0, b1);
+// }
+
+ template<typename RhsPacketType>
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const
+ {
+ dest = pset1<RhsPacketType>(*b);
+ }
+
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
+ {
+ dest = ploadquad<RhsPacket>(b);
+ }
+
+ template<typename LhsPacketType>
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacketType& dest) const
+ {
+ dest = pload<LhsPacketType>(a);
+ }
+
+ template<typename LhsPacketType>
+ EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const
+ {
+ dest = ploadu<LhsPacketType>(a);
+ }
+
+ template<typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
+ EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, AccPacketType& tmp) const
+ {
+ // It would be a lot cleaner to call pmadd all the time. Unfortunately if we
+ // let gcc allocate the register in which to store the result of the pmul
+    // (in the case where there is no FMA), gcc fails to figure out how to avoid
+    // spilling registers.
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+ EIGEN_UNUSED_VARIABLE(tmp);
+ c = pmadd(a,b,c);
+#else
+ tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);
+#endif
+ }
+
+ EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
+ {
+ r = pmadd(c,alpha,r);
+ }
+
+ template<typename ResPacketHalf>
+ EIGEN_STRONG_INLINE void acc(const ResPacketHalf& c, const ResPacketHalf& alpha, ResPacketHalf& r) const
+ {
+ r = pmadd(c,alpha,r);
+ }
+
+protected:
+// conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
+// conj_helper<LhsPacket,RhsPacket,ConjLhs,ConjRhs> pcj;
+};
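+
+// Illustrative sketch of how the traits above are composed into a single rank-1 update
+// step (the actual kernel below unrolls and register-blocks this pattern heavily).
+// Float scalars are assumed, and blA/blB/alphav/r are hypothetical packed-lhs/packed-rhs
+// pointers and result registers:
+//
+//   gebp_traits<float,float> traits;
+//   gebp_traits<float,float>::AccPacket c;    traits.initAcc(c);       // c = 0
+//   gebp_traits<float,float>::LhsPacket a;    traits.loadLhs(blA, a);  // aligned lhs packet
+//   gebp_traits<float,float>::RhsPacket b, t; traits.loadRhs(blB, b);  // broadcast rhs coeff
+//   traits.madd(a, b, c, t);                                           // c += a * b
+//   traits.acc(c, alphav, r);                                          // r += alpha * c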
+
+template<typename RealScalar, bool _ConjLhs>
+class gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
+{
+public:
+ typedef std::complex<RealScalar> LhsScalar;
+ typedef RealScalar RhsScalar;
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+
+ enum {
+ ConjLhs = _ConjLhs,
+ ConjRhs = false,
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
+
+ NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
+ nr = 4,
+#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
+ // we assume 16 registers
+ mr = 3*LhsPacketSize,
+#else
+ mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
+#endif
+
+ LhsProgress = LhsPacketSize,
+ RhsProgress = 1
+ };
+
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
+
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+
+ typedef ResPacket AccPacket;
+
+ EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
+ {
+ p = pset1<ResPacket>(ResScalar(0));
+ }
+
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
+ {
+ dest = pset1<RhsPacket>(*b);
+ }
+
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
+ {
+ dest = pset1<RhsPacket>(*b);
+ }
+
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = pload<LhsPacket>(a);
+ }
+
+ EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = ploadu<LhsPacket>(a);
+ }
+
+ EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+ {
+ pbroadcast4(b, b0, b1, b2, b3);
+ }
+
+// EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1)
+// {
+// pbroadcast2(b, b0, b1);
+// }
+
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
+ {
+ madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
+ }
+
+ EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
+ {
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+ EIGEN_UNUSED_VARIABLE(tmp);
+ c.v = pmadd(a.v,b,c.v);
+#else
+ tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp);
+#endif
+ }
+
+ EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const
+ {
+ c += a * b;
+ }
+
+ EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
+ {
+ r = cj.pmadd(c,alpha,r);
+ }
+
+protected:
+ conj_helper<ResPacket,ResPacket,ConjLhs,false> cj;
+};
+
+template<typename Packet>
+struct DoublePacket
+{
+ Packet first;
+ Packet second;
+};
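+
+// A DoublePacket carries the two halves of a split complex computation: used as a
+// RhsPacket, `first`/`second` hold the broadcast real and imaginary parts of one rhs
+// coefficient (see loadRhs in the complex traits below); used as an AccPacket, they hold
+// the partial products of the lhs against each of those parts, recombined only in acc().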
+
+template<typename Packet>
+DoublePacket<Packet> padd(const DoublePacket<Packet> &a, const DoublePacket<Packet> &b)
+{
+ DoublePacket<Packet> res;
+ res.first = padd(a.first, b.first);
+ res.second = padd(a.second,b.second);
+ return res;
+}
+
+template<typename Packet>
+const DoublePacket<Packet>& predux4(const DoublePacket<Packet> &a)
+{
+ return a;
+}
+
+template<typename Packet> struct unpacket_traits<DoublePacket<Packet> > { typedef DoublePacket<Packet> half; };
+// template<typename Packet>
+// DoublePacket<Packet> pmadd(const DoublePacket<Packet> &a, const DoublePacket<Packet> &b)
+// {
+// DoublePacket<Packet> res;
+// res.first = padd(a.first, b.first);
+// res.second = padd(a.second,b.second);
+// return res;
+// }
+
+template<typename RealScalar, bool _ConjLhs, bool _ConjRhs>
+class gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs >
+{
+public:
+ typedef std::complex<RealScalar> Scalar;
+ typedef std::complex<RealScalar> LhsScalar;
+ typedef std::complex<RealScalar> RhsScalar;
+ typedef std::complex<RealScalar> ResScalar;
+
+ enum {
+ ConjLhs = _ConjLhs,
+ ConjRhs = _ConjRhs,
+ Vectorizable = packet_traits<RealScalar>::Vectorizable
+ && packet_traits<Scalar>::Vectorizable,
+ RealPacketSize = Vectorizable ? packet_traits<RealScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+
+ // FIXME: should depend on NumberOfRegisters
+ nr = 4,
+ mr = ResPacketSize,
+
+ LhsProgress = ResPacketSize,
+ RhsProgress = 1
+ };
+
+ typedef typename packet_traits<RealScalar>::type RealPacket;
+ typedef typename packet_traits<Scalar>::type ScalarPacket;
+ typedef DoublePacket<RealPacket> DoublePacketType;
+
+ typedef typename conditional<Vectorizable,RealPacket, Scalar>::type LhsPacket;
+ typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type RhsPacket;
+ typedef typename conditional<Vectorizable,ScalarPacket,Scalar>::type ResPacket;
+ typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type AccPacket;
+
+ EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }
+
+ EIGEN_STRONG_INLINE void initAcc(DoublePacketType& p)
+ {
+ p.first = pset1<RealPacket>(RealScalar(0));
+ p.second = pset1<RealPacket>(RealScalar(0));
+ }
+
+ // Scalar path
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ResPacket& dest) const
+ {
+ dest = pset1<ResPacket>(*b);
+ }
+
+ // Vectorized path
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacketType& dest) const
+ {
+ dest.first = pset1<RealPacket>(real(*b));
+ dest.second = pset1<RealPacket>(imag(*b));
+ }
+
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, ResPacket& dest) const
+ {
+ loadRhs(b,dest);
+ }
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, DoublePacketType& dest) const
+ {
+ eigen_internal_assert(unpacket_traits<ScalarPacket>::size<=4);
+ loadRhs(b,dest);
+ }
+
+ EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+ {
+ // FIXME not sure that's the best way to implement it!
+ loadRhs(b+0, b0);
+ loadRhs(b+1, b1);
+ loadRhs(b+2, b2);
+ loadRhs(b+3, b3);
+ }
+
+ // Vectorized path
+ EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, DoublePacketType& b0, DoublePacketType& b1)
+ {
+ // FIXME not sure that's the best way to implement it!
+ loadRhs(b+0, b0);
+ loadRhs(b+1, b1);
+ }
+
+ // Scalar path
+ EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsScalar& b0, RhsScalar& b1)
+ {
+ // FIXME not sure that's the best way to implement it!
+ loadRhs(b+0, b0);
+ loadRhs(b+1, b1);
+ }
+
+ // nothing special here
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
+ }
+
+ EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = ploadu<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
+ }
+
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, DoublePacketType& c, RhsPacket& /*tmp*/) const
+ {
+ c.first = padd(pmul(a,b.first), c.first);
+ c.second = padd(pmul(a,b.second),c.second);
+ }
+
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/) const
+ {
+ c = cj.pmadd(a,b,c);
+ }
+
+ EIGEN_STRONG_INLINE void acc(const Scalar& c, const Scalar& alpha, Scalar& r) const { r += alpha * c; }
+
+ EIGEN_STRONG_INLINE void acc(const DoublePacketType& c, const ResPacket& alpha, ResPacket& r) const
+ {
+ // assemble c
+ ResPacket tmp;
+ if((!ConjLhs)&&(!ConjRhs))
+ {
+ tmp = pcplxflip(pconj(ResPacket(c.second)));
+ tmp = padd(ResPacket(c.first),tmp);
+ }
+ else if((!ConjLhs)&&(ConjRhs))
+ {
+ tmp = pconj(pcplxflip(ResPacket(c.second)));
+ tmp = padd(ResPacket(c.first),tmp);
+ }
+ else if((ConjLhs)&&(!ConjRhs))
+ {
+ tmp = pcplxflip(ResPacket(c.second));
+ tmp = padd(pconj(ResPacket(c.first)),tmp);
+ }
+ else if((ConjLhs)&&(ConjRhs))
+ {
+ tmp = pcplxflip(ResPacket(c.second));
+ tmp = psub(pconj(ResPacket(c.first)),tmp);
+ }
+
+ r = pmadd(tmp,alpha,r);
+ }
+
+protected:
+ conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
+};
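+
+// Worked recombination for the non-conjugated case handled by acc() above: per complex
+// lane, c.first holds (a_r*b_r, a_i*b_r) and c.second holds (a_r*b_i, a_i*b_i).
+// pconj negates the imaginary lanes of c.second, giving (a_r*b_i, -a_i*b_i), and
+// pcplxflip swaps the real/imaginary lanes, giving (-a_i*b_i, a_r*b_i). Adding c.first
+// yields (a_r*b_r - a_i*b_i, a_i*b_r + a_r*b_i), i.e. the product (a_r + i*a_i)*(b_r + i*b_i),
+// which is then scaled into r via pmadd with alpha. The conjugated variants only move the sign flips.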
+
+template<typename RealScalar, bool _ConjRhs>
+class gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs >
+{
+public:
+ typedef std::complex<RealScalar> Scalar;
+ typedef RealScalar LhsScalar;
+ typedef Scalar RhsScalar;
+ typedef Scalar ResScalar;
+
+ enum {
+ ConjLhs = false,
+ ConjRhs = _ConjRhs,
+ Vectorizable = packet_traits<RealScalar>::Vectorizable
+ && packet_traits<Scalar>::Vectorizable,
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
+
+ NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
+ // FIXME: should depend on NumberOfRegisters
+ nr = 4,
+ mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*ResPacketSize,
+
+ LhsProgress = ResPacketSize,
+ RhsProgress = 1
+ };
+
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
+
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+
+ typedef ResPacket AccPacket;
+
+ EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
+ {
+ p = pset1<ResPacket>(ResScalar(0));
+ }
+
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
+ {
+ dest = pset1<RhsPacket>(*b);
+ }
+
+ void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+ {
+ pbroadcast4(b, b0, b1, b2, b3);
+ }
+
+// EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1)
+// {
+// // FIXME not sure that's the best way to implement it!
+// b0 = pload1<RhsPacket>(b+0);
+// b1 = pload1<RhsPacket>(b+1);
+// }
+
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = ploaddup<LhsPacket>(a);
+ }
+
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
+ {
+ eigen_internal_assert(unpacket_traits<RhsPacket>::size<=4);
+ loadRhs(b,dest);
+ }
+
+ EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = ploaddup<LhsPacket>(a);
+ }
+
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
+ {
+ madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
+ }
+
+ EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
+ {
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+ EIGEN_UNUSED_VARIABLE(tmp);
+ c.v = pmadd(a,b.v,c.v);
+#else
+ tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp);
+#endif
+
+ }
+
+ EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const
+ {
+ c += a * b;
+ }
+
+ EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
+ {
+ r = cj.pmadd(alpha,c,r);
+ }
+
+protected:
+ conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
+};
+
+// helper for the rotating kernel below
+template <typename GebpKernel, bool UseRotatingKernel = GebpKernel::UseRotatingKernel>
+struct PossiblyRotatingKernelHelper
+{
+ // default implementation, not rotating
+
+ typedef typename GebpKernel::Traits Traits;
+ typedef typename Traits::RhsScalar RhsScalar;
+ typedef typename Traits::RhsPacket RhsPacket;
+ typedef typename Traits::AccPacket AccPacket;
+
+ const Traits& traits;
+ EIGEN_ALWAYS_INLINE PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
+
+
+ template <size_t K, size_t Index> EIGEN_ALWAYS_INLINE
+ void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
+ {
+ traits.loadRhs(from + (Index+4*K)*Traits::RhsProgress, to);
+ }
+
+ EIGEN_ALWAYS_INLINE void unrotateResult(AccPacket&,
+ AccPacket&,
+ AccPacket&,
+ AccPacket&)
+ {
+ }
+};
+
+// rotating implementation
+template <typename GebpKernel>
+struct PossiblyRotatingKernelHelper<GebpKernel, true>
+{
+ typedef typename GebpKernel::Traits Traits;
+ typedef typename Traits::RhsScalar RhsScalar;
+ typedef typename Traits::RhsPacket RhsPacket;
+ typedef typename Traits::AccPacket AccPacket;
+
+ const Traits& traits;
+ EIGEN_ALWAYS_INLINE PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
+
+ template <size_t K, size_t Index> EIGEN_ALWAYS_INLINE
+ void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
+ {
+ if (Index == 0) {
+ to = pload<RhsPacket>(from + 4*K*Traits::RhsProgress);
+ } else {
+ EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers");
+ to = protate<1>(to);
+ }
+ }
+
+ EIGEN_ALWAYS_INLINE void unrotateResult(AccPacket& res0,
+ AccPacket& res1,
+ AccPacket& res2,
+ AccPacket& res3)
+ {
+ PacketBlock<AccPacket> resblock;
+ resblock.packet[0] = res0;
+ resblock.packet[1] = res1;
+ resblock.packet[2] = res2;
+ resblock.packet[3] = res3;
+ ptranspose(resblock);
+ resblock.packet[3] = protate<1>(resblock.packet[3]);
+ resblock.packet[2] = protate<2>(resblock.packet[2]);
+ resblock.packet[1] = protate<3>(resblock.packet[1]);
+ ptranspose(resblock);
+ res0 = resblock.packet[0];
+ res1 = resblock.packet[1];
+ res2 = resblock.packet[2];
+ res3 = resblock.packet[3];
+ }
+};
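+
+// Rationale for the rotating variant above (ARM, 4-wide float packets): instead of four
+// broadcast loads of rhs coefficients per step, the four coefficients are loaded once as
+// a single packet and rotated by one lane between the four madds, so the accumulators end
+// up holding lane-permuted partial results; unrotateResult() then undoes that permutation
+// on the 4x4 accumulator block (transpose, per-row rotation, transpose back) before the
+// results are scaled and stored.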
+
+/* optimized GEneral packed Block * packed Panel product kernel
+ *
+ * Mixing type logic: C += A * B
+ * | A | B | comments
+ * |real |cplx | no vectorization yet, would require to pack A with duplication
+ * |cplx |real | easy vectorization
+ */
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+struct gebp_kernel
+{
+ typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits;
+ typedef typename Traits::ResScalar ResScalar;
+ typedef typename Traits::LhsPacket LhsPacket;
+ typedef typename Traits::RhsPacket RhsPacket;
+ typedef typename Traits::ResPacket ResPacket;
+ typedef typename Traits::AccPacket AccPacket;
+
+ typedef gebp_traits<RhsScalar,LhsScalar,ConjugateRhs,ConjugateLhs> SwappedTraits;
+ typedef typename SwappedTraits::ResScalar SResScalar;
+ typedef typename SwappedTraits::LhsPacket SLhsPacket;
+ typedef typename SwappedTraits::RhsPacket SRhsPacket;
+ typedef typename SwappedTraits::ResPacket SResPacket;
+ typedef typename SwappedTraits::AccPacket SAccPacket;
+
+ typedef typename DataMapper::LinearMapper LinearMapper;
+
+ enum {
+ Vectorizable = Traits::Vectorizable,
+ LhsProgress = Traits::LhsProgress,
+ RhsProgress = Traits::RhsProgress,
+ ResPacketSize = Traits::ResPacketSize
+ };
+
+ EIGEN_DONT_INLINE
+ void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
+ Index rows, Index depth, Index cols, ResScalar alpha,
+ Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);
+
+ static const bool UseRotatingKernel =
+ EIGEN_ARCH_ARM &&
+ internal::is_same<LhsScalar, float>::value &&
+ internal::is_same<RhsScalar, float>::value &&
+ internal::is_same<ResScalar, float>::value &&
+ Traits::LhsPacketSize == 4 &&
+ Traits::RhsPacketSize == 4 &&
+ Traits::ResPacketSize == 4;
+};
+
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE
+void gebp_kernel<LhsScalar, RhsScalar, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
+ ::operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
+ Index rows, Index depth, Index cols, ResScalar alpha,
+ Index strideA, Index strideB, Index offsetA, Index offsetB)
+ {
+ Traits traits;
+ SwappedTraits straits;
+
+ if(strideA==-1) strideA = depth;
+ if(strideB==-1) strideB = depth;
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+ Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+ const Index peeled_mc3 = mr>=3*Traits::LhsProgress ? (rows/(3*LhsProgress))*(3*LhsProgress) : 0;
+ const Index peeled_mc2 = mr>=2*Traits::LhsProgress ? peeled_mc3+((rows-peeled_mc3)/(2*LhsProgress))*(2*LhsProgress) : 0;
+ const Index peeled_mc1 = mr>=1*Traits::LhsProgress ? (rows/(1*LhsProgress))*(1*LhsProgress) : 0;
+ enum { pk = 8 }; // NOTE Such a large peeling factor is important for large matrices (~ +5% when >1000 on Haswell)
+ const Index peeled_kc = depth & ~(pk-1);
+ const Index prefetch_res_offset = 0;
+// const Index depth2 = depth & ~1;
+
+ //---------- Process 3 * LhsProgress rows at once ----------
+ // This corresponds to 3*LhsProgress x nr register blocks.
+    // Usually this only makes sense with FMA.
+ if(mr>=3*Traits::LhsProgress)
+ {
+ PossiblyRotatingKernelHelper<gebp_kernel> possiblyRotatingKernelHelper(traits);
+
+ // loops on each largest micro horizontal panel of lhs (3*Traits::LhsProgress x depth)
+ for(Index i=0; i<peeled_mc3; i+=3*Traits::LhsProgress)
+ {
+ // loops on each largest micro vertical panel of rhs (depth * nr)
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ // We select a 3*Traits::LhsProgress x nr micro block of res which is entirely
+ // stored into 3 x nr registers.
+
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ prefetch(&blB[0]);
+ LhsPacket A0, A1;
+
+ // gets res block as register
+ AccPacket C0, C1, C2, C3,
+ C4, C5, C6, C7,
+ C8, C9, C10, C11;
+ traits.initAcc(C0); traits.initAcc(C1); traits.initAcc(C2); traits.initAcc(C3);
+ traits.initAcc(C4); traits.initAcc(C5); traits.initAcc(C6); traits.initAcc(C7);
+ traits.initAcc(C8); traits.initAcc(C9); traits.initAcc(C10); traits.initAcc(C11);
+
+ LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+ LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+ LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+ LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+
+ r0.prefetch(0);
+ r1.prefetch(0);
+ r2.prefetch(0);
+ r3.prefetch(0);
+
+ // performs "inner" products
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
+ RhsPacket B_0, T0;
+ LhsPacket A2;
+
+#define EIGEN_GEBP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ internal::prefetch(blA+(3*K+16)*LhsProgress); \
+ if (EIGEN_ARCH_ARM) internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
+ traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
+ traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
+ possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 0>(B_0, blB); \
+ traits.madd(A0, B_0, C0, T0); \
+ traits.madd(A1, B_0, C4, T0); \
+ traits.madd(A2, B_0, C8, B_0); \
+ possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 1>(B_0, blB); \
+ traits.madd(A0, B_0, C1, T0); \
+ traits.madd(A1, B_0, C5, T0); \
+ traits.madd(A2, B_0, C9, B_0); \
+ possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 2>(B_0, blB); \
+ traits.madd(A0, B_0, C2, T0); \
+ traits.madd(A1, B_0, C6, T0); \
+ traits.madd(A2, B_0, C10, B_0); \
+ possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 3>(B_0, blB); \
+ traits.madd(A0, B_0, C3 , T0); \
+ traits.madd(A1, B_0, C7, T0); \
+ traits.madd(A2, B_0, C11, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
+ } while(false)
+
+ internal::prefetch(blB);
+ EIGEN_GEBP_ONESTEP(0);
+ EIGEN_GEBP_ONESTEP(1);
+ EIGEN_GEBP_ONESTEP(2);
+ EIGEN_GEBP_ONESTEP(3);
+ EIGEN_GEBP_ONESTEP(4);
+ EIGEN_GEBP_ONESTEP(5);
+ EIGEN_GEBP_ONESTEP(6);
+ EIGEN_GEBP_ONESTEP(7);
+
+ blB += pk*4*RhsProgress;
+ blA += pk*3*Traits::LhsProgress;
+
+ EIGEN_ASM_COMMENT("end gebp micro kernel 3pX4");
+ }
+ // process remaining peeled loop
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, T0;
+ LhsPacket A2;
+ EIGEN_GEBP_ONESTEP(0);
+ blB += 4*RhsProgress;
+ blA += 3*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBP_ONESTEP
+
+ possiblyRotatingKernelHelper.unrotateResult(C0, C1, C2, C3);
+ possiblyRotatingKernelHelper.unrotateResult(C4, C5, C6, C7);
+ possiblyRotatingKernelHelper.unrotateResult(C8, C9, C10, C11);
+
+ ResPacket R0, R1, R2;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r0.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ traits.acc(C8, alphav, R2);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ r0.storePacket(2 * Traits::ResPacketSize, R2);
+
+ R0 = r1.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r1.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r1.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C1, alphav, R0);
+ traits.acc(C5, alphav, R1);
+ traits.acc(C9, alphav, R2);
+ r1.storePacket(0 * Traits::ResPacketSize, R0);
+ r1.storePacket(1 * Traits::ResPacketSize, R1);
+ r1.storePacket(2 * Traits::ResPacketSize, R2);
+
+ R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r2.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r2.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C2, alphav, R0);
+ traits.acc(C6, alphav, R1);
+ traits.acc(C10, alphav, R2);
+ r2.storePacket(0 * Traits::ResPacketSize, R0);
+ r2.storePacket(1 * Traits::ResPacketSize, R1);
+ r2.storePacket(2 * Traits::ResPacketSize, R2);
+
+ R0 = r3.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r3.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r3.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C3, alphav, R0);
+ traits.acc(C7, alphav, R1);
+ traits.acc(C11, alphav, R2);
+ r3.storePacket(0 * Traits::ResPacketSize, R0);
+ r3.storePacket(1 * Traits::ResPacketSize, R1);
+ r3.storePacket(2 * Traits::ResPacketSize, R2);
+ }
+
+ // Deal with remaining columns of the rhs
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ // One column at a time
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ prefetch(&blB[0]);
+ // gets res block as register
+ AccPacket C0, C4, C8;
+ traits.initAcc(C0);
+ traits.initAcc(C4);
+ traits.initAcc(C8);
+
+ LinearMapper r0 = res.getLinearMapper(i, j2);
+ r0.prefetch(0);
+ LhsPacket A0, A1, A2;
+
+ // performs "inner" products
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX1");
+ RhsPacket B_0;
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
+ traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B_0); \
+ traits.madd(A1, B_0, C4, B_0); \
+ traits.madd(A2, B_0, C8, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX1"); \
+ } while(false)
+
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+
+ blB += pk*RhsProgress;
+ blA += pk*3*Traits::LhsProgress;
+
+ EIGEN_ASM_COMMENT("end gebp micro kernel 3pX1");
+ }
+
+ // process remaining peeled loop
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += RhsProgress;
+ blA += 3*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0, R1, R2;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r0.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ traits.acc(C8, alphav, R2);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ r0.storePacket(2 * Traits::ResPacketSize, R2);
+ }
+ }
+ }
+
+ //---------- Process 2 * LhsProgress rows at once ----------
+ if(mr>=2*Traits::LhsProgress)
+ {
+ // loops on each largest micro horizontal panel of lhs (2*LhsProgress x depth)
+ for(Index i=peeled_mc3; i<peeled_mc2; i+=2*LhsProgress)
+ {
+ // loops on each largest micro vertical panel of rhs (depth * nr)
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ // We select a 2*Traits::LhsProgress x nr micro block of res which is entirely
+ // stored into 2 x nr registers.
+
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ prefetch(&blB[0]);
+
+ // gets res block as register
+ AccPacket C0, C1, C2, C3,
+ C4, C5, C6, C7;
+ traits.initAcc(C0); traits.initAcc(C1); traits.initAcc(C2); traits.initAcc(C3);
+ traits.initAcc(C4); traits.initAcc(C5); traits.initAcc(C6); traits.initAcc(C7);
+
+ LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+ LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+ LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+ LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+
+ r0.prefetch(prefetch_res_offset);
+ r1.prefetch(prefetch_res_offset);
+ r2.prefetch(prefetch_res_offset);
+ r3.prefetch(prefetch_res_offset);
+
+ LhsPacket A0, A1;
+
+ // performs "inner" products
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4");
+ RhsPacket B_0, B1, B2, B3, T0;
+
+ // The 2 ASM comments in the #define are intended to prevent gcc
+          // from optimizing the code across steps since it ends up spilling
+ // registers in this case.
+ #define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
+ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
+ traits.madd(A0, B_0, C0, T0); \
+ traits.madd(A1, B_0, C4, B_0); \
+ traits.madd(A0, B1, C1, T0); \
+ traits.madd(A1, B1, C5, B1); \
+ traits.madd(A0, B2, C2, T0); \
+ traits.madd(A1, B2, C6, B2); \
+ traits.madd(A0, B3, C3, T0); \
+ traits.madd(A1, B3, C7, B3); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4"); \
+ } while(false)
+
+ prefetch(&blB[pk*4*RhsProgress]);
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+
+ blB += pk*4*RhsProgress;
+ blA += pk*(2*Traits::LhsProgress);
+
+ EIGEN_ASM_COMMENT("end gebp micro kernel 2pX4");
+ }
+ // process remaining peeled loop
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, B1, B2, B3, T0;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += 4*RhsProgress;
+ blA += 2*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+
+ ResPacket R0, R1, R2, R3;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r1.loadPacket(0 * Traits::ResPacketSize);
+ R3 = r1.loadPacket(1 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ traits.acc(C1, alphav, R2);
+ traits.acc(C5, alphav, R3);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ r1.storePacket(0 * Traits::ResPacketSize, R2);
+ r1.storePacket(1 * Traits::ResPacketSize, R3);
+
+ R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r2.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r3.loadPacket(0 * Traits::ResPacketSize);
+ R3 = r3.loadPacket(1 * Traits::ResPacketSize);
+ traits.acc(C2, alphav, R0);
+ traits.acc(C6, alphav, R1);
+ traits.acc(C3, alphav, R2);
+ traits.acc(C7, alphav, R3);
+ r2.storePacket(0 * Traits::ResPacketSize, R0);
+ r2.storePacket(1 * Traits::ResPacketSize, R1);
+ r3.storePacket(0 * Traits::ResPacketSize, R2);
+ r3.storePacket(1 * Traits::ResPacketSize, R3);
+ }
+
+ // Deal with remaining columns of the rhs
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ // One column at a time
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ prefetch(&blB[0]);
+
+ // gets res block as register
+ AccPacket C0, C4;
+ traits.initAcc(C0);
+ traits.initAcc(C4);
+
+ LinearMapper r0 = res.getLinearMapper(i, j2);
+ r0.prefetch(prefetch_res_offset);
+ LhsPacket A0, A1;
+
+ // performs "inner" products
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX1");
+ RhsPacket B_0, B1;
+
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B1); \
+ traits.madd(A1, B_0, C4, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \
+ } while(false)
+
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+
+ blB += pk*RhsProgress;
+ blA += pk*2*Traits::LhsProgress;
+
+ EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1");
+ }
+
+ // process remaining peeled loop
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, B1;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += RhsProgress;
+ blA += 2*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0, R1;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ }
+ }
+ }
+ //---------- Process 1 * LhsProgress rows at once ----------
+ if(mr>=1*Traits::LhsProgress)
+ {
+ // loops on each largest micro horizontal panel of lhs (1*LhsProgress x depth)
+ for(Index i=peeled_mc2; i<peeled_mc1; i+=1*LhsProgress)
+ {
+ // loops on each largest micro vertical panel of rhs (depth * nr)
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ // We select a 1*Traits::LhsProgress x nr micro block of res which is entirely
+ // stored into 1 x nr registers.
+
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(1*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ prefetch(&blB[0]);
+
+ // gets res block as register
+ AccPacket C0, C1, C2, C3;
+ traits.initAcc(C0);
+ traits.initAcc(C1);
+ traits.initAcc(C2);
+ traits.initAcc(C3);
+
+ LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+ LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+ LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+ LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+
+ r0.prefetch(prefetch_res_offset);
+ r1.prefetch(prefetch_res_offset);
+ r2.prefetch(prefetch_res_offset);
+ r3.prefetch(prefetch_res_offset);
+ LhsPacket A0;
+
+ // performs "inner" products
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX4");
+ RhsPacket B_0, B1, B2, B3;
+
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
+ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
+ traits.madd(A0, B_0, C0, B_0); \
+ traits.madd(A0, B1, C1, B1); \
+ traits.madd(A0, B2, C2, B2); \
+ traits.madd(A0, B3, C3, B3); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX4"); \
+ } while(false)
+
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+
+ blB += pk*4*RhsProgress;
+ blA += pk*1*LhsProgress;
+
+ EIGEN_ASM_COMMENT("end gebp micro kernel 1pX4");
+ }
+ // process remaining peeled loop
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, B1, B2, B3;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += 4*RhsProgress;
+ blA += 1*LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+
+ ResPacket R0, R1;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r1.loadPacket(0 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C1, alphav, R1);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r1.storePacket(0 * Traits::ResPacketSize, R1);
+
+ R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r3.loadPacket(0 * Traits::ResPacketSize);
+ traits.acc(C2, alphav, R0);
+ traits.acc(C3, alphav, R1);
+ r2.storePacket(0 * Traits::ResPacketSize, R0);
+ r3.storePacket(0 * Traits::ResPacketSize, R1);
+ }
+
+ // Deal with remaining columns of the rhs
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ // One column at a time
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(1*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ prefetch(&blB[0]);
+
+ // gets res block as register
+ AccPacket C0;
+ traits.initAcc(C0);
+
+ LinearMapper r0 = res.getLinearMapper(i, j2);
+ LhsPacket A0;
+
+ // performs "inner" products
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+            EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX1");
+ RhsPacket B_0;
+
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+            EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B_0); \
+            EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX1"); \
+ } while(false)
+
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+
+ blB += pk*RhsProgress;
+ blA += pk*1*Traits::LhsProgress;
+
+            EIGEN_ASM_COMMENT("end gebp micro kernel 1pX1");
+ }
+
+ // process remaining peeled loop
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += RhsProgress;
+ blA += 1*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ }
+ }
+ }
+ //---------- Process remaining rows, 1 by 1 ----------
+ for(Index i=peeled_mc1; i<rows; i+=1)
+ {
+ // loop on each panel of the rhs
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA];
+ prefetch(&blA[0]);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ prefetch(&blB[0]);
+
+ if( (SwappedTraits::LhsProgress % 4)==0 )
+ {
+          // NOTE The following piece of code won't work for 512 bit registers
+ SAccPacket C0, C1, C2, C3;
+ straits.initAcc(C0);
+ straits.initAcc(C1);
+ straits.initAcc(C2);
+ straits.initAcc(C3);
+
+ const Index spk = (std::max)(1,SwappedTraits::LhsProgress/4);
+ const Index endk = (depth/spk)*spk;
+ const Index endk4 = (depth/(spk*4))*(spk*4);
+
+ Index k=0;
+ for(; k<endk4; k+=4*spk)
+ {
+ prefetch(&blB[4*SwappedTraits::LhsProgress]);
+
+ SLhsPacket A0,A1,A2,A3;
+ SRhsPacket B_0,B_1,B_2,B_3;
+
+ straits.loadLhsUnaligned(blB+0*SwappedTraits::LhsProgress, A0);
+ straits.loadLhsUnaligned(blB+1*SwappedTraits::LhsProgress, A1);
+ straits.loadRhsQuad(blA+0*spk, B_0);
+ straits.loadRhsQuad(blA+1*spk, B_1);
+ straits.madd(A0,B_0,C0,B_0);
+ straits.madd(A1,B_1,C1,B_1);
+
+ straits.loadLhsUnaligned(blB+2*SwappedTraits::LhsProgress, A2);
+ straits.loadLhsUnaligned(blB+3*SwappedTraits::LhsProgress, A3);
+ straits.loadRhsQuad(blA+2*spk, B_2);
+ straits.loadRhsQuad(blA+3*spk, B_3);
+ straits.madd(A2,B_2,C2,B_2);
+ straits.madd(A3,B_3,C3,B_3);
+
+ blB += 4*SwappedTraits::LhsProgress;
+ blA += 4*spk;
+ }
+ C0 = padd(padd(C0,C1),padd(C2,C3));
+ for(; k<endk; k+=spk)
+ {
+ SLhsPacket A0;
+ SRhsPacket B_0;
+
+ straits.loadLhsUnaligned(blB, A0);
+ straits.loadRhsQuad(blA, B_0);
+ straits.madd(A0,B_0,C0,B_0);
+
+ blB += SwappedTraits::LhsProgress;
+ blA += spk;
+ }
+ if(SwappedTraits::LhsProgress==8)
+ {
+ // Special case where we have to first reduce the accumulation register C0
+ typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SResPacket>::half,SResPacket>::type SResPacketHalf;
+ typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SLhsPacket>::half,SLhsPacket>::type SLhsPacketHalf;
+ typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SLhsPacket>::half,SRhsPacket>::type SRhsPacketHalf;
+ typedef typename conditional<SwappedTraits::LhsProgress==8,typename unpacket_traits<SAccPacket>::half,SAccPacket>::type SAccPacketHalf;
+
+ SResPacketHalf R = res.template gatherPacket<SResPacketHalf>(i, j2);
+ SResPacketHalf alphav = pset1<SResPacketHalf>(alpha);
+
+ if(depth-endk>0)
+ {
+ // We have to handle the last row of the rhs which corresponds to a half-packet
+ SLhsPacketHalf a0;
+ SRhsPacketHalf b0;
+ straits.loadLhsUnaligned(blB, a0);
+ straits.loadRhs(blA, b0);
+ SAccPacketHalf c0 = predux4(C0);
+ straits.madd(a0,b0,c0,b0);
+ straits.acc(c0, alphav, R);
+ }
+ else
+ {
+ straits.acc(predux4(C0), alphav, R);
+ }
+ res.scatterPacket(i, j2, R);
+ }
+ else
+ {
+ SResPacket R = res.template gatherPacket<SResPacket>(i, j2);
+ SResPacket alphav = pset1<SResPacket>(alpha);
+ straits.acc(C0, alphav, R);
+ res.scatterPacket(i, j2, R);
+ }
+ }
+ else // scalar path
+ {
+ // get a 1 x 4 res block as registers
+ ResScalar C0(0), C1(0), C2(0), C3(0);
+
+ for(Index k=0; k<depth; k++)
+ {
+ LhsScalar A0 = blA[k];
+ RhsScalar B_0 = blB[0];
+ RhsScalar B_1 = blB[1];
+ CJMADD(cj,A0,B_0,C0, B_0);
+ CJMADD(cj,A0,B_1,C1, B_1);
+ RhsScalar B_2 = blB[2];
+ RhsScalar B_3 = blB[3];
+ CJMADD(cj,A0,B_2,C2, B_2);
+ CJMADD(cj,A0,B_3,C3, B_3);
+
+ blB += 4;
+ }
+ res(i, j2 + 0) += alpha * C0;
+ res(i, j2 + 1) += alpha * C1;
+ res(i, j2 + 2) += alpha * C2;
+ res(i, j2 + 3) += alpha * C3;
+ }
+ }
+
+ // remaining columns
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA];
+ // prefetch(blA);
+ // gets a 1 x 1 res block as registers
+ ResScalar C0(0);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ for(Index k=0; k<depth; k++)
+ {
+ LhsScalar A0 = blA[k];
+ RhsScalar B_0 = blB[k];
+ CJMADD(cj, A0, B_0, C0, B_0);
+ }
+ res(i, j2) += alpha * C0;
+ }
+ }
+ }
+
+
+#undef CJMADD
+
+// pack a block of the lhs
+// The traversal is as follows (mr==4):
+// 0 4 8 12 ...
+// 1 5 9 13 ...
+// 2 6 10 14 ...
+// 3 7 11 15 ...
+//
+// 16 20 24 28 ...
+// 17 21 25 29 ...
+// 18 22 26 30 ...
+// 19 23 27 31 ...
+//
+// 32 33 34 35 ...
+// 36 37 38 39 ...
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
+{
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
+
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ eigen_assert( ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) || (Pack1<=4) );
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+
+ const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
+ const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
+ const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
+ const Index peeled_mc0 = Pack2>=1*PacketSize ? peeled_mc1
+ : Pack2>1 ? (rows/Pack2)*Pack2 : 0;
+
+ Index i=0;
+
+ // Pack 3 packets
+ if(Pack1>=3*PacketSize)
+ {
+ if(PanelMode)
+ {
+ for(; i<peeled_mc3; i+=3*PacketSize)
+ {
+ blockA += (3*PacketSize) * offset;
+
+ for(Index k=0; k<depth; k++)
+ {
+ Packet A, B, C;
+ A = lhs.loadPacket(i+0*PacketSize, k);
+ B = lhs.loadPacket(i+1*PacketSize, k);
+ C = lhs.loadPacket(i+2*PacketSize, k);
+ pstore(blockA+0*PacketSize, cj.pconj(A));
+ pstore(blockA+1*PacketSize, cj.pconj(B));
+ pstore(blockA+2*PacketSize, cj.pconj(C));
+ blockA += 3*PacketSize;
+ }
+ blockA += (3*PacketSize) * (stride-offset-depth);
+ }
+ }
+ else
+ {
+ // Read the data from DRAM as sequentially as possible. We're writing to
+ // SRAM so the order of the writes shouldn't impact performance.
+ for(Index k=0; k<depth; k++)
+ {
+ Scalar* localBlockA = blockA + 3*PacketSize*k;
+ for(Index local_i = i; local_i<peeled_mc3; local_i+=3*PacketSize)
+ {
+ Packet A, B, C;
+ A = lhs.loadPacket(local_i+0*PacketSize, k);
+ B = lhs.loadPacket(local_i+1*PacketSize, k);
+ C = lhs.loadPacket(local_i+2*PacketSize, k);
+ pstore(localBlockA+0*PacketSize, cj.pconj(A));
+ pstore(localBlockA+1*PacketSize, cj.pconj(B));
+ pstore(localBlockA+2*PacketSize, cj.pconj(C));
+ localBlockA += 3*PacketSize*depth;
+ }
+ }
+ blockA += depth*peeled_mc3;
+ i = peeled_mc3;
+ }
+ }
+ // Pack 2 packets
+ if(Pack1>=2*PacketSize)
+ {
+ if(PanelMode)
+ {
+ for(; i<peeled_mc2; i+=2*PacketSize)
+ {
+ blockA += (2*PacketSize) * offset;
+
+ for(Index k=0; k<depth; k++)
+ {
+ Packet A, B;
+ A = lhs.loadPacket(i+0*PacketSize, k);
+ B = lhs.loadPacket(i+1*PacketSize, k);
+ pstore(blockA+0*PacketSize, cj.pconj(A));
+ pstore(blockA+1*PacketSize, cj.pconj(B));
+ blockA += 2*PacketSize;
+ }
+ blockA += (2*PacketSize) * (stride-offset-depth);
+ }
+ }
+ else
+ {
+ // Read the data from RAM as sequentially as possible.
+ for(Index k=0; k<depth; k++)
+ {
+ Scalar* localBlockA = blockA + 2*PacketSize*k;
+ for(Index local_i = i; local_i<peeled_mc2; local_i+=2*PacketSize)
+ {
+ Packet A, B;
+ A = lhs.loadPacket(local_i+0*PacketSize, k);
+ B = lhs.loadPacket(local_i+1*PacketSize, k);
+ pstore(localBlockA+0*PacketSize, cj.pconj(A));
+ pstore(localBlockA+1*PacketSize, cj.pconj(B));
+ localBlockA += 2*PacketSize*depth;
+ }
+ }
+ blockA += depth*(peeled_mc2-i);
+ i = peeled_mc2;
+ }
+ }
+ // Pack 1 packets
+ if(Pack1>=1*PacketSize)
+ {
+ if(PanelMode)
+ {
+ for(; i<peeled_mc1; i+=1*PacketSize)
+ {
+ blockA += (1*PacketSize) * offset;
+
+ for(Index k=0; k<depth; k++)
+ {
+ Packet A;
+ A = lhs.loadPacket(i+0*PacketSize, k);
+ pstore(blockA, cj.pconj(A));
+ blockA+=PacketSize;
+ }
+ blockA += (1*PacketSize) * (stride-offset-depth);
+ }
+ }
+ else
+ {
+ // Read the data from RAM as sequentially as possible.
+ for(Index k=0; k<depth; k++)
+ {
+ Scalar* localBlockA = blockA + PacketSize*k;
+ for(Index local_i = i; local_i<peeled_mc1; local_i+=1*PacketSize)
+ {
+ Packet A;
+ A = lhs.loadPacket(local_i+0*PacketSize, k);
+ pstore(localBlockA, cj.pconj(A));
+ localBlockA += PacketSize*depth;
+ }
+ }
+ blockA += depth*(peeled_mc1-i);
+ i = peeled_mc1;
+ }
+ }
+ // Pack scalars
+ if(Pack2<PacketSize && Pack2>1)
+ {
+ for(; i<peeled_mc0; i+=Pack2)
+ {
+ if (PanelMode) {
+ blockA += Pack2 * offset;
+ }
+
+ for(Index k=0; k<depth; k++) {
+ const LinearMapper dm0 = lhs.getLinearMapper(i, k);
+ for(Index w=0; w<Pack2; w++) {
+ *blockA = cj(dm0(w));
+ blockA += 1;
+ }
+ }
+
+ if(PanelMode) blockA += Pack2 * (stride-offset-depth);
+ }
+ }
+ for(; i<rows; i++)
+ {
+ if(PanelMode) blockA += offset;
+ for(Index k=0; k<depth; k++) {
+ *blockA = cj(lhs(i, k));
+ blockA += 1;
+ }
+ if(PanelMode) blockA += (stride-offset-depth);
+ }
+}
+
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
+{
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
+
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+
+// const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
+// const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
+// const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
+
+ int pack = Pack1;
+ Index i = 0;
+ while(pack>0)
+ {
+ Index remaining_rows = rows-i;
+ Index peeled_mc = i+(remaining_rows/pack)*pack;
+ for(; i<peeled_mc; i+=pack)
+ {
+ if(PanelMode) blockA += pack * offset;
+
+ const Index peeled_k = (depth/PacketSize)*PacketSize;
+ Index k=0;
+ if(pack>=PacketSize)
+ {
+ for(; k<peeled_k; k+=PacketSize)
+ {
+ for (Index m = 0; m < pack; m += PacketSize)
+ {
+ PacketBlock<Packet> kernel;
+ for (int p = 0; p < PacketSize; ++p) kernel.packet[p] = lhs.loadPacket(i+p+m, k);
+ ptranspose(kernel);
+ for (int p = 0; p < PacketSize; ++p) pstore(blockA+m+(pack)*p, cj.pconj(kernel.packet[p]));
+ }
+ blockA += PacketSize*pack;
+ }
+ }
+ for(; k<depth; k++)
+ {
+ Index w=0;
+ for(; w<pack-3; w+=4)
+ {
+ Scalar a(cj(lhs(i+w+0, k))),
+ b(cj(lhs(i+w+1, k))),
+ c(cj(lhs(i+w+2, k))),
+ d(cj(lhs(i+w+3, k)));
+ blockA[0] = a;
+ blockA[1] = b;
+ blockA[2] = c;
+ blockA[3] = d;
+ blockA += 4;
+ }
+ if(pack%4)
+ for(;w<pack;++w) {
+ *blockA = cj(lhs(i+w, k));
+ blockA += 1;
+ }
+ }
+
+ if(PanelMode) blockA += pack * (stride-offset-depth);
+ }
+
+ pack -= PacketSize;
+ if(pack<Pack2 && (pack+PacketSize)!=Pack2)
+ pack = Pack2;
+ }
+
+ for(; i<rows; i++)
+ {
+ if(PanelMode) blockA += offset;
+ for(Index k=0; k<depth; k++) {
+ *blockA = cj(lhs(i, k));
+ blockA += 1;
+ }
+ if(PanelMode) blockA += (stride-offset-depth);
+ }
+}
+
+// copy a complete panel of the rhs
+// this version is optimized for column major matrices
+// The traversal order is as follows (nr==4):
+// 0 1 2 3 12 13 14 15 24 27
+// 4 5 6 7 16 17 18 19 25 28
+// 8 9 10 11 20 21 22 23 26 29
+// . . . . . . . . . .
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ enum { PacketSize = packet_traits<Scalar>::size };
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
+::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
+ Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+ const Index peeled_k = (depth/PacketSize)*PacketSize;
+// if(nr>=8)
+// {
+// for(Index j2=0; j2<packet_cols8; j2+=8)
+// {
+// // skip what we have before
+// if(PanelMode) count += 8 * offset;
+// const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+// const Scalar* b1 = &rhs[(j2+1)*rhsStride];
+// const Scalar* b2 = &rhs[(j2+2)*rhsStride];
+// const Scalar* b3 = &rhs[(j2+3)*rhsStride];
+// const Scalar* b4 = &rhs[(j2+4)*rhsStride];
+// const Scalar* b5 = &rhs[(j2+5)*rhsStride];
+// const Scalar* b6 = &rhs[(j2+6)*rhsStride];
+// const Scalar* b7 = &rhs[(j2+7)*rhsStride];
+// Index k=0;
+// if(PacketSize==8) // TODO enable vectorized transposition for PacketSize==4
+// {
+// for(; k<peeled_k; k+=PacketSize) {
+// PacketBlock<Packet> kernel;
+// for (int p = 0; p < PacketSize; ++p) {
+// kernel.packet[p] = ploadu<Packet>(&rhs[(j2+p)*rhsStride+k]);
+// }
+// ptranspose(kernel);
+// for (int p = 0; p < PacketSize; ++p) {
+// pstoreu(blockB+count, cj.pconj(kernel.packet[p]));
+// count+=PacketSize;
+// }
+// }
+// }
+// for(; k<depth; k++)
+// {
+// blockB[count+0] = cj(b0[k]);
+// blockB[count+1] = cj(b1[k]);
+// blockB[count+2] = cj(b2[k]);
+// blockB[count+3] = cj(b3[k]);
+// blockB[count+4] = cj(b4[k]);
+// blockB[count+5] = cj(b5[k]);
+// blockB[count+6] = cj(b6[k]);
+// blockB[count+7] = cj(b7[k]);
+// count += 8;
+// }
+// // skip what we have after
+// if(PanelMode) count += 8 * (stride-offset-depth);
+// }
+// }
+
+ if(nr>=4)
+ {
+ for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
+ {
+ // skip what we have before
+ if(PanelMode) blockB += 4 * offset;
+
+ // TODO: each of these makes a copy of the stride :(
+ const LinearMapper dm0 = rhs.getLinearMapper(0, j2 + 0);
+ const LinearMapper dm1 = rhs.getLinearMapper(0, j2 + 1);
+ const LinearMapper dm2 = rhs.getLinearMapper(0, j2 + 2);
+ const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
+
+ Index k=0;
+ if((PacketSize%4)==0) // TODO enable vectorized transposition for PacketSize==2 ??
+ {
+ for(; k<peeled_k; k+=PacketSize) {
+ PacketBlock<Packet, 4> kernel;
+ kernel.packet[0] = dm0.loadPacket(k);
+ kernel.packet[1] = dm1.loadPacket(k);
+ kernel.packet[2] = dm2.loadPacket(k);
+ kernel.packet[3] = dm3.loadPacket(k);
+ ptranspose(kernel);
+ pstoreu(blockB+0*PacketSize, cj.pconj(kernel.packet[0]));
+ pstoreu(blockB+1*PacketSize, cj.pconj(kernel.packet[1]));
+ pstoreu(blockB+2*PacketSize, cj.pconj(kernel.packet[2]));
+ pstoreu(blockB+3*PacketSize, cj.pconj(kernel.packet[3]));
+ blockB+=4*PacketSize;
+ }
+ }
+ for(; k<depth; k++)
+ {
+ blockB[0] = cj(dm0(k));
+ blockB[1] = cj(dm1(k));
+ blockB[2] = cj(dm2(k));
+ blockB[3] = cj(dm3(k));
+ blockB += 4;
+ }
+ // skip what we have after
+ if(PanelMode) blockB += 4 * (stride-offset-depth);
+ }
+ }
+
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols4; j2<cols; ++j2)
+ {
+ const LinearMapper dm0 = rhs.getLinearMapper(0, j2);
+ if(PanelMode) blockB += offset;
+ for(Index k=0; k<depth; k++)
+ {
+ *blockB = cj(dm0(k));
+ blockB += 1;
+ }
+ if(PanelMode) blockB += (stride-offset-depth);
+ }
+}
+
+// this version is optimized for row major matrices
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<Scalar>::half HalfPacket;
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ enum {
+ PacketSize = packet_traits<Scalar>::size,
+ HalfPacketSize = packet_traits<Scalar>::HasHalfPacket ? unpacket_traits<typename packet_traits<Scalar>::half>::size : 0
+ };
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
+ Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+
+// if(nr>=8)
+// {
+// for(Index j2=0; j2<packet_cols8; j2+=8)
+// {
+// // skip what we have before
+// if(PanelMode) count += 8 * offset;
+// for(Index k=0; k<depth; k++)
+// {
+// if (PacketSize==8) {
+// Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);
+// pstoreu(blockB+count, cj.pconj(A));
+// } else if (PacketSize==4) {
+// Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);
+// Packet B = ploadu<Packet>(&rhs[k*rhsStride + j2 + PacketSize]);
+// pstoreu(blockB+count, cj.pconj(A));
+// pstoreu(blockB+count+PacketSize, cj.pconj(B));
+// } else {
+// const Scalar* b0 = &rhs[k*rhsStride + j2];
+// blockB[count+0] = cj(b0[0]);
+// blockB[count+1] = cj(b0[1]);
+// blockB[count+2] = cj(b0[2]);
+// blockB[count+3] = cj(b0[3]);
+// blockB[count+4] = cj(b0[4]);
+// blockB[count+5] = cj(b0[5]);
+// blockB[count+6] = cj(b0[6]);
+// blockB[count+7] = cj(b0[7]);
+// }
+// count += 8;
+// }
+// // skip what we have after
+// if(PanelMode) count += 8 * (stride-offset-depth);
+// }
+// }
+ if(nr>=4)
+ {
+ for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
+ {
+ // skip what we have before
+ if(PanelMode) blockB += 4 * offset;
+ for(Index k=0; k<depth; k++)
+ {
+ if (PacketSize==4) {
+ Packet A = rhs.loadPacket(k, j2);
+ pstore(blockB, cj.pconj(A));
+ blockB += PacketSize;
+ }
+ else if (HalfPacketSize==4) {
+ HalfPacket A = rhs.loadHalfPacket(k, j2);
+ pstore<Scalar, HalfPacket>(blockB, cj.pconj(A));
+ blockB += HalfPacketSize;
+ }
+ else {
+ const LinearMapper dm0 = rhs.getLinearMapper(k, j2);
+ blockB[0] = cj(dm0(0));
+ blockB[1] = cj(dm0(1));
+ blockB[2] = cj(dm0(2));
+ blockB[3] = cj(dm0(3));
+ blockB += 4;
+ }
+ }
+ // skip what we have after
+ if(PanelMode) blockB += 4 * (stride-offset-depth);
+ }
+ }
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols4; j2<cols; ++j2)
+ {
+ if(PanelMode) blockB += offset;
+ for(Index k=0; k<depth; k++)
+ {
+ *blockB = cj(rhs(k, j2));
+ blockB += 1;
+ }
+ if(PanelMode) blockB += stride-offset-depth;
+ }
+}
+
+} // end namespace internal
+
+/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
+ * \sa setCpuCacheSize */
+inline std::ptrdiff_t l1CacheSize()
+{
+ std::ptrdiff_t l1, l2, l3;
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+ return l1;
+}
+
+/** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
+ * \sa setCpuCacheSize */
+inline std::ptrdiff_t l2CacheSize()
+{
+ std::ptrdiff_t l1, l2, l3;
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+ return l2;
+}
+
+/** \returns the currently set level 3 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
+ * \sa setCpuCacheSize */
+inline std::ptrdiff_t l3CacheSize()
+{
+ std::ptrdiff_t l1, l2, l3;
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+ return l3;
+}
+
+/** Set the cpu L1 and L2 cache sizes (in bytes).
+ * These values are used to adjust the size of the blocks
+ * for the algorithms working per block.
+ *
+ * \sa computeProductBlockingSizes */
+inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2, std::ptrdiff_t l3)
+{
+ internal::manage_caching_sizes(SetAction, &l1, &l2, &l3);
+}
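+
+// Illustrative usage of the cache-size API above (the byte values below are
+// hypothetical examples, not recommendations):
+//
+//   std::ptrdiff_t l1 = Eigen::l1CacheSize();
+//   std::ptrdiff_t l2 = Eigen::l2CacheSize();
+//   std::ptrdiff_t l3 = Eigen::l3CacheSize();
+//   Eigen::setCpuCacheSizes(32*1024, 256*1024, 8*1024*1024);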
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_BLOCK_PANEL_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h
new file mode 100644
index 0000000000..c3715b1a39
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -0,0 +1,465 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
+
+/* Specialization for a row-major destination matrix => simple transposition of the product */
+template<
+ typename Index,
+ typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
+struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
+{
+ typedef gebp_traits<RhsScalar,LhsScalar> Traits;
+
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+ static EIGEN_STRONG_INLINE void run(
+ Index rows, Index cols, Index depth,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsStride,
+ ResScalar* res, Index resStride,
+ ResScalar alpha,
+ level3_blocking<RhsScalar,LhsScalar>& blocking,
+ GemmParallelInfo<Index>* info = 0)
+ {
+ // transpose the product such that the result is column major
+ general_matrix_matrix_product<Index,
+ RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
+ LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
+ ColMajor>
+ ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info);
+ }
+};
+
+/* Specialization for a col-major destination matrix
+ * => Blocking algorithm following Goto's paper */
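+//
+// Sketch of the blocking structure implemented by the sequential path of run()
+// below (kc/mc/nc are the cache block sizes provided by level3_blocking):
+//
+//   for each mc-high panel of the lhs                          (i2 loop)
+//     for each kc-deep slice of that panel -> pack_lhs into A' (k2 loop)
+//       for each kc x nc block of the rhs  -> pack_rhs into B' (j2 loop)
+//         res(i2.., j2..) += alpha * A' * B'                   (gebp kernel)
+//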
+template<
+ typename Index,
+ typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
+struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
+{
+
+typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+
+typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+static void run(Index rows, Index cols, Index depth,
+ const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsStride,
+ ResScalar* _res, Index resStride,
+ ResScalar alpha,
+ level3_blocking<LhsScalar,RhsScalar>& blocking,
+ GemmParallelInfo<Index>* info = 0)
+{
+ typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
+ LhsMapper lhs(_lhs,lhsStride);
+ RhsMapper rhs(_rhs,rhsStride);
+ ResMapper res(_res, resStride);
+
+ Index kc = blocking.kc(); // cache block size along the K direction
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
+ Index nc = (std::min)(cols,blocking.nc()); // cache block size along the N direction
+
+ gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
+ gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
+
+#ifdef EIGEN_HAS_OPENMP
+ if(info)
+ {
+ // this is the parallel version!
+ Index tid = omp_get_thread_num();
+ Index threads = omp_get_num_threads();
+
+ LhsScalar* blockA = blocking.blockA();
+ eigen_internal_assert(blockA!=0);
+
+ std::size_t sizeB = kc*nc;
+ ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
+
+ // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
+ for(Index k=0; k<depth; k+=kc)
+ {
+ const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'
+
+ // In order to reduce the chance that a thread has to wait for the others,
+ // let's start by packing B'.
+ pack_rhs(blockB, rhs.getSubMapper(k,0), actual_kc, nc);
+
+ // Pack A_k to A' in a parallel fashion:
+ // each thread packs the sub block A_k,i to A'_i where i is the thread id.
+
+ // However, before copying to A'_i, we have to make sure that no other thread is still using it,
+ // i.e., we test that info[tid].users equals 0.
+ // Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
+ while(info[tid].users!=0) {}
+ info[tid].users += threads;
+
+ pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);
+
+ // Notify the other threads that the part A'_i is ready to go.
+ info[tid].sync = k;
+
+ // Computes C_i += A' * B' per A'_i
+ for(Index shift=0; shift<threads; ++shift)
+ {
+ Index i = (tid+shift)%threads;
+
+ // At this point we have to make sure that A'_i has been updated by the thread i,
+ // we use testAndSetOrdered to mimic a volatile access.
+ // However, no need to wait for the B' part which has been updated by the current thread!
+ if (shift>0) {
+ while(info[i].sync!=k) {
+ }
+ }
+
+ gebp(res.getSubMapper(info[i].lhs_start, 0), blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
+ }
+
+ // Then keep going as usual with the remaining B'
+ for(Index j=nc; j<cols; j+=nc)
+ {
+ const Index actual_nc = (std::min)(j+nc,cols)-j;
+
+ // pack B_k,j to B'
+ pack_rhs(blockB, rhs.getSubMapper(k,j), actual_kc, actual_nc);
+
+ // C_j += A' * B'
+ gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);
+ }
+
+ // Release all the sub blocks A'_i of A' for the current thread,
+ // i.e., we simply decrement the number of users by 1
+ #pragma omp critical
+ {
+ for(Index i=0; i<threads; ++i)
+ #pragma omp atomic
+ --(info[i].users);
+ }
+ }
+ }
+ else
+#endif // EIGEN_HAS_OPENMP
+ {
+ EIGEN_UNUSED_VARIABLE(info);
+
+ // this is the sequential version!
+ std::size_t sizeA = kc*mc;
+ std::size_t sizeB = kc*nc;
+
+ ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
+ ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
+
+ const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
+
+ // For each horizontal panel of the rhs, and corresponding panel of the lhs...
+ for(Index i2=0; i2<rows; i2+=mc)
+ {
+ const Index actual_mc = (std::min)(i2+mc,rows)-i2;
+
+ for(Index k2=0; k2<depth; k2+=kc)
+ {
+ const Index actual_kc = (std::min)(k2+kc,depth)-k2;
+
+ // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
+ // => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
+ // Note that this panel will be read as many times as the number of blocks in the rhs's
+ // horizontal panel which is, in practice, a very low number.
+ pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
+
+ // For each kc x nc block of the rhs's horizontal panel...
+ for(Index j2=0; j2<cols; j2+=nc)
+ {
+ const Index actual_nc = (std::min)(j2+nc,cols)-j2;
+
+ // We pack the rhs's block into a sequential chunk of memory (L2 caching)
+ // Note that this block will be read a very high number of times, which is equal to the number of
+ // micro horizontal panels of the large rhs's panel (e.g., rows/12 times).
+ if((!pack_rhs_once) || i2==0)
+ pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
+
+ // Everything is packed, we can now call the panel * block kernel:
+ gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
+ }
+ }
+ }
+ }
+}
+
+};
+
+/*********************************************************************************
+* Specialization of GeneralProduct<> for "large" GEMM, i.e.,
+* implementation of the high level wrapper to general_matrix_matrix_product
+**********************************************************************************/
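+
+// Illustrative example (ordinary Eigen usage, shown here only for context): a plain
+// product of large dynamic-size matrices such as
+//
+//   Eigen::MatrixXd C = A * B;   // A is m x k, B is k x n, with large m, n, k
+//
+// is evaluated through this GemmProduct specialization, which packs blocks of A and B
+// and runs the gebp kernel via general_matrix_matrix_product.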
+
+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
+ : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> >
+{};
+
+template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
+struct gemm_functor
+{
+ gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
+ : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
+ {}
+
+ void initParallelSession() const
+ {
+ m_blocking.allocateA();
+ }
+
+ void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
+ {
+ if(cols==-1)
+ cols = m_rhs.cols();
+
+ Gemm::run(rows, cols, m_lhs.cols(),
+ /*(const Scalar*)*/&m_lhs.coeffRef(row,0), m_lhs.outerStride(),
+ /*(const Scalar*)*/&m_rhs.coeffRef(0,col), m_rhs.outerStride(),
+ (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
+ m_actualAlpha, m_blocking, info);
+ }
+
+ typedef typename Gemm::Traits Traits;
+
+ protected:
+ const Lhs& m_lhs;
+ const Rhs& m_rhs;
+ Dest& m_dest;
+ Scalar m_actualAlpha;
+ BlockingType& m_blocking;
+};
+
+template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
+bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
+
+template<typename _LhsScalar, typename _RhsScalar>
+class level3_blocking
+{
+ typedef _LhsScalar LhsScalar;
+ typedef _RhsScalar RhsScalar;
+
+ protected:
+ LhsScalar* m_blockA;
+ RhsScalar* m_blockB;
+
+ DenseIndex m_mc;
+ DenseIndex m_nc;
+ DenseIndex m_kc;
+
+ public:
+
+ level3_blocking()
+ : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
+ {}
+
+ inline DenseIndex mc() const { return m_mc; }
+ inline DenseIndex nc() const { return m_nc; }
+ inline DenseIndex kc() const { return m_kc; }
+
+ inline LhsScalar* blockA() { return m_blockA; }
+ inline RhsScalar* blockB() { return m_blockB; }
+};
+
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true>
+ : public level3_blocking<
+ typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
+ typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
+{
+ enum {
+ Transpose = StorageOrder==RowMajor,
+ ActualRows = Transpose ? MaxCols : MaxRows,
+ ActualCols = Transpose ? MaxRows : MaxCols
+ };
+ typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
+ typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+ enum {
+ SizeA = ActualRows * MaxDepth,
+ SizeB = ActualCols * MaxDepth
+ };
+
+ EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA];
+ EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB];
+
+ public:
+
+ gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/, int /*num_threads*/, bool /*full_rows = false*/)
+ {
+ this->m_mc = ActualRows;
+ this->m_nc = ActualCols;
+ this->m_kc = MaxDepth;
+ this->m_blockA = m_staticA;
+ this->m_blockB = m_staticB;
+ }
+
+ inline void allocateA() {}
+ inline void allocateB() {}
+ inline void allocateAll() {}
+};
+
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
+ : public level3_blocking<
+ typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
+ typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
+{
+ enum {
+ Transpose = StorageOrder==RowMajor
+ };
+ typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
+ typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+
+ DenseIndex m_sizeA;
+ DenseIndex m_sizeB;
+
+ public:
+
+ gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth, DenseIndex num_threads, bool l3_blocking)
+ {
+ this->m_mc = Transpose ? cols : rows;
+ this->m_nc = Transpose ? rows : cols;
+ this->m_kc = depth;
+
+ if(l3_blocking)
+ {
+ computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
+ }
+ else // no l3 blocking
+ {
+ DenseIndex m = this->m_mc;
+ DenseIndex n = this->m_nc;
+ computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, n, num_threads);
+ }
+
+ m_sizeA = this->m_mc * this->m_kc;
+ m_sizeB = this->m_kc * this->m_nc;
+ }
+
+ void allocateA()
+ {
+ if(this->m_blockA==0)
+ this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
+ }
+
+ void allocateB()
+ {
+ if(this->m_blockB==0)
+ this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
+ }
+
+ void allocateAll()
+ {
+ allocateA();
+ allocateB();
+ }
+
+ ~gemm_blocking_space()
+ {
+ aligned_delete(this->m_blockA, m_sizeA);
+ aligned_delete(this->m_blockB, m_sizeB);
+ }
+};
+
+} // end namespace internal
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, GemmProduct>
+ : public ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs>
+{
+ enum {
+ MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
+ };
+ public:
+ EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
+
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef Scalar ResScalar;
+
+ GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
+ {
+ typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp;
+ EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
+ }
+
+ template<typename Dest>
+ inline void evalTo(Dest& dst) const
+ {
+ if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
+ dst.noalias() = m_lhs .lazyProduct( m_rhs );
+ else
+ {
+ dst.setZero();
+ scaleAndAddTo(dst,Scalar(1));
+ }
+ }
+
+ template<typename Dest>
+ inline void addTo(Dest& dst) const
+ {
+ if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
+ dst.noalias() += m_lhs .lazyProduct( m_rhs );
+ else
+ scaleAndAddTo(dst,Scalar(1));
+ }
+
+ template<typename Dest>
+ inline void subTo(Dest& dst) const
+ {
+ if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
+ dst.noalias() -= m_lhs .lazyProduct( m_rhs );
+ else
+ scaleAndAddTo(dst,Scalar(-1));
+ }
+
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+ {
+ eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+
+ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
+ typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
+ * RhsBlasTraits::extractScalarFactor(m_rhs);
+
+ typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
+ Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
+
+ typedef internal::gemm_functor<
+ Scalar, Index,
+ internal::general_matrix_matrix_product<
+ Index,
+ LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
+ RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
+ (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
+ _ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor;
+
+ BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
+
+ internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
+ }
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
new file mode 100644
index 0000000000..e4c10e88d1
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -0,0 +1,285 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
+
+namespace Eigen {
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
+struct selfadjoint_rank1_update;
+
+namespace internal {
+
+/**********************************************************************
+* This file implements a general A * B product while
+* evaluating only one triangular part of the product.
+* This is a more general version of the self adjoint product (C += A A^T)
+* computed by the level 3 SYRK BLAS routine.
+**********************************************************************/
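+
+// Illustrative entry point (an assumption based on Eigen's public SelfAdjointView API,
+// not something defined in this file): a rank update such as
+//
+//   mat.selfadjointView<Eigen::Lower>().rankUpdate(a, alpha);  // mat += alpha * a * a^T, lower half only
+//
+// eventually dispatches to general_matrix_matrix_triangular_product below, which
+// evaluates only the requested triangular half of the product.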
+
+// forward declarations (defined at the end of this file)
+template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
+struct tribb_kernel;
+
+/* Optimized matrix-matrix product evaluating only one triangular half */
+template <typename Index,
+ typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
+ int ResStorageOrder, int UpLo, int Version = Specialized>
+struct general_matrix_matrix_triangular_product;
+
+// as usual if the result is row major => we transpose the product
+template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
+struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
+{
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
+ {
+ general_matrix_matrix_triangular_product<Index,
+ RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
+ LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
+ ColMajor, UpLo==Lower?Upper:Lower>
+ ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha);
+ }
+};
+
+template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
+struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
+{
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha)
+ {
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+
+ typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
+ LhsMapper lhs(_lhs,lhsStride);
+ RhsMapper rhs(_rhs,rhsStride);
+ ResMapper res(_res, resStride);
+
+ Index kc = depth; // cache block size along the K direction
+ Index mc = size; // cache block size along the M direction
+ Index nc = size; // cache block size along the N direction
+ computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc, Index(1));
+ // !!! mc must be a multiple of nr:
+ if(mc > Traits::nr)
+ mc = (mc/Traits::nr)*Traits::nr;
+
+ ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
+ ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, kc*size, 0);
+
+ gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
+ gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
+ tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb;
+
+ for(Index k2=0; k2<depth; k2+=kc)
+ {
+ const Index actual_kc = (std::min)(k2+kc,depth)-k2;
+
+ // note that the actual rhs is the transpose/adjoint of mat
+ pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, size);
+
+ for(Index i2=0; i2<size; i2+=mc)
+ {
+ const Index actual_mc = (std::min)(i2+mc,size)-i2;
+
+ pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
+
+ // the selected actual_mc * size panel of res is split into three different parts:
+ // 1 - before the diagonal => processed with gebp or skipped
+ // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
+ // 3 - after the diagonal => processed with gebp or skipped
+ if (UpLo==Lower)
+ gebp(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc,
+ (std::min)(size,i2), alpha, -1, -1, 0, 0);
+
+
+ sybb(_res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha);
+
+ if (UpLo==Upper)
+ {
+ Index j2 = i2+actual_mc;
+ gebp(res.getSubMapper(i2, j2), blockA, blockB+actual_kc*j2, actual_mc,
+ actual_kc, (std::max)(Index(0), size-j2), alpha, -1, -1, 0, 0);
+ }
+ }
+ }
+ }
+};
+
+// Optimized packed Block * packed Block product kernel evaluating only one given triangular part
+// This kernel is built on top of the gebp kernel:
+// - the current destination block is processed per panel of actual_mc x BlockSize
+// where BlockSize is set to the minimal value allowing gebp to be as fast as possible
+// - then, as usual, each panel is split into three parts along the diagonal,
+// the sub blocks above and below the diagonal are processed as usual,
+// while the triangular block overlapping the diagonal is evaluated into a
+// small temporary buffer which is then accumulated into the result using a
+// triangular traversal.
+template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
+struct tribb_kernel
+{
+ typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits;
+ typedef typename Traits::ResScalar ResScalar;
+
+ enum {
+ BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
+ };
+ void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
+ {
+ typedef blas_data_mapper<ResScalar, Index, ColMajor> ResMapper;
+ ResMapper res(_res, resStride);
+ gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
+
+ Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
+
+ // let's process the block per panel of actual_mc x BlockSize,
+ // again, each is split into three parts, etc.
+ for (Index j=0; j<size; j+=BlockSize)
+ {
+ Index actualBlockSize = std::min<Index>(BlockSize,size - j);
+ const RhsScalar* actual_b = blockB+j*depth;
+
+ if(UpLo==Upper)
+ gebp_kernel(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha,
+ -1, -1, 0, 0);
+
+ // selfadjoint micro block
+ {
+ Index i = j;
+ buffer.setZero();
+ // 1 - apply the kernel on the temporary buffer
+ gebp_kernel(ResMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
+ -1, -1, 0, 0);
+ // 2 - triangular accumulation
+ for(Index j1=0; j1<actualBlockSize; ++j1)
+ {
+ ResScalar* r = &res(i, j + j1);
+ for(Index i1=UpLo==Lower ? j1 : 0;
+ UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1)
+ r[i1] += buffer(i1,j1);
+ }
+ }
+
+ if(UpLo==Lower)
+ {
+ Index i = j+actualBlockSize;
+ gebp_kernel(res.getSubMapper(i, j), blockA+depth*i, actual_b, size-i,
+ depth, actualBlockSize, alpha, -1, -1, 0, 0);
+ }
+ }
+ }
+};
+
+} // end namespace internal
+
+// high level API
+
+template<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct>
+struct general_product_to_triangular_selector;
+
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ enum {
+ StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
+ UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1
+ };
+
+ internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),
+ (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
+ if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
+
+ internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),
+ (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
+ if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+
+
+ selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
+ LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
+ RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>
+ ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha);
+ }
+};
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ internal::general_matrix_matrix_triangular_product<Index,
+ typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+ typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+ MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
+ ::run(mat.cols(), actualLhs.cols(),
+ &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
+ mat.data(), mat.outerStride(), actualAlpha);
+ }
+};
+
+template<typename MatrixType, unsigned int UpLo>
+template<typename ProductDerived, typename _Lhs, typename _Rhs>
+TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
+{
+ eigen_assert(m_matrix.rows() == prod.rows() && m_matrix.cols() == prod.cols());
+
+ general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
+
+ return *this;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
new file mode 100644
index 0000000000..3deed068e3
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
@@ -0,0 +1,146 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ * Content : Eigen bindings to Intel(R) MKL
+ * Level 3 BLAS SYRK/HERK implementation.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
+
+namespace Eigen {
+
+namespace internal {
+
+template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo>
+struct general_matrix_matrix_rankupdate :
+ general_matrix_matrix_triangular_product<
+ Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {};
+
+
+// try to go to BLAS specialization
+#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
+template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
+ int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
+struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
+ Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
+ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
+ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
+ { \
+ if (lhs==rhs) { \
+ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
+ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
+ } else { \
+ general_matrix_matrix_triangular_product<Index, \
+ Scalar, LhsStorageOrder, ConjugateLhs, \
+ Scalar, RhsStorageOrder, ConjugateRhs, \
+ ColMajor, UpLo, BuiltIn> \
+ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
+ } \
+ } \
+};
+
+EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
+//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
+EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
+//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
+
+// SYRK for float/double
+#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
+template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
+struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
+ enum { \
+ IsLower = (UpLo&Lower) == Lower, \
+ LowUp = IsLower ? Lower : Upper, \
+ conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
+ }; \
+ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
+ const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
+ { \
+ /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
+\
+ MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
+ char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
+ MKLTYPE alpha_, beta_; \
+\
+/* Set alpha_ & beta_ */ \
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+ MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
+ } \
+};
+
+// HERK for complex data
+#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
+template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
+struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
+ enum { \
+ IsLower = (UpLo&Lower) == Lower, \
+ LowUp = IsLower ? Lower : Upper, \
+ conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
+ }; \
+ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
+ const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
+ { \
+ typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
+\
+ MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
+ char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
+ RTYPE alpha_, beta_; \
+ const EIGTYPE* a_ptr; \
+\
+/* Set alpha_ & beta_ */ \
+/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
+/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
+ alpha_ = alpha.real(); \
+ beta_ = 1.0; \
+/* Copy with conjugation in some cases*/ \
+ MatrixType a; \
+ if (conjA) { \
+ Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \
+ a = mapA.conjugate(); \
+ lda = a.outerStride(); \
+ a_ptr = a.data(); \
+ } else a_ptr=lhs; \
+ MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
+ } \
+};
+
+
+EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
+EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk)
+
+//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
+//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk)
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
new file mode 100644
index 0000000000..060af328eb
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
@@ -0,0 +1,118 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ * Content : Eigen bindings to Intel(R) MKL
+ * General matrix-matrix product functionality based on ?GEMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
+
+namespace Eigen {
+
+namespace internal {
+
+/**********************************************************************
+* This file implements general matrix-matrix multiplication using BLAS
+* gemm function via partial specialization of
+* general_matrix_matrix_product::run(..) method for float, double,
+* std::complex<float> and std::complex<double> types
+**********************************************************************/
+
+// gemm specialization
+
+#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
+template< \
+ typename Index, \
+ int LhsStorageOrder, bool ConjugateLhs, \
+ int RhsStorageOrder, bool ConjugateRhs> \
+struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
+{ \
+static void run(Index rows, Index cols, Index depth, \
+ const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsStride, \
+ EIGTYPE* res, Index resStride, \
+ EIGTYPE alpha, \
+ level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, \
+ GemmParallelInfo<Index>* /*info = 0*/) \
+{ \
+ using std::conj; \
+\
+ char transa, transb; \
+ MKL_INT m, n, k, lda, ldb, ldc; \
+ const EIGTYPE *a, *b; \
+ MKLTYPE alpha_, beta_; \
+ MatrixX##EIGPREFIX a_tmp, b_tmp; \
+ EIGTYPE myone(1);\
+\
+/* Set transpose options */ \
+ transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
+ transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set m, n, k */ \
+ m = (MKL_INT)rows; \
+ n = (MKL_INT)cols; \
+ k = (MKL_INT)depth; \
+\
+/* Set alpha_ & beta_ */ \
+ assign_scalar_eig2mkl(alpha_, alpha); \
+ assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+ lda = (MKL_INT)lhsStride; \
+ ldb = (MKL_INT)rhsStride; \
+ ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+ if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
+ a_tmp = lhs.conjugate(); \
+ a = a_tmp.data(); \
+ lda = a_tmp.outerStride(); \
+ } else a = _lhs; \
+\
+ if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
+ b_tmp = rhs.conjugate(); \
+ b = b_tmp.data(); \
+ ldb = b_tmp.outerStride(); \
+ } else b = _rhs; \
+\
+ MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+}};
+
+GEMM_SPECIALIZATION(double, d, double, d)
+GEMM_SPECIALIZATION(float, f, float, s)
+GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
+GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h
new file mode 100644
index 0000000000..cb67d5d0a9
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -0,0 +1,618 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
+#define EIGEN_GENERAL_MATRIX_VECTOR_H
+
+namespace Eigen {
+
+namespace internal {
+
+/* Optimized col-major matrix * vector product:
+ * This algorithm processes 4 columns at once, which allows us to both reduce
+ * the number of loads/stores of the result by a factor of 4 and to reduce
+ * the instruction dependencies. Moreover, we know that all bands have the
+ * same alignment pattern.
+ *
+ * Mixing type logic: C += alpha * A * B
+ * | A | B |alpha| comments
+ * |real |cplx |cplx | no vectorization
+ * |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization
+ * |cplx |real |cplx | invalid, the caller has to do tmp = A * B; C += alpha*tmp
+ * |cplx |real |real | optimal case, vectorization possible via real-cplx mul
+ *
+ * Accesses to the matrix coefficients follow the following logic:
+ *
+ * - if all columns have the same alignment then
+ * - if the columns have the same alignment as the result vector, then easy! (-> AllAligned case)
+ * - otherwise perform unaligned loads only (-> NoneAligned case)
+ * - otherwise
+ * - if even columns have the same alignment then
+ * // odd columns are guaranteed to have the same alignment too
+ * - if even or odd columns have the same alignment as the result, then
+ * // for a register size of 2 scalars, this is guaranteed to be the case (e.g., SSE with double)
+ * - perform half aligned and half unaligned loads (-> EvenAligned case)
+ * - otherwise perform unaligned loads only (-> NoneAligned case)
+ * - otherwise, if the register size is 4 scalars (e.g., SSE with float) then
+ * - one out of 4 consecutive columns is guaranteed to be aligned with the result vector,
+ * perform simple aligned loads for this column and aligned loads plus re-alignment for the others. (-> FirstAligned case)
+ * // this re-alignment is done by the palign function implemented for SSE in Eigen/src/Core/arch/SSE/PacketMath.h
+ * - otherwise,
+ * // if we get here, this means the register size is greater than 4 (e.g., AVX with floats),
+ * // we currently fall back to the NoneAligned case
+ *
+ * The same reasoning applies to the transposed case.
+ *
+ * The last case (PacketSize>4) could probably be improved by generalizing the FirstAligned case, but since we do not support AVX yet...
+ * One might also wonder why in the EvenAligned case we perform unaligned loads instead of using the aligned-loads plus re-alignment
+ * strategy as in the FirstAligned case. The reason is that we observed that unaligned loads on an 8 byte boundary are not too slow
+ * compared to unaligned loads on a 4 byte boundary.
+ *
+ */
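+//
+// Worked example of the cases above: with SSE and double, LhsPacketSize==2, so either
+// all columns share the result's alignment (AllAligned) or the alignment alternates
+// every other column (EvenAligned). With SSE and float, LhsPacketSize==4, so one out
+// of four consecutive columns is aligned with the result, which is handled by the
+// FirstAligned strategy (aligned loads for that column, aligned loads plus palign
+// re-alignment for the other three).
+//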
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
+{
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+
+enum {
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
+ && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
+};
+
+typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+typedef typename packet_traits<ResScalar>::type _ResPacket;
+
+typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+
+EIGEN_DONT_INLINE static void run(
+ Index rows, Index cols,
+ const LhsMapper& lhs,
+ const RhsMapper& rhs,
+ ResScalar* res, Index resIncr,
+ RhsScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsMapper& lhs,
+ const RhsMapper& rhs,
+ ResScalar* res, Index resIncr,
+ RhsScalar alpha)
+{
+ EIGEN_UNUSED_VARIABLE(resIncr);
+ eigen_internal_assert(resIncr==1);
+ #ifdef _EIGEN_ACCUMULATE_PACKETS
+ #error _EIGEN_ACCUMULATE_PACKETS has already been defined
+ #endif
+ #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) \
+ pstore(&res[j], \
+ padd(pload<ResPacket>(&res[j]), \
+ padd( \
+ padd(pcj.pmul(lhs0.template load<LhsPacket, Alignment0>(j), ptmp0), \
+ pcj.pmul(lhs1.template load<LhsPacket, Alignment13>(j), ptmp1)), \
+ padd(pcj.pmul(lhs2.template load<LhsPacket, Alignment2>(j), ptmp2), \
+ pcj.pmul(lhs3.template load<LhsPacket, Alignment13>(j), ptmp3)) )))
+
+ typedef typename LhsMapper::VectorMapper LhsScalars;
+
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+ conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
+ if(ConjugateRhs)
+ alpha = numext::conj(alpha);
+
+ enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
+ const Index columnsAtOnce = 4;
+ const Index peels = 2;
+ const Index LhsPacketAlignedMask = LhsPacketSize-1;
+ const Index ResPacketAlignedMask = ResPacketSize-1;
+// const Index PeelAlignedMask = ResPacketSize*peels-1;
+ const Index size = rows;
+
+ const Index lhsStride = lhs.stride();
+
+ // How many coeffs of the result do we have to skip to be aligned.
+ // Here we assume data are at least aligned on the base scalar type.
+ Index alignedStart = internal::first_aligned(res,size);
+ Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
+
+ const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
+ Index alignmentPattern = alignmentStep==0 ? AllAligned
+ : alignmentStep==(LhsPacketSize/2) ? EvenAligned
+ : FirstAligned;
+
+ // we cannot assume the first element is aligned because of sub-matrices
+ const Index lhsAlignmentOffset = lhs.firstAligned(size);
+
+ // find how many columns we have to skip to be aligned with the result (if possible)
+ Index skipColumns = 0;
+ // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
+ if( (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == size) || (size_t(res)%sizeof(ResScalar)) )
+ {
+ alignedSize = 0;
+ alignedStart = 0;
+ alignmentPattern = NoneAligned;
+ }
+ else if(LhsPacketSize > 4)
+ {
+ // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
+ // Currently, it seems to be better to perform unaligned loads anyway
+ alignmentPattern = NoneAligned;
+ }
+ else if (LhsPacketSize>1)
+ {
+ // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
+
+ while (skipColumns<LhsPacketSize &&
+ alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
+ ++skipColumns;
+ if (skipColumns==LhsPacketSize)
+ {
+ // nothing can be aligned, no need to skip any column
+ alignmentPattern = NoneAligned;
+ skipColumns = 0;
+ }
+ else
+ {
+ skipColumns = (std::min)(skipColumns,cols);
+ // note that the skipped columns are processed later.
+ }
+
+ /* eigen_internal_assert( (alignmentPattern==NoneAligned)
+ || (skipColumns + columnsAtOnce >= cols)
+ || LhsPacketSize > size
+ || (size_t(firstLhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);*/
+ }
+ else if(Vectorizable)
+ {
+ alignedStart = 0;
+ alignedSize = size;
+ alignmentPattern = AllAligned;
+ }
+
+ const Index offset1 = (FirstAligned && alignmentStep==1?3:1);
+ const Index offset3 = (FirstAligned && alignmentStep==1?1:3);
+
+ Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
+ for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
+ {
+ RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(i, 0)),
+ ptmp1 = pset1<RhsPacket>(alpha*rhs(i+offset1, 0)),
+ ptmp2 = pset1<RhsPacket>(alpha*rhs(i+2, 0)),
+ ptmp3 = pset1<RhsPacket>(alpha*rhs(i+offset3, 0));
+
+ // this helps a lot in generating better binary code
+ const LhsScalars lhs0 = lhs.getVectorMapper(0, i+0), lhs1 = lhs.getVectorMapper(0, i+offset1),
+ lhs2 = lhs.getVectorMapper(0, i+2), lhs3 = lhs.getVectorMapper(0, i+offset3);
+
+ if (Vectorizable)
+ {
+ /* explicit vectorization */
+ // process initial unaligned coeffs
+ for (Index j=0; j<alignedStart; ++j)
+ {
+ res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]);
+ res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]);
+ res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]);
+ res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]);
+ }
+
+ if (alignedSize>alignedStart)
+ {
+ switch(alignmentPattern)
+ {
+ case AllAligned:
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned);
+ break;
+ case EvenAligned:
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned);
+ break;
+ case FirstAligned:
+ {
+ Index j = alignedStart;
+ if(peels>1)
+ {
+ LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
+ ResPacket T0, T1;
+
+ A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1);
+ A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2);
+ A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3);
+
+ for (; j<peeledSize; j+=peels*ResPacketSize)
+ {
+ A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11);
+ A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12);
+ A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13);
+
+ A00 = lhs0.template load<LhsPacket, Aligned>(j);
+ A10 = lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize);
+ T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j]));
+ T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize]));
+
+ T0 = pcj.pmadd(A01, ptmp1, T0);
+ A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01);
+ T0 = pcj.pmadd(A02, ptmp2, T0);
+ A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02);
+ T0 = pcj.pmadd(A03, ptmp3, T0);
+ pstore(&res[j],T0);
+ A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03);
+ T1 = pcj.pmadd(A11, ptmp1, T1);
+ T1 = pcj.pmadd(A12, ptmp2, T1);
+ T1 = pcj.pmadd(A13, ptmp3, T1);
+ pstore(&res[j+ResPacketSize],T1);
+ }
+ }
+ for (; j<alignedSize; j+=ResPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned);
+ break;
+ }
+ default:
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned);
+ break;
+ }
+ }
+ } // end explicit vectorization
+
+ /* process remaining coeffs (or all if there is no explicit vectorization) */
+ for (Index j=alignedSize; j<size; ++j)
+ {
+ res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]);
+ res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]);
+ res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]);
+ res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]);
+ }
+ }
+
+ // process remaining first and last columns (at most columnsAtOnce-1)
+ Index end = cols;
+ Index start = columnBound;
+ do
+ {
+ for (Index k=start; k<end; ++k)
+ {
+ RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(k, 0));
+ const LhsScalars lhs0 = lhs.getVectorMapper(0, k);
+
+ if (Vectorizable)
+ {
+ /* explicit vectorization */
+ // process first unaligned result's coeffs
+ for (Index j=0; j<alignedStart; ++j)
+ res[j] += cj.pmul(lhs0(j), pfirst(ptmp0));
+ // process aligned result's coeffs
+ if (lhs0.template aligned<LhsPacket>(alignedStart))
+ for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
+ pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(i), ptmp0, pload<ResPacket>(&res[i])));
+ else
+ for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
+ pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(i), ptmp0, pload<ResPacket>(&res[i])));
+ }
+
+ // process remaining scalars (or all if no explicit vectorization)
+ for (Index i=alignedSize; i<size; ++i)
+ res[i] += cj.pmul(lhs0(i), pfirst(ptmp0));
+ }
+ if (skipColumns)
+ {
+ start = 0;
+ end = skipColumns;
+ skipColumns = 0;
+ }
+ else
+ break;
+ } while(Vectorizable);
+ #undef _EIGEN_ACCUMULATE_PACKETS
+}
+
+/* Optimized row-major matrix * vector product:
+ * This algorithm processes 4 rows at once, which both reduces
+ * the number of loads/stores of the result by a factor of 4 and reduces
+ * the instruction dependency. Moreover, we know that all bands have the
+ * same alignment pattern.
+ *
+ * Mixing type logic:
+ * - alpha is always a complex (or converted to a complex)
+ * - no vectorization
+ */
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
+{
+typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+
+enum {
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
+ && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
+};
+
+typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+typedef typename packet_traits<ResScalar>::type _ResPacket;
+
+typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+
+EIGEN_DONT_INLINE static void run(
+ Index rows, Index cols,
+ const LhsMapper& lhs,
+ const RhsMapper& rhs,
+ ResScalar* res, Index resIncr,
+ ResScalar alpha);
+};
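+// For reference, in scalar form the row-major kernel below reduces to one dot product
+// per result coefficient (conj_if() is again a schematic placeholder for conj_helper):
+//
+//   for (Index i = 0; i < rows; ++i) {
+//     ResScalar acc = ResScalar(0);
+//     for (Index j = 0; j < cols; ++j)
+//       acc += conj_if(lhs(i, j)) * rhs(j, 0);
+//     res[i * resIncr] += alpha * acc;
+//   }
+//
+// run() computes four such dot products simultaneously (rowsAtOnce==4) so that each
+// loaded rhs packet is reused across four accumulators before the final predux().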
+
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsMapper& lhs,
+ const RhsMapper& rhs,
+ ResScalar* res, Index resIncr,
+ ResScalar alpha)
+{
+ eigen_internal_assert(rhs.stride()==1);
+
+ #ifdef _EIGEN_ACCUMULATE_PACKETS
+ #error _EIGEN_ACCUMULATE_PACKETS has already been defined
+ #endif
+
+ #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) {\
+ RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0); \
+ ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Alignment0>(j), b, ptmp0); \
+ ptmp1 = pcj.pmadd(lhs1.template load<LhsPacket, Alignment13>(j), b, ptmp1); \
+ ptmp2 = pcj.pmadd(lhs2.template load<LhsPacket, Alignment2>(j), b, ptmp2); \
+ ptmp3 = pcj.pmadd(lhs3.template load<LhsPacket, Alignment13>(j), b, ptmp3); }
+
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+ conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
+
+ typedef typename LhsMapper::VectorMapper LhsScalars;
+
+ enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
+ const Index rowsAtOnce = 4;
+ const Index peels = 2;
+ const Index RhsPacketAlignedMask = RhsPacketSize-1;
+ const Index LhsPacketAlignedMask = LhsPacketSize-1;
+ const Index depth = cols;
+ const Index lhsStride = lhs.stride();
+
+ // How many coeffs of the result do we have to skip to be aligned.
+ // Here we assume data are at least aligned on the base scalar type
+ // if that's not the case then vectorization is discarded, see below.
+ Index alignedStart = rhs.firstAligned(depth);
+ Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
+
+ const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
+ Index alignmentPattern = alignmentStep==0 ? AllAligned
+ : alignmentStep==(LhsPacketSize/2) ? EvenAligned
+ : FirstAligned;
+
+ // we cannot assume the first element is aligned because of sub-matrices
+ const Index lhsAlignmentOffset = lhs.firstAligned(depth);
+ const Index rhsAlignmentOffset = rhs.firstAligned(rows);
+
+ // find how many rows we have to skip to be aligned with the rhs (if possible)
+ Index skipRows = 0;
+ // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
+ if( (sizeof(LhsScalar)!=sizeof(RhsScalar))
+ || (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == depth)
+ || (rhsAlignmentOffset < 0) || (rhsAlignmentOffset == rows))
+ {
+ alignedSize = 0;
+ alignedStart = 0;
+ alignmentPattern = NoneAligned;
+ }
+ else if(LhsPacketSize > 4)
+ {
+ // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
+ alignmentPattern = NoneAligned;
+ }
+ else if (LhsPacketSize>1)
+ {
+ // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
+
+ while (skipRows<LhsPacketSize &&
+ alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
+ ++skipRows;
+ if (skipRows==LhsPacketSize)
+ {
+ // nothing can be aligned, no need to skip any row
+ alignmentPattern = NoneAligned;
+ skipRows = 0;
+ }
+ else
+ {
+ skipRows = (std::min)(skipRows,Index(rows));
+ // note that the skipped rows are processed later.
+ }
+ /* eigen_internal_assert( alignmentPattern==NoneAligned
+ || LhsPacketSize==1
+ || (skipRows + rowsAtOnce >= rows)
+ || LhsPacketSize > depth
+ || (size_t(firstLhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);*/
+ }
+ else if(Vectorizable)
+ {
+ alignedStart = 0;
+ alignedSize = depth;
+ alignmentPattern = AllAligned;
+ }
+
+ const Index offset1 = (FirstAligned && alignmentStep==1?3:1);
+ const Index offset3 = (FirstAligned && alignmentStep==1?1:3);
+
+ Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
+ for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
+ {
+ EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
+ ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
+
+ // this helps the compiler generate good binary code
+ const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0), lhs1 = lhs.getVectorMapper(i+offset1, 0),
+ lhs2 = lhs.getVectorMapper(i+2, 0), lhs3 = lhs.getVectorMapper(i+offset3, 0);
+
+ if (Vectorizable)
+ {
+ /* explicit vectorization */
+ ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)),
+ ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));
+
+ // process initial unaligned coeffs
+ // FIXME this loop gets vectorized by the compiler!
+ for (Index j=0; j<alignedStart; ++j)
+ {
+ RhsScalar b = rhs(j, 0);
+ tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
+ tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
+ }
+
+ if (alignedSize>alignedStart)
+ {
+ switch(alignmentPattern)
+ {
+ case AllAligned:
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned);
+ break;
+ case EvenAligned:
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned);
+ break;
+ case FirstAligned:
+ {
+ Index j = alignedStart;
+ if (peels>1)
+ {
+ /* Here we process 4 rows with two peeled iterations to hide
+ * the overhead of unaligned loads. Moreover, unaligned loads are handled
+ * using special shift/move operations between the two aligned packets
+ * overlapping the desired unaligned packet. This is *much* more efficient
+ * than basic unaligned loads.
+ */
+ LhsPacket A01, A02, A03, A11, A12, A13;
+ A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1);
+ A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2);
+ A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3);
+
+ for (; j<peeledSize; j+=peels*RhsPacketSize)
+ {
+ RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0);
+ A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11);
+ A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12);
+ A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13);
+
+ ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), b, ptmp0);
+ ptmp1 = pcj.pmadd(A01, b, ptmp1);
+ A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01);
+ ptmp2 = pcj.pmadd(A02, b, ptmp2);
+ A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02);
+ ptmp3 = pcj.pmadd(A03, b, ptmp3);
+ A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03);
+
+ b = rhs.getVectorMapper(j+RhsPacketSize, 0).template load<RhsPacket, Aligned>(0);
+ ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize), b, ptmp0);
+ ptmp1 = pcj.pmadd(A11, b, ptmp1);
+ ptmp2 = pcj.pmadd(A12, b, ptmp2);
+ ptmp3 = pcj.pmadd(A13, b, ptmp3);
+ }
+ }
+ for (; j<alignedSize; j+=RhsPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned);
+ break;
+ }
+ default:
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned);
+ break;
+ }
+ tmp0 += predux(ptmp0);
+ tmp1 += predux(ptmp1);
+ tmp2 += predux(ptmp2);
+ tmp3 += predux(ptmp3);
+ }
+ } // end explicit vectorization
+
+ // process remaining coeffs (or all if no explicit vectorization)
+ // FIXME this loop gets vectorized by the compiler!
+ for (Index j=alignedSize; j<depth; ++j)
+ {
+ RhsScalar b = rhs(j, 0);
+ tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
+ tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
+ }
+ res[i*resIncr] += alpha*tmp0;
+ res[(i+offset1)*resIncr] += alpha*tmp1;
+ res[(i+2)*resIncr] += alpha*tmp2;
+ res[(i+offset3)*resIncr] += alpha*tmp3;
+ }
+
+ // process remaining first and last rows (at most rowsAtOnce-1)
+ Index end = rows;
+ Index start = rowBound;
+ do
+ {
+ for (Index i=start; i<end; ++i)
+ {
+ EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
+ ResPacket ptmp0 = pset1<ResPacket>(tmp0);
+ const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
+ // process first unaligned result's coeffs
+ // FIXME this loop gets vectorized by the compiler!
+ for (Index j=0; j<alignedStart; ++j)
+ tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
+
+ if (alignedSize>alignedStart)
+ {
+ // process aligned rhs coeffs
+ if (lhs0.template aligned<LhsPacket>(alignedStart))
+ for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
+ ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0);
+ else
+ for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
+ ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0);
+ tmp0 += predux(ptmp0);
+ }
+
+ // process remaining scalars
+ // FIXME this loop gets vectorized by the compiler!
+ for (Index j=alignedSize; j<depth; ++j)
+ tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
+ res[i*resIncr] += alpha*tmp0;
+ }
+ if (skipRows)
+ {
+ start = 0;
+ end = skipRows;
+ skipRows = 0;
+ }
+ else
+ break;
+ } while(Vectorizable);
+
+ #undef _EIGEN_ACCUMULATE_PACKETS
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_VECTOR_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
new file mode 100644
index 0000000000..1cb9fe6b5a
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
@@ -0,0 +1,131 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ * Content : Eigen bindings to Intel(R) MKL
+ * General matrix-vector product functionality based on ?GEMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
+#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
+
+namespace Eigen {
+
+namespace internal {
+
+/**********************************************************************
+* This file implements general matrix-vector multiplication using the BLAS
+* gemv function via partial specialization of the
+* general_matrix_vector_product::run(..) method for the float, double,
+* std::complex<float> and std::complex<double> types.
+**********************************************************************/
+
+// gemv specialization
+
+template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
+struct general_matrix_vector_product_gemv :
+ general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {};
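+// For reference, the specializations generated below ultimately forward to the BLAS
+// ?gemv routines with beta fixed to 1, i.e. conceptually (double case shown):
+//
+//   // y := alpha * op(A) * x + y, with op(A) = A, A^T or A^H depending on storage order and conjugation
+//   dgemv(&trans, &m, &n, &alpha_, A, &lda, x, &incx, &beta_, y, &incy);
+//
+// The built-in Eigen kernel is kept as a fallback for the cases the BLAS call does not
+// cover, such as a conjugated column-major lhs (see the ConjugateLhs branch below).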
+
+#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
+template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
+static void run( \
+ Index rows, Index cols, \
+ const Scalar* lhs, Index lhsStride, \
+ const Scalar* rhs, Index rhsIncr, \
+ Scalar* res, Index resIncr, Scalar alpha) \
+{ \
+ if (ConjugateLhs) { \
+ general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
+ rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+ } else { \
+ general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
+ rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+ } \
+} \
+}; \
+template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
+static void run( \
+ Index rows, Index cols, \
+ const Scalar* lhs, Index lhsStride, \
+ const Scalar* rhs, Index rhsIncr, \
+ Scalar* res, Index resIncr, Scalar alpha) \
+{ \
+ general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
+ rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+} \
+}; \
+
+EIGEN_MKL_GEMV_SPECIALIZE(double)
+EIGEN_MKL_GEMV_SPECIALIZE(float)
+EIGEN_MKL_GEMV_SPECIALIZE(dcomplex)
+EIGEN_MKL_GEMV_SPECIALIZE(scomplex)
+
+#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \
+template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
+{ \
+typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
+\
+static void run( \
+ Index rows, Index cols, \
+ const EIGTYPE* lhs, Index lhsStride, \
+ const EIGTYPE* rhs, Index rhsIncr, \
+ EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
+{ \
+ MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \
+ MKLTYPE alpha_, beta_; \
+ const EIGTYPE *x_ptr, myone(1); \
+ char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
+ if (LhsStorageOrder==RowMajor) { \
+ m=cols; \
+ n=rows; \
+ }\
+ assign_scalar_eig2mkl(alpha_, alpha); \
+ assign_scalar_eig2mkl(beta_, myone); \
+ GEMVVector x_tmp; \
+ if (ConjugateRhs) { \
+ Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
+ x_tmp=map_x.conjugate(); \
+ x_ptr=x_tmp.data(); \
+ incx=1; \
+ } else x_ptr=rhs; \
+ MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
+}\
+};
+
+EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d)
+EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s)
+EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z)
+EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/Parallelizer.h b/third_party/eigen3/Eigen/src/Core/products/Parallelizer.h
new file mode 100644
index 0000000000..837e69415b
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/Parallelizer.h
@@ -0,0 +1,158 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PARALLELIZER_H
+#define EIGEN_PARALLELIZER_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal */
+inline void manage_multi_threading(Action action, int* v)
+{
+ static EIGEN_UNUSED int m_maxThreads = -1;
+
+ if(action==SetAction)
+ {
+ eigen_internal_assert(v!=0);
+ m_maxThreads = *v;
+ }
+ else if(action==GetAction)
+ {
+ eigen_internal_assert(v!=0);
+ #ifdef EIGEN_HAS_OPENMP
+ if(m_maxThreads>0)
+ *v = m_maxThreads;
+ else
+ *v = omp_get_max_threads();
+ #else
+ *v = 1;
+ #endif
+ }
+ else
+ {
+ eigen_internal_assert(false);
+ }
+}
+
+}
+
+/** Must be called first when calling Eigen from multiple threads */
+inline void initParallel()
+{
+ int nbt;
+ internal::manage_multi_threading(GetAction, &nbt);
+ std::ptrdiff_t l1, l2, l3;
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+}
+
+/** \returns the max number of threads reserved for Eigen
+ * \sa setNbThreads */
+inline int nbThreads()
+{
+ int ret;
+ internal::manage_multi_threading(GetAction, &ret);
+ return ret;
+}
+
+/** Sets the max number of threads reserved for Eigen
+ * \sa nbThreads */
+inline void setNbThreads(int v)
+{
+ internal::manage_multi_threading(SetAction, &v);
+}
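+
+// A typical usage sketch (assuming Eigen was built with OpenMP support):
+//
+//   Eigen::initParallel();       // once, before spawning user threads
+//   Eigen::setNbThreads(4);      // cap Eigen's matrix products at 4 threads
+//   int n = Eigen::nbThreads();  // query the current cap (omp_get_max_threads() if unset)
+//
+// Without EIGEN_HAS_OPENMP, nbThreads() always reports 1 and products run sequentially.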
+
+namespace internal {
+
+template<typename Index> struct GemmParallelInfo
+{
+ GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
+
+ int volatile sync;
+ int volatile users;
+
+ Index lhs_start;
+ Index lhs_length;
+};
+
+template<bool Condition, typename Functor, typename Index>
+void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
+{
+ // TODO when EIGEN_USE_BLAS is defined,
+ // we should still enable OMP for other scalar types
+#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
+ // FIXME the transpose variable is only needed to properly split
+ // the matrix product when multithreading is enabled. This is a temporary
+ // fix to support row-major destination matrices. This whole
+ // parallelizer mechanism has to be redesigned anyway.
+ EIGEN_UNUSED_VARIABLE(transpose);
+ func(0,rows, 0,cols);
+#else
+
+ // Dynamically check whether we should enable or disable OpenMP.
+ // The conditions are:
+ // - the max number of threads we can create is greater than 1
+ // - we are not already in a parallel code
+ // - the sizes are large enough
+
+ // 1- are we already in a parallel session?
+ // FIXME omp_get_num_threads()>1 only works with OpenMP; what if the user does not use OpenMP?
+ if((!Condition) || (omp_get_num_threads()>1))
+ return func(0,rows, 0,cols);
+
+ Index size = transpose ? rows : cols;
+
+ // 2- compute the maximal number of threads from the size of the product:
+ // FIXME this has to be fine tuned
+ Index max_threads = std::max<Index>(1,size / 32);
+
+ // 3 - compute the number of threads we are going to use
+ Index threads = std::min<Index>(nbThreads(), max_threads);
+
+ if(threads==1)
+ return func(0,rows, 0,cols);
+
+ Eigen::initParallel();
+ func.initParallelSession();
+
+ if(transpose)
+ std::swap(rows,cols);
+
+ Index blockCols = (cols / threads) & ~Index(0x3);
+ Index blockRows = (rows / threads);
+ blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
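+ // Illustrative example with assumed values: for rows==cols==1024, threads==4 and
+ // Functor::Traits::mr==8, this gives blockCols==256 (a multiple of 4) and blockRows==256
+ // (a multiple of mr); the last thread absorbs any remainder via actualBlockRows/Cols below.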
+
+ GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
+
+ #pragma omp parallel num_threads(threads)
+ {
+ Index i = omp_get_thread_num();
+ Index r0 = i*blockRows;
+ Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
+
+ Index c0 = i*blockCols;
+ Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
+
+ info[i].lhs_start = r0;
+ info[i].lhs_length = actualBlockRows;
+
+ if(transpose) func(c0, actualBlockCols, 0, rows, info);
+ else func(0, rows, c0, actualBlockCols, info);
+ }
+
+ delete[] info;
+#endif
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PARALLELIZER_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
new file mode 100644
index 0000000000..4a60ef7dc5
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -0,0 +1,523 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
+#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
+
+namespace Eigen {
+
+namespace internal {
+
+// pack a selfadjoint block diagonal for use with the gebp_kernel
+template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
+struct symm_pack_lhs
+{
+ template<int BlockRows> inline
+ void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
+ {
+ // normal copy
+ for(Index k=0; k<i; k++)
+ for(Index w=0; w<BlockRows; w++)
+ blockA[count++] = lhs(i+w,k); // normal
+ // symmetric copy
+ Index h = 0;
+ for(Index k=i; k<i+BlockRows; k++)
+ {
+ for(Index w=0; w<h; w++)
+ blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
+
+ blockA[count++] = numext::real(lhs(k,k)); // real (diagonal)
+
+ for(Index w=h+1; w<BlockRows; w++)
+ blockA[count++] = lhs(i+w, k); // normal
+ ++h;
+ }
+ // transposed copy
+ for(Index k=i+BlockRows; k<cols; k++)
+ for(Index w=0; w<BlockRows; w++)
+ blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
+ }
+ void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
+ {
+ enum { PacketSize = packet_traits<Scalar>::size };
+ const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
+ Index count = 0;
+ //Index peeled_mc3 = (rows/Pack1)*Pack1;
+
+ const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
+ const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
+ const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
+
+ if(Pack1>=3*PacketSize)
+ for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
+ pack<3*PacketSize>(blockA, lhs, cols, i, count);
+
+ if(Pack1>=2*PacketSize)
+ for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
+ pack<2*PacketSize>(blockA, lhs, cols, i, count);
+
+ if(Pack1>=1*PacketSize)
+ for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
+ pack<1*PacketSize>(blockA, lhs, cols, i, count);
+
+ // do the same with mr==1
+ for(Index i=peeled_mc1; i<rows; i++)
+ {
+ for(Index k=0; k<i; k++)
+ blockA[count++] = lhs(i, k); // normal
+
+ blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
+
+ for(Index k=i+1; k<cols; k++)
+ blockA[count++] = numext::conj(lhs(k, i)); // transposed
+ }
+ }
+};
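+// For reference, with a lower-stored selfadjoint lhs and BlockRows==2 starting at row i,
+// each packed column of the block produced above holds (illustrative layout only):
+//
+//   column k <  i   : lhs(i,k),          lhs(i+1,k)            (normal copy)
+//   column k == i   : real(lhs(i,i)),    lhs(i+1,i)            (diagonal block)
+//   column k == i+1 : conj(lhs(i+1,i)),  real(lhs(i+1,i+1))    (diagonal block)
+//   column k >= i+2 : conj(lhs(k,i)),    conj(lhs(k,i+1))      (transposed copy)
+//
+// so the gebp kernel can consume the packed data as if the full dense matrix were stored.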
+
+template<typename Scalar, typename Index, int nr, int StorageOrder>
+struct symm_pack_rhs
+{
+ enum { PacketSize = packet_traits<Scalar>::size };
+ void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
+ {
+ Index end_k = k2 + rows;
+ Index count = 0;
+ const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
+ Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
+ Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+
+ // first part: normal case
+ for(Index j2=0; j2<k2; j2+=nr)
+ {
+ for(Index k=k2; k<end_k; k++)
+ {
+ blockB[count+0] = rhs(k,j2+0);
+ blockB[count+1] = rhs(k,j2+1);
+ if (nr>=4)
+ {
+ blockB[count+2] = rhs(k,j2+2);
+ blockB[count+3] = rhs(k,j2+3);
+ }
+ if (nr>=8)
+ {
+ blockB[count+4] = rhs(k,j2+4);
+ blockB[count+5] = rhs(k,j2+5);
+ blockB[count+6] = rhs(k,j2+6);
+ blockB[count+7] = rhs(k,j2+7);
+ }
+ count += nr;
+ }
+ }
+
+ // second part: diagonal block
+ Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
+ if(nr>=8)
+ {
+ for(Index j2=k2; j2<end8; j2+=8)
+ {
+ // again we can split vertically into three different parts (transpose, symmetric, normal)
+ // transpose
+ for(Index k=k2; k<j2; k++)
+ {
+ blockB[count+0] = numext::conj(rhs(j2+0,k));
+ blockB[count+1] = numext::conj(rhs(j2+1,k));
+ blockB[count+2] = numext::conj(rhs(j2+2,k));
+ blockB[count+3] = numext::conj(rhs(j2+3,k));
+ blockB[count+4] = numext::conj(rhs(j2+4,k));
+ blockB[count+5] = numext::conj(rhs(j2+5,k));
+ blockB[count+6] = numext::conj(rhs(j2+6,k));
+ blockB[count+7] = numext::conj(rhs(j2+7,k));
+ count += 8;
+ }
+ // symmetric
+ Index h = 0;
+ for(Index k=j2; k<j2+8; k++)
+ {
+ // normal
+ for (Index w=0 ; w<h; ++w)
+ blockB[count+w] = rhs(k,j2+w);
+
+ blockB[count+h] = numext::real(rhs(k,k));
+
+ // transpose
+ for (Index w=h+1 ; w<8; ++w)
+ blockB[count+w] = numext::conj(rhs(j2+w,k));
+ count += 8;
+ ++h;
+ }
+ // normal
+ for(Index k=j2+8; k<end_k; k++)
+ {
+ blockB[count+0] = rhs(k,j2+0);
+ blockB[count+1] = rhs(k,j2+1);
+ blockB[count+2] = rhs(k,j2+2);
+ blockB[count+3] = rhs(k,j2+3);
+ blockB[count+4] = rhs(k,j2+4);
+ blockB[count+5] = rhs(k,j2+5);
+ blockB[count+6] = rhs(k,j2+6);
+ blockB[count+7] = rhs(k,j2+7);
+ count += 8;
+ }
+ }
+ }
+ if(nr>=4)
+ {
+ for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
+ {
+ // again we can split vertically into three different parts (transpose, symmetric, normal)
+ // transpose
+ for(Index k=k2; k<j2; k++)
+ {
+ blockB[count+0] = numext::conj(rhs(j2+0,k));
+ blockB[count+1] = numext::conj(rhs(j2+1,k));
+ blockB[count+2] = numext::conj(rhs(j2+2,k));
+ blockB[count+3] = numext::conj(rhs(j2+3,k));
+ count += 4;
+ }
+ // symmetric
+ Index h = 0;
+ for(Index k=j2; k<j2+4; k++)
+ {
+ // normal
+ for (Index w=0 ; w<h; ++w)
+ blockB[count+w] = rhs(k,j2+w);
+
+ blockB[count+h] = numext::real(rhs(k,k));
+
+ // transpose
+ for (Index w=h+1 ; w<4; ++w)
+ blockB[count+w] = numext::conj(rhs(j2+w,k));
+ count += 4;
+ ++h;
+ }
+ // normal
+ for(Index k=j2+4; k<end_k; k++)
+ {
+ blockB[count+0] = rhs(k,j2+0);
+ blockB[count+1] = rhs(k,j2+1);
+ blockB[count+2] = rhs(k,j2+2);
+ blockB[count+3] = rhs(k,j2+3);
+ count += 4;
+ }
+ }
+ }
+
+ // third part: transposed
+ if(nr>=8)
+ {
+ for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
+ {
+ for(Index k=k2; k<end_k; k++)
+ {
+ blockB[count+0] = numext::conj(rhs(j2+0,k));
+ blockB[count+1] = numext::conj(rhs(j2+1,k));
+ blockB[count+2] = numext::conj(rhs(j2+2,k));
+ blockB[count+3] = numext::conj(rhs(j2+3,k));
+ blockB[count+4] = numext::conj(rhs(j2+4,k));
+ blockB[count+5] = numext::conj(rhs(j2+5,k));
+ blockB[count+6] = numext::conj(rhs(j2+6,k));
+ blockB[count+7] = numext::conj(rhs(j2+7,k));
+ count += 8;
+ }
+ }
+ }
+ if(nr>=4)
+ {
+ for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
+ {
+ for(Index k=k2; k<end_k; k++)
+ {
+ blockB[count+0] = numext::conj(rhs(j2+0,k));
+ blockB[count+1] = numext::conj(rhs(j2+1,k));
+ blockB[count+2] = numext::conj(rhs(j2+2,k));
+ blockB[count+3] = numext::conj(rhs(j2+3,k));
+ count += 4;
+ }
+ }
+ }
+
+ // copy the remaining columns one at a time (=> the same with nr==1)
+ for(Index j2=packet_cols4; j2<cols; ++j2)
+ {
+ // transpose
+ Index half = (std::min)(end_k,j2);
+ for(Index k=k2; k<half; k++)
+ {
+ blockB[count] = numext::conj(rhs(j2,k));
+ count += 1;
+ }
+
+ if(half==j2 && half<k2+rows)
+ {
+ blockB[count] = numext::real(rhs(j2,j2));
+ count += 1;
+ }
+ else
+ half--;
+
+ // normal
+ for(Index k=half+1; k<k2+rows; k++)
+ {
+ blockB[count] = rhs(k,j2);
+ count += 1;
+ }
+ }
+ }
+};
+
+/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
+ * the general matrix matrix product.
+ */
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
+ int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
+ int ResStorageOrder>
+struct product_selfadjoint_matrix;
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
+ int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
+struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
+{
+
+ static EIGEN_STRONG_INLINE void run(
+ Index rows, Index cols,
+ const Scalar* lhs, Index lhsStride,
+ const Scalar* rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha)
+ {
+ product_selfadjoint_matrix<Scalar, Index,
+ EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
+ RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
+ EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
+ LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
+ ColMajor>
+ ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
+ }
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
+{
+
+ static EIGEN_DONT_INLINE void run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* _res, Index resStride,
+ const Scalar& alpha)
+ {
+ Index size = rows;
+
+ typedef gebp_traits<Scalar,Scalar> Traits;
+
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
+ typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
+ typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
+ LhsMapper lhs(_lhs,lhsStride);
+ LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
+ RhsMapper rhs(_rhs,rhsStride);
+ ResMapper res(_res, resStride);
+
+ Index kc = size; // cache block size along the K direction
+ Index mc = rows; // cache block size along the M direction
+ Index nc = cols; // cache block size along the N direction
+ computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, Index(1));
+ // kc must be smaller than mc
+ kc = (std::min)(kc,mc);
+
+ std::size_t sizeB = kc*cols;
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+ ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
+ Scalar* blockB = allocatedBlockB;
+
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+ symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
+ gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
+
+ for(Index k2=0; k2<size; k2+=kc)
+ {
+ const Index actual_kc = (std::min)(k2+kc,size)-k2;
+
+ // we have selected one row panel of rhs and one column panel of lhs
+ // pack rhs's panel into a sequential chunk of memory
+ // and expand each coeff to a constant packet for further reuse
+ pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);
+
+ // the selected lhs panel has to be split into three different parts:
+ // 1 - the transposed panel above the diagonal block => transposed packed copy
+ // 2 - the diagonal block => special packed copy
+ // 3 - the panel below the diagonal block => generic packed copy
+ for(Index i2=0; i2<k2; i2+=mc)
+ {
+ const Index actual_mc = (std::min)(i2+mc,k2)-i2;
+ // transposed packed copy
+ pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
+
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
+ }
+ // the block diagonal
+ {
+ const Index actual_mc = (std::min)(k2+kc,size)-k2;
+ // symmetric packed copy
+ pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
+
+ gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
+ }
+
+ for(Index i2=k2+kc; i2<size; i2+=mc)
+ {
+ const Index actual_mc = (std::min)(i2+mc,size)-i2;
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
+ (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
+
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
+ }
+ }
+ }
+
+// matrix * selfadjoint product
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
+{
+
+ static EIGEN_DONT_INLINE void run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* _res, Index resStride,
+ const Scalar& alpha)
+ {
+ Index size = cols;
+
+ typedef gebp_traits<Scalar,Scalar> Traits;
+
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
+ LhsMapper lhs(_lhs,lhsStride);
+ ResMapper res(_res,resStride);
+
+ Index kc = size; // cache block size along the K direction
+ Index mc = rows; // cache block size along the M direction
+ Index nc = cols; // cache block size along the N direction
+ computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, Index(1));
+ std::size_t sizeB = kc*cols;
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+ ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
+ Scalar* blockB = allocatedBlockB;
+
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
+
+ for(Index k2=0; k2<size; k2+=kc)
+ {
+ const Index actual_kc = (std::min)(k2+kc,size)-k2;
+
+ pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
+
+ // => GEPP
+ for(Index i2=0; i2<rows; i2+=mc)
+ {
+ const Index actual_mc = (std::min)(i2+mc,rows)-i2;
+ pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
+
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
+ }
+ }
+ }
+
+} // end namespace internal
+
+/***************************************************************************
+* Wrapper to product_selfadjoint_matrix
+***************************************************************************/
+
+namespace internal {
+template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
+struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
+ : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
+{};
+}
+
+template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
+struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
+ : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
+{
+ EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
+
+ SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
+
+ enum {
+ LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
+ LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
+ RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
+ RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
+ };
+
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+ {
+ eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+
+ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
+ typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
+ * RhsBlasTraits::extractScalarFactor(m_rhs);
+
+ internal::product_selfadjoint_matrix<Scalar, Index,
+ EIGEN_LOGICAL_XOR(LhsIsUpper,
+ internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
+ NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
+ EIGEN_LOGICAL_XOR(RhsIsUpper,
+ internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
+ NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
+ internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
+ ::run(
+ lhs.rows(), rhs.cols(), // sizes
+ &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
+ &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
+ &dst.coeffRef(0,0), dst.outerStride(), // result info
+ actualAlpha // alpha
+ );
+ }
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
new file mode 100644
index 0000000000..dfa687fefe
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
@@ -0,0 +1,295 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+ ********************************************************************************
+ * Content : Eigen bindings to Intel(R) MKL
+ * Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
+#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
+
+namespace Eigen {
+
+namespace internal {
+
+
+/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
+
+#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+ int LhsStorageOrder, bool ConjugateLhs, \
+ int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
+{\
+\
+ static void run( \
+ Index rows, Index cols, \
+ const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsStride, \
+ EIGTYPE* res, Index resStride, \
+ EIGTYPE alpha) \
+ { \
+ char side='L', uplo='L'; \
+ MKL_INT m, n, lda, ldb, ldc; \
+ const EIGTYPE *a, *b; \
+ MKLTYPE alpha_, beta_; \
+ MatrixX##EIGPREFIX b_tmp; \
+ EIGTYPE myone(1);\
+\
+/* Set transpose options */ \
+/* Set m, n, k */ \
+ m = (MKL_INT)rows; \
+ n = (MKL_INT)cols; \
+\
+/* Set alpha_ & beta_ */ \
+ assign_scalar_eig2mkl(alpha_, alpha); \
+ assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+ lda = (MKL_INT)lhsStride; \
+ ldb = (MKL_INT)rhsStride; \
+ ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+ if (LhsStorageOrder==RowMajor) uplo='U'; \
+ a = _lhs; \
+\
+ if (RhsStorageOrder==RowMajor) { \
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+ b_tmp = rhs.adjoint(); \
+ b = b_tmp.data(); \
+ ldb = b_tmp.outerStride(); \
+ } else b = _rhs; \
+\
+ MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+\
+ } \
+};
+
+
+#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+ int LhsStorageOrder, bool ConjugateLhs, \
+ int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
+{\
+ static void run( \
+ Index rows, Index cols, \
+ const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsStride, \
+ EIGTYPE* res, Index resStride, \
+ EIGTYPE alpha) \
+ { \
+ char side='L', uplo='L'; \
+ MKL_INT m, n, lda, ldb, ldc; \
+ const EIGTYPE *a, *b; \
+ MKLTYPE alpha_, beta_; \
+ MatrixX##EIGPREFIX b_tmp; \
+ Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
+ EIGTYPE myone(1); \
+\
+/* Set transpose options */ \
+/* Set m, n, k */ \
+ m = (MKL_INT)rows; \
+ n = (MKL_INT)cols; \
+\
+/* Set alpha_ & beta_ */ \
+ assign_scalar_eig2mkl(alpha_, alpha); \
+ assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+ lda = (MKL_INT)lhsStride; \
+ ldb = (MKL_INT)rhsStride; \
+ ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+ if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
+ Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
+ a_tmp = lhs.conjugate(); \
+ a = a_tmp.data(); \
+ lda = a_tmp.outerStride(); \
+ } else a = _lhs; \
+ if (LhsStorageOrder==RowMajor) uplo='U'; \
+\
+ if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
+ b = _rhs; } \
+ else { \
+ if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
+ b_tmp = rhs.conjugate(); \
+ } else \
+ if (ConjugateRhs) { \
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+ b_tmp = rhs.adjoint(); \
+ } else { \
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+ b_tmp = rhs.transpose(); \
+ } \
+ b = b_tmp.data(); \
+ ldb = b_tmp.outerStride(); \
+ } \
+\
+ MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+\
+ } \
+};
+
+EIGEN_MKL_SYMM_L(double, double, d, d)
+EIGEN_MKL_SYMM_L(float, float, f, s)
+EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c)
+
+
+/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
+
+#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+ int LhsStorageOrder, bool ConjugateLhs, \
+ int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
+{\
+\
+ static void run( \
+ Index rows, Index cols, \
+ const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsStride, \
+ EIGTYPE* res, Index resStride, \
+ EIGTYPE alpha) \
+ { \
+ char side='R', uplo='L'; \
+ MKL_INT m, n, lda, ldb, ldc; \
+ const EIGTYPE *a, *b; \
+ MKLTYPE alpha_, beta_; \
+ MatrixX##EIGPREFIX b_tmp; \
+ EIGTYPE myone(1);\
+\
+/* Set m, n, k */ \
+ m = (MKL_INT)rows; \
+ n = (MKL_INT)cols; \
+\
+/* Set alpha_ & beta_ */ \
+ assign_scalar_eig2mkl(alpha_, alpha); \
+ assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+ lda = (MKL_INT)rhsStride; \
+ ldb = (MKL_INT)lhsStride; \
+ ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+ if (RhsStorageOrder==RowMajor) uplo='U'; \
+ a = _rhs; \
+\
+ if (LhsStorageOrder==RowMajor) { \
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
+ b_tmp = lhs.adjoint(); \
+ b = b_tmp.data(); \
+ ldb = b_tmp.outerStride(); \
+ } else b = _lhs; \
+\
+ MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+\
+ } \
+};
+
+
+#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+ int LhsStorageOrder, bool ConjugateLhs, \
+ int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
+{\
+ static void run( \
+ Index rows, Index cols, \
+ const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsStride, \
+ EIGTYPE* res, Index resStride, \
+ EIGTYPE alpha) \
+ { \
+ char side='R', uplo='L'; \
+ MKL_INT m, n, lda, ldb, ldc; \
+ const EIGTYPE *a, *b; \
+ MKLTYPE alpha_, beta_; \
+ MatrixX##EIGPREFIX b_tmp; \
+ Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
+ EIGTYPE myone(1); \
+\
+/* Set m, n, k */ \
+ m = (MKL_INT)rows; \
+ n = (MKL_INT)cols; \
+\
+/* Set alpha_ & beta_ */ \
+ assign_scalar_eig2mkl(alpha_, alpha); \
+ assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+ lda = (MKL_INT)rhsStride; \
+ ldb = (MKL_INT)lhsStride; \
+ ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+ if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
+ Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
+ a_tmp = rhs.conjugate(); \
+ a = a_tmp.data(); \
+ lda = a_tmp.outerStride(); \
+ } else a = _rhs; \
+ if (RhsStorageOrder==RowMajor) uplo='U'; \
+\
+ if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
+ b = _lhs; } \
+ else { \
+ if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
+ b_tmp = lhs.conjugate(); \
+ } else \
+ if (ConjugateLhs) { \
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
+ b_tmp = lhs.adjoint(); \
+ } else { \
+ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
+ b_tmp = lhs.transpose(); \
+ } \
+ b = b_tmp.data(); \
+ ldb = b_tmp.outerStride(); \
+ } \
+\
+ MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+ } \
+};
+
+EIGEN_MKL_SYMM_R(double, double, d, d)
+EIGEN_MKL_SYMM_R(float, float, f, s)
+EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
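For context, a minimal sketch (not part of the patch) of the kind of expression served by the ?SYMM/?HEMM bindings above, assuming Eigen is built with its MKL support enabled so that product_selfadjoint_matrix dispatches to these macros; sizes are arbitrary illustration values.

#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(64, 64);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(64, 32);
  Eigen::MatrixXd C = Eigen::MatrixXd::Zero(64, 32);

  // Selfadjoint operand on the left: only the lower triangle of A is read
  // (the side='L' case handled by EIGEN_MKL_SYMM_L/EIGEN_MKL_HEMM_L).
  C.noalias() += A.selfadjointView<Eigen::Lower>() * B;

  // Selfadjoint operand on the right (the side='R' case).
  Eigen::MatrixXd D = B.transpose() * A.selfadjointView<Eigen::Lower>();
  return 0;
}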
diff --git a/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h b/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h
new file mode 100644
index 0000000000..fdc81205ab
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -0,0 +1,281 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
+#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
+
+namespace Eigen {
+
+namespace internal {
+
+/* Optimized selfadjoint matrix * vector product:
+ * This algorithm processes 2 columns at once, which both reduces the
+ * number of loads/stores of the result by a factor of 2 and reduces the
+ * instruction dependency.
+ */
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
+struct selfadjoint_matrix_vector_product;
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+struct selfadjoint_matrix_vector_product
+
+{
+static EIGEN_DONT_INLINE void run(
+ Index size,
+ const Scalar* lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsIncr,
+ Scalar* res,
+ Scalar alpha);
+};
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
+ Index size,
+ const Scalar* lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsIncr,
+ Scalar* res,
+ Scalar alpha)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
+
+ enum {
+ IsRowMajor = StorageOrder==RowMajor ? 1 : 0,
+ IsLower = UpLo == Lower ? 1 : 0,
+ FirstTriangular = IsRowMajor == IsLower
+ };
+
+ conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
+ conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
+ conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd;
+
+ conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
+ conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
+
+ Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;
+
+ // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
+ // if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
+ // this is because we need to extract packets
+ ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);
+ if (rhsIncr!=1)
+ {
+ const Scalar* it = _rhs;
+ for (Index i=0; i<size; ++i, it+=rhsIncr)
+ rhs[i] = *it;
+ }
+
+ Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
+ if (FirstTriangular)
+ bound = size - bound;
+
+ for (Index j=FirstTriangular ? bound : 0;
+ j<(FirstTriangular ? size : bound);j+=2)
+ {
+ const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+ const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
+
+ Scalar t0 = cjAlpha * rhs[j];
+ Packet ptmp0 = pset1<Packet>(t0);
+ Scalar t1 = cjAlpha * rhs[j+1];
+ Packet ptmp1 = pset1<Packet>(t1);
+
+ Scalar t2(0);
+ Packet ptmp2 = pset1<Packet>(t2);
+ Scalar t3(0);
+ Packet ptmp3 = pset1<Packet>(t3);
+
+ size_t starti = FirstTriangular ? 0 : j+2;
+ size_t endi = FirstTriangular ? j : size;
+ size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
+ size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
+
+ // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
+ res[j] += cjd.pmul(numext::real(A0[j]), t0);
+ res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1);
+ if(FirstTriangular)
+ {
+ res[j] += cj0.pmul(A1[j], t1);
+ t3 += cj1.pmul(A1[j], rhs[j]);
+ }
+ else
+ {
+ res[j+1] += cj0.pmul(A0[j+1],t0);
+ t2 += cj1.pmul(A0[j+1], rhs[j+1]);
+ }
+
+ for (size_t i=starti; i<alignedStart; ++i)
+ {
+ res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
+ t2 += cj1.pmul(A0[i], rhs[i]);
+ t3 += cj1.pmul(A1[i], rhs[i]);
+ }
+ // Yes, this is an optimization for gcc 4.3 and 4.4 (=> huge speed up)
+ // gcc 4.2 does this optimization automatically.
+ const Scalar* EIGEN_RESTRICT a0It = A0 + alignedStart;
+ const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart;
+ const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;
+ Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
+ for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
+ {
+ Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize;
+ Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize;
+ Packet Bi = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
+ Packet Xi = pload <Packet>(resIt);
+
+ Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
+ ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2);
+ ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);
+ pstore(resIt,Xi); resIt += PacketSize;
+ }
+ for (size_t i=alignedEnd; i<endi; i++)
+ {
+ res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
+ t2 += cj1.pmul(A0[i], rhs[i]);
+ t3 += cj1.pmul(A1[i], rhs[i]);
+ }
+
+ res[j] += alpha * (t2 + predux(ptmp2));
+ res[j+1] += alpha * (t3 + predux(ptmp3));
+ }
+ for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
+ {
+ const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+
+ Scalar t1 = cjAlpha * rhs[j];
+ Scalar t2(0);
+ // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
+ res[j] += cjd.pmul(numext::real(A0[j]), t1);
+ for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
+ {
+ res[i] += cj0.pmul(A0[i], t1);
+ t2 += cj1.pmul(A0[i], rhs[i]);
+ }
+ res[j] += alpha * t2;
+ }
+}
+
+} // end namespace internal
+
+/***************************************************************************
+* Wrapper to product_selfadjoint_vector
+***************************************************************************/
+
+namespace internal {
+template<typename Lhs, int LhsMode, typename Rhs>
+struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
+ : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
+{};
+}
+
+template<typename Lhs, int LhsMode, typename Rhs>
+struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
+ : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs >
+{
+ EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
+
+ enum {
+ LhsUpLo = LhsMode&(Upper|Lower)
+ };
+
+ SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
+
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+ {
+ typedef typename Dest::Scalar ResScalar;
+ typedef typename Base::RhsScalar RhsScalar;
+ typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
+
+ eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
+
+ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
+ typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
+ * RhsBlasTraits::extractScalarFactor(m_rhs);
+
+ enum {
+ EvalToDest = (Dest::InnerStrideAtCompileTime==1),
+ UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1)
+ };
+
+ internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
+ internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
+
+ ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+ EvalToDest ? dest.data() : static_dest.data());
+
+ ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
+ UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
+
+ if(!EvalToDest)
+ {
+ #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ int size = dest.size();
+ EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #endif
+ MappedDest(actualDestPtr, dest.size()) = dest;
+ }
+
+ if(!UseRhs)
+ {
+ #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ int size = rhs.size();
+ EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #endif
+ Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
+ }
+
+
+ internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
+ (
+ lhs.rows(), // size
+ &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
+ actualRhsPtr, 1, // rhs info
+ actualDestPtr, // result info
+ actualAlpha // scale factor
+ );
+
+ if(!EvalToDest)
+ dest = MappedDest(actualDestPtr, dest.size());
+ }
+};
+
+namespace internal {
+template<typename Lhs, typename Rhs, int RhsMode>
+struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
+ : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
+{};
+}
+
+template<typename Lhs, typename Rhs, int RhsMode>
+struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
+ : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
+{
+ EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
+
+ enum {
+ RhsUpLo = RhsMode&(Upper|Lower)
+ };
+
+ SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
+
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+ {
+ // let's simply transpose the product
+ Transpose<Dest> destT(dest);
+ SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
+ Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha);
+ }
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
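A minimal usage sketch (editorial, not part of the patch) of the product evaluated by selfadjoint_matrix_vector_product above; sizes and the scale factor are illustrative only.

#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(100, 100);
  Eigen::VectorXd x = Eigen::VectorXd::Random(100);

  // Only the upper triangle of A is accessed; the product goes through the
  // two-columns-at-once kernel defined in this file (or a BLAS specialization).
  Eigen::VectorXd y = 2.0 * (A.selfadjointView<Eigen::Upper>() * x);
  return 0;
}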
diff --git a/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
new file mode 100644
index 0000000000..86684b66d9
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
@@ -0,0 +1,114 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ * Content : Eigen bindings to Intel(R) MKL
+ * Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
+#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
+
+namespace Eigen {
+
+namespace internal {
+
+/**********************************************************************
+* This file implements selfadjoint matrix-vector multiplication using BLAS
+**********************************************************************/
+
+// symv/hemv specialization
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
+struct selfadjoint_matrix_vector_product_symv :
+ selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
+
+#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
+template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
+struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
+static void run( \
+ Index size, const Scalar* lhs, Index lhsStride, \
+ const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
+ enum {\
+ IsColMajor = StorageOrder==ColMajor \
+ }; \
+ if (IsColMajor == ConjugateLhs) {\
+ selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
+ size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
+ } else {\
+ selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
+ size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
+ }\
+ } \
+}; \
+
+EIGEN_MKL_SYMV_SPECIALIZE(double)
+EIGEN_MKL_SYMV_SPECIALIZE(float)
+EIGEN_MKL_SYMV_SPECIALIZE(dcomplex)
+EIGEN_MKL_SYMV_SPECIALIZE(scomplex)
+
+#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \
+template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
+struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
+{ \
+typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
+\
+static void run( \
+Index size, const EIGTYPE* lhs, Index lhsStride, \
+const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
+{ \
+ enum {\
+ IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
+ IsLower = UpLo == Lower ? 1 : 0 \
+ }; \
+ MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \
+ MKLTYPE alpha_, beta_; \
+ const EIGTYPE *x_ptr, myone(1); \
+ char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
+ assign_scalar_eig2mkl(alpha_, alpha); \
+ assign_scalar_eig2mkl(beta_, myone); \
+ SYMVVector x_tmp; \
+ if (ConjugateRhs) { \
+ Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \
+ x_tmp=map_x.conjugate(); \
+ x_ptr=x_tmp.data(); \
+ incx=1; \
+ } else x_ptr=_rhs; \
+ MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
+}\
+};
+
+EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv)
+EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv)
+EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
+EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
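The uplo character computed in the macro above compensates for BLAS being column-major: a row-major Eigen matrix is presented to ?symv/?hemv as its transpose, so the stored triangle flips. A standalone restatement of that mapping (blas_uplo is a hypothetical name, not part of the patch):

#include <cassert>

// Hypothetical helper mirroring the uplo selection in EIGEN_MKL_SYMV_SPECIALIZATION.
inline char blas_uplo(bool is_row_major, bool is_lower) {
  return is_row_major ? (is_lower ? 'U' : 'L')
                      : (is_lower ? 'L' : 'U');
}

int main() {
  assert(blas_uplo(false, true) == 'L');  // column-major, lower triangle stored
  assert(blas_uplo(true,  true) == 'U');  // row-major lower looks upper to BLAS
  return 0;
}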
diff --git a/third_party/eigen3/Eigen/src/Core/products/SelfadjointProduct.h b/third_party/eigen3/Eigen/src/Core/products/SelfadjointProduct.h
new file mode 100644
index 0000000000..6ca4ae6c0f
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/SelfadjointProduct.h
@@ -0,0 +1,123 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFADJOINT_PRODUCT_H
+#define EIGEN_SELFADJOINT_PRODUCT_H
+
+/**********************************************************************
+* This file implements a selfadjoint product: C += A A^T, updating only
+* half of the selfadjoint matrix C.
+* It corresponds to the level 3 SYRK and level 2 SYR BLAS routines.
+**********************************************************************/
+
+namespace Eigen {
+
+
+template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
+struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
+{
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
+ {
+ internal::conj_if<ConjRhs> cj;
+ typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;
+ typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjLhsType;
+ for (Index i=0; i<size; ++i)
+ {
+ Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1)))
+ += (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
+ }
+ }
+};
+
+template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
+struct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>
+{
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
+ {
+ selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vecY,vecX,alpha);
+ }
+};
+
+template<typename MatrixType, typename OtherType, int UpLo, bool OtherIsVector = OtherType::IsVectorAtCompileTime>
+struct selfadjoint_product_selector;
+
+template<typename MatrixType, typename OtherType, int UpLo>
+struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
+{
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::Index Index;
+ typedef internal::blas_traits<OtherType> OtherBlasTraits;
+ typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
+ typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
+ typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
+
+ Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
+
+ enum {
+ StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1
+ };
+ internal::gemv_static_vector_if<Scalar,OtherType::SizeAtCompileTime,OtherType::MaxSizeAtCompileTime,!UseOtherDirectly> static_other;
+
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(),
+ (UseOtherDirectly ? const_cast<Scalar*>(actualOther.data()) : static_other.data()));
+
+ if(!UseOtherDirectly)
+ Map<typename _ActualOtherType::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;
+
+ selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
+ OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
+ (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
+ ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha);
+ }
+};
+
+template<typename MatrixType, typename OtherType, int UpLo>
+struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
+{
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::Index Index;
+ typedef internal::blas_traits<OtherType> OtherBlasTraits;
+ typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
+ typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
+ typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
+
+ Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
+
+ enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
+
+ internal::general_matrix_matrix_triangular_product<Index,
+ Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
+ Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
+ MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
+ ::run(mat.cols(), actualOther.cols(),
+ &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
+ mat.data(), mat.outerStride(), actualAlpha);
+ }
+};
+
+// high level API
+
+template<typename MatrixType, unsigned int UpLo>
+template<typename DerivedU>
+SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
+::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
+{
+ selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
+
+ return *this;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_PRODUCT_H
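A brief usage sketch (editorial) of the rankUpdate() entry point defined above; sizes and the 0.5 scale are arbitrary. The vector form follows the SYR-style rank-1 path, the matrix form the SYRK-style path.

#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd C = Eigen::MatrixXd::Zero(8, 8);
  Eigen::VectorXd u = Eigen::VectorXd::Random(8);
  Eigen::MatrixXd U = Eigen::MatrixXd::Random(8, 3);

  // Rank-1 update: lower triangle of C += 0.5 * u * u^T.
  C.selfadjointView<Eigen::Lower>().rankUpdate(u, 0.5);

  // Rank-k update: lower triangle of C += U * U^T (alpha defaults to 1).
  C.selfadjointView<Eigen::Lower>().rankUpdate(U);
  return 0;
}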
diff --git a/third_party/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h b/third_party/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h
new file mode 100644
index 0000000000..8594a97cea
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -0,0 +1,93 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFADJOINTRANK2UPDATE_H
+#define EIGEN_SELFADJOINTRANK2UPDATE_H
+
+namespace Eigen {
+
+namespace internal {
+
+/* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu'
+ * It corresponds to the Level 2 SYR2 BLAS routine.
+ */
+
+template<typename Scalar, typename Index, typename UType, typename VType, int UpLo>
+struct selfadjoint_rank2_update_selector;
+
+template<typename Scalar, typename Index, typename UType, typename VType>
+struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
+{
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
+ {
+ const Index size = u.size();
+ for (Index i=0; i<size; ++i)
+ {
+ Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) +=
+ (numext::conj(alpha) * numext::conj(u.coeff(i))) * v.tail(size-i)
+ + (alpha * numext::conj(v.coeff(i))) * u.tail(size-i);
+ }
+ }
+};
+
+template<typename Scalar, typename Index, typename UType, typename VType>
+struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
+{
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
+ {
+ const Index size = u.size();
+ for (Index i=0; i<size; ++i)
+ Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) +=
+ (numext::conj(alpha) * numext::conj(u.coeff(i))) * v.head(i+1)
+ + (alpha * numext::conj(v.coeff(i))) * u.head(i+1);
+ }
+};
+
+template<bool Cond, typename T> struct conj_expr_if
+ : conditional<!Cond, const T&,
+ CwiseUnaryOp<scalar_conjugate_op<typename traits<T>::Scalar>,T> > {};
+
+} // end namespace internal
+
+template<typename MatrixType, unsigned int UpLo>
+template<typename DerivedU, typename DerivedV>
+SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
+::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
+{
+ typedef internal::blas_traits<DerivedU> UBlasTraits;
+ typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
+ typedef typename internal::remove_all<ActualUType>::type _ActualUType;
+ typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived());
+
+ typedef internal::blas_traits<DerivedV> VBlasTraits;
+ typedef typename VBlasTraits::DirectLinearAccessType ActualVType;
+ typedef typename internal::remove_all<ActualVType>::type _ActualVType;
+ typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived());
+
+ // If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and
+ // vice versa, and take the complex conjugate of all coefficients and vector entries.
+
+ enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
+ Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived())
+ * numext::conj(VBlasTraits::extractScalarFactor(v.derived()));
+ if (IsRowMajor)
+ actualAlpha = numext::conj(actualAlpha);
+
+ internal::selfadjoint_rank2_update_selector<Scalar, Index,
+ typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type,
+ typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type,
+ (IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>
+ ::run(_expression().const_cast_derived().data(),_expression().outerStride(),actualU,actualV,actualAlpha);
+
+ return *this;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINTRANK2UPDATE_H
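A short usage sketch (editorial) of the two-vector rankUpdate() overload above, i.e. the SYR2/HER2-style update C += alpha*u*v^* + conj(alpha)*v*u^*; sizes and alpha are illustrative.

#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd C = Eigen::MatrixXd::Zero(8, 8);
  Eigen::VectorXd u = Eigen::VectorXd::Random(8);
  Eigen::VectorXd v = Eigen::VectorXd::Random(8);

  // Only the upper triangle of C is written.
  C.selfadjointView<Eigen::Upper>().rankUpdate(u, v, 0.25);
  return 0;
}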
diff --git a/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h b/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h
new file mode 100644
index 0000000000..4cbb79da0c
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -0,0 +1,434 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
+#define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
+
+namespace Eigen {
+
+namespace internal {
+
+// template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode>
+// struct gemm_pack_lhs_triangular
+// {
+// Matrix<Scalar,mr,mr,
+// void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar* _lhs, int lhsStride, int depth, int rows)
+// {
+// conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+// const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride);
+// int count = 0;
+// const int peeled_mc = (rows/mr)*mr;
+// for(int i=0; i<peeled_mc; i+=mr)
+// {
+// for(int k=0; k<depth; k++)
+// for(int w=0; w<mr; w++)
+// blockA[count++] = cj(lhs(i+w, k));
+// }
+// for(int i=peeled_mc; i<rows; i++)
+// {
+// for(int k=0; k<depth; k++)
+// blockA[count++] = cj(lhs(i, k));
+// }
+// }
+// };
+
+/* Optimized triangular matrix * matrix (_TRMM++) product built on top of
+ * the general matrix matrix product.
+ */
+template <typename Scalar, typename Index,
+ int Mode, bool LhsIsTriangular,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs,
+ int ResStorageOrder, int Version = Specialized>
+struct product_triangular_matrix_matrix;
+
+template <typename Scalar, typename Index,
+ int Mode, bool LhsIsTriangular,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,RowMajor,Version>
+{
+ static EIGEN_STRONG_INLINE void run(
+ Index rows, Index cols, Index depth,
+ const Scalar* lhs, Index lhsStride,
+ const Scalar* rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
+ {
+ product_triangular_matrix_matrix<Scalar, Index,
+ (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
+ (!LhsIsTriangular),
+ RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
+ ConjugateRhs,
+ LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
+ ConjugateLhs,
+ ColMajor>
+ ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
+ }
+};
+
+// implements col-major += alpha * op(triangular) * op(general)
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>
+{
+
+ typedef gebp_traits<Scalar,Scalar> Traits;
+ enum {
+ SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+ IsLower = (Mode&Lower) == Lower,
+ SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
+ };
+
+ static EIGEN_DONT_INLINE void run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* _res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
+ {
+ // strip zeros
+ Index diagSize = (std::min)(_rows,_depth);
+ Index rows = IsLower ? _rows : diagSize;
+ Index depth = IsLower ? diagSize : _depth;
+ Index cols = _cols;
+
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
+ typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
+ LhsMapper lhs(_lhs,lhsStride);
+ RhsMapper rhs(_rhs,rhsStride);
+ ResMapper res(_res, resStride);
+
+ Index kc = blocking.kc(); // cache block size along the K direction
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
+
+ std::size_t sizeA = kc*mc;
+ std::size_t sizeB = kc*cols;
+
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+
+ Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
+ triangularBuffer.setZero();
+ if((Mode&ZeroDiag)==ZeroDiag)
+ triangularBuffer.diagonal().setZero();
+ else
+ triangularBuffer.diagonal().setOnes();
+
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
+
+ for(Index k2=IsLower ? depth : 0;
+ IsLower ? k2>0 : k2<depth;
+ IsLower ? k2-=kc : k2+=kc)
+ {
+ Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);
+ Index actual_k2 = IsLower ? k2-actual_kc : k2;
+
+ // align blocks with the end of the triangular part for trapezoidal lhs
+ if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
+ {
+ actual_kc = rows-k2;
+ k2 = k2+actual_kc-kc;
+ }
+
+ pack_rhs(blockB, rhs.getSubMapper(actual_k2,0), actual_kc, cols);
+
+ // the selected lhs's panel has to be split into three different parts:
+ // 1 - the part which is zero => skip it
+ // 2 - the diagonal block => special kernel
+ // 3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP
+
+ // the block diagonal, if any:
+ if(IsLower || actual_k2<rows)
+ {
+ // for each small vertical panels of lhs
+ for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
+ {
+ Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
+ Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
+ Index startBlock = actual_k2+k1;
+ Index blockBOffset = k1;
+
+ // => GEBP with the micro triangular block
+ // The trick is to pack this micro block while filling the opposite triangular part with zeros.
+ // To this end we do an extra triangular copy to a small temporary buffer
+ for (Index k=0;k<actualPanelWidth;++k)
+ {
+ if (SetDiag)
+ triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
+ for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
+ triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
+ }
+ pack_lhs(blockA, LhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()), actualPanelWidth, actualPanelWidth);
+
+ gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB,
+ actualPanelWidth, actualPanelWidth, cols, alpha,
+ actualPanelWidth, actual_kc, 0, blockBOffset);
+
+ // GEBP with remaining micro panel
+ if (lengthTarget>0)
+ {
+ Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;
+
+ pack_lhs(blockA, lhs.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
+
+ gebp_kernel(res.getSubMapper(startTarget, 0), blockA, blockB,
+ lengthTarget, actualPanelWidth, cols, alpha,
+ actualPanelWidth, actual_kc, 0, blockBOffset);
+ }
+ }
+ }
+ // the part below (lower case) or above (upper case) the diagonal => GEPP
+ {
+ Index start = IsLower ? k2 : 0;
+ Index end = IsLower ? rows : (std::min)(actual_k2,rows);
+ for(Index i2=start; i2<end; i2+=mc)
+ {
+ const Index actual_mc = (std::min)(i2+mc,end)-i2;
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
+ (blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
+
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,
+ actual_kc, cols, alpha, -1, -1, 0, 0);
+ }
+ }
+ }
+ }
+
+// implements col-major += alpha * op(general) * op(triangular)
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>
+{
+ typedef gebp_traits<Scalar,Scalar> Traits;
+ enum {
+ SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+ IsLower = (Mode&Lower) == Lower,
+ SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
+ };
+
+ static EIGEN_DONT_INLINE void run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* _res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
+ {
+ // strip zeros
+ Index diagSize = (std::min)(_cols,_depth);
+ Index rows = _rows;
+ Index depth = IsLower ? _depth : diagSize;
+ Index cols = IsLower ? diagSize : _cols;
+
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
+ typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
+ LhsMapper lhs(_lhs,lhsStride);
+ RhsMapper rhs(_rhs,rhsStride);
+ ResMapper res(_res, resStride);
+
+ Index kc = blocking.kc(); // cache block size along the K direction
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
+
+ std::size_t sizeA = kc*mc;
+ std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar);
+
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+
+ Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
+ triangularBuffer.setZero();
+ if((Mode&ZeroDiag)==ZeroDiag)
+ triangularBuffer.diagonal().setZero();
+ else
+ triangularBuffer.diagonal().setOnes();
+
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
+
+ for(Index k2=IsLower ? 0 : depth;
+ IsLower ? k2<depth : k2>0;
+ IsLower ? k2+=kc : k2-=kc)
+ {
+ Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);
+ Index actual_k2 = IsLower ? k2 : k2-actual_kc;
+
+ // align blocks with the end of the triangular part for trapezoidal rhs
+ if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
+ {
+ actual_kc = cols-k2;
+ k2 = actual_k2 + actual_kc - kc;
+ }
+
+ // remaining size
+ Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
+ // size of the triangular part
+ Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
+
+ Scalar* geb = blockB+ts*ts;
+ geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar));
+
+ pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);
+
+ // pack the triangular part of the rhs padding the unrolled blocks with zeros
+ if(ts>0)
+ {
+ for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
+ {
+ Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
+ Index actual_j2 = actual_k2 + j2;
+ Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
+ Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
+ // general part
+ pack_rhs_panel(blockB+j2*actual_kc,
+ rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
+ panelLength, actualPanelWidth,
+ actual_kc, panelOffset);
+
+ // append the triangular part via a temporary buffer
+ for (Index j=0;j<actualPanelWidth;++j)
+ {
+ if (SetDiag)
+ triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
+ for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
+ triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
+ }
+
+ pack_rhs_panel(blockB+j2*actual_kc,
+ RhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()),
+ actualPanelWidth, actualPanelWidth,
+ actual_kc, j2);
+ }
+ }
+
+ for (Index i2=0; i2<rows; i2+=mc)
+ {
+ const Index actual_mc = (std::min)(mc,rows-i2);
+ pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
+
+ // triangular kernel
+ if(ts>0)
+ {
+ for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
+ {
+ Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
+ Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
+ Index blockOffset = IsLower ? j2 : 0;
+
+ gebp_kernel(res.getSubMapper(i2, actual_k2 + j2),
+ blockA, blockB+j2*actual_kc,
+ actual_mc, panelLength, actualPanelWidth,
+ alpha,
+ actual_kc, actual_kc, // strides
+ blockOffset, blockOffset);// offsets
+ }
+ }
+ gebp_kernel(res.getSubMapper(i2, IsLower ? 0 : k2),
+ blockA, geb, actual_mc, actual_kc, rs,
+ alpha,
+ -1, -1, 0, 0);
+ }
+ }
+ }
+
+/***************************************************************************
+* Wrapper to product_triangular_matrix_matrix
+***************************************************************************/
+
+template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
+struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> >
+ : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> >
+{};
+
+} // end namespace internal
+
+template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
+struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
+ : public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs >
+{
+ EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
+
+ TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
+
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+ {
+ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
+ typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
+ * RhsBlasTraits::extractScalarFactor(m_rhs);
+
+ typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
+ Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
+
+ enum { IsLower = (Mode&Lower) == Lower };
+ Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols());
+ Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows());
+ Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))
+ : ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols()));
+
+ BlockingType blocking(stripedRows, stripedCols, stripedDepth, 1, false);
+
+ internal::product_triangular_matrix_matrix<Scalar, Index,
+ Mode, LhsIsTriangular,
+ (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+ (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+ (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
+ ::run(
+ stripedRows, stripedCols, stripedDepth, // sizes
+ &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
+ &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
+ &dst.coeffRef(0,0), dst.outerStride(), // result info
+ actualAlpha, blocking
+ );
+ }
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
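A minimal sketch (editorial) of expressions that reach product_triangular_matrix_matrix above; sizes are illustrative. The triangular operand may sit on either side, and unit/zero-diagonal variants follow the Mode flags handled by the kernel.

#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(32, 32);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(32, 16);

  // Triangular operand on the left: entries below the diagonal of A are ignored.
  Eigen::MatrixXd C = A.triangularView<Eigen::Upper>() * B;

  // Triangular operand on the right, with an implicit unit diagonal.
  Eigen::MatrixXd D = B.transpose() * A.triangularView<Eigen::UnitLower>();
  return 0;
}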
diff --git a/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
new file mode 100644
index 0000000000..ba41a1c99f
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
@@ -0,0 +1,309 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ * Content : Eigen bindings to Intel(R) MKL
+ * Triangular matrix * matrix product functionality based on ?TRMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
+#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
+
+namespace Eigen {
+
+namespace internal {
+
+
+template <typename Scalar, typename Index,
+ int Mode, bool LhsIsTriangular,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs,
+ int ResStorageOrder>
+struct product_triangular_matrix_matrix_trmm :
+ product_triangular_matrix_matrix<Scalar,Index,Mode,
+ LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
+
+
+// try to go to BLAS specialization
+#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
+template <typename Index, int Mode, \
+ int LhsStorageOrder, bool ConjugateLhs, \
+ int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
+ LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
+ static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
+ const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
+ product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
+ LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
+ RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
+ _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
+ } \
+};
+
+EIGEN_MKL_TRMM_SPECIALIZE(double, true)
+EIGEN_MKL_TRMM_SPECIALIZE(double, false)
+EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true)
+EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false)
+EIGEN_MKL_TRMM_SPECIALIZE(float, true)
+EIGEN_MKL_TRMM_SPECIALIZE(float, false)
+EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true)
+EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false)
+
+// implements col-major += alpha * op(triangular) * op(general)
+#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, int Mode, \
+ int LhsStorageOrder, bool ConjugateLhs, \
+ int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
+ LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
+{ \
+ enum { \
+ IsLower = (Mode&Lower) == Lower, \
+ SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+ IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
+ IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
+ LowUp = IsLower ? Lower : Upper, \
+ conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
+ }; \
+\
+ static void run( \
+ Index _rows, Index _cols, Index _depth, \
+ const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsStride, \
+ EIGTYPE* res, Index resStride, \
+ EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
+ { \
+ Index diagSize = (std::min)(_rows,_depth); \
+ Index rows = IsLower ? _rows : diagSize; \
+ Index depth = IsLower ? diagSize : _depth; \
+ Index cols = _cols; \
+\
+ typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
+ typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
+\
+/* Non-square case - doesn't fit MKL ?TRMM. Fall back to the default triangular product or call MKL ?GEMM */ \
+ if (rows != depth) { \
+\
+ int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
+\
+ if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
+ /* Most likely no benefit in calling TRMM or GEMM from MKL */ \
+ product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
+ LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
+ _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
+ /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
+ } else { \
+ /* Makes sense to call GEMM */ \
+ Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+ MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
+ MKL_INT aStride = aa_tmp.outerStride(); \
+ gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
+ general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
+ rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
+\
+ /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
+ } \
+ return; \
+ } \
+ char side = 'L', transa, uplo, diag = 'N'; \
+ EIGTYPE *b; \
+ const EIGTYPE *a; \
+ MKL_INT m, n, lda, ldb; \
+ MKLTYPE alpha_; \
+\
+/* Set alpha_*/ \
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+\
+/* Set m, n */ \
+ m = (MKL_INT)diagSize; \
+ n = (MKL_INT)cols; \
+\
+/* Set trans */ \
+ transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set b, ldb */ \
+ Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
+ MatrixX##EIGPREFIX b_tmp; \
+\
+ if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
+ b = b_tmp.data(); \
+ ldb = b_tmp.outerStride(); \
+\
+/* Set uplo */ \
+ uplo = IsLower ? 'L' : 'U'; \
+ if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+ Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+ MatrixLhs a_tmp; \
+\
+ if ((conjA!=0) || (SetDiag==0)) { \
+ if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
+ if (IsZeroDiag) \
+ a_tmp.diagonal().setZero(); \
+ else if (IsUnitDiag) \
+ a_tmp.diagonal().setOnes();\
+ a = a_tmp.data(); \
+ lda = a_tmp.outerStride(); \
+ } else { \
+ a = _lhs; \
+ lda = lhsStride; \
+ } \
+ /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \
+/* call ?trmm*/ \
+ MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
+\
+/* Add op(a_triangular)*b into res*/ \
+ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
+ res_tmp=res_tmp+b_tmp; \
+ } \
+};
+
+EIGEN_MKL_TRMM_L(double, double, d, d)
+EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMM_L(float, float, f, s)
+EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c)
+
+// implements col-major += alpha * op(general) * op(triangular)
+#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, int Mode, \
+ int LhsStorageOrder, bool ConjugateLhs, \
+ int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
+ LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
+{ \
+ enum { \
+ IsLower = (Mode&Lower) == Lower, \
+ SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+ IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
+ IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
+ LowUp = IsLower ? Lower : Upper, \
+ conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
+ }; \
+\
+ static void run( \
+ Index _rows, Index _cols, Index _depth, \
+ const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsStride, \
+ EIGTYPE* res, Index resStride, \
+ EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
+ { \
+ Index diagSize = (std::min)(_cols,_depth); \
+ Index rows = _rows; \
+ Index depth = IsLower ? _depth : diagSize; \
+ Index cols = IsLower ? diagSize : _cols; \
+\
+ typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
+ typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
+\
+/* Non-square case - doesn't fit MKL ?TRMM. Fall back to the default triangular product or call MKL ?GEMM */ \
+ if (cols != depth) { \
+\
+ int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
+\
+ if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
+ /* Most likely no benefit in calling TRMM or GEMM from MKL */ \
+ product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
+ LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
+ _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
+ /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
+ } else { \
+ /* Makes sense to call GEMM */ \
+ Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
+ MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
+ MKL_INT aStride = aa_tmp.outerStride(); \
+ gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
+ general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
+ rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
+\
+ /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
+ } \
+ return; \
+ } \
+ char side = 'R', transa, uplo, diag = 'N'; \
+ EIGTYPE *b; \
+ const EIGTYPE *a; \
+ MKL_INT m, n, lda, ldb; \
+ MKLTYPE alpha_; \
+\
+/* Set alpha_*/ \
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+\
+/* Set m, n */ \
+ m = (MKL_INT)rows; \
+ n = (MKL_INT)diagSize; \
+\
+/* Set trans */ \
+ transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set b, ldb */ \
+ Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+ MatrixX##EIGPREFIX b_tmp; \
+\
+ if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
+ b = b_tmp.data(); \
+ ldb = b_tmp.outerStride(); \
+\
+/* Set uplo */ \
+ uplo = IsLower ? 'L' : 'U'; \
+ if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+ Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
+ MatrixRhs a_tmp; \
+\
+ if ((conjA!=0) || (SetDiag==0)) { \
+ if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
+ if (IsZeroDiag) \
+ a_tmp.diagonal().setZero(); \
+ else if (IsUnitDiag) \
+ a_tmp.diagonal().setOnes();\
+ a = a_tmp.data(); \
+ lda = a_tmp.outerStride(); \
+ } else { \
+ a = _rhs; \
+ lda = rhsStride; \
+ } \
+ /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \
+/* call ?trmm*/ \
+ MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
+\
+/* Add op(a_triangular)*b into res*/ \
+ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
+ res_tmp=res_tmp+b_tmp; \
+ } \
+};
+
+EIGEN_MKL_TRMM_R(double, double, d, d)
+EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMM_R(float, float, f, s)
+EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
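The non-square dispatch in the two macros above boils down to a small heuristic: with a single MKL thread and only a thin rectangular band beyond the triangular block, the built-in Eigen kernel is kept; otherwise the triangle is materialized and ?GEMM is called. A standalone restatement under that reading (use_builtin_trmm is a hypothetical name, and rows/depth stand for the dimensions compared in EIGEN_MKL_TRMM_L, or cols/depth in EIGEN_MKL_TRMM_R):

#include <algorithm>

// Hypothetical helper restating the fallback test used in EIGEN_MKL_TRMM_L/_R.
inline bool use_builtin_trmm(int mkl_threads, long rows, long depth) {
  const long diagSize = std::min(rows, depth);
  const double extra = double(std::max(rows, depth) - diagSize) / double(diagSize);
  return mkl_threads == 1 && extra < 0.5;
}

int main() {
  // Nearly square triangular factor on a single thread: stay with the built-in kernel.
  const bool builtin = use_builtin_trmm(1, 1000, 1100);
  return builtin ? 0 : 1;
}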
diff --git a/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h b/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h
new file mode 100644
index 0000000000..9863076958
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -0,0 +1,354 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRIANGULARMATRIXVECTOR_H
+#define EIGEN_TRIANGULARMATRIXVECTOR_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized>
+struct triangular_matrix_vector_product;
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
+{
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+ enum {
+ IsLower = ((Mode&Lower)==Lower),
+ HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
+ HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
+ };
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
+ {
+ static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+ Index size = (std::min)(_rows,_cols);
+ Index rows = IsLower ? _rows : (std::min)(_rows,_cols);
+ Index cols = IsLower ? (std::min)(_rows,_cols) : _cols;
+
+ typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
+ const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
+ typename conj_expr_if<ConjLhs,LhsMap>::type cjLhs(lhs);
+
+ typedef Map<const Matrix<RhsScalar,Dynamic,1>, 0, InnerStride<> > RhsMap;
+ const RhsMap rhs(_rhs,cols,InnerStride<>(rhsIncr));
+ typename conj_expr_if<ConjRhs,RhsMap>::type cjRhs(rhs);
+
+ typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap;
+ ResMap res(_res,rows);
+
+ typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
+
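+ // Process the triangular block in panels of width PanelWidth: inside each panel the
+ // triangular columns are applied coefficient-wise, while the rectangular block below
+ // (for a lower triangle) or above (for an upper triangle) the panel is delegated to
+ // the general matrix-vector kernel.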
+ for (Index pi=0; pi<size; pi+=PanelWidth)
+ {
+ Index actualPanelWidth = (std::min)(PanelWidth, size-pi);
+ for (Index k=0; k<actualPanelWidth; ++k)
+ {
+ Index i = pi + k;
+ Index s = IsLower ? ((HasUnitDiag||HasZeroDiag) ? i+1 : i ) : pi;
+ Index r = IsLower ? actualPanelWidth-k : k+1;
+ if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
+ res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r);
+ if (HasUnitDiag)
+ res.coeffRef(i) += alpha * cjRhs.coeff(i);
+ }
+ Index r = IsLower ? rows - pi - actualPanelWidth : pi;
+ if (r>0)
+ {
+ Index s = IsLower ? pi+actualPanelWidth : 0;
+ general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs,BuiltIn>::run(
+ r, actualPanelWidth,
+ LhsMapper(&lhs.coeffRef(s,pi), lhsStride),
+ RhsMapper(&rhs.coeffRef(pi), rhsIncr),
+ &res.coeffRef(s), resIncr, alpha);
+ }
+ }
+ if((!IsLower) && cols>size)
+ {
+ general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs>::run(
+ rows, cols-size,
+ LhsMapper(&lhs.coeffRef(0,size), lhsStride),
+ RhsMapper(&rhs.coeffRef(size), rhsIncr),
+ _res, resIncr, alpha);
+ }
+ }
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
+{
+ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+ enum {
+ IsLower = ((Mode&Lower)==Lower),
+ HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
+ HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
+ };
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
+ {
+ static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+ Index diagSize = (std::min)(_rows,_cols);
+ Index rows = IsLower ? _rows : diagSize;
+ Index cols = IsLower ? diagSize : _cols;
+
+ typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
+ const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
+ typename conj_expr_if<ConjLhs,LhsMap>::type cjLhs(lhs);
+
+ typedef Map<const Matrix<RhsScalar,Dynamic,1> > RhsMap;
+ const RhsMap rhs(_rhs,cols);
+ typename conj_expr_if<ConjRhs,RhsMap>::type cjRhs(rhs);
+
+ typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap;
+ ResMap res(_res,rows,InnerStride<>(resIncr));
+
+ typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
+
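+ // Same panel scheme as the column-major kernel above, except that each result
+ // coefficient is computed as a dot product between a row of the triangular block and
+ // the matching rhs segment, and the off-diagonal rectangle goes through the row-major
+ // general matrix-vector kernel.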
+ for (Index pi=0; pi<diagSize; pi+=PanelWidth)
+ {
+ Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi);
+ for (Index k=0; k<actualPanelWidth; ++k)
+ {
+ Index i = pi + k;
+ Index s = IsLower ? pi : ((HasUnitDiag||HasZeroDiag) ? i+1 : i);
+ Index r = IsLower ? k+1 : actualPanelWidth-k;
+ if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
+ res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum();
+ if (HasUnitDiag)
+ res.coeffRef(i) += alpha * cjRhs.coeff(i);
+ }
+ Index r = IsLower ? pi : cols - pi - actualPanelWidth;
+ if (r>0)
+ {
+ Index s = IsLower ? 0 : pi + actualPanelWidth;
+ general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs,BuiltIn>::run(
+ actualPanelWidth, r,
+ LhsMapper(&lhs.coeffRef(pi,s), lhsStride),
+ RhsMapper(&rhs.coeffRef(s), rhsIncr),
+ &res.coeffRef(pi), resIncr, alpha);
+ }
+ }
+ if(IsLower && rows>diagSize)
+ {
+ general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs>::run(
+ rows-diagSize, cols,
+ LhsMapper(&lhs.coeffRef(diagSize,0), lhsStride),
+ RhsMapper(&rhs.coeffRef(0), rhsIncr),
+ &res.coeffRef(diagSize), resIncr, alpha);
+ }
+ }
+
+/***************************************************************************
+* Wrapper to product_triangular_vector
+***************************************************************************/
+
+template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
+struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true> >
+ : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true>, Lhs, Rhs> >
+{};
+
+template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
+struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false> >
+ : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false>, Lhs, Rhs> >
+{};
+
+
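+// trmv_selector dispatches a triangular-matrix * vector product to the kernel matching
+// the storage order of the triangular factor; its specializations (further below) also
+// handle scalar factors and pack the destination or the rhs into an aligned temporary
+// when they cannot be used directly.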
+template<int StorageOrder>
+struct trmv_selector;
+
+} // end namespace internal
+
+template<int Mode, typename Lhs, typename Rhs>
+struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
+ : public ProductBase<TriangularProduct<Mode,true,Lhs,false,Rhs,true>, Lhs, Rhs >
+{
+ EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
+
+ TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
+
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+ {
+ eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+
+ internal::trmv_selector<(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dst, alpha);
+ }
+};
+
+template<int Mode, typename Lhs, typename Rhs>
+struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
+ : public ProductBase<TriangularProduct<Mode,false,Lhs,true,Rhs,false>, Lhs, Rhs >
+{
+ EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
+
+ TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
+
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+ {
+ eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+
+ typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
+ Transpose<Dest> dstT(dst);
+ internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
+ TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha);
+ }
+};
+
+namespace internal {
+
+// TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same.
+
+template<> struct trmv_selector<ColMajor>
+{
+ template<int Mode, typename Lhs, typename Rhs, typename Dest>
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
+ {
+ typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
+ typedef typename ProductType::Index Index;
+ typedef typename ProductType::LhsScalar LhsScalar;
+ typedef typename ProductType::RhsScalar RhsScalar;
+ typedef typename ProductType::Scalar ResScalar;
+ typedef typename ProductType::RealScalar RealScalar;
+ typedef typename ProductType::ActualLhsType ActualLhsType;
+ typedef typename ProductType::ActualRhsType ActualRhsType;
+ typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
+ typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+ typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
+
+ typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+ typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+ * RhsBlasTraits::extractScalarFactor(prod.rhs());
+
+ enum {
+ // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+ // on the other hand, it is good for the cache to pack the vector anyway...
+ EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
+ ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
+ MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
+ };
+
+ gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+
+ bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
+ bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
+
+ RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
+
+ ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+ evalToDest ? dest.data() : static_dest.data());
+
+ if(!evalToDest)
+ {
+ #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ Index size = dest.size();
+ EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #endif
+ if(!alphaIsCompatible)
+ {
+ MappedDest(actualDestPtr, dest.size()).setZero();
+ compatibleAlpha = RhsScalar(1);
+ }
+ else
+ MappedDest(actualDestPtr, dest.size()) = dest;
+ }
+
+ internal::triangular_matrix_vector_product
+ <Index,Mode,
+ LhsScalar, LhsBlasTraits::NeedToConjugate,
+ RhsScalar, RhsBlasTraits::NeedToConjugate,
+ ColMajor>
+ ::run(actualLhs.rows(),actualLhs.cols(),
+ actualLhs.data(),actualLhs.outerStride(),
+ actualRhs.data(),actualRhs.innerStride(),
+ actualDestPtr,1,compatibleAlpha);
+
+ if (!evalToDest)
+ {
+ if(!alphaIsCompatible)
+ dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
+ else
+ dest = MappedDest(actualDestPtr, dest.size());
+ }
+ }
+};
+
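+// In the row-major case the result is written in place (respecting its inner stride);
+// only the rhs may need to be copied into a contiguous temporary when its inner stride
+// is not 1.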
+template<> struct trmv_selector<RowMajor>
+{
+ template<int Mode, typename Lhs, typename Rhs, typename Dest>
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
+ {
+ typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
+ typedef typename ProductType::LhsScalar LhsScalar;
+ typedef typename ProductType::RhsScalar RhsScalar;
+ typedef typename ProductType::Scalar ResScalar;
+ typedef typename ProductType::Index Index;
+ typedef typename ProductType::ActualLhsType ActualLhsType;
+ typedef typename ProductType::ActualRhsType ActualRhsType;
+ typedef typename ProductType::_ActualRhsType _ActualRhsType;
+ typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
+ typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+
+ typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+ typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+ * RhsBlasTraits::extractScalarFactor(prod.rhs());
+
+ enum {
+ DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
+ };
+
+ gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+
+ ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
+ DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+
+ if(!DirectlyUseRhs)
+ {
+ #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ int size = actualRhs.size();
+ EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #endif
+ Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+ }
+
+ internal::triangular_matrix_vector_product
+ <Index,Mode,
+ LhsScalar, LhsBlasTraits::NeedToConjugate,
+ RhsScalar, RhsBlasTraits::NeedToConjugate,
+ RowMajor>
+ ::run(actualLhs.rows(),actualLhs.cols(),
+ actualLhs.data(),actualLhs.outerStride(),
+ actualRhsPtr,1,
+ dest.data(),dest.innerStride(),
+ actualAlpha);
+ }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULARMATRIXVECTOR_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
new file mode 100644
index 0000000000..09f110da71
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
@@ -0,0 +1,247 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ * Content : Eigen bindings to Intel(R) MKL
+ * Triangular matrix-vector product functionality based on ?TRMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
+#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
+
+namespace Eigen {
+
+namespace internal {
+
+/**********************************************************************
+* This file implements triangular matrix-vector multiplication using BLAS
+**********************************************************************/
+
+// trmv/hemv specialization
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
+struct triangular_matrix_vector_product_trmv :
+ triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
+
+#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
+ triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
+ _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+ } \
+}; \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
+ triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
+ _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+ } \
+};
+
+EIGEN_MKL_TRMV_SPECIALIZE(double)
+EIGEN_MKL_TRMV_SPECIALIZE(float)
+EIGEN_MKL_TRMV_SPECIALIZE(dcomplex)
+EIGEN_MKL_TRMV_SPECIALIZE(scomplex)
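+// The specializations above intercept the default dispatch for the four BLAS scalar
+// types and forward to triangular_matrix_vector_product_trmv, which either calls MKL
+// ?trmv or falls back to the built-in Eigen kernel.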
+
+// implements col-major: res += alpha * op(triangular) * vector
+#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
+ enum { \
+ IsLower = (Mode&Lower) == Lower, \
+ SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+ IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
+ IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
+ LowUp = IsLower ? Lower : Upper \
+ }; \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ { \
+ if (ConjLhs || IsZeroDiag) { \
+ triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
+ _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+ return; \
+ }\
+ Index size = (std::min)(_rows,_cols); \
+ Index rows = IsLower ? _rows : size; \
+ Index cols = IsLower ? size : _cols; \
+\
+ typedef VectorX##EIGPREFIX VectorRhs; \
+ EIGTYPE *x, *y;\
+\
+/* Set x*/ \
+ Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
+ VectorRhs x_tmp; \
+ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+ x = x_tmp.data(); \
+\
+/* Square part handling */\
+\
+ char trans, uplo, diag; \
+ MKL_INT m, n, lda, incx, incy; \
+ EIGTYPE const *a; \
+ MKLTYPE alpha_, beta_; \
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+\
+/* Set m, n */ \
+ n = (MKL_INT)size; \
+ lda = lhsStride; \
+ incx = 1; \
+ incy = resIncr; \
+\
+/* Set uplo, trans and diag*/ \
+ trans = 'N'; \
+ uplo = IsLower ? 'L' : 'U'; \
+ diag = IsUnitDiag ? 'U' : 'N'; \
+\
+/* call ?TRMV*/ \
+ MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
+\
+/* Add op(a_tr)*rhs into res */ \
+ MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit MKL ?TRMV. Fall back to the default triangular product */ \
+ if (size<(std::max)(rows,cols)) { \
+ typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
+ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+ x = x_tmp.data(); \
+ if (size<rows) { \
+ y = _res + size*resIncr; \
+ a = _lhs + size; \
+ m = rows-size; \
+ n = size; \
+ } \
+ else { \
+ x += size; \
+ y = _res; \
+ a = _lhs + size*lda; \
+ m = size; \
+ n = cols-size; \
+ } \
+ MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
+ } \
+ } \
+};
+
+EIGEN_MKL_TRMV_CM(double, double, d, d)
+EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMV_CM(float, float, f, s)
+EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c)
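+// The column-major kernel above handles the square part with one ?trmv followed by an
+// ?axpy accumulation into the result and, when the matrix is not square, finishes the
+// remaining rectangular block with a single ?gemv call.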
+
+// implements row-major: res += alpha * op(triangular) * vector
+#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
+ enum { \
+ IsLower = (Mode&Lower) == Lower, \
+ SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+ IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
+ IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
+ LowUp = IsLower ? Lower : Upper \
+ }; \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ { \
+ if (IsZeroDiag) { \
+ triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
+ _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+ return; \
+ }\
+ Index size = (std::min)(_rows,_cols); \
+ Index rows = IsLower ? _rows : size; \
+ Index cols = IsLower ? size : _cols; \
+\
+ typedef VectorX##EIGPREFIX VectorRhs; \
+ EIGTYPE *x, *y;\
+\
+/* Set x*/ \
+ Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
+ VectorRhs x_tmp; \
+ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+ x = x_tmp.data(); \
+\
+/* Square part handling */\
+\
+ char trans, uplo, diag; \
+ MKL_INT m, n, lda, incx, incy; \
+ EIGTYPE const *a; \
+ MKLTYPE alpha_, beta_; \
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+ assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+\
+/* Set m, n */ \
+ n = (MKL_INT)size; \
+ lda = lhsStride; \
+ incx = 1; \
+ incy = resIncr; \
+\
+/* Set uplo, trans and diag*/ \
+ trans = ConjLhs ? 'C' : 'T'; \
+ uplo = IsLower ? 'U' : 'L'; \
+ diag = IsUnitDiag ? 'U' : 'N'; \
+\
+/* call ?TRMV*/ \
+ MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
+\
+/* Add op(a_tr)*rhs into res */ \
+ MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit MKL ?TRMV. Fall back to the default triangular product */ \
+ if (size<(std::max)(rows,cols)) { \
+ typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
+ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+ x = x_tmp.data(); \
+ if (size<rows) { \
+ y = _res + size*resIncr; \
+ a = _lhs + size*lda; \
+ m = rows-size; \
+ n = size; \
+ } \
+ else { \
+ x += size; \
+ y = _res; \
+ a = _lhs + size; \
+ m = size; \
+ n = cols-size; \
+ } \
+ MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
+ } \
+ } \
+};
+
+EIGEN_MKL_TRMV_RM(double, double, d, d)
+EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMV_RM(float, float, f, s)
+EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h b/third_party/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h
new file mode 100644
index 0000000000..f5de67c59f
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -0,0 +1,331 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H
+#define EIGEN_TRIANGULAR_SOLVER_MATRIX_H
+
+namespace Eigen {
+
+namespace internal {
+
+// if the rhs is row major, let's transpose the product
+template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
+struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
+{
+ static void run(
+ Index size, Index cols,
+ const Scalar* tri, Index triStride,
+ Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking)
+ {
+ triangular_solve_matrix<
+ Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
+ (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
+ NumTraits<Scalar>::IsComplex && Conjugate,
+ TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor>
+ ::run(size, cols, tri, triStride, _other, otherStride, blocking);
+ }
+};
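+// The row-major case above is reduced to the column-major one by solving the transposed
+// system: the side, the triangular part and the storage order are all flipped.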
+
+/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left
+ */
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
+{
+ static EIGEN_DONT_INLINE void run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking)
+ {
+ Index cols = otherSize;
+
+ typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
+ typedef blas_data_mapper<Scalar, Index, ColMajor> OtherMapper;
+ TriMapper tri(_tri, triStride);
+ OtherMapper other(_other, otherStride);
+
+ typedef gebp_traits<Scalar,Scalar> Traits;
+
+ enum {
+ SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+ IsLower = (Mode&Lower) == Lower
+ };
+
+ Index kc = blocking.kc(); // cache block size along the K direction
+ Index mc = (std::min)(size,blocking.mc()); // cache block size along the M direction
+
+ std::size_t sizeA = kc*mc;
+ std::size_t sizeB = kc*cols;
+
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+
+ conj_if<Conjugate> conj;
+ gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
+ gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
+ gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;
+
+ // the goal here is to subdivide the Rhs panels such that we keep some cache
+ // coherence when accessing the rhs elements
+ std::ptrdiff_t l1, l2, l3;
+ manage_caching_sizes(GetAction, &l1, &l2, &l3);
+ Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
+ subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
+
+ for(Index k2=IsLower ? 0 : size;
+ IsLower ? k2<size : k2>0;
+ IsLower ? k2+=kc : k2-=kc)
+ {
+ const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc);
+
+ // We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,
+ // and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into
+ // A11 (the triangular part) and A21 the remaining rectangular part.
+ // Then the high level algorithm is:
+ // - B = R1 => general block copy (done during the next step)
+ // - R1 = A11^-1 B => tricky part
+ // - update B from the new R1 => actually this has to be performed continuously during the above step
+ // - R2 -= A21 * B => GEPP
+
+ // The tricky part: compute R1 = A11^-1 B while updating B from R1
+ // The idea is to split A11 into multiple small vertical panels.
+ // Each panel can be split into a small triangular part T1k which is processed without optimization,
+ // and the remaining small part T2k which is processed using gebp with appropriate block strides
+ for(Index j2=0; j2<cols; j2+=subcols)
+ {
+ Index actual_cols = (std::min)(cols-j2,subcols);
+ // for each small vertical panels [T1k^T, T2k^T]^T of lhs
+ for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
+ {
+ Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
+ // tr solve
+ for (Index k=0; k<actualPanelWidth; ++k)
+ {
+ // TODO write a small kernel handling this (can be shared with trsv)
+ Index i = IsLower ? k2+k1+k : k2-k1-k-1;
+ Index s = IsLower ? k2+k1 : i+1;
+ Index rs = actualPanelWidth - k - 1; // remaining size
+
+ Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
+ for (Index j=j2; j<j2+actual_cols; ++j)
+ {
+ if (TriStorageOrder==RowMajor)
+ {
+ Scalar b(0);
+ const Scalar* l = &tri(i,s);
+ Scalar* r = &other(s,j);
+ for (Index i3=0; i3<k; ++i3)
+ b += conj(l[i3]) * r[i3];
+
+ other(i,j) = (other(i,j) - b)*a;
+ }
+ else
+ {
+ Index s = IsLower ? i+1 : i-rs;
+ Scalar b = (other(i,j) *= a);
+ Scalar* r = &other(s,j);
+ const Scalar* l = &tri(s,i);
+ for (Index i3=0;i3<rs;++i3)
+ r[i3] -= b * conj(l[i3]);
+ }
+ }
+ }
+
+ Index lengthTarget = actual_kc-k1-actualPanelWidth;
+ Index startBlock = IsLower ? k2+k1 : k2-k1-actualPanelWidth;
+ Index blockBOffset = IsLower ? k1 : lengthTarget;
+
+ // update the respective rows of B from other
+ pack_rhs(blockB+actual_kc*j2, other.getSubMapper(startBlock,j2), actualPanelWidth, actual_cols, actual_kc, blockBOffset);
+
+ // GEBP
+ if (lengthTarget>0)
+ {
+ Index startTarget = IsLower ? k2+k1+actualPanelWidth : k2-actual_kc;
+
+ pack_lhs(blockA, tri.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
+
+ gebp_kernel(other.getSubMapper(startTarget,j2), blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
+ actualPanelWidth, actual_kc, 0, blockBOffset);
+ }
+ }
+ }
+
+ // R2 -= A21 * B => GEPP
+ {
+ Index start = IsLower ? k2+kc : 0;
+ Index end = IsLower ? size : k2-kc;
+ for(Index i2=start; i2<end; i2+=mc)
+ {
+ const Index actual_mc = (std::min)(mc,end-i2);
+ if (actual_mc>0)
+ {
+ pack_lhs(blockA, tri.getSubMapper(i2, IsLower ? k2 : k2-kc), actual_kc, actual_mc);
+
+ gebp_kernel(other.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0);
+ }
+ }
+ }
+ }
+ }
+
+/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
+ */
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
+{
+ static EIGEN_DONT_INLINE void run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking)
+ {
+ Index rows = otherSize;
+
+ typedef blas_data_mapper<Scalar, Index, ColMajor> LhsMapper;
+ typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
+ LhsMapper lhs(_other, otherStride);
+ RhsMapper rhs(_tri, triStride);
+
+ typedef gebp_traits<Scalar,Scalar> Traits;
+ enum {
+ RhsStorageOrder = TriStorageOrder,
+ SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+ IsLower = (Mode&Lower) == Lower
+ };
+
+ Index kc = blocking.kc(); // cache block size along the K direction
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
+
+ std::size_t sizeA = kc*mc;
+ std::size_t sizeB = kc*size;
+
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+
+ conj_if<Conjugate> conj;
+ gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder,false,true> pack_rhs_panel;
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
+
+ for(Index k2=IsLower ? size : 0;
+ IsLower ? k2>0 : k2<size;
+ IsLower ? k2-=kc : k2+=kc)
+ {
+ const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc);
+ Index actual_k2 = IsLower ? k2-actual_kc : k2 ;
+
+ Index startPanel = IsLower ? 0 : k2+actual_kc;
+ Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
+ Scalar* geb = blockB+actual_kc*actual_kc;
+
+ if (rs>0) pack_rhs(geb, rhs.getSubMapper(actual_k2,startPanel), actual_kc, rs);
+
+ // triangular packing (we only pack the panels off the diagonal,
+ // neglecting the blocks overlapping the diagonal)
+ {
+ for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
+ {
+ Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
+ Index actual_j2 = actual_k2 + j2;
+ Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
+ Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
+
+ if (panelLength>0)
+ pack_rhs_panel(blockB+j2*actual_kc,
+ rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
+ panelLength, actualPanelWidth,
+ actual_kc, panelOffset);
+ }
+ }
+
+ for(Index i2=0; i2<rows; i2+=mc)
+ {
+ const Index actual_mc = (std::min)(mc,rows-i2);
+
+ // triangular solver kernel
+ {
+ // for each small block of the diagonal (=> vertical panels of rhs)
+ for (Index j2 = IsLower
+ ? (actual_kc - ((actual_kc%SmallPanelWidth) ? Index(actual_kc%SmallPanelWidth)
+ : Index(SmallPanelWidth)))
+ : 0;
+ IsLower ? j2>=0 : j2<actual_kc;
+ IsLower ? j2-=SmallPanelWidth : j2+=SmallPanelWidth)
+ {
+ Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
+ Index absolute_j2 = actual_k2 + j2;
+ Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
+ Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
+
+ // GEBP
+ if(panelLength>0)
+ {
+ gebp_kernel(lhs.getSubMapper(i2,absolute_j2),
+ blockA, blockB+j2*actual_kc,
+ actual_mc, panelLength, actualPanelWidth,
+ Scalar(-1),
+ actual_kc, actual_kc, // strides
+ panelOffset, panelOffset); // offsets
+ }
+
+ // unblocked triangular solve
+ for (Index k=0; k<actualPanelWidth; ++k)
+ {
+ Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k;
+
+ Scalar* r = &lhs(i2,j);
+ for (Index k3=0; k3<k; ++k3)
+ {
+ Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
+ Scalar* a = &lhs(i2,IsLower ? j+1+k3 : absolute_j2+k3);
+ for (Index i=0; i<actual_mc; ++i)
+ r[i] -= a[i] * b;
+ }
+ Scalar b = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(rhs(j,j));
+ for (Index i=0; i<actual_mc; ++i)
+ r[i] *= b;
+ }
+
+ // pack the just computed part of lhs to A
+ pack_lhs_panel(blockA, LhsMapper(_other+absolute_j2*otherStride+i2, otherStride),
+ actualPanelWidth, actual_mc,
+ actual_kc, j2);
+ }
+ }
+
+ if (rs>0)
+ gebp_kernel(lhs.getSubMapper(i2, startPanel), blockA, geb,
+ actual_mc, actual_kc, rs, Scalar(-1),
+ -1, -1, 0, 0);
+ }
+ }
+ }
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/third_party/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
new file mode 100644
index 0000000000..6a0bb83393
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
@@ -0,0 +1,155 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ * Content : Eigen bindings to Intel(R) MKL
+ * Triangular matrix-matrix solve functionality based on ?TRSM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
+#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
+
+namespace Eigen {
+
+namespace internal {
+
+// implements LeftSide op(triangular)^-1 * general
+#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \
+template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
+struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
+{ \
+ enum { \
+ IsLower = (Mode&Lower) == Lower, \
+ IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
+ IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
+ conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
+ }; \
+ static void run( \
+ Index size, Index otherSize, \
+ const EIGTYPE* _tri, Index triStride, \
+ EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
+ { \
+ MKL_INT m = size, n = otherSize, lda, ldb; \
+ char side = 'L', uplo, diag='N', transa; \
+ /* Set alpha_ */ \
+ MKLTYPE alpha; \
+ EIGTYPE myone(1); \
+ assign_scalar_eig2mkl(alpha, myone); \
+ ldb = otherStride;\
+\
+ const EIGTYPE *a; \
+/* Set trans */ \
+ transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
+/* Set uplo */ \
+ uplo = IsLower ? 'L' : 'U'; \
+ if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+ typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
+ Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
+ MatrixTri a_tmp; \
+\
+ if (conjA) { \
+ a_tmp = tri.conjugate(); \
+ a = a_tmp.data(); \
+ lda = a_tmp.outerStride(); \
+ } else { \
+ a = _tri; \
+ lda = triStride; \
+ } \
+ if (IsUnitDiag) diag='U'; \
+/* call ?trsm*/ \
+ MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
+ } \
+};
+
+EIGEN_MKL_TRSM_L(double, double, d)
+EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z)
+EIGEN_MKL_TRSM_L(float, float, s)
+EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c)
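+// These specializations map the whole left-sided triangular solve onto a single ?trsm
+// call; the level3_blocking argument is not used here.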
+
+
+// implements RightSide general * op(triangular)^-1
+#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \
+template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
+struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
+{ \
+ enum { \
+ IsLower = (Mode&Lower) == Lower, \
+ IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
+ IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
+ conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
+ }; \
+ static void run( \
+ Index size, Index otherSize, \
+ const EIGTYPE* _tri, Index triStride, \
+ EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
+ { \
+ MKL_INT m = otherSize, n = size, lda, ldb; \
+ char side = 'R', uplo, diag='N', transa; \
+ /* Set alpha_ */ \
+ MKLTYPE alpha; \
+ EIGTYPE myone(1); \
+ assign_scalar_eig2mkl(alpha, myone); \
+ ldb = otherStride;\
+\
+ const EIGTYPE *a; \
+/* Set trans */ \
+ transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
+/* Set uplo */ \
+ uplo = IsLower ? 'L' : 'U'; \
+ if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+ typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
+ Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
+ MatrixTri a_tmp; \
+\
+ if (conjA) { \
+ a_tmp = tri.conjugate(); \
+ a = a_tmp.data(); \
+ lda = a_tmp.outerStride(); \
+ } else { \
+ a = _tri; \
+ lda = triStride; \
+ } \
+ if (IsUnitDiag) diag='U'; \
+/* call ?trsm*/ \
+ MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
+ /*std::cout << "TRMS_L specialization!\n";*/ \
+ } \
+};
+
+EIGEN_MKL_TRSM_R(double, double, d)
+EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z)
+EIGEN_MKL_TRSM_R(float, float, s)
+EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c)
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
diff --git a/third_party/eigen3/Eigen/src/Core/products/TriangularSolverVector.h b/third_party/eigen3/Eigen/src/Core/products/TriangularSolverVector.h
new file mode 100644
index 0000000000..b994759b26
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/products/TriangularSolverVector.h
@@ -0,0 +1,145 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRIANGULAR_SOLVER_VECTOR_H
+#define EIGEN_TRIANGULAR_SOLVER_VECTOR_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate, int StorageOrder>
+struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheRight, Mode, Conjugate, StorageOrder>
+{
+ static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
+ {
+ triangular_solve_vector<LhsScalar,RhsScalar,Index,OnTheLeft,
+ ((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag),
+ Conjugate,StorageOrder==RowMajor?ColMajor:RowMajor
+ >::run(size, _lhs, lhsStride, rhs);
+ }
+};
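+// The on-the-right case above (x * A = b) is rewritten as the transposed on-the-left
+// solve, flipping the triangular part and the storage order.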
+
+// forward and backward substitution, row-major, rhs is a vector
+template<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate>
+struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Conjugate, RowMajor>
+{
+ enum {
+ IsLower = ((Mode&Lower)==Lower)
+ };
+ static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
+ {
+ typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
+ const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride));
+
+ typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
+
+ typename internal::conditional<
+ Conjugate,
+ const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,
+ const LhsMap&>
+ ::type cjLhs(lhs);
+ static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+ for(Index pi=IsLower ? 0 : size;
+ IsLower ? pi<size : pi>0;
+ IsLower ? pi+=PanelWidth : pi-=PanelWidth)
+ {
+ Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
+
+ Index r = IsLower ? pi : size - pi; // remaining size
+ if (r > 0)
+ {
+ // let's directly call the low level product function because:
+ // 1 - it is faster to compile
+ // 2 - it is slightly faster at runtime
+ Index startRow = IsLower ? pi : pi-actualPanelWidth;
+ Index startCol = IsLower ? 0 : pi;
+
+ general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,Conjugate,RhsScalar,RhsMapper,false>::run(
+ actualPanelWidth, r,
+ LhsMapper(&lhs.coeffRef(startRow,startCol), lhsStride),
+ RhsMapper(rhs + startCol, 1),
+ rhs + startRow, 1,
+ RhsScalar(-1));
+ }
+
+ for(Index k=0; k<actualPanelWidth; ++k)
+ {
+ Index i = IsLower ? pi+k : pi-k-1;
+ Index s = IsLower ? pi : i+1;
+ if (k>0)
+ rhs[i] -= (cjLhs.row(i).segment(s,k).transpose().cwiseProduct(Map<const Matrix<RhsScalar,Dynamic,1> >(rhs+s,k))).sum();
+
+ if(!(Mode & UnitDiag))
+ rhs[i] /= cjLhs(i,i);
+ }
+ }
+ }
+};
+
+// forward and backward substitution, column-major, rhs is a vector
+template<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate>
+struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Conjugate, ColMajor>
+{
+ enum {
+ IsLower = ((Mode&Lower)==Lower)
+ };
+ static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
+ {
+ typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
+ const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride));
+ typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
+ typename internal::conditional<Conjugate,
+ const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,
+ const LhsMap&
+ >::type cjLhs(lhs);
+ static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+
+ for(Index pi=IsLower ? 0 : size;
+ IsLower ? pi<size : pi>0;
+ IsLower ? pi+=PanelWidth : pi-=PanelWidth)
+ {
+ Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
+ Index startBlock = IsLower ? pi : pi-actualPanelWidth;
+ Index endBlock = IsLower ? pi + actualPanelWidth : 0;
+
+ for(Index k=0; k<actualPanelWidth; ++k)
+ {
+ Index i = IsLower ? pi+k : pi-k-1;
+ if(!(Mode & UnitDiag))
+ rhs[i] /= cjLhs.coeff(i,i);
+
+ Index r = actualPanelWidth - k - 1; // remaining size
+ Index s = IsLower ? i+1 : i-r;
+ if (r>0)
+ Map<Matrix<RhsScalar,Dynamic,1> >(rhs+s,r) -= rhs[i] * cjLhs.col(i).segment(s,r);
+ }
+ Index r = IsLower ? size - endBlock : startBlock; // remaining size
+ if (r > 0)
+ {
+ // let's directly call the low level product function because:
+ // 1 - it is faster to compile
+ // 2 - it is slightly faster at runtime
+ general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,Conjugate,RhsScalar,RhsMapper,false>::run(
+ r, actualPanelWidth,
+ LhsMapper(&lhs.coeffRef(endBlock,startBlock), lhsStride),
+ RhsMapper(rhs+startBlock, 1),
+ rhs+endBlock, 1, RhsScalar(-1));
+ }
+ }
+ }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_SOLVER_VECTOR_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/BlasUtil.h b/third_party/eigen3/Eigen/src/Core/util/BlasUtil.h
new file mode 100644
index 0000000000..bbaff8dd0e
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/BlasUtil.h
@@ -0,0 +1,237 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BLASUTIL_H
+#define EIGEN_BLASUTIL_H
+
+// This file contains many lightweight helper classes used to
+// implement and control fast level 2 and level 3 BLAS-like routines.
+
+namespace Eigen {
+
+namespace internal {
+
+// forward declarations
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
+struct gebp_kernel;
+
+template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
+struct gemm_pack_rhs;
+
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
+struct gemm_pack_lhs;
+
+template<
+ typename Index,
+ typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
+ int ResStorageOrder>
+struct general_matrix_matrix_product;
+
+template<typename Index, typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
+struct general_matrix_vector_product;
+
+
+template<bool Conjugate> struct conj_if;
+
+template<> struct conj_if<true> {
+ template<typename T>
+ inline T operator()(const T& x) { return numext::conj(x); }
+ template<typename T>
+ inline T pconj(const T& x) { return internal::pconj(x); }
+};
+
+template<> struct conj_if<false> {
+ template<typename T>
+ inline const T& operator()(const T& x) { return x; }
+ template<typename T>
+ inline const T& pconj(const T& x) { return x; }
+};
+
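+// conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> provides a multiply (pmul) and a
+// multiply-accumulate (pmadd) in which each operand is conjugated according to the
+// corresponding boolean flag; e.g. the <complex,complex,false,true> specialization
+// computes x * conj(y).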
+template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
+{
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
+};
+
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+ { return c + pmul(x,y); }
+
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+ { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); }
+};
+
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+ { return c + pmul(x,y); }
+
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+ { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
+};
+
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+ { return c + pmul(x,y); }
+
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+ { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
+};
+
+template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
+ { return conj_if<Conj>()(x)*y; }
+};
+
+template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
+ { return x*conj_if<Conj>()(y); }
+};
+
+template<typename From,typename To> struct get_factor {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return x; }
+};
+
+template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }
+};
+
+
+/* Helper class to analyze the factors of a Product expression.
+ * In particular it allows popping out operator-, scalar multiples,
+ * and conjugates. */
+template<typename XprType> struct blas_traits
+{
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef const XprType& ExtractType;
+ typedef XprType _ExtractType;
+ enum {
+ IsComplex = NumTraits<Scalar>::IsComplex,
+ IsTransposed = false,
+ NeedToConjugate = false,
+ HasUsableDirectAccess = ( (int(XprType::Flags)&DirectAccessBit)
+ && ( bool(XprType::IsVectorAtCompileTime)
+ || int(inner_stride_at_compile_time<XprType>::ret) == 1)
+ ) ? 1 : 0
+ };
+ typedef typename conditional<bool(HasUsableDirectAccess),
+ ExtractType,
+ typename _ExtractType::PlainObject
+ >::type DirectLinearAccessType;
+ static inline ExtractType extract(const XprType& x) { return x; }
+ static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
+};
+
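+// The partial specializations below peel scalar multiples, negations, conjugations and
+// transpositions off an expression, so that the underlying plain matrix can be handed to
+// the BLAS-like kernels together with an accumulated scalar factor and the
+// NeedToConjugate / IsTransposed flags.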
+// pop conjugate
+template<typename Scalar, typename NestedXpr>
+struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef blas_traits<NestedXpr> Base;
+ typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType;
+ typedef typename Base::ExtractType ExtractType;
+
+ enum {
+ IsComplex = NumTraits<Scalar>::IsComplex,
+ NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
+ };
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+ static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
+};
+
+// pop scalar multiple
+template<typename Scalar, typename NestedXpr>
+struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef blas_traits<NestedXpr> Base;
+ typedef CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> XprType;
+ typedef typename Base::ExtractType ExtractType;
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+ static inline Scalar extractScalarFactor(const XprType& x)
+ { return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); }
+};
+
+// pop opposite
+template<typename Scalar, typename NestedXpr>
+struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef blas_traits<NestedXpr> Base;
+ typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
+ typedef typename Base::ExtractType ExtractType;
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+ static inline Scalar extractScalarFactor(const XprType& x)
+ { return - Base::extractScalarFactor(x.nestedExpression()); }
+};
+
+// pop/push transpose
+template<typename NestedXpr>
+struct blas_traits<Transpose<NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef typename NestedXpr::Scalar Scalar;
+ typedef blas_traits<NestedXpr> Base;
+ typedef Transpose<NestedXpr> XprType;
+ typedef Transpose<const typename Base::_ExtractType> ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS
+ typedef Transpose<const typename Base::_ExtractType> _ExtractType;
+ typedef typename conditional<bool(Base::HasUsableDirectAccess),
+ ExtractType,
+ typename ExtractType::PlainObject
+ >::type DirectLinearAccessType;
+ enum {
+ IsTransposed = Base::IsTransposed ? 0 : 1
+ };
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+ static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
+};
+
+template<typename T>
+struct blas_traits<const T>
+ : blas_traits<T>
+{};
+
+template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
+struct extract_data_selector {
+ static const typename T::Scalar* run(const T& m)
+ {
+ return blas_traits<T>::extract(m).data();
+ }
+};
+
+template<typename T>
+struct extract_data_selector<T,false> {
+ static typename T::Scalar* run(const T&) { return 0; }
+};
+
+template<typename T> const typename T::Scalar* extract_data(const T& m)
+{
+ return extract_data_selector<T>::run(m);
+}
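+// Rough usage sketch (illustrative only; the local variable names are arbitrary): the
+// product kernels typically combine these helpers to obtain a raw pointer and the
+// accumulated scalar factor of an expression such as 2*m.transpose():
+//   typedef blas_traits<Rhs> RhsBlasTraits;
+//   typename RhsBlasTraits::DirectLinearAccessType actual_rhs(RhsBlasTraits::extract(rhs));
+//   Scalar alpha = RhsBlasTraits::extractScalarFactor(rhs);  // e.g. 2 for 2*m
+//   const Scalar* rhs_ptr = extract_data(actual_rhs);        // 0 if no direct access is possible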
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BLASUTIL_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/Constants.h b/third_party/eigen3/Eigen/src/Core/util/Constants.h
new file mode 100644
index 0000000000..be14df0168
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/Constants.h
@@ -0,0 +1,453 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CONSTANTS_H
+#define EIGEN_CONSTANTS_H
+
+namespace Eigen {
+
+/** This value means that a positive quantity (e.g., a size) is not known at compile-time, and that instead the value is
+ * stored in some runtime variable.
+ *
+ * Changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix.
+ */
+const int Dynamic = -1;
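+// For example, a matrix with a compile-time number of columns but a runtime number of rows:
+//   Matrix<float, Dynamic, 3> m(n, 3);   // n rows known only at runtime
+//   MatrixXd d;                          // shorthand for Matrix<double, Dynamic, Dynamic>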
+
+/** This value means that a signed quantity (e.g., a signed index) is not known at compile-time, and that instead its value
+ * has to be specified at runtime.
+ */
+const int DynamicIndex = 0xffffff;
+
+/** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>().
+ * The value Infinity there means the L-infinity norm.
+ */
+const int Infinity = -1;
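+// For example, the maximum absolute coefficient of a vector:
+//   double linf = v.lpNorm<Infinity>();  // equivalent to v.cwiseAbs().maxCoeff()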
+
+/** \defgroup flags Flags
+ * \ingroup Core_Module
+ *
+ * These are the possible bits which can be OR'ed to constitute the flags of a matrix or
+ * expression.
+ *
+ * It is important to note that these flags are a purely compile-time notion. They are a compile-time property of
+ * an expression type, implemented as enum's. They are not stored in memory at runtime, and they do not incur any
+ * runtime overhead.
+ *
+ * \sa MatrixBase::Flags
+ */
+
+/** \ingroup flags
+ *
+ * for a matrix, this means that the storage order is row-major.
+ * If this bit is not set, the storage order is column-major.
+ * For an expression, this determines the storage order of
+ * the matrix created by evaluation of that expression.
+ * \sa \ref TopicStorageOrders */
+const unsigned int RowMajorBit = 0x1;
+
+/** \ingroup flags
+ *
+ * means the expression should be evaluated by the calling expression */
+const unsigned int EvalBeforeNestingBit = 0x2;
+
+/** \ingroup flags
+ *
+ * means the expression should be evaluated before any assignment */
+const unsigned int EvalBeforeAssigningBit = 0x4;
+
+/** \ingroup flags
+ *
+ * Short version: means the expression might be vectorized
+ *
+ * Long version: means that the coefficients can be handled by packets
+ * and start at a memory location whose alignment meets the requirements
+ * of the present CPU architecture for optimized packet access. In the fixed-size
+ * case, there is the additional condition that it be possible to access all the
+ * coefficients by packets (this implies the requirement that the size be a multiple of 16 bytes,
+ * and that any nontrivial strides don't break the alignment). In the dynamic-size case,
+ * there is no such condition on the total size and strides, so it might not be possible to access
+ * all coeffs by packets.
+ *
+ * \note This bit can be set regardless of whether vectorization is actually enabled.
+ * To check for actual vectorizability, see \a ActualPacketAccessBit.
+ */
+const unsigned int PacketAccessBit = 0x8;
+
+#ifdef EIGEN_VECTORIZE
+/** \ingroup flags
+ *
+ * If vectorization is enabled (EIGEN_VECTORIZE is defined) this constant
+ * is set to the value \a PacketAccessBit.
+ *
+ * If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant
+ * is set to the value 0.
+ */
+const unsigned int ActualPacketAccessBit = PacketAccessBit;
+#else
+const unsigned int ActualPacketAccessBit = 0x0;
+#endif
+
+/** \ingroup flags
+ *
+ * Short version: means the expression can be seen as 1D vector.
+ *
+ * Long version: means that one can access the coefficients
+ * of this expression by coeff(int), and coeffRef(int) in the case of an lvalue expression. These
+ * index-based access methods are guaranteed
+ * to not have to do any runtime computation of a (row, col)-pair from the index, so that it
+ * is guaranteed that whenever it is available, index-based access is at least as fast as
+ * (row,col)-based access. Expressions for which that isn't possible don't have the LinearAccessBit.
+ *
+ * If both PacketAccessBit and LinearAccessBit are set, then the
+ * packets of this expression can be accessed by packet(int), and writePacket(int) in the case of an
+ * lvalue expression.
+ *
+ * Typically, all vector expressions have the LinearAccessBit, but there is one exception:
+ * Product expressions don't have it, because it would be troublesome for vectorization, even when the
+ * Product is a vector expression. Thus, vector Product expressions allow index-based coefficient access but
+ * not index-based packet access, so they don't have the LinearAccessBit.
+ */
+const unsigned int LinearAccessBit = 0x10;
+
+/** \ingroup flags
+ *
+ * Means the expression has a coeffRef() method, i.e. is writable as its individual coefficients are directly addressable.
+ * This rules out read-only expressions.
+ *
+ * Note that DirectAccessBit and LvalueBit are mutually orthogonal, as there are examples of expressions having one but not
+ * the other:
+ * \li writable expressions that don't have a very simple memory layout as a strided array, have LvalueBit but not DirectAccessBit
+ * \li Map-to-const expressions, for example Map<const Matrix>, have DirectAccessBit but not LvalueBit
+ *
+ * Expressions having LvalueBit also have their coeff() method returning a const reference instead of returning a new value.
+ */
+const unsigned int LvalueBit = 0x20;
+
+/** \ingroup flags
+ *
+ * Means that the underlying array of coefficients can be directly accessed as a plain strided array. The memory layout
+ * of the array of coefficients must be exactly the natural one suggested by rows(), cols(),
+ * outerStride(), innerStride(), and the RowMajorBit. This rules out expressions such as Diagonal, whose coefficients,
+ * though referenceable, do not have such a regular memory layout.
+ *
+ * See the comment on LvalueBit for an explanation of how LvalueBit and DirectAccessBit are mutually orthogonal.
+ */
+const unsigned int DirectAccessBit = 0x40;
+
+/** \ingroup flags
+ *
+ * means the first coefficient packet is guaranteed to be aligned.
+ * An expression cannot have the AlignedBit without the PacketAccessBit flag.
+ * In other words, this means we are allowed to perform an aligned packet access to the first element regardless
+ * of the expression kind:
+ * \code
+ * expression.packet<Aligned>(0);
+ * \endcode
+ */
+const unsigned int AlignedBit = 0x80;
+
+const unsigned int NestByRefBit = 0x100;
+
+// list of flags that are inherited by default
+const unsigned int HereditaryBits = RowMajorBit
+ | EvalBeforeNestingBit
+ | EvalBeforeAssigningBit;
+
+/** \defgroup enums Enumerations
+ * \ingroup Core_Module
+ *
+ * Various enumerations used in %Eigen. Many of these are used as template parameters.
+ */
+
+/** \ingroup enums
+ * Enum containing possible values for the \p Mode parameter of
+ * MatrixBase::selfadjointView() and MatrixBase::triangularView(). */
+enum {
+ /** View matrix as a lower triangular matrix. */
+ Lower=0x1,
+ /** View matrix as an upper triangular matrix. */
+ Upper=0x2,
+ /** %Matrix has ones on the diagonal; to be used in combination with #Lower or #Upper. */
+ UnitDiag=0x4,
+ /** %Matrix has zeros on the diagonal; to be used in combination with #Lower or #Upper. */
+ ZeroDiag=0x8,
+ /** View matrix as a lower triangular matrix with ones on the diagonal. */
+ UnitLower=UnitDiag|Lower,
+ /** View matrix as an upper triangular matrix with ones on the diagonal. */
+ UnitUpper=UnitDiag|Upper,
+ /** View matrix as a lower triangular matrix with zeros on the diagonal. */
+ StrictlyLower=ZeroDiag|Lower,
+ /** View matrix as an upper triangular matrix with zeros on the diagonal. */
+ StrictlyUpper=ZeroDiag|Upper,
+ /** Used in BandMatrix and SelfAdjointView to indicate that the matrix is self-adjoint. */
+ SelfAdjoint=0x10,
+ /** Used to support symmetric, non-selfadjoint, complex matrices. */
+ Symmetric=0x20
+};
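+// For example, these values are passed as the Mode template argument of
+// triangularView() and selfadjointView():
+//   m.triangularView<StrictlyUpper>().setZero();    // zero out the strictly upper part
+//   MatrixXd s = m.selfadjointView<Lower>();        // symmetric matrix built from the lower part
+//   m.triangularView<UnitLower>().solveInPlace(b);  // forward substitution with implicit unit diagonal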
+
+/** \ingroup enums
+ * Enum for indicating whether an object is aligned or not. */
+enum {
+ /** Object is not correctly aligned for vectorization. */
+ Unaligned=0,
+ /** Object is aligned for vectorization. */
+ Aligned=1
+};
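+// For example, when mapping external memory, the alignment assumption is passed to Map:
+//   Map<const Matrix4f, Aligned>   m1(aligned_ptr);  // buffer known to be suitably aligned
+//   Map<const Matrix4f, Unaligned> m2(some_ptr);     // no alignment assumption (the default)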
+
+/** \ingroup enums
+ * Enum used by DenseBase::corner() in Eigen2 compatibility mode. */
+// FIXME after the corner() API change, this was not needed anymore, except by AlignedBox
+// TODO: find out what to do with that. Adapt the AlignedBox API ?
+enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
+
+/** \ingroup enums
+ * Enum containing possible values for the \p Direction parameter of
+ * Reverse, PartialReduxExpr and VectorwiseOp. */
+enum DirectionType {
+ /** For Reverse, all columns are reversed;
+ * for PartialReduxExpr and VectorwiseOp, act on columns. */
+ Vertical,
+ /** For Reverse, all rows are reversed;
+ * for PartialReduxExpr and VectorwiseOp, act on rows. */
+ Horizontal,
+ /** For Reverse, both rows and columns are reversed;
+ * not used for PartialReduxExpr and VectorwiseOp. */
+ BothDirections
+};
+
+/** \internal \ingroup enums
+ * Enum to specify how to traverse the entries of a matrix. */
+enum {
+ /** \internal Default traversal, no vectorization, no index-based access */
+ DefaultTraversal,
+ /** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */
+ LinearTraversal,
+ /** \internal Equivalent to a slice vectorization for fixed-size matrices having good alignment
+ * and good size */
+ InnerVectorizedTraversal,
+ /** \internal Vectorization path using a single loop plus scalar loops for the
+ * unaligned boundaries */
+ LinearVectorizedTraversal,
+ /** \internal Generic vectorization path using one vectorized loop per row/column with some
+ * scalar loops to handle the unaligned boundaries */
+ SliceVectorizedTraversal,
+ /** \internal Special case to properly handle incompatible scalar types or other defecting cases*/
+ InvalidTraversal,
+ /** \internal Evaluate all entries at once */
+ AllAtOnceTraversal
+};
+
+/** \internal \ingroup enums
+ * Enum to specify whether to unroll loops when traversing over the entries of a matrix. */
+enum {
+ /** \internal Do not unroll loops. */
+ NoUnrolling,
+ /** \internal Unroll only the inner loop, but not the outer loop. */
+ InnerUnrolling,
+ /** \internal Unroll both the inner and the outer loop. If there is only one loop,
+ * because linear traversal is used, then unroll that loop. */
+ CompleteUnrolling
+};
+
+/** \internal \ingroup enums
+ * Enum to specify whether to use the default (built-in) implementation or the specialization. */
+enum {
+ Specialized,
+ BuiltIn
+};
+
+/** \ingroup enums
+ * Enum containing possible values for the \p _Options template parameter of
+ * Matrix, Array and BandMatrix. */
+enum {
+ /** Storage order is column major (see \ref TopicStorageOrders). */
+ ColMajor = 0,
+ /** Storage order is row major (see \ref TopicStorageOrders). */
+ RowMajor = 0x1, // it is only a coincidence that this is equal to RowMajorBit -- don't rely on that
+ /** Align the matrix itself if it is vectorizable fixed-size */
+ AutoAlign = 0,
+ /** Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be requested to be aligned) */ // FIXME --- clarify the situation
+ DontAlign = 0x2,
+ AllocateDefault = 0,
+ AllocateUVM = 0x8
+};
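+// For example, as the _Options template argument of Matrix and Array:
+//   Matrix<double, 3, 3, RowMajor>            rm;  // row-major storage
+//   Matrix<float, 4, 4, ColMajor | DontAlign> ua;  // column-major, no alignment requirement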
+
+/** \ingroup enums
+ * Enum for specifying whether to apply or solve on the left or right. */
+enum {
+ /** Apply transformation on the left. */
+ OnTheLeft = 1,
+ /** Apply transformation on the right. */
+ OnTheRight = 2
+};
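+// For example, these values are used as the Side template argument of the triangular
+// solvers (a sketch; B is overwritten in place):
+//   A.triangularView<Lower>().solveInPlace(B);              // OnTheLeft (default): B <- A^-1 * B
+//   A.triangularView<Lower>().solveInPlace<OnTheRight>(B);  // OnTheRight:          B <- B * A^-1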
+
+/* the following used to be written as:
+ *
+ * struct NoChange_t {};
+ * namespace {
+ * EIGEN_UNUSED NoChange_t NoChange;
+ * }
+ *
+ * on the grounds that it feels dangerous to disambiguate overloaded functions on enum/integer types.
+ * However, this leads to "variable declared but never referenced" warnings on Intel Composer XE,
+ * and we do not know how to get rid of them (bug 450).
+ */
+
+enum NoChange_t { NoChange };
+enum Sequential_t { Sequential };
+enum Default_t { Default };
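+// For example, NoChange lets one resize a single dimension while keeping the other:
+//   m.conservativeResize(NoChange, m.cols()+1);  // append a column, keep the row count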
+
+/** \internal \ingroup enums
+ * Used in AmbiVector. */
+enum {
+ IsDense = 0,
+ IsSparse
+};
+
+/** \ingroup enums
+ * Used as template parameter in DenseCoeffBase and MapBase to indicate
+ * which accessors should be provided. */
+enum AccessorLevels {
+ /** Read-only access via a member function. */
+ ReadOnlyAccessors,
+ /** Read/write access via member functions. */
+ WriteAccessors,
+ /** Direct read-only access to the coefficients. */
+ DirectAccessors,
+ /** Direct read/write access to the coefficients. */
+ DirectWriteAccessors
+};
+
+/** \ingroup enums
+ * Enum with options to give to various decompositions. */
+enum DecompositionOptions {
+ /** \internal Not used (meant for LDLT?). */
+ Pivoting = 0x01,
+ /** \internal Not used (meant for LDLT?). */
+ NoPivoting = 0x02,
+ /** Used in JacobiSVD to indicate that the square matrix U is to be computed. */
+ ComputeFullU = 0x04,
+ /** Used in JacobiSVD to indicate that the thin matrix U is to be computed. */
+ ComputeThinU = 0x08,
+ /** Used in JacobiSVD to indicate that the square matrix V is to be computed. */
+ ComputeFullV = 0x10,
+ /** Used in JacobiSVD to indicate that the thin matrix V is to be computed. */
+ ComputeThinV = 0x20,
+ /** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify
+ * that only the eigenvalues are to be computed and not the eigenvectors. */
+ EigenvaluesOnly = 0x40,
+ /** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify
+ * that both the eigenvalues and the eigenvectors are to be computed. */
+ ComputeEigenvectors = 0x80,
+ /** \internal */
+ EigVecMask = EigenvaluesOnly | ComputeEigenvectors,
+ /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
+ * solve the generalized eigenproblem \f$ Ax = \lambda B x \f$. */
+ Ax_lBx = 0x100,
+ /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
+ * solve the generalized eigenproblem \f$ ABx = \lambda x \f$. */
+ ABx_lx = 0x200,
+ /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
+ * solve the generalized eigenproblem \f$ BAx = \lambda x \f$. */
+ BAx_lx = 0x400,
+ /** \internal */
+ GenEigMask = Ax_lBx | ABx_lx | BAx_lx
+};
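+// For example:
+//   JacobiSVD<MatrixXd> svd(A, ComputeThinU | ComputeThinV);  // thin SVD, usable for least squares
+//   VectorXd x = svd.solve(b);
+//   SelfAdjointEigenSolver<MatrixXd> es(S, EigenvaluesOnly);  // skip the eigenvectors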
+
+/** \ingroup enums
+ * Possible values for the \p QRPreconditioner template parameter of JacobiSVD. */
+enum QRPreconditioners {
+ /** Do not specify what is to be done if the SVD of a non-square matrix is asked for. */
+ NoQRPreconditioner,
+ /** Use a QR decomposition without pivoting as the first step. */
+ HouseholderQRPreconditioner,
+ /** Use a QR decomposition with column pivoting as the first step. */
+ ColPivHouseholderQRPreconditioner,
+ /** Use a QR decomposition with full pivoting as the first step. */
+ FullPivHouseholderQRPreconditioner
+};
+
+#ifdef Success
+#error The preprocessor symbol 'Success' is defined, possibly by the X11 header file X.h
+#endif
+
+/** \ingroup enums
+ * Enum for reporting the status of a computation. */
+enum ComputationInfo {
+ /** Computation was successful. */
+ Success = 0,
+ /** The provided data did not satisfy the prerequisites. */
+ NumericalIssue = 1,
+ /** Iterative procedure did not converge. */
+ NoConvergence = 2,
+ /** The inputs are invalid, or the algorithm has been improperly called.
+ * When assertions are enabled, such errors trigger an assert. */
+ InvalidInput = 3
+};
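+// For example, most decompositions expose their status through info():
+//   LLT<MatrixXd> llt(A);
+//   if(llt.info() != Success) { /* A was not positive definite */ }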
+
+/** \ingroup enums
+ * Enum used to specify how a particular transformation is stored in a matrix.
+ * \sa Transform, Hyperplane::transform(). */
+enum TransformTraits {
+ /** Transformation is an isometry. */
+ Isometry = 0x1,
+ /** Transformation is an affine transformation stored as a (Dim+1)^2 matrix whose last row is
+ * assumed to be [0 ... 0 1]. */
+ Affine = 0x2,
+ /** Transformation is an affine transformation stored as a (Dim) x (Dim+1) matrix. */
+ AffineCompact = 0x10 | Affine,
+ /** Transformation is a general projective transformation stored as a (Dim+1)^2 matrix. */
+ Projective = 0x20
+};
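+// For example, as the Mode template argument of Transform (see the Geometry module):
+//   Transform<float, 3, Affine>   t1;  // same as Affine3f
+//   Transform<float, 3, Isometry> t2;  // same as Isometry3f, assumes rotation + translation only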
+
+/** \internal \ingroup enums
+ * Enum used to choose between implementation depending on the computer architecture. */
+namespace Architecture
+{
+ enum Type {
+ Generic = 0x0,
+ SSE = 0x1,
+ AltiVec = 0x2,
+ VSX = 0x3,
+ NEON = 0x4,
+#if defined EIGEN_VECTORIZE_SSE
+ Target = SSE
+#elif defined EIGEN_VECTORIZE_ALTIVEC
+ Target = AltiVec
+#elif defined EIGEN_VECTORIZE_VSX
+ Target = VSX
+#elif defined EIGEN_VECTORIZE_NEON
+ Target = NEON
+#else
+ Target = Generic
+#endif
+ };
+}
+
+/** \internal \ingroup enums
+ * Enum used as template parameter in GeneralProduct. */
+enum { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
+
+/** \internal \ingroup enums
+ * Enum used in experimental parallel implementation. */
+enum Action {GetAction, SetAction};
+
+/** The type used to identify a dense storage. */
+struct Dense {};
+
+/** The type used to identify a matrix expression */
+struct MatrixXpr {};
+
+/** The type used to identify an array expression */
+struct ArrayXpr {};
+
+} // end namespace Eigen
+
+#endif // EIGEN_CONSTANTS_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h b/third_party/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h
new file mode 100644
index 0000000000..6a0bf0629c
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -0,0 +1,40 @@
+#ifndef EIGEN_WARNINGS_DISABLED
+#define EIGEN_WARNINGS_DISABLED
+
+#ifdef _MSC_VER
+ // 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p))
+ // 4101 - unreferenced local variable
+ // 4127 - conditional expression is constant
+ // 4181 - qualifier applied to reference type ignored
+ // 4211 - nonstandard extension used : redefined extern to static
+ // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data
+ // 4273 - QtAlignedMalloc, inconsistent DLL linkage
+ // 4324 - structure was padded due to declspec(align())
+ // 4512 - assignment operator could not be generated
+ // 4522 - 'class' : multiple assignment operators specified
+ // 4700 - uninitialized local variable 'xyz' used
+ // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
+ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #pragma warning( push )
+ #endif
+ #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4512 4522 4700 4717 )
+#elif defined __INTEL_COMPILER
+ // 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
+ // ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
+ // typedef that may be a reference type.
+ // 279 - controlling expression is constant
+ // ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case.
+ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #pragma warning push
+ #endif
+ #pragma warning disable 2196 279
+#elif defined __clang__
+ // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
+ // this is really a stupid warning as it warns on compile-time expressions involving enums
+ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #pragma clang diagnostic push
+ #endif
+ #pragma clang diagnostic ignored "-Wconstant-logical-operand"
+#endif
+
+#endif // not EIGEN_WARNINGS_DISABLED
diff --git a/third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h b/third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h
new file mode 100644
index 0000000000..be39d731ad
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h
@@ -0,0 +1,301 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FORWARDDECLARATIONS_H
+#define EIGEN_FORWARDDECLARATIONS_H
+
+namespace Eigen {
+namespace internal {
+
+template<typename T> struct traits;
+
+// here we say once and for all that traits<const T> == traits<T>
+// When constness must affect traits, it has to be constness on template parameters on which T itself depends.
+// For example, traits<Map<const T> > != traits<Map<T> >, but
+// traits<const Map<T> > == traits<Map<T> >
+template<typename T> struct traits<const T> : traits<T> {};
+
+template<typename Derived> struct has_direct_access
+{
+ enum { ret = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0 };
+};
+
+template<typename Derived> struct accessors_level
+{
+ enum { has_direct_access = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0,
+ has_write_access = (traits<Derived>::Flags & LvalueBit) ? 1 : 0,
+ value = has_direct_access ? (has_write_access ? DirectWriteAccessors : DirectAccessors)
+ : (has_write_access ? WriteAccessors : ReadOnlyAccessors)
+ };
+};
+
+} // end namespace internal
+
+template<typename T> struct NumTraits;
+
+template<typename Derived> struct EigenBase;
+template<typename Derived> class DenseBase;
+template<typename Derived> class PlainObjectBase;
+
+
+template<typename Derived,
+ int Level = internal::accessors_level<Derived>::value >
+class DenseCoeffsBase;
+
+template<typename _Scalar, int _Rows, int _Cols,
+ int _Options = AutoAlign |
+#if EIGEN_GNUC_AT(3,4)
+ // workaround a bug in at least gcc 3.4.6
+ // the innermost ?: ternary operator is misparsed. We write it slightly
+ // differently and this makes gcc 3.4.6 happy, but it's ugly.
+ // The error would only show up if EIGEN_DEFAULT_TO_ROW_MAJOR is defined
+ // (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
+ : Eigen::ColMajor ),
+#else
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
+ : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+#endif
+ int _MaxRows = _Rows,
+ int _MaxCols = _Cols
+> class Matrix;
+
+template<typename Derived> class MatrixBase;
+template<typename Derived> class ArrayBase;
+
+template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged;
+template<typename ExpressionType, template <typename> class StorageBase > class NoAlias;
+template<typename ExpressionType> class NestByValue;
+template<typename ExpressionType> class ForceAlignedAccess;
+template<typename ExpressionType> class SwapWrapper;
+
+template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block;
+
+template<typename MatrixType, int Size=Dynamic> class VectorBlock;
+template<typename MatrixType> class Transpose;
+template<typename MatrixType> class Conjugate;
+template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
+template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
+template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
+template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
+template<typename BinOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp;
+template<typename Derived, typename Lhs, typename Rhs> class ProductBase;
+template<typename Lhs, typename Rhs> class Product;
+template<typename Lhs, typename Rhs, int Mode> class GeneralProduct;
+template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct;
+
+template<typename Derived> class DiagonalBase;
+template<typename _DiagonalVectorType> class DiagonalWrapper;
+template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;
+template<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct;
+template<typename MatrixType, int Index = 0> class Diagonal;
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class PermutationMatrix;
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class Transpositions;
+template<typename Derived> class PermutationBase;
+template<typename Derived> class TranspositionsBase;
+template<typename _IndicesType> class PermutationWrapper;
+template<typename _IndicesType> class TranspositionsWrapper;
+
+template<typename Derived,
+ int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
+> class MapBase;
+template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
+template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map;
+
+template<typename Derived> class TriangularBase;
+template<typename MatrixType, unsigned int Mode> class TriangularView;
+template<typename MatrixType, unsigned int Mode> class SelfAdjointView;
+template<typename MatrixType> class SparseView;
+template<typename ExpressionType> class WithFormat;
+template<typename MatrixType> struct CommaInitializer;
+template<typename Derived> class ReturnByValue;
+template<typename ExpressionType> class ArrayWrapper;
+template<typename ExpressionType> class MatrixWrapper;
+
+namespace internal {
+template<typename DecompositionType, typename Rhs> struct solve_retval_base;
+template<typename DecompositionType, typename Rhs> struct solve_retval;
+template<typename DecompositionType> struct kernel_retval_base;
+template<typename DecompositionType> struct kernel_retval;
+template<typename DecompositionType> struct image_retval_base;
+template<typename DecompositionType> struct image_retval;
+} // end namespace internal
+
+namespace internal {
+template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynamic, int Subs=Dynamic, int Options=0> class BandMatrix;
+}
+
+namespace internal {
+template<typename Lhs, typename Rhs> struct product_type;
+}
+
+template<typename Lhs, typename Rhs,
+ int ProductType = internal::product_type<Lhs,Rhs>::value>
+struct ProductReturnType;
+
+// this is a workaround for sun CC
+template<typename Lhs, typename Rhs> struct LazyProductReturnType;
+
+namespace internal {
+
+// Provides scalar/packet-wise product and product with accumulation
+// with optional conjugation of the arguments.
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper;
+
+template<typename Scalar> struct scalar_sum_op;
+template<typename Scalar> struct scalar_difference_op;
+template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op;
+template<typename Scalar> struct scalar_opposite_op;
+template<typename Scalar> struct scalar_conjugate_op;
+template<typename Scalar> struct scalar_real_op;
+template<typename Scalar> struct scalar_imag_op;
+template<typename Scalar> struct scalar_abs_op;
+template<typename Scalar> struct scalar_abs2_op;
+template<typename Scalar> struct scalar_sqrt_op;
+template<typename Scalar> struct scalar_rsqrt_op;
+template<typename Scalar> struct scalar_exp_op;
+template<typename Scalar> struct scalar_log_op;
+template<typename Scalar> struct scalar_cos_op;
+template<typename Scalar> struct scalar_sin_op;
+template<typename Scalar> struct scalar_acos_op;
+template<typename Scalar> struct scalar_asin_op;
+template<typename Scalar> struct scalar_tan_op;
+template<typename Scalar> struct scalar_pow_op;
+template<typename Scalar> struct scalar_inverse_op;
+template<typename Scalar> struct scalar_square_op;
+template<typename Scalar> struct scalar_cube_op;
+template<typename Scalar, typename NewType> struct scalar_cast_op;
+template<typename Scalar> struct scalar_multiple_op;
+template<typename Scalar> struct scalar_quotient1_op;
+template<typename Scalar> struct scalar_min_op;
+template<typename Scalar> struct scalar_max_op;
+template<typename Scalar> struct scalar_random_op;
+template<typename Scalar> struct scalar_add_op;
+template<typename Scalar> struct scalar_constant_op;
+template<typename Scalar> struct scalar_identity_op;
+
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
+template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
+
+} // end namespace internal
+
+struct IOFormat;
+
+// Array module
+template<typename _Scalar, int _Rows, int _Cols,
+ int _Options = AutoAlign |
+#if EIGEN_GNUC_AT(3,4)
+ // workaround a bug in at least gcc 3.4.6
+ // the innermost ?: ternary operator is misparsed. We write it slightly
+ // differently and this makes gcc 3.4.6 happy, but it's ugly.
+ // The error would only show up if EIGEN_DEFAULT_TO_ROW_MAJOR is defined
+ // (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
+ : Eigen::ColMajor ),
+#else
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
+ : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+#endif
+ int _MaxRows = _Rows, int _MaxCols = _Cols> class Array;
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> class Select;
+template<typename MatrixType, typename BinaryOp, int Direction> class PartialReduxExpr;
+template<typename ExpressionType, int Direction> class VectorwiseOp;
+template<typename MatrixType,int RowFactor,int ColFactor> class Replicate;
+template<typename MatrixType, int Direction = BothDirections> class Reverse;
+
+template<typename MatrixType> class FullPivLU;
+template<typename MatrixType> class PartialPivLU;
+namespace internal {
+template<typename MatrixType> struct inverse_impl;
+}
+template<typename MatrixType> class HouseholderQR;
+template<typename MatrixType> class ColPivHouseholderQR;
+template<typename MatrixType> class FullPivHouseholderQR;
+template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
+template<typename MatrixType, int UpLo = Lower> class LLT;
+template<typename MatrixType, int UpLo = Lower> class LDLT;
+template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence;
+template<typename Scalar> class JacobiRotation;
+
+// Geometry module:
+template<typename Derived, int _Dim> class RotationBase;
+template<typename Lhs, typename Rhs> class Cross;
+template<typename Derived> class QuaternionBase;
+template<typename Scalar> class Rotation2D;
+template<typename Scalar> class AngleAxis;
+template<typename Scalar,int Dim> class Translation;
+
+#ifdef EIGEN2_SUPPORT
+template<typename Derived, int _Dim> class eigen2_RotationBase;
+template<typename Lhs, typename Rhs> class eigen2_Cross;
+template<typename Scalar> class eigen2_Quaternion;
+template<typename Scalar> class eigen2_Rotation2D;
+template<typename Scalar> class eigen2_AngleAxis;
+template<typename Scalar,int Dim> class eigen2_Transform;
+template <typename _Scalar, int _AmbientDim> class eigen2_ParametrizedLine;
+template <typename _Scalar, int _AmbientDim> class eigen2_Hyperplane;
+template<typename Scalar,int Dim> class eigen2_Translation;
+template<typename Scalar,int Dim> class eigen2_Scaling;
+#endif
+
+#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
+template<typename Scalar> class Quaternion;
+template<typename Scalar,int Dim> class Transform;
+template <typename _Scalar, int _AmbientDim> class ParametrizedLine;
+template <typename _Scalar, int _AmbientDim> class Hyperplane;
+template<typename Scalar,int Dim> class Scaling;
+#endif
+
+#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
+template<typename Scalar, int Options = AutoAlign> class Quaternion;
+template<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform;
+template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class ParametrizedLine;
+template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class Hyperplane;
+template<typename Scalar> class UniformScaling;
+template<typename MatrixType,int Direction> class Homogeneous;
+#endif
+
+// MatrixFunctions module
+template<typename Derived> struct MatrixExponentialReturnValue;
+template<typename Derived> class MatrixFunctionReturnValue;
+template<typename Derived> class MatrixSquareRootReturnValue;
+template<typename Derived> class MatrixLogarithmReturnValue;
+template<typename Derived> class MatrixPowerReturnValue;
+template<typename Derived> class MatrixComplexPowerReturnValue;
+
+namespace internal {
+template <typename Scalar>
+struct stem_function
+{
+ typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+ typedef ComplexScalar type(ComplexScalar, int);
+};
+}
+
+
+#ifdef EIGEN2_SUPPORT
+template<typename ExpressionType> class Cwise;
+template<typename MatrixType> class Minor;
+template<typename MatrixType> class LU;
+template<typename MatrixType> class QR;
+template<typename MatrixType> class SVD;
+namespace internal {
+template<typename MatrixType, unsigned int Mode> struct eigen2_part_return_type;
+}
+#endif
+
+} // end namespace Eigen
+
+#endif // EIGEN_FORWARDDECLARATIONS_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/MKL_support.h b/third_party/eigen3/Eigen/src/Core/util/MKL_support.h
new file mode 100644
index 0000000000..8acca9c8c5
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/MKL_support.h
@@ -0,0 +1,126 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ * Content : Eigen bindings to Intel(R) MKL
+ * Include file with common MKL declarations
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_MKL_SUPPORT_H
+#define EIGEN_MKL_SUPPORT_H
+
+#ifdef EIGEN_USE_MKL_ALL
+ #ifndef EIGEN_USE_BLAS
+ #define EIGEN_USE_BLAS
+ #endif
+ #ifndef EIGEN_USE_LAPACKE
+ #define EIGEN_USE_LAPACKE
+ #endif
+ #ifndef EIGEN_USE_MKL_VML
+ #define EIGEN_USE_MKL_VML
+ #endif
+#endif
+
+#ifdef EIGEN_USE_LAPACKE_STRICT
+ #define EIGEN_USE_LAPACKE
+#endif
+
+#if defined(EIGEN_USE_BLAS) || defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML)
+ #define EIGEN_USE_MKL
+#endif
+
+#if defined EIGEN_USE_MKL
+# include <mkl.h>
+/* Check IMKL version for compatibility: < 10.3 is not usable with Eigen */
+# ifndef INTEL_MKL_VERSION
+# undef EIGEN_USE_MKL /* INTEL_MKL_VERSION is not even defined on older versions */
+# elif INTEL_MKL_VERSION < 100305 /* the intel-mkl-103-release-notes say this was when the lapacke.h interface was added*/
+# undef EIGEN_USE_MKL
+# endif
+# ifndef EIGEN_USE_MKL
+ /*If the MKL version is too old, undef everything*/
+# undef EIGEN_USE_MKL_ALL
+# undef EIGEN_USE_BLAS
+# undef EIGEN_USE_LAPACKE
+# undef EIGEN_USE_MKL_VML
+# undef EIGEN_USE_LAPACKE_STRICT
+# undef EIGEN_USE_LAPACKE
+# endif
+#endif
+
+#if defined EIGEN_USE_MKL
+#include <mkl_lapacke.h>
+#define EIGEN_MKL_VML_THRESHOLD 128
+
+namespace Eigen {
+
+typedef std::complex<double> dcomplex;
+typedef std::complex<float> scomplex;
+
+namespace internal {
+
+template<typename MKLType, typename EigenType>
+static inline void assign_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) {
+ mklScalar=eigenScalar;
+}
+
+template<typename MKLType, typename EigenType>
+static inline void assign_conj_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) {
+ mklScalar=eigenScalar;
+}
+
+template <>
+inline void assign_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
+ mklScalar.real=eigenScalar.real();
+ mklScalar.imag=eigenScalar.imag();
+}
+
+template <>
+inline void assign_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
+ mklScalar.real=eigenScalar.real();
+ mklScalar.imag=eigenScalar.imag();
+}
+
+template <>
+inline void assign_conj_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
+ mklScalar.real=eigenScalar.real();
+ mklScalar.imag=-eigenScalar.imag();
+}
+
+template <>
+inline void assign_conj_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
+ mklScalar.real=eigenScalar.real();
+ mklScalar.imag=-eigenScalar.imag();
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif
+
+#endif // EIGEN_MKL_SUPPORT_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/Macros.h b/third_party/eigen3/Eigen/src/Core/util/Macros.h
new file mode 100644
index 0000000000..729a451324
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/Macros.h
@@ -0,0 +1,740 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MACROS_H
+#define EIGEN_MACROS_H
+
+#define EIGEN_WORLD_VERSION 3
+#define EIGEN_MAJOR_VERSION 2
+#define EIGEN_MINOR_VERSION 90
+
+#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
+ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
+ EIGEN_MINOR_VERSION>=z))))
+
+// Compiler identification, EIGEN_COMP_*
+/// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC
+#ifdef __GNUC__
+ #define EIGEN_COMP_GNUC 1
+#else
+ #define EIGEN_COMP_GNUC 0
+#endif
+
+/// \internal EIGEN_COMP_CLANG set to 1 if the compiler is clang (alias for __clang__)
+#if defined(__clang__)
+ #define EIGEN_COMP_CLANG 1
+#else
+ #define EIGEN_COMP_CLANG 0
+#endif
+
+
+/// \internal EIGEN_COMP_LLVM set to 1 if the compiler backend is llvm
+#if defined(__llvm__)
+ #define EIGEN_COMP_LLVM 1
+#else
+ #define EIGEN_COMP_LLVM 0
+#endif
+
+/// \internal EIGEN_COMP_ICC set to __INTEL_COMPILER if the compiler is Intel compiler, 0 otherwise
+#if defined(__INTEL_COMPILER)
+ #define EIGEN_COMP_ICC __INTEL_COMPILER
+#else
+ #define EIGEN_COMP_ICC 0
+#endif
+
+/// \internal EIGEN_COMP_MINGW set to 1 if the compiler is mingw
+#if defined(__MINGW32__)
+ #define EIGEN_COMP_MINGW 1
+#else
+ #define EIGEN_COMP_MINGW 0
+#endif
+
+/// \internal EIGEN_COMP_SUNCC set to 1 if the compiler is Solaris Studio
+#if defined(__SUNPRO_CC)
+ #define EIGEN_COMP_SUNCC 1
+#else
+ #define EIGEN_COMP_SUNCC 0
+#endif
+
+/// \internal EIGEN_COMP_MSVC set to _MSC_VER if the compiler is Microsoft Visual C++, 0 otherwise.
+#if defined(_MSC_VER)
+ #define EIGEN_COMP_MSVC _MSC_VER
+#else
+ #define EIGEN_COMP_MSVC 0
+#endif
+
+/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not, e.g., ICC
+#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC)
+ #define EIGEN_COMP_MSVC_STRICT 1
+#else
+ #define EIGEN_COMP_MSVC_STRICT 0
+#endif
+
+/// \internal EIGEN_COMP_IBM set to 1 if the compiler is IBM XL C++
+#if defined(__IBMCPP__) || defined(__xlc__)
+ #define EIGEN_COMP_IBM 1
+#else
+ #define EIGEN_COMP_IBM 0
+#endif
+
+/// \internal EIGEN_COMP_PGI set to 1 if the compiler is Portland Group Compiler
+#if defined(__PGI)
+ #define EIGEN_COMP_PGI 1
+#else
+ #define EIGEN_COMP_PGI 0
+#endif
+
+/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler
+#if defined(__CC_ARM) || defined(__ARMCC_VERSION)
+ #define EIGEN_COMP_ARM 1
+#else
+ #define EIGEN_COMP_ARM 0
+#endif
+
+
+/// \internal EIGEN_COMP_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)
+#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM )
+ #define EIGEN_COMP_GNUC_STRICT 1
+#else
+ #define EIGEN_COMP_GNUC_STRICT 0
+#endif
+
+
+#if EIGEN_COMP_GNUC
+ #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x)
+ #define EIGEN_GNUC_AT_MOST(x,y) ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x)
+ #define EIGEN_GNUC_AT(x,y) ( __GNUC__==x && __GNUC_MINOR__==y )
+#else
+ #define EIGEN_GNUC_AT_LEAST(x,y) 0
+ #define EIGEN_GNUC_AT_MOST(x,y) 0
+ #define EIGEN_GNUC_AT(x,y) 0
+#endif
+
+// FIXME: could probably be removed as we do not support gcc 3.x anymore
+#if EIGEN_COMP_GNUC && (__GNUC__ <= 3)
+#define EIGEN_GCC3_OR_OLDER 1
+#else
+#define EIGEN_GCC3_OR_OLDER 0
+#endif
+
+
+// Architecture identification, EIGEN_ARCH_*
+
+#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
+ #define EIGEN_ARCH_x86_64 1
+#else
+ #define EIGEN_ARCH_x86_64 0
+#endif
+
+#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
+ #define EIGEN_ARCH_i386 1
+#else
+ #define EIGEN_ARCH_i386 0
+#endif
+
+#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_i386
+ #define EIGEN_ARCH_i386_OR_x86_64 1
+#else
+ #define EIGEN_ARCH_i386_OR_x86_64 0
+#endif
+
+/// \internal EIGEN_ARCH_ARM set to 1 if the architecture is ARM
+#if defined(__arm__)
+ #define EIGEN_ARCH_ARM 1
+#else
+ #define EIGEN_ARCH_ARM 0
+#endif
+
+/// \internal EIGEN_ARCH_ARM64 set to 1 if the architecture is ARM64
+#if defined(__aarch64__)
+ #define EIGEN_ARCH_ARM64 1
+#else
+ #define EIGEN_ARCH_ARM64 0
+#endif
+
+#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64
+ #define EIGEN_ARCH_ARM_OR_ARM64 1
+#else
+ #define EIGEN_ARCH_ARM_OR_ARM64 0
+#endif
+
+/// \internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS
+#if defined(__mips__) || defined(__mips)
+ #define EIGEN_ARCH_MIPS 1
+#else
+ #define EIGEN_ARCH_MIPS 0
+#endif
+
+/// \internal EIGEN_ARCH_SPARC set to 1 if the architecture is SPARC
+#if defined(__sparc__) || defined(__sparc)
+ #define EIGEN_ARCH_SPARC 1
+#else
+ #define EIGEN_ARCH_SPARC 0
+#endif
+
+/// \internal EIGEN_ARCH_IA64 set to 1 if the architecture is Intel Itanium
+#if defined(__ia64__)
+ #define EIGEN_ARCH_IA64 1
+#else
+ #define EIGEN_ARCH_IA64 0
+#endif
+
+/// \internal EIGEN_ARCH_PPC set to 1 if the architecture is PowerPC
+#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC)
+ #define EIGEN_ARCH_PPC 1
+#else
+ #define EIGEN_ARCH_PPC 0
+#endif
+
+
+
+// Operating system identification, EIGEN_OS_*
+
+/// \internal EIGEN_OS_UNIX set to 1 if the OS is a unix variant
+#if defined(__unix__) || defined(__unix)
+ #define EIGEN_OS_UNIX 1
+#else
+ #define EIGEN_OS_UNIX 0
+#endif
+
+/// \internal EIGEN_OS_LINUX set to 1 if the OS is based on Linux kernel
+#if defined(__linux__)
+ #define EIGEN_OS_LINUX 1
+#else
+ #define EIGEN_OS_LINUX 0
+#endif
+
+/// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android
+// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain.
+#if defined(__ANDROID__) || defined(ANDROID)
+ #define EIGEN_OS_ANDROID 1
+#else
+ #define EIGEN_OS_ANDROID 0
+#endif
+
+/// \internal EIGEN_OS_GNULINUX set to 1 if the OS is GNU/Linux and not another Linux-based OS (e.g., not Android)
+#if defined(__gnu_linux__) && !(EIGEN_OS_ANDROID)
+ #define EIGEN_OS_GNULINUX 1
+#else
+ #define EIGEN_OS_GNULINUX 0
+#endif
+
+/// \internal EIGEN_OS_BSD set to 1 if the OS is a BSD variant
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__)
+ #define EIGEN_OS_BSD 1
+#else
+ #define EIGEN_OS_BSD 0
+#endif
+
+/// \internal EIGEN_OS_MAC set to 1 if the OS is MacOS
+#if defined(__APPLE__)
+ #define EIGEN_OS_MAC 1
+#else
+ #define EIGEN_OS_MAC 0
+#endif
+
+/// \internal EIGEN_OS_QNX set to 1 if the OS is QNX
+#if defined(__QNX__)
+ #define EIGEN_OS_QNX 1
+#else
+ #define EIGEN_OS_QNX 0
+#endif
+
+/// \internal EIGEN_OS_WIN set to 1 if the OS is Windows based
+#if defined(_WIN32)
+ #define EIGEN_OS_WIN 1
+#else
+ #define EIGEN_OS_WIN 0
+#endif
+
+/// \internal EIGEN_OS_WIN64 set to 1 if the OS is a 64-bit Windows
+#if defined(_WIN64)
+ #define EIGEN_OS_WIN64 1
+#else
+ #define EIGEN_OS_WIN64 0
+#endif
+
+/// \internal EIGEN_OS_WINCE set to 1 if the OS is Windows CE
+#if defined(_WIN32_WCE)
+ #define EIGEN_OS_WINCE 1
+#else
+ #define EIGEN_OS_WINCE 0
+#endif
+
+/// \internal EIGEN_OS_CYGWIN set to 1 if the OS is Windows/Cygwin
+#if defined(__CYGWIN__)
+ #define EIGEN_OS_CYGWIN 1
+#else
+ #define EIGEN_OS_CYGWIN 0
+#endif
+
+/// \internal EIGEN_OS_WIN_STRICT set to 1 if the OS is really Windows and not some variants
+#if EIGEN_OS_WIN && !( EIGEN_OS_WINCE || EIGEN_OS_CYGWIN )
+ #define EIGEN_OS_WIN_STRICT 1
+#else
+ #define EIGEN_OS_WIN_STRICT 0
+#endif
+
+
+
+
+#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG
+ // see bug 89
+ #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
+#else
+ #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
+#endif
+
+// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
+// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
+// enable alignment, but it can be a cause of problems on some platforms, so we just disable it on
+// certain common platforms (compiler+architecture combinations) to avoid these problems.
+// Only static alignment is really problematic (it relies on nonstandard compiler extensions that don't
+// work everywhere, for example they don't work on GCC/ARM), so we try to keep heap alignment even
+// when we have to disable static alignment.
+#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
+#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
+#else
+#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
+#endif
+
+// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
+#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
+ && !EIGEN_GCC3_OR_OLDER \
+ && !EIGEN_COMP_SUNCC \
+ && !EIGEN_OS_QNX
+ #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
+#else
+ #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
+#endif
+
+// Defines the boundary (in bytes) on which the data needs to be aligned. Note
+// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
+// aligned at all regardless of the value of this #define.
+#define EIGEN_ALIGN_BYTES 16
+
+#ifdef EIGEN_DONT_ALIGN
+ #ifndef EIGEN_DONT_ALIGN_STATICALLY
+ #define EIGEN_DONT_ALIGN_STATICALLY
+ #endif
+ #define EIGEN_ALIGN 0
+#elif !defined(EIGEN_DONT_VECTORIZE)
+ #if defined(__AVX__)
+ #undef EIGEN_ALIGN_BYTES
+ #define EIGEN_ALIGN_BYTES 32
+ #endif
+ #define EIGEN_ALIGN 1
+#else
+ #define EIGEN_ALIGN 0
+#endif
+
+#define EIGEN_MAX_ALIGN_BYTES EIGEN_ALIGN_BYTES
+
+
+// This macro can be used to prevent macro expansion, e.g.:
+// std::max EIGEN_NOT_A_MACRO(a,b)
+#define EIGEN_NOT_A_MACRO
+
+// EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable
+// alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STATICALLY should be used.
+#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY)
+ #define EIGEN_ALIGN_STATICALLY 1
+#else
+ #define EIGEN_ALIGN_STATICALLY 0
+ #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+ #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+ #endif
+#endif
+
+#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
+#else
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor
+#endif
+
+#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
+#endif
+
+// Cross compiler wrapper around LLVM's __has_builtin
+#ifdef __has_builtin
+# define EIGEN_HAS_BUILTIN(x) __has_builtin(x)
+#else
+# define EIGEN_HAS_BUILTIN(x) 0
+#endif
+
+// A Clang feature extension to determine compiler features.
+// We use it to determine 'cxx_rvalue_references'
+#ifndef __has_feature
+# define __has_feature(x) 0
+#endif
+
+#if __cplusplus > 199711L
+#define EIGEN_HAS_VARIADIC_TEMPLATES 1
+#endif
+
+// Does the compiler support const expressions?
+#if __cplusplus > 199711L && !defined(__NVCC__) && !defined(GOOGLE_LIBCXX) && !defined(__APPLE__)
+#define EIGEN_HAS_CONSTEXPR 1
+#endif
+
+/** Allows one to disable some optimizations which might affect the accuracy of the result.
+ * Such optimizations are enabled by default; set EIGEN_FAST_MATH to 0 to disable them.
+ * They currently include:
+ * - single precision Cwise::sin() and Cwise::cos() when SSE vectorization is enabled.
+ */
+#ifndef EIGEN_FAST_MATH
+#define EIGEN_FAST_MATH 1
+#endif
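+// For example, to favor accuracy over speed, define the macro before including any Eigen header:
+//   #define EIGEN_FAST_MATH 0
+//   #include <Eigen/Core>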
+
+#define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl;
+
+// concatenate two tokens
+#define EIGEN_CAT2(a,b) a ## b
+#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
+
+// convert a token to a string
+#define EIGEN_MAKESTRING2(a) #a
+#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
+
+// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
+// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
+// but GCC is still doing fine with just inline.
+#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
+#define EIGEN_STRONG_INLINE __forceinline
+#else
+#define EIGEN_STRONG_INLINE inline
+#endif
+
+// EIGEN_ALWAYS_INLINE is the strongest: it has the effect of making the function inline and adding every possible
+// attribute to maximize inlining. This should only be used when really necessary: in particular,
+// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
+// FIXME with the always_inline attribute,
+// gcc 3.4.x reports the following compilation error:
+// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
+// : function body not available
+#if EIGEN_GNUC_AT_LEAST(4,0)
+#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
+#else
+#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
+#endif
+
+#if EIGEN_COMP_GNUC
+#define EIGEN_DONT_INLINE __attribute__((noinline))
+#elif EIGEN_COMP_MSVC
+#define EIGEN_DONT_INLINE __declspec(noinline)
+#else
+#define EIGEN_DONT_INLINE
+#endif
+
+#if EIGEN_COMP_GNUC
+#define EIGEN_PERMISSIVE_EXPR __extension__
+#else
+#define EIGEN_PERMISSIVE_EXPR
+#endif
+
+#if EIGEN_COMP_GNUC
+#define EIGEN_LIKELY(x) __builtin_expect((x), 1)
+#define EIGEN_UNLIKELY(x) __builtin_expect((x), 0)
+#else
+#define EIGEN_LIKELY(x) (x)
+#define EIGEN_UNLIKELY(x) (x)
+#endif
+
+// this macro allows one to get rid of linking errors about multiply defined functions.
+//  - static is not very good because it prevents definitions from different object files from being merged.
+//           So static causes the resulting linked executable to be bloated with multiple copies of the same function.
+//  - inline is not perfect either, as it gives the compiler an unwanted hint to inline the function.
+#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline
+
+#ifdef NDEBUG
+# ifndef EIGEN_NO_DEBUG
+# define EIGEN_NO_DEBUG
+# endif
+#endif
+
+#if !defined(EIGEN_NO_CHECK) || (!defined(EIGEN_NO_DEBUG) && !EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO)
+ // Custom assertion code that works regardless of the compilation mode.
+ #include <cstdlib> // for abort
+ #include <iostream> // for std::cerr
+
+ namespace Eigen {
+ namespace internal {
+ // trivial function copying a bool. Must be EIGEN_DONT_INLINE, so we implement it after including Eigen headers.
+ // see bug 89.
+ namespace {
+ EIGEN_DONT_INLINE bool copy_bool(bool b) { return b; }
+ }
+ inline void assert_fail(const char *condition, const char *function, const char *file, int line)
+ {
+ copy_bool(true); // dummy call to avoid warnings about unused functions.
+ std::cerr << "assertion failed: " << condition << " in function " << function << " at " << file << ":" << line << std::endl;
+ abort();
+ }
+ }
+ }
+ #define eigen_internal_check(x) \
+ do { \
+ if(!Eigen::internal::copy_bool(x)) \
+ Eigen::internal::assert_fail(EIGEN_MAKESTRING(x), __PRETTY_FUNCTION__, __FILE__, __LINE__); \
+ } while(false)
+#endif
+
+#ifdef EIGEN_NO_CHECK
+ #define eigen_check(x)
+#else
+ #define eigen_check(x) eigen_internal_check(x)
+#endif
+
+// eigen_plain_assert is where we implement the workaround for the assert() bug in GCC <= 4.3, see bug 89
+#ifdef EIGEN_NO_DEBUG
+ #define eigen_plain_assert(x)
+#else
+ #if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO
+ namespace Eigen {
+ namespace internal {
+ inline bool copy_bool(bool b) { return b; }
+ }
+ }
+ #define eigen_plain_assert(x) assert(x)
+ #else
+ // work around bug 89
+ #define eigen_plain_assert(x) eigen_internal_check(x)
+ #endif
+#endif
+
+// eigen_assert can be overridden
+#ifndef eigen_assert
+#define eigen_assert(x) eigen_plain_assert(x)
+#endif
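+// For example, a project can redirect failed assertions to exceptions by defining the macro
+// before including any Eigen header (a sketch):
+//   #include <stdexcept>
+//   #define eigen_assert(x) \
+//     do { if(!(x)) throw std::runtime_error("Eigen assertion failed: " #x); } while(false)
+//   #include <Eigen/Dense>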
+
+#ifdef EIGEN_INTERNAL_DEBUGGING
+#define eigen_internal_assert(x) eigen_assert(x)
+#else
+#define eigen_internal_assert(x)
+#endif
+
+#ifdef EIGEN_NO_DEBUG
+#define EIGEN_ONLY_USED_FOR_DEBUG(x) (void)x
+#else
+#define EIGEN_ONLY_USED_FOR_DEBUG(x)
+#endif
+
+#ifndef EIGEN_NO_DEPRECATED_WARNING
+ #if EIGEN_COMP_GNUC
+ #define EIGEN_DEPRECATED __attribute__((deprecated))
+ #elif (defined _MSC_VER)
+ #define EIGEN_DEPRECATED __declspec(deprecated)
+ #else
+ #define EIGEN_DEPRECATED
+ #endif
+#else
+ #define EIGEN_DEPRECATED
+#endif
+
+#if EIGEN_COMP_GNUC
+#define EIGEN_UNUSED __attribute__((unused))
+#else
+#define EIGEN_UNUSED
+#endif
+
+// Suppresses 'unused variable' warnings.
+namespace Eigen {
+ namespace internal {
+ template<typename T> void ignore_unused_variable(const T&) {}
+ }
+}
+#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);
+
+#if !defined(EIGEN_ASM_COMMENT)
+ #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)
+ #define EIGEN_ASM_COMMENT(X) asm("#" X)
+ #else
+ #define EIGEN_ASM_COMMENT(X)
+ #endif
+#endif
+
+/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
+ * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
+ * so that vectorization doesn't affect binary compatibility.
+ *
+ * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
+ * vectorized and non-vectorized code.
+ */
+#if (defined __CUDACC__)
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
+#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
+#elif EIGEN_COMP_MSVC
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
+#elif EIGEN_COMP_SUNCC
+ // FIXME not sure about this one:
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
+#else
+ #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
+#endif
+
+#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
+#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
+#define EIGEN_ALIGN_DEFAULT EIGEN_ALIGN_TO_BOUNDARY(EIGEN_ALIGN_BYTES)
+#define EIGEN_ALIGN_MAX EIGEN_ALIGN_DEFAULT
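+
+// Illustrative sketch of how these attributes are meant to be used (the variables below
+// are hypothetical, not part of this header):
+// \code
+// EIGEN_ALIGN16 float coeffs[4];      // storage aligned on a 16-byte boundary (one SSE packet)
+// EIGEN_ALIGN_MAX double buffer[8];   // aligned on EIGEN_ALIGN_BYTES, the largest enabled requirement
+// \endcode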
+
+#if EIGEN_ALIGN_STATICALLY
+#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) EIGEN_ALIGN_TO_BOUNDARY(n)
+#define EIGEN_USER_ALIGN16 EIGEN_ALIGN16
+#define EIGEN_USER_ALIGN32 EIGEN_ALIGN32
+#define EIGEN_USER_ALIGN_DEFAULT EIGEN_ALIGN_DEFAULT
+#else
+#define EIGEN_USER_ALIGN_TO_BOUNDARY(n)
+#define EIGEN_USER_ALIGN16
+#define EIGEN_USER_ALIGN32
+#define EIGEN_USER_ALIGN_DEFAULT
+#endif
+
+#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
+ #define EIGEN_RESTRICT
+#endif
+#ifndef EIGEN_RESTRICT
+ #define EIGEN_RESTRICT __restrict
+#endif
+
+#ifndef EIGEN_STACK_ALLOCATION_LIMIT
+#define EIGEN_STACK_ALLOCATION_LIMIT 20000
+#endif
+
+#ifndef EIGEN_DEFAULT_IO_FORMAT
+#ifdef EIGEN_MAKING_DOCS
+// format used in Eigen's documentation
+// it has to be defined here, as escaping characters in CMake's add_definitions argument seems very problematic.
+#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat(3, 0, " ", "\n", "", "")
+#else
+#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat()
+#endif
+#endif
+
+// just an empty macro!
+#define EIGEN_EMPTY
+
+#if EIGEN_COMP_MSVC_STRICT
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =;
+#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =; \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \
+ template <typename OtherDerived> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; }
+#else
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =; \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \
+ { \
+ Base::operator=(other); \
+ return *this; \
+ }
+#endif
+
+#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
+
+/**
+* Just a side note: commenting within defines works only by documenting
+* after the object (via '!<'). Comments cannot span multiple lines, which is
+* why we have these extra long lines. What confuses doxygen here is
+* that we use '\' and basically have a bunch of typedefs with their
+* documentation in a single line.
+**/
+
+#define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
+ typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \
+  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T corresponds to RealScalar. */ \
+ typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \
+ typedef typename Eigen::internal::nested<Derived>::type Nested; \
+ typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
+ typedef typename Eigen::internal::traits<Derived>::Index Index; \
+ enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
+ ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
+ Flags = Eigen::internal::traits<Derived>::Flags, \
+ CoeffReadCost = Eigen::internal::traits<Derived>::CoeffReadCost, \
+ SizeAtCompileTime = Base::SizeAtCompileTime, \
+ MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
+ IsVectorAtCompileTime = Base::IsVectorAtCompileTime };
+
+
+#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
+ typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \
+  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T corresponds to RealScalar. */ \
+ typedef typename Base::PacketScalar PacketScalar; \
+ typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \
+ typedef typename Eigen::internal::nested<Derived>::type Nested; \
+ typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
+ typedef typename Eigen::internal::traits<Derived>::Index Index; \
+ enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
+ ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
+ MaxRowsAtCompileTime = Eigen::internal::traits<Derived>::MaxRowsAtCompileTime, \
+ MaxColsAtCompileTime = Eigen::internal::traits<Derived>::MaxColsAtCompileTime, \
+ Flags = Eigen::internal::traits<Derived>::Flags, \
+ CoeffReadCost = Eigen::internal::traits<Derived>::CoeffReadCost, \
+ SizeAtCompileTime = Base::SizeAtCompileTime, \
+ MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
+ IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
+ using Base::derived; \
+ using Base::const_cast_derived;
+
+
+#define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
+#define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
+
+// EIGEN_SIZE_MIN_PREFER_DYNAMIC gives the min between compile-time sizes. 0 has absolute priority, followed by 1,
+// followed by Dynamic, followed by other finite values. The reason for giving Dynamic the priority over
+// finite values is that min(3, Dynamic) should be Dynamic, since that could be anything between 0 and 3.
+#define EIGEN_SIZE_MIN_PREFER_DYNAMIC(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
+ : ((int)a == 1 || (int)b == 1) ? 1 \
+ : ((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
+ : ((int)a <= (int)b) ? (int)a : (int)b)
+
+// EIGEN_SIZE_MIN_PREFER_FIXED is a variant of EIGEN_SIZE_MIN_PREFER_DYNAMIC comparing MaxSizes. The difference is that finite values
+// now have priority over Dynamic, so that min(3, Dynamic) gives 3. Indeed, whatever the actual value is
+// (between 0 and 3), it is not more than 3.
+#define EIGEN_SIZE_MIN_PREFER_FIXED(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
+ : ((int)a == 1 || (int)b == 1) ? 1 \
+ : ((int)a == Dynamic && (int)b == Dynamic) ? Dynamic \
+ : ((int)a == Dynamic) ? (int)b \
+ : ((int)b == Dynamic) ? (int)a \
+ : ((int)a <= (int)b) ? (int)a : (int)b)
+
+// see EIGEN_SIZE_MIN_PREFER_DYNAMIC. No need for a separate variant for MaxSizes here.
+#define EIGEN_SIZE_MAX(a,b) (((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
+ : ((int)a >= (int)b) ? (int)a : (int)b)
+
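+// A few illustrative compile-time evaluations of the helpers above (a sketch, assuming the
+// usual Eigen::Dynamic sentinel):
+// \code
+// EIGEN_SIZE_MIN_PREFER_DYNAMIC(3, Dynamic)  // Dynamic: the runtime value could be anything in [0,3]
+// EIGEN_SIZE_MIN_PREFER_FIXED(3, Dynamic)    // 3: whatever the runtime value is, it is at most 3
+// EIGEN_SIZE_MAX(3, Dynamic)                 // Dynamic
+// \endcode
+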
+#define EIGEN_LOGICAL_XOR(a,b) (((a) || (b)) && !((a) && (b)))
+
+#define EIGEN_IMPLIES(a,b) (!(a) || (b))
+
+#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \
+ template<typename OtherDerived> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived> \
+ (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
+ { \
+ return CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived>(derived(), other.derived()); \
+ }
+
+// the expression type of a cwise product
+#define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \
+ CwiseBinaryOp< \
+ internal::scalar_product_op< \
+ typename internal::traits<LHS>::Scalar, \
+ typename internal::traits<RHS>::Scalar \
+ >, \
+ const LHS, \
+ const RHS \
+ >
+
+#endif // EIGEN_MACROS_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/MatrixMapper.h b/third_party/eigen3/Eigen/src/Core/util/MatrixMapper.h
new file mode 100644
index 0000000000..ec2ad018ff
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/MatrixMapper.h
@@ -0,0 +1,155 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Eric Martin <eric@ericmart.in>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIXMAPPER_H
+#define EIGEN_MATRIXMAPPER_H
+
+// To support both matrices and tensors, we need a way to abstractly access an
+// element of a matrix (where the matrix might be an implicitly flattened
+// tensor). This file abstracts the logic needed to access elements in a row
+// major or column major matrix.
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename Scalar, typename Index>
+class BlasVectorMapper {
+ public:
+ EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
+
+ EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
+ return m_data[i];
+ }
+ template <typename Packet, int AlignmentType>
+ EIGEN_ALWAYS_INLINE Packet load(Index i) const {
+ return ploadt<Packet, AlignmentType>(m_data + i);
+ }
+
+ template <typename Packet>
+ bool aligned(Index i) const {
+ return (size_t(m_data+i)%sizeof(Packet))==0;
+ }
+
+ protected:
+ Scalar* m_data;
+};
+
+// We need a fast way to iterate down columns (if column major) that doesn't
+// involve performing a multiplication for each lookup.
+template<typename Scalar, typename Index, int AlignmentType>
+class BlasLinearMapper {
+ public:
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<Scalar>::half HalfPacket;
+
+ EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
+
+ EIGEN_ALWAYS_INLINE void prefetch(int i) const {
+ internal::prefetch(&operator()(i));
+ }
+
+ EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
+ return m_data[i];
+ }
+
+ EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
+ return ploadt<Packet, AlignmentType>(m_data + i);
+ }
+
+ EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
+ return ploadt<HalfPacket, AlignmentType>(m_data + i);
+ }
+
+ EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const {
+ pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
+ }
+
+ protected:
+ Scalar* m_data;
+};
+
+// This mapper allows access into matrix by coordinates i and j.
+template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned>
+class blas_data_mapper {
+ public:
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<Scalar>::half HalfPacket;
+
+ typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
+ typedef BlasVectorMapper<Scalar, Index> VectorMapper;
+
+ EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
+
+ EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
+ getSubMapper(Index i, Index j) const {
+ return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
+ }
+
+ EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
+ return LinearMapper(&operator()(i, j));
+ }
+
+ EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
+ return VectorMapper(&operator()(i, j));
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
+ return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
+ }
+
+ EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
+ return ploadt<Packet, AlignmentType>(&operator()(i, j));
+ }
+
+ EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
+ return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
+ }
+
+ template<typename SubPacket>
+ EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const {
+ pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
+ }
+
+ template<typename SubPacket>
+ EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
+ return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
+ }
+
+ const Index stride() const { return m_stride; }
+
+ Index firstAligned(Index size) const {
+ if (size_t(m_data)%sizeof(Scalar)) {
+ return -1;
+ }
+ return internal::first_aligned(m_data, size);
+ }
+
+ protected:
+ Scalar* EIGEN_RESTRICT m_data;
+ const Index m_stride;
+};
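+
+// Illustrative usage sketch (the names below are local to the example, not part of this
+// header): given a column-major buffer and its leading dimension, coefficient and packet
+// accesses are addressed uniformly through the mapper.
+// \code
+// float storage[6] = {1, 2, 3, 4, 5, 6};                   // a 3x2 column-major matrix
+// blas_data_mapper<float, int, ColMajor> map(storage, 3);  // stride = number of rows
+// float a = map(1, 1);                                     // storage[1 + 1*3] == 5
+// blas_data_mapper<float, int, ColMajor>::LinearMapper col1 = map.getLinearMapper(0, 1);
+// float b = col1(2);                                       // storage[3 + 2] == 6
+// \endcode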
+
+// This is just a convenient way to work with
+// blas_data_mapper<const Scalar, Index, StorageOrder>
+template<typename Scalar, typename Index, int StorageOrder>
+class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
+ public:
+ EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}
+
+ EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const {
+ return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride);
+ }
+};
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif //EIGEN_MATRIXMAPPER_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/Memory.h b/third_party/eigen3/Eigen/src/Core/util/Memory.h
new file mode 100644
index 0000000000..03a699177a
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/Memory.h
@@ -0,0 +1,984 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
+// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
+// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>
+// Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+/*****************************************************************************
+*** Platform checks for aligned malloc functions ***
+*****************************************************************************/
+
+#ifndef EIGEN_MEMORY_H
+#define EIGEN_MEMORY_H
+
+// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
+// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
+// Currently, let's include it only on unix systems:
+#if defined(__unix__) || defined(__unix)
+ #include <unistd.h>
+ #if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
+ #define EIGEN_HAS_POSIX_MEMALIGN 1
+ #endif
+#endif
+
+#ifndef EIGEN_HAS_POSIX_MEMALIGN
+ #define EIGEN_HAS_POSIX_MEMALIGN 0
+#endif
+
+#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_AVX
+ #define EIGEN_HAS_MM_MALLOC 1
+#else
+ #define EIGEN_HAS_MM_MALLOC 0
+#endif
+
+namespace Eigen {
+
+namespace internal {
+
+EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc()
+{
+#ifndef __CUDA_ARCH__
+ #ifdef EIGEN_EXCEPTIONS
+ throw std::bad_alloc();
+ #else
+ std::size_t huge = static_cast<std::size_t>(-1);
+ new int[huge];
+ #endif
+#endif
+}
+
+/*****************************************************************************
+*** Implementation of handmade aligned functions ***
+*****************************************************************************/
+
+/* ----- Hand made implementations of aligned malloc/free and realloc ----- */
+
+/** \internal Like malloc, but the returned pointer is guaranteed to be aligned on an EIGEN_ALIGN_BYTES boundary.
+  * Fast, but wastes EIGEN_ALIGN_BYTES additional bytes of memory. Does not throw any exception.
+ */
+inline void* handmade_aligned_malloc(std::size_t size)
+{
+ void *original = std::malloc(size+EIGEN_ALIGN_BYTES);
+ if (original == 0) return 0;
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
+ *(reinterpret_cast<void**>(aligned) - 1) = original;
+ return aligned;
+}
+
+/** \internal Frees memory allocated with handmade_aligned_malloc */
+inline void handmade_aligned_free(void *ptr)
+{
+ if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
+}
+
+/** \internal
+ * \brief Reallocates aligned memory.
+  * Since we know that our handmade version is based on std::malloc
+ * we can use std::realloc to implement efficient reallocation.
+ */
+inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
+{
+ if (ptr == 0) return handmade_aligned_malloc(size);
+ void *original = *(reinterpret_cast<void**>(ptr) - 1);
+ std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
+ original = std::realloc(original,size+EIGEN_ALIGN_BYTES);
+ if (original == 0) return 0;
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
+ void *previous_aligned = static_cast<char *>(original)+previous_offset;
+ if(aligned!=previous_aligned)
+ std::memmove(aligned, previous_aligned, size);
+
+ *(reinterpret_cast<void**>(aligned) - 1) = original;
+ return aligned;
+}
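+
+// Sketch of the scheme implemented above (illustrative only): EIGEN_ALIGN_BYTES extra bytes
+// are requested so that the returned pointer can be bumped past the raw one, and the raw
+// pointer is stashed in the word just before the aligned block for later release.
+// \code
+// void* p = handmade_aligned_malloc(100);    // p is aligned on EIGEN_ALIGN_BYTES
+// p = handmade_aligned_realloc(p, 200);      // grows the block while preserving alignment
+// handmade_aligned_free(p);                  // frees through the stashed original pointer
+// \endcode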
+
+/*****************************************************************************
+*** Implementation of generic aligned realloc (when no realloc can be used)***
+*****************************************************************************/
+
+EIGEN_DEVICE_FUNC void* aligned_malloc(std::size_t size);
+EIGEN_DEVICE_FUNC void aligned_free(void *ptr);
+
+/** \internal
+ * \brief Reallocates aligned memory.
+ * Allows reallocation with aligned ptr types. This implementation will
+ * always create a new memory chunk and copy the old data.
+ */
+inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
+{
+ if (ptr==0)
+ return aligned_malloc(size);
+
+ if (size==0)
+ {
+ aligned_free(ptr);
+ return 0;
+ }
+
+ void* newptr = aligned_malloc(size);
+ if (newptr == 0)
+ {
+ #ifdef EIGEN_HAS_ERRNO
+ errno = ENOMEM; // according to the standard
+ #endif
+ return 0;
+ }
+
+ if (ptr != 0)
+ {
+ std::memcpy(newptr, ptr, (std::min)(size,old_size));
+ aligned_free(ptr);
+ }
+
+ return newptr;
+}
+
+/*****************************************************************************
+*** Implementation of portable aligned versions of malloc/free/realloc ***
+*****************************************************************************/
+
+#ifdef EIGEN_NO_MALLOC
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{
+ eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
+}
+#elif defined EIGEN_RUNTIME_NO_MALLOC
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
+{
+ static bool value = true;
+ if (update == 1)
+ value = new_value;
+ return value;
+}
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
+EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{
+ eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
+}
+#else
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{}
+#endif
+
+/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
+  * On allocation error, a std::bad_alloc is thrown.
+ */
+EIGEN_DEVICE_FUNC
+inline void* aligned_malloc(size_t size)
+{
+ check_that_malloc_is_allowed();
+
+ void *result;
+ #if !EIGEN_ALIGN
+ result = std::malloc(size);
+ #elif EIGEN_HAS_POSIX_MEMALIGN
+ if(posix_memalign(&result, EIGEN_ALIGN_BYTES, size)) result = 0;
+ #elif EIGEN_HAS_MM_MALLOC
+ result = _mm_malloc(size, EIGEN_ALIGN_BYTES);
+ #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
+ result = _aligned_malloc(size, EIGEN_ALIGN_BYTES);
+ #else
+ result = handmade_aligned_malloc(size);
+ #endif
+
+ if(!result && size)
+ throw_std_bad_alloc();
+
+ return result;
+}
+
+/** \internal Frees memory allocated with aligned_malloc. */
+EIGEN_DEVICE_FUNC
+inline void aligned_free(void *ptr)
+{
+ #if !EIGEN_ALIGN
+ std::free(ptr);
+ #elif EIGEN_HAS_POSIX_MEMALIGN
+ std::free(ptr);
+ #elif EIGEN_HAS_MM_MALLOC
+ _mm_free(ptr);
+ #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
+ _aligned_free(ptr);
+ #else
+ handmade_aligned_free(ptr);
+ #endif
+}
+
+/**
+* \internal
+* \brief Reallocates an aligned block of memory.
+* \throws std::bad_alloc on allocation failure
+**/
+inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
+{
+ EIGEN_UNUSED_VARIABLE(old_size);
+
+ void *result;
+#if !EIGEN_ALIGN
+ result = std::realloc(ptr,new_size);
+#elif EIGEN_HAS_POSIX_MEMALIGN
+ result = generic_aligned_realloc(ptr,new_size,old_size);
+#elif EIGEN_HAS_MM_MALLOC
+ // The defined(_mm_free) is just here to verify that this MSVC version
+ // implements _mm_malloc/_mm_free based on the corresponding _aligned_
+ // functions. This may not always be the case and we just try to be safe.
+ #if EIGEN_OS_WIN_STRICT && defined(_mm_free)
+ result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
+ #else
+ result = generic_aligned_realloc(ptr,new_size,old_size);
+ #endif
+#elif EIGEN_OS_WIN_STRICT
+ result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
+#else
+ result = handmade_aligned_realloc(ptr,new_size,old_size);
+#endif
+
+ if (!result && new_size)
+ throw_std_bad_alloc();
+
+ return result;
+}
+
+/*****************************************************************************
+*** Implementation of conditionally aligned functions ***
+*****************************************************************************/
+
+/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
+  * On allocation error, a std::bad_alloc is thrown.
+ */
+template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(size_t size)
+{
+ return aligned_malloc(size);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(size_t size)
+{
+ check_that_malloc_is_allowed();
+
+ void *result = std::malloc(size);
+ if(!result && size)
+ throw_std_bad_alloc();
+ return result;
+}
+
+/** \internal Frees memory allocated with conditional_aligned_malloc */
+template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
+{
+ aligned_free(ptr);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
+{
+ std::free(ptr);
+}
+
+template<bool Align> inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size)
+{
+ return aligned_realloc(ptr, new_size, old_size);
+}
+
+template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new_size, size_t)
+{
+ return std::realloc(ptr, new_size);
+}
+
+/*****************************************************************************
+*** Construction/destruction of array elements ***
+*****************************************************************************/
+
+/** \internal Constructs the elements of an array.
+  * The \a size parameter specifies how many objects to call the constructor of T on.
+ */
+template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, size_t size)
+{
+ for (size_t i=0; i < size; ++i) ::new (ptr + i) T;
+ return ptr;
+}
+
+/** \internal Destructs the elements of an array.
+  * The \a size parameter specifies how many objects to call the destructor of T on.
+ */
+template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, size_t size)
+{
+ // always destruct an array starting from the end.
+ if(ptr)
+ while(size) ptr[--size].~T();
+}
+
+/*****************************************************************************
+*** Implementation of aligned new/delete-like functions ***
+*****************************************************************************/
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
+{
+ if(size > size_t(-1) / sizeof(T))
+ throw_std_bad_alloc();
+}
+
+/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
+ * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
+ * The default constructor of T is called.
+ */
+template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size)
+{
+ check_size_for_overflow<T>(size);
+ T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
+ return construct_elements_of_array(result, size);
+}
+
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(size_t size)
+{
+ check_size_for_overflow<T>(size);
+ T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
+ return construct_elements_of_array(result, size);
+}
+
+template<typename T> EIGEN_DEVICE_FUNC inline T* allocate_uvm(size_t size)
+{
+#if defined(EIGEN_USE_GPU) && defined(__CUDA_ARCH__)
+ return (T*)malloc(size);
+#elif defined(EIGEN_USE_GPU) && defined(__NVCC__)
+ T* result = NULL;
+ if (cudaMallocManaged(&result, size) != cudaSuccess) {
+ throw_std_bad_alloc();
+ }
+ return result;
+#else
+ return reinterpret_cast<T*>(conditional_aligned_malloc<true>(sizeof(T)*size));
+#endif
+}
+
+template<typename T> EIGEN_DEVICE_FUNC void deallocate_uvm(T* ptr)
+{
+#if defined(EIGEN_USE_GPU) && defined(__CUDA_ARCH__)
+ free(ptr);
+#elif defined(EIGEN_USE_GPU) && defined(__NVCC__)
+ if (cudaFree(ptr) != cudaSuccess) {
+ throw_std_bad_alloc();
+ }
+#else
+ return conditional_aligned_free<true>(ptr);
+#endif
+}
+
+/** \internal Deletes objects constructed with aligned_new
+  * The \a size parameter specifies how many objects to call the destructor of T on.
+ */
+template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, size_t size)
+{
+ destruct_elements_of_array<T>(ptr, size);
+ aligned_free(ptr);
+}
+
+/** \internal Deletes objects constructed with conditional_aligned_new
+  * The \a size parameter specifies how many objects to call the destructor of T on.
+ */
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, size_t size)
+{
+ destruct_elements_of_array<T>(ptr, size);
+ conditional_aligned_free<Align>(ptr);
+}
+
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
+{
+ check_size_for_overflow<T>(new_size);
+ check_size_for_overflow<T>(old_size);
+ if(new_size < old_size)
+ destruct_elements_of_array(pts+new_size, old_size-new_size);
+ T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
+ if(new_size > old_size)
+ construct_elements_of_array(result+old_size, new_size-old_size);
+ return result;
+}
+
+
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(size_t size)
+{
+ check_size_for_overflow<T>(size);
+ T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
+ if(NumTraits<T>::RequireInitialization)
+ construct_elements_of_array(result, size);
+ return result;
+}
+
+template<typename T, bool Align, bool UseUVM> EIGEN_DEVICE_FUNC inline T* conditional_managed_new_auto(size_t size)
+{
+ check_size_for_overflow<T>(size);
+ T *result;
+ if (UseUVM) {
+ result = allocate_uvm<T>(size*sizeof(T));
+ }
+ else {
+ result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
+ }
+ if(NumTraits<T>::RequireInitialization)
+ construct_elements_of_array(result, size);
+ return result;
+}
+
+template<typename T, bool Align, bool UseUVM> EIGEN_DEVICE_FUNC inline void conditional_managed_delete_auto(T* ptr, size_t size)
+{
+ if(NumTraits<T>::RequireInitialization)
+ destruct_elements_of_array<T>(ptr, size);
+ if (UseUVM) {
+ deallocate_uvm(ptr);
+ }
+ else {
+ conditional_aligned_free<Align>(ptr);
+ }
+}
+
+template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, size_t new_size, size_t old_size)
+{
+ check_size_for_overflow<T>(new_size);
+ check_size_for_overflow<T>(old_size);
+ if(NumTraits<T>::RequireInitialization && (new_size < old_size))
+ destruct_elements_of_array(pts+new_size, old_size-new_size);
+ T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
+ if(NumTraits<T>::RequireInitialization && (new_size > old_size))
+ construct_elements_of_array(result+old_size, new_size-old_size);
+ return result;
+}
+
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, size_t size)
+{
+ if(NumTraits<T>::RequireInitialization)
+ destruct_elements_of_array<T>(ptr, size);
+ conditional_aligned_free<Align>(ptr);
+}
+
+/****************************************************************************/
+
+/** \internal Returns the index of the first element of the array that is well aligned for vectorization.
+ *
+ * \param array the address of the start of the array
+ * \param size the size of the array
+ *
+ * \note If no element of the array is well aligned, the size of the array is returned. Typically,
+ * for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the
+ * packet size for the given scalar type is 1, then everything is considered well-aligned.
+ *
+ * \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a
+ * power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the
+ * other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
+ * example with Scalar=double on certain 32-bit platforms, see bug #79.
+ *
+ * There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
+ */
+template<typename Scalar, typename Index>
+inline Index first_aligned(const Scalar* array, Index size)
+{
+ enum { PacketSize = packet_traits<Scalar>::size,
+ PacketAlignedMask = PacketSize-1
+ };
+
+ if(PacketSize==1)
+ {
+ // Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
+ // of the array have the same alignment.
+ return 0;
+ }
+ else if(size_t(array) & (sizeof(Scalar)-1))
+ {
+ // There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar.
+ // Consequently, no element of the array is well aligned.
+ return size;
+ }
+ else
+ {
+ return std::min<Index>( (PacketSize - (Index((size_t(array)/sizeof(Scalar))) & PacketAlignedMask))
+ & PacketAlignedMask, size);
+ }
+}
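+
+// Illustrative example (a sketch; the actual packet size depends on the enabled SIMD ISA):
+// with 4-float SSE packets, first_aligned returns the offset of the first 16-byte-aligned element.
+// \code
+// // float* data, int n given; data is assumed at least sizeof(float)-aligned
+// int k = first_aligned(data, n);  // data[k], data[k+1], ... are packet-aligned (k==n if none is)
+// // scalar prologue on data[0..k-1], vectorized loop from data[k] onward
+// \endcode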
+
+/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
+ */
+template<typename Index>
+inline Index first_multiple(Index size, Index base)
+{
+ return ((size+base-1)/base)*base;
+}
+
+// std::copy is much slower than memcpy, so let's introduce a smart_copy which
+// uses memcpy on trivial types, i.e., on types that do not require an initialization ctor.
+template<typename T, bool UseMemcpy> struct smart_copy_helper;
+
+template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
+{
+ smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
+}
+
+template<typename T> struct smart_copy_helper<T,true> {
+ static inline EIGEN_DEVICE_FUNC void run(const T* start, const T* end, T* target)
+ { memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
+};
+
+template<typename T> struct smart_copy_helper<T,false> {
+ static inline EIGEN_DEVICE_FUNC void run(const T* start, const T* end, T* target)
+ { std::copy(start, end, target); }
+};
+
+// Intelligent memmove: falls back to std::memmove for POD types, and uses std::copy otherwise.
+template<typename T, bool UseMemmove> struct smart_memmove_helper;
+
+template<typename T> void smart_memmove(const T* start, const T* end, T* target)
+{
+ smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
+}
+
+template<typename T> struct smart_memmove_helper<T,true> {
+ static inline void run(const T* start, const T* end, T* target)
+ { std::memmove(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
+};
+
+template<typename T> struct smart_memmove_helper<T,false> {
+ static inline void run(const T* start, const T* end, T* target)
+ {
+ if (uintptr_t(target) < uintptr_t(start))
+ {
+ std::copy(start, end, target);
+ }
+ else
+ {
+ std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
+ std::copy_backward(start, end, target + count);
+ }
+ }
+};
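+
+// Usage sketch (illustrative): both helpers take [start,end) ranges, so they can be dropped
+// in wherever std::copy or std::memmove would otherwise be used.
+// \code
+// smart_copy(src, src + n, dst);     // memcpy when T needs no initialization, std::copy otherwise
+// smart_memmove(src, src + n, dst);  // memmove for trivial T, direction-aware copy otherwise
+// \endcode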
+
+
+/*****************************************************************************
+*** Implementation of runtime stack allocation (falling back to malloc) ***
+*****************************************************************************/
+
+// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
+// to the appropriate stack allocation function
+#ifndef EIGEN_ALLOCA
+ #if (defined __linux__) || (defined __APPLE__)
+ #define EIGEN_ALLOCA alloca
+ #elif defined(_MSC_VER)
+ #define EIGEN_ALLOCA _alloca
+ #endif
+#endif
+
+// This helper class construct the allocated memory, and takes care of destructing and freeing the handled data
+// at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
+template<typename T> class aligned_stack_memory_handler
+{
+ public:
+ /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
+ * Note that \a ptr can be 0 regardless of the other parameters.
+ * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization).
+     * In this case, the buffer elements will also be destructed when this handler is destructed.
+ * Finally, if \a dealloc is true, then the pointer \a ptr is freed.
+ **/
+ aligned_stack_memory_handler(T* ptr, size_t size, bool dealloc)
+ : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
+ {
+ if(NumTraits<T>::RequireInitialization && m_ptr)
+ Eigen::internal::construct_elements_of_array(m_ptr, size);
+ }
+ ~aligned_stack_memory_handler()
+ {
+ if(NumTraits<T>::RequireInitialization && m_ptr)
+ Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
+ if(m_deallocate)
+ Eigen::internal::aligned_free(m_ptr);
+ }
+ protected:
+ T* m_ptr;
+ size_t m_size;
+ bool m_deallocate;
+};
+
+} // end namespace internal
+
+/** \internal
+  * Declares, allocates and constructs an aligned buffer named NAME of SIZE elements of type TYPE on the stack
+ * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
+ * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap.
+ * The allocated buffer is automatically deleted when exiting the scope of this declaration.
+ * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
+ * Here is an example:
+ * \code
+ * {
+ * ei_declare_aligned_stack_constructed_variable(float,data,size,0);
+ * // use data[0] to data[size-1]
+ * }
+ * \endcode
+  * The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token.
+ */
+#ifdef EIGEN_ALLOCA
+  // The native alloca() that comes with llvm aligns the buffer on 16 bytes even when AVX is enabled.
+  #if defined(__arm__) || defined(_WIN32) || EIGEN_ALIGN_BYTES > 16
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES)) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
+ #else
+ #define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA
+ #endif
+
+ #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
+ TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
+ : reinterpret_cast<TYPE*>( \
+ (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
+ : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
+ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
+
+#else
+
+ #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
+ TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
+ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
+
+#endif
+
+
+/*****************************************************************************
+*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
+*****************************************************************************/
+
+#if EIGEN_ALIGN
+ #ifdef EIGEN_EXCEPTIONS
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+ void* operator new(size_t size, const std::nothrow_t&) throw() { \
+ try { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
+ catch (...) { return 0; } \
+ return 0; \
+ }
+ #else
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+ void* operator new(size_t size, const std::nothrow_t&) throw() { \
+ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
+ }
+ #endif
+
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
+ void *operator new(size_t size) { \
+ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
+ } \
+ void *operator new[](size_t size) { \
+ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
+ } \
+ void operator delete(void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ void operator delete[](void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ /* in-place new and delete. since (at least afaik) there is no actual */ \
+ /* memory allocated we can safely let the default implementation handle */ \
+ /* this particular case. */ \
+ static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
+ static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \
+ void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
+ void operator delete[](void * memory, void *ptr) throw() { return ::operator delete[](memory,ptr); } \
+ /* nothrow-new (returns zero instead of std::bad_alloc) */ \
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+ void operator delete(void *ptr, const std::nothrow_t&) throw() { \
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
+ } \
+ typedef void eigen_aligned_operator_new_marker_type;
+#else
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+#endif
+
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_ALIGN_BYTES==0)))
+
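+// Usage sketch (illustrative; the struct below is hypothetical): a class holding a fixed-size
+// vectorizable Eigen member overloads operator new so that heap instances are suitably aligned.
+// \code
+// struct MyState {
+//   Eigen::Vector4f velocity;   // fixed-size, vectorizable member
+//   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+// };
+// MyState* s = new MyState;     // goes through the aligned operator new defined by the macro
+// \endcode
+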
+/****************************************************************************/
+
+/** \class aligned_allocator
+* \ingroup Core_Module
+*
+* \brief STL compatible allocator to use with 16 byte aligned types
+*
+* Example:
+* \code
+* // Matrix4f requires 16 bytes alignment:
+* std::map< int, Matrix4f, std::less<int>,
+* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
+* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
+* std::map< int, Vector3f > my_map_vec3;
+* \endcode
+*
+* \sa \ref TopicStlContainers.
+*/
+template<class T>
+class aligned_allocator : public std::allocator<T>
+{
+public:
+ typedef size_t size_type;
+ typedef std::ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T value_type;
+
+ template<class U>
+ struct rebind
+ {
+ typedef aligned_allocator<U> other;
+ };
+
+ aligned_allocator() : std::allocator<T>() {}
+
+ aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
+
+ template<class U>
+ aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
+
+ ~aligned_allocator() {}
+
+ pointer allocate(size_type num, const void* /*hint*/ = 0)
+ {
+ internal::check_size_for_overflow<T>(num);
+ return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
+ }
+
+ void deallocate(pointer p, size_type /*num*/)
+ {
+ internal::aligned_free(p);
+ }
+};
+
+//---------- Cache sizes ----------
+
+#if !defined(EIGEN_NO_CPUID)
+# if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
+# if defined(__PIC__) && EIGEN_ARCH_i386
+ // Case for x86 with PIC
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+# elif defined(__PIC__) && EIGEN_ARCH_x86_64
+ // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
+ // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
+# else
+ // Case for x86_64 or x86 w/o PIC
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
+# endif
+# elif EIGEN_COMP_MSVC
+# if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
+# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
+# endif
+# endif
+#endif
+
+namespace internal {
+
+#ifdef EIGEN_CPUID
+
+inline bool cpuid_is_vendor(int abcd[4], const char* vendor)
+{
+ return abcd[1]==(reinterpret_cast<const int*>(vendor))[0] && abcd[3]==(reinterpret_cast<const int*>(vendor))[1] && abcd[2]==(reinterpret_cast<const int*>(vendor))[2];
+}
+
+inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
+{
+ int abcd[4];
+ l1 = l2 = l3 = 0;
+ int cache_id = 0;
+ int cache_type = 0;
+ do {
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ EIGEN_CPUID(abcd,0x4,cache_id);
+ cache_type = (abcd[0] & 0x0F) >> 0;
+ if(cache_type==1||cache_type==3) // data or unified cache
+ {
+ int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5]
+ int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
+ int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
+ int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0]
+ int sets = (abcd[2]); // C[31:0]
+
+ int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
+
+ switch(cache_level)
+ {
+ case 1: l1 = cache_size; break;
+ case 2: l2 = cache_size; break;
+ case 3: l3 = cache_size; break;
+ default: break;
+ }
+ }
+ cache_id++;
+ } while(cache_type>0 && cache_id<16);
+}
+
+inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
+{
+ int abcd[4];
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ l1 = l2 = l3 = 0;
+ EIGEN_CPUID(abcd,0x00000002,0);
+ unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
+ bool check_for_p2_core2 = false;
+ for(int i=0; i<14; ++i)
+ {
+ switch(bytes[i])
+ {
+ case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
+ case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
+ case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
+ case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
+ case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
+ case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
+ case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
+ case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
+ case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
+ case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
+ case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
+ case 0x1A: l2 = 96; break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
+ case 0x22: l3 = 512; break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
+ case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x39: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
+ case 0x3A: l2 = 192; break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
+ case 0x3B: l2 = 128; break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
+ case 0x3C: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
+ case 0x3D: l2 = 384; break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
+ case 0x3E: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
+ case 0x40: l2 = 0; break; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
+ case 0x41: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
+ case 0x42: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
+ case 0x43: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
+ case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
+ case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
+ case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
+ case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
+ case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
+ case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
+ case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
+ case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
+ case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
+ case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
+ case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
+ case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
+ case 0x79: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x7A: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x7B: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
+ case 0x7E: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
+ case 0x7F: l2 = 512; break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
+ case 0x80: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
+ case 0x81: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
+ case 0x82: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
+ case 0x83: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
+ case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
+ case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
+ case 0x86: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
+ case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
+ case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
+ case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
+ case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
+ case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
+
+ default: break;
+ }
+ }
+ if(check_for_p2_core2 && l2 == l3)
+ l3 = 0;
+ l1 *= 1024;
+ l2 *= 1024;
+ l3 *= 1024;
+}
+
+inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
+{
+ if(max_std_funcs>=4)
+ queryCacheSizes_intel_direct(l1,l2,l3);
+ else
+ queryCacheSizes_intel_codes(l1,l2,l3);
+}
+
+inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
+{
+ int abcd[4];
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ EIGEN_CPUID(abcd,0x80000005,0);
+ l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ EIGEN_CPUID(abcd,0x80000006,0);
+  l2 = (abcd[2] >> 16) * 1024; // C[31:16] = l2 cache size in KB
+  l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31:18] = l3 cache size in units of 512 KB
+}
+#endif
+
+/** \internal
+ * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */
+inline void queryCacheSizes(int& l1, int& l2, int& l3)
+{
+ #ifdef EIGEN_CPUID
+ int abcd[4];
+
+ // identify the CPU vendor
+ EIGEN_CPUID(abcd,0x0,0);
+ int max_std_funcs = abcd[1];
+ if(cpuid_is_vendor(abcd,"GenuineIntel"))
+ queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
+ else if(cpuid_is_vendor(abcd,"AuthenticAMD") || cpuid_is_vendor(abcd,"AMDisbetter!"))
+ queryCacheSizes_amd(l1,l2,l3);
+ else
+ // by default let's use Intel's API
+ queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
+
+ // here is the list of other vendors:
+// ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
+// ||cpuid_is_vendor(abcd,"CyrixInstead")
+// ||cpuid_is_vendor(abcd,"CentaurHauls")
+// ||cpuid_is_vendor(abcd,"GenuineTMx86")
+// ||cpuid_is_vendor(abcd,"TransmetaCPU")
+// ||cpuid_is_vendor(abcd,"RiseRiseRise")
+// ||cpuid_is_vendor(abcd,"Geode by NSC")
+// ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
+// ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
+// ||cpuid_is_vendor(abcd,"NexGenDriven")
+ #else
+ l1 = l2 = l3 = -1;
+ #endif
+}
+
+/** \internal
+ * \returns the size in Bytes of the L1 data cache */
+inline int queryL1CacheSize()
+{
+ int l1(-1), l2, l3;
+ queryCacheSizes(l1,l2,l3);
+ return l1;
+}
+
+inline int queryL2CacheSize()
+{
+ int l1, l2(-1), l3;
+ queryCacheSizes(l1,l2,l3);
+ return l2;
+}
+
+/** \internal
+  * \returns the size in Bytes of the L2 or L3 cache if the latter is present */
+inline int queryTopLevelCacheSize()
+{
+ int l1, l2(-1), l3(-1);
+ queryCacheSizes(l1,l2,l3);
+ return (std::max)(l2,l3);
+}
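+
+// Illustrative call sequence (a sketch): these helpers are typically used to choose blocking
+// sizes for the matrix product kernels; -1 is reported when CPUID is unavailable.
+// \code
+// int l1, l2, l3;
+// Eigen::internal::queryCacheSizes(l1, l2, l3);          // sizes in bytes, or -1 if unknown
+// int top = Eigen::internal::queryTopLevelCacheSize();   // max of L2 and L3
+// \endcode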
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MEMORY_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/Meta.h b/third_party/eigen3/Eigen/src/Core/util/Meta.h
new file mode 100644
index 0000000000..7576b32689
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/Meta.h
@@ -0,0 +1,334 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_META_H
+#define EIGEN_META_H
+
+#if defined(__CUDA_ARCH__) && !defined(__GCUDACC__)
+#include <math_constants.h>
+#endif
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal
+ * \file Meta.h
+ * This file contains generic metaprogramming classes which are not specifically related to Eigen.
+ * \note In case you wonder, yes we're aware that Boost already provides all these features,
+  * but we don't want to add a dependency on Boost.
+ */
+
+struct true_type { enum { value = 1 }; };
+struct false_type { enum { value = 0 }; };
+
+template<bool Condition, typename Then, typename Else>
+struct conditional { typedef Then type; };
+
+template<typename Then, typename Else>
+struct conditional <false, Then, Else> { typedef Else type; };
+
+template<typename T, typename U> struct is_same { enum { value = 0 }; };
+template<typename T> struct is_same<T,T> { enum { value = 1 }; };
+
+template<typename T> struct remove_reference { typedef T type; };
+template<typename T> struct remove_reference<T&> { typedef T type; };
+
+template<typename T> struct remove_pointer { typedef T type; };
+template<typename T> struct remove_pointer<T*> { typedef T type; };
+template<typename T> struct remove_pointer<T*const> { typedef T type; };
+
+template <class T> struct remove_const { typedef T type; };
+template <class T> struct remove_const<const T> { typedef T type; };
+template <class T> struct remove_const<const T[]> { typedef T type[]; };
+template <class T, unsigned int Size> struct remove_const<const T[Size]> { typedef T type[Size]; };
+
+template<typename T> struct remove_all { typedef T type; };
+template<typename T> struct remove_all<const T> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T const&> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T&> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T const*> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T*> { typedef typename remove_all<T>::type type; };
+
+template<typename T> struct is_arithmetic { enum { value = false }; };
+template<> struct is_arithmetic<float> { enum { value = true }; };
+template<> struct is_arithmetic<double> { enum { value = true }; };
+template<> struct is_arithmetic<long double> { enum { value = true }; };
+template<> struct is_arithmetic<bool> { enum { value = true }; };
+template<> struct is_arithmetic<char> { enum { value = true }; };
+template<> struct is_arithmetic<signed char> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned char> { enum { value = true }; };
+template<> struct is_arithmetic<signed short> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned short>{ enum { value = true }; };
+template<> struct is_arithmetic<signed int> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned int> { enum { value = true }; };
+template<> struct is_arithmetic<signed long> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned long> { enum { value = true }; };
+
+template <typename T> struct add_const { typedef const T type; };
+template <typename T> struct add_const<T&> { typedef T& type; };
+
+template <typename T> struct is_const { enum { value = 0 }; };
+template <typename T> struct is_const<T const> { enum { value = 1 }; };
+
+template<typename T> struct add_const_on_value_type { typedef const T type; };
+template<typename T> struct add_const_on_value_type<T&> { typedef T const& type; };
+template<typename T> struct add_const_on_value_type<T*> { typedef T const* type; };
+template<typename T> struct add_const_on_value_type<T* const> { typedef T const* const type; };
+template<typename T> struct add_const_on_value_type<T const* const> { typedef T const* const type; };
+
+/** \internal Allows enabling/disabling an overload
+  * according to a compile-time condition.
+ */
+template<bool Condition, typename T> struct enable_if;
+
+template<typename T> struct enable_if<true,T>
+{ typedef T type; };
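+// Illustrative usage (not part of Eigen): enable_if gates an overload on a compile-time
+// predicate via SFINAE; the function name below is hypothetical.
+//   template<typename T>
+//   typename enable_if<bool(is_arithmetic<T>::value), T>::type
+//   twice_if_arithmetic(const T& x) { return x + x; }  // overload exists only for arithmetic T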
+
+#if defined(__CUDA_ARCH__) && !defined(__GCUDACC__)
+
+namespace device {
+
+template<typename T> struct numeric_limits
+{
+ EIGEN_DEVICE_FUNC
+ static T epsilon() { return 0; }
+  static T max() { assert(false && "Max not supported for this type"); }
+  static T lowest() { assert(false && "Lowest not supported for this type"); }
+};
+template<> struct numeric_limits<float>
+{
+ EIGEN_DEVICE_FUNC
+ static float epsilon() { return __FLT_EPSILON__; }
+ EIGEN_DEVICE_FUNC
+ static float max() { return CUDART_MAX_NORMAL_F; }
+ EIGEN_DEVICE_FUNC
+ static float lowest() { return -CUDART_MAX_NORMAL_F; }
+};
+template<> struct numeric_limits<double>
+{
+ EIGEN_DEVICE_FUNC
+ static double epsilon() { return __DBL_EPSILON__; }
+ EIGEN_DEVICE_FUNC
+ static double max() { return CUDART_INF; }
+ EIGEN_DEVICE_FUNC
+ static double lowest() { return -CUDART_INF; }
+};
+template<> struct numeric_limits<int>
+{
+ EIGEN_DEVICE_FUNC
+ static int epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static int max() { return INT_MAX; }
+ EIGEN_DEVICE_FUNC
+ static int lowest() { return INT_MIN; }
+};
+template<> struct numeric_limits<long>
+{
+ EIGEN_DEVICE_FUNC
+ static long epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static long max() { return LONG_MAX; }
+ EIGEN_DEVICE_FUNC
+ static long lowest() { return LONG_MIN; }
+};
+template<> struct numeric_limits<long long>
+{
+ EIGEN_DEVICE_FUNC
+ static long long epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static long long max() { return LLONG_MAX; }
+ EIGEN_DEVICE_FUNC
+ static long long lowest() { return LLONG_MIN; }
+};
+
+}
+
+#endif
+
+/** \internal
+ * A base class to disable the default copy ctor and copy assignment operator.
+ */
+class noncopyable
+{
+ noncopyable(const noncopyable&);
+ const noncopyable& operator=(const noncopyable&);
+protected:
+ noncopyable() {}
+ ~noncopyable() {}
+};
+
+
+/** \internal
+ * Convenient struct to get the result type of a unary or binary functor.
+ *
+ * It supports both the current STL mechanism (using the result_type member) and the
+ * upcoming next STL generation (using a templated result member).
+ * If none of these members is provided, then the type of the first argument is returned. FIXME, that behavior is a pretty bad hack.
+ */
+template<typename T> struct result_of {};
+
+struct has_none {int a[1];};
+struct has_std_result_type {int a[2];};
+struct has_tr1_result {int a[3];};
+
+template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>
+struct unary_result_of_select {typedef ArgType type;};
+
+template<typename Func, typename ArgType>
+struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};
+
+template<typename Func, typename ArgType>
+struct unary_result_of_select<Func, ArgType, sizeof(has_tr1_result)> {typedef typename Func::template result<Func(ArgType)>::type type;};
+
+template<typename Func, typename ArgType>
+struct result_of<Func(ArgType)> {
+ template<typename T>
+ static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
+ template<typename T>
+ static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
+ static has_none testFunctor(...);
+
+ // note that the following indirection is needed for gcc-3.3
+ enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+ typedef typename unary_result_of_select<Func, ArgType, FunctorType>::type type;
+};
+
+template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)>
+struct binary_result_of_select {typedef ArgType0 type;};
+
+template<typename Func, typename ArgType0, typename ArgType1>
+struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)>
+{typedef typename Func::result_type type;};
+
+template<typename Func, typename ArgType0, typename ArgType1>
+struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_tr1_result)>
+{typedef typename Func::template result<Func(ArgType0,ArgType1)>::type type;};
+
+template<typename Func, typename ArgType0, typename ArgType1>
+struct result_of<Func(ArgType0,ArgType1)> {
+ template<typename T>
+ static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
+ template<typename T>
+ static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
+ static has_none testFunctor(...);
+
+ // note that the following indirection is needed for gcc-3.3
+ enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+ typedef typename binary_result_of_select<Func, ArgType0, ArgType1, FunctorType>::type type;
+};
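+
+// Illustrative sketch (not part of Eigen): a functor exposing the classic STL result_type
+// member is detected by the sizeof(testFunctor(...)) trick above, so result_of resolves to it.
+// `Squarer` is a made-up name.
+//   struct Squarer {
+//     typedef double result_type;
+//     double operator()(double x) const { return x * x; }
+//   };
+//   typedef result_of<Squarer(double)>::type R;  // R is double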
+
+/** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer.
+ * Usage example: \code meta_sqrt<1023>::ret \endcode
+ */
+template<int Y,
+ int InfX = 0,
+ int SupX = ((Y==1) ? 1 : Y/2),
+ bool Done = ((SupX-InfX)<=1 ? true : ((SupX*SupX <= Y) && ((SupX+1)*(SupX+1) > Y))) >
+ // use ?: instead of || just to shut up a stupid gcc 4.3 warning
+class meta_sqrt
+{
+ enum {
+ MidX = (InfX+SupX)/2,
+ TakeInf = MidX*MidX > Y ? 1 : 0,
+ NewInf = int(TakeInf) ? InfX : int(MidX),
+ NewSup = int(TakeInf) ? int(MidX) : SupX
+ };
+ public:
+ enum { ret = meta_sqrt<Y,NewInf,NewSup>::ret };
+};
+
+template<int Y, int InfX, int SupX>
+class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
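+
+// Illustrative (not part of Eigen): the bisection above computes the integer square root,
+// e.g. meta_sqrt<1023>::ret is 31, since 31*31 = 961 <= 1023 and 32*32 = 1024 > 1023.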
+
+/** \internal determines whether the product of two numeric types is allowed and what the return type is */
+template<typename T, typename U> struct scalar_product_traits
+{
+ enum { Defined = 0 };
+};
+
+template<typename T> struct scalar_product_traits<T,T>
+{
+ enum {
+ // Cost = NumTraits<T>::MulCost,
+ Defined = 1
+ };
+ typedef T ReturnType;
+};
+
+template<typename T> struct scalar_product_traits<T, const T>
+{
+ enum {
+ // Cost = NumTraits<T>::MulCost,
+ Defined = 1
+ };
+ typedef T ReturnType;
+};
+
+template<typename T> struct scalar_product_traits<const T, T>
+{
+ enum {
+ // Cost = NumTraits<T>::MulCost,
+ Defined = 1
+ };
+ typedef T ReturnType;
+};
+
+template<typename T> struct scalar_product_traits<T,std::complex<T> >
+{
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
+ typedef std::complex<T> ReturnType;
+};
+
+template<typename T> struct scalar_product_traits<std::complex<T>, T>
+{
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
+ typedef std::complex<T> ReturnType;
+};
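+
+// Illustrative (not part of Eigen): these traits make a mixed real/complex product well defined,
+// e.g. scalar_product_traits<double, std::complex<double> >::ReturnType is std::complex<double>,
+// while a product of unrelated scalar types (say double and float) stays Defined == 0.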
+
+// FIXME quick workaround around current limitation of result_of
+// template<typename Scalar, typename ArgType0, typename ArgType1>
+// struct result_of<scalar_product_op<Scalar>(ArgType0,ArgType1)> {
+// typedef typename scalar_product_traits<typename remove_all<ArgType0>::type, typename remove_all<ArgType1>::type>::ReturnType type;
+// };
+
+template<typename T> struct is_diagonal
+{ enum { ret = false }; };
+
+template<typename T> struct is_diagonal<DiagonalBase<T> >
+{ enum { ret = true }; };
+
+template<typename T> struct is_diagonal<DiagonalWrapper<T> >
+{ enum { ret = true }; };
+
+template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
+{ enum { ret = true }; };
+
+} // end namespace internal
+
+namespace numext {
+
+#if defined(__CUDA_ARCH__)
+template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
+#else
+template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
+#endif
+
+} // end namespace numext
+
+} // end namespace Eigen
+
+#endif // EIGEN_META_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h b/third_party/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h
new file mode 100644
index 0000000000..5ddfbd4aa6
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h
@@ -0,0 +1,14 @@
+#ifdef EIGEN_WARNINGS_DISABLED
+#undef EIGEN_WARNINGS_DISABLED
+
+#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #ifdef _MSC_VER
+ #pragma warning( pop )
+ #elif defined __INTEL_COMPILER
+ #pragma warning pop
+ #elif defined __clang__
+ #pragma clang diagnostic pop
+ #endif
+#endif
+
+#endif // EIGEN_WARNINGS_DISABLED
diff --git a/third_party/eigen3/Eigen/src/Core/util/StaticAssert.h b/third_party/eigen3/Eigen/src/Core/util/StaticAssert.h
new file mode 100644
index 0000000000..396e27b900
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/StaticAssert.h
@@ -0,0 +1,206 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_STATIC_ASSERT_H
+#define EIGEN_STATIC_ASSERT_H
+
+/* Some notes on Eigen's static assertion mechanism:
+ *
+ * - in EIGEN_STATIC_ASSERT(CONDITION,MSG) the parameter CONDITION must be a compile time boolean
+ * expression, and MSG an enum listed in struct internal::static_assertion<true>
+ *
+ * - define EIGEN_NO_STATIC_ASSERT to disable them (and save compilation time)
+ * in that case, the static assertion is converted to the following runtime assert:
+ * eigen_assert(CONDITION && "MSG")
+ *
+ * - currently EIGEN_STATIC_ASSERT can only be used in function scope
+ *
+ */
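+// Illustrative usage (not part of this header): inside a function body one would write, e.g.,
+//   EIGEN_STATIC_ASSERT(SomeXpr::RowsAtCompileTime == 3,
+//                       THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE)
+// where the condition is a compile-time boolean and the message one of the enums listed below;
+// `SomeXpr` is a hypothetical expression type.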
+
+#ifndef EIGEN_NO_STATIC_ASSERT
+
+ #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (EIGEN_COMP_MSVC >= 1600)
+
+ // if native static_assert is enabled, let's use it
+ #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
+
+ #else // not CXX0X
+
+ namespace Eigen {
+
+ namespace internal {
+
+ template<bool condition>
+ struct static_assertion {};
+
+ template<>
+ struct static_assertion<true>
+ {
+ enum {
+ YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX,
+ YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES,
+ YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
+ THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
+ THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
+ THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
+ YOU_MADE_A_PROGRAMMING_MISTAKE,
+ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
+ EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
+ YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
+ YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
+ UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,
+ THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES,
+ FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED,
+ NUMERIC_TYPE_MUST_BE_REAL,
+ COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED,
+ WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED,
+ THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE,
+ INVALID_MATRIX_PRODUCT,
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS,
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION,
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY,
+ THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
+ THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
+ INVALID_MATRIX_TEMPLATE_PARAMETERS,
+ INVALID_MATRIXBASE_TEMPLATE_PARAMETERS,
+ BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
+ THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES,
+ YOU_ALREADY_SPECIFIED_THIS_STRIDE,
+ INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION,
+ THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD,
+ PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1,
+ THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS,
+ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES,
+ YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION,
+ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
+ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
+ THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
+ THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
+ THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
+ THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
+ OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG
+ };
+ };
+
+ } // end namespace internal
+
+ } // end namespace Eigen
+
+ // Specialized implementation for MSVC to avoid "conditional
+ // expression is constant" warnings. This implementation doesn't
+ // appear to work under GCC, hence the multiple implementations.
+ #if EIGEN_COMP_MSVC
+
+ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+ {Eigen::internal::static_assertion<bool(CONDITION)>::MSG;}
+
+ #else
+  // In some cases clang interprets bool(CONDITION) as a function declaration
+ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+ if (Eigen::internal::static_assertion<static_cast<bool>(CONDITION)>::MSG) {}
+
+ #endif
+
+ #endif // not CXX0X
+
+#else // EIGEN_NO_STATIC_ASSERT
+
+ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
+
+#endif // EIGEN_NO_STATIC_ASSERT
+
+
+// static assertion failing if the type \a TYPE is not a vector type
+#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \
+ EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime, \
+ YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX)
+
+// static assertion failing if the type \a TYPE is not fixed-size
+#define EIGEN_STATIC_ASSERT_FIXED_SIZE(TYPE) \
+ EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime!=Eigen::Dynamic, \
+ YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR)
+
+// static assertion failing if the type \a TYPE is not dynamic-size
+#define EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(TYPE) \
+ EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime==Eigen::Dynamic, \
+ YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR)
+
+// static assertion failing if the type \a TYPE is not a vector type of the given size
+#define EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(TYPE, SIZE) \
+ EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime && TYPE::SizeAtCompileTime==SIZE, \
+ THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE)
+
+// static assertion failing if the type \a TYPE is not a matrix type of the given size
+#define EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(TYPE, ROWS, COLS) \
+ EIGEN_STATIC_ASSERT(TYPE::RowsAtCompileTime==ROWS && TYPE::ColsAtCompileTime==COLS, \
+ THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE)
+
+// static assertion failing if the two vector expression types are not compatible (same fixed-size or dynamic size)
+#define EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(TYPE0,TYPE1) \
+ EIGEN_STATIC_ASSERT( \
+ (int(TYPE0::SizeAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE1::SizeAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE0::SizeAtCompileTime)==int(TYPE1::SizeAtCompileTime)),\
+ YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES)
+
+#define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+ ( \
+ (int(TYPE0::SizeAtCompileTime)==0 && int(TYPE1::SizeAtCompileTime)==0) \
+ || (\
+ (int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE0::RowsAtCompileTime)==int(TYPE1::RowsAtCompileTime)) \
+ && (int(TYPE0::ColsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE1::ColsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE0::ColsAtCompileTime)==int(TYPE1::ColsAtCompileTime))\
+ ) \
+ )
+
+#ifdef EIGEN2_SUPPORT
+ #define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \
+ eigen_assert(!NumTraits<Scalar>::IsInteger);
+#else
+ #define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \
+ EIGEN_STATIC_ASSERT(!NumTraits<TYPE>::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
+#endif
+
+
+// static assertion failing if it is guaranteed at compile-time that the two matrix expression types have different sizes
+#define EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+ EIGEN_STATIC_ASSERT( \
+ EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1),\
+ YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES)
+
+#define EIGEN_STATIC_ASSERT_SIZE_1x1(TYPE) \
+ EIGEN_STATIC_ASSERT((TYPE::RowsAtCompileTime == 1 || TYPE::RowsAtCompileTime == Dynamic) && \
+ (TYPE::ColsAtCompileTime == 1 || TYPE::ColsAtCompileTime == Dynamic), \
+ THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS)
+
+#define EIGEN_STATIC_ASSERT_LVALUE(Derived) \
+ EIGEN_STATIC_ASSERT(internal::is_lvalue<Derived>::value, \
+ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY)
+
+#define EIGEN_STATIC_ASSERT_ARRAYXPR(Derived) \
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Derived>::XprKind, ArrayXpr>::value), \
+ THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES)
+
+#define EIGEN_STATIC_ASSERT_SAME_XPR_KIND(Derived1, Derived2) \
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Derived1>::XprKind, \
+ typename internal::traits<Derived2>::XprKind \
+ >::value), \
+ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)
+
+
+#endif // EIGEN_STATIC_ASSERT_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/XprHelper.h b/third_party/eigen3/Eigen/src/Core/util/XprHelper.h
new file mode 100644
index 0000000000..13285909b4
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/util/XprHelper.h
@@ -0,0 +1,481 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_XPRHELPER_H
+#define EIGEN_XPRHELPER_H
+
+// just a workaround because GCC seems to not really like empty structs
+// FIXME: gcc 4.3 generates bad code when strict-aliasing is enabled
+// so currently we simply disable this optimization for gcc 4.3
+#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3)
+ #define EIGEN_EMPTY_STRUCT_CTOR(X) \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {}
+#else
+ #define EIGEN_EMPTY_STRUCT_CTOR(X)
+#endif
+
+namespace Eigen {
+
+typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex;
+
+namespace internal {
+
+// Classes inheriting no_assignment_operator don't generate a default operator=.
+class no_assignment_operator
+{
+ private:
+ no_assignment_operator& operator=(const no_assignment_operator&);
+};
+
+/** \internal return the index type with the largest number of bits */
+template<typename I1, typename I2>
+struct promote_index_type
+{
+ typedef typename conditional<(sizeof(I1)<sizeof(I2)), I2, I1>::type type;
+};
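+
+// Illustrative (not part of Eigen): on a typical LP64 platform where sizeof(int) == 4 and
+// sizeof(long) == 8, promote_index_type<int, long>::type is long.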
+
+/** \internal If the template parameter Value is Dynamic, this class is just a wrapper around a T variable that
+ * can be accessed using value() and setValue().
+ * Otherwise, this class is an empty structure and value() just returns the template parameter Value.
+ */
+template<typename T, int Value> class variable_if_dynamic
+{
+ public:
+ EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic)
+ EIGEN_DEVICE_FUNC explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+ EIGEN_DEVICE_FUNC static T value() { return T(Value); }
+ EIGEN_DEVICE_FUNC void setValue(T) {}
+};
+
+template<typename T> class variable_if_dynamic<T, Dynamic>
+{
+ T m_value;
+ EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); }
+ public:
+ EIGEN_DEVICE_FUNC explicit variable_if_dynamic(T value) : m_value(value) {}
+ EIGEN_DEVICE_FUNC T value() const { return m_value; }
+ EIGEN_DEVICE_FUNC void setValue(T value) { m_value = value; }
+};
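+
+// Illustrative (not part of Eigen): with a fixed extent the wrapper is empty, with Dynamic it
+// stores the runtime value; the variable names are made up.
+//   variable_if_dynamic<DenseIndex, 3>       rows3(3);     // stateless, value() returns 3
+//   variable_if_dynamic<DenseIndex, Dynamic> rowsDyn(42);  // stores 42, value() returns 42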
+
+/** \internal like variable_if_dynamic but for DynamicIndex
+ */
+template<typename T, int Value> class variable_if_dynamicindex
+{
+ public:
+ EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex)
+ EIGEN_DEVICE_FUNC explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+ EIGEN_DEVICE_FUNC static T value() { return T(Value); }
+ EIGEN_DEVICE_FUNC void setValue(T) {}
+};
+
+template<typename T> class variable_if_dynamicindex<T, DynamicIndex>
+{
+ T m_value;
+ EIGEN_DEVICE_FUNC variable_if_dynamicindex() { eigen_assert(false); }
+ public:
+ EIGEN_DEVICE_FUNC explicit variable_if_dynamicindex(T value) : m_value(value) {}
+ EIGEN_DEVICE_FUNC T value() const { return m_value; }
+ EIGEN_DEVICE_FUNC void setValue(T value) { m_value = value; }
+};
+
+template<typename T> struct functor_traits
+{
+ enum
+ {
+ Cost = 10,
+ PacketAccess = false,
+ IsRepeatable = false
+ };
+};
+
+template<typename T> struct packet_traits;
+
+template<typename T> struct unpacket_traits
+{
+ typedef T type;
+ typedef T half;
+ enum {size=1};
+};
+
+template<typename _Scalar, int _Rows, int _Cols,
+ int _Options = AutoAlign |
+ ( (_Rows==1 && _Cols!=1) ? RowMajor
+ : (_Cols==1 && _Rows!=1) ? ColMajor
+ : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+ int _MaxRows = _Rows,
+ int _MaxCols = _Cols
+> class make_proper_matrix_type
+{
+ enum {
+ IsColVector = _Cols==1 && _Rows!=1,
+ IsRowVector = _Rows==1 && _Cols!=1,
+ Options = IsColVector ? (_Options | ColMajor) & ~RowMajor
+ : IsRowVector ? (_Options | RowMajor) & ~ColMajor
+ : _Options
+ };
+ public:
+ typedef Matrix<_Scalar, _Rows, _Cols, Options, _MaxRows, _MaxCols> type;
+};
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+class compute_matrix_flags
+{
+ enum {
+ row_major_bit = Options&RowMajor ? RowMajorBit : 0,
+ is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
+
+ aligned_bit =
+ (
+ ((Options&DontAlign)==0)
+ && (
+#if EIGEN_ALIGN_STATICALLY
+ ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0))
+#else
+ 0
+#endif
+
+ ||
+
+#if EIGEN_ALIGN
+ is_dynamic_size_storage
+#else
+ 0
+#endif
+
+ )
+ ) ? AlignedBit : 0,
+ packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
+ };
+
+ public:
+ enum { ret = LinearAccessBit | LvalueBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit };
+};
+
+template<int _Rows, int _Cols> struct size_at_compile_time
+{
+ enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
+};
+
+/* plain_matrix_type : the difference from eval is that plain_matrix_type is always a plain matrix type,
+ * whereas eval is a const reference in the case of a matrix
+ */
+
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_matrix_type;
+template<typename T, typename BaseClassType> struct plain_matrix_type_dense;
+template<typename T> struct plain_matrix_type<T,Dense>
+{
+ typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind>::type type;
+};
+
+template<typename T> struct plain_matrix_type_dense<T,MatrixXpr>
+{
+ typedef Matrix<typename traits<T>::Scalar,
+ traits<T>::RowsAtCompileTime,
+ traits<T>::ColsAtCompileTime,
+ AutoAlign | (traits<T>::Flags&RowMajorBit ? RowMajor : ColMajor),
+ traits<T>::MaxRowsAtCompileTime,
+ traits<T>::MaxColsAtCompileTime
+ > type;
+};
+
+template<typename T> struct plain_matrix_type_dense<T,ArrayXpr>
+{
+ typedef Array<typename traits<T>::Scalar,
+ traits<T>::RowsAtCompileTime,
+ traits<T>::ColsAtCompileTime,
+ AutoAlign | (traits<T>::Flags&RowMajorBit ? RowMajor : ColMajor),
+ traits<T>::MaxRowsAtCompileTime,
+ traits<T>::MaxColsAtCompileTime
+ > type;
+};
+
+/* eval : the return type of eval(). For matrices, this is just a const reference
+ * in order to avoid a useless copy
+ */
+
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct eval;
+
+template<typename T> struct eval<T,Dense>
+{
+ typedef typename plain_matrix_type<T>::type type;
+// typedef typename T::PlainObject type;
+// typedef T::Matrix<typename traits<T>::Scalar,
+// traits<T>::RowsAtCompileTime,
+// traits<T>::ColsAtCompileTime,
+// AutoAlign | (traits<T>::Flags&RowMajorBit ? RowMajor : ColMajor),
+// traits<T>::MaxRowsAtCompileTime,
+// traits<T>::MaxColsAtCompileTime
+// > type;
+};
+
+// for matrices, no need to evaluate, just use a const reference to avoid a useless copy
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct eval<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
+{
+ typedef const Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
+};
+
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct eval<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
+{
+ typedef const Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
+};
+
+
+
+/* plain_matrix_type_column_major : same as plain_matrix_type but guaranteed to be column-major
+ */
+template<typename T> struct plain_matrix_type_column_major
+{
+ enum { Rows = traits<T>::RowsAtCompileTime,
+ Cols = traits<T>::ColsAtCompileTime,
+ MaxRows = traits<T>::MaxRowsAtCompileTime,
+ MaxCols = traits<T>::MaxColsAtCompileTime
+ };
+ typedef Matrix<typename traits<T>::Scalar,
+ Rows,
+ Cols,
+ (MaxRows==1&&MaxCols!=1) ? RowMajor : ColMajor,
+ MaxRows,
+ MaxCols
+ > type;
+};
+
+/* plain_matrix_type_row_major : same as plain_matrix_type but guaranteed to be row-major
+ */
+template<typename T> struct plain_matrix_type_row_major
+{
+ enum { Rows = traits<T>::RowsAtCompileTime,
+ Cols = traits<T>::ColsAtCompileTime,
+ MaxRows = traits<T>::MaxRowsAtCompileTime,
+ MaxCols = traits<T>::MaxColsAtCompileTime
+ };
+ typedef Matrix<typename traits<T>::Scalar,
+ Rows,
+ Cols,
+ (MaxCols==1&&MaxRows!=1) ? RowMajor : ColMajor,
+ MaxRows,
+ MaxCols
+ > type;
+};
+
+// we should be able to get rid of this one too
+template<typename T> struct must_nest_by_value { enum { ret = false }; };
+
+/** \internal The reference selector for template expressions. The idea is that we don't
+ * need to use references for expressions since they are lightweight proxy
+ * objects that should generate no copying overhead. */
+template <typename T>
+struct ref_selector
+{
+ typedef typename conditional<
+ bool(traits<T>::Flags & NestByRefBit),
+ T const&,
+ const T
+ >::type type;
+};
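+
+// Illustrative (not part of Eigen): plain objects set NestByRefBit in their flags, so e.g.
+// ref_selector<Matrix3d>::type is const Matrix3d&, whereas expression types that do not set
+// the bit are nested by value as const T.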
+
+/** \internal Adds the const qualifier on the value-type of T2 if and only if T1 is a const type */
+template<typename T1, typename T2>
+struct transfer_constness
+{
+ typedef typename conditional<
+ bool(internal::is_const<T1>::value),
+ typename internal::add_const_on_value_type<T2>::type,
+ T2
+ >::type type;
+};
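+
+// Illustrative (not part of Eigen): transfer_constness<const MatrixXf, float*>::type is
+// float const*, while transfer_constness<MatrixXf, float*>::type stays float*.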
+
+/** \internal Determines how a given expression should be nested into another one.
+ * For example, when you do a * (b+c), Eigen will determine how the expression b+c should be
+ * nested into the bigger product expression. The choice is between nesting the expression b+c as-is, or
+ * evaluating that expression b+c into a temporary variable d, and nest d so that the resulting expression is
+ * a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes
+ * many coefficient accesses in the nested expressions -- as is the case with matrix product for example.
+ *
+ * \param T the type of the expression being nested
+ * \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
+ *
+ * Note that if no evaluation occurs, then the constness of T is preserved.
+ *
+ * Example. Suppose that a, b, and c are of type Matrix3d. The user forms the expression a*(b+c).
+ * b+c is an expression "sum of matrices", which we will denote by S. In order to determine how to nest it,
+ * the Product expression uses: nested<S, 3>::type, which turns out to be Matrix3d because the internal logic of
+ * nested determined that in this case it was better to evaluate the expression b+c into a temporary. On the other hand,
+ * since a is of type Matrix3d, the Product expression nests it as nested<Matrix3d, 3>::type, which turns out to be
+ * const Matrix3d&, because the internal logic of nested determined that since a was already a matrix, there was no point
+ * in copying it into another matrix.
+ */
+template<typename T, int n=1, typename PlainObject = typename eval<T>::type> struct nested
+{
+ enum {
+    // for the purpose of this test, to keep it reasonably simple, we arbitrarily choose a fixed stand-in value for Dynamic.
+ // the choice of 10000 makes it larger than any practical fixed value and even most dynamic values.
+ // in extreme cases where these assumptions would be wrong, we would still at worst suffer performance issues
+ // (poor choice of temporaries).
+ // it's important that this value can still be squared without integer overflowing.
+ DynamicAsInteger = 10000,
+ ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
+ ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost),
+ CoeffReadCost = traits<T>::CoeffReadCost,
+ CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost),
+ NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n,
+ CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger,
+ CostNoEvalAsInteger = NAsInteger * CoeffReadCostAsInteger
+ };
+
+ typedef typename conditional<
+ ( (int(traits<T>::Flags) & EvalBeforeNestingBit) ||
+ int(CostEvalAsInteger) < int(CostNoEvalAsInteger)
+ ),
+ PlainObject,
+ typename ref_selector<T>::type
+ >::type type;
+};
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+T* const_cast_ptr(const T* ptr)
+{
+ return const_cast<T*>(ptr);
+}
+
+template<typename Derived, typename XprKind = typename traits<Derived>::XprKind>
+struct dense_xpr_base
+{
+ /* dense_xpr_base should only ever be used on dense expressions, thus falling either into the MatrixXpr or into the ArrayXpr cases */
+};
+
+template<typename Derived>
+struct dense_xpr_base<Derived, MatrixXpr>
+{
+ typedef MatrixBase<Derived> type;
+};
+
+template<typename Derived>
+struct dense_xpr_base<Derived, ArrayXpr>
+{
+ typedef ArrayBase<Derived> type;
+};
+
+/** \internal Helper base class to add scalar multiple operator
+ * overloads for complex types */
+template<typename Derived,typename Scalar,typename OtherScalar,
+ bool EnableIt = !is_same<Scalar,OtherScalar>::value >
+struct special_scalar_op_base : public DenseCoeffsBase<Derived>
+{
+ // dummy operator* so that the
+ // "using special_scalar_op_base::operator*" compiles
+ void operator*() const;
+};
+
+template<typename Derived,typename Scalar,typename OtherScalar>
+struct special_scalar_op_base<Derived,Scalar,OtherScalar,true> : public DenseCoeffsBase<Derived>
+{
+ const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
+ operator*(const OtherScalar& scalar) const
+ {
+ return CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
+ (*static_cast<const Derived*>(this), scalar_multiple2_op<Scalar,OtherScalar>(scalar));
+ }
+
+ inline friend const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
+ operator*(const OtherScalar& scalar, const Derived& matrix)
+ { return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar); }
+};
+
+template<typename XprType, typename CastType> struct cast_return_type
+{
+ typedef typename XprType::Scalar CurrentScalarType;
+ typedef typename remove_all<CastType>::type _CastType;
+ typedef typename _CastType::Scalar NewScalarType;
+ typedef typename conditional<is_same<CurrentScalarType,NewScalarType>::value,
+ const XprType&,CastType>::type type;
+};
+
+template <typename A, typename B> struct promote_storage_type;
+
+template <typename A> struct promote_storage_type<A,A>
+{
+ typedef A ret;
+};
+template <typename A> struct promote_storage_type<A, const A>
+{
+ typedef A ret;
+};
+template <typename A> struct promote_storage_type<const A, A>
+{
+ typedef A ret;
+};
+
+
+
+/** \internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type.
+ * \param Scalar optional parameter allowing one to pass a different scalar type than that of the MatrixType.
+ */
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_row_type
+{
+ typedef Matrix<Scalar, 1, ExpressionType::ColsAtCompileTime,
+ ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType;
+ typedef Array<Scalar, 1, ExpressionType::ColsAtCompileTime,
+ ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType;
+
+ typedef typename conditional<
+ is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+ MatrixRowType,
+ ArrayRowType
+ >::type type;
+};
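+
+// Illustrative (not part of Eigen): plain_row_type<MatrixXd>::type is
+// Matrix<double, 1, Dynamic, RowMajor, 1, Dynamic>, i.e. a dynamic row vector matching MatrixXd.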
+
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_col_type
+{
+ typedef Matrix<Scalar, ExpressionType::RowsAtCompileTime, 1,
+ ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> MatrixColType;
+ typedef Array<Scalar, ExpressionType::RowsAtCompileTime, 1,
+ ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> ArrayColType;
+
+ typedef typename conditional<
+ is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+ MatrixColType,
+ ArrayColType
+ >::type type;
+};
+
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_diag_type
+{
+ enum { diag_size = EIGEN_SIZE_MIN_PREFER_DYNAMIC(ExpressionType::RowsAtCompileTime, ExpressionType::ColsAtCompileTime),
+ max_diag_size = EIGEN_SIZE_MIN_PREFER_FIXED(ExpressionType::MaxRowsAtCompileTime, ExpressionType::MaxColsAtCompileTime)
+ };
+ typedef Matrix<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> MatrixDiagType;
+ typedef Array<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> ArrayDiagType;
+
+ typedef typename conditional<
+ is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+ MatrixDiagType,
+ ArrayDiagType
+ >::type type;
+};
+
+template<typename ExpressionType>
+struct is_lvalue
+{
+ enum { value = !bool(is_const<ExpressionType>::value) &&
+ bool(traits<ExpressionType>::Flags & LvalueBit) };
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_XPRHELPER_H