diff options
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/Block.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseBinaryOp.h | 9 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseUnaryOp.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/DiagonalCoeffs.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/DiagonalMatrix.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/ForwardDeclarations.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/Functors.h | 44 | ||||
-rw-r--r-- | Eigen/src/Core/Identity.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Map.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Matrix.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/MatrixBase.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/MatrixStorage.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/Minor.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/NumTraits.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/Ones.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/OperatorEquals.h | 170 | ||||
-rw-r--r-- | Eigen/src/Core/PacketMath.h | 85 | ||||
-rw-r--r-- | Eigen/src/Core/Product.h | 69 | ||||
-rw-r--r-- | Eigen/src/Core/Random.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Redux.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/Transpose.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/Util.h | 14 | ||||
-rw-r--r-- | Eigen/src/Core/Zero.h | 2 |
23 files changed, 421 insertions, 82 deletions
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 34f98030e..f0c1d11c0 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -67,9 +67,9 @@ struct ei_traits<Block<MatrixType, BlockRows, BlockCols> > : (BlockRows==Dynamic ? MatrixType::MaxRowsAtCompileTime : BlockRows), MaxColsAtCompileTime = ColsAtCompileTime == 1 ? 1 : (BlockCols==Dynamic ? MatrixType::MaxColsAtCompileTime : BlockCols), - Flags = RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic + Flags = (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic ? (unsigned int)MatrixType::Flags - : (unsigned int)MatrixType::Flags &~ LargeBit, + : (unsigned int)MatrixType::Flags &~ LargeBit) & ~VectorizableBit, CoeffReadCost = MatrixType::CoeffReadCost }; }; diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index f7764e9b4..0ca6d3922 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -60,7 +60,9 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > ColsAtCompileTime = Lhs::ColsAtCompileTime, MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime, - Flags = Lhs::Flags | Rhs::Flags, + Flags = ((Lhs::Flags | Rhs::Flags) & ~VectorizableBit) + | (ei_functor_traits<BinaryOp>::IsVectorizable && ((Lhs::Flags&RowMajorBit)==(Rhs::Flags&RowMajorBit)) + ? 
(Lhs::Flags & Rhs::Flags & VectorizableBit) : 0), CoeffReadCost = Lhs::CoeffReadCost + Rhs::CoeffReadCost + ei_functor_traits<BinaryOp>::Cost }; }; @@ -89,6 +91,11 @@ class CwiseBinaryOp : ei_no_assignment_operator, return m_functor(m_lhs.coeff(row, col), m_rhs.coeff(row, col)); } + PacketScalar _packetCoeff(int row, int col) const + { + return m_functor.packetOp(m_lhs.packetCoeff(row, col), m_rhs.packetCoeff(row, col)); + } + protected: const typename Lhs::XprCopy m_lhs; const typename Rhs::XprCopy m_rhs; diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index b26b55be8..5c2ba1b07 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -50,7 +50,8 @@ struct ei_traits<CwiseUnaryOp<UnaryOp, MatrixType> > ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - Flags = MatrixType::Flags, + Flags = (MatrixType::Flags & ~VectorizableBit) + | (ei_functor_traits<UnaryOp>::IsVectorizable ? 
MatrixType::Flags & VectorizableBit : 0), CoeffReadCost = MatrixType::CoeffReadCost + ei_functor_traits<UnaryOp>::Cost }; }; @@ -76,6 +77,11 @@ class CwiseUnaryOp : ei_no_assignment_operator, return m_functor(m_matrix.coeff(row, col)); } + PacketScalar _packetCoeff(int row, int col) const + { + return m_functor.packetOp(m_matrix.packetCoeff(row, col)); + } + protected: const typename MatrixType::XprCopy m_matrix; const UnaryOp m_functor; diff --git a/Eigen/src/Core/DiagonalCoeffs.h b/Eigen/src/Core/DiagonalCoeffs.h index 030de5cf0..7f8fea162 100644 --- a/Eigen/src/Core/DiagonalCoeffs.h +++ b/Eigen/src/Core/DiagonalCoeffs.h @@ -52,9 +52,9 @@ struct ei_traits<DiagonalCoeffs<MatrixType> > : EIGEN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime), MaxColsAtCompileTime = 1, - Flags = RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic + Flags = (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic ? (unsigned int)MatrixType::Flags - : (unsigned int)MatrixType::Flags &~ LargeBit, + : (unsigned int)MatrixType::Flags &~ LargeBit) & ~VectorizableBit, CoeffReadCost = MatrixType::CoeffReadCost }; }; diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index b7fffab72..6a243a402 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -47,7 +47,7 @@ struct ei_traits<DiagonalMatrix<CoeffsVectorType> > ColsAtCompileTime = CoeffsVectorType::SizeAtCompileTime, MaxRowsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime, MaxColsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime, - Flags = CoeffsVectorType::Flags, + Flags = CoeffsVectorType::Flags & ~VectorizableBit, CoeffReadCost = CoeffsVectorType::CoeffReadCost }; }; diff --git a/Eigen/src/Core/ForwardDeclarations.h b/Eigen/src/Core/ForwardDeclarations.h index 32be8cd68..d9699301d 100644 --- a/Eigen/src/Core/ForwardDeclarations.h +++ b/Eigen/src/Core/ForwardDeclarations.h @@ -65,7 +65,7 @@ template<typename Scalar> struct 
ei_scalar_cos_op; template<typename Scalar> struct ei_scalar_sin_op; template<typename Scalar> struct ei_scalar_pow_op; template<typename Scalar, typename NewType> struct ei_scalar_cast_op; -template<typename Scalar> struct ei_scalar_multiple_op; +template<typename Scalar, bool IsVectorizable> struct ei_scalar_multiple_op; template<typename Scalar> struct ei_scalar_quotient1_op; template<typename Scalar> struct ei_scalar_min_op; template<typename Scalar> struct ei_scalar_max_op; @@ -116,5 +116,10 @@ template<typename T> struct ei_functor_traits }; }; +template<typename T> struct ei_packet_traits +{ + typedef T type; + enum {size=1}; +}; #endif // EIGEN_FORWARDDECLARATIONS_H diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index 44f982d11..d0f5151bc 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -34,12 +34,15 @@ */ template<typename Scalar> struct ei_scalar_sum_op EIGEN_EMPTY_STRUCT { const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } + template<typename PacketScalar> + PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const + { return ei_padd(a,b); } }; template<typename Scalar> struct ei_functor_traits<ei_scalar_sum_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, - IsVectorizable = NumTraits<Scalar>::PacketSize>0 + IsVectorizable = ei_packet_traits<Scalar>::size>1 }; }; @@ -50,12 +53,15 @@ struct ei_functor_traits<ei_scalar_sum_op<Scalar> > { */ template<typename Scalar> struct ei_scalar_product_op EIGEN_EMPTY_STRUCT { const Scalar operator() (const Scalar& a, const Scalar& b) const { return a * b; } + template<typename PacketScalar> + PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const + { return ei_pmul(a,b); } }; template<typename Scalar> struct ei_functor_traits<ei_scalar_product_op<Scalar> > { enum { Cost = NumTraits<Scalar>::MulCost, - IsVectorizable = NumTraits<Scalar>::PacketSize>0 + IsVectorizable = ei_packet_traits<Scalar>::size>1 
}; }; @@ -66,12 +72,15 @@ struct ei_functor_traits<ei_scalar_product_op<Scalar> > { */ template<typename Scalar> struct ei_scalar_min_op EIGEN_EMPTY_STRUCT { const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::min(a, b); } + template<typename PacketScalar> + PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const + { return ei_pmin(a,b); } }; template<typename Scalar> struct ei_functor_traits<ei_scalar_min_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, - IsVectorizable = NumTraits<Scalar>::PacketSize>0 + IsVectorizable = ei_packet_traits<Scalar>::size>1 }; }; @@ -82,12 +91,15 @@ struct ei_functor_traits<ei_scalar_min_op<Scalar> > { */ template<typename Scalar> struct ei_scalar_max_op EIGEN_EMPTY_STRUCT { const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::max(a, b); } + template<typename PacketScalar> + PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const + { return ei_pmax(a,b); } }; template<typename Scalar> struct ei_functor_traits<ei_scalar_max_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, - IsVectorizable = NumTraits<Scalar>::PacketSize>0 + IsVectorizable = ei_packet_traits<Scalar>::size>1 }; }; @@ -100,13 +112,16 @@ struct ei_functor_traits<ei_scalar_max_op<Scalar> > { * \sa class CwiseBinaryOp, MatrixBase::operator- */ template<typename Scalar> struct ei_scalar_difference_op EIGEN_EMPTY_STRUCT { - const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } + const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } + template<typename PacketScalar> + PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const + { return ei_psub(a,b); } }; template<typename Scalar> struct ei_functor_traits<ei_scalar_difference_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, - IsVectorizable = NumTraits<Scalar>::PacketSize>0 + IsVectorizable = ei_packet_traits<Scalar>::size>1 }; }; @@ -194,15 
+209,26 @@ struct ei_functor_traits<ei_scalar_cast_op<Scalar,NewType> > * * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/ */ +template<typename Scalar, bool IsVectorizable = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_multiple_op; + +template<typename Scalar> +struct ei_scalar_multiple_op<Scalar,true> { + typedef typename ei_packet_traits<Scalar>::type PacketScalar; + ei_scalar_multiple_op(const Scalar& other) : m_other(ei_pset1(other)) { } + Scalar operator() (const Scalar& a) const { return a * ei_pfirst(m_other); } + PacketScalar packetOp(const PacketScalar& a) const + { return ei_pmul(a, m_other); } + const PacketScalar m_other; +}; template<typename Scalar> -struct ei_scalar_multiple_op { - ei_scalar_multiple_op(const Scalar& other) : m_other(other) {} +struct ei_scalar_multiple_op<Scalar,false> { + ei_scalar_multiple_op(const Scalar& other) : m_other(other) { } Scalar operator() (const Scalar& a) const { return a * m_other; } const Scalar m_other; }; template<typename Scalar> struct ei_functor_traits<ei_scalar_multiple_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::MulCost, IsVectorizable = false }; }; +{ enum { Cost = NumTraits<Scalar>::MulCost, IsVectorizable = ei_packet_traits<Scalar>::size>1 }; }; template<typename Scalar, bool HasFloatingPoint> struct ei_scalar_quotient1_impl { diff --git a/Eigen/src/Core/Identity.h b/Eigen/src/Core/Identity.h index 104a06e2f..0783983c1 100644 --- a/Eigen/src/Core/Identity.h +++ b/Eigen/src/Core/Identity.h @@ -40,7 +40,7 @@ struct ei_traits<Identity<MatrixType> > ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - Flags = MatrixType::Flags, + Flags = MatrixType::Flags & ~VectorizableBit, CoeffReadCost = NumTraits<Scalar>::ReadCost }; }; diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index cbb1633ad..f17107a65 100644 --- a/Eigen/src/Core/Map.h +++ 
b/Eigen/src/Core/Map.h @@ -47,7 +47,7 @@ struct ei_traits<Map<MatrixType> > ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - Flags = MatrixType::Flags, + Flags = MatrixType::Flags & ~VectorizableBit, CoeffReadCost = NumTraits<Scalar>::ReadCost }; }; diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index b9a47f7b0..da898f031 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -79,7 +79,10 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _Flags, _MaxRows, _MaxCols> > ColsAtCompileTime = _Cols, MaxRowsAtCompileTime = _MaxRows, MaxColsAtCompileTime = _MaxCols, - Flags = _Flags, + Flags = (_Flags & ~VectorizableBit) + | (( (ei_packet_traits<Scalar>::size>1) && (_Rows!=Dynamic) && (_Cols!=Dynamic) + && ((_Flags&RowMajorBit) && ((_Cols%ei_packet_traits<Scalar>::size)==0) + || ((_Rows%ei_packet_traits<Scalar>::size)==0) ) ) ? VectorizableBit : 0), CoeffReadCost = NumTraits<Scalar>::ReadCost }; }; @@ -119,6 +122,23 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, return m_storage.data()[row + col * m_storage.rows()]; } + PacketScalar _packetCoeff(int row, int col) const + { + ei_internal_assert(Flags & VectorizableBit); + if(Flags & RowMajorBit) + return ei_pload(&m_storage.data()[col + row * m_storage.cols()]); + else + return ei_pload(&m_storage.data()[row + col * m_storage.rows()]); + } + void _writePacketCoeff(int row, int col, const PacketScalar& x) + { + ei_internal_assert(Flags & VectorizableBit); + if(Flags & RowMajorBit) + ei_pstore(&m_storage.data()[col + row * m_storage.cols()], x); + else + ei_pstore(&m_storage.data()[row + col * m_storage.rows()], x); + } + public: /** \returns a const pointer to the data array of this matrix */ const Scalar *data() const diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 2bc54701d..62953eded 100644 --- a/Eigen/src/Core/MatrixBase.h +++ 
b/Eigen/src/Core/MatrixBase.h @@ -59,6 +59,8 @@ template<typename Derived> class MatrixBase //@{ typedef typename ei_traits<Derived>::Scalar Scalar; + typedef typename ei_packet_traits<Scalar>::type PacketScalar; + enum { RowsAtCompileTime = ei_traits<Derived>::RowsAtCompileTime, @@ -211,6 +213,9 @@ template<typename Derived> class MatrixBase Scalar& coeffRef(int index); Scalar& operator[](int index); + PacketScalar packetCoeff(int row, int col) const { return derived()._packetCoeff(row,col); } + void writePacketCoeff(int row, int col, const PacketScalar& x) { return derived()._writePacketCoeff(row,col,x); } + const Scalar x() const; const Scalar y() const; const Scalar z() const; @@ -484,6 +489,11 @@ template<typename Derived> class MatrixBase { return *static_cast<Derived*>(const_cast<MatrixBase*>(this)); } //@} + private: + + PacketScalar _packetCoeff(int , int) const { ei_internal_assert(false && "_packetCoeff not defined"); } + void _writePacketCoeff(int , int, const PacketScalar&) { ei_internal_assert(false && "_packetCoeff not defined"); } + }; #endif // EIGEN_MATRIXBASE_H diff --git a/Eigen/src/Core/MatrixStorage.h b/Eigen/src/Core/MatrixStorage.h index 91290ea59..cca4414d3 100644 --- a/Eigen/src/Core/MatrixStorage.h +++ b/Eigen/src/Core/MatrixStorage.h @@ -6,12 +6,12 @@ // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either +// License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of +// published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. 
// // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY @@ -39,18 +39,28 @@ */ template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage; -// purely fixed-size matrix. +template <typename T, int Size, bool Align> struct ei_aligned_array +{ + EIGEN_ALIGN_128 T array[Size]; +}; + +template <typename T, int Size> struct ei_aligned_array<T,Size,false> +{ + T array[Size]; +}; + +// purely fixed-size matrix template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage { - T m_data[Size]; + ei_aligned_array<T,Size,((Size*sizeof(T))%16)==0> m_data; public: ei_matrix_storage() {} ei_matrix_storage(int,int,int) {} static int rows(void) {return _Rows;} static int cols(void) {return _Cols;} void resize(int,int,int) {} - const T *data() const { return m_data; } - T *data() { return m_data; } + const T *data() const { return m_data.array; } + T *data() { return m_data.array; } }; // dynamic-size matrix with fixed-size storage diff --git a/Eigen/src/Core/Minor.h b/Eigen/src/Core/Minor.h index 911ac2151..1b060928f 100644 --- a/Eigen/src/Core/Minor.h +++ b/Eigen/src/Core/Minor.h @@ -50,7 +50,7 @@ struct ei_traits<Minor<MatrixType> > MatrixType::MaxRowsAtCompileTime - 1 : Dynamic, MaxColsAtCompileTime = (MatrixType::MaxColsAtCompileTime != Dynamic) ? 
MatrixType::MaxColsAtCompileTime - 1 : Dynamic, - Flags = MatrixType::Flags, + Flags = MatrixType::Flags & ~VectorizableBit, CoeffReadCost = MatrixType::CoeffReadCost }; }; diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index ba546e86e..137f38ee2 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -57,7 +57,6 @@ template<> struct NumTraits<int> ReadCost = 1, AddCost = 1, MulCost = 1, - PacketSize = 4 }; }; @@ -71,7 +70,6 @@ template<> struct NumTraits<float> ReadCost = 1, AddCost = 1, MulCost = 1, - PacketSize = 4 }; }; @@ -85,7 +83,6 @@ template<> struct NumTraits<double> ReadCost = 1, AddCost = 1, MulCost = 1, - PacketSize = 2 }; }; @@ -99,7 +96,6 @@ template<typename _Real> struct NumTraits<std::complex<_Real> > ReadCost = 2, AddCost = 2 * NumTraits<Real>::AddCost, MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost, - PacketSize = 0 }; }; @@ -113,7 +109,6 @@ template<> struct NumTraits<long long int> ReadCost = 1, AddCost = 1, MulCost = 1, - PacketSize = 0 }; }; @@ -127,7 +122,6 @@ template<> struct NumTraits<long double> ReadCost = 1, AddCost = 2, MulCost = 2, - PacketSize = 0 }; }; @@ -141,7 +135,6 @@ template<> struct NumTraits<bool> ReadCost = 1, AddCost = 1, MulCost = 1, - PacketSize = 0 }; }; diff --git a/Eigen/src/Core/Ones.h b/Eigen/src/Core/Ones.h index 4cb4bc348..bcc71764c 100644 --- a/Eigen/src/Core/Ones.h +++ b/Eigen/src/Core/Ones.h @@ -41,7 +41,7 @@ struct ei_traits<Ones<MatrixType> > ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - Flags = MatrixType::Flags, + Flags = MatrixType::Flags & ~VectorizableBit, CoeffReadCost = NumTraits<Scalar>::ReadCost }; }; diff --git a/Eigen/src/Core/OperatorEquals.h b/Eigen/src/Core/OperatorEquals.h index 5529c8313..dff1954cc 100644 --- a/Eigen/src/Core/OperatorEquals.h +++ b/Eigen/src/Core/OperatorEquals.h @@ -63,6 +63,48 @@ struct 
ei_matrix_operator_equals_unroller<Derived1, Derived2, Dynamic> static void run(Derived1 &, const Derived2 &) {} }; +//---- + +template<typename Derived1, typename Derived2, int UnrollCount> +struct ei_matrix_operator_equals_packet_unroller +{ + enum { + index = UnrollCount-ei_packet_traits<typename Derived1::Scalar>::size, + row = Derived1::Flags&RowMajorBit ? index / Derived1::ColsAtCompileTime : index % Derived1::RowsAtCompileTime, + col = Derived1::Flags&RowMajorBit ? index % Derived1::ColsAtCompileTime : index / Derived1::RowsAtCompileTime + }; + + static void run(Derived1 &dst, const Derived2 &src) + { + ei_matrix_operator_equals_packet_unroller<Derived1, Derived2, index>::run(dst, src); + dst.writePacketCoeff(row, col, src.packetCoeff(row, col)); + } +}; + +template<typename Derived1, typename Derived2> +struct ei_matrix_operator_equals_packet_unroller<Derived1, Derived2, 2> +{ + static void run(Derived1 &dst, const Derived2 &src) + { + dst.writePacketCoeff(0, 0, src.packetCoeff(0, 0)); + } +}; + +// prevent buggy user code from causing an infinite recursion +template<typename Derived1, typename Derived2> +struct ei_matrix_operator_equals_packet_unroller<Derived1, Derived2, 0> +{ + static void run(Derived1 &, const Derived2 &) {exit(666);} +}; + +template<typename Derived1, typename Derived2> +struct ei_matrix_operator_equals_packet_unroller<Derived1, Derived2, Dynamic> +{ + static void run(Derived1 &, const Derived2 &) {exit(666);} +}; + +//---- + template<typename Derived1, typename Derived2, int UnrollCount> struct ei_vector_operator_equals_unroller { @@ -97,68 +139,114 @@ struct ei_vector_operator_equals_unroller<Derived1, Derived2, Dynamic> static void run(Derived1 &, const Derived2 &) {} }; +template <typename Derived, typename OtherDerived, +bool Vectorize = (Derived::Flags & OtherDerived::Flags & VectorizableBit) + && ((Derived::Flags&RowMajorBit)==(OtherDerived::Flags&RowMajorBit))> +struct ei_operator_equals_impl; + template<typename Derived> 
template<typename OtherDerived> Derived& MatrixBase<Derived> ::lazyAssign(const MatrixBase<OtherDerived>& other) { - const bool unroll = SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT; - if(IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime) - // copying a vector expression into a vector + ei_operator_equals_impl<Derived,OtherDerived>::execute(derived(),other.derived()); + return derived(); +} + +template<typename Derived> +template<typename OtherDerived> +Derived& MatrixBase<Derived> + ::operator=(const MatrixBase<OtherDerived>& other) +{ + if(OtherDerived::Flags & EvalBeforeAssigningBit) { - ei_assert(size() == other.size()); - if(unroll) - ei_vector_operator_equals_unroller - <Derived, OtherDerived, - unroll ? SizeAtCompileTime : Dynamic - >::run(derived(), other.derived()); - else - for(int i = 0; i < size(); i++) - coeffRef(i) = other.coeff(i); + return lazyAssign(other.derived().eval()); } - else // copying a matrix expression into a matrix + else + return lazyAssign(other.derived()); +} + +template <typename Derived, typename OtherDerived> +struct ei_operator_equals_impl<Derived, OtherDerived, false> +{ + static void execute(Derived & dst, const OtherDerived & src) { - ei_assert(rows() == other.rows() && cols() == other.cols()); - if(unroll) + const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT; + if(Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime) + // copying a vector expression into a vector { - ei_matrix_operator_equals_unroller - <Derived, OtherDerived, - unroll ? SizeAtCompileTime : Dynamic - >::run(derived(), other.derived()); + ei_assert(dst.size() == src.size()); + if(unroll) + ei_vector_operator_equals_unroller + <Derived, OtherDerived, + unroll ? 
Derived::SizeAtCompileTime : Dynamic + >::run(dst.derived(), src.derived()); + else + for(int i = 0; i < dst.size(); i++) + dst.coeffRef(i) = src.coeff(i); } - else + else // copying a matrix expression into a matrix { - if(ColsAtCompileTime == Dynamic || RowsAtCompileTime != Dynamic) + ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + if(unroll) { - // traverse in column-major order - for(int j = 0; j < cols(); j++) - for(int i = 0; i < rows(); i++) - coeffRef(i, j) = other.coeff(i, j); + ei_matrix_operator_equals_unroller + <Derived, OtherDerived, + unroll ? Derived::SizeAtCompileTime : Dynamic + >::run(dst.derived(), src.derived()); } else { - // traverse in row-major order - // in order to allow the compiler to unroll the inner loop - for(int i = 0; i < rows(); i++) - for(int j = 0; j < cols(); j++) - coeffRef(i, j) = other.coeff(i, j); + if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic) + { + // traverse in column-major order + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < dst.rows(); i++) + dst.coeffRef(i, j) = src.coeff(i, j); + } + else + { + // traverse in row-major order + // in order to allow the compiler to unroll the inner loop + for(int i = 0; i < dst.rows(); i++) + for(int j = 0; j < dst.cols(); j++) + dst.coeffRef(i, j) = src.coeff(i, j); + } } } } - return derived(); -} +}; -template<typename Derived> -template<typename OtherDerived> -Derived& MatrixBase<Derived> - ::operator=(const MatrixBase<OtherDerived>& other) +template <typename Derived, typename OtherDerived> +struct ei_operator_equals_impl<Derived, OtherDerived, true> { - if(OtherDerived::Flags & EvalBeforeAssigningBit) + static void execute(Derived & dst, const OtherDerived & src) { - return lazyAssign(other.derived().eval()); + const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT; + ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + if(unroll) + { + 
ei_matrix_operator_equals_packet_unroller + <Derived, OtherDerived, + unroll ? Derived::SizeAtCompileTime : Dynamic>::run + (dst.const_cast_derived(), src.derived()); + } + else + { + if(OtherDerived::Flags&RowMajorBit) + { + for(int i = 0; i < dst.rows(); i++) + for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size) + dst.writePacketCoeff(i, j, src.packetCoeff(i, j)); + } + else + { + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size) + dst.writePacketCoeff(i, j, src.packetCoeff(i, j)); + } + } } - else - return lazyAssign(other.derived()); -} +}; #endif // EIGEN_OPERATOREQUALS_H diff --git a/Eigen/src/Core/PacketMath.h b/Eigen/src/Core/PacketMath.h new file mode 100644 index 000000000..aab123533 --- /dev/null +++ b/Eigen/src/Core/PacketMath.h @@ -0,0 +1,85 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_PACKET_MATH_H +#define EIGEN_PACKET_MATH_H + +#ifdef EIGEN_INTEL_PLATFORM + +template<> struct ei_packet_traits<float> { typedef __m128 type; enum {size=4}; }; +template<> struct ei_packet_traits<double> { typedef __m128d type; enum {size=2}; }; +template<> struct ei_packet_traits<int> { typedef __m128i type; enum {size=4}; }; + +inline __m128 ei_padd(const __m128& a, const __m128& b) { return _mm_add_ps(a,b); } +inline __m128d ei_padd(const __m128d& a, const __m128d& b) { return _mm_add_pd(a,b); } +inline __m128i ei_padd(const __m128i& a, const __m128i& b) { return _mm_add_epi32(a,b); } + +inline __m128 ei_psub(const __m128& a, const __m128& b) { return _mm_sub_ps(a,b); } +inline __m128d ei_psub(const __m128d& a, const __m128d& b) { return _mm_sub_pd(a,b); } +inline __m128i ei_psub(const __m128i& a, const __m128i& b) { return _mm_sub_epi32(a,b); } + +inline __m128 ei_pmul(const __m128& a, const __m128& b) { return _mm_mul_ps(a,b); } +inline __m128d ei_pmul(const __m128d& a, const __m128d& b) { return _mm_mul_pd(a,b); } +inline __m128i ei_pmul(const __m128i& a, const __m128i& b) { return _mm_mul_epu32(a,b); } + +inline __m128 ei_pmin(const __m128& a, const __m128& b) { return _mm_min_ps(a,b); } +inline __m128d ei_pmin(const __m128d& a, const __m128d& b) { return _mm_min_pd(a,b); } +inline __m128i ei_pmin(const __m128i& a, const __m128i& b) +{ + __m128i mask = _mm_cmplt_epi32(a,b); + return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b)); +} + +inline __m128 ei_pmax(const __m128& a, const __m128& b) { return _mm_max_ps(a,b); } +inline __m128d ei_pmax(const __m128d& a, const __m128d& b) { return _mm_max_pd(a,b); } +inline __m128i ei_pmax(const __m128i& a, const __m128i& b) +{ + __m128i mask = _mm_cmpgt_epi32(a,b); + return 
_mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+}
+
+inline __m128 ei_pload(const float* from) { return _mm_load_ps(from); }  // ei_pload: read one packet from memory (aligned-load intrinsics)
+inline __m128d ei_pload(const double* from) { return _mm_load_pd(from); }
+inline __m128i ei_pload(const __m128i* from) { return _mm_load_si128(from); }
+
+inline __m128 ei_pload1(const float* from) { return _mm_load1_ps(from); }  // ei_pload1: broadcast the value at *from to every lane
+inline __m128d ei_pload1(const double* from) { return _mm_load1_pd(from); }
+inline __m128i ei_pload1(const int* from) { return _mm_set1_epi32(*from); }
+
+inline __m128 ei_pset1(const float& from) { return _mm_set1_ps(from); }  // ei_pset1: broadcast a scalar value to every lane
+inline __m128d ei_pset1(const double& from) { return _mm_set1_pd(from); }
+inline __m128i ei_pset1(const int& from) { return _mm_set1_epi32(from); }
+
+inline void ei_pstore(float* to, const __m128& from) { _mm_store_ps(to, from); }  // ei_pstore: write one packet to memory (aligned-store intrinsics)
+inline void ei_pstore(double* to, const __m128d& from) { _mm_store_pd(to, from); }
+inline void ei_pstore(__m128i* to, const __m128i& from) { _mm_store_si128(to, from); }
+
+inline float ei_pfirst(const __m128& a) { return _mm_cvtss_f32(a); }  // ei_pfirst: extract the lowest lane as a scalar
+inline double ei_pfirst(const __m128d& a) { return _mm_cvtsd_f64(a); }
+inline int ei_pfirst(const __m128i& a) { return _mm_cvtsi128_si32(a); }
+
+#endif
+
+#endif // EIGEN_PACKET_MATH_H
+
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 7f149075b..cfb5d3e10 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -60,6 +60,38 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs>
   static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
 };
 
+
+template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_packet_product_unroller  // general case: handles terms 0..Index-1 recursively, then adds term Index into res
+{
+  static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
+  {
+    ei_packet_product_unroller<RowMajor, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
+    if (RowMajor)  // row-major: lhs scalar is broadcast, the packet is read from rhs
+      res = ei_padd(res, ei_pmul(ei_pset1(lhs.coeff(row, Index)), rhs.packetCoeff(Index, col)));
+    else  // otherwise: the packet is read from lhs, rhs scalar is broadcast
+      res = ei_padd(res, ei_pmul(lhs.packetCoeff(row, Index), ei_pset1(rhs.coeff(Index, col))));
+  }
+};
+
+template<bool RowMajor, int Size, typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_packet_product_unroller<RowMajor, 0, Size, Lhs, Rhs, PacketScalar>  // Index == 0: first term is assigned to res, not accumulated
+{
+  static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
+  {
+    if (RowMajor)
+      res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.packetCoeff(0, col));
+    else
+      res = ei_pmul(lhs.packetCoeff(row, 0), ei_pset1(rhs.coeff(0, col)));
+  }
+};
+
+template<bool RowMajor, int Index, typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_packet_product_unroller<RowMajor, Index, Dynamic, Lhs, Rhs, PacketScalar>  // Size == Dynamic: run() does nothing
+{
+  static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
+};
+
 /** \class Product
   *
   * \brief Expression of the product of two matrices
@@ -97,11 +129,14 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
     ColsAtCompileTime = Rhs::ColsAtCompileTime,
     MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
-    Flags = ( (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic)
+    Flags = (( (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic)
             ? (unsigned int)(LhsFlags | RhsFlags)
             : (unsigned int)(LhsFlags | RhsFlags) & ~LargeBit )
           | EvalBeforeAssigningBit
-          | (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimal ? EvalBeforeNestingBit : 0),
+          | (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimal ? EvalBeforeNestingBit : 0))
+          & (~(RowMajorBit|VectorizableBit))  // clear both bits; they are re-derived on the next two lines
+          | ((!(Lhs::Flags&RowMajorBit) && (Lhs::Flags&VectorizableBit)) ? VectorizableBit  // column-major, vectorizable lhs; must be !(a&b) because unary ! binds tighter than &
+            : (((Rhs::Flags&RowMajorBit) && (Rhs::Flags&VectorizableBit)) ? (RowMajorBit|VectorizableBit) : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER)),  // row-major, vectorizable rhs; else default storage order, not vectorizable
     CoeffReadCost
       = Lhs::ColsAtCompileTime == Dynamic
       ? Dynamic
@@ -157,6 +192,36 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
       return res;
     }
 
+    PacketScalar _packetCoeff(int row, int col) const EIGEN_ALWAYS_INLINE  // one packet of the product starting at (row, col)
+    {
+      PacketScalar res;
+      if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)  // inner size within the unrolling limit: compile-time unrolled sum
+      {
+        ei_packet_product_unroller<Flags&RowMajorBit, Lhs::ColsAtCompileTime-1,
+                            Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
+                              ? Lhs::ColsAtCompileTime : Dynamic,
+                            Lhs, Rhs, PacketScalar>
+          ::run(row, col, m_lhs, m_rhs, res);
+//         std::cout << "vec unrolled product\n";
+      }
+      else  // otherwise: runtime loop over the inner dimension, same two cases as the unroller
+      {
+        if (Flags&RowMajorBit)
+        {
+          res = ei_pmul(ei_pset1(m_lhs.coeff(row, 0)),m_rhs.packetCoeff(0, col));
+          for(int i = 1; i < m_lhs.cols(); i++)
+            res = ei_padd(res, ei_pmul(ei_pset1(m_lhs.coeff(row, i)), m_rhs.packetCoeff(i, col)));
+        }
+        else
+        {
+          res = ei_pmul(m_lhs.packetCoeff(row, 0), ei_pset1(m_rhs.coeff(0, col)));
+          for(int i = 1; i < m_lhs.cols(); i++)
+            res = ei_padd(res, ei_pmul(m_lhs.packetCoeff(row, i), ei_pset1(m_rhs.coeff(i, col))));
+        }
+      }
+      return res;
+    }
+
   protected:
     const LhsXprCopy m_lhs;
     const RhsXprCopy m_rhs;
diff --git a/Eigen/src/Core/Random.h b/Eigen/src/Core/Random.h
index 7ac3633fe..4d6a21da4 100644
--- a/Eigen/src/Core/Random.h
+++ b/Eigen/src/Core/Random.h
@@ -41,7 +41,7 @@ struct ei_traits<Random<MatrixType> >
     ColsAtCompileTime = ei_traits<MatrixType>::ColsAtCompileTime,
     MaxRowsAtCompileTime = ei_traits<MatrixType>::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = ei_traits<MatrixType>::MaxColsAtCompileTime,
-    Flags = ei_traits<MatrixType>::Flags | EvalBeforeNestingBit,
+    Flags = (ei_traits<MatrixType>::Flags | EvalBeforeNestingBit) & ~VectorizableBit,  // VectorizableBit is always cleared for Random
     CoeffReadCost = 2 * NumTraits<Scalar>::MulCost // FIXME: arbitrary value
   };
 };
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index f7fdbc077..12ceedd76 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -94,9 +94,9 @@ struct ei_traits<PartialRedux<Direction, BinaryOp, MatrixType> >
     ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
-    Flags = (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic)
+    Flags = ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic)
           ? (unsigned int)_MatrixTypeXprCopy::Flags
-          : (unsigned int)_MatrixTypeXprCopy::Flags & ~LargeBit,
+          : (unsigned int)_MatrixTypeXprCopy::Flags & ~LargeBit) & ~VectorizableBit,  // VectorizableBit is always cleared for PartialRedux
     TraversalSize = Direction==Vertical ? RowsAtCompileTime : ColsAtCompileTime,
     CoeffReadCost = TraversalSize * _MatrixTypeXprCopy::CoeffReadCost
                   + (TraversalSize - 1) * ei_functor_traits<BinaryOp>::Cost
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index f2e547225..6710f3092 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -77,6 +77,16 @@ template<typename MatrixType> class Transpose
       return m_matrix.coeff(col, row);
     }
 
+    PacketScalar _packetCoeff(int row, int col) const  // packet read forwarded to the nested expression with row/col swapped
+    {
+      return m_matrix.packetCoeff(col, row);
+    }
+
+    void _writePacketCoeff(int row, int col, const PacketScalar& x)  // packet write with row/col swapped; m_matrix is held const, hence const_cast_derived()
+    {
+      m_matrix.const_cast_derived().writePacketCoeff(col, row, x);
+    }
+
   protected:
     const typename MatrixType::XprCopy m_matrix;
 };
diff --git a/Eigen/src/Core/Util.h b/Eigen/src/Core/Util.h
index ad8a15b07..e2c95bc53 100644
--- a/Eigen/src/Core/Util.h
+++ b/Eigen/src/Core/Util.h
@@ -1,6 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
 //
 // Eigen is free software; you can redistribute it and/or
@@ -51,11 +52,13 @@ using Eigen::MatrixBase;
 #define EIGEN_NO_DEBUG
 #endif
 
+#ifndef ei_assert  // an ei_assert that is already defined is left untouched
 #ifdef EIGEN_NO_DEBUG
 #define ei_assert(x)
 #else
 #define ei_assert(x) assert(x)
 #endif
+#endif
 
 #ifdef EIGEN_INTERNAL_DEBUGGING
 #define ei_internal_assert(x) ei_assert(x);
@@ -79,6 +82,12 @@ using Eigen::MatrixBase;
 #define EIGEN_ALWAYS_INLINE
 #endif
 
+#if (defined __GNUC__)
+#define EIGEN_ALIGN_128 __attribute__ ((aligned(16)))  // 16 bytes = 128 bits
+#else  // other compilers: the macro expands to nothing
+#define EIGEN_ALIGN_128
+#endif
+
 #define EIGEN_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \
 template<typename OtherDerived> \
 Derived& operator Op(const MatrixBase<OtherDerived>& other) \
@@ -107,6 +116,7 @@ EIGEN_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, /=)
 #define _EIGEN_GENERIC_PUBLIC_INTERFACE(Derived, BaseClass) \
 typedef BaseClass Base; \
 typedef typename Eigen::ei_traits<Derived>::Scalar Scalar; \
+typedef typename Base::PacketScalar PacketScalar; \
 typedef typename Eigen::ei_xpr_copy<Derived>::type XprCopy; \
 typedef typename Eigen::ei_eval<Derived>::type Eval; \
 enum { RowsAtCompileTime = Base::RowsAtCompileTime, \
@@ -132,7 +142,11 @@ const unsigned int RowMajorBit = 0x1;
 const unsigned int EvalBeforeNestingBit = 0x2;
 const unsigned int EvalBeforeAssigningBit = 0x4;
 const unsigned int LargeBit = 0x8;
+#ifdef EIGEN_VECTORIZE
 const unsigned int VectorizableBit = 0x10;
+#else
+const unsigned int VectorizableBit = 0x0;  // without EIGEN_VECTORIZE every (Flags & VectorizableBit) test evaluates to 0
+#endif
 
 enum { ConditionalJumpCost = 5 };
 
diff --git a/Eigen/src/Core/Zero.h b/Eigen/src/Core/Zero.h
index 15108b794..1daffd1c4 100644
--- a/Eigen/src/Core/Zero.h
+++ b/Eigen/src/Core/Zero.h
@@ -41,7 +41,7 @@ struct ei_traits<Zero<MatrixType> >
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    Flags = MatrixType::Flags,
+    Flags = MatrixType::Flags & ~VectorizableBit,  // VectorizableBit is always cleared for Zero
     CoeffReadCost = NumTraits<Scalar>::ReadCost
   };
 };
|