From f5d96df80075667b2e09d9df8bbf1640c97e51d3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 6 Feb 2009 12:40:38 +0000 Subject: Add vectorization of Reverse (was more tricky than I thought) and simplify the index based functions --- Eigen/src/Array/Reverse.h | 123 +++++++++++++---------------------- Eigen/src/Core/GenericPacketMath.h | 3 + Eigen/src/Core/arch/SSE/PacketMath.h | 9 ++- test/packetmath.cpp | 5 ++ test/reverse.cpp | 20 ++---- 5 files changed, 65 insertions(+), 95 deletions(-) diff --git a/Eigen/src/Array/Reverse.h b/Eigen/src/Array/Reverse.h index f744bc194..ef008abc0 100644 --- a/Eigen/src/Array/Reverse.h +++ b/Eigen/src/Array/Reverse.h @@ -55,17 +55,27 @@ struct ei_traits > MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - // TODO: check how to correctly set the new flags - Flags = ((int(_MatrixTypeNested::Flags) & HereditaryBits) - & ~(LowerTriangularBit | UpperTriangularBit)) + // let's enable LinearAccess only with vectorization because of the product overhead + LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) ) + ? LinearAccessBit : 0, + + Flags = (int(_MatrixTypeNested::Flags) & (HereditaryBits | PacketAccessBit | LinearAccess)) | (int(_MatrixTypeNested::Flags)&UpperTriangularBit ? LowerTriangularBit : 0) | (int(_MatrixTypeNested::Flags)&LowerTriangularBit ? UpperTriangularBit : 0), - // TODO: should add two add costs (due to the -1) or only one, and add the cost of calling .rows() and .cols() CoeffReadCost = _MatrixTypeNested::CoeffReadCost }; }; +template struct ei_reverse_packet_cond +{ + static inline PacketScalar run(const PacketScalar& x) { return ei_preverse(x); } +}; +template struct ei_reverse_packet_cond +{ + static inline PacketScalar run(const PacketScalar& x) { return x; } +}; + template class Reverse : public MatrixBase > { @@ -73,6 +83,22 @@ template class Reverse EIGEN_GENERIC_PUBLIC_INTERFACE(Reverse) + protected: + enum { + PacketSize = ei_packet_traits::size, + IsRowMajor = Flags & RowMajorBit, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1, + ReversePacket = (Direction == BothDirections) + || ((Direction == Vertical) && IsColMajor) + || ((Direction == Horizontal) && IsRowMajor) + }; + typedef ei_reverse_packet_cond reverse_packet; + public: + inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) @@ -82,113 +108,54 @@ template class Reverse inline Scalar& coeffRef(int row, int col) { - return m_matrix.const_cast_derived().coeffRef(((Direction == Vertical) || (Direction == BothDirections)) ? m_matrix.rows() - row - 1 : row, - ((Direction == Horizontal) || (Direction == BothDirections)) ? m_matrix.cols() - col - 1 : col); + return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row, + ReverseCol ? m_matrix.cols() - col - 1 : col); } inline const Scalar coeff(int row, int col) const { - return m_matrix.coeff(((Direction == Vertical) || (Direction == BothDirections)) ? m_matrix.rows() - row - 1 : row, - ((Direction == Horizontal) || (Direction == BothDirections)) ? m_matrix.cols() - col - 1 : col); + return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row, + ReverseCol ? m_matrix.cols() - col - 1 : col); } - /* TODO have to be updated for vector expression only */ inline const Scalar coeff(int index) const { - switch ( Direction ) - { - case Vertical: - return m_matrix.coeff( index + m_matrix.rows() - 2 * (index % m_matrix.rows()) - 1 ); - break; - - case Horizontal: - return m_matrix.coeff( (index % m_matrix.rows()) + (m_matrix.cols() - 1 - index/m_matrix.rows()) * m_matrix.rows() ); - break; - - case BothDirections: - return m_matrix.coeff((m_matrix.rows() * m_matrix.cols()) - index - 1); - break; - } - + return m_matrix.coeff(m_matrix.size() - index - 1); } - /* TODO have to be updated for vector expression only */ inline Scalar& coeffRef(int index) { - switch ( Direction ) - { - case Vertical: - return m_matrix.const_cast_derived().coeffRef( index + m_matrix.rows() - 2 * (index % m_matrix.rows()) - 1 ); - break; - - case Horizontal: - return m_matrix.const_cast_derived().coeffRef( (index % m_matrix.rows()) + (m_matrix.cols() - 1 - index/m_matrix.rows()) * m_matrix.rows() ); - break; - - case BothDirections: - return m_matrix.const_cast_derived().coeffRef( (m_matrix.rows() * m_matrix.cols()) - index - 1 ); - break; - } + return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1); } - // the following is not ready yet - /* - // TODO: We must reverse the packet reading and writing, which is currently not done here, I think template inline const PacketScalar packet(int row, int col) const { - return m_matrix.template packet(((Direction == Vertical) || (Direction == BothDirections)) ? m_matrix.rows() - row - 1 : row, - ((Direction == Horizontal) || (Direction == BothDirections)) ? m_matrix.cols() - col - 1 : col); + return reverse_packet::run(m_matrix.template packet( + ReverseRow ? m_matrix.rows() - row - OffsetRow : row, + ReverseCol ? m_matrix.cols() - col - OffsetCol : col)); } template inline void writePacket(int row, int col, const PacketScalar& x) { - m_matrix.const_cast_derived().template writePacket(((Direction == Vertical) || (Direction == BothDirections)) ? m_matrix.rows() - row - 1 : row, - ((Direction == Horizontal) || (Direction == BothDirections)) ? m_matrix.cols() - col - 1 : col, - x); + m_matrix.const_cast_derived().template writePacket( + ReverseRow ? m_matrix.rows() - row - OffsetRow : row, + ReverseCol ? m_matrix.cols() - col - OffsetCol : col, + reverse_packet::run(x)); } - // TODO have to be updated for vector expression only template inline const PacketScalar packet(int index) const { - switch ( Direction ) - { - case Vertical: - return m_matrix.template packet( index + m_matrix.rows() - 2 * (index % m_matrix.rows()) - 1 ); - break; - - case Horizontal: - return m_matrix.template packet( (index % m_matrix.rows()) + (m_matrix.cols() - 1 - index/m_matrix.rows()) * m_matrix.rows() ); - break; - - case BothDirections: - return m_matrix.template packet( (m_matrix.rows() * m_matrix.cols()) - index - 1 ); - break; - } + return ei_preverse(m_matrix.template packet( m_matrix.size() - index - PacketSize )); } - // TODO have to be updated for vector expression only template inline void writePacket(int index, const PacketScalar& x) { - switch ( Direction ) - { - case Vertical: - return m_matrix.const_cast_derived().template packet( index + m_matrix.rows() - 2 * (index % m_matrix.rows()) - 1, x ); - break; - - case Horizontal: - return m_matrix.const_cast_derived().template packet( (index % m_matrix.rows()) + (m_matrix.cols() - 1 - index/m_matrix.rows()) * m_matrix.rows(), x ); - break; - - case BothDirections: - return m_matrix.const_cast_derived().template packet( (m_matrix.rows() * m_matrix.cols()) - index - 1, x ); - break; - } + m_matrix.const_cast_derived().template writePacket(m_matrix.size() - index - PacketSize, ei_preverse(x)); } - */ protected: const typename MatrixType::Nested m_matrix; diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index b0eee29f7..75eb69685 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -96,6 +96,9 @@ ei_preduxp(const Packet* vecs) { return vecs[0]; } template inline typename ei_unpacket_traits::type ei_predux(const Packet& a) { return a; } +/** \internal \returns the reversed elements of \a a*/ +template inline Packet ei_preverse(const Packet& a) +{ return a; } /*************************************************************************** * The following functions might not have to be overwritten for vectorized types diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 3a6bbba1b..cce00ed7b 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -100,7 +100,7 @@ template<> EIGEN_STRONG_INLINE __m128 ei_ploadu(const float* from) { r // template<> EIGEN_STRONG_INLINE __m128 ei_ploadu(const float* from) { // if (size_t(from)&0xF) // return _mm_loadu_ps(from); -// else +// else // return _mm_loadu_ps(from); // } template<> EIGEN_STRONG_INLINE __m128d ei_ploadu(const double* from) { return _mm_loadu_pd(from); } @@ -125,6 +125,13 @@ template<> EIGEN_STRONG_INLINE double ei_pfirst<__m128d>(const __m128d& a) { ret template<> EIGEN_STRONG_INLINE int ei_pfirst<__m128i>(const __m128i& a) { return _mm_cvtsi128_si32(a); } #endif +template<> EIGEN_STRONG_INLINE __m128 ei_preverse(const __m128& a) +{ return _mm_shuffle_ps(a,a,0x1B); } +template<> EIGEN_STRONG_INLINE __m128d ei_preverse(const __m128d& a) +{ return _mm_shuffle_pd(a,a,0x1); } +template<> EIGEN_STRONG_INLINE __m128i ei_preverse(const __m128i& a) +{ return _mm_shuffle_epi32(a,0x1B); } + #ifdef __SSE3__ // TODO implement SSE2 versions as well as integer versions template<> EIGEN_STRONG_INLINE __m128 ei_preduxp<__m128>(const __m128* vecs) diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 6fec9259d..d746a350e 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -134,6 +134,11 @@ template void packetmath() } ei_pstore(data2, ei_preduxp(packets)); VERIFY(areApprox(ref, data2, PacketSize) && "ei_preduxp"); + + for (int i=0; i void reverse(const MatrixType& m) } } - int ind = ei_random(0, (rows*cols) - 1); - - /* Reverse::coeff(int) is for vector only */ - /* - MatrixType m1_reversed(m1.reverse()); - VERIFY_IS_APPROX( m1_reversed.reverse().coeff( ind ), m1.coeff( ind ) ); - - MatrixType m1c_reversed = m1.colwise().reverse(); - VERIFY_IS_APPROX( m1c_reversed.colwise().reverse().coeff( ind ), m1.coeff( ind ) ); - - MatrixType m1r_reversed = m1.rowwise().reverse(); - VERIFY_IS_APPROX( m1r_reversed.rowwise().reverse().coeff( ind ), m1.coeff( ind ) ); - */ - /* cout << "m1:" << endl << m1 << endl; cout << "m1c_reversed:" << endl << m1c_reversed << endl; @@ -178,12 +164,14 @@ void test_reverse() { for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST( reverse(Matrix()) ); + CALL_SUBTEST( reverse(Matrix2f()) ); + CALL_SUBTEST( reverse(Matrix4f()) ); CALL_SUBTEST( reverse(Matrix4d()) ); CALL_SUBTEST( reverse(MatrixXcf(3, 3)) ); - CALL_SUBTEST( reverse(MatrixXi(8, 12)) ); + CALL_SUBTEST( reverse(MatrixXi(6, 3)) ); CALL_SUBTEST( reverse(MatrixXcd(20, 20)) ); CALL_SUBTEST( reverse(Matrix()) ); - CALL_SUBTEST( reverse(Matrix(10,10)) ); + CALL_SUBTEST( reverse(Matrix(6,3)) ); } Vector4f x; x << 1, 2, 3, 4; Vector4f y; y << 4, 3, 2, 1; -- cgit v1.2.3