diff options
author | Gael Guennebaud <g.gael@free.fr> | 2015-01-26 17:09:01 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2015-01-26 17:09:01 +0100 |
commit | c6eb84aabcf102aaa3ba1c288e890984f4b49277 (patch) | |
tree | 698ca369c7ede22c568493ca41a6cb45519ef925 | |
parent | e1f1091fde660581d64b54ff1019bc494dbbca89 (diff) |
Enable vectorization of transposeInPlace for PacketSize x PacketSize matrices
-rw-r--r-- | Eigen/src/Core/Transpose.h | 27 | ||||
-rw-r--r-- | test/adjoint.cpp | 22 |
2 files changed, 46 insertions, 3 deletions
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index a3b95256f..3bab6092c 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -217,18 +217,39 @@ MatrixBase<Derived>::adjoint() const namespace internal { template<typename MatrixType, - bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic> + bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic, + bool MatchPacketSize = + (int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size)) + && (internal::evaluator<MatrixType>::Flags&PacketAccessBit) > struct inplace_transpose_selector; template<typename MatrixType> -struct inplace_transpose_selector<MatrixType,true> { // square matrix +struct inplace_transpose_selector<MatrixType,true,false> { // square matrix static void run(MatrixType& m) { m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose()); } }; +// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only. template<typename MatrixType> -struct inplace_transpose_selector<MatrixType,false> { // non square matrix +struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize + static void run(MatrixType& m) { + typedef typename MatrixType::Scalar Scalar; + typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet; + typedef typename MatrixType::Index Index; + const Index PacketSize = internal::packet_traits<Scalar>::size; + const Index Alignment = internal::evaluator<MatrixType>::Flags&AlignedBit ? Aligned : Unaligned; + PacketBlock<Packet> A; + for (Index i=0; i<PacketSize; ++i) + A.packet[i] = m.template packetByOuterInner<Alignment>(i,0); + internal::ptranspose(A); + for (Index i=0; i<PacketSize; ++i) + m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]); + } +}; + +template<typename MatrixType,bool MatchPacketSize> +struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix static void run(MatrixType& m) { if (m.rows()==m.cols()) m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose()); diff --git a/test/adjoint.cpp b/test/adjoint.cpp index ea36f7841..3b2a53c91 100644 --- a/test/adjoint.cpp +++ b/test/adjoint.cpp @@ -64,6 +64,7 @@ template<typename MatrixType> void adjoint(const MatrixType& m) typedef typename NumTraits<Scalar>::Real RealScalar; typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType; typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType; + const Index PacketSize = internal::packet_traits<Scalar>::size; Index rows = m.rows(); Index cols = m.cols(); @@ -108,6 +109,17 @@ template<typename MatrixType> void adjoint(const MatrixType& m) VERIFY_IS_APPROX(m3,m1.transpose()); m3.transposeInPlace(); VERIFY_IS_APPROX(m3,m1); + + if(PacketSize<m3.rows() && PacketSize<m3.cols()) + { + m3 = m1; + Index i = internal::random<Index>(0,m3.rows()-PacketSize); + Index j = internal::random<Index>(0,m3.cols()-PacketSize); + m3.template block<PacketSize,PacketSize>(i,j).transposeInPlace(); + VERIFY_IS_APPROX( (m3.template block<PacketSize,PacketSize>(i,j)), (m1.template block<PacketSize,PacketSize>(i,j).transpose()) ); + m3.template block<PacketSize,PacketSize>(i,j).transposeInPlace(); + VERIFY_IS_APPROX(m3,m1); + } // check inplace adjoint m3 = m1; @@ -129,9 +141,19 @@ void test_adjoint() CALL_SUBTEST_1( adjoint(Matrix<float, 1, 1>()) ); CALL_SUBTEST_2( adjoint(Matrix3d()) ); CALL_SUBTEST_3( adjoint(Matrix4f()) ); + CALL_SUBTEST_4( adjoint(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) ); CALL_SUBTEST_5( adjoint(MatrixXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_6( adjoint(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + + // Complement for 128 bits vectorization: + CALL_SUBTEST_8( adjoint(Matrix2d()) ); + CALL_SUBTEST_9( adjoint(Matrix<int,4,4>()) ); + + // 256 bits vectorization: + CALL_SUBTEST_10( adjoint(Matrix<float,8,8>()) ); + CALL_SUBTEST_11( adjoint(Matrix<double,4,4>()) ); + CALL_SUBTEST_12( adjoint(Matrix<int,8,8>()) ); } // test a large static matrix only once CALL_SUBTEST_7( adjoint(Matrix<float, 100, 100>()) ); |