diff options
author | Gael Guennebaud <g.gael@free.fr> | 2015-01-26 17:09:01 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2015-01-26 17:09:01 +0100 |
commit | c6eb84aabcf102aaa3ba1c288e890984f4b49277 (patch) | |
tree | 698ca369c7ede22c568493ca41a6cb45519ef925 /Eigen/src/Core/Transpose.h | |
parent | e1f1091fde660581d64b54ff1019bc494dbbca89 (diff) |
Enable vectorization of transposeInPlace for PacketSize x PacketSize matrices
Diffstat (limited to 'Eigen/src/Core/Transpose.h')
-rw-r--r-- | Eigen/src/Core/Transpose.h | 27 |
1 files changed, 24 insertions, 3 deletions
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index a3b95256f..3bab6092c 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -217,18 +217,39 @@ MatrixBase<Derived>::adjoint() const namespace internal { template<typename MatrixType, - bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic> + bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic, + bool MatchPacketSize = + (int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size)) + && (internal::evaluator<MatrixType>::Flags&PacketAccessBit) > struct inplace_transpose_selector; template<typename MatrixType> -struct inplace_transpose_selector<MatrixType,true> { // square matrix +struct inplace_transpose_selector<MatrixType,true,false> { // square matrix static void run(MatrixType& m) { m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose()); } }; +// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only. template<typename MatrixType> -struct inplace_transpose_selector<MatrixType,false> { // non square matrix +struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize + static void run(MatrixType& m) { + typedef typename MatrixType::Scalar Scalar; + typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet; + typedef typename MatrixType::Index Index; + const Index PacketSize = internal::packet_traits<Scalar>::size; + const Index Alignment = internal::evaluator<MatrixType>::Flags&AlignedBit ? Aligned : Unaligned; + PacketBlock<Packet> A; + for (Index i=0; i<PacketSize; ++i) + A.packet[i] = m.template packetByOuterInner<Alignment>(i,0); + internal::ptranspose(A); + for (Index i=0; i<PacketSize; ++i) + m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]); + } +}; + +template<typename MatrixType,bool MatchPacketSize> +struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix static void run(MatrixType& m) { if (m.rows()==m.cols()) m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose()); |