From c6eb84aabcf102aaa3ba1c288e890984f4b49277 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Mon, 26 Jan 2015 17:09:01 +0100
Subject: Enable vectorization of transposeInPlace for PacketSize x PacketSize
 matrices

---
 Eigen/src/Core/Transpose.h | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

(limited to 'Eigen/src/Core/Transpose.h')

diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index a3b95256f..3bab6092c 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -217,18 +217,39 @@ MatrixBase<Derived>::adjoint() const
 namespace internal {
 
 template<typename MatrixType,
-  bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic>
+  bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic,
+  bool MatchPacketSize =
+        (int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size))
+    &&  (internal::evaluator<MatrixType>::Flags&PacketAccessBit) >
 struct inplace_transpose_selector;
 
 template<typename MatrixType>
-struct inplace_transpose_selector<MatrixType,true> { // square matrix
+struct inplace_transpose_selector<MatrixType,true,false> { // square matrix
   static void run(MatrixType& m) {
     m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
   }
 };
 
+// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only.
 template<typename MatrixType>
-struct inplace_transpose_selector<MatrixType,false> { // non square matrix
+struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize
+  static void run(MatrixType& m) {
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
+    typedef typename MatrixType::Index Index;
+    const Index PacketSize = internal::packet_traits<Scalar>::size;
+    const Index Alignment = internal::evaluator<MatrixType>::Flags&AlignedBit ? Aligned : Unaligned;
+    PacketBlock<Packet> A;
+    for (Index i=0; i<PacketSize; ++i)
+      A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
+    internal::ptranspose(A);
+    for (Index i=0; i<PacketSize; ++i)
+      m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]);
+  }
+};
+
+template<typename MatrixType,bool MatchPacketSize>
+struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix
   static void run(MatrixType& m) {
     if (m.rows()==m.cols())
       m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
-- 
cgit v1.2.3