aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products/GeneralMatrixVector.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2014-03-28 09:11:06 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2014-03-28 09:11:06 +0100
commitc94fde118a2b6b2b415a7860ba066ba240bc8ac3 (patch)
treef16c41ab0589fba0cd64e02aa1146fe797fe5dc7 /Eigen/src/Core/products/GeneralMatrixVector.h
parent51e85c936d5b2266f2a8f0953bf9c10853620c49 (diff)
Enable vectorization of gemv for PacketSize>4 through unaligned loads (still better than no vectorization)
Diffstat (limited to 'Eigen/src/Core/products/GeneralMatrixVector.h')
-rw-r--r--Eigen/src/Core/products/GeneralMatrixVector.h18
1 files changed, 13 insertions, 5 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index 59fc54c4e..340c51394 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -136,12 +136,17 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,Co
// find how many columns do we have to skip to be aligned with the result (if possible)
Index skipColumns = 0;
// if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
- // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
- if( (size_t(lhs)%sizeof(LhsScalar)) || (size_t(res)%sizeof(ResScalar)) || LhsPacketSize > 4)
+ if( (size_t(lhs)%sizeof(LhsScalar)) || (size_t(res)%sizeof(ResScalar)) )
{
alignedSize = 0;
alignedStart = 0;
}
+ else if(LhsPacketSize > 4)
+ {
+ // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
+ // Currently, it seems to be better to perform unaligned loads anyway
+ alignmentPattern = NoneAligned;
+ }
else if (LhsPacketSize>1)
{
eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
@@ -401,13 +406,16 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
// find how many rows do we have to skip to be aligned with rhs (if possible)
Index skipRows = 0;
// if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
- // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
- if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || (size_t(lhs)%sizeof(LhsScalar)) || (size_t(rhs)%sizeof(RhsScalar)) ||
- (LhsPacketSize > 4))
+ if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || (size_t(lhs)%sizeof(LhsScalar)) || (size_t(rhs)%sizeof(RhsScalar)) )
{
alignedSize = 0;
alignedStart = 0;
}
+ else if(LhsPacketSize > 4)
+ {
+ // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
+ alignmentPattern = NoneAligned;
+ }
else if (LhsPacketSize>1)
{
eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);