aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2012-08-30 10:52:15 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2012-08-30 10:52:15 +0200
commitc5031edb923504e2f47b26335ad335b874d38d2d (patch)
tree4dedc641b7c03133e146d7caac44481d192cda9b /Eigen/src/Core/products
parentd0ee31aea6b271248cbfd9babcad3cf40acde9bb (diff)
Fix out-of-range memory access in GEMV (the memory was not used for the computation, only to assemble unaligned packets from aligned packet loads)
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r--Eigen/src/Core/products/GeneralMatrixVector.h20
1 files changed, 13 insertions, 7 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index ba1f73957..639af8ed4 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -88,7 +88,7 @@ EIGEN_DONT_INLINE static void run(
// Here we assume data are at least aligned on the base scalar type.
Index alignedStart = internal::first_aligned(res,size);
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
- const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
Index alignmentPattern = alignmentStep==0 ? AllAligned
@@ -177,6 +177,8 @@ EIGEN_DONT_INLINE static void run(
_EIGEN_ACCUMULATE_PACKETS(d,du,d);
break;
case FirstAligned:
+ {
+ Index j = alignedStart;
if(peels>1)
{
LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
@@ -186,7 +188,7 @@ EIGEN_DONT_INLINE static void run(
A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
- for (Index j = alignedStart; j<peeledSize; j+=peels*ResPacketSize)
+ for (; j<peeledSize; j+=peels*ResPacketSize)
{
A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
@@ -210,9 +212,10 @@ EIGEN_DONT_INLINE static void run(
pstore(&res[j+ResPacketSize],T1);
}
}
- for (Index j = peeledSize; j<alignedSize; j+=ResPacketSize)
+ for (; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,du);
break;
+ }
default:
for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(du,du,du);
@@ -340,7 +343,7 @@ EIGEN_DONT_INLINE static void run(
// if that's not the case then vectorization is discarded, see below.
Index alignedStart = internal::first_aligned(rhs, depth);
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
- const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
Index alignmentPattern = alignmentStep==0 ? AllAligned
@@ -430,10 +433,12 @@ EIGEN_DONT_INLINE static void run(
_EIGEN_ACCUMULATE_PACKETS(d,du,d);
break;
case FirstAligned:
+ {
+ Index j = alignedStart;
if (peels>1)
{
/* Here we proccess 4 rows with with two peeled iterations to hide
- * tghe overhead of unaligned loads. Moreover unaligned loads are handled
+ * the overhead of unaligned loads. Moreover unaligned loads are handled
* using special shift/move operations between the two aligned packets
* overlaping the desired unaligned packet. This is *much* more efficient
* than basic unaligned loads.
@@ -443,7 +448,7 @@ EIGEN_DONT_INLINE static void run(
A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
- for (Index j = alignedStart; j<peeledSize; j+=peels*RhsPacketSize)
+ for (; j<peeledSize; j+=peels*RhsPacketSize)
{
RhsPacket b = pload<RhsPacket>(&rhs[j]);
A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
@@ -465,9 +470,10 @@ EIGEN_DONT_INLINE static void run(
ptmp3 = pcj.pmadd(A13, b, ptmp3);
}
}
- for (Index j = peeledSize; j<alignedSize; j+=RhsPacketSize)
+ for (; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,du);
break;
+ }
default:
for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(du,du,du);