about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen
diff options
context:
space:
mode:
author: Rasmus Munk Larsen <rmlarsen@google.com> 2021-04-22 15:21:01 +0000
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2021-04-22 15:21:01 +0000
commit 85a76a16ea835fcfa7d4c185a338ae2aef9a272a (patch)
tree 32e9f3b58c87ef63677d681e8b60b733b843ca06 /Eigen
parent d72c794ccd21637ba56dec0dd8bd0cffef7bc47e (diff)
Make vectorized compute_inverse_size4 compile with AVX.
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/LU | 4
-rw-r--r-- Eigen/src/LU/arch/InverseSize4.h | 45
2 files changed, 25 insertions, 24 deletions
diff --git a/Eigen/LU b/Eigen/LU
index 0fb184bcb..1236ceb04 100644
--- a/Eigen/LU
+++ b/Eigen/LU
@@ -38,9 +38,7 @@
#include "src/LU/Determinant.h"
#include "src/LU/InverseImpl.h"
-// Use the SSE optimized version whenever possible. At the moment the
-// SSE version doesn't compile when AVX is enabled
-#if (defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX) || defined EIGEN_VECTORIZE_NEON
+#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_NEON
#include "src/LU/arch/InverseSize4.h"
#endif
diff --git a/Eigen/src/LU/arch/InverseSize4.h b/Eigen/src/LU/arch/InverseSize4.h
index 5a8d0c114..ee5548aed 100644
--- a/Eigen/src/LU/arch/InverseSize4.h
+++ b/Eigen/src/LU/arch/InverseSize4.h
@@ -54,10 +54,12 @@ struct compute_inverse_size4<Architecture::Target, float, MatrixType, ResultType
{
ActualMatrixType matrix(mat);
- Packet4f _L1 = matrix.template packet<MatrixAlignment>(0);
- Packet4f _L2 = matrix.template packet<MatrixAlignment>(4);
- Packet4f _L3 = matrix.template packet<MatrixAlignment>(8);
- Packet4f _L4 = matrix.template packet<MatrixAlignment>(12);
+ const float* data = matrix.data();
+ const Index stride = matrix.innerStride();
+ Packet4f _L1 = ploadt<Packet4f,MatrixAlignment>(data);
+ Packet4f _L2 = ploadt<Packet4f,MatrixAlignment>(data + stride*4);
+ Packet4f _L3 = ploadt<Packet4f,MatrixAlignment>(data + stride*8);
+ Packet4f _L4 = ploadt<Packet4f,MatrixAlignment>(data + stride*12);
// Four 2x2 sub-matrices of the input matrix
// input = [[A, B],
@@ -189,25 +191,26 @@ struct compute_inverse_size4<Architecture::Target, double, MatrixType, ResultTyp
Packet2d A1, A2, B1, B2, C1, C2, D1, D2;
+ const double* data = matrix.data();
+ const Index stride = matrix.innerStride();
if (StorageOrdersMatch)
{
- A1 = matrix.template packet<MatrixAlignment>(0);
- B1 = matrix.template packet<MatrixAlignment>(2);
- A2 = matrix.template packet<MatrixAlignment>(4);
- B2 = matrix.template packet<MatrixAlignment>(6);
- C1 = matrix.template packet<MatrixAlignment>(8);
- D1 = matrix.template packet<MatrixAlignment>(10);
- C2 = matrix.template packet<MatrixAlignment>(12);
- D2 = matrix.template packet<MatrixAlignment>(14);
+ A1 = ploadt<Packet2d,MatrixAlignment>(data + stride*0);
+ B1 = ploadt<Packet2d,MatrixAlignment>(data + stride*2);
+ A2 = ploadt<Packet2d,MatrixAlignment>(data + stride*4);
+ B2 = ploadt<Packet2d,MatrixAlignment>(data + stride*6);
+ C1 = ploadt<Packet2d,MatrixAlignment>(data + stride*8);
+ D1 = ploadt<Packet2d,MatrixAlignment>(data + stride*10);
+ C2 = ploadt<Packet2d,MatrixAlignment>(data + stride*12);
+ D2 = ploadt<Packet2d,MatrixAlignment>(data + stride*14);
}
else
{
Packet2d temp;
- A1 = matrix.template packet<MatrixAlignment>(0);
- C1 = matrix.template packet<MatrixAlignment>(2);
- A2 = matrix.template packet<MatrixAlignment>(4);
- C2 = matrix.template packet<MatrixAlignment>(6);
-
+ A1 = ploadt<Packet2d,MatrixAlignment>(data + stride*0);
+ C1 = ploadt<Packet2d,MatrixAlignment>(data + stride*2);
+ A2 = ploadt<Packet2d,MatrixAlignment>(data + stride*4);
+ C2 = ploadt<Packet2d,MatrixAlignment>(data + stride*6);
temp = A1;
A1 = vec2d_unpacklo(A1, A2);
A2 = vec2d_unpackhi(temp, A2);
@@ -216,10 +219,10 @@ struct compute_inverse_size4<Architecture::Target, double, MatrixType, ResultTyp
C1 = vec2d_unpacklo(C1, C2);
C2 = vec2d_unpackhi(temp, C2);
- B1 = matrix.template packet<MatrixAlignment>(8);
- D1 = matrix.template packet<MatrixAlignment>(10);
- B2 = matrix.template packet<MatrixAlignment>(12);
- D2 = matrix.template packet<MatrixAlignment>(14);
+ B1 = ploadt<Packet2d,MatrixAlignment>(data + stride*8);
+ D1 = ploadt<Packet2d,MatrixAlignment>(data + stride*10);
+ B2 = ploadt<Packet2d,MatrixAlignment>(data + stride*12);
+ D2 = ploadt<Packet2d,MatrixAlignment>(data + stride*14);
temp = B1;
B1 = vec2d_unpacklo(B1, B2);