aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/LU
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/LU')
-rw-r--r--Eigen/src/LU/CMakeLists.txt8
-rw-r--r--Eigen/src/LU/FullPivLU.h63
-rw-r--r--Eigen/src/LU/InverseImpl.h6
-rw-r--r--Eigen/src/LU/PartialPivLU.h77
-rw-r--r--Eigen/src/LU/PartialPivLU_LAPACKE.h (renamed from Eigen/src/LU/PartialPivLU_MKL.h)26
-rw-r--r--Eigen/src/LU/arch/CMakeLists.txt6
-rw-r--r--Eigen/src/LU/arch/Inverse_SSE.h28
7 files changed, 128 insertions, 86 deletions
diff --git a/Eigen/src/LU/CMakeLists.txt b/Eigen/src/LU/CMakeLists.txt
deleted file mode 100644
index e0d8d78c1..000000000
--- a/Eigen/src/LU/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-FILE(GLOB Eigen_LU_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_LU_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU COMPONENT Devel
- )
-
-ADD_SUBDIRECTORY(arch)
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 64b9eb7f1..03b6af706 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -52,6 +52,8 @@ template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >
* \include class_FullPivLU.cpp
* Output: \verbinclude class_FullPivLU.out
*
+ * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+ *
* \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse()
*/
template<typename _MatrixType> class FullPivLU
@@ -97,6 +99,15 @@ template<typename _MatrixType> class FullPivLU
template<typename InputType>
explicit FullPivLU(const EigenBase<InputType>& matrix);
+ /** \brief Constructs a LU factorization from a given matrix
+ *
+ * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
+ *
+ * \sa FullPivLU(const EigenBase&)
+ */
+ template<typename InputType>
+ explicit FullPivLU(EigenBase<InputType>& matrix);
+
/** Computes the LU decomposition of the given matrix.
*
* \param matrix the matrix of which to compute the LU decomposition.
@@ -105,7 +116,11 @@ template<typename _MatrixType> class FullPivLU
* \returns a reference to *this
*/
template<typename InputType>
- FullPivLU& compute(const EigenBase<InputType>& matrix);
+ FullPivLU& compute(const EigenBase<InputType>& matrix) {
+ m_lu = matrix.derived();
+ computeInPlace();
+ return *this;
+ }
/** \returns the LU decomposition matrix: the upper-triangular part is U, the
* unit-lower-triangular part is L (at least for square matrices; in the non-square
@@ -141,7 +156,7 @@ template<typename _MatrixType> class FullPivLU
*
* \sa permutationQ()
*/
- inline const PermutationPType& permutationP() const
+ EIGEN_DEVICE_FUNC inline const PermutationPType& permutationP() const
{
eigen_assert(m_isInitialized && "LU is not initialized.");
return m_p;
@@ -391,8 +406,8 @@ template<typename _MatrixType> class FullPivLU
MatrixType reconstructedMatrix() const;
- inline Index rows() const { return m_lu.rows(); }
- inline Index cols() const { return m_lu.cols(); }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_lu.rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_lu.cols(); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename RhsType, typename DstType>
@@ -418,9 +433,10 @@ template<typename _MatrixType> class FullPivLU
PermutationQType m_q;
IntColVectorType m_rowsTranspositions;
IntRowVectorType m_colsTranspositions;
- Index m_det_pq, m_nonzero_pivots;
+ Index m_nonzero_pivots;
RealScalar m_l1_norm;
RealScalar m_maxpivot, m_prescribedThreshold;
+ signed char m_det_pq;
bool m_isInitialized, m_usePrescribedThreshold;
};
@@ -458,25 +474,28 @@ FullPivLU<MatrixType>::FullPivLU(const EigenBase<InputType>& matrix)
template<typename MatrixType>
template<typename InputType>
-FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const EigenBase<InputType>& matrix)
+FullPivLU<MatrixType>::FullPivLU(EigenBase<InputType>& matrix)
+ : m_lu(matrix.derived()),
+ m_p(matrix.rows()),
+ m_q(matrix.cols()),
+ m_rowsTranspositions(matrix.rows()),
+ m_colsTranspositions(matrix.cols()),
+ m_isInitialized(false),
+ m_usePrescribedThreshold(false)
{
- check_template_parameters();
-
- // the permutations are stored as int indices, so just to be sure:
- eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());
-
- m_lu = matrix.derived();
- m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
-
computeInPlace();
-
- m_isInitialized = true;
- return *this;
}
template<typename MatrixType>
void FullPivLU<MatrixType>::computeInPlace()
{
+ check_template_parameters();
+
+ // the permutations are stored as int indices, so just to be sure:
+ eigen_assert(m_lu.rows()<=NumTraits<int>::highest() && m_lu.cols()<=NumTraits<int>::highest());
+
+ m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
+
const Index size = m_lu.diagonalSize();
const Index rows = m_lu.rows();
const Index cols = m_lu.cols();
@@ -556,6 +575,8 @@ void FullPivLU<MatrixType>::computeInPlace()
m_q.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k));
m_det_pq = (number_of_transpositions%2) ? -1 : 1;
+
+ m_isInitialized = true;
}
template<typename MatrixType>
@@ -838,12 +859,12 @@ namespace internal {
/***** Implementation of inverse() *****************************************************/
-template<typename DstXprType, typename MatrixType, typename Scalar>
-struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+template<typename DstXprType, typename MatrixType>
+struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivLU<MatrixType>::Scalar>, Dense2Dense>
{
typedef FullPivLU<MatrixType> LuType;
typedef Inverse<LuType> SrcXprType;
- static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename MatrixType::Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}
@@ -858,14 +879,12 @@ struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_
*
* \sa class FullPivLU
*/
-#ifndef __CUDACC__
template<typename Derived>
inline const FullPivLU<typename MatrixBase<Derived>::PlainObject>
MatrixBase<Derived>::fullPivLu() const
{
return FullPivLU<PlainObject>(eval());
}
-#endif
} // end namespace Eigen
diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h
index e202a55cb..3134632e1 100644
--- a/Eigen/src/LU/InverseImpl.h
+++ b/Eigen/src/LU/InverseImpl.h
@@ -286,11 +286,11 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4>
namespace internal {
// Specialization for "dense = dense_xpr.inverse()"
-template<typename DstXprType, typename XprType, typename Scalar>
-struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+template<typename DstXprType, typename XprType>
+struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar>, Dense2Dense>
{
typedef Inverse<XprType> SrcXprType;
- static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &)
{
// FIXME shall we resize dst here?
const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime);
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 2e6d91939..d43961887 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -26,6 +26,17 @@ template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
};
};
+template<typename T,typename Derived>
+struct enable_if_ref;
+// {
+// typedef Derived type;
+// };
+
+template<typename T,typename Derived>
+struct enable_if_ref<Ref<T>,Derived> {
+ typedef Derived type;
+};
+
} // end namespace internal
/** \ingroup LU_Module
@@ -57,6 +68,8 @@ template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
*
* The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP().
*
+ * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+ *
* \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU
*/
template<typename _MatrixType> class PartialPivLU
@@ -102,8 +115,22 @@ template<typename _MatrixType> class PartialPivLU
template<typename InputType>
explicit PartialPivLU(const EigenBase<InputType>& matrix);
+ /** Constructor for \link InplaceDecomposition inplace decomposition \endlink
+ *
+ * \param matrix the matrix of which to compute the LU decomposition.
+ *
+ * \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
+ * If you need to deal with non-full rank, use class FullPivLU instead.
+ */
template<typename InputType>
- PartialPivLU& compute(const EigenBase<InputType>& matrix);
+ explicit PartialPivLU(EigenBase<InputType>& matrix);
+
+ template<typename InputType>
+ PartialPivLU& compute(const EigenBase<InputType>& matrix) {
+ m_lu = matrix.derived();
+ compute();
+ return *this;
+ }
/** \returns the LU decomposition matrix: the upper-triangular part is U, the
* unit-lower-triangular part is L (at least for square matrices; in the non-square
@@ -251,11 +278,13 @@ template<typename _MatrixType> class PartialPivLU
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
+ void compute();
+
MatrixType m_lu;
PermutationType m_p;
TranspositionType m_rowsTranspositions;
- Index m_det_p;
RealScalar m_l1_norm;
+ signed char m_det_p;
bool m_isInitialized;
};
@@ -264,8 +293,8 @@ PartialPivLU<MatrixType>::PartialPivLU()
: m_lu(),
m_p(),
m_rowsTranspositions(),
- m_det_p(0),
m_l1_norm(0),
+ m_det_p(0),
m_isInitialized(false)
{
}
@@ -275,8 +304,8 @@ PartialPivLU<MatrixType>::PartialPivLU(Index size)
: m_lu(size, size),
m_p(size),
m_rowsTranspositions(size),
- m_det_p(0),
m_l1_norm(0),
+ m_det_p(0),
m_isInitialized(false)
{
}
@@ -284,16 +313,29 @@ PartialPivLU<MatrixType>::PartialPivLU(Index size)
template<typename MatrixType>
template<typename InputType>
PartialPivLU<MatrixType>::PartialPivLU(const EigenBase<InputType>& matrix)
- : m_lu(matrix.rows(), matrix.rows()),
+ : m_lu(matrix.rows(),matrix.cols()),
m_p(matrix.rows()),
m_rowsTranspositions(matrix.rows()),
- m_det_p(0),
m_l1_norm(0),
+ m_det_p(0),
m_isInitialized(false)
{
compute(matrix.derived());
}
+template<typename MatrixType>
+template<typename InputType>
+PartialPivLU<MatrixType>::PartialPivLU(EigenBase<InputType>& matrix)
+ : m_lu(matrix.derived()),
+ m_p(matrix.rows()),
+ m_rowsTranspositions(matrix.rows()),
+ m_l1_norm(0),
+ m_det_p(0),
+ m_isInitialized(false)
+{
+ compute();
+}
+
namespace internal {
/** \internal This is the blocked version of fullpivlu_unblocked() */
@@ -434,7 +476,7 @@ struct partial_lu_impl
// update permutations and apply them to A_0
for(Index i=k; i<k+bs; ++i)
{
- Index piv = (row_transpositions[i] += k);
+ Index piv = (row_transpositions[i] += internal::convert_index<PivIndex>(k));
A_0.row(i).swap(A_0.row(piv));
}
@@ -470,19 +512,17 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t
} // end namespace internal
template<typename MatrixType>
-template<typename InputType>
-PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const EigenBase<InputType>& matrix)
+void PartialPivLU<MatrixType>::compute()
{
check_template_parameters();
// the row permutation is stored as int indices, so just to be sure:
- eigen_assert(matrix.rows()<NumTraits<int>::highest());
+ eigen_assert(m_lu.rows()<NumTraits<int>::highest());
- m_lu = matrix.derived();
m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
- eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
- const Index size = matrix.rows();
+ eigen_assert(m_lu.rows() == m_lu.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
+ const Index size = m_lu.rows();
m_rowsTranspositions.resize(size);
@@ -493,7 +533,6 @@ PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const EigenBase<Inpu
m_p = m_rowsTranspositions;
m_isInitialized = true;
- return *this;
}
template<typename MatrixType>
@@ -525,12 +564,12 @@ MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
namespace internal {
/***** Implementation of inverse() *****************************************************/
-template<typename DstXprType, typename MatrixType, typename Scalar>
-struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+template<typename DstXprType, typename MatrixType>
+struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename PartialPivLU<MatrixType>::Scalar>, Dense2Dense>
{
typedef PartialPivLU<MatrixType> LuType;
typedef Inverse<LuType> SrcXprType;
- static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}
@@ -545,14 +584,12 @@ struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assi
*
* \sa class PartialPivLU
*/
-#ifndef __CUDACC__
template<typename Derived>
inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
MatrixBase<Derived>::partialPivLu() const
{
return PartialPivLU<PlainObject>(eval());
}
-#endif
/** \lu_module
*
@@ -562,14 +599,12 @@ MatrixBase<Derived>::partialPivLu() const
*
* \sa class PartialPivLU
*/
-#ifndef __CUDACC__
template<typename Derived>
inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
MatrixBase<Derived>::lu() const
{
return PartialPivLU<PlainObject>(eval());
}
-#endif
} // end namespace Eigen
diff --git a/Eigen/src/LU/PartialPivLU_MKL.h b/Eigen/src/LU/PartialPivLU_LAPACKE.h
index 9035953c8..755168a94 100644
--- a/Eigen/src/LU/PartialPivLU_MKL.h
+++ b/Eigen/src/LU/PartialPivLU_LAPACKE.h
@@ -25,7 +25,7 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
- * Content : Eigen bindings to Intel(R) MKL
+ * Content : Eigen bindings to LAPACKe
* LU decomposition with partial pivoting based on LAPACKE_?getrf function.
********************************************************************************
*/
@@ -33,20 +33,18 @@
#ifndef EIGEN_PARTIALLU_LAPACK_H
#define EIGEN_PARTIALLU_LAPACK_H
-#include "Eigen/src/Core/util/MKL_support.h"
-
namespace Eigen {
namespace internal {
-/** \internal Specialization for the data types supported by MKL */
+/** \internal Specialization for the data types supported by LAPACKe */
-#define EIGEN_MKL_LU_PARTPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \
+#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \
template<int StorageOrder> \
struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \
{ \
/* \internal performs the LU decomposition in-place of the matrix represented */ \
- static lapack_int blocked_lu(lapack_int rows, lapack_int cols, EIGTYPE* lu_data, lapack_int luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \
+ static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \
{ \
EIGEN_UNUSED_VARIABLE(maxBlockSize);\
lapack_int matrix_order, first_zero_pivot; \
@@ -54,14 +52,14 @@ struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \
EIGTYPE* a; \
/* Set up parameters for ?getrf */ \
matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
- lda = luStride; \
+ lda = convert_index<lapack_int>(luStride); \
a = lu_data; \
ipiv = row_transpositions; \
- m = rows; \
- n = cols; \
+ m = convert_index<lapack_int>(rows); \
+ n = convert_index<lapack_int>(cols); \
nb_transpositions = 0; \
\
- info = LAPACKE_##MKLPREFIX##getrf( matrix_order, m, n, (MKLTYPE*)a, lda, ipiv ); \
+ info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \
\
for(int i=0;i<m;i++) { ipiv[i]--; if (ipiv[i]!=i) nb_transpositions++; } \
\
@@ -73,10 +71,10 @@ struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \
} \
};
-EIGEN_MKL_LU_PARTPIV(double, double, d)
-EIGEN_MKL_LU_PARTPIV(float, float, s)
-EIGEN_MKL_LU_PARTPIV(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_LU_PARTPIV(scomplex, MKL_Complex8, c)
+EIGEN_LAPACKE_LU_PARTPIV(double, double, d)
+EIGEN_LAPACKE_LU_PARTPIV(float, float, s)
+EIGEN_LAPACKE_LU_PARTPIV(dcomplex, lapack_complex_double, z)
+EIGEN_LAPACKE_LU_PARTPIV(scomplex, lapack_complex_float, c)
} // end namespace internal
diff --git a/Eigen/src/LU/arch/CMakeLists.txt b/Eigen/src/LU/arch/CMakeLists.txt
deleted file mode 100644
index f6b7ed9ec..000000000
--- a/Eigen/src/LU/arch/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_LU_arch_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_LU_arch_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU/arch COMPONENT Devel
- )
diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h
index e1470c664..ebb64a62b 100644
--- a/Eigen/src/LU/arch/Inverse_SSE.h
+++ b/Eigen/src/LU/arch/Inverse_SSE.h
@@ -153,10 +153,12 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
iC = _mm_mul_ps(rd,iC);
iD = _mm_mul_ps(rd,iD);
- result.template writePacket<ResultAlignment>( 0, _mm_shuffle_ps(iA,iB,0x77));
- result.template writePacket<ResultAlignment>( 4, _mm_shuffle_ps(iA,iB,0x22));
- result.template writePacket<ResultAlignment>( 8, _mm_shuffle_ps(iC,iD,0x77));
- result.template writePacket<ResultAlignment>(12, _mm_shuffle_ps(iC,iD,0x22));
+ Index res_stride = result.outerStride();
+ float* res = result.data();
+ pstoret<float, Packet4f, ResultAlignment>(res+0, _mm_shuffle_ps(iA,iB,0x77));
+ pstoret<float, Packet4f, ResultAlignment>(res+res_stride, _mm_shuffle_ps(iA,iB,0x22));
+ pstoret<float, Packet4f, ResultAlignment>(res+2*res_stride, _mm_shuffle_ps(iC,iD,0x77));
+ pstoret<float, Packet4f, ResultAlignment>(res+3*res_stride, _mm_shuffle_ps(iC,iD,0x22));
}
};
@@ -316,14 +318,16 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1);
iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2);
- result.template writePacket<ResultAlignment>( 0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1)); // iA# / det
- result.template writePacket<ResultAlignment>( 4, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
- result.template writePacket<ResultAlignment>( 2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1)); // iB# / det
- result.template writePacket<ResultAlignment>( 6, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
- result.template writePacket<ResultAlignment>( 8, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1)); // iC# / det
- result.template writePacket<ResultAlignment>(12, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
- result.template writePacket<ResultAlignment>(10, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1)); // iD# / det
- result.template writePacket<ResultAlignment>(14, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
+ Index res_stride = result.outerStride();
+ double* res = result.data();
+ pstoret<double, Packet2d, ResultAlignment>(res+0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));
+ pstoret<double, Packet2d, ResultAlignment>(res+res_stride, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
+ pstoret<double, Packet2d, ResultAlignment>(res+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));
+ pstoret<double, Packet2d, ResultAlignment>(res+res_stride+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
+ pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));
+ pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
+ pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));
+ pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
}
};