diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-11-26 11:28:25 -0800 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-11-26 11:28:25 -0800 |
commit | 9f8fbd9434a604e54fee59460cbd13fe629299fa (patch) | |
tree | afebfa832e8f978df9710eb0d6da8efae3672a2f /Eigen | |
parent | 7318daf887c4f06fa62e59e29fa675e48ad168f9 (diff) | |
parent | 67b2c41f30a29debcb720fe85c2581901ff36fd2 (diff) |
Merged eigen/eigen into default
Diffstat (limited to 'Eigen')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 7 |
| -rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 42 |
| -rwxr-xr-x | Eigen/src/Core/util/DisableStupidWarnings.h | 3 |
| -rw-r--r-- | Eigen/src/SparseCore/SparseBlock.h | 12 |
| -rw-r--r-- | Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 52 |
5 files changed, 62 insertions, 54 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index e60ef307b..195d40fb4 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -395,14 +395,11 @@ template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs) template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a) { - Packet8f tmp0 = _mm256_hadd_ps(a,_mm256_permute2f128_ps(a,a,1)); - tmp0 = _mm256_hadd_ps(tmp0,tmp0); - return pfirst(_mm256_hadd_ps(tmp0, tmp0)); + return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1)))); } template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a) { - Packet4d tmp0 = _mm256_hadd_pd(a,_mm256_permute2f128_pd(a,a,1)); - return pfirst(_mm256_hadd_pd(tmp0,tmp0)); + return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1)))); } template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 6f31cf12b..80cf8af09 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -504,30 +504,13 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs) { return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3])); } + template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs) { return _mm_hadd_pd(vecs[0], vecs[1]); } -template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) -{ - Packet4f tmp0 = _mm_hadd_ps(a,a); - return pfirst<Packet4f>(_mm_hadd_ps(tmp0, tmp0)); -} - -template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst<Packet2d>(_mm_hadd_pd(a, a)); } #else -// SSE2 versions -template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) -{ - Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a)); - return 
pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); -} -template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) -{ - return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a))); -} - template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs) { Packet4f tmp0, tmp1, tmp2; @@ -548,6 +531,29 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs) } #endif // SSE3 +template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) +{ + // Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures + // (from Nehalem to Haswell) +// #ifdef EIGEN_VECTORIZE_SSE3 +// Packet4f tmp = _mm_add_ps(a, vec4f_swizzle1(a,2,3,2,3)); +// return pfirst<Packet4f>(_mm_hadd_ps(tmp, tmp)); +// #else + Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a)); + return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); +// #endif +} + +template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) +{ + // Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures + // (from Nehalem to Haswell) +// #ifdef EIGEN_VECTORIZE_SSE3 +// return pfirst<Packet2d>(_mm_hadd_pd(a, a)); +// #else + return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a))); +// #endif +} #ifdef EIGEN_VECTORIZE_SSSE3 template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs) diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h index 7559e129c..21f80d86b 100755 --- a/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/Eigen/src/Core/util/DisableStupidWarnings.h @@ -42,6 +42,9 @@ #pragma clang diagnostic push #endif #pragma clang diagnostic ignored "-Wconstant-logical-operand" + #if __clang_major__ >= 3 && __clang_minor__ >= 5 + #pragma clang diagnostic ignored "-Wabsolute-value" + #endif #elif defined __GNUC__ && __GNUC__>=6 diff --git a/Eigen/src/SparseCore/SparseBlock.h 
b/Eigen/src/SparseCore/SparseBlock.h index 13e8b0bf1..acaf933f4 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -130,7 +130,7 @@ public: // 2 - let's check whether there is enough allocated memory
Index nnz = tmp.nonZeros();
- Index start = m_outerStart==0 ? 0 : matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
+ Index start = m_outerStart==0 ? 0 : m_matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
Index end = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]; // ending position of the current block
Index block_size = end - start; // available room in the current block
Index tail_size = m_matrix.outerIndexPtr()[m_matrix.outerSize()] - end;
@@ -139,6 +139,8 @@ public: ? Index(matrix.data().allocatedSize()) + block_size
: block_size;
+ Index tmp_start = tmp.outerIndexPtr()[0];
+
bool update_trailing_pointers = false;
if(nnz>free_size)
{
@@ -148,8 +150,8 @@ public: internal::smart_copy(m_matrix.valuePtr(), m_matrix.valuePtr() + start, newdata.valuePtr());
internal::smart_copy(m_matrix.innerIndexPtr(), m_matrix.innerIndexPtr() + start, newdata.indexPtr());
- internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, newdata.valuePtr() + start);
- internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, newdata.indexPtr() + start);
+ internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, newdata.valuePtr() + start);
+ internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, newdata.indexPtr() + start);
internal::smart_copy(matrix.valuePtr()+end, matrix.valuePtr()+end + tail_size, newdata.valuePtr()+start+nnz);
internal::smart_copy(matrix.innerIndexPtr()+end, matrix.innerIndexPtr()+end + tail_size, newdata.indexPtr()+start+nnz);
@@ -173,8 +175,8 @@ public: update_trailing_pointers = true;
}
- internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, matrix.valuePtr() + start);
- internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, matrix.innerIndexPtr() + start);
+ internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, matrix.valuePtr() + start);
+ internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, matrix.innerIndexPtr() + start);
}
// update outer index pointers and innerNonZeros
diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index 04cef66fc..4ba4d631d 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -394,10 +394,10 @@ struct sparse_conjunction_evaluator<XprType, IteratorBased, IteratorBased> { protected: typedef typename XprType::Functor BinaryOp; - typedef typename XprType::Lhs Lhs; - typedef typename XprType::Rhs Rhs; - typedef typename evaluator<Lhs>::InnerIterator LhsIterator; - typedef typename evaluator<Rhs>::InnerIterator RhsIterator; + typedef typename XprType::Lhs LhsArg; + typedef typename XprType::Rhs RhsArg; + typedef typename evaluator<LhsArg>::InnerIterator LhsIterator; + typedef typename evaluator<RhsArg>::InnerIterator RhsIterator; typedef typename XprType::StorageIndex StorageIndex; typedef typename traits<XprType>::Scalar Scalar; public: @@ -449,7 +449,7 @@ public: enum { - CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost, Flags = XprType::Flags }; @@ -468,8 +468,8 @@ public: protected: const BinaryOp m_functor; - evaluator<Lhs> m_lhsImpl; - evaluator<Rhs> m_rhsImpl; + evaluator<LhsArg> m_lhsImpl; + evaluator<RhsArg> m_rhsImpl; }; // "dense ^ sparse" @@ -479,10 +479,10 @@ struct sparse_conjunction_evaluator<XprType, IndexBased, IteratorBased> { protected: typedef typename XprType::Functor BinaryOp; - typedef typename XprType::Lhs Lhs; - typedef typename XprType::Rhs Rhs; - typedef evaluator<Lhs> LhsEvaluator; - typedef typename evaluator<Rhs>::InnerIterator RhsIterator; + typedef typename XprType::Lhs LhsArg; + typedef typename XprType::Rhs RhsArg; + typedef evaluator<LhsArg> LhsEvaluator; + typedef typename evaluator<RhsArg>::InnerIterator RhsIterator; typedef typename XprType::StorageIndex StorageIndex; typedef typename 
traits<XprType>::Scalar Scalar; public: @@ -490,7 +490,7 @@ public: class ReverseInnerIterator; class InnerIterator { - enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit }; + enum { IsRowMajor = (int(RhsArg::Flags)&RowMajorBit)==RowMajorBit }; public: @@ -522,9 +522,9 @@ public: enum { - CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost, // Expose storage order of the sparse expression - Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit) + Flags = (XprType::Flags & ~RowMajorBit) | (int(RhsArg::Flags)&RowMajorBit) }; explicit sparse_conjunction_evaluator(const XprType& xpr) @@ -542,8 +542,8 @@ public: protected: const BinaryOp m_functor; - evaluator<Lhs> m_lhsImpl; - evaluator<Rhs> m_rhsImpl; + evaluator<LhsArg> m_lhsImpl; + evaluator<RhsArg> m_rhsImpl; }; // "sparse ^ dense" @@ -553,10 +553,10 @@ struct sparse_conjunction_evaluator<XprType, IteratorBased, IndexBased> { protected: typedef typename XprType::Functor BinaryOp; - typedef typename XprType::Lhs Lhs; - typedef typename XprType::Rhs Rhs; - typedef typename evaluator<Lhs>::InnerIterator LhsIterator; - typedef evaluator<Rhs> RhsEvaluator; + typedef typename XprType::Lhs LhsArg; + typedef typename XprType::Rhs RhsArg; + typedef typename evaluator<LhsArg>::InnerIterator LhsIterator; + typedef evaluator<RhsArg> RhsEvaluator; typedef typename XprType::StorageIndex StorageIndex; typedef typename traits<XprType>::Scalar Scalar; public: @@ -564,7 +564,7 @@ public: class ReverseInnerIterator; class InnerIterator { - enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit }; + enum { IsRowMajor = (int(LhsArg::Flags)&RowMajorBit)==RowMajorBit }; public: @@ -590,16 +590,16 @@ public: protected: LhsIterator m_lhsIter; - const evaluator<Rhs> &m_rhsEval; + const evaluator<RhsArg> &m_rhsEval; const 
BinaryOp& m_functor; const Index m_outer; }; enum { - CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost, // Expose storage order of the sparse expression - Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit) + Flags = (XprType::Flags & ~RowMajorBit) | (int(LhsArg::Flags)&RowMajorBit) }; explicit sparse_conjunction_evaluator(const XprType& xpr) @@ -617,8 +617,8 @@ public: protected: const BinaryOp m_functor; - evaluator<Lhs> m_lhsImpl; - evaluator<Rhs> m_rhsImpl; + evaluator<LhsArg> m_lhsImpl; + evaluator<RhsArg> m_rhsImpl; }; } |