aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-11-26 11:28:25 -0800
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-11-26 11:28:25 -0800
commit9f8fbd9434a604e54fee59460cbd13fe629299fa (patch)
treeafebfa832e8f978df9710eb0d6da8efae3672a2f /Eigen
parent7318daf887c4f06fa62e59e29fa675e48ad168f9 (diff)
parent67b2c41f30a29debcb720fe85c2581901ff36fd2 (diff)
Merged eigen/eigen into default
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h7
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h42
-rwxr-xr-xEigen/src/Core/util/DisableStupidWarnings.h3
-rw-r--r--Eigen/src/SparseCore/SparseBlock.h12
-rw-r--r--Eigen/src/SparseCore/SparseCwiseBinaryOp.h52
5 files changed, 62 insertions, 54 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index e60ef307b..195d40fb4 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -395,14 +395,11 @@ template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
{
- Packet8f tmp0 = _mm256_hadd_ps(a,_mm256_permute2f128_ps(a,a,1));
- tmp0 = _mm256_hadd_ps(tmp0,tmp0);
- return pfirst(_mm256_hadd_ps(tmp0, tmp0));
+ return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
}
template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
{
- Packet4d tmp0 = _mm256_hadd_pd(a,_mm256_permute2f128_pd(a,a,1));
- return pfirst(_mm256_hadd_pd(tmp0,tmp0));
+ return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
}
template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 6f31cf12b..80cf8af09 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -504,30 +504,13 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{
return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
}
+
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
{
return _mm_hadd_pd(vecs[0], vecs[1]);
}
-template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
-{
- Packet4f tmp0 = _mm_hadd_ps(a,a);
- return pfirst<Packet4f>(_mm_hadd_ps(tmp0, tmp0));
-}
-
-template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst<Packet2d>(_mm_hadd_pd(a, a)); }
#else
-// SSE2 versions
-template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
-{
- Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
- return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
-}
-template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
-{
- return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
-}
-
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{
Packet4f tmp0, tmp1, tmp2;
@@ -548,6 +531,29 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
}
#endif // SSE3
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ // Disable SSE3 _mm_hadd_ps, which is extremely slow on all existing Intel architectures
+ // (from Nehalem to Haswell)
+// #ifdef EIGEN_VECTORIZE_SSE3
+// Packet4f tmp = _mm_add_ps(a, vec4f_swizzle1(a,2,3,2,3));
+// return pfirst<Packet4f>(_mm_hadd_ps(tmp, tmp));
+// #else
+ Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+// #endif
+}
+
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ // Disable SSE3 _mm_hadd_pd, which is extremely slow on all existing Intel architectures
+ // (from Nehalem to Haswell)
+// #ifdef EIGEN_VECTORIZE_SSE3
+// return pfirst<Packet2d>(_mm_hadd_pd(a, a));
+// #else
+ return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
+// #endif
+}
#ifdef EIGEN_VECTORIZE_SSSE3
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h
index 7559e129c..21f80d86b 100755
--- a/Eigen/src/Core/util/DisableStupidWarnings.h
+++ b/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -42,6 +42,9 @@
#pragma clang diagnostic push
#endif
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
+ #if __clang_major__ >= 3 && __clang_minor__ >= 5
+ #pragma clang diagnostic ignored "-Wabsolute-value"
+ #endif
#elif defined __GNUC__ && __GNUC__>=6
diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h
index 13e8b0bf1..acaf933f4 100644
--- a/Eigen/src/SparseCore/SparseBlock.h
+++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -130,7 +130,7 @@ public:
// 2 - let's check whether there is enough allocated memory
Index nnz = tmp.nonZeros();
- Index start = m_outerStart==0 ? 0 : matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
+ Index start = m_outerStart==0 ? 0 : m_matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
Index end = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]; // ending position of the current block
Index block_size = end - start; // available room in the current block
Index tail_size = m_matrix.outerIndexPtr()[m_matrix.outerSize()] - end;
@@ -139,6 +139,8 @@ public:
? Index(matrix.data().allocatedSize()) + block_size
: block_size;
+ Index tmp_start = tmp.outerIndexPtr()[0];
+
bool update_trailing_pointers = false;
if(nnz>free_size)
{
@@ -148,8 +150,8 @@ public:
internal::smart_copy(m_matrix.valuePtr(), m_matrix.valuePtr() + start, newdata.valuePtr());
internal::smart_copy(m_matrix.innerIndexPtr(), m_matrix.innerIndexPtr() + start, newdata.indexPtr());
- internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, newdata.valuePtr() + start);
- internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, newdata.indexPtr() + start);
+ internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, newdata.valuePtr() + start);
+ internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, newdata.indexPtr() + start);
internal::smart_copy(matrix.valuePtr()+end, matrix.valuePtr()+end + tail_size, newdata.valuePtr()+start+nnz);
internal::smart_copy(matrix.innerIndexPtr()+end, matrix.innerIndexPtr()+end + tail_size, newdata.indexPtr()+start+nnz);
@@ -173,8 +175,8 @@ public:
update_trailing_pointers = true;
}
- internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, matrix.valuePtr() + start);
- internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, matrix.innerIndexPtr() + start);
+ internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, matrix.valuePtr() + start);
+ internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, matrix.innerIndexPtr() + start);
}
// update outer index pointers and innerNonZeros
diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
index 04cef66fc..4ba4d631d 100644
--- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
@@ -394,10 +394,10 @@ struct sparse_conjunction_evaluator<XprType, IteratorBased, IteratorBased>
{
protected:
typedef typename XprType::Functor BinaryOp;
- typedef typename XprType::Lhs Lhs;
- typedef typename XprType::Rhs Rhs;
- typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
- typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
+ typedef typename XprType::Lhs LhsArg;
+ typedef typename XprType::Rhs RhsArg;
+ typedef typename evaluator<LhsArg>::InnerIterator LhsIterator;
+ typedef typename evaluator<RhsArg>::InnerIterator RhsIterator;
typedef typename XprType::StorageIndex StorageIndex;
typedef typename traits<XprType>::Scalar Scalar;
public:
@@ -449,7 +449,7 @@ public:
enum {
- CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
Flags = XprType::Flags
};
@@ -468,8 +468,8 @@ public:
protected:
const BinaryOp m_functor;
- evaluator<Lhs> m_lhsImpl;
- evaluator<Rhs> m_rhsImpl;
+ evaluator<LhsArg> m_lhsImpl;
+ evaluator<RhsArg> m_rhsImpl;
};
// "dense ^ sparse"
@@ -479,10 +479,10 @@ struct sparse_conjunction_evaluator<XprType, IndexBased, IteratorBased>
{
protected:
typedef typename XprType::Functor BinaryOp;
- typedef typename XprType::Lhs Lhs;
- typedef typename XprType::Rhs Rhs;
- typedef evaluator<Lhs> LhsEvaluator;
- typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
+ typedef typename XprType::Lhs LhsArg;
+ typedef typename XprType::Rhs RhsArg;
+ typedef evaluator<LhsArg> LhsEvaluator;
+ typedef typename evaluator<RhsArg>::InnerIterator RhsIterator;
typedef typename XprType::StorageIndex StorageIndex;
typedef typename traits<XprType>::Scalar Scalar;
public:
@@ -490,7 +490,7 @@ public:
class ReverseInnerIterator;
class InnerIterator
{
- enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
+ enum { IsRowMajor = (int(RhsArg::Flags)&RowMajorBit)==RowMajorBit };
public:
@@ -522,9 +522,9 @@ public:
enum {
- CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
// Expose storage order of the sparse expression
- Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit)
+ Flags = (XprType::Flags & ~RowMajorBit) | (int(RhsArg::Flags)&RowMajorBit)
};
explicit sparse_conjunction_evaluator(const XprType& xpr)
@@ -542,8 +542,8 @@ public:
protected:
const BinaryOp m_functor;
- evaluator<Lhs> m_lhsImpl;
- evaluator<Rhs> m_rhsImpl;
+ evaluator<LhsArg> m_lhsImpl;
+ evaluator<RhsArg> m_rhsImpl;
};
// "sparse ^ dense"
@@ -553,10 +553,10 @@ struct sparse_conjunction_evaluator<XprType, IteratorBased, IndexBased>
{
protected:
typedef typename XprType::Functor BinaryOp;
- typedef typename XprType::Lhs Lhs;
- typedef typename XprType::Rhs Rhs;
- typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
- typedef evaluator<Rhs> RhsEvaluator;
+ typedef typename XprType::Lhs LhsArg;
+ typedef typename XprType::Rhs RhsArg;
+ typedef typename evaluator<LhsArg>::InnerIterator LhsIterator;
+ typedef evaluator<RhsArg> RhsEvaluator;
typedef typename XprType::StorageIndex StorageIndex;
typedef typename traits<XprType>::Scalar Scalar;
public:
@@ -564,7 +564,7 @@ public:
class ReverseInnerIterator;
class InnerIterator
{
- enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
+ enum { IsRowMajor = (int(LhsArg::Flags)&RowMajorBit)==RowMajorBit };
public:
@@ -590,16 +590,16 @@ public:
protected:
LhsIterator m_lhsIter;
- const evaluator<Rhs> &m_rhsEval;
+ const evaluator<RhsArg> &m_rhsEval;
const BinaryOp& m_functor;
const Index m_outer;
};
enum {
- CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
// Expose storage order of the sparse expression
- Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit)
+ Flags = (XprType::Flags & ~RowMajorBit) | (int(LhsArg::Flags)&RowMajorBit)
};
explicit sparse_conjunction_evaluator(const XprType& xpr)
@@ -617,8 +617,8 @@ public:
protected:
const BinaryOp m_functor;
- evaluator<Lhs> m_lhsImpl;
- evaluator<Rhs> m_rhsImpl;
+ evaluator<LhsArg> m_lhsImpl;
+ evaluator<RhsArg> m_rhsImpl;
};
}