diff options
author | Gael Guennebaud <g.gael@free.fr> | 2017-02-21 17:04:10 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2017-02-21 17:04:10 +0100 |
commit | b0f55ef85a900a130fe244a0baacd0247db0cb3b (patch) | |
tree | 28b56b9d0cc805d8410ef138c5c5365d5b816f79 /Eigen/src/Core | |
parent | d29e9d71192240b7daa5efd477bb72a205b0d9fe (diff) | |
parent | 76687f385c80a4d576d4fadeb271a94d9783b194 (diff) |
merge
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/ArithmeticSequence.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/DenseBase.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/DenseStorage.h | 29 | ||||
-rw-r--r-- | Eigen/src/Core/IO.h | 14 | ||||
-rw-r--r-- | Eigen/src/Core/IndexedView.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/PlainObjectBase.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 52 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/products/Parallelizer.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixMatrix.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/util/Macros.h | 16 | ||||
-rwxr-xr-x | Eigen/src/Core/util/Meta.h | 27 |
13 files changed, 108 insertions, 83 deletions
diff --git a/Eigen/src/Core/ArithmeticSequence.h b/Eigen/src/Core/ArithmeticSequence.h index 99b954432..ada1571f1 100644 --- a/Eigen/src/Core/ArithmeticSequence.h +++ b/Eigen/src/Core/ArithmeticSequence.h @@ -14,7 +14,7 @@ namespace Eigen { namespace internal { -#if !EIGEN_HAS_CXX11 +#if (!EIGEN_HAS_CXX11) || !((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) template<typename T> struct aseq_negate {}; template<> struct aseq_negate<Index> { @@ -138,7 +138,7 @@ protected: public: -#if EIGEN_HAS_CXX11 +#if EIGEN_HAS_CXX11 && ((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) auto reverse() const -> decltype(Eigen::seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr)) { return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr); } diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 489935b83..b0ec7b7ca 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -515,7 +515,7 @@ struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> { - EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::Scalar Scalar; typedef typename Kernel::PacketType PacketType; @@ -563,7 +563,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling> { - EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; typedef typename Kernel::PacketType PacketType; diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 9cad1aa1e..cd84b07a5 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -463,7 +463,17 @@ template<typename Derived> class DenseBase EIGEN_DEVICE_FUNC void visit(Visitor& func) const; - inline const WithFormat<Derived> format(const IOFormat& fmt) const; + /** \returns a WithFormat proxy object allowing to print a matrix the with given + * format \a fmt. + * + * See class IOFormat for some examples. + * + * \sa class IOFormat, class WithFormat + */ + inline const WithFormat<Derived> format(const IOFormat& fmt) const + { + return WithFormat<Derived>(derived(), fmt); + } /** \returns the unique coefficient of a 1x1 expression */ EIGEN_DEVICE_FUNC diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index 82201d96a..7958feeb9 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -13,9 +13,9 @@ #define EIGEN_MATRIXSTORAGE_H #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN EIGEN_DENSE_STORAGE_CTOR_PLUGIN; + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN; #else - #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) #endif namespace Eigen { @@ -184,12 +184,16 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt { internal::plain_array<T,Size,_Options> m_data; public: - EIGEN_DEVICE_FUNC DenseStorage() {} + EIGEN_DEVICE_FUNC DenseStorage() { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + } EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()) {} EIGEN_DEVICE_FUNC - DenseStorage(const DenseStorage& other) : m_data(other.m_data) {} + DenseStorage(const DenseStorage& other) : m_data(other.m_data) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + } EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { @@ -197,7 +201,7 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt return *this; } EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols); EIGEN_UNUSED_VARIABLE(size); EIGEN_UNUSED_VARIABLE(rows); @@ -343,7 +347,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0); } EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) @@ -351,6 +355,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam , m_rows(other.m_rows) , m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols) internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data); } EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) @@ -403,7 +408,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size); else m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_rows = rows; m_cols = cols; @@ -422,7 +427,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0); EIGEN_UNUSED_VARIABLE(rows); } @@ -430,6 +435,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols)) , m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows) internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data); } EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) @@ -477,7 +483,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size); else m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_cols = cols; } @@ -495,7 +501,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols); EIGEN_UNUSED_VARIABLE(cols); } @@ -503,6 +509,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols)) , m_rows(other.m_rows) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols) internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data); } EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) @@ -550,7 +557,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size); else m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_rows = rows; } diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h index 644228c3f..da7fd6cce 100644 --- a/Eigen/src/Core/IO.h +++ b/Eigen/src/Core/IO.h @@ -109,20 +109,6 @@ class WithFormat IOFormat m_format; }; -/** \returns a WithFormat proxy object allowing to print a matrix the with given - * format \a fmt. - * - * See class IOFormat for some examples. - * - * \sa class IOFormat, class WithFormat - */ -template<typename Derived> -inline const WithFormat<Derived> -DenseBase<Derived>::format(const IOFormat& fmt) const -{ - return WithFormat<Derived>(derived(), fmt); -} - namespace internal { // NOTE: This helper is kept for backward compatibility with previous code specializing diff --git a/Eigen/src/Core/IndexedView.h b/Eigen/src/Core/IndexedView.h index 63878428e..8c57a277c 100644 --- a/Eigen/src/Core/IndexedView.h +++ b/Eigen/src/Core/IndexedView.h @@ -19,8 +19,8 @@ struct traits<IndexedView<XprType, RowIndices, ColIndices> > : traits<XprType> { enum { - RowsAtCompileTime = array_size<RowIndices>::value, - ColsAtCompileTime = array_size<ColIndices>::value, + RowsAtCompileTime = int(array_size<RowIndices>::value), + ColsAtCompileTime = int(array_size<ColIndices>::value), MaxRowsAtCompileTime = RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) : int(traits<XprType>::MaxRowsAtCompileTime), MaxColsAtCompileTime = ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) : int(traits<XprType>::MaxColsAtCompileTime), @@ -29,8 +29,8 @@ struct traits<IndexedView<XprType, RowIndices, ColIndices> > : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 : XprTypeIsRowMajor, - RowIncr = get_compile_time_incr<RowIndices>::value, - ColIncr = get_compile_time_incr<ColIndices>::value, + RowIncr = int(get_compile_time_incr<RowIndices>::value), + ColIncr = int(get_compile_time_incr<ColIndices>::value), InnerIncr = IsRowMajor ? ColIncr : RowIncr, OuterIncr = IsRowMajor ? RowIncr : ColIncr, @@ -51,7 +51,7 @@ struct traits<IndexedView<XprType, RowIndices, ColIndices> > // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag, // but this is too strict regarding negative strides... - DirectAccessMask = (InnerIncr!=UndefinedIncr && OuterIncr!=UndefinedIncr && InnerIncr>=0 && OuterIncr>=0) ? DirectAccessBit : 0, + DirectAccessMask = (int(InnerIncr)!=UndefinedIncr && int(OuterIncr)!=UndefinedIncr && InnerIncr>=0 && OuterIncr>=0) ? DirectAccessBit : 0, FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0, Flags = (traits<XprType>::Flags & (HereditaryBits | DirectAccessMask)) | FlagsLvalueBit | FlagsRowMajorBit diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 639fb92bf..77f4f6066 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -812,6 +812,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type this->_set_noalias(other); } + // Initialize an arbitrary matrix from an object convertible to the Derived type. + template<typename T> + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Derived& other){ + this->_set_noalias(other); + } + // Initialize an arbitrary matrix from a generic Eigen expression template<typename T, typename OtherDerived> EIGEN_DEVICE_FUNC @@ -834,7 +841,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type this->derived() = r; } - // For fixed -size arrays: + // For fixed-size Array<Scalar,...> template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Scalar& val0, @@ -846,6 +853,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type Base::setConstant(val0); } + // For fixed-size Array<Index,...> template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Index& val0, diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index d392bf3ff..84a56bdcc 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -46,7 +46,7 @@ typedef uint32x4_t Packet4ui; const Packet4f p4f_##NAME = pset1<Packet4f>(X) #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ - const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X)) + const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X)) #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1<Packet4i>(X) @@ -83,7 +83,7 @@ template<> struct packet_traits<float> : default_packet_traits HasSqrt = 0 }; }; -template<> struct packet_traits<int> : default_packet_traits +template<> struct packet_traits<int32_t> : default_packet_traits { typedef Packet4i type; typedef Packet4i half; // Packet2i intrinsics not implemented yet @@ -105,11 +105,11 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); } #endif -template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; -template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; +template<> struct unpacket_traits<Packet4i> { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) { return vdupq_n_s32(from); } template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { @@ -117,7 +117,7 @@ template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1<Packet4f>(a), countdown); } -template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) +template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a) { const int32_t i[] = {0, 1, 2, 3}; Packet4i countdown = vld1q_s32(i); @@ -240,20 +240,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, con } template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) +template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) { float32x2_t lo, hi; lo = vld1_dup_f32(from); hi = vld1_dup_f32(from+1); return vcombine_f32(lo, hi); } -template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) +template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from) { int32x2_t lo, hi; lo = vld1_dup_s32(from); @@ -261,11 +261,11 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) return vcombine_s32(lo, hi); } -template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } -template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } +template<> EIGEN_STRONG_INLINE void pstore<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } -template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } -template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) { @@ -276,7 +276,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa res = vsetq_lane_f32(from[3*stride], res, 3); return res; } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride) { Packet4i res = pset1<Packet4i>(0); res = vsetq_lane_s32(from[0*stride], res, 0); @@ -293,7 +293,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, co to[stride*2] = vgetq_lane_f32(from, 2); to[stride*3] = vgetq_lane_f32(from, 3); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride) { to[stride*0] = vgetq_lane_s32(from, 0); to[stride*1] = vgetq_lane_s32(from, 1); @@ -301,12 +301,12 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const to[stride*3] = vgetq_lane_s32(from, 3); } -template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); } -template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); } +template<> EIGEN_STRONG_INLINE void prefetch<float> (const float* addr) { EIGEN_ARM_PREFETCH(addr); } +template<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) { EIGEN_ARM_PREFETCH(addr); } // FIXME only store the 2 first elements ? -template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } -template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { float32x2_t a_lo, a_hi; @@ -361,7 +361,7 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs) return sum; } -template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, sum; @@ -408,7 +408,7 @@ template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) return vget_lane_f32(prod, 0); } -template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, prod; @@ -436,7 +436,7 @@ template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) return vget_lane_f32(min, 0); } -template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, min; @@ -461,7 +461,7 @@ template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) return vget_lane_f32(max, 0); } -template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, max; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 5cd2794a4..7122efa60 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -148,7 +148,7 @@ struct tribb_kernel ResMapper res(_res, resStride); gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel; - Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer; + Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer((internal::constructor_without_unaligned_array_assert())); // let's process the block per panel of actual_mc x BlockSize, // again, each is split into three parts, etc. diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index 3477d7182..c2f084c82 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -104,13 +104,14 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, // - the sizes are large enough // compute the maximal number of threads from the size of the product: - // FIXME this has to be fine tuned + // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once. Index size = transpose ? rows : cols; - Index pb_max_threads = std::max<Index>(1,size / 32); + Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr); + // compute the maximal number of threads from the total amount of work: double work = static_cast<double>(rows) * static_cast<double>(cols) * static_cast<double>(depth); - double kMinTaskSize = 50000; // Heuristic. + double kMinTaskSize = 50000; // FIXME improve this heuristic. pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize)); // compute the number of threads we are going to use diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index 8a2f7cd78..6ec5a8a0b 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -137,7 +137,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); - Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer; + Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert())); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -284,7 +284,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); - Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer; + Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert())); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index ab0550895..29c796647 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -23,7 +23,7 @@ /// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC #ifdef __GNUC__ - #define EIGEN_COMP_GNUC 1 + #define EIGEN_COMP_GNUC (__GNUC__*10+__GNUC_MINOR__) #else #define EIGEN_COMP_GNUC 0 #endif @@ -80,8 +80,8 @@ // 2015 14 1900 // "15" 15 1900 -/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC -#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC) +/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC or clang-cl +#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG) #define EIGEN_COMP_MSVC_STRICT _MSC_VER #else #define EIGEN_COMP_MSVC_STRICT 0 @@ -349,6 +349,14 @@ # define __has_feature(x) 0 #endif +// Some old compilers do not support template specializations like: +// template<typename T,int N> void foo(const T x[N]); +#if !( EIGEN_COMP_CLANG && ((EIGEN_COMP_CLANG<309) || defined(__apple_build_version__)) || EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<49) +#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 1 +#else +#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0 +#endif + // Upperbound on the C++ version to use. // Expected values are 03, 11, 14, 17, etc. // By default, let's use an arbitrarily large C++ version. @@ -829,7 +837,7 @@ namespace Eigen { // just an empty macro ! #define EIGEN_EMPTY -#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || __CUDACC_VER__) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator =; #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index ee0531b32..90eda6e70 100755 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -97,17 +97,22 @@ template<> struct is_arithmetic<unsigned int> { enum { value = true }; }; template<> struct is_arithmetic<signed long> { enum { value = true }; }; template<> struct is_arithmetic<unsigned long> { enum { value = true }; }; -template<typename T> struct is_integral { enum { value = false }; }; -template<> struct is_integral<bool> { enum { value = true }; }; -template<> struct is_integral<char> { enum { value = true }; }; -template<> struct is_integral<signed char> { enum { value = true }; }; -template<> struct is_integral<unsigned char> { enum { value = true }; }; -template<> struct is_integral<signed short> { enum { value = true }; }; -template<> struct is_integral<unsigned short> { enum { value = true }; }; -template<> struct is_integral<signed int> { enum { value = true }; }; -template<> struct is_integral<unsigned int> { enum { value = true }; }; -template<> struct is_integral<signed long> { enum { value = true }; }; -template<> struct is_integral<unsigned long> { enum { value = true }; }; +#if EIGEN_HAS_CXX11 +using std::is_integral; +#else +template<typename T> struct is_integral { enum { value = false }; }; +template<> struct is_integral<bool> { enum { value = true }; }; +template<> struct is_integral<char> { enum { value = true }; }; +template<> struct is_integral<signed char> { enum { value = true }; }; +template<> struct is_integral<unsigned char> { enum { value = true }; }; +template<> struct is_integral<signed short> { enum { value = true }; }; +template<> struct is_integral<unsigned short> { enum { value = true }; }; +template<> struct is_integral<signed int> { enum { value = true }; }; +template<> struct is_integral<unsigned int> { enum { value = true }; }; +template<> struct is_integral<signed long> { enum { value = true }; }; +template<> struct is_integral<unsigned long> { enum { value = true }; }; +#endif + template <typename T> struct add_const { typedef const T type; }; template <typename T> struct add_const<T&> { typedef T& type; }; |