diff options
author | 2015-02-12 21:48:41 +0100 | |
---|---|---|
committer | 2015-02-12 21:48:41 +0100 | |
commit | 0918c51e600bed36a53448fa276b01387119a3c2 (patch) | |
tree | 8183416a03dc22d1cc37b886e0e8f0dd0afe4e85 /Eigen/src/Core/util | |
parent | 409547a0c83604b6dea70b8523674ac19e2af958 (diff) | |
parent | 4470c9997559522e9b81810948d9783b58444ae4 (diff) |
merge Tensor module within Eigen/unsupported and update gemv BLAS wrapper
Diffstat (limited to 'Eigen/src/Core/util')
-rw-r--r-- | Eigen/src/Core/util/BlasUtil.h | 147 | ||||
-rw-r--r-- | Eigen/src/Core/util/Macros.h | 15 | ||||
-rw-r--r-- | Eigen/src/Core/util/Memory.h | 48 | ||||
-rw-r--r-- | Eigen/src/Core/util/XprHelper.h | 15 |
4 files changed, 178 insertions, 47 deletions
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 9f9115c2a..3ec55fad2 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -18,13 +18,13 @@ namespace Eigen { namespace internal { // forward declarations -template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false> +template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false> struct gebp_kernel; -template<typename Scalar, typename Index, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false> +template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false> struct gemm_pack_rhs; -template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false> +template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false> struct gemm_pack_lhs; template< @@ -34,7 +34,9 @@ template< int ResStorageOrder> struct general_matrix_matrix_product; -template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version=Specialized> +template<typename Index, + typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs, + typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized> struct general_matrix_vector_product; @@ -117,32 +119,133 @@ template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::R static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); } }; + +template<typename Scalar, typename Index> +class BlasVectorMapper { + public: + EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {} + + EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + return m_data[i]; + } + template <typename Packet, int AlignmentType> + EIGEN_ALWAYS_INLINE Packet load(Index i) const { + return ploadt<Packet, AlignmentType>(m_data + i); + } + + template <typename Packet> + bool aligned(Index i) const { + return (size_t(m_data+i)%sizeof(Packet))==0; + } + + protected: + Scalar* m_data; +}; + +template<typename Scalar, typename Index, int AlignmentType> +class BlasLinearMapper { + public: + typedef typename packet_traits<Scalar>::type Packet; + typedef typename packet_traits<Scalar>::half HalfPacket; + + EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {} + + EIGEN_ALWAYS_INLINE void prefetch(int i) const { + internal::prefetch(&operator()(i)); + } + + EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const { + return m_data[i]; + } + + EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { + return ploadt<Packet, AlignmentType>(m_data + i); + } + + EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { + return ploadt<HalfPacket, AlignmentType>(m_data + i); + } + + EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { + pstoret<Scalar, Packet, AlignmentType>(m_data + i, p); + } + + protected: + Scalar *m_data; +}; + // Lightweight helper class to access matrix coefficients. -// Yes, this is somehow redundant with Map<>, but this version is much much lighter, -// and so I hope better compilation performance (time and code quality). -template<typename Scalar, typename Index, int StorageOrder> -class blas_data_mapper -{ +template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned> +class blas_data_mapper { public: - blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {} - EIGEN_STRONG_INLINE Scalar& operator()(Index i, Index j) - { return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; } + typedef typename packet_traits<Scalar>::type Packet; + typedef typename packet_traits<Scalar>::half HalfPacket; + + typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper; + typedef BlasVectorMapper<Scalar, Index> VectorMapper; + + EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {} + + EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType> + getSubMapper(Index i, Index j) const { + return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride); + } + + EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + return LinearMapper(&operator()(i, j)); + } + + EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { + return VectorMapper(&operator()(i, j)); + } + + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const { + return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; + } + + EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { + return ploadt<Packet, AlignmentType>(&operator()(i, j)); + } + + EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { + return ploadt<HalfPacket, AlignmentType>(&operator()(i, j)); + } + + template<typename SubPacket> + EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const { + pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride); + } + + template<typename SubPacket> + EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const { + return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride); + } + + const Index stride() const { return m_stride; } + + Index firstAligned(Index size) const { + if (size_t(m_data)%sizeof(Scalar)) { + return -1; + } + return internal::first_aligned(m_data, size); + } + protected: - Scalar* EIGEN_RESTRICT m_data; - Index m_stride; + Scalar* EIGEN_RESTRICT m_data; + const Index m_stride; }; // lightweight helper class to access matrix coefficients (const version) template<typename Scalar, typename Index, int StorageOrder> -class const_blas_data_mapper -{ +class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> { public: - const_blas_data_mapper(const Scalar* data, Index stride) : m_data(data), m_stride(stride) {} - EIGEN_STRONG_INLINE const Scalar& operator()(Index i, Index j) const - { return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; } - protected: - const Scalar* EIGEN_RESTRICT m_data; - Index m_stride; + EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {} + + EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const { + return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride); + } }; diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 27167d3dc..07c528a2f 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -382,6 +382,17 @@ #define EIGEN_HAVE_RVALUE_REFERENCES #endif +// Does the compiler support variadic templates? +#if __cplusplus > 199711L +#define EIGEN_HAS_VARIADIC_TEMPLATES 1 +#endif + +// Does the compiler support const expressions? +#if (defined(__plusplus) && __cplusplus >= 201402L) || \ + EIGEN_GNUC_AT_LEAST(4,9) +#define EIGEN_HAS_CONSTEXPR 1 +#endif + /** Allows to disable some optimizations which might affect the accuracy of the result. * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. * They currently include: @@ -546,7 +557,9 @@ namespace Eigen { * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link * vectorized and non-vectorized code. */ -#if EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM +#if (defined __CUDACC__) + #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n) +#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) #elif EIGEN_COMP_MSVC #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n)) diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index bacf236fb..16f8cc1b0 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -143,8 +143,8 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = *** Implementation of generic aligned realloc (when no realloc can be used)*** *****************************************************************************/ -void* aligned_malloc(std::size_t size); -void aligned_free(void *ptr); +EIGEN_DEVICE_FUNC void* aligned_malloc(std::size_t size); +EIGEN_DEVICE_FUNC void aligned_free(void *ptr); /** \internal * \brief Reallocates aligned memory. @@ -185,33 +185,33 @@ inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size) *****************************************************************************/ #ifdef EIGEN_NO_MALLOC -inline void check_that_malloc_is_allowed() +EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() { eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)"); } #elif defined EIGEN_RUNTIME_NO_MALLOC -inline bool is_malloc_allowed_impl(bool update, bool new_value = false) +EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false) { static bool value = true; if (update == 1) value = new_value; return value; } -inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); } -inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); } -inline void check_that_malloc_is_allowed() +EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); } +EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); } +EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() { eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)"); } #else -inline void check_that_malloc_is_allowed() +EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {} #endif /** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements. * On allocation error, the returned pointer is null, and std::bad_alloc is thrown. */ -inline void* aligned_malloc(size_t size) +EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size) { check_that_malloc_is_allowed(); @@ -237,7 +237,7 @@ inline void* aligned_malloc(size_t size) } /** \internal Frees memory allocated with aligned_malloc. */ -inline void aligned_free(void *ptr) +EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) { #if !EIGEN_ALIGN std::free(ptr); @@ -298,12 +298,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size) /** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned. * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown. */ -template<bool Align> inline void* conditional_aligned_malloc(size_t size) +template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(size_t size) { return aligned_malloc(size); } -template<> inline void* conditional_aligned_malloc<false>(size_t size) +template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(size_t size) { check_that_malloc_is_allowed(); @@ -314,12 +314,12 @@ template<> inline void* conditional_aligned_malloc<false>(size_t size) } /** \internal Frees memory allocated with conditional_aligned_malloc */ -template<bool Align> inline void conditional_aligned_free(void *ptr) +template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr) { aligned_free(ptr); } -template<> inline void conditional_aligned_free<false>(void *ptr) +template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr) { std::free(ptr); } @@ -341,7 +341,7 @@ template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new /** \internal Destructs the elements of an array. * The \a size parameters tells on how many objects to call the destructor of T. */ -template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size) +template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, size_t size) { // always destruct an array starting from the end. if(ptr) @@ -351,7 +351,7 @@ template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size) /** \internal Constructs the elements of an array. * The \a size parameter tells on how many objects to call the constructor of T. */ -template<typename T> inline T* construct_elements_of_array(T *ptr, size_t size) +template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, size_t size) { size_t i; EIGEN_TRY @@ -371,7 +371,7 @@ template<typename T> inline T* construct_elements_of_array(T *ptr, size_t size) *****************************************************************************/ template<typename T> -EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size) +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size) { if(size > size_t(-1) / sizeof(T)) throw_std_bad_alloc(); @@ -381,7 +381,7 @@ EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size) * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown. * The default constructor of T is called. */ -template<typename T> inline T* aligned_new(size_t size) +template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size) { check_size_for_overflow<T>(size); T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size)); @@ -396,7 +396,7 @@ template<typename T> inline T* aligned_new(size_t size) } } -template<typename T, bool Align> inline T* conditional_aligned_new(size_t size) +template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(size_t size) { check_size_for_overflow<T>(size); T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size)); @@ -414,7 +414,7 @@ template<typename T, bool Align> inline T* conditional_aligned_new(size_t size) /** \internal Deletes objects constructed with aligned_new * The \a size parameters tells on how many objects to call the destructor of T. */ -template<typename T> inline void aligned_delete(T *ptr, size_t size) +template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, size_t size) { destruct_elements_of_array<T>(ptr, size); aligned_free(ptr); @@ -423,13 +423,13 @@ template<typename T> inline void aligned_delete(T *ptr, size_t size) /** \internal Deletes objects constructed with conditional_aligned_new * The \a size parameters tells on how many objects to call the destructor of T. */ -template<typename T, bool Align> inline void conditional_aligned_delete(T *ptr, size_t size) +template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, size_t size) { destruct_elements_of_array<T>(ptr, size); conditional_aligned_free<Align>(ptr); } -template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size) +template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size) { check_size_for_overflow<T>(new_size); check_size_for_overflow<T>(old_size); @@ -452,7 +452,7 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pt } -template<typename T, bool Align> inline T* conditional_aligned_new_auto(size_t size) +template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(size_t size) { if(size==0) return 0; // short-cut. Also fixes Bug 884 @@ -495,7 +495,7 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto( return result; } -template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *ptr, size_t size) +template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, size_t size) { if(NumTraits<T>::RequireInitialization) destruct_elements_of_array<T>(ptr, size); diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 09866ad8d..3ac37df58 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -446,6 +446,21 @@ template<typename XprType, typename CastType> struct cast_return_type const XprType&,CastType>::type type; }; +template <typename A, typename B> struct promote_storage_type; + +template <typename A> struct promote_storage_type<A,A> +{ + typedef A ret; +}; +template <typename A> struct promote_storage_type<A, const A> +{ + typedef A ret; +}; +template <typename A> struct promote_storage_type<const A, A> +{ + typedef A ret; +}; + /** \internal Specify the "storage kind" of applying a coefficient-wise * binary operations between two expressions of kinds A and B respectively. * The template parameter Functor permits to specialize the resulting storage kind wrt to |