aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/util
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2015-02-12 21:48:41 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2015-02-12 21:48:41 +0100
commit0918c51e600bed36a53448fa276b01387119a3c2 (patch)
tree8183416a03dc22d1cc37b886e0e8f0dd0afe4e85 /Eigen/src/Core/util
parent409547a0c83604b6dea70b8523674ac19e2af958 (diff)
parent4470c9997559522e9b81810948d9783b58444ae4 (diff)
merge Tensor module within Eigen/unsupported and update gemv BLAS wrapper
Diffstat (limited to 'Eigen/src/Core/util')
-rw-r--r--Eigen/src/Core/util/BlasUtil.h147
-rw-r--r--Eigen/src/Core/util/Macros.h15
-rw-r--r--Eigen/src/Core/util/Memory.h48
-rw-r--r--Eigen/src/Core/util/XprHelper.h15
4 files changed, 178 insertions, 47 deletions
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 9f9115c2a..3ec55fad2 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -18,13 +18,13 @@ namespace Eigen {
namespace internal {
// forward declarations
-template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
struct gebp_kernel;
-template<typename Scalar, typename Index, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
+template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
struct gemm_pack_rhs;
-template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
struct gemm_pack_lhs;
template<
@@ -34,7 +34,9 @@ template<
int ResStorageOrder>
struct general_matrix_matrix_product;
-template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version=Specialized>
+template<typename Index,
+ typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
struct general_matrix_vector_product;
@@ -117,32 +119,133 @@ template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::R
static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }
};
+
+template<typename Scalar, typename Index>
+class BlasVectorMapper {
+ public:
+ EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
+
+ EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
+ return m_data[i];
+ }
+ template <typename Packet, int AlignmentType>
+ EIGEN_ALWAYS_INLINE Packet load(Index i) const {
+ return ploadt<Packet, AlignmentType>(m_data + i);
+ }
+
+ template <typename Packet>
+ bool aligned(Index i) const {
+ return (size_t(m_data+i)%sizeof(Packet))==0;
+ }
+
+ protected:
+ Scalar* m_data;
+};
+
+template<typename Scalar, typename Index, int AlignmentType>
+class BlasLinearMapper {
+ public:
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<Scalar>::half HalfPacket;
+
+ EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
+
+ EIGEN_ALWAYS_INLINE void prefetch(int i) const {
+ internal::prefetch(&operator()(i));
+ }
+
+ EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
+ return m_data[i];
+ }
+
+ EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
+ return ploadt<Packet, AlignmentType>(m_data + i);
+ }
+
+ EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
+ return ploadt<HalfPacket, AlignmentType>(m_data + i);
+ }
+
+ EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const {
+ pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
+ }
+
+ protected:
+ Scalar *m_data;
+};
+
// Lightweight helper class to access matrix coefficients.
-// Yes, this is somehow redundant with Map<>, but this version is much much lighter,
-// and so I hope better compilation performance (time and code quality).
-template<typename Scalar, typename Index, int StorageOrder>
-class blas_data_mapper
-{
+template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned>
+class blas_data_mapper {
public:
- blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
- EIGEN_STRONG_INLINE Scalar& operator()(Index i, Index j)
- { return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; }
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<Scalar>::half HalfPacket;
+
+ typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
+ typedef BlasVectorMapper<Scalar, Index> VectorMapper;
+
+ EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
+
+ EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
+ getSubMapper(Index i, Index j) const {
+ return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
+ }
+
+ EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
+ return LinearMapper(&operator()(i, j));
+ }
+
+ EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
+ return VectorMapper(&operator()(i, j));
+ }
+
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
+ return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
+ }
+
+ EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
+ return ploadt<Packet, AlignmentType>(&operator()(i, j));
+ }
+
+ EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
+ return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
+ }
+
+ template<typename SubPacket>
+ EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const {
+ pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
+ }
+
+ template<typename SubPacket>
+ EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
+ return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
+ }
+
+ const Index stride() const { return m_stride; }
+
+ Index firstAligned(Index size) const {
+ if (size_t(m_data)%sizeof(Scalar)) {
+ return -1;
+ }
+ return internal::first_aligned(m_data, size);
+ }
+
protected:
- Scalar* EIGEN_RESTRICT m_data;
- Index m_stride;
+ Scalar* EIGEN_RESTRICT m_data;
+ const Index m_stride;
};
// lightweight helper class to access matrix coefficients (const version)
template<typename Scalar, typename Index, int StorageOrder>
-class const_blas_data_mapper
-{
+class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
public:
- const_blas_data_mapper(const Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i, Index j) const
- { return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; }
- protected:
- const Scalar* EIGEN_RESTRICT m_data;
- Index m_stride;
+ EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}
+
+ EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const {
+ return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride);
+ }
};
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 27167d3dc..07c528a2f 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -382,6 +382,17 @@
#define EIGEN_HAVE_RVALUE_REFERENCES
#endif
+// Does the compiler support variadic templates?
+#if __cplusplus > 199711L
+#define EIGEN_HAS_VARIADIC_TEMPLATES 1
+#endif
+
+// Does the compiler support const expressions?
+#if (defined(__plusplus) && __cplusplus >= 201402L) || \
+ EIGEN_GNUC_AT_LEAST(4,9)
+#define EIGEN_HAS_CONSTEXPR 1
+#endif
+
/** Allows to disable some optimizations which might affect the accuracy of the result.
* Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
* They currently include:
@@ -546,7 +557,9 @@ namespace Eigen {
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
* vectorized and non-vectorized code.
*/
-#if EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
+#if (defined __CUDACC__)
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
+#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#elif EIGEN_COMP_MSVC
#define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index bacf236fb..16f8cc1b0 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -143,8 +143,8 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/
-void* aligned_malloc(std::size_t size);
-void aligned_free(void *ptr);
+EIGEN_DEVICE_FUNC void* aligned_malloc(std::size_t size);
+EIGEN_DEVICE_FUNC void aligned_free(void *ptr);
/** \internal
* \brief Reallocates aligned memory.
@@ -185,33 +185,33 @@ inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
*****************************************************************************/
#ifdef EIGEN_NO_MALLOC
-inline void check_that_malloc_is_allowed()
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
#elif defined EIGEN_RUNTIME_NO_MALLOC
-inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
{
static bool value = true;
if (update == 1)
value = new_value;
return value;
}
-inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
-inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
-inline void check_that_malloc_is_allowed()
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
+EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
#else
-inline void check_that_malloc_is_allowed()
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{}
#endif
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
* On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
*/
-inline void* aligned_malloc(size_t size)
+EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
{
check_that_malloc_is_allowed();
@@ -237,7 +237,7 @@ inline void* aligned_malloc(size_t size)
}
/** \internal Frees memory allocated with aligned_malloc. */
-inline void aligned_free(void *ptr)
+EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
#if !EIGEN_ALIGN
std::free(ptr);
@@ -298,12 +298,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
* On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
*/
-template<bool Align> inline void* conditional_aligned_malloc(size_t size)
+template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(size_t size)
{
return aligned_malloc(size);
}
-template<> inline void* conditional_aligned_malloc<false>(size_t size)
+template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(size_t size)
{
check_that_malloc_is_allowed();
@@ -314,12 +314,12 @@ template<> inline void* conditional_aligned_malloc<false>(size_t size)
}
/** \internal Frees memory allocated with conditional_aligned_malloc */
-template<bool Align> inline void conditional_aligned_free(void *ptr)
+template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
{
aligned_free(ptr);
}
-template<> inline void conditional_aligned_free<false>(void *ptr)
+template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
std::free(ptr);
}
@@ -341,7 +341,7 @@ template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new
/** \internal Destructs the elements of an array.
* The \a size parameters tells on how many objects to call the destructor of T.
*/
-template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size)
+template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, size_t size)
{
// always destruct an array starting from the end.
if(ptr)
@@ -351,7 +351,7 @@ template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size)
/** \internal Constructs the elements of an array.
* The \a size parameter tells on how many objects to call the constructor of T.
*/
-template<typename T> inline T* construct_elements_of_array(T *ptr, size_t size)
+template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, size_t size)
{
size_t i;
EIGEN_TRY
@@ -371,7 +371,7 @@ template<typename T> inline T* construct_elements_of_array(T *ptr, size_t size)
*****************************************************************************/
template<typename T>
-EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
{
if(size > size_t(-1) / sizeof(T))
throw_std_bad_alloc();
@@ -381,7 +381,7 @@ EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
* On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
* The default constructor of T is called.
*/
-template<typename T> inline T* aligned_new(size_t size)
+template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size)
{
check_size_for_overflow<T>(size);
T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
@@ -396,7 +396,7 @@ template<typename T> inline T* aligned_new(size_t size)
}
}
-template<typename T, bool Align> inline T* conditional_aligned_new(size_t size)
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(size_t size)
{
check_size_for_overflow<T>(size);
T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
@@ -414,7 +414,7 @@ template<typename T, bool Align> inline T* conditional_aligned_new(size_t size)
/** \internal Deletes objects constructed with aligned_new
* The \a size parameters tells on how many objects to call the destructor of T.
*/
-template<typename T> inline void aligned_delete(T *ptr, size_t size)
+template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, size_t size)
{
destruct_elements_of_array<T>(ptr, size);
aligned_free(ptr);
@@ -423,13 +423,13 @@ template<typename T> inline void aligned_delete(T *ptr, size_t size)
/** \internal Deletes objects constructed with conditional_aligned_new
* The \a size parameters tells on how many objects to call the destructor of T.
*/
-template<typename T, bool Align> inline void conditional_aligned_delete(T *ptr, size_t size)
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, size_t size)
{
destruct_elements_of_array<T>(ptr, size);
conditional_aligned_free<Align>(ptr);
}
-template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
{
check_size_for_overflow<T>(new_size);
check_size_for_overflow<T>(old_size);
@@ -452,7 +452,7 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pt
}
-template<typename T, bool Align> inline T* conditional_aligned_new_auto(size_t size)
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(size_t size)
{
if(size==0)
return 0; // short-cut. Also fixes Bug 884
@@ -495,7 +495,7 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(
return result;
}
-template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *ptr, size_t size)
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, size_t size)
{
if(NumTraits<T>::RequireInitialization)
destruct_elements_of_array<T>(ptr, size);
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 09866ad8d..3ac37df58 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -446,6 +446,21 @@ template<typename XprType, typename CastType> struct cast_return_type
const XprType&,CastType>::type type;
};
+template <typename A, typename B> struct promote_storage_type;
+
+template <typename A> struct promote_storage_type<A,A>
+{
+ typedef A ret;
+};
+template <typename A> struct promote_storage_type<A, const A>
+{
+ typedef A ret;
+};
+template <typename A> struct promote_storage_type<const A, A>
+{
+ typedef A ret;
+};
+
/** \internal Specify the "storage kind" of applying a coefficient-wise
* binary operations between two expressions of kinds A and B respectively.
* The template parameter Functor permits to specialize the resulting storage kind wrt to