author    Gael Guennebaud <g.gael@free.fr>  2013-04-19 11:21:39 +0200
committer Gael Guennebaud <g.gael@free.fr>  2013-04-19 11:21:39 +0200
commit    9cd2d14005def8e7df0b0bf5fd6eb51f8a6591e9 (patch)
tree      ca4df13b58e923bdebd9d5f59aecda9d1e30ca58 /Eigen/src/Core/util
parent    4e2e615a7c2c719d2d708ab32840bad353322d8c (diff)
parent    46755648ec341aa5e0283b47456108bb2897b1b3 (diff)
merge with default branch
Diffstat (limited to 'Eigen/src/Core/util')
-rw-r--r--  Eigen/src/Core/util/Macros.h  |  2
-rw-r--r--  Eigen/src/Core/util/Memory.h  | 35
-rw-r--r--  Eigen/src/Core/util/Meta.h    | 20
3 files changed, 42 insertions(+), 15 deletions(-)
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index bf6a9293c..64348cd16 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 1
-#define EIGEN_MINOR_VERSION 90
+#define EIGEN_MINOR_VERSION 91
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
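
The two version macros above are consumed by EIGEN_VERSION_AT_LEAST, so client code can test for this development release at compile time. A minimal usage sketch (the printed strings are illustrative):

    #include <Eigen/Core>
    #include <cstdio>

    int main()
    {
    #if EIGEN_VERSION_AT_LEAST(3,1,91)
      std::printf("built against Eigen >= 3.1.91\n");  // true after this patch
    #else
      std::printf("built against an older Eigen\n");
    #endif
    }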
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index b03bc3701..3ca666fd9 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -19,6 +19,10 @@
#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H
+#ifndef EIGEN_MALLOC_ALREADY_ALIGNED
+
+// Try to determine automatically if malloc is already aligned.
+
// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
@@ -27,7 +31,7 @@
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
- && defined(__LP64__)
+ && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ )
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
@@ -52,6 +56,8 @@
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif
+#endif
+
#if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
&& (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
#define EIGEN_HAS_POSIX_MEMALIGN 1
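
Wrapping the detection in #ifndef EIGEN_MALLOC_ALREADY_ALIGNED means the heuristic only runs when the user has not already decided for themselves, e.g. when a sanitizer build invalidates the glibc assumption. A minimal sketch of such an override, assuming it is placed before the first Eigen include:

    // Tell Eigen not to trust malloc's alignment and to use its own
    // aligned allocation paths instead; must precede any Eigen header.
    #define EIGEN_MALLOC_ALREADY_ALIGNED 0
    #include <Eigen/Core>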
@@ -88,11 +94,11 @@ inline void throw_std_bad_alloc()
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
*/
-inline void* handmade_aligned_malloc(size_t size)
+inline void* handmade_aligned_malloc(std::size_t size)
{
void *original = std::malloc(size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
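
The scheme over-allocates by 16 bytes, rounds the raw pointer up to the next 16-byte boundary, and stores the raw pointer in the word just below the aligned address so the matching free can recover it. A standalone sketch of the same trick, independent of Eigen's internal API:

    #include <cstdio>
    #include <cstdlib>

    // Standalone re-implementation of the over-allocate-and-stash trick.
    // malloc's own 8/16-byte alignment guarantees the word below the
    // aligned address still lies inside the allocated block.
    static void* demo_aligned_malloc(std::size_t size)
    {
      void* original = std::malloc(size + 16);
      if (original == 0) return 0;
      void* aligned = reinterpret_cast<void*>(
          (reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
      *(reinterpret_cast<void**>(aligned) - 1) = original;  // stash raw pointer
      return aligned;
    }

    static void demo_aligned_free(void* ptr)
    {
      if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));  // recover raw pointer
    }

    int main()
    {
      void* p = demo_aligned_malloc(100);
      std::printf("16-byte aligned: %d\n",
                  int((reinterpret_cast<std::size_t>(p) & 15) == 0));
      demo_aligned_free(p);
    }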
@@ -108,13 +114,18 @@ inline void handmade_aligned_free(void *ptr)
* Since we know that our handmade version is based on std::realloc
* we can use std::realloc to implement efficient reallocation.
*/
-inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
+inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
if (ptr == 0) return handmade_aligned_malloc(size);
void *original = *(reinterpret_cast<void**>(ptr) - 1);
+ std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
original = std::realloc(original,size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
+ void *previous_aligned = static_cast<char *>(original)+previous_offset;
+ if(aligned!=previous_aligned)
+ std::memmove(aligned, previous_aligned, size);
+
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
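
The added memmove covers the case where std::realloc relocates the block to an address with a different 16-byte phase: realloc copies the payload verbatim to original + previous_offset, while the header trick expects it at the freshly computed aligned address. A small arithmetic illustration with made-up addresses:

    #include <cstddef>
    #include <cstdio>

    int main()
    {
      // Old raw block at 0x1004: payload was aligned to 0x1010, offset 12.
      std::size_t old_raw = 0x1004;
      std::size_t previous_offset = ((old_raw & ~std::size_t(15)) + 16) - old_raw;  // 12
      // realloc relocates the raw block to 0x2008 and copies the payload
      // verbatim, so the data now sits at 0x2008 + 12 = 0x2014 ...
      std::size_t new_raw = 0x2008;
      std::size_t data_at = new_raw + previous_offset;
      // ... while the new aligned slot is 0x2010: hence the memmove.
      std::size_t new_aligned = (new_raw & ~std::size_t(15)) + 16;
      std::printf("data at %#zx, aligned slot at %#zx, move needed: %d\n",
                  data_at, new_aligned, int(data_at != new_aligned));
    }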
@@ -123,7 +134,7 @@ inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/
-void* aligned_malloc(size_t size);
+void* aligned_malloc(std::size_t size);
void aligned_free(void *ptr);
/** \internal
@@ -227,7 +238,7 @@ inline void aligned_free(void *ptr)
std::free(ptr);
#elif EIGEN_HAS_MM_MALLOC
_mm_free(ptr);
- #elif defined(_MSC_VER)
+ #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
_aligned_free(ptr);
#else
handmade_aligned_free(ptr);
@@ -446,7 +457,6 @@ template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *
template<typename Scalar, typename Index>
static inline Index first_aligned(const Scalar* array, Index size)
{
- typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size,
PacketAlignedMask = PacketSize-1
};
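
For reference, first_aligned returns the index of the first array element that sits on a packet boundary (the removed Packet typedef was simply unused). A standalone sketch of the index computation for a hypothetical 4-float packet; this mirrors the idea, not Eigen's exact code:

    #include <cstddef>
    #include <cstdio>

    // Hypothetical 4-float (16-byte) packet; PacketAlignedMask = PacketSize-1.
    static std::ptrdiff_t demo_first_aligned(const float* array, std::ptrdiff_t size)
    {
      const std::ptrdiff_t PacketSize = 4, PacketAlignedMask = PacketSize - 1;
      std::size_t addr = reinterpret_cast<std::size_t>(array);
      if (addr & (sizeof(float) - 1))
        return size;  // element itself misaligned: no index is packet-aligned
      std::ptrdiff_t first =
          (PacketSize - std::ptrdiff_t((addr / sizeof(float)) & PacketAlignedMask))
          & PacketAlignedMask;
      return first < size ? first : size;
    }

    int main()
    {
      float buf[16];
      // Deliberately offset by one element; the result depends on buf's address.
      std::printf("first aligned index: %td\n", demo_first_aligned(buf + 1, 15));
    }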
@@ -745,11 +755,16 @@ public:
# if defined(__PIC__) && defined(__i386__)
// Case for x86 with PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+ __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+# elif defined(__PIC__) && defined(__x86_64__)
+ // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
+ // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
# else
// Case for x86_64 or x86 w/o PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
+ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
# endif
# elif defined(_MSC_VER)
# if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) )
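
The new "0"/"2" input constraints tie the inputs to the same registers as outputs 0 and 2 (EAX/ECX), which is what cpuid actually requires, and the PIC variants preserve EBX/RBX through a scratch register. A sketch of exercising such a macro, here a verbatim copy of the non-PIC variant above, so GCC/Clang on x86 or x86-64 only:

    #include <cstdio>
    #include <cstring>

    // Copy of the non-PIC variant above (GCC/Clang, x86/x86-64 only).
    #define DEMO_CPUID(abcd,func,id) \
      __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );

    int main()
    {
      int abcd[4] = {0,0,0,0};
      DEMO_CPUID(abcd, 0, 0);  // leaf 0: highest leaf in EAX, vendor in EBX/EDX/ECX
      char vendor[13] = {0};
      std::memcpy(vendor + 0, &abcd[1], 4);  // EBX
      std::memcpy(vendor + 4, &abcd[3], 4);  // EDX
      std::memcpy(vendor + 8, &abcd[2], 4);  // ECX
      std::printf("vendor: %s\n", vendor);   // e.g. "GenuineIntel"
    }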
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index a5f31164d..71d587108 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -186,23 +186,35 @@ template<int Y, int InfX, int SupX>
class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
/** \internal determines whether the product of two numeric types is allowed and what the return type is */
-template<typename T, typename U> struct scalar_product_traits;
+template<typename T, typename U> struct scalar_product_traits
+{
+ enum { Defined = 0 };
+};
template<typename T> struct scalar_product_traits<T,T>
{
- //enum { Cost = NumTraits<T>::MulCost };
+ enum {
+ // Cost = NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef T ReturnType;
};
template<typename T> struct scalar_product_traits<T,std::complex<T> >
{
- //enum { Cost = 2*NumTraits<T>::MulCost };
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef std::complex<T> ReturnType;
};
template<typename T> struct scalar_product_traits<std::complex<T>, T>
{
- //enum { Cost = 2*NumTraits<T>::MulCost };
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef std::complex<T> ReturnType;
};
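
Defining the primary template with Defined = 0 (rather than leaving it declared-only) lets compile-time code ask whether a scalar product is allowed without a hard error on unsupported pairs. A standalone sketch of the pattern with illustrative names:

    #include <complex>
    #include <cstdio>

    // Primary template: product not allowed. Specializations opt in and
    // expose the result type, mirroring scalar_product_traits above.
    template<typename T, typename U> struct product_traits { enum { Defined = 0 }; };
    template<typename T> struct product_traits<T,T>
    { enum { Defined = 1 }; typedef T ReturnType; };
    template<typename T> struct product_traits<T,std::complex<T> >
    { enum { Defined = 1 }; typedef std::complex<T> ReturnType; };
    template<typename T> struct product_traits<std::complex<T>,T>
    { enum { Defined = 1 }; typedef std::complex<T> ReturnType; };

    int main()
    {
      std::printf("%d %d\n",
                  int(product_traits<float, std::complex<float> >::Defined),  // 1
                  int(product_traits<float, double>::Defined));               // 0
    }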