From ee06f786797b74e75e6c2eae1209fa6389c49876 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 4 Nov 2014 21:58:52 +0100 Subject: Introduce unified macros to identify compiler, OS, and architecture. They are all defined in util/Macros.h and prefixed with EIGEN_COMP_, EIGEN_OS_, and EIGEN_ARCH_ respectively. --- Eigen/src/Core/arch/SSE/PacketMath.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'Eigen/src/Core/arch/SSE/PacketMath.h') diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 380afe77c..28427c308 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -28,7 +28,7 @@ namespace internal { #endif #endif -#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER) +#if defined EIGEN_VECTORIZE_AVX && EIGEN_COMP_GNUC_STRICT // With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot // have overloads for both types without linking error. // One solution is to increase ABI version using -fabi-version=4 (or greater). @@ -143,7 +143,7 @@ template<> struct unpacket_traits { typedef float type; enum {size=4} template<> struct unpacket_traits { typedef double type; enum {size=2}; typedef Packet2d half; }; template<> struct unpacket_traits { typedef int type; enum {size=4}; typedef Packet4i half; }; -#if defined(_MSC_VER) && (_MSC_VER==1500) +#if EIGEN_COMP_MSVC==1500 // Workaround MSVC 9 internal compiler error. // TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode // TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)). @@ -161,7 +161,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { re // Using inline assembly is also not an option because then gcc fails to reorder properly the instructions. 
// Therefore, we introduced the pload1 functions to be used in product kernels for which bug 203 does not apply. // Also note that with AVX, we want it to generate a vbroadcastss. -#if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__) +#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__) template<> EIGEN_STRONG_INLINE Packet4f pload1(const float *from) { return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0); } @@ -278,10 +278,10 @@ template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { E template<> EIGEN_STRONG_INLINE Packet2d pload(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); } template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast(from)); } -#if defined(_MSC_VER) +#if EIGEN_COMP_MSVC template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD - #if (_MSC_VER==1600) + #if (EIGEN_COMP_MSVC==1600) // NOTE Some versions of MSVC10 generate bad code when using _mm_loadu_ps // (i.e., it does not generate an unaligned load!!) // TODO On most architectures this version should also be faster than a single _mm_loadu_ps @@ -303,11 +303,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { E // TODO: do the same for MSVC (ICC is compatible) // NOTE: with the code below, MSVC's compiler crashes!
-#if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64) && EIGEN_GNUC_AT_LEAST(4, 8))) +#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386 || (EIGEN_ARCH_x86_64 && EIGEN_GNUC_AT_LEAST(4, 8))) // bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1 -#elif defined(__clang__) +#elif EIGEN_COMP_CLANG // bug 201: Segfaults in _mm_loadh_pd with clang 2.8 #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0 @@ -435,13 +435,13 @@ template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_p template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } #endif -#if defined(_MSC_VER) && defined(_WIN64) && !defined(__INTEL_COMPILER) +#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 // The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 // Direct access of the struct members fixed bug #62.
template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { return a.m128_f32[0]; } template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { return a.m128d_f64[0]; } template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; } -#elif defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#elif EIGEN_COMP_MSVC_STRICT // The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; } template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; } @@ -676,7 +676,7 @@ template<> EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) #endif // EIGEN_VECTORIZE_SSE4_1 } -#if (defined __GNUC__) +#if EIGEN_COMP_GNUC // template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) // { // Packet4f res = b; -- cgit v1.2.3 From 340b8afb14bb06788570ba22ba4ccba674402f09 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sat, 31 Jan 2015 14:15:57 -0500 Subject: bug #936, patch 1.5/3: rename _FUSED_ macros to _SINGLE_INSTRUCTION_, because this is what they are about. "Fused" means "no intermediate rounding between the mul and the add, only one rounding at the end". Instead, what we are concerned about here is whether a temporary register is needed, i.e. whether the MUL and ADD are separate instructions. Concretely, on ARM NEON, a single-instruction mul-add is always available: VMLA. But a true fused mul-add is only available on VFPv4: VFMA. 
--- Eigen/src/Core/arch/AVX/PacketMath.h | 4 ++-- Eigen/src/Core/arch/AltiVec/PacketMath.h | 8 ++++---- Eigen/src/Core/arch/NEON/PacketMath.h | 8 ++++---- Eigen/src/Core/arch/SSE/PacketMath.h | 4 ++-- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 12 ++++++------ 5 files changed, 18 insertions(+), 18 deletions(-) (limited to 'Eigen/src/Core/arch/SSE/PacketMath.h') diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index e2376bd1f..1d8c674a6 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -23,8 +23,8 @@ namespace internal { #endif #ifdef EIGEN_VECTORIZE_FMA -#ifndef EIGEN_HAS_FUSED_MADD -#define EIGEN_HAS_FUSED_MADD 1 +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1 #endif #endif diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 27df5a025..578b303a0 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -18,12 +18,12 @@ namespace internal { #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 #endif -#ifndef EIGEN_HAS_FUSED_MADD -#define EIGEN_HAS_FUSED_MADD 1 +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1 #endif -#ifndef EIGEN_HAS_FUSED_CJMADD -#define EIGEN_HAS_FUSED_CJMADD +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD #endif // NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16 diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 5a6eb8c1d..9cfb9c358 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -20,12 +20,12 @@ namespace internal { #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 #endif -#ifndef EIGEN_HAS_FUSED_MADD -#define EIGEN_HAS_FUSED_MADD 1 +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1 #endif 
-#ifndef EIGEN_HAS_FUSED_CJMADD -#define EIGEN_HAS_FUSED_CJMADD +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD #endif // FIXME NEON has 16 quad registers, but since the current register allocator diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 28427c308..202aaa72f 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -23,8 +23,8 @@ namespace internal { #endif #ifdef EIGEN_VECTORIZE_FMA -#ifndef EIGEN_HAS_FUSED_MADD -#define EIGEN_HAS_FUSED_MADD 1 +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1 #endif #endif diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index ae2fd9006..b5f06d831 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -120,7 +120,7 @@ inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n) computeProductBlockingSizes(k, m, n); } -#ifdef EIGEN_HAS_FUSED_CJMADD +#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD #define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C); #else @@ -182,7 +182,7 @@ public: nr = 4, // register block size along the M direction (currently, this one cannot be modified) -#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) +#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) // we assume 16 registers mr = 3*LhsPacketSize, #else @@ -248,7 +248,7 @@ public: // let gcc allocate the register in which to store the result of the pmul // (in the case where there is no FMA) gcc fails to figure out how to avoid // spilling register. 
-#ifdef EIGEN_HAS_FUSED_MADD +#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD EIGEN_UNUSED_VARIABLE(tmp); c = pmadd(a,b,c); #else @@ -290,7 +290,7 @@ public: NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, nr = 4, -#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) +#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) // we assume 16 registers mr = 3*LhsPacketSize, #else @@ -353,7 +353,7 @@ public: EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const { -#ifdef EIGEN_HAS_FUSED_MADD +#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD EIGEN_UNUSED_VARIABLE(tmp); c.v = pmadd(a.v,b,c.v); #else @@ -637,7 +637,7 @@ public: EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const { -#ifdef EIGEN_HAS_FUSED_MADD +#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD EIGEN_UNUSED_VARIABLE(tmp); c.v = pmadd(a,b.v,c.v); #else -- cgit v1.2.3 From 0f216136980503c3792a90e382b4d6bbdbb870c0 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Fri, 30 Jan 2015 17:44:26 -0500 Subject: bug #936, patch 2/3: Remove EIGEN_VECTORIZE_FMA, was redundant with EIGEN_HAS_SINGLE_INSTRUCTION_MADD --- Eigen/Core | 4 +--- Eigen/src/Core/arch/AVX/PacketMath.h | 4 ++-- Eigen/src/Core/arch/AltiVec/PacketMath.h | 2 +- Eigen/src/Core/arch/NEON/PacketMath.h | 2 +- Eigen/src/Core/arch/SSE/PacketMath.h | 2 +- 5 files changed, 6 insertions(+), 8 deletions(-) (limited to 'Eigen/src/Core/arch/SSE/PacketMath.h') diff --git a/Eigen/Core b/Eigen/Core index dcb20bfd0..b5af63623 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -125,9 +125,7 @@ #define EIGEN_VECTORIZE_SSE4_1 #define EIGEN_VECTORIZE_SSE4_2 #endif - #ifdef __FMA__ - #define EIGEN_VECTORIZE_FMA - #endif + // include files // This extern "C" works around a MINGW-w64 compilation issue diff --git 
a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 1d8c674a6..485bac10b 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -22,9 +22,9 @@ namespace internal { #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*)) #endif -#ifdef EIGEN_VECTORIZE_FMA +#ifdef __FMA__ #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD -#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1 +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD #endif #endif diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 578b303a0..6b68fc7a5 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -19,7 +19,7 @@ namespace internal { #endif #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD -#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1 +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD #endif #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 9cfb9c358..71255ac85 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -21,7 +21,7 @@ namespace internal { #endif #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD -#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1 +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD #endif #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 202aaa72f..3f6fb0254 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -22,7 +22,7 @@ namespace internal { #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*)) #endif -#ifdef EIGEN_VECTORIZE_FMA +#ifdef __FMA__ #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1 #endif -- cgit v1.2.3