diff options
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/Complex.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 18 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 20 |
4 files changed, 23 insertions, 23 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 01730c5ee..e2376bd1f 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -135,7 +135,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co #ifdef EIGEN_VECTORIZE_FMA template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { -#if defined(__clang__) || defined(__GNUC__) +#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, // and gcc stupidly generates a vfmadd132ps instruction, // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate @@ -148,7 +148,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& #endif } template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { -#if defined(__clang__) || defined(__GNUC__) +#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG // see above Packet4d res = c; __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 57de400e5..0fdcb0741 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -272,7 +272,7 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) { } //---------- double ---------- -#ifdef __aarch64__ +#if EIGEN_ARCH_ARM64 static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000); @@ -457,7 +457,7 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel) kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v)); kernel.packet[1].v = tmp; } -#endif // __aarch64__ +#endif // EIGEN_ARCH_ARM64 } // end namespace internal diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 472f7c0fe..6c5c669a1 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -49,7 +49,7 @@ typedef uint32x4_t Packet4ui; #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1<Packet4i>(X) -#if defined(__llvm__) && !defined(__clang__) +#if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG //Special treatment for Apple's llvm-gcc, its NEON packet types are unions #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}} #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}} @@ -62,11 +62,11 @@ typedef uint32x4_t Packet4ui; // arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function // which available on LLVM and GCC (at least) -#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__) +#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); #elif defined __pld #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) -#elif !defined(__aarch64__) +#elif !EIGEN_ARCH_ARM64 #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); #else // by default no explicit prefetching @@ -105,7 +105,7 @@ template<> struct packet_traits<int> : default_packet_traits }; }; -#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__) +#if EIGEN_GNUC_AT_MOST(4,4) && !EIGEN_COMP_LLVM // workaround gcc 4.2, 4.3 and 4.4 compilatin issue EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); } EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); } @@ -148,7 +148,9 @@ template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { -#ifndef __aarch64__ +#if EIGEN_ARCH_ARM64 + return vdivq_f32(a,b); +#else Packet4f inv, restep, div; // NEON does not offer a divide instruction, we have to do a reciprocal approximation @@ -167,8 +169,6 @@ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const div = vmulq_f32(a, inv); return div; -#else - return vdivq_f32(a,b); #endif } @@ -490,7 +490,7 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) { } //---------- double ---------- -#ifdef __aarch64__ +#if EIGEN_ARCH_ARM64 typedef float64x2_t Packet2d; typedef float64x1_t Packet1d; @@ -646,7 +646,7 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) { kernel.packet[0] = trn1; kernel.packet[1] = trn2; } -#endif // __aarch64__ +#endif // EIGEN_ARCH_ARM64 } // end namespace internal diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 380afe77c..28427c308 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -28,7 +28,7 @@ namespace internal { #endif #endif -#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER) +#if defined EIGEN_VECTORIZE_AVX && EIGEN_COMP_GNUC_STRICT // With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot // have overloads for both types without linking error. // One solution is to increase ABI version using -fabi-version=4 (or greater). @@ -143,7 +143,7 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4} template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; }; template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; }; -#if defined(_MSC_VER) && (_MSC_VER==1500) +#if EIGEN_COMP_MSVC==1500 // Workaround MSVC 9 internal compiler error. // TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode // TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)). @@ -161,7 +161,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { re // Using inline assembly is also not an option because then gcc fails to reorder properly the instructions. // Therefore, we introduced the pload1 functions to be used in product kernels for which bug 203 does not apply. // Also note that with AVX, we want it to generate a vbroadcastss. -#if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__) +#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__) template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) { return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0); } @@ -278,10 +278,10 @@ template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { E template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); } template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); } -#if defined(_MSC_VER) +#if EIGEN_COMP_MSVC template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD - #if (_MSC_VER==1600) + #if (EIGEN_COMP_MSVC==1600) // NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps // (i.e., it does not generate an unaligned load!! // TODO On most architectures this version should also be faster than a single _mm_loadu_ps @@ -303,11 +303,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E // TODO: do the same for MSVC (ICC is compatible) // NOTE: with the code below, MSVC's compiler crashes! -#if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64) && EIGEN_GNUC_AT_LEAST(4, 8))) +#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386 || (EIGEN_ARCH_x86_64 && EIGEN_GNUC_AT_LEAST(4, 8))) // bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1 -#elif defined(__clang__) +#elif EIGEN_COMP_CLANG // bug 201: Segfaults in __mm_loadh_pd with clang 2.8 #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0 @@ -435,13 +435,13 @@ template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_p template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } #endif -#if defined(_MSC_VER) && defined(_WIN64) && !defined(__INTEL_COMPILER) +#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 // The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 // Direct of the struct members fixed bug #62. template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; } template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; } template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; } -#elif defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#elif EIGEN_COMP_MSVC_STRICT // The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; } template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; } @@ -676,7 +676,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) #endif // EIGEN_VECTORIZE_SSE4_1 } -#if (defined __GNUC__) +#if EIGEN_COMP_GNUC // template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) // { // Packet4f res = b; |