diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2021-06-24 18:52:17 -0700 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2021-06-24 18:52:17 -0700 |
commit | bffd267d176410a517a0fe9afa6dde99c213c08a (patch) | |
tree | 05812c63b3c5ebea50e5857fb0251dd13e837166 | |
parent | 52a5f9821235e5a9f7e9b3e0198d45d42a1cb267 (diff) | |
Small cleanup: remove the macros EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD and CJMADD. They were effectively unused except on x86, where the change results in identically performing code.
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 4 |
-rw-r--r-- | Eigen/src/Core/arch/MSA/PacketMath.h | 4 |
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 4 |
-rw-r--r-- | Eigen/src/Core/arch/SVE/PacketMath.h | 4 |
-rwxr-xr-x | Eigen/src/Core/arch/ZVector/PacketMath.h | 4 |
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 46 |
6 files changed, 7 insertions, 59 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index a3ebf9ec2..8c42f495c 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -22,10 +22,6 @@ namespace internal { #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD #endif -#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD -#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD -#endif - // NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32 diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h index f03cf61ff..afe8f3375 100644 --- a/Eigen/src/Core/arch/MSA/PacketMath.h +++ b/Eigen/src/Core/arch/MSA/PacketMath.h @@ -28,10 +28,6 @@ namespace internal { #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD #endif -#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD -#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD -#endif - #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32 #endif diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index e1efe9bcb..5e7702a50 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -24,10 +24,6 @@ namespace internal { #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD #endif -#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD -#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD -#endif - #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS #if EIGEN_ARCH_ARM64 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32 diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h index 4877b6d80..9060b372f 100644 --- a/Eigen/src/Core/arch/SVE/PacketMath.h +++ b/Eigen/src/Core/arch/SVE/PacketMath.h @@ -22,10 +22,6 @@ namespace internal #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD #endif -#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD -#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD 
-#endif - #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32 template <typename Scalar, int SVEVectorLength> diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h index 2246439cc..1f55a90a5 100755 --- a/Eigen/src/Core/arch/ZVector/PacketMath.h +++ b/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -22,10 +22,6 @@ namespace internal { #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD #endif -#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD -#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD -#endif - #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32 #endif diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 8362ecc02..1116321a9 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -349,36 +349,6 @@ inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_ computeProductBlockingSizes<LhsScalar,RhsScalar,1,Index>(k, m, n, num_threads); } -#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD - #define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C); -#else - - // FIXME (a bit overkill maybe ?) 
- - template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector { - EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/) - { - c = cj.pmadd(a,b,c); - } - }; - - template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> { - EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t) - { - t = b; t = cj.pmul(a,t); c = padd(c,t); - } - }; - - template<typename CJ, typename A, typename B, typename C, typename T> - EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t) - { - gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t); - } - - #define CJMADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T); -// #define CJMADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T); -#endif - template <typename RhsPacket, typename RhsPacketx4, int registers_taken> struct RhsPanelHelper { private: @@ -2060,14 +2030,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga B_0 = blB[0]; B_1 = blB[1]; - CJMADD(cj,A0,B_0,C0, B_0); - CJMADD(cj,A0,B_1,C1, B_1); - + C0 = cj.pmadd(A0,B_0,C0); + C1 = cj.pmadd(A0,B_1,C1); + B_0 = blB[2]; B_1 = blB[3]; - CJMADD(cj,A0,B_0,C2, B_0); - CJMADD(cj,A0,B_1,C3, B_1); - + C2 = cj.pmadd(A0,B_0,C2); + C3 = cj.pmadd(A0,B_1,C3); + blB += 4; } res(i, j2 + 0) += alpha * C0; @@ -2092,7 +2062,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga { LhsScalar A0 = blA[k]; RhsScalar B_0 = blB[k]; - CJMADD(cj, A0, B_0, C0, B_0); + C0 = cj.pmadd(A0, B_0, C0); } res(i, j2) += alpha * C0; } @@ -2101,8 +2071,6 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga } -#undef CJMADD - // pack a block of the lhs // The traversal is as follow (mr==4): // 0 4 8 12 ... |