aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <benoitjacob@google.com>2015-02-27 15:30:10 -0500
committerGravatar Benoit Jacob <benoitjacob@google.com>2015-02-27 15:30:10 -0500
commit6466fa63be51b32c493401bb7cf6aa4b2e1ef385 (patch)
tree6aebc98f506ccaf5a6220df703a85817baba5957
parentbf9877a92a4f049d5cbfda1817bac21167b5a324 (diff)
Reimplement the selection between rotating and non-rotating kernels
using templates instead of macros and if()'s. That was needed to fix the build of unit tests on ARM, which I had broken. My bad for not testing earlier.
-rw-r--r--Eigen/src/Core/GenericPacketMath.h7
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h157
2 files changed, 96 insertions, 68 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index afdd6f71e..e48c064ce 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -290,11 +290,8 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet&
template<size_t offset, typename Packet>
struct protate_impl
{
- static Packet run(const Packet& a)
- {
- eigen_assert(false && "unimplemented");
- return a;
- }
+ // Empty so attempts to use this unimplemented path will fail to compile.
+ // Only specializations of this template should be used.
};
/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention,
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 06c7f362e..9061fd936 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -800,6 +800,80 @@ protected:
conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
};
+// helper for the rotating kernel below
+template <typename GebpKernel, bool UseRotatingKernel = GebpKernel::UseRotatingKernel>
+struct PossiblyRotatingKernelHelper
+{
+ // default implementation, not rotating
+
+ typedef typename GebpKernel::Traits Traits;
+ typedef typename Traits::RhsScalar RhsScalar;
+ typedef typename Traits::RhsPacket RhsPacket;
+ typedef typename Traits::AccPacket AccPacket;
+
+ const Traits& traits;
+ PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
+
+
+ template <size_t K, size_t Index>
+ void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
+ {
+ traits.loadRhs(from + (Index+4*K)*Traits::RhsProgress, to);
+ }
+
+ void unrotateResult(AccPacket&,
+ AccPacket&,
+ AccPacket&,
+ AccPacket&)
+ {
+ }
+};
+
+// rotating implementation
+template <typename GebpKernel>
+struct PossiblyRotatingKernelHelper<GebpKernel, true>
+{
+ typedef typename GebpKernel::Traits Traits;
+ typedef typename Traits::RhsScalar RhsScalar;
+ typedef typename Traits::RhsPacket RhsPacket;
+ typedef typename Traits::AccPacket AccPacket;
+
+ const Traits& traits;
+ PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
+
+ template <size_t K, size_t Index>
+ void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
+ {
+ if (Index == 0) {
+ to = pload<RhsPacket>(from + 4*K*Traits::RhsProgress);
+ } else {
+ EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers");
+ to = protate<1>(to);
+ }
+ }
+
+ void unrotateResult(AccPacket& res0,
+ AccPacket& res1,
+ AccPacket& res2,
+ AccPacket& res3)
+ {
+ PacketBlock<AccPacket> resblock;
+ resblock.packet[0] = res0;
+ resblock.packet[1] = res1;
+ resblock.packet[2] = res2;
+ resblock.packet[3] = res3;
+ ptranspose(resblock);
+ resblock.packet[3] = protate<1>(resblock.packet[3]);
+ resblock.packet[2] = protate<2>(resblock.packet[2]);
+ resblock.packet[1] = protate<3>(resblock.packet[1]);
+ ptranspose(resblock);
+ res0 = resblock.packet[0];
+ res1 = resblock.packet[1];
+ res2 = resblock.packet[2];
+ res3 = resblock.packet[3];
+ }
+};
+
/* optimized GEneral packed Block * packed Panel product kernel
*
* Mixing type logic: C += A * B
@@ -833,6 +907,16 @@ struct gebp_kernel
ResPacketSize = Traits::ResPacketSize
};
+
+ static const bool UseRotatingKernel =
+ EIGEN_ARCH_ARM &&
+ internal::is_same<LhsScalar, float>::value &&
+ internal::is_same<RhsScalar, float>::value &&
+ internal::is_same<ResScalar, float>::value &&
+ Traits::LhsPacketSize == 4 &&
+ Traits::RhsPacketSize == 4 &&
+ Traits::ResPacketSize == 4;
+
EIGEN_DONT_INLINE
void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
Index rows, Index depth, Index cols, ResScalar alpha,
@@ -866,6 +950,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
// Usually, make sense only with FMA
if(mr>=3*Traits::LhsProgress)
{
+ PossiblyRotatingKernelHelper<gebp_kernel> possiblyRotatingKernelHelper(traits);
+
// loops on each largest micro horizontal panel of lhs (3*Traits::LhsProgress x depth)
for(Index i=0; i<peeled_mc3; i+=3*Traits::LhsProgress)
{
@@ -901,43 +987,12 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
prefetch(&blB[0]);
LhsPacket A0, A1;
-#define EIGEN_ARCH_PREFERS_ROTATING_KERNEL EIGEN_ARCH_ARM
-
-#if EIGEN_ARCH_PREFERS_ROTATING_KERNEL
- static const bool UseRotatingKernel =
- Traits::LhsPacketSize == 4 &&
- Traits::RhsPacketSize == 4 &&
- Traits::ResPacketSize == 4;
-#endif
-
for(Index k=0; k<peeled_kc; k+=pk)
{
EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
RhsPacket B_0, T0;
LhsPacket A2;
-#define EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N) \
- traits.loadRhs(&blB[(N+4*K)*RhsProgress], B_0);
-
-#if EIGEN_ARCH_PREFERS_ROTATING_KERNEL
-#define EIGEN_GEBP_ONESTEP_LOADRHS(K,N) \
- do { \
- if (UseRotatingKernel) { \
- if (N == 0) { \
- B_0 = pload<RhsPacket>(&blB[(0+4*K)*RhsProgress]); \
- } else { \
- EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers"); \
- B_0 = protate<1>(B_0); \
- } \
- } else { \
- EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N); \
- } \
- } while (false)
-#else
-#define EIGEN_GEBP_ONESTEP_LOADRHS(K,N) \
- EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N)
-#endif
-
#define EIGEN_GEBP_ONESTEP(K) \
do { \
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
@@ -947,19 +1002,19 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
- EIGEN_GEBP_ONESTEP_LOADRHS(K, 0); \
+ possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 0>(B_0, blB); \
traits.madd(A0, B_0, C0, T0); \
traits.madd(A1, B_0, C4, T0); \
traits.madd(A2, B_0, C8, B_0); \
- EIGEN_GEBP_ONESTEP_LOADRHS(K, 1); \
+ possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 1>(B_0, blB); \
traits.madd(A0, B_0, C1, T0); \
traits.madd(A1, B_0, C5, T0); \
traits.madd(A2, B_0, C9, B_0); \
- EIGEN_GEBP_ONESTEP_LOADRHS(K, 2); \
+ possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 2>(B_0, blB); \
traits.madd(A0, B_0, C2, T0); \
traits.madd(A1, B_0, C6, T0); \
traits.madd(A2, B_0, C10, B_0); \
- EIGEN_GEBP_ONESTEP_LOADRHS(K, 3); \
+ possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 3>(B_0, blB); \
traits.madd(A0, B_0, C3 , T0); \
traits.madd(A1, B_0, C7, T0); \
traits.madd(A2, B_0, C11, B_0); \
@@ -992,34 +1047,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
}
#undef EIGEN_GEBP_ONESTEP
-#undef EIGEN_GEBP_ONESTEP_LOADRHS
-#undef EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING
-
-#if EIGEN_ARCH_PREFERS_ROTATING_KERNEL
- if (UseRotatingKernel) {
- #define EIGEN_GEBP_UNROTATE_RESULT(res0, res1, res2, res3) \
- do { \
- PacketBlock<ResPacket> resblock; \
- resblock.packet[0] = res0; \
- resblock.packet[1] = res1; \
- resblock.packet[2] = res2; \
- resblock.packet[3] = res3; \
- ptranspose(resblock); \
- resblock.packet[3] = protate<1>(resblock.packet[3]); \
- resblock.packet[2] = protate<2>(resblock.packet[2]); \
- resblock.packet[1] = protate<3>(resblock.packet[1]); \
- ptranspose(resblock); \
- res0 = resblock.packet[0]; \
- res1 = resblock.packet[1]; \
- res2 = resblock.packet[2]; \
- res3 = resblock.packet[3]; \
- } while (false)
-
- EIGEN_GEBP_UNROTATE_RESULT(C0, C1, C2, C3);
- EIGEN_GEBP_UNROTATE_RESULT(C4, C5, C6, C7);
- EIGEN_GEBP_UNROTATE_RESULT(C8, C9, C10, C11);
- }
-#endif
+
+ possiblyRotatingKernelHelper.unrotateResult(C0, C1, C2, C3);
+ possiblyRotatingKernelHelper.unrotateResult(C4, C5, C6, C7);
+ possiblyRotatingKernelHelper.unrotateResult(C8, C9, C10, C11);
ResPacket R0, R1, R2;
ResPacket alphav = pset1<ResPacket>(alpha);