aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src
diff options
context:
space:
mode:
author Benoit Jacob <benoitjacob@google.com> 2016-05-24 10:00:32 -0400
committer Benoit Jacob <benoitjacob@google.com> 2016-05-24 10:00:32 -0400
commit 6136f4fdd432dfff1c374348da8f76b9c93ac8ab (patch)
tree 82626c4d6f578bc19b35e1ac3f6e492c0c1e0441 /Eigen/src
parent e617711306745e9c5e3fe8a060373867102ecc4d (diff)
Remove the rotating kernel. It was only useful on some ARM CPUs (Qualcomm Krait) that are not as ubiquitous today as they were when I introduced it.
Diffstat (limited to 'Eigen/src')
-rw-r--r-- Eigen/src/Core/products/GeneralBlockPanelKernel.h 100
1 file changed, 5 insertions, 95 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index e43529cc7..253c03462 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -860,80 +860,6 @@ protected:
conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
};
-// helper for the rotating kernel below
-template <typename GebpKernel, bool UseRotatingKernel = GebpKernel::UseRotatingKernel>
-struct PossiblyRotatingKernelHelper
-{
- // default implementation, not rotating
-
- typedef typename GebpKernel::Traits Traits;
- typedef typename Traits::RhsScalar RhsScalar;
- typedef typename Traits::RhsPacket RhsPacket;
- typedef typename Traits::AccPacket AccPacket;
-
- const Traits& traits;
- PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
-
-
- template <size_t K, size_t Index>
- void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
- {
- traits.loadRhs(from + (Index+4*K)*Traits::RhsProgress, to);
- }
-
- void unrotateResult(AccPacket&,
- AccPacket&,
- AccPacket&,
- AccPacket&)
- {
- }
-};
-
-// rotating implementation
-template <typename GebpKernel>
-struct PossiblyRotatingKernelHelper<GebpKernel, true>
-{
- typedef typename GebpKernel::Traits Traits;
- typedef typename Traits::RhsScalar RhsScalar;
- typedef typename Traits::RhsPacket RhsPacket;
- typedef typename Traits::AccPacket AccPacket;
-
- const Traits& traits;
- PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
-
- template <size_t K, size_t Index>
- void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
- {
- if (Index == 0) {
- to = pload<RhsPacket>(from + 4*K*Traits::RhsProgress);
- } else {
- EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers");
- to = protate<1>(to);
- }
- }
-
- void unrotateResult(AccPacket& res0,
- AccPacket& res1,
- AccPacket& res2,
- AccPacket& res3)
- {
- PacketBlock<AccPacket> resblock;
- resblock.packet[0] = res0;
- resblock.packet[1] = res1;
- resblock.packet[2] = res2;
- resblock.packet[3] = res3;
- ptranspose(resblock);
- resblock.packet[3] = protate<1>(resblock.packet[3]);
- resblock.packet[2] = protate<2>(resblock.packet[2]);
- resblock.packet[1] = protate<3>(resblock.packet[1]);
- ptranspose(resblock);
- res0 = resblock.packet[0];
- res1 = resblock.packet[1];
- res2 = resblock.packet[2];
- res3 = resblock.packet[3];
- }
-};
-
/* optimized GEneral packed Block * packed Panel product kernel
*
* Mixing type logic: C += A * B
@@ -967,16 +893,6 @@ struct gebp_kernel
ResPacketSize = Traits::ResPacketSize
};
-
- static const bool UseRotatingKernel =
- EIGEN_ARCH_ARM &&
- internal::is_same<LhsScalar, float>::value &&
- internal::is_same<RhsScalar, float>::value &&
- internal::is_same<ResScalar, float>::value &&
- Traits::LhsPacketSize == 4 &&
- Traits::RhsPacketSize == 4 &&
- Traits::ResPacketSize == 4;
-
EIGEN_DONT_INLINE
void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
Index rows, Index depth, Index cols, ResScalar alpha,
@@ -1009,9 +925,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
// This corresponds to 3*LhsProgress x nr register blocks.
// Usually, make sense only with FMA
if(mr>=3*Traits::LhsProgress)
- {
- PossiblyRotatingKernelHelper<gebp_kernel> possiblyRotatingKernelHelper(traits);
-
+ {
// Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth)
// and on each largest micro vertical panel of the rhs (depth * nr).
// Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1.
@@ -1074,19 +988,19 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
- possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 0>(B_0, blB); \
+ traits.loadRhs(blB + (0+4*K)*Traits::RhsProgress, B_0); \
traits.madd(A0, B_0, C0, T0); \
traits.madd(A1, B_0, C4, T0); \
traits.madd(A2, B_0, C8, B_0); \
- possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 1>(B_0, blB); \
+ traits.loadRhs(blB + (1+4*K)*Traits::RhsProgress, B_0); \
traits.madd(A0, B_0, C1, T0); \
traits.madd(A1, B_0, C5, T0); \
traits.madd(A2, B_0, C9, B_0); \
- possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 2>(B_0, blB); \
+ traits.loadRhs(blB + (2+4*K)*Traits::RhsProgress, B_0); \
traits.madd(A0, B_0, C2, T0); \
traits.madd(A1, B_0, C6, T0); \
traits.madd(A2, B_0, C10, B_0); \
- possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 3>(B_0, blB); \
+ traits.loadRhs(blB + (3+4*K)*Traits::RhsProgress, B_0); \
traits.madd(A0, B_0, C3 , T0); \
traits.madd(A1, B_0, C7, T0); \
traits.madd(A2, B_0, C11, B_0); \
@@ -1120,10 +1034,6 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
#undef EIGEN_GEBP_ONESTEP
- possiblyRotatingKernelHelper.unrotateResult(C0, C1, C2, C3);
- possiblyRotatingKernelHelper.unrotateResult(C4, C5, C6, C7);
- possiblyRotatingKernelHelper.unrotateResult(C8, C9, C10, C11);
-
ResPacket R0, R1, R2;
ResPacket alphav = pset1<ResPacket>(alpha);