about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
author: Konstantinos Margaritis <markos@codex.gr> 2010-04-24 00:57:10 +0300
committer: Konstantinos Margaritis <markos@codex.gr> 2010-04-24 00:57:10 +0300
commit: 5acf46bd12edb79e9a83c42810654dec88227726 (patch)
tree: 00cfe0ab4f7871ae096c0440f6c416e9d7f05a88 /Eigen/src/Core
parent: 6972c140f737874d88da0e225c7c27b4563a4518 (diff)
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- Eigen/src/Core/GenericPacketMath.h 8
-rw-r--r-- Eigen/src/Core/arch/AltiVec/PacketMath.h 9
-rw-r--r-- Eigen/src/Core/arch/NEON/PacketMath.h 7
-rw-r--r-- Eigen/src/Core/arch/SSE/PacketMath.h 4
-rw-r--r-- Eigen/src/Core/products/GeneralBlockPanelKernel.h 34
5 files changed, 26 insertions, 36 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 46fa5c186..08981f89d 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -169,14 +169,6 @@ template<typename Scalar, typename Packet> inline void ei_pstore(Scalar* to, con
template<typename Scalar, typename Packet> inline void ei_pstoreu(Scalar* to, const Packet& from)
{ (*to) = from; }
-/** \internal tries to do cache prefetching of \a addr */
-template<typename Scalar> inline void ei_prefetch(const Scalar* addr)
-{
-#if !defined(_MSC_VER)
-__builtin_prefetch(addr);
-#endif
-}
-
/** \internal \returns the first element of a packet */
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_pfirst(const Packet& a)
{ return a; }
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 0a7b07645..c6fc670d8 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -67,8 +67,6 @@ typedef __vector unsigned char Packet16uc;
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
-#define DST_CHAN 1
-#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
// Define global static constants:
static Packet4f ei_p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
@@ -293,8 +291,8 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f
edgeAlign = vec_lvsl(0, to); // permute map to extract edges
edges=vec_perm(LSQ,MSQ,edgeAlign); // extract the edges
align = vec_lvsr( 0, to ); // permute map to misalign data
- MSQ = vec_perm(edges,(Packet16uc)from,align); // misalign the data (MSQ)
- LSQ = vec_perm((Packet16uc)from,edges,align); // misalign the data (LSQ)
+ MSQ = vec_perm(edges,(Packet16uc)from,align); // misalign the data (MSQ)
+ LSQ = vec_perm((Packet16uc)from,edges,align); // misalign the data (LSQ)
vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
}
@@ -317,9 +315,6 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i
vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
}
-template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
-template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
-
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 96c75101c..2acb3633a 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -53,10 +53,6 @@ typedef int32x4_t Packet4i;
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
-#ifndef __pld
-#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
-#endif
-
template<> struct ei_packet_traits<float> : ei_default_packet_traits
{
typedef Packet4f type; enum {size=4};
@@ -172,9 +168,6 @@ template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i
template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
-template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { __pld(addr); }
-template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { __pld(addr); }
-
// FIXME only store the 2 first elements ?
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index d360081cd..77f15d982 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -233,10 +233,6 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu<double>(double* to, const Packet2
template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, _mm_castsi128_pd(from)); }
-template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void ei_prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64)
// The temporary variable fixes an internal compilation error.
// Direct access of the struct members fixed bug #62.
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index bc697cef5..5e219e077 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -117,7 +117,9 @@ struct ei_gebp_kernel
for(int i=0; i<peeled_mc; i+=mr)
{
const Scalar* blA = &blockA[i*strideA+offsetA*mr];
- ei_prefetch(&blA[0]);
+ #ifdef EIGEN_VECTORIZE_SSE
+ _mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0);
+ #endif
// TODO move the res loads to the stores
@@ -137,10 +139,12 @@ struct ei_gebp_kernel
Scalar* r2 = r1 + resStride;
Scalar* r3 = r2 + resStride;
- ei_prefetch(r0+16);
- ei_prefetch(r1+16);
- ei_prefetch(r2+16);
- ei_prefetch(r3+16);
+ #ifdef EIGEN_VECTORIZE_SSE
+ _mm_prefetch((const char*)(r0+16), _MM_HINT_T0);
+ _mm_prefetch((const char*)(r1+16), _MM_HINT_T0);
+ _mm_prefetch((const char*)(r2+16), _MM_HINT_T0);
+ _mm_prefetch((const char*)(r3+16), _MM_HINT_T0);
+ #endif
// performs "inner" product
// TODO let's check whether the following peeled loop could not be
@@ -330,7 +334,9 @@ struct ei_gebp_kernel
{
int i = peeled_mc;
const Scalar* blA = &blockA[i*strideA+offsetA*PacketSize];
- ei_prefetch(&blA[0]);
+ #ifdef EIGEN_VECTORIZE_SSE
+ _mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0);
+ #endif
// gets res block as register
PacketType C0, C1, C2, C3;
@@ -458,7 +464,9 @@ struct ei_gebp_kernel
for(int i=peeled_mc2; i<rows; i++)
{
const Scalar* blA = &blockA[i*strideA+offsetA];
- ei_prefetch(&blA[0]);
+ #ifdef EIGEN_VECTORIZE_SSE
+ _mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0);
+ #endif
// gets a 1 x nr res block as registers
Scalar C0(0), C1(0), C2(0), C3(0);
@@ -516,7 +524,9 @@ struct ei_gebp_kernel
for(int i=0; i<peeled_mc; i+=mr)
{
const Scalar* blA = &blockA[i*strideA+offsetA*mr];
- ei_prefetch(&blA[0]);
+ #ifdef EIGEN_VECTORIZE_SSE
+ _mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0);
+ #endif
// TODO move the res loads to the stores
@@ -547,7 +557,9 @@ struct ei_gebp_kernel
{
int i = peeled_mc;
const Scalar* blA = &blockA[i*strideA+offsetA*PacketSize];
- ei_prefetch(&blA[0]);
+ #ifdef EIGEN_VECTORIZE_SSE
+ _mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0);
+ #endif
PacketType C0 = ei_ploadu(&res[(j2+0)*resStride + i]);
@@ -564,7 +576,9 @@ struct ei_gebp_kernel
for(int i=peeled_mc2; i<rows; i++)
{
const Scalar* blA = &blockA[i*strideA+offsetA];
- ei_prefetch(&blA[0]);
+ #ifdef EIGEN_VECTORIZE_SSE
+ _mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0);
+ #endif
// gets a 1 x 1 res block as registers
Scalar C0(0);