aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2010-07-05 23:27:54 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2010-07-05 23:27:54 +0200
commitc69a22619293b791b884b7aefa65b245a902ea04 (patch)
tree2935dac75ec5cd896f11d5a61e9f2373eb9a7e3e /Eigen/src
parente1eccfad3f3b4e9e2ac94ca9a43256144ef72888 (diff)
* extend the Has* packet traits and makes all functor use it
* extend the packing routines to support conjugation
Diffstat (limited to 'Eigen/src')
-rw-r--r--Eigen/src/Core/Functors.h40
-rw-r--r--Eigen/src/Core/GenericPacketMath.h7
-rw-r--r--Eigen/src/Core/arch/SSE/Complex.h18
-rw-r--r--Eigen/src/Core/arch/SSE/PacketMath.h8
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h48
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrix.h9
-rw-r--r--Eigen/src/Core/util/BlasUtil.h13
7 files changed, 85 insertions, 58 deletions
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index 78d1e5628..cbe20d50e 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -46,7 +46,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
- PacketAccess = ei_packet_traits<Scalar>::size>1
+ PacketAccess = ei_packet_traits<Scalar>::HasAdd
};
};
@@ -69,7 +69,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::MulCost,
- PacketAccess = ei_packet_traits<Scalar>::size>1
+ PacketAccess = ei_packet_traits<Scalar>::HasMul
};
};
@@ -92,7 +92,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
- PacketAccess = ei_packet_traits<Scalar>::size>1
+ PacketAccess = ei_packet_traits<Scalar>::HasMin
};
};
@@ -115,7 +115,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
- PacketAccess = ei_packet_traits<Scalar>::size>1
+ PacketAccess = ei_packet_traits<Scalar>::HasMax
};
};
@@ -158,7 +158,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
- PacketAccess = ei_packet_traits<Scalar>::size>1
+ PacketAccess = ei_packet_traits<Scalar>::HasSub
};
};
@@ -178,10 +178,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
enum {
Cost = 2 * NumTraits<Scalar>::MulCost,
- PacketAccess = ei_packet_traits<Scalar>::size>1
- #if (defined EIGEN_VECTORIZE)
- && !NumTraits<Scalar>::IsInteger
- #endif
+ PacketAccess = ei_packet_traits<Scalar>::HasDiv
};
};
@@ -203,7 +200,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
{ enum {
Cost = NumTraits<Scalar>::AddCost,
- PacketAccess = int(ei_packet_traits<Scalar>::size)>1 };
+ PacketAccess = ei_packet_traits<Scalar>::HasNegate };
};
/** \internal
@@ -224,7 +221,7 @@ struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::AddCost,
- PacketAccess = int(ei_packet_traits<Scalar>::size)>1
+ PacketAccess = ei_packet_traits<Scalar>::HasAbs
};
};
@@ -243,7 +240,7 @@ template<typename Scalar> struct ei_scalar_abs2_op {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasAbs2 }; };
/** \internal
* \brief Template functor to compute the conjugate of a complex value
@@ -254,14 +251,14 @@ template<typename Scalar> struct ei_scalar_conjugate_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_conjugate_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return ei_conj(a); }
template<typename PacketScalar>
- EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return a; }
+ EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return ei_pconj(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
- PacketAccess = int(ei_packet_traits<Scalar>::size)>1
+ PacketAccess = ei_packet_traits<Scalar>::HasConj
};
};
@@ -398,7 +395,7 @@ struct ei_scalar_multiple_op {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_multiple_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
template<typename Scalar1, typename Scalar2>
struct ei_scalar_multiple2_op {
@@ -425,7 +422,7 @@ struct ei_scalar_quotient1_impl {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> >
-{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
template<typename Scalar>
struct ei_scalar_quotient1_impl<Scalar,true> {
@@ -472,6 +469,7 @@ struct ei_scalar_constant_op {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_constant_op<Scalar> >
+// FIXME replace this packet test by a safe one
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
template<typename Scalar> struct ei_scalar_identity_op {
@@ -543,7 +541,7 @@ struct ei_linspaced_op_impl<Scalar,true>
// nested expressions).
template <typename Scalar, bool RandomAccess = true> struct ei_linspaced_op;
template <typename Scalar, bool RandomAccess> struct ei_functor_traits< ei_linspaced_op<Scalar,RandomAccess> >
-{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
+{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
{
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@@ -588,7 +586,7 @@ struct ei_scalar_add_op {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_add_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::HasAdd }; };
/** \internal
* \brief Template functor to compute the square root of a scalar
@@ -676,7 +674,7 @@ struct ei_scalar_inverse_op {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_inverse_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasDiv }; };
/** \internal
* \brief Template functor to compute the square of a scalar
@@ -692,7 +690,7 @@ struct ei_scalar_square_op {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_square_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
/** \internal
* \brief Template functor to compute the cube of a scalar
@@ -708,7 +706,7 @@ struct ei_scalar_cube_op {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_cube_op<Scalar> >
-{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
+{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
// default functor traits for STL functors:
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 643e12e34..6cd288c55 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -58,8 +58,11 @@ struct ei_default_packet_traits
HasMul = 1,
HasNegate = 1,
HasAbs = 1,
+ HasAbs2 = 1,
HasMin = 1,
HasMax = 1,
+ HasConj = 1,
+ HasSetLinear = 1,
HasDiv = 0,
HasSqrt = 0,
@@ -105,6 +108,10 @@ ei_psub(const Packet& a,
template<typename Packet> inline Packet
ei_pnegate(const Packet& a) { return -a; }
+/** \internal \returns conj(a) (coeff-wise) */
+template<typename Packet> inline Packet
+ei_pconj(const Packet& a) { return ei_conj(a); }
+
/** \internal \returns a * b (coeff-wise) */
template<typename Packet> inline Packet
ei_pmul(const Packet& a,
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index ab8bf8b84..3f7a04b7d 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -37,6 +37,18 @@ typedef __m128d Packet1cd;
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
{
typedef Packet2cf type; enum {size=2};
+ enum {
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
};
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
@@ -56,7 +68,11 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a)
{
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
return Packet2cf(_mm_xor_ps(a.v,mask));
-
+}
+template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a)
+{
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_xor_ps(a.v,mask));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 29375bdae..8a3cdf679 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -62,6 +62,7 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
{
typedef Packet4f type; enum {size=4};
enum {
+ HasDiv = 1,
HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH,
HasLog = 1,
@@ -70,7 +71,12 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
};
};
template<> struct ei_packet_traits<double> : ei_default_packet_traits
-{ typedef Packet2d type; enum {size=2}; };
+{
+ typedef Packet2d type; enum {size=2};
+ enum {
+ HasDiv = 1
+ };
+};
template<> struct ei_packet_traits<int> : ei_default_packet_traits
{ typedef Packet4i type; enum {size=4}; };
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 4d9a09708..4a2ebb713 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -773,8 +773,8 @@ struct ei_gemm_pack_lhs
// 4 5 6 7 16 17 18 19 25 28
// 8 9 10 11 20 21 22 23 26 29
// . . . . . . . . . .
-template<typename Scalar, typename Index, int nr, bool PanelMode>
-struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
typedef typename ei_packet_traits<Scalar>::type Packet;
enum { PacketSize = ei_packet_traits<Scalar>::size };
@@ -782,6 +782,7 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
Index stride=0, Index offset=0)
{
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
bool hasAlpha = alpha != Scalar(1);
Index packet_cols = (cols/nr) * nr;
Index count = 0;
@@ -796,19 +797,19 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
if (hasAlpha)
for(Index k=0; k<depth; k++)
{
- blockB[count+0] = alpha*b0[k];
- blockB[count+1] = alpha*b1[k];
- if(nr==4) blockB[count+2] = alpha*b2[k];
- if(nr==4) blockB[count+3] = alpha*b3[k];
+ blockB[count+0] = alpha*cj(b0[k]);
+ blockB[count+1] = alpha*cj(b1[k]);
+ if(nr==4) blockB[count+2] = alpha*cj(b2[k]);
+ if(nr==4) blockB[count+3] = alpha*cj(b3[k]);
count += nr;
}
else
for(Index k=0; k<depth; k++)
{
- blockB[count+0] = b0[k];
- blockB[count+1] = b1[k];
- if(nr==4) blockB[count+2] = b2[k];
- if(nr==4) blockB[count+3] = b3[k];
+ blockB[count+0] = cj(b0[k]);
+ blockB[count+1] = cj(b1[k]);
+ if(nr==4) blockB[count+2] = cj(b2[k]);
+ if(nr==4) blockB[count+3] = cj(b3[k]);
count += nr;
}
// skip what we have after
@@ -823,13 +824,13 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
if (hasAlpha)
for(Index k=0; k<depth; k++)
{
- blockB[count] = alpha*b0[k];
+ blockB[count] = alpha*cj(b0[k]);
count += 1;
}
else
for(Index k=0; k<depth; k++)
{
- blockB[count] = b0[k];
+ blockB[count] = cj(b0[k]);
count += 1;
}
if(PanelMode) count += (stride-offset-depth);
@@ -838,14 +839,15 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
};
// this version is optimized for row major matrices
-template<typename Scalar, typename Index, int nr, bool PanelMode>
-struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode>
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
enum { PacketSize = ei_packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Scalar alpha, Index depth, Index cols,
Index stride=0, Index offset=0)
{
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
bool hasAlpha = alpha != Scalar(1);
Index packet_cols = (cols/nr) * nr;
Index count = 0;
@@ -858,10 +860,10 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode>
for(Index k=0; k<depth; k++)
{
const Scalar* b0 = &rhs[k*rhsStride + j2];
- blockB[count+0] = alpha*b0[0];
- blockB[count+1] = alpha*b0[1];
- if(nr==4) blockB[count+2] = alpha*b0[2];
- if(nr==4) blockB[count+3] = alpha*b0[3];
+ blockB[count+0] = alpha*cj(b0[0]);
+ blockB[count+1] = alpha*cj(b0[1]);
+ if(nr==4) blockB[count+2] = alpha*cj(b0[2]);
+ if(nr==4) blockB[count+3] = alpha*cj(b0[3]);
count += nr;
}
}
@@ -870,10 +872,10 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode>
for(Index k=0; k<depth; k++)
{
const Scalar* b0 = &rhs[k*rhsStride + j2];
- blockB[count+0] = b0[0];
- blockB[count+1] = b0[1];
- if(nr==4) blockB[count+2] = b0[2];
- if(nr==4) blockB[count+3] = b0[3];
+ blockB[count+0] = cj(b0[0]);
+ blockB[count+1] = cj(b0[1]);
+ if(nr==4) blockB[count+2] = cj(b0[2]);
+ if(nr==4) blockB[count+3] = cj(b0[3]);
count += nr;
}
}
@@ -887,7 +889,7 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode>
const Scalar* b0 = &rhs[j2];
for(Index k=0; k<depth; k++)
{
- blockB[count] = alpha*b0[k*rhsStride];
+ blockB[count] = alpha*cj(b0[k*rhsStride]);
count += 1;
}
if(PanelMode) count += stride-offset-depth;
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 39b283a3f..5d63aee3d 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -73,9 +73,6 @@ static void run(Index rows, Index cols, Index depth,
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
- if (ConjugateRhs)
- alpha = ei_conj(alpha);
-
typedef typename ei_packet_traits<Scalar>::type PacketType;
typedef ei_product_blocking_traits<Scalar> Blocking;
@@ -83,9 +80,9 @@ static void run(Index rows, Index cols, Index depth,
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
//Index nc = blocking.nc(); // cache block size along the N direction
- ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder> pack_rhs;
- ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder> pack_lhs;
- ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp;
+ ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder, ConjugateLhs> pack_lhs;
+ ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder, ConjugateRhs> pack_rhs;
+ ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr> gebp;
#ifdef EIGEN_HAS_OPENMP
if(info)
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 139ea73d2..cd0ed0ede 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -29,10 +29,15 @@
// implement and control fast level 2 and level 3 BLAS-like routines.
// forward declarations
-template<typename Scalar, typename Index, int mr, int nr, typename Conj>
+
+// Provides scalar/packet-wise product and product with accumulation
+// with optional conjugation of the arguments.
+template<bool ConjLhs, bool ConjRhs> struct ei_conj_helper;
+
+template<typename Scalar, typename Index, int mr, int nr, typename Conj = ei_conj_helper<false,false> >
struct ei_gebp_kernel;
-template<typename Scalar, typename Index, int nr, int StorageOrder, bool PanelMode=false>
+template<typename Scalar, typename Index, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
struct ei_gemm_pack_rhs;
template<typename Scalar, typename Index, int mr, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
@@ -53,10 +58,6 @@ template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index,
static void ei_cache_friendly_product_rowmajor_times_vector(
const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsSize, ResType& res, Scalar alpha);
-// Provides scalar/packet-wise product and product with accumulation
-// with optional conjugation of the arguments.
-template<bool ConjLhs, bool ConjRhs> struct ei_conj_helper;
-
template<> struct ei_conj_helper<false,false>
{
template<typename T>