aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2008-05-28 22:11:47 +0000
committerGravatar Gael Guennebaud <g.gael@free.fr>2008-05-28 22:11:47 +0000
commit 8711e26c8a691818c5aa2f3a024f8ada4b7129dc (patch)
tree 65be892627bc2c1cc89f21e36f9d1e8125ebe440 /Eigen/src
parent 73084dc754a64a5ae5753ef119399ea7e45c0626 (diff)
* change Flagged to take into account NestByValue only
* bugfix in Assign and cache friendly product (weird that worked before)
* improved argument evaluation in Product
Diffstat (limited to 'Eigen/src')
-rw-r--r-- Eigen/src/Core/Assign.h 8
-rw-r--r-- Eigen/src/Core/CacheFriendlyProduct.h 2
-rw-r--r-- Eigen/src/Core/Flagged.h 7
-rw-r--r-- Eigen/src/Core/Product.h 120
4 files changed, 105 insertions, 32 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 56f4c956e..bf5ede1bc 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -208,12 +208,12 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
{
// FIXME the following is not really efficient
- int i = index/dst.rows();
- int j = index%dst.rows();
+ int i = index/dst.cols();
+ int j = index%dst.cols();
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
}
- for(int i = alignedSize/dst.rows(); i < dst.rows(); i++)
- for(int j = alignedSize%dst.rows(); j < dst.cols(); j++)
+ for(int i = alignedSize/dst.cols(); i < dst.rows(); i++)
+ for(int j = alignedSize%dst.cols(); j < dst.cols(); j++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
else
diff --git a/Eigen/src/Core/CacheFriendlyProduct.h b/Eigen/src/Core/CacheFriendlyProduct.h
index b484b1786..5e5d075b1 100644
--- a/Eigen/src/Core/CacheFriendlyProduct.h
+++ b/Eigen/src/Core/CacheFriendlyProduct.h
@@ -45,7 +45,7 @@ static void ei_cache_friendly_product(
rhsStride = _lhsStride;
cols = _rows;
rows = _cols;
- lhsRowMajor = _rhsRowMajor;
+ lhsRowMajor = !_rhsRowMajor;
ei_assert(_lhsRowMajor);
}
else
diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h
index 1107e39aa..925066533 100644
--- a/Eigen/src/Core/Flagged.h
+++ b/Eigen/src/Core/Flagged.h
@@ -43,6 +43,7 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed>
struct ei_traits<Flagged<ExpressionType, Added, Removed> >
{
typedef typename ExpressionType::Scalar Scalar;
+
enum {
RowsAtCompileTime = ExpressionType::RowsAtCompileTime,
ColsAtCompileTime = ExpressionType::ColsAtCompileTime,
@@ -59,11 +60,13 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Flagged)
+ typedef typename ei_meta_if<ei_must_nest_by_value<ExpressionType>::ret,
+ ExpressionType, const ExpressionType&>::ret ExpressionTypeNested;
inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
/** \internal */
- inline ExpressionType _expression() const { return m_matrix; }
+ inline const ExpressionType& _expression() const { return m_matrix; }
private:
@@ -94,7 +97,7 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
}
protected:
- typename ExpressionType::Nested m_matrix;
+ ExpressionTypeNested m_matrix;
};
/** \returns an expression of *this with added flags
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index cd7d9ca93..2c8c73c88 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -165,12 +165,10 @@ template<typename T> class ei_product_eval_to_column_major
template<typename T, int n=1> struct ei_product_nested_rhs
{
typedef typename ei_meta_if<
- ei_must_nest_by_value<T>::ret && (!(ei_traits<T>::Flags & RowMajorBit)) && (int(ei_traits<T>::Flags) & DirectAccessBit),
+ ei_must_nest_by_value<T>::ret,
T,
typename ei_meta_if<
((ei_traits<T>::Flags & EvalBeforeNestingBit)
- || (ei_traits<T>::Flags & RowMajorBit)
- || (!(ei_traits<T>::Flags & DirectAccessBit))
|| (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost),
typename ei_product_eval_to_column_major<T>::type,
const T&
@@ -178,19 +176,38 @@ template<typename T, int n=1> struct ei_product_nested_rhs
>::ret type;
};
-template<typename T, int n=1> struct ei_product_nested_lhs
+// template<typename T, int n=1> struct ei_product_nested_lhs
+// {
+// typedef typename ei_meta_if<
+// ei_must_nest_by_value<T>::ret && (int(ei_traits<T>::Flags) & DirectAccessBit),
+// T,
+// typename ei_meta_if<
+// int(ei_traits<T>::Flags) & EvalBeforeNestingBit
+// || (!(int(ei_traits<T>::Flags) & DirectAccessBit))
+// || (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost),
+// typename ei_eval<T>::type,
+// const T&
+// >::ret
+// >::ret type;
+// };
+
+template<typename T> struct ei_product_copy_rhs
{
typedef typename ei_meta_if<
- ei_must_nest_by_value<T>::ret && (int(ei_traits<T>::Flags) & DirectAccessBit),
- T,
- typename ei_meta_if<
- int(ei_traits<T>::Flags) & EvalBeforeNestingBit
- || (!(int(ei_traits<T>::Flags) & DirectAccessBit))
- || (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost),
+ (ei_traits<T>::Flags & RowMajorBit)
+ || (!(ei_traits<T>::Flags & DirectAccessBit)),
+ typename ei_product_eval_to_column_major<T>::type,
+ const T&
+ >::ret type;
+};
+
+template<typename T> struct ei_product_copy_lhs
+{
+ typedef typename ei_meta_if<
+ (!(int(ei_traits<T>::Flags) & DirectAccessBit)),
typename ei_eval<T>::type,
const T&
- >::ret
- >::ret type;
+ >::ret type;
};
template<typename Lhs, typename Rhs, int EvalMode>
@@ -199,9 +216,9 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
typedef typename Lhs::Scalar Scalar;
// the cache friendly product evals lhs once only
// FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ?
- typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
- typename ei_product_nested_lhs<Lhs,1>::type,
- typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type>::ret LhsNested;
+ typedef /*typename ei_meta_if<EvalMode==CacheFriendlyProduct,*/
+// typename ei_product_nested_lhs<Lhs,1>::type,
+ typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type/*>::ret*/ LhsNested;
// NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation
typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
@@ -225,10 +242,9 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
_RowMajor = (RhsFlags & RowMajorBit)
&& (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsVectorizable)),
- _LostBits = HereditaryBits & ~(
- (_RowMajor ? 0 : RowMajorBit)
- | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
- Flags = ((unsigned int)(LhsFlags | RhsFlags) & _LostBits & ~NestedByValue)
+ _LostBits = ~((_RowMajor ? 0 : RowMajorBit)
+ | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
+ Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & _LostBits)
| EvalBeforeAssigningBit
| EvalBeforeNestingBit
| (_Vectorizable ? VectorizableBit : 0),
@@ -369,6 +385,7 @@ template<typename Lhs,typename Rhs>
inline Derived&
MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other)
{
+ std::cout << "_cacheFriendlyEvalAndAdd\n";
other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());
return derived();
}
@@ -396,6 +413,7 @@ struct ei_cache_friendly_selector
)
{
res.setZero();
+// typename ei_product_copy_lhs<>::type
ei_cache_friendly_product<Scalar>(
product._rows(), product._cols(), product.m_lhs.cols(),
_LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
@@ -452,18 +470,70 @@ template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived>
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const
{
- ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
- _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
- ::eval(*this, res);
+// ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
+// _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
+// ::eval(*this, res);
+
+ if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ && _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ && m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ )
+ {
+ res.setZero();
+
+
+// typedef typename ei_eval<_LhsNested>::type LhsCopy;
+// typedef typename ei_product_eval_to_column_major<_RhsNested>::type RhsCopy;
+ typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy;
+ typedef typename ei_unref<LhsCopy>::type _LhsCopy;
+ typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy;
+ typedef typename ei_unref<RhsCopy>::type _RhsCopy;
+ LhsCopy lhs(m_lhs);
+ RhsCopy rhs(m_rhs);
+ ei_cache_friendly_product<Scalar>(
+ _rows(), _cols(), lhs.cols(),
+ _LhsCopy::Flags&RowMajorBit, &(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(),
+ _RhsCopy::Flags&RowMajorBit, &(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(),
+ Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
+ );
+ }
+ else
+ {
+ res = Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy();
+ }
+
}
template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived>
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
{
- ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
- _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
- ::eval_and_add(*this, res);
+ std::cout << "_cacheFriendlyEvalAndAdd\n";
+// ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
+// _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
+// ::eval_and_add(*this, res);
+ if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ && _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ && m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ )
+ {
+ typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy;
+ typedef typename ei_unref<LhsCopy>::type _LhsCopy;
+ typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy;
+ typedef typename ei_unref<RhsCopy>::type _RhsCopy;
+ LhsCopy lhs(m_lhs);
+ RhsCopy rhs(m_rhs);
+ ei_cache_friendly_product<Scalar>(
+ _rows(), _cols(), lhs.cols(),
+ _LhsCopy::Flags&RowMajorBit, &(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(),
+ _RhsCopy::Flags&RowMajorBit, &(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(),
+ Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
+ );
+ }
+ else
+ {
+ res += Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy();
+ }
}
#endif // EIGEN_PRODUCT_H