aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/Dot.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/Dot.h')
-rw-r--r--Eigen/src/Core/Dot.h232
1 files changed, 20 insertions, 212 deletions
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index 201bd23ca..fbdc67bd3 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2006-2008, 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -25,224 +25,35 @@
#ifndef EIGEN_DOT_H
#define EIGEN_DOT_H
-/***************************************************************************
-* Part 1 : the logic deciding a strategy for vectorization and unrolling
-***************************************************************************/
-
-template<typename Derived1, typename Derived2>
-struct ei_dot_traits
-{
-public:
- enum {
- Traversal = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
- && (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit)
- ? LinearVectorizedTraversal
- : DefaultTraversal
- };
-
-private:
- typedef typename Derived1::Scalar Scalar;
- enum {
- PacketSize = ei_packet_traits<Scalar>::size,
- Cost = Derived1::SizeAtCompileTime * (Derived1::CoeffReadCost + Derived2::CoeffReadCost + NumTraits<Scalar>::MulCost)
- + (Derived1::SizeAtCompileTime-1) * NumTraits<Scalar>::AddCost,
- UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
- };
-
-public:
- enum {
- Unrolling = Cost <= UnrollingLimit
- ? CompleteUnrolling
- : NoUnrolling
- };
-};
-
-/***************************************************************************
-* Part 2 : unrollers
-***************************************************************************/
-
-/*** no vectorization ***/
-
-template<typename Derived1, typename Derived2, int Start, int Length>
-struct ei_dot_novec_unroller
-{
- enum {
- HalfLength = Length/2
- };
-
- typedef typename Derived1::Scalar Scalar;
-
- inline static Scalar run(const Derived1& v1, const Derived2& v2)
- {
- return ei_dot_novec_unroller<Derived1, Derived2, Start, HalfLength>::run(v1, v2)
- + ei_dot_novec_unroller<Derived1, Derived2, Start+HalfLength, Length-HalfLength>::run(v1, v2);
- }
-};
-
-template<typename Derived1, typename Derived2, int Start>
-struct ei_dot_novec_unroller<Derived1, Derived2, Start, 1>
-{
- typedef typename Derived1::Scalar Scalar;
-
- inline static Scalar run(const Derived1& v1, const Derived2& v2)
- {
- return ei_conj(v1.coeff(Start)) * v2.coeff(Start);
- }
-};
-
-/*** vectorization ***/
-
-template<typename Derived1, typename Derived2, int Index, int Stop,
- bool LastPacket = (Stop-Index == ei_packet_traits<typename Derived1::Scalar>::size)>
-struct ei_dot_vec_unroller
-{
- typedef typename Derived1::Scalar Scalar;
- typedef typename ei_packet_traits<Scalar>::type PacketScalar;
-
- enum {
- row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index,
- col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0,
- row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index,
- col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0
- };
-
- inline static PacketScalar run(const Derived1& v1, const Derived2& v2)
- {
- return ei_pmadd(
- v1.template packet<Aligned>(row1, col1),
- v2.template packet<Aligned>(row2, col2),
- ei_dot_vec_unroller<Derived1, Derived2, Index+ei_packet_traits<Scalar>::size, Stop>::run(v1, v2)
- );
- }
-};
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true>
-{
- enum {
- row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index,
- col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0,
- row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index,
- col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0,
- alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned,
- alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned
- };
-
- typedef typename Derived1::Scalar Scalar;
- typedef typename ei_packet_traits<Scalar>::type PacketScalar;
-
- inline static PacketScalar run(const Derived1& v1, const Derived2& v2)
- {
- return ei_pmul(v1.template packet<alignment1>(row1, col1), v2.template packet<alignment2>(row2, col2));
- }
-};
-
-/***************************************************************************
-* Part 3 : implementation of all cases
-***************************************************************************/
-
-template<typename Derived1, typename Derived2,
- int Traversal = ei_dot_traits<Derived1, Derived2>::Traversal,
- int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling
+// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot
+// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
+// looking at the static assertions. Thus this is a trick to get better compile errors.
+template<typename T, typename U,
+// the NeedToTranspose condition here is taken straight from Assign.h
+ bool NeedToTranspose = T::IsVectorAtCompileTime
+ && U::IsVectorAtCompileTime
+ && ((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1)
+ | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
+ // revert to || as soon as not needed anymore.
+ (int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))
>
-struct ei_dot_impl;
-
-template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
+struct ei_dot_nocheck
{
- typedef typename Derived1::Scalar Scalar;
- static Scalar run(const Derived1& v1, const Derived2& v2)
+ static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
- ei_assert(v1.size()>0 && "you are using a non initialized vector");
- Scalar res;
- res = ei_conj(v1.coeff(0)) * v2.coeff(0);
- for(int i = 1; i < v1.size(); ++i)
- res += ei_conj(v1.coeff(i)) * v2.coeff(i);
- return res;
+ return a.conjugate().cwiseProduct(b).sum();
}
};
-template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
- : public ei_dot_novec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
-{};
-
-template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
-{
- typedef typename Derived1::Scalar Scalar;
- typedef typename ei_packet_traits<Scalar>::type PacketScalar;
-
- static Scalar run(const Derived1& v1, const Derived2& v2)
- {
- const int size = v1.size();
- const int packetSize = ei_packet_traits<Scalar>::size;
- const int alignedSize = (size/packetSize)*packetSize;
- enum {
- alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned,
- alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned
- };
- Scalar res;
-
- // do the vectorizable part of the sum
- if(size >= packetSize)
- {
- PacketScalar packet_res = ei_pmul(
- v1.template packet<alignment1>(0),
- v2.template packet<alignment2>(0)
- );
- for(int index = packetSize; index<alignedSize; index += packetSize)
- {
- packet_res = ei_pmadd(
- v1.template packet<alignment1>(index),
- v2.template packet<alignment2>(index),
- packet_res
- );
- }
- res = ei_predux(packet_res);
-
- // now we must do the rest without vectorization.
- if(alignedSize == size) return res;
- }
- else // too small to vectorize anything.
- // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
- {
- res = Scalar(0);
- }
-
- // do the remainder of the vector
- for(int index = alignedSize; index < size; ++index)
- {
- res += v1.coeff(index) * v2.coeff(index);
- }
-
- return res;
- }
-};
-
-template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
+template<typename T, typename U>
+struct ei_dot_nocheck<T, U, true>
{
- typedef typename Derived1::Scalar Scalar;
- typedef typename ei_packet_traits<Scalar>::type PacketScalar;
- enum {
- PacketSize = ei_packet_traits<Scalar>::size,
- Size = Derived1::SizeAtCompileTime,
- VectorizedSize = (Size / PacketSize) * PacketSize
- };
- static Scalar run(const Derived1& v1, const Derived2& v2)
+ static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
- Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizedSize>::run(v1, v2));
- if (VectorizedSize != Size)
- res += ei_dot_novec_unroller<Derived1, Derived2, VectorizedSize, Size-VectorizedSize>::run(v1, v2);
- return res;
+ return a.adjoint().cwiseProduct(b).sum();
}
};
-/***************************************************************************
-* Part 4 : implementation of MatrixBase methods
-***************************************************************************/
-
/** \returns the dot product of *this with other.
*
* \only_for_vectors
@@ -266,10 +77,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
ei_assert(size() == other.size());
- // dot() must honor EvalBeforeNestingBit (eg: v.dot(M*v) )
- typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
- typedef typename ei_cleantype<typename OtherDerived::Nested>::type OtherNested;
- return ei_dot_impl<ThisNested, OtherNested>::run(derived(), other.derived());
+ return ei_dot_nocheck<Derived,OtherDerived>::run(*this, other);
}
/** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself.