aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2009-07-13 13:17:55 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2009-07-13 13:17:55 +0200
commita2cf7ba9552bbe6b9371e1a2678914bab185968a (patch)
tree069f785e47348452411420a51241c1646e3efcdf /Eigen/src
parenta2087cd7a3674c3d3ef74a474e417a3ea1f1e82b (diff)
add triangular * vector product
Diffstat (limited to 'Eigen/src')
-rw-r--r--Eigen/src/Core/SolveTriangular.h4
-rw-r--r--Eigen/src/Core/TriangularMatrix.h15
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixVector.h8
-rw-r--r--Eigen/src/Core/products/TriangularMatrixVector.h161
-rw-r--r--Eigen/src/Core/util/Macros.h8
5 files changed, 184 insertions, 12 deletions
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index 200b4a325..76e4289de 100644
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -45,7 +45,7 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,NoUnrolling,RowMajor>
};
static void run(const Lhs& lhs, Rhs& other)
{
- static const int PanelWidth = EIGEN_TUNE_TRSV_PANEL_WIDTH;
+ static const int PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
const ActualLhsType& actualLhs = LhsProductTraits::extract(lhs);
const int size = lhs.cols();
@@ -104,7 +104,7 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,NoUnrolling,ColMajor>
static void run(const Lhs& lhs, Rhs& other)
{
- static const int PanelWidth = EIGEN_TUNE_TRSV_PANEL_WIDTH;
+ static const int PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
const ActualLhsType& actualLhs = LhsProductTraits::extract(lhs);
const int size = lhs.cols();
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 81621dbcc..506be7965 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -142,13 +142,17 @@ struct ei_traits<TriangularView<MatrixType, _Mode> > : ei_traits<MatrixType>
};
};
-template<typename MatrixType, unsigned int _Mode> class TriangularView
- : public TriangularBase<TriangularView<MatrixType, _Mode> >
+template<typename Lhs,typename Rhs>
+struct ei_triangular_vector_product_returntype;
+
+template<typename _MatrixType, unsigned int _Mode> class TriangularView
+ : public TriangularBase<TriangularView<_MatrixType, _Mode> >
{
public:
typedef TriangularBase<TriangularView> Base;
typedef typename ei_traits<TriangularView>::Scalar Scalar;
+ typedef _MatrixType MatrixType;
typedef typename MatrixType::PlainMatrixType PlainMatrixType;
enum {
@@ -244,6 +248,13 @@ template<typename MatrixType, unsigned int _Mode> class TriangularView
}
template<typename OtherDerived>
+ ei_triangular_vector_product_returntype<TriangularView,OtherDerived>
+ operator*(const MatrixBase<OtherDerived>& rhs) const
+ {
+ return ei_triangular_vector_product_returntype<TriangularView,OtherDerived>(*this, rhs.derived(), 1);
+ }
+
+ template<typename OtherDerived>
typename ei_plain_matrix_type_column_major<OtherDerived>::type
solve(const MatrixBase<OtherDerived>& other) const;
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index aa3187a07..8ac83772c 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -25,7 +25,7 @@
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
-/* Optimized col-major selfadjoint matrix * vector product:
+/* Optimized selfadjoint matrix * vector product:
* This algorithm processes 2 columns at onces that allows to both reduce
* the number of load/stores of the result by a factor 2 and to reduce
* the instruction dependency.
@@ -78,11 +78,11 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
size_t alignedStart = (starti) + ei_alignmentOffset(&res[starti], endi-starti);
alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
- res[j] += t0 * conj0(A0[j]);
+ res[j] += t0 * conj0(A0[j]);
if(FirstTriangular)
{
- res[j+1] += t1 * conj0(A1[j+1]);
- res[j] += t1 * conj0(A1[j]);
+ res[j+1] += t1 * conj0(A1[j+1]);
+ res[j] += t1 * conj0(A1[j]);
t3 += conj1(A1[j]) * rhs[j];
}
else
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
new file mode 100644
index 000000000..533aad170
--- /dev/null
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -0,0 +1,161 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_TRIANGULARMATRIXVECTOR_H
+#define EIGEN_TRIANGULARMATRIXVECTOR_H
+
+template<typename MatrixType, typename Rhs, typename Result,
+ int Mode, bool ConjLhs, bool ConjRhs, int StorageOrder>
+struct ei_product_triangular_vector_selector;
+
+template<typename Lhs, typename Rhs, typename Result, int Mode, bool ConjLhs, bool ConjRhs>
+struct ei_product_triangular_vector_selector<Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,ColMajor>
+{
+ typedef typename Rhs::Scalar Scalar;
+ enum {
+ IsLowerTriangular = ((Mode&LowerTriangularBit)==LowerTriangularBit),
+ HasUnitDiag = (Mode & UnitDiagBit)==UnitDiagBit
+ };
+ static void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename ei_traits<Lhs>::Scalar alpha)
+ {
+ static const int PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+ typename ei_conj_expr_if<ConjLhs,Lhs>::ret cjLhs(lhs);
+ typename ei_conj_expr_if<ConjRhs,Rhs>::ret cjRhs(rhs);
+
+ int size = lhs.cols();
+ for (int pi=0; pi<size; pi+=PanelWidth)
+ {
+ int actualPanelWidth = std::min(PanelWidth, size-pi);
+ for (int k=0; k<actualPanelWidth; ++k)
+ {
+ int i = pi + k;
+ int s = IsLowerTriangular ? (HasUnitDiag ? i+1 : i ) : pi;
+ int r = IsLowerTriangular ? actualPanelWidth-k : k+1;
+ if ((!HasUnitDiag) || (--r)>0)
+ res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r);
+ if (HasUnitDiag)
+ res.coeffRef(i) += alpha * cjRhs.coeff(i);
+ }
+ int r = IsLowerTriangular ? size - pi - actualPanelWidth : pi;
+ if (r>0)
+ {
+ int s = IsLowerTriangular ? pi+actualPanelWidth : 0;
+ ei_cache_friendly_product_colmajor_times_vector<ConjLhs,ConjRhs>(
+ r,
+ &(lhs.const_cast_derived().coeffRef(s,pi)), lhs.stride(),
+ rhs.segment(pi, actualPanelWidth),
+ &(res.coeffRef(s)),
+ alpha);
+ }
+ }
+ }
+};
+
+template<typename Lhs, typename Rhs, typename Result, int Mode, bool ConjLhs, bool ConjRhs>
+struct ei_product_triangular_vector_selector<Lhs,Rhs,Result,Mode,ConjLhs,ConjRhs,RowMajor>
+{
+ typedef typename Rhs::Scalar Scalar;
+ enum {
+ IsLowerTriangular = ((Mode&LowerTriangularBit)==LowerTriangularBit),
+ HasUnitDiag = (Mode & UnitDiagBit)==UnitDiagBit
+ };
+ static void run(const Lhs& lhs, const Rhs& rhs, Result& res, typename ei_traits<Lhs>::Scalar alpha)
+ {
+ static const int PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+ typename ei_conj_expr_if<ConjLhs,Lhs>::ret cjLhs(lhs);
+ typename ei_conj_expr_if<ConjRhs,Rhs>::ret cjRhs(rhs);
+ int size = lhs.cols();
+ for (int pi=0; pi<size; pi+=PanelWidth)
+ {
+ int actualPanelWidth = std::min(PanelWidth, size-pi);
+ for (int k=0; k<actualPanelWidth; ++k)
+ {
+ int i = pi + k;
+ int s = IsLowerTriangular ? pi : (HasUnitDiag ? i+1 : i);
+ int r = IsLowerTriangular ? k+1 : actualPanelWidth-k;
+ if ((!HasUnitDiag) || (--r)>0)
+ res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwise() * cjRhs.segment(s,r).transpose()).sum();
+ if (HasUnitDiag)
+ res.coeffRef(i) += alpha * cjRhs.coeff(i);
+ }
+ int r = IsLowerTriangular ? pi : size - pi - actualPanelWidth;
+ if (r>0)
+ {
+ int s = IsLowerTriangular ? 0 : pi + actualPanelWidth;
+ Block<Result,Dynamic,1> target(res,pi,0,actualPanelWidth,1);
+ ei_cache_friendly_product_rowmajor_times_vector<ConjLhs,ConjRhs>(
+ &(lhs.const_cast_derived().coeffRef(pi,s)), lhs.stride(),
+ &(rhs.const_cast_derived().coeffRef(s)), r,
+ target, alpha);
+ }
+ }
+ }
+};
+
+template<typename Lhs,typename Rhs>
+struct ei_triangular_vector_product_returntype
+ : public ReturnByValue<ei_triangular_vector_product_returntype<Lhs,Rhs>,
+ Matrix<typename ei_traits<Rhs>::Scalar,
+ Rhs::RowsAtCompileTime,Rhs::ColsAtCompileTime> >
+{
+ typedef typename Lhs::Scalar Scalar;
+ typedef typename ei_cleantype<typename Rhs::Nested>::type RhsNested;
+ ei_triangular_vector_product_returntype(const Lhs& lhs, const Rhs& rhs, Scalar alpha)
+ : m_lhs(lhs), m_rhs(rhs), m_alpha(alpha)
+ {}
+
+ template<typename Dest> void evalTo(Dest& dst) const
+ {
+ typedef typename Lhs::MatrixType MatrixType;
+
+ typedef ei_blas_traits<MatrixType> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef typename ei_cleantype<ActualLhsType>::type _ActualLhsType;
+ const ActualLhsType actualLhs = LhsBlasTraits::extract(m_lhs._expression());
+
+ typedef ei_blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef typename ei_cleantype<ActualRhsType>::type _ActualRhsType;
+ const ActualRhsType actualRhs = RhsBlasTraits::extract(m_rhs);
+
+ Scalar actualAlpha = m_alpha * LhsBlasTraits::extractScalarFactor(m_lhs._expression())
+ * RhsBlasTraits::extractScalarFactor(m_rhs);
+
+ dst.resize(m_rhs.rows(), m_rhs.cols());
+ dst.setZero();
+ ei_product_triangular_vector_selector
+ <_ActualLhsType,_ActualRhsType,Dest,
+ ei_traits<Lhs>::Mode,
+ LhsBlasTraits::NeedToConjugate,
+ RhsBlasTraits::NeedToConjugate,
+ ei_traits<Lhs>::Flags&RowMajorBit>
+ ::run(actualLhs,actualRhs,dst,actualAlpha);
+ }
+
+ const Lhs m_lhs;
+ const typename Rhs::Nested m_rhs;
+ const Scalar m_alpha;
+};
+
+#endif // EIGEN_TRIANGULARMATRIXVECTOR_H
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index bb73b03cc..0cc5ae9aa 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -94,11 +94,11 @@
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*256*256)
#endif
-/** Defines the maximal width of the blocks used in the triangular solver
- * for vectors (level 2 blas xTRSV). The default is 8.
+/** Defines the maximal width of the blocks used in the triangular product and solver
+ * for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
*/
-#ifndef EIGEN_TUNE_TRSV_PANEL_WIDTH
-#define EIGEN_TUNE_TRSV_PANEL_WIDTH 8
+#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
+#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
#endif
/** Allows to disable some optimizations which might affect the accuracy of the result.