diff options
author | Srinivas Vasudevan <srvasude@gmail.com> | 2016-12-02 14:14:45 -0800 |
---|---|---|
committer | Srinivas Vasudevan <srvasude@gmail.com> | 2016-12-02 14:14:45 -0800 |
commit | a0d3ac760f7af264bcd625a3edb6383cdb34b4f3 (patch) | |
tree | 862b3cb3fe66a83c6b6274ecbd8b3fe5d94fe224 | |
parent | 218764ee1f0a21e1faf20ed314ffafeae79eb170 (diff) | |
parent | 66f65ccc364034150bdc47333e05ebbde29825e5 (diff) |
Sync from Head.
-rw-r--r-- | Eigen/src/Core/Dot.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/GeneralProduct.h | 83 | ||||
-rw-r--r-- | Eigen/src/Core/PlainObjectBase.h | 1 | ||||
-rw-r--r-- | Eigen/src/Core/ProductEvaluators.h | 11 | ||||
-rw-r--r-- | Eigen/src/Core/util/Memory.h | 2 | ||||
-rw-r--r-- | Eigen/src/SparseCore/SparseBlock.h | 1204 | ||||
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | bench/btl/actions/basic_actions.hh | 2 | ||||
-rw-r--r-- | bench/btl/libs/BLAS/blas_interface_impl.hh | 6 | ||||
-rw-r--r-- | bench/btl/libs/BLAS/main.cpp | 2 | ||||
-rw-r--r-- | bench/btl/libs/STL/STL_interface.hh | 24 | ||||
-rw-r--r-- | bench/btl/libs/blaze/blaze_interface.hh | 16 | ||||
-rw-r--r-- | bench/btl/libs/blaze/main.cpp | 6 | ||||
-rw-r--r-- | bench/btl/libs/eigen3/eigen3_interface.hh | 8 | ||||
-rw-r--r-- | bench/btl/libs/eigen3/main_matmat.cpp | 2 | ||||
-rw-r--r-- | bench/perf_monitoring/gemm/changesets.txt | 3 | ||||
-rw-r--r-- | bench/perf_monitoring/gemm/gemv.cpp | 68 | ||||
-rw-r--r-- | bench/perf_monitoring/gemm/gemv_settings.txt | 11 |
18 files changed, 777 insertions, 682 deletions
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 1d7f2262e..f4fb4db7e 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -70,9 +70,11 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) +#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG)) typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func; EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar); - +#endif + eigen_assert(size() == other.size()); return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other); diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index f62ecfcbe..0f16cd8e3 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -224,50 +224,65 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true> // on, the other hand it is good for the cache to pack the vector anyways... EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex), - MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal + MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal }; - gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest; + typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper; + typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper; + RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha); - const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); - const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + if(!MightCannotUseDest) + { + // shortcut if we are sure to be able to use dest directly, + // this ease the compiler to generate cleaner and more optimzized code for most common cases + general_matrix_vector_product + <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + dest.data(), 1, + compatibleAlpha); + } + else + { + gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest; - RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha); + const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; - ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), - evalToDest ? dest.data() : static_dest.data()); + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + evalToDest ? dest.data() : static_dest.data()); - if(!evalToDest) - { - #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - Index size = dest.size(); - EIGEN_DENSE_STORAGE_CTOR_PLUGIN - #endif - if(!alphaIsCompatible) + if(!evalToDest) { - MappedDest(actualDestPtr, dest.size()).setZero(); - compatibleAlpha = RhsScalar(1); + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + Index size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if(!alphaIsCompatible) + { + MappedDest(actualDestPtr, dest.size()).setZero(); + compatibleAlpha = RhsScalar(1); + } + else + MappedDest(actualDestPtr, dest.size()) = dest; } - else - MappedDest(actualDestPtr, dest.size()) = dest; - } - typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper; - typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper; - general_matrix_vector_product - <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run( - actualLhs.rows(), actualLhs.cols(), - LhsMapper(actualLhs.data(), actualLhs.outerStride()), - RhsMapper(actualRhs.data(), actualRhs.innerStride()), - actualDestPtr, 1, - compatibleAlpha); + general_matrix_vector_product + <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + actualDestPtr, 1, + compatibleAlpha); - if (!evalToDest) - { - if(!alphaIsCompatible) - dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); - else - dest = MappedDest(actualDestPtr, dest.size()); + if (!evalToDest) + { + if(!alphaIsCompatible) + dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); + else + dest = MappedDest(actualDestPtr, dest.size()); + } } } }; diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 2dcd929e6..0c04f8250 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -763,6 +763,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type { // NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument. const bool is_integer = NumTraits<T>::IsInteger; + EIGEN_UNUSED_VARIABLE(is_integer); EIGEN_STATIC_ASSERT(is_integer, FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) resize(size); diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index b0e8b189e..583b7f59e 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -158,10 +158,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op< static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &) { - Index dstRows = src.rows(); - Index dstCols = src.cols(); - if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) - dst.resize(dstRows, dstCols); + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); // FIXME shall we handle nested_eval here? generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs()); } @@ -176,10 +173,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op< static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &) { - Index dstRows = src.rows(); - Index dstCols = src.cols(); - if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) - dst.resize(dstRows, dstCols); + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); // FIXME shall we handle nested_eval here? generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs()); } @@ -377,7 +371,6 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> { LhsNested actual_lhs(lhs); RhsNested actual_rhs(rhs); - internal::gemv_dense_selector<Side, (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor, bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess) diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 0439655ca..67053db62 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -523,7 +523,7 @@ template<typename T> struct smart_memmove_helper<T,true> { template<typename T> struct smart_memmove_helper<T,false> { static inline void run(const T* start, const T* end, T* target) { - if (uintptr_t(target) < uintptr_t(start)) + if (UIntPtr(target) < UIntPtr(start)) { std::copy(start, end, target); } diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index cb8d9d2e2..c3ea7e0e2 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -1,602 +1,602 @@ -// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_SPARSE_BLOCK_H
-#define EIGEN_SPARSE_BLOCK_H
-
-namespace Eigen {
-
-// Subset of columns or rows
-template<typename XprType, int BlockRows, int BlockCols>
-class BlockImpl<XprType,BlockRows,BlockCols,true,Sparse>
- : public SparseMatrixBase<Block<XprType,BlockRows,BlockCols,true> >
-{
- typedef typename internal::remove_all<typename XprType::Nested>::type _MatrixTypeNested;
- typedef Block<XprType, BlockRows, BlockCols, true> BlockType;
-public:
- enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };
-protected:
- enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
- typedef SparseMatrixBase<BlockType> Base;
- using Base::convert_index;
-public:
- EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
-
- inline BlockImpl(XprType& xpr, Index i)
- : m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize)
- {}
-
- inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
- : m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols))
- {}
-
- EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
- EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
-
- Index nonZeros() const
- {
- typedef internal::evaluator<XprType> EvaluatorType;
- EvaluatorType matEval(m_matrix);
- Index nnz = 0;
- Index end = m_outerStart + m_outerSize.value();
- for(Index j=m_outerStart; j<end; ++j)
- for(typename EvaluatorType::InnerIterator it(matEval, j); it; ++it)
- ++nnz;
- return nnz;
- }
-
- inline const Scalar coeff(Index row, Index col) const
- {
- return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
- }
-
- inline const Scalar coeff(Index index) const
- {
- return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
- }
-
- inline const XprType& nestedExpression() const { return m_matrix; }
- inline XprType& nestedExpression() { return m_matrix; }
- Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
- Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
- Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
- Index blockCols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
-
- protected:
-
- typename internal::ref_selector<XprType>::non_const_type m_matrix;
- Index m_outerStart;
- const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
-
- protected:
- // Disable assignment with clear error message.
- // Note that simply removing operator= yields compilation errors with ICC+MSVC
- template<typename T>
- BlockImpl& operator=(const T&)
- {
- EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
- return *this;
- }
-};
-
-
-/***************************************************************************
-* specialization for SparseMatrix
-***************************************************************************/
-
-namespace internal {
-
-template<typename SparseMatrixType, int BlockRows, int BlockCols>
-class sparse_matrix_block_impl
- : public SparseCompressedBase<Block<SparseMatrixType,BlockRows,BlockCols,true> >
-{
- typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _MatrixTypeNested;
- typedef Block<SparseMatrixType, BlockRows, BlockCols, true> BlockType;
- typedef SparseCompressedBase<Block<SparseMatrixType,BlockRows,BlockCols,true> > Base;
- using Base::convert_index;
-public:
- enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };
- EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
-protected:
- typedef typename Base::IndexVector IndexVector;
- enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
-public:
-
- inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index i)
- : m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize)
- {}
-
- inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
- : m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols))
- {}
-
- template<typename OtherDerived>
- inline BlockType& operator=(const SparseMatrixBase<OtherDerived>& other)
- {
- typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _NestedMatrixType;
- _NestedMatrixType& matrix = m_matrix;
- // This assignment is slow if this vector set is not empty
- // and/or it is not at the end of the nonzeros of the underlying matrix.
-
- // 1 - eval to a temporary to avoid transposition and/or aliasing issues
- Ref<const SparseMatrix<Scalar, IsRowMajor ? RowMajor : ColMajor, StorageIndex> > tmp(other.derived());
- eigen_internal_assert(tmp.outerSize()==m_outerSize.value());
-
- // 2 - let's check whether there is enough allocated memory
- Index nnz = tmp.nonZeros();
- Index start = m_outerStart==0 ? 0 : m_matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
- Index end = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]; // ending position of the current block
- Index block_size = end - start; // available room in the current block
- Index tail_size = m_matrix.outerIndexPtr()[m_matrix.outerSize()] - end;
-
- Index free_size = m_matrix.isCompressed()
- ? Index(matrix.data().allocatedSize()) + block_size
- : block_size;
-
- Index tmp_start = tmp.outerIndexPtr()[0];
-
- bool update_trailing_pointers = false;
- if(nnz>free_size)
- {
- // realloc manually to reduce copies
- typename SparseMatrixType::Storage newdata(m_matrix.data().allocatedSize() - block_size + nnz);
-
- internal::smart_copy(m_matrix.valuePtr(), m_matrix.valuePtr() + start, newdata.valuePtr());
- internal::smart_copy(m_matrix.innerIndexPtr(), m_matrix.innerIndexPtr() + start, newdata.indexPtr());
-
- internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, newdata.valuePtr() + start);
- internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, newdata.indexPtr() + start);
-
- internal::smart_copy(matrix.valuePtr()+end, matrix.valuePtr()+end + tail_size, newdata.valuePtr()+start+nnz);
- internal::smart_copy(matrix.innerIndexPtr()+end, matrix.innerIndexPtr()+end + tail_size, newdata.indexPtr()+start+nnz);
-
- newdata.resize(m_matrix.outerIndexPtr()[m_matrix.outerSize()] - block_size + nnz);
-
- matrix.data().swap(newdata);
-
- update_trailing_pointers = true;
- }
- else
- {
- if(m_matrix.isCompressed())
- {
- // no need to realloc, simply copy the tail at its respective position and insert tmp
- matrix.data().resize(start + nnz + tail_size);
-
- internal::smart_memmove(matrix.valuePtr()+end, matrix.valuePtr() + end+tail_size, matrix.valuePtr() + start+nnz);
- internal::smart_memmove(matrix.innerIndexPtr()+end, matrix.innerIndexPtr() + end+tail_size, matrix.innerIndexPtr() + start+nnz);
-
- update_trailing_pointers = true;
- }
-
- internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, matrix.valuePtr() + start);
- internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, matrix.innerIndexPtr() + start);
- }
-
- // update outer index pointers and innerNonZeros
- if(IsVectorAtCompileTime)
- {
- if(!m_matrix.isCompressed())
- matrix.innerNonZeroPtr()[m_outerStart] = StorageIndex(nnz);
- matrix.outerIndexPtr()[m_outerStart] = StorageIndex(start);
- }
- else
- {
- StorageIndex p = StorageIndex(start);
- for(Index k=0; k<m_outerSize.value(); ++k)
- {
- StorageIndex nnz_k = internal::convert_index<StorageIndex>(tmp.innerVector(k).nonZeros());
- if(!m_matrix.isCompressed())
- matrix.innerNonZeroPtr()[m_outerStart+k] = nnz_k;
- matrix.outerIndexPtr()[m_outerStart+k] = p;
- p += nnz_k;
- }
- }
-
- if(update_trailing_pointers)
- {
- StorageIndex offset = internal::convert_index<StorageIndex>(nnz - block_size);
- for(Index k = m_outerStart + m_outerSize.value(); k<=matrix.outerSize(); ++k)
- {
- matrix.outerIndexPtr()[k] += offset;
- }
- }
-
- return derived();
- }
-
- inline BlockType& operator=(const BlockType& other)
- {
- return operator=<BlockType>(other);
- }
-
- inline const Scalar* valuePtr() const
- { return m_matrix.valuePtr(); }
- inline Scalar* valuePtr()
- { return m_matrix.valuePtr(); }
-
- inline const StorageIndex* innerIndexPtr() const
- { return m_matrix.innerIndexPtr(); }
- inline StorageIndex* innerIndexPtr()
- { return m_matrix.innerIndexPtr(); }
-
- inline const StorageIndex* outerIndexPtr() const
- { return m_matrix.outerIndexPtr() + m_outerStart; }
- inline StorageIndex* outerIndexPtr()
- { return m_matrix.outerIndexPtr() + m_outerStart; }
-
- inline const StorageIndex* innerNonZeroPtr() const
- { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
- inline StorageIndex* innerNonZeroPtr()
- { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
-
- bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }
-
- inline Scalar& coeffRef(Index row, Index col)
- {
- return m_matrix.coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
- }
-
- inline const Scalar coeff(Index row, Index col) const
- {
- return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
- }
-
- inline const Scalar coeff(Index index) const
- {
- return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
- }
-
- const Scalar& lastCoeff() const
- {
- EIGEN_STATIC_ASSERT_VECTOR_ONLY(sparse_matrix_block_impl);
- eigen_assert(Base::nonZeros()>0);
- if(m_matrix.isCompressed())
- return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart+1]-1];
- else
- return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart]+m_matrix.innerNonZeroPtr()[m_outerStart]-1];
- }
-
- EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
- EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
-
- inline const SparseMatrixType& nestedExpression() const { return m_matrix; }
- inline SparseMatrixType& nestedExpression() { return m_matrix; }
- Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
- Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
- Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
- Index blockCols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
-
- protected:
-
- typename internal::ref_selector<SparseMatrixType>::non_const_type m_matrix;
- Index m_outerStart;
- const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
-
-};
-
-} // namespace internal
-
-template<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols>
-class BlockImpl<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true,Sparse>
- : public internal::sparse_matrix_block_impl<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols>
-{
-public:
- typedef _StorageIndex StorageIndex;
- typedef SparseMatrix<_Scalar, _Options, _StorageIndex> SparseMatrixType;
- typedef internal::sparse_matrix_block_impl<SparseMatrixType,BlockRows,BlockCols> Base;
- inline BlockImpl(SparseMatrixType& xpr, Index i)
- : Base(xpr, i)
- {}
-
- inline BlockImpl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
- : Base(xpr, startRow, startCol, blockRows, blockCols)
- {}
-
- using Base::operator=;
-};
-
-template<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols>
-class BlockImpl<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true,Sparse>
- : public internal::sparse_matrix_block_impl<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols>
-{
-public:
- typedef _StorageIndex StorageIndex;
- typedef const SparseMatrix<_Scalar, _Options, _StorageIndex> SparseMatrixType;
- typedef internal::sparse_matrix_block_impl<SparseMatrixType,BlockRows,BlockCols> Base;
- inline BlockImpl(SparseMatrixType& xpr, Index i)
- : Base(xpr, i)
- {}
-
- inline BlockImpl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
- : Base(xpr, startRow, startCol, blockRows, blockCols)
- {}
-
- using Base::operator=;
-private:
- template<typename Derived> BlockImpl(const SparseMatrixBase<Derived>& xpr, Index i);
- template<typename Derived> BlockImpl(const SparseMatrixBase<Derived>& xpr);
-};
-
-//----------
-
-/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this
- * is col-major (resp. row-major).
- */
-template<typename Derived>
-typename SparseMatrixBase<Derived>::InnerVectorReturnType SparseMatrixBase<Derived>::innerVector(Index outer)
-{ return InnerVectorReturnType(derived(), outer); }
-
-/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this
- * is col-major (resp. row-major). Read-only.
- */
-template<typename Derived>
-const typename SparseMatrixBase<Derived>::ConstInnerVectorReturnType SparseMatrixBase<Derived>::innerVector(Index outer) const
-{ return ConstInnerVectorReturnType(derived(), outer); }
-
-/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this
- * is col-major (resp. row-major).
- */
-template<typename Derived>
-typename SparseMatrixBase<Derived>::InnerVectorsReturnType
-SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize)
-{
- return Block<Derived,Dynamic,Dynamic,true>(derived(),
- IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart,
- IsRowMajor ? outerSize : rows(), IsRowMajor ? cols() : outerSize);
-
-}
-
-/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this
- * is col-major (resp. row-major). Read-only.
- */
-template<typename Derived>
-const typename SparseMatrixBase<Derived>::ConstInnerVectorsReturnType
-SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize) const
-{
- return Block<const Derived,Dynamic,Dynamic,true>(derived(),
- IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart,
- IsRowMajor ? outerSize : rows(), IsRowMajor ? cols() : outerSize);
-
-}
-
-/** Generic implementation of sparse Block expression.
- * Real-only.
- */
-template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
-class BlockImpl<XprType,BlockRows,BlockCols,InnerPanel,Sparse>
- : public SparseMatrixBase<Block<XprType,BlockRows,BlockCols,InnerPanel> >, internal::no_assignment_operator
-{
- typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
- typedef SparseMatrixBase<BlockType> Base;
- using Base::convert_index;
-public:
- enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };
- EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
-
- typedef typename internal::remove_all<typename XprType::Nested>::type _MatrixTypeNested;
-
- /** Column or Row constructor
- */
- inline BlockImpl(XprType& xpr, Index i)
- : m_matrix(xpr),
- m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? convert_index(i) : 0),
- m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? convert_index(i) : 0),
- m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
- m_blockCols(BlockCols==1 ? 1 : xpr.cols())
- {}
-
- /** Dynamic-size constructor
- */
- inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
- : m_matrix(xpr), m_startRow(convert_index(startRow)), m_startCol(convert_index(startCol)), m_blockRows(convert_index(blockRows)), m_blockCols(convert_index(blockCols))
- {}
-
- inline Index rows() const { return m_blockRows.value(); }
- inline Index cols() const { return m_blockCols.value(); }
-
- inline Scalar& coeffRef(Index row, Index col)
- {
- return m_matrix.coeffRef(row + m_startRow.value(), col + m_startCol.value());
- }
-
- inline const Scalar coeff(Index row, Index col) const
- {
- return m_matrix.coeff(row + m_startRow.value(), col + m_startCol.value());
- }
-
- inline Scalar& coeffRef(Index index)
- {
- return m_matrix.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
- m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
- }
-
- inline const Scalar coeff(Index index) const
- {
- return m_matrix.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
- m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
- }
-
- inline const XprType& nestedExpression() const { return m_matrix; }
- inline XprType& nestedExpression() { return m_matrix; }
- Index startRow() const { return m_startRow.value(); }
- Index startCol() const { return m_startCol.value(); }
- Index blockRows() const { return m_blockRows.value(); }
- Index blockCols() const { return m_blockCols.value(); }
-
- protected:
-// friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
- friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;
-
- Index nonZeros() const { return Dynamic; }
-
- typename internal::ref_selector<XprType>::non_const_type m_matrix;
- const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
- const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
- const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
- const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;
-
- protected:
- // Disable assignment with clear error message.
- // Note that simply removing operator= yields compilation errors with ICC+MSVC
- template<typename T>
- BlockImpl& operator=(const T&)
- {
- EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
- return *this;
- }
-
-};
-
-namespace internal {
-
-template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
-struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased >
- : public evaluator_base<Block<ArgType,BlockRows,BlockCols,InnerPanel> >
-{
- class InnerVectorInnerIterator;
- class OuterVectorInnerIterator;
- public:
- typedef Block<ArgType,BlockRows,BlockCols,InnerPanel> XprType;
- typedef typename XprType::StorageIndex StorageIndex;
- typedef typename XprType::Scalar Scalar;
-
- enum {
- IsRowMajor = XprType::IsRowMajor,
-
- OuterVector = (BlockCols==1 && ArgType::IsRowMajor)
- | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
- // revert to || as soon as not needed anymore.
- (BlockRows==1 && !ArgType::IsRowMajor),
-
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
- Flags = XprType::Flags
- };
-
- typedef typename internal::conditional<OuterVector,OuterVectorInnerIterator,InnerVectorInnerIterator>::type InnerIterator;
-
- explicit unary_evaluator(const XprType& op)
- : m_argImpl(op.nestedExpression()), m_block(op)
- {}
-
- inline Index nonZerosEstimate() const {
- Index nnz = m_block.nonZeros();
- if(nnz<0)
- return m_argImpl.nonZerosEstimate() * m_block.size() / m_block.nestedExpression().size();
- return nnz;
- }
-
- protected:
- typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
-
- evaluator<ArgType> m_argImpl;
- const XprType &m_block;
-};
-
-template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
-class unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::InnerVectorInnerIterator
- : public EvalIterator
-{
- enum { IsRowMajor = unary_evaluator::IsRowMajor };
- const XprType& m_block;
- Index m_end;
-public:
-
- EIGEN_STRONG_INLINE InnerVectorInnerIterator(const unary_evaluator& aEval, Index outer)
- : EvalIterator(aEval.m_argImpl, outer + (IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol())),
- m_block(aEval.m_block),
- m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())
- {
- while( (EvalIterator::operator bool()) && (EvalIterator::index() < (IsRowMajor ? m_block.startCol() : m_block.startRow())) )
- EvalIterator::operator++();
- }
-
- inline StorageIndex index() const { return EvalIterator::index() - convert_index<StorageIndex>(IsRowMajor ? m_block.startCol() : m_block.startRow()); }
- inline Index outer() const { return EvalIterator::outer() - (IsRowMajor ? m_block.startRow() : m_block.startCol()); }
- inline Index row() const { return EvalIterator::row() - m_block.startRow(); }
- inline Index col() const { return EvalIterator::col() - m_block.startCol(); }
-
- inline operator bool() const { return EvalIterator::operator bool() && EvalIterator::index() < m_end; }
-};
-
-template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
-class unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::OuterVectorInnerIterator
-{
- enum { IsRowMajor = unary_evaluator::IsRowMajor };
- const unary_evaluator& m_eval;
- Index m_outerPos;
- Index m_innerIndex;
- Scalar m_value;
- Index m_end;
-public:
-
- EIGEN_STRONG_INLINE OuterVectorInnerIterator(const unary_evaluator& aEval, Index outer)
- : m_eval(aEval),
- m_outerPos( (IsRowMajor ? aEval.m_block.startCol() : aEval.m_block.startRow()) - 1), // -1 so that operator++ finds the first non-zero entry
- m_innerIndex(IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol()),
- m_value(0),
- m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())
- {
- EIGEN_UNUSED_VARIABLE(outer);
- eigen_assert(outer==0);
-
- ++(*this);
- }
-
- inline StorageIndex index() const { return convert_index<StorageIndex>(m_outerPos - (IsRowMajor ? m_eval.m_block.startCol() : m_eval.m_block.startRow())); }
- inline Index outer() const { return 0; }
- inline Index row() const { return IsRowMajor ? 0 : index(); }
- inline Index col() const { return IsRowMajor ? index() : 0; }
-
- inline Scalar value() const { return m_value; }
-
- inline OuterVectorInnerIterator& operator++()
- {
- // search next non-zero entry
- while(++m_outerPos<m_end)
- {
- EvalIterator it(m_eval.m_argImpl, m_outerPos);
- // search for the key m_innerIndex in the current outer-vector
- while(it && it.index() < m_innerIndex) ++it;
- if(it && it.index()==m_innerIndex)
- {
- m_value = it.value();
- break;
- }
- }
- return *this;
- }
-
- inline operator bool() const { return m_outerPos < m_end; }
-};
-
-template<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols>
-struct unary_evaluator<Block<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true>, IteratorBased>
- : evaluator<SparseCompressedBase<Block<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> > >
-{
- typedef Block<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> XprType;
- typedef evaluator<SparseCompressedBase<XprType> > Base;
- explicit unary_evaluator(const XprType &xpr) : Base(xpr) {}
-};
-
-template<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols>
-struct unary_evaluator<Block<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true>, IteratorBased>
- : evaluator<SparseCompressedBase<Block<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> > >
-{
- typedef Block<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> XprType;
- typedef evaluator<SparseCompressedBase<XprType> > Base;
- explicit unary_evaluator(const XprType &xpr) : Base(xpr) {}
-};
-
-} // end namespace internal
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_SPARSE_BLOCK_H
+// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSE_BLOCK_H +#define EIGEN_SPARSE_BLOCK_H + +namespace Eigen { + +// Subset of columns or rows +template<typename XprType, int BlockRows, int BlockCols> +class BlockImpl<XprType,BlockRows,BlockCols,true,Sparse> + : public SparseMatrixBase<Block<XprType,BlockRows,BlockCols,true> > +{ + typedef typename internal::remove_all<typename XprType::Nested>::type _MatrixTypeNested; + typedef Block<XprType, BlockRows, BlockCols, true> BlockType; +public: + enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor }; +protected: + enum { OuterSize = IsRowMajor ? BlockRows : BlockCols }; + typedef SparseMatrixBase<BlockType> Base; + using Base::convert_index; +public: + EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) + + inline BlockImpl(XprType& xpr, Index i) + : m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize) + {} + + inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) + : m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols)) + {} + + EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); } + EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); } + + Index nonZeros() const + { + typedef internal::evaluator<XprType> EvaluatorType; + EvaluatorType matEval(m_matrix); + Index nnz = 0; + Index end = m_outerStart + m_outerSize.value(); + for(Index j=m_outerStart; j<end; ++j) + for(typename EvaluatorType::InnerIterator it(matEval, j); it; ++it) + ++nnz; + return nnz; + } + + inline const Scalar coeff(Index row, Index col) const + { + return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); + } + + inline const Scalar coeff(Index index) const + { + return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart); + } + + inline const XprType& nestedExpression() const { return m_matrix; } + inline XprType& nestedExpression() { return m_matrix; } + Index startRow() const { return IsRowMajor ? m_outerStart : 0; } + Index startCol() const { return IsRowMajor ? 0 : m_outerStart; } + Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); } + Index blockCols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); } + + protected: + + typename internal::ref_selector<XprType>::non_const_type m_matrix; + Index m_outerStart; + const internal::variable_if_dynamic<Index, OuterSize> m_outerSize; + + protected: + // Disable assignment with clear error message. + // Note that simply removing operator= yields compilation errors with ICC+MSVC + template<typename T> + BlockImpl& operator=(const T&) + { + EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY); + return *this; + } +}; + + +/*************************************************************************** +* specialization for SparseMatrix +***************************************************************************/ + +namespace internal { + +template<typename SparseMatrixType, int BlockRows, int BlockCols> +class sparse_matrix_block_impl + : public SparseCompressedBase<Block<SparseMatrixType,BlockRows,BlockCols,true> > +{ + typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _MatrixTypeNested; + typedef Block<SparseMatrixType, BlockRows, BlockCols, true> BlockType; + typedef SparseCompressedBase<Block<SparseMatrixType,BlockRows,BlockCols,true> > Base; + using Base::convert_index; +public: + enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor }; + EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) +protected: + typedef typename Base::IndexVector IndexVector; + enum { OuterSize = IsRowMajor ? BlockRows : BlockCols }; +public: + + inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index i) + : m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize) + {} + + inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) + : m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols)) + {} + + template<typename OtherDerived> + inline BlockType& operator=(const SparseMatrixBase<OtherDerived>& other) + { + typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _NestedMatrixType; + _NestedMatrixType& matrix = m_matrix; + // This assignment is slow if this vector set is not empty + // and/or it is not at the end of the nonzeros of the underlying matrix. + + // 1 - eval to a temporary to avoid transposition and/or aliasing issues + Ref<const SparseMatrix<Scalar, IsRowMajor ? RowMajor : ColMajor, StorageIndex> > tmp(other.derived()); + eigen_internal_assert(tmp.outerSize()==m_outerSize.value()); + + // 2 - let's check whether there is enough allocated memory + Index nnz = tmp.nonZeros(); + Index start = m_outerStart==0 ? 0 : m_matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block + Index end = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]; // ending position of the current block + Index block_size = end - start; // available room in the current block + Index tail_size = m_matrix.outerIndexPtr()[m_matrix.outerSize()] - end; + + Index free_size = m_matrix.isCompressed() + ? Index(matrix.data().allocatedSize()) + block_size + : block_size; + + Index tmp_start = tmp.outerIndexPtr()[0]; + + bool update_trailing_pointers = false; + if(nnz>free_size) + { + // realloc manually to reduce copies + typename SparseMatrixType::Storage newdata(m_matrix.data().allocatedSize() - block_size + nnz); + + internal::smart_copy(m_matrix.valuePtr(), m_matrix.valuePtr() + start, newdata.valuePtr()); + internal::smart_copy(m_matrix.innerIndexPtr(), m_matrix.innerIndexPtr() + start, newdata.indexPtr()); + + internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, newdata.valuePtr() + start); + internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, newdata.indexPtr() + start); + + internal::smart_copy(matrix.valuePtr()+end, matrix.valuePtr()+end + tail_size, newdata.valuePtr()+start+nnz); + internal::smart_copy(matrix.innerIndexPtr()+end, matrix.innerIndexPtr()+end + tail_size, newdata.indexPtr()+start+nnz); + + newdata.resize(m_matrix.outerIndexPtr()[m_matrix.outerSize()] - block_size + nnz); + + matrix.data().swap(newdata); + + update_trailing_pointers = true; + } + else + { + if(m_matrix.isCompressed()) + { + // no need to realloc, simply copy the tail at its respective position and insert tmp + matrix.data().resize(start + nnz + tail_size); + + internal::smart_memmove(matrix.valuePtr()+end, matrix.valuePtr() + end+tail_size, matrix.valuePtr() + start+nnz); + internal::smart_memmove(matrix.innerIndexPtr()+end, matrix.innerIndexPtr() + end+tail_size, matrix.innerIndexPtr() + start+nnz); + + update_trailing_pointers = true; + } + + internal::smart_copy(tmp.valuePtr() + tmp_start, tmp.valuePtr() + tmp_start + nnz, matrix.valuePtr() + start); + internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz, matrix.innerIndexPtr() + start); + } + + // update outer index pointers and innerNonZeros + if(IsVectorAtCompileTime) + { + if(!m_matrix.isCompressed()) + matrix.innerNonZeroPtr()[m_outerStart] = StorageIndex(nnz); + matrix.outerIndexPtr()[m_outerStart] = StorageIndex(start); + } + else + { + StorageIndex p = StorageIndex(start); + for(Index k=0; k<m_outerSize.value(); ++k) + { + StorageIndex nnz_k = internal::convert_index<StorageIndex>(tmp.innerVector(k).nonZeros()); + if(!m_matrix.isCompressed()) + matrix.innerNonZeroPtr()[m_outerStart+k] = nnz_k; + matrix.outerIndexPtr()[m_outerStart+k] = p; + p += nnz_k; + } + } + + if(update_trailing_pointers) + { + StorageIndex offset = internal::convert_index<StorageIndex>(nnz - block_size); + for(Index k = m_outerStart + m_outerSize.value(); k<=matrix.outerSize(); ++k) + { + matrix.outerIndexPtr()[k] += offset; + } + } + + return derived(); + } + + inline BlockType& operator=(const BlockType& other) + { + return operator=<BlockType>(other); + } + + inline const Scalar* valuePtr() const + { return m_matrix.valuePtr(); } + inline Scalar* valuePtr() + { return m_matrix.valuePtr(); } + + inline const StorageIndex* innerIndexPtr() const + { return m_matrix.innerIndexPtr(); } + inline StorageIndex* innerIndexPtr() + { return m_matrix.innerIndexPtr(); } + + inline const StorageIndex* outerIndexPtr() const + { return m_matrix.outerIndexPtr() + m_outerStart; } + inline StorageIndex* outerIndexPtr() + { return m_matrix.outerIndexPtr() + m_outerStart; } + + inline const StorageIndex* innerNonZeroPtr() const + { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); } + inline StorageIndex* innerNonZeroPtr() + { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); } + + bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; } + + inline Scalar& coeffRef(Index row, Index col) + { + return m_matrix.coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); + } + + inline const Scalar coeff(Index row, Index col) const + { + return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); + } + + inline const Scalar coeff(Index index) const + { + return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart); + } + + const Scalar& lastCoeff() const + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(sparse_matrix_block_impl); + eigen_assert(Base::nonZeros()>0); + if(m_matrix.isCompressed()) + return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart+1]-1]; + else + return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart]+m_matrix.innerNonZeroPtr()[m_outerStart]-1]; + } + + EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); } + EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); } + + inline const SparseMatrixType& nestedExpression() const { return m_matrix; } + inline SparseMatrixType& nestedExpression() { return m_matrix; } + Index startRow() const { return IsRowMajor ? m_outerStart : 0; } + Index startCol() const { return IsRowMajor ? 0 : m_outerStart; } + Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); } + Index blockCols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); } + + protected: + + typename internal::ref_selector<SparseMatrixType>::non_const_type m_matrix; + Index m_outerStart; + const internal::variable_if_dynamic<Index, OuterSize> m_outerSize; + +}; + +} // namespace internal + +template<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols> +class BlockImpl<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true,Sparse> + : public internal::sparse_matrix_block_impl<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols> +{ +public: + typedef _StorageIndex StorageIndex; + typedef SparseMatrix<_Scalar, _Options, _StorageIndex> SparseMatrixType; + typedef internal::sparse_matrix_block_impl<SparseMatrixType,BlockRows,BlockCols> Base; + inline BlockImpl(SparseMatrixType& xpr, Index i) + : Base(xpr, i) + {} + + inline BlockImpl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) + : Base(xpr, startRow, startCol, blockRows, blockCols) + {} + + using Base::operator=; +}; + +template<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols> +class BlockImpl<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true,Sparse> + : public internal::sparse_matrix_block_impl<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols> +{ +public: + typedef _StorageIndex StorageIndex; + typedef const SparseMatrix<_Scalar, _Options, _StorageIndex> SparseMatrixType; + typedef internal::sparse_matrix_block_impl<SparseMatrixType,BlockRows,BlockCols> Base; + inline BlockImpl(SparseMatrixType& xpr, Index i) + : Base(xpr, i) + {} + + inline BlockImpl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) + : Base(xpr, startRow, startCol, blockRows, blockCols) + {} + + using Base::operator=; +private: + template<typename Derived> BlockImpl(const SparseMatrixBase<Derived>& xpr, Index i); + template<typename Derived> BlockImpl(const SparseMatrixBase<Derived>& xpr); +}; + +//---------- + +/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this + * is col-major (resp. row-major). + */ +template<typename Derived> +typename SparseMatrixBase<Derived>::InnerVectorReturnType SparseMatrixBase<Derived>::innerVector(Index outer) +{ return InnerVectorReturnType(derived(), outer); } + +/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this + * is col-major (resp. row-major). Read-only. + */ +template<typename Derived> +const typename SparseMatrixBase<Derived>::ConstInnerVectorReturnType SparseMatrixBase<Derived>::innerVector(Index outer) const +{ return ConstInnerVectorReturnType(derived(), outer); } + +/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this + * is col-major (resp. row-major). + */ +template<typename Derived> +typename SparseMatrixBase<Derived>::InnerVectorsReturnType +SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize) +{ + return Block<Derived,Dynamic,Dynamic,true>(derived(), + IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart, + IsRowMajor ? outerSize : rows(), IsRowMajor ? cols() : outerSize); + +} + +/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this + * is col-major (resp. row-major). Read-only. + */ +template<typename Derived> +const typename SparseMatrixBase<Derived>::ConstInnerVectorsReturnType +SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize) const +{ + return Block<const Derived,Dynamic,Dynamic,true>(derived(), + IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart, + IsRowMajor ? outerSize : rows(), IsRowMajor ? cols() : outerSize); + +} + +/** Generic implementation of sparse Block expression. + * Real-only. + */ +template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> +class BlockImpl<XprType,BlockRows,BlockCols,InnerPanel,Sparse> + : public SparseMatrixBase<Block<XprType,BlockRows,BlockCols,InnerPanel> >, internal::no_assignment_operator +{ + typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType; + typedef SparseMatrixBase<BlockType> Base; + using Base::convert_index; +public: + enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor }; + EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) + + typedef typename internal::remove_all<typename XprType::Nested>::type _MatrixTypeNested; + + /** Column or Row constructor + */ + inline BlockImpl(XprType& xpr, Index i) + : m_matrix(xpr), + m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? convert_index(i) : 0), + m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? convert_index(i) : 0), + m_blockRows(BlockRows==1 ? 1 : xpr.rows()), + m_blockCols(BlockCols==1 ? 1 : xpr.cols()) + {} + + /** Dynamic-size constructor + */ + inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) + : m_matrix(xpr), m_startRow(convert_index(startRow)), m_startCol(convert_index(startCol)), m_blockRows(convert_index(blockRows)), m_blockCols(convert_index(blockCols)) + {} + + inline Index rows() const { return m_blockRows.value(); } + inline Index cols() const { return m_blockCols.value(); } + + inline Scalar& coeffRef(Index row, Index col) + { + return m_matrix.coeffRef(row + m_startRow.value(), col + m_startCol.value()); + } + + inline const Scalar coeff(Index row, Index col) const + { + return m_matrix.coeff(row + m_startRow.value(), col + m_startCol.value()); + } + + inline Scalar& coeffRef(Index index) + { + return m_matrix.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + inline const Scalar coeff(Index index) const + { + return m_matrix.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + inline const XprType& nestedExpression() const { return m_matrix; } + inline XprType& nestedExpression() { return m_matrix; } + Index startRow() const { return m_startRow.value(); } + Index startCol() const { return m_startCol.value(); } + Index blockRows() const { return m_blockRows.value(); } + Index blockCols() const { return m_blockCols.value(); } + + protected: +// friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>; + friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >; + + Index nonZeros() const { return Dynamic; } + + typename internal::ref_selector<XprType>::non_const_type m_matrix; + const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow; + const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol; + const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows; + const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols; + + protected: + // Disable assignment with clear error message. + // Note that simply removing operator= yields compilation errors with ICC+MSVC + template<typename T> + BlockImpl& operator=(const T&) + { + EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY); + return *this; + } + +}; + +namespace internal { + +template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> +struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased > + : public evaluator_base<Block<ArgType,BlockRows,BlockCols,InnerPanel> > +{ + class InnerVectorInnerIterator; + class OuterVectorInnerIterator; + public: + typedef Block<ArgType,BlockRows,BlockCols,InnerPanel> XprType; + typedef typename XprType::StorageIndex StorageIndex; + typedef typename XprType::Scalar Scalar; + + enum { + IsRowMajor = XprType::IsRowMajor, + + OuterVector = (BlockCols==1 && ArgType::IsRowMajor) + | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". + // revert to || as soon as not needed anymore. + (BlockRows==1 && !ArgType::IsRowMajor), + + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + Flags = XprType::Flags + }; + + typedef typename internal::conditional<OuterVector,OuterVectorInnerIterator,InnerVectorInnerIterator>::type InnerIterator; + + explicit unary_evaluator(const XprType& op) + : m_argImpl(op.nestedExpression()), m_block(op) + {} + + inline Index nonZerosEstimate() const { + Index nnz = m_block.nonZeros(); + if(nnz<0) + return m_argImpl.nonZerosEstimate() * m_block.size() / m_block.nestedExpression().size(); + return nnz; + } + + protected: + typedef typename evaluator<ArgType>::InnerIterator EvalIterator; + + evaluator<ArgType> m_argImpl; + const XprType &m_block; +}; + +template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> +class unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::InnerVectorInnerIterator + : public EvalIterator +{ + enum { IsRowMajor = unary_evaluator::IsRowMajor }; + const XprType& m_block; + Index m_end; +public: + + EIGEN_STRONG_INLINE InnerVectorInnerIterator(const unary_evaluator& aEval, Index outer) + : EvalIterator(aEval.m_argImpl, outer + (IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol())), + m_block(aEval.m_block), + m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows()) + { + while( (EvalIterator::operator bool()) && (EvalIterator::index() < (IsRowMajor ? m_block.startCol() : m_block.startRow())) ) + EvalIterator::operator++(); + } + + inline StorageIndex index() const { return EvalIterator::index() - convert_index<StorageIndex>(IsRowMajor ? m_block.startCol() : m_block.startRow()); } + inline Index outer() const { return EvalIterator::outer() - (IsRowMajor ? m_block.startRow() : m_block.startCol()); } + inline Index row() const { return EvalIterator::row() - m_block.startRow(); } + inline Index col() const { return EvalIterator::col() - m_block.startCol(); } + + inline operator bool() const { return EvalIterator::operator bool() && EvalIterator::index() < m_end; } +}; + +template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> +class unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::OuterVectorInnerIterator +{ + enum { IsRowMajor = unary_evaluator::IsRowMajor }; + const unary_evaluator& m_eval; + Index m_outerPos; + Index m_innerIndex; + Scalar m_value; + Index m_end; +public: + + EIGEN_STRONG_INLINE OuterVectorInnerIterator(const unary_evaluator& aEval, Index outer) + : m_eval(aEval), + m_outerPos( (IsRowMajor ? aEval.m_block.startCol() : aEval.m_block.startRow()) - 1), // -1 so that operator++ finds the first non-zero entry + m_innerIndex(IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol()), + m_value(0), + m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows()) + { + EIGEN_UNUSED_VARIABLE(outer); + eigen_assert(outer==0); + + ++(*this); + } + + inline StorageIndex index() const { return convert_index<StorageIndex>(m_outerPos - (IsRowMajor ? m_eval.m_block.startCol() : m_eval.m_block.startRow())); } + inline Index outer() const { return 0; } + inline Index row() const { return IsRowMajor ? 0 : index(); } + inline Index col() const { return IsRowMajor ? index() : 0; } + + inline Scalar value() const { return m_value; } + + inline OuterVectorInnerIterator& operator++() + { + // search next non-zero entry + while(++m_outerPos<m_end) + { + EvalIterator it(m_eval.m_argImpl, m_outerPos); + // search for the key m_innerIndex in the current outer-vector + while(it && it.index() < m_innerIndex) ++it; + if(it && it.index()==m_innerIndex) + { + m_value = it.value(); + break; + } + } + return *this; + } + + inline operator bool() const { return m_outerPos < m_end; } +}; + +template<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols> +struct unary_evaluator<Block<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true>, IteratorBased> + : evaluator<SparseCompressedBase<Block<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> > > +{ + typedef Block<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> XprType; + typedef evaluator<SparseCompressedBase<XprType> > Base; + explicit unary_evaluator(const XprType &xpr) : Base(xpr) {} +}; + +template<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols> +struct unary_evaluator<Block<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true>, IteratorBased> + : evaluator<SparseCompressedBase<Block<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> > > +{ + typedef Block<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> XprType; + typedef evaluator<SparseCompressedBase<XprType> > Base; + explicit unary_evaluator(const XprType &xpr) : Base(xpr) {} +}; + +} // end namespace internal + + +} // end namespace Eigen + +#endif // EIGEN_SPARSE_BLOCK_H @@ -1,3 +1,3 @@ -**Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms.**
-
-For more information go to http://eigen.tuxfamily.org/.
+**Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms.** + +For more information go to http://eigen.tuxfamily.org/. diff --git a/bench/btl/actions/basic_actions.hh b/bench/btl/actions/basic_actions.hh index a3333ea26..62442f01f 100644 --- a/bench/btl/actions/basic_actions.hh +++ b/bench/btl/actions/basic_actions.hh @@ -6,7 +6,7 @@ #include "action_atv_product.hh" #include "action_matrix_matrix_product.hh" -// #include "action_ata_product.hh" +#include "action_ata_product.hh" #include "action_aat_product.hh" #include "action_trisolve.hh" diff --git a/bench/btl/libs/BLAS/blas_interface_impl.hh b/bench/btl/libs/BLAS/blas_interface_impl.hh index fc4ba2a1f..9e0a64905 100644 --- a/bench/btl/libs/BLAS/blas_interface_impl.hh +++ b/bench/btl/libs/BLAS/blas_interface_impl.hh @@ -46,9 +46,9 @@ public : BLAS_FUNC(gemm)(¬rans,¬rans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); } -// static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ -// ssyrk_(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N); -// } + static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ + BLAS_FUNC(syrk)(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N); + } static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){ BLAS_FUNC(syrk)(&lower,¬rans,&N,&N,&fone,A,&N,&fzero,X,&N); diff --git a/bench/btl/libs/BLAS/main.cpp b/bench/btl/libs/BLAS/main.cpp index 564d55ef2..fd991490a 100644 --- a/bench/btl/libs/BLAS/main.cpp +++ b/bench/btl/libs/BLAS/main.cpp @@ -48,7 +48,7 @@ int main() bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); -// bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); diff --git a/bench/btl/libs/STL/STL_interface.hh b/bench/btl/libs/STL/STL_interface.hh index ef4cc9233..16658c4ba 100644 --- a/bench/btl/libs/STL/STL_interface.hh +++ b/bench/btl/libs/STL/STL_interface.hh @@ -78,18 +78,18 @@ public : cible[i][j]=source[i][j]; } -// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N) -// { -// real somme; -// for (int j=0;j<N;j++){ -// for (int i=0;i<N;i++){ -// somme=0.0; -// for (int k=0;k<N;k++) -// somme += A[i][k]*A[j][k]; -// X[j][i]=somme; -// } -// } -// } + static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N) + { + real somme; + for (int j=0;j<N;j++){ + for (int i=0;i<N;i++){ + somme=0.0; + for (int k=0;k<N;k++) + somme += A[i][k]*A[j][k]; + X[j][i]=somme; + } + } + } static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N) { diff --git a/bench/btl/libs/blaze/blaze_interface.hh b/bench/btl/libs/blaze/blaze_interface.hh index ee1523944..3635339ef 100644 --- a/bench/btl/libs/blaze/blaze_interface.hh +++ b/bench/btl/libs/blaze/blaze_interface.hh @@ -80,35 +80,35 @@ public : } } - static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ + static EIGEN_DONT_INLINE void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ X = (A*B); } - static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ + static EIGEN_DONT_INLINE void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ X = (trans(A)*trans(B)); } - static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){ + static EIGEN_DONT_INLINE void ata_product(const gene_matrix & A, gene_matrix & X, int N){ X = (trans(A)*A); } - static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){ + static EIGEN_DONT_INLINE void aat_product(const gene_matrix & A, gene_matrix & X, int N){ X = (A*trans(A)); } - static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + static EIGEN_DONT_INLINE void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ X = (A*B); } - static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + static EIGEN_DONT_INLINE void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ X = (trans(A)*B); } - static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){ + static EIGEN_DONT_INLINE void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){ Y += coef * X; } - static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + static EIGEN_DONT_INLINE void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ Y = a*X + b*Y; } diff --git a/bench/btl/libs/blaze/main.cpp b/bench/btl/libs/blaze/main.cpp index 80e8f4eaa..ccae0cbd5 100644 --- a/bench/btl/libs/blaze/main.cpp +++ b/bench/btl/libs/blaze/main.cpp @@ -30,9 +30,9 @@ int main() bench<Action_matrix_vector_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); -// bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); -// bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); -// bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); return 0; } diff --git a/bench/btl/libs/eigen3/eigen3_interface.hh b/bench/btl/libs/eigen3/eigen3_interface.hh index b821fd721..2e302d072 100644 --- a/bench/btl/libs/eigen3/eigen3_interface.hh +++ b/bench/btl/libs/eigen3/eigen3_interface.hh @@ -92,9 +92,11 @@ public : X.noalias() = A.transpose()*B.transpose(); } -// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int /*N*/){ -// X.noalias() = A.transpose()*A; -// } + static inline void ata_product(const gene_matrix & A, gene_matrix & X, int /*N*/){ + //X.noalias() = A.transpose()*A; + X.template triangularView<Lower>().setZero(); + X.template selfadjointView<Lower>().rankUpdate(A.transpose()); + } static inline void aat_product(const gene_matrix & A, gene_matrix & X, int /*N*/){ X.template triangularView<Lower>().setZero(); diff --git a/bench/btl/libs/eigen3/main_matmat.cpp b/bench/btl/libs/eigen3/main_matmat.cpp index 926fa2b01..052810a16 100644 --- a/bench/btl/libs/eigen3/main_matmat.cpp +++ b/bench/btl/libs/eigen3/main_matmat.cpp @@ -25,7 +25,7 @@ BTL_MAIN; int main() { bench<Action_matrix_matrix_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); -// bench<Action_ata_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_ata_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_aat_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_trmm<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index af8eb9b8f..4bff3cc4a 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -59,3 +59,6 @@ before-evaluators 9174:d228bc282ac9 # merge 9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955 9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775 +3.3-beta2 +3.3-rc1 +3.3.0 diff --git a/bench/perf_monitoring/gemm/gemv.cpp b/bench/perf_monitoring/gemm/gemv.cpp new file mode 100644 index 000000000..b7441a357 --- /dev/null +++ b/bench/perf_monitoring/gemm/gemv.cpp @@ -0,0 +1,68 @@ +#include <iostream> +#include <fstream> +#include <vector> +#include <Eigen/Core> +#include "../../BenchTimer.h" +using namespace Eigen; + +#ifndef SCALAR +#error SCALAR must be defined +#endif + +typedef SCALAR Scalar; + +typedef Matrix<Scalar,Dynamic,Dynamic> Mat; +typedef Matrix<Scalar,Dynamic,1> Vec; + +EIGEN_DONT_INLINE +void gemv(const Mat &A, const Vec &B, Vec &C) +{ + C.noalias() += A * B; +} + +EIGEN_DONT_INLINE +double bench(long m, long n) +{ + Mat A(m,n); + Vec B(n); + Vec C(m); + A.setRandom(); + B.setRandom(); + C.setZero(); + + BenchTimer t; + + double up = 1e9*4/sizeof(Scalar); + double tm0 = 4, tm1 = 10; + if(NumTraits<Scalar>::IsComplex) + { + up /= 4; + tm0 = 2; + tm1 = 4; + } + + double flops = 2. * m * n; + long rep = std::max(1., std::min(100., up/flops) ); + long tries = std::max(tm0, std::min(tm1, up/flops) ); + + BENCH(t, tries, rep, gemv(A,B,C)); + + return 1e-9 * rep * flops / t.best(); +} + +int main(int argc, char **argv) +{ + std::vector<double> results; + + std::ifstream settings("gemv_settings.txt"); + long m, n; + while(settings >> m >> n) + { + //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; + results.push_back( bench(m, n) ); + } + + std::cout << RowVectorXd::Map(results.data(), results.size()); + + return 0; +} diff --git a/bench/perf_monitoring/gemm/gemv_settings.txt b/bench/perf_monitoring/gemm/gemv_settings.txt new file mode 100644 index 000000000..21a5ee051 --- /dev/null +++ b/bench/perf_monitoring/gemm/gemv_settings.txt @@ -0,0 +1,11 @@ +8 8 +9 9 +24 24 +239 239 +240 240 +2400 24 +24 2400 +24 240 +2400 2400 +4800 23 +23 4800 |