diff options
author | Benoit Jacob <jacob.benoit.1@gmail.com> | 2007-09-29 08:28:36 +0000 |
---|---|---|
committer | Benoit Jacob <jacob.benoit.1@gmail.com> | 2007-09-29 08:28:36 +0000 |
commit | ee63e15e2cad25d8acd0ac6ba6dc9d7a84f74f6c (patch) | |
tree | 8a7ee0fbbf3a2033a0b1a9073f9d35f862e13f28 | |
parent | 51e29ae4bd9d725c3a68fe94766c73ba8c717688 (diff) |
make matrix multiplication do immediate evaluation; add lazyMul() for the old behaviour
some reorganization, especially in MatrixStorage
start playing with loop unrolling, always_inline, and __restrict__
-rw-r--r-- | CMakeLists.txt | 4 | ||||
-rw-r--r-- | doc/example.cpp | 2 | ||||
-rw-r--r-- | doc/tutorial.cpp | 18 | ||||
-rw-r--r-- | src/internal/Eval.h | 2 | ||||
-rw-r--r-- | src/internal/Matrix.h | 8 | ||||
-rw-r--r-- | src/internal/MatrixOps.h | 33 | ||||
-rw-r--r-- | src/internal/MatrixStorage.h | 59 | ||||
-rw-r--r-- | src/internal/Object.h | 23 | ||||
-rw-r--r-- | src/internal/Util.h | 8 | ||||
-rw-r--r-- | test/matrixops.cpp | 6 |
10 files changed, 113 insertions, 50 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 4bfe11c6d..180453180 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,8 +7,8 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) if(CMAKE_COMPILER_IS_GNUCXX) if (CMAKE_SYSTEM_NAME MATCHES Linux) - set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-long-long -ansi -Wundef -Wcast-align -Werror-implicit-function-declaration -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -Wmissing-format-attribute -fno-common") - set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common") + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-long-long -ansi -Wundef -Wcast-align -Werror-implicit-function-declaration -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -Wmissing-format-attribute -fno-common -fstrict-aliasing") + set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing") endif (CMAKE_SYSTEM_NAME MATCHES Linux) endif (CMAKE_COMPILER_IS_GNUCXX) diff --git a/doc/example.cpp b/doc/example.cpp index 89f7820b1..25321f4f8 100644 --- a/doc/example.cpp +++ b/doc/example.cpp @@ -12,7 +12,7 @@ template<typename Scalar, typename Derived> EiScalarProduct<Derived> twice(const EiObject<Scalar, Derived>& m) { - return static_cast<Scalar>(2) * m; + return 2 * m; } int main(int, char**) diff --git a/doc/tutorial.cpp b/doc/tutorial.cpp index 1da3deb1d..03495dbcb 100644 --- a/doc/tutorial.cpp +++ b/doc/tutorial.cpp @@ -29,23 +29,5 @@ int main(int, char **) cout << "Row 0 of m2, written as a column vector, is:" << endl << m2.row(0) << endl; cout << "Column 1 of m2 is:" << endl << m2.col(1) << endl; cout << "The matrix m2 with row 0 and column 1 removed is:" << endl << m2.minor(0,1) << endl; - - cout << endl << "Now let us study a tricky issue." << endl; - cout << "Recall that the matrix product m*m is:" << endl << m*m << endl; - cout << "We want to store that into m, i.e. do \"m = m * m;\"" << endl; - cout << "Here we must be very careful. For if we do \"m = m * m;\"," << endl - << "the matrix m becomes" << endl; - EiMatrix<double,2,2> m_save = m; - m = m * m; // the bogus operation - cout << m << "," << endl; - cout << "which is not what was wanted!" << endl - << "Explanation: because of the way expression templates work, the matrix m gets" << endl - << "overwritten _while_ the matrix product m * m is being computed." << endl - << "This is the counterpart of eliminating temporary objects!" << endl - << "Anyway, if you want to store m * m into m, you can do this:" << endl - << " m = (m * m).eval();" << endl; - m = m_save; - m = (m * m).eval(); - cout << "And m is now:" << endl << m << endl << "as was expected." << endl; return 0; } diff --git a/src/internal/Eval.h b/src/internal/Eval.h index 2d792d822..4de32ebac 100644 --- a/src/internal/Eval.h +++ b/src/internal/Eval.h @@ -33,7 +33,7 @@ template<typename Expression> class EiEval { public: typedef typename Expression::Scalar Scalar; - typedef EiMatrix< Scalar, Expression::RowsAtCompileTime, Expression::ColsAtCompileTime> MatrixType; + typedef EiMatrix<Scalar, Expression::RowsAtCompileTime, Expression::ColsAtCompileTime> MatrixType; typedef Expression Base; friend class EiObject<Scalar, Expression>; diff --git a/src/internal/Matrix.h b/src/internal/Matrix.h index 738cc9f6b..faed649c2 100644 --- a/src/internal/Matrix.h +++ b/src/internal/Matrix.h @@ -44,22 +44,22 @@ class EiMatrix : public EiObject<_Scalar, EiMatrix<_Scalar, _Rows, _Cols> >, static const int RowsAtCompileTime = _Rows, ColsAtCompileTime = _Cols; - const Scalar* array() const + const Scalar* EI_RESTRICT array() const { return Storage::m_array; } - Scalar* array() + Scalar* EI_RESTRICT array() { return Storage::m_array; } private: Ref _ref() const { return Ref(*const_cast<EiMatrix*>(this)); } - const Scalar& _read(int row, int col = 0) const + const Scalar& EI_RESTRICT _read(int row, int col = 0) const { EI_CHECK_RANGES(*this, row, col); return array()[row + col * Storage::_rows()]; } - Scalar& _write(int row, int col = 0) + Scalar& EI_RESTRICT _write(int row, int col = 0) { EI_CHECK_RANGES(*this, row, col); return array()[row + col * Storage::_rows()]; diff --git a/src/internal/MatrixOps.h b/src/internal/MatrixOps.h index f5120277b..2798dd9c1 100644 --- a/src/internal/MatrixOps.h +++ b/src/internal/MatrixOps.h @@ -39,7 +39,7 @@ template<typename Lhs, typename Rhs> class EiSum static const int RowsAtCompileTime = Lhs::RowsAtCompileTime, ColsAtCompileTime = Rhs::ColsAtCompileTime; - EiSum(const LhsRef& lhs, const RhsRef& rhs) + EiSum(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs) : m_lhs(lhs), m_rhs(rhs) { assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols()); @@ -79,7 +79,7 @@ template<typename Lhs, typename Rhs> class EiDifference static const int RowsAtCompileTime = Lhs::RowsAtCompileTime, ColsAtCompileTime = Rhs::ColsAtCompileTime; - EiDifference(const LhsRef& lhs, const RhsRef& rhs) + EiDifference(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs) : m_lhs(lhs), m_rhs(rhs) { assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols()); @@ -118,7 +118,7 @@ template<typename Lhs, typename Rhs> class EiMatrixProduct static const int RowsAtCompileTime = Lhs::RowsAtCompileTime, ColsAtCompileTime = Rhs::ColsAtCompileTime; - EiMatrixProduct(const LhsRef& lhs, const RhsRef& rhs) + EiMatrixProduct(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs) : m_lhs(lhs), m_rhs(rhs) { assert(lhs.cols() == rhs.rows()); @@ -136,10 +136,17 @@ template<typename Lhs, typename Rhs> class EiMatrixProduct Scalar _read(int row, int col) const { - Scalar x = static_cast<Scalar>(0); - for(int i = 0; i < m_lhs.cols(); i++) - x += m_lhs.read(row, i) * m_rhs.read(i, col); - return x; + if(Lhs::ColsAtCompileTime == 3) + { + return m_lhs(row,0) * m_rhs(0,col) + m_lhs(row,1) * m_rhs(1,col) + m_lhs(row,2) * m_rhs(2,col); + } + else + { + Scalar x = static_cast<Scalar>(0); + for(int i = 0; i < m_lhs.cols(); i++) + x += m_lhs.read(row, i) * m_rhs.read(i, col); + return x; + } } protected: @@ -161,11 +168,19 @@ operator-(const EiObject<Scalar, Derived1> &mat1, const EiObject<Scalar, Derived return EiDifference<Derived1, Derived2>(mat1.ref(), mat2.ref()); } +template<typename Scalar, typename Derived> +template<typename OtherDerived> +EiMatrixProduct<Derived, OtherDerived> +EiObject<Scalar, Derived>::lazyMul(const EiObject<Scalar, OtherDerived> &other) const +{ + return EiMatrixProduct<Derived, OtherDerived>(ref(), other.ref()); +} + template<typename Scalar, typename Derived1, typename Derived2> -EiMatrixProduct<Derived1, Derived2> +EiEval<EiMatrixProduct<Derived1, Derived2> > operator*(const EiObject<Scalar, Derived1> &mat1, const EiObject<Scalar, Derived2> &mat2) { - return EiMatrixProduct<Derived1, Derived2>(mat1.ref(), mat2.ref()); + return mat1.lazyMul(mat2).eval(); } template<typename Scalar, typename Derived> diff --git a/src/internal/MatrixStorage.h b/src/internal/MatrixStorage.h index 822903b14..609bf7c29 100644 --- a/src/internal/MatrixStorage.h +++ b/src/internal/MatrixStorage.h @@ -32,7 +32,7 @@ template<typename Scalar, class EiMatrixStorage { protected: - Scalar m_array[RowsAtCompileTime * RowsAtCompileTime]; + Scalar EI_RESTRICT m_array[RowsAtCompileTime * RowsAtCompileTime]; void resize(int rows, int cols) { assert(rows == RowsAtCompileTime && cols == ColsAtCompileTime); } @@ -54,18 +54,20 @@ class EiMatrixStorage ~EiMatrixStorage() {}; }; -template<typename Scalar> -class EiMatrixStorage<Scalar, EiDynamic, 1> +template<typename Scalar, int ColsAtCompileTime> +class EiMatrixStorage<Scalar, EiDynamic, ColsAtCompileTime> { protected: int m_rows; - Scalar *m_array; + Scalar* EI_RESTRICT m_array; void resize(int rows, int cols) - { assert(rows > 0 && cols == 1); - if(rows > m_rows) { + { + assert(rows > 0 && cols == ColsAtCompileTime); + if(rows > m_rows) + { delete[] m_array; - m_array = new Scalar[rows]; + m_array = new Scalar[rows * ColsAtCompileTime]; } m_rows = rows; } @@ -74,13 +76,48 @@ class EiMatrixStorage<Scalar, EiDynamic, 1> { return m_rows; } int _cols() const - { return 1; } + { return ColsAtCompileTime; } public: EiMatrixStorage(int rows, int cols) : m_rows(rows) { - assert(m_rows > 0 && cols == 1); - m_array = new Scalar[m_rows]; + assert(m_rows > 0 && cols == ColsAtCompileTime); + m_array = new Scalar[m_rows * ColsAtCompileTime]; + } + + ~EiMatrixStorage() + { delete[] m_array; } +}; + +template<typename Scalar, int RowsAtCompileTime> +class EiMatrixStorage<Scalar, RowsAtCompileTime, EiDynamic> +{ + protected: + int m_cols; + Scalar* EI_RESTRICT m_array; + + void resize(int rows, int cols) + { + assert(rows == RowsAtCompileTime && cols > 0); + if(cols > m_cols) + { + delete[] m_array; + m_array = new Scalar[cols * RowsAtCompileTime]; + } + m_cols = cols; + } + + int _rows() const + { return RowsAtCompileTime; } + + int _cols() const + { return m_cols; } + + public: + EiMatrixStorage(int rows, int cols) : m_cols(cols) + { + assert(rows == RowsAtCompileTime && cols > 0); + m_array = new Scalar[m_cols * RowsAtCompileTime]; } ~EiMatrixStorage() @@ -92,7 +129,7 @@ class EiMatrixStorage<Scalar, EiDynamic, EiDynamic> { protected: int m_rows, m_cols; - Scalar *m_array; + Scalar* EI_RESTRICT m_array; void resize(int rows, int cols) { diff --git a/src/internal/Object.h b/src/internal/Object.h index fefa15897..7ec59da8c 100644 --- a/src/internal/Object.h +++ b/src/internal/Object.h @@ -36,6 +36,19 @@ template<typename Scalar, typename Derived> class EiObject template<typename OtherDerived> void _copy_helper(const EiObject<Scalar, OtherDerived>& other) { + if(RowsAtCompileTime == 3 && ColsAtCompileTime == 3) + { + write(0,0) = other.read(0,0); + write(1,0) = other.read(1,0); + write(2,0) = other.read(2,0); + write(0,1) = other.read(0,1); + write(1,1) = other.read(1,1); + write(2,1) = other.read(2,1); + write(0,2) = other.read(0,2); + write(1,2) = other.read(1,2); + write(2,2) = other.read(2,2); + } + else for(int i = 0; i < rows(); i++) for(int j = 0; j < cols(); j++) write(i, j) = other.read(i, j); @@ -54,7 +67,7 @@ template<typename Scalar, typename Derived> class EiObject Ref ref() const { return static_cast<const Derived *>(this)->_ref(); } - Scalar& write(int row, int col) + Scalar& EI_RESTRICT write(int row, int col) { return static_cast<Derived *>(this)->_write(row, col); } @@ -87,6 +100,10 @@ template<typename Scalar, typename Derived> class EiObject EiBlock<Derived> block(int startRow, int endRow, int startCol= 0, int endCol = 0); template<typename OtherDerived> + EiMatrixProduct<Derived, OtherDerived> + lazyMul(const EiObject<Scalar, OtherDerived>& other) const EI_ALWAYS_INLINE; + + template<typename OtherDerived> Derived& operator+=(const EiObject<Scalar, OtherDerived>& other); template<typename OtherDerived> Derived& operator-=(const EiObject<Scalar, OtherDerived>& other); @@ -110,10 +127,10 @@ template<typename Scalar, typename Derived> class EiObject Scalar operator()(int row, int col = 0) const { return read(row, col); } - Scalar& operator()(int row, int col = 0) + Scalar& EI_RESTRICT operator()(int row, int col = 0) { return write(row, col); } - EiEval<Derived> eval() const; + EiEval<Derived> eval() const EI_ALWAYS_INLINE; }; template<typename Scalar, typename Derived> diff --git a/src/internal/Util.h b/src/internal/Util.h index c7730d6fb..03df6b097 100644 --- a/src/internal/Util.h +++ b/src/internal/Util.h @@ -68,6 +68,14 @@ const int EiDynamic = -1; #define EI_UNUSED(x) (void)x +#ifdef __GNUC__ +# define EI_ALWAYS_INLINE __attribute__((always_inline)) +# define EI_RESTRICT __restrict__ +#else +# define EI_ALWAYS_INLINE +# define EI_RESTRICT +#endif + #define EI_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \ template<typename OtherScalar, typename OtherDerived> \ Derived& operator Op(const EiObject<OtherScalar, OtherDerived>& other) \ diff --git a/test/matrixops.cpp b/test/matrixops.cpp index f8f787819..9df827a2d 100644 --- a/test/matrixops.cpp +++ b/test/matrixops.cpp @@ -49,7 +49,11 @@ template<typename MatrixType1, a -= b + b; a *= s; b /= s; - if(rows1 == cols1) a *= b; + if(rows1 == cols1) + { + a *= b; + a.lazyMul(b); + } MatrixType1 d(rows1, cols1); MatrixType2 e(rows2, cols2); |