aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2007-09-29 08:28:36 +0000
committerGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2007-09-29 08:28:36 +0000
commitee63e15e2cad25d8acd0ac6ba6dc9d7a84f74f6c (patch)
tree8a7ee0fbbf3a2033a0b1a9073f9d35f862e13f28
parent51e29ae4bd9d725c3a68fe94766c73ba8c717688 (diff)
make matrix multiplication do immediate evaluation; add lazyMul() for the old behaviour
some reorganization, especially in MatrixStorage start playing with loop unrolling, always_inline, and __restrict__
-rw-r--r--CMakeLists.txt4
-rw-r--r--doc/example.cpp2
-rw-r--r--doc/tutorial.cpp18
-rw-r--r--src/internal/Eval.h2
-rw-r--r--src/internal/Matrix.h8
-rw-r--r--src/internal/MatrixOps.h33
-rw-r--r--src/internal/MatrixStorage.h59
-rw-r--r--src/internal/Object.h23
-rw-r--r--src/internal/Util.h8
-rw-r--r--test/matrixops.cpp6
10 files changed, 113 insertions, 50 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4bfe11c6d..180453180 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,8 +7,8 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON)
if(CMAKE_COMPILER_IS_GNUCXX)
if (CMAKE_SYSTEM_NAME MATCHES Linux)
- set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-long-long -ansi -Wundef -Wcast-align -Werror-implicit-function-declaration -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -Wmissing-format-attribute -fno-common")
- set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common")
+ set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-long-long -ansi -Wundef -Wcast-align -Werror-implicit-function-declaration -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -Wmissing-format-attribute -fno-common -fstrict-aliasing")
+ set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")
endif (CMAKE_SYSTEM_NAME MATCHES Linux)
endif (CMAKE_COMPILER_IS_GNUCXX)
diff --git a/doc/example.cpp b/doc/example.cpp
index 89f7820b1..25321f4f8 100644
--- a/doc/example.cpp
+++ b/doc/example.cpp
@@ -12,7 +12,7 @@ template<typename Scalar, typename Derived>
EiScalarProduct<Derived>
twice(const EiObject<Scalar, Derived>& m)
{
- return static_cast<Scalar>(2) * m;
+ return 2 * m;
}
int main(int, char**)
diff --git a/doc/tutorial.cpp b/doc/tutorial.cpp
index 1da3deb1d..03495dbcb 100644
--- a/doc/tutorial.cpp
+++ b/doc/tutorial.cpp
@@ -29,23 +29,5 @@ int main(int, char **)
cout << "Row 0 of m2, written as a column vector, is:" << endl << m2.row(0) << endl;
cout << "Column 1 of m2 is:" << endl << m2.col(1) << endl;
cout << "The matrix m2 with row 0 and column 1 removed is:" << endl << m2.minor(0,1) << endl;
-
- cout << endl << "Now let us study a tricky issue." << endl;
- cout << "Recall that the matrix product m*m is:" << endl << m*m << endl;
- cout << "We want to store that into m, i.e. do \"m = m * m;\"" << endl;
- cout << "Here we must be very careful. For if we do \"m = m * m;\"," << endl
- << "the matrix m becomes" << endl;
- EiMatrix<double,2,2> m_save = m;
- m = m * m; // the bogus operation
- cout << m << "," << endl;
- cout << "which is not what was wanted!" << endl
- << "Explanation: because of the way expression templates work, the matrix m gets" << endl
- << "overwritten _while_ the matrix product m * m is being computed." << endl
- << "This is the counterpart of eliminating temporary objects!" << endl
- << "Anyway, if you want to store m * m into m, you can do this:" << endl
- << " m = (m * m).eval();" << endl;
- m = m_save;
- m = (m * m).eval();
- cout << "And m is now:" << endl << m << endl << "as was expected." << endl;
return 0;
}
diff --git a/src/internal/Eval.h b/src/internal/Eval.h
index 2d792d822..4de32ebac 100644
--- a/src/internal/Eval.h
+++ b/src/internal/Eval.h
@@ -33,7 +33,7 @@ template<typename Expression> class EiEval
{
public:
typedef typename Expression::Scalar Scalar;
- typedef EiMatrix< Scalar, Expression::RowsAtCompileTime, Expression::ColsAtCompileTime> MatrixType;
+ typedef EiMatrix<Scalar, Expression::RowsAtCompileTime, Expression::ColsAtCompileTime> MatrixType;
typedef Expression Base;
friend class EiObject<Scalar, Expression>;
diff --git a/src/internal/Matrix.h b/src/internal/Matrix.h
index 738cc9f6b..faed649c2 100644
--- a/src/internal/Matrix.h
+++ b/src/internal/Matrix.h
@@ -44,22 +44,22 @@ class EiMatrix : public EiObject<_Scalar, EiMatrix<_Scalar, _Rows, _Cols> >,
static const int RowsAtCompileTime = _Rows, ColsAtCompileTime = _Cols;
- const Scalar* array() const
+ const Scalar* EI_RESTRICT array() const
{ return Storage::m_array; }
- Scalar* array()
+ Scalar* EI_RESTRICT array()
{ return Storage::m_array; }
private:
Ref _ref() const { return Ref(*const_cast<EiMatrix*>(this)); }
- const Scalar& _read(int row, int col = 0) const
+ const Scalar& EI_RESTRICT _read(int row, int col = 0) const
{
EI_CHECK_RANGES(*this, row, col);
return array()[row + col * Storage::_rows()];
}
- Scalar& _write(int row, int col = 0)
+ Scalar& EI_RESTRICT _write(int row, int col = 0)
{
EI_CHECK_RANGES(*this, row, col);
return array()[row + col * Storage::_rows()];
diff --git a/src/internal/MatrixOps.h b/src/internal/MatrixOps.h
index f5120277b..2798dd9c1 100644
--- a/src/internal/MatrixOps.h
+++ b/src/internal/MatrixOps.h
@@ -39,7 +39,7 @@ template<typename Lhs, typename Rhs> class EiSum
static const int RowsAtCompileTime = Lhs::RowsAtCompileTime,
ColsAtCompileTime = Rhs::ColsAtCompileTime;
- EiSum(const LhsRef& lhs, const RhsRef& rhs)
+ EiSum(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs)
: m_lhs(lhs), m_rhs(rhs)
{
assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols());
@@ -79,7 +79,7 @@ template<typename Lhs, typename Rhs> class EiDifference
static const int RowsAtCompileTime = Lhs::RowsAtCompileTime,
ColsAtCompileTime = Rhs::ColsAtCompileTime;
- EiDifference(const LhsRef& lhs, const RhsRef& rhs)
+ EiDifference(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs)
: m_lhs(lhs), m_rhs(rhs)
{
assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols());
@@ -118,7 +118,7 @@ template<typename Lhs, typename Rhs> class EiMatrixProduct
static const int RowsAtCompileTime = Lhs::RowsAtCompileTime,
ColsAtCompileTime = Rhs::ColsAtCompileTime;
- EiMatrixProduct(const LhsRef& lhs, const RhsRef& rhs)
+ EiMatrixProduct(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs)
: m_lhs(lhs), m_rhs(rhs)
{
assert(lhs.cols() == rhs.rows());
@@ -136,10 +136,17 @@ template<typename Lhs, typename Rhs> class EiMatrixProduct
Scalar _read(int row, int col) const
{
- Scalar x = static_cast<Scalar>(0);
- for(int i = 0; i < m_lhs.cols(); i++)
- x += m_lhs.read(row, i) * m_rhs.read(i, col);
- return x;
+ if(Lhs::ColsAtCompileTime == 3)
+ {
+ return m_lhs(row,0) * m_rhs(0,col) + m_lhs(row,1) * m_rhs(1,col) + m_lhs(row,2) * m_rhs(2,col);
+ }
+ else
+ {
+ Scalar x = static_cast<Scalar>(0);
+ for(int i = 0; i < m_lhs.cols(); i++)
+ x += m_lhs.read(row, i) * m_rhs.read(i, col);
+ return x;
+ }
}
protected:
@@ -161,11 +168,19 @@ operator-(const EiObject<Scalar, Derived1> &mat1, const EiObject<Scalar, Derived
return EiDifference<Derived1, Derived2>(mat1.ref(), mat2.ref());
}
+template<typename Scalar, typename Derived>
+template<typename OtherDerived>
+EiMatrixProduct<Derived, OtherDerived>
+EiObject<Scalar, Derived>::lazyMul(const EiObject<Scalar, OtherDerived> &other) const
+{
+ return EiMatrixProduct<Derived, OtherDerived>(ref(), other.ref());
+}
+
template<typename Scalar, typename Derived1, typename Derived2>
-EiMatrixProduct<Derived1, Derived2>
+EiEval<EiMatrixProduct<Derived1, Derived2> >
operator*(const EiObject<Scalar, Derived1> &mat1, const EiObject<Scalar, Derived2> &mat2)
{
- return EiMatrixProduct<Derived1, Derived2>(mat1.ref(), mat2.ref());
+ return mat1.lazyMul(mat2).eval();
}
template<typename Scalar, typename Derived>
diff --git a/src/internal/MatrixStorage.h b/src/internal/MatrixStorage.h
index 822903b14..609bf7c29 100644
--- a/src/internal/MatrixStorage.h
+++ b/src/internal/MatrixStorage.h
@@ -32,7 +32,7 @@ template<typename Scalar,
class EiMatrixStorage
{
protected:
- Scalar m_array[RowsAtCompileTime * RowsAtCompileTime];
+ Scalar EI_RESTRICT m_array[RowsAtCompileTime * RowsAtCompileTime];
void resize(int rows, int cols)
{ assert(rows == RowsAtCompileTime && cols == ColsAtCompileTime); }
@@ -54,18 +54,20 @@ class EiMatrixStorage
~EiMatrixStorage() {};
};
-template<typename Scalar>
-class EiMatrixStorage<Scalar, EiDynamic, 1>
+template<typename Scalar, int ColsAtCompileTime>
+class EiMatrixStorage<Scalar, EiDynamic, ColsAtCompileTime>
{
protected:
int m_rows;
- Scalar *m_array;
+ Scalar* EI_RESTRICT m_array;
void resize(int rows, int cols)
- { assert(rows > 0 && cols == 1);
- if(rows > m_rows) {
+ {
+ assert(rows > 0 && cols == ColsAtCompileTime);
+ if(rows > m_rows)
+ {
delete[] m_array;
- m_array = new Scalar[rows];
+ m_array = new Scalar[rows * ColsAtCompileTime];
}
m_rows = rows;
}
@@ -74,13 +76,48 @@ class EiMatrixStorage<Scalar, EiDynamic, 1>
{ return m_rows; }
int _cols() const
- { return 1; }
+ { return ColsAtCompileTime; }
public:
EiMatrixStorage(int rows, int cols) : m_rows(rows)
{
- assert(m_rows > 0 && cols == 1);
- m_array = new Scalar[m_rows];
+ assert(m_rows > 0 && cols == ColsAtCompileTime);
+ m_array = new Scalar[m_rows * ColsAtCompileTime];
+ }
+
+ ~EiMatrixStorage()
+ { delete[] m_array; }
+};
+
+template<typename Scalar, int RowsAtCompileTime>
+class EiMatrixStorage<Scalar, RowsAtCompileTime, EiDynamic>
+{
+ protected:
+ int m_cols;
+ Scalar* EI_RESTRICT m_array;
+
+ void resize(int rows, int cols)
+ {
+ assert(rows == RowsAtCompileTime && cols > 0);
+ if(cols > m_cols)
+ {
+ delete[] m_array;
+ m_array = new Scalar[cols * RowsAtCompileTime];
+ }
+ m_cols = cols;
+ }
+
+ int _rows() const
+ { return RowsAtCompileTime; }
+
+ int _cols() const
+ { return m_cols; }
+
+ public:
+ EiMatrixStorage(int rows, int cols) : m_cols(cols)
+ {
+ assert(rows == RowsAtCompileTime && cols > 0);
+ m_array = new Scalar[m_cols * RowsAtCompileTime];
}
~EiMatrixStorage()
@@ -92,7 +129,7 @@ class EiMatrixStorage<Scalar, EiDynamic, EiDynamic>
{
protected:
int m_rows, m_cols;
- Scalar *m_array;
+ Scalar* EI_RESTRICT m_array;
void resize(int rows, int cols)
{
diff --git a/src/internal/Object.h b/src/internal/Object.h
index fefa15897..7ec59da8c 100644
--- a/src/internal/Object.h
+++ b/src/internal/Object.h
@@ -36,6 +36,19 @@ template<typename Scalar, typename Derived> class EiObject
template<typename OtherDerived>
void _copy_helper(const EiObject<Scalar, OtherDerived>& other)
{
+ if(RowsAtCompileTime == 3 && ColsAtCompileTime == 3)
+ {
+ write(0,0) = other.read(0,0);
+ write(1,0) = other.read(1,0);
+ write(2,0) = other.read(2,0);
+ write(0,1) = other.read(0,1);
+ write(1,1) = other.read(1,1);
+ write(2,1) = other.read(2,1);
+ write(0,2) = other.read(0,2);
+ write(1,2) = other.read(1,2);
+ write(2,2) = other.read(2,2);
+ }
+ else
for(int i = 0; i < rows(); i++)
for(int j = 0; j < cols(); j++)
write(i, j) = other.read(i, j);
@@ -54,7 +67,7 @@ template<typename Scalar, typename Derived> class EiObject
Ref ref() const
{ return static_cast<const Derived *>(this)->_ref(); }
- Scalar& write(int row, int col)
+ Scalar& EI_RESTRICT write(int row, int col)
{
return static_cast<Derived *>(this)->_write(row, col);
}
@@ -87,6 +100,10 @@ template<typename Scalar, typename Derived> class EiObject
EiBlock<Derived> block(int startRow, int endRow, int startCol= 0, int endCol = 0);
template<typename OtherDerived>
+ EiMatrixProduct<Derived, OtherDerived>
+ lazyMul(const EiObject<Scalar, OtherDerived>& other) const EI_ALWAYS_INLINE;
+
+ template<typename OtherDerived>
Derived& operator+=(const EiObject<Scalar, OtherDerived>& other);
template<typename OtherDerived>
Derived& operator-=(const EiObject<Scalar, OtherDerived>& other);
@@ -110,10 +127,10 @@ template<typename Scalar, typename Derived> class EiObject
Scalar operator()(int row, int col = 0) const
{ return read(row, col); }
- Scalar& operator()(int row, int col = 0)
+ Scalar& EI_RESTRICT operator()(int row, int col = 0)
{ return write(row, col); }
- EiEval<Derived> eval() const;
+ EiEval<Derived> eval() const EI_ALWAYS_INLINE;
};
template<typename Scalar, typename Derived>
diff --git a/src/internal/Util.h b/src/internal/Util.h
index c7730d6fb..03df6b097 100644
--- a/src/internal/Util.h
+++ b/src/internal/Util.h
@@ -68,6 +68,14 @@ const int EiDynamic = -1;
#define EI_UNUSED(x) (void)x
+#ifdef __GNUC__
+# define EI_ALWAYS_INLINE __attribute__((always_inline))
+# define EI_RESTRICT __restrict__
+#else
+# define EI_ALWAYS_INLINE
+# define EI_RESTRICT
+#endif
+
#define EI_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \
template<typename OtherScalar, typename OtherDerived> \
Derived& operator Op(const EiObject<OtherScalar, OtherDerived>& other) \
diff --git a/test/matrixops.cpp b/test/matrixops.cpp
index f8f787819..9df827a2d 100644
--- a/test/matrixops.cpp
+++ b/test/matrixops.cpp
@@ -49,7 +49,11 @@ template<typename MatrixType1,
a -= b + b;
a *= s;
b /= s;
- if(rows1 == cols1) a *= b;
+ if(rows1 == cols1)
+ {
+ a *= b;
+ a.lazyMul(b);
+ }
MatrixType1 d(rows1, cols1);
MatrixType2 e(rows2, cols2);