aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/Matrix.h11
-rw-r--r--Eigen/src/Core/MatrixStorage.h9
-rw-r--r--Eigen/src/Core/Product.h25
-rw-r--r--Eigen/src/Core/Swap.h2
-rw-r--r--bench/benchBlasGemm.cpp58
-rw-r--r--bench/benchmark.cpp13
-rw-r--r--test/basicstuff.cpp6
-rw-r--r--test/product.cpp38
8 files changed, 101 insertions, 61 deletions
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 5074ea88f..379b5e1f4 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -346,6 +346,17 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol
{
return *this;
}
+
+ /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the
+ * data pointers.
+ */
+ void swap(Matrix& other)
+ {
+ if (Base::SizeAtCompileTime==Dynamic)
+ m_storage.swap(other.m_storage);
+ else
+ this->Base::swap(other);
+ }
};
#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
diff --git a/Eigen/src/Core/MatrixStorage.h b/Eigen/src/Core/MatrixStorage.h
index 72b9965b6..8f5c247be 100644
--- a/Eigen/src/Core/MatrixStorage.h
+++ b/Eigen/src/Core/MatrixStorage.h
@@ -84,6 +84,7 @@ template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage
public:
inline ei_matrix_storage() {}
inline ei_matrix_storage(int,int,int) {}
+ inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); }
inline static int rows(void) {return _Rows;}
inline static int cols(void) {return _Cols;}
inline void resize(int,int,int) {}
@@ -100,6 +101,8 @@ template<typename T, int Size> class ei_matrix_storage<T, Size, Dynamic, Dynamic
public:
inline ei_matrix_storage(int, int rows, int cols) : m_rows(rows), m_cols(cols) {}
inline ~ei_matrix_storage() {}
+ inline void swap(ei_matrix_storage& other)
+ { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
inline int rows(void) const {return m_rows;}
inline int cols(void) const {return m_cols;}
inline void resize(int, int rows, int cols)
@@ -119,6 +122,7 @@ template<typename T, int Size, int _Cols> class ei_matrix_storage<T, Size, Dynam
public:
inline ei_matrix_storage(int, int rows, int) : m_rows(rows) {}
inline ~ei_matrix_storage() {}
+ inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
inline int rows(void) const {return m_rows;}
inline int cols(void) const {return _Cols;}
inline void resize(int size, int rows, int)
@@ -137,6 +141,7 @@ template<typename T, int Size, int _Rows> class ei_matrix_storage<T, Size, _Rows
public:
inline ei_matrix_storage(int, int, int cols) : m_cols(cols) {}
inline ~ei_matrix_storage() {}
+ inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
inline int rows(void) const {return _Rows;}
inline int cols(void) const {return m_cols;}
inline void resize(int size, int, int cols)
@@ -157,6 +162,8 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
inline ei_matrix_storage(int size, int rows, int cols)
: m_data(ei_aligned_malloc<T>(size)), m_rows(rows), m_cols(cols) {}
inline ~ei_matrix_storage() { delete[] m_data; }
+ inline void swap(ei_matrix_storage& other)
+ { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
inline int rows(void) const {return m_rows;}
inline int cols(void) const {return m_cols;}
void resize(int size, int rows, int cols)
@@ -181,6 +188,7 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam
public:
inline ei_matrix_storage(int size, int, int cols) : m_data(ei_aligned_malloc<T>(size)), m_cols(cols) {}
inline ~ei_matrix_storage() { delete[] m_data; }
+ inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
inline static int rows(void) {return _Rows;}
inline int cols(void) const {return m_cols;}
void resize(int size, int, int cols)
@@ -204,6 +212,7 @@ template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Co
public:
inline ei_matrix_storage(int size, int rows, int) : m_data(ei_aligned_malloc<T>(size)), m_rows(rows) {}
inline ~ei_matrix_storage() { delete[] m_data; }
+ inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
inline int rows(void) const {return m_rows;}
inline static int cols(void) {return _Cols;}
void resize(int size, int rows, int)
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 2bfd15878..90f9bdca4 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -30,11 +30,6 @@
*** Forward declarations ***
***************************/
-enum {
- ColMajorProduct,
- RowMajorProduct
-};
-
template<int VectorizationMode, int Index, typename Lhs, typename Rhs>
struct ei_product_coeff_impl;
@@ -238,7 +233,7 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
const PacketScalar packet(int row, int col) const
{
PacketScalar res;
- ei_product_packet_impl<Flags&RowMajorBit ? RowMajorProduct : ColMajorProduct,
+ ei_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
Unroll ? InnerSize-1 : Dynamic,
_LhsNested, _RhsNested, PacketScalar, LoadMode>
::run(row, col, m_lhs, m_rhs, res);
@@ -362,27 +357,27 @@ struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs>
*******************/
template<int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
-struct ei_product_packet_impl<RowMajorProduct, Index, Lhs, Rhs, PacketScalar, LoadMode>
+struct ei_product_packet_impl<RowMajor, Index, Lhs, Rhs, PacketScalar, LoadMode>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
- ei_product_packet_impl<RowMajorProduct, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res);
+ ei_product_packet_impl<RowMajor, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res);
res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<LoadMode>(Index, col), res);
}
};
template<int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
-struct ei_product_packet_impl<ColMajorProduct, Index, Lhs, Rhs, PacketScalar, LoadMode>
+struct ei_product_packet_impl<ColMajor, Index, Lhs, Rhs, PacketScalar, LoadMode>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
- ei_product_packet_impl<ColMajorProduct, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res);
+ ei_product_packet_impl<ColMajor, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res);
res = ei_pmadd(lhs.template packet<LoadMode>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
}
};
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
-struct ei_product_packet_impl<RowMajorProduct, 0, Lhs, Rhs, PacketScalar, LoadMode>
+struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, PacketScalar, LoadMode>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
@@ -391,7 +386,7 @@ struct ei_product_packet_impl<RowMajorProduct, 0, Lhs, Rhs, PacketScalar, LoadMo
};
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
-struct ei_product_packet_impl<ColMajorProduct, 0, Lhs, Rhs, PacketScalar, LoadMode>
+struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, PacketScalar, LoadMode>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
@@ -399,8 +394,8 @@ struct ei_product_packet_impl<ColMajorProduct, 0, Lhs, Rhs, PacketScalar, LoadMo
}
};
-template<int StorageOrder, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
-struct ei_product_packet_impl<StorageOrder, Dynamic, Lhs, Rhs, PacketScalar, LoadMode>
+template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
+struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
{
@@ -411,7 +406,7 @@ struct ei_product_packet_impl<StorageOrder, Dynamic, Lhs, Rhs, PacketScalar, Loa
};
template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
-struct ei_product_packet_impl<ColMajorProduct, Dynamic, Lhs, Rhs, PacketScalar, LoadMode>
+struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
{
diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h
index 5e3187071..35590b56f 100644
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -73,7 +73,7 @@ struct ei_swap_selector<Derived,OtherDerived,true>
template<typename Derived, typename OtherDerived>
struct ei_swap_selector<Derived,OtherDerived,false>
{
- inline void run(Derived& src, OtherDerived& other)
+ inline static void run(Derived& src, OtherDerived& other)
{
typename Derived::Scalar tmp;
for(int j = 0; j < src.cols(); j++)
diff --git a/bench/benchBlasGemm.cpp b/bench/benchBlasGemm.cpp
index d22af89da..02f067e1a 100644
--- a/bench/benchBlasGemm.cpp
+++ b/bench/benchBlasGemm.cpp
@@ -1,4 +1,8 @@
+// g++-4.2 -O3 -DNDEBUG -I.. benchBlasGemm.cpp /usr/lib/libcblas.so.3 - o benchBlasGemm
+// possible options:
+// -DEIGEN_DONT_VECTORIZE
+// -msse2
// #define EIGEN_DEFAULT_TO_ROW_MAJOR
#define _FLOAT
@@ -28,6 +32,8 @@ void check_product(void);
int main(int argc, char *argv[])
{
+ // disable SSE exceptions
+ #ifdef __GNUC__
{
int aux;
asm(
@@ -36,6 +42,7 @@ int main(int argc, char *argv[])
"ldmxcsr %[aux] \n\t"
: : [aux] "m" (aux));
}
+ #endif
int nbtries=1, nbloops=1, M, N, K;
@@ -70,8 +77,18 @@ int main(int argc, char *argv[])
}
else
{
+ std::cout << "Usage: " << argv[0] << " size \n";
+ std::cout << "Usage: " << argv[0] << " auto size\n";
std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
+ std::cout << "Usage: " << argv[0] << " check\n";
+ std::cout << "Options:\n"
+ std::cout << " size unique size of the 2 matrices (integer)\n";
+ std::cout << " auto automatically set the number of repetitions and tries\n";
+ std::cout << " nbloops number of times the GEMM routines is executed\n"
+ std::cout << " nbtries number of times the loop is benched (return the best try)\n"
+ std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n"
+ std::cout << " check check eigen product using cblas as a reference\n";
exit(1);
}
@@ -119,7 +136,7 @@ int main(int argc, char *argv[])
mb = MyMatrix::random(K,N);
mc = MyMatrix::random(M,N);
- // eigen fast ?
+ // eigen
// if (!(std::string(argv[1])=="auto"))
{
timer.reset();
@@ -158,20 +175,10 @@ int main(int argc, char *argv[])
using namespace Eigen;
-
-
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
{
for (uint j=0 ; j<nbloops ; ++j)
- #ifdef EIGEN_WIP_PRODUCT_DIRTY
- mc = ma * mb;
- #else
mc += (ma * mb).lazy();
- /*static_cast<MatrixBase<MyMatrix>& >*//*(mc).operator+=( (ma * mb).lazy() );*/
-
-// Flagged<Product<MyMatrix,MyMatrix,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>(
-// Product<MyMatrix,MyMatrix,CacheFriendlyProduct>(ma, mb)));
- #endif
}
void bench_eigengemm_normal(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
@@ -183,7 +190,6 @@ void bench_eigengemm_normal(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb
#define MYVERIFY(A,M) if (!(A)) { \
std::cout << "FAIL: " << M << "\n"; \
}
-
void check_product(int M, int N, int K)
{
MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
@@ -200,20 +206,20 @@ void check_product(int M, int N, int K)
meigen += ma * mb;
MYVERIFY(meigen.isApprox(mref, eps),". * .");
-// meigen = mref = mc;
-// CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
-// meigen += maT.transpose() * mb;
-// MYVERIFY(meigen.isApprox(mref, eps),"T * .");
-//
-// meigen = mref = mc;
-// CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
-// meigen += (maT.transpose()) * (mbT.transpose());
-// MYVERIFY(meigen.isApprox(mref, eps),"T * T");
-//
-// meigen = mref = mc;
-// CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
-// meigen += ma * mbT.transpose();
-// MYVERIFY(meigen.isApprox(mref, eps),". * T");
+ meigen = mref = mc;
+ CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
+ meigen += maT.transpose() * mb;
+ MYVERIFY(meigen.isApprox(mref, eps),"T * .");
+
+ meigen = mref = mc;
+ CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
+ meigen += (maT.transpose()) * (mbT.transpose());
+ MYVERIFY(meigen.isApprox(mref, eps),"T * T");
+
+ meigen = mref = mc;
+ CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
+ meigen += ma * mbT.transpose();
+ MYVERIFY(meigen.isApprox(mref, eps),". * T");
}
void check_product(void)
diff --git a/bench/benchmark.cpp b/bench/benchmark.cpp
index b48b21d68..5aeb2d315 100644
--- a/bench/benchmark.cpp
+++ b/bench/benchmark.cpp
@@ -18,17 +18,6 @@ USING_PART_OF_NAMESPACE_EIGEN
int main(int argc, char *argv[])
{
- Matrix4i m1, m2, m3;
- m1.setRandom();
- m2.setConstant(2);
- int s1 = 2;
- m3 = m1;
- std::cout << m1 << "\n\n";
- std::cout << m2 << "\n\n";
- m3 = m1.cwiseProduct(m2);
- std::cout << m3 << "\n==\n" << m1*s1 << "\n\n";
-// v(1,2,3,4);
-// std::cout << v * 2 << "\n";
Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::ones();
Matrix<SCALAR,MATSIZE,MATSIZE> m;
for(int i = 0; i < MATSIZE; i++)
@@ -39,7 +28,7 @@ int main(int argc, char *argv[])
asm("#begin");
for(int a = 0; a < REPEAT; a++)
{
- m = Matrix<SCALAR,MATSIZE,MATSIZE>::ones() + 0.00005 * (m + (m*m).eval());
+ m = Matrix<SCALAR,MATSIZE,MATSIZE>::ones() + 0.00005 * (m + (m*m));
}
asm("#end");
cout << m << endl;
diff --git a/test/basicstuff.cpp b/test/basicstuff.cpp
index a2ad3dedf..4e3db3c1b 100644
--- a/test/basicstuff.cpp
+++ b/test/basicstuff.cpp
@@ -82,6 +82,12 @@ template<typename MatrixType> void basicStuff(const MatrixType& m)
{
VERIFY_RAISES_ASSERT(m1 = (m2.block(0,0, rows-1, cols-1)));
}
+
+ // test swap
+ m3 = m1;
+ m1.swap(m2);
+ VERIFY_IS_APPROX(m3, m2);
+ VERIFY_IS_NOT_APPROX(m3, m1);
}
void test_basicstuff()
diff --git a/test/product.cpp b/test/product.cpp
index f1e26d20a..2fc50899f 100644
--- a/test/product.cpp
+++ b/test/product.cpp
@@ -31,8 +31,10 @@ template<typename MatrixType> void product(const MatrixType& m)
*/
typedef typename MatrixType::Scalar Scalar;
+ typedef typename NumTraits<Scalar>::FloatingPoint FloatingPoint;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
- typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
+ typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime> RowSquareMatrixType;
+ typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> ColSquareMatrixType;
int rows = m.rows();
int cols = m.cols();
@@ -43,11 +45,13 @@ template<typename MatrixType> void product(const MatrixType& m)
m2 = MatrixType::random(rows, cols),
m3(rows, cols),
mzero = MatrixType::zero(rows, cols);
- SquareMatrixType
- identity = Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime>
- ::identity(rows, rows),
- square = Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime>
- ::random(rows, rows);
+ RowSquareMatrixType
+ identity = RowSquareMatrixType::identity(rows, rows),
+ square = RowSquareMatrixType::random(rows, rows),
+ res = RowSquareMatrixType::random(rows, rows);
+ ColSquareMatrixType
+ square2 = ColSquareMatrixType::random(cols, cols),
+ res2 = ColSquareMatrixType::random(cols, cols);
VectorType v1 = VectorType::random(rows),
v2 = VectorType::random(rows),
vzero = VectorType::zero(rows);
@@ -83,6 +87,20 @@ template<typename MatrixType> void product(const MatrixType& m)
if (rows!=cols)
VERIFY_RAISES_ASSERT(m3 = m1*m1);
+
+ // test the previous tests were not screwed up because operator* returns 0
+ VERIFY_IS_NOT_APPROX((m1.transpose()*m2).template cast<FloatingPoint>(), (m2.transpose()*m1).template cast<FloatingPoint>());
+
+ // test optimized operator+= path
+ res = square;
+ res += (m1 * m2.transpose()).lazy();
+ VERIFY_IS_APPROX(res, square + m1 * m2.transpose());
+ VERIFY_IS_NOT_APPROX(res.template cast<FloatingPoint>(), (square + m2 * m1.transpose()).template cast<FloatingPoint>());
+
+ res2 = square2;
+ res2 += (m1.transpose() * m2).lazy();
+ VERIFY_IS_APPROX(res2, square2 + m1.transpose() * m2);
+ VERIFY_IS_NOT_APPROX(res2.template cast<FloatingPoint>(), (square2 + m2.transpose() * m1).template cast<FloatingPoint>());
}
void test_product()
@@ -93,11 +111,17 @@ void test_product()
CALL_SUBTEST( product(Matrix4d()) );
CALL_SUBTEST( product(Matrix4f()) );
CALL_SUBTEST( product(MatrixXf(3,5)) );
+ CALL_SUBTEST( product(MatrixXi(28,39)) );
}
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( product(MatrixXf(ei_random<int>(1,320), ei_random<int>(1,320))) );
CALL_SUBTEST( product(MatrixXd(ei_random<int>(1,320), ei_random<int>(1,320))) );
- CALL_SUBTEST( product(MatrixXi(ei_random<int>(1,320), ei_random<int>(1,320))) );
+ CALL_SUBTEST( product(MatrixXi(ei_random<int>(1,256), ei_random<int>(1,256))) );
CALL_SUBTEST( product(MatrixXcf(ei_random<int>(1,50), ei_random<int>(1,50))) );
+ #ifndef EIGEN_DEFAULT_TO_ROW_MAJOR
+ CALL_SUBTEST( product(Matrix<float,Dynamic,Dynamic,Dynamic,Dynamic,RowMajorBit>(ei_random<int>(1,320), ei_random<int>(1,320))) );
+ #else
+ CALL_SUBTEST( product(Matrix<float,Dynamic,Dynamic,Dynamic,Dynamic,0>(ei_random<int>(1,320), ei_random<int>(1,320))) );
+ #endif
}
}