diff options
-rw-r--r-- | Eigen/src/Core/Matrix.h | 11 | ||||
-rw-r--r-- | Eigen/src/Core/MatrixStorage.h | 9 | ||||
-rw-r--r-- | Eigen/src/Core/Product.h | 25 | ||||
-rw-r--r-- | Eigen/src/Core/Swap.h | 2 | ||||
-rw-r--r-- | bench/benchBlasGemm.cpp | 58 | ||||
-rw-r--r-- | bench/benchmark.cpp | 13 | ||||
-rw-r--r-- | test/basicstuff.cpp | 6 | ||||
-rw-r--r-- | test/product.cpp | 38 |
8 files changed, 101 insertions, 61 deletions
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 5074ea88f..379b5e1f4 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -346,6 +346,17 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol { return *this; } + + /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the + * data pointers. + */ + void swap(Matrix& other) + { + if (Base::SizeAtCompileTime==Dynamic) + m_storage.swap(other.m_storage); + else + this->Base::swap(other); + } }; #define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \ diff --git a/Eigen/src/Core/MatrixStorage.h b/Eigen/src/Core/MatrixStorage.h index 72b9965b6..8f5c247be 100644 --- a/Eigen/src/Core/MatrixStorage.h +++ b/Eigen/src/Core/MatrixStorage.h @@ -84,6 +84,7 @@ template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage public: inline ei_matrix_storage() {} inline ei_matrix_storage(int,int,int) {} + inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); } inline static int rows(void) {return _Rows;} inline static int cols(void) {return _Cols;} inline void resize(int,int,int) {} @@ -100,6 +101,8 @@ template<typename T, int Size> class ei_matrix_storage<T, Size, Dynamic, Dynamic public: inline ei_matrix_storage(int, int rows, int cols) : m_rows(rows), m_cols(cols) {} inline ~ei_matrix_storage() {} + inline void swap(ei_matrix_storage& other) + { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } inline int rows(void) const {return m_rows;} inline int cols(void) const {return m_cols;} inline void resize(int, int rows, int cols) @@ -119,6 +122,7 @@ template<typename T, int Size, int _Cols> class ei_matrix_storage<T, Size, Dynam public: inline ei_matrix_storage(int, int rows, int) : m_rows(rows) {} inline ~ei_matrix_storage() {} + inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } inline int rows(void) const {return m_rows;} inline int cols(void) const {return _Cols;} inline void resize(int size, int rows, int) @@ -137,6 +141,7 @@ template<typename T, int Size, int _Rows> class ei_matrix_storage<T, Size, _Rows public: inline ei_matrix_storage(int, int, int cols) : m_cols(cols) {} inline ~ei_matrix_storage() {} + inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } inline int rows(void) const {return _Rows;} inline int cols(void) const {return m_cols;} inline void resize(int size, int, int cols) @@ -157,6 +162,8 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic> inline ei_matrix_storage(int size, int rows, int cols) : m_data(ei_aligned_malloc<T>(size)), m_rows(rows), m_cols(cols) {} inline ~ei_matrix_storage() { delete[] m_data; } + inline void swap(ei_matrix_storage& other) + { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } inline int rows(void) const {return m_rows;} inline int cols(void) const {return m_cols;} void resize(int size, int rows, int cols) @@ -181,6 +188,7 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam public: inline ei_matrix_storage(int size, int, int cols) : m_data(ei_aligned_malloc<T>(size)), m_cols(cols) {} inline ~ei_matrix_storage() { delete[] m_data; } + inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } inline static int rows(void) {return _Rows;} inline int cols(void) const {return m_cols;} void resize(int size, int, int cols) @@ -204,6 +212,7 @@ template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Co public: inline ei_matrix_storage(int size, int rows, int) : m_data(ei_aligned_malloc<T>(size)), m_rows(rows) {} inline ~ei_matrix_storage() { delete[] m_data; } + inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } inline int rows(void) const {return m_rows;} inline static int cols(void) {return _Cols;} void resize(int size, int rows, int) diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 2bfd15878..90f9bdca4 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -30,11 +30,6 @@ *** Forward declarations *** ***************************/ -enum { - ColMajorProduct, - RowMajorProduct -}; - template<int VectorizationMode, int Index, typename Lhs, typename Rhs> struct ei_product_coeff_impl; @@ -238,7 +233,7 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product const PacketScalar packet(int row, int col) const { PacketScalar res; - ei_product_packet_impl<Flags&RowMajorBit ? RowMajorProduct : ColMajorProduct, + ei_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, Unroll ? InnerSize-1 : Dynamic, _LhsNested, _RhsNested, PacketScalar, LoadMode> ::run(row, col, m_lhs, m_rhs, res); @@ -362,27 +357,27 @@ struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs> *******************/ template<int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> -struct ei_product_packet_impl<RowMajorProduct, Index, Lhs, Rhs, PacketScalar, LoadMode> +struct ei_product_packet_impl<RowMajor, Index, Lhs, Rhs, PacketScalar, LoadMode> { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { - ei_product_packet_impl<RowMajorProduct, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res); + ei_product_packet_impl<RowMajor, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res); res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<LoadMode>(Index, col), res); } }; template<int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> -struct ei_product_packet_impl<ColMajorProduct, Index, Lhs, Rhs, PacketScalar, LoadMode> +struct ei_product_packet_impl<ColMajor, Index, Lhs, Rhs, PacketScalar, LoadMode> { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { - ei_product_packet_impl<ColMajorProduct, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res); + ei_product_packet_impl<ColMajor, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res); res = ei_pmadd(lhs.template packet<LoadMode>(row, Index), ei_pset1(rhs.coeff(Index, col)), res); } }; template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> -struct ei_product_packet_impl<RowMajorProduct, 0, Lhs, Rhs, PacketScalar, LoadMode> +struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, PacketScalar, LoadMode> { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { @@ -391,7 +386,7 @@ struct ei_product_packet_impl<RowMajorProduct, 0, Lhs, Rhs, PacketScalar, LoadMo }; template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> -struct ei_product_packet_impl<ColMajorProduct, 0, Lhs, Rhs, PacketScalar, LoadMode> +struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, PacketScalar, LoadMode> { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { @@ -399,8 +394,8 @@ struct ei_product_packet_impl<ColMajorProduct, 0, Lhs, Rhs, PacketScalar, LoadMo } }; -template<int StorageOrder, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> -struct ei_product_packet_impl<StorageOrder, Dynamic, Lhs, Rhs, PacketScalar, LoadMode> +template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> +struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode> { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) { @@ -411,7 +406,7 @@ struct ei_product_packet_impl<StorageOrder, Dynamic, Lhs, Rhs, PacketScalar, Loa }; template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> -struct ei_product_packet_impl<ColMajorProduct, Dynamic, Lhs, Rhs, PacketScalar, LoadMode> +struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode> { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) { diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h index 5e3187071..35590b56f 100644 --- a/Eigen/src/Core/Swap.h +++ b/Eigen/src/Core/Swap.h @@ -73,7 +73,7 @@ struct ei_swap_selector<Derived,OtherDerived,true> template<typename Derived, typename OtherDerived> struct ei_swap_selector<Derived,OtherDerived,false> { - inline void run(Derived& src, OtherDerived& other) + inline static void run(Derived& src, OtherDerived& other) { typename Derived::Scalar tmp; for(int j = 0; j < src.cols(); j++) diff --git a/bench/benchBlasGemm.cpp b/bench/benchBlasGemm.cpp index d22af89da..02f067e1a 100644 --- a/bench/benchBlasGemm.cpp +++ b/bench/benchBlasGemm.cpp @@ -1,4 +1,8 @@ +// g++-4.2 -O3 -DNDEBUG -I.. benchBlasGemm.cpp /usr/lib/libcblas.so.3 - o benchBlasGemm +// possible options: +// -DEIGEN_DONT_VECTORIZE +// -msse2 // #define EIGEN_DEFAULT_TO_ROW_MAJOR #define _FLOAT @@ -28,6 +32,8 @@ void check_product(void); int main(int argc, char *argv[]) { + // disable SSE exceptions + #ifdef __GNUC__ { int aux; asm( @@ -36,6 +42,7 @@ int main(int argc, char *argv[]) "ldmxcsr %[aux] \n\t" : : [aux] "m" (aux)); } + #endif int nbtries=1, nbloops=1, M, N, K; @@ -70,8 +77,18 @@ int main(int argc, char *argv[]) } else { + std::cout << "Usage: " << argv[0] << " size \n"; + std::cout << "Usage: " << argv[0] << " auto size\n"; std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n"; std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n"; + std::cout << "Usage: " << argv[0] << " check\n"; + std::cout << "Options:\n" + std::cout << " size unique size of the 2 matrices (integer)\n"; + std::cout << " auto automatically set the number of repetitions and tries\n"; + std::cout << " nbloops number of times the GEMM routines is executed\n" + std::cout << " nbtries number of times the loop is benched (return the best try)\n" + std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n" + std::cout << " check check eigen product using cblas as a reference\n"; exit(1); } @@ -119,7 +136,7 @@ int main(int argc, char *argv[]) mb = MyMatrix::random(K,N); mc = MyMatrix::random(M,N); - // eigen fast ? + // eigen // if (!(std::string(argv[1])=="auto")) { timer.reset(); @@ -158,20 +175,10 @@ int main(int argc, char *argv[]) using namespace Eigen; - - void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) { for (uint j=0 ; j<nbloops ; ++j) - #ifdef EIGEN_WIP_PRODUCT_DIRTY - mc = ma * mb; - #else mc += (ma * mb).lazy(); - /*static_cast<MatrixBase<MyMatrix>& >*//*(mc).operator+=( (ma * mb).lazy() );*/ - -// Flagged<Product<MyMatrix,MyMatrix,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>( -// Product<MyMatrix,MyMatrix,CacheFriendlyProduct>(ma, mb))); - #endif } void bench_eigengemm_normal(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) @@ -183,7 +190,6 @@ void bench_eigengemm_normal(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb #define MYVERIFY(A,M) if (!(A)) { \ std::cout << "FAIL: " << M << "\n"; \ } - void check_product(int M, int N, int K) { MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N); @@ -200,20 +206,20 @@ void check_product(int M, int N, int K) meigen += ma * mb; MYVERIFY(meigen.isApprox(mref, eps),". * ."); -// meigen = mref = mc; -// CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M); -// meigen += maT.transpose() * mb; -// MYVERIFY(meigen.isApprox(mref, eps),"T * ."); -// -// meigen = mref = mc; -// CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M); -// meigen += (maT.transpose()) * (mbT.transpose()); -// MYVERIFY(meigen.isApprox(mref, eps),"T * T"); -// -// meigen = mref = mc; -// CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M); -// meigen += ma * mbT.transpose(); -// MYVERIFY(meigen.isApprox(mref, eps),". * T"); + meigen = mref = mc; + CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M); + meigen += maT.transpose() * mb; + MYVERIFY(meigen.isApprox(mref, eps),"T * ."); + + meigen = mref = mc; + CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M); + meigen += (maT.transpose()) * (mbT.transpose()); + MYVERIFY(meigen.isApprox(mref, eps),"T * T"); + + meigen = mref = mc; + CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M); + meigen += ma * mbT.transpose(); + MYVERIFY(meigen.isApprox(mref, eps),". * T"); } void check_product(void) diff --git a/bench/benchmark.cpp b/bench/benchmark.cpp index b48b21d68..5aeb2d315 100644 --- a/bench/benchmark.cpp +++ b/bench/benchmark.cpp @@ -18,17 +18,6 @@ USING_PART_OF_NAMESPACE_EIGEN int main(int argc, char *argv[]) { - Matrix4i m1, m2, m3; - m1.setRandom(); - m2.setConstant(2); - int s1 = 2; - m3 = m1; - std::cout << m1 << "\n\n"; - std::cout << m2 << "\n\n"; - m3 = m1.cwiseProduct(m2); - std::cout << m3 << "\n==\n" << m1*s1 << "\n\n"; -// v(1,2,3,4); -// std::cout << v * 2 << "\n"; Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::ones(); Matrix<SCALAR,MATSIZE,MATSIZE> m; for(int i = 0; i < MATSIZE; i++) @@ -39,7 +28,7 @@ int main(int argc, char *argv[]) asm("#begin"); for(int a = 0; a < REPEAT; a++) { - m = Matrix<SCALAR,MATSIZE,MATSIZE>::ones() + 0.00005 * (m + (m*m).eval()); + m = Matrix<SCALAR,MATSIZE,MATSIZE>::ones() + 0.00005 * (m + (m*m)); } asm("#end"); cout << m << endl; diff --git a/test/basicstuff.cpp b/test/basicstuff.cpp index a2ad3dedf..4e3db3c1b 100644 --- a/test/basicstuff.cpp +++ b/test/basicstuff.cpp @@ -82,6 +82,12 @@ template<typename MatrixType> void basicStuff(const MatrixType& m) { VERIFY_RAISES_ASSERT(m1 = (m2.block(0,0, rows-1, cols-1))); } + + // test swap + m3 = m1; + m1.swap(m2); + VERIFY_IS_APPROX(m3, m2); + VERIFY_IS_NOT_APPROX(m3, m1); } void test_basicstuff() diff --git a/test/product.cpp b/test/product.cpp index f1e26d20a..2fc50899f 100644 --- a/test/product.cpp +++ b/test/product.cpp @@ -31,8 +31,10 @@ template<typename MatrixType> void product(const MatrixType& m) */ typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits<Scalar>::FloatingPoint FloatingPoint; typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType; - typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType; + typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime> RowSquareMatrixType; + typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> ColSquareMatrixType; int rows = m.rows(); int cols = m.cols(); @@ -43,11 +45,13 @@ template<typename MatrixType> void product(const MatrixType& m) m2 = MatrixType::random(rows, cols), m3(rows, cols), mzero = MatrixType::zero(rows, cols); - SquareMatrixType - identity = Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> - ::identity(rows, rows), - square = Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> - ::random(rows, rows); + RowSquareMatrixType + identity = RowSquareMatrixType::identity(rows, rows), + square = RowSquareMatrixType::random(rows, rows), + res = RowSquareMatrixType::random(rows, rows); + ColSquareMatrixType + square2 = ColSquareMatrixType::random(cols, cols), + res2 = ColSquareMatrixType::random(cols, cols); VectorType v1 = VectorType::random(rows), v2 = VectorType::random(rows), vzero = VectorType::zero(rows); @@ -83,6 +87,20 @@ template<typename MatrixType> void product(const MatrixType& m) if (rows!=cols) VERIFY_RAISES_ASSERT(m3 = m1*m1); + + // test the previous tests were not screwed up because operator* returns 0 + VERIFY_IS_NOT_APPROX((m1.transpose()*m2).template cast<FloatingPoint>(), (m2.transpose()*m1).template cast<FloatingPoint>()); + + // test optimized operator+= path + res = square; + res += (m1 * m2.transpose()).lazy(); + VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); + VERIFY_IS_NOT_APPROX(res.template cast<FloatingPoint>(), (square + m2 * m1.transpose()).template cast<FloatingPoint>()); + + res2 = square2; + res2 += (m1.transpose() * m2).lazy(); + VERIFY_IS_APPROX(res2, square2 + m1.transpose() * m2); + VERIFY_IS_NOT_APPROX(res2.template cast<FloatingPoint>(), (square2 + m2.transpose() * m1).template cast<FloatingPoint>()); } void test_product() @@ -93,11 +111,17 @@ void test_product() CALL_SUBTEST( product(Matrix4d()) ); CALL_SUBTEST( product(Matrix4f()) ); CALL_SUBTEST( product(MatrixXf(3,5)) ); + CALL_SUBTEST( product(MatrixXi(28,39)) ); } for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST( product(MatrixXf(ei_random<int>(1,320), ei_random<int>(1,320))) ); CALL_SUBTEST( product(MatrixXd(ei_random<int>(1,320), ei_random<int>(1,320))) ); - CALL_SUBTEST( product(MatrixXi(ei_random<int>(1,320), ei_random<int>(1,320))) ); + CALL_SUBTEST( product(MatrixXi(ei_random<int>(1,256), ei_random<int>(1,256))) ); CALL_SUBTEST( product(MatrixXcf(ei_random<int>(1,50), ei_random<int>(1,50))) ); + #ifndef EIGEN_DEFAULT_TO_ROW_MAJOR + CALL_SUBTEST( product(Matrix<float,Dynamic,Dynamic,Dynamic,Dynamic,RowMajorBit>(ei_random<int>(1,320), ei_random<int>(1,320))) ); + #else + CALL_SUBTEST( product(Matrix<float,Dynamic,Dynamic,Dynamic,Dynamic,0>(ei_random<int>(1,320), ei_random<int>(1,320))) ); + #endif } } |