From f2c3e2b10fbc15fbcd3d5a24def771cbd7549d8c Mon Sep 17 00:00:00 2001
From: Benoit Jacob
Date: Thu, 12 Mar 2015 13:16:33 -0700
Subject: Add --only-cubic-sizes option to analyze-blocking-sizes tool

---
 bench/analyze-blocking-sizes.cpp | 81 ++++++++++++++++++++++++++++------------
 1 file changed, 58 insertions(+), 23 deletions(-)

diff --git a/bench/analyze-blocking-sizes.cpp b/bench/analyze-blocking-sizes.cpp
index 7aae57d93..cf8236a82 100644
--- a/bench/analyze-blocking-sizes.cpp
+++ b/bench/analyze-blocking-sizes.cpp
@@ -25,6 +25,9 @@ using namespace std;
 
 const int default_precision = 4;
 
+// see --only-cubic-sizes
+bool only_cubic_sizes = false;
+
 uint8_t log2_pot(size_t x) {
   size_t l = 0;
   while (x >>= 1) l++;
@@ -130,6 +133,9 @@ struct inputfile_t
         cerr << "offending line:" << endl << line << endl;
         exit(1);
       }
+      if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
+        continue;
+      }
       inputfile_entry_t entry;
       entry.product_size = uint16_t(product_size);
       entry.pot_block_size = uint16_t(block_size);
@@ -155,6 +161,9 @@ struct inputfile_t
         cerr << "offending line:" << endl << line << endl;
         exit(1);
       }
+      if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
+        continue;
+      }
       inputfile_entry_t entry;
       entry.product_size = uint16_t(product_size);
       entry.pot_block_size = 0;
@@ -505,28 +514,23 @@ void print_partition(
 struct action_t
 {
   virtual const char* invokation_name() const { abort(); return nullptr; }
-  virtual void run(int, char*[]) const { abort(); }
+  virtual void run(const vector<string>&) const { abort(); }
   virtual ~action_t() {}
 };
 
 struct partition_action_t : action_t
 {
-  virtual const char* invokation_name() const { return "partition"; }
-  virtual void run(int argc, char *argv[]) const
+  virtual const char* invokation_name() const override { return "partition"; }
+  virtual void run(const vector<string>& input_filenames) const override
  {
     vector<preprocessed_inputfile_t> preprocessed_inputfiles;
 
-    if (!argc) {
+    if (input_filenames.empty()) {
       cerr << "The " << invokation_name() << " action needs a list of input files." << endl;
       exit(1);
     }
 
-    vector<string> inputfilenames;
-    for (int i = 0; i < argc; i++) {
-      inputfilenames.emplace_back(argv[i]);
-    }
-
-    for (auto it = inputfilenames.begin(); it != inputfilenames.end(); ++it) {
+    for (auto it = input_filenames.begin(); it != input_filenames.end(); ++it) {
       inputfile_t inputfile(*it);
       switch (inputfile.type) {
         case inputfile_t::type_t::all_pot_sizes:
@@ -610,7 +614,7 @@ struct evaluate_defaults_action_t : action_t
   static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) {
     return e1.default_efficiency < e2.default_efficiency;
   }
-  virtual const char* invokation_name() const { return "evaluate-defaults"; }
+  virtual const char* invokation_name() const override { return "evaluate-defaults"; }
   void show_usage_and_exit() const
   {
     cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl;
@@ -618,13 +622,13 @@ struct evaluate_defaults_action_t : action_t
          << "performance measured over all POT sizes." << endl;
     exit(1);
   }
-  virtual void run(int argc, char *argv[]) const
+  virtual void run(const vector<string>& input_filenames) const override
   {
-    if (argc != 2) {
+    if (input_filenames.size() != 2) {
       show_usage_and_exit();
     }
-    inputfile_t inputfile_default_sizes(argv[0]);
-    inputfile_t inputfile_all_pot_sizes(argv[1]);
+    inputfile_t inputfile_default_sizes(input_filenames[0]);
+    inputfile_t inputfile_all_pot_sizes(input_filenames[1]);
     if (inputfile_default_sizes.type != inputfile_t::type_t::default_sizes) {
       cerr << inputfile_default_sizes.filename << " is not an input file with default sizes." << endl;
       show_usage_and_exit();
@@ -719,7 +723,7 @@ struct evaluate_defaults_action_t : action_t
 
 void show_usage_and_exit(int argc, char* argv[],
                          const vector<unique_ptr<action_t>>& available_actions)
 {
-  cerr << "usage: " << argv[0] << " <action> <input files...>" << endl;
+  cerr << "usage: " << argv[0] << " <action> [options...] <input files...>" << endl;
   cerr << "available actions:" << endl;
   for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
     cerr << "  " << (*it)->invokation_name() << endl;
@@ -737,21 +741,52 @@ int main(int argc, char* argv[])
   available_actions.emplace_back(new partition_action_t);
   available_actions.emplace_back(new evaluate_defaults_action_t);
 
-  auto action = available_actions.end();
+  vector<string> input_filenames;
+
+  action_t* action = nullptr;
 
   if (argc < 2) {
     show_usage_and_exit(argc, argv, available_actions);
   }
-  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
-    if (!strcmp(argv[1], (*it)->invokation_name())) {
-      action = it;
-      break;
+  for (int i = 1; i < argc; i++) {
+    bool arg_handled = false;
+    // Step 1. Try to match action invokation names.
+    for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
+      if (!strcmp(argv[i], (*it)->invokation_name())) {
+        if (!action) {
+          action = it->get();
+          arg_handled = true;
+          break;
+        } else {
+          cerr << "can't specify more than one action!" << endl;
+          show_usage_and_exit(argc, argv, available_actions);
+        }
+      }
+    }
+    if (arg_handled) {
+      continue;
+    }
+    // Step 2. Try to match option names.
+    if (argv[i][0] == '-') {
+      if (!strcmp(argv[i], "--only-cubic-sizes")) {
+        only_cubic_sizes = true;
+        arg_handled = true;
+      }
+      if (!arg_handled) {
+        cerr << "Unrecognized option: " << argv[i] << endl;
+        show_usage_and_exit(argc, argv, available_actions);
+      }
+    }
+    if (arg_handled) {
+      continue;
     }
+    // Step 3. Default to interpreting args as input filenames.
+    input_filenames.emplace_back(argv[i]);
   }
-  if (action == available_actions.end()) {
+  if (!action) {
     show_usage_and_exit(argc, argv, available_actions);
   }
-  (*action)->run(argc - 2, argv + 2);
+  action->run(input_filenames);
 }
--
cgit v1.2.3

From 2f6f8bf31c781347e48183d3681978eff6ddbede Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Fri, 13 Mar 2015 16:24:40 +0100
Subject: Add missing coeff/coeffRef members to Block, and extend unit tests.

---
 Eigen/src/SparseCore/SparseBlock.h | 25 ++++++++++++
 test/sparse_basic.cpp              | 78 +++++++++++++++++++++++++++---------
 2 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h
index acd82e926..2b31716a3 100644
--- a/Eigen/src/SparseCore/SparseBlock.h
+++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -49,6 +49,16 @@ public:
       return nnz;
     }
 
+    inline const Scalar coeff(Index row, Index col) const
+    {
+      return m_matrix.coeff(row + (IsRowMajor ?
0 : m_outerStart)); + } + + inline const Scalar coeff(Index index) const + { + return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart); + } + inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; } Index startRow() const { return IsRowMajor ? m_outerStart : 0; } Index startCol() const { return IsRowMajor ? 0 : m_outerStart; } @@ -204,6 +214,21 @@ public: } bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; } + + inline Scalar& coeffRef(Index row, Index col) + { + return m_matrix.const_cast_derived().coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); + } + + inline const Scalar coeff(Index row, Index col) const + { + return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); + } + + inline const Scalar coeff(Index index) const + { + return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart); + } const Scalar& lastCoeff() const { diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index e243964f4..d929e1463 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -30,6 +30,7 @@ template void sparse_basic(const SparseMatrixType& re double density = (std::max)(8./(rows*cols), 0.01); typedef Matrix DenseMatrix; typedef Matrix DenseVector; + typedef Matrix RowDenseVector; Scalar eps = 1e-6; Scalar s1 = internal::random(); @@ -59,32 +60,61 @@ template void sparse_basic(const SparseMatrixType& re VERIFY_IS_APPROX(m, refMat); // test InnerIterators and Block expressions - for (Index t=0; t<10; ++t) + for (int t=0; t<10; ++t) { - Index j = internal::random(0,cols-1); - Index i = internal::random(0,rows-1); - Index w = internal::random(1,cols-j-1); - Index h = internal::random(1,rows-i-1); + Index j = internal::random(0,cols-2); + Index i = internal::random(0,rows-2); + Index w = internal::random(1,cols-j); + Index h = internal::random(1,rows-i); - VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w)); + VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w)); for(Index c=0; c void sparse_basic(const SparseMatrixType& re refMat2.col(i) = refMat2.col(i) * s1; VERIFY_IS_APPROX(m2,refMat2); } + + Index r0 = internal::random(0,rows-2); + Index c0 = internal::random(0,cols-2); + Index r1 = internal::random(1,rows-r0); + Index c1 = internal::random(1,cols-c0); + + VERIFY_IS_APPROX(DenseVector(m2.col(c0)), refMat2.col(c0)); + VERIFY_IS_APPROX(m2.col(c0), refMat2.col(c0)); + + VERIFY_IS_APPROX(RowDenseVector(m2.row(r0)), refMat2.row(r0)); + VERIFY_IS_APPROX(m2.row(r0), refMat2.row(r0)); + + VERIFY_IS_APPROX(m2.block(r0,c0,r1,c1), refMat2.block(r0,c0,r1,c1)); + VERIFY_IS_APPROX((2*m2).block(r0,c0,r1,c1), (2*refMat2).block(r0,c0,r1,c1)); } // test prune -- cgit v1.2.3 From d73ccd717e000d0a91293db2b24c402e49c907ff Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Fri, 13 Mar 2015 10:36:01 -0700 Subject: Add support for dumping blocking sizes tables --- bench/analyze-blocking-sizes.cpp | 75 +++++++++++++++++++++++++++++++++++++- bench/benchmark-blocking-sizes.cpp | 4 +- 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/bench/analyze-blocking-sizes.cpp b/bench/analyze-blocking-sizes.cpp index cf8236a82..c133df599 100644 --- a/bench/analyze-blocking-sizes.cpp +++ b/bench/analyze-blocking-sizes.cpp @@ -28,6 +28,9 @@ const int default_precision = 4; // see --only-cubic-sizes bool only_cubic_sizes = false; +// see --dump-tables +bool dump_tables = false; + uint8_t log2_pot(size_t x) { size_t l = 0; 
while (x >>= 1) l++; @@ -318,14 +321,74 @@ float efficiency_of_subset( efficiency_this_product_size = max(efficiency_this_product_size, efficiency_this_entry); } efficiency = min(efficiency, efficiency_this_product_size); - first_entry_index_with_this_product_size = entry_index; - product_size = first_file.entries[entry_index].product_size; + if (entry_index < num_entries) { + first_entry_index_with_this_product_size = entry_index; + product_size = first_file.entries[entry_index].product_size; + } } } return efficiency; } +void dump_table_for_subset( + const vector& preprocessed_inputfiles, + const vector& subset) +{ + const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]]; + const size_t num_entries = first_file.entries.size(); + size_t entry_index = 0; + size_t first_entry_index_with_this_product_size = 0; + uint16_t product_size = first_file.entries[0].product_size; + size_t i = 0; + size_triple_t min_product_size(first_file.entries.front().product_size); + size_triple_t max_product_size(first_file.entries.back().product_size); + if (!min_product_size.is_cubic() || !max_product_size.is_cubic()) { + abort(); + } + if (only_cubic_sizes) { + cout << "/* Warning: generated with --only-cubic-sizes ! */" << endl; + } + cout << "struct optimal_block_sizes_table {" << endl; + cout << " static const size_t min_size = " << min_product_size.k << ";" << endl; + cout << " static const size_t max_size = " << max_product_size.k << ";" << endl; + cout << " static const uint16_t* table() {" << endl; + cout << " static const uint16_t data[] = {"; + while (entry_index < num_entries) { + ++entry_index; + if (entry_index == num_entries || + first_file.entries[entry_index].product_size != product_size) + { + float best_efficiency_this_product_size = 0.0f; + uint16_t best_block_size_this_product_size = 0; + for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) { + float efficiency_this_entry = 1.0f; + for (auto i = subset.begin(); i != subset.end(); ++i) { + efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency); + } + if (efficiency_this_entry > best_efficiency_this_product_size) { + best_efficiency_this_product_size = efficiency_this_entry; + best_block_size_this_product_size = first_file.entries[e].block_size; + } + } + if ((i++) % 8) { + cout << ", "; + } else { + cout << endl << " "; + } + cout << "0x" << hex << best_block_size_this_product_size << dec; + if (entry_index < num_entries) { + first_entry_index_with_this_product_size = entry_index; + product_size = first_file.entries[entry_index].product_size; + } + } + } + cout << endl << " };" << endl; + cout << " return data;" << endl; + cout << " }" << endl; + cout << "};" << endl; +} + float efficiency_of_partition( const vector& preprocessed_inputfiles, const vector>& partition) @@ -507,6 +570,10 @@ void print_partition( for (auto file = subset->begin(); file != subset->end(); ++file) { cout << " " << preprocessed_inputfiles[*file].filename << endl; } + if (dump_tables) { + cout << " Table:" << endl; + dump_table_for_subset(preprocessed_inputfiles, *subset); + } } cout << endl; } @@ -772,6 +839,10 @@ int main(int argc, char* argv[]) only_cubic_sizes = true; arg_handled = true; } + if (!strcmp(argv[i], "--dump-tables")) { + dump_tables = true; + arg_handled = true; + } if (!arg_handled) { cerr << "Unrecognized option: " << argv[i] << endl; show_usage_and_exit(argc, argv, available_actions); diff --git a/bench/benchmark-blocking-sizes.cpp 
b/bench/benchmark-blocking-sizes.cpp
index 33d3824eb..827be2880 100644
--- a/bench/benchmark-blocking-sizes.cpp
+++ b/bench/benchmark-blocking-sizes.cpp
@@ -446,7 +446,7 @@ void try_run_some_benchmarks(
   unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
 
   while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
-    if (seconds_to_sleep_if_lower_clock_speed > 30) {
+    if (seconds_to_sleep_if_lower_clock_speed > 32) {
       cerr << "Sleeping longer probably won't make a difference." << endl;
       cerr << "Serializing benchmarks to " << session_filename << endl;
       serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run);
@@ -456,7 +456,7 @@ void try_run_some_benchmarks(
     rerun_last_tests = true;
     cerr << "Sleeping "
          << seconds_to_sleep_if_lower_clock_speed
-         << " s..." << endl;
+         << " s... \r" << endl;
     sleep(seconds_to_sleep_if_lower_clock_speed);
     current_clock_speed = measure_clock_speed();
     seconds_to_sleep_if_lower_clock_speed *= 2;
--
cgit v1.2.3

From 5ffe29cb9f260f35f1ae0f878885a77d42a29d8b Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Fri, 13 Mar 2015 20:57:33 +0100
Subject: Bound pre-allocation to the maximal size representable by StorageIndex
 and throw bad_alloc if that's not possible.

---
 Eigen/src/SparseCore/CompressedStorage.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h
index 49fd46658..52c7da297 100644
--- a/Eigen/src/SparseCore/CompressedStorage.h
+++ b/Eigen/src/SparseCore/CompressedStorage.h
@@ -86,7 +86,12 @@ class CompressedStorage
     void resize(Index size, double reserveSizeFactor = 0)
     {
       if (m_allocatedSize<size)
-        reallocate(size + Index(reserveSizeFactor*double(size)));
+      {
+        Index realloc_size = (std::min<Index>)(NumTraits<StorageIndex>::highest(), size + Index(reserveSizeFactor*double(size)));
+        if(realloc_size<size)
+          internal::throw_std_bad_alloc();
+        reallocate(realloc_size);
+      }
       m_size = size;
     }
 
--
cgit v1.2.3

From: Gael Guennebaud
Date: Fri, 13 Mar 2015 21:00:21 +0100
Subject: SparseMatrix::insert: switch to a fully uncompressed mode if sequential
 insertion is not possible (otherwise an arbitrary large amount of memory was
 preallocated in some cases)

---
 Eigen/src/SparseCore/SparseMatrix.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 0ba7e111a..4c3a47959 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -1172,8 +1172,12 @@ typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Op
     return (m_data.value(p) = 0);
   }
 
-  // make sure the matrix is compatible to random un-compressed insertion:
-  m_data.resize(m_data.allocatedSize());
+  if(m_data.size() != m_data.allocatedSize())
+  {
+    // make sure the matrix is compatible to random un-compressed insertion:
+    m_data.resize(m_data.allocatedSize());
+    this->reserveInnerVectors(Array<StorageIndex,Dynamic,1>::Constant(2*m_outerSize, convert_index(m_outerSize)));
+  }
 
   return insertUncompressed(row,col);
 }
--
cgit v1.2.3

From 8580eb6808428a53d5fb91be23fb5c6c8c9e9463 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Fri, 13 Mar 2015 21:06:20 +0100
Subject: bug #949: add static assertion for incompatible scalar types in dense
 end-user decompositions.
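In a nutshell: each decomposition gains a check_template_parameters() hook that rejects unsupported scalar types when compute() is instantiated. A hypothetical, simplified C++11 sketch of the guard, for illustration only (the real EIGEN_STATIC_ASSERT_NON_INTEGER is Eigen's own pre-C++11 static-assert macro):

  #include <Eigen/Core>

  template <typename Scalar>
  void check_template_parameters_sketch()
  {
    // Dense decompositions divide by pivots/norms, so integer scalar
    // types cannot work; reject them at compile time.
    static_assert(!Eigen::NumTraits<Scalar>::IsInteger,
                  "this decomposition requires a non-integer scalar type");
  }

The failtest/*_int.cpp files added below verify exactly this behavior: each fails to build when SCALAR is int (or, for eigensolver_cplx, when a complex scalar is used where a real one is required).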
--- Eigen/src/Cholesky/LDLT.h | 7 +++++++ Eigen/src/Cholesky/LLT.h | 8 ++++++++ Eigen/src/Eigenvalues/ComplexEigenSolver.h | 8 ++++++++ Eigen/src/Eigenvalues/EigenSolver.h | 9 +++++++++ Eigen/src/Eigenvalues/GeneralizedEigenSolver.h | 9 +++++++++ Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h | 7 +++++++ Eigen/src/LU/FullPivLU.h | 8 ++++++++ Eigen/src/LU/PartialPivLU.h | 8 ++++++++ Eigen/src/QR/ColPivHouseholderQR.h | 8 ++++++++ Eigen/src/QR/FullPivHouseholderQR.h | 8 ++++++++ Eigen/src/QR/HouseholderQR.h | 8 ++++++++ Eigen/src/SVD/SVDBase.h | 10 +++++++++- failtest/CMakeLists.txt | 12 ++++++++++++ failtest/bdcsvd_int.cpp | 14 ++++++++++++++ failtest/colpivqr_int.cpp | 14 ++++++++++++++ failtest/eigensolver_cplx.cpp | 14 ++++++++++++++ failtest/eigensolver_int.cpp | 14 ++++++++++++++ failtest/fullpivlu_int.cpp | 14 ++++++++++++++ failtest/fullpivqr_int.cpp | 14 ++++++++++++++ failtest/jacobisvd_int.cpp | 14 ++++++++++++++ failtest/ldlt_int.cpp | 14 ++++++++++++++ failtest/llt_int.cpp | 14 ++++++++++++++ failtest/partialpivlu_int.cpp | 14 ++++++++++++++ failtest/qr_int.cpp | 14 ++++++++++++++ 24 files changed, 263 insertions(+), 1 deletion(-) create mode 100644 failtest/bdcsvd_int.cpp create mode 100644 failtest/colpivqr_int.cpp create mode 100644 failtest/eigensolver_cplx.cpp create mode 100644 failtest/eigensolver_int.cpp create mode 100644 failtest/fullpivlu_int.cpp create mode 100644 failtest/fullpivqr_int.cpp create mode 100644 failtest/jacobisvd_int.cpp create mode 100644 failtest/ldlt_int.cpp create mode 100644 failtest/llt_int.cpp create mode 100644 failtest/partialpivlu_int.cpp create mode 100644 failtest/qr_int.cpp diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index f46f7b758..93a726483 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -226,6 +226,11 @@ template class LDLT #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } /** \internal * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U. @@ -424,6 +429,8 @@ template struct LDLT_Traits template LDLT& LDLT::compute(const MatrixType& a) { + check_template_parameters(); + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index 629c87161..745b74d95 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -170,6 +170,12 @@ template class LLT #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + /** \internal * Used to compute and store L * The strict upper part is not used and even not initialized. 
@@ -377,6 +383,8 @@ template struct LLT_Traits template LLT& LLT::compute(const MatrixType& a) { + check_template_parameters(); + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); m_matrix.resize(size, size); diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h index 075a62848..6b010c312 100644 --- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h +++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h @@ -234,6 +234,12 @@ template class ComplexEigenSolver } protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + EigenvectorType m_eivec; EigenvalueType m_eivalues; ComplexSchur m_schur; @@ -251,6 +257,8 @@ template ComplexEigenSolver& ComplexEigenSolver::compute(const MatrixType& matrix, bool computeEigenvectors) { + check_template_parameters(); + // this code is inspired from Jampack eigen_assert(matrix.cols() == matrix.rows()); diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h index a63a42341..167cd99ab 100644 --- a/Eigen/src/Eigenvalues/EigenSolver.h +++ b/Eigen/src/Eigenvalues/EigenSolver.h @@ -299,6 +299,13 @@ template class EigenSolver void doComputeEigenvectors(); protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + EIGEN_STATIC_ASSERT(!NumTraits::IsComplex, NUMERIC_TYPE_MUST_BE_REAL); + } + MatrixType m_eivec; EigenvalueType m_eivalues; bool m_isInitialized; @@ -366,6 +373,8 @@ template EigenSolver& EigenSolver::compute(const MatrixType& matrix, bool computeEigenvectors) { + check_template_parameters(); + using std::sqrt; using std::abs; using numext::isfinite; diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index c9da6740a..e2e28cd4a 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -263,6 +263,13 @@ template class GeneralizedEigenSolver } protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + EIGEN_STATIC_ASSERT(!NumTraits::IsComplex, NUMERIC_TYPE_MUST_BE_REAL); + } + MatrixType m_eivec; ComplexVectorType m_alphas; VectorType m_betas; @@ -290,6 +297,8 @@ template GeneralizedEigenSolver& GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors) { + check_template_parameters(); + using std::sqrt; using std::abs; eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows()); diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index 66d1154cf..1dcfacf0b 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -347,6 +347,11 @@ template class SelfAdjointEigenSolver static const int m_maxIterations = 30; protected: + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_eivec; RealVectorType m_eivalues; typename TridiagonalizationType::SubDiagonalType m_subdiag; @@ -382,6 +387,8 @@ EIGEN_DEVICE_FUNC SelfAdjointEigenSolver& SelfAdjointEigenSolver ::compute(const MatrixType& matrix, int options) { + check_template_parameters(); + using std::abs; eigen_assert(matrix.cols() == matrix.rows()); eigen_assert((options&~(EigVecMask|GenEigMask))==0 diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index d1a260a37..75dbc16b0 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ 
-390,6 +390,12 @@ template class FullPivLU #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_lu; PermutationPType m_p; PermutationQType m_q; @@ -434,6 +440,8 @@ FullPivLU::FullPivLU(const MatrixType& matrix) template FullPivLU& FullPivLU::compute(const MatrixType& matrix) { + check_template_parameters(); + // the permutations are stored as int indices, so just to be sure: eigen_assert(matrix.rows()<=NumTraits::highest() && matrix.cols()<=NumTraits::highest()); diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 3d8825a97..019fc4230 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -209,6 +209,12 @@ template class PartialPivLU #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_lu; PermutationType m_p; TranspositionType m_rowsTranspositions; @@ -425,6 +431,8 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t template PartialPivLU& PartialPivLU::compute(const MatrixType& matrix) { + check_template_parameters(); + // the row permutation is stored as int indices, so just to be sure: eigen_assert(matrix.rows()::highest()); diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 03ff0a8f2..7b3842cbe 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -398,6 +398,12 @@ template class ColPivHouseholderQR #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_qr; HCoeffsType m_hCoeffs; PermutationType m_colsPermutation; @@ -436,6 +442,8 @@ typename MatrixType::RealScalar ColPivHouseholderQR::logAbsDetermina template ColPivHouseholderQR& ColPivHouseholderQR::compute(const MatrixType& matrix) { + check_template_parameters(); + using std::abs; Index rows = matrix.rows(); Index cols = matrix.cols(); diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h index 4952fbb46..4c2c958a8 100644 --- a/Eigen/src/QR/FullPivHouseholderQR.h +++ b/Eigen/src/QR/FullPivHouseholderQR.h @@ -380,6 +380,12 @@ template class FullPivHouseholderQR #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_qr; HCoeffsType m_hCoeffs; IntDiagSizeVectorType m_rows_transpositions; @@ -419,6 +425,8 @@ typename MatrixType::RealScalar FullPivHouseholderQR::logAbsDetermin template FullPivHouseholderQR& FullPivHouseholderQR::compute(const MatrixType& matrix) { + check_template_parameters(); + using std::abs; Index rows = matrix.rows(); Index cols = matrix.cols(); diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h index 195bacb85..878654be5 100644 --- a/Eigen/src/QR/HouseholderQR.h +++ b/Eigen/src/QR/HouseholderQR.h @@ -196,6 +196,12 @@ template class HouseholderQR #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_qr; HCoeffsType m_hCoeffs; RowVectorType m_temp; @@ -348,6 +354,8 @@ void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) c template HouseholderQR& HouseholderQR::compute(const MatrixType& matrix) { + check_template_parameters(); + Index rows = matrix.rows(); Index cols = matrix.cols(); Index size = (std::min)(rows,cols); diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h index 8903755e7..b89393721 100644 --- 
a/Eigen/src/SVD/SVDBase.h
+++ b/Eigen/src/SVD/SVDBase.h
@@ -217,6 +217,12 @@ public:
   #endif
 
 protected:
+
+  static void check_template_parameters()
+  {
+    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+  }
+
   // return true if already allocated
   bool allocate(Index rows, Index cols, unsigned int computationOptions) ;
 
@@ -240,7 +246,9 @@ protected:
       m_usePrescribedThreshold(false),
       m_computationOptions(0),
       m_rows(-1), m_cols(-1), m_diagSize(0)
-  {}
+  {
+    check_template_parameters();
+  }
 };
 
diff --git a/failtest/CMakeLists.txt b/failtest/CMakeLists.txt
index c8795a344..d3e82ecd9 100644
--- a/failtest/CMakeLists.txt
+++ b/failtest/CMakeLists.txt
@@ -47,6 +47,18 @@ ei_add_failtest("sparse_ref_3")
 ei_add_failtest("sparse_ref_4")
 ei_add_failtest("sparse_ref_5")
 
+ei_add_failtest("partialpivlu_int")
+ei_add_failtest("fullpivlu_int")
+ei_add_failtest("llt_int")
+ei_add_failtest("ldlt_int")
+ei_add_failtest("qr_int")
+ei_add_failtest("colpivqr_int")
+ei_add_failtest("fullpivqr_int")
+ei_add_failtest("jacobisvd_int")
+ei_add_failtest("bdcsvd_int")
+ei_add_failtest("eigensolver_int")
+ei_add_failtest("eigensolver_cplx")
+
 if (EIGEN_FAILTEST_FAILURE_COUNT)
   message(FATAL_ERROR
           "${EIGEN_FAILTEST_FAILURE_COUNT} out of ${EIGEN_FAILTEST_COUNT} failtests FAILED. "
diff --git a/failtest/bdcsvd_int.cpp b/failtest/bdcsvd_int.cpp
new file mode 100644
index 000000000..670752cf5
--- /dev/null
+++ b/failtest/bdcsvd_int.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/SVD"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define SCALAR int
+#else
+#define SCALAR float
+#endif
+
+using namespace Eigen;
+
+int main()
+{
+  BDCSVD<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
+}
diff --git a/failtest/colpivqr_int.cpp b/failtest/colpivqr_int.cpp
new file mode 100644
index 000000000..db11910d4
--- /dev/null
+++ b/failtest/colpivqr_int.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/QR"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define SCALAR int
+#else
+#define SCALAR float
+#endif
+
+using namespace Eigen;
+
+int main()
+{
+  ColPivHouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
+}
diff --git a/failtest/eigensolver_cplx.cpp b/failtest/eigensolver_cplx.cpp
new file mode 100644
index 000000000..c2e21e189
--- /dev/null
+++ b/failtest/eigensolver_cplx.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/Eigenvalues"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define SCALAR std::complex<double>
+#else
+#define SCALAR float
+#endif
+
+using namespace Eigen;
+
+int main()
+{
+  EigenSolver<Matrix<SCALAR,Dynamic,Dynamic> > eig(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
+}
diff --git a/failtest/eigensolver_int.cpp b/failtest/eigensolver_int.cpp
new file mode 100644
index 000000000..eda8dc20b
--- /dev/null
+++ b/failtest/eigensolver_int.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/Eigenvalues"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define SCALAR int
+#else
+#define SCALAR float
+#endif
+
+using namespace Eigen;
+
+int main()
+{
+  EigenSolver<Matrix<SCALAR,Dynamic,Dynamic> > eig(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
+}
diff --git a/failtest/fullpivlu_int.cpp b/failtest/fullpivlu_int.cpp
new file mode 100644
index 000000000..e9d2c6eb3
--- /dev/null
+++ b/failtest/fullpivlu_int.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/LU"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define SCALAR int
+#else
+#define SCALAR float
+#endif
+
+using namespace Eigen;
+
+int main()
+{
+  FullPivLU<Matrix<SCALAR,Dynamic,Dynamic> > lu(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
+}
diff --git a/failtest/fullpivqr_int.cpp b/failtest/fullpivqr_int.cpp
new file mode 100644
index 000000000..d182a7b6b
--- /dev/null
+++ b/failtest/fullpivqr_int.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/QR"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define SCALAR int
+#else
+#define SCALAR float
+#endif
+
+using namespace Eigen;
+
+int main()
+{
+  FullPivHouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
+}
diff --git a/failtest/jacobisvd_int.cpp b/failtest/jacobisvd_int.cpp
new file mode 100644
index 000000000..12790aef1
--- /dev/null
+++ b/failtest/jacobisvd_int.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/SVD"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define SCALAR int
+#else
+#define SCALAR float
+#endif
+
+using namespace Eigen;
+
+int main()
+{
+  JacobiSVD<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
+}
diff --git a/failtest/ldlt_int.cpp b/failtest/ldlt_int.cpp
new file mode 100644
index 000000000..243e45746
--- /dev/null
+++ b/failtest/ldlt_int.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/Cholesky"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define SCALAR int
+#else
+#define SCALAR float
+#endif
+
+using namespace Eigen;
+
+int main()
+{
+  LDLT<Matrix<SCALAR,Dynamic,Dynamic> > ldlt(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
+}
diff --git a/failtest/llt_int.cpp b/failtest/llt_int.cpp
new file mode 100644
index 000000000..cb020650d
--- /dev/null
+++ b/failtest/llt_int.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/Cholesky"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define SCALAR int
+#else
+#define SCALAR float
+#endif
+
+using namespace Eigen;
+
+int main()
+{
+  LLT<Matrix<SCALAR,Dynamic,Dynamic> > llt(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
+}
diff --git a/failtest/partialpivlu_int.cpp b/failtest/partialpivlu_int.cpp
new file mode 100644
index 000000000..98ef282ea
--- /dev/null
+++ b/failtest/partialpivlu_int.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/LU"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define SCALAR int
+#else
+#define SCALAR float
+#endif
+
+using namespace Eigen;
+
+int main()
+{
+  PartialPivLU<Matrix<SCALAR,Dynamic,Dynamic> > lu(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
+}
diff --git a/failtest/qr_int.cpp b/failtest/qr_int.cpp
new file mode 100644
index 000000000..ce200e818
--- /dev/null
+++ b/failtest/qr_int.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/QR"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define SCALAR int
+#else
+#define SCALAR float
+#endif
+
+using namespace Eigen;
+
+int main()
+{
+  HouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
+}
--
cgit v1.2.3

From d99ab35f9e886a014be6d47606d232af1e668f76 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Fri, 13 Mar 2015 21:12:46 +0100
Subject: Fix internal::random(x,y) for integer types. The previous implementation
 could return y+1. The new implementation uses rejection sampling to get an
 unbiased behavior.
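To illustrate the fix: scaling std::rand() directly can both overshoot (returning y+1) and bias the distribution; rejection sampling avoids both. A minimal standalone sketch of the idea, assuming 0 <= y - x < RAND_MAX (hypothetical helper, not the exact Eigen implementation):

  #include <cstdlib>

  // Unbiased uniform integer in [x, y], assuming 0 <= y - x < RAND_MAX.
  int random_in_range(int x, int y)
  {
    int range = y - x;
    int divisor = RAND_MAX / (range + 1); // >= 1 under the assumption above
    int offset;
    do {
      offset = std::rand() / divisor;     // uniform over [0, RAND_MAX/divisor]
    } while (offset > range);             // reject the tail to avoid bias
    return x + offset;
  }

Every accepted value of offset corresponds to exactly divisor outcomes of std::rand(), so each result in [x, y] is equally likely.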
--- Eigen/src/Core/MathFunctions.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 878f38e92..3c76a58b9 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -525,8 +525,25 @@ struct random_default_impl typedef typename NumTraits::NonInteger NonInteger; static inline Scalar run(const Scalar& x, const Scalar& y) - { - return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1))); + { + using std::max; + Scalar range = (max)(Scalar(0),Scalar(y-x)); + Scalar offset = 0; + if(range<=RAND_MAX) + { + // rejection sampling + int divisor = RAND_MAX/(range+1); + + do { + offset = Scalar(std::rand() / divisor); + } while (offset > range); + } + else + { + offset = std::rand() * range; + } + + return x + offset; } static inline Scalar run() -- cgit v1.2.3 From 1330f8bbd12306cc4955d943f27e5281d413bed4 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 13 Mar 2015 21:15:50 +0100 Subject: bug #973, improve AVX support by enabling vectorization of Vector4i-like types, and enforcing alignement of Vector4f/Vector2d-like types to preserve compatibility with SSE and future Eigen versions that will vectorize them with AVX enabled. --- Eigen/src/Core/CoreEvaluators.h | 16 ++++++-- Eigen/src/Core/DenseStorage.h | 88 ++++++++++++++++++++++++++++++++++++++--- Eigen/src/Core/util/Macros.h | 3 ++ Eigen/src/Core/util/XprHelper.h | 5 ++- test/unalignedassert.cpp | 7 ++-- test/vectorization_logic.cpp | 2 +- 6 files changed, 108 insertions(+), 13 deletions(-) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 9485080d3..85f46cb8d 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -647,11 +647,15 @@ struct evaluator > HasNoStride = HasNoInnerStride && HasNoOuterStride, IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + + // TODO: should check for smaller packet types once we can handle multi-sized packet types + AlignBytes = int(packet_traits::size) * sizeof(Scalar), + KeepsPacketAccess = bool(HasNoInnerStride) && ( bool(IsDynamicSize) || HasNoOuterStride || ( OuterStrideAtCompileTime!=Dynamic - && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ), + && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ), Flags0 = evaluator::Flags, Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) @@ -717,7 +721,10 @@ struct evaluator > && (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, - MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0, + // TODO: should check for smaller packet types once we can handle multi-sized packet types + AlignBytes = int(packet_traits::size) * sizeof(Scalar), + + MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? 
LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, Flags0 = evaluator::Flags & ( (HereditaryBits & ~RowMajorBit) | @@ -825,12 +832,15 @@ struct block_evaluator::PlainObject> { typedef Block XprType; + typedef typename XprType::Scalar Scalar; EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) : mapbase_evaluator(block) { + // TODO: should check for smaller packet types once we can handle multi-sized packet types + const int AlignBytes = int(packet_traits::size) * sizeof(Scalar); // FIXME this should be an internal assertion - eigen_assert(EIGEN_IMPLIES(evaluator::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned"); + eigen_assert(EIGEN_IMPLIES(evaluator::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned"); } }; diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index 9186f59a7..522aaa299 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -34,14 +34,35 @@ void check_static_allocation_size() #endif } +template::type, + bool Match = bool((Size%unpacket_traits::size)==0), + bool TryHalf = bool(unpacket_traits::size > Size) + && bool(unpacket_traits::size > unpacket_traits::half>::size) > +struct compute_default_alignment +{ + enum { value = 0 }; +}; + +template +struct compute_default_alignment // Match +{ + enum { value = sizeof(T) * unpacket_traits::size }; +}; + +template +struct compute_default_alignment +{ + // current packet too large, try with an half-packet + enum { value = compute_default_alignment::half>::value }; +}; + /** \internal * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: * to 16 bytes boundary if the total size is a multiple of 16 bytes. */ template + : compute_default_alignment::value > struct plain_array { T array[Size]; @@ -81,14 +102,71 @@ struct plain_array #endif template -struct plain_array +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array { - EIGEN_USER_ALIGN_DEFAULT T array[Size]; + EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size]; EIGEN_DEVICE_FUNC plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1); + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); check_static_allocation_size(); } diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index aaea9f035..6b294e77f 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -318,6 +318,9 @@ // Defined the boundary (in bytes) on which the data needs to be aligned. 
Note // that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be // aligned at all regardless of the value of this #define. +// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES, +// for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough, +// and 16 bytes alignment is also enough for Vector4f. #define EIGEN_ALIGN_BYTES 16 #ifdef EIGEN_DONT_ALIGN diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 528ebe297..562f425bd 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -159,13 +159,16 @@ class compute_matrix_evaluator_flags enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0, is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, + + // TODO: should check for smaller packet types once we can handle multi-sized packet types + align_bytes = int(packet_traits::size) * sizeof(Scalar), aligned_bit = ( ((Options&DontAlign)==0) && ( #if EIGEN_ALIGN_STATICALLY - ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) + ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0)) #else 0 #endif diff --git a/test/unalignedassert.cpp b/test/unalignedassert.cpp index d8815263a..6f7b72167 100644 --- a/test/unalignedassert.cpp +++ b/test/unalignedassert.cpp @@ -81,7 +81,7 @@ void construct_at_boundary(int boundary) void unalignedassert() { - #if EIGEN_ALIGN_STATICALLY +#if EIGEN_ALIGN_STATICALLY construct_at_boundary(4); construct_at_boundary(4); construct_at_boundary(16); @@ -100,7 +100,7 @@ void unalignedassert() construct_at_boundary(4); construct_at_boundary(EIGEN_ALIGN_BYTES); construct_at_boundary(16); - #endif +#endif check_unalignedassert_good(); check_unalignedassert_good(); @@ -112,11 +112,12 @@ void unalignedassert() check_unalignedassert_good >(); #if EIGEN_ALIGN_STATICALLY - if(EIGEN_ALIGN_BYTES==16) + if(EIGEN_ALIGN_BYTES>=16) { VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); } for(int b=8; b::Vectori >(DefaultTraversal,CompleteUnrolling))); VERIFY((test_assign(Matrix11(), Matrix()*Matrix(), - PacketSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD?DefaultTraversal:InnerVectorizedTraversal, CompleteUnrolling))); + InnerVectorizedTraversal, CompleteUnrolling))); #endif VERIFY(test_assign(MatrixXX(10,10),MatrixXX(20,20).block(10,10,2,3), -- cgit v1.2.3 From 488c15615a31fcee310200d726d8e58bb005bc87 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Fri, 13 Mar 2015 14:51:26 -0700 Subject: organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 26 ++++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 408281c82..1b47f1a6d 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -25,21 +25,31 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff return a<=0 ? 
b : a; } +#if EIGEN_ARCH_i386_OR_x86_64 +const std::ptrdiff_t defaultL1CacheSize = 32*1024; +const std::ptrdiff_t defaultL2CacheSize = 256*1024; +const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024; +#else +const std::ptrdiff_t defaultL1CacheSize = 16*1024; +const std::ptrdiff_t defaultL2CacheSize = 512*1024; +const std::ptrdiff_t defaultL3CacheSize = 512*1024; +#endif + /** \internal */ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) { static bool m_cache_sizes_initialized = false; - static std::ptrdiff_t m_l1CacheSize = 32*1024; - static std::ptrdiff_t m_l2CacheSize = 256*1024; - static std::ptrdiff_t m_l3CacheSize = 2*1024*1024; + static std::ptrdiff_t m_l1CacheSize = 0; + static std::ptrdiff_t m_l2CacheSize = 0; + static std::ptrdiff_t m_l3CacheSize = 0; if(!m_cache_sizes_initialized) { int l1CacheSize, l2CacheSize, l3CacheSize; queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize); - m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, 8*1024); - m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, 256*1024); - m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, 8*1024*1024); + m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize); + m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize); + m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize); m_cache_sizes_initialized = true; } @@ -974,7 +984,7 @@ void gebp_kernel(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) )); #else @@ -1211,7 +1221,7 @@ void gebp_kernel=2*Traits::LhsProgress) { - const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function. + const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function. #ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES Index actual_panel_rows = (2*LhsProgress) * std::max(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) )); #else -- cgit v1.2.3 From b6b88c08082dcfc5dd81c6997d6090507267cc13 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Fri, 13 Mar 2015 14:57:05 -0700 Subject: update perf_monitoring/gemm/changesets.txt --- bench/perf_monitoring/gemm/changesets.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index a5b63bc89..40a71c781 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -41,3 +41,5 @@ before-evaluators 6981:7e5d6f78da59 # dynamic loop swapping 6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache 6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1. 
+7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5)
+
--
cgit v1.2.3

From e56aabf205a1e8f581dd8a46d7d46ce79c45e158 Mon Sep 17 00:00:00 2001
From: Benoit Jacob
Date: Sun, 15 Mar 2015 18:05:12 -0400
Subject: Refactor computeProductBlockingSizes to make room for the possibility of
 using lookup tables

---
 Eigen/Core                                         |   1 +
 Eigen/src/Core/MathFunctions.h                     |  36 ++++----
 Eigen/src/Core/products/GeneralBlockPanelKernel.h  | 100 ++++++++++++++-------
 Eigen/src/Core/products/LookupBlockingSizesTable.h |  89 ++++++++++++++++++
 Eigen/src/Core/util/ForwardDeclarations.h          |   8 ++
 5 files changed, 182 insertions(+), 52 deletions(-)
 create mode 100644 Eigen/src/Core/products/LookupBlockingSizesTable.h

diff --git a/Eigen/Core b/Eigen/Core
index 0b8eaa61c..138c34916 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -381,6 +381,7 @@ using std::ptrdiff_t;
 #include "src/Core/Inverse.h"
 #include "src/Core/TriangularMatrix.h"
 #include "src/Core/SelfAdjointView.h"
+#include "src/Core/products/LookupBlockingSizesTable.h"
 #include "src/Core/products/GeneralBlockPanelKernel.h"
 #include "src/Core/products/Parallelizer.h"
 #include "src/Core/ProductEvaluators.h"
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 3c76a58b9..0fde5c71e 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -473,48 +473,48 @@ struct random_default_impl
 };
 
 enum {
-  floor_log2_terminate,
-  floor_log2_move_up,
-  floor_log2_move_down,
-  floor_log2_bogus
+  meta_floor_log2_terminate,
+  meta_floor_log2_move_up,
+  meta_floor_log2_move_down,
+  meta_floor_log2_bogus
 };
 
-template<unsigned int n, int lower, int upper> struct floor_log2_selector
+template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
 {
   enum { middle = (lower + upper) / 2,
-         value = (upper <= lower + 1) ? int(floor_log2_terminate) : (n < (1 << middle)) ? int(floor_log2_move_down) : (n==0) ? int(floor_log2_bogus) : int(floor_log2_move_up)
+         value = (upper <= lower + 1) ? int(meta_floor_log2_terminate) : (n < (1 << middle)) ? int(meta_floor_log2_move_down) : (n==0) ? int(meta_floor_log2_bogus) : int(meta_floor_log2_move_up)
   };
 };
 
 template<unsigned int n,
          int lower = 0,
          int upper = sizeof(unsigned int) * CHAR_BIT - 1,
-         int selector = floor_log2_selector<n, lower, upper>::value>
-struct floor_log2 {};
+         int selector = meta_floor_log2_selector<n, lower, upper>::value>
+struct meta_floor_log2 {};
 
 template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_move_down>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
 {
-  enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value };
+  enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
 };
 
 template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_move_up>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
 {
-  enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value };
+  enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
 };
 
 template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_terminate>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
 {
   enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
 };
 
 template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_bogus>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
 {
   // no value, error at compile time
 };
@@ -551,7 +551,7 @@ struct random_default_impl
 #ifdef EIGEN_MAKING_DOCS
     return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
 #else
-    enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value,
+    enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
            scalar_bits = sizeof(Scalar) * CHAR_BIT,
            shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
            offset = NumTraits<Scalar>::IsSigned ?
(1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0 diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 1b47f1a6d..617439ff6 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -74,45 +74,23 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff } } -/** \brief Computes the blocking parameters for a m x k times k x n matrix product - * - * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. - * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. - * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. - * - * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, - * this function computes the blocking size parameters along the respective dimensions - * for matrix products and related algorithms. The blocking sizes depends on various - * parameters: - * - the L1 and L2 cache sizes, - * - the register level blocking sizes defined by gebp_traits, - * - the number of scalars that fit into a packet (when vectorization is enabled). - * - * \sa setCpuCacheSizes */ +/* Helper for computeProductBlockingSizes. + * + * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, + * this function computes the blocking size parameters along the respective dimensions + * for matrix products and related algorithms. The blocking sizes depends on various + * parameters: + * - the L1 and L2 cache sizes, + * - the register level blocking sizes defined by gebp_traits, + * - the number of scalars that fit into a packet (when vectorization is enabled). + * + * \sa setCpuCacheSizes */ template -void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) +void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1) { typedef gebp_traits Traits; -#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES - if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) { - EIGEN_UNUSED_VARIABLE(num_threads); - enum { - kr = 8, - mr = Traits::mr, - nr = Traits::nr - }; - k = std::min(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K); - if (k > kr) k -= k % kr; - m = std::min(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M); - if (m > mr) m -= m % mr; - n = std::min(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N); - if (n > nr) n -= n % nr; - return; - } -#endif - // Explanations: // Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and // kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed @@ -281,6 +259,60 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads } } +inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n) +{ +#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES + if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) { + k = std::min(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K); + m = std::min(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M); + n = std::min(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N); + return true; + } +#else + EIGEN_UNUSED_VARIABLE(k) + EIGEN_UNUSED_VARIABLE(m) + EIGEN_UNUSED_VARIABLE(n) + return false; +#endif +} + +/** \brief Computes the blocking parameters for a m x k times k x n matrix product + * + * \param[in,out] k Input: the third dimension of the product. 
Output: the blocking size along the same dimension. + * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. + * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. + * + * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, + * this function computes the blocking size parameters along the respective dimensions + * for matrix products and related algorithms. + * + * The blocking size parameters may be evaluated: + * - either by a heuristic based on cache sizes; + * - or using a precomputed lookup table; + * - or using fixed prescribed values (for testing purposes). + * + * \sa setCpuCacheSizes */ + +template +void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) +{ + if (!useSpecificBlockingSizes(k, m, n)) { + if (!lookupBlockingSizesFromTable(k, m, n, num_threads)) { + evaluateProductBlockingSizesHeuristic(k, m, n, num_threads); + } + } + + typedef gebp_traits Traits; + enum { + kr = 8, + mr = Traits::mr, + nr = Traits::nr + }; + if (k > kr) k -= k % kr; + if (m > mr) m -= m % mr; + if (n > nr) n -= n % nr; +} + template inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) { diff --git a/Eigen/src/Core/products/LookupBlockingSizesTable.h b/Eigen/src/Core/products/LookupBlockingSizesTable.h new file mode 100644 index 000000000..85aeedec8 --- /dev/null +++ b/Eigen/src/Core/products/LookupBlockingSizesTable.h @@ -0,0 +1,89 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H +#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H + +namespace Eigen { + +namespace internal { + +template ::NumSizes != 0 > +struct LookupBlockingSizesFromTableImpl +{ + static bool run(Index&, Index&, Index&, Index) + { + return false; + } +}; + +inline uint8_t floor_log2_helper(uint16_t& x, size_t offset) +{ + uint16_t y = x >> offset; + if (y) { + x = y; + return offset; + } else { + return 0; + } +} + +inline uint8_t floor_log2(uint16_t x) +{ + return floor_log2_helper(x, 8) + + floor_log2_helper(x, 4) + + floor_log2_helper(x, 2) + + floor_log2_helper(x, 1); +} + +inline uint8_t ceil_log2(uint16_t x) +{ + return x > 1 ? 
floor_log2(x - 1) + 1 : 0; +} + +template +struct LookupBlockingSizesFromTableImpl +{ + static bool run(Index& k, Index& m, Index& n, Index) + { + using std::min; + using std::max; + typedef BlockingSizesLookupTable Table; + const uint16_t minsize = Table::BaseSize; + const uint16_t maxsize = minsize << (Table::NumSizes + 1); + const uint16_t k_clamped = max(minsize, min(k, maxsize)); + const uint16_t m_clamped = max(minsize, min(m, maxsize)); + const uint16_t n_clamped = max(minsize, min(n, maxsize)); + const size_t k_index = ceil_log2(k_clamped / minsize); + const size_t m_index = ceil_log2(m_clamped / minsize); + const size_t n_index = ceil_log2(n_clamped / minsize); + const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index); + const uint16_t table_entry = Table::Data()[index]; + k = min(k, 1 << ((table_entry & 0xf00) >> 8)); + m = min(m, 1 << ((table_entry & 0x0f0) >> 4)); + n = min(n, 1 << ((table_entry & 0x00f) >> 0)); + return true; + } +}; + +template +bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads) +{ + return LookupBlockingSizesFromTableImpl::run(k, m, n, num_threads); +} + +} + +} + +#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index c23892c50..8034f9b5e 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -287,6 +287,14 @@ struct stem_function typedef std::complex::Real> ComplexScalar; typedef ComplexScalar type(ComplexScalar, int); }; + +template +struct BlockingSizesLookupTable +{ + static const size_t NumSizes = 0; +}; + } } // end namespace Eigen -- cgit v1.2.3 From ca5c12587b6e51be7f401c2878800d5d49f615d8 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sun, 15 Mar 2015 18:05:53 -0400 Subject: Polish lookup tables generation --- bench/analyze-blocking-sizes.cpp | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/bench/analyze-blocking-sizes.cpp b/bench/analyze-blocking-sizes.cpp index c133df599..3d57f4bb3 100644 --- a/bench/analyze-blocking-sizes.cpp +++ b/bench/analyze-blocking-sizes.cpp @@ -347,13 +347,16 @@ void dump_table_for_subset( abort(); } if (only_cubic_sizes) { - cout << "/* Warning: generated with --only-cubic-sizes ! */" << endl; + cerr << "Can't generate tables with --only-cubic-sizes." 
<< endl; + abort(); } - cout << "struct optimal_block_sizes_table {" << endl; - cout << " static const size_t min_size = " << min_product_size.k << ";" << endl; - cout << " static const size_t max_size = " << max_product_size.k << ";" << endl; - cout << " static const uint16_t* table() {" << endl; - cout << " static const uint16_t data[] = {"; + cout << "struct LookupTable {" << endl; + cout << " static const size_t BaseSize = " << min_product_size.k << ";" << endl; + const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1; + const size_t TableSize = NumSizes * NumSizes * NumSizes; + cout << " static const size_t NumSizes = " << NumSizes << ";" << endl; + cout << " static const uint16_t* Data() {" << endl; + cout << " static const uint16_t data[" << TableSize << "] = {"; while (entry_index < num_entries) { ++entry_index; if (entry_index == num_entries || @@ -371,18 +374,23 @@ void dump_table_for_subset( best_block_size_this_product_size = first_file.entries[e].block_size; } } - if ((i++) % 8) { - cout << ", "; + if ((i++) % NumSizes) { + cout << " "; } else { cout << endl << " "; } cout << "0x" << hex << best_block_size_this_product_size << dec; if (entry_index < num_entries) { + cout << ","; first_entry_index_with_this_product_size = entry_index; product_size = first_file.entries[entry_index].product_size; } } } + if (i != TableSize) { + cerr << endl << "Wrote " << i << " table entries, expected " << TableSize << endl; + abort(); + } cout << endl << " };" << endl; cout << " return data;" << endl; cout << " }" << endl; @@ -855,6 +863,11 @@ int main(int argc, char* argv[]) input_filenames.emplace_back(argv[i]); } + if (dump_tables && only_cubic_sizes) { + cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." << endl; + show_usage_and_exit(argc, argv, available_actions); + } + if (!action) { show_usage_and_exit(argc, argv, available_actions); } -- cgit v1.2.3 From 1dd3d89818cbdf0843de3c0b499eedc433af46f8 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sun, 15 Mar 2015 18:07:19 -0400 Subject: Fix a unused-var warning --- Eigen/src/Core/CoreEvaluators.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 85f46cb8d..ce00566a5 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -839,6 +839,7 @@ struct block_evaluator::size) * sizeof(Scalar); + EIGEN_ONLY_USED_FOR_DEBUG(AlignBytes) // FIXME this should be an internal assertion eigen_assert(EIGEN_IMPLIES(evaluator::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned"); } -- cgit v1.2.3 From 3589a9c115a892ea3ca5dac74d71a1526764cb38 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sun, 15 Mar 2015 18:12:18 -0400 Subject: actual_panel_rows computation should always be resilient to parameters not consistent with the known L1 cache size, see comment --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 617439ff6..6aadfea83 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -1017,11 +1017,10 @@ void gebp_kernel(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) )); -#else - const Index actual_panel_rows = (3*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - 
depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) );
-#endif
     for(Index i1=0; i1 Date: Sun, 15 Mar 2015 18:13:12 -0400
Subject: Provide an empirical lookup table for blocking sizes measured on a Nexus 5. Only for float, only for Android on ARM 32bit for now.

---
 Eigen/Core                                         |   1 +
 .../src/Core/arch/NEON/BlockingSizesLookupTables.h | 110 +++++++++++++++++++++
 2 files changed, 111 insertions(+)
 create mode 100644 Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h

diff --git a/Eigen/Core b/Eigen/Core
index 138c34916..1a3249604 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -308,6 +308,7 @@ using std::ptrdiff_t;
   #include "src/Core/arch/NEON/PacketMath.h"
   #include "src/Core/arch/NEON/MathFunctions.h"
   #include "src/Core/arch/NEON/Complex.h"
+  #include "src/Core/arch/NEON/BlockingSizesLookupTables.h"
 #endif

 #if defined EIGEN_VECTORIZE_CUDA

diff --git a/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h
new file mode 100644
index 000000000..c2366a347
--- /dev/null
+++ b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h
@@ -0,0 +1,110 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Jacob
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
+#define EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
+
+namespace Eigen {
+namespace internal {
+
+/* The following lookup table was generated from measurements on a Nexus 5,
+ * which has a Qualcomm Krait 400 CPU. This is very representative of current
+ * 32bit (ARMv7) Android devices. On the other hand, I don't know how
+ * representative that is outside of these conditions. Accordingly,
+ * let's only use this lookup table on ARM 32bit on Android for now.
+ *
+ * Measurements were single-threaded, with Scalar=float, compiled with
+ * -mfpu=neon-vfpv4, so the pmadd instruction used was VFMA.F32.
+ *
+ * The device was cooled, allowing it to run at the max clock speed throughout.
+ * This may not be representative of real-world thermal conditions.
+ *
+ * The benchmark attempted to flush caches to test cold-cache performance.
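+ *
+ * A note on the encoding (derived from the decoding logic in
+ * LookupBlockingSizesTable.h): each 16-bit entry below packs the three
+ * block sizes k, m, n as 4-bit base-2 logarithms -- bits 11..8 for k,
+ * bits 7..4 for m, bits 3..0 for n. For example, the entry 0x444 decodes
+ * to block sizes k = m = n = 1 << 4 = 16.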
+ */ +#if EIGEN_ARCH_ARM && EIGEN_OS_ANDROID +template<> +struct BlockingSizesLookupTable { + static const size_t BaseSize = 16; + static const size_t NumSizes = 8; + static const uint16_t* Data() { + static const uint16_t data[512] = { + 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x447, 0x447, + 0x454, 0x455, 0x456, 0x457, 0x458, 0x459, 0x45a, 0x457, + 0x464, 0x465, 0x466, 0x467, 0x468, 0x469, 0x46a, 0x467, + 0x474, 0x475, 0x476, 0x467, 0x478, 0x479, 0x477, 0x478, + 0x474, 0x475, 0x476, 0x477, 0x478, 0x476, 0x476, 0x476, + 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x496, 0x496, + 0x474, 0x475, 0x476, 0x4a6, 0x4a5, 0x4a6, 0x4a5, 0x4a6, + 0x474, 0x475, 0x466, 0x4a6, 0x4a6, 0x4a6, 0x496, 0x4a6, + 0x544, 0x545, 0x546, 0x547, 0x548, 0x549, 0x54a, 0x54b, + 0x554, 0x555, 0x556, 0x557, 0x558, 0x559, 0x55a, 0x55b, + 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x56b, + 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x576, + 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x587, + 0x564, 0x565, 0x566, 0x567, 0x596, 0x596, 0x597, 0x596, + 0x564, 0x565, 0x566, 0x5a5, 0x5a6, 0x5a6, 0x596, 0x596, + 0x564, 0x565, 0x566, 0x596, 0x5a6, 0x596, 0x5a6, 0x5a6, + 0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0x64a, 0x64b, + 0x644, 0x655, 0x656, 0x657, 0x658, 0x659, 0x65a, 0x65b, + 0x664, 0x665, 0x666, 0x667, 0x668, 0x669, 0x65a, 0x667, + 0x674, 0x665, 0x666, 0x677, 0x678, 0x679, 0x67a, 0x675, + 0x684, 0x685, 0x686, 0x687, 0x678, 0x688, 0x687, 0x686, + 0x664, 0x665, 0x666, 0x657, 0x697, 0x696, 0x696, 0x697, + 0x664, 0x655, 0x686, 0x696, 0x685, 0x6a6, 0x686, 0x686, + 0x684, 0x675, 0x686, 0x685, 0x686, 0x696, 0x696, 0x696, + 0x744, 0x745, 0x746, 0x747, 0x748, 0x749, 0x74a, 0x746, + 0x754, 0x755, 0x756, 0x757, 0x758, 0x759, 0x75a, 0x757, + 0x764, 0x755, 0x756, 0x747, 0x768, 0x759, 0x75a, 0x767, + 0x744, 0x765, 0x766, 0x767, 0x768, 0x759, 0x778, 0x777, + 0x744, 0x745, 0x766, 0x767, 0x788, 0x788, 0x786, 0x788, + 0x754, 0x755, 0x766, 0x787, 0x787, 0x796, 0x787, 0x797, + 0x684, 0x695, 0x696, 0x6a5, 0x786, 0x786, 0x795, 0x796, + 0x684, 0x695, 0x686, 0x6a6, 0x786, 0x796, 0x786, 0x796, + 0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x847, 0x848, + 0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x857, 0x858, + 0x844, 0x865, 0x846, 0x847, 0x868, 0x849, 0x866, 0x867, + 0x844, 0x865, 0x846, 0x847, 0x878, 0x849, 0x877, 0x877, + 0x844, 0x845, 0x846, 0x867, 0x885, 0x887, 0x885, 0x887, + 0x784, 0x785, 0x786, 0x877, 0x885, 0x885, 0x896, 0x896, + 0x684, 0x695, 0x696, 0x885, 0x896, 0x885, 0x895, 0x895, + 0x694, 0x685, 0x6a6, 0x885, 0x885, 0x886, 0x896, 0x896, + 0x944, 0x945, 0x946, 0x947, 0x948, 0x847, 0x848, 0x848, + 0x944, 0x855, 0x756, 0x947, 0x858, 0x857, 0x858, 0x858, + 0x944, 0x945, 0x946, 0x867, 0x948, 0x866, 0x866, 0x867, + 0x944, 0x775, 0x976, 0x877, 0x877, 0x878, 0x877, 0x877, + 0x784, 0x785, 0x886, 0x887, 0x886, 0x986, 0x887, 0x887, + 0x784, 0x785, 0x786, 0x796, 0x885, 0x897, 0x896, 0x897, + 0x684, 0x695, 0x6a4, 0x886, 0x886, 0x896, 0x896, 0x896, + 0x6a4, 0x6a5, 0x696, 0x886, 0x886, 0x896, 0x896, 0x896, + 0x844, 0x845, 0x846, 0x847, 0x847, 0x847, 0x847, 0x847, + 0x854, 0x855, 0x856, 0x857, 0x857, 0x858, 0x857, 0x857, + 0x864, 0x865, 0x866, 0x867, 0x867, 0x866, 0x867, 0x867, + 0x774, 0x775, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877, + 0x784, 0x785, 0x886, 0x887, 0x887, 0x887, 0x887, 0x887, + 0x784, 0x785, 0x786, 0x787, 0x887, 0x896, 0x897, 0x897, + 0x684, 0x6a5, 0x696, 0x886, 0x896, 0x896, 0x896, 0x896, + 0x694, 0x6a5, 0x6a5, 0x886, 0xa68, 0x896, 0x896, 0x896, + 0x844, 0x845, 0x846, 0x846, 0x847, 0x945, 0x847, 0x946, + 0x854, 
0xb55, 0x856, 0x857, 0x857, 0x856, 0x857, 0x857,
+      0x864, 0x865, 0x866, 0x867, 0x867, 0x867, 0x867, 0x867,
+      0x864, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877,
+      0x784, 0x885, 0x886, 0x886, 0x886, 0x887, 0x887, 0x887,
+      0x784, 0x785, 0x786, 0x786, 0x886, 0x897, 0x897, 0x897,
+      0x684, 0x695, 0x696, 0x886, 0x896, 0x896, 0x896, 0x896,
+      0x684, 0x6a5, 0x696, 0xb57, 0x896, 0x896, 0x896, 0x896
+    };
+    return data;
+  }
+};
+#endif
+
+}
+}
+
+#endif // EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
-- cgit v1.2.3


From 151b8b95c642dea388c2f7da475e4db432bc6962 Mon Sep 17 00:00:00 2001
From: Benoit Jacob
Date: Sun, 15 Mar 2015 19:10:51 -0400
Subject: Fix bug in case where EIGEN_TEST_SPECIFIC_BLOCKING_SIZE is defined but false

---
 Eigen/src/Core/products/GeneralBlockPanelKernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 6aadfea83..bc24a17ac 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -272,8 +272,8 @@ inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
   EIGEN_UNUSED_VARIABLE(k)
   EIGEN_UNUSED_VARIABLE(m)
   EIGEN_UNUSED_VARIABLE(n)
-  return false;
 #endif
+  return false;
 }

 /** \brief Computes the blocking parameters for an m x k times k x n matrix product
-- cgit v1.2.3


From e274607d7fcb021d0578e2fc95023eb7b5fab133 Mon Sep 17 00:00:00 2001
From: Benoit Jacob
Date: Mon, 16 Mar 2015 10:48:27 -0400
Subject: fix compilation with GCC 4.8

---
 Eigen/src/Core/products/LookupBlockingSizesTable.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Eigen/src/Core/products/LookupBlockingSizesTable.h b/Eigen/src/Core/products/LookupBlockingSizesTable.h
index 85aeedec8..f955253f8 100644
--- a/Eigen/src/Core/products/LookupBlockingSizesTable.h
+++ b/Eigen/src/Core/products/LookupBlockingSizesTable.h
@@ -68,9 +68,9 @@ struct LookupBlockingSizesFromTableImpl
     const size_t n_index = ceil_log2(n_clamped / minsize);
     const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index);
     const uint16_t table_entry = Table::Data()[index];
-    k = min(k, 1 << ((table_entry & 0xf00) >> 8));
-    m = min(m, 1 << ((table_entry & 0x0f0) >> 4));
-    n = min(n, 1 << ((table_entry & 0x00f) >> 0));
+    k = min(k, 1 << ((table_entry & 0xf00) >> 8));
+    m = min(m, 1 << ((table_entry & 0x0f0) >> 4));
+    n = min(n, 1 << ((table_entry & 0x00f) >> 0));
     return true;
   }
 };
-- cgit v1.2.3


From 35c3a8bb84778a81b2f90fdea10eadeae16863aa Mon Sep 17 00:00:00 2001
From: Benoit Jacob
Date: Mon, 16 Mar 2015 11:05:51 -0400
Subject: Update Nexus 5 lookup table, now combining 2 runs of the benchmark using the analyze-blocking-sizes partition tool. Gives better worst-case performance.
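
For the record, such a combined table can be regenerated from fresh benchmark
data with the tool's partition action; a hypothetical invocation (the data
file names are made up, and the exact command line is an assumption) might
look like:

  ./analyze-blocking-sizes partition benchmark-run-1.data benchmark-run-2.data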
--- .../src/Core/arch/NEON/BlockingSizesLookupTables.h | 98 +++++++++++----------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h index c2366a347..7905bdf83 100644 --- a/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h +++ b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h @@ -35,69 +35,69 @@ struct BlockingSizesLookupTable { static const uint16_t* Data() { static const uint16_t data[512] = { 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x447, 0x447, - 0x454, 0x455, 0x456, 0x457, 0x458, 0x459, 0x45a, 0x457, + 0x454, 0x455, 0x456, 0x457, 0x458, 0x459, 0x45a, 0x456, 0x464, 0x465, 0x466, 0x467, 0x468, 0x469, 0x46a, 0x467, - 0x474, 0x475, 0x476, 0x467, 0x478, 0x479, 0x477, 0x478, - 0x474, 0x475, 0x476, 0x477, 0x478, 0x476, 0x476, 0x476, - 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x496, 0x496, - 0x474, 0x475, 0x476, 0x4a6, 0x4a5, 0x4a6, 0x4a5, 0x4a6, - 0x474, 0x475, 0x466, 0x4a6, 0x4a6, 0x4a6, 0x496, 0x4a6, + 0x474, 0x475, 0x476, 0x467, 0x478, 0x479, 0x476, 0x478, + 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x476, 0x476, + 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x496, 0x488, + 0x474, 0x475, 0x476, 0x4a6, 0x496, 0x496, 0x495, 0x4a6, + 0x474, 0x475, 0x466, 0x4a6, 0x497, 0x4a5, 0x496, 0x4a5, 0x544, 0x545, 0x546, 0x547, 0x548, 0x549, 0x54a, 0x54b, 0x554, 0x555, 0x556, 0x557, 0x558, 0x559, 0x55a, 0x55b, 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x56b, 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x576, 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x587, - 0x564, 0x565, 0x566, 0x567, 0x596, 0x596, 0x597, 0x596, - 0x564, 0x565, 0x566, 0x5a5, 0x5a6, 0x5a6, 0x596, 0x596, - 0x564, 0x565, 0x566, 0x596, 0x5a6, 0x596, 0x5a6, 0x5a6, + 0x564, 0x565, 0x566, 0x567, 0x596, 0x596, 0x596, 0x597, + 0x574, 0x565, 0x566, 0x596, 0x596, 0x5a6, 0x5a6, 0x5a6, + 0x564, 0x565, 0x5a6, 0x596, 0x5a6, 0x5a6, 0x5a6, 0x5a6, 0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0x64a, 0x64b, 0x644, 0x655, 0x656, 0x657, 0x658, 0x659, 0x65a, 0x65b, 0x664, 0x665, 0x666, 0x667, 0x668, 0x669, 0x65a, 0x667, - 0x674, 0x665, 0x666, 0x677, 0x678, 0x679, 0x67a, 0x675, - 0x684, 0x685, 0x686, 0x687, 0x678, 0x688, 0x687, 0x686, - 0x664, 0x665, 0x666, 0x657, 0x697, 0x696, 0x696, 0x697, - 0x664, 0x655, 0x686, 0x696, 0x685, 0x6a6, 0x686, 0x686, - 0x684, 0x675, 0x686, 0x685, 0x686, 0x696, 0x696, 0x696, - 0x744, 0x745, 0x746, 0x747, 0x748, 0x749, 0x74a, 0x746, + 0x654, 0x665, 0x676, 0x677, 0x678, 0x679, 0x67a, 0x675, + 0x684, 0x675, 0x686, 0x687, 0x688, 0x688, 0x687, 0x686, + 0x664, 0x685, 0x666, 0x677, 0x697, 0x696, 0x697, 0x697, + 0x664, 0x665, 0x696, 0x696, 0x685, 0x6a6, 0x696, 0x696, + 0x664, 0x675, 0x686, 0x696, 0x6a6, 0x696, 0x696, 0x696, + 0x744, 0x745, 0x746, 0x747, 0x748, 0x749, 0x74a, 0x747, 0x754, 0x755, 0x756, 0x757, 0x758, 0x759, 0x75a, 0x757, - 0x764, 0x755, 0x756, 0x747, 0x768, 0x759, 0x75a, 0x767, - 0x744, 0x765, 0x766, 0x767, 0x768, 0x759, 0x778, 0x777, - 0x744, 0x745, 0x766, 0x767, 0x788, 0x788, 0x786, 0x788, - 0x754, 0x755, 0x766, 0x787, 0x787, 0x796, 0x787, 0x797, - 0x684, 0x695, 0x696, 0x6a5, 0x786, 0x786, 0x795, 0x796, - 0x684, 0x695, 0x686, 0x6a6, 0x786, 0x796, 0x786, 0x796, - 0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x847, 0x848, - 0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x857, 0x858, - 0x844, 0x865, 0x846, 0x847, 0x868, 0x849, 0x866, 0x867, - 0x844, 0x865, 0x846, 0x847, 0x878, 0x849, 0x877, 0x877, - 0x844, 0x845, 0x846, 0x867, 0x885, 0x887, 0x885, 0x887, - 0x784, 0x785, 0x786, 0x877, 
0x885, 0x885, 0x896, 0x896, - 0x684, 0x695, 0x696, 0x885, 0x896, 0x885, 0x895, 0x895, - 0x694, 0x685, 0x6a6, 0x885, 0x885, 0x886, 0x896, 0x896, - 0x944, 0x945, 0x946, 0x947, 0x948, 0x847, 0x848, 0x848, - 0x944, 0x855, 0x756, 0x947, 0x858, 0x857, 0x858, 0x858, - 0x944, 0x945, 0x946, 0x867, 0x948, 0x866, 0x866, 0x867, - 0x944, 0x775, 0x976, 0x877, 0x877, 0x878, 0x877, 0x877, - 0x784, 0x785, 0x886, 0x887, 0x886, 0x986, 0x887, 0x887, - 0x784, 0x785, 0x786, 0x796, 0x885, 0x897, 0x896, 0x897, - 0x684, 0x695, 0x6a4, 0x886, 0x886, 0x896, 0x896, 0x896, - 0x6a4, 0x6a5, 0x696, 0x886, 0x886, 0x896, 0x896, 0x896, - 0x844, 0x845, 0x846, 0x847, 0x847, 0x847, 0x847, 0x847, - 0x854, 0x855, 0x856, 0x857, 0x857, 0x858, 0x857, 0x857, - 0x864, 0x865, 0x866, 0x867, 0x867, 0x866, 0x867, 0x867, - 0x774, 0x775, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877, + 0x764, 0x765, 0x756, 0x767, 0x768, 0x759, 0x75a, 0x766, + 0x744, 0x755, 0x766, 0x777, 0x768, 0x759, 0x778, 0x777, + 0x744, 0x745, 0x766, 0x777, 0x788, 0x786, 0x786, 0x788, + 0x754, 0x755, 0x766, 0x787, 0x796, 0x796, 0x787, 0x796, + 0x684, 0x695, 0x696, 0x6a6, 0x795, 0x786, 0x795, 0x796, + 0x684, 0x695, 0x696, 0x795, 0x786, 0x796, 0x795, 0x796, + 0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x848, 0x848, + 0x844, 0x855, 0x846, 0x847, 0x848, 0x849, 0x855, 0x857, + 0x844, 0x845, 0x846, 0x857, 0x848, 0x859, 0x866, 0x865, + 0x844, 0x855, 0x846, 0x847, 0x878, 0x859, 0x877, 0x877, + 0x844, 0x855, 0x846, 0x867, 0x886, 0x887, 0x885, 0x886, + 0x784, 0x785, 0x786, 0x877, 0x897, 0x885, 0x896, 0x896, + 0x684, 0x695, 0x686, 0x886, 0x885, 0x885, 0x886, 0x896, + 0x694, 0x6a5, 0x6a6, 0x885, 0x885, 0x886, 0x896, 0x896, + 0x944, 0x945, 0x946, 0x947, 0x948, 0x847, 0x847, 0x848, + 0x954, 0x855, 0x856, 0x947, 0x858, 0x857, 0x858, 0x858, + 0x944, 0x945, 0x946, 0x867, 0x948, 0x866, 0x867, 0x867, + 0x944, 0x975, 0x976, 0x877, 0x877, 0x877, 0x877, 0x877, + 0x784, 0x785, 0x886, 0x887, 0x886, 0x887, 0x887, 0x887, + 0x784, 0x785, 0x786, 0x796, 0x887, 0x897, 0x896, 0x896, + 0x684, 0x695, 0x6a6, 0x886, 0x886, 0x896, 0x896, 0x896, + 0x6a4, 0x6a5, 0x696, 0x896, 0x886, 0x896, 0x896, 0x896, + 0xa44, 0xa45, 0xa46, 0xa47, 0x847, 0x848, 0x847, 0x848, + 0xa44, 0xa45, 0x856, 0x857, 0x857, 0x857, 0x857, 0x857, + 0xa44, 0xa65, 0x866, 0x867, 0x867, 0x867, 0x867, 0x867, + 0x774, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877, 0x784, 0x785, 0x886, 0x887, 0x887, 0x887, 0x887, 0x887, 0x784, 0x785, 0x786, 0x787, 0x887, 0x896, 0x897, 0x897, - 0x684, 0x6a5, 0x696, 0x886, 0x896, 0x896, 0x896, 0x896, - 0x694, 0x6a5, 0x6a5, 0x886, 0xa68, 0x896, 0x896, 0x896, - 0x844, 0x845, 0x846, 0x846, 0x847, 0x945, 0x847, 0x946, - 0x854, 0xb55, 0x856, 0x857, 0x857, 0x856, 0x857, 0x857, - 0x864, 0x865, 0x866, 0x867, 0x867, 0x867, 0x867, 0x867, + 0x684, 0x6a5, 0x696, 0x886, 0x886, 0x896, 0x896, 0x896, + 0x684, 0x6a5, 0x6a5, 0x886, 0x886, 0x896, 0x896, 0x896, + 0xb44, 0x845, 0x846, 0x847, 0x847, 0x945, 0x846, 0x946, + 0xb54, 0x855, 0x856, 0x857, 0x857, 0x856, 0x857, 0x856, + 0x864, 0x865, 0x866, 0x867, 0x867, 0x866, 0x866, 0x867, 0x864, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877, - 0x784, 0x885, 0x886, 0x886, 0x886, 0x887, 0x887, 0x887, - 0x784, 0x785, 0x786, 0x786, 0x886, 0x897, 0x897, 0x897, + 0x784, 0x885, 0x886, 0x787, 0x887, 0x887, 0x887, 0x887, + 0x784, 0x785, 0x786, 0x796, 0x886, 0x897, 0x897, 0x897, 0x684, 0x695, 0x696, 0x886, 0x896, 0x896, 0x896, 0x896, - 0x684, 0x6a5, 0x696, 0xb57, 0x896, 0x896, 0x896, 0x896 + 0x684, 0x685, 0x696, 0xb57, 0x896, 0x896, 0x896, 0x896 }; return data; } -- cgit v1.2.3 From 
f218c0181d44d7dd129a77108ad6ec063cfbd6cc Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 16 Mar 2015 13:05:00 -0700 Subject: Fixes the Lvalue computation by actually setting the LvalueBit properly when instantiating tensors of const T. Added a test to check the fix. --- unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h | 8 ++--- unsupported/test/cxx11_tensor_ref.cpp | 40 +++++++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index a844a4d68..66ddfd554 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -54,7 +54,7 @@ struct traits > static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; enum { Options = Options_, - Flags = compute_tensor_flags::ret | LvalueBit, + Flags = compute_tensor_flags::ret | (is_const::value ? 0 : LvalueBit), }; }; @@ -69,7 +69,7 @@ struct traits > static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; enum { Options = Options_, - Flags = compute_tensor_flags::ret | LvalueBit, + Flags = compute_tensor_flags::ret | (is_const::value ? 0: LvalueBit), }; }; @@ -86,7 +86,7 @@ struct traits > static const int Layout = BaseTraits::Layout; enum { Options = Options_, - Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0), + Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0), }; }; @@ -102,7 +102,7 @@ struct traits > static const int Layout = BaseTraits::Layout; enum { Options = BaseTraits::Options, - Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0), + Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? 
AlignedBit : 0), }; }; diff --git a/unsupported/test/cxx11_tensor_ref.cpp b/unsupported/test/cxx11_tensor_ref.cpp index aa369f278..c7b5ecddb 100644 --- a/unsupported/test/cxx11_tensor_ref.cpp +++ b/unsupported/test/cxx11_tensor_ref.cpp @@ -196,6 +196,45 @@ static void test_coeff_ref() } +static void test_nested_ops_with_ref() +{ + Tensor t(2, 3, 5, 7); + t.setRandom(); + TensorMap > m(t.data(), 2, 3, 5, 7); + array, 4> paddings; + paddings[0] = make_pair(0, 0); + paddings[1] = make_pair(2, 1); + paddings[2] = make_pair(3, 4); + paddings[3] = make_pair(0, 0); + Eigen::DSizes shuffle_dims{0, 1, 2, 3}; + TensorRef > ref(m.pad(paddings)); + array, 4> trivial; + trivial[0] = make_pair(0, 0); + trivial[1] = make_pair(0, 0); + trivial[2] = make_pair(0, 0); + trivial[3] = make_pair(0, 0); + Tensor padded = ref.shuffle(shuffle_dims).pad(trivial); + VERIFY_IS_EQUAL(padded.dimension(0), 2+0); + VERIFY_IS_EQUAL(padded.dimension(1), 3+3); + VERIFY_IS_EQUAL(padded.dimension(2), 5+7); + VERIFY_IS_EQUAL(padded.dimension(3), 7+0); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 6; ++j) { + for (int k = 0; k < 12; ++k) { + for (int l = 0; l < 7; ++l) { + if (j >= 2 && j < 5 && k >= 3 && k < 8) { + VERIFY_IS_EQUAL(padded(i,j,k,l), t(i,j-2,k-3,l)); + } else { + VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f); + } + } + } + } + } +} + + void test_cxx11_tensor_ref() { CALL_SUBTEST(test_simple_lvalue_ref()); @@ -205,4 +244,5 @@ void test_cxx11_tensor_ref() CALL_SUBTEST(test_ref_of_ref()); CALL_SUBTEST(test_ref_in_expr()); CALL_SUBTEST(test_coeff_ref()); + CALL_SUBTEST(test_nested_ops_with_ref()); } -- cgit v1.2.3 From eb6929cb191c1b91dda784697faedcfdb245345a Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 16 Mar 2015 16:15:47 -0400 Subject: fix bug in maxsize calculation, which would cause products of size > 2048 to address the lookup table out of bounds --- Eigen/src/Core/products/LookupBlockingSizesTable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/products/LookupBlockingSizesTable.h b/Eigen/src/Core/products/LookupBlockingSizesTable.h index f955253f8..3c8aba6f8 100644 --- a/Eigen/src/Core/products/LookupBlockingSizesTable.h +++ b/Eigen/src/Core/products/LookupBlockingSizesTable.h @@ -59,7 +59,7 @@ struct LookupBlockingSizesFromTableImpl using std::max; typedef BlockingSizesLookupTable Table; const uint16_t minsize = Table::BaseSize; - const uint16_t maxsize = minsize << (Table::NumSizes + 1); + const uint16_t maxsize = minsize << (Table::NumSizes - 1); const uint16_t k_clamped = max(minsize, min(k, maxsize)); const uint16_t m_clamped = max(minsize, min(m, maxsize)); const uint16_t n_clamped = max(minsize, min(n, maxsize)); -- cgit v1.2.3 From 0fd6d52724555d70f663c7ec56db6419e95db6cc Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 16 Mar 2015 13:16:12 -0700 Subject: Fixed compilation error with clang --- unsupported/test/cxx11_tensor_ref.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/unsupported/test/cxx11_tensor_ref.cpp b/unsupported/test/cxx11_tensor_ref.cpp index c7b5ecddb..59530fe43 100644 --- a/unsupported/test/cxx11_tensor_ref.cpp +++ b/unsupported/test/cxx11_tensor_ref.cpp @@ -201,18 +201,18 @@ static void test_nested_ops_with_ref() Tensor t(2, 3, 5, 7); t.setRandom(); TensorMap > m(t.data(), 2, 3, 5, 7); - array, 4> paddings; - paddings[0] = make_pair(0, 0); - paddings[1] = make_pair(2, 1); - paddings[2] = make_pair(3, 4); - paddings[3] = make_pair(0, 0); - Eigen::DSizes shuffle_dims{0, 1, 2, 3}; + 
array, 4> paddings; + paddings[0] = std::make_pair(0, 0); + paddings[1] = std::make_pair(2, 1); + paddings[2] = std::make_pair(3, 4); + paddings[3] = std::make_pair(0, 0); + DSizes shuffle_dims{0, 1, 2, 3}; TensorRef > ref(m.pad(paddings)); - array, 4> trivial; - trivial[0] = make_pair(0, 0); - trivial[1] = make_pair(0, 0); - trivial[2] = make_pair(0, 0); - trivial[3] = make_pair(0, 0); + array, 4> trivial; + trivial[0] = std::make_pair(0, 0); + trivial[1] = std::make_pair(0, 0); + trivial[2] = std::make_pair(0, 0); + trivial[3] = std::make_pair(0, 0); Tensor padded = ref.shuffle(shuffle_dims).pad(trivial); VERIFY_IS_EQUAL(padded.dimension(0), 2+0); VERIFY_IS_EQUAL(padded.dimension(1), 3+3); -- cgit v1.2.3 From 5144f66728fb156402799682cfd926faf0e98fb4 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 16 Mar 2015 13:17:52 -0700 Subject: Fixed compilation warning --- unsupported/test/cxx11_tensor_ref.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/test/cxx11_tensor_ref.cpp b/unsupported/test/cxx11_tensor_ref.cpp index 59530fe43..c8f105e3d 100644 --- a/unsupported/test/cxx11_tensor_ref.cpp +++ b/unsupported/test/cxx11_tensor_ref.cpp @@ -206,7 +206,7 @@ static void test_nested_ops_with_ref() paddings[1] = std::make_pair(2, 1); paddings[2] = std::make_pair(3, 4); paddings[3] = std::make_pair(0, 0); - DSizes shuffle_dims{0, 1, 2, 3}; + DSizes shuffle_dims(0, 1, 2, 3); TensorRef > ref(m.pad(paddings)); array, 4> trivial; trivial[0] = std::make_pair(0, 0); -- cgit v1.2.3 From 577056aa9400459f1a1bad9e423857bc1763f18f Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 16 Mar 2015 16:21:50 -0400 Subject: Include stdint.h. Not going for cstdint because it is a C++11 addition. Needed for uint16_t at least, in lookup-table code. --- Eigen/Core | 1 + 1 file changed, 1 insertion(+) diff --git a/Eigen/Core b/Eigen/Core index 1a3249604..80842a1de 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -61,6 +61,7 @@ #pragma GCC optimize ("-fno-ipa-cp-clone") #endif +#include #include // this include file manages BLAS and MKL related macros -- cgit v1.2.3 From 364cfd529d853545e6b0a7404fe303cda2de2366 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 16 Mar 2015 16:28:44 -0400 Subject: Similar to cset 3589a9c115a892ea3ca5dac74d71a1526764cb38 , also in 2px4 kernel: actual_panel_rows computation should always be resilient to parameters not consistent with the known L1 cache size, see comment --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index bc24a17ac..d32377a00 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -1253,11 +1253,11 @@ void gebp_kernel=2*Traits::LhsProgress) { const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function. -#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES + // The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size + // suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess), + // or because we are testing specific blocking sizes. 
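+    // For illustration (a made-up scenario, not a measurement): if depth is so
+    // large that depth*nr*sizeof(RhsScalar) alone exceeds l1, the numerator of
+    // the quotient below goes negative, and without the max(1, ...) the computed
+    // actual_panel_rows could come out non-positive, leaving the panel loop with
+    // a useless step; the clamp guarantees a step of at least 2*LhsProgress rows.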
Index actual_panel_rows = (2*LhsProgress) * std::max(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) )); -#else - Index actual_panel_rows = (2*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ); -#endif + for(Index i1=peeled_mc3; i1 Date: Tue, 17 Mar 2015 10:31:45 -0400 Subject: use unsigned short instead of uint16_t which doesn't exist in c++98 --- Eigen/Core | 1 - Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h | 4 ++-- Eigen/src/Core/products/LookupBlockingSizesTable.h | 20 ++++++++++---------- bench/analyze-blocking-sizes.cpp | 4 ++-- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/Eigen/Core b/Eigen/Core index 80842a1de..1a3249604 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -61,7 +61,6 @@ #pragma GCC optimize ("-fno-ipa-cp-clone") #endif -#include #include // this include file manages BLAS and MKL related macros diff --git a/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h index 7905bdf83..5007c155d 100644 --- a/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h +++ b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h @@ -32,8 +32,8 @@ template<> struct BlockingSizesLookupTable { static const size_t BaseSize = 16; static const size_t NumSizes = 8; - static const uint16_t* Data() { - static const uint16_t data[512] = { + static const unsigned short* Data() { + static const unsigned short data[512] = { 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x447, 0x447, 0x454, 0x455, 0x456, 0x457, 0x458, 0x459, 0x45a, 0x456, 0x464, 0x465, 0x466, 0x467, 0x468, 0x469, 0x46a, 0x467, diff --git a/Eigen/src/Core/products/LookupBlockingSizesTable.h b/Eigen/src/Core/products/LookupBlockingSizesTable.h index 3c8aba6f8..5ab4525df 100644 --- a/Eigen/src/Core/products/LookupBlockingSizesTable.h +++ b/Eigen/src/Core/products/LookupBlockingSizesTable.h @@ -25,9 +25,9 @@ struct LookupBlockingSizesFromTableImpl } }; -inline uint8_t floor_log2_helper(uint16_t& x, size_t offset) +inline size_t floor_log2_helper(unsigned short& x, size_t offset) { - uint16_t y = x >> offset; + unsigned short y = x >> offset; if (y) { x = y; return offset; @@ -36,7 +36,7 @@ inline uint8_t floor_log2_helper(uint16_t& x, size_t offset) } } -inline uint8_t floor_log2(uint16_t x) +inline size_t floor_log2(unsigned short x) { return floor_log2_helper(x, 8) + floor_log2_helper(x, 4) @@ -44,7 +44,7 @@ inline uint8_t floor_log2(uint16_t x) + floor_log2_helper(x, 1); } -inline uint8_t ceil_log2(uint16_t x) +inline size_t ceil_log2(unsigned short x) { return x > 1 ? 
floor_log2(x - 1) + 1 : 0;
 }

@@ -58,16 +58,16 @@ struct LookupBlockingSizesFromTableImpl
     using std::min;
     using std::max;
     typedef BlockingSizesLookupTable<LhsScalar, RhsScalar> Table;
-    const uint16_t minsize = Table::BaseSize;
-    const uint16_t maxsize = minsize << (Table::NumSizes - 1);
-    const uint16_t k_clamped = max(minsize, min(k, maxsize));
-    const uint16_t m_clamped = max(minsize, min(m, maxsize));
-    const uint16_t n_clamped = max(minsize, min(n, maxsize));
+    const unsigned short minsize = Table::BaseSize;
+    const unsigned short maxsize = minsize << (Table::NumSizes - 1);
+    const unsigned short k_clamped = max(minsize, min(k, maxsize));
+    const unsigned short m_clamped = max(minsize, min(m, maxsize));
+    const unsigned short n_clamped = max(minsize, min(n, maxsize));
     const size_t k_index = ceil_log2(k_clamped / minsize);
     const size_t m_index = ceil_log2(m_clamped / minsize);
     const size_t n_index = ceil_log2(n_clamped / minsize);
     const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index);
-    const uint16_t table_entry = Table::Data()[index];
+    const unsigned short table_entry = Table::Data()[index];
     k = min(k, 1 << ((table_entry & 0xf00) >> 8));
     m = min(m, 1 << ((table_entry & 0x0f0) >> 4));
     n = min(n, 1 << ((table_entry & 0x00f) >> 0));
diff --git a/bench/analyze-blocking-sizes.cpp b/bench/analyze-blocking-sizes.cpp
index 3d57f4bb3..d563a1d2d 100644
--- a/bench/analyze-blocking-sizes.cpp
+++ b/bench/analyze-blocking-sizes.cpp
@@ -355,8 +355,8 @@ void dump_table_for_subset(
   const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1;
   const size_t TableSize = NumSizes * NumSizes * NumSizes;
   cout << "  static const size_t NumSizes = " << NumSizes << ";" << endl;
-  cout << "  static const uint16_t* Data() {" << endl;
-  cout << "    static const uint16_t data[" << TableSize << "] = {";
+  cout << "  static const unsigned short* Data() {" << endl;
+  cout << "    static const unsigned short data[" << TableSize << "] = {";
   while (entry_index < num_entries) {
     ++entry_index;
     if (entry_index == num_entries ||
-- cgit v1.2.3


From cc0f89eb3b07c65efb2b73890e4e7ac83525700a Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Tue, 17 Mar 2015 09:57:20 -0700
Subject: Changed the way lvalue operations are declared in TensorBase: this fixes constness issues that prevented some expressions mixing lvalues and rvalues from compiling.
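
The fix follows the usual pair of const and non-const overloads; as a
simplified sketch (the template parameter lists are abbreviated here relative
to the real declarations):

  // on a const expression: yields a read-only chipping expression
  const TensorChippingOp<DimId, const Derived> chip(const Index offset) const;
  // on a non-const expression: yields an assignable (lvalue) expression
  TensorChippingOp<DimId, Derived> chip(const Index offset);

so that chip() on a const tensor can no longer be used as an lvalue, while the
non-const overload still can.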
--- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 73 +++++++++++++++++++++---- unsupported/test/cxx11_tensor_const.cpp | 27 ++++++++- 2 files changed, 88 insertions(+), 12 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 201b0fc9e..86e72c3a4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -520,48 +520,101 @@ class TensorBase : public TensorBase + const TensorLayoutSwapOp swap_layout() const { + return TensorLayoutSwapOp(derived()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorLayoutSwapOp + swap_layout() { return TensorLayoutSwapOp(derived()); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorConcatenationOp + const TensorConcatenationOp concatenate(const OtherDerived& other, const Axis& axis) const { - return TensorConcatenationOp(derived(), other.derived(), axis); + return TensorConcatenationOp(derived(), other, axis); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorConcatenationOp + concatenate(const OtherDerived& other, const Axis& axis) { + return TensorConcatenationOp(derived(), other, axis); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorReshapingOp + const TensorReshapingOp reshape(const NewDimensions& newDimensions) const { + return TensorReshapingOp(derived(), newDimensions); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReshapingOp + reshape(const NewDimensions& newDimensions) { return TensorReshapingOp(derived(), newDimensions); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorSlicingOp + const TensorSlicingOp slice(const StartIndices& startIndices, const Sizes& sizes) const { + return TensorSlicingOp(derived(), startIndices, sizes); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorSlicingOp + slice(const StartIndices& startIndices, const Sizes& sizes) { return TensorSlicingOp(derived(), startIndices, sizes); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorChippingOp + const TensorChippingOp chip(const Index offset) const { + return TensorChippingOp(derived(), offset, DimId); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorChippingOp + chip(const Index offset) { return TensorChippingOp(derived(), offset, DimId); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorChippingOp + const TensorChippingOp chip(const Index offset, const Index dim) const { + return TensorChippingOp(derived(), offset, dim); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorChippingOp + chip(const Index offset, const Index dim) { return TensorChippingOp(derived(), offset, dim); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorReverseOp + const TensorReverseOp reverse(const ReverseDimensions& rev) const { + return TensorReverseOp(derived(), rev); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReverseOp + reverse(const ReverseDimensions& rev) { return TensorReverseOp(derived(), rev); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorShufflingOp + const TensorShufflingOp shuffle(const Shuffle& shuffle) const { + return TensorShufflingOp(derived(), shuffle); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorShufflingOp + shuffle(const Shuffle& shuffle) { return TensorShufflingOp(derived(), shuffle); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorStridingOp + const TensorStridingOp stride(const Strides& strides) const { + return TensorStridingOp(derived(), strides); + } + template EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE + TensorStridingOp + stride(const Strides& strides) { return TensorStridingOp(derived(), strides); } diff --git a/unsupported/test/cxx11_tensor_const.cpp b/unsupported/test/cxx11_tensor_const.cpp index 0ffb02afd..ad9c9da39 100644 --- a/unsupported/test/cxx11_tensor_const.cpp +++ b/unsupported/test/cxx11_tensor_const.cpp @@ -13,8 +13,6 @@ using Eigen::Tensor; - - static void test_simple_assign() { Tensor random(2,3,7); @@ -33,7 +31,32 @@ static void test_simple_assign() } } + +static void test_assign_of_const_tensor() +{ + Tensor random(2,3,7); + random.setRandom(); + + TensorMap > constant1(random.data(), 2, 3, 7); + TensorMap > constant2(random.data(), 2, 3, 7); + const TensorMap > constant3(random.data(), 2, 3, 7); + + Tensor result1 = constant1.chip(0, 2); + Tensor result2 = constant2.chip(0, 2); + Tensor result3 = constant3.chip(0, 2); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL((result1(i,j)), random(i,j,0)); + VERIFY_IS_EQUAL((result2(i,j)), random(i,j,0)); + VERIFY_IS_EQUAL((result3(i,j)), random(i,j,0)); + } + } +} + + void test_cxx11_tensor_const() { CALL_SUBTEST(test_simple_assign()); + CALL_SUBTEST(test_assign_of_const_tensor()); } -- cgit v1.2.3 From 2ab4922431cfdde2b88100cea3759807add23157 Mon Sep 17 00:00:00 2001 From: Deanna Hood Date: Wed, 18 Mar 2015 07:24:13 +1000 Subject: Make html directory before generating output image there --- doc/special_examples/CMakeLists.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/special_examples/CMakeLists.txt b/doc/special_examples/CMakeLists.txt index aab80a55d..101fbc5f9 100644 --- a/doc/special_examples/CMakeLists.txt +++ b/doc/special_examples/CMakeLists.txt @@ -10,9 +10,10 @@ if(QT4_FOUND) target_link_libraries(Tutorial_sparse_example ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${QT_QTCORE_LIBRARY} ${QT_QTGUI_LIBRARY}) add_custom_command( - TARGET Tutorial_sparse_example - POST_BUILD - COMMAND Tutorial_sparse_example ARGS ${CMAKE_CURRENT_BINARY_DIR}/../html/Tutorial_sparse_example.jpeg + TARGET Tutorial_sparse_example + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/../html/ + COMMAND Tutorial_sparse_example ARGS ${CMAKE_CURRENT_BINARY_DIR}/../html/Tutorial_sparse_example.jpeg ) add_dependencies(all_examples Tutorial_sparse_example) -- cgit v1.2.3 From f329d0908af35fd17bdc4dfeb87046dcaa6e6937 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 19 Mar 2015 15:10:36 +0100 Subject: Improve random number generation for integer and add unit test --- Eigen/src/Core/MathFunctions.h | 34 +++++++--------- test/CMakeLists.txt | 1 + test/rand.cpp | 88 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 19 deletions(-) create mode 100644 test/rand.cpp diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 0fde5c71e..e1b233d82 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -522,28 +522,24 @@ struct meta_floor_log2 template struct random_default_impl { - typedef typename NumTraits::NonInteger NonInteger; - static inline Scalar run(const Scalar& x, const Scalar& y) { using std::max; - Scalar range = (max)(Scalar(0),Scalar(y-x)); - Scalar offset = 0; - if(range<=RAND_MAX) - { - // rejection sampling - int divisor = RAND_MAX/(range+1); - - do { - offset = Scalar(std::rand() / divisor); - } while (offset > range); - } - else - { - offset = std::rand() * range; - } - - return x + offset; + using std::min; + typedef typename 
conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; + if(y range); + + return Scalar(ScalarX(x) + offset); } static inline Scalar run() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1712b8718..734a0eb9b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -139,6 +139,7 @@ endif(TEST_LIB) set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official") add_custom_target(BuildOfficial) +ei_add_test(rand) ei_add_test(meta) ei_add_test(sizeof) ei_add_test(dynalloc) diff --git a/test/rand.cpp b/test/rand.cpp new file mode 100644 index 000000000..4e090cbad --- /dev/null +++ b/test/rand.cpp @@ -0,0 +1,88 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template Scalar check_in_range(Scalar x, Scalar y) +{ + Scalar r = internal::random(x,y); + VERIFY(r>=x); + if(y>=x) + { + VERIFY(r<=y); + } + return r; +} + +template void check_all_in_range(Scalar x, Scalar y) +{ + Array mask(y-x+1); + mask.fill(0); + long n = (y-x+1)*32; + for(long k=0; k0).all() ); +} + +void test_rand() +{ + for(int i = 0; i < g_repeat*10; i++) { + CALL_SUBTEST(check_in_range(10,11)); + CALL_SUBTEST(check_in_range(1.24234523,1.24234523)); + CALL_SUBTEST(check_in_range(-1,1)); + CALL_SUBTEST(check_in_range(-1432.2352,-1432.2352)); + + CALL_SUBTEST(check_in_range(10,11)); + CALL_SUBTEST(check_in_range(1.24234523,1.24234523)); + CALL_SUBTEST(check_in_range(-1,1)); + CALL_SUBTEST(check_in_range(-1432.2352,-1432.2352)); + + + CALL_SUBTEST(check_in_range(0,-1)); + CALL_SUBTEST(check_in_range(0,-1)); + CALL_SUBTEST(check_in_range(0,-1)); + CALL_SUBTEST(check_in_range(-673456,673456)); + CALL_SUBTEST(check_in_range(-24345,24345)); + CALL_SUBTEST(check_in_range(-6734565664234,6734565664234)); + } + + char char_offset = (std::min)(g_repeat,64); + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+char_offset)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-char_offset,-11)); + CALL_SUBTEST(check_all_in_range(-126,-126+char_offset)); + CALL_SUBTEST(check_all_in_range(126-char_offset,126)); + CALL_SUBTEST(check_all_in_range(-126,126)); + + char short_offset = (std::min)(g_repeat,16000); + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+short_offset)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-short_offset,-11)); + CALL_SUBTEST(check_all_in_range(-24345,-24345+short_offset)); + CALL_SUBTEST(check_all_in_range(24345,24345+short_offset)); + + + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+g_repeat)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-g_repeat,-11)); + CALL_SUBTEST(check_all_in_range(-673456,-673456+g_repeat)); + CALL_SUBTEST(check_all_in_range(673456,673456+g_repeat)); + + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+g_repeat)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-g_repeat,-11)); + CALL_SUBTEST(check_all_in_range(-6734565664234,-6734565664234+g_repeat)); + CALL_SUBTEST(check_all_in_range(6734565664234,6734565664234+g_repeat)); +} -- cgit v1.2.3 From d7698c18b7801f041c36adffcdfaefc99140887f Mon Sep 
17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 19 Mar 2015 15:11:05 +0100 Subject: Split sparse_basic unit test --- test/CMakeLists.txt | 1 + test/sparse_basic.cpp | 204 +--------------------------------------- test/sparse_block.cpp | 254 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 259 insertions(+), 200 deletions(-) create mode 100644 test/sparse_block.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 734a0eb9b..393c35b57 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -227,6 +227,7 @@ ei_add_test(stdvector_overload) ei_add_test(stdlist) ei_add_test(stddeque) ei_add_test(sparse_basic) +ei_add_test(sparse_block) ei_add_test(sparse_vector) ei_add_test(sparse_product) ei_add_test(sparse_ref) diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index d929e1463..75f29a2b4 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -30,7 +30,6 @@ template void sparse_basic(const SparseMatrixType& re double density = (std::max)(8./(rows*cols), 0.01); typedef Matrix DenseMatrix; typedef Matrix DenseVector; - typedef Matrix RowDenseVector; Scalar eps = 1e-6; Scalar s1 = internal::random(); @@ -59,77 +58,6 @@ template void sparse_basic(const SparseMatrixType& re VERIFY_IS_APPROX(m, refMat); - // test InnerIterators and Block expressions - for (int t=0; t<10; ++t) - { - Index j = internal::random(0,cols-2); - Index i = internal::random(0,rows-2); - Index w = internal::random(1,cols-j); - Index h = internal::random(1,rows-i); - - VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w)); - for(Index c=0; c void sparse_basic(const SparseMatrixType& re VERIFY_IS_APPROX(m2,m1); } - // test innerVector() - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); - SparseMatrixType m2(rows, cols); - initSparse(density, refMat2, m2); - Index j0 = internal::random(0,outer-1); - Index j1 = internal::random(0,outer-1); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0)); - else - VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0)); - - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1)); - else - VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1)); - - SparseMatrixType m3(rows,cols); - m3.reserve(VectorXi::Constant(outer,int(inner/2))); - for(Index j=0; j0) - VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); - } - m3.makeCompressed(); - for(Index j=0; j<(std::min)(outer, inner); ++j) - { - VERIFY(j==numext::real(m3.innerVector(j).nonZeros())); - if(j>0) - VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); - } - - VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros()); - -// m2.innerVector(j0) = 2*m2.innerVector(j1); -// refMat2.col(j0) = 2*refMat2.col(j1); -// VERIFY_IS_APPROX(m2, refMat2); - } - - // test innerVectors() - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); - SparseMatrixType m2(rows, cols); - initSparse(density, refMat2, m2); - if(internal::random(0,1)>0.5) m2.makeCompressed(); - Index j0 = internal::random(0,outer-2); - Index j1 = internal::random(0,outer-2); - Index n0 = internal::random(1,outer-(std::max)(j0,j1)); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols)); - else - VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0)); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), - 
refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0)); - else - VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), - refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); - - VERIFY_IS_APPROX(m2, refMat2); - - VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros()); - - m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0); - if(SparseMatrixType::IsRowMajor) - refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval(); - else - refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval(); - - VERIFY_IS_APPROX(m2, refMat2); - } - // test basic computations { DenseMatrix refM1 = DenseMatrix::Zero(rows, cols); @@ -360,54 +212,6 @@ template void sparse_basic(const SparseMatrixType& re VERIFY(m2.isApprox(m3)); } - - - // test generic blocks - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); - SparseMatrixType m2(rows, cols); - initSparse(density, refMat2, m2); - Index j0 = internal::random(0,outer-2); - Index j1 = internal::random(0,outer-2); - Index n0 = internal::random(1,outer-(std::max)(j0,j1)); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols)); - else - VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0)); - - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols), - refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols)); - else - VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0), - refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); - - Index i = internal::random(0,m2.outerSize()-1); - if(SparseMatrixType::IsRowMajor) { - m2.innerVector(i) = m2.innerVector(i) * s1; - refMat2.row(i) = refMat2.row(i) * s1; - VERIFY_IS_APPROX(m2,refMat2); - } else { - m2.innerVector(i) = m2.innerVector(i) * s1; - refMat2.col(i) = refMat2.col(i) * s1; - VERIFY_IS_APPROX(m2,refMat2); - } - - Index r0 = internal::random(0,rows-2); - Index c0 = internal::random(0,cols-2); - Index r1 = internal::random(1,rows-r0); - Index c1 = internal::random(1,cols-c0); - - VERIFY_IS_APPROX(DenseVector(m2.col(c0)), refMat2.col(c0)); - VERIFY_IS_APPROX(m2.col(c0), refMat2.col(c0)); - - VERIFY_IS_APPROX(RowDenseVector(m2.row(r0)), refMat2.row(r0)); - VERIFY_IS_APPROX(m2.row(r0), refMat2.row(r0)); - - VERIFY_IS_APPROX(m2.block(r0,c0,r1,c1), refMat2.block(r0,c0,r1,c1)); - VERIFY_IS_APPROX((2*m2).block(r0,c0,r1,c1), (2*refMat2).block(r0,c0,r1,c1)); - } - // test prune { SparseMatrixType m2(rows, cols); @@ -646,8 +450,8 @@ void test_sparse_basic() CALL_SUBTEST_2(( sparse_basic(SparseMatrix, ColMajor>(r, c)) )); CALL_SUBTEST_2(( sparse_basic(SparseMatrix, RowMajor>(r, c)) )); CALL_SUBTEST_1(( sparse_basic(SparseMatrix(r, c)) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(r, c)) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(r, c)) )); + CALL_SUBTEST_5(( sparse_basic(SparseMatrix(r, c)) )); + CALL_SUBTEST_5(( sparse_basic(SparseMatrix(r, c)) )); r = Eigen::internal::random(1,100); c = Eigen::internal::random(1,100); @@ -655,8 +459,8 @@ void test_sparse_basic() r = c; // check square matrices in 25% of tries } - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(short(r), short(c))) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(short(r), short(c))) )); + CALL_SUBTEST_6(( sparse_basic(SparseMatrix(short(r), short(c))) )); + CALL_SUBTEST_6(( sparse_basic(SparseMatrix(short(r), short(c))) )); } // Regression test for bug 900: (manually insert higher values here, if 
you have enough RAM): diff --git a/test/sparse_block.cpp b/test/sparse_block.cpp new file mode 100644 index 000000000..8a6e0687c --- /dev/null +++ b/test/sparse_block.cpp @@ -0,0 +1,254 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "sparse.h" + +template void sparse_block(const SparseMatrixType& ref) +{ + const Index rows = ref.rows(); + const Index cols = ref.cols(); + const Index inner = ref.innerSize(); + const Index outer = ref.outerSize(); + + typedef typename SparseMatrixType::Scalar Scalar; + + double density = (std::max)(8./(rows*cols), 0.01); + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + typedef Matrix RowDenseVector; + + Scalar s1 = internal::random(); + { + SparseMatrixType m(rows, cols); + DenseMatrix refMat = DenseMatrix::Zero(rows, cols); + initSparse(density, refMat, m); + + VERIFY_IS_APPROX(m, refMat); + + // test InnerIterators and Block expressions + for (int t=0; t<10; ++t) + { + Index j = internal::random(0,cols-2); + Index i = internal::random(0,rows-2); + Index w = internal::random(1,cols-j); + Index h = internal::random(1,rows-i); + + VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w)); + for(Index c=0; c(density, refMat2, m2); + Index j0 = internal::random(0,outer-1); + Index j1 = internal::random(0,outer-1); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0)); + else + VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0)); + + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1)); + else + VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1)); + + SparseMatrixType m3(rows,cols); + m3.reserve(VectorXi::Constant(outer,int(inner/2))); + for(Index j=0; j0) + VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); + } + m3.makeCompressed(); + for(Index j=0; j<(std::min)(outer, inner); ++j) + { + VERIFY(j==numext::real(m3.innerVector(j).nonZeros())); + if(j>0) + VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); + } + + VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros()); + +// m2.innerVector(j0) = 2*m2.innerVector(j1); +// refMat2.col(j0) = 2*refMat2.col(j1); +// VERIFY_IS_APPROX(m2, refMat2); + } + + // test innerVectors() + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + if(internal::random(0,1)>0.5) m2.makeCompressed(); + Index j0 = internal::random(0,outer-2); + Index j1 = internal::random(0,outer-2); + Index n0 = internal::random(1,outer-(std::max)(j0,j1)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols)); + else + VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), + refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0)); + else + VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), + refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); + + VERIFY_IS_APPROX(m2, refMat2); + + VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros()); + + 
m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0); + if(SparseMatrixType::IsRowMajor) + refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval(); + else + refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval(); + + VERIFY_IS_APPROX(m2, refMat2); + } + + // test generic blocks + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + Index j0 = internal::random(0,outer-2); + Index j1 = internal::random(0,outer-2); + Index n0 = internal::random(1,outer-(std::max)(j0,j1)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols)); + else + VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0)); + + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols), + refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols)); + else + VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0), + refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); + + Index i = internal::random(0,m2.outerSize()-1); + if(SparseMatrixType::IsRowMajor) { + m2.innerVector(i) = m2.innerVector(i) * s1; + refMat2.row(i) = refMat2.row(i) * s1; + VERIFY_IS_APPROX(m2,refMat2); + } else { + m2.innerVector(i) = m2.innerVector(i) * s1; + refMat2.col(i) = refMat2.col(i) * s1; + VERIFY_IS_APPROX(m2,refMat2); + } + + Index r0 = internal::random(0,rows-2); + Index c0 = internal::random(0,cols-2); + Index r1 = internal::random(1,rows-r0); + Index c1 = internal::random(1,cols-c0); + + VERIFY_IS_APPROX(DenseVector(m2.col(c0)), refMat2.col(c0)); + VERIFY_IS_APPROX(m2.col(c0), refMat2.col(c0)); + + VERIFY_IS_APPROX(RowDenseVector(m2.row(r0)), refMat2.row(r0)); + VERIFY_IS_APPROX(m2.row(r0), refMat2.row(r0)); + + VERIFY_IS_APPROX(m2.block(r0,c0,r1,c1), refMat2.block(r0,c0,r1,c1)); + VERIFY_IS_APPROX((2*m2).block(r0,c0,r1,c1), (2*refMat2).block(r0,c0,r1,c1)); + } +} + +void test_sparse_block() +{ + for(int i = 0; i < g_repeat; i++) { + int r = Eigen::internal::random(1,200), c = Eigen::internal::random(1,200); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + EIGEN_UNUSED_VARIABLE(r+c); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(1, 1)) )); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(8, 8)) )); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(r, c)) )); + CALL_SUBTEST_2(( sparse_block(SparseMatrix, ColMajor>(r, c)) )); + CALL_SUBTEST_2(( sparse_block(SparseMatrix, RowMajor>(r, c)) )); + + CALL_SUBTEST_3(( sparse_block(SparseMatrix(r, c)) )); + CALL_SUBTEST_3(( sparse_block(SparseMatrix(r, c)) )); + + r = Eigen::internal::random(1,100); + c = Eigen::internal::random(1,100); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + + CALL_SUBTEST_4(( sparse_block(SparseMatrix(short(r), short(c))) )); + CALL_SUBTEST_4(( sparse_block(SparseMatrix(short(r), short(c))) )); + } +} -- cgit v1.2.3 From 61c45d7cfd29a161abefd095107b72c75db76ffc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 19 Mar 2015 17:13:22 +0100 Subject: Fix comparison warning --- Eigen/src/Core/DenseStorage.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index 522aaa299..ab41641f4 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -36,8 +36,8 @@ void check_static_allocation_size() template::type, 
bool Match = bool((Size%unpacket_traits::size)==0), - bool TryHalf = bool(unpacket_traits::size > Size) - && bool(unpacket_traits::size > unpacket_traits::half>::size) > + bool TryHalf = bool(int(unpacket_traits::size) > Size) + && bool(int(unpacket_traits::size) > int(unpacket_traits::half>::size)) > struct compute_default_alignment { enum { value = 0 }; -- cgit v1.2.3 From d6b2f300dbab3f11f6f8bd28558d2b91758ad514 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 19 Mar 2015 17:28:32 +0100 Subject: Fix MSVC compilation: aligned type must be passed by reference --- Eigen/src/Core/util/BlasUtil.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 9bfa45106..ffeb5ac5f 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -214,7 +214,7 @@ class blas_data_mapper { } template - EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const { + EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { pscatter(&operator()(i, j), p, m_stride); } -- cgit v1.2.3 From 9ee62fdcd51e78a7e5f53c5868680fe0b6b261e4 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 19 Mar 2015 21:39:37 +0100 Subject: Fix random unit test for 32bits systems. --- test/rand.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/rand.cpp b/test/rand.cpp index 4e090cbad..7c8068a3b 100644 --- a/test/rand.cpp +++ b/test/rand.cpp @@ -34,6 +34,10 @@ template void check_all_in_range(Scalar x, Scalar y) void test_rand() { + long long_ref = NumTraits::highest()/10; + char char_offset = (std::min)(g_repeat,64); + char short_offset = (std::min)(g_repeat,16000); + for(int i = 0; i < g_repeat*10; i++) { CALL_SUBTEST(check_in_range(10,11)); CALL_SUBTEST(check_in_range(1.24234523,1.24234523)); @@ -45,16 +49,14 @@ void test_rand() CALL_SUBTEST(check_in_range(-1,1)); CALL_SUBTEST(check_in_range(-1432.2352,-1432.2352)); - CALL_SUBTEST(check_in_range(0,-1)); CALL_SUBTEST(check_in_range(0,-1)); CALL_SUBTEST(check_in_range(0,-1)); CALL_SUBTEST(check_in_range(-673456,673456)); CALL_SUBTEST(check_in_range(-24345,24345)); - CALL_SUBTEST(check_in_range(-6734565664234,6734565664234)); + CALL_SUBTEST(check_in_range(-long_ref,long_ref)); } - char char_offset = (std::min)(g_repeat,64); CALL_SUBTEST(check_all_in_range(11,11)); CALL_SUBTEST(check_all_in_range(11,11+char_offset)); CALL_SUBTEST(check_all_in_range(-5,5)); @@ -63,7 +65,6 @@ void test_rand() CALL_SUBTEST(check_all_in_range(126-char_offset,126)); CALL_SUBTEST(check_all_in_range(-126,126)); - char short_offset = (std::min)(g_repeat,16000); CALL_SUBTEST(check_all_in_range(11,11)); CALL_SUBTEST(check_all_in_range(11,11+short_offset)); CALL_SUBTEST(check_all_in_range(-5,5)); @@ -71,7 +72,6 @@ void test_rand() CALL_SUBTEST(check_all_in_range(-24345,-24345+short_offset)); CALL_SUBTEST(check_all_in_range(24345,24345+short_offset)); - CALL_SUBTEST(check_all_in_range(11,11)); CALL_SUBTEST(check_all_in_range(11,11+g_repeat)); CALL_SUBTEST(check_all_in_range(-5,5)); @@ -83,6 +83,6 @@ void test_rand() CALL_SUBTEST(check_all_in_range(11,11+g_repeat)); CALL_SUBTEST(check_all_in_range(-5,5)); CALL_SUBTEST(check_all_in_range(-11-g_repeat,-11)); - CALL_SUBTEST(check_all_in_range(-6734565664234,-6734565664234+g_repeat)); - CALL_SUBTEST(check_all_in_range(6734565664234,6734565664234+g_repeat)); + CALL_SUBTEST(check_all_in_range(-long_ref,-long_ref+g_repeat)); + CALL_SUBTEST(check_all_in_range( long_ref, 
long_ref+g_repeat)); } -- cgit v1.2.3 From e134226a0352b0951b03e8ea56ce3bbd538e73db Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 19 Mar 2015 23:11:42 -0700 Subject: Fixed a bug in the handling of packets by the MeanReducer --- unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 38586d067..25f085a59 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -77,7 +77,7 @@ template struct MeanReducer } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * packet_traits::size); + return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits::size); } protected: -- cgit v1.2.3 From a6a628ca6b3c0d0dd6716d200ba8e7740847168a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 19 Mar 2015 23:22:19 -0700 Subject: Added the -= operator to the device classes --- unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h | 39 +++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index 649bdb308..7a67c56b3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -21,8 +21,7 @@ namespace Eigen { * Example: * C.device(EIGEN_GPU) = A + B; * - * Todo: thread pools. - * Todo: operator +=, -=, *= and so on. + * Todo: operator *= and /=. */ template class TensorDevice { @@ -50,6 +49,18 @@ template class TensorDevice { return *this; } + template + EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; + Difference difference(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, difference); + static const bool Vectorize = TensorEvaluator::PacketAccess; + internal::TensorExecutor::run(assign, m_device); + return *this; + } + protected: const DeviceType& m_device; ExpressionType& m_expression; @@ -82,6 +93,18 @@ template class TensorDevice + EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; + Difference difference(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, difference); + static const bool Vectorize = TensorEvaluator::PacketAccess; + internal::TensorExecutor::run(assign, m_device); + return *this; + } + protected: const ThreadPoolDevice& m_device; ExpressionType& m_expression; @@ -114,6 +137,18 @@ template class TensorDevice return *this; } + template + EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; + Difference difference(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, difference); + static const bool Vectorize = TensorEvaluator::PacketAccess; + internal::TensorExecutor::run(assign, m_device); + return *this; + } + protected: const GpuDevice& m_device; 
ExpressionType m_expression; -- cgit v1.2.3 From 8d9bfb3a7ba03a8eb12a71d7133217d768f7940a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 20 Mar 2015 16:00:10 +0100 Subject: fix loadMarket wrt Index versus int --- unsupported/Eigen/src/SparseExtra/MarketIO.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/src/SparseExtra/MarketIO.h b/unsupported/Eigen/src/SparseExtra/MarketIO.h index 25ff4228d..100e617b2 100644 --- a/unsupported/Eigen/src/SparseExtra/MarketIO.h +++ b/unsupported/Eigen/src/SparseExtra/MarketIO.h @@ -18,7 +18,7 @@ namespace Eigen { namespace internal { template - inline bool GetMarketLine (std::stringstream& line, int& M, int& N, int& i, int& j, Scalar& value) + inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, Scalar& value) { line >> i >> j >> value; i--; @@ -31,7 +31,7 @@ namespace internal return false; } template - inline bool GetMarketLine (std::stringstream& line, int& M, int& N, int& i, int& j, std::complex& value) + inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, std::complex& value) { Scalar valR, valI; line >> i >> j >> valR >> valI; -- cgit v1.2.3 From 4e2b18d909f27f03f64c5fd8891f2e94c5e03802 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 20 Mar 2015 16:33:48 +0100 Subject: Update approx. minimum ordering method to push and keep structural empty diagonal elements to the bottom-right part of the matrix --- Eigen/src/OrderingMethods/Amd.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/Eigen/src/OrderingMethods/Amd.h b/Eigen/src/OrderingMethods/Amd.h index 3d2981f0c..63d996cb4 100644 --- a/Eigen/src/OrderingMethods/Amd.h +++ b/Eigen/src/OrderingMethods/Amd.h @@ -137,22 +137,27 @@ void minimum_degree_ordering(SparseMatrix& C, Perm degree[i] = len[i]; // degree of node i } mark = internal::cs_wclear(0, 0, w, n); /* clear w */ - elen[n] = -2; /* n is a dead element */ - Cp[n] = -1; /* n is a root of assembly tree */ - w[n] = 0; /* n is a dead element */ /* --- Initialize degree lists ------------------------------------------ */ for(i = 0; i < n; i++) { + bool has_diag = false; + for(p = Cp[i]; p dense) /* node i is dense */ + else if(d > dense || !has_diag) /* node i is dense or has no structural diagonal element */ { nv[i] = 0; /* absorb i into element n */ elen[i] = -1; /* node i is dead */ @@ -168,6 +173,10 @@ void minimum_degree_ordering(SparseMatrix& C, Perm } } + elen[n] = -2; /* n is a dead element */ + Cp[n] = -1; /* n is a root of assembly tree */ + w[n] = 0; /* n is a dead element */ + while (nel < n) /* while (selecting pivots) do */ { /* --- Select node of minimum approximate degree -------------------- */ -- cgit v1.2.3 From 4472f3e57884202b52c551da0b294c2883e2ccbf Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 23 Mar 2015 09:40:21 +0100 Subject: Avoid SVD: consider denormalized small numbers as zero when computing the rank of the matrix --- Eigen/src/SVD/SVDBase.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h index b89393721..ad191085e 100644 --- a/Eigen/src/SVD/SVDBase.h +++ b/Eigen/src/SVD/SVDBase.h @@ -130,9 +130,10 @@ public: inline Index rank() const { using std::abs; + using std::max; eigen_assert(m_isInitialized && "JacobiSVD is not initialized."); if(m_singularValues.size()==0) return 0; - RealScalar premultiplied_threshold = m_singularValues.coeff(0) * threshold(); + RealScalar 
premultiplied_threshold = (max)(m_singularValues.coeff(0) * threshold(), (std::numeric_limits::min)()); Index i = m_nonzeroSingularValues-1; while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i; return i+1; -- cgit v1.2.3 From d27968eb7ef3d2c7f9db17184850cf2071403ced Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 24 Mar 2015 13:38:07 +0100 Subject: D&C SVD: directly falls back to JacobiSVD for very small problems (by-pass upper-bidiagonalization) --- Eigen/src/SVD/BDCSVD.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index fd7c8a4b2..e8bfa26c0 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h @@ -223,6 +223,18 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign allocate(matrix.rows(), matrix.cols(), computationOptions); using std::abs; + //**** step -1 - If the problem is too small, directly falls back to JacobiSVD and return + if(matrix.cols() < m_algoswap) + { + JacobiSVD jsvd(matrix,computationOptions); + if(computeU()) m_matrixU = jsvd.matrixU(); + if(computeV()) m_matrixV = jsvd.matrixV(); + m_singularValues = jsvd.singularValues(); + m_nonzeroSingularValues = jsvd.nonzeroSingularValues(); + m_isInitialized = true; + return *this; + } + //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows RealScalar scale = matrix.cwiseAbs().maxCoeff(); if(scale==RealScalar(0)) scale = RealScalar(1); @@ -257,6 +269,7 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign break; } } + #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE // std::cout << "m_naiveU\n" << m_naiveU << "\n\n"; // std::cout << "m_naiveV\n" << m_naiveV << "\n\n"; @@ -438,7 +451,7 @@ void BDCSVD::divide (Index firstCol, Index lastCol, Index firstRowW, } else { - RealScalar q1 = (m_naiveU(0, firstCol + k)); + RealScalar q1 = m_naiveU(0, firstCol + k); // we shift Q1 to the right for (Index i = firstCol + k - 1; i >= firstCol; i--) m_naiveU(0, i + 1) = m_naiveU(0, i); -- cgit v1.2.3 From f42b105f73e69e05ea69c55d838a79555929731e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 24 Mar 2015 13:39:14 +0100 Subject: Add the possibility to make VERIFY* checks to output a warning instead of abording. --- test/main.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/main.h b/test/main.h index ecf0c6924..3591b57a1 100644 --- a/test/main.h +++ b/test/main.h @@ -95,6 +95,9 @@ namespace Eigen { static std::vector g_test_stack; + // level == 0 <=> abort if test fail + // level >= 1 <=> warning message to std::cerr if test fail + static int g_test_level = 0; static int g_repeat; static unsigned int g_seed; static bool g_has_set_repeat, g_has_set_seed; @@ -229,6 +232,8 @@ inline void verify_impl(bool condition, const char *testname, const char *file, { if (!condition) { + if(Eigen::g_test_level>0) + std::cerr << "WARNING: "; std::cerr << "Test " << testname << " failed in " << file << " (" << line << ")" << std::endl << " " << condition_as_string << std::endl; std::cerr << "Stack:\n"; @@ -236,7 +241,8 @@ inline void verify_impl(bool condition, const char *testname, const char *file, for(int i=test_stack_size-1; i>=0; --i) std::cerr << " - " << Eigen::g_test_stack[i] << "\n"; std::cerr << "\n"; - abort(); + if(Eigen::g_test_level==0) + abort(); } } -- cgit v1.2.3 From 29eaa2b0f175a94b44be2a4b1d9ae7048b169e62 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 24 Mar 2015 13:42:42 +0100 Subject: Make MatrixBase::is* methods aware of nested_eval. 
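Rationale: for a lazy expression such as a matrix product, every coeff(i,j)
access recomputes the coefficient from scratch, so predicates like isZero()
or isIdentity() could be asymptotically more expensive than evaluating the
expression once. internal::nested_eval materializes the expression into a
temporary when that is judged cheaper. A minimal sketch of the pattern that
benefits (illustrative only, not code taken from this commit):

    Eigen::MatrixXd A = Eigen::MatrixXd::Random(100,100);
    Eigen::MatrixXd B = A.inverse();
    // Each coefficient of the lazy product A*B is a full dot product; with
    // nested_eval the product is evaluated once into a temporary before the
    // coefficient-wise identity check runs.
    bool ok = (A * B).isIdentity(1e-9);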
--- Eigen/src/Core/CwiseNullaryOp.h | 11 +++++++---- Eigen/src/Core/Dot.h | 6 +++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 009fd845d..c7dfedae4 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -300,9 +300,10 @@ template bool DenseBase::isApproxToConstant (const Scalar& val, const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if(!internal::isApprox(this->coeff(i, j), val, prec)) + if(!internal::isApprox(self.coeff(i, j), val, prec)) return false; return true; } @@ -484,9 +485,10 @@ DenseBase::Zero() template bool DenseBase::isZero(const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) return false; return true; } @@ -719,18 +721,19 @@ template bool MatrixBase::isIdentity (const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) { for(Index i = 0; i < rows(); ++i) { if(i == j) { - if(!internal::isApprox(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isApprox(self.coeff(i, j), static_cast(1), prec)) return false; } else { - if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) return false; } } diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 68e9c2660..6228f71bd 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -224,13 +224,13 @@ bool MatrixBase::isOrthogonal template bool MatrixBase::isUnitary(const RealScalar& prec) const { - typename Derived::Nested nested(derived()); + typename internal::nested_eval::type self(derived()); for(Index i = 0; i < cols(); ++i) { - if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast(1), prec)) + if(!internal::isApprox(self.col(i).squaredNorm(), static_cast(1), prec)) return false; for(Index j = 0; j < i; ++j) - if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast(1), prec)) return false; } return true; -- cgit v1.2.3 From abdbe8562e889a0ca0877d607cfd5c4cbf937e3a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 24 Mar 2015 10:45:46 -0700 Subject: Fixed the CUDA packet primitives --- Eigen/src/Core/arch/CUDA/PacketMath.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 19749c832..ceed1d1ef 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -197,21 +197,21 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro(cons } #endif -template<> EIGEN_DEVICE_FUNC inline float4 pgather(const float* from, int stride) { +template<> EIGEN_DEVICE_FUNC inline float4 pgather(const float* from, Index stride) { return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]); } -template<> EIGEN_DEVICE_FUNC inline double2 pgather(const double* from, int stride) { +template<> EIGEN_DEVICE_FUNC inline double2 pgather(const double* from, Index stride) { return make_double2(from[0*stride], 
from[1*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const float4& from, int stride) { +template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const float4& from, Index stride) { to[stride*0] = from.x; to[stride*1] = from.y; to[stride*2] = from.z; to[stride*3] = from.w; } -template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const double2& from, int stride) { +template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const double2& from, Index stride) { to[stride*0] = from.x; to[stride*1] = from.y; } @@ -245,14 +245,14 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min(const double2& a) } template<> EIGEN_DEVICE_FUNC inline float4 pabs(const float4& a) { - return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w)); + return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); } template<> EIGEN_DEVICE_FUNC inline double2 pabs(const double2& a) { - return make_double2(abs(a.x), abs(a.y)); + return make_double2(fabs(a.x), fabs(a.y)); } -template<> EIGEN_DEVICE_FUNC inline void +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { double tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; @@ -279,7 +279,7 @@ ptranspose(PacketBlock& kernel) { kernel.packet[3].z = tmp; } -template<> EIGEN_DEVICE_FUNC inline void +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { double tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; -- cgit v1.2.3 From ccf290a65cda00bfe12bbd5f4647aca5b371b6fb Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 25 Mar 2015 12:37:38 -0700 Subject: Cleaned up the TensorDevice code a little bit. --- unsupported/Eigen/CXX11/Tensor | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h | 25 ++++++++--------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 34107ae71..200bcf966 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -80,8 +80,8 @@ #include "unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h" -#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h" #include "unsupported/Eigen/CXX11/src/Tensor/Tensor.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index 7a67c56b3..b6ea655f3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -32,8 +32,7 @@ template class TensorDevice { EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { typedef TensorAssignOp Assign; Assign assign(m_expression, other); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -44,8 +43,7 @@ template class TensorDevice { Sum sum(m_expression, other); typedef TensorAssignOp Assign; Assign assign(m_expression, sum); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -56,8 +54,7 @@ template class TensorDevice { Difference difference(m_expression, other); typedef TensorAssignOp Assign; Assign 
assign(m_expression, difference); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -76,8 +73,7 @@ template class TensorDevice Assign; Assign assign(m_expression, other); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -88,8 +84,7 @@ template class TensorDevice Assign; Assign assign(m_expression, sum); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -100,8 +95,7 @@ template class TensorDevice Assign; Assign assign(m_expression, difference); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -122,7 +116,7 @@ template class TensorDevice EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { typedef TensorAssignOp Assign; Assign assign(m_expression, other); - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -133,7 +127,7 @@ template class TensorDevice Sum sum(m_expression, other); typedef TensorAssignOp Assign; Assign assign(m_expression, sum); - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -144,8 +138,7 @@ template class TensorDevice Difference difference(m_expression, other); typedef TensorAssignOp Assign; Assign assign(m_expression, difference); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } -- cgit v1.2.3 From b3343bfdae40815ae9e01ad2bd8fa226925248c8 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 25 Mar 2015 13:25:53 -0700 Subject: Fixed the vectorized implementation of the Tensor select() method --- unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index d084880de..9198c17ef 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -352,11 +352,12 @@ template, Device> { typedef TensorSelectOp XprType; + typedef typename XprType::Scalar Scalar; enum { IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess/* & - TensorEvaluator::PacketAccess*/, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + internal::packet_traits::HasBlend, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented }; @@ -373,7 +374,6 @@ struct TensorEvaluator } typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename internal::traits::Packet PacketReturnType; typedef typename TensorEvaluator::Dimensions Dimensions; @@ -403,7 +403,7 @@ struct TensorEvaluator template EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { - static const int PacketSize = internal::unpacket_traits::size; + const 
int PacketSize = internal::unpacket_traits::size; internal::Selector select; for (Index i = 0; i < PacketSize; ++i) { select.select[i] = m_condImpl.coeff(index+i); -- cgit v1.2.3 From 4df8b5a75e76a2f99e623da2a59cb9d6f591b914 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 25 Mar 2015 14:36:07 -0700 Subject: Avoid making an unecessary copy of the tensor expression when evaluating it on a GPU device --- unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index b6ea655f3..17f10c07b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -144,7 +144,7 @@ template class TensorDevice protected: const GpuDevice& m_device; - ExpressionType m_expression; + ExpressionType& m_expression; }; #endif -- cgit v1.2.3 From 3d59ae02031c1aab8fbaa4457f06d31d9e9b2414 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Mar 2015 09:59:24 +0100 Subject: Fix hypot(0,0). --- Eigen/src/Core/MathFunctions.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index e1b233d82..3c240c272 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -328,6 +328,7 @@ struct hypot_impl p = _y; qp = _x / p; } + if(p==RealScalar(0)) return RealScalar(0); return p * sqrt(RealScalar(1) + qp*qp); } }; -- cgit v1.2.3 From 1b8cc9af43374e1adf8cd7a2c18d94dddb6080a6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Mar 2015 10:55:00 +0100 Subject: Slight numerical stability improvement in 2x2 svd --- Eigen/src/SVD/JacobiSVD.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index fcf01f518..6cef87f5e 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -425,12 +425,13 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, // If d!=0, then t/d cannot overflow because the magnitude of the // entries forming d are not too small compared to the ones forming t. 
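// A sketch of the rationale for the change just below: deriving both rotation
// coefficients from the shared tmp = sqrt(1 + u^2), i.e. s = 1/tmp and
// c = u/tmp, avoids the extra dependent rounding incurred by computing c as s*u.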
RealScalar u = t / d; - rot1.s() = RealScalar(1) / sqrt(RealScalar(1) + numext::abs2(u)); - rot1.c() = rot1.s() * u; + RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); + rot1.s() = RealScalar(1) / tmp; + rot1.c() = u / tmp; } m.applyOnTheLeft(0,1,rot1); j_right->makeJacobi(m,0,1); - *j_left = rot1 * j_right->transpose(); + *j_left = rot1 * j_right->transpose(); } template @@ -680,6 +681,8 @@ JacobiSVD::compute(const MatrixType& matrix, unsig const RealScalar precision = RealScalar(2) * NumTraits::epsilon(); // limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286) + // FIXME What about considerering any denormal numbers as zero, using: + // const RealScalar considerAsZero = (std::numeric_limits::min)(); const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits::denorm_min(); // Scaling factor to reduce over/under-flows -- cgit v1.2.3 From 7e225b6fa4a3865cf87a4ac927529ac2c8cf79d6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Mar 2015 10:55:53 +0100 Subject: Suppress some false negatives in SVD unit test --- test/svd_common.h | 64 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/test/svd_common.h b/test/svd_common.h index 4c172cf9d..2f6be6b2b 100644 --- a/test/svd_common.h +++ b/test/svd_common.h @@ -49,18 +49,39 @@ void svd_compare_to_full(const MatrixType& m, unsigned int computationOptions, const SvdType& referenceSvd) { - typedef typename MatrixType::Index Index; + typedef typename MatrixType::RealScalar RealScalar; Index rows = m.rows(); Index cols = m.cols(); Index diagSize = (std::min)(rows, cols); + RealScalar prec = test_precision(); SvdType svd(m, computationOptions); VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues()); + + if(computationOptions & (ComputeFullV|ComputeThinV)) + { + VERIFY( (svd.matrixV().transpose()*svd.matrixV()).isIdentity(prec) ); + VERIFY_IS_APPROX( svd.matrixV().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).transpose(), + referenceSvd.matrixV().leftCols(diagSize) * referenceSvd.singularValues().asDiagonal() * referenceSvd.matrixV().leftCols(diagSize).transpose()); + } + + if(computationOptions & (ComputeFullU|ComputeThinU)) + { + VERIFY( (svd.matrixU().transpose()*svd.matrixU()).isIdentity(prec) ); + VERIFY_IS_APPROX( svd.matrixU().leftCols(diagSize) * svd.singularValues().cwiseAbs2().asDiagonal() * svd.matrixU().leftCols(diagSize).transpose(), + referenceSvd.matrixU().leftCols(diagSize) * referenceSvd.singularValues().cwiseAbs2().asDiagonal() * referenceSvd.matrixU().leftCols(diagSize).transpose()); + } + + // The following checks are not critical. + // For instance, with Dived&Conquer SVD, if only the factor 'V' is computedt then different matrix-matrix product implementation will be used + // and the resulting 'V' factor might be significantly different when the SVD decomposition is not unique, especially with single precision float. 
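+ // (Here g_test_level is the mechanism added to test/main.h earlier in this
+ // series: a level >= 1 makes VERIFY failures print a warning to std::cerr
+ // instead of calling abort().)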
+ ++g_test_level; if(computationOptions & ComputeFullU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU()); if(computationOptions & ComputeThinU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize)); - if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV()); + if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(svd.matrixV().cwiseAbs(), referenceSvd.matrixV().cwiseAbs()); if(computationOptions & ComputeThinV) VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize)); + --g_test_level; } // @@ -85,33 +106,48 @@ void svd_least_square(const MatrixType& m, unsigned int computationOptions) SvdType svd(m, computationOptions); if(internal::is_same::value) svd.setThreshold(1e-8); - else if(internal::is_same::value) svd.setThreshold(1e-4); - + else if(internal::is_same::value) svd.setThreshold(2e-4); + SolutionType x = svd.solve(rhs); - - // evaluate normal equation which works also for least-squares solutions - if(internal::is_same::value || svd.rank()==m.diagonal().size()) - { - // This test is not stable with single precision. - // This is probably because squaring m signicantly affects the precision. - VERIFY_IS_APPROX(m.adjoint()*(m*x),m.adjoint()*rhs); - } - + RealScalar residual = (m*x-rhs).norm(); - // Check that there is no significantly better solution in the neighborhood of x + RealScalar rhs_norm = rhs.norm(); if(!test_isMuchSmallerThan(residual,rhs.norm())) { // ^^^ If the residual is very small, then we have an exact solution, so we are already good. + + // evaluate normal equation which works also for least-squares solutions + if(internal::is_same::value || svd.rank()==m.diagonal().size()) + { + using std::sqrt; + // This test is not stable with single precision. + // This is probably because squaring m signicantly affects the precision. + if(internal::is_same::value) ++g_test_level; + + VERIFY_IS_APPROX(m.adjoint()*(m*x),m.adjoint()*rhs); + + if(internal::is_same::value) --g_test_level; + } + + // Check that there is no significantly better solution in the neighborhood of x for(Index k=0;k::epsilon())*x.row(k); RealScalar residual_y = (m*y-rhs).norm(); + VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y ); + if(internal::is_same::value) ++g_test_level; VERIFY( test_isApprox(residual_y,residual) || residual < residual_y ); + if(internal::is_same::value) --g_test_level; y.row(k) = (1.-2*NumTraits::epsilon())*x.row(k); residual_y = (m*y-rhs).norm(); + VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y ); + if(internal::is_same::value) ++g_test_level; VERIFY( test_isApprox(residual_y,residual) || residual < residual_y ); + if(internal::is_same::value) --g_test_level; } } } -- cgit v1.2.3 From ad044008da82b8eaf4fff638582ba9b69db6e711 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Mar 2015 12:07:14 +0100 Subject: Fix transpose versus adjoint. 
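Rationale: transpose() and adjoint() coincide for real scalars, but for complex
ones only the conjugate transpose yields the unitarity identity U^H * U = I, so
the previous checks were wrong for the complex-valued SVD tests. A minimal
sketch (illustrative only), using a unitary Q obtained from a QR factorization:

    Eigen::MatrixXcd M = Eigen::MatrixXcd::Random(4,4);
    Eigen::MatrixXcd Q = M.householderQr().householderQ();
    bool u1 = (Q.adjoint()   * Q).isIdentity(1e-12);  // true (up to precision)
    bool u2 = (Q.transpose() * Q).isIdentity(1e-12);  // generally false for complex Q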
--- test/svd_common.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/svd_common.h b/test/svd_common.h index 2f6be6b2b..b44b79124 100644 --- a/test/svd_common.h +++ b/test/svd_common.h @@ -61,16 +61,16 @@ void svd_compare_to_full(const MatrixType& m, if(computationOptions & (ComputeFullV|ComputeThinV)) { - VERIFY( (svd.matrixV().transpose()*svd.matrixV()).isIdentity(prec) ); - VERIFY_IS_APPROX( svd.matrixV().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).transpose(), - referenceSvd.matrixV().leftCols(diagSize) * referenceSvd.singularValues().asDiagonal() * referenceSvd.matrixV().leftCols(diagSize).transpose()); + VERIFY( (svd.matrixV().adjoint()*svd.matrixV()).isIdentity(prec) ); + VERIFY_IS_APPROX( svd.matrixV().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint(), + referenceSvd.matrixV().leftCols(diagSize) * referenceSvd.singularValues().asDiagonal() * referenceSvd.matrixV().leftCols(diagSize).adjoint()); } if(computationOptions & (ComputeFullU|ComputeThinU)) { - VERIFY( (svd.matrixU().transpose()*svd.matrixU()).isIdentity(prec) ); - VERIFY_IS_APPROX( svd.matrixU().leftCols(diagSize) * svd.singularValues().cwiseAbs2().asDiagonal() * svd.matrixU().leftCols(diagSize).transpose(), - referenceSvd.matrixU().leftCols(diagSize) * referenceSvd.singularValues().cwiseAbs2().asDiagonal() * referenceSvd.matrixU().leftCols(diagSize).transpose()); + VERIFY( (svd.matrixU().adjoint()*svd.matrixU()).isIdentity(prec) ); + VERIFY_IS_APPROX( svd.matrixU().leftCols(diagSize) * svd.singularValues().cwiseAbs2().asDiagonal() * svd.matrixU().leftCols(diagSize).adjoint(), + referenceSvd.matrixU().leftCols(diagSize) * referenceSvd.singularValues().cwiseAbs2().asDiagonal() * referenceSvd.matrixU().leftCols(diagSize).adjoint()); } // The following checks are not critical. -- cgit v1.2.3 From eb7e4c2b9c128a5f9a9ad1b0792d6b2aa8bf2852 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Mar 2015 12:11:24 +0100 Subject: Pass Vector3 type by reference --- Eigen/src/Geometry/Quaternion.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index e90ce77eb..e84fecf33 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -162,7 +162,7 @@ class QuaternionBase : public RotationBase { return coeffs().isApprox(other.coeffs(), prec); } /** return the result vector of \a v through the rotation*/ - EIGEN_STRONG_INLINE Vector3 _transformVector(Vector3 v) const; + EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3 &v) const; /** \returns \c *this with scalar type casted to \a NewScalarType * @@ -462,7 +462,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase::operator*= (const Quaterni */ template EIGEN_STRONG_INLINE typename QuaternionBase::Vector3 -QuaternionBase::_transformVector(Vector3 v) const +QuaternionBase::_transformVector(const Vector3 &v) const { // Note that this algorithm comes from the optimization by hand // of the conversion to a Matrix followed by a Matrix/Vector product. -- cgit v1.2.3 From 266a84558fe204d561df3071e280731d80e5fe4c Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Fri, 27 Mar 2015 16:36:59 +0100 Subject: Optionally build the documentation when building unit tests. 
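Usage sketch (assuming a standard out-of-source CMake build): configuring with
-DEIGEN_TEST_BUILD_DOCUMENTATION=ON makes the buildtests target depend on the
doc target, so documentation breakage is caught when building the unit tests.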
--- test/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 393c35b57..54ce7fb30 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -332,3 +332,8 @@ endif(EIGEN_TEST_NVCC) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests) add_test(NAME failtests WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests COMMAND ${CMAKE_COMMAND} ${Eigen_SOURCE_DIR} -G "${CMAKE_GENERATOR}" -DEIGEN_FAILTEST=ON) + +option(EIGEN_TEST_BUILD_DOCUMENTATION "Test building the doxygen documentation" OFF) +IF(EIGEN_TEST_BUILD_DOCUMENTATION) + add_dependencies(buildtests doc) +ENDIF() -- cgit v1.2.3 From 09a5361d1ba1cc545739f72188d01a7ee781a48d Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Sat, 28 Mar 2015 12:36:24 +0100 Subject: bug #983: Pass Vector3 by const reference and not by value --- Eigen/src/Geometry/Quaternion.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index e90ce77eb..e5ece3323 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -162,7 +162,7 @@ class QuaternionBase : public RotationBase { return coeffs().isApprox(other.coeffs(), prec); } /** return the result vector of \a v through the rotation*/ - EIGEN_STRONG_INLINE Vector3 _transformVector(Vector3 v) const; + EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const; /** \returns \c *this with scalar type casted to \a NewScalarType * @@ -462,7 +462,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase::operator*= (const Quaterni */ template EIGEN_STRONG_INLINE typename QuaternionBase::Vector3 -QuaternionBase::_transformVector(Vector3 v) const +QuaternionBase::_transformVector(const Vector3& v) const { // Note that this algorithm comes from the optimization by hand // of the conversion to a Matrix followed by a Matrix/Vector product. -- cgit v1.2.3 From 2adbf6b8cac9a4318cf43cb2906c84fd2c52c71f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 28 Mar 2015 22:34:54 +0100 Subject: fix stupid warning with old GCC --- Eigen/src/SparseCore/SparseBlock.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index 2b31716a3..e5ef10212 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -595,6 +595,7 @@ public: : m_eval(aEval), m_outerPos( (IsRowMajor ? aEval.m_block.startCol() : aEval.m_block.startRow()) - 1), // -1 so that operator++ finds the first non-zero entry m_innerIndex(IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol()), + m_value(0), m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows()) { EIGEN_UNUSED_VARIABLE(outer); -- cgit v1.2.3 From 58af8bf90c1e4b225f61672582f0d519b4963f30 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Mon, 30 Mar 2015 16:47:22 +0200 Subject: bug #982: Make sure numext::maxi and numext::mini are called correctly, in case Scalar expressions return expression templates. 
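The failure mode: with a custom Scalar whose operators return expression
templates (e.g. an autodiff or multiprecision type), an unqualified call such
as numext::maxi(abs(t0), abs(t1)) deduces the expression type as its argument
type and can hand back an unevaluated, potentially dangling expression. The fix
passes the result type explicitly, which forces both arguments to convert
first. A sketch of the call pattern, with Scalar, t0, t1 as in the hunks below:

    // fragile: the deduced argument type may be an expression template
    //   Scalar maxval = numext::maxi(abs(t0), abs(t1));
    // robust: force evaluation to Scalar before comparing
    Scalar maxval = numext::maxi<Scalar>(abs(t0), abs(t1));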
--- Eigen/src/Eigenvalues/EigenSolver.h | 4 ++-- Eigen/src/SVD/BDCSVD.h | 6 +++--- Eigen/src/SVD/JacobiSVD.h | 5 +++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h index 167cd99ab..b866544b4 100644 --- a/Eigen/src/Eigenvalues/EigenSolver.h +++ b/Eigen/src/Eigenvalues/EigenSolver.h @@ -417,7 +417,7 @@ EigenSolver::compute(const MatrixType& matrix, bool computeEigenvect { Scalar t0 = m_matT.coeff(i+1, i); Scalar t1 = m_matT.coeff(i, i+1); - Scalar maxval = numext::maxi(abs(p),numext::maxi(abs(t0),abs(t1))); + Scalar maxval = numext::maxi(abs(p),numext::maxi(abs(t0),abs(t1))); t0 /= maxval; t1 /= maxval; Scalar p0 = p/maxval; @@ -608,7 +608,7 @@ void EigenSolver::doComputeEigenvectors() } // Overflow control - Scalar t = numext::maxi(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n))); + Scalar t = numext::maxi(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n))); if ((eps * t) * t > Scalar(1)) m_matT.block(i, n-1, size-i, 2) /= t; diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index e8bfa26c0..ca7bc30fc 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h @@ -743,7 +743,7 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia // rational interpolation: fit a function of the form a / mu + b through the two previous // iterates and use its zero to compute the next iterate bool useBisection = fPrev*fCur>0; - while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits::epsilon() && !useBisection) + while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits::epsilon() && !useBisection) { ++m_numIters; @@ -794,7 +794,7 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia #endif eigen_internal_assert(fLeft * fRight < 0); - while (rightShifted - leftShifted > 2 * NumTraits::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted))) + while (rightShifted - leftShifted > 2 * NumTraits::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted))) { RealScalar midShifted = (leftShifted + rightShifted) / 2; RealScalar fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift); @@ -1004,7 +1004,7 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); RealScalar epsilon_strict = NumTraits::epsilon() * maxDiag; - RealScalar epsilon_coarse = 8 * NumTraits::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag); + RealScalar epsilon_coarse = 8 * NumTraits::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag); #ifdef EIGEN_BDCSVD_SANITY_CHECKS assert(m_naiveU.allFinite()); diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index 6cef87f5e..a46a47104 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -722,8 +722,9 @@ JacobiSVD::compute(const MatrixType& matrix, unsig // if this 2x2 sub-matrix is not diagonal already... // notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't // keep us iterating forever. Similarly, small denormal numbers are considered zero. 
- RealScalar threshold = numext::maxi(considerAsZero, precision * numext::maxi(abs(m_workMatrix.coeff(p,p)), - abs(m_workMatrix.coeff(q,q)))); + RealScalar threshold = numext::maxi(considerAsZero, + precision * numext::maxi(abs(m_workMatrix.coeff(p,p)), + abs(m_workMatrix.coeff(q,q)))); // We compare both values to threshold instead of calling max to be robust to NaN (See bug 791) if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold) { -- cgit v1.2.3 From 1efae98feed340e8b33dc40cab36aeb15836a792 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Mon, 30 Mar 2015 23:56:20 +0200 Subject: bug #985: RealQZ failed when either matrix had zero rows or columns (report and patch by Ben Goodrich) Also added a regression test --- Eigen/src/Eigenvalues/RealQZ.h | 12 ++++++------ test/real_qz.cpp | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index ca75f2f50..677c7c0bb 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -240,10 +240,10 @@ namespace Eigen { m_S.coeffRef(i,j) = Scalar(0.0); m_S.rightCols(dim-j-1).applyOnTheLeft(i-1,i,G.adjoint()); m_T.rightCols(dim-i+1).applyOnTheLeft(i-1,i,G.adjoint()); + // update Q + if (m_computeQZ) + m_Q.applyOnTheRight(i-1,i,G); } - // update Q - if (m_computeQZ) - m_Q.applyOnTheRight(i-1,i,G); // kill T(i,i-1) if(m_T.coeff(i,i-1)!=Scalar(0)) { @@ -251,10 +251,10 @@ namespace Eigen { m_T.coeffRef(i,i-1) = Scalar(0.0); m_S.applyOnTheRight(i,i-1,G); m_T.topRows(i).applyOnTheRight(i,i-1,G); + // update Z + if (m_computeQZ) + m_Z.applyOnTheLeft(i,i-1,G.adjoint()); } - // update Z - if (m_computeQZ) - m_Z.applyOnTheLeft(i,i-1,G.adjoint()); } } } diff --git a/test/real_qz.cpp b/test/real_qz.cpp index 7d743a734..555dcbcb4 100644 --- a/test/real_qz.cpp +++ b/test/real_qz.cpp @@ -25,6 +25,22 @@ template void real_qz(const MatrixType& m) MatrixType A = MatrixType::Random(dim,dim), B = MatrixType::Random(dim,dim); + + // Regression test for bug 985: Randomly set rows or columns to zero + Index k=internal::random(0, dim-1); + switch(internal::random(0,10)) { + case 0: + A.row(k).setZero(); break; + case 1: + A.col(k).setZero(); break; + case 2: + B.row(k).setZero(); break; + case 3: + B.col(k).setZero(); break; + default: + break; + } + RealQZ qz(A,B); VERIFY_IS_EQUAL(qz.info(), Success); -- cgit v1.2.3 From 3238ca6abcce0f26035629aebe4e9018079a6917 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Tue, 31 Mar 2015 00:42:14 +0200 Subject: Addendum to last patch: k is Index and not int --- test/real_qz.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/real_qz.cpp b/test/real_qz.cpp index 555dcbcb4..a1766c6d9 100644 --- a/test/real_qz.cpp +++ b/test/real_qz.cpp @@ -27,7 +27,7 @@ template void real_qz(const MatrixType& m) // Regression test for bug 985: Randomly set rows or columns to zero - Index k=internal::random(0, dim-1); + Index k=internal::random(0, dim-1); switch(internal::random(0,10)) { case 0: A.row(k).setZero(); break; -- cgit v1.2.3 From 3b169d792df8bcdd9ddbc645ffcfdb0636e585af Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Tue, 31 Mar 2015 00:49:08 +0200 Subject: Suppress unused variable warning --- Eigen/src/SVD/BDCSVD.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index ca7bc30fc..a69e4cf96 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h @@ -786,9 +786,9 @@ void 
BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia } RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift); - RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift); if(!(fLeft * fRight<0)) std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; #endif @@ -801,7 +801,6 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia if (fLeft * fMid < 0) { rightShifted = midShifted; - fRight = fMid; } else { -- cgit v1.2.3 From 7bd578d11d93f82f76b7a6c7d39f8b6cc62d042f Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Tue, 31 Mar 2015 00:50:04 +0200 Subject: Change CMake warning to simple message for old Metis versions --- cmake/FindMetis.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/FindMetis.cmake b/cmake/FindMetis.cmake index e0040d320..6a0ce790c 100644 --- a/cmake/FindMetis.cmake +++ b/cmake/FindMetis.cmake @@ -26,7 +26,7 @@ macro(_metis_check_version) string(REGEX MATCH "define[ \t]+METIS_VER_SUBMINOR[ \t]+([0-9]+)" _metis_subminor_version_match "${_metis_version_header}") set(METIS_SUBMINOR_VERSION "${CMAKE_MATCH_1}") if(NOT METIS_MAJOR_VERSION) - message(WARNING "Could not determine Metis version. Assuming version 4.0.0") + message(STATUS "Could not determine Metis version. Assuming version 4.0.0") set(METIS_VERSION 4.0.0) else() set(METIS_VERSION ${METIS_MAJOR_VERSION}.${METIS_MINOR_VERSION}.${METIS_SUBMINOR_VERSION}) -- cgit v1.2.3 From 35d3053d55fdd7e1ff7c765381ff43b02346d542 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 31 Mar 2015 09:23:53 +0200 Subject: Fix regression introduced in 3b169d792df8bcdd9ddbc645ffcfdb0636e585af --- Eigen/src/SVD/BDCSVD.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index a69e4cf96..cace915e7 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h @@ -787,8 +787,11 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift); -#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE +#if defined EIGEN_INTERNAL_DEBUGGING || defined EIGEN_BDCSVD_DEBUG_VERBOSE RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift); +#endif + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE if(!(fLeft * fRight<0)) std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; #endif -- cgit v1.2.3 From bd76d837e6eeaf82dd7db30435d49e939b4674af Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 31 Mar 2015 14:57:32 +0200 Subject: Fix sign of SuperLU::determinant --- Eigen/src/SuperLUSupport/SuperLUSupport.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index efdc6d046..b9d5e48fb 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -302,6 +302,7 @@ class SuperLUBase : public SparseSolverBase typedef Matrix Vector; typedef Matrix IntRowVectorType; typedef Matrix IntColVectorType; + typedef Map > PermutationMap; typedef SparseMatrix 
LUMatrixType; public: @@ -459,10 +460,11 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> > typedef typename Base::RealScalar RealScalar; typedef typename Base::StorageIndex StorageIndex; typedef typename Base::IntRowVectorType IntRowVectorType; - typedef typename Base::IntColVectorType IntColVectorType; + typedef typename Base::IntColVectorType IntColVectorType; + typedef typename Base::PermutationMap PermutationMap; typedef typename Base::LUMatrixType LUMatrixType; typedef TriangularView LMatrixType; - typedef TriangularView UMatrixType; + typedef TriangularView UMatrixType; public: using Base::_solve_impl; @@ -774,6 +776,8 @@ typename SuperLU::Scalar SuperLU::determinant() const det *= m_u.valuePtr()[lastId]; } } + if(PermutationMap(m_p.data(),m_p.size()).determinant()*PermutationMap(m_q.data(),m_q.size()).determinant()<0) + det = -det; if(m_sluEqued!='N') return det/m_sluRscale.prod()/m_sluCscale.prod(); else -- cgit v1.2.3 From ae01c05e184c62601521e785f733daf4a425b1c0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 31 Mar 2015 15:19:57 +0200 Subject: Fix computeProductBlockingSizes with m==0, and add respective unit test. --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 3 +-- test/product_extra.cpp | 33 ++++++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index d32377a00..428527820 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -249,10 +249,9 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n actual_lm = l2; max_mc = 576; } - Index mc = (std::min)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc); if (mc > Traits::mr) mc -= mc % Traits::mr; - + else if (mc==0) return; m = (m%mc)==0 ? 
mc : (mc - Traits::mr * ((mc/*-1*/-(m%mc))/(Traits::mr*(m/mc+1)))); } diff --git a/test/product_extra.cpp b/test/product_extra.cpp index 1b4c6c33c..67ea13568 100644 --- a/test/product_extra.cpp +++ b/test/product_extra.cpp @@ -134,7 +134,7 @@ void zero_sized_objects(const MatrixType& m) } } - +template void bug_127() { // Bug 127 @@ -159,6 +159,7 @@ void bug_127() a*b; } +template void unaligned_objects() { // Regression test for the bug reported here: @@ -188,6 +189,29 @@ void unaligned_objects() } } +template +EIGEN_DONT_INLINE +Index test_compute_block_size(Index m, Index n, Index k) +{ + Index mc(m), nc(n), kc(k); + internal::computeProductBlockingSizes(kc, mc, nc); + return kc+mc+nc; +} + +template +Index compute_block_size() +{ + Index ret = 0; + ret += test_compute_block_size(0,1,1); + ret += test_compute_block_size(1,0,1); + ret += test_compute_block_size(1,1,0); + ret += test_compute_block_size(0,0,1); + ret += test_compute_block_size(0,1,0); + ret += test_compute_block_size(1,0,0); + ret += test_compute_block_size(0,0,0); + return ret; +} + void test_product_extra() { for(int i = 0; i < g_repeat; i++) { @@ -198,6 +222,9 @@ void test_product_extra() CALL_SUBTEST_4( product_extra(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); CALL_SUBTEST_1( zero_sized_objects(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } - CALL_SUBTEST_5( bug_127() ); - CALL_SUBTEST_6( unaligned_objects() ); + CALL_SUBTEST_5( bug_127<0>() ); + CALL_SUBTEST_6( unaligned_objects<0>() ); + CALL_SUBTEST_7( compute_block_size() ); + CALL_SUBTEST_7( compute_block_size() ); + CALL_SUBTEST_7( compute_block_size >() ); } -- cgit v1.2.3 From 0cbd5ae3cb0f7b73c945e1d69f3374d902a9f78d Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 31 Mar 2015 11:17:21 -0400 Subject: Correctly detect Android with ndk_build --- Eigen/src/Core/util/Macros.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 6b294e77f..7cedb4c97 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -213,7 +213,8 @@ #endif /// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android -#if defined(__ANDROID__) +// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain. +#if defined(__ANDROID__) || defined(ANDROID) #define EIGEN_OS_ANDROID 1 #else #define EIGEN_OS_ANDROID 0 -- cgit v1.2.3 From 73cdeae1d3756187cffd2a943ed635c67cb0c9eb Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 31 Mar 2015 11:17:23 -0400 Subject: Only use blocking sizes LUTs for single-thread products for now --- Eigen/src/Core/products/LookupBlockingSizesTable.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Eigen/src/Core/products/LookupBlockingSizesTable.h b/Eigen/src/Core/products/LookupBlockingSizesTable.h index 5ab4525df..39a53c8f1 100644 --- a/Eigen/src/Core/products/LookupBlockingSizesTable.h +++ b/Eigen/src/Core/products/LookupBlockingSizesTable.h @@ -79,6 +79,14 @@ template bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads) { + if (num_threads > 1) { + // We don't currently have lookup tables recorded for multithread performance, + // and we have confirmed experimentally that our single-thread-recorded LUTs are + // poor for multithread performance, and our LUTs don't currently contain + // any annotation about multithread status (FIXME - we need that). 
+ // So for now, we just early-return here. + return false; + } return LookupBlockingSizesFromTableImpl::run(k, m, n, num_threads); } -- cgit v1.2.3 From f8736866021ba4585cba7a4e97d1cc38320774c6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 31 Mar 2015 08:27:23 -0700 Subject: Added documentation for the convolution operation --- unsupported/Eigen/CXX11/src/Tensor/README.md | 36 +++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/README.md b/unsupported/Eigen/CXX11/src/Tensor/README.md index ed1026be2..87e57cebb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/README.md +++ b/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -1157,7 +1157,41 @@ in TensorFunctors.h for information on how to implement a reduction operator. ## Convolutions -TBD: convolve(const KernelDerived& kernel, const Dimensions& dims) +### <Operation> convolve(const Kernel& kernel, const Dimensions& dims) + +Returns a tensor that is the output of the convolution of the input tensor with the kernel, +along the specified dimensions of the input tensor. The dimension size for dimensions of the output tensor +which were part of the convolution will be reduced by the formula: +output_dim_size = input_dim_size - kernel_dim_size + 1 (requires: input_dim_size >= kernel_dim_size). +The dimension sizes for dimensions that were not part of the convolution will remain the same. +Performance of the convolution can depend on the length of the stride(s) of the input tensor dimension(s) along which the +convolution is computed (the first dimension has the shortest stride for ColMajor, whereas RowMajor's shortest stride is +for the last dimension). + + // Compute convolution along the second and third dimension. + Tensor input(3, 3, 7, 11); + Tensor kernel(2, 2); + Tensor output(3, 2, 6, 11); + input.setRandom(); + kernel.setRandom(); + + Eigen::array dims({1, 2}); // Specify second and third dimension for convolution. 
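+ // (Per the output_dim_size formula above, dimension 1 shrinks from 3 to
+ // 3-2+1 = 2 and dimension 2 from 7 to 7-2+1 = 6, matching output(3, 2, 6, 11).)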
+ output = input.convolve(kernel, dims); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 6; ++k) { + for (int l = 0; l < 11; ++l) { + const float result = output(i,j,k,l); + const float expected = input(i,j+0,k+0,l) * kernel(0,0) + + input(i,j+1,k+0,l) * kernel(1,0) + + input(i,j+0,k+1,l) * kernel(0,1) + + input(i,j+1,k+1,l) * kernel(1,1); + VERIFY_IS_APPROX(result, expected); + } + } + } + } ## Geometrical Operations -- cgit v1.2.3 From 68d4afe985f994f10e64b76d1476f5f08f006350 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 31 Mar 2015 09:07:09 -0700 Subject: Added support for convolution of tensors laid out in RowMajor mode --- .../Eigen/CXX11/src/Tensor/TensorConvolution.h | 357 +++++++++++++++------ 1 file changed, 256 insertions(+), 101 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 591fd2464..1db5f1232 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -21,8 +21,8 @@ namespace Eigen { */ namespace internal { - -template class IndexMapper { +template +class IndexMapper { public: IndexMapper(const InputDims& input_dims, const array& kernel_dims, const array& indices) { @@ -38,13 +38,19 @@ template class IndexM array inputStrides; array outputStrides; - for (int i = 0; i < NumDims; ++i) { - if (i > 0) { + if (static_cast(Layout) == static_cast(ColMajor)) { + inputStrides[0] = 1; + outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { inputStrides[i] = inputStrides[i-1] * input_dims[i-1]; outputStrides[i] = outputStrides[i-1] * dimensions[i-1]; - } else { - inputStrides[0] = 1; - outputStrides[0] = 1; + } + } else { + inputStrides[NumDims - 1] = 1; + outputStrides[NumDims - 1] = 1; + for (int i = static_cast(NumDims) - 2; i >= 0; --i) { + inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1]; + outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1]; } } @@ -52,13 +58,20 @@ template class IndexM array cudaOutputDimensions; array tmp = dimensions; array ordering; + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; for (int i = 0; i < NumKernelDims; ++i) { - ordering[i] = indices[i]; + const Index index = i + offset; + ordering[index] = indices[i]; tmp[indices[i]] = -1; - cudaInputDimensions[i] = input_dims[ordering[i]]; - cudaOutputDimensions[i] = dimensions[ordering[i]]; + cudaInputDimensions[index] = input_dims[indices[i]]; + cudaOutputDimensions[index] = dimensions[indices[i]]; } - int written = NumKernelDims; + + int written = static_cast(Layout) == static_cast(ColMajor) + ? 
NumKernelDims + : 0; for (int i = 0; i < NumDims; ++i) { if (tmp[i] >= 0) { ordering[written] = i; @@ -73,61 +86,123 @@ template class IndexM m_outputStrides[i] = outputStrides[ordering[i]]; } - for (int i = 0; i < NumDims; ++i) { - if (i > NumKernelDims) { - m_cudaInputStrides[i] = m_cudaInputStrides[i-1] * cudaInputDimensions[i-1]; - m_cudaOutputStrides[i] = m_cudaOutputStrides[i-1] * cudaOutputDimensions[i-1]; - } else { - m_cudaInputStrides[i] = 1; - m_cudaOutputStrides[i] = 1; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) { + if (i > NumKernelDims) { + m_cudaInputStrides[i] = + m_cudaInputStrides[i - 1] * cudaInputDimensions[i - 1]; + m_cudaOutputStrides[i] = + m_cudaOutputStrides[i - 1] * cudaOutputDimensions[i - 1]; + } else { + m_cudaInputStrides[i] = 1; + m_cudaOutputStrides[i] = 1; + } + } + } else { + for (int i = NumDims - 1; i >= 0; --i) { + if (i + 1 < offset) { + m_cudaInputStrides[i] = + m_cudaInputStrides[i + 1] * cudaInputDimensions[i + 1]; + m_cudaOutputStrides[i] = + m_cudaOutputStrides[i + 1] * cudaOutputDimensions[i + 1]; + } else { + m_cudaInputStrides[i] = 1; + m_cudaOutputStrides[i] = 1; + } } } } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputPlaneToTensorInputOffset(Index p) const { Index inputIndex = 0; - for (int d = NumDims - 1; d > NumKernelDims; --d) { - const Index idx = p / m_cudaInputStrides[d]; - inputIndex += idx * m_inputStrides[d]; - p -= idx * m_cudaInputStrides[d]; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int d = NumDims - 1; d > NumKernelDims; --d) { + const Index idx = p / m_cudaInputStrides[d]; + inputIndex += idx * m_inputStrides[d]; + p -= idx * m_cudaInputStrides[d]; + } + inputIndex += p * m_inputStrides[NumKernelDims]; + } else { + int limit = 0; + if (NumKernelDims < NumDims) { + limit = NumDims - NumKernelDims - 1; + } + for (int d = 0; d < limit; ++d) { + const Index idx = p / m_cudaInputStrides[d]; + inputIndex += idx * m_inputStrides[d]; + p -= idx * m_cudaInputStrides[d]; + } + inputIndex += p * m_inputStrides[limit]; } - inputIndex += p * m_inputStrides[NumKernelDims]; return inputIndex; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputPlaneToTensorOutputOffset(Index p) const { Index outputIndex = 0; - for (int d = NumDims - 1; d > NumKernelDims; --d) { - const Index idx = p / m_cudaOutputStrides[d]; - outputIndex += idx * m_outputStrides[d]; - p -= idx * m_cudaOutputStrides[d]; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int d = NumDims - 1; d > NumKernelDims; --d) { + const Index idx = p / m_cudaOutputStrides[d]; + outputIndex += idx * m_outputStrides[d]; + p -= idx * m_cudaOutputStrides[d]; + } + outputIndex += p * m_outputStrides[NumKernelDims]; + } else { + int limit = 0; + if (NumKernelDims < NumDims) { + limit = NumDims - NumKernelDims - 1; + } + for (int d = 0; d < limit; ++d) { + const Index idx = p / m_cudaOutputStrides[d]; + outputIndex += idx * m_outputStrides[d]; + p -= idx * m_cudaOutputStrides[d]; + } + outputIndex += p * m_outputStrides[limit]; } - outputIndex += p * m_outputStrides[NumKernelDims]; return outputIndex; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i) const { - return i * m_inputStrides[0]; + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 
0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset]; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i) const { - return i * m_outputStrides[0]; + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset]; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j) const { - return i * m_inputStrides[0] + j*m_inputStrides[1]; + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1]; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j) const { - return i * m_outputStrides[0] + j * m_outputStrides[1]; + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1]; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j, Index k) const { - return i * m_inputStrides[0] + j*m_inputStrides[1] + k*m_inputStrides[2]; + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] + + k * m_inputStrides[offset + 2]; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const { - return i * m_outputStrides[0] + j*m_outputStrides[1] + k*m_outputStrides[2]; + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] + + k * m_outputStrides[offset + 2]; } private: @@ -237,35 +312,61 @@ struct TensorEvaluator(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - // Only column major tensors are supported for now. 
- EIGEN_STATIC_ASSERT((static_cast(Layout) == static_cast(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE); const typename TensorEvaluator::Dimensions& input_dims = m_inputImpl.dimensions(); const typename TensorEvaluator::Dimensions& kernel_dims = m_kernelImpl.dimensions(); - m_inputStride[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_inputStride[i] = m_inputStride[i-1] * input_dims[i-1]; + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStride[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1]; + } + } else { + m_inputStride[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1]; + } } m_dimensions = m_inputImpl.dimensions(); - for (int i = 0; i < NumKernelDims; ++i) { - const Index index = op.indices()[i]; - const Index input_dim = input_dims[index]; - const Index kernel_dim = kernel_dims[i]; - const Index result_dim = input_dim - kernel_dim + 1; - m_dimensions[index] = result_dim; - if (i > 0) { - m_kernelStride[i] = m_kernelStride[i-1] * kernel_dims[i-1]; - } else { - m_kernelStride[0] = 1; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + if (i > 0) { + m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1]; + } else { + m_kernelStride[0] = 1; + } + m_indexStride[i] = m_inputStride[index]; + } + + m_outputStride[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1]; + } + } else { + for (int i = NumKernelDims - 1; i >= 0; --i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + if (i < NumKernelDims - 1) { + m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1]; + } else { + m_kernelStride[NumKernelDims - 1] = 1; + } + m_indexStride[i] = m_inputStride[index]; } - m_indexStride[i] = m_inputStride[index]; - } - m_outputStride[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStride[i] = m_outputStride[i-1] * m_dimensions[i-1]; + m_outputStride[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1]; + } } } @@ -310,13 +411,24 @@ struct TensorEvaluator::size; Index indices[2] = {index, index+PacketSize-1}; Index startInputs[2] = {0, 0}; - for (int i = NumDims - 1; i > 0; --i) { - const Index idx0 = indices[0] / m_outputStride[i]; - const Index idx1 = indices[1] / m_outputStride[i]; - startInputs[0] += idx0 * m_inputStride[i]; - startInputs[1] += idx1 * m_inputStride[i]; - indices[0] -= idx0 * m_outputStride[i]; - indices[1] -= idx1 * m_outputStride[i]; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_outputStride[i]; + const Index idx1 = indices[1] / m_outputStride[i]; + startInputs[0] += idx0 * m_inputStride[i]; + startInputs[1] += idx1 * m_inputStride[i]; + indices[0] -= idx0 * m_outputStride[i]; + indices[1] -= idx1 * m_outputStride[i]; + } + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / m_outputStride[i]; + const Index idx1 = indices[1] / 
m_outputStride[i]; + startInputs[0] += idx0 * m_inputStride[i]; + startInputs[1] += idx1 * m_inputStride[i]; + indices[0] -= idx0 * m_outputStride[i]; + indices[1] -= idx1 * m_outputStride[i]; + } } startInputs[0] += indices[0]; startInputs[1] += indices[1]; @@ -344,10 +456,18 @@ struct TensorEvaluator 0; --i) { - const Index idx = index / m_outputStride[i]; - startInput += idx * m_inputStride[i]; - index -= idx * m_outputStride[i]; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStride[i]; + startInput += idx * m_inputStride[i]; + index -= idx * m_outputStride[i]; + } + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStride[i]; + startInput += idx * m_inputStride[i]; + index -= idx * m_outputStride[i]; + } } startInput += index; return startInput; @@ -378,7 +498,7 @@ struct TensorEvaluator { } }; - - - -template -__global__ void EigenConvolutionKernel1D(InputEvaluator eval, const internal::IndexMapper indexMapper, const float* __restrict kernel, const int numPlanes, const int numX, const int maxX, const int kernelSize, float* buffer) { +template +__global__ void EigenConvolutionKernel1D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const int numPlanes, const int numX, + const int maxX, const int kernelSize, float* buffer) { extern __shared__ float s[]; const int first_x = blockIdx.x * maxX; @@ -453,7 +576,7 @@ __global__ void EigenConvolutionKernel1D(InputEvaluator eval, const internal::In #pragma unroll for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x); - s[i + plane_kernel_offset] = eval.coeff(tensor_index); + s[i + plane_kernel_offset] = eval.coeff(tensor_index); } __syncthreads(); @@ -476,9 +599,15 @@ __global__ void EigenConvolutionKernel1D(InputEvaluator eval, const internal::In } }; - -template -__global__ void EigenConvolutionKernel2D(InputEvaluator eval, const internal::IndexMapper indexMapper, const float* __restrict kernel, const int numPlanes, const int numX, const int maxX, const int numY, const int maxY, const int kernelSizeX, const int kernelSizeY, float* buffer) { +template +__global__ void EigenConvolutionKernel2D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const int numPlanes, const int numX, + const int maxX, const int numY, const int maxY, const int kernelSizeX, + const int kernelSizeY, float* buffer) { extern __shared__ float s[]; const int first_x = blockIdx.x * maxX; @@ -538,9 +667,15 @@ __global__ void EigenConvolutionKernel2D(InputEvaluator eval, const internal::In } }; - template -__global__ void EigenConvolutionKernel3D(InputEvaluator eval, const internal::IndexMapper indexMapper, const float* __restrict kernel, const size_t numPlanes, const size_t numX, const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ, const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY, const size_t kernelSizeZ, float* buffer) { +__global__ void EigenConvolutionKernel3D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const size_t numPlanes, const size_t numX, + const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ, + const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY, + const size_t kernelSizeZ, float* 
buffer) { extern __shared__ float s[]; // Load inputs to shared memory @@ -622,8 +757,6 @@ struct TensorEvaluator(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - // Only column major tensors are supported for now. - EIGEN_STATIC_ASSERT((static_cast(Layout) == static_cast(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE); const typename TensorEvaluator::Dimensions& input_dims = m_inputImpl.dimensions(); const typename TensorEvaluator::Dimensions& kernel_dims = m_kernelImpl.dimensions(); @@ -712,10 +845,14 @@ struct TensorEvaluator(Layout) == static_cast(ColMajor) + ? 0 + : m_inputImpl.dimensions().rank() - 1; + if (m_indices[0] == single_stride_dim) { // Maximum the reuse const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32; maxX = (std::min)(inner_dim, numX); @@ -747,7 +884,8 @@ struct TensorEvaluator indices(m_indices[0]); const array kernel_dims(m_kernelImpl.dimensions()[0]); - internal::IndexMapper indexMapper(m_inputImpl.dimensions(), kernel_dims, indices); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); switch(kernel_size) { case 4: { LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data); @@ -765,11 +903,15 @@ struct TensorEvaluator(Layout) == static_cast(ColMajor) ? 0 : 1; + const int idxY = + static_cast(Layout) == static_cast(ColMajor) ? 1 : 0; + const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; + const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; + + const int numX = dimensions()[m_indices[idxX]]; + const int numY = dimensions()[m_indices[idxY]]; const int numP = dimensions().TotalSize() / (numX*numY); const float scaling_factor = sqrtf(static_cast(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x)); @@ -798,9 +940,11 @@ struct TensorEvaluator indices(m_indices[0], m_indices[1]); - const array kernel_dims(m_kernelImpl.dimensions()[0], m_kernelImpl.dimensions()[1]); - internal::IndexMapper indexMapper(m_inputImpl.dimensions(), kernel_dims, indices); + const array indices(m_indices[idxX], m_indices[idxY]); + const array kernel_dims(m_kernelImpl.dimensions()[idxX], + m_kernelImpl.dimensions()[idxY]); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); switch (kernel_size_x) { case 4: { switch (kernel_size_y) { @@ -837,13 +981,20 @@ struct TensorEvaluator(Layout) == static_cast(ColMajor) ? 0 : 2; + const int idxY = + static_cast(Layout) == static_cast(ColMajor) ? 1 : 1; + const int idxZ = + static_cast(Layout) == static_cast(ColMajor) ? 
2 : 0; + + const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; + const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; + const int kernel_size_z = m_kernelImpl.dimensions()[idxZ]; + + const int numX = dimensions()[m_indices[idxX]]; + const int numY = dimensions()[m_indices[idxY]]; + const int numZ = dimensions()[m_indices[idxZ]]; const int numP = dimensions().TotalSize() / (numX*numY*numZ); const int maxX = (std::min)(128, (std::min)(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX)); @@ -860,16 +1011,20 @@ struct TensorEvaluator indices(m_indices[0], m_indices[1], m_indices[2]); - const array kernel_dims(m_kernelImpl.dimensions()[0], m_kernelImpl.dimensions()[1], m_kernelImpl.dimensions()[2]); - internal::IndexMapper indexMapper(m_inputImpl.dimensions(), kernel_dims, indices); + const array indices(m_indices[idxX], m_indices[idxY], + m_indices[idxZ]); + const array kernel_dims(m_kernelImpl.dimensions()[idxX], + m_kernelImpl.dimensions()[idxY], + m_kernelImpl.dimensions()[idxZ]); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); LAUNCH_CUDA_KERNEL((EigenConvolutionKernel3D, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data); break; } default: { - assert(false && "not supported yet"); + EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE); } } } -- cgit v1.2.3 From 678207e02a35e32f2098fcb2cb8a510c9ee191e1 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 31 Mar 2015 09:08:08 -0700 Subject: Added regression tests for tensor convolutions --- unsupported/test/cxx11_tensor_convolution.cpp | 58 ++--- unsupported/test/cxx11_tensor_cuda.cpp | 313 ++++++++++++++------------ 2 files changed, 193 insertions(+), 178 deletions(-) diff --git a/unsupported/test/cxx11_tensor_convolution.cpp b/unsupported/test/cxx11_tensor_convolution.cpp index 4672db463..3a12dae62 100644 --- a/unsupported/test/cxx11_tensor_convolution.cpp +++ b/unsupported/test/cxx11_tensor_convolution.cpp @@ -14,15 +14,16 @@ using Eigen::Tensor; using Eigen::DefaultDevice; +template static void test_evals() { - Tensor input(3, 3); - Tensor kernel(2); + Tensor input(3, 3); + Tensor kernel(2); input.setRandom(); kernel.setRandom(); - Tensor result(2,3); + Tensor result(2,3); result.setZero(); Eigen::array::Index, 1> dims3({0}); @@ -41,15 +42,15 @@ static void test_evals() VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5 } - +template static void test_expr() { - Tensor input(3, 3); - Tensor kernel(2, 2); + Tensor input(3, 3); + Tensor kernel(2, 2); input.setRandom(); kernel.setRandom(); - Tensor result(2,2); + Tensor result(2,2); Eigen::array dims({0, 1}); result = input.convolve(kernel, dims); @@ -63,10 +64,10 @@ static void test_expr() input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1)); } - +template static void test_modes() { - Tensor input(3); - Tensor kernel(3); + Tensor input(3); + Tensor kernel(3); input(0) = 1.0f; input(1) = 2.0f; input(2) = 3.0f; @@ -74,13 +75,13 @@ static void test_modes() { kernel(1) = 1.0f; kernel(2) = 0.0f; - const Eigen::array dims{{0}}; + const Eigen::array dims({0}); Eigen::array, 1> padding; // Emulate VALID mode (as defined in // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). 
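  // (Editorial note: VALID keeps only the positions where the kernel fully
  // overlaps the input, so the output length is input - kernel + 1 = 3 - 3 + 1 = 1,
  // which is what the VERIFY below checks.)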
padding[0] = std::make_pair(0, 0); - Tensor valid(1); + Tensor valid(1); valid = input.pad(padding).convolve(kernel, dims); VERIFY_IS_EQUAL(valid.dimension(0), 1); VERIFY_IS_APPROX(valid(0), 2.5f); @@ -88,7 +89,7 @@ static void test_modes() { // Emulate SAME mode (as defined in // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). padding[0] = std::make_pair(1, 1); - Tensor same(3); + Tensor same(3); same = input.pad(padding).convolve(kernel, dims); VERIFY_IS_EQUAL(same.dimension(0), 3); VERIFY_IS_APPROX(same(0), 1.0f); @@ -98,7 +99,7 @@ static void test_modes() { // Emulate FULL mode (as defined in // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). padding[0] = std::make_pair(2, 2); - Tensor full(5); + Tensor full(5); full = input.pad(padding).convolve(kernel, dims); VERIFY_IS_EQUAL(full.dimension(0), 5); VERIFY_IS_APPROX(full(0), 0.0f); @@ -108,18 +109,18 @@ static void test_modes() { VERIFY_IS_APPROX(full(4), 1.5f); } - +template static void test_strides() { - Tensor input(13); - Tensor kernel(3); + Tensor input(13); + Tensor kernel(3); input.setRandom(); kernel.setRandom(); - const Eigen::array dims{{0}}; - const Eigen::array stride_of_3{{3}}; - const Eigen::array stride_of_2{{2}}; + const Eigen::array dims({0}); + const Eigen::array stride_of_3({3}); + const Eigen::array stride_of_2({2}); - Tensor result; + Tensor result; result = input.stride(stride_of_3).convolve(kernel, dims).stride(stride_of_2); VERIFY_IS_EQUAL(result.dimension(0), 2); @@ -129,13 +130,14 @@ static void test_strides() { input(12)*kernel(2))); } - - - void test_cxx11_tensor_convolution() { - CALL_SUBTEST(test_evals()); - CALL_SUBTEST(test_expr()); - CALL_SUBTEST(test_modes()); - CALL_SUBTEST(test_strides()); + CALL_SUBTEST(test_evals()); + CALL_SUBTEST(test_evals()); + CALL_SUBTEST(test_expr()); + CALL_SUBTEST(test_expr()); + CALL_SUBTEST(test_modes()); + CALL_SUBTEST(test_modes()); + CALL_SUBTEST(test_strides()); + CALL_SUBTEST(test_strides()); } diff --git a/unsupported/test/cxx11_tensor_cuda.cpp b/unsupported/test/cxx11_tensor_cuda.cpp index 8c1ca1bf8..78934165f 100644 --- a/unsupported/test/cxx11_tensor_cuda.cpp +++ b/unsupported/test/cxx11_tensor_cuda.cpp @@ -117,11 +117,10 @@ void test_cuda_elementwise() } } - void test_cuda_reduction() { - Tensor in1(Eigen::array(72,53,97,113)); - Tensor out(Eigen::array(72,97)); + Tensor in1(72,53,97,113); + Tensor out(72,97); in1.setRandom(); std::size_t in1_bytes = in1.size() * sizeof(float); @@ -138,8 +137,8 @@ void test_cuda_reduction() assert(cudaStreamCreate(&stream) == cudaSuccess); Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap > gpu_in1(d_in1, Eigen::array(72,53,97,113)); - Eigen::TensorMap > gpu_out(d_out, Eigen::array(72,97)); + Eigen::TensorMap > gpu_in1(d_in1, 72,53,97,113); + Eigen::TensorMap > gpu_out(d_out, 72,97); array reduction_axis; reduction_axis[0] = 1; @@ -156,10 +155,10 @@ void test_cuda_reduction() for (int k = 0; k < 53; ++k) { for (int l = 0; l < 113; ++l) { expected = - std::max(expected, in1(Eigen::array(i, k, j, l))); + std::max(expected, in1(i, k, j, l)); } } - VERIFY_IS_APPROX(out(Eigen::array(i,j)), expected); + VERIFY_IS_APPROX(out(i,j), expected); } } } @@ -170,7 +169,7 @@ static void test_cuda_contraction() // with these dimensions, the output has 300 * 140 elements, which is // more than 30 * 1024, which is the number of threads in blocks on // a 15 SM GK110 GPU - Tensor t_left(Eigen::array(6, 50, 3, 31)); + Tensor t_left(6, 50, 3, 31); Tensor t_right(Eigen::array(3, 31, 7, 20, 
1)); Tensor t_result(Eigen::array(6, 50, 7, 20, 1)); @@ -196,12 +195,9 @@ static void test_cuda_contraction() assert(cudaStreamCreate(&stream) == cudaSuccess); Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap > - gpu_t_left(d_t_left, Eigen::array(6, 50, 3, 31)); - Eigen::TensorMap > - gpu_t_right(d_t_right, Eigen::array(3, 31, 7, 20, 1)); - Eigen::TensorMap > - gpu_t_result(d_t_result, Eigen::array(6, 50, 7, 20, 1)); + Eigen::TensorMap > gpu_t_left(d_t_left, 6, 50, 3, 31); + Eigen::TensorMap > gpu_t_right(d_t_right, 3, 31, 7, 20, 1); + Eigen::TensorMap > gpu_t_result(d_t_result, 6, 50, 7, 20, 1); typedef Eigen::Map > MapXf; MapXf m_left(t_left.data(), 300, 93); @@ -226,11 +222,12 @@ static void test_cuda_contraction() } } +template static void test_cuda_convolution_1d() { - Tensor input(Eigen::array(74,37,11,137)); - Tensor kernel(Eigen::array(4)); - Tensor out(Eigen::array(74,34,11,137)); + Tensor input(74,37,11,137); + Tensor kernel(4); + Tensor out(74,34,11,137); input = input.constant(10.0f) + input.random(); kernel = kernel.constant(7.0f) + kernel.random(); @@ -252,9 +249,9 @@ static void test_cuda_convolution_1d() assert(cudaStreamCreate(&stream) == cudaSuccess); Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap > gpu_input(d_input, Eigen::array(74,37,11,137)); - Eigen::TensorMap > gpu_kernel(d_kernel, Eigen::array(4)); - Eigen::TensorMap > gpu_out(d_out, Eigen::array(74,34,11,137)); + Eigen::TensorMap > gpu_input(d_input, 74,37,11,137); + Eigen::TensorMap > gpu_kernel(d_kernel, 4); + Eigen::TensorMap > gpu_out(d_out, 74,34,11,137); Eigen::array dims(1); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); @@ -266,11 +263,9 @@ static void test_cuda_convolution_1d() for (int j = 0; j < 34; ++j) { for (int k = 0; k < 11; ++k) { for (int l = 0; l < 137; ++l) { - const float result = out(Eigen::array(i,j,k,l)); - const float expected = input(Eigen::array(i,j+0,k,l)) * kernel(Eigen::array(0)) + - input(Eigen::array(i,j+1,k,l)) * kernel(Eigen::array(1)) + - input(Eigen::array(i,j+2,k,l)) * kernel(Eigen::array(2)) + - input(Eigen::array(i,j+3,k,l)) * kernel(Eigen::array(3)); + const float result = out(i,j,k,l); + const float expected = input(i,j+0,k,l) * kernel(0) + input(i,j+1,k,l) * kernel(1) + + input(i,j+2,k,l) * kernel(2) + input(i,j+3,k,l) * kernel(3); VERIFY_IS_APPROX(result, expected); } } @@ -278,12 +273,11 @@ static void test_cuda_convolution_1d() } } - -static void test_cuda_convolution_2d() +static void test_cuda_convolution_inner_dim_col_major_1d() { - Tensor input(Eigen::array(74,37,11,137)); - Tensor kernel(Eigen::array(3,4)); - Tensor out(Eigen::array(74,35,8,137)); + Tensor input(74,9,11,7); + Tensor kernel(4); + Tensor out(71,9,11,7); input = input.constant(10.0f) + input.random(); kernel = kernel.constant(7.0f) + kernel.random(); @@ -305,46 +299,35 @@ static void test_cuda_convolution_2d() assert(cudaStreamCreate(&stream) == cudaSuccess); Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap > gpu_input(d_input, Eigen::array(74,37,11,137)); - Eigen::TensorMap > gpu_kernel(d_kernel, Eigen::array(3,4)); - Eigen::TensorMap > gpu_out(d_out, Eigen::array(74,35,8,137)); + Eigen::TensorMap > gpu_input(d_input,74,9,11,7); + Eigen::TensorMap > gpu_kernel(d_kernel,4); + Eigen::TensorMap > gpu_out(d_out,71,9,11,7); - Eigen::array dims(1,2); + Eigen::array dims(0); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); 
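  // Editorial note: the asynchronous copy above is only guaranteed complete
  // after the stream synchronization below; the host-side loop that follows
  // then checks a VALID 1D convolution along dimension 1 (37 - 4 + 1 = 34 outputs).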
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); - for (int i = 0; i < 74; ++i) { - for (int j = 0; j < 35; ++j) { - for (int k = 0; k < 8; ++k) { - for (int l = 0; l < 137; ++l) { - const float result = out(Eigen::array(i,j,k,l)); - const float expected = input(Eigen::array(i,j+0,k+0,l)) * kernel(Eigen::array(0,0)) + - input(Eigen::array(i,j+1,k+0,l)) * kernel(Eigen::array(1,0)) + - input(Eigen::array(i,j+2,k+0,l)) * kernel(Eigen::array(2,0)) + - input(Eigen::array(i,j+0,k+1,l)) * kernel(Eigen::array(0,1)) + - input(Eigen::array(i,j+1,k+1,l)) * kernel(Eigen::array(1,1)) + - input(Eigen::array(i,j+2,k+1,l)) * kernel(Eigen::array(2,1)) + - input(Eigen::array(i,j+0,k+2,l)) * kernel(Eigen::array(0,2)) + - input(Eigen::array(i,j+1,k+2,l)) * kernel(Eigen::array(1,2)) + - input(Eigen::array(i,j+2,k+2,l)) * kernel(Eigen::array(2,2)) + - input(Eigen::array(i,j+0,k+3,l)) * kernel(Eigen::array(0,3)) + - input(Eigen::array(i,j+1,k+3,l)) * kernel(Eigen::array(1,3)) + - input(Eigen::array(i,j+2,k+3,l)) * kernel(Eigen::array(2,3)); - VERIFY_IS_APPROX(result, expected); + for (int i = 0; i < 71; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 11; ++k) { + for (int l = 0; l < 7; ++l) { + const float result = out(i,j,k,l); + const float expected = input(i+0,j,k,l) * kernel(0) + input(i+1,j,k,l) * kernel(1) + + input(i+2,j,k,l) * kernel(2) + input(i+3,j,k,l) * kernel(3); + VERIFY_IS_APPROX(result, expected); } } } } } - -static void test_cuda_convolution_3d() +static void test_cuda_convolution_inner_dim_row_major_1d() { - Tensor input(Eigen::array(74,37,11,137,17)); - Tensor kernel(Eigen::array(3,4,2)); - Tensor out(Eigen::array(74,35,8,136,17)); + Tensor input(7,9,11,74); + Tensor kernel(4); + Tensor out(7,9,11,71); input = input.constant(10.0f) + input.random(); kernel = kernel.constant(7.0f) + kernel.random(); @@ -366,139 +349,166 @@ static void test_cuda_convolution_3d() assert(cudaStreamCreate(&stream) == cudaSuccess); Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap > gpu_input(d_input, Eigen::array(74,37,11,137,17)); - Eigen::TensorMap > gpu_kernel(d_kernel, Eigen::array(3,4,2)); - Eigen::TensorMap > gpu_out(d_out, Eigen::array(74,35,8,136,17)); + Eigen::TensorMap > gpu_input(d_input, 7,9,11,74); + Eigen::TensorMap > gpu_kernel(d_kernel, 4); + Eigen::TensorMap > gpu_out(d_out, 7,9,11,71); - Eigen::array dims(1,2,3); + Eigen::array dims(3); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); - for (int i = 0; i < 74; ++i) { - for (int j = 0; j < 35; ++j) { - for (int k = 0; k < 8; ++k) { - for (int l = 0; l < 136; ++l) { - for (int m = 0; m < 17; ++m) { - const float result = out(Eigen::array(i,j,k,l,m)); - const float expected = input(Eigen::array(i,j+0,k+0,l+0,m)) * kernel(Eigen::array(0,0,0)) + - input(Eigen::array(i,j+1,k+0,l+0,m)) * kernel(Eigen::array(1,0,0)) + - input(Eigen::array(i,j+2,k+0,l+0,m)) * kernel(Eigen::array(2,0,0)) + - input(Eigen::array(i,j+0,k+1,l+0,m)) * kernel(Eigen::array(0,1,0)) + - input(Eigen::array(i,j+1,k+1,l+0,m)) * kernel(Eigen::array(1,1,0)) + - input(Eigen::array(i,j+2,k+1,l+0,m)) * kernel(Eigen::array(2,1,0)) + - input(Eigen::array(i,j+0,k+2,l+0,m)) * kernel(Eigen::array(0,2,0)) + - input(Eigen::array(i,j+1,k+2,l+0,m)) * kernel(Eigen::array(1,2,0)) + - input(Eigen::array(i,j+2,k+2,l+0,m)) * kernel(Eigen::array(2,2,0)) + - 
input(Eigen::array(i,j+0,k+3,l+0,m)) * kernel(Eigen::array(0,3,0)) + - input(Eigen::array(i,j+1,k+3,l+0,m)) * kernel(Eigen::array(1,3,0)) + - input(Eigen::array(i,j+2,k+3,l+0,m)) * kernel(Eigen::array(2,3,0)) + - input(Eigen::array(i,j+0,k+0,l+1,m)) * kernel(Eigen::array(0,0,1)) + - input(Eigen::array(i,j+1,k+0,l+1,m)) * kernel(Eigen::array(1,0,1)) + - input(Eigen::array(i,j+2,k+0,l+1,m)) * kernel(Eigen::array(2,0,1)) + - input(Eigen::array(i,j+0,k+1,l+1,m)) * kernel(Eigen::array(0,1,1)) + - input(Eigen::array(i,j+1,k+1,l+1,m)) * kernel(Eigen::array(1,1,1)) + - input(Eigen::array(i,j+2,k+1,l+1,m)) * kernel(Eigen::array(2,1,1)) + - input(Eigen::array(i,j+0,k+2,l+1,m)) * kernel(Eigen::array(0,2,1)) + - input(Eigen::array(i,j+1,k+2,l+1,m)) * kernel(Eigen::array(1,2,1)) + - input(Eigen::array(i,j+2,k+2,l+1,m)) * kernel(Eigen::array(2,2,1)) + - input(Eigen::array(i,j+0,k+3,l+1,m)) * kernel(Eigen::array(0,3,1)) + - input(Eigen::array(i,j+1,k+3,l+1,m)) * kernel(Eigen::array(1,3,1)) + - input(Eigen::array(i,j+2,k+3,l+1,m)) * kernel(Eigen::array(2,3,1)); - VERIFY_IS_APPROX(result, expected); - } + for (int i = 0; i < 7; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 11; ++k) { + for (int l = 0; l < 71; ++l) { + const float result = out(i,j,k,l); + const float expected = input(i,j,k,l+0) * kernel(0) + input(i,j,k,l+1) * kernel(1) + + input(i,j,k,l+2) * kernel(2) + input(i,j,k,l+3) * kernel(3); + VERIFY_IS_APPROX(result, expected); } } } } } -static float* CudaCopyFloat(float* data, int size) { - const int nbytes = size * sizeof(float); - float* result = NULL; - if (cudaMalloc((void**)(&result), nbytes) != cudaSuccess) { - return NULL; - } else { - if (data != NULL) { - cudaMemcpy(result, data, nbytes, cudaMemcpyHostToDevice); - } - return result; - } -} - -static void test_cuda_constant_broadcast() +template +static void test_cuda_convolution_2d() { + Tensor input(74,37,11,137); + Tensor kernel(3,4); + Tensor out(74,35,8,137); + input = input.constant(10.0f) + input.random(); + kernel = kernel.constant(7.0f) + kernel.random(); + + std::size_t input_bytes = input.size() * sizeof(float); + std::size_t kernel_bytes = kernel.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_input; + float* d_kernel; + float* d_out; + cudaMalloc((void**)(&d_input), input_bytes); + cudaMalloc((void**)(&d_kernel), kernel_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); + cudaStream_t stream; assert(cudaStreamCreate(&stream) == cudaSuccess); Eigen::GpuDevice gpu_device(&stream); - Tensor t1(10); - for (int i = 0; i < 10; ++i) { - t1(i) = 10.0f * i; - } - float* t1_cuda = CudaCopyFloat(t1.data(), t1.size()); - Eigen::TensorMap > t1_gpu(t1_cuda, 10); - - Tensor t2(1); - t2 = t2.constant(20.0f); - float* t2_cuda = CudaCopyFloat(t2.data(), t2.size()); - Eigen::TensorMap > > t2_gpu(t2_cuda, 1); + Eigen::TensorMap > gpu_input(d_input,74,37,11,137); + Eigen::TensorMap > gpu_kernel(d_kernel,3,4); + Eigen::TensorMap > gpu_out(d_out,74,35,8,137); - float* t3_cuda = CudaCopyFloat(NULL, 10); - Eigen::TensorMap > t3_gpu(t3_cuda, 10); - - t3_gpu.device(gpu_device) = - t1_gpu + t2_gpu.broadcast(Eigen::array(10)); + Eigen::array dims(1,2); + gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); - Eigen::Tensor t3(10); - cudaMemcpy(t3.data(), t3_gpu.data(), 10 * sizeof(float), - cudaMemcpyDeviceToHost); + 
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); - for (int i = 0; i < 10; ++i) { - VERIFY_IS_APPROX(t3(i), t1(i) + t2(0)); + for (int i = 0; i < 74; ++i) { + for (int j = 0; j < 35; ++j) { + for (int k = 0; k < 8; ++k) { + for (int l = 0; l < 137; ++l) { + const float result = out(i,j,k,l); + const float expected = input(i,j+0,k+0,l) * kernel(0,0) + + input(i,j+1,k+0,l) * kernel(1,0) + + input(i,j+2,k+0,l) * kernel(2,0) + + input(i,j+0,k+1,l) * kernel(0,1) + + input(i,j+1,k+1,l) * kernel(1,1) + + input(i,j+2,k+1,l) * kernel(2,1) + + input(i,j+0,k+2,l) * kernel(0,2) + + input(i,j+1,k+2,l) * kernel(1,2) + + input(i,j+2,k+2,l) * kernel(2,2) + + input(i,j+0,k+3,l) * kernel(0,3) + + input(i,j+1,k+3,l) * kernel(1,3) + + input(i,j+2,k+3,l) * kernel(2,3); + VERIFY_IS_APPROX(result, expected); + } + } + } } } - -void test_cuda_cast() +template +static void test_cuda_convolution_3d() { - Tensor in(Eigen::array(72,53,97)); - Tensor out(Eigen::array(72,53,97)); - in.setRandom(); + Tensor input(Eigen::array(74,37,11,137,17)); + Tensor kernel(3,4,2); + Tensor out(Eigen::array(74,35,8,136,17)); + input = input.constant(10.0f) + input.random(); + kernel = kernel.constant(7.0f) + kernel.random(); - std::size_t in_bytes = in.size() * sizeof(double); + std::size_t input_bytes = input.size() * sizeof(float); + std::size_t kernel_bytes = kernel.size() * sizeof(float); std::size_t out_bytes = out.size() * sizeof(float); - double* d_in; + float* d_input; + float* d_kernel; float* d_out; - cudaMalloc((void**)(&d_in), in_bytes); + cudaMalloc((void**)(&d_input), input_bytes); + cudaMalloc((void**)(&d_kernel), kernel_bytes); cudaMalloc((void**)(&d_out), out_bytes); - cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); cudaStream_t stream; assert(cudaStreamCreate(&stream) == cudaSuccess); Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap > gpu_in(d_in, Eigen::array(72,53,97)); - Eigen::TensorMap > gpu_out(d_out, Eigen::array(72,53,97)); + Eigen::TensorMap > gpu_input(d_input,74,37,11,137,17); + Eigen::TensorMap > gpu_kernel(d_kernel,3,4,2); + Eigen::TensorMap > gpu_out(d_out,74,35,8,136,17); - gpu_out.device(gpu_device) = gpu_in.template cast(); + Eigen::array dims(1,2,3); + gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); - for (int i = 0; i < 72; ++i) { - for (int j = 0; j < 53; ++j) { - for (int k = 0; k < 97; ++k) { - VERIFY_IS_APPROX(out(Eigen::array(i,j,k)), static_cast(in(Eigen::array(i,j,k)))); + for (int i = 0; i < 74; ++i) { + for (int j = 0; j < 35; ++j) { + for (int k = 0; k < 8; ++k) { + for (int l = 0; l < 136; ++l) { + for (int m = 0; m < 17; ++m) { + const float result = out(i,j,k,l,m); + const float expected = input(i,j+0,k+0,l+0,m) * kernel(0,0,0) + + input(i,j+1,k+0,l+0,m) * kernel(1,0,0) + + input(i,j+2,k+0,l+0,m) * kernel(2,0,0) + + input(i,j+0,k+1,l+0,m) * kernel(0,1,0) + + input(i,j+1,k+1,l+0,m) * kernel(1,1,0) + + input(i,j+2,k+1,l+0,m) * kernel(2,1,0) + + input(i,j+0,k+2,l+0,m) * kernel(0,2,0) + + input(i,j+1,k+2,l+0,m) * kernel(1,2,0) + + input(i,j+2,k+2,l+0,m) * kernel(2,2,0) + + 
input(i,j+0,k+3,l+0,m) * kernel(0,3,0) + + input(i,j+1,k+3,l+0,m) * kernel(1,3,0) + + input(i,j+2,k+3,l+0,m) * kernel(2,3,0) + + input(i,j+0,k+0,l+1,m) * kernel(0,0,1) + + input(i,j+1,k+0,l+1,m) * kernel(1,0,1) + + input(i,j+2,k+0,l+1,m) * kernel(2,0,1) + + input(i,j+0,k+1,l+1,m) * kernel(0,1,1) + + input(i,j+1,k+1,l+1,m) * kernel(1,1,1) + + input(i,j+2,k+1,l+1,m) * kernel(2,1,1) + + input(i,j+0,k+2,l+1,m) * kernel(0,2,1) + + input(i,j+1,k+2,l+1,m) * kernel(1,2,1) + + input(i,j+2,k+2,l+1,m) * kernel(2,2,1) + + input(i,j+0,k+3,l+1,m) * kernel(0,3,1) + + input(i,j+1,k+3,l+1,m) * kernel(1,3,1) + + input(i,j+2,k+3,l+1,m) * kernel(2,3,1); + VERIFY_IS_APPROX(result, expected); + } + } } } } } - void test_cxx11_tensor_cuda() { CALL_SUBTEST(test_cuda_elementwise_small()); @@ -506,9 +516,12 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST(test_cuda_reduction()); CALL_SUBTEST(test_cuda_contraction()); CALL_SUBTEST(test_cuda_contraction()); - CALL_SUBTEST(test_cuda_convolution_1d()); - CALL_SUBTEST(test_cuda_convolution_2d()); - CALL_SUBTEST(test_cuda_convolution_3d()); - CALL_SUBTEST(test_cuda_constant_broadcast()); - CALL_SUBTEST(test_cuda_cast()); + CALL_SUBTEST(test_cuda_convolution_1d()); + CALL_SUBTEST(test_cuda_convolution_1d()); + CALL_SUBTEST(test_cuda_convolution_inner_dim_col_major_1d()); + CALL_SUBTEST(test_cuda_convolution_inner_dim_row_major_1d()); + CALL_SUBTEST(test_cuda_convolution_2d()); + CALL_SUBTEST(test_cuda_convolution_2d()); + CALL_SUBTEST(test_cuda_convolution_3d()); + CALL_SUBTEST(test_cuda_convolution_3d()); } -- cgit v1.2.3 From 20d030f207a8f92d75e8e4aca9d515b9939aa58a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 31 Mar 2015 20:16:02 +0200 Subject: Fix vectorization of swap for non trivial expressions --- Eigen/src/Core/Swap.h | 8 ++++++-- Eigen/src/Core/functors/AssignmentFunctors.h | 8 -------- test/swap.cpp | 6 ++++-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h index dcb42821f..3880f7b78 100644 --- a/Eigen/src/Core/Swap.h +++ b/Eigen/src/Core/Swap.h @@ -38,13 +38,17 @@ public: template void assignPacket(Index row, Index col) { - m_functor.template swapPacket(&m_dst.coeffRef(row,col), &const_cast(m_src).coeffRef(row,col)); + PacketScalar tmp = m_src.template packet(row,col); + const_cast(m_src).template writePacket(row,col, m_dst.template packet(row,col)); + m_dst.template writePacket(row,col,tmp); } template void assignPacket(Index index) { - m_functor.template swapPacket(&m_dst.coeffRef(index), &const_cast(m_src).coeffRef(index)); + PacketScalar tmp = m_src.template packet(index); + const_cast(m_src).template writePacket(index, m_dst.template packet(index)); + m_dst.template writePacket(index,tmp); } // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael) diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h index 161b0aa93..d55ae6096 100644 --- a/Eigen/src/Core/functors/AssignmentFunctors.h +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -150,14 +150,6 @@ template struct swap_assign_op { swap(a,const_cast(b)); #endif } - - template - EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const - { - Packet tmp = internal::ploadt(b); - internal::pstoret(b, internal::ploadt(a)); - internal::pstoret(a, tmp); - } }; template struct functor_traits > { diff --git a/test/swap.cpp b/test/swap.cpp index dc3610085..5d6f0e6af 100644 --- a/test/swap.cpp +++ 
b/test/swap.cpp @@ -82,8 +82,10 @@ template void swap(const MatrixType& m) void test_swap() { + int s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_1( swap(Matrix3f()) ); // fixed size, no vectorization CALL_SUBTEST_2( swap(Matrix4d()) ); // fixed size, possible vectorization - CALL_SUBTEST_3( swap(MatrixXd(3,3)) ); // dyn size, no vectorization - CALL_SUBTEST_4( swap(MatrixXf(30,30)) ); // dyn size, possible vectorization + CALL_SUBTEST_3( swap(MatrixXd(s,s)) ); // dyn size, no vectorization + CALL_SUBTEST_4( swap(MatrixXf(s,s)) ); // dyn size, possible vectorization + TEST_SET_BUT_UNUSED_VARIABLE(s) } -- cgit v1.2.3 From dfb674a25ead137118eebf0230c4c8a4c81db5d0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 31 Mar 2015 20:17:10 +0200 Subject: Make reverseInPlace really work in-place. --- Eigen/src/Core/Reverse.h | 21 ++++++++++++++++++++- test/array_reverse.cpp | 22 +++++++++++++++++----- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index 291300a4a..b3fba9704 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -210,7 +210,26 @@ DenseBase::reverse() const template inline void DenseBase::reverseInPlace() { - derived() = derived().reverse().eval(); + if(cols()>rows()) + { + Index half = cols()/2; + leftCols(half).swap(rightCols(half).reverse()); + if((cols()%2)==1) + { + Index half2 = rows()/2; + col(half).head(half2).swap(col(half).tail(half2).reverse()); + } + } + else + { + Index half = rows()/2; + topRows(half).swap(bottomRows(half).reverse()); + if((rows()%2)==1) + { + Index half2 = cols()/2; + row(half).head(half2).swap(row(half).tail(half2).reverse()); + } + } } } // end namespace Eigen diff --git a/test/array_reverse.cpp b/test/array_reverse.cpp index fbe7a9901..9ba19246b 100644 --- a/test/array_reverse.cpp +++ b/test/array_reverse.cpp @@ -24,7 +24,7 @@ template void reverse(const MatrixType& m) // this test relies a lot on Random.h, and there's not much more that we can do // to test it, hence I consider that we will have tested Random.h - MatrixType m1 = MatrixType::Random(rows, cols); + MatrixType m1 = MatrixType::Random(rows, cols), m2; VectorType v1 = VectorType::Random(rows); MatrixType m1_r = m1.reverse(); @@ -96,6 +96,18 @@ template void reverse(const MatrixType& m) m1.reverse()(r, c) = x; VERIFY_IS_APPROX(x, m1(rows - 1 - r, cols - 1 - c)); + + m2 = m1; + m2.reverseInPlace(); + VERIFY_IS_APPROX(m2,m1.reverse().eval()); + + m2 = m1; + m2.col(0).reverseInPlace(); + VERIFY_IS_APPROX(m2.col(0),m1.col(0).reverse().eval()); + + m2 = m1; + m2.row(0).reverseInPlace(); + VERIFY_IS_APPROX(m2.row(0),m1.row(0).reverse().eval()); /* m1.colwise().reverse()(r, c) = x; @@ -113,11 +125,11 @@ void test_array_reverse() CALL_SUBTEST_2( reverse(Matrix2f()) ); CALL_SUBTEST_3( reverse(Matrix4f()) ); CALL_SUBTEST_4( reverse(Matrix4d()) ); - CALL_SUBTEST_5( reverse(MatrixXcf(3, 3)) ); - CALL_SUBTEST_6( reverse(MatrixXi(6, 3)) ); - CALL_SUBTEST_7( reverse(MatrixXcd(20, 20)) ); + CALL_SUBTEST_5( reverse(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( reverse(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( reverse(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_8( reverse(Matrix()) ); - CALL_SUBTEST_9( reverse(Matrix(6,3)) ); + CALL_SUBTEST_9( reverse(Matrix(internal::random(1,EIGEN_TEST_MAX_SIZE), 
internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } #ifdef EIGEN_TEST_PART_3 Vector4f x; x << 1, 2, 3, 4; -- cgit v1.2.3 From 8313fb7df7f5f116834b412d6a6f5aff8862a173 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 31 Mar 2015 21:35:53 +0200 Subject: Add row/column-wise reverseInPlace feature. --- Eigen/src/Core/Reverse.h | 52 ++++++++++++++++++++++++++++++++++++++++--- Eigen/src/Core/VectorwiseOp.h | 2 ++ test/array_reverse.cpp | 8 +++++++ 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index b3fba9704..5237fbf1c 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -200,13 +200,13 @@ DenseBase::reverse() const * In most cases it is probably better to simply use the reversed expression * of a matrix. However, when reversing the matrix data itself is really needed, * then this "in-place" version is probably the right choice because it provides - * the following additional features: + * the following additional benefits: * - less error prone: doing the same operation with .reverse() requires special care: * \code m = m.reverse().eval(); \endcode - * - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap) + * - this API enables reverse operations without the need for a temporary * - it allows future optimizations (cache friendliness, etc.) * - * \sa reverse() */ + * \sa VectorwiseOp::reverseInPlace(), reverse() */ template inline void DenseBase::reverseInPlace() { @@ -232,6 +232,52 @@ inline void DenseBase::reverseInPlace() } } +namespace internal { + +template +struct vectorwise_reverse_inplace_impl; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.rows()/2; + xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse()); + } +}; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.cols()/2; + xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse()); + } +}; + +} // end namespace internal + +/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this. + * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. 
However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional benefits: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API enables reverse operations without the need for a temporary + * + * \sa DenseBase::reverseInPlace(), reverse() */ +template +void VectorwiseOp::reverseInPlace() +{ + internal::vectorwise_reverse_inplace_impl::run(_expression().const_cast_derived()); +} + } // end namespace Eigen #endif // EIGEN_REVERSE_H diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index a15777a5e..ea3d8f4b1 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -562,6 +562,8 @@ template class VectorwiseOp void normalize() { m_matrix = this->normalized(); } + + inline void reverseInPlace(); /////////// Geometry module /////////// diff --git a/test/array_reverse.cpp b/test/array_reverse.cpp index 9ba19246b..a5c0d37f9 100644 --- a/test/array_reverse.cpp +++ b/test/array_reverse.cpp @@ -108,6 +108,14 @@ template void reverse(const MatrixType& m) m2 = m1; m2.row(0).reverseInPlace(); VERIFY_IS_APPROX(m2.row(0),m1.row(0).reverse().eval()); + + m2 = m1; + m2.rowwise().reverseInPlace(); + VERIFY_IS_APPROX(m2,m1.rowwise().reverse().eval()); + + m2 = m1; + m2.colwise().reverseInPlace(); + VERIFY_IS_APPROX(m2,m1.colwise().reverse().eval()); /* m1.colwise().reverse()(r, c) = x; -- cgit v1.2.3 From 3c38589984b784687944872534f48f4e0ae22d6c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 31 Mar 2015 22:54:47 +0200 Subject: Remove most of the dynamic memory allocations that occured in D&C SVD. Still remains the calls to JacobiSVD and UpperBidiagonalization. --- Eigen/IterativeLinearSolvers | 2 +- Eigen/src/SVD/BDCSVD.h | 94 +++++++++++++++++++++++++++----------------- 2 files changed, 59 insertions(+), 37 deletions(-) diff --git a/Eigen/IterativeLinearSolvers b/Eigen/IterativeLinearSolvers index 7fab9eed0..f5fdcd9e5 100644 --- a/Eigen/IterativeLinearSolvers +++ b/Eigen/IterativeLinearSolvers @@ -17,7 +17,7 @@ * * These iterative solvers are associated with some preconditioners: * - IdentityPreconditioner - not really useful - * - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices. + * - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices. * - IncompleteLUT - incomplete LU factorization with dual thresholding * * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport. diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index cace915e7..9b141c8df 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h @@ -84,6 +84,8 @@ public: typedef Matrix VectorType; typedef Array ArrayXr; typedef Array ArrayXi; + typedef Ref ArrayRef; + typedef Ref IndicesRef; /** \brief Default Constructor. 
* @@ -159,21 +161,23 @@ private: void allocate(Index rows, Index cols, unsigned int computationOptions); void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift); void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V); - void computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, VectorType& singVals, ArrayXr& shifts, ArrayXr& mus); - void perturbCol0(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat); - void computeSingVecs(const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V); + void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals, ArrayRef shifts, ArrayRef mus); + void perturbCol0(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat); + void computeSingVecs(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V); void deflation43(Index firstCol, Index shift, Index i, Index size); void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size); void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift); template void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev); - static void structured_update(Block A, const MatrixXr &B, Index n1); - static RealScalar secularEq(RealScalar x, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift); + void structured_update(Block A, const MatrixXr &B, Index n1); + static RealScalar secularEq(RealScalar x, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift); protected: MatrixXr m_naiveU, m_naiveV; MatrixXr m_computed; Index m_nRec; + ArrayXr m_workspace; + ArrayXi m_workspaceI; int m_algoswap; bool m_isTranspose, m_compU, m_compV; @@ -212,6 +216,9 @@ void BDCSVD::allocate(Index rows, Index cols, unsigned int computati else m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 ); if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize); + + m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3); + m_workspaceI.resize(3*m_diagSize); }// end allocate template @@ -226,6 +233,7 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign //**** step -1 - If the problem is too small, directly falls back to JacobiSVD and return if(matrix.cols() < m_algoswap) { + // FIXME this line involves temporaries JacobiSVD jsvd(matrix,computationOptions); if(computeU()) m_matrixU = jsvd.matrixU(); if(computeV()) m_matrixV = jsvd.matrixV(); @@ -243,11 +251,13 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign else copy = matrix/scale; //**** step 1 - Bidiagonalization + // FIXME this line involves temporaries internal::UpperBidiagonalization bid(copy); //**** step 2 - Divide & Conquer m_naiveU.setZero(); m_naiveV.setZero(); + // FIXME this line involves a temporary matrix m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose(); m_computed.template bottomRows<1>().setZero(); divide(0, m_diagSize - 1, 0, 0, 0); @@ 
-292,14 +302,14 @@ void BDCSVD::copyUV(const HouseholderU &householderU, const Househol Index Ucols = m_computeThinU ? m_diagSize : householderU.cols(); m_matrixU = MatrixX::Identity(householderU.cols(), Ucols); m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast().topLeftCorner(m_diagSize, m_diagSize); - householderU.applyThisOnTheLeft(m_matrixU); + householderU.applyThisOnTheLeft(m_matrixU); // FIXME this line involves a temporary buffer } if (computeV()) { Index Vcols = m_computeThinV ? m_diagSize : householderV.cols(); m_matrixV = MatrixX::Identity(householderV.cols(), Vcols); m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast().topLeftCorner(m_diagSize, m_diagSize); - householderV.applyThisOnTheLeft(m_matrixV); + householderV.applyThisOnTheLeft(m_matrixV); // FIXME this line involves a temporary buffer } } @@ -320,7 +330,10 @@ void BDCSVD::structured_update(Block A, co // If the matrices are large enough, let's exploit the sparse structure of A by // splitting it in half (wrt n1), and packing the non-zero columns. Index n2 = n - n1; - MatrixXr A1(n1,n), A2(n2,n), B1(n,n), B2(n,n); + Map A1(m_workspace.data() , n1, n); + Map A2(m_workspace.data()+ n1*n, n2, n); + Map B1(m_workspace.data()+ n*n, n, n); + Map B2(m_workspace.data()+2*n*n, n, n); Index k1=0, k2=0; for(Index j=0; j::structured_update(Block A, co A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2); } else - A *= B; // FIXME this requires a temporary + { + Map tmp(m_workspace.data(),n,n); + tmp.noalias() = A*B; + A = tmp; + } } // The divide algorithm is done "in place", we are always working on subsets of the same matrix. The divide methods takes as argument the @@ -373,7 +390,8 @@ void BDCSVD::divide (Index firstCol, Index lastCol, Index firstRowW, // matrices. if (n < m_algoswap) { - JacobiSVD b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0)) ; + // FIXME this line involves temporaries + JacobiSVD b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0)); if (m_compU) m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU(); else @@ -504,8 +522,14 @@ void BDCSVD::divide (Index firstCol, Index lastCol, Index firstRowW, assert(VofSVD.allFinite()); #endif - if (m_compU) structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2); - else m_naiveU.middleCols(firstCol, n + 1) *= UofSVD; // FIXME this requires a temporary, and exploit that there are 2 rows at compile time + if (m_compU) + structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2); + else + { + Map,Aligned> tmp(m_workspace.data(),2,n+1); + tmp.noalias() = m_naiveU.middleCols(firstCol, n+1) * UofSVD; + m_naiveU.middleCols(firstCol, n + 1) = tmp; + } if (m_compV) structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2); @@ -530,10 +554,9 @@ void BDCSVD::divide (Index firstCol, Index lastCol, Index firstRowW, template void BDCSVD::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V) { - // TODO Get rid of these copies (?) 
- // FIXME at least preallocate them - ArrayXr col0 = m_computed.col(firstCol).segment(firstCol, n); - ArrayXr diag = m_computed.block(firstCol, firstCol, n, n).diagonal(); + ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); + m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal(); + ArrayRef diag = m_workspace.head(n); diag(0) = 0; // Allocate space for singular values and vectors @@ -552,13 +575,14 @@ void BDCSVD::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec Index actual_n = n; while(actual_n>1 && diag(actual_n-1)==0) --actual_n; Index m = 0; // size of the deflated problem - ArrayXi perm(actual_n); for(Index k=0;k perm(m_workspaceI.data(),m); - ArrayXr shifts(n), mus(n), zhat(n); + Map shifts(m_workspace.data()+1*n, n); + Map mus(m_workspace.data()+2*n, n); + Map zhat(m_workspace.data()+3*n, n); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "computeSVDofM using:\n"; @@ -635,8 +659,8 @@ void BDCSVD::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec // Reverse order so that singular values in increased order // Because of deflation, the zeros singular-values are already at the end singVals.head(actual_n).reverseInPlace(); - U.leftCols(actual_n) = U.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary - if (m_compV) V.leftCols(actual_n) = V.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary + U.leftCols(actual_n).rowwise().reverseInPlace(); + if (m_compV) V.leftCols(actual_n).rowwise().reverseInPlace(); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE JacobiSVD jsvd(m_computed.block(firstCol, firstCol, n, n) ); @@ -647,7 +671,7 @@ void BDCSVD::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec } template -typename BDCSVD::RealScalar BDCSVD::secularEq(RealScalar mu, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift) +typename BDCSVD::RealScalar BDCSVD::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift) { Index m = perm.size(); RealScalar res = 1; @@ -660,8 +684,8 @@ typename BDCSVD::RealScalar BDCSVD::secularEq(RealScalar } template -void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, - VectorType& singVals, ArrayXr& shifts, ArrayXr& mus) +void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, + VectorType& singVals, ArrayRef shifts, ArrayRef mus) { using std::abs; using std::swap; @@ -716,7 +740,8 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia RealScalar shift = (k == actual_n-1 || fMid > 0) ? 
left : right; // measure everything relative to shift - ArrayXr diagShifted = diag - shift; + Map diagShifted(m_workspace.data()+4*n, n); + diagShifted = diag - shift; // initial guess RealScalar muPrev, muCur; @@ -831,8 +856,8 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia // zhat is perturbation of col0 for which singular vectors can be computed stably (see Section 3.1) template void BDCSVD::perturbCol0 - (const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals, - const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat) + (const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals, + const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat) { using std::sqrt; Index n = col0.size(); @@ -880,8 +905,8 @@ void BDCSVD::perturbCol0 // compute singular vectors template void BDCSVD::computeSingVecs - (const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals, - const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V) + (const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals, + const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V) { Index n = zhat.size(); Index m = perm.size(); @@ -1062,7 +1087,7 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index // Sort the diagonal entries, since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge. // First, compute the respective permutation. - Index *permutation = new Index[length]; // FIXME avoid repeated dynamic memory allocation + Index *permutation = m_workspaceI.data(); { permutation[0] = 0; Index p = 1; @@ -1099,8 +1124,8 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index } // Current index of each col, and current column of each index - Index *realInd = new Index[length]; // FIXME avoid repeated dynamic memory allocation - Index *realCol = new Index[length]; // FIXME avoid repeated dynamic memory allocation + Index *realInd = m_workspaceI.data()+length; + Index *realCol = m_workspaceI.data()+2*length; for(int pos = 0; pos< length; pos++) { @@ -1130,9 +1155,6 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index realInd[J] = realI; realInd[i] = pi; } - delete[] permutation; - delete[] realInd; - delete[] realCol; } #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n"; -- cgit v1.2.3 From 79b4e6acaf9f81155f4cdda9eb4bb6fe12aef5d7 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 31 Mar 2015 23:35:12 +0200 Subject: Fix bug #987: wrong alignment guess in diagonal product. --- Eigen/src/Core/ProductEvaluators.h | 3 +-- test/diagonalmatrices.cpp | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index d84e7776b..7960c274b 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -678,8 +678,7 @@ public: //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ?
PacketAccessBit : 0) | AlignedBit - //(int(MatrixFlags)&int(DiagFlags)&AlignedBit), + Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) }; diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) diff --git a/test/diagonalmatrices.cpp b/test/diagonalmatrices.cpp index 0227ba577..cd6dc8cf0 100644 --- a/test/diagonalmatrices.cpp +++ b/test/diagonalmatrices.cpp @@ -17,6 +17,7 @@ template void diagonalmatrices(const MatrixType& m) typedef Matrix VectorType; typedef Matrix RowVectorType; typedef Matrix SquareMatrixType; + typedef Matrix DynMatrixType; typedef DiagonalMatrix LeftDiagonalMatrix; typedef DiagonalMatrix RightDiagonalMatrix; typedef Matrix BigMatrix; @@ -64,6 +65,13 @@ template void diagonalmatrices(const MatrixType& m) VERIFY_IS_APPROX( (((v1+v2).asDiagonal() * (m1+m2))(i,j)) , (v1+v2)(i) * (m1+m2)(i,j) ); VERIFY_IS_APPROX( ((m1 * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * m1(i,j) ); VERIFY_IS_APPROX( (((m1+m2) * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * (m1+m2)(i,j) ); + + if(rows>1) + { + DynMatrixType tmp = m1.topRows(rows/2), res; + VERIFY_IS_APPROX( (res = m1.topRows(rows/2) * rv1.asDiagonal()), tmp * rv1.asDiagonal() ); + VERIFY_IS_APPROX( (res = v1.head(rows/2).asDiagonal()*m1.topRows(rows/2)), v1.head(rows/2).asDiagonal()*tmp ); + } BigMatrix big; big.setZero(2*rows, 2*cols); @@ -93,6 +101,17 @@ template void diagonalmatrices(const MatrixType& m) VERIFY_IS_APPROX( (sq_m1 = (s1*v1).asDiagonal()), (s1*v1).asDiagonal().toDenseMatrix() ); } +template +void bug987() +{ + Matrix3Xd points = Matrix3Xd::Random(3, 3); + Vector2d diag = Vector2d::Random(); + Matrix2Xd tmp1 = points.topRows<2>(), res1, res2; + VERIFY_IS_APPROX( res1 = diag.asDiagonal() * points.topRows<2>(), res2 = diag.asDiagonal() * tmp1 ); + Matrix2d tmp2 = points.topLeftCorner<2,2>(); + VERIFY_IS_APPROX(( res1 = points.topLeftCorner<2,2>()*diag.asDiagonal()) , res2 = tmp2*diag.asDiagonal() ); +} + void test_diagonalmatrices() { for(int i = 0; i < g_repeat; i++) { @@ -106,4 +125,5 @@ void test_diagonalmatrices() CALL_SUBTEST_8( diagonalmatrices(Matrix(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_9( diagonalmatrices(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } + CALL_SUBTEST_10( bug987<0>() ); } -- cgit v1.2.3 From 8481dc21eada115b20116b17826a761208602b02 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 1 Apr 2015 13:15:23 +0200 Subject: bug #986: add support for coefficient-based product with 0 depth. --- Eigen/src/Core/ProductEvaluators.h | 41 +++++++++++++++++++++++++++----------- test/product_extra.cpp | 35 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 7960c274b..22b5e024b 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -409,7 +409,8 @@ struct product_evaluator, ProductTag, DenseShape, LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, - CoeffReadCost = (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits::AddCost==Dynamic || NumTraits::MulCost==Dynamic) ? Dynamic + CoeffReadCost = InnerSize==0 ? 
NumTraits::ReadCost + : (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits::AddCost==Dynamic || NumTraits::MulCost==Dynamic) ? Dynamic : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits::AddCost, @@ -484,7 +485,7 @@ struct product_evaluator, ProductTag, DenseShape, { PacketScalar res; typedef etor_product_packet_impl PacketImpl; PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); @@ -527,7 +528,7 @@ struct etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(pset1(lhs.coeff(row, UnrollingIndex)), rhs.template packet(UnrollingIndex, col), res); + res = pmadd(pset1(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet(UnrollingIndex-1, col), res); } }; @@ -537,12 +538,12 @@ struct etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(lhs.template packet(row, UnrollingIndex), pset1(rhs.coeff(UnrollingIndex, col)), res); + res = pmadd(lhs.template packet(row, UnrollingIndex-1), pset1(rhs.coeff(UnrollingIndex-1, col)), res); } }; template -struct etor_product_packet_impl +struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { @@ -551,7 +552,7 @@ struct etor_product_packet_impl }; template -struct etor_product_packet_impl +struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { @@ -559,14 +560,31 @@ struct etor_product_packet_impl } }; +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(0); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(0); + } +}; + template struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - eigen_assert(innerDim>0 && "you are using a non initialized matrix"); - res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); - for(Index i = 1; i < innerDim; ++i) + res = pset1(0); + for(Index i = 0; i < innerDim; ++i) res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); } }; @@ -576,9 +594,8 @@ struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - eigen_assert(innerDim>0 && "you are using a non initialized matrix"); - res = pmul(lhs.template packet(row, 0), pset1(rhs.coeff(0, col))); - for(Index i = 1; i < innerDim; ++i) + res = pset1(0); + for(Index i = 0; i < innerDim; ++i) res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); } }; diff --git a/test/product_extra.cpp b/test/product_extra.cpp index 67ea13568..7c54b6977 100644 --- a/test/product_extra.cpp +++ b/test/product_extra.cpp @@ -113,6 +113,9 @@ void mat_mat_scalar_scalar_product() template void zero_sized_objects(const MatrixType& m) { + typedef typename MatrixType::Scalar Scalar; + const int PacketSize = internal::packet_traits::size; + const int PacketSize1 = PacketSize>1 ? 
PacketSize-1 : 1; Index rows = m.rows(); Index cols = m.cols(); @@ -132,6 +135,38 @@ void zero_sized_objects(const MatrixType& m) res = b*a; VERIFY(res.rows()==0 && res.cols()==cols); } + + { + Matrix a; + Matrix b; + Matrix res; + VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize,1) ); + VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize,1) ); + } + + { + Matrix a; + Matrix b; + Matrix res; + VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize1,1) ); + VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize1,1) ); + } + + { + Matrix a(PacketSize,0); + Matrix b(0,1); + Matrix res; + VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize,1) ); + VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize,1) ); + } + + { + Matrix a(PacketSize1,0); + Matrix b(0,1); + Matrix res; + VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize1,1) ); + VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize1,1) ); + } } template -- cgit v1.2.3 From 39dcd01b0ac8556d1d46d5d897bdefa82cf5d91c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 1 Apr 2015 13:55:09 +0200 Subject: bug #973: enable alignment of multiples of half-packet size (e.g., Vector6d with AVX) --- Eigen/src/Core/DenseStorage.h | 10 +++++----- test/unalignedassert.cpp | 29 ++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index ab41641f4..8fcc83a5a 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -35,22 +35,22 @@ void check_static_allocation_size() } template::type, - bool Match = bool((Size%unpacket_traits::size)==0), - bool TryHalf = bool(int(unpacket_traits::size) > Size) + bool Match = bool((Size%unpacket_traits::size)==0), + bool TryHalf = bool(int(unpacket_traits::size) > 1) && bool(int(unpacket_traits::size) > int(unpacket_traits::half>::size)) > struct compute_default_alignment { enum { value = 0 }; }; -template -struct compute_default_alignment // Match +template +struct compute_default_alignment // Match { enum { value = sizeof(T) * unpacket_traits::size }; }; template -struct compute_default_alignment +struct compute_default_alignment // Try-half { // current packet too large, try with an half-packet enum { value = compute_default_alignment::half>::value }; diff --git a/test/unalignedassert.cpp b/test/unalignedassert.cpp index 6f7b72167..9c6f0bc8f 100644 --- a/test/unalignedassert.cpp +++ b/test/unalignedassert.cpp @@ -9,7 +9,17 @@ #include "main.h" -typedef Matrix Vector8f; +typedef Matrix Vector6f; +typedef Matrix Vector8f; +typedef Matrix Vector12f; + +typedef Matrix Vector5d; +typedef Matrix Vector6d; +typedef Matrix Vector7d; +typedef Matrix Vector8d; +typedef Matrix Vector9d; +typedef Matrix Vector10d; +typedef Matrix Vector12d; struct TestNew1 { @@ -85,6 +95,9 @@ void unalignedassert() construct_at_boundary(4); construct_at_boundary(4); construct_at_boundary(16); + construct_at_boundary(4); + construct_at_boundary(EIGEN_ALIGN_BYTES); + construct_at_boundary(16); construct_at_boundary(16); construct_at_boundary(4); construct_at_boundary(EIGEN_ALIGN_BYTES); @@ -92,6 +105,13 @@ void unalignedassert() construct_at_boundary(16); construct_at_boundary(4); construct_at_boundary(EIGEN_ALIGN_BYTES); + construct_at_boundary(4); + construct_at_boundary(16); + construct_at_boundary(4); + construct_at_boundary(EIGEN_ALIGN_BYTES); + construct_at_boundary(4); + construct_at_boundary(16); + 
construct_at_boundary(EIGEN_ALIGN_BYTES); construct_at_boundary(EIGEN_ALIGN_BYTES); construct_at_boundary(4); construct_at_boundary(EIGEN_ALIGN_BYTES); @@ -115,7 +135,14 @@ void unalignedassert() if(EIGEN_ALIGN_BYTES>=16) { VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); } -- cgit v1.2.3 From 3105986e7125b659385ace69b95c1a38464cb157 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 1 Apr 2015 22:27:34 +0200 Subject: bug #875: remove broken SparseMatrixBase::nonZeros and introduce a nonZerosEstimate() method to sparse evaluators for internal uses. Factorize some code in SparseCompressedBase. --- .../SparseCore/ConservativeSparseSparseProduct.h | 8 +++--- Eigen/src/SparseCore/SparseBlock.h | 32 +++++++++++----------- Eigen/src/SparseCore/SparseCompressedBase.h | 23 ++++++++++++++++ Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 20 ++++++++++++-- Eigen/src/SparseCore/SparseCwiseUnaryOp.h | 4 +++ Eigen/src/SparseCore/SparseMap.h | 3 -- Eigen/src/SparseCore/SparseMatrix.h | 12 +------- Eigen/src/SparseCore/SparseMatrixBase.h | 3 -- .../SparseCore/SparseSparseProductWithPruning.h | 16 +++++------ Eigen/src/SparseCore/SparseTranspose.h | 8 +++--- Eigen/src/SparseCore/SparseTriangularView.h | 11 +++----- Eigen/src/SparseCore/SparseVector.h | 4 +++ test/sparse_product.cpp | 5 +++- 13 files changed, 90 insertions(+), 59 deletions(-) diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 244f1b50e..d25a161f7 100644 --- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -30,16 +30,16 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r std::memset(mask,0,sizeof(bool)*rows); + typename evaluator::type lhsEval(lhs); + typename evaluator::type rhsEval(rhs); + // estimate the number of non zero entries // given a rhs column containing Y non zeros, we assume that the respective Y columns // of the lhs differs in average of one non zeros, thus the number of non zeros for // the product of a rhs column with the lhs is X+Y where X is the average number of non zero // per column of the lhs. 
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) - Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros(); - - typename evaluator::type lhsEval(lhs); - typename evaluator::type rhsEval(rhs); + Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate(); res.setZero(); res.reserve(Index(estimated_nnz_prod)); diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index e5ef10212..71f4b37b7 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -90,7 +90,8 @@ class sparse_matrix_block_impl typedef Block BlockType; public: enum { IsRowMajor = internal::traits::IsRowMajor }; - EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) + typedef SparseCompressedBase > Base; + _EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) protected: typedef typename Base::IndexVector IndexVector; enum { OuterSize = IsRowMajor ? BlockRows : BlockCols }; @@ -198,20 +199,9 @@ public: { return m_matrix.const_cast_derived().outerIndexPtr() + m_outerStart; } inline const StorageIndex* innerNonZeroPtr() const - { return isCompressed() ? 0 : m_matrix.innerNonZeroPtr(); } + { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); } inline StorageIndex* innerNonZeroPtr() - { return isCompressed() ? 0 : m_matrix.const_cast_derived().innerNonZeroPtr(); } - - Index nonZeros() const - { - if(m_matrix.isCompressed()) - return ( (m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]) - - (m_matrix.outerIndexPtr()[m_outerStart])); - else if(m_outerSize.value()==0) - return 0; - else - return Map(m_matrix.innerNonZeroPtr()+m_outerStart, m_outerSize.value()).sum(); - } + { return isCompressed() ? 0 : (m_matrix.const_cast_derived().innerNonZeroPtr()+m_outerStart); } bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; } @@ -233,7 +223,7 @@ public: const Scalar& lastCoeff() const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(sparse_matrix_block_impl); - eigen_assert(nonZeros()>0); + eigen_assert(Base::nonZeros()>0); if(m_matrix.isCompressed()) return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart+1]-1]; else @@ -417,6 +407,9 @@ public: protected: friend class internal::GenericSparseBlockInnerIteratorImpl; friend class ReverseInnerIterator; + friend struct internal::unary_evaluator, internal::IteratorBased, Scalar >; + + Index nonZeros() const { return Dynamic; } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) @@ -548,9 +541,16 @@ struct unary_evaluator, IteratorBa explicit unary_evaluator(const XprType& op) : m_argImpl(op.nestedExpression()), m_block(op) {} + + inline Index nonZerosEstimate() const { + Index nnz = m_block.nonZeros(); + if(nnz<0) + return m_argImpl.nonZerosEstimate() * m_block.size() / m_block.nestedExpression().size(); + return nnz; + } protected: - typedef typename evaluator::InnerIterator EvalIterator; + typedef typename evaluator::InnerIterator EvalIterator; typename evaluator::nestedType m_argImpl; const XprType &m_block; diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h index a5ba45e04..0dbb94faf 100644 --- a/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/Eigen/src/SparseCore/SparseCompressedBase.h @@ -35,6 +35,25 @@ class SparseCompressedBase class InnerIterator; class ReverseInnerIterator; + protected: + typedef typename Base::IndexVector IndexVector; + Eigen::Map innerNonZeros() { return Eigen::Map(innerNonZeroPtr(), isCompressed()?0:derived().outerSize()); } + const Eigen::Map innerNonZeros() const { return Eigen::Map(innerNonZeroPtr(), 
isCompressed()?0:derived().outerSize()); } + + public: + + /** \returns the number of non zero coefficients */ + inline Index nonZeros() const + { + if(isCompressed()) + return outerIndexPtr()[derived().outerSize()]-outerIndexPtr()[0]; + else if(derived().outerSize()==0) + return 0; + else + return innerNonZeros().sum(); + + } + /** \returns a const pointer to the array of values. * This function is aimed at interoperability with other libraries. * \sa innerIndexPtr(), outerIndexPtr() */ @@ -165,6 +184,10 @@ struct evaluator > evaluator() : m_matrix(0) {} explicit evaluator(const Derived &mat) : m_matrix(&mat) {} + inline Index nonZerosEstimate() const { + return m_matrix->nonZeros(); + } + operator Derived&() { return m_matrix->const_cast_derived(); } operator const Derived&() const { return *m_matrix; } diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index 3b4e9df59..f53427abf 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -121,6 +121,10 @@ public: m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) { } + + inline Index nonZerosEstimate() const { + return m_lhsImpl.nonZerosEstimate() + m_rhsImpl.nonZerosEstimate(); + } protected: const BinaryOp m_functor; @@ -198,6 +202,10 @@ public: m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) { } + + inline Index nonZerosEstimate() const { + return (std::min)(m_lhsImpl.nonZerosEstimate(), m_rhsImpl.nonZerosEstimate()); + } protected: const BinaryOp m_functor; @@ -243,7 +251,7 @@ public: EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); } EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; } - + protected: const LhsEvaluator &m_lhsEval; RhsIterator m_rhsIter; @@ -262,6 +270,10 @@ public: m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) { } + + inline Index nonZerosEstimate() const { + return m_rhsImpl.nonZerosEstimate(); + } protected: const BinaryOp m_functor; @@ -308,7 +320,7 @@ public: EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); } EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; } - + protected: LhsIterator m_lhsIter; const RhsEvaluator &m_rhsEval; @@ -327,6 +339,10 @@ public: m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) { } + + inline Index nonZerosEstimate() const { + return m_lhsImpl.nonZerosEstimate(); + } protected: const BinaryOp m_functor; diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h index 63d8f329c..d484be876 100644 --- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h @@ -30,6 +30,10 @@ struct unary_evaluator, IteratorBased> }; explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {} + + inline Index nonZerosEstimate() const { + return m_argImpl.nonZerosEstimate(); + } protected: typedef typename evaluator::InnerIterator EvalIterator; diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h index a6ff7d559..7c512d9fe 100644 --- a/Eigen/src/SparseCore/SparseMap.h +++ b/Eigen/src/SparseCore/SparseMap.h @@ -105,9 +105,6 @@ class SparseMapBase return ((*r==inner) && (id Base; using Base::isCompressed; + using Base::nonZeros; _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix) EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, +=) EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, -=) @@ -122,9 +123,6 @@ class SparseMatrix StorageIndex* m_outerIndex; StorageIndex* m_innerNonZeros; // optional, if null then the data is compressed 
Storage m_data; - - Eigen::Map innerNonZeros() { return Eigen::Map(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); } - const Eigen::Map innerNonZeros() const { return Eigen::Map(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); } public: @@ -252,14 +250,6 @@ class SparseMatrix memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex)); } - /** \returns the number of non zero coefficients */ - inline Index nonZeros() const - { - if(m_innerNonZeros) - return innerNonZeros().sum(); - return convert_index(Index(m_data.size())); - } - /** Preallocates \a reserveSize non zeros. * * Precondition: the matrix must be in compressed mode. */ diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 55b0ad9d2..d4ab8b908 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -149,9 +149,6 @@ template class SparseMatrixBase : public EigenBase /** \returns the number of coefficients, which is \a rows()*cols(). * \sa rows(), cols(). */ inline Index size() const { return rows() * cols(); } - /** \returns the number of nonzero coefficients which is in practice the number - * of stored coefficients. */ - inline Index nonZeros() const { return derived().nonZeros(); } /** \returns true if either the number of rows or the number of columns is equal to 1. * In other words, this function returns * \code rows()==1 || cols()==1 \endcode diff --git a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h index 3db01bf2d..48050077e 100644 --- a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +++ b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h @@ -33,14 +33,6 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r // allocate a temporary buffer AmbiVector tempVector(rows); - // estimate the number of non zero entries - // given a rhs column containing Y non zeros, we assume that the respective Y columns - // of the lhs differs in average of one non zeros, thus the number of non zeros for - // the product of a rhs column with the lhs is X+Y where X is the average number of non zero - // per column of the lhs. - // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) - Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros(); - // mimics a resizeByInnerOuter: if(ResultType::IsRowMajor) res.resize(cols, rows); @@ -49,6 +41,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r typename evaluator::type lhsEval(lhs); typename evaluator::type rhsEval(rhs); + + // estimate the number of non zero entries + // given a rhs column containing Y non zeros, we assume that the respective Y columns + // of the lhs differs in average of one non zeros, thus the number of non zeros for + // the product of a rhs column with the lhs is X+Y where X is the average number of non zero + // per column of the lhs. + // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) + Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate(); res.reserve(estimated_nnz_prod); double ratioColRes = double(estimated_nnz_prod)/double(lhs.rows()*rhs.cols()); diff --git a/Eigen/src/SparseCore/SparseTranspose.h b/Eigen/src/SparseCore/SparseTranspose.h index 45d9c6700..d3fc7f102 100644 --- a/Eigen/src/SparseCore/SparseTranspose.h +++ b/Eigen/src/SparseCore/SparseTranspose.h @@ -40,15 +40,11 @@ namespace internal { }; } -// Implement nonZeros() for transpose. I'm not sure that's the best approach for that. 
-// Perhaps it should be implemented in Transpose<> itself. template class TransposeImpl : public internal::SparseTransposeImpl { protected: typedef internal::SparseTransposeImpl Base; - public: - inline Index nonZeros() const { return Base::derived().nestedExpression().nonZeros(); } }; namespace internal { @@ -61,6 +57,10 @@ struct unary_evaluator, IteratorBased> typedef typename evaluator::ReverseInnerIterator EvalReverseIterator; public: typedef Transpose XprType; + + inline Index nonZerosEstimate() const { + return m_argImpl.nonZerosEstimate(); + } class InnerIterator : public EvalIterator { diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h index b5fbcbdde..34ec07a13 100644 --- a/Eigen/src/SparseCore/SparseTriangularView.h +++ b/Eigen/src/SparseCore/SparseTriangularView.h @@ -50,13 +50,6 @@ protected: template void solveInPlace(MatrixBase& other) const; template void solveInPlace(SparseMatrixBase& other) const; - - inline Index nonZeros() const { - // FIXME HACK number of nonZeros is required for product logic - // this returns only an upper bound (but should be OK for most purposes) - return derived().nestedExpression().nonZeros(); - } - }; @@ -191,6 +184,10 @@ public: explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()) {} + inline Index nonZerosEstimate() const { + return m_argImpl.nonZerosEstimate(); + } + class InnerIterator : public EvalIterator { typedef EvalIterator Base; diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index 35bcec819..7b65f32bc 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -442,6 +442,10 @@ struct evaluator > explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) {} + inline Index nonZerosEstimate() const { + return m_matrix.nonZeros(); + } + operator SparseVectorType&() { return m_matrix.const_cast_derived(); } operator const SparseVectorType&() const { return m_matrix; } diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp index 480a660fc..3bad3def7 100644 --- a/test/sparse_product.cpp +++ b/test/sparse_product.cpp @@ -67,6 +67,9 @@ template void sparse_product() VERIFY_IS_APPROX(m4 = m2*m3/s1, refMat4 = refMat2*refMat3/s1); VERIFY_IS_APPROX(m4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1); VERIFY_IS_APPROX(m4 = s2*m2*m3*s1, refMat4 = s2*refMat2*refMat3*s1); + VERIFY_IS_APPROX(m4 = (m2+m2)*m3, refMat4 = (refMat2+refMat2)*refMat3); + VERIFY_IS_APPROX(m4 = m2*m3.leftCols(cols/2), refMat4 = refMat2*refMat3.leftCols(cols/2)); + VERIFY_IS_APPROX(m4 = m2*(m3+m3).leftCols(cols/2), refMat4 = refMat2*(refMat3+refMat3).leftCols(cols/2)); VERIFY_IS_APPROX(m4=(m2*m3).pruned(0), refMat4=refMat2*refMat3); VERIFY_IS_APPROX(m4=(m2t.transpose()*m3).pruned(0), refMat4=refMat2t.transpose()*refMat3); @@ -194,7 +197,7 @@ template void sparse_product() VERIFY_IS_APPROX(d3=d1*m2.transpose(), refM3=d1*refM2.transpose()); } - // test self-adjoint and traingular-view products + // test self-adjoint and triangular-view products { DenseMatrix b = DenseMatrix::Random(rows, rows); DenseMatrix x = DenseMatrix::Random(rows, rows); -- cgit v1.2.3 From 5861cfb55e2242b13e474cdd86fa0c0e2b4ac7e9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 1 Apr 2015 22:29:29 +0200 Subject: Remove unused GenericSparseBlockInnerIteratorImpl code. 
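(A minimal usage sketch of the nonZerosEstimate() machinery introduced for bug #875 above. This touches internal API, so the exact spelling of internal::evaluator below is an assumption against this development branch, not a documented interface; the matrix sizes and entries are illustrative.)

#include <Eigen/SparseCore>
#include <iostream>

int main()
{
  using namespace Eigen;
  SparseMatrix<double> a(100,100), b(100,100);
  a.insert(3,5) = 1.0;
  a.insert(7,7) = 2.0;
  b.insert(7,3) = 4.0;
  a.makeCompressed();
  b.makeCompressed();

  // Storage-backed objects keep an exact nonZeros(), now hosted in SparseCompressedBase...
  std::cout << a.nonZeros() << std::endl;          // 2

  // ...while expressions only promise a cheap estimate through their evaluator,
  // which is how conservative_sparse_sparse_product_impl now sizes its result:
  internal::evaluator<SparseMatrix<double> > aEval(a), bEval(b);
  Index estimated_nnz_prod = aEval.nonZerosEstimate() + bEval.nonZerosEstimate();

  SparseMatrix<double> c(100,100);
  c.reserve(estimated_nnz_prod);
  c = a * b;                                        // single non-zero: c(7,3) = 8
  std::cout << c.nonZeros() << std::endl;           // 1
  return 0;
}

The design point is that only storage-backed objects (SparseMatrix, SparseVector, and compressed maps/blocks of them) can report an exact count cheaply; arbitrary sparse expressions now expose an upper-bound estimate instead of the removed, broken SparseMatrixBase::nonZeros().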
--- Eigen/src/SparseCore/SparseBlock.h | 101 +------------------------------------ 1 file changed, 1 insertion(+), 100 deletions(-) diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index 71f4b37b7..778939791 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -329,17 +329,6 @@ SparseMatrixBase::innerVectors(Index outerStart, Index outerSize) const } -namespace internal { - -template< typename XprType, int BlockRows, int BlockCols, bool InnerPanel, - bool OuterVector = (BlockCols==1 && XprType::IsRowMajor) - | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". - // revert to || as soon as not needed anymore. - (BlockRows==1 && !XprType::IsRowMajor)> -class GenericSparseBlockInnerIteratorImpl; - -} - /** Generic implementation of sparse Block expression. * Real-only. */ @@ -405,7 +394,7 @@ public: Index blockCols() const { return m_blockCols.value(); } protected: - friend class internal::GenericSparseBlockInnerIteratorImpl; +// friend class internal::GenericSparseBlockInnerIteratorImpl; friend class ReverseInnerIterator; friend struct internal::unary_evaluator, internal::IteratorBased, Scalar >; @@ -422,94 +411,6 @@ public: }; namespace internal { - template - class GenericSparseBlockInnerIteratorImpl : public Block::_MatrixTypeNested::InnerIterator - { - typedef Block BlockType; - enum { - IsRowMajor = BlockType::IsRowMajor - }; - typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested; - typedef typename BlockType::StorageIndex StorageIndex; - typedef typename _MatrixTypeNested::InnerIterator Base; - const BlockType& m_block; - Index m_end; - public: - - EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer) - : Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())), - m_block(block), - m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value()) - { - while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) ) - Base::operator++(); - } - - inline Index index() const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); } - inline Index outer() const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); } - inline Index row() const { return Base::row() - m_block.m_startRow.value(); } - inline Index col() const { return Base::col() - m_block.m_startCol.value(); } - - inline operator bool() const { return Base::operator bool() && Base::index() < m_end; } - }; - - // Row vector of a column-major sparse matrix or column of a row-major one. - template - class GenericSparseBlockInnerIteratorImpl - { - typedef Block BlockType; - enum { - IsRowMajor = BlockType::IsRowMajor - }; - typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested; - typedef typename BlockType::StorageIndex StorageIndex; - typedef typename BlockType::Scalar Scalar; - const BlockType& m_block; - Index m_outerPos; - Index m_innerIndex; - Scalar m_value; - Index m_end; - public: - - explicit EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0) - : - m_block(block), - m_outerPos( (IsRowMajor ? block.m_startCol.value() : block.m_startRow.value()) - 1), // -1 so that operator++ finds the first non-zero entry - m_innerIndex(IsRowMajor ? 
block.m_startRow.value() : block.m_startCol.value()), - m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value()) - { - EIGEN_UNUSED_VARIABLE(outer); - eigen_assert(outer==0); - - ++(*this); - } - - inline Index index() const { return m_outerPos - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); } - inline Index outer() const { return 0; } - inline Index row() const { return IsRowMajor ? 0 : index(); } - inline Index col() const { return IsRowMajor ? index() : 0; } - - inline Scalar value() const { return m_value; } - - inline GenericSparseBlockInnerIteratorImpl& operator++() - { - // search next non-zero entry - while(++m_outerPos struct unary_evaluator, IteratorBased > -- cgit v1.2.3 From 383b6dfafeb4024b3199cfb09151b1bb7835110b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 1 Apr 2015 16:44:36 -0700 Subject: Fixed 2 typos --- unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 5e805fd95..4d33aa2b6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -173,11 +173,11 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array& indices) const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array& indices) const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); } }; -- cgit v1.2.3 From b8b78072696699875619829d422698a632ed7b1e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 1 Apr 2015 21:48:18 -0700 Subject: Fixed some compilation warnings triggered by the cxx11 emulation code --- unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h index 494f95690..9dea2055a 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h @@ -266,16 +266,16 @@ array repeat(t v) { } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(type_list& a) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(type_list&) { return get >::value; } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_list& a) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_list&) { return get >::value; } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList& l) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList&) { return arg_prod::value; }; -- cgit v1.2.3 From 03a0df20100d2b89b38a70d3b0b7a15a4a44b5de Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 1 Apr 2015 22:51:33 -0700 Subject: Fixed some compilation warnings triggered by pre-cxx11 compilers --- .../Eigen/CXX11/src/Tensor/TensorDimensions.h | 20
++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 4d33aa2b6..43917cbc3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -145,28 +145,28 @@ template - explicit Sizes(const array& indices) { + explicit Sizes(const array& /*indices*/) { // todo: add assertion } #ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template Sizes(DenseIndex... indices) { } - explicit Sizes(std::initializer_list l) { + template Sizes(DenseIndex... /*indices*/) { } + explicit Sizes(std::initializer_list) { // todo: add assertion } #else - EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0) { + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) { } - EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1) { + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex) { } - EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex, const DenseIndex) { } - EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { } - EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { } #endif - template Sizes& operator = (const T& other) { + template Sizes& operator = (const T&) { // to do: check the size of other return *this; } @@ -343,7 +343,7 @@ template struct array_size > { static const size_t value = Sizes::count; }; -template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes& a) { +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes&) { return get::Base>::value; }; -- cgit v1.2.3 From 15b5adb327e4cdb571990d1a3daf33df9e38125b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 2 Apr 2015 22:21:41 +0200 Subject: Fix regression in DynamicSparseMatrix and SuperLUSupport wrt recent change on nonZeros/nonZerosEstimate --- Eigen/src/SuperLUSupport/SuperLUSupport.h | 9 +++++---- unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index b9d5e48fb..1bbd2758e 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -165,8 +165,9 @@ struct SluMatrix : SuperMatrix } template - static SluMatrix Map(SparseMatrixBase& mat) + static SluMatrix Map(SparseMatrixBase& a_mat) { + MatrixType &mat(a_mat.derived()); SluMatrix res; if ((MatrixType::Flags&RowMajorBit)==RowMajorBit) { @@ -184,9 +185,9 @@ struct SluMatrix : SuperMatrix res.Mtype = SLU_GE; res.storage.nnz = internal::convert_index(mat.nonZeros()); - res.storage.values = mat.derived().valuePtr(); - res.storage.innerInd = mat.derived().innerIndexPtr(); - res.storage.outerInd = mat.derived().outerIndexPtr(); + res.storage.values = mat.valuePtr(); + res.storage.innerInd = mat.innerIndexPtr(); + res.storage.outerInd = mat.outerIndexPtr(); res.setScalarType(); diff --git 
a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h index e1284c782..54e0c5d63 100644 --- a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +++ b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h @@ -361,7 +361,6 @@ struct evaluator > : evaluator_base > { typedef _Scalar Scalar; - typedef _StorageIndex Index; typedef DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> SparseMatrixType; typedef typename SparseMatrixType::InnerIterator InnerIterator; typedef typename SparseMatrixType::ReverseInnerIterator ReverseInnerIterator; @@ -378,6 +377,8 @@ struct evaluator > operator const SparseMatrixType&() const { return *m_matrix; } Scalar coeff(Index row, Index col) const { return m_matrix->coeff(row,col); } + + Index nonZerosEstimate() const { return m_matrix->nonZeros(); } const SparseMatrixType *m_matrix; }; -- cgit v1.2.3 From a1f1e1e51df316d1b37733770f5e7ab17006113a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Apr 2015 10:41:39 -0700 Subject: Fixed the order of 2 #includes --- unsupported/Eigen/CXX11/Tensor | 2 +- unsupported/test/cxx11_tensor_index_list.cpp | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 200bcf966..ae6c3fe7e 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -49,8 +49,8 @@ #include "unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h" -#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h" diff --git a/unsupported/test/cxx11_tensor_index_list.cpp b/unsupported/test/cxx11_tensor_index_list.cpp index c4d4f244f..4ce5add32 100644 --- a/unsupported/test/cxx11_tensor_index_list.cpp +++ b/unsupported/test/cxx11_tensor_index_list.cpp @@ -255,6 +255,17 @@ static void test_mixed_index_list() VERIFY_IS_APPROX(result3(0), expected); } + +static void test_dim_check() +{ + Eigen::IndexList, int> dim1; + dim1.set(1, 2); + Eigen::IndexList, int> dim2; + dim2.set(1, 2); + VERIFY(dimensions_match(dim1, dim2)); +} + + #endif void test_cxx11_tensor_index_list() @@ -264,5 +275,6 @@ void test_cxx11_tensor_index_list() CALL_SUBTEST(test_type2index_list()); CALL_SUBTEST(test_dynamic_index_list()); CALL_SUBTEST(test_mixed_index_list()); + CALL_SUBTEST(test_dim_check()); #endif } -- cgit v1.2.3 From 1de49ef4c2d96acc1c96628fa52e2330cf54dc19 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 7 Apr 2015 10:44:13 -0700 Subject: Fixed a bug when chipping tensors laid out in row major order. 
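(To make the fix concrete before the diff: a hedged sketch of chip() semantics using only the public tensor API; the tensor sizes and the chosen chip dimension are illustrative. chip fixes one index to yield an (N-1)-dimensional slice, and a raw data() pointer can only be exposed when that slice is contiguous in memory — the last dimension for col-major storage, the first for row-major, which is exactly the condition this patch corrects.)

#include <unsupported/Eigen/CXX11/Tensor>

int main()
{
  // slice(i,j) == t(2,i,j); the offset (2) must be smaller than t.dimension(0),
  // which is what the new eigen_assert in the evaluator checks.
  Eigen::Tensor<float, 3, Eigen::RowMajor> t(4, 5, 6);
  t.setRandom();
  Eigen::Tensor<float, 2, Eigen::RowMajor> slice = t.chip<0>(2);

  // For this row-major tensor, only a chip along dimension 0 is a contiguous
  // block of memory; before the fix the evaluator tested the col-major
  // condition (last dimension) regardless of layout.
  return (slice.dimension(0) == 5 && slice.dimension(1) == 6) ? 0 : 1;
}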
--- .../Eigen/CXX11/src/Tensor/TensorChipping.h | 6 ++- unsupported/test/cxx11_tensor_chipping.cpp | 52 +++++++++++++++++----- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index dc9586cbc..3b99ef069 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -157,6 +157,8 @@ struct TensorEvaluator, Device> eigen_assert(NumInputDims > m_dim.actualDim()); const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + eigen_assert(op.offset() < input_dims[m_dim.actualDim()]); + int j = 0; for (int i = 0; i < NumInputDims; ++i) { if (i != m_dim.actualDim()) { @@ -246,7 +248,9 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { Scalar* result = m_impl.data(); - if (m_dim.actualDim() == NumDims && result) { + if (((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == NumDims) || + (static_cast(Layout) == static_cast(RowMajor) && m_dim.actualDim() == 0)) && + result) { return result + m_inputOffset; } else { return NULL; diff --git a/unsupported/test/cxx11_tensor_chipping.cpp b/unsupported/test/cxx11_tensor_chipping.cpp index d83417872..bfc2bad18 100644 --- a/unsupported/test/cxx11_tensor_chipping.cpp +++ b/unsupported/test/cxx11_tensor_chipping.cpp @@ -340,11 +340,9 @@ static void test_chip_as_lvalue() } } - -template -static void test_chip_raw_data() +static void test_chip_raw_data_col_major() { - Tensor tensor(2,3,5,7,11); + Tensor tensor(2,3,5,7,11); tensor.setRandom(); typedef TensorEvaluator(3)), DefaultDevice> Evaluator4; @@ -353,12 +351,7 @@ static void test_chip_raw_data() for (int j = 0; j < 3; ++j) { for (int k = 0; k < 5; ++k) { for (int l = 0; l < 7; ++l) { - int chip_index; - if (DataLayout == ColMajor) { - chip_index = i + 2 * (j + 3 * (k + 5 * l)); - } else { - chip_index = 11 * (l + 7 * (k + 5 * (j + 3 * i))); - } + int chip_index = i + 2 * (j + 3 * (k + 5 * l)); VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(i,j,k,l,3)); } } @@ -382,6 +375,41 @@ static void test_chip_raw_data() VERIFY_IS_EQUAL(chip3.data(), static_cast(0)); } +static void test_chip_raw_data_row_major() +{ + Tensor tensor(11,7,5,3,2); + tensor.setRandom(); + + typedef TensorEvaluator(3)), DefaultDevice> Evaluator0; + auto chip = Evaluator0(tensor.template chip<0>(3), DefaultDevice()); + for (int i = 0; i < 7; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 3; ++k) { + for (int l = 0; l < 2; ++l) { + int chip_index = l + 2 * (k + 3 * (j + 5 * i)); + VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(3,i,j,k,l)); + } + } + } + } + + typedef TensorEvaluator(0)), DefaultDevice> Evaluator1; + auto chip1 = Evaluator1(tensor.template chip<1>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip1.data(), static_cast(0)); + + typedef TensorEvaluator(0)), DefaultDevice> Evaluator2; + auto chip2 = Evaluator2(tensor.template chip<2>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip2.data(), static_cast(0)); + + typedef TensorEvaluator(0)), DefaultDevice> Evaluator3; + auto chip3 = Evaluator3(tensor.template chip<3>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip3.data(), static_cast(0)); + + typedef TensorEvaluator(0)), DefaultDevice> Evaluator4; + auto chip4 = Evaluator4(tensor.template chip<4>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip4.data(), static_cast(0)); +} + void test_cxx11_tensor_chipping() { CALL_SUBTEST(test_simple_chip()); @@ -392,6 +420,6 @@ void 
test_cxx11_tensor_chipping() CALL_SUBTEST(test_chip_in_expr()); CALL_SUBTEST(test_chip_as_lvalue()); CALL_SUBTEST(test_chip_as_lvalue()); - CALL_SUBTEST(test_chip_raw_data()); - CALL_SUBTEST(test_chip_raw_data()); + CALL_SUBTEST(test_chip_raw_data_col_major()); + CALL_SUBTEST(test_chip_raw_data_row_major()); } -- cgit v1.2.3 From 0e9753c8dfffd8e20a445d3f150b8180c0815419 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 7 Apr 2015 14:03:21 -0400 Subject: Fix compiler flags on Android/ARM: - generate position-independent code (PIE), a requirement to run binaries on Android 5.0+ devices; - correctly handle EIGEN_TEST_FMA + EIGEN_TEST_NEON to pass -mfpu=neon-vfpv4. --- CMakeLists.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c1ae428e..a28ad07d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,6 +168,11 @@ if(NOT MSVC) else() ei_add_cxx_compiler_flag("-ansi") endif() + + if(ANDROID_NDK) + ei_add_cxx_compiler_flag("-pie") + ei_add_cxx_compiler_flag("-fPIE") + endif() set(CMAKE_REQUIRED_FLAGS "") @@ -208,7 +213,7 @@ if(NOT MSVC) endif() option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF) - if(EIGEN_TEST_FMA) + if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma") message(STATUS "Enabling FMA in tests/examples") endif() @@ -227,7 +232,12 @@ if(NOT MSVC) option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF) if(EIGEN_TEST_NEON) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mfloat-abi=softfp") + if(EIGEN_TEST_FMA) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon-vfpv4") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon") + endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp") message(STATUS "Enabling NEON in tests/examples") endif() -- cgit v1.2.3 From d7f51feb0773cc3843ea8c29c605d4eea4bda4ac Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 7 Apr 2015 15:13:55 -0400 Subject: bug #992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 428527820..2b4c1242f 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -380,11 +380,12 @@ public: nr = 4, // register block size along the M direction (currently, this one cannot be modified) + default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize, #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) // we assume 16 registers - mr = 3*LhsPacketSize, + mr = Vectorizable ? 
3*LhsPacketSize : default_mr, #else - mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize, + mr = default_mr, #endif LhsProgress = LhsPacketSize, -- cgit v1.2.3 From 0eb220c00d9773c29c7d169ad0e20745b0ef21bb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 8 Apr 2015 09:25:34 +0200 Subject: add a note on bug #992 --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 2b4c1242f..24623963b 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -383,6 +383,8 @@ public: default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize, #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) // we assume 16 registers + // See bug 992, if the scalar type is not vectorizable but that EIGEN_HAS_SINGLE_INSTRUCTION_MADD is defined, + // then using 3*LhsPacketSize triggers non-implemented paths in syrk. mr = Vectorizable ? 3*LhsPacketSize : default_mr, #else mr = default_mr, -- cgit v1.2.3
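(A hedged repro sketch for the bug #992 scenario addressed by the last two commits: a scalar type with no SIMD packet, here long double, driven through the symm/syrk paths. Whether the broken 3*LhsPacketSize kernel was actually selected before the fix depended on building with EIGEN_HAS_SINGLE_INSTRUCTION_MADD, e.g. FMA or NEON; the matrix sizes are arbitrary.)

#include <Eigen/Dense>

int main()
{
  // long double has no packet type in Eigen, so Vectorizable is false and,
  // after the fix, gebp_traits falls back to default_mr instead of 3*LhsPacketSize.
  typedef Eigen::Matrix<long double, Eigen::Dynamic, Eigen::Dynamic> Mat;
  Mat a = Mat::Random(17, 17), b = Mat::Random(17, 17);

  Mat c = a.selfadjointView<Eigen::Lower>() * b;    // symm-style product
  Mat d = Mat::Zero(17, 17);
  d.selfadjointView<Eigen::Lower>().rankUpdate(a);  // syrk, as named in the note above

  return (c.allFinite() && d.allFinite()) ? 0 : 1;
}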