about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Benoit Jacob <jacob.benoit.1@gmail.com> 2008-04-18 07:57:46 +0000
committer: Benoit Jacob <jacob.benoit.1@gmail.com> 2008-04-18 07:57:46 +0000
commit: 6ae037dfb5b340d2d545ccbb4135b04903a2e44f (patch)
tree: 382dbc8f4e8e85cfc2f40c49b61ec996ad014d24
parent: acfd6f3bdad9f7a690f4fd860a637f1f488e619c (diff)
give up on OpenMP... for now
-rw-r--r-- Eigen/Core 7
-rw-r--r-- Eigen/src/Core/Assign.h 37
-rw-r--r-- Eigen/src/Core/Product.h 114
-rw-r--r-- Eigen/src/Core/util/Macros.h 28
-rw-r--r-- Eigen/src/LU/Inverse.h 1
-rw-r--r-- bench/benchmarkXcwise.cpp 20
6 files changed, 73 insertions, 134 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 6a315b09f..24dc37145 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -10,13 +10,6 @@
#endif
#endif
-#ifndef EIGEN_DONT_PARALLELIZE
-#ifdef _OPENMP
-#define EIGEN_USE_OPENMP
-#include <omp.h>
-#endif
-#endif
-
#include <cstdlib>
#include <cmath>
#include <complex>
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 1b6e928d2..d0f126689 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -135,11 +135,6 @@ Derived& MatrixBase<Derived>
}
}
-template<typename T1, typename T2> bool ei_should_parallelize_assignment(const T1& t, const T2&)
-{
- return (T1::Flags & T2::Flags & LargeBit) && t.size() >= EIGEN_PARALLELIZATION_TRESHOLD;
-}
-
template <typename Derived, typename OtherDerived>
struct ei_assignment_impl<Derived, OtherDerived, false>
{
@@ -158,23 +153,17 @@ struct ei_assignment_impl<Derived, OtherDerived, false>
{
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
{
- #define EIGEN_THE_PARALLELIZABLE_LOOP \
- for(int j = 0; j < dst.cols(); j++) \
- for(int i = 0; i < dst.rows(); i++) \
- dst.coeffRef(i, j) = src.coeff(i, j);
- EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
- #undef EIGEN_THE_PARALLELIZABLE_LOOP
+ for(int j = 0; j < dst.cols(); j++)
+ for(int i = 0; i < dst.rows(); i++)
+ dst.coeffRef(i, j) = src.coeff(i, j);
}
else
{
// traverse in row-major order
// in order to allow the compiler to unroll the inner loop
- #define EIGEN_THE_PARALLELIZABLE_LOOP \
- for(int i = 0; i < dst.rows(); i++) \
- for(int j = 0; j < dst.cols(); j++) \
- dst.coeffRef(i, j) = src.coeff(i, j);
- EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
- #undef EIGEN_THE_PARALLELIZABLE_LOOP
+ for(int i = 0; i < dst.rows(); i++)
+ for(int j = 0; j < dst.cols(); j++)
+ dst.coeffRef(i, j) = src.coeff(i, j);
}
}
}
@@ -199,21 +188,15 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
{
if(OtherDerived::Flags&RowMajorBit)
{
- #define EIGEN_THE_PARALLELIZABLE_LOOP \
- for(int i = 0; i < dst.rows(); i++) \
- for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size) \
+ for(int i = 0; i < dst.rows(); i++)
+ for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
- EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
- #undef EIGEN_THE_PARALLELIZABLE_LOOP
}
else
{
- #define EIGEN_THE_PARALLELIZABLE_LOOP \
- for(int j = 0; j < dst.cols(); j++) \
- for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size) \
+ for(int j = 0; j < dst.cols(); j++)
+ for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
- EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
- #undef EIGEN_THE_PARALLELIZABLE_LOOP
}
}
}
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index b593825f8..a49609f5c 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -280,75 +280,67 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
{
res.setZero();
const int cols4 = m_lhs.cols() & 0xfffffffC;
- const bool should_parallelize = (Flags & DestDerived::Flags & LargeBit)
- && res.size() >= EIGEN_PARALLELIZATION_TRESHOLD;
#ifdef EIGEN_VECTORIZE
if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) )
- {
- #define EIGEN_THE_PARALLELIZABLE_LOOP \
- for(int k=0; k<this->cols(); k++) \
- { \
- int j=0; \
- for(; j<cols4; j+=4) \
- { \
- const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k)); \
- const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k)); \
- const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k)); \
- const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k)); \
- for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) \
- { \
- res.writePacketCoeff(i,k,\
- ei_padd( \
- res.packetCoeff(i,k), \
- ei_padd( \
- ei_padd( \
- ei_pmul(tmp0, m_lhs.packetCoeff(i,j)), \
- ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))), \
- ei_padd( \
- ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)), \
- ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3)) \
- ) \
- ) \
- ) \
- ); \
- } \
- } \
- for(; j<m_lhs.cols(); ++j) \
- { \
- const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k)); \
- for (int i=0; i<this->rows(); ++i) \
- res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j))); \
- } \
+ {
+ for(int k=0; k<this->cols(); k++)
+ {
+ int j=0;
+ for(; j<cols4; j+=4)
+ {
+ const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
+ const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
+ const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
+ const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
+ for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
+ {
+ res.writePacketCoeff(i,k,\
+ ei_padd(
+ res.packetCoeff(i,k),
+ ei_padd(
+ ei_padd(
+ ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
+ ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
+ ei_padd(
+ ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
+ ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
+ )
+ )
+ )
+ );
+ }
+ }
+ for(; j<m_lhs.cols(); ++j)
+ {
+ const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k));
+ for (int i=0; i<this->rows(); ++i)
+ res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j)));
}
- EIGEN_RUN_PARALLELIZABLE_LOOP(should_parallelize)
- #undef EIGEN_THE_PARALLELIZABLE_LOOP
+ }
}
else
#endif // EIGEN_VECTORIZE
{
- #define EIGEN_THE_PARALLELIZABLE_LOOP \
- for(int k=0; k<this->cols(); ++k) \
- { \
- int j=0; \
- for(; j<cols4; j+=4) \
- { \
- const Scalar tmp0 = m_rhs.coeff(j ,k); \
- const Scalar tmp1 = m_rhs.coeff(j+1,k); \
- const Scalar tmp2 = m_rhs.coeff(j+2,k); \
- const Scalar tmp3 = m_rhs.coeff(j+3,k); \
- for (int i=0; i<this->rows(); ++i) \
- res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1) \
- + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3); \
- } \
- for(; j<m_lhs.cols(); ++j) \
- { \
- const Scalar tmp = m_rhs.coeff(j,k); \
- for (int i=0; i<this->rows(); ++i) \
- res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j); \
- } \
+ for(int k=0; k<this->cols(); ++k)
+ {
+ int j=0;
+ for(; j<cols4; j+=4)
+ {
+ const Scalar tmp0 = m_rhs.coeff(j ,k);
+ const Scalar tmp1 = m_rhs.coeff(j+1,k);
+ const Scalar tmp2 = m_rhs.coeff(j+2,k);
+ const Scalar tmp3 = m_rhs.coeff(j+3,k);
+ for (int i=0; i<this->rows(); ++i)
+ res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1)
+ + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3);
}
- EIGEN_RUN_PARALLELIZABLE_LOOP(should_parallelize)
- #undef EIGEN_THE_PARALLELIZABLE_LOOP
+ for(; j<m_lhs.cols(); ++j)
+ {
+ const Scalar tmp = m_rhs.coeff(j,k);
+ for (int i=0; i<this->rows(); ++i)
+ res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j);
+ }
+ }
}
}
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index fad046766..be5e7bba5 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -37,10 +37,6 @@
#define EIGEN_UNROLLING_LIMIT 400
#endif
-#ifndef EIGEN_PARALLELIZATION_TRESHOLD
-#define EIGEN_PARALLELIZATION_TRESHOLD 2000
-#endif
-
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER RowMajorBit
#else
@@ -78,30 +74,6 @@ using Eigen::MatrixBase;
#define EIGEN_ONLY_USED_FOR_DEBUG(x)
#endif
-#ifdef EIGEN_USE_OPENMP
-# ifdef __INTEL_COMPILER
-# define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none) shared(other)")
-# else
-# define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none)")
-# endif
-# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) \
- if(condition) \
- { \
- EIGEN_PRAGMA_OMP_PARALLEL \
- { \
- _Pragma("omp for") \
- EIGEN_THE_PARALLELIZABLE_LOOP \
- } \
- } \
- else \
- { \
- EIGEN_THE_PARALLELIZABLE_LOOP \
- }
-#else // EIGEN_USE_OPENMP
-# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) EIGEN_THE_PARALLELIZABLE_LOOP
-#endif
-
-
// FIXME with the always_inline attribute,
// gcc 3.4.x reports the following compilation error:
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/Inverse.h
index 1d4bd9bf0..eda20e1f3 100644
--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/Inverse.h
@@ -92,7 +92,6 @@ template<typename MatrixType, bool CheckExistence> class Inverse : ei_no_assignm
enum { _Size = MatrixType::RowsAtCompileTime };
void _compute(const MatrixType& matrix);
void _compute_in_general_case(const MatrixType& matrix);
- void _compute_in_size1_case(const MatrixType& matrix);
void _compute_in_size2_case(const MatrixType& matrix);
void _compute_in_size3_case(const MatrixType& matrix);
void _compute_in_size4_case(const MatrixType& matrix);
diff --git a/bench/benchmarkXcwise.cpp b/bench/benchmarkXcwise.cpp
index b2a7fc24c..9b394ff35 100644
--- a/bench/benchmarkXcwise.cpp
+++ b/bench/benchmarkXcwise.cpp
@@ -5,12 +5,12 @@
using namespace std;
USING_PART_OF_NAMESPACE_EIGEN
-#ifndef MATTYPE
-#define MATTYPE MatrixXLd
+#ifndef VECTYPE
+#define VECTYPE VectorXLd
#endif
-#ifndef MATSIZE
-#define MATSIZE 1000000
+#ifndef VECSIZE
+#define VECSIZE 1000000
#endif
#ifndef REPEAT
@@ -19,16 +19,16 @@ USING_PART_OF_NAMESPACE_EIGEN
int main(int argc, char *argv[])
{
- MATTYPE I = MATTYPE::ones(MATSIZE,1);
- MATTYPE m(MATSIZE,1);
- for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < 1; j++)
+ VECTYPE I = VECTYPE::ones(VECSIZE);
+ VECTYPE m(VECSIZE,1);
+ for(int i = 0; i < VECSIZE; i++)
{
- m(i,j) = 0.1 * (i+j+1)/MATSIZE/MATSIZE;
+ m[i] = 0.1 * i/VECSIZE;
}
for(int a = 0; a < REPEAT; a++)
{
- m = MATTYPE::ones(MATSIZE,1) + 0.00005 * (m.cwiseProduct(m) + m/4);
+ m = VECTYPE::ones(VECSIZE) + 0.00005 * (m.cwiseProduct(m) + m/4);
}
- cout << m(0,0) << endl;
+ cout << m[0] << endl;
return 0;
}