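Give up on OpenMP... for now: remove the OpenMP parallelization support (EIGEN_USE_OPENMP, EIGEN_RUN_PARALLELIZABLE_LOOP, EIGEN_PARALLELIZATION_TRESHOLD) and restore plain loops in Assign.h and Product.h. Also drop the _compute_in_size1_case declaration from Inverse.h and switch bench/benchmarkXcwise.cpp from MATTYPE/MATSIZE to VECTYPE/VECSIZE.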

author: Benoit Jacob <jacob.benoit.1@gmail.com> 2008-04-18 07:57:46 +0000
committer: Benoit Jacob <jacob.benoit.1@gmail.com> 2008-04-18 07:57:46 +0000
commit: 6ae037dfb5b340d2d545ccbb4135b04903a2e44f (patch)
tree: 382dbc8f4e8e85cfc2f40c49b61ec996ad014d24
parent: acfd6f3bdad9f7a690f4fd860a637f1f488e619c (diff)
6 files changed, 73 insertions, 134 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 6a315b09f..24dc37145 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -10,13 +10,6 @@
 #endif
 #endif
 
-#ifndef EIGEN_DONT_PARALLELIZE
-#ifdef _OPENMP
-#define EIGEN_USE_OPENMP
-#include <omp.h>
-#endif
-#endif
-
 #include <cstdlib>
 #include <cmath>
 #include <complex>
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 1b6e928d2..d0f126689 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -135,11 +135,6 @@ Derived& MatrixBase<Derived>
   }
 }
 
-template<typename T1, typename T2> bool ei_should_parallelize_assignment(const T1& t, const T2&)
-{
-  return (T1::Flags & T2::Flags & LargeBit) && t.size() >= EIGEN_PARALLELIZATION_TRESHOLD;
-}
-
 template <typename Derived, typename OtherDerived>
 struct ei_assignment_impl<Derived, OtherDerived, false>
 {
@@ -158,23 +153,17 @@ struct ei_assignment_impl<Derived, OtherDerived, false>
     {
       if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
       {
-        #define EIGEN_THE_PARALLELIZABLE_LOOP \
-            for(int j = 0; j < dst.cols(); j++) \
-              for(int i = 0; i < dst.rows(); i++) \
-                dst.coeffRef(i, j) = src.coeff(i, j);
-        EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
-        #undef EIGEN_THE_PARALLELIZABLE_LOOP
+        for(int j = 0; j < dst.cols(); j++)
+          for(int i = 0; i < dst.rows(); i++)
+            dst.coeffRef(i, j) = src.coeff(i, j);
       }
       else
       {
         // traverse in row-major order
         // in order to allow the compiler to unroll the inner loop
-        #define EIGEN_THE_PARALLELIZABLE_LOOP \
-          for(int i = 0; i < dst.rows(); i++) \
-            for(int j = 0; j < dst.cols(); j++) \
-              dst.coeffRef(i, j) = src.coeff(i, j);
-        EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
-        #undef EIGEN_THE_PARALLELIZABLE_LOOP
+        for(int i = 0; i < dst.rows(); i++)
+          for(int j = 0; j < dst.cols(); j++)
+            dst.coeffRef(i, j) = src.coeff(i, j);
       }
     }
   }
@@ -199,21 +188,15 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
     {
       if(OtherDerived::Flags&RowMajorBit)
       {
-        #define EIGEN_THE_PARALLELIZABLE_LOOP \
-        for(int i = 0; i < dst.rows(); i++) \
-          for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size) \
+        for(int i = 0; i < dst.rows(); i++)
+          for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
             dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
-        EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
-        #undef EIGEN_THE_PARALLELIZABLE_LOOP
       }
       else
       {
-        #define EIGEN_THE_PARALLELIZABLE_LOOP \
-        for(int j = 0; j < dst.cols(); j++) \
-          for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size) \
+        for(int j = 0; j < dst.cols(); j++)
+          for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
             dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
-        EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
-        #undef EIGEN_THE_PARALLELIZABLE_LOOP
       }
     }
   }
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index b593825f8..a49609f5c 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -280,75 +280,67 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
 {
   res.setZero();
   const int cols4 = m_lhs.cols() & 0xfffffffC;
-  const bool should_parallelize = (Flags & DestDerived::Flags & LargeBit)
-                                && res.size() >= EIGEN_PARALLELIZATION_TRESHOLD;
   #ifdef EIGEN_VECTORIZE
   if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) )
-  {
-    #define EIGEN_THE_PARALLELIZABLE_LOOP \
-      for(int k=0; k<this->cols(); k++) \
-      { \
-        int j=0; \
-        for(; j<cols4; j+=4) \
-        { \
-          const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k)); \
-          const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k)); \
-          const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k)); \
-          const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k)); \
-          for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) \
-          { \
-            res.writePacketCoeff(i,k,\
-              ei_padd( \
-                res.packetCoeff(i,k), \
-                ei_padd( \
-                  ei_padd( \
-                    ei_pmul(tmp0, m_lhs.packetCoeff(i,j)), \
-                    ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))), \
-                  ei_padd( \
-                    ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)), \
-                    ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3)) \
-                  ) \
-                ) \
-              ) \
-            ); \
-          } \
-        } \
-        for(; j<m_lhs.cols(); ++j) \
-        { \
-          const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k)); \
-          for (int i=0; i<this->rows(); ++i) \
-            res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j))); \
-        } \
+  {    
+    for(int k=0; k<this->cols(); k++)
+    {
+      int j=0;
+      for(; j<cols4; j+=4)
+      {
+        const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
+        const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
+        const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
+        const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
+        for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
+        {
+          res.writePacketCoeff(i,k,\
+            ei_padd(
+              res.packetCoeff(i,k),
+              ei_padd(
+                ei_padd(
+                  ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
+                  ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
+                ei_padd(
+                  ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
+                  ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
+                )
+              )
+            )
+          );
+        }
+      }
+      for(; j<m_lhs.cols(); ++j)
+      {
+        const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k));
+        for (int i=0; i<this->rows(); ++i)
+          res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j)));
       }
-    EIGEN_RUN_PARALLELIZABLE_LOOP(should_parallelize)
-    #undef EIGEN_THE_PARALLELIZABLE_LOOP
+    }
   }
   else
   #endif // EIGEN_VECTORIZE
   {
-    #define EIGEN_THE_PARALLELIZABLE_LOOP \
-      for(int k=0; k<this->cols(); ++k) \
-      { \
-        int j=0; \
-        for(; j<cols4; j+=4) \
-        { \
-          const Scalar tmp0 = m_rhs.coeff(j  ,k); \
-          const Scalar tmp1 = m_rhs.coeff(j+1,k); \
-          const Scalar tmp2 = m_rhs.coeff(j+2,k); \
-          const Scalar tmp3 = m_rhs.coeff(j+3,k); \
-          for (int i=0; i<this->rows(); ++i) \
-            res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1) \
-                              + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3); \
-        } \
-        for(; j<m_lhs.cols(); ++j) \
-        { \
-          const Scalar tmp = m_rhs.coeff(j,k); \
-          for (int i=0; i<this->rows(); ++i) \
-            res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j); \
-        } \
+    for(int k=0; k<this->cols(); ++k)
+    {
+      int j=0;
+      for(; j<cols4; j+=4)
+      {
+        const Scalar tmp0 = m_rhs.coeff(j  ,k);
+        const Scalar tmp1 = m_rhs.coeff(j+1,k);
+        const Scalar tmp2 = m_rhs.coeff(j+2,k);
+        const Scalar tmp3 = m_rhs.coeff(j+3,k);
+        for (int i=0; i<this->rows(); ++i)
+          res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1)
+                            + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3);
       }
-    EIGEN_RUN_PARALLELIZABLE_LOOP(should_parallelize)
-    #undef EIGEN_THE_PARALLELIZABLE_LOOP
+      for(; j<m_lhs.cols(); ++j)
+      {
+        const Scalar tmp = m_rhs.coeff(j,k);
+        for (int i=0; i<this->rows(); ++i)
+          res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j);
+      }
+    }
   }
 }
 
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index fad046766..be5e7bba5 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -37,10 +37,6 @@
 #define EIGEN_UNROLLING_LIMIT 400
 #endif
 
-#ifndef EIGEN_PARALLELIZATION_TRESHOLD
-#define EIGEN_PARALLELIZATION_TRESHOLD 2000
-#endif
-
 #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
 #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER RowMajorBit
 #else
@@ -78,30 +74,6 @@ using Eigen::MatrixBase;
 #define EIGEN_ONLY_USED_FOR_DEBUG(x)
 #endif
 
-#ifdef EIGEN_USE_OPENMP
-# ifdef __INTEL_COMPILER
-#   define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none) shared(other)")
-# else
-#   define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none)")
-# endif
-# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) \
-  if(condition) \
-  { \
-    EIGEN_PRAGMA_OMP_PARALLEL \
-    { \
-      _Pragma("omp for") \
-      EIGEN_THE_PARALLELIZABLE_LOOP \
-    } \
-  } \
-  else \
-  { \
-    EIGEN_THE_PARALLELIZABLE_LOOP \
-  }
-#else // EIGEN_USE_OPENMP
-# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) EIGEN_THE_PARALLELIZABLE_LOOP
-#endif
-
-
 // FIXME with the always_inline attribute,
 // gcc 3.4.x reports the following compilation error:
 //   Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/Inverse.h
index 1d4bd9bf0..eda20e1f3 100644
--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/Inverse.h
@@ -92,7 +92,6 @@ template<typename MatrixType, bool CheckExistence> class Inverse : ei_no_assignm
     enum { _Size = MatrixType::RowsAtCompileTime };
     void _compute(const MatrixType& matrix);
     void _compute_in_general_case(const MatrixType& matrix);
-    void _compute_in_size1_case(const MatrixType& matrix);
     void _compute_in_size2_case(const MatrixType& matrix);
     void _compute_in_size3_case(const MatrixType& matrix);
     void _compute_in_size4_case(const MatrixType& matrix);
diff --git a/bench/benchmarkXcwise.cpp b/bench/benchmarkXcwise.cpp
index b2a7fc24c..9b394ff35 100644
--- a/bench/benchmarkXcwise.cpp
+++ b/bench/benchmarkXcwise.cpp
@@ -5,12 +5,12 @@
 using namespace std;
 USING_PART_OF_NAMESPACE_EIGEN
 
-#ifndef MATTYPE
-#define MATTYPE MatrixXLd
+#ifndef VECTYPE
+#define VECTYPE VectorXLd
 #endif
 
-#ifndef MATSIZE
-#define MATSIZE 1000000
+#ifndef VECSIZE
+#define VECSIZE 1000000
 #endif
 
 #ifndef REPEAT
@@ -19,16 +19,16 @@ USING_PART_OF_NAMESPACE_EIGEN
 
 int main(int argc, char *argv[])
 {
-	MATTYPE I = MATTYPE::ones(MATSIZE,1);
-	MATTYPE m(MATSIZE,1);
-	for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < 1; j++)
+	VECTYPE I = VECTYPE::ones(VECSIZE);
+	VECTYPE m(VECSIZE,1);
+	for(int i = 0; i < VECSIZE; i++)
 	{
-		m(i,j) = 0.1 * (i+j+1)/MATSIZE/MATSIZE;
+		m[i] = 0.1 * i/VECSIZE;
 	}
 	for(int a = 0; a < REPEAT; a++)
 	{
-		m = MATTYPE::ones(MATSIZE,1) + 0.00005 * (m.cwiseProduct(m) + m/4);
+		m = VECTYPE::ones(VECSIZE) + 0.00005 * (m.cwiseProduct(m) + m/4);
 	}
-	cout << m(0,0) << endl;
+	cout << m[0] << endl;
 	return 0;
 }
author	Benoit Jacob <jacob.benoit.1@gmail.com>	2008-04-18 07:57:46 +0000
committer	Benoit Jacob <jacob.benoit.1@gmail.com>	2008-04-18 07:57:46 +0000
commit	6ae037dfb5b340d2d545ccbb4135b04903a2e44f (patch)
tree	382dbc8f4e8e85cfc2f40c49b61ec996ad014d24
parent	acfd6f3bdad9f7a690f4fd860a637f1f488e619c (diff)