about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr>, 2010-03-01 13:09:47 +0100
committer: Gael Guennebaud <g.gael@free.fr>, 2010-03-01 13:09:47 +0100
commit: 1710c07f63aa4be8d3ef11e2b4977ce7fe545948 (patch)
tree: d700f1166c190bfec1446e9f14ba427e766891bf
parent: 31aa17e4efafa25a5f9e27a3ba02b5ca030ad3f5 (diff)
remove Qt's atomic dependency, I don't know what I was doing wrong...
-rw-r--r-- Eigen/src/Core/products/GeneralMatrixMatrix.h | 11
-rw-r--r-- Eigen/src/Core/products/Parallelizer.h | 11
-rw-r--r-- bench/bench_gemm.cpp | 2
3 files changed, 12 insertions, 12 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index da700f8b7..cf42855eb 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -98,7 +98,6 @@ static void run(int rows, int cols, int depth,
// if you have the GOTO blas library you can try our parallelization strategy
// using GOTO's optimized routines.
-// #define USEGOTOROUTINES
#ifdef USEGOTOROUTINES
void* u = alloca(4096+sizeW);
#endif
@@ -125,7 +124,8 @@ static void run(int rows, int cols, int depth,
// However, before copying to B'_j, we have to make sure that no other thread is still using it,
// i.e., we test that info[tid].users equals 0.
// Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
- while(!info[tid].users.testAndSetOrdered(0,threads)) {}
+ while(info[tid].users!=0) {}
+ info[tid].users += threads;
#ifndef USEGOTOROUTINES
pack_rhs(blockB+info[tid].rhs_start*kc, &rhs(k,info[tid].rhs_start), rhsStride, alpha, actual_kc, info[tid].rhs_length);
@@ -134,7 +134,7 @@ static void run(int rows, int cols, int depth,
#endif
// Notify the other threads that the part B'_j is ready to go.
- info[tid].sync.fetchAndStoreOrdered(k);
+ info[tid].sync = k;
// Computes C_i += A' * B' per B'_j
for(int shift=0; shift<threads; ++shift)
@@ -145,7 +145,7 @@ static void run(int rows, int cols, int depth,
// we use testAndSetOrdered to mimic a volatile access.
// However, no need to wait for the B' part which has been updated by the current thread!
if(shift>0)
- while(!info[j].sync.testAndSetOrdered(k,k)) {}
+ while(info[j].sync!=k) {}
#ifndef USEGOTOROUTINES
gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*kc, mc, actual_kc, info[j].rhs_length, -1,-1,0,0, w);
@@ -178,7 +178,8 @@ static void run(int rows, int cols, int depth,
// Release all the sub blocks B'_j of B' for the current thread,
// i.e., we simply decrement the number of users by 1
for(int j=0; j<threads; ++j)
- info[j].users.fetchAndAddOrdered(-1);
+ #pragma omp atomic
+ --(info[j].users);
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index 620a5e8ba..404b8d390 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -92,8 +92,11 @@ void ei_run_parallel_2d(const Functor& func, int size1, int size2)
struct GemmParallelInfo
{
- QAtomicInt sync;
- QAtomicInt users;
+ GemmParallelInfo() : sync(-1), users(0) {}
+
+ int volatile sync;
+ int volatile users;
+
int rhs_start;
int rhs_length;
float* blockB;
@@ -118,7 +121,7 @@ void ei_run_parallel_gemm(const Functor& func, int rows, int cols)
GemmParallelInfo* info = new GemmParallelInfo[threads];
- #pragma omp parallel for schedule(static,1)
+ #pragma omp parallel for schedule(static,1) shared(info)
for(int i=0; i<threads; ++i)
{
int r0 = i*blockRows;
@@ -130,8 +133,6 @@ void ei_run_parallel_gemm(const Functor& func, int rows, int cols)
info[i].rhs_start = c0;
info[i].rhs_length = actualBlockCols;
info[i].blockB = sharedBlockB;
- info[i].sync.fetchAndStoreOrdered(-1);
- info[i].users.fetchAndStoreOrdered(0);
func(r0, actualBlockRows, 0,cols, info);
}
diff --git a/bench/bench_gemm.cpp b/bench/bench_gemm.cpp
index 12df7bcbc..653a880a8 100644
--- a/bench/bench_gemm.cpp
+++ b/bench/bench_gemm.cpp
@@ -2,8 +2,6 @@
// g++-4.4 bench_gemm.cpp -I .. -O2 -DNDEBUG -lrt -fopenmp && OMP_NUM_THREADS=2 ./a.out
// icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp && OMP_NUM_THREADS=2 ./a.out
-#include <QAtomicInt>
-
#include <Eigen/Core>
#include <bench/BenchTimer.h>