Removed unnecessary thread synchronization

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-05-13 10:49:38 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-05-13 10:49:38 -0700
commit: c4fc8b70ecd453bec883f53ca17b94004f94528e (patch)
tree: 58ea3e3442def45a24e00c7d3687ce649fd1397f /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parent: 7aa3557d31a5ccd82486de1e445c76b9b77a33a4 (diff)
1 file changed, 2 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index b433a14c9..8c2baec14 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -177,7 +177,7 @@ static __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self
   }
 
   half2 accum = reducer.template initializePacket<half2>();
-  Index max_iter = numext::mini<Index>((num_coeffs - first_index) / 2, NumPerThread*BlockSize / 2);
+  const Index max_iter = numext::mini<Index>((num_coeffs - first_index) / 2, NumPerThread*BlockSize / 2);
   for (Index i = 0; i < max_iter; i += BlockSize) {
     const Index index = first_index + 2*i;
     eigen_assert(index + 1 < num_coeffs);
@@ -333,7 +333,7 @@ __global__ void InnerReductionKernel(Reducer reducer, const Self input, Index nu
       for (Index j = 0; j < NumPerThread; j += unroll_times) {
         const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1);
         if (last_col >= num_coeffs_to_reduce) {
-          for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col +=blockDim.x) {
+          for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col += blockDim.x) {
             const float val = input.m_impl.coeff(row * num_coeffs_to_reduce + col);
             reducer.reduce(val, &reduced_val);
           }
@@ -357,8 +357,6 @@ __global__ void InnerReductionKernel(Reducer reducer, const Self input, Index nu
         atomicReduce(&(output[row]), reduced_val, reducer);
       }
     }
-
-    __syncthreads();
   }
 }
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-05-13 10:49:38 -0700
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-05-13 10:49:38 -0700
commit	c4fc8b70ecd453bec883f53ca17b94004f94528e (patch)
tree	58ea3e3442def45a24e00c7d3687ce649fd1397f /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parent	7aa3557d31a5ccd82486de1e445c76b9b77a33a4 (diff)