From b733b8b680885c0fcdfddea5423171468609b5a6 Mon Sep 17 00:00:00 2001
From: Sami Kama
Date: Tue, 10 Mar 2020 20:28:43 +0000
Subject: remove duplicate pset1 for half and add some comments about why we need expose pmul/add/div/min/max on host
---
 .../Eigen/CXX11/src/Tensor/TensorReductionGpu.h | 290 +++++++++++++++------
 1 file changed, 210 insertions(+), 80 deletions(-)
(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
index 095bb54cc..9d3305cfd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
@@ -98,7 +98,17 @@ __device__ inline void atomicReduce(half2* output, half2 accum, R& reducer
 }
 }
 }
-#endif // EIGEN_HAS_GPU_FP16
+// reduction should be associative since reduction is not atomic in wide vector but atomic in half2 operations
+template