aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
diff options
context:
space:
mode:
authorGravatar Deven Desai <deven.desai.amd@gmail.com>2018-07-11 10:39:54 -0400
committerGravatar Deven Desai <deven.desai.amd@gmail.com>2018-07-11 10:39:54 -0400
commit876f392c396318f33454168db36ed54308e54e0d (patch)
treea727bc91873b5c0aeec05312176a0f39e2cb64d5 /unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
parent1fe0b749042320501c59378f2860d9322b0c6e19 (diff)
Updates corresponding to the latest round of PR feedback
The major changes are:
1. Moving CUDA/PacketMath.h to GPU/PacketMath.h
2. Moving CUDA/MathFunctions.h to GPU/MathFunctions.h
3. Moving CUDA/CudaSpecialFunctions.h to GPU/GpuSpecialFunctions.h
The above three changes effectively enable the Eigen "Packet" layer for the HIP platform.
4. Merging the "hip_basic" and "cuda_basic" unit tests into one ("gpu_basic")
5. Updating the "EIGEN_DEVICE_FUNC" marking in some places
The change has been tested on the HIP and CUDA platforms.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h11
1 files changed, 3 insertions, 8 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index fdd338b96..ce573d730 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -497,6 +497,9 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
EIGEN_STRONG_INLINE
#if !defined(EIGEN_HIPCC)
+ // Marking this as EIGEN_DEVICE_FUNC for HIPCC requires also doing the same for all the functions
+ // called from within it, which then leads to a proliferation of EIGEN_DEVICE_FUNC markings, one
+ // of which will eventually result in an NVCC error.
EIGEN_DEVICE_FUNC
#endif
bool evalSubExprsIfNeeded(typename MakePointer_<CoeffReturnType>::Type data) {
@@ -778,17 +781,9 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
// Indexed by reduced dimensions.
array<Index, NumReducedDims> m_reducedDims;
-#if defined(EIGEN_HIPCC)
- public:
-#endif
-
// Evaluator for the input expression.
TensorEvaluator<ArgType, Device> m_impl;
-#if defined(EIGEN_HIPCC)
- private:
-#endif
-
// Operation to apply for computing the reduction.
Op m_reducer;