diff options
-rw-r--r-- | Eigen/src/Core/arch/GPU/PacketMath.h | 2 |
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h | 14 |
2 files changed, 10 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index 5a66e2da9..3f90c450a 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -105,7 +105,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
 // We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
 // invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
 // of the functions, while the latter can only deal with one of them.
-#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
+#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
 namespace {
 
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
index 6cacf1cc1..f8814bc8c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
@@ -82,7 +82,8 @@ struct TensorBlockV2ResourceRequirements {
                : internal::kSkewedInnerDims;
   }
 
-  static TensorBlockV2ResourceRequirements
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements
   merge(const TensorBlockV2ResourceRequirements &lhs,
         const TensorBlockV2ResourceRequirements &rhs) {
     return {merge(lhs.shape_type, rhs.shape_type), merge(rhs.size, lhs.size)};
@@ -91,19 +92,22 @@ struct TensorBlockV2ResourceRequirements {
   // This is a resource requirement that should be returned from expressions
   // that do not have any block evaluation preference (e.g. default tensor
   // expression with raw buffer access).
-  static TensorBlockV2ResourceRequirements any() {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements any() {
     return {TensorBlockV2ShapeType::kUniformAllDims, 1};
   }
 
  private:
   using Requirements = TensorBlockV2ResourceRequirements;
 
-  static size_t merge(size_t lhs_size, size_t rhs_size) {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE size_t merge(size_t lhs_size, size_t rhs_size) {
     return numext::maxi(lhs_size, rhs_size);
   }
 
-  static TensorBlockV2ShapeType merge(TensorBlockV2ShapeType lhs,
-                                      TensorBlockV2ShapeType rhs) {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE TensorBlockV2ShapeType merge(TensorBlockV2ShapeType lhs,
+                                                          TensorBlockV2ShapeType rhs) {
     return (lhs == TensorBlockV2ShapeType::kSkewedInnerDims ||
             rhs == TensorBlockV2ShapeType::kSkewedInnerDims)
                ? TensorBlockV2ShapeType::kSkewedInnerDims