aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/arch/GPU/PacketMath.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h14
2 files changed, 10 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index 5a66e2da9..3f90c450a 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -105,7 +105,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
// We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
// invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
// of the functions, while the latter can only deal with one of them.
-#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
+#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
namespace {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
index 6cacf1cc1..f8814bc8c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
@@ -82,7 +82,8 @@ struct TensorBlockV2ResourceRequirements {
: internal::kSkewedInnerDims;
}
- static TensorBlockV2ResourceRequirements
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements
merge(const TensorBlockV2ResourceRequirements &lhs,
const TensorBlockV2ResourceRequirements &rhs) {
return {merge(lhs.shape_type, rhs.shape_type), merge(rhs.size, lhs.size)};
@@ -91,19 +92,22 @@ struct TensorBlockV2ResourceRequirements {
// This is a resource requirement that should be returned from expressions
// that do not have any block evaluation preference (e.g. default tensor
// expression with raw buffer access).
- static TensorBlockV2ResourceRequirements any() {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements any() {
return {TensorBlockV2ShapeType::kUniformAllDims, 1};
}
private:
using Requirements = TensorBlockV2ResourceRequirements;
- static size_t merge(size_t lhs_size, size_t rhs_size) {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE size_t merge(size_t lhs_size, size_t rhs_size) {
return numext::maxi(lhs_size, rhs_size);
}
- static TensorBlockV2ShapeType merge(TensorBlockV2ShapeType lhs,
- TensorBlockV2ShapeType rhs) {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE TensorBlockV2ShapeType merge(TensorBlockV2ShapeType lhs,
+ TensorBlockV2ShapeType rhs) {
return (lhs == TensorBlockV2ShapeType::kSkewedInnerDims ||
rhs == TensorBlockV2ShapeType::kSkewedInnerDims)
? TensorBlockV2ShapeType::kSkewedInnerDims