aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Deven Desai <deven.desai.amd@gmail.com>2019-12-10 22:14:05 +0000
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-12-10 22:14:05 +0000
commitc49f0d851ab77c9e4d782b453b4b0428bce903d3 (patch)
treef811ec085485e21a0e4c4dabe42c3f5a8fae25c7
parent2918f85ba976dbfbf72f7d4c1961a577f5850148 (diff)
Fix for HIP breakage detected on 191210
Commit https://gitlab.com/libeigen/eigen/commit/2918f85ba976dbfbf72f7d4c1961a577f5850148 introduces compile errors when compiling Eigen with hipcc. hipcc errors out because it requires the device attribute on the methods within the TensorBlockV2ResourceRequirements struct introduced by the commit above. The fix is to add the device attribute to those methods.
-rw-r--r--Eigen/src/Core/arch/GPU/PacketMath.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h14
2 files changed, 10 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index 5a66e2da9..3f90c450a 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -105,7 +105,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
// We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
// invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
// of the functions, while the latter can only deal with one of them.
-#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
+#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
namespace {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
index 6cacf1cc1..f8814bc8c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
@@ -82,7 +82,8 @@ struct TensorBlockV2ResourceRequirements {
: internal::kSkewedInnerDims;
}
- static TensorBlockV2ResourceRequirements
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements
merge(const TensorBlockV2ResourceRequirements &lhs,
const TensorBlockV2ResourceRequirements &rhs) {
return {merge(lhs.shape_type, rhs.shape_type), merge(rhs.size, lhs.size)};
@@ -91,19 +92,22 @@ struct TensorBlockV2ResourceRequirements {
// This is a resource requirement that should be returned from expressions
// that do not have any block evaluation preference (e.g. default tensor
// expression with raw buffer access).
- static TensorBlockV2ResourceRequirements any() {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements any() {
return {TensorBlockV2ShapeType::kUniformAllDims, 1};
}
private:
using Requirements = TensorBlockV2ResourceRequirements;
- static size_t merge(size_t lhs_size, size_t rhs_size) {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE size_t merge(size_t lhs_size, size_t rhs_size) {
return numext::maxi(lhs_size, rhs_size);
}
- static TensorBlockV2ShapeType merge(TensorBlockV2ShapeType lhs,
- TensorBlockV2ShapeType rhs) {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE TensorBlockV2ShapeType merge(TensorBlockV2ShapeType lhs,
+ TensorBlockV2ShapeType rhs) {
return (lhs == TensorBlockV2ShapeType::kSkewedInnerDims ||
rhs == TensorBlockV2ShapeType::kSkewedInnerDims)
? TensorBlockV2ShapeType::kSkewedInnerDims