aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Antonio Sanchez <cantonios@google.com>2021-03-11 11:23:00 -0800
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2021-03-15 18:42:04 +0000
commitd24f9f9b5523d3ace069fe0b271f5b694f37153a (patch)
tree9d4bb4779e060b78ffdf730f38aac800ead9ed92
parent14487ed14e7e04cf1d84681274ae9d36fda23a39 (diff)
Fix NVCC+ICC issues.
NVCC does not understand `__forceinline`, so we need to use `inline` when
compiling for GPU.

ICC specializes `std::complex` operators for `float` and `double` by default,
which cannot be used on device and conflict with Eigen's workaround in
CUDA/Complex.h. This can be prevented by defining
`_OVERRIDE_COMPLEX_SPECIALIZATION_` before including `<complex>`. Added this
define to the tests and to `Eigen/Core`, but this will not work if the user
includes `<complex>` before `<Eigen/Core>`.

ICC also seems to generate a duplicate `Map` symbol in `PlainObjectBase`:

```
error: "Map" has already been declared in the current scope
  static ConstMapType Map(const Scalar *data)
```

I tracked this down to `friend class Eigen::Map`. Putting the `friend`
statements at the bottom of the class seems to resolve this issue.

Fixes #2180
-rw-r--r--Eigen/Core7
-rw-r--r--Eigen/src/Core/PlainObjectBase.h19
-rw-r--r--Eigen/src/Core/arch/CUDA/Complex.h18
-rw-r--r--Eigen/src/Core/util/Macros.h2
-rw-r--r--test/main.h2
5 files changed, 35 insertions, 13 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 1a60dcba4..5921e15f9 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -40,6 +40,13 @@
#pragma GCC optimize ("-fno-ipa-cp-clone")
#endif
+// Prevent ICC from specializing std::complex operators that silently fail
+// on device. This allows us to use our own device-compatible specializations
+// instead.
+#if defined(EIGEN_COMP_ICC) && defined(EIGEN_GPU_COMPILE_PHASE) \
+ && !defined(_OVERRIDE_COMPLEX_SPECIALIZATION_)
+#define _OVERRIDE_COMPLEX_SPECIALIZATION_ 1
+#endif
#include <complex>
// this include file manages BLAS and MKL related macros
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index ca5b5ee1d..e53ca1b16 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -118,16 +118,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
using Base::IsVectorAtCompileTime;
using Base::Flags;
- template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
- friend class Eigen::Map<Derived, Unaligned>;
typedef Eigen::Map<Derived, Unaligned> MapType;
- friend class Eigen::Map<const Derived, Unaligned>;
typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
-#if EIGEN_MAX_ALIGN_BYTES>0
- // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.
- friend class Eigen::Map<Derived, AlignedMax>;
- friend class Eigen::Map<const Derived, AlignedMax>;
-#endif
typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
@@ -989,6 +981,17 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
enum { IsPlainObjectBase = 1 };
#endif
+ public:
+ // These apparently need to be down here for nvcc+icc to prevent duplicate
+ // Map symbol.
+ template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
+ friend class Eigen::Map<Derived, Unaligned>;
+ friend class Eigen::Map<const Derived, Unaligned>;
+#if EIGEN_MAX_ALIGN_BYTES>0
+ // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.
+ friend class Eigen::Map<Derived, AlignedMax>;
+ friend class Eigen::Map<const Derived, AlignedMax>;
+#endif
};
namespace internal {
diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h
index caf3fe74b..b1618e567 100644
--- a/Eigen/src/Core/arch/CUDA/Complex.h
+++ b/Eigen/src/Core/arch/CUDA/Complex.h
@@ -12,9 +12,6 @@
#define EIGEN_COMPLEX_CUDA_H
// clang-format off
-
-#if defined(EIGEN_CUDACC) && defined(EIGEN_GPU_COMPILE_PHASE)
-
// Many std::complex methods such as operator+, operator-, operator* and
// operator/ are not constexpr. Due to this, GCC and older versions of clang do
// not treat them as device functions and thus Eigen functors making use of
@@ -22,6 +19,17 @@
// operators and functors for complex types when building for CUDA to enable
// their use on-device.
+#if defined(EIGEN_CUDACC) && defined(EIGEN_GPU_COMPILE_PHASE)
+
+// ICC already specializes std::complex<float> and std::complex<double>
+// operators, preventing us from making them device functions here.
+// This will lead to silent runtime errors if the operators are used on device.
+//
+// To allow std::complex operator use on device, define _OVERRIDE_COMPLEX_SPECIALIZATION_
+// prior to first inclusion of <complex>. This prevents ICC from adding
+// its own specializations, so our custom ones below can be used instead.
+#if !(defined(EIGEN_COMP_ICC) && defined(_USE_COMPLEX_SPECIALIZATION_))
+
// Import Eigen's internal operator specializations.
#define EIGEN_USING_STD_COMPLEX_OPERATORS \
using Eigen::complex_operator_detail::operator+; \
@@ -244,6 +252,8 @@ EIGEN_USING_STD_COMPLEX_OPERATORS
} // namespace internal
} // namespace Eigen
-#endif
+#endif // !(EIGEN_COMP_ICC && _USE_COMPLEX_SPECIALIZATION_)
+
+#endif // EIGEN_CUDACC && EIGEN_GPU_COMPILE_PHASE
#endif // EIGEN_COMPLEX_CUDA_H
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 00f3690d1..bdc0de0ea 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -905,7 +905,7 @@
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
// but GCC is still doing fine with just inline.
#ifndef EIGEN_STRONG_INLINE
-#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
+#if (EIGEN_COMP_MSVC || EIGEN_COMP_ICC) && !defined(EIGEN_GPUCC)
#define EIGEN_STRONG_INLINE __forceinline
#else
#define EIGEN_STRONG_INLINE inline
diff --git a/test/main.h b/test/main.h
index cf061730f..3e80c9f7d 100644
--- a/test/main.h
+++ b/test/main.h
@@ -40,6 +40,8 @@
// definitions.
#include <limits>
#include <algorithm>
+// Disable ICC's std::complex operator specializations so we can use our own.
+#define _OVERRIDE_COMPLEX_SPECIALIZATION_ 1
#include <complex>
#include <deque>
#include <queue>