diff options
author | Antonio Sanchez <cantonios@google.com> | 2021-03-11 11:23:00 -0800 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2021-03-15 18:42:04 +0000 |
commit | d24f9f9b5523d3ace069fe0b271f5b694f37153a (patch) | |
tree | 9d4bb4779e060b78ffdf730f38aac800ead9ed92 | |
parent | 14487ed14e7e04cf1d84681274ae9d36fda23a39 (diff) |
Fix NVCC+ICC issues.
NVCC does not understand `__forceinline`, so we need to use `inline`
when compiling for GPU.
ICC specializes `std::complex` operators for `float` and `double`
by default, which cannot be used on device and conflict with Eigen's
workaround in CUDA/Complex.h. This can be prevented by defining
`_OVERRIDE_COMPLEX_SPECIALIZATION_` before including `<complex>`.
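Added this define to the tests and to `Eigen/Core`, but this will
not work if the user includes `<complex>` before `<Eigen/Core>`; in
that case the user must define `_OVERRIDE_COMPLEX_SPECIALIZATION_`
themselves before the first inclusion of `<complex>`.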
ICC also seems to generate a duplicate `Map` symbol in
`PlainObjectBase`:
```
error: "Map" has already been declared in the current scope
static ConstMapType Map(const Scalar *data)
```
I tracked this down to the `friend class Eigen::Map` declarations. Moving the
`friend` declarations to the bottom of the class seems to resolve this issue.
Fixes #2180
-rw-r--r-- | Eigen/Core | 7 | ||||
-rw-r--r-- | Eigen/src/Core/PlainObjectBase.h | 19 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/Complex.h | 18 | ||||
-rw-r--r-- | Eigen/src/Core/util/Macros.h | 2 | ||||
-rw-r--r-- | test/main.h | 2 |
5 files changed, 35 insertions, 13 deletions
diff --git a/Eigen/Core b/Eigen/Core index 1a60dcba4..5921e15f9 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -40,6 +40,13 @@ #pragma GCC optimize ("-fno-ipa-cp-clone") #endif +// Prevent ICC from specializing std::complex operators that silently fail +// on device. This allows us to use our own device-compatible specializations +// instead. +#if defined(EIGEN_COMP_ICC) && defined(EIGEN_GPU_COMPILE_PHASE) \ + && !defined(_OVERRIDE_COMPLEX_SPECIALIZATION_) +#define _OVERRIDE_COMPLEX_SPECIALIZATION_ 1 +#endif #include <complex> // this include file manages BLAS and MKL related macros diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index ca5b5ee1d..e53ca1b16 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -118,16 +118,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type using Base::IsVectorAtCompileTime; using Base::Flags; - template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map; - friend class Eigen::Map<Derived, Unaligned>; typedef Eigen::Map<Derived, Unaligned> MapType; - friend class Eigen::Map<const Derived, Unaligned>; typedef const Eigen::Map<const Derived, Unaligned> ConstMapType; -#if EIGEN_MAX_ALIGN_BYTES>0 - // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice. - friend class Eigen::Map<Derived, AlignedMax>; - friend class Eigen::Map<const Derived, AlignedMax>; -#endif typedef Eigen::Map<Derived, AlignedMax> AlignedMapType; typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType; template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; }; @@ -989,6 +981,17 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type enum { IsPlainObjectBase = 1 }; #endif + public: + // These apparently need to be down here for nvcc+icc to prevent duplicate + // Map symbol. 
+ template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map; + friend class Eigen::Map<Derived, Unaligned>; + friend class Eigen::Map<const Derived, Unaligned>; +#if EIGEN_MAX_ALIGN_BYTES>0 + // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice. + friend class Eigen::Map<Derived, AlignedMax>; + friend class Eigen::Map<const Derived, AlignedMax>; +#endif }; namespace internal { diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h index caf3fe74b..b1618e567 100644 --- a/Eigen/src/Core/arch/CUDA/Complex.h +++ b/Eigen/src/Core/arch/CUDA/Complex.h @@ -12,9 +12,6 @@ #define EIGEN_COMPLEX_CUDA_H // clang-format off - -#if defined(EIGEN_CUDACC) && defined(EIGEN_GPU_COMPILE_PHASE) - // Many std::complex methods such as operator+, operator-, operator* and // operator/ are not constexpr. Due to this, GCC and older versions of clang do // not treat them as device functions and thus Eigen functors making use of @@ -22,6 +19,17 @@ // operators and functors for complex types when building for CUDA to enable // their use on-device. +#if defined(EIGEN_CUDACC) && defined(EIGEN_GPU_COMPILE_PHASE) + +// ICC already specializes std::complex<float> and std::complex<double> +// operators, preventing us from making them device functions here. +// This will lead to silent runtime errors if the operators are used on device. +// +// To allow std::complex operator use on device, define _OVERRIDE_COMPLEX_SPECIALIZATION_ +// prior to first inclusion of <complex>. This prevents ICC from adding +// its own specializations, so our custom ones below can be used instead. +#if !(defined(EIGEN_COMP_ICC) && defined(_USE_COMPLEX_SPECIALIZATION_)) + // Import Eigen's internal operator specializations. 
#define EIGEN_USING_STD_COMPLEX_OPERATORS \ using Eigen::complex_operator_detail::operator+; \ @@ -244,6 +252,8 @@ EIGEN_USING_STD_COMPLEX_OPERATORS } // namespace internal } // namespace Eigen -#endif +#endif // !(EIGEN_COMP_ICC && _USE_COMPLEX_SPECIALIZATION_) + +#endif // EIGEN_CUDACC && EIGEN_GPU_COMPILE_PHASE #endif // EIGEN_COMPLEX_CUDA_H diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 00f3690d1..bdc0de0ea 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -905,7 +905,7 @@ // but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline // but GCC is still doing fine with just inline. #ifndef EIGEN_STRONG_INLINE -#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC +#if (EIGEN_COMP_MSVC || EIGEN_COMP_ICC) && !defined(EIGEN_GPUCC) #define EIGEN_STRONG_INLINE __forceinline #else #define EIGEN_STRONG_INLINE inline diff --git a/test/main.h b/test/main.h index cf061730f..3e80c9f7d 100644 --- a/test/main.h +++ b/test/main.h @@ -40,6 +40,8 @@ // definitions. #include <limits> #include <algorithm> +// Disable ICC's std::complex operator specializations so we can use our own. +#define _OVERRIDE_COMPLEX_SPECIALIZATION_ 1 #include <complex> #include <deque> #include <queue> |