aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/CUDA
diff options
context:
space:
mode:
authorGravatar Antonio Sanchez <cantonios@google.com>2021-01-06 09:41:15 -0800
committerGravatar Antonio Sánchez <cantonios@google.com>2021-01-22 18:19:19 +0000
commitf19bcffee6b8018ca101ceb370e6e550a940289f (patch)
tree36447572f9f35914470c66811e613c20bc4e044e /Eigen/src/Core/arch/CUDA
parent65e2169c4521660d30f4d90df61da5f3dd9f45bd (diff)
Specialize std::complex operators for use on GPU device.
NVCC and older versions of clang do not fully support `std::complex` on device, leading to either compile errors (Cannot call `__host__` function) or worse, runtime errors (Illegal instruction). For most functions, we can implement specialized `numext` versions. Here we specialize the standard operators (with the exception of stream operators and member function operators with a scalar that are already specialized in `<complex>`) so they can be used in device code as well. To import these operators into the current scope, use `EIGEN_USING_STD_COMPLEX_OPERATORS`. By default, these are imported into the `Eigen`, `Eigen:internal`, and `Eigen::numext` namespaces. This allow us to remove specializations of the sum/difference/product/quotient ops, and allow us to treat complex numbers like most other scalars (e.g. in tests).
Diffstat (limited to 'Eigen/src/Core/arch/CUDA')
-rw-r--r--Eigen/src/Core/arch/CUDA/Complex.h299
1 files changed, 223 insertions, 76 deletions
diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h
index 6e77372b0..caf3fe74b 100644
--- a/Eigen/src/Core/arch/CUDA/Complex.h
+++ b/Eigen/src/Core/arch/CUDA/Complex.h
@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+// Copyright (C) 2021 C. Antonio Sanchez <cantonios@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -14,85 +15,231 @@
#if defined(EIGEN_CUDACC) && defined(EIGEN_GPU_COMPILE_PHASE)
+// Many std::complex methods such as operator+, operator-, operator* and
+// operator/ are not constexpr. Due to this, GCC and older versions of clang do
+// not treat them as device functions and thus Eigen functors making use of
+// these operators fail to compile. Here, we manually specialize these
+// operators and functors for complex types when building for CUDA to enable
+// their use on-device.
+
+// Import Eigen's internal operator specializations.
+#define EIGEN_USING_STD_COMPLEX_OPERATORS \
+ using Eigen::complex_operator_detail::operator+; \
+ using Eigen::complex_operator_detail::operator-; \
+ using Eigen::complex_operator_detail::operator*; \
+ using Eigen::complex_operator_detail::operator/; \
+ using Eigen::complex_operator_detail::operator+=; \
+ using Eigen::complex_operator_detail::operator-=; \
+ using Eigen::complex_operator_detail::operator*=; \
+ using Eigen::complex_operator_detail::operator/=; \
+ using Eigen::complex_operator_detail::operator==; \
+ using Eigen::complex_operator_detail::operator!=;
+
namespace Eigen {
-namespace internal {
+// Specialized std::complex overloads.
+namespace complex_operator_detail {
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+std::complex<T> complex_multiply(const std::complex<T>& a, const std::complex<T>& b) {
+ const T a_real = numext::real(a);
+ const T a_imag = numext::imag(a);
+ const T b_real = numext::real(b);
+ const T b_imag = numext::imag(b);
+ return std::complex<T>(
+ a_real * b_real - a_imag * b_imag,
+ a_imag * b_real + a_real * b_imag);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+std::complex<T> complex_divide_fast(const std::complex<T>& a, const std::complex<T>& b) {
+ const T a_real = numext::real(a);
+ const T a_imag = numext::imag(a);
+ const T b_real = numext::real(b);
+ const T b_imag = numext::imag(b);
+ const T norm = T(1) / (b_real * b_real + b_imag * b_imag);
+ return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm,
+ (a_imag * b_real - a_real * b_imag) * norm);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+std::complex<T> complex_divide_stable(const std::complex<T>& a, const std::complex<T>& b) {
+ const T b_real = numext::real(b);
+ const T b_imag = numext::imag(b);
+ // Guard against over/under-flow.
+ const T scale = T(1) / (numext::abs(b_real) + numext::abs(b_imag));
+ const T a_real_scaled = numext::real(a) * scale;
+ const T a_imag_scaled = numext::imag(a) * scale;
+ const T b_real_scaled = b_real * scale;
+ const T b_imag_scaled = b_imag * scale;
+
+ const T b_norm2_scaled = b_real_scaled * b_real_scaled + b_imag_scaled * b_imag_scaled;
+ return std::complex<T>(
+ (a_real_scaled * b_real_scaled + a_imag_scaled * b_imag_scaled) / b_norm2_scaled,
+ (a_imag_scaled * b_real_scaled - a_real_scaled * b_imag_scaled) / b_norm2_scaled);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+std::complex<T> complex_divide(const std::complex<T>& a, const std::complex<T>& b) {
+#if EIGEN_FAST_MATH
+ return complex_divide_fast(a, b);
+#else
+ return complex_divide_stable(a, b);
+#endif
+}
+
+// NOTE: We cannot specialize compound assignment operators with Scalar T,
+// (i.e. operator@=(const T&), for @=+,-,*,/)
+// since they are already specialized for float/double/long double within
+// the standard <complex> header. We also do not specialize the stream
+// operators.
+#define EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(T) \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator+(const std::complex<T>& a) { return a; } \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator-(const std::complex<T>& a) { \
+ return std::complex<T>(-numext::real(a), -numext::imag(a)); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator+(const std::complex<T>& a, const std::complex<T>& b) { \
+ return std::complex<T>(numext::real(a) + numext::real(b), numext::imag(a) + numext::imag(b)); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator+(const std::complex<T>& a, const T& b) { \
+ return std::complex<T>(numext::real(a) + b, numext::imag(a)); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator+(const T& a, const std::complex<T>& b) { \
+ return std::complex<T>(a + numext::real(b), numext::imag(b)); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator-(const std::complex<T>& a, const std::complex<T>& b) { \
+ return std::complex<T>(numext::real(a) - numext::real(b), numext::imag(a) - numext::imag(b)); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator-(const std::complex<T>& a, const T& b) { \
+ return std::complex<T>(numext::real(a) - b, numext::imag(a)); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator-(const T& a, const std::complex<T>& b) { \
+ return std::complex<T>(a - numext::real(b), -numext::imag(b)); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator*(const std::complex<T>& a, const std::complex<T>& b) { \
+ return complex_multiply(a, b); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator*(const std::complex<T>& a, const T& b) { \
+ return std::complex<T>(numext::real(a) * b, numext::imag(a) * b); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator*(const T& a, const std::complex<T>& b) { \
+ return std::complex<T>(a * numext::real(b), a * numext::imag(b)); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator/(const std::complex<T>& a, const std::complex<T>& b) { \
+ return complex_divide(a, b); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator/(const std::complex<T>& a, const T& b) { \
+ return std::complex<T>(numext::real(a) / b, numext::imag(a) / b); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T> operator/(const T& a, const std::complex<T>& b) { \
+ return complex_divide(std::complex<T>(a, 0), b); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T>& operator+=(std::complex<T>& a, const std::complex<T>& b) { \
+ numext::real_ref(a) += numext::real(b); \
+ numext::imag_ref(a) += numext::imag(b); \
+ return a; \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T>& operator-=(std::complex<T>& a, const std::complex<T>& b) { \
+ numext::real_ref(a) -= numext::real(b); \
+ numext::imag_ref(a) -= numext::imag(b); \
+ return a; \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T>& operator*=(std::complex<T>& a, const std::complex<T>& b) { \
+ a = complex_multiply(a, b); \
+ return a; \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+std::complex<T>& operator/=(std::complex<T>& a, const std::complex<T>& b) { \
+ a = complex_divide(a, b); \
+ return a; \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+bool operator==(const std::complex<T>& a, const std::complex<T>& b) { \
+ return numext::real(a) == numext::real(b) && numext::imag(a) == numext::imag(b); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+bool operator==(const std::complex<T>& a, const T& b) { \
+ return numext::real(a) == b && numext::imag(a) == 0; \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+bool operator==(const T& a, const std::complex<T>& b) { \
+ return a == numext::real(b) && 0 == numext::imag(b); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+bool operator!=(const std::complex<T>& a, const std::complex<T>& b) { \
+ return !(a == b); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+bool operator!=(const std::complex<T>& a, const T& b) { \
+ return !(a == b); \
+} \
+ \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+bool operator!=(const T& a, const std::complex<T>& b) { \
+ return !(a == b); \
+}
+
+// Do not specialize for long double, since that reduces to double on device.
+EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(float)
+EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(double)
+
+#undef EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS
+
+
+} // namespace complex_operator_detail
+
+EIGEN_USING_STD_COMPLEX_OPERATORS
+
+namespace numext {
+EIGEN_USING_STD_COMPLEX_OPERATORS
+} // namespace numext
-// Many std::complex methods such as operator+, operator-, operator* and
-// operator/ are not constexpr. Due to this, clang does not treat them as device
-// functions and thus Eigen functors making use of these operators fail to
-// compile. Here, we manually specialize these functors for complex types when
-// building for CUDA to avoid non-constexpr methods.
-
-// Sum
-template<typename T> struct scalar_sum_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
- typedef typename std::complex<T> result_type;
-
- EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
- return std::complex<T>(numext::real(a) + numext::real(b),
- numext::imag(a) + numext::imag(b));
- }
-};
-
-template<typename T> struct scalar_sum_op<std::complex<T>, std::complex<T> > : scalar_sum_op<const std::complex<T>, const std::complex<T> > {};
-
-
-// Difference
-template<typename T> struct scalar_difference_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
- typedef typename std::complex<T> result_type;
-
- EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
- return std::complex<T>(numext::real(a) - numext::real(b),
- numext::imag(a) - numext::imag(b));
- }
-};
-
-template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T> > : scalar_difference_op<const std::complex<T>, const std::complex<T> > {};
-
-
-// Product
-template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
- enum {
- Vectorizable = packet_traits<std::complex<T> >::HasMul
- };
- typedef typename std::complex<T> result_type;
-
- EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
- const T a_real = numext::real(a);
- const T a_imag = numext::imag(a);
- const T b_real = numext::real(b);
- const T b_imag = numext::imag(b);
- return std::complex<T>(a_real * b_real - a_imag * b_imag,
- a_real * b_imag + a_imag * b_real);
- }
-};
-
-template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> > : scalar_product_op<const std::complex<T>, const std::complex<T> > {};
-
-
-// Quotient
-template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
- enum {
- Vectorizable = packet_traits<std::complex<T> >::HasDiv
- };
- typedef typename std::complex<T> result_type;
-
- EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
- const T a_real = numext::real(a);
- const T a_imag = numext::imag(a);
- const T b_real = numext::real(b);
- const T b_imag = numext::imag(b);
- const T norm = T(1) / (b_real * b_real + b_imag * b_imag);
- return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm,
- (a_imag * b_real - a_real * b_imag) * norm);
- }
-};
-
-template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T> > : scalar_quotient_op<const std::complex<T>, const std::complex<T> > {};
+namespace internal {
+EIGEN_USING_STD_COMPLEX_OPERATORS
} // namespace internal
} // namespace Eigen