diff options
-rw-r--r-- | Eigen/src/Core/arch/CUDA/Complex.h | 299 | ||||
-rw-r--r-- | test/gpu_basic.cu | 112 | ||||
-rw-r--r-- | test/gpu_common.h | 1 |
3 files changed, 336 insertions, 76 deletions
diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h index 6e77372b0..caf3fe74b 100644 --- a/Eigen/src/Core/arch/CUDA/Complex.h +++ b/Eigen/src/Core/arch/CUDA/Complex.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// Copyright (C) 2021 C. Antonio Sanchez <cantonios@google.com> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -14,85 +15,231 @@ #if defined(EIGEN_CUDACC) && defined(EIGEN_GPU_COMPILE_PHASE) +// Many std::complex methods such as operator+, operator-, operator* and +// operator/ are not constexpr. Due to this, GCC and older versions of clang do +// not treat them as device functions and thus Eigen functors making use of +// these operators fail to compile. Here, we manually specialize these +// operators and functors for complex types when building for CUDA to enable +// their use on-device. + +// Import Eigen's internal operator specializations. +#define EIGEN_USING_STD_COMPLEX_OPERATORS \ + using Eigen::complex_operator_detail::operator+; \ + using Eigen::complex_operator_detail::operator-; \ + using Eigen::complex_operator_detail::operator*; \ + using Eigen::complex_operator_detail::operator/; \ + using Eigen::complex_operator_detail::operator+=; \ + using Eigen::complex_operator_detail::operator-=; \ + using Eigen::complex_operator_detail::operator*=; \ + using Eigen::complex_operator_detail::operator/=; \ + using Eigen::complex_operator_detail::operator==; \ + using Eigen::complex_operator_detail::operator!=; + namespace Eigen { -namespace internal { +// Specialized std::complex overloads. +namespace complex_operator_detail { + +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex<T> complex_multiply(const std::complex<T>& a, const std::complex<T>& b) { + const T a_real = numext::real(a); + const T a_imag = numext::imag(a); + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + return std::complex<T>( + a_real * b_real - a_imag * b_imag, + a_imag * b_real + a_real * b_imag); +} + +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex<T> complex_divide_fast(const std::complex<T>& a, const std::complex<T>& b) { + const T a_real = numext::real(a); + const T a_imag = numext::imag(a); + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + const T norm = T(1) / (b_real * b_real + b_imag * b_imag); + return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm, + (a_imag * b_real - a_real * b_imag) * norm); +} + +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex<T> complex_divide_stable(const std::complex<T>& a, const std::complex<T>& b) { + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + // Guard against over/under-flow. + const T scale = T(1) / (numext::abs(b_real) + numext::abs(b_imag)); + const T a_real_scaled = numext::real(a) * scale; + const T a_imag_scaled = numext::imag(a) * scale; + const T b_real_scaled = b_real * scale; + const T b_imag_scaled = b_imag * scale; + + const T b_norm2_scaled = b_real_scaled * b_real_scaled + b_imag_scaled * b_imag_scaled; + return std::complex<T>( + (a_real_scaled * b_real_scaled + a_imag_scaled * b_imag_scaled) / b_norm2_scaled, + (a_imag_scaled * b_real_scaled - a_real_scaled * b_imag_scaled) / b_norm2_scaled); +} + +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex<T> complex_divide(const std::complex<T>& a, const std::complex<T>& b) { +#if EIGEN_FAST_MATH + return complex_divide_fast(a, b); +#else + return complex_divide_stable(a, b); +#endif +} + +// NOTE: We cannot specialize compound assignment operators with Scalar T, +// (i.e. operator@=(const T&), for @=+,-,*,/) +// since they are already specialized for float/double/long double within +// the standard <complex> header. We also do not specialize the stream +// operators. +#define EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(T) \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator+(const std::complex<T>& a) { return a; } \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator-(const std::complex<T>& a) { \ + return std::complex<T>(-numext::real(a), -numext::imag(a)); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator+(const std::complex<T>& a, const std::complex<T>& b) { \ + return std::complex<T>(numext::real(a) + numext::real(b), numext::imag(a) + numext::imag(b)); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator+(const std::complex<T>& a, const T& b) { \ + return std::complex<T>(numext::real(a) + b, numext::imag(a)); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator+(const T& a, const std::complex<T>& b) { \ + return std::complex<T>(a + numext::real(b), numext::imag(b)); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator-(const std::complex<T>& a, const std::complex<T>& b) { \ + return std::complex<T>(numext::real(a) - numext::real(b), numext::imag(a) - numext::imag(b)); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator-(const std::complex<T>& a, const T& b) { \ + return std::complex<T>(numext::real(a) - b, numext::imag(a)); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator-(const T& a, const std::complex<T>& b) { \ + return std::complex<T>(a - numext::real(b), -numext::imag(b)); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator*(const std::complex<T>& a, const std::complex<T>& b) { \ + return complex_multiply(a, b); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator*(const std::complex<T>& a, const T& b) { \ + return std::complex<T>(numext::real(a) * b, numext::imag(a) * b); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator*(const T& a, const std::complex<T>& b) { \ + return std::complex<T>(a * numext::real(b), a * numext::imag(b)); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator/(const std::complex<T>& a, const std::complex<T>& b) { \ + return complex_divide(a, b); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator/(const std::complex<T>& a, const T& b) { \ + return std::complex<T>(numext::real(a) / b, numext::imag(a) / b); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T> operator/(const T& a, const std::complex<T>& b) { \ + return complex_divide(std::complex<T>(a, 0), b); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T>& operator+=(std::complex<T>& a, const std::complex<T>& b) { \ + numext::real_ref(a) += numext::real(b); \ + numext::imag_ref(a) += numext::imag(b); \ + return a; \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T>& operator-=(std::complex<T>& a, const std::complex<T>& b) { \ + numext::real_ref(a) -= numext::real(b); \ + numext::imag_ref(a) -= numext::imag(b); \ + return a; \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T>& operator*=(std::complex<T>& a, const std::complex<T>& b) { \ + a = complex_multiply(a, b); \ + return a; \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +std::complex<T>& operator/=(std::complex<T>& a, const std::complex<T>& b) { \ + a = complex_divide(a, b); \ + return a; \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +bool operator==(const std::complex<T>& a, const std::complex<T>& b) { \ + return numext::real(a) == numext::real(b) && numext::imag(a) == numext::imag(b); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +bool operator==(const std::complex<T>& a, const T& b) { \ + return numext::real(a) == b && numext::imag(a) == 0; \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +bool operator==(const T& a, const std::complex<T>& b) { \ + return a == numext::real(b) && 0 == numext::imag(b); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +bool operator!=(const std::complex<T>& a, const std::complex<T>& b) { \ + return !(a == b); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +bool operator!=(const std::complex<T>& a, const T& b) { \ + return !(a == b); \ +} \ + \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +bool operator!=(const T& a, const std::complex<T>& b) { \ + return !(a == b); \ +} + +// Do not specialize for long double, since that reduces to double on device. +EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(float) +EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(double) + +#undef EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS + + +} // namespace complex_operator_detail + +EIGEN_USING_STD_COMPLEX_OPERATORS + +namespace numext { +EIGEN_USING_STD_COMPLEX_OPERATORS +} // namespace numext -// Many std::complex methods such as operator+, operator-, operator* and -// operator/ are not constexpr. Due to this, clang does not treat them as device -// functions and thus Eigen functors making use of these operators fail to -// compile. Here, we manually specialize these functors for complex types when -// building for CUDA to avoid non-constexpr methods. - -// Sum -template<typename T> struct scalar_sum_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > { - typedef typename std::complex<T> result_type; - - EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const { - return std::complex<T>(numext::real(a) + numext::real(b), - numext::imag(a) + numext::imag(b)); - } -}; - -template<typename T> struct scalar_sum_op<std::complex<T>, std::complex<T> > : scalar_sum_op<const std::complex<T>, const std::complex<T> > {}; - - -// Difference -template<typename T> struct scalar_difference_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > { - typedef typename std::complex<T> result_type; - - EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const { - return std::complex<T>(numext::real(a) - numext::real(b), - numext::imag(a) - numext::imag(b)); - } -}; - -template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T> > : scalar_difference_op<const std::complex<T>, const std::complex<T> > {}; - - -// Product -template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > { - enum { - Vectorizable = packet_traits<std::complex<T> >::HasMul - }; - typedef typename std::complex<T> result_type; - - EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const { - const T a_real = numext::real(a); - const T a_imag = numext::imag(a); - const T b_real = numext::real(b); - const T b_imag = numext::imag(b); - return std::complex<T>(a_real * b_real - a_imag * b_imag, - a_real * b_imag + a_imag * b_real); - } -}; - -template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> > : scalar_product_op<const std::complex<T>, const std::complex<T> > {}; - - -// Quotient -template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > { - enum { - Vectorizable = packet_traits<std::complex<T> >::HasDiv - }; - typedef typename std::complex<T> result_type; - - EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const { - const T a_real = numext::real(a); - const T a_imag = numext::imag(a); - const T b_real = numext::real(b); - const T b_imag = numext::imag(b); - const T norm = T(1) / (b_real * b_real + b_imag * b_imag); - return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm, - (a_imag * b_real - a_real * b_imag) * norm); - } -}; - -template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T> > : scalar_quotient_op<const std::complex<T>, const std::complex<T> > {}; +namespace internal { +EIGEN_USING_STD_COMPLEX_OPERATORS } // namespace internal } // namespace Eigen diff --git a/test/gpu_basic.cu b/test/gpu_basic.cu index b82b94d9b..46e4a436f 100644 --- a/test/gpu_basic.cu +++ b/test/gpu_basic.cu @@ -107,6 +107,116 @@ struct complex_sqrt { }; template<typename T> +struct complex_operators { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const + { + using namespace Eigen; + typedef typename T::Scalar ComplexType; + typedef typename T::Scalar::value_type ValueType; + const int num_scalar_operators = 24; + const int num_vector_operators = 23; // no unary + operator. + int out_idx = i * (num_scalar_operators + num_vector_operators * T::MaxSizeAtCompileTime); + + // Scalar operators. + const ComplexType a = in[i]; + const ComplexType b = in[i + 1]; + + out[out_idx++] = +a; + out[out_idx++] = -a; + + out[out_idx++] = a + b; + out[out_idx++] = a + numext::real(b); + out[out_idx++] = numext::real(a) + b; + out[out_idx++] = a - b; + out[out_idx++] = a - numext::real(b); + out[out_idx++] = numext::real(a) - b; + out[out_idx++] = a * b; + out[out_idx++] = a * numext::real(b); + out[out_idx++] = numext::real(a) * b; + out[out_idx++] = a / b; + out[out_idx++] = a / numext::real(b); + out[out_idx++] = numext::real(a) / b; + + out[out_idx] = a; out[out_idx++] += b; + out[out_idx] = a; out[out_idx++] -= b; + out[out_idx] = a; out[out_idx++] *= b; + out[out_idx] = a; out[out_idx++] /= b; + + const ComplexType true_value = ComplexType(ValueType(1), ValueType(0)); + const ComplexType false_value = ComplexType(ValueType(0), ValueType(0)); + out[out_idx++] = (a == b ? true_value : false_value); + out[out_idx++] = (a == numext::real(b) ? true_value : false_value); + out[out_idx++] = (numext::real(a) == b ? true_value : false_value); + out[out_idx++] = (a != b ? true_value : false_value); + out[out_idx++] = (a != numext::real(b) ? true_value : false_value); + out[out_idx++] = (numext::real(a) != b ? true_value : false_value); + + // Vector versions. + T x1(in + i); + T x2(in + i + 1); + const int res_size = T::MaxSizeAtCompileTime * num_scalar_operators; + const int size = T::MaxSizeAtCompileTime; + int block_idx = 0; + + Map<VectorX<ComplexType>> res(out + out_idx, res_size); + res.segment(block_idx, size) = -x1; + block_idx += size; + + res.segment(block_idx, size) = x1 + x2; + block_idx += size; + res.segment(block_idx, size) = x1 + x2.real(); + block_idx += size; + res.segment(block_idx, size) = x1.real() + x2; + block_idx += size; + res.segment(block_idx, size) = x1 - x2; + block_idx += size; + res.segment(block_idx, size) = x1 - x2.real(); + block_idx += size; + res.segment(block_idx, size) = x1.real() - x2; + block_idx += size; + res.segment(block_idx, size) = x1.array() * x2.array(); + block_idx += size; + res.segment(block_idx, size) = x1.array() * x2.real().array(); + block_idx += size; + res.segment(block_idx, size) = x1.real().array() * x2.array(); + block_idx += size; + res.segment(block_idx, size) = x1.array() / x2.array(); + block_idx += size; + res.segment(block_idx, size) = x1.array() / x2.real().array(); + block_idx += size; + res.segment(block_idx, size) = x1.real().array() / x2.array(); + block_idx += size; + + res.segment(block_idx, size) = x1; res.segment(block_idx, size) += x2; + block_idx += size; + res.segment(block_idx, size) = x1; res.segment(block_idx, size) -= x2; + block_idx += size; + res.segment(block_idx, size) = x1; res.segment(block_idx, size).array() *= x2.array(); + block_idx += size; + res.segment(block_idx, size) = x1; res.segment(block_idx, size).array() /= x2.array(); + block_idx += size; + + // Equality comparisons currently not functional on device + // (std::equal_to<T> is host-only). + // const T true_vector = T::Constant(true_value); + // const T false_vector = T::Constant(false_value); + // res.segment(block_idx, size) = (x1 == x2 ? true_vector : false_vector); + // block_idx += size; + // res.segment(block_idx, size) = (x1 == x2.real() ? true_vector : false_vector); + // block_idx += size; + // res.segment(block_idx, size) = (x1.real() == x2 ? true_vector : false_vector); + // block_idx += size; + // res.segment(block_idx, size) = (x1 != x2 ? true_vector : false_vector); + // block_idx += size; + // res.segment(block_idx, size) = (x1 != x2.real() ? true_vector : false_vector); + // block_idx += size; + // res.segment(block_idx, size) = (x1.real() != x2 ? true_vector : false_vector); + // block_idx += size; + } +}; + +template<typename T> struct replicate { EIGEN_DEVICE_FUNC void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const @@ -297,6 +407,8 @@ EIGEN_DECLARE_TEST(gpu_basic) CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues_direct<Matrix3f>(), nthreads, in, out) ); CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues_direct<Matrix2f>(), nthreads, in, out) ); + // Test std::complex. + CALL_SUBTEST( run_and_compare_to_gpu(complex_operators<Vector3cf>(), nthreads, cfin, cfout) ); CALL_SUBTEST( test_with_infs_nans(complex_sqrt<Vector3cf>(), nthreads, cfin, cfout) ); #if defined(__NVCC__) diff --git a/test/gpu_common.h b/test/gpu_common.h index fe0485e98..c37eaa13f 100644 --- a/test/gpu_common.h +++ b/test/gpu_common.h @@ -117,6 +117,7 @@ struct compile_time_device_info { void operator()(int i, const int* /*in*/, int* info) const { if (i == 0) { + EIGEN_UNUSED_VARIABLE(info) #if defined(__CUDA_ARCH__) info[0] = int(__CUDA_ARCH__ +0); #endif |