From 070d303d56d46d2e018a58214da24ca629ea454f Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Tue, 22 Dec 2020 22:49:06 -0800 Subject: Add CUDA complex sqrt. This is to support scalar `sqrt` of complex numbers `std::complex` on device, requested by Tensorflow folks. Technically `std::complex` is not supported by NVCC on device (though it is by clang), so the default `sqrt(std::complex)` function only works on the host. Here we create an overload to add back the functionality. Also modified the CMake file to add `--relaxed-constexpr` (or equivalent) flag for NVCC to allow calling constexpr functions from device functions, and added support for specifying compute architecture for NVCC (was already available for clang). --- test/CMakeLists.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'test/CMakeLists.txt') diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8eda8d2f1..ce3782171 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -395,6 +395,12 @@ find_package(CUDA 5.0) if(CUDA_FOUND) set(CUDA_PROPAGATE_HOST_FLAGS OFF) + + set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr") + if (${CUDA_VERSION} STREQUAL "7.0") + set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr") + endif() + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) endif() @@ -404,7 +410,12 @@ if(CUDA_FOUND) foreach(GPU IN LISTS EIGEN_CUDA_COMPUTE_ARCH) string(APPEND CMAKE_CXX_FLAGS " --cuda-gpu-arch=sm_${GPU}") endforeach() + else() + foreach(GPU IN LISTS EIGEN_CUDA_COMPUTE_ARCH) + string(APPEND CUDA_NVCC_FLAGS " -gencode arch=compute_${GPU},code=sm_${GPU}") + endforeach() endif() + string(APPEND CUDA_NVCC_FLAGS " ${EIGEN_CUDA_RELAXED_CONSTEXPR}") set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") ei_add_test(gpu_basic) -- cgit v1.2.3