aboutsummaryrefslogtreecommitdiffhomepage
path: root/test/CMakeLists.txt
diff options
context:
space:
mode:
authorGravatar Antonio Sanchez <cantonios@google.com>2020-12-22 22:49:06 -0800
committerGravatar Antonio Sanchez <cantonios@google.com>2020-12-22 23:25:23 -0800
commit070d303d56d46d2e018a58214da24ca629ea454f (patch)
tree3dfa72bf48ffdca0a67bd794596e4e452d50ed19 /test/CMakeLists.txt
parentfdf2ee62c5174441076fb64c9737d89bbe102759 (diff)
Add CUDA complex sqrt.
This is to support scalar `sqrt` of complex numbers `std::complex<T>` on device, requested by Tensorflow folks. Technically `std::complex` is not supported by NVCC on device (though it is by clang), so the default `sqrt(std::complex<T>)` function only works on the host. Here we create an overload to add back the functionality. Also modified the CMake file to add `--relaxed-constexpr` (or equivalent) flag for NVCC to allow calling constexpr functions from device functions, and added support for specifying compute architecture for NVCC (was already available for clang).
Diffstat (limited to 'test/CMakeLists.txt')
-rw-r--r--test/CMakeLists.txt11
1 files changed, 11 insertions, 0 deletions
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 8eda8d2f1..ce3782171 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -395,6 +395,12 @@ find_package(CUDA 5.0)
if(CUDA_FOUND)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
+
+ set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr")
+ if (${CUDA_VERSION} STREQUAL "7.0")
+ set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr")
+ endif()
+
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE)
endif()
@@ -404,7 +410,12 @@ if(CUDA_FOUND)
foreach(GPU IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND CMAKE_CXX_FLAGS " --cuda-gpu-arch=sm_${GPU}")
endforeach()
+ else()
+ foreach(GPU IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
+ string(APPEND CUDA_NVCC_FLAGS " -gencode arch=compute_${GPU},code=sm_${GPU}")
+ endforeach()
endif()
+ string(APPEND CUDA_NVCC_FLAGS " ${EIGEN_CUDA_RELAXED_CONSTEXPR}")
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
ei_add_test(gpu_basic)