diff options
-rw-r--r-- | Eigen/src/Core/util/StaticAssert.h | 3 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h | 9 | ||||
-rw-r--r-- | unsupported/test/CMakeLists.txt | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_gpu.cu | 8 |
4 files changed, 15 insertions, 7 deletions
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index b2f95153e..67714e444 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -104,7 +104,8 @@ STORAGE_INDEX_MUST_MATCH=1, CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1, SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1, - INVALID_TEMPLATE_PARAMETER=1 + INVALID_TEMPLATE_PARAMETER=1, + GPU_TENSOR_CONTRACTION_DOES_NOT_SUPPORT_OUTPUT_KERNELS=1 }; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h index 056665749..5d19652e6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h @@ -1219,9 +1219,6 @@ template<typename Indices, typename LeftArgType, typename RightArgType, typename struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, GpuDevice> : public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, GpuDevice> > { - static_assert(std::is_same<OutputKernelType, const NoOpOutputKernel>::value, - "GPU tensor contraction does not support output kernels."); - typedef GpuDevice Device; typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, Device> Self; @@ -1274,7 +1271,11 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT typedef typename RightEvaluator::Dimensions RightDimensions; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : - Base(op, device) {} + Base(op, device) + { + EIGEN_STATIC_ASSERT( (internal::is_same<OutputKernelType, const NoOpOutputKernel>::value), + GPU_TENSOR_CONTRACTION_DOES_NOT_SUPPORT_OUTPUT_KERNELS); + } // We need to redefine this method to make nvcc happy EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index cda658e0e..e8e1dc832 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -258,7 +258,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr") endif() - if( (NOT EIGEN_TEST_CXX11) OR (CMAKE_VERSION VERSION_LESS 3.3)) + if(( (NOT EIGEN_TEST_CXX11) OR (CMAKE_VERSION VERSION_LESS 3.3)) AND EIGEN_TEST_CXX11) set(EIGEN_CUDA_CXX11_FLAG "-std=c++11") else() # otherwise the flag has already been added because of the above set(CMAKE_CXX_STANDARD 11) diff --git a/unsupported/test/cxx11_tensor_gpu.cu b/unsupported/test/cxx11_tensor_gpu.cu index 14fc0bd04..94625e6a3 100644 --- a/unsupported/test/cxx11_tensor_gpu.cu +++ b/unsupported/test/cxx11_tensor_gpu.cu @@ -17,6 +17,8 @@ #include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h> +#define EIGEN_GPU_TEST_C99_MATH EIGEN_HAS_CXX11 + using Eigen::Tensor; void test_gpu_nullary() { @@ -617,6 +619,7 @@ void test_gpu_convolution_3d() } +#if EIGEN_GPU_TEST_C99_MATH template <typename Scalar> void test_gpu_lgamma(const Scalar stddev) { @@ -655,6 +658,7 @@ void test_gpu_lgamma(const Scalar stddev) gpuFree(d_in); gpuFree(d_out); } +#endif template <typename Scalar> void test_gpu_digamma() @@ -986,6 +990,7 @@ void test_gpu_igammac() gpuFree(d_out); } +#if EIGEN_GPU_TEST_C99_MATH template <typename Scalar> void test_gpu_erf(const Scalar stddev) { @@ -1063,6 +1068,7 @@ void test_gpu_erfc(const Scalar stddev) gpuFree(d_in); gpuFree(d_out); } +#endif template <typename Scalar> void test_gpu_betainc() @@ -1494,7 +1500,7 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu) CALL_SUBTEST_3(test_gpu_convolution_3d<RowMajor>()); #endif -#if __cplusplus > 199711L +#if EIGEN_GPU_TEST_C99_MATH // std::erf, std::erfc, and so on where only added in c++11. We use them // as a golden reference to validate the results produced by Eigen. Therefore // we can only run these tests if we use a c++11 compiler. |