diff options
-rw-r--r-- | Eigen/src/Core/util/Macros.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/util/Memory.h | 2 | ||||
-rw-r--r-- | Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h | 7 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 3 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h | 10 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_argmax_gpu.cu | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_cast_float16_gpu.cu | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_contract_gpu.cu | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_device.cu | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_gpu.cu | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_of_float16_gpu.cu | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_random_gpu.cu | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_reduction_gpu.cu | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_scan_gpu.cu | 2 |
14 files changed, 24 insertions, 22 deletions
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index fc4c0815c..adf25ee9b 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -395,8 +395,10 @@ // Means the compiler is HIPCC (analogous to EIGEN_CUDACC, but for HIP) #define EIGEN_HIPCC __HIPCC__ - // We need hip_common.h here because __HIP_DEVICE_COMPILE__ is defined in this header. - #include <hip/hip_common.h> + // We need to include hip_runtime.h here because it pulls in + // ++ hip_common.h which contains the define for __HIP_DEVICE_COMPILE__ + // ++ host_defines.h which contains the defines for the __host__ and __device__ macros + #include <hip/hip_runtime.h> #if defined(__HIP_DEVICE_COMPILE__) // analogous to EIGEN_CUDA_ARCH, but for HIP diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 956f64d1d..8dbd4d93b 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -580,7 +580,7 @@ template<typename T> struct smart_memmove_helper<T,false> { // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA // to the appropriate stack allocation function -#if ! defined EIGEN_ALLOCA && ! defined EIGEN_CUDA_ARCH +#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca) #define EIGEN_ALLOCA alloca #elif EIGEN_COMP_MSVC diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index fbc1ee2f6..2d1a82c57 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -610,6 +610,7 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3 static inline bool extract_kernel(MatrixType& mat, Ref<VectorType> res, Ref<VectorType> representative) { EIGEN_USING_STD_MATH(abs); + EIGEN_USING_STD_MATH(sqrt); Index i0; // Find non-zero column i0 (by construction, there must exist a non zero coefficient on the diagonal): mat.diagonal().cwiseAbs().maxCoeff(&i0); @@ -620,8 +621,8 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3 VectorType c0, c1; n0 = (c0 = representative.cross(mat.col((i0+1)%3))).squaredNorm(); n1 = (c1 = representative.cross(mat.col((i0+2)%3))).squaredNorm(); - if(n0>n1) res = c0/std::sqrt(n0); - else res = c1/std::sqrt(n1); + if(n0>n1) res = c0/sqrt(n0); + else res = c1/sqrt(n1); return true; } @@ -723,7 +724,7 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false> EIGEN_DEVICE_FUNC static inline void computeRoots(const MatrixType& m, VectorType& roots) { - using std::sqrt; + EIGEN_USING_STD_MATH(sqrt); const Scalar t0 = Scalar(0.5) * sqrt( numext::abs2(m(0,0)-m(1,1)) + Scalar(4)*numext::abs2(m(1,0))); const Scalar t1 = Scalar(0.5) * (m(0,0) + m(1,1)); roots(0) = t1 - t0; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 0fbffa34c..c7c443a59 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -449,8 +449,7 @@ struct TensorContractionEvaluatorBase // tensor dimensions (i, j) into the original tensor dimensions. // TODO(ezhulenev): Add parameters required to infer output tensor index for // more complex contractions than 2x2 on internal dimension. - m_tensor_contraction_params = { - /**swapped_arguments=*/static_cast<int>(Layout) == RowMajor}; + m_tensor_contraction_params.swapped_arguments = static_cast<int>(Layout) == RowMajor; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h index a4f92ee44..b9956cd43 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h @@ -1215,16 +1215,16 @@ EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs, } -template<typename Indices, typename LeftArgType, typename RightArgType> -struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> : - public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> > { +template<typename Indices, typename LeftArgType, typename RightArgType, typename OutputKernelType> +struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, GpuDevice> : + public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, GpuDevice> > { typedef GpuDevice Device; - typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self; + typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, Device> Self; typedef TensorContractionEvaluatorBase<Self> Base; - typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; + typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType; typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; diff --git a/unsupported/test/cxx11_tensor_argmax_gpu.cu b/unsupported/test/cxx11_tensor_argmax_gpu.cu index f6c3a9908..79f4066e9 100644 --- a/unsupported/test/cxx11_tensor_argmax_gpu.cu +++ b/unsupported/test/cxx11_tensor_argmax_gpu.cu @@ -242,7 +242,7 @@ void test_gpu_argmin_dim() } } -void test_cxx11_tensor_gpu() +EIGEN_DECLARE_TEST(cxx11_tensor_argmax_gpu) { CALL_SUBTEST_1(test_gpu_simple_argmax<RowMajor>()); CALL_SUBTEST_1(test_gpu_simple_argmax<ColMajor>()); diff --git a/unsupported/test/cxx11_tensor_cast_float16_gpu.cu b/unsupported/test/cxx11_tensor_cast_float16_gpu.cu index 0a37a555c..97923d15f 100644 --- a/unsupported/test/cxx11_tensor_cast_float16_gpu.cu +++ b/unsupported/test/cxx11_tensor_cast_float16_gpu.cu @@ -72,7 +72,7 @@ void test_fallback_conversion() { } -void test_cxx11_tensor_cast_float16_gpu() +EIGEN_DECLARE_TEST(cxx11_tensor_cast_float16_gpu) { CALL_SUBTEST(test_gpu_conversion()); CALL_SUBTEST(test_fallback_conversion()); diff --git a/unsupported/test/cxx11_tensor_contract_gpu.cu b/unsupported/test/cxx11_tensor_contract_gpu.cu index cb1416478..575bdc1f9 100644 --- a/unsupported/test/cxx11_tensor_contract_gpu.cu +++ b/unsupported/test/cxx11_tensor_contract_gpu.cu @@ -193,7 +193,7 @@ void test_gpu_contraction_sizes() { } } -void test_cxx11_tensor_gpu() +EIGEN_DECLARE_TEST(cxx11_tensor_contract_gpu) { CALL_SUBTEST_1(test_gpu_contraction<ColMajor>(128, 128, 128)); CALL_SUBTEST_1(test_gpu_contraction<RowMajor>(128, 128, 128)); diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu index cd9ba3ecd..c9f78d2d3 100644 --- a/unsupported/test/cxx11_tensor_device.cu +++ b/unsupported/test/cxx11_tensor_device.cu @@ -389,7 +389,7 @@ void test_gpu() { } -void test_cxx11_tensor_device() +EIGEN_DECLARE_TEST(cxx11_tensor_device) { CALL_SUBTEST_1(test_cpu()); CALL_SUBTEST_2(test_gpu()); diff --git a/unsupported/test/cxx11_tensor_gpu.cu b/unsupported/test/cxx11_tensor_gpu.cu index faaac73cf..14fc0bd04 100644 --- a/unsupported/test/cxx11_tensor_gpu.cu +++ b/unsupported/test/cxx11_tensor_gpu.cu @@ -1472,7 +1472,7 @@ void test_gpu_gamma_sample_der_alpha() gpuFree(d_out); } -void test_cxx11_tensor_gpu() +EIGEN_DECLARE_TEST(cxx11_tensor_gpu) { CALL_SUBTEST_1(test_gpu_nullary()); CALL_SUBTEST_1(test_gpu_elementwise_small()); diff --git a/unsupported/test/cxx11_tensor_of_float16_gpu.cu b/unsupported/test/cxx11_tensor_of_float16_gpu.cu index 1f1ec26c2..4d74e6138 100644 --- a/unsupported/test/cxx11_tensor_of_float16_gpu.cu +++ b/unsupported/test/cxx11_tensor_of_float16_gpu.cu @@ -479,7 +479,7 @@ void test_gpu_forced_evals() { #endif -void test_cxx11_tensor_of_float16_gpu() +EIGEN_DECLARE_TEST(cxx11_tensor_of_float16_gpu) { CALL_SUBTEST_1(test_gpu_numext<void>()); diff --git a/unsupported/test/cxx11_tensor_random_gpu.cu b/unsupported/test/cxx11_tensor_random_gpu.cu index 262182d30..090986ebc 100644 --- a/unsupported/test/cxx11_tensor_random_gpu.cu +++ b/unsupported/test/cxx11_tensor_random_gpu.cu @@ -78,7 +78,7 @@ static void test_complex() } -void test_cxx11_tensor_random_gpu() +EIGEN_DECLARE_TEST(cxx11_tensor_random_gpu) { CALL_SUBTEST(test_gpu_random_uniform()); CALL_SUBTEST(test_gpu_random_normal()); diff --git a/unsupported/test/cxx11_tensor_reduction_gpu.cu b/unsupported/test/cxx11_tensor_reduction_gpu.cu index 7b8ac2309..122ac946b 100644 --- a/unsupported/test/cxx11_tensor_reduction_gpu.cu +++ b/unsupported/test/cxx11_tensor_reduction_gpu.cu @@ -134,7 +134,7 @@ static void test_last_dim_reductions() { } -void test_cxx11_tensor_reduction_gpu() { +EIGEN_DECLARE_TEST(cxx11_tensor_reduction_gpu) { CALL_SUBTEST_1((test_full_reductions<float, ColMajor>())); CALL_SUBTEST_1((test_full_reductions<double, ColMajor>())); CALL_SUBTEST_2((test_full_reductions<float, RowMajor>())); diff --git a/unsupported/test/cxx11_tensor_scan_gpu.cu b/unsupported/test/cxx11_tensor_scan_gpu.cu index f3e773db5..770a144f1 100644 --- a/unsupported/test/cxx11_tensor_scan_gpu.cu +++ b/unsupported/test/cxx11_tensor_scan_gpu.cu @@ -71,7 +71,7 @@ void test_gpu_cumsum(int m_size, int k_size, int n_size) } -void test_cxx11_tensor_scan_gpu() +EIGEN_DECLARE_TEST(cxx11_tensor_scan_gpu) { CALL_SUBTEST_1(test_gpu_cumsum<ColMajor>(128, 128, 128)); CALL_SUBTEST_2(test_gpu_cumsum<RowMajor>(128, 128, 128)); |