diff options
author | Deven Desai <deven.desai.amd@gmail.com> | 2018-07-11 10:39:54 -0400 |
---|---|---|
committer | Deven Desai <deven.desai.amd@gmail.com> | 2018-07-11 10:39:54 -0400 |
commit | 876f392c396318f33454168db36ed54308e54e0d (patch) | |
tree | a727bc91873b5c0aeec05312176a0f39e2cb64d5 /test/gpu_basic.cu | |
parent | 1fe0b749042320501c59378f2860d9322b0c6e19 (diff) |
Updates corresponding to the latest round of PR feedback
The major changes are:
1. Moving CUDA/PacketMath.h to GPU/PacketMath.h
2. Moving CUDA/MathFunctions.h to GPU/MathFunctions.h
3. Moving CUDA/CudaSpecialFunctions.h to GPU/GpuSpecialFunctions.h
The above three changes effectively enable the Eigen "Packet" layer for the HIP platform
4. Merging the "hip_basic" and "cuda_basic" unit tests into one ("gpu_basic")
5. Updating the "EIGEN_DEVICE_FUNC" marking in some places
The change has been tested on the HIP and CUDA platforms.
Diffstat (limited to 'test/gpu_basic.cu')
-rw-r--r-- | test/gpu_basic.cu | 59 |
1 files changed, 34 insertions, 25 deletions
diff --git a/test/gpu_basic.cu b/test/gpu_basic.cu index 33e5fd119..897834dff 100644 --- a/test/gpu_basic.cu +++ b/test/gpu_basic.cu @@ -15,13 +15,11 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cuda_basic +#define EIGEN_TEST_FUNC gpu_basic #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int -#include <math_constants.h> -#include <cuda.h> #include "main.h" -#include "cuda_common.h" +#include "gpu_common.h" // Check that dense modules can be properly parsed by nvcc #include <Eigen/Dense> @@ -164,40 +162,51 @@ struct matrix_inverse { } }; -void test_cuda_basic() +void test_gpu_basic() { - ei_test_init_cuda(); + ei_test_init_gpu(); int nthreads = 100; Eigen::VectorXf in, out; - #ifndef __CUDA_ARCH__ + #if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) int data_size = nthreads * 512; in.setRandom(data_size); out.setRandom(data_size); #endif - CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise<Vector3f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise<Array44f>(), nthreads, in, out) ); - - CALL_SUBTEST( run_and_compare_to_cuda(replicate<Array4f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(replicate<Array33f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(coeff_wise<Vector3f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(coeff_wise<Array44f>(), nthreads, in, out) ); + +#if !defined(EIGEN_USE_HIP) + // FIXME + // These subtests result in a compile failure on the HIP platform + // + // eigen-upstream/Eigen/src/Core/Replicate.h:61:65: error: + // base class 'internal::dense_xpr_base<Replicate<Array<float, 4, 1, 0, 4, 1>, -1, -1> >::type' + // (aka 'ArrayBase<Eigen::Replicate<Eigen::Array<float, 4, 1, 0, 4, 1>, -1, -1> >') has protected default constructor + CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array4f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array33f>(), nthreads, in, out) ); +#endif - CALL_SUBTEST( 
run_and_compare_to_cuda(redux<Array4f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(redux<Matrix3f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(redux<Array4f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(redux<Matrix3f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(prod_test<Matrix3f,Matrix3f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(prod_test<Matrix4f,Vector4f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(prod_test<Matrix3f,Matrix3f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(prod_test<Matrix4f,Vector4f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix3f,Vector3f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix4f,Vector4f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(diagonal<Matrix3f,Vector3f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(diagonal<Matrix4f,Vector4f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(matrix_inverse<Matrix2f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(matrix_inverse<Matrix3f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(matrix_inverse<Matrix4f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(matrix_inverse<Matrix2f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(matrix_inverse<Matrix3f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(matrix_inverse<Matrix4f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues_direct<Matrix3f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues_direct<Matrix2f>(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix4f>(), nthreads, in, out) ); - +#if !defined(EIGEN_USE_HIP) + // FIXME + // These subtests result in a linking error on the HIP platform + CALL_SUBTEST( 
run_and_compare_to_gpu(eigenvalues_direct<Matrix3f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues_direct<Matrix2f>(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues<Matrix4f>(), nthreads, in, out) ); +#endif } |