applying EIGEN_DECLARE_TEST to *gpu* tests

Also, a few minor fixes for GPU tests running in HIP mode:

1. Adding an include for hip/hip_runtime.h in the Macros.h file. For HIP, __host__ and __device__ are macros which are defined in the HIP headers, so their definitions need to be included before their use in the file.
2. Fixing the compile failure in TensorContractionGpu introduced by the commit "Fuse computations into the Tensor contractions using output kernel".
3. Fixing a HIP/clang-specific compile error by making the struct-member assignment explicit.
author: Deven Desai <deven.desai.amd@gmail.com> 2018-07-17 14:16:48 -0400
committer: Deven Desai <deven.desai.amd@gmail.com> 2018-07-17 14:16:48 -0400
commit: f124f0796533081cb7b061c4aa8667df6ed58863 (patch)
tree: 20efd29fdfaf3da79cad39f468d330927b62ebc2 /unsupported
parent: 82f0ce27261df3b21037d93d4595655b3df754a6 (diff)
11 files changed, 15 insertions, 16 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 0fbffa34c..c7c443a59 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -449,8 +449,7 @@ struct TensorContractionEvaluatorBase
     // tensor dimensions (i, j) into the original tensor dimensions.
     // TODO(ezhulenev): Add parameters required to infer output tensor index for
     // more complex contractions than 2x2 on internal dimension.
-    m_tensor_contraction_params = {
-        /**swapped_arguments=*/static_cast<int>(Layout) == RowMajor};
+    m_tensor_contraction_params.swapped_arguments = static_cast<int>(Layout) == RowMajor;
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
index a4f92ee44..b9956cd43 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
@@ -1215,16 +1215,16 @@ EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs,
 }
 
 
-template<typename Indices, typename LeftArgType, typename RightArgType>
-struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> :
-    public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> > {
+template<typename Indices, typename LeftArgType, typename RightArgType, typename OutputKernelType>
+struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, GpuDevice> :
+    public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, GpuDevice> > {
 
   typedef GpuDevice Device;
 
-  typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self;
+  typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, Device> Self;
   typedef TensorContractionEvaluatorBase<Self> Base;
 
-  typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType;
+  typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
   typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
diff --git a/unsupported/test/cxx11_tensor_argmax_gpu.cu b/unsupported/test/cxx11_tensor_argmax_gpu.cu
index f6c3a9908..79f4066e9 100644
--- a/unsupported/test/cxx11_tensor_argmax_gpu.cu
+++ b/unsupported/test/cxx11_tensor_argmax_gpu.cu
@@ -242,7 +242,7 @@ void test_gpu_argmin_dim()
   }
 }
 
-void test_cxx11_tensor_gpu()
+EIGEN_DECLARE_TEST(cxx11_tensor_argmax_gpu)
 {
   CALL_SUBTEST_1(test_gpu_simple_argmax<RowMajor>());
   CALL_SUBTEST_1(test_gpu_simple_argmax<ColMajor>());
diff --git a/unsupported/test/cxx11_tensor_cast_float16_gpu.cu b/unsupported/test/cxx11_tensor_cast_float16_gpu.cu
index 0a37a555c..97923d15f 100644
--- a/unsupported/test/cxx11_tensor_cast_float16_gpu.cu
+++ b/unsupported/test/cxx11_tensor_cast_float16_gpu.cu
@@ -72,7 +72,7 @@ void test_fallback_conversion() {
 }
 
 
-void test_cxx11_tensor_cast_float16_gpu()
+EIGEN_DECLARE_TEST(cxx11_tensor_cast_float16_gpu)
 {
   CALL_SUBTEST(test_gpu_conversion());
   CALL_SUBTEST(test_fallback_conversion());
diff --git a/unsupported/test/cxx11_tensor_contract_gpu.cu b/unsupported/test/cxx11_tensor_contract_gpu.cu
index cb1416478..575bdc1f9 100644
--- a/unsupported/test/cxx11_tensor_contract_gpu.cu
+++ b/unsupported/test/cxx11_tensor_contract_gpu.cu
@@ -193,7 +193,7 @@ void test_gpu_contraction_sizes() {
   }
 }
 
-void test_cxx11_tensor_gpu()
+EIGEN_DECLARE_TEST(cxx11_tensor_contract_gpu)
 {
   CALL_SUBTEST_1(test_gpu_contraction<ColMajor>(128, 128, 128));
   CALL_SUBTEST_1(test_gpu_contraction<RowMajor>(128, 128, 128));
diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu
index cd9ba3ecd..c9f78d2d3 100644
--- a/unsupported/test/cxx11_tensor_device.cu
+++ b/unsupported/test/cxx11_tensor_device.cu
@@ -389,7 +389,7 @@ void test_gpu() {
 }
 
 
-void test_cxx11_tensor_device()
+EIGEN_DECLARE_TEST(cxx11_tensor_device)
 {
   CALL_SUBTEST_1(test_cpu());
   CALL_SUBTEST_2(test_gpu());
diff --git a/unsupported/test/cxx11_tensor_gpu.cu b/unsupported/test/cxx11_tensor_gpu.cu
index faaac73cf..14fc0bd04 100644
--- a/unsupported/test/cxx11_tensor_gpu.cu
+++ b/unsupported/test/cxx11_tensor_gpu.cu
@@ -1472,7 +1472,7 @@ void test_gpu_gamma_sample_der_alpha()
   gpuFree(d_out);
 }
 
-void test_cxx11_tensor_gpu()
+EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
 {
   CALL_SUBTEST_1(test_gpu_nullary());
   CALL_SUBTEST_1(test_gpu_elementwise_small());
diff --git a/unsupported/test/cxx11_tensor_of_float16_gpu.cu b/unsupported/test/cxx11_tensor_of_float16_gpu.cu
index 1f1ec26c2..4d74e6138 100644
--- a/unsupported/test/cxx11_tensor_of_float16_gpu.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_gpu.cu
@@ -479,7 +479,7 @@ void test_gpu_forced_evals() {
 #endif
 
 
-void test_cxx11_tensor_of_float16_gpu()
+EIGEN_DECLARE_TEST(cxx11_tensor_of_float16_gpu)
 {
   CALL_SUBTEST_1(test_gpu_numext<void>());
 
diff --git a/unsupported/test/cxx11_tensor_random_gpu.cu b/unsupported/test/cxx11_tensor_random_gpu.cu
index 262182d30..090986ebc 100644
--- a/unsupported/test/cxx11_tensor_random_gpu.cu
+++ b/unsupported/test/cxx11_tensor_random_gpu.cu
@@ -78,7 +78,7 @@ static void test_complex()
 }
 
 
-void test_cxx11_tensor_random_gpu()
+EIGEN_DECLARE_TEST(cxx11_tensor_random_gpu)
 {
   CALL_SUBTEST(test_gpu_random_uniform());
   CALL_SUBTEST(test_gpu_random_normal());
diff --git a/unsupported/test/cxx11_tensor_reduction_gpu.cu b/unsupported/test/cxx11_tensor_reduction_gpu.cu
index 7b8ac2309..122ac946b 100644
--- a/unsupported/test/cxx11_tensor_reduction_gpu.cu
+++ b/unsupported/test/cxx11_tensor_reduction_gpu.cu
@@ -134,7 +134,7 @@ static void test_last_dim_reductions() {
 }
 
 
-void test_cxx11_tensor_reduction_gpu() {
+EIGEN_DECLARE_TEST(cxx11_tensor_reduction_gpu) {
   CALL_SUBTEST_1((test_full_reductions<float, ColMajor>()));
   CALL_SUBTEST_1((test_full_reductions<double, ColMajor>()));
   CALL_SUBTEST_2((test_full_reductions<float, RowMajor>()));
diff --git a/unsupported/test/cxx11_tensor_scan_gpu.cu b/unsupported/test/cxx11_tensor_scan_gpu.cu
index f3e773db5..770a144f1 100644
--- a/unsupported/test/cxx11_tensor_scan_gpu.cu
+++ b/unsupported/test/cxx11_tensor_scan_gpu.cu
@@ -71,7 +71,7 @@ void test_gpu_cumsum(int m_size, int k_size, int n_size)
 }
 
 
-void test_cxx11_tensor_scan_gpu()
+EIGEN_DECLARE_TEST(cxx11_tensor_scan_gpu)
 {
   CALL_SUBTEST_1(test_gpu_cumsum<ColMajor>(128, 128, 128));
   CALL_SUBTEST_2(test_gpu_cumsum<RowMajor>(128, 128, 128));
author	Deven Desai <deven.desai.amd@gmail.com>	2018-07-17 14:16:48 -0400
committer	Deven Desai <deven.desai.amd@gmail.com>	2018-07-17 14:16:48 -0400
commit	f124f0796533081cb7b061c4aa8667df6ed58863 (patch)
tree	20efd29fdfaf3da79cad39f468d330927b62ebc2 /unsupported
parent	82f0ce27261df3b21037d93d4595655b3df754a6 (diff)