1 files changed, 69 insertions, 78 deletions
diff --git a/unsupported/test/cxx11_tensor_reduction_sycl.cpp b/unsupported/test/cxx11_tensor_reduction_sycl.cpp
index bd09744a6..a9ef82907 100644
--- a/unsupported/test/cxx11_tensor_reduction_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_reduction_sycl.cpp
@@ -22,126 +22,117 @@
 
 
 
-static void test_full_reductions_sycl() {
-
-
-  cl::sycl::gpu_selector s;
-    cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {
-      for (const auto& e : l) {
-        try {
-          std::rethrow_exception(e);
-        } catch (cl::sycl::exception e) {
-          std::cout << e.what() << std::endl;
-        }
-      }
-    });
-  Eigen::SyclDevice sycl_device(q);
+static void test_full_reductions_sycl(const Eigen::SyclDevice&  sycl_device) {  // Full sum on sycl_device vs. the same sum evaluated without a device.
 
   const int num_rows = 452;
   const int num_cols = 765;
   array<int, 2> tensorRange = {{num_rows, num_cols}};
 
   Tensor<float, 2> in(tensorRange);
+  Tensor<float, 0> full_redux;  // reference result, filled by in.sum() below
+  Tensor<float, 0> full_redux_gpu;  // filled from gpu_out_data via memcpyDeviceToHost
+
   in.setRandom();
 
-  Tensor<float, 0> full_redux;
-  Tensor<float, 0> full_redux_g;
   full_redux = in.sum();
-  float* out_data = (float*)sycl_device.allocate(sizeof(float));
-  TensorMap<Tensor<float, 2> >  in_gpu(in.data(), tensorRange);
-  TensorMap<Tensor<float, 0> >  full_redux_gpu(out_data);
-  full_redux_gpu.device(sycl_device) = in_gpu.sum();
-  sycl_device.deallocate(out_data);
-  // Check that the CPU and GPU reductions return the same result.
-  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());
 
-}
+  float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+  float* gpu_out_data = static_cast<float*>(sycl_device.allocate(sizeof(float)));  // one float: the reduction output is rank 0
 
+  TensorMap<Tensor<float, 2> >  in_gpu(gpu_in_data, tensorRange);
+  TensorMap<Tensor<float, 0> >  out_gpu(gpu_out_data);
 
-static void test_first_dim_reductions_sycl() {
+  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));  // copy the random input into gpu_in_data
+  out_gpu.device(sycl_device) = in_gpu.sum();  // same reduction, evaluated through sycl_device
+  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(float));  // fetch the single result value for comparison
+  // Check that the CPU and GPU reductions return the same result.
+  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());
 
+  sycl_device.deallocate(gpu_in_data);
+  sycl_device.deallocate(gpu_out_data);
+}
 
-  cl::sycl::gpu_selector s;
-    cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {
-      for (const auto& e : l) {
-        try {
-          std::rethrow_exception(e);
-        } catch (cl::sycl::exception e) {
-          std::cout << e.what() << std::endl;
-        }
-      }
-    });
-  Eigen::SyclDevice sycl_device(q);
+static void test_first_dim_reductions_sycl(const Eigen::SyclDevice& sycl_device) {  // Sum over axis 0 on sycl_device vs. the same sum evaluated without a device.
 
   int dim_x = 145;
   int dim_y = 1;
   int dim_z = 67;
 
   array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}};
-
-  Tensor<float, 3> in(tensorRange);
-  in.setRandom();
   Eigen::array<int, 1> red_axis;
   red_axis[0] = 0;
-  Tensor<float, 2> redux = in.sum(red_axis);
   array<int, 2> reduced_tensorRange = {{dim_y, dim_z}};
-  Tensor<float, 2> redux_g(reduced_tensorRange);
-  TensorMap<Tensor<float, 3> >  in_gpu(in.data(), tensorRange);
-  float* out_data = (float*)sycl_device.allocate(dim_y*dim_z*sizeof(float));
-  TensorMap<Tensor<float, 2> >  redux_gpu(out_data, dim_y, dim_z );
-  redux_gpu.device(sycl_device) = in_gpu.sum(red_axis);
 
-  sycl_device.deallocate(out_data);
-  // Check that the CPU and GPU reductions return the same result.
-  for(int j=0; j<dim_y; j++ )
-    for(int k=0; k<dim_z; k++ )
-      VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k));
-}
+  Tensor<float, 3> in(tensorRange);
+  Tensor<float, 2> redux(reduced_tensorRange);  // reference result, filled by in.sum(red_axis) below
+  Tensor<float, 2> redux_gpu(reduced_tensorRange);  // filled from gpu_out_data via memcpyDeviceToHost
+
+  in.setRandom();
 
+  redux= in.sum(red_axis);
 
-static void test_last_dim_reductions_sycl() {
+  float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+  float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float)));
 
+  TensorMap<Tensor<float, 3> >  in_gpu(gpu_in_data, tensorRange);
+  TensorMap<Tensor<float, 2> >  out_gpu(gpu_out_data, reduced_tensorRange);
 
-  cl::sycl::gpu_selector s;
-    cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {
-      for (const auto& e : l) {
-        try {
-          std::rethrow_exception(e);
-        } catch (cl::sycl::exception e) {
-          std::cout << e.what() << std::endl;
-        }
-      }
-    });
-  Eigen::SyclDevice sycl_device(q);
+  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));  // copy the random input into gpu_in_data
+  out_gpu.device(sycl_device) = in_gpu.sum(red_axis);  // same reduction, evaluated through sycl_device
+  sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float));  // fetch the reduced tensor for comparison
+
+  // Check that the CPU and GPU reductions return the same result.
+  for(int j=0; j<reduced_tensorRange[0]; j++ )
+    for(int k=0; k<reduced_tensorRange[1]; k++ )
+      VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k));
+
+  sycl_device.deallocate(gpu_in_data);
+  sycl_device.deallocate(gpu_out_data);
+}
+
+static void test_last_dim_reductions_sycl(const Eigen::SyclDevice &sycl_device) {  // Sum over axis 2 on sycl_device vs. the same sum evaluated without a device.
 
   int dim_x = 567;
   int dim_y = 1;
   int dim_z = 47;
 
   array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}};
-
-  Tensor<float, 3> in(tensorRange);
-  in.setRandom();
   Eigen::array<int, 1> red_axis;
   red_axis[0] = 2;
-  Tensor<float, 2> redux = in.sum(red_axis);
   array<int, 2> reduced_tensorRange = {{dim_x, dim_y}};
-  Tensor<float, 2> redux_g(reduced_tensorRange);
-  TensorMap<Tensor<float, 3> >  in_gpu(in.data(), tensorRange);
-  float* out_data = (float*)sycl_device.allocate(dim_x*dim_y*sizeof(float));
-  TensorMap<Tensor<float, 2> >  redux_gpu(out_data, dim_x, dim_y );
-  redux_gpu.device(sycl_device) = in_gpu.sum(red_axis);
 
-  sycl_device.deallocate(out_data);
+  Tensor<float, 3> in(tensorRange);
+  Tensor<float, 2> redux(reduced_tensorRange);  // reference result, filled by in.sum(red_axis) below
+  Tensor<float, 2> redux_gpu(reduced_tensorRange);  // filled from gpu_out_data via memcpyDeviceToHost
+
+  in.setRandom();
+
+  redux= in.sum(red_axis);
+
+  float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+  float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float)));
+
+  TensorMap<Tensor<float, 3> >  in_gpu(gpu_in_data, tensorRange);
+  TensorMap<Tensor<float, 2> >  out_gpu(gpu_out_data, reduced_tensorRange);
+
+  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));  // copy the random input into gpu_in_data
+  out_gpu.device(sycl_device) = in_gpu.sum(red_axis);  // same reduction, evaluated through sycl_device
+  sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float));  // fetch the reduced tensor for comparison
   // Check that the CPU and GPU reductions return the same result.
-  for(int j=0; j<dim_x; j++ )
-    for(int k=0; k<dim_y; k++ )
+  for(int j=0; j<reduced_tensorRange[0]; j++ )
+    for(int k=0; k<reduced_tensorRange[1]; k++ )
       VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k));
+
+  sycl_device.deallocate(gpu_in_data);
+  sycl_device.deallocate(gpu_out_data);
+
 }
 
 void test_cxx11_tensor_reduction_sycl() {
-  CALL_SUBTEST((test_full_reductions_sycl()));
-  CALL_SUBTEST((test_first_dim_reductions_sycl()));
-  CALL_SUBTEST((test_last_dim_reductions_sycl()));
+  cl::sycl::gpu_selector s;
+  Eigen::SyclDevice sycl_device(s);  // built once here and passed to every subtest below
+  CALL_SUBTEST((test_full_reductions_sycl(sycl_device)));  // sum over all elements
+  CALL_SUBTEST((test_first_dim_reductions_sycl(sycl_device)));  // sum over axis 0
+  CALL_SUBTEST((test_last_dim_reductions_sycl(sycl_device)));  // sum over axis 2
 
 }