From 622805a0c5d216141eca3090e80d58c159e175ee Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Fri, 18 Nov 2016 16:20:42 +0000 Subject: Modifying TensorDeviceSycl.h to always create buffers of type uint8_t and convert them to the actual type at execution time on the device; adding the queue interface class to separate the lifespan of the SYCL queue, and of the buffers created for that queue, from Eigen::SyclDevice; modifying the SYCL tests to support the evaluation of the results for both row-major and column-major data layouts on all the devices that are supported by SYCL (CPU, GPU, and Host). --- unsupported/test/cxx11_tensor_reduction_sycl.cpp | 83 ++++++++++++++---------- 1 file changed, 47 insertions(+), 36 deletions(-) (limited to 'unsupported/test/cxx11_tensor_reduction_sycl.cpp') diff --git a/unsupported/test/cxx11_tensor_reduction_sycl.cpp b/unsupported/test/cxx11_tensor_reduction_sycl.cpp index a9ef82907..6b62737b8 100644 --- a/unsupported/test/cxx11_tensor_reduction_sycl.cpp +++ b/unsupported/test/cxx11_tensor_reduction_sycl.cpp @@ -21,37 +21,37 @@ #include - +template static void test_full_reductions_sycl(const Eigen::SyclDevice& sycl_device) { const int num_rows = 452; const int num_cols = 765; array tensorRange = {{num_rows, num_cols}}; - Tensor in(tensorRange); - Tensor full_redux; - Tensor full_redux_gpu; + Tensor in(tensorRange); + Tensor full_redux; + Tensor full_redux_gpu; in.setRandom(); full_redux = in.sum(); - float* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); - float* gpu_out_data =(float*)sycl_device.allocate(sizeof(float)); + DataType* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); + DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType)); - TensorMap > in_gpu(gpu_in_data, tensorRange); - TensorMap > out_gpu(gpu_out_data); + TensorMap > in_gpu(gpu_in_data, tensorRange); + TensorMap > out_gpu(gpu_out_data); - 
sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); out_gpu.device(sycl_device) = in_gpu.sum(); - sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(float)); + sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType)); // Check that the CPU and GPU reductions return the same result. VERIFY_IS_APPROX(full_redux_gpu(), full_redux()); sycl_device.deallocate(gpu_in_data); sycl_device.deallocate(gpu_out_data); } - +template static void test_first_dim_reductions_sycl(const Eigen::SyclDevice& sycl_device) { int dim_x = 145; @@ -63,23 +63,23 @@ static void test_first_dim_reductions_sycl(const Eigen::SyclDevice& sycl_device) red_axis[0] = 0; array reduced_tensorRange = {{dim_y, dim_z}}; - Tensor in(tensorRange); - Tensor redux(reduced_tensorRange); - Tensor redux_gpu(reduced_tensorRange); + Tensor in(tensorRange); + Tensor redux(reduced_tensorRange); + Tensor redux_gpu(reduced_tensorRange); in.setRandom(); redux= in.sum(red_axis); - float* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); - float* gpu_out_data = static_cast(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float))); + DataType* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); + DataType* gpu_out_data = static_cast(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType))); - TensorMap > in_gpu(gpu_in_data, tensorRange); - TensorMap > out_gpu(gpu_out_data, reduced_tensorRange); + TensorMap > in_gpu(gpu_in_data, tensorRange); + TensorMap > out_gpu(gpu_out_data, reduced_tensorRange); - sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); 
out_gpu.device(sycl_device) = in_gpu.sum(red_axis); - sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float)); + sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType)); // Check that the CPU and GPU reductions return the same result. for(int j=0; j static void test_last_dim_reductions_sycl(const Eigen::SyclDevice &sycl_device) { int dim_x = 567; @@ -101,23 +102,23 @@ static void test_last_dim_reductions_sycl(const Eigen::SyclDevice &sycl_device) red_axis[0] = 2; array reduced_tensorRange = {{dim_x, dim_y}}; - Tensor in(tensorRange); - Tensor redux(reduced_tensorRange); - Tensor redux_gpu(reduced_tensorRange); + Tensor in(tensorRange); + Tensor redux(reduced_tensorRange); + Tensor redux_gpu(reduced_tensorRange); in.setRandom(); redux= in.sum(red_axis); - float* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); - float* gpu_out_data = static_cast(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float))); + DataType* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); + DataType* gpu_out_data = static_cast(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType))); - TensorMap > in_gpu(gpu_in_data, tensorRange); - TensorMap > out_gpu(gpu_out_data, reduced_tensorRange); + TensorMap > in_gpu(gpu_in_data, tensorRange); + TensorMap > out_gpu(gpu_out_data, reduced_tensorRange); - sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); out_gpu.device(sycl_device) = in_gpu.sum(red_axis); - sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float)); + sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, 
redux_gpu.dimensions().TotalSize()*sizeof(DataType)); // Check that the CPU and GPU reductions return the same result. for(int j=0; j void sycl_reduction_test_per_device(dev_Selector s){ + QueueInterface queueInterface(s); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_full_reductions_sycl(sycl_device); + test_first_dim_reductions_sycl(sycl_device); + test_last_dim_reductions_sycl(sycl_device); + test_full_reductions_sycl(sycl_device); + test_first_dim_reductions_sycl(sycl_device); + test_last_dim_reductions_sycl(sycl_device); +} void test_cxx11_tensor_reduction_sycl() { - cl::sycl::gpu_selector s; - Eigen::SyclDevice sycl_device(s); - CALL_SUBTEST((test_full_reductions_sycl(sycl_device))); - CALL_SUBTEST((test_first_dim_reductions_sycl(sycl_device))); - CALL_SUBTEST((test_last_dim_reductions_sycl(sycl_device))); - + printf("Test on GPU: OpenCL\n"); + CALL_SUBTEST(sycl_reduction_test_per_device((cl::sycl::gpu_selector()))); + printf("repeating the test on CPU: OpenCL\n"); + CALL_SUBTEST(sycl_reduction_test_per_device((cl::sycl::cpu_selector()))); + printf("repeating the test on CPU: HOST\n"); + CALL_SUBTEST(sycl_reduction_test_per_device((cl::sycl::host_selector()))); + printf("Test Passed******************\n" ); } -- cgit v1.2.3