From 622805a0c5d216141eca3090e80d58c159e175ee Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Fri, 18 Nov 2016 16:20:42 +0000 Subject: Modifying TensorDeviceSycl.h to always create buffer of type uint8_t and convert them to the actual type at the execution on the device; adding the queue interface class to separate the lifespan of sycl queue and buffers,created for that queue, from Eigen::SyclDevice; modifying sycl tests to support the evaluation of the results for both row major and column major data layout on all different devices that are supported by Sycl{CPU; GPU; and Host}. --- unsupported/test/cxx11_tensor_forced_eval_sycl.cpp | 47 +++++++++++++--------- 1 file changed, 29 insertions(+), 18 deletions(-) (limited to 'unsupported/test/cxx11_tensor_forced_eval_sycl.cpp') diff --git a/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp index 5690da723..70b182558 100644 --- a/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp +++ b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp @@ -21,33 +21,33 @@ #include using Eigen::Tensor; - +template void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) { int sizeDim1 = 100; - int sizeDim2 = 200; - int sizeDim3 = 200; + int sizeDim2 = 20; + int sizeDim3 = 20; Eigen::array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; - Eigen::Tensor in1(tensorRange); - Eigen::Tensor in2(tensorRange); - Eigen::Tensor out(tensorRange); + Eigen::Tensor in1(tensorRange); + Eigen::Tensor in2(tensorRange); + Eigen::Tensor out(tensorRange); - float * gpu_in1_data = static_cast(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float))); - float * gpu_in2_data = static_cast(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float))); - float * gpu_out_data = static_cast(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float))); + DataType * gpu_in1_data = static_cast(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_in2_data = static_cast(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_out_data = static_cast(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType))); in1 = in1.random() + in1.constant(10.0f); in2 = in2.random() + in2.constant(10.0f); // creating TensorMap from tensor - Eigen::TensorMap> gpu_in1(gpu_in1_data, tensorRange); - Eigen::TensorMap> gpu_in2(gpu_in2_data, tensorRange); - Eigen::TensorMap> gpu_out(gpu_out_data, tensorRange); - sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(float)); - sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in1.dimensions().TotalSize())*sizeof(float)); + Eigen::TensorMap> gpu_in1(gpu_in1_data, tensorRange); + Eigen::TensorMap> gpu_in2(gpu_in2_data, tensorRange); + Eigen::TensorMap> gpu_out(gpu_out_data, tensorRange); + sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in1.dimensions().TotalSize())*sizeof(DataType)); /// c=(a+b)*b gpu_out.device(sycl_device) =(gpu_in1 + gpu_in2).eval() * gpu_in2; - sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float)); + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType)); for (int i = 0; i < sizeDim1; ++i) { for (int j = 0; j < sizeDim2; ++j) { for (int k = 0; k < sizeDim3; ++k) { @@ -63,8 +63,19 @@ void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) { } +template void tensorForced_evalperDevice(Dev_selector s){ + QueueInterface queueInterface(s); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_forced_eval_sycl(sycl_device); + test_forced_eval_sycl(sycl_device); +} void test_cxx11_tensor_forced_eval_sycl() { - cl::sycl::gpu_selector s; - Eigen::SyclDevice sycl_device(s); - CALL_SUBTEST(test_forced_eval_sycl(sycl_device)); + + printf("Test on GPU: OpenCL\n"); + CALL_SUBTEST(tensorForced_evalperDevice((cl::sycl::gpu_selector()))); + printf("repeating the test on CPU: OpenCL\n"); + CALL_SUBTEST(tensorForced_evalperDevice((cl::sycl::cpu_selector()))); + printf("repeating the test on CPU: HOST\n"); + CALL_SUBTEST(tensorForced_evalperDevice((cl::sycl::host_selector()))); + printf("Test Passed******************\n" ); } -- cgit v1.2.3