diff options
author | Mehdi Goli <mehdi.goli@codeplay.com> | 2019-11-28 10:08:54 +0000 |
---|---|---|
committer | Mehdi Goli <mehdi.goli@codeplay.com> | 2019-11-28 10:08:54 +0000 |
commit | 00f32752f7d0b193c6788691c3cf0b76457a044d (patch) | |
tree | 792e46110f0751ea8802fa9d403d1472d5977ac3 /unsupported/test/cxx11_tensor_reverse_sycl.cpp | |
parent | ea51a9eace7e4f0ea839e61eb2df85ccfb94aee8 (diff) |
[SYCL] Rebasing the SYCL support branch on top of the Eigen upstream master branch.
* Unifying all loadLocalTile from lhs and rhs to an extract_block function.
* Adding get_tensor operation which was missing in TensorContractionMapper.
* Adding the -D method missing from cmake for Disable_Skinny Contraction operation.
* Wrapping all the indices in TensorScanSycl into Scan parameter struct.
* Fixing typo in Device SYCL
* Unifying load to private register for tall/skinny no shared
* Unifying load to vector tile for tensor-vector/vector-tensor operation
* Removing all the LHS/RHS class for extracting data from global
* Removing Outputfunction from TensorContractionSkinnyNoshared.
* Combining the local memory version of tall/skinny and normal tensor contraction into one kernel.
* Combining the no-local memory version of tall/skinny and normal tensor contraction into one kernel.
* Combining General Tensor-Vector and VectorTensor contraction into one kernel.
* Making double buffering optional for Tensor contraction when the local memory version is used.
* Modifying benchmark to accept custom Reduction Sizes
* Disabling AVX optimization for SYCL backend on the host to allow SSE optimization to the host
* Adding Test for SYCL
* Modifying SYCL CMake
Diffstat (limited to 'unsupported/test/cxx11_tensor_reverse_sycl.cpp')
-rw-r--r-- | unsupported/test/cxx11_tensor_reverse_sycl.cpp | 128 |
1 files changed, 80 insertions, 48 deletions
diff --git a/unsupported/test/cxx11_tensor_reverse_sycl.cpp b/unsupported/test/cxx11_tensor_reverse_sycl.cpp index 77c2235d1..dd30c235d 100644 --- a/unsupported/test/cxx11_tensor_reverse_sycl.cpp +++ b/unsupported/test/cxx11_tensor_reverse_sycl.cpp @@ -20,10 +20,8 @@ #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> - template <typename DataType, int DataLayout, typename IndexType> -static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) { - +static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) { IndexType dim1 = 2; IndexType dim2 = 3; IndexType dim3 = 5; @@ -40,21 +38,30 @@ static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) { dim_rev[2] = true; dim_rev[3] = false; - DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data =static_cast<DataType*>(sycl_device.allocate(reversed_tensor.dimensions().TotalSize()*sizeof(DataType))); + DataType* gpu_in_data = static_cast<DataType*>( + sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType))); + DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate( + reversed_tensor.dimensions().TotalSize() * sizeof(DataType))); - TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); - TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu(gpu_out_data, tensorRange); + TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, + tensorRange); + TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu(gpu_out_data, + tensorRange); - sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyHostToDevice( + gpu_in_data, tensor.data(), + (tensor.dimensions().TotalSize()) * sizeof(DataType)); out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); - sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, 
reversed_tensor.dimensions().TotalSize()*sizeof(DataType)); + sycl_device.memcpyDeviceToHost( + reversed_tensor.data(), gpu_out_data, + reversed_tensor.dimensions().TotalSize() * sizeof(DataType)); // Check that the CPU and GPU reductions return the same result. for (IndexType i = 0; i < 2; ++i) { for (IndexType j = 0; j < 3; ++j) { for (IndexType k = 0; k < 5; ++k) { for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(i,2-j,4-k,l)); + VERIFY_IS_EQUAL(tensor(i, j, k, l), + reversed_tensor(i, 2 - j, 4 - k, l)); } } } @@ -65,13 +72,15 @@ static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) { dim_rev[3] = false; out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); - sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType)); + sycl_device.memcpyDeviceToHost( + reversed_tensor.data(), gpu_out_data, + reversed_tensor.dimensions().TotalSize() * sizeof(DataType)); for (IndexType i = 0; i < 2; ++i) { for (IndexType j = 0; j < 3; ++j) { for (IndexType k = 0; k < 5; ++k) { for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,l)); + VERIFY_IS_EQUAL(tensor(i, j, k, l), reversed_tensor(1 - i, j, k, l)); } } } @@ -82,13 +91,16 @@ static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) { dim_rev[2] = false; dim_rev[3] = true; out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); - sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType)); + sycl_device.memcpyDeviceToHost( + reversed_tensor.data(), gpu_out_data, + reversed_tensor.dimensions().TotalSize() * sizeof(DataType)); for (IndexType i = 0; i < 2; ++i) { for (IndexType j = 0; j < 3; ++j) { for (IndexType k = 0; k < 5; ++k) { for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,6-l)); + VERIFY_IS_EQUAL(tensor(i, j, k, l), + 
reversed_tensor(1 - i, j, k, 6 - l)); } } } @@ -98,11 +110,9 @@ static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) { sycl_device.deallocate(gpu_out_data); } - - template <typename DataType, int DataLayout, typename IndexType> -static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, bool LValue) -{ +static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, + bool LValue) { IndexType dim1 = 2; IndexType dim2 = 3; IndexType dim3 = 5; @@ -120,24 +130,32 @@ static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, bool LValue dim_rev[2] = false; dim_rev[3] = true; - DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data_expected =static_cast<DataType*>(sycl_device.allocate(expected.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data_result =static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType))); - - TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); - TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_expected(gpu_out_data_expected, tensorRange); - TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_result(gpu_out_data_result, tensorRange); + DataType* gpu_in_data = static_cast<DataType*>( + sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType))); + DataType* gpu_out_data_expected = static_cast<DataType*>(sycl_device.allocate( + expected.dimensions().TotalSize() * sizeof(DataType))); + DataType* gpu_out_data_result = static_cast<DataType*>( + sycl_device.allocate(result.dimensions().TotalSize() * sizeof(DataType))); + TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, + tensorRange); + TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_expected( + gpu_out_data_expected, tensorRange); + TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_result( + 
gpu_out_data_result, tensorRange); - sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyHostToDevice( + gpu_in_data, tensor.data(), + (tensor.dimensions().TotalSize()) * sizeof(DataType)); if (LValue) { out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu; } else { out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev); } - sycl_device.memcpyDeviceToHost(expected.data(), gpu_out_data_expected, expected.dimensions().TotalSize()*sizeof(DataType)); - + sycl_device.memcpyDeviceToHost( + expected.data(), gpu_out_data_expected, + expected.dimensions().TotalSize() * sizeof(DataType)); array<IndexType, 4> src_slice_dim; src_slice_dim[0] = 2; @@ -154,8 +172,9 @@ static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, bool LValue for (IndexType i = 0; i < 5; ++i) { if (LValue) { - out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) = - in_gpu.slice(src_slice_start, src_slice_dim); + out_gpu_result.slice(dst_slice_start, dst_slice_dim) + .reverse(dim_rev) + .device(sycl_device) = in_gpu.slice(src_slice_start, src_slice_dim); } else { out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) = in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev); @@ -163,13 +182,15 @@ static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, bool LValue src_slice_start[2] += 1; dst_slice_start[2] += 1; } - sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType)); + sycl_device.memcpyDeviceToHost( + result.data(), gpu_out_data_result, + result.dimensions().TotalSize() * sizeof(DataType)); for (IndexType i = 0; i < expected.dimension(0); ++i) { for (IndexType j = 0; j < expected.dimension(1); ++j) { for (IndexType k = 0; k < expected.dimension(2); ++k) { for (IndexType l = 0; l < expected.dimension(3); ++l) { - VERIFY_IS_EQUAL(result(i,j,k,l), 
expected(i,j,k,l)); + VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l)); } } } @@ -177,34 +198,37 @@ static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, bool LValue dst_slice_start[2] = 0; result.setRandom(); - sycl_device.memcpyHostToDevice(gpu_out_data_result, result.data(),(result.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyHostToDevice( + gpu_out_data_result, result.data(), + (result.dimensions().TotalSize()) * sizeof(DataType)); for (IndexType i = 0; i < 5; ++i) { - if (LValue) { - out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) = - in_gpu.slice(dst_slice_start, dst_slice_dim); - } else { - out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) = - in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim); - } + if (LValue) { + out_gpu_result.slice(dst_slice_start, dst_slice_dim) + .reverse(dim_rev) + .device(sycl_device) = in_gpu.slice(dst_slice_start, dst_slice_dim); + } else { + out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) = + in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim); + } dst_slice_start[2] += 1; } - sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType)); + sycl_device.memcpyDeviceToHost( + result.data(), gpu_out_data_result, + result.dimensions().TotalSize() * sizeof(DataType)); for (IndexType i = 0; i < expected.dimension(0); ++i) { for (IndexType j = 0; j < expected.dimension(1); ++j) { for (IndexType k = 0; k < expected.dimension(2); ++k) { for (IndexType l = 0; l < expected.dimension(3); ++l) { - VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l)); + VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l)); } } } } } - - -template<typename DataType> void sycl_reverse_test_per_device(const cl::sycl::device& d){ - std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl; +template <typename DataType> 
+void sycl_reverse_test_per_device(const cl::sycl::device& d) { QueueInterface queueInterface(d); auto sycl_device = Eigen::SyclDevice(&queueInterface); test_simple_reverse<DataType, RowMajor, int64_t>(sycl_device); @@ -215,7 +239,15 @@ template<typename DataType> void sycl_reverse_test_per_device(const cl::sycl::de test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, true); } EIGEN_DECLARE_TEST(cxx11_tensor_reverse_sycl) { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_reverse_test_per_device<float>(device)); + for (const auto& device : Eigen::get_sycl_supported_devices()) { + std::cout << "Running on " + << device.get_info<cl::sycl::info::device::name>() << std::endl; + CALL_SUBTEST_1(sycl_reverse_test_per_device<short>(device)); + CALL_SUBTEST_2(sycl_reverse_test_per_device<int>(device)); + CALL_SUBTEST_3(sycl_reverse_test_per_device<unsigned int>(device)); +#ifdef EIGEN_SYCL_DOUBLE_SUPPORT + CALL_SUBTEST_4(sycl_reverse_test_per_device<double>(device)); +#endif + CALL_SUBTEST_5(sycl_reverse_test_per_device<float>(device)); } } |