From 00f32752f7d0b193c6788691c3cf0b76457a044d Mon Sep 17 00:00:00 2001
From: Mehdi Goli
Date: Thu, 28 Nov 2019 10:08:54 +0000
Subject: [SYCL] Rebasing the SYCL support branch on top of the Eigen upstream master branch.

* Unifying all loadLocalTile calls for lhs and rhs into an extract_block function.
* Adding the get_tensor operation, which was missing in TensorContractionMapper.
* Adding the missing -D flag to CMake for the Disable_Skinny contraction operation.
* Wrapping all the indices in TensorScanSycl into a Scan parameter struct.
* Fixing a typo in Device SYCL.
* Unifying the load to private registers for the tall/skinny no-shared path.
* Unifying the load to a vector tile for the tensor-vector/vector-tensor operation.
* Removing all the LHS/RHS classes for extracting data from global memory.
* Removing Outputfunction from TensorContractionSkinnyNoshared.
* Combining the local-memory version of tall/skinny and normal tensor contraction into one kernel.
* Combining the no-local-memory version of tall/skinny and normal tensor contraction into one kernel.
* Combining general Tensor-Vector and Vector-Tensor contraction into one kernel.
* Making double buffering optional for tensor contraction when the local-memory version is used.
* Modifying the benchmark to accept custom reduction sizes.
* Disabling AVX optimization for the SYCL backend on the host to allow SSE optimization on the host.
* Adding tests for SYCL.
* Modifying the SYCL CMake.

Two illustrative sketches, one of the shuffle indexing that the updated test
verifies and one of the scan parameter-struct refactor, follow the diff below.
---
 unsupported/test/cxx11_tensor_shuffling_sycl.cpp | 52 ++++++++++++------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_shuffling_sycl.cpp b/unsupported/test/cxx11_tensor_shuffling_sycl.cpp
index 0e8cc3bd2..ca4e8b5ef 100644
--- a/unsupported/test/cxx11_tensor_shuffling_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_shuffling_sycl.cpp
@@ -12,14 +12,12 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
 #define EIGEN_USE_SYCL
-
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>

@@ -29,33 +27,33 @@
 using Eigen::Tensor;
 using Eigen::TensorMap;

 template <typename DataType, int DataLayout, typename IndexType>
-static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device)
-{
+static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device) {
   IndexType sizeDim1 = 2;
   IndexType sizeDim2 = 3;
   IndexType sizeDim3 = 5;
   IndexType sizeDim4 = 7;
   array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
-  Tensor<DataType, 4, DataLayout,IndexType> tensor(tensorRange);
-  Tensor<DataType, 4, DataLayout,IndexType> no_shuffle(tensorRange);
+  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
+  Tensor<DataType, 4, DataLayout, IndexType> no_shuffle(tensorRange);
   tensor.setRandom();

-  const size_t buffSize =tensor.size()*sizeof(DataType);
+  const size_t buffSize = tensor.size() * sizeof(DataType);
   array<IndexType, 4> shuffles;
   shuffles[0] = 0;
   shuffles[1] = 1;
   shuffles[2] = 2;
   shuffles[3] = 3;
-  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(buffSize));
-  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(buffSize));
-
+  DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(buffSize));
+  DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(buffSize));
-
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu2(gpu_data2, tensorRange);
+  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu1(gpu_data1,
+                                                             tensorRange);
+  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu2(gpu_data2,
+                                                             tensorRange);

   sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), buffSize);
-  gpu2.device(sycl_device)=gpu1.shuffle(shuffles);
+  gpu2.device(sycl_device) = gpu1.shuffle(shuffles);
   sycl_device.memcpyDeviceToHost(no_shuffle.data(), gpu_data2, buffSize);
   sycl_device.synchronize();

@@ -68,7 +66,7 @@ static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device)
     for (IndexType j = 0; j < sizeDim2; ++j) {
       for (IndexType k = 0; k < sizeDim3; ++k) {
         for (IndexType l = 0; l < sizeDim4; ++l) {
-          VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l));
+          VERIFY_IS_EQUAL(tensor(i, j, k, l), no_shuffle(i, j, k, l));
         }
       }
     }
@@ -78,12 +76,14 @@ static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device)
   shuffles[1] = 3;
   shuffles[2] = 1;
   shuffles[3] = 0;
-  array<IndexType, 4> tensorrangeShuffle = {{sizeDim3, sizeDim4, sizeDim2, sizeDim1}};
-  Tensor<DataType, 4, DataLayout,IndexType> shuffle(tensorrangeShuffle);
-  DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(buffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu3(gpu_data3, tensorrangeShuffle);
-
-  gpu3.device(sycl_device)=gpu1.shuffle(shuffles);
+  array<IndexType, 4> tensorrangeShuffle = {
+      {sizeDim3, sizeDim4, sizeDim2, sizeDim1}};
+  Tensor<DataType, 4, DataLayout, IndexType> shuffle(tensorrangeShuffle);
+  DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(buffSize));
+  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu3(
+      gpu_data3, tensorrangeShuffle);
+
+  gpu3.device(sycl_device) = gpu1.shuffle(shuffles);
   sycl_device.memcpyDeviceToHost(shuffle.data(), gpu_data3, buffSize);
   sycl_device.synchronize();

@@ -96,24 +96,22 @@ static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device)
     for (IndexType j = 0; j < sizeDim2; ++j) {
       for (IndexType k = 0; k < sizeDim3; ++k) {
         for (IndexType l = 0; l < sizeDim4; ++l) {
-          VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i));
+          VERIFY_IS_EQUAL(tensor(i, j, k, l), shuffle(k, l, j, i));
         }
       }
     }
   }
 }

-
-template <typename DataType, typename dev_Selector> void sycl_shuffling_test_per_device(dev_Selector s){
+template <typename DataType, typename dev_Selector>
+void sycl_shuffling_test_per_device(dev_Selector s) {
   QueueInterface queueInterface(s);
   auto sycl_device = Eigen::SyclDevice(&queueInterface);
   test_simple_shuffling_sycl<DataType, RowMajor, int64_t>(sycl_device);
   test_simple_shuffling_sycl<DataType, ColMajor, int64_t>(sycl_device);
-
 }
-EIGEN_DECLARE_TEST(cxx11_tensor_shuffling_sycl)
-{
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
+EIGEN_DECLARE_TEST(cxx11_tensor_shuffling_sycl) {
+  for (const auto& device : Eigen::get_sycl_supported_devices()) {
     CALL_SUBTEST(sycl_shuffling_test_per_device<float>(device));
   }
 }
--
cgit v1.2.3
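
The updated test checks the shuffle indexing relation tensor(i, j, k, l) == shuffle(k, l, j, i)
for shuffles = {2, 3, 1, 0}. The same relation can be reproduced on the host without a SYCL
queue; the snippet below is a minimal sketch using only the public Eigen Tensor module. It is
not part of the patch, and it assumes the Eigen source tree is on the include path.

// Host-only sketch of the shuffle semantics that the SYCL test verifies.
// With shuffles = {2, 3, 1, 0}: output dimension d equals input dimension
// shuffles[d], and output(k, l, j, i) == input(i, j, k, l).
#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<float, 4> input(2, 3, 5, 7);  // same sizes as the test
  input.setRandom();

  Eigen::array<int, 4> shuffles{{2, 3, 1, 0}};
  Eigen::Tensor<float, 4> output = input.shuffle(shuffles);

  // Output range is {5, 7, 3, 2}, mirroring tensorrangeShuffle in the test.
  assert(output.dimension(0) == 5);
  assert(output.dimension(3) == 2);
  assert(output(4, 6, 2, 1) == input(1, 2, 4, 6));
  return 0;
}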
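
The "Scan parameter struct" item in the change list groups the many index arguments of the
SYCL scan kernel into one object. The sketch below only illustrates that refactoring pattern;
the names (ScanParameters, scan_inclusive) and the struct members are hypothetical and are not
the actual TensorScanSycl definitions.

// Illustrative only: hypothetical ScanParameters/scan_inclusive names showing
// the "wrap the kernel indices in a parameter struct" refactor; this is not
// the actual Eigen TensorScanSycl code.
#include <cstdio>
#include <vector>

template <typename Index>
struct ScanParameters {
  Index scan_size;        // length of the scanned dimension
  Index scan_stride;      // stride between consecutive elements of a scan line
  Index non_scan_size;    // number of independent scan lines
  Index non_scan_stride;  // stride between the first elements of scan lines
};

// Before the refactor each of these indices would be a separate kernel
// argument; after it, a single struct travels from the host to the functor.
template <typename Scalar, typename Index>
void scan_inclusive(Scalar* data, const ScanParameters<Index>& params) {
  for (Index line = 0; line < params.non_scan_size; ++line) {
    Scalar running = Scalar(0);
    for (Index i = 0; i < params.scan_size; ++i) {
      const Index offset =
          line * params.non_scan_stride + i * params.scan_stride;
      running += data[offset];  // inclusive prefix sum along the scan axis
      data[offset] = running;
    }
  }
}

int main() {
  // 3 rows of 4 elements, scanned along the contiguous axis.
  std::vector<float> buf(12, 1.0f);
  const ScanParameters<int> params{/*scan_size=*/4, /*scan_stride=*/1,
                                   /*non_scan_size=*/3, /*non_scan_stride=*/4};
  scan_inclusive(buf.data(), params);
  std::printf("%g %g %g %g\n", buf[0], buf[1], buf[2], buf[3]);  // 1 2 3 4
  return 0;
}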