diff options
author | Mehdi Goli <mehdi.goli@codeplay.com> | 2017-01-19 11:30:59 +0000 |
---|---|---|
committer | Mehdi Goli <mehdi.goli@codeplay.com> | 2017-01-19 11:30:59 +0000 |
commit | 6bdd15f572c0b8cd21f5acba3671d536f50a9b53 (patch) | |
tree | 8343c43748cfbdefdac6e7b4e52aec7196669589 /unsupported/test | |
parent | e46e7223817cfd982edec6d8e25c77e8e2493d78 (diff) |
Adding non-dereferenceable pointer tracking for ComputeCpp backend; Adding TensorConvolutionOp for ComputeCpp; fixing typos. Modifying TensorDeviceSycl to use the LegacyPointer class.
Diffstat (limited to 'unsupported/test')
-rw-r--r-- | unsupported/test/CMakeLists.txt | 1 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_contract_sycl.cpp | 13 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_convolution_sycl.cpp | 469 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_sycl.cpp | 3 |
4 files changed, 477 insertions, 9 deletions
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index cbbd3efb4..cf07b033d 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -153,6 +153,7 @@ if(EIGEN_TEST_CXX11) ei_add_test_sycl(cxx11_tensor_contract_sycl "-std=c++11") ei_add_test_sycl(cxx11_tensor_concatenation_sycl "-std=c++11") ei_add_test_sycl(cxx11_tensor_reverse_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_convolution_sycl "-std=c++11") ei_add_test_sycl(cxx11_tensor_striding_sycl "-std=c++11") endif(EIGEN_TEST_SYCL) # It should be safe to always run these tests as there is some fallback code for diff --git a/unsupported/test/cxx11_tensor_contract_sycl.cpp b/unsupported/test/cxx11_tensor_contract_sycl.cpp index 5dacc87f2..cb8fcb74c 100644 --- a/unsupported/test/cxx11_tensor_contract_sycl.cpp +++ b/unsupported/test/cxx11_tensor_contract_sycl.cpp @@ -28,6 +28,7 @@ using Eigen::array; using Eigen::SyclDevice; using Eigen::Tensor; using Eigen::TensorMap; +static const float error_threshold =1e-4f; typedef Tensor<float, 1>::DimensionPair DimPair; template<int DataLayout, typename Device> void test_sycl_contraction(const Device& sycl_device, int m_size, int k_size, int n_size) @@ -70,10 +71,10 @@ void test_sycl_contraction(const Device& sycl_device, int m_size, int k_size, in t_result = t_left.contract(t_right, dims); for (DenseIndex i = 0; i < t_result.size(); i++) { - if (static_cast<float>(fabs(t_result(i) - t_result_gpu(i))) < 1e-4f) { + if (static_cast<float>(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) { continue; } - if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), 1e-4f)) { + if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) { continue; } std::cout << "mismatch detected at index " << i << ": " << t_result(i) @@ -132,10 +133,10 @@ void test_TF(const Device& sycl_device) t_result = t_left.contract(t_right, dims); for (DenseIndex i = 0; i < t_result.size(); i++) { - if 
(static_cast<float>(fabs(t_result(i) - t_result_gpu(i))) < 1e-4f) { + if (static_cast<float>(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) { continue; } - if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), 1e-4f)) { + if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) { continue; } std::cout << "mismatch detected at index " << i << ": " << t_result(i) @@ -187,8 +188,8 @@ void test_scalar(const Device& sycl_device, int m_size, int k_size, int n_size) t_result = t_left.contract(t_right, dims); - if (static_cast<float>(fabs(t_result() - t_result_gpu())) > 1e-4f && - !Eigen::internal::isApprox(t_result(), t_result_gpu(), 1e-4f)) { + if (static_cast<float>(fabs(t_result() - t_result_gpu())) > error_threshold && + !Eigen::internal::isApprox(t_result(), t_result_gpu(), error_threshold)) { std::cout << "mismatch detected: " << t_result() << " vs " << t_result_gpu() << std::endl; assert(false); diff --git a/unsupported/test/cxx11_tensor_convolution_sycl.cpp b/unsupported/test/cxx11_tensor_convolution_sycl.cpp new file mode 100644 index 000000000..f7e0a2742 --- /dev/null +++ b/unsupported/test/cxx11_tensor_convolution_sycl.cpp @@ -0,0 +1,469 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_convolution_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_SYCL + +#include <iostream> +#include <chrono> +#include <ctime> + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> +#include <iomanip> + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; +static const float error_threshold =1e-4f; + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_larg_expr1D(const Eigen::SyclDevice& sycl_device) +{ + int indim0 =53; + int indim1= 55; + int indim2= 51; + int outdim0=50; + int outdim1=55; + int outdim2=51; + Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; + Eigen::array<IndexType, 1> kernel_dims = {{4}}; + Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; + + Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); + Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims); + Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); + Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); + + Eigen::array<IndexType, 1> dims3{{0}}; + + input.setRandom(); + kernel.setRandom(); + result.setZero(); + result_host.setZero(); + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); + 
Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + result_host=input.convolve(kernel, dims3); + +for(int i=0; i< outdim0; i++ ){ + for(int j=0; j< outdim1; j++ ){ + for(int k=0; k< outdim2; k++ ){ + if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { + std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; + assert(false); + } + } + } +} + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_result); + +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_larg_expr2D(const Eigen::SyclDevice& sycl_device) +{ + int indim0 =53; + int indim1= 55; + int indim2= 51; + int outdim0=50; + int outdim1=51; + int outdim2=51; + Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; + Eigen::array<IndexType, 2> kernel_dims = {{4,5}}; + Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; + + Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); + Tensor<DataType, 2, DataLayout,IndexType> kernel(kernel_dims); + Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); + Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); + + Eigen::array<IndexType, 2> dims3{{0,1}}; + + input.setRandom(); + kernel.setRandom(); + result.setZero(); + result_host.setZero(); + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + 
DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + result_host=input.convolve(kernel, dims3); + +for(int i=0; i< outdim0; i++ ){ + for(int j=0; j< outdim1; j++ ){ + for(int k=0; k< outdim2; k++ ){ + if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { + std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; + assert(false); + } + } + } +} + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_result); + +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_larg_expr3D(const Eigen::SyclDevice& sycl_device) +{ + int indim0 =53; + int indim1= 55; + int indim2= 51; + int outdim0=50; + int outdim1=51; + int outdim2=49; + Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; + Eigen::array<IndexType, 3> kernel_dims = {{4,5,3}}; + Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; + + Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); + Tensor<DataType, 3, DataLayout,IndexType> kernel(kernel_dims); 
+ Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); + Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); + + Eigen::array<IndexType, 3> dims3{{0,1,2}}; + + input.setRandom(); + kernel.setRandom(); + result.setZero(); + result_host.setZero(); + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + result_host=input.convolve(kernel, dims3); + +for(int i=0; i< outdim0; i++ ){ + for(int j=0; j< outdim1; j++ ){ + for(int k=0; k< outdim2; k++ ){ + if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { + std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; + assert(false); + } + } + } +} + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_result); + +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_evals(const 
Eigen::SyclDevice& sycl_device) +{ + Eigen::array<IndexType, 2> input_dims = {{3, 3}}; + Eigen::array<IndexType, 1> kernel_dims = {{2}}; + Eigen::array<IndexType, 2> result_dims = {{2, 3}}; + + Tensor<DataType, 2, DataLayout, IndexType> input(input_dims); + Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims); + Tensor<DataType, 2, DataLayout,IndexType> result(result_dims); + + Eigen::array<IndexType, 1> dims3{{0}}; + + input.setRandom(); + kernel.setRandom(); + result.setZero(); + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_result(d_result, result_dims); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1)); // index 0 + VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1)); // index 2 + VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1)); // index 4 + VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1)); // index 1 + VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1)); // index 3 + 
VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5 + + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_result); +} + +template <typename DataType, int DataLayout, typename IndexType> +static void test_expr(const Eigen::SyclDevice& sycl_device) +{ + Eigen::array<IndexType, 2> input_dims = {{3, 3}}; + Eigen::array<IndexType, 2> kernel_dims = {{2, 2}}; + Eigen::array<IndexType, 2> result_dims = {{2, 2}}; + + Tensor<DataType, 2, DataLayout, IndexType> input(input_dims); + Tensor<DataType, 2, DataLayout, IndexType> kernel(kernel_dims); + Tensor<DataType, 2, DataLayout, IndexType> result(result_dims); + + input.setRandom(); + kernel.setRandom(); + Eigen::array<IndexType, 2> dims; + dims[0] = 0; + dims[1] = 1; + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_result(d_result, result_dims); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) + + input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1)); + 
VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) + + input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1)); + VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) + + input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1)); + VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) + + input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1)); + + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_result); +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_modes(const Eigen::SyclDevice& sycl_device){ + +Eigen::array<IndexType, 1> input_dims = {{3}}; +Eigen::array<IndexType, 1> kernel_dims = {{3}}; + +Tensor<DataType, 1, DataLayout, IndexType> input(input_dims); +Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims); + +input.setRandom(); +kernel.setRandom(); +Eigen::array<IndexType, 1> dims; +dims[0] = 0; + + input(0) = 1.0f; + input(1) = 2.0f; + input(2) = 3.0f; + kernel(0) = 0.5f; + kernel(1) = 1.0f; + kernel(2) = 0.0f; + + Eigen::array<std::pair<IndexType, IndexType>, 1> padding; + + // Emulate VALID mode (as defined in + // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). 
+ padding[0] = std::make_pair(0, 0); + Tensor<DataType, 1, DataLayout, IndexType> valid(1); + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t valid_bytes = valid.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_valid = static_cast<DataType*>(sycl_device.allocate(valid_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_valid(d_valid, valid.dimensions()); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_valid.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); + sycl_device.memcpyDeviceToHost(valid.data(), d_valid, valid_bytes); + + VERIFY_IS_EQUAL(valid.dimension(0), 1); + VERIFY_IS_APPROX(valid(0), 2.5f); + + // Emulate SAME mode (as defined in + // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). 
+ padding[0] = std::make_pair(1, 1); + Tensor<DataType, 1, DataLayout, IndexType> same(3); + std::size_t same_bytes = same.size() * sizeof(DataType); + DataType * d_same = static_cast<DataType*>(sycl_device.allocate(same_bytes)); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_same(d_same, same.dimensions()); + gpu_same.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); + sycl_device.memcpyDeviceToHost(same.data(), d_same, same_bytes); + + VERIFY_IS_EQUAL(same.dimension(0), 3); + VERIFY_IS_APPROX(same(0), 1.0f); + VERIFY_IS_APPROX(same(1), 2.5f); + VERIFY_IS_APPROX(same(2), 4.0f); + + // Emulate FULL mode (as defined in + // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). + padding[0] = std::make_pair(2, 2); + + Tensor<DataType, 1, DataLayout, IndexType> full(5); + std::size_t full_bytes = full.size() * sizeof(DataType); + DataType * d_full = static_cast<DataType*>(sycl_device.allocate(full_bytes)); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_full(d_full, full.dimensions()); + gpu_full.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); + sycl_device.memcpyDeviceToHost(full.data(), d_full, full_bytes); + + VERIFY_IS_EQUAL(full.dimension(0), 5); + VERIFY_IS_APPROX(full(0), 0.0f); + VERIFY_IS_APPROX(full(1), 1.0f); + VERIFY_IS_APPROX(full(2), 2.5f); + VERIFY_IS_APPROX(full(3), 4.0f); + VERIFY_IS_APPROX(full(4), 1.5f); + + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_valid); + sycl_device.deallocate(d_same); + sycl_device.deallocate(d_full); + +} + +template <typename DataType, int DataLayout, typename IndexType> +static void test_strides(const Eigen::SyclDevice& sycl_device){ + + Eigen::array<IndexType, 1> input_dims = {{13}}; + Eigen::array<IndexType, 1> kernel_dims = {{3}}; + + Tensor<DataType, 1, DataLayout, IndexType> input(input_dims); + Tensor<DataType, 1, DataLayout, IndexType> 
kernel(kernel_dims); + Tensor<DataType, 1, DataLayout, IndexType> result(2); + + input.setRandom(); + kernel.setRandom(); + Eigen::array<IndexType, 1> dims; + dims[0] = 0; + + Eigen::array<IndexType, 1> stride_of_3; + stride_of_3[0] = 3; + Eigen::array<IndexType, 1> stride_of_2; + stride_of_2[0] = 2; + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_result(d_result, result.dimensions()); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.stride(stride_of_3).convolve(gpu_kernel, dims).stride(stride_of_2); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) + + input(6)*kernel(2))); + VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) + + input(12)*kernel(2))); +} + +template <typename Dev_selector> void tensorConvolutionPerDevice(Dev_selector& s){ + QueueInterface queueInterface(s); + auto sycl_device=Eigen::SyclDevice(&queueInterface); + test_larg_expr1D<float, RowMajor, ptrdiff_t>(sycl_device); + test_larg_expr1D<float, ColMajor, ptrdiff_t>(sycl_device); + test_larg_expr2D<float, RowMajor, ptrdiff_t>(sycl_device); + 
test_larg_expr2D<float, ColMajor, ptrdiff_t>(sycl_device); + test_larg_expr3D<float, RowMajor, ptrdiff_t>(sycl_device); + test_larg_expr3D<float, ColMajor, ptrdiff_t>(sycl_device); + test_evals<float, ColMajor, ptrdiff_t>(sycl_device); + test_evals<float, RowMajor, ptrdiff_t>(sycl_device); + test_expr<float, ColMajor, ptrdiff_t>(sycl_device); + test_expr<float, RowMajor, ptrdiff_t>(sycl_device); + test_modes<float, ColMajor, ptrdiff_t>(sycl_device); + test_modes<float, RowMajor, ptrdiff_t>(sycl_device); + test_strides<float, ColMajor, ptrdiff_t>(sycl_device); + test_strides<float, RowMajor, ptrdiff_t>(sycl_device); +} + +void test_cxx11_tensor_convolution_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(tensorConvolutionPerDevice(device)); + } +} diff --git a/unsupported/test/cxx11_tensor_sycl.cpp b/unsupported/test/cxx11_tensor_sycl.cpp index 5992a306d..6f7e29890 100644 --- a/unsupported/test/cxx11_tensor_sycl.cpp +++ b/unsupported/test/cxx11_tensor_sycl.cpp @@ -242,9 +242,6 @@ static void test_sycl_cast(const Eigen::SyclDevice& sycl_device){ Scalar1* gpu_in_data = static_cast<Scalar1*>(sycl_device.allocate(in.size()*sizeof(Scalar1))); Scalar2 * gpu_out_data = static_cast<Scalar2*>(sycl_device.allocate(out.size()*sizeof(Scalar2))); - - - TensorMap<Tensor<Scalar1, 1, DataLayout>> gpu_in(gpu_in_data, tensorRange); TensorMap<Tensor<Scalar2, 1, DataLayout>> gpu_out(gpu_out_data, tensorRange); sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.size())*sizeof(Scalar1)); |