diff options
Diffstat (limited to 'unsupported/test/cxx11_tensor_reduction_sycl.cpp')
-rw-r--r-- | unsupported/test/cxx11_tensor_reduction_sycl.cpp | 136 |
1 file changed, 85 insertions(+), 51 deletions(-)
diff --git a/unsupported/test/cxx11_tensor_reduction_sycl.cpp b/unsupported/test/cxx11_tensor_reduction_sycl.cpp index 941469029..440d48bca 100644 --- a/unsupported/test/cxx11_tensor_reduction_sycl.cpp +++ b/unsupported/test/cxx11_tensor_reduction_sycl.cpp @@ -14,97 +14,129 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_reduction_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t #define EIGEN_USE_SYCL #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> -template <typename DataType, int DataLayout> -static void test_full_reductions_sycl(const Eigen::SyclDevice& sycl_device) { +template <typename DataType, int DataLayout, typename IndexType> +static void test_full_reductions_mean_sycl(const Eigen::SyclDevice& sycl_device) { - const int num_rows = 452; - const int num_cols = 765; - array<int, 2> tensorRange = {{num_rows, num_cols}}; + const IndexType num_rows = 452; + const IndexType num_cols = 765; + array<IndexType, 2> tensorRange = {{num_rows, num_cols}}; - Tensor<DataType, 2, DataLayout> in(tensorRange); - Tensor<DataType, 0, DataLayout> full_redux; - Tensor<DataType, 0, DataLayout> full_redux_gpu; + Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange); + Tensor<DataType, 0, DataLayout, IndexType> full_redux; + Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu; in.setRandom(); - full_redux = in.sum(); + full_redux = in.mean(); DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType)); - TensorMap<Tensor<DataType, 2, DataLayout> > in_gpu(gpu_in_data, tensorRange); - TensorMap<Tensor<DataType, 0, DataLayout> > out_gpu(gpu_out_data); + TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<DataType, 0, DataLayout, IndexType> > out_gpu(gpu_out_data); 
sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); - out_gpu.device(sycl_device) = in_gpu.sum(); + out_gpu.device(sycl_device) = in_gpu.mean(); sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType)); // Check that the CPU and GPU reductions return the same result. VERIFY_IS_APPROX(full_redux_gpu(), full_redux()); + sycl_device.deallocate(gpu_in_data); + sycl_device.deallocate(gpu_out_data); +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_full_reductions_min_sycl(const Eigen::SyclDevice& sycl_device) { + + const IndexType num_rows = 876; + const IndexType num_cols = 953; + array<IndexType, 2> tensorRange = {{num_rows, num_cols}}; + + Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange); + Tensor<DataType, 0, DataLayout, IndexType> full_redux; + Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu; + + in.setRandom(); + + full_redux = in.minimum(); + + DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); + DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType)); + + TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<DataType, 0, DataLayout, IndexType> > out_gpu(gpu_out_data); + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); + out_gpu.device(sycl_device) = in_gpu.minimum(); + sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType)); + // Check that the CPU and GPU reductions return the same result. 
+ VERIFY_IS_APPROX(full_redux_gpu(), full_redux()); sycl_device.deallocate(gpu_in_data); sycl_device.deallocate(gpu_out_data); } -template <typename DataType, int DataLayout> -static void test_first_dim_reductions_sycl(const Eigen::SyclDevice& sycl_device) { - int dim_x = 145; - int dim_y = 1; - int dim_z = 67; - array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}}; - Eigen::array<int, 1> red_axis; +template <typename DataType, int DataLayout, typename IndexType> +static void test_first_dim_reductions_max_sycl(const Eigen::SyclDevice& sycl_device) { + + IndexType dim_x = 145; + IndexType dim_y = 1; + IndexType dim_z = 67; + + array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}}; + Eigen::array<IndexType, 1> red_axis; red_axis[0] = 0; - array<int, 2> reduced_tensorRange = {{dim_y, dim_z}}; + array<IndexType, 2> reduced_tensorRange = {{dim_y, dim_z}}; - Tensor<DataType, 3, DataLayout> in(tensorRange); - Tensor<DataType, 2, DataLayout> redux(reduced_tensorRange); - Tensor<DataType, 2, DataLayout> redux_gpu(reduced_tensorRange); + Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange); + Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange); + Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange); in.setRandom(); - redux= in.sum(red_axis); + redux= in.maximum(red_axis); DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType))); - TensorMap<Tensor<DataType, 3, DataLayout> > in_gpu(gpu_in_data, tensorRange); - TensorMap<Tensor<DataType, 2, DataLayout> > out_gpu(gpu_out_data, reduced_tensorRange); + TensorMap<Tensor<DataType, 3, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > out_gpu(gpu_out_data, reduced_tensorRange); sycl_device.memcpyHostToDevice(gpu_in_data, 
in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); - out_gpu.device(sycl_device) = in_gpu.sum(red_axis); + out_gpu.device(sycl_device) = in_gpu.maximum(red_axis); sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType)); // Check that the CPU and GPU reductions return the same result. - for(int j=0; j<reduced_tensorRange[0]; j++ ) - for(int k=0; k<reduced_tensorRange[1]; k++ ) + for(IndexType j=0; j<reduced_tensorRange[0]; j++ ) + for(IndexType k=0; k<reduced_tensorRange[1]; k++ ) VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k)); sycl_device.deallocate(gpu_in_data); sycl_device.deallocate(gpu_out_data); } -template <typename DataType, int DataLayout> -static void test_last_dim_reductions_sycl(const Eigen::SyclDevice &sycl_device) { +template <typename DataType, int DataLayout, typename IndexType> +static void test_last_dim_reductions_sum_sycl(const Eigen::SyclDevice &sycl_device) { - int dim_x = 567; - int dim_y = 1; - int dim_z = 47; + IndexType dim_x = 567; + IndexType dim_y = 1; + IndexType dim_z = 47; - array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}}; - Eigen::array<int, 1> red_axis; + array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}}; + Eigen::array<IndexType, 1> red_axis; red_axis[0] = 2; - array<int, 2> reduced_tensorRange = {{dim_x, dim_y}}; + array<IndexType, 2> reduced_tensorRange = {{dim_x, dim_y}}; - Tensor<DataType, 3, DataLayout> in(tensorRange); - Tensor<DataType, 2, DataLayout> redux(reduced_tensorRange); - Tensor<DataType, 2, DataLayout> redux_gpu(reduced_tensorRange); + Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange); + Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange); + Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange); in.setRandom(); @@ -113,15 +145,15 @@ static void test_last_dim_reductions_sycl(const Eigen::SyclDevice &sycl_device) DataType* gpu_in_data = 
static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType))); - TensorMap<Tensor<DataType, 3, DataLayout> > in_gpu(gpu_in_data, tensorRange); - TensorMap<Tensor<DataType, 2, DataLayout> > out_gpu(gpu_out_data, reduced_tensorRange); + TensorMap<Tensor<DataType, 3, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > out_gpu(gpu_out_data, reduced_tensorRange); sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); out_gpu.device(sycl_device) = in_gpu.sum(red_axis); sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType)); // Check that the CPU and GPU reductions return the same result. - for(int j=0; j<reduced_tensorRange[0]; j++ ) - for(int k=0; k<reduced_tensorRange[1]; k++ ) + for(IndexType j=0; j<reduced_tensorRange[0]; j++ ) + for(IndexType k=0; k<reduced_tensorRange[1]; k++ ) VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k)); sycl_device.deallocate(gpu_in_data); @@ -133,12 +165,14 @@ template<typename DataType> void sycl_reduction_test_per_device(const cl::sycl:: QueueInterface queueInterface(d); auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_full_reductions_sycl<DataType, RowMajor>(sycl_device); - test_first_dim_reductions_sycl<DataType, RowMajor>(sycl_device); - test_last_dim_reductions_sycl<DataType, RowMajor>(sycl_device); - test_full_reductions_sycl<DataType, ColMajor>(sycl_device); - test_first_dim_reductions_sycl<DataType, ColMajor>(sycl_device); - test_last_dim_reductions_sycl<DataType, ColMajor>(sycl_device); + test_full_reductions_mean_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_full_reductions_min_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_first_dim_reductions_max_sycl<DataType, RowMajor, 
int64_t>(sycl_device); + test_last_dim_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_full_reductions_mean_sycl<DataType, ColMajor, int64_t>(sycl_device); + test_full_reductions_min_sycl<DataType, ColMajor, int64_t>(sycl_device); + test_first_dim_reductions_max_sycl<DataType, ColMajor, int64_t>(sycl_device); + test_last_dim_reductions_sum_sycl<DataType, ColMajor, int64_t>(sycl_device); } void test_cxx11_tensor_reduction_sycl() { for (const auto& device :Eigen::get_sycl_supported_devices()) { |