From 8296b87d7bd98c19c6064241880691f164790ede Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Tue, 28 Feb 2017 17:16:14 +0000 Subject: Adding sycl backend for TensorCustomOp; fixing the partial lhs modification issue on sycl when the rhs is TensorContraction, reduction or convolution; Fixing the partial modification for memset when sycl backend is used. --- unsupported/test/cxx11_tensor_custom_op_sycl.cpp | 165 +++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 unsupported/test/cxx11_tensor_custom_op_sycl.cpp (limited to 'unsupported/test/cxx11_tensor_custom_op_sycl.cpp') diff --git a/unsupported/test/cxx11_tensor_custom_op_sycl.cpp b/unsupported/test/cxx11_tensor_custom_op_sycl.cpp new file mode 100644 index 000000000..9ff287fff --- /dev/null +++ b/unsupported/test/cxx11_tensor_custom_op_sycl.cpp @@ -0,0 +1,165 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_custom_op_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include "main.h" +#include + +using Eigen::Tensor; +template +struct InsertZeros { + DSizes dimensions(const TensorType& input) const { + DSizes result; + result[0] = input.dimension(0) * 2; + result[1] = input.dimension(1) * 2; + return result; + } + + template + void eval(const TensorType& input, Output& output, const Device& device) const + { + array strides; + strides[0] = 2; + strides[1] = 2; + output.stride(strides).device(device) = input; + + Eigen::DSizes offsets(1,1); + Eigen::DSizes extents(output.dimension(0)-1, output.dimension(1)-1); + output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f); + } +}; + +template +static void test_custom_unary_op_sycl(const Eigen::SyclDevice &sycl_device) +{ + IndexType sizeDim1 = 3; + IndexType sizeDim2 = 5; + Eigen::array tensorRange = {{sizeDim1, sizeDim2}}; + Eigen::array tensorResultRange = {{6, 10}}; + + Eigen::Tensor in1(tensorRange); + Eigen::Tensor out(tensorResultRange); + + DataType * gpu_in1_data = static_cast(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_out_data = static_cast(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType))); + + typedef Eigen::TensorMap > TensorType; + TensorType gpu_in1(gpu_in1_data, tensorRange); + TensorType gpu_out(gpu_out_data, tensorResultRange); + + in1.setRandom(); + sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType)); + gpu_out.device(sycl_device) = gpu_in1.customOp(InsertZeros()); + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType)); + + VERIFY_IS_EQUAL(out.dimension(0), 6); + VERIFY_IS_EQUAL(out.dimension(1), 10); + + for (int i = 0; i < 6; i+=2) { + for (int j = 0; j < 10; j+=2) { + VERIFY_IS_EQUAL(out(i, j), in1(i/2, j/2)); + } + } + for (int i = 1; i < 6; i+=2) { + for (int j = 1; j < 10; j+=2) { + VERIFY_IS_EQUAL(out(i, j), 0); + } + } +} + +template +struct BatchMatMul { + DSizes dimensions(const TensorType& input1, const TensorType& input2) const { + DSizes result; + result[0] = input1.dimension(0); + result[1] = input2.dimension(1); + result[2] = input2.dimension(2); + return result; + } + + template + void eval(const TensorType& input1, const TensorType& input2, + Output& output, const Device& device) const + { + typedef typename TensorType::DimensionPair DimPair; + array dims; + dims[0] = DimPair(1, 0); + for (int64_t i = 0; i < output.dimension(2); ++i) { + output.template chip<2>(i).device(device) = input1.template chip<2>(i).contract(input2.template chip<2>(i), dims); + } + } +}; + +template +static void test_custom_binary_op_sycl(const Eigen::SyclDevice &sycl_device) +{ + + Eigen::array tensorRange1 = {{2, 3, 5}}; + Eigen::array tensorRange2 = {{3,7,5}}; + Eigen::array tensorResultRange = {{2, 7, 5}}; + + Eigen::Tensor in1(tensorRange1); + Eigen::Tensor in2(tensorRange2); + Eigen::Tensor out(tensorResultRange); + + DataType * gpu_in1_data = static_cast(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_in2_data = static_cast(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_out_data = static_cast(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType))); + + typedef Eigen::TensorMap > TensorType; + TensorType gpu_in1(gpu_in1_data, tensorRange1); + TensorType gpu_in2(gpu_in2_data, tensorRange2); + TensorType gpu_out(gpu_out_data, tensorResultRange); + + in1.setRandom(); + in2.setRandom(); + + sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.dimensions().TotalSize())*sizeof(DataType)); + + gpu_out.device(sycl_device) = gpu_in1.customOp(gpu_in2, BatchMatMul()); + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType)); + + for (IndexType i = 0; i < 5; ++i) { + typedef typename Eigen::Tensor::DimensionPair DimPair; + array dims; + dims[0] = DimPair(1, 0); + Eigen::Tensor reference = in1.template chip<2>(i).contract(in2.template chip<2>(i), dims); + TensorRef > val = out.template chip<2>(i); + for (IndexType j = 0; j < 2; ++j) { + for (IndexType k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(val(j, k), reference(j, k)); + } + } + } +} + +template void custom_op_perDevice(Dev_selector s){ + QueueInterface queueInterface(s); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_custom_unary_op_sycl(sycl_device); + test_custom_unary_op_sycl(sycl_device); + test_custom_binary_op_sycl(sycl_device); + test_custom_binary_op_sycl(sycl_device); + +} +void test_cxx11_tensor_custom_op_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(custom_op_perDevice(device)); + } +} -- cgit v1.2.3