diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h new file mode 100644 index 000000000..3758d46a0 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h @@ -0,0 +1,84 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Cummins Chris PhD student at The University of Edinburgh. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclRun.h + * + * \brief: + * Schedule_kernel invokes a specialised version of the kernel struct. The + * specialisation is based on the data dimension in the sycl buffer. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP + +namespace Eigen { +namespace TensorSycl { +/// The run function in tensor sycl converts the expression tree to a buffer +/// based expression tree; +/// creates the expression tree for the device with accessors to buffers; +/// constructs the kernel and submits it to the sycl queue.
+template <typename Expr, typename Dev> +void run(Expr &expr, Dev &dev) { + Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) { + using PlaceHolderExpr = + typename internal::createPlaceHolderExpression<Expr>::Type; + auto functors = internal::extractFunctors(evaluator); + + dev.m_queue.submit([&](cl::sycl::handler &cgh) { + + // create a tuple of accessors from Evaluator + auto tuple_of_accessors = + internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator); + const auto range = + utility::tuple::get<0>(tuple_of_accessors).get_range()[0]; + + size_t outTileSize = range; + if (range > 64) outTileSize = 64; + size_t yMode = range % outTileSize; + int yRange = static_cast<int>(range); + if (yMode != 0) yRange += (outTileSize - yMode); + + // run the kernel + cgh.parallel_for<PlaceHolderExpr>( + cl::sycl::nd_range<1>(cl::sycl::range<1>(yRange), + cl::sycl::range<1>(outTileSize)), + [=](cl::sycl::nd_item<1> itemID) { + using DevExpr = + typename internal::ConvertToDeviceExpression<Expr>::Type; + + auto device_expr = + internal::createDeviceExpression<DevExpr, PlaceHolderExpr>( + functors, tuple_of_accessors); + auto device_evaluator = + Eigen::TensorEvaluator<decltype(device_expr.expr), + Eigen::DefaultDevice>( + device_expr.expr, Eigen::DefaultDevice()); + + if (itemID.get_global_linear_id() < range) { + device_evaluator.evalScalar( + static_cast<int>(itemID.get_global_linear_id())); + } + }); + }); + dev.m_queue.throw_asynchronous(); + } + evaluator.cleanup(); +} +} // namespace TensorSycl +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP |