aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
blob: 3758d46a0e1e96c1e7f367d914f0c2a4032c15b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Cummins Chris PhD student at The University of Edinburgh.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

/*****************************************************************
 * TensorSyclRun.h
 *
 * \brief:
 * Schedule_kernel invoke an specialised version of kernel struct. The
 * specialisation is based on the data dimension in sycl buffer
 *
*****************************************************************/

#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP
#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP

namespace Eigen {
namespace TensorSycl {
/// The run function in tensor sycl convert the expression tree to a buffer
/// based expression tree;
/// creates the expression tree for the device with accessor to buffers;
/// construct the kernel and submit it to the sycl queue.
template <typename Expr, typename Dev>
void run(Expr &expr, Dev &dev) {
  Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev);
  const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
  if (needs_assign) {
    using PlaceHolderExpr =
        typename internal::createPlaceHolderExpression<Expr>::Type;
    auto functors = internal::extractFunctors(evaluator);

    dev.m_queue.submit([&](cl::sycl::handler &cgh) {

      // create a tuple of accessors from Evaluator
      auto tuple_of_accessors =
          internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
      const auto range =
          utility::tuple::get<0>(tuple_of_accessors).get_range()[0];

      size_t outTileSize = range;
      if (range > 64) outTileSize = 64;
      size_t yMode = range % outTileSize;
      int yRange = static_cast<int>(range);
      if (yMode != 0) yRange += (outTileSize - yMode);

      // run the kernel
      cgh.parallel_for<PlaceHolderExpr>(
          cl::sycl::nd_range<1>(cl::sycl::range<1>(yRange),
                                cl::sycl::range<1>(outTileSize)),
          [=](cl::sycl::nd_item<1> itemID) {
            using DevExpr =
                typename internal::ConvertToDeviceExpression<Expr>::Type;

            auto device_expr =
                internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(
                    functors, tuple_of_accessors);
            auto device_evaluator =
                Eigen::TensorEvaluator<decltype(device_expr.expr),
                                       Eigen::DefaultDevice>(
                    device_expr.expr, Eigen::DefaultDevice());

            if (itemID.get_global_linear_id() < range) {
              device_evaluator.evalScalar(
                  static_cast<int>(itemID.get_global_linear_id()));
            }
          });
    });
    dev.m_queue.throw_asynchronous();
  }
  evaluator.cleanup();
}
}  // namespace TensorSycl
}  // namespace Eigen

#endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP