aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h83
1 files changed, 83 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h
new file mode 100644
index 000000000..adbb2ae72
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h
@@ -0,0 +1,83 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: eigen@codeplay.com
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// General include header of SYCL target for Tensor Module
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCLFUNCTORS_H
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCLFUNCTORS_H
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+
+/// ReductionFunctor
+template < typename HostExpr, typename PlaceHolderExpr, typename FunctorExpr, typename Tuple_of_Acc, typename Dims, typename Op, typename Index> class ReductionFunctor {
+ public:
+ typedef cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer> write_accessor;
+ ReductionFunctor(write_accessor output_accessor_, FunctorExpr functors_, Tuple_of_Acc tuple_of_accessors_,Dims dims_, Op functor_, Index range_)
+ :output_accessor(output_accessor_), functors(functors_), tuple_of_accessors(tuple_of_accessors_), dims(dims_), functor(functor_), range(range_) {}
+ void operator()(cl::sycl::nd_item<1> itemID) {
+
+ typedef typename ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
+ auto device_expr = createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
+ /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
+ /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the
+ /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
+ const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor);
+ /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is
+ /// the device_evaluator is detectable and recognisable on the device.
+ typedef Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice> DeviceSelf;
+ auto device_self_evaluator = Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice>(device_self_expr, Eigen::DefaultDevice());
+ auto output_accessor_ptr =ConvertToActualTypeSycl(typename DeviceSelf::CoeffReturnType, output_accessor);
+ /// const cast added as a naive solution to solve the qualifier drop error
+ auto globalid=static_cast<Index>(itemID.get_global_linear_id());
+ if (globalid< range) {
+ typename DeviceSelf::CoeffReturnType accum = functor.initialize();
+ Eigen::internal::GenericDimReducer<DeviceSelf::NumReducedDims-1, DeviceSelf, Op>::reduce(device_self_evaluator, device_self_evaluator.firstInput(static_cast<typename DevExpr::Index>(globalid)),const_cast<Op&>(functor), &accum);
+ functor.finalize(accum);
+ output_accessor_ptr[globalid]= accum;
+ }
+ }
+ private:
+ write_accessor output_accessor;
+ FunctorExpr functors;
+ Tuple_of_Acc tuple_of_accessors;
+ Dims dims;
+ Op functor;
+ Index range;
+};
+
+/// Memcopyfuncdeveicetohost
+template <typename T> class MemCopyFunctor {
+ public:
+ typedef cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read, cl::sycl::access::target::global_buffer> read_accessor;
+ typedef cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer> write_accessor;
+ MemCopyFunctor(read_accessor src_acc, write_accessor dst_acc, size_t rng, size_t i, size_t offset): m_src_acc(src_acc), m_dst_acc(dst_acc), m_rng(rng), m_i(i), m_offset(offset) {}
+ void operator()(cl::sycl::nd_item<1> itemID) {
+ auto src_ptr = ConvertToActualTypeSycl(T, m_src_acc);
+ auto dst_ptr = ConvertToActualTypeSycl(T, m_dst_acc);
+ auto globalid = itemID.get_global_linear_id();
+ if (globalid < m_rng) {
+ dst_ptr[globalid + m_i] = src_ptr[globalid + m_offset];
+ }
+ }
+ private:
+ read_accessor m_src_acc;
+ write_accessor m_dst_acc;
+ size_t m_rng;
+ size_t m_i;
+ size_t m_offset;
+};
+
+}
+}
+}
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCLFUNCTORS_H