aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen')
-rw-r--r--unsupported/Eigen/CXX11/Tensor6
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h5
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h7
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h122
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h58
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h58
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h14
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h50
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h17
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorMap.h19
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h21
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h50
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h77
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h109
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h213
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h201
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h154
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h111
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h99
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h158
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h69
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h234
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h41
-rw-r--r--unsupported/Eigen/SpecialFunctions2
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h56
26 files changed, 1839 insertions, 116 deletions
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index 73aae3da8..388976d2e 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -69,6 +69,10 @@ typedef unsigned __int64 uint64_t;
#endif
#endif
+#ifdef EIGEN_USE_SYCL
+#include <SYCL/sycl.hpp>
+#endif
+
#include "src/Tensor/TensorMacros.h"
#include "src/Tensor/TensorForwardDeclarations.h"
#include "src/Tensor/TensorMeta.h"
@@ -77,6 +81,8 @@ typedef unsigned __int64 uint64_t;
#include "src/Tensor/TensorDeviceDefault.h"
#include "src/Tensor/TensorDeviceThreadPool.h"
#include "src/Tensor/TensorDeviceCuda.h"
+#include "src/Tensor/TensorSycl.h"
+#include "src/Tensor/TensorDeviceSycl.h"
#include "src/Tensor/TensorIndexList.h"
#include "src/Tensor/TensorDimensionList.h"
#include "src/Tensor/TensorDimensions.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index cb615c75b..166be200c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -163,6 +163,11 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
}
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
+
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); }
private:
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 5d67f69f3..4cfe300eb 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -113,7 +113,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device)
+ : m_broadcast(op.broadcast()),m_impl(op.expression(), device)
{
// The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
// and store the result in a scalar. Instead one should reshape the scalar into a a N-D
@@ -374,7 +374,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
+ const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+
+ Broadcast functor() const { return m_broadcast; }
+
protected:
+ const Broadcast m_broadcast;
Dimensions m_dimensions;
array<Index, NumDims> m_outputStrides;
array<Index, NumDims> m_inputStrides;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
new file mode 100644
index 000000000..bfd36f5aa
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
@@ -0,0 +1,122 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Cummins Chris PhD student at The University of Edinburgh.
+// Contact: <eigen@codeplay.com>
+
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H)
+#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
+
+namespace Eigen {
+/// \struct BufferT is used to specialise add_sycl_buffer function for
+// two types of buffer we have. When the MapAllocator is true, we create the
+// sycl buffer with MapAllocator.
+/// We have to const_cast the input pointer in order to work around the fact
+/// that sycl does not accept map allocator for const pointer.
+template <typename T, bool MapAllocator>
+struct BufferT {
+ using Type = cl::sycl::buffer<T, 1, cl::sycl::map_allocator<T>>;
+ static inline void add_sycl_buffer(
+ const T *ptr, size_t num_bytes,
+ std::map<const void *, std::shared_ptr<void>> &buffer_map) {
+ buffer_map.insert(std::pair<const void *, std::shared_ptr<void>>(
+ ptr, std::shared_ptr<void>(std::make_shared<Type>(
+ Type(const_cast<T *>(ptr), cl::sycl::range<1>(num_bytes))))));
+ }
+};
+
+/// specialisation of the \ref BufferT when the MapAllocator is false. In this
+/// case we only create the device-only buffer.
+template <typename T>
+struct BufferT<T, false> {
+ using Type = cl::sycl::buffer<T, 1>;
+ static inline void add_sycl_buffer(
+ const T *ptr, size_t num_bytes,
+ std::map<const void *, std::shared_ptr<void>> &buffer_map) {
+ buffer_map.insert(std::pair<const void *, std::shared_ptr<void>>(
+ ptr, std::shared_ptr<void>(
+ std::make_shared<Type>(Type(cl::sycl::range<1>(num_bytes))))));
+ }
+};
+
+struct SyclDevice {
+ /// class members
+ /// sycl queue
+ cl::sycl::queue &m_queue;
+ /// std::map is the container used to make sure that we create only one buffer
+ /// per pointer. The lifespan of the buffer
+ /// now depends on the lifespan of SyclDevice. If a non-read-only pointer is
+ /// needed to be accessed on the host we should manually deallocate it.
+ mutable std::map<const void *, std::shared_ptr<void>> buffer_map;
+
+ SyclDevice(cl::sycl::queue &q) : m_queue(q) {}
+ // destructor
+ ~SyclDevice() { deallocate_all(); }
+
+ template <typename T>
+ void deallocate(const T *p) const {
+ auto it = buffer_map.find(p);
+ if (it != buffer_map.end()) {
+ buffer_map.erase(it);
+ }
+ }
+ void deallocate_all() const { buffer_map.clear(); }
+
+ /// creation of sycl accessor for a buffer. This function first tries to find
+ /// the buffer in the buffer_map.
+ /// If found it gets the accessor from it, if not, the function then adds an
+ /// entry by creating a sycl buffer
+ /// for that particular pointer.
+ template <cl::sycl::access::mode AcMd, bool MapAllocator, typename T>
+ inline cl::sycl::accessor<T, 1, AcMd, cl::sycl::access::target::global_buffer>
+ get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh,
+ const T *ptr) const {
+ auto it = buffer_map.find(ptr);
+ if (it == buffer_map.end()) {
+ BufferT<T, MapAllocator>::add_sycl_buffer(ptr, num_bytes, buffer_map);
+ }
+ return (
+ ((typename BufferT<T, MapAllocator>::Type *)(buffer_map.at(ptr).get()))
+ ->template get_access<AcMd>(cgh));
+ }
+
+ /// allocating memory on the cpu
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const {
+ return internal::aligned_malloc(num_bytes);
+ }
+
+ // some runtime conditions that can be applied here
+ bool isDeviceSuitable() const { return true; }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void *buffer) const {
+ internal::aligned_free(buffer);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src,
+ size_t n) const {
+ ::memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(
+ void *dst, const void *src, size_t n) const {
+ memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(
+ void *dst, const void *src, size_t n) const {
+ memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void *buffer, int c,
+ size_t n) const {
+ ::memset(buffer, c, n);
+ }
+};
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index a08dfa7c3..68d14a7e5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -20,8 +20,8 @@ namespace Eigen {
*
*/
namespace internal {
-template<typename XprType>
-struct traits<TensorEvalToOp<XprType> >
+template<typename XprType, template <class> class MakePointer_>
+struct traits<TensorEvalToOp<XprType, MakePointer_> >
{
// Type promotion to handle the case where the types of the lhs and the rhs are different.
typedef typename XprType::Scalar Scalar;
@@ -36,16 +36,22 @@ struct traits<TensorEvalToOp<XprType> >
enum {
Flags = 0
};
+ template <class T>
+ struct MakePointer {
+ // Intermediate typedef to workaround MSVC issue.
+ typedef MakePointer_<T> MakePointerT;
+ typedef typename MakePointerT::Type Type;
+ };
};
-template<typename XprType>
-struct eval<TensorEvalToOp<XprType>, Eigen::Dense>
+template<typename XprType, template <class> class MakePointer_>
+struct eval<TensorEvalToOp<XprType, MakePointer_>, Eigen::Dense>
{
typedef const TensorEvalToOp<XprType>& type;
};
-template<typename XprType>
-struct nested<TensorEvalToOp<XprType>, 1, typename eval<TensorEvalToOp<XprType> >::type>
+template<typename XprType, template <class> class MakePointer_>
+struct nested<TensorEvalToOp<XprType, MakePointer_>, 1, typename eval<TensorEvalToOp<XprType, MakePointer_> >::type>
{
typedef TensorEvalToOp<XprType> type;
};
@@ -55,37 +61,38 @@ struct nested<TensorEvalToOp<XprType>, 1, typename eval<TensorEvalToOp<XprType>
-template<typename XprType>
-class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType>, ReadOnlyAccessors>
+template<typename XprType, template <class> class MakePointer_>
+class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType, MakePointer_>, ReadOnlyAccessors>
{
public:
typedef typename Eigen::internal::traits<TensorEvalToOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
+ typedef typename MakePointer_<CoeffReturnType>::Type PointerType;
typedef typename Eigen::internal::nested<TensorEvalToOp>::type Nested;
typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(CoeffReturnType* buffer, const XprType& expr)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr)
: m_xpr(expr), m_buffer(buffer) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
expression() const { return m_xpr; }
- EIGEN_DEVICE_FUNC CoeffReturnType* buffer() const { return m_buffer; }
+ EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; }
protected:
typename XprType::Nested m_xpr;
- CoeffReturnType* m_buffer;
+ PointerType m_buffer;
};
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
+template<typename ArgType, typename Device, template <class> class MakePointer_>
+struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
{
- typedef TensorEvalToOp<ArgType> XprType;
+ typedef TensorEvalToOp<ArgType, MakePointer_> XprType;
typedef typename ArgType::Scalar Scalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef typename XprType::Index Index;
@@ -102,15 +109,22 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_device(device), m_buffer(op.buffer())
+ : m_impl(op.expression(), device), m_device(device),
+ m_buffer(op.buffer()), m_op(op), m_expression(op.expression())
{ }
+ // Used for accessor extraction in SYCL Managed TensorMap:
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const {
+ return m_op;
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() {
}
+ typedef typename internal::traits<const TensorEvalToOp<ArgType, MakePointer_> >::template MakePointer<CoeffReturnType>::Type DevicePointer;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) {
EIGEN_UNUSED_VARIABLE(scalar);
eigen_assert(scalar == NULL);
return m_impl.evalSubExprsIfNeeded(m_buffer);
@@ -145,12 +159,20 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_buffer; }
+ EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; }
+ ArgType expression() const { return m_expression; }
+
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+ /// added for sycl in order to construct the buffer from the sycl device
+ const Device& device() const{return m_device;}
private:
TensorEvaluator<ArgType, Device> m_impl;
const Device& m_device;
- CoeffReturnType* m_buffer;
+ DevicePointer m_buffer;
+ const XprType& m_op;
+ const ArgType m_expression;
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 61c111cec..834ce07df 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -46,9 +46,11 @@ struct TensorEvaluator
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
- : m_data(const_cast<Scalar*>(m.data())), m_dims(m.dimensions()), m_device(device)
+ : m_data(const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m)
{ }
+ // Used for accessor extraction in SYCL Managed TensorMap:
+ const Derived& derived() const { return m_impl; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) {
@@ -106,12 +108,16 @@ struct TensorEvaluator
internal::unpacket_traits<PacketReturnType>::size);
}
- EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<Scalar>::Type data() const { return m_data; }
+
+ /// required by sycl in order to construct sycl buffer from raw pointer
+ const Device& device() const{return m_device;}
protected:
- Scalar* m_data;
+ typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data;
Dimensions m_dims;
const Device& m_device;
+ const Derived& m_impl;
};
namespace {
@@ -159,8 +165,11 @@ struct TensorEvaluator<const Derived, Device>
RawAccess = true
};
+ // Used for accessor extraction in SYCL Managed TensorMap:
+ const Derived& derived() const { return m_impl; }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
- : m_data(m.data()), m_dims(m.dimensions()), m_device(device)
+ : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m)
{ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
@@ -198,12 +207,16 @@ struct TensorEvaluator<const Derived, Device>
internal::unpacket_traits<PacketReturnType>::size);
}
- EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data() const { return m_data; }
+
+ /// added for sycl in order to construct the buffer from the sycl device
+ const Device& device() const{return m_device;}
protected:
- const Scalar* m_data;
+ typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data;
Dimensions m_dims;
const Device& m_device;
+ const Derived& m_impl;
};
@@ -260,6 +273,12 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; }
+ /// required by sycl in order to extract the accessor
+ NullaryOp functor() const { return m_functor; }
+
+
private:
const NullaryOp m_functor;
TensorEvaluator<ArgType, Device> m_argImpl;
@@ -324,6 +343,12 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ArgType, Device> & impl() const { return m_argImpl; }
+ /// added for sycl in order to construct the buffer from sycl device
+ UnaryOp functor() const { return m_functor; }
+
+
private:
const UnaryOp m_functor;
TensorEvaluator<ArgType, Device> m_argImpl;
@@ -397,6 +422,12 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
}
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
+ /// required by sycl in order to extract the accessor
+ BinaryOp functor() const { return m_functor; }
private:
const BinaryOp m_functor;
@@ -492,10 +523,17 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<Arg1Type, Device> & arg1Impl() const { return m_arg1Impl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; }
+
private:
const TernaryOp m_functor;
TensorEvaluator<Arg1Type, Device> m_arg1Impl;
- TensorEvaluator<Arg1Type, Device> m_arg2Impl;
+ TensorEvaluator<Arg2Type, Device> m_arg2Impl;
TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};
@@ -576,6 +614,12 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; }
private:
TensorEvaluator<IfArgType, Device> m_condImpl;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 0cac7b179..f01d77c0a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -267,6 +267,20 @@ inline void TensorExecutor<Expression, GpuDevice, Vectorizable>::run(
#endif // __CUDACC__
#endif // EIGEN_USE_GPU
+// SYCL Executor policy
+#ifdef EIGEN_USE_SYCL
+
+template <typename Expression, bool Vectorizable>
+class TensorExecutor<Expression, SyclDevice, Vectorizable> {
+public:
+ static inline void run(const Expression &expr, const SyclDevice &device) {
+ // call TensorSYCL module
+ TensorSycl::run(expr, device);
+ }
+};
+
+#endif
+
} // end namespace internal
} // end namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
index 5f2e329f2..85dfc7a69 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
@@ -283,7 +283,7 @@ class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, A
arg1Expression() const { return m_arg1_xpr; }
EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename Arg1XprType::Nested>::type&
+ const typename internal::remove_all<typename Arg2XprType::Nested>::type&
arg2Expression() const { return m_arg2_xpr; }
EIGEN_DEVICE_FUNC
@@ -292,7 +292,7 @@ class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, A
protected:
typename Arg1XprType::Nested m_arg1_xpr;
- typename Arg1XprType::Nested m_arg2_xpr;
+ typename Arg2XprType::Nested m_arg2_xpr;
typename Arg3XprType::Nested m_arg3_xpr;
const TernaryOp m_functor;
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index c23ecdbc4..bbd5eb374 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -19,9 +19,15 @@ namespace Eigen {
*
*
*/
+/// template <class> class MakePointer_ is added to convert the host pointer to the device pointer.
+/// It is added due to the fact that for our device compiler T* is not allowed.
+/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T.
+/// This is done through our MakePointer_ class. By default the Type in the MakePointer_<T> is T* .
+/// Therefore, by adding the default value, we managed to convert the type and it does not break any
+/// existing code as its default value is T*.
namespace internal {
-template<typename XprType>
-struct traits<TensorForcedEvalOp<XprType> >
+template<typename XprType, template <class> class MakePointer_>
+struct traits<TensorForcedEvalOp<XprType, MakePointer_> >
{
// Type promotion to handle the case where the types of the lhs and the rhs are different.
typedef typename XprType::Scalar Scalar;
@@ -36,26 +42,31 @@ struct traits<TensorForcedEvalOp<XprType> >
enum {
Flags = 0
};
+ template <class T> struct MakePointer {
+ // Intermediate typedef to workaround MSVC issue.
+ typedef MakePointer_<T> MakePointerT;
+ typedef typename MakePointerT::Type Type;
+ };
};
-template<typename XprType>
-struct eval<TensorForcedEvalOp<XprType>, Eigen::Dense>
+template<typename XprType, template <class> class MakePointer_>
+struct eval<TensorForcedEvalOp<XprType, MakePointer_>, Eigen::Dense>
{
- typedef const TensorForcedEvalOp<XprType>& type;
+ typedef const TensorForcedEvalOp<XprType, MakePointer_>& type;
};
-template<typename XprType>
-struct nested<TensorForcedEvalOp<XprType>, 1, typename eval<TensorForcedEvalOp<XprType> >::type>
+template<typename XprType, template <class> class MakePointer_>
+struct nested<TensorForcedEvalOp<XprType, MakePointer_>, 1, typename eval<TensorForcedEvalOp<XprType, MakePointer_> >::type>
{
- typedef TensorForcedEvalOp<XprType> type;
+ typedef TensorForcedEvalOp<XprType, MakePointer_> type;
};
} // end namespace internal
-template<typename XprType>
-class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOnlyAccessors>
+template<typename XprType, template <class> class MakePointer_>
+class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType, MakePointer_>, ReadOnlyAccessors>
{
public:
typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar;
@@ -77,10 +88,10 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn
};
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
+template<typename ArgType, typename Device, template <class> class MakePointer_>
+struct TensorEvaluator<const TensorForcedEvalOp<ArgType, MakePointer_>, Device>
{
- typedef TensorForcedEvalOp<ArgType> XprType;
+ typedef TensorForcedEvalOp<ArgType, MakePointer_> XprType;
typedef typename ArgType::Scalar Scalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef typename XprType::Index Index;
@@ -96,6 +107,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
};
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
+ /// op_ is used for sycl
: m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL)
{ }
@@ -110,10 +122,10 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
new(m_buffer+i) CoeffReturnType();
}
}
- typedef TensorEvalToOp<const ArgType> EvalTo;
+ typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
EvalTo evalToTmp(m_buffer, m_op);
const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value;
- internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
+ internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device);
return true;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
@@ -136,13 +148,17 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC Scalar* data() const { return m_buffer; }
+ EIGEN_DEVICE_FUNC typename MakePointer<Scalar>::Type data() const { return m_buffer; }
+ /// required by sycl in order to extract the sycl accessor
+ const TensorEvaluator<ArgType, Device>& impl() { return m_impl; }
+ /// used by sycl in order to build the sycl buffer
+ const Device& device() const{return m_device;}
private:
TensorEvaluator<ArgType, Device> m_impl;
const ArgType m_op;
const Device& m_device;
- CoeffReturnType* m_buffer;
+ typename MakePointer<CoeffReturnType>::Type m_buffer;
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index 490ddd8bd..6497b1830 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -12,9 +12,19 @@
namespace Eigen {
+// MakePointer class is used as a container of the adress space of the pointer
+// on the host and on the device. From the host side it generates the T* pointer
+// and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to
+// T* m_data on the host. It is always called on the device.
+// Specialisation of MakePointer class for creating the sycl buffer with
+// map_allocator.
+template<typename T> struct MakePointer {
+ typedef T* Type;
+};
+
+template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap;
template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor;
template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize;
-template<typename PlainObjectType, int Options_ = Unaligned> class TensorMap;
template<typename PlainObjectType> class TensorRef;
template<typename Derived, int AccessLevel> class TensorBase;
@@ -52,8 +62,8 @@ template<typename Op, typename XprType> class TensorScanOp;
template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;
-template<typename XprType> class TensorEvalToOp;
-template<typename XprType> class TensorForcedEvalOp;
+template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp;
+template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorForcedEvalOp;
template<typename ExpressionType, typename DeviceType> class TensorDevice;
template<typename Derived, typename Device> struct TensorEvaluator;
@@ -61,6 +71,7 @@ template<typename Derived, typename Device> struct TensorEvaluator;
struct DefaultDevice;
struct ThreadPoolDevice;
struct GpuDevice;
+struct SyclDevice;
enum FFTResultType {
RealPart = 0,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
index 6fb4f4a31..a8e55757e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
@@ -18,11 +18,16 @@ namespace Eigen {
* \brief A tensor expression mapping an existing array of data.
*
*/
-
-template<typename PlainObjectType, int Options_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_> >
+/// template <class> class MakePointer_ is added to convert the host pointer to the device pointer.
+/// It is added due to the fact that for our device compiler T* is not allowed.
+/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T.
+/// This is done through our MakePointer_ class. By default the Type in the MakePointer_<T> is T* .
+/// Therefore, by adding the default value, we managed to convert the type and it does not break any
+/// existing code as its default value is T*.
+template<typename PlainObjectType, int Options_, template <class> class MakePointer_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_, MakePointer_> >
{
public:
- typedef TensorMap<PlainObjectType, Options_> Self;
+ typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self;
typedef typename PlainObjectType::Base Base;
typedef typename Eigen::internal::nested<Self>::type Nested;
typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
@@ -36,7 +41,7 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor
Scalar *,
const Scalar *>::type
PointerType;*/
- typedef Scalar* PointerType;
+ typedef typename MakePointer_<Scalar>::Type PointerType;
typedef PointerType PointerArgType;
static const int Options = Options_;
@@ -109,9 +114,9 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); }
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar* data() { return m_data; }
+ EIGEN_STRONG_INLINE PointerType data() { return m_data; }
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar* data() const { return m_data; }
+ EIGEN_STRONG_INLINE const PointerType data() const { return m_data; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
@@ -307,7 +312,7 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor
}
private:
- Scalar* m_data;
+ typename MakePointer_<Scalar>::Type m_data;
Dimensions m_dimensions;
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
index fdb5ee6b8..615559d44 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
@@ -83,6 +83,27 @@ struct PacketType<half, GpuDevice> {
};
#endif
+#if defined(EIGEN_USE_SYCL)
+template <typename T>
+ struct PacketType<T, SyclDevice> {
+ typedef T type;
+ static const int size = 1;
+ enum {
+ HasAdd = 0,
+ HasSub = 0,
+ HasMul = 0,
+ HasNegate = 0,
+ HasAbs = 0,
+ HasArg = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasConj = 0,
+ HasSetLinear = 0,
+ HasBlend = 0
+ };
+};
+#endif
// Tuple mimics std::pair but works on e.g. nvcc.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index a87777b22..d34ff98b0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -423,15 +423,15 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
// Precompute output strides.
if (NumOutputDims > 0) {
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_outputStrides[0] = 1;
- for (int i = 1; i < NumOutputDims; ++i) {
- m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
- }
+ m_outputStrides[0] = 1;
+ for (int i = 1; i < NumOutputDims; ++i) {
+ m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
+ }
} else {
- m_outputStrides.back() = 1;
- for (int i = NumOutputDims - 2; i >= 0; --i) {
- m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
- }
+ m_outputStrides.back() = 1;
+ for (int i = NumOutputDims - 2; i >= 0; --i) {
+ m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
+ }
}
}
@@ -439,27 +439,27 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
if (NumInputDims > 0) {
array<Index, NumInputDims> input_strides;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- input_strides[0] = 1;
- for (int i = 1; i < NumInputDims; ++i) {
- input_strides[i] = input_strides[i-1] * input_dims[i-1];
- }
+ input_strides[0] = 1;
+ for (int i = 1; i < NumInputDims; ++i) {
+ input_strides[i] = input_strides[i-1] * input_dims[i-1];
+ }
} else {
- input_strides.back() = 1;
- for (int i = NumInputDims - 2; i >= 0; --i) {
- input_strides[i] = input_strides[i + 1] * input_dims[i + 1];
- }
+ input_strides.back() = 1;
+ for (int i = NumInputDims - 2; i >= 0; --i) {
+ input_strides[i] = input_strides[i + 1] * input_dims[i + 1];
+ }
}
int outputIndex = 0;
int reduceIndex = 0;
for (int i = 0; i < NumInputDims; ++i) {
- if (m_reduced[i]) {
- m_reducedStrides[reduceIndex] = input_strides[i];
- ++reduceIndex;
- } else {
- m_preservedStrides[outputIndex] = input_strides[i];
- ++outputIndex;
- }
+ if (m_reduced[i]) {
+ m_reducedStrides[reduceIndex] = input_strides[i];
+ ++reduceIndex;
+ } else {
+ m_preservedStrides[outputIndex] = input_strides[i];
+ ++outputIndex;
+ }
}
}
@@ -578,7 +578,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
Op reducer(m_reducer);
if (ReducingInnerMostDims || RunningFullReduction) {
const Index num_values_to_reduce =
- (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
+ (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
return internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstInput(index),
num_values_to_reduce, reducer);
} else {
@@ -602,7 +602,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
if (ReducingInnerMostDims) {
const Index num_values_to_reduce =
- (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
+ (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
const Index firstIndex = firstInput(index);
for (Index i = 0; i < PacketSize; ++i) {
Op reducer(m_reducer);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h
new file mode 100644
index 000000000..da15f7942
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h
@@ -0,0 +1,77 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: eigen@codeplay.com
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// General include header of SYCL target for Tensor Module
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H
+
+#ifdef EIGEN_USE_SYCL
+
+// global pointer to set different attribute state for a class
+template <class T>
+struct MakeGlobalPointer {
+ typedef typename cl::sycl::global_ptr<T>::pointer_t Type;
+};
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+
+/// This struct is used for special expression nodes with no operations (for example assign and selectOP).
+ struct NoOP;
+
+template<bool IsConst, typename T> struct GetType{
+ typedef const T Type;
+};
+template<typename T> struct GetType<false, T>{
+ typedef T Type;
+};
+
+}
+}
+}
+
+// tuple construction
+#include "TensorSyclTuple.h"
+
+// This file contains the PlaceHolder that replaces the actual data
+#include "TensorSyclPlaceHolder.h"
+
+#include "TensorSyclLeafCount.h"
+
+// The index PlaceHolder takes the actual expression and replaces the actual
+// data on it with the place holder. It uses the same pre-order expression tree
+// traverse as the leaf count in order to give the right access number to each
+// node in the expression
+#include "TensorSyclPlaceHolderExpr.h"
+
+// creation of an accessor tuple from a tuple of SYCL buffers
+#include "TensorSyclExtractAccessor.h"
+
+// actual data extraction using accessors
+//#include "GetDeviceData.h"
+
+// this is used to change the address space type in tensor map for GPU
+#include "TensorSyclConvertToDeviceExpression.h"
+
+// this is used to extract the functors
+#include "TensorSyclExtractFunctors.h"
+
+// this is used to create tensormap on the device
+// this is used to construct the expression on the device
+#include "TensorSyclExprConstructor.h"
+
+// kernel execution using fusion
+#include "TensorSyclRun.h"
+
+#endif // end of EIGEN_USE_SYCL
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
new file mode 100644
index 000000000..a94c30426
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
@@ -0,0 +1,109 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclConvertToDeviceExpression.h
+ *
+ * \brief:
+ * Conversion from host pointer to device pointer
+ * inside leaf nodes of the expression.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+
+/// \struct ConvertToDeviceExpression
+/// \brief This struct is used to convert the MakePointer in the host expression
+/// to the MakeGlobalPointer for the device expression. For the leafNodes
+/// containing the pointer. This is due to the fact that the address space of
+/// the pointer T* is different on the host and the device.
+template <typename Expr>
+struct ConvertToDeviceExpression;
+
+template<template<class...> class NonOpCategory, bool IsConst, typename... Args>
+struct NonOpConversion{
+ typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type...> >::Type Type;
+};
+
+
+template<template<class, template <class> class > class NonOpCategory, bool IsConst, typename Args>
+struct DeviceConvertor{
+ typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type, MakeGlobalPointer> >::Type Type;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorMap
+#define TENSORMAPCONVERT(CVQual)\
+template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_>\
+struct ConvertToDeviceExpression<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_> > {\
+ typedef CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer> Type;\
+};
+
+TENSORMAPCONVERT(const)
+TENSORMAPCONVERT()
+#undef TENSORMAPCONVERT
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, TensorBroadcastingOp
+#define CATEGORYCONVERT(CVQual)\
+template <template<class, class...> class Category, typename OP, typename... subExprs>\
+struct ConvertToDeviceExpression<CVQual Category<OP, subExprs...> > {\
+ typedef CVQual Category<OP, typename ConvertToDeviceExpression<subExprs>::Type... > Type;\
+};
+CATEGORYCONVERT(const)
+CATEGORYCONVERT()
+#undef CATEGORYCONVERT
+
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorCwiseSelectOp
+#define SELECTOPCONVERT(CVQual, Res)\
+template <typename IfExpr, typename ThenExpr, typename ElseExpr>\
+struct ConvertToDeviceExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >\
+: NonOpConversion<TensorSelectOp, Res, IfExpr, ThenExpr, ElseExpr> {};
+SELECTOPCONVERT(const, true)
+SELECTOPCONVERT(, false)
+#undef SELECTOPCONVERT
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const AssingOP
+#define ASSIGNCONVERT(CVQual, Res)\
+template <typename LHSExpr, typename RHSExpr>\
+struct ConvertToDeviceExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr> >\
+: NonOpConversion<TensorAssignOp, Res, LHSExpr, RHSExpr>{};
+
+ASSIGNCONVERT(const, true)
+ASSIGNCONVERT(, false)
+#undef ASSIGNCONVERT
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is either TensorForcedEvalOp or TensorEvalToOp
+#define KERNELBROKERCONVERT(CVQual, Res, ExprNode)\
+template <typename Expr>\
+struct ConvertToDeviceExpression<CVQual ExprNode<Expr> > \
+: DeviceConvertor<ExprNode, Res, Expr>{};
+
+KERNELBROKERCONVERT(const, true, TensorForcedEvalOp)
+KERNELBROKERCONVERT(, false, TensorForcedEvalOp)
+KERNELBROKERCONVERT(const, true, TensorEvalToOp)
+KERNELBROKERCONVERT(, false, TensorEvalToOp)
+#undef KERNELBROKERCONVERT
+} // namespace internal
+} // namespace TensorSycl
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX1
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
new file mode 100644
index 000000000..833d5e271
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
@@ -0,0 +1,213 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclExprConstructor.h
+ *
+ * \brief:
+ * This file re-create an expression on the SYCL device in order
+ * to use the original tensor evaluator.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// this class is used by EvalToOp in order to create an lhs expression which is
+/// a pointer from an accessor on device-only buffer
+template <typename PtrType, size_t N, typename... Params>
+struct EvalToLHSConstructor {
+ PtrType expr;
+ EvalToLHSConstructor(const utility::tuple::Tuple<Params...> &t): expr((&(*(utility::tuple::get<N>(t).get_pointer())))) {}
+};
+
+/// \struct ExprConstructor is used to reconstruct the expression on the device
+/// and
+/// recreate the expression with MakeGlobalPointer containing the device address
+/// space for the TensorMap pointers used in eval function.
+/// It receives the original expression type, the functor of the node, the tuple
+/// of accessors, and the device expression type to re-instantiate the
+/// expression tree for the device
+template <typename OrigExpr, typename IndexExpr, typename... Params>
+struct ExprConstructor;
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorMap
+#define TENSORMAP(CVQual)\
+template <typename Scalar_, int Options_, int Options2_, int Options3_, int NumIndices_, typename IndexType_,\
+template <class> class MakePointer_, size_t N, typename... Params>\
+struct ExprConstructor< CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer>,\
+CVQual Eigen::internal::PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options3_, MakePointer_>, N>, Params...>{\
+ typedef CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer> Type;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\
+ : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\
+};
+
+TENSORMAP(const)
+TENSORMAP()
+#undef TENSORMAP
+
+#define UNARYCATEGORY(CVQual)\
+template <template<class, class> class UnaryCategory, typename OP, typename OrigRHSExpr, typename RHSExpr, typename... Params>\
+struct ExprConstructor<CVQual UnaryCategory<OP, OrigRHSExpr>, CVQual UnaryCategory<OP, RHSExpr>, Params...> {\
+ typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_type;\
+ my_type rhsExpr;\
+ typedef CVQual UnaryCategory<OP, typename my_type::Type> Type;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+ : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}\
+};
+
+UNARYCATEGORY(const)
+UNARYCATEGORY()
+#undef UNARYCATEGORY
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorBinaryOp
+#define BINARYCATEGORY(CVQual)\
+template <template<class, class, class> class BinaryCategory, typename OP, typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr,\
+typename RHSExpr, typename... Params>\
+struct ExprConstructor<CVQual BinaryCategory<OP, OrigLHSExpr, OrigRHSExpr>, CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Params...> {\
+ typedef ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\
+ typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\
+ typedef CVQual BinaryCategory<OP, typename my_left_type::Type, typename my_right_type::Type> Type;\
+ my_left_type lhsExpr;\
+ my_right_type rhsExpr;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+ : lhsExpr(funcD.lhsExpr, t),rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {}\
+};
+
+BINARYCATEGORY(const)
+BINARYCATEGORY()
+#undef BINARYCATEGORY
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorCwiseTernaryOp
+#define TERNARYCATEGORY(CVQual)\
+template <template <class, class, class, class> class TernaryCategory, typename OP, typename OrigArg1Expr, typename OrigArg2Expr,typename OrigArg3Expr,\
+typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename... Params>\
+struct ExprConstructor<CVQual TernaryCategory<OP, OrigArg1Expr, OrigArg2Expr, OrigArg3Expr>, CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Params...> {\
+ typedef ExprConstructor<OrigArg1Expr, Arg1Expr, Params...> my_arg1_type;\
+ typedef ExprConstructor<OrigArg2Expr, Arg2Expr, Params...> my_arg2_type;\
+ typedef ExprConstructor<OrigArg3Expr, Arg3Expr, Params...> my_arg3_type;\
+ typedef CVQual TernaryCategory<OP, typename my_arg1_type::Type, typename my_arg2_type::Type, typename my_arg3_type::Type> Type;\
+ my_arg1_type arg1Expr;\
+ my_arg2_type arg2Expr;\
+ my_arg3_type arg3Expr;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD,const utility::tuple::Tuple<Params...> &t)\
+ : arg1Expr(funcD.arg1Expr, t), arg2Expr(funcD.arg2Expr, t), arg3Expr(funcD.arg3Expr, t), expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {}\
+};
+
+TERNARYCATEGORY(const)
+TERNARYCATEGORY()
+#undef TERNARYCATEGORY
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorCwiseSelectOp
+#define SELECTOP(CVQual)\
+template <typename OrigIfExpr, typename OrigThenExpr, typename OrigElseExpr, typename IfExpr, typename ThenExpr, typename ElseExpr, typename... Params>\
+struct ExprConstructor< CVQual TensorSelectOp<OrigIfExpr, OrigThenExpr, OrigElseExpr>, CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Params...> {\
+ typedef ExprConstructor<OrigIfExpr, IfExpr, Params...> my_if_type;\
+ typedef ExprConstructor<OrigThenExpr, ThenExpr, Params...> my_then_type;\
+ typedef ExprConstructor<OrigElseExpr, ElseExpr, Params...> my_else_type;\
+ typedef CVQual TensorSelectOp<typename my_if_type::Type, typename my_then_type::Type, typename my_else_type::Type> Type;\
+ my_if_type ifExpr;\
+ my_then_type thenExpr;\
+ my_else_type elseExpr;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+ : ifExpr(funcD.ifExpr, t), thenExpr(funcD.thenExpr, t), elseExpr(funcD.elseExpr, t), expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {}\
+};
+
+SELECTOP(const)
+SELECTOP()
+#undef SELECTOP
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorAssignOp
+#define ASSIGN(CVQual)\
+template <typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr, typename RHSExpr, typename... Params>\
+struct ExprConstructor<CVQual TensorAssignOp<OrigLHSExpr, OrigRHSExpr>, CVQual TensorAssignOp<LHSExpr, RHSExpr>, Params...> {\
+ typedef ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\
+ typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\
+ typedef CVQual TensorAssignOp<typename my_left_type::Type, typename my_right_type::Type> Type;\
+ my_left_type lhsExpr;\
+ my_right_type rhsExpr;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+ : lhsExpr(funcD.lhsExpr, t), rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr) {}\
+ };
+
+ ASSIGN(const)
+ ASSIGN()
+ #undef ASSIGN
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorEvalToOp
+#define EVALTO(CVQual)\
+template <typename OrigExpr, typename Expr, typename... Params>\
+struct ExprConstructor<CVQual TensorEvalToOp<OrigExpr, MakeGlobalPointer>, CVQual TensorEvalToOp<Expr>, Params...> {\
+ typedef ExprConstructor<OrigExpr, Expr, Params...> my_expr_type;\
+ typedef typename TensorEvalToOp<OrigExpr, MakeGlobalPointer>::PointerType my_buffer_type;\
+ typedef CVQual TensorEvalToOp<typename my_expr_type::Type, MakeGlobalPointer> Type;\
+ my_expr_type nestedExpression;\
+ EvalToLHSConstructor<my_buffer_type, 0, Params...> buffer;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+ : nestedExpression(funcD.rhsExpr, t), buffer(t), expr(buffer.expr, nestedExpression.expr) {}\
+};
+
+EVALTO(const)
+EVALTO()
+#undef EVALTO
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorForcedEvalOp
+#define FORCEDEVAL(CVQual)\
+template <typename OrigExpr, typename DevExpr, size_t N, typename... Params>\
+struct ExprConstructor<CVQual TensorForcedEvalOp<OrigExpr, MakeGlobalPointer>,\
+CVQual Eigen::internal::PlaceHolder<CVQual TensorForcedEvalOp<DevExpr>, N>, Params...> {\
+ typedef CVQual TensorMap<Tensor<typename TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::Scalar,\
+ TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::NumDimensions, 0, typename TensorForcedEvalOp<DevExpr>::Index>, 0, MakeGlobalPointer> Type;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\
+ : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\
+};
+
+FORCEDEVAL(const)
+FORCEDEVAL()
+#undef FORCEDEVAL
+
+/// template deduction for \ref ExprConstructor struct
+template <typename OrigExpr, typename IndexExpr, typename FuncD, typename... Params>
+auto createDeviceExpression(FuncD &funcD, const utility::tuple::Tuple<Params...> &t)
+ -> decltype(ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t)) {
+ return ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t);
+}
+}
+}
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
new file mode 100644
index 000000000..ceec528ea
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
@@ -0,0 +1,201 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclExtractAccessor.h
+ *
+ * \brief:
+ * ExtractAccessor takes Expression placeHolder expression and the tuple of sycl
+ * buffers as an input. Using pre-order tree traversal, ExtractAccessor
+ * recursively calls itself for its children in the expression tree. The
+ * leaf node in the PlaceHolder expression is nothing but a container preserving
+ * the order of the actual data in the tuple of sycl buffer. By invoking the
+ * extract accessor for the PlaceHolder<N>, an accessor is created for the Nth
+ * buffer in the tuple of buffers. This accessor is then added as an Nth
+ * element in the tuple of accessors. In this case we preserve the order of data
+ * in the expression tree.
+ *
+ * This is the specialisation of extract accessor method for different operation
+ * type in the PlaceHolder expression.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \struct ExtractAccessor: Extract Accessor Class is used to extract the
+/// accessor from a buffer.
+/// Depending on the type of the leaf node we can get a read accessor or a
+/// read_write accessor
+template <typename Evaluator>
+struct ExtractAccessor;
+
+struct AccessorConstructor{
+ template<typename Arg> static inline auto getTuple(cl::sycl::handler& cgh, Arg eval)
+ -> decltype(ExtractAccessor<Arg>::getTuple(cgh, eval)) {
+ return ExtractAccessor<Arg>::getTuple(cgh, eval);
+ }
+
+ template<typename Arg1, typename Arg2> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1, Arg2 eval2)
+ -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2))) {
+ return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2));
+ }
+ template<typename Arg1, typename Arg2, typename Arg3> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1 , Arg2 eval2 , Arg3 eval3)
+ -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)))) {
+ return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)));
+ }
+ template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, Arg eval)
+ -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor<AcM, true,
+ typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh,eval.data()))){
+ return utility::tuple::make_tuple(eval.device().template get_sycl_accessor<AcM, true, typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh,eval.data()));
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp and const TensorBroadcastingOp
+template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> eval)
+ -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){
+ return AccessorConstructor::getTuple(cgh, eval.impl());
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp
+template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorCwiseBinaryOp
+template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> eval)
+ -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){
+ return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseBinaryOp
+template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorCwiseTernaryOp
+template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> eval)
+ -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){
+ return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl());
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseTernaryOp
+template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorCwiseSelectOp. This is a special case where there is no OP
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> eval)
+ -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){
+ return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl());
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseSelectOp. This is a special case where there is no OP
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >{};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorAssignOp
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> eval)
+ -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){
+ return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorAssignOp
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorMap
+#define TENSORMAPEXPR(CVQual, ACCType)\
+template <typename PlainObjectType, int Options_, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> > {\
+ static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> eval)\
+ -> decltype(AccessorConstructor::template getAccessor<ACCType>(cgh, eval)){\
+ return AccessorConstructor::template getAccessor<ACCType>(cgh, eval);\
+ }\
+};
+TENSORMAPEXPR(const, cl::sycl::access::mode::read)
+TENSORMAPEXPR(, cl::sycl::access::mode::read_write)
+#undef TENSORMAPEXPR
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorForcedEvalOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> eval)
+ -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){
+ return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval);
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorForcedEvalOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorForcedEvalOp<Expr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> >{};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorEvalToOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<const TensorEvalToOp<Expr>, Dev> eval)
+ -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){
+ return utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()));
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorEvalToOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorEvalToOp<Expr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> >{};
+
+/// template deduction for \ref ExtractAccessor
+template <typename Evaluator>
+auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr)
+-> decltype(ExtractAccessor<Evaluator>::getTuple(cgh, expr)) {
+ return ExtractAccessor<Evaluator>::getTuple(cgh, expr);
+}
+}
+}
+}
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
new file mode 100644
index 000000000..801b4f5d7
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
@@ -0,0 +1,154 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclextractFunctors.h
+ *
+ * \brief:
+ * Used to extract all the functors allocated to each node of the expression
+*tree.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \struct FunctorExtractor: This struct is used to extract the functors
+/// constructed on
+/// the host-side, to pack them and reuse them in reconstruction of the
+/// expression on the device.
+/// We have to do that as in Eigen the functors are not stateless so we cannot
+/// re-instantiate them on the device.
+/// We have to pass instantiated functors to the device.
+// This struct is used for leafNode (TensorMap) and nodes behaving like leafNode (TensorForcedEval).
+template <typename Evaluator> struct FunctorExtractor{
+ typedef typename Evaluator::Dimensions Dimensions;
+ const Dimensions m_dimensions;
+ const Dimensions& dimensions() const { return m_dimensions; }
+ FunctorExtractor(const Evaluator& expr)
+ : m_dimensions(expr.dimensions()) {}
+
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp, and const TensorBroadcastingOp
+template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
+ OP func;
+ FunctorExtractor(const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()), func(expr.functor()) {}
+};
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, and TensorBroadcastingOp
+template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> >
+: FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> >{};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseBinaryOp
+template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
+ OP func;
+ FunctorExtractor(const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& expr)
+ : lhsExpr(expr.left_impl()),rhsExpr(expr.right_impl()),func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseBinaryOp
+template <template <class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >
+: FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseTernaryOp
+template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,typename Dev>
+struct FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<Arg1Expr, Dev> > arg1Expr;
+ FunctorExtractor<TensorEvaluator<Arg2Expr, Dev> > arg2Expr;
+ FunctorExtractor<TensorEvaluator<Arg3Expr, Dev> > arg3Expr;
+ OP func;
+ FunctorExtractor(const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& expr)
+ : arg1Expr(expr.arg1Impl()), arg2Expr(expr.arg2Impl()), arg3Expr(expr.arg3Impl()), func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorCwiseTernaryOp
+template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
+struct FunctorExtractor<TensorEvaluator< TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >
+:FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated.
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<IfExpr, Dev> > ifExpr;
+ FunctorExtractor<TensorEvaluator<ThenExpr, Dev> > thenExpr;
+ FunctorExtractor<TensorEvaluator<ElseExpr, Dev> > elseExpr;
+ FunctorExtractor(const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& expr)
+ : ifExpr(expr.cond_impl()), thenExpr(expr.then_impl()), elseExpr(expr.else_impl()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >
+:FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorAssignOp. This is an specialisation without OP so it has to be separated.
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
+ FunctorExtractor(const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr)
+ : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorAssignOp. This is an specialisation without OP so it has to be separated.
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> >
+:FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{};
+
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorEvalToOp, This is an specialisation without OP so it has to be separated.
+template <typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
+ FunctorExtractor(const TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorEvalToOp. This is a specialisation without OP so it has to be separated.
+template <typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<TensorEvalToOp<RHSExpr>, Dev> >
+: FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {};
+
+
+/// template deduction function for FunctorExtractor
+template <typename Evaluator>
+auto inline extractFunctors(const Evaluator& evaluator)-> FunctorExtractor<Evaluator> {
+ return FunctorExtractor<Evaluator>(evaluator);
+}
+} // namespace internal
+} // namespace TensorSycl
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
new file mode 100644
index 000000000..8d520d2da
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
@@ -0,0 +1,111 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclLeafCount.h
+ *
+ * \brief:
+ * The leaf count used the pre-order expression tree traverse in order to name
+ * count the number of leaf nodes in the expression
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \brief LeafCount used to counting terminal nodes. The total number of
+/// leaf nodes is used by MakePlaceHolderExprHelper to find the order
+/// of the leaf node in a expression tree at compile time.
+template <typename Expr>
+struct LeafCount;
+
+template<typename... Args> struct CategoryCount;
+
+template<> struct CategoryCount<>
+{
+ static const size_t Count =0;
+};
+
+template<typename Arg, typename... Args>
+struct CategoryCount<Arg,Args...>{
+ static const size_t Count = LeafCount<Arg>::Count + CategoryCount<Args...>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorMap
+template <typename PlainObjectType, int Options_, template <class> class MakePointer_>
+struct LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> > {
+ static const size_t Count =1;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is TensorMap
+template <typename PlainObjectType, int Options_, template <class> class MakePointer_>
+struct LeafCount<TensorMap<PlainObjectType, Options_, MakePointer_> > :LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> >{};
+
+// const TensorCwiseUnaryOp, const TensorCwiseNullaryOp, const TensorCwiseBinaryOp, const TensorCwiseTernaryOp, and Const TensorBroadcastingOp
+template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr>
+struct LeafCount<const CategoryExpr<OP, RHSExpr...> >: CategoryCount<RHSExpr...> {};
+// TensorCwiseUnaryOp, TensorCwiseNullaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, and TensorBroadcastingOp
+template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr>
+struct LeafCount<CategoryExpr<OP, RHSExpr...> > :LeafCount<const CategoryExpr<OP, RHSExpr...> >{};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// const TensorSelectOp is an exception
+template <typename IfExpr, typename ThenExpr, typename ElseExpr>
+struct LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > : CategoryCount<IfExpr, ThenExpr, ElseExpr> {};
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorSelectOp
+template <typename IfExpr, typename ThenExpr, typename ElseExpr>
+struct LeafCount<TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >: LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > {};
+
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorAssignOp
+template <typename LHSExpr, typename RHSExpr>
+struct LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >: CategoryCount<LHSExpr,RHSExpr> {};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorAssignOp is an exception. It is not the same as Unary
+template <typename LHSExpr, typename RHSExpr>
+struct LeafCount<TensorAssignOp<LHSExpr, RHSExpr> > :LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >{};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorForcedEvalOp
+template <typename Expr>
+struct LeafCount<const TensorForcedEvalOp<Expr> > {
+ static const size_t Count =1;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorForcedEvalOp
+template <typename Expr>
+struct LeafCount<TensorForcedEvalOp<Expr> >: LeafCount<const TensorForcedEvalOp<Expr> > {};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorEvalToOp
+template <typename Expr>
+struct LeafCount<const TensorEvalToOp<Expr> > {
+ static const size_t Count = 1 + CategoryCount<Expr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorEvalToOp
+template <typename Expr>
+struct LeafCount<TensorEvalToOp<Expr> >: LeafCount<const TensorEvalToOp<Expr> >{};
+}
+}
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h
new file mode 100644
index 000000000..43a63c73d
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h
@@ -0,0 +1,99 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclPlaceHolder.h
+ *
+ * \brief:
+ * The PlaceHolder expression are nothing but a container preserving
+ * the order of actual data in the tuple of sycl buffer.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_HPP
+
+namespace Eigen {
+namespace internal {
+/// \struct PlaceHolder
+/// \brief PlaceHolder is used to replace the \ref TensorMap in the expression
+/// tree.
+/// PlaceHolder contains the order of the leaf node in the expression tree.
+template <typename Scalar, size_t N>
+struct PlaceHolder {
+ static constexpr size_t I = N;
+ typedef Scalar Type;
+};
+
+/// \brief specialisation of the PlaceHolder node for const TensorMap
+#define TENSORMAPPLACEHOLDER(CVQual)\
+template <typename PlainObjectType, int Options_, template <class> class MakePointer_, size_t N>\
+struct PlaceHolder<CVQual TensorMap<PlainObjectType, Options_, MakePointer_>, N> {\
+ static const size_t I = N;\
+ typedef CVQual TensorMap<PlainObjectType, Options_, MakePointer_> Type;\
+ typedef typename Type::Self Self;\
+ typedef typename Type::Base Base;\
+ typedef typename Type::Nested Nested;\
+ typedef typename Type::StorageKind StorageKind;\
+ typedef typename Type::Index Index;\
+ typedef typename Type::Scalar Scalar;\
+ typedef typename Type::RealScalar RealScalar;\
+ typedef typename Type::CoeffReturnType CoeffReturnType;\
+};
+
+TENSORMAPPLACEHOLDER(const)
+TENSORMAPPLACEHOLDER()
+#undef TENSORMAPPLACEHOLDER
+
+/// \brief specialisation of the PlaceHolder node for TensorForcedEvalOp. The
+/// TensorForcedEvalOp acts as a leaf node for its parent node.
+#define TENSORFORCEDEVALPLACEHOLDER(CVQual)\
+template <typename Expression, size_t N>\
+struct PlaceHolder<CVQual TensorForcedEvalOp<Expression>, N> {\
+ static const size_t I = N;\
+ typedef CVQual TensorForcedEvalOp<Expression> Type;\
+ typedef typename Type::Nested Nested;\
+ typedef typename Type::StorageKind StorageKind;\
+ typedef typename Type::Index Index;\
+ typedef typename Type::Scalar Scalar;\
+ typedef typename Type::Packet Packet;\
+ typedef typename Type::RealScalar RealScalar;\
+ typedef typename Type::CoeffReturnType CoeffReturnType;\
+ typedef typename Type::PacketReturnType PacketReturnType;\
+};
+
+TENSORFORCEDEVALPLACEHOLDER(const)
+TENSORFORCEDEVALPLACEHOLDER()
+#undef TENSORFORCEDEVALPLACEHOLDER
+
+template <typename PlainObjectType, int Options_, template <class> class Makepointer_, size_t N>
+struct traits<PlaceHolder<const TensorMap<PlainObjectType, Options_, Makepointer_>, N> >: public traits<PlainObjectType> {
+ typedef traits<PlainObjectType> BaseTraits;
+ typedef typename BaseTraits::Scalar Scalar;
+ typedef typename BaseTraits::StorageKind StorageKind;
+ typedef typename BaseTraits::Index Index;
+ static const int NumDimensions = BaseTraits::NumDimensions;
+ static const int Layout = BaseTraits::Layout;
+ enum {
+ Options = Options_,
+ Flags = BaseTraits::Flags,
+ };
+};
+
+template <typename PlainObjectType, int Options_, template <class> class Makepointer_, size_t N>
+struct traits<PlaceHolder<TensorMap<PlainObjectType, Options_, Makepointer_>, N> >
+: traits<PlaceHolder<const TensorMap<PlainObjectType, Options_, Makepointer_>, N> > {};
+
+} // end namespace internal
+} // end namespoace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
new file mode 100644
index 000000000..f456c35aa
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
@@ -0,0 +1,158 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclPlaceHolderExpr.h
+ *
+ * \brief:
+ * This is the specialisation of the placeholder expression based on the
+ * operation type
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \sttruct PlaceHolderExpression
+/// \brief it is used to create the PlaceHolder expression. The PlaceHolder
+/// expression is a copy of expression type in which the TensorMap of the has
+/// been replaced with PlaceHolder.
+template <typename Expr, size_t N>
+struct PlaceHolderExpression;
+
+template<size_t N, typename... Args>
+struct CalculateIndex;
+
+template<size_t N, typename Arg>
+struct CalculateIndex<N, Arg>{
+ typedef typename PlaceHolderExpression<Arg, N>::Type ArgType;
+ typedef utility::tuple::Tuple<ArgType> ArgsTuple;
+};
+
+template<size_t N, typename Arg1, typename Arg2>
+struct CalculateIndex<N, Arg1, Arg2>{
+ static const size_t Arg2LeafCount = LeafCount<Arg2>::Count;
+ typedef typename PlaceHolderExpression<Arg1, N - Arg2LeafCount>::Type Arg1Type;
+ typedef typename PlaceHolderExpression<Arg2, N>::Type Arg2Type;
+ typedef utility::tuple::Tuple<Arg1Type, Arg2Type> ArgsTuple;
+};
+
+template<size_t N, typename Arg1, typename Arg2, typename Arg3>
+struct CalculateIndex<N, Arg1, Arg2, Arg3> {
+ static const size_t Arg3LeafCount = LeafCount<Arg3>::Count;
+ static const size_t Arg2LeafCount = LeafCount<Arg2>::Count;
+ typedef typename PlaceHolderExpression<Arg1, N - Arg3LeafCount - Arg2LeafCount>::Type Arg1Type;
+ typedef typename PlaceHolderExpression<Arg2, N - Arg3LeafCount>::Type Arg2Type;
+ typedef typename PlaceHolderExpression<Arg3, N>::Type Arg3Type;
+ typedef utility::tuple::Tuple<Arg1Type, Arg2Type, Arg3Type> ArgsTuple;
+};
+
+template<template<class...> class Category , class OP, class TPL>
+struct CategoryHelper;
+
+template<template<class...> class Category , class OP, class ...T >
+struct CategoryHelper<Category, OP, utility::tuple::Tuple<T...> > {
+ typedef Category<OP, T... > Type;
+};
+
+template<template<class...> class Category , class ...T >
+struct CategoryHelper<Category, NoOP, utility::tuple::Tuple<T...> > {
+ typedef Category<T... > Type;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorBroadcastingOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp
+#define OPEXPRCATEGORY(CVQual)\
+template <template <class, class... > class Category, typename OP, typename... SubExpr, size_t N>\
+struct PlaceHolderExpression<CVQual Category<OP, SubExpr...>, N>{\
+ typedef CVQual typename CategoryHelper<Category, OP, typename CalculateIndex<N, SubExpr...>::ArgsTuple>::Type Type;\
+};
+
+OPEXPRCATEGORY(const)
+OPEXPRCATEGORY()
+#undef OPEXPRCATEGORY
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorCwiseSelectOp
+#define SELECTEXPR(CVQual)\
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, size_t N>\
+struct PlaceHolderExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, N> {\
+ typedef CVQual typename CategoryHelper<TensorSelectOp, NoOP, typename CalculateIndex<N, IfExpr, ThenExpr, ElseExpr>::ArgsTuple>::Type Type;\
+};
+
+SELECTEXPR(const)
+SELECTEXPR()
+#undef SELECTEXPR
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorAssignOp
+#define ASSIGNEXPR(CVQual)\
+template <typename LHSExpr, typename RHSExpr, size_t N>\
+struct PlaceHolderExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr>, N> {\
+ typedef CVQual typename CategoryHelper<TensorAssignOp, NoOP, typename CalculateIndex<N, LHSExpr, RHSExpr>::ArgsTuple>::Type Type;\
+};
+
+ASSIGNEXPR(const)
+ASSIGNEXPR()
+#undef ASSIGNEXPR
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorMap
+#define TENSORMAPEXPR(CVQual)\
+template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_, size_t N>\
+struct PlaceHolderExpression< CVQual TensorMap< Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> {\
+ typedef CVQual Eigen::internal::PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> Type;\
+};
+
+TENSORMAPEXPR(const)
+TENSORMAPEXPR()
+#undef TENSORMAPEXPR
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorForcedEvalOp
+#define FORCEDEVAL(CVQual)\
+template <typename Expr, size_t N>\
+struct PlaceHolderExpression<CVQual TensorForcedEvalOp<Expr>, N> {\
+ typedef CVQual Eigen::internal::PlaceHolder<CVQual TensorForcedEvalOp<Expr>, N> Type;\
+};
+
+FORCEDEVAL(const)
+FORCEDEVAL()
+#undef FORCEDEVAL
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorEvalToOp
+#define EVALTO(CVQual)\
+template <typename Expr, size_t N>\
+struct PlaceHolderExpression<CVQual TensorEvalToOp<Expr>, N> {\
+ typedef CVQual TensorEvalToOp<typename CalculateIndex <N, Expr>::ArgType> Type;\
+};
+
+EVALTO(const)
+EVALTO()
+#undef EVALTO
+
+/// template deduction for \ref PlaceHolderExpression struct
+template <typename Expr>
+struct createPlaceHolderExpression {
+ static const size_t TotalLeaves = LeafCount<Expr>::Count;
+ typedef typename PlaceHolderExpression<Expr, TotalLeaves - 1>::Type Type;
+};
+
+}
+}
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
new file mode 100644
index 000000000..57f2dda26
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
@@ -0,0 +1,69 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Cummins Chris PhD student at The University of Edinburgh.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclRun.h
+ *
+ * \brief:
+ * Schedule_kernel invoke an specialised version of kernel struct. The
+ * specialisation is based on the data dimension in sycl buffer
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+/// The run function in tensor sycl convert the expression tree to a buffer
+/// based expression tree;
+/// creates the expression tree for the device with accessor to buffers;
+/// construct the kernel and submit it to the sycl queue.
+template <typename Expr, typename Dev>
+void run(Expr &expr, Dev &dev) {
+ Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev);
+ const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
+ if (needs_assign) {
+ typedef typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr;
+ auto functors = internal::extractFunctors(evaluator);
+
+ dev.m_queue.submit([&](cl::sycl::handler &cgh) {
+
+ // create a tuple of accessors from Evaluator
+ auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
+ const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0];
+
+ size_t outTileSize = range;
+ if (range > 64) outTileSize = 64;
+ size_t yMode = range % outTileSize;
+ int yRange = static_cast<int>(range);
+ if (yMode != 0) yRange += (outTileSize - yMode);
+
+ // run the kernel
+ cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(yRange), cl::sycl::range<1>(outTileSize)), [=](cl::sycl::nd_item<1> itemID) {
+ typedef typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr;
+ auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
+ auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice());
+ if (itemID.get_global_linear_id() < range) {
+ device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id()));
+ }
+ });
+ });
+ dev.m_queue.throw_asynchronous();
+ }
+ evaluator.cleanup();
+}
+} // namespace TensorSycl
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h
new file mode 100644
index 000000000..063b027e8
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h
@@ -0,0 +1,234 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensroSyclTuple.h
+ *
+ * \brief:
+ * Minimal implementation of std::tuple that can be used inside a SYCL kernel.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP
+namespace utility {
+namespace tuple {
+/// \struct StaticIf
+/// \brief The StaticIf struct is used to statically choose the type based on the
+/// condition.
+template <bool, typename T = void> struct StaticIf;
+/// \brief specialisation of the \ref StaticIf when the condition is true
+template <typename T>
+struct StaticIf<true, T> {
+ typedef T type;
+};
+
+/// \struct Tuple
+/// \brief is a fixed-size collection of heterogeneous values
+/// \ztparam Ts... - the types of the elements that the tuple stores.
+/// Empty list is supported.
+template <class... Ts>
+struct Tuple {};
+
+/// \brief specialisation of the \ref Tuple class when the tuple has at least
+/// one element.
+/// \tparam T : the type of the first element in the tuple.
+/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty.
+template <class T, class... Ts>
+struct Tuple<T, Ts...> {
+ Tuple(T t, Ts... ts) : head(t), tail(ts...) {}
+ T head;
+ Tuple<Ts...> tail;
+};
+
+///\ struct ElemTypeHolder
+/// \brief ElemTypeHolder class is used to specify the types of the
+/// elements inside the tuple
+/// \tparam size_t the number of elements inside the tuple
+/// \tparam class the tuple class
+template <size_t, class>
+struct ElemTypeHolder;
+
+/// \brief specialisation of the \ref ElemTypeHolder class when the number of
+/// elements inside the tuple is 1
+template <class T, class... Ts>
+struct ElemTypeHolder<0, Tuple<T, Ts...> > {
+ typedef T type;
+};
+
+/// \brief specialisation of the \ref ElemTypeHolder class when the number of
+/// elements inside the tuple is bigger than 1. It recursively calls itself to
+/// detect the type of each element in the tuple
+/// \tparam T : the type of the first element in the tuple.
+/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty.
+/// \tparam K is the Kth element in the tuple
+template <size_t k, class T, class... Ts>
+struct ElemTypeHolder<k, Tuple<T, Ts...> > {
+ typedef typename ElemTypeHolder<k - 1, Tuple<Ts...> >::type type;
+};
+
+/// get
+/// \brief Extracts the first element from the tuple.
+/// K=0 represents the first element of the tuple. The tuple cannot be empty.
+/// \tparam Ts... are the type of the elements in the tuple.
+/// \param t is the tuple whose contents to extract
+/// \return typename ElemTypeHolder<0, Tuple<Ts...> >::type &>::type
+
+#define TERMINATE_CONDS_TUPLE_GET(CVQual) \
+template <size_t k, class... Ts> \
+typename StaticIf<k == 0, CVQual typename ElemTypeHolder<0, Tuple<Ts...> >::type &>::type \
+get(CVQual Tuple<Ts...> &t) { \
+ static_assert(sizeof...(Ts)!=0, "The requseted value is bigger than the size of the tuple"); \
+ return t.head; \
+}
+
+TERMINATE_CONDS_TUPLE_GET(const)
+TERMINATE_CONDS_TUPLE_GET()
+#undef TERMINATE_CONDS_TUPLE_GET
+/// get
+/// \brief Extracts the Kth element from the tuple.
+///\tparam K is an integer value in [0,sizeof...(Types)).
+/// \tparam T is the (sizeof...(Types) -(K+1)) element in the tuple
+/// \tparam Ts... are the type of the elements in the tuple.
+/// \param t is the tuple whose contents to extract
+/// \return typename ElemTypeHolder<K, Tuple<Ts...> >::type &>::type
+#define RECURSIVE_TUPLE_GET(CVQual) \
+template <size_t k, class T, class... Ts> \
+typename StaticIf<k != 0, CVQual typename ElemTypeHolder<k, Tuple<T, Ts...> >::type &>::type \
+get(CVQual Tuple<T, Ts...> &t) { \
+ return utility::tuple::get<k - 1>(t.tail); \
+}
+RECURSIVE_TUPLE_GET(const)
+RECURSIVE_TUPLE_GET()
+#undef RECURSIVE_TUPLE_GET
+
+/// make_tuple
+/// \brief Creates a tuple object, deducing the target type from the types of
+/// arguments.
+/// \tparam Args the type of the arguments to construct the tuple from
+/// \param args zero or more arguments to construct the tuple from
+/// \return Tuple<Args...>
+template <typename... Args>
+Tuple<Args...> make_tuple(Args... args) {
+ return Tuple<Args...>(args...);
+}
+
+/// size
+/// \brief Provides access to the number of elements in a tuple as a
+/// compile-time constant expression.
+/// \tparam Args the type of the arguments to construct the tuple from
+/// \return size_t
+template <typename... Args>
+static constexpr size_t size(Tuple<Args...> &) {
+ return sizeof...(Args);
+}
+
+/// \struct IndexList
+/// \brief Creates a list of index from the elements in the tuple
+/// \tparam Is... a list of index from [0 to sizeof...(tuple elements))
+template <size_t... Is>
+struct IndexList {};
+
+/// \struct RangeBuilder
+/// \brief Collects internal details for generating index ranges [MIN, MAX)
+/// Declare primary template for index range builder
+/// \tparam MIN is the starting index in the tuple
+/// \tparam N represents sizeof..(elemens)- sizeof...(Is)
+/// \tparam Is... are the list of generated index so far
+template <size_t MIN, size_t N, size_t... Is>
+struct RangeBuilder;
+
+/// \brief base Step: Specialisation of the \ref RangeBuilder when the
+/// MIN==MAX. In this case the Is... is [0 to sizeof...(tuple elements))
+/// \tparam MIN is the starting index of the tuple
+/// \tparam Is is [0 to sizeof...(tuple elements))
+template <size_t MIN, size_t... Is>
+struct RangeBuilder<MIN, MIN, Is...> {
+ typedef IndexList<Is...> type;
+};
+
+/// Induction step: Specialisation of the RangeBuilder class when N!=MIN
+/// in this case we are recursively subtracting N by one and adding one
+/// index to Is... list until MIN==N
+/// \tparam MIN is the starting index in the tuple
+/// \tparam N represents sizeof..(elemens)- sizeof...(Is)
+/// \tparam Is... are the list of generated index so far
+template <size_t MIN, size_t N, size_t... Is>
+struct RangeBuilder : public RangeBuilder<MIN, N - 1, N - 1, Is...> {};
+
+/// \brief IndexRange that returns a [MIN, MAX) index range
+/// \tparam MIN is the starting index in the tuple
+/// \tparam MAX is the size of the tuple
+template <size_t MIN, size_t MAX>
+struct IndexRange: RangeBuilder<MIN, MAX>::type {};
+
+/// append_base
+/// \brief unpacking the elements of the input tuple t and creating a new tuple
+/// by adding element a at the end of it.
+///\tparam Args... the type of the elements inside the tuple t
+/// \tparam T the type of the new element going to be added at the end of tuple
+/// \tparam I... is the list of index from [0 to sizeof...(t))
+/// \param t the tuple on which we want to append a.
+/// \param a the new elements going to be added to the tuple
+/// \return Tuple<Args..., T>
+template <typename... Args, typename T, size_t... I>
+Tuple<Args..., T> append_base(Tuple<Args...> t, T a,IndexList<I...>) {
+ return utility::tuple::make_tuple(get<I>(t)..., a);
+}
+
+/// append
+/// \brief the deduction function for \ref append_base that automatically
+/// generate the \ref IndexRange
+///\tparam Args... the type of the elements inside the tuple t
+/// \tparam T the type of the new element going to be added at the end of tuple
+/// \param t the tuple on which we want to append a.
+/// \param a the new elements going to be added to the tuple
+/// \return Tuple<Args..., T>
+template <typename... Args, typename T>
+Tuple<Args..., T> append(Tuple<Args...> t, T a) {
+ return utility::tuple::append_base(t, a, IndexRange<0, sizeof...(Args)>());
+}
+
+/// append_base
+/// \brief This is a specialisation of \ref append_base when we want to
+/// concatenate
+/// tuple t2 at the end of the tuple t1. Here we unpack both tuples, generate the
+/// IndexRange for each of them and create an output tuple T that contains both
+/// elements of t1 and t2.
+///\tparam Args1... the type of the elements inside the tuple t1
+///\tparam Args2... the type of the elements inside the tuple t2
+/// \tparam I1... is the list of index from [0 to sizeof...(t1))
+/// \tparam I2... is the list of index from [0 to sizeof...(t2))
+/// \param t1 is the tuple on which we want to append t2.
+/// \param t2 is the tuple that is going to be added on t1.
+/// \return Tuple<Args1..., Args2...>
+template <typename... Args1, typename... Args2, size_t... I1, size_t... I2>
+Tuple<Args1..., Args2...> append_base(Tuple<Args1...> t1, Tuple<Args2...> t2, IndexList<I1...>, IndexList<I2...>) {
+ return utility::tuple::make_tuple(get<I1>(t1)...,get<I2>(t2)...);
+}
+
+/// append
+/// \brief deduction function for \ref append_base when we are appending tuple
+/// t1 by tuple t2. In this case the \ref IndexRange for both tuple are
+/// automatically generated.
+///\tparam Args1... the type of the elements inside the tuple t1
+///\tparam Args2... the type of the elements inside the tuple t2
+/// \param t1 is the tuple on which we want to append t2.
+/// \param t2 is the tuple that is going to be added on t1.
+/// \return Tuple<Args1..., Args2...>
+template <typename... Args1, typename... Args2>
+Tuple<Args1..., Args2...> append(Tuple<Args1...> t1,Tuple<Args2...> t2) {
+ return utility::tuple::append_base(t1, t2, IndexRange<0, sizeof...(Args1)>(), IndexRange<0, sizeof...(Args2)>());
+}
+} // tuple
+} // utility
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
index b7597b3a5..ffcf8b00f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
@@ -28,7 +28,7 @@ class compute_tensor_flags
#else
0
#endif
- ||
+ |
#if EIGEN_MAX_ALIGN_BYTES>0
is_dynamic_size_storage
#else
@@ -56,6 +56,9 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit)
};
+ template <typename T> struct MakePointer {
+ typedef T* Type;
+ };
};
@@ -71,11 +74,14 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit)
};
+ template <typename T> struct MakePointer {
+ typedef T* Type;
+ };
};
-template<typename PlainObjectType, int Options_>
-struct traits<TensorMap<PlainObjectType, Options_> >
+template<typename PlainObjectType, int Options_, template <class> class MakePointer_>
+struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> >
: public traits<PlainObjectType>
{
typedef traits<PlainObjectType> BaseTraits;
@@ -88,6 +94,11 @@ struct traits<TensorMap<PlainObjectType, Options_> >
Options = Options_,
Flags = BaseTraits::Flags
};
+ template <class T> struct MakePointer {
+ // Intermediate typedef to workaround MSVC issue.
+ typedef MakePointer_<T> MakePointerT;
+ typedef typename MakePointerT::Type Type;
+ };
};
template<typename PlainObjectType>
@@ -131,16 +142,16 @@ struct eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eig
typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
};
-template<typename PlainObjectType, int Options>
-struct eval<TensorMap<PlainObjectType, Options>, Eigen::Dense>
+template<typename PlainObjectType, int Options, template <class> class MakePointer>
+struct eval<TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense>
{
- typedef const TensorMap<PlainObjectType, Options>& type;
+ typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
};
-template<typename PlainObjectType, int Options>
-struct eval<const TensorMap<PlainObjectType, Options>, Eigen::Dense>
+template<typename PlainObjectType, int Options, template <class> class MakePointer>
+struct eval<const TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense>
{
- typedef const TensorMap<PlainObjectType, Options>& type;
+ typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
};
template<typename PlainObjectType>
@@ -186,16 +197,16 @@ struct nested<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >
};
-template <typename PlainObjectType, int Options>
-struct nested<TensorMap<PlainObjectType, Options> >
+template <typename PlainObjectType, int Options, template <class> class MakePointer>
+struct nested<TensorMap<PlainObjectType, Options, MakePointer> >
{
- typedef const TensorMap<PlainObjectType, Options>& type;
+ typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
};
-template <typename PlainObjectType, int Options>
-struct nested<const TensorMap<PlainObjectType, Options> >
+template <typename PlainObjectType, int Options, template <class> class MakePointer>
+struct nested<const TensorMap<PlainObjectType, Options, MakePointer> >
{
- typedef const TensorMap<PlainObjectType, Options>& type;
+ typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
};
template <typename PlainObjectType>
diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions
index 7c7493c56..a2ad4925e 100644
--- a/unsupported/Eigen/SpecialFunctions
+++ b/unsupported/Eigen/SpecialFunctions
@@ -10,6 +10,8 @@
#ifndef EIGEN_SPECIALFUNCTIONS_MODULE
#define EIGEN_SPECIALFUNCTIONS_MODULE
+#include <math.h>
+
#include "../../Eigen/Core"
#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index 52619fc0c..f524d7137 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -120,13 +120,27 @@ struct lgamma_retval {
template <>
struct lgamma_impl<float> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE float run(float x) { return ::lgammaf(x); }
+ static EIGEN_STRONG_INLINE float run(float x) {
+#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
+ int signgam;
+ return ::lgammaf_r(x, &signgam);
+#else
+ return ::lgammaf(x);
+#endif
+ }
};
template <>
struct lgamma_impl<double> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE double run(double x) { return ::lgamma(x); }
+ static EIGEN_STRONG_INLINE double run(double x) {
+#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
+ int signgam;
+ return ::lgamma_r(x, &signgam);
+#else
+ return ::lgamma(x);
+#endif
+ }
};
#endif
@@ -794,7 +808,7 @@ template <>
struct zeta_impl_series<float> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE bool run(float& a, float& b, float& s, const float x, const float machep) {
- int i = 0;
+ int i = 0;
while(i < 9)
{
i += 1;
@@ -804,7 +818,7 @@ struct zeta_impl_series<float> {
if( numext::abs(b/s) < machep )
return true;
}
-
+
//Return whether we are done
return false;
}
@@ -814,7 +828,7 @@ template <>
struct zeta_impl_series<double> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE bool run(double& a, double& b, double& s, const double x, const double machep) {
- int i = 0;
+ int i = 0;
while( (i < 9) || (a <= 9.0) )
{
i += 1;
@@ -824,12 +838,12 @@ struct zeta_impl_series<double> {
if( numext::abs(b/s) < machep )
return true;
}
-
+
//Return whether we are done
return false;
}
};
-
+
template <typename Scalar>
struct zeta_impl {
EIGEN_DEVICE_FUNC
@@ -894,10 +908,10 @@ struct zeta_impl {
* Series, and Products, p. 1073; Academic Press, 1980.
*
*/
-
+
int i;
Scalar p, r, a, b, k, s, t, w;
-
+
const Scalar A[] = {
Scalar(12.0),
Scalar(-720.0),
@@ -912,20 +926,20 @@ struct zeta_impl {
Scalar(1.8152105401943546773e17), /*1.5511210043330985984e23/854513*/
Scalar(-7.1661652561756670113e18) /*1.6938241367317436694528e27/236364091*/
};
-
+
const Scalar maxnum = NumTraits<Scalar>::infinity();
const Scalar zero = 0.0, half = 0.5, one = 1.0;
const Scalar machep = cephes_helper<Scalar>::machep();
const Scalar nan = NumTraits<Scalar>::quiet_NaN();
-
+
if( x == one )
return maxnum;
-
+
if( x < one )
{
return nan;
}
-
+
if( q <= zero )
{
if(q == numext::floor(q))
@@ -937,7 +951,7 @@ struct zeta_impl {
if (p != r)
return nan;
}
-
+
/* Permit negative q but continue sum until n+q > +9 .
* This case should be handled by a reflection formula.
* If q<0 and x is an integer, there is a relation to
@@ -950,7 +964,7 @@ struct zeta_impl {
if (zeta_impl_series<Scalar>::run(a, b, s, x, machep)) {
return s;
}
-
+
w = a;
s += b*w/(x-one);
s -= half * b;
@@ -983,9 +997,9 @@ template <typename Scalar>
struct polygamma_retval {
typedef Scalar type;
};
-
+
#if !EIGEN_HAS_C99_MATH
-
+
template <typename Scalar>
struct polygamma_impl {
EIGEN_DEVICE_FUNC
@@ -995,9 +1009,9 @@ struct polygamma_impl {
return Scalar(0);
}
};
-
+
#else
-
+
template <typename Scalar>
struct polygamma_impl {
EIGEN_DEVICE_FUNC
@@ -1005,7 +1019,7 @@ struct polygamma_impl {
Scalar zero = 0.0, one = 1.0;
Scalar nplus = n + one;
const Scalar nan = NumTraits<Scalar>::quiet_NaN();
-
+
// Check that n is an integer
if (numext::floor(n) != n) {
return nan;
@@ -1021,7 +1035,7 @@ struct polygamma_impl {
}
}
};
-
+
#endif // EIGEN_HAS_C99_MATH
/************************************************************************************************