diff options
Diffstat (limited to 'unsupported/Eigen')
26 files changed, 1839 insertions, 116 deletions
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 73aae3da8..388976d2e 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -69,6 +69,10 @@ typedef unsigned __int64 uint64_t; #endif #endif +#ifdef EIGEN_USE_SYCL +#include <SYCL/sycl.hpp> +#endif + #include "src/Tensor/TensorMacros.h" #include "src/Tensor/TensorForwardDeclarations.h" #include "src/Tensor/TensorMeta.h" @@ -77,6 +81,8 @@ typedef unsigned __int64 uint64_t; #include "src/Tensor/TensorDeviceDefault.h" #include "src/Tensor/TensorDeviceThreadPool.h" #include "src/Tensor/TensorDeviceCuda.h" +#include "src/Tensor/TensorSycl.h" +#include "src/Tensor/TensorDeviceSycl.h" #include "src/Tensor/TensorIndexList.h" #include "src/Tensor/TensorDimensionList.h" #include "src/Tensor/TensorDimensions.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index cb615c75b..166be200c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -163,6 +163,11 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device> TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); } + /// required by sycl in order to extract the accessor + const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; } + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); } private: diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 5d67f69f3..4cfe300eb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -113,7 +113,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) + : m_broadcast(op.broadcast()),m_impl(op.expression(), device) { // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar // and store the result in a scalar. Instead one should reshape the scalar into a a N-D @@ -374,7 +374,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } + + Broadcast functor() const { return m_broadcast; } + protected: + const Broadcast m_broadcast; Dimensions m_dimensions; array<Index, NumDims> m_outputStrides; array<Index, NumDims> m_inputStrides; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h new file mode 100644 index 000000000..bfd36f5aa --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -0,0 +1,122 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Cummins Chris PhD student at The University of Edinburgh. +// Contact: <eigen@codeplay.com> + +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H + +namespace Eigen { +/// \struct BufferT is used to specialise add_sycl_buffer function for +// two types of buffer we have. When the MapAllocator is true, we create the +// sycl buffer with MapAllocator. +/// We have to const_cast the input pointer in order to work around the fact +/// that sycl does not accept map allocator for const pointer. +template <typename T, bool MapAllocator> +struct BufferT { + using Type = cl::sycl::buffer<T, 1, cl::sycl::map_allocator<T>>; + static inline void add_sycl_buffer( + const T *ptr, size_t num_bytes, + std::map<const void *, std::shared_ptr<void>> &buffer_map) { + buffer_map.insert(std::pair<const void *, std::shared_ptr<void>>( + ptr, std::shared_ptr<void>(std::make_shared<Type>( + Type(const_cast<T *>(ptr), cl::sycl::range<1>(num_bytes)))))); + } +}; + +/// specialisation of the \ref BufferT when the MapAllocator is false. In this +/// case we only create the device-only buffer. +template <typename T> +struct BufferT<T, false> { + using Type = cl::sycl::buffer<T, 1>; + static inline void add_sycl_buffer( + const T *ptr, size_t num_bytes, + std::map<const void *, std::shared_ptr<void>> &buffer_map) { + buffer_map.insert(std::pair<const void *, std::shared_ptr<void>>( + ptr, std::shared_ptr<void>( + std::make_shared<Type>(Type(cl::sycl::range<1>(num_bytes)))))); + } +}; + +struct SyclDevice { + /// class members + /// sycl queue + cl::sycl::queue &m_queue; + /// std::map is the container used to make sure that we create only one buffer + /// per pointer. The lifespan of the buffer + /// now depends on the lifespan of SyclDevice. If a non-read-only pointer is + /// needed to be accessed on the host we should manually deallocate it. + mutable std::map<const void *, std::shared_ptr<void>> buffer_map; + + SyclDevice(cl::sycl::queue &q) : m_queue(q) {} + // destructor + ~SyclDevice() { deallocate_all(); } + + template <typename T> + void deallocate(const T *p) const { + auto it = buffer_map.find(p); + if (it != buffer_map.end()) { + buffer_map.erase(it); + } + } + void deallocate_all() const { buffer_map.clear(); } + + /// creation of sycl accessor for a buffer. This function first tries to find + /// the buffer in the buffer_map. + /// If found it gets the accessor from it, if not, the function then adds an + /// entry by creating a sycl buffer + /// for that particular pointer. + template <cl::sycl::access::mode AcMd, bool MapAllocator, typename T> + inline cl::sycl::accessor<T, 1, AcMd, cl::sycl::access::target::global_buffer> + get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh, + const T *ptr) const { + auto it = buffer_map.find(ptr); + if (it == buffer_map.end()) { + BufferT<T, MapAllocator>::add_sycl_buffer(ptr, num_bytes, buffer_map); + } + return ( + ((typename BufferT<T, MapAllocator>::Type *)(buffer_map.at(ptr).get())) + ->template get_access<AcMd>(cgh)); + } + + /// allocating memory on the cpu + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + + // some runtime conditions that can be applied here + bool isDeviceSuitable() const { return true; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void *buffer) const { + internal::aligned_free(buffer); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, + size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice( + void *dst, const void *src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost( + void *dst, const void *src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void *buffer, int c, + size_t n) const { + ::memset(buffer, c, n); + } +}; +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index a08dfa7c3..68d14a7e5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -20,8 +20,8 @@ namespace Eigen { * */ namespace internal { -template<typename XprType> -struct traits<TensorEvalToOp<XprType> > +template<typename XprType, template <class> class MakePointer_> +struct traits<TensorEvalToOp<XprType, MakePointer_> > { // Type promotion to handle the case where the types of the lhs and the rhs are different. typedef typename XprType::Scalar Scalar; @@ -36,16 +36,22 @@ struct traits<TensorEvalToOp<XprType> > enum { Flags = 0 }; + template <class T> + struct MakePointer { + // Intermediate typedef to workaround MSVC issue. + typedef MakePointer_<T> MakePointerT; + typedef typename MakePointerT::Type Type; + }; }; -template<typename XprType> -struct eval<TensorEvalToOp<XprType>, Eigen::Dense> +template<typename XprType, template <class> class MakePointer_> +struct eval<TensorEvalToOp<XprType, MakePointer_>, Eigen::Dense> { typedef const TensorEvalToOp<XprType>& type; }; -template<typename XprType> -struct nested<TensorEvalToOp<XprType>, 1, typename eval<TensorEvalToOp<XprType> >::type> +template<typename XprType, template <class> class MakePointer_> +struct nested<TensorEvalToOp<XprType, MakePointer_>, 1, typename eval<TensorEvalToOp<XprType, MakePointer_> >::type> { typedef TensorEvalToOp<XprType> type; }; @@ -55,37 +61,38 @@ struct nested<TensorEvalToOp<XprType>, 1, typename eval<TensorEvalToOp<XprType> -template<typename XprType> -class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType>, ReadOnlyAccessors> +template<typename XprType, template <class> class MakePointer_> +class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType, MakePointer_>, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits<TensorEvalToOp>::Scalar Scalar; typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; + typedef typename MakePointer_<CoeffReturnType>::Type PointerType; typedef typename Eigen::internal::nested<TensorEvalToOp>::type Nested; typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind; typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(CoeffReturnType* buffer, const XprType& expr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr) : m_xpr(expr), m_buffer(buffer) {} EIGEN_DEVICE_FUNC const typename internal::remove_all<typename XprType::Nested>::type& expression() const { return m_xpr; } - EIGEN_DEVICE_FUNC CoeffReturnType* buffer() const { return m_buffer; } + EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; } protected: typename XprType::Nested m_xpr; - CoeffReturnType* m_buffer; + PointerType m_buffer; }; -template<typename ArgType, typename Device> -struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device> +template<typename ArgType, typename Device, template <class> class MakePointer_> +struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device> { - typedef TensorEvalToOp<ArgType> XprType; + typedef TensorEvalToOp<ArgType, MakePointer_> XprType; typedef typename ArgType::Scalar Scalar; typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; typedef typename XprType::Index Index; @@ -102,15 +109,22 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device> }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_device(device), m_buffer(op.buffer()) + : m_impl(op.expression(), device), m_device(device), + m_buffer(op.buffer()), m_op(op), m_expression(op.expression()) { } + // Used for accessor extraction in SYCL Managed TensorMap: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const { + return m_op; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() { } + typedef typename internal::traits<const TensorEvalToOp<ArgType, MakePointer_> >::template MakePointer<CoeffReturnType>::Type DevicePointer; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) { EIGEN_UNUSED_VARIABLE(scalar); eigen_assert(scalar == NULL); return m_impl.evalSubExprsIfNeeded(m_buffer); @@ -145,12 +159,20 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device> TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_buffer; } + EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; } + ArgType expression() const { return m_expression; } + + /// required by sycl in order to extract the accessor + const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} private: TensorEvaluator<ArgType, Device> m_impl; const Device& m_device; - CoeffReturnType* m_buffer; + DevicePointer m_buffer; + const XprType& m_op; + const ArgType m_expression; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 61c111cec..834ce07df 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -46,9 +46,11 @@ struct TensorEvaluator }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(const_cast<Scalar*>(m.data())), m_dims(m.dimensions()), m_device(device) + : m_data(const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m) { } + // Used for accessor extraction in SYCL Managed TensorMap: + const Derived& derived() const { return m_impl; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) { @@ -106,12 +108,16 @@ struct TensorEvaluator internal::unpacket_traits<PacketReturnType>::size); } - EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<Scalar>::Type data() const { return m_data; } + + /// required by sycl in order to construct sycl buffer from raw pointer + const Device& device() const{return m_device;} protected: - Scalar* m_data; + typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data; Dimensions m_dims; const Device& m_device; + const Derived& m_impl; }; namespace { @@ -159,8 +165,11 @@ struct TensorEvaluator<const Derived, Device> RawAccess = true }; + // Used for accessor extraction in SYCL Managed TensorMap: + const Derived& derived() const { return m_impl; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(m.data()), m_dims(m.dimensions()), m_device(device) + : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } @@ -198,12 +207,16 @@ struct TensorEvaluator<const Derived, Device> internal::unpacket_traits<PacketReturnType>::size); } - EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data() const { return m_data; } + + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} protected: - const Scalar* m_data; + typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data; Dimensions m_dims; const Device& m_device; + const Derived& m_impl; }; @@ -260,6 +273,12 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; } + /// required by sycl in order to extract the accessor + NullaryOp functor() const { return m_functor; } + + private: const NullaryOp m_functor; TensorEvaluator<ArgType, Device> m_argImpl; @@ -324,6 +343,12 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device> EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator<ArgType, Device> & impl() const { return m_argImpl; } + /// added for sycl in order to construct the buffer from sycl device + UnaryOp functor() const { return m_functor; } + + private: const UnaryOp m_functor; TensorEvaluator<ArgType, Device> m_argImpl; @@ -397,6 +422,12 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg } EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; } + /// required by sycl in order to extract the accessor + BinaryOp functor() const { return m_functor; } private: const BinaryOp m_functor; @@ -492,10 +523,17 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator<Arg1Type, Device> & arg1Impl() const { return m_arg1Impl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; } + private: const TernaryOp m_functor; TensorEvaluator<Arg1Type, Device> m_arg1Impl; - TensorEvaluator<Arg1Type, Device> m_arg2Impl; + TensorEvaluator<Arg2Type, Device> m_arg2Impl; TensorEvaluator<Arg3Type, Device> m_arg3Impl; }; @@ -576,6 +614,12 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType> } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; } private: TensorEvaluator<IfArgType, Device> m_condImpl; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 0cac7b179..f01d77c0a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -267,6 +267,20 @@ inline void TensorExecutor<Expression, GpuDevice, Vectorizable>::run( #endif // __CUDACC__ #endif // EIGEN_USE_GPU +// SYCL Executor policy +#ifdef EIGEN_USE_SYCL + +template <typename Expression, bool Vectorizable> +class TensorExecutor<Expression, SyclDevice, Vectorizable> { +public: + static inline void run(const Expression &expr, const SyclDevice &device) { + // call TensorSYCL module + TensorSycl::run(expr, device); + } +}; + +#endif + } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h index 5f2e329f2..85dfc7a69 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h @@ -283,7 +283,7 @@ class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, A arg1Expression() const { return m_arg1_xpr; } EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename Arg1XprType::Nested>::type& + const typename internal::remove_all<typename Arg2XprType::Nested>::type& arg2Expression() const { return m_arg2_xpr; } EIGEN_DEVICE_FUNC @@ -292,7 +292,7 @@ class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, A protected: typename Arg1XprType::Nested m_arg1_xpr; - typename Arg1XprType::Nested m_arg2_xpr; + typename Arg2XprType::Nested m_arg2_xpr; typename Arg3XprType::Nested m_arg3_xpr; const TernaryOp m_functor; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index c23ecdbc4..bbd5eb374 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -19,9 +19,15 @@ namespace Eigen { * * */ +/// template <class> class MakePointer_ is added to convert the host pointer to the device pointer. +/// It is added due to the fact that for our device compiler T* is not allowed. +/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T. +/// This is done through our MakePointer_ class. By default the Type in the MakePointer_<T> is T* . +/// Therefore, by adding the default value, we managed to convert the type and it does not break any +/// existing code as its default value is T*. namespace internal { -template<typename XprType> -struct traits<TensorForcedEvalOp<XprType> > +template<typename XprType, template <class> class MakePointer_> +struct traits<TensorForcedEvalOp<XprType, MakePointer_> > { // Type promotion to handle the case where the types of the lhs and the rhs are different. typedef typename XprType::Scalar Scalar; @@ -36,26 +42,31 @@ struct traits<TensorForcedEvalOp<XprType> > enum { Flags = 0 }; + template <class T> struct MakePointer { + // Intermediate typedef to workaround MSVC issue. + typedef MakePointer_<T> MakePointerT; + typedef typename MakePointerT::Type Type; + }; }; -template<typename XprType> -struct eval<TensorForcedEvalOp<XprType>, Eigen::Dense> +template<typename XprType, template <class> class MakePointer_> +struct eval<TensorForcedEvalOp<XprType, MakePointer_>, Eigen::Dense> { - typedef const TensorForcedEvalOp<XprType>& type; + typedef const TensorForcedEvalOp<XprType, MakePointer_>& type; }; -template<typename XprType> -struct nested<TensorForcedEvalOp<XprType>, 1, typename eval<TensorForcedEvalOp<XprType> >::type> +template<typename XprType, template <class> class MakePointer_> +struct nested<TensorForcedEvalOp<XprType, MakePointer_>, 1, typename eval<TensorForcedEvalOp<XprType, MakePointer_> >::type> { - typedef TensorForcedEvalOp<XprType> type; + typedef TensorForcedEvalOp<XprType, MakePointer_> type; }; } // end namespace internal -template<typename XprType> -class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOnlyAccessors> +template<typename XprType, template <class> class MakePointer_> +class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType, MakePointer_>, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar; @@ -77,10 +88,10 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn }; -template<typename ArgType, typename Device> -struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> +template<typename ArgType, typename Device, template <class> class MakePointer_> +struct TensorEvaluator<const TensorForcedEvalOp<ArgType, MakePointer_>, Device> { - typedef TensorForcedEvalOp<ArgType> XprType; + typedef TensorForcedEvalOp<ArgType, MakePointer_> XprType; typedef typename ArgType::Scalar Scalar; typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; typedef typename XprType::Index Index; @@ -96,6 +107,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + /// op_ is used for sycl : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL) { } @@ -110,10 +122,10 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> new(m_buffer+i) CoeffReturnType(); } } - typedef TensorEvalToOp<const ArgType> EvalTo; + typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo; EvalTo evalToTmp(m_buffer, m_op); const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value; - internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device); + internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device); return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { @@ -136,13 +148,17 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC Scalar* data() const { return m_buffer; } + EIGEN_DEVICE_FUNC typename MakePointer<Scalar>::Type data() const { return m_buffer; } + /// required by sycl in order to extract the sycl accessor + const TensorEvaluator<ArgType, Device>& impl() { return m_impl; } + /// used by sycl in order to build the sycl buffer + const Device& device() const{return m_device;} private: TensorEvaluator<ArgType, Device> m_impl; const ArgType m_op; const Device& m_device; - CoeffReturnType* m_buffer; + typename MakePointer<CoeffReturnType>::Type m_buffer; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 490ddd8bd..6497b1830 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -12,9 +12,19 @@ namespace Eigen { +// MakePointer class is used as a container of the adress space of the pointer +// on the host and on the device. From the host side it generates the T* pointer +// and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to +// T* m_data on the host. It is always called on the device. +// Specialisation of MakePointer class for creating the sycl buffer with +// map_allocator. +template<typename T> struct MakePointer { + typedef T* Type; +}; + +template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap; template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor; template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize; -template<typename PlainObjectType, int Options_ = Unaligned> class TensorMap; template<typename PlainObjectType> class TensorRef; template<typename Derived, int AccessLevel> class TensorBase; @@ -52,8 +62,8 @@ template<typename Op, typename XprType> class TensorScanOp; template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp; template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp; -template<typename XprType> class TensorEvalToOp; -template<typename XprType> class TensorForcedEvalOp; +template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp; +template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorForcedEvalOp; template<typename ExpressionType, typename DeviceType> class TensorDevice; template<typename Derived, typename Device> struct TensorEvaluator; @@ -61,6 +71,7 @@ template<typename Derived, typename Device> struct TensorEvaluator; struct DefaultDevice; struct ThreadPoolDevice; struct GpuDevice; +struct SyclDevice; enum FFTResultType { RealPart = 0, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index 6fb4f4a31..a8e55757e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -18,11 +18,16 @@ namespace Eigen { * \brief A tensor expression mapping an existing array of data. * */ - -template<typename PlainObjectType, int Options_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_> > +/// template <class> class MakePointer_ is added to convert the host pointer to the device pointer. +/// It is added due to the fact that for our device compiler T* is not allowed. +/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T. +/// This is done through our MakePointer_ class. By default the Type in the MakePointer_<T> is T* . +/// Therefore, by adding the default value, we managed to convert the type and it does not break any +/// existing code as its default value is T*. +template<typename PlainObjectType, int Options_, template <class> class MakePointer_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_, MakePointer_> > { public: - typedef TensorMap<PlainObjectType, Options_> Self; + typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self; typedef typename PlainObjectType::Base Base; typedef typename Eigen::internal::nested<Self>::type Nested; typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind; @@ -36,7 +41,7 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor Scalar *, const Scalar *>::type PointerType;*/ - typedef Scalar* PointerType; + typedef typename MakePointer_<Scalar>::Type PointerType; typedef PointerType PointerArgType; static const int Options = Options_; @@ -109,9 +114,9 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar* data() { return m_data; } + EIGEN_STRONG_INLINE PointerType data() { return m_data; } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar* data() const { return m_data; } + EIGEN_STRONG_INLINE const PointerType data() const { return m_data; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const @@ -307,7 +312,7 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor } private: - Scalar* m_data; + typename MakePointer_<Scalar>::Type m_data; Dimensions m_dimensions; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index fdb5ee6b8..615559d44 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -83,6 +83,27 @@ struct PacketType<half, GpuDevice> { }; #endif +#if defined(EIGEN_USE_SYCL) +template <typename T> + struct PacketType<T, SyclDevice> { + typedef T type; + static const int size = 1; + enum { + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasArg = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + HasSetLinear = 0, + HasBlend = 0 + }; +}; +#endif // Tuple mimics std::pair but works on e.g. nvcc. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index a87777b22..d34ff98b0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -423,15 +423,15 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> // Precompute output strides. if (NumOutputDims > 0) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_outputStrides[0] = 1; - for (int i = 1; i < NumOutputDims; ++i) { - m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; - } + m_outputStrides[0] = 1; + for (int i = 1; i < NumOutputDims; ++i) { + m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; + } } else { - m_outputStrides.back() = 1; - for (int i = NumOutputDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; - } + m_outputStrides.back() = 1; + for (int i = NumOutputDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; + } } } @@ -439,27 +439,27 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> if (NumInputDims > 0) { array<Index, NumInputDims> input_strides; if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - input_strides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - input_strides[i] = input_strides[i-1] * input_dims[i-1]; - } + input_strides[0] = 1; + for (int i = 1; i < NumInputDims; ++i) { + input_strides[i] = input_strides[i-1] * input_dims[i-1]; + } } else { - input_strides.back() = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; - } + input_strides.back() = 1; + for (int i = NumInputDims - 2; i >= 0; --i) { + input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; + } } int outputIndex = 0; int reduceIndex = 0; for (int i = 0; i < NumInputDims; ++i) { - if (m_reduced[i]) { - m_reducedStrides[reduceIndex] = input_strides[i]; - ++reduceIndex; - } else { - m_preservedStrides[outputIndex] = input_strides[i]; - ++outputIndex; - } + if (m_reduced[i]) { + m_reducedStrides[reduceIndex] = input_strides[i]; + ++reduceIndex; + } else { + m_preservedStrides[outputIndex] = input_strides[i]; + ++outputIndex; + } } } @@ -578,7 +578,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> Op reducer(m_reducer); if (ReducingInnerMostDims || RunningFullReduction) { const Index num_values_to_reduce = - (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; + (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; return internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstInput(index), num_values_to_reduce, reducer); } else { @@ -602,7 +602,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; if (ReducingInnerMostDims) { const Index num_values_to_reduce = - (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; + (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; const Index firstIndex = firstInput(index); for (Index i = 0; i < PacketSize; ++i) { Op reducer(m_reducer); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h new file mode 100644 index 000000000..da15f7942 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h @@ -0,0 +1,77 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: eigen@codeplay.com +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// General include header of SYCL target for Tensor Module +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H + +#ifdef EIGEN_USE_SYCL + +// global pointer to set different attribute state for a class +template <class T> +struct MakeGlobalPointer { + typedef typename cl::sycl::global_ptr<T>::pointer_t Type; +}; + +namespace Eigen { +namespace TensorSycl { +namespace internal { + +/// This struct is used for special expression nodes with no operations (for example assign and selectOP). + struct NoOP; + +template<bool IsConst, typename T> struct GetType{ + typedef const T Type; +}; +template<typename T> struct GetType<false, T>{ + typedef T Type; +}; + +} +} +} + +// tuple construction +#include "TensorSyclTuple.h" + +// This file contains the PlaceHolder that replaces the actual data +#include "TensorSyclPlaceHolder.h" + +#include "TensorSyclLeafCount.h" + +// The index PlaceHolder takes the actual expression and replaces the actual +// data on it with the place holder. It uses the same pre-order expression tree +// traverse as the leaf count in order to give the right access number to each +// node in the expression +#include "TensorSyclPlaceHolderExpr.h" + +// creation of an accessor tuple from a tuple of SYCL buffers +#include "TensorSyclExtractAccessor.h" + +// actual data extraction using accessors +//#include "GetDeviceData.h" + +// this is used to change the address space type in tensor map for GPU +#include "TensorSyclConvertToDeviceExpression.h" + +// this is used to extract the functors +#include "TensorSyclExtractFunctors.h" + +// this is used to create tensormap on the device +// this is used to construct the expression on the device +#include "TensorSyclExprConstructor.h" + +// kernel execution using fusion +#include "TensorSyclRun.h" + +#endif // end of EIGEN_USE_SYCL +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h new file mode 100644 index 000000000..a94c30426 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h @@ -0,0 +1,109 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclConvertToDeviceExpression.h + * + * \brief: + * Conversion from host pointer to device pointer + * inside leaf nodes of the expression. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { + +/// \struct ConvertToDeviceExpression +/// \brief This struct is used to convert the MakePointer in the host expression +/// to the MakeGlobalPointer for the device expression. For the leafNodes +/// containing the pointer. This is due to the fact that the address space of +/// the pointer T* is different on the host and the device. +template <typename Expr> +struct ConvertToDeviceExpression; + +template<template<class...> class NonOpCategory, bool IsConst, typename... Args> +struct NonOpConversion{ + typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type...> >::Type Type; +}; + + +template<template<class, template <class> class > class NonOpCategory, bool IsConst, typename Args> +struct DeviceConvertor{ + typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type, MakeGlobalPointer> >::Type Type; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorMap +#define TENSORMAPCONVERT(CVQual)\ +template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_>\ +struct ConvertToDeviceExpression<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_> > {\ + typedef CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer> Type;\ +}; + +TENSORMAPCONVERT(const) +TENSORMAPCONVERT() +#undef TENSORMAPCONVERT + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, TensorBroadcastingOp +#define CATEGORYCONVERT(CVQual)\ +template <template<class, class...> class Category, typename OP, typename... subExprs>\ +struct ConvertToDeviceExpression<CVQual Category<OP, subExprs...> > {\ + typedef CVQual Category<OP, typename ConvertToDeviceExpression<subExprs>::Type... > Type;\ +}; +CATEGORYCONVERT(const) +CATEGORYCONVERT() +#undef CATEGORYCONVERT + + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorCwiseSelectOp +#define SELECTOPCONVERT(CVQual, Res)\ +template <typename IfExpr, typename ThenExpr, typename ElseExpr>\ +struct ConvertToDeviceExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >\ +: NonOpConversion<TensorSelectOp, Res, IfExpr, ThenExpr, ElseExpr> {}; +SELECTOPCONVERT(const, true) +SELECTOPCONVERT(, false) +#undef SELECTOPCONVERT + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const AssingOP +#define ASSIGNCONVERT(CVQual, Res)\ +template <typename LHSExpr, typename RHSExpr>\ +struct ConvertToDeviceExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr> >\ +: NonOpConversion<TensorAssignOp, Res, LHSExpr, RHSExpr>{}; + +ASSIGNCONVERT(const, true) +ASSIGNCONVERT(, false) +#undef ASSIGNCONVERT + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is either TensorForcedEvalOp or TensorEvalToOp +#define KERNELBROKERCONVERT(CVQual, Res, ExprNode)\ +template <typename Expr>\ +struct ConvertToDeviceExpression<CVQual ExprNode<Expr> > \ +: DeviceConvertor<ExprNode, Res, Expr>{}; + +KERNELBROKERCONVERT(const, true, TensorForcedEvalOp) +KERNELBROKERCONVERT(, false, TensorForcedEvalOp) +KERNELBROKERCONVERT(const, true, TensorEvalToOp) +KERNELBROKERCONVERT(, false, TensorEvalToOp) +#undef KERNELBROKERCONVERT +} // namespace internal +} // namespace TensorSycl +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX1 diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h new file mode 100644 index 000000000..833d5e271 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h @@ -0,0 +1,213 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclExprConstructor.h + * + * \brief: + * This file re-create an expression on the SYCL device in order + * to use the original tensor evaluator. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// this class is used by EvalToOp in order to create an lhs expression which is +/// a pointer from an accessor on device-only buffer +template <typename PtrType, size_t N, typename... Params> +struct EvalToLHSConstructor { + PtrType expr; + EvalToLHSConstructor(const utility::tuple::Tuple<Params...> &t): expr((&(*(utility::tuple::get<N>(t).get_pointer())))) {} +}; + +/// \struct ExprConstructor is used to reconstruct the expression on the device +/// and +/// recreate the expression with MakeGlobalPointer containing the device address +/// space for the TensorMap pointers used in eval function. +/// It receives the original expression type, the functor of the node, the tuple +/// of accessors, and the device expression type to re-instantiate the +/// expression tree for the device +template <typename OrigExpr, typename IndexExpr, typename... Params> +struct ExprConstructor; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorMap +#define TENSORMAP(CVQual)\ +template <typename Scalar_, int Options_, int Options2_, int Options3_, int NumIndices_, typename IndexType_,\ +template <class> class MakePointer_, size_t N, typename... Params>\ +struct ExprConstructor< CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer>,\ +CVQual Eigen::internal::PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options3_, MakePointer_>, N>, Params...>{\ + typedef CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer> Type;\ + Type expr;\ + template <typename FuncDetector>\ + ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\ + : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\ +}; + +TENSORMAP(const) +TENSORMAP() +#undef TENSORMAP + +#define UNARYCATEGORY(CVQual)\ +template <template<class, class> class UnaryCategory, typename OP, typename OrigRHSExpr, typename RHSExpr, typename... Params>\ +struct ExprConstructor<CVQual UnaryCategory<OP, OrigRHSExpr>, CVQual UnaryCategory<OP, RHSExpr>, Params...> {\ + typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_type;\ + my_type rhsExpr;\ + typedef CVQual UnaryCategory<OP, typename my_type::Type> Type;\ + Type expr;\ + template <typename FuncDetector>\ + ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}\ +}; + +UNARYCATEGORY(const) +UNARYCATEGORY() +#undef UNARYCATEGORY + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorBinaryOp +#define BINARYCATEGORY(CVQual)\ +template <template<class, class, class> class BinaryCategory, typename OP, typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr,\ +typename RHSExpr, typename... Params>\ +struct ExprConstructor<CVQual BinaryCategory<OP, OrigLHSExpr, OrigRHSExpr>, CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Params...> {\ + typedef ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\ + typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\ + typedef CVQual BinaryCategory<OP, typename my_left_type::Type, typename my_right_type::Type> Type;\ + my_left_type lhsExpr;\ + my_right_type rhsExpr;\ + Type expr;\ + template <typename FuncDetector>\ + ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ + : lhsExpr(funcD.lhsExpr, t),rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {}\ +}; + +BINARYCATEGORY(const) +BINARYCATEGORY() +#undef BINARYCATEGORY + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseTernaryOp +#define TERNARYCATEGORY(CVQual)\ +template <template <class, class, class, class> class TernaryCategory, typename OP, typename OrigArg1Expr, typename OrigArg2Expr,typename OrigArg3Expr,\ +typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename... Params>\ +struct ExprConstructor<CVQual TernaryCategory<OP, OrigArg1Expr, OrigArg2Expr, OrigArg3Expr>, CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Params...> {\ + typedef ExprConstructor<OrigArg1Expr, Arg1Expr, Params...> my_arg1_type;\ + typedef ExprConstructor<OrigArg2Expr, Arg2Expr, Params...> my_arg2_type;\ + typedef ExprConstructor<OrigArg3Expr, Arg3Expr, Params...> my_arg3_type;\ + typedef CVQual TernaryCategory<OP, typename my_arg1_type::Type, typename my_arg2_type::Type, typename my_arg3_type::Type> Type;\ + my_arg1_type arg1Expr;\ + my_arg2_type arg2Expr;\ + my_arg3_type arg3Expr;\ + Type expr;\ + template <typename FuncDetector>\ + ExprConstructor(FuncDetector &funcD,const utility::tuple::Tuple<Params...> &t)\ + : arg1Expr(funcD.arg1Expr, t), arg2Expr(funcD.arg2Expr, t), arg3Expr(funcD.arg3Expr, t), expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {}\ +}; + +TERNARYCATEGORY(const) +TERNARYCATEGORY() +#undef TERNARYCATEGORY + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseSelectOp +#define SELECTOP(CVQual)\ +template <typename OrigIfExpr, typename OrigThenExpr, typename OrigElseExpr, typename IfExpr, typename ThenExpr, typename ElseExpr, typename... Params>\ +struct ExprConstructor< CVQual TensorSelectOp<OrigIfExpr, OrigThenExpr, OrigElseExpr>, CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Params...> {\ + typedef ExprConstructor<OrigIfExpr, IfExpr, Params...> my_if_type;\ + typedef ExprConstructor<OrigThenExpr, ThenExpr, Params...> my_then_type;\ + typedef ExprConstructor<OrigElseExpr, ElseExpr, Params...> my_else_type;\ + typedef CVQual TensorSelectOp<typename my_if_type::Type, typename my_then_type::Type, typename my_else_type::Type> Type;\ + my_if_type ifExpr;\ + my_then_type thenExpr;\ + my_else_type elseExpr;\ + Type expr;\ + template <typename FuncDetector>\ + ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ + : ifExpr(funcD.ifExpr, t), thenExpr(funcD.thenExpr, t), elseExpr(funcD.elseExpr, t), expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {}\ +}; + +SELECTOP(const) +SELECTOP() +#undef SELECTOP + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorAssignOp +#define ASSIGN(CVQual)\ +template <typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr, typename RHSExpr, typename... Params>\ +struct ExprConstructor<CVQual TensorAssignOp<OrigLHSExpr, OrigRHSExpr>, CVQual TensorAssignOp<LHSExpr, RHSExpr>, Params...> {\ + typedef ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\ + typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\ + typedef CVQual TensorAssignOp<typename my_left_type::Type, typename my_right_type::Type> Type;\ + my_left_type lhsExpr;\ + my_right_type rhsExpr;\ + Type expr;\ + template <typename FuncDetector>\ + ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ + : lhsExpr(funcD.lhsExpr, t), rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr) {}\ + }; + + ASSIGN(const) + ASSIGN() + #undef ASSIGN +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorEvalToOp +#define EVALTO(CVQual)\ +template <typename OrigExpr, typename Expr, typename... Params>\ +struct ExprConstructor<CVQual TensorEvalToOp<OrigExpr, MakeGlobalPointer>, CVQual TensorEvalToOp<Expr>, Params...> {\ + typedef ExprConstructor<OrigExpr, Expr, Params...> my_expr_type;\ + typedef typename TensorEvalToOp<OrigExpr, MakeGlobalPointer>::PointerType my_buffer_type;\ + typedef CVQual TensorEvalToOp<typename my_expr_type::Type, MakeGlobalPointer> Type;\ + my_expr_type nestedExpression;\ + EvalToLHSConstructor<my_buffer_type, 0, Params...> buffer;\ + Type expr;\ + template <typename FuncDetector>\ + ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ + : nestedExpression(funcD.rhsExpr, t), buffer(t), expr(buffer.expr, nestedExpression.expr) {}\ +}; + +EVALTO(const) +EVALTO() +#undef EVALTO + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorForcedEvalOp +#define FORCEDEVAL(CVQual)\ +template <typename OrigExpr, typename DevExpr, size_t N, typename... Params>\ +struct ExprConstructor<CVQual TensorForcedEvalOp<OrigExpr, MakeGlobalPointer>,\ +CVQual Eigen::internal::PlaceHolder<CVQual TensorForcedEvalOp<DevExpr>, N>, Params...> {\ + typedef CVQual TensorMap<Tensor<typename TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::Scalar,\ + TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::NumDimensions, 0, typename TensorForcedEvalOp<DevExpr>::Index>, 0, MakeGlobalPointer> Type;\ + Type expr;\ + template <typename FuncDetector>\ + ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\ + : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\ +}; + +FORCEDEVAL(const) +FORCEDEVAL() +#undef FORCEDEVAL + +/// template deduction for \ref ExprConstructor struct +template <typename OrigExpr, typename IndexExpr, typename FuncD, typename... Params> +auto createDeviceExpression(FuncD &funcD, const utility::tuple::Tuple<Params...> &t) + -> decltype(ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t)) { + return ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t); +} +} +} +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h new file mode 100644 index 000000000..ceec528ea --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h @@ -0,0 +1,201 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclExtractAccessor.h + * + * \brief: + * ExtractAccessor takes Expression placeHolder expression and the tuple of sycl + * buffers as an input. Using pre-order tree traversal, ExtractAccessor + * recursively calls itself for its children in the expression tree. The + * leaf node in the PlaceHolder expression is nothing but a container preserving + * the order of the actual data in the tuple of sycl buffer. By invoking the + * extract accessor for the PlaceHolder<N>, an accessor is created for the Nth + * buffer in the tuple of buffers. This accessor is then added as an Nth + * element in the tuple of accessors. In this case we preserve the order of data + * in the expression tree. + * + * This is the specialisation of extract accessor method for different operation + * type in the PlaceHolder expression. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \struct ExtractAccessor: Extract Accessor Class is used to extract the +/// accessor from a buffer. +/// Depending on the type of the leaf node we can get a read accessor or a +/// read_write accessor +template <typename Evaluator> +struct ExtractAccessor; + +struct AccessorConstructor{ + template<typename Arg> static inline auto getTuple(cl::sycl::handler& cgh, Arg eval) + -> decltype(ExtractAccessor<Arg>::getTuple(cgh, eval)) { + return ExtractAccessor<Arg>::getTuple(cgh, eval); + } + + template<typename Arg1, typename Arg2> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1, Arg2 eval2) + -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2))) { + return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2)); + } + template<typename Arg1, typename Arg2, typename Arg3> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1 , Arg2 eval2 , Arg3 eval3) + -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)))) { + return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3))); + } + template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, Arg eval) + -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor<AcM, true, + typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh,eval.data()))){ + return utility::tuple::make_tuple(eval.device().template get_sycl_accessor<AcM, true, typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh,eval.data())); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp and const TensorBroadcastingOp +template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> +struct ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> eval) + -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){ + return AccessorConstructor::getTuple(cgh, eval.impl()); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp +template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> +struct ExtractAccessor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> > +: ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseBinaryOp +template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> +struct ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> eval) + -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ + return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseBinaryOp +template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> +struct ExtractAccessor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > +: ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseTernaryOp +template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> +struct ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> eval) + -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){ + return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl()); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseTernaryOp +template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> +struct ExtractAccessor<TensorEvaluator<TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > +: ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseSelectOp. This is a special case where there is no OP +template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> +struct ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> eval) + -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){ + return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl()); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseSelectOp. This is a special case where there is no OP +template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> +struct ExtractAccessor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > +: ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >{}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorAssignOp +template <typename LHSExpr, typename RHSExpr, typename Dev> +struct ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> eval) + -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ + return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorAssignOp +template <typename LHSExpr, typename RHSExpr, typename Dev> +struct ExtractAccessor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> > +: ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorMap +#define TENSORMAPEXPR(CVQual, ACCType)\ +template <typename PlainObjectType, int Options_, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> eval)\ + -> decltype(AccessorConstructor::template getAccessor<ACCType>(cgh, eval)){\ + return AccessorConstructor::template getAccessor<ACCType>(cgh, eval);\ + }\ +}; +TENSORMAPEXPR(const, cl::sycl::access::mode::read) +TENSORMAPEXPR(, cl::sycl::access::mode::read_write) +#undef TENSORMAPEXPR + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorForcedEvalOp +template <typename Expr, typename Dev> +struct ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> eval) + -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){ + return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorForcedEvalOp +template <typename Expr, typename Dev> +struct ExtractAccessor<TensorEvaluator<TensorForcedEvalOp<Expr>, Dev> > +: ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> >{}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorEvalToOp +template <typename Expr, typename Dev> +struct ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> > { + static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<const TensorEvalToOp<Expr>, Dev> eval) + -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){ + return utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl())); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorEvalToOp +template <typename Expr, typename Dev> +struct ExtractAccessor<TensorEvaluator<TensorEvalToOp<Expr>, Dev> > +: ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> >{}; + +/// template deduction for \ref ExtractAccessor +template <typename Evaluator> +auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr) +-> decltype(ExtractAccessor<Evaluator>::getTuple(cgh, expr)) { + return ExtractAccessor<Evaluator>::getTuple(cgh, expr); +} +} +} +} +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h new file mode 100644 index 000000000..801b4f5d7 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h @@ -0,0 +1,154 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclextractFunctors.h + * + * \brief: + * Used to extract all the functors allocated to each node of the expression +*tree. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \struct FunctorExtractor: This struct is used to extract the functors +/// constructed on +/// the host-side, to pack them and reuse them in reconstruction of the +/// expression on the device. +/// We have to do that as in Eigen the functors are not stateless so we cannot +/// re-instantiate them on the device. +/// We have to pass instantiated functors to the device. +// This struct is used for leafNode (TensorMap) and nodes behaving like leafNode (TensorForcedEval). +template <typename Evaluator> struct FunctorExtractor{ + typedef typename Evaluator::Dimensions Dimensions; + const Dimensions m_dimensions; + const Dimensions& dimensions() const { return m_dimensions; } + FunctorExtractor(const Evaluator& expr) + : m_dimensions(expr.dimensions()) {} + +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp, and const TensorBroadcastingOp +template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> +struct FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > { + FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; + OP func; + FunctorExtractor(const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev>& expr) + : rhsExpr(expr.impl()), func(expr.functor()) {} +}; +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, and TensorBroadcastingOp +template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> +struct FunctorExtractor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> > +: FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> >{}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseBinaryOp +template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> +struct FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > { + FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr; + FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; + OP func; + FunctorExtractor(const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& expr) + : lhsExpr(expr.left_impl()),rhsExpr(expr.right_impl()),func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseBinaryOp +template <template <class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> +struct FunctorExtractor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > +: FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseTernaryOp +template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,typename Dev> +struct FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > { + FunctorExtractor<TensorEvaluator<Arg1Expr, Dev> > arg1Expr; + FunctorExtractor<TensorEvaluator<Arg2Expr, Dev> > arg2Expr; + FunctorExtractor<TensorEvaluator<Arg3Expr, Dev> > arg3Expr; + OP func; + FunctorExtractor(const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& expr) + : arg1Expr(expr.arg1Impl()), arg2Expr(expr.arg2Impl()), arg3Expr(expr.arg3Impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseTernaryOp +template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> +struct FunctorExtractor<TensorEvaluator< TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > +:FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated. +template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> +struct FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > { + FunctorExtractor<TensorEvaluator<IfExpr, Dev> > ifExpr; + FunctorExtractor<TensorEvaluator<ThenExpr, Dev> > thenExpr; + FunctorExtractor<TensorEvaluator<ElseExpr, Dev> > elseExpr; + FunctorExtractor(const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& expr) + : ifExpr(expr.cond_impl()), thenExpr(expr.then_impl()), elseExpr(expr.else_impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated +template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> +struct FunctorExtractor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > +:FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorAssignOp. This is an specialisation without OP so it has to be separated. +template <typename LHSExpr, typename RHSExpr, typename Dev> +struct FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > { + FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr; + FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; + FunctorExtractor(const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr) + : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorAssignOp. This is an specialisation without OP so it has to be separated. +template <typename LHSExpr, typename RHSExpr, typename Dev> +struct FunctorExtractor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> > +:FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{}; + + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorEvalToOp, This is an specialisation without OP so it has to be separated. +template <typename RHSExpr, typename Dev> +struct FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > { + FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; + FunctorExtractor(const TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev>& expr) + : rhsExpr(expr.impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorEvalToOp. This is a specialisation without OP so it has to be separated. +template <typename RHSExpr, typename Dev> +struct FunctorExtractor<TensorEvaluator<TensorEvalToOp<RHSExpr>, Dev> > +: FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {}; + + +/// template deduction function for FunctorExtractor +template <typename Evaluator> +auto inline extractFunctors(const Evaluator& evaluator)-> FunctorExtractor<Evaluator> { + return FunctorExtractor<Evaluator>(evaluator); +} +} // namespace internal +} // namespace TensorSycl +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h new file mode 100644 index 000000000..8d520d2da --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h @@ -0,0 +1,111 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclLeafCount.h + * + * \brief: + * The leaf count used the pre-order expression tree traverse in order to name + * count the number of leaf nodes in the expression + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \brief LeafCount used to counting terminal nodes. The total number of +/// leaf nodes is used by MakePlaceHolderExprHelper to find the order +/// of the leaf node in a expression tree at compile time. +template <typename Expr> +struct LeafCount; + +template<typename... Args> struct CategoryCount; + +template<> struct CategoryCount<> +{ + static const size_t Count =0; +}; + +template<typename Arg, typename... Args> +struct CategoryCount<Arg,Args...>{ + static const size_t Count = LeafCount<Arg>::Count + CategoryCount<Args...>::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorMap +template <typename PlainObjectType, int Options_, template <class> class MakePointer_> +struct LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> > { + static const size_t Count =1; +}; + +/// specialisation of the \ref LeafCount struct when the node type is TensorMap +template <typename PlainObjectType, int Options_, template <class> class MakePointer_> +struct LeafCount<TensorMap<PlainObjectType, Options_, MakePointer_> > :LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> >{}; + +// const TensorCwiseUnaryOp, const TensorCwiseNullaryOp, const TensorCwiseBinaryOp, const TensorCwiseTernaryOp, and Const TensorBroadcastingOp +template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr> +struct LeafCount<const CategoryExpr<OP, RHSExpr...> >: CategoryCount<RHSExpr...> {}; +// TensorCwiseUnaryOp, TensorCwiseNullaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, and TensorBroadcastingOp +template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr> +struct LeafCount<CategoryExpr<OP, RHSExpr...> > :LeafCount<const CategoryExpr<OP, RHSExpr...> >{}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// const TensorSelectOp is an exception +template <typename IfExpr, typename ThenExpr, typename ElseExpr> +struct LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > : CategoryCount<IfExpr, ThenExpr, ElseExpr> {}; +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorSelectOp +template <typename IfExpr, typename ThenExpr, typename ElseExpr> +struct LeafCount<TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >: LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > {}; + + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorAssignOp +template <typename LHSExpr, typename RHSExpr> +struct LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >: CategoryCount<LHSExpr,RHSExpr> {}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorAssignOp is an exception. It is not the same as Unary +template <typename LHSExpr, typename RHSExpr> +struct LeafCount<TensorAssignOp<LHSExpr, RHSExpr> > :LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >{}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorForcedEvalOp +template <typename Expr> +struct LeafCount<const TensorForcedEvalOp<Expr> > { + static const size_t Count =1; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorForcedEvalOp +template <typename Expr> +struct LeafCount<TensorForcedEvalOp<Expr> >: LeafCount<const TensorForcedEvalOp<Expr> > {}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorEvalToOp +template <typename Expr> +struct LeafCount<const TensorEvalToOp<Expr> > { + static const size_t Count = 1 + CategoryCount<Expr>::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorEvalToOp +template <typename Expr> +struct LeafCount<TensorEvalToOp<Expr> >: LeafCount<const TensorEvalToOp<Expr> >{}; +} +} +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h new file mode 100644 index 000000000..43a63c73d --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h @@ -0,0 +1,99 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclPlaceHolder.h + * + * \brief: + * The PlaceHolder expression are nothing but a container preserving + * the order of actual data in the tuple of sycl buffer. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_HPP + +namespace Eigen { +namespace internal { +/// \struct PlaceHolder +/// \brief PlaceHolder is used to replace the \ref TensorMap in the expression +/// tree. +/// PlaceHolder contains the order of the leaf node in the expression tree. +template <typename Scalar, size_t N> +struct PlaceHolder { + static constexpr size_t I = N; + typedef Scalar Type; +}; + +/// \brief specialisation of the PlaceHolder node for const TensorMap +#define TENSORMAPPLACEHOLDER(CVQual)\ +template <typename PlainObjectType, int Options_, template <class> class MakePointer_, size_t N>\ +struct PlaceHolder<CVQual TensorMap<PlainObjectType, Options_, MakePointer_>, N> {\ + static const size_t I = N;\ + typedef CVQual TensorMap<PlainObjectType, Options_, MakePointer_> Type;\ + typedef typename Type::Self Self;\ + typedef typename Type::Base Base;\ + typedef typename Type::Nested Nested;\ + typedef typename Type::StorageKind StorageKind;\ + typedef typename Type::Index Index;\ + typedef typename Type::Scalar Scalar;\ + typedef typename Type::RealScalar RealScalar;\ + typedef typename Type::CoeffReturnType CoeffReturnType;\ +}; + +TENSORMAPPLACEHOLDER(const) +TENSORMAPPLACEHOLDER() +#undef TENSORMAPPLACEHOLDER + +/// \brief specialisation of the PlaceHolder node for TensorForcedEvalOp. The +/// TensorForcedEvalOp acts as a leaf node for its parent node. +#define TENSORFORCEDEVALPLACEHOLDER(CVQual)\ +template <typename Expression, size_t N>\ +struct PlaceHolder<CVQual TensorForcedEvalOp<Expression>, N> {\ + static const size_t I = N;\ + typedef CVQual TensorForcedEvalOp<Expression> Type;\ + typedef typename Type::Nested Nested;\ + typedef typename Type::StorageKind StorageKind;\ + typedef typename Type::Index Index;\ + typedef typename Type::Scalar Scalar;\ + typedef typename Type::Packet Packet;\ + typedef typename Type::RealScalar RealScalar;\ + typedef typename Type::CoeffReturnType CoeffReturnType;\ + typedef typename Type::PacketReturnType PacketReturnType;\ +}; + +TENSORFORCEDEVALPLACEHOLDER(const) +TENSORFORCEDEVALPLACEHOLDER() +#undef TENSORFORCEDEVALPLACEHOLDER + +template <typename PlainObjectType, int Options_, template <class> class Makepointer_, size_t N> +struct traits<PlaceHolder<const TensorMap<PlainObjectType, Options_, Makepointer_>, N> >: public traits<PlainObjectType> { + typedef traits<PlainObjectType> BaseTraits; + typedef typename BaseTraits::Scalar Scalar; + typedef typename BaseTraits::StorageKind StorageKind; + typedef typename BaseTraits::Index Index; + static const int NumDimensions = BaseTraits::NumDimensions; + static const int Layout = BaseTraits::Layout; + enum { + Options = Options_, + Flags = BaseTraits::Flags, + }; +}; + +template <typename PlainObjectType, int Options_, template <class> class Makepointer_, size_t N> +struct traits<PlaceHolder<TensorMap<PlainObjectType, Options_, Makepointer_>, N> > +: traits<PlaceHolder<const TensorMap<PlainObjectType, Options_, Makepointer_>, N> > {}; + +} // end namespace internal +} // end namespoace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h new file mode 100644 index 000000000..f456c35aa --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h @@ -0,0 +1,158 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclPlaceHolderExpr.h + * + * \brief: + * This is the specialisation of the placeholder expression based on the + * operation type + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \sttruct PlaceHolderExpression +/// \brief it is used to create the PlaceHolder expression. The PlaceHolder +/// expression is a copy of expression type in which the TensorMap of the has +/// been replaced with PlaceHolder. +template <typename Expr, size_t N> +struct PlaceHolderExpression; + +template<size_t N, typename... Args> +struct CalculateIndex; + +template<size_t N, typename Arg> +struct CalculateIndex<N, Arg>{ + typedef typename PlaceHolderExpression<Arg, N>::Type ArgType; + typedef utility::tuple::Tuple<ArgType> ArgsTuple; +}; + +template<size_t N, typename Arg1, typename Arg2> +struct CalculateIndex<N, Arg1, Arg2>{ + static const size_t Arg2LeafCount = LeafCount<Arg2>::Count; + typedef typename PlaceHolderExpression<Arg1, N - Arg2LeafCount>::Type Arg1Type; + typedef typename PlaceHolderExpression<Arg2, N>::Type Arg2Type; + typedef utility::tuple::Tuple<Arg1Type, Arg2Type> ArgsTuple; +}; + +template<size_t N, typename Arg1, typename Arg2, typename Arg3> +struct CalculateIndex<N, Arg1, Arg2, Arg3> { + static const size_t Arg3LeafCount = LeafCount<Arg3>::Count; + static const size_t Arg2LeafCount = LeafCount<Arg2>::Count; + typedef typename PlaceHolderExpression<Arg1, N - Arg3LeafCount - Arg2LeafCount>::Type Arg1Type; + typedef typename PlaceHolderExpression<Arg2, N - Arg3LeafCount>::Type Arg2Type; + typedef typename PlaceHolderExpression<Arg3, N>::Type Arg3Type; + typedef utility::tuple::Tuple<Arg1Type, Arg2Type, Arg3Type> ArgsTuple; +}; + +template<template<class...> class Category , class OP, class TPL> +struct CategoryHelper; + +template<template<class...> class Category , class OP, class ...T > +struct CategoryHelper<Category, OP, utility::tuple::Tuple<T...> > { + typedef Category<OP, T... > Type; +}; + +template<template<class...> class Category , class ...T > +struct CategoryHelper<Category, NoOP, utility::tuple::Tuple<T...> > { + typedef Category<T... > Type; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorBroadcastingOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp +#define OPEXPRCATEGORY(CVQual)\ +template <template <class, class... > class Category, typename OP, typename... SubExpr, size_t N>\ +struct PlaceHolderExpression<CVQual Category<OP, SubExpr...>, N>{\ + typedef CVQual typename CategoryHelper<Category, OP, typename CalculateIndex<N, SubExpr...>::ArgsTuple>::Type Type;\ +}; + +OPEXPRCATEGORY(const) +OPEXPRCATEGORY() +#undef OPEXPRCATEGORY + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseSelectOp +#define SELECTEXPR(CVQual)\ +template <typename IfExpr, typename ThenExpr, typename ElseExpr, size_t N>\ +struct PlaceHolderExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, N> {\ + typedef CVQual typename CategoryHelper<TensorSelectOp, NoOP, typename CalculateIndex<N, IfExpr, ThenExpr, ElseExpr>::ArgsTuple>::Type Type;\ +}; + +SELECTEXPR(const) +SELECTEXPR() +#undef SELECTEXPR + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorAssignOp +#define ASSIGNEXPR(CVQual)\ +template <typename LHSExpr, typename RHSExpr, size_t N>\ +struct PlaceHolderExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr>, N> {\ + typedef CVQual typename CategoryHelper<TensorAssignOp, NoOP, typename CalculateIndex<N, LHSExpr, RHSExpr>::ArgsTuple>::Type Type;\ +}; + +ASSIGNEXPR(const) +ASSIGNEXPR() +#undef ASSIGNEXPR + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorMap +#define TENSORMAPEXPR(CVQual)\ +template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_, size_t N>\ +struct PlaceHolderExpression< CVQual TensorMap< Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> {\ + typedef CVQual Eigen::internal::PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> Type;\ +}; + +TENSORMAPEXPR(const) +TENSORMAPEXPR() +#undef TENSORMAPEXPR + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorForcedEvalOp +#define FORCEDEVAL(CVQual)\ +template <typename Expr, size_t N>\ +struct PlaceHolderExpression<CVQual TensorForcedEvalOp<Expr>, N> {\ + typedef CVQual Eigen::internal::PlaceHolder<CVQual TensorForcedEvalOp<Expr>, N> Type;\ +}; + +FORCEDEVAL(const) +FORCEDEVAL() +#undef FORCEDEVAL + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorEvalToOp +#define EVALTO(CVQual)\ +template <typename Expr, size_t N>\ +struct PlaceHolderExpression<CVQual TensorEvalToOp<Expr>, N> {\ + typedef CVQual TensorEvalToOp<typename CalculateIndex <N, Expr>::ArgType> Type;\ +}; + +EVALTO(const) +EVALTO() +#undef EVALTO + +/// template deduction for \ref PlaceHolderExpression struct +template <typename Expr> +struct createPlaceHolderExpression { + static const size_t TotalLeaves = LeafCount<Expr>::Count; + typedef typename PlaceHolderExpression<Expr, TotalLeaves - 1>::Type Type; +}; + +} +} +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h new file mode 100644 index 000000000..57f2dda26 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h @@ -0,0 +1,69 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Cummins Chris PhD student at The University of Edinburgh. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclRun.h + * + * \brief: + * Schedule_kernel invoke an specialised version of kernel struct. The + * specialisation is based on the data dimension in sycl buffer + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP + +namespace Eigen { +namespace TensorSycl { +/// The run function in tensor sycl convert the expression tree to a buffer +/// based expression tree; +/// creates the expression tree for the device with accessor to buffers; +/// construct the kernel and submit it to the sycl queue. +template <typename Expr, typename Dev> +void run(Expr &expr, Dev &dev) { + Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) { + typedef typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr; + auto functors = internal::extractFunctors(evaluator); + + dev.m_queue.submit([&](cl::sycl::handler &cgh) { + + // create a tuple of accessors from Evaluator + auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator); + const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0]; + + size_t outTileSize = range; + if (range > 64) outTileSize = 64; + size_t yMode = range % outTileSize; + int yRange = static_cast<int>(range); + if (yMode != 0) yRange += (outTileSize - yMode); + + // run the kernel + cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(yRange), cl::sycl::range<1>(outTileSize)), [=](cl::sycl::nd_item<1> itemID) { + typedef typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr; + auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); + auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice()); + if (itemID.get_global_linear_id() < range) { + device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id())); + } + }); + }); + dev.m_queue.throw_asynchronous(); + } + evaluator.cleanup(); +} +} // namespace TensorSycl +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h new file mode 100644 index 000000000..063b027e8 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h @@ -0,0 +1,234 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensroSyclTuple.h + * + * \brief: + * Minimal implementation of std::tuple that can be used inside a SYCL kernel. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP +namespace utility { +namespace tuple { +/// \struct StaticIf +/// \brief The StaticIf struct is used to statically choose the type based on the +/// condition. +template <bool, typename T = void> struct StaticIf; +/// \brief specialisation of the \ref StaticIf when the condition is true +template <typename T> +struct StaticIf<true, T> { + typedef T type; +}; + +/// \struct Tuple +/// \brief is a fixed-size collection of heterogeneous values +/// \ztparam Ts... - the types of the elements that the tuple stores. +/// Empty list is supported. +template <class... Ts> +struct Tuple {}; + +/// \brief specialisation of the \ref Tuple class when the tuple has at least +/// one element. +/// \tparam T : the type of the first element in the tuple. +/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty. +template <class T, class... Ts> +struct Tuple<T, Ts...> { + Tuple(T t, Ts... ts) : head(t), tail(ts...) {} + T head; + Tuple<Ts...> tail; +}; + +///\ struct ElemTypeHolder +/// \brief ElemTypeHolder class is used to specify the types of the +/// elements inside the tuple +/// \tparam size_t the number of elements inside the tuple +/// \tparam class the tuple class +template <size_t, class> +struct ElemTypeHolder; + +/// \brief specialisation of the \ref ElemTypeHolder class when the number of +/// elements inside the tuple is 1 +template <class T, class... Ts> +struct ElemTypeHolder<0, Tuple<T, Ts...> > { + typedef T type; +}; + +/// \brief specialisation of the \ref ElemTypeHolder class when the number of +/// elements inside the tuple is bigger than 1. It recursively calls itself to +/// detect the type of each element in the tuple +/// \tparam T : the type of the first element in the tuple. +/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty. +/// \tparam K is the Kth element in the tuple +template <size_t k, class T, class... Ts> +struct ElemTypeHolder<k, Tuple<T, Ts...> > { + typedef typename ElemTypeHolder<k - 1, Tuple<Ts...> >::type type; +}; + +/// get +/// \brief Extracts the first element from the tuple. +/// K=0 represents the first element of the tuple. The tuple cannot be empty. +/// \tparam Ts... are the type of the elements in the tuple. +/// \param t is the tuple whose contents to extract +/// \return typename ElemTypeHolder<0, Tuple<Ts...> >::type &>::type + +#define TERMINATE_CONDS_TUPLE_GET(CVQual) \ +template <size_t k, class... Ts> \ +typename StaticIf<k == 0, CVQual typename ElemTypeHolder<0, Tuple<Ts...> >::type &>::type \ +get(CVQual Tuple<Ts...> &t) { \ + static_assert(sizeof...(Ts)!=0, "The requseted value is bigger than the size of the tuple"); \ + return t.head; \ +} + +TERMINATE_CONDS_TUPLE_GET(const) +TERMINATE_CONDS_TUPLE_GET() +#undef TERMINATE_CONDS_TUPLE_GET +/// get +/// \brief Extracts the Kth element from the tuple. +///\tparam K is an integer value in [0,sizeof...(Types)). +/// \tparam T is the (sizeof...(Types) -(K+1)) element in the tuple +/// \tparam Ts... are the type of the elements in the tuple. +/// \param t is the tuple whose contents to extract +/// \return typename ElemTypeHolder<K, Tuple<Ts...> >::type &>::type +#define RECURSIVE_TUPLE_GET(CVQual) \ +template <size_t k, class T, class... Ts> \ +typename StaticIf<k != 0, CVQual typename ElemTypeHolder<k, Tuple<T, Ts...> >::type &>::type \ +get(CVQual Tuple<T, Ts...> &t) { \ + return utility::tuple::get<k - 1>(t.tail); \ +} +RECURSIVE_TUPLE_GET(const) +RECURSIVE_TUPLE_GET() +#undef RECURSIVE_TUPLE_GET + +/// make_tuple +/// \brief Creates a tuple object, deducing the target type from the types of +/// arguments. +/// \tparam Args the type of the arguments to construct the tuple from +/// \param args zero or more arguments to construct the tuple from +/// \return Tuple<Args...> +template <typename... Args> +Tuple<Args...> make_tuple(Args... args) { + return Tuple<Args...>(args...); +} + +/// size +/// \brief Provides access to the number of elements in a tuple as a +/// compile-time constant expression. +/// \tparam Args the type of the arguments to construct the tuple from +/// \return size_t +template <typename... Args> +static constexpr size_t size(Tuple<Args...> &) { + return sizeof...(Args); +} + +/// \struct IndexList +/// \brief Creates a list of index from the elements in the tuple +/// \tparam Is... a list of index from [0 to sizeof...(tuple elements)) +template <size_t... Is> +struct IndexList {}; + +/// \struct RangeBuilder +/// \brief Collects internal details for generating index ranges [MIN, MAX) +/// Declare primary template for index range builder +/// \tparam MIN is the starting index in the tuple +/// \tparam N represents sizeof..(elemens)- sizeof...(Is) +/// \tparam Is... are the list of generated index so far +template <size_t MIN, size_t N, size_t... Is> +struct RangeBuilder; + +/// \brief base Step: Specialisation of the \ref RangeBuilder when the +/// MIN==MAX. In this case the Is... is [0 to sizeof...(tuple elements)) +/// \tparam MIN is the starting index of the tuple +/// \tparam Is is [0 to sizeof...(tuple elements)) +template <size_t MIN, size_t... Is> +struct RangeBuilder<MIN, MIN, Is...> { + typedef IndexList<Is...> type; +}; + +/// Induction step: Specialisation of the RangeBuilder class when N!=MIN +/// in this case we are recursively subtracting N by one and adding one +/// index to Is... list until MIN==N +/// \tparam MIN is the starting index in the tuple +/// \tparam N represents sizeof..(elemens)- sizeof...(Is) +/// \tparam Is... are the list of generated index so far +template <size_t MIN, size_t N, size_t... Is> +struct RangeBuilder : public RangeBuilder<MIN, N - 1, N - 1, Is...> {}; + +/// \brief IndexRange that returns a [MIN, MAX) index range +/// \tparam MIN is the starting index in the tuple +/// \tparam MAX is the size of the tuple +template <size_t MIN, size_t MAX> +struct IndexRange: RangeBuilder<MIN, MAX>::type {}; + +/// append_base +/// \brief unpacking the elements of the input tuple t and creating a new tuple +/// by adding element a at the end of it. +///\tparam Args... the type of the elements inside the tuple t +/// \tparam T the type of the new element going to be added at the end of tuple +/// \tparam I... is the list of index from [0 to sizeof...(t)) +/// \param t the tuple on which we want to append a. +/// \param a the new elements going to be added to the tuple +/// \return Tuple<Args..., T> +template <typename... Args, typename T, size_t... I> +Tuple<Args..., T> append_base(Tuple<Args...> t, T a,IndexList<I...>) { + return utility::tuple::make_tuple(get<I>(t)..., a); +} + +/// append +/// \brief the deduction function for \ref append_base that automatically +/// generate the \ref IndexRange +///\tparam Args... the type of the elements inside the tuple t +/// \tparam T the type of the new element going to be added at the end of tuple +/// \param t the tuple on which we want to append a. +/// \param a the new elements going to be added to the tuple +/// \return Tuple<Args..., T> +template <typename... Args, typename T> +Tuple<Args..., T> append(Tuple<Args...> t, T a) { + return utility::tuple::append_base(t, a, IndexRange<0, sizeof...(Args)>()); +} + +/// append_base +/// \brief This is a specialisation of \ref append_base when we want to +/// concatenate +/// tuple t2 at the end of the tuple t1. Here we unpack both tuples, generate the +/// IndexRange for each of them and create an output tuple T that contains both +/// elements of t1 and t2. +///\tparam Args1... the type of the elements inside the tuple t1 +///\tparam Args2... the type of the elements inside the tuple t2 +/// \tparam I1... is the list of index from [0 to sizeof...(t1)) +/// \tparam I2... is the list of index from [0 to sizeof...(t2)) +/// \param t1 is the tuple on which we want to append t2. +/// \param t2 is the tuple that is going to be added on t1. +/// \return Tuple<Args1..., Args2...> +template <typename... Args1, typename... Args2, size_t... I1, size_t... I2> +Tuple<Args1..., Args2...> append_base(Tuple<Args1...> t1, Tuple<Args2...> t2, IndexList<I1...>, IndexList<I2...>) { + return utility::tuple::make_tuple(get<I1>(t1)...,get<I2>(t2)...); +} + +/// append +/// \brief deduction function for \ref append_base when we are appending tuple +/// t1 by tuple t2. In this case the \ref IndexRange for both tuple are +/// automatically generated. +///\tparam Args1... the type of the elements inside the tuple t1 +///\tparam Args2... the type of the elements inside the tuple t2 +/// \param t1 is the tuple on which we want to append t2. +/// \param t2 is the tuple that is going to be added on t1. +/// \return Tuple<Args1..., Args2...> +template <typename... Args1, typename... Args2> +Tuple<Args1..., Args2...> append(Tuple<Args1...> t1,Tuple<Args2...> t2) { + return utility::tuple::append_base(t1, t2, IndexRange<0, sizeof...(Args1)>(), IndexRange<0, sizeof...(Args2)>()); +} +} // tuple +} // utility +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index b7597b3a5..ffcf8b00f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -28,7 +28,7 @@ class compute_tensor_flags #else 0 #endif - || + | #if EIGEN_MAX_ALIGN_BYTES>0 is_dynamic_size_storage #else @@ -56,6 +56,9 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > Options = Options_, Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit) }; + template <typename T> struct MakePointer { + typedef T* Type; + }; }; @@ -71,11 +74,14 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> > Options = Options_, Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit) }; + template <typename T> struct MakePointer { + typedef T* Type; + }; }; -template<typename PlainObjectType, int Options_> -struct traits<TensorMap<PlainObjectType, Options_> > +template<typename PlainObjectType, int Options_, template <class> class MakePointer_> +struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> > : public traits<PlainObjectType> { typedef traits<PlainObjectType> BaseTraits; @@ -88,6 +94,11 @@ struct traits<TensorMap<PlainObjectType, Options_> > Options = Options_, Flags = BaseTraits::Flags }; + template <class T> struct MakePointer { + // Intermediate typedef to workaround MSVC issue. + typedef MakePointer_<T> MakePointerT; + typedef typename MakePointerT::Type Type; + }; }; template<typename PlainObjectType> @@ -131,16 +142,16 @@ struct eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eig typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; }; -template<typename PlainObjectType, int Options> -struct eval<TensorMap<PlainObjectType, Options>, Eigen::Dense> +template<typename PlainObjectType, int Options, template <class> class MakePointer> +struct eval<TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense> { - typedef const TensorMap<PlainObjectType, Options>& type; + typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; }; -template<typename PlainObjectType, int Options> -struct eval<const TensorMap<PlainObjectType, Options>, Eigen::Dense> +template<typename PlainObjectType, int Options, template <class> class MakePointer> +struct eval<const TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense> { - typedef const TensorMap<PlainObjectType, Options>& type; + typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; }; template<typename PlainObjectType> @@ -186,16 +197,16 @@ struct nested<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> > }; -template <typename PlainObjectType, int Options> -struct nested<TensorMap<PlainObjectType, Options> > +template <typename PlainObjectType, int Options, template <class> class MakePointer> +struct nested<TensorMap<PlainObjectType, Options, MakePointer> > { - typedef const TensorMap<PlainObjectType, Options>& type; + typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; }; -template <typename PlainObjectType, int Options> -struct nested<const TensorMap<PlainObjectType, Options> > +template <typename PlainObjectType, int Options, template <class> class MakePointer> +struct nested<const TensorMap<PlainObjectType, Options, MakePointer> > { - typedef const TensorMap<PlainObjectType, Options>& type; + typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; }; template <typename PlainObjectType> diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions index 7c7493c56..a2ad4925e 100644 --- a/unsupported/Eigen/SpecialFunctions +++ b/unsupported/Eigen/SpecialFunctions @@ -10,6 +10,8 @@ #ifndef EIGEN_SPECIALFUNCTIONS_MODULE #define EIGEN_SPECIALFUNCTIONS_MODULE +#include <math.h> + #include "../../Eigen/Core" #include "../../Eigen/src/Core/util/DisableStupidWarnings.h" diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index 52619fc0c..f524d7137 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -120,13 +120,27 @@ struct lgamma_retval { template <> struct lgamma_impl<float> { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float run(float x) { return ::lgammaf(x); } + static EIGEN_STRONG_INLINE float run(float x) { +#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) + int signgam; + return ::lgammaf_r(x, &signgam); +#else + return ::lgammaf(x); +#endif + } }; template <> struct lgamma_impl<double> { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(double x) { return ::lgamma(x); } + static EIGEN_STRONG_INLINE double run(double x) { +#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) + int signgam; + return ::lgamma_r(x, &signgam); +#else + return ::lgamma(x); +#endif + } }; #endif @@ -794,7 +808,7 @@ template <> struct zeta_impl_series<float> { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE bool run(float& a, float& b, float& s, const float x, const float machep) { - int i = 0; + int i = 0; while(i < 9) { i += 1; @@ -804,7 +818,7 @@ struct zeta_impl_series<float> { if( numext::abs(b/s) < machep ) return true; } - + //Return whether we are done return false; } @@ -814,7 +828,7 @@ template <> struct zeta_impl_series<double> { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE bool run(double& a, double& b, double& s, const double x, const double machep) { - int i = 0; + int i = 0; while( (i < 9) || (a <= 9.0) ) { i += 1; @@ -824,12 +838,12 @@ struct zeta_impl_series<double> { if( numext::abs(b/s) < machep ) return true; } - + //Return whether we are done return false; } }; - + template <typename Scalar> struct zeta_impl { EIGEN_DEVICE_FUNC @@ -894,10 +908,10 @@ struct zeta_impl { * Series, and Products, p. 1073; Academic Press, 1980. * */ - + int i; Scalar p, r, a, b, k, s, t, w; - + const Scalar A[] = { Scalar(12.0), Scalar(-720.0), @@ -912,20 +926,20 @@ struct zeta_impl { Scalar(1.8152105401943546773e17), /*1.5511210043330985984e23/854513*/ Scalar(-7.1661652561756670113e18) /*1.6938241367317436694528e27/236364091*/ }; - + const Scalar maxnum = NumTraits<Scalar>::infinity(); const Scalar zero = 0.0, half = 0.5, one = 1.0; const Scalar machep = cephes_helper<Scalar>::machep(); const Scalar nan = NumTraits<Scalar>::quiet_NaN(); - + if( x == one ) return maxnum; - + if( x < one ) { return nan; } - + if( q <= zero ) { if(q == numext::floor(q)) @@ -937,7 +951,7 @@ struct zeta_impl { if (p != r) return nan; } - + /* Permit negative q but continue sum until n+q > +9 . * This case should be handled by a reflection formula. * If q<0 and x is an integer, there is a relation to @@ -950,7 +964,7 @@ struct zeta_impl { if (zeta_impl_series<Scalar>::run(a, b, s, x, machep)) { return s; } - + w = a; s += b*w/(x-one); s -= half * b; @@ -983,9 +997,9 @@ template <typename Scalar> struct polygamma_retval { typedef Scalar type; }; - + #if !EIGEN_HAS_C99_MATH - + template <typename Scalar> struct polygamma_impl { EIGEN_DEVICE_FUNC @@ -995,9 +1009,9 @@ struct polygamma_impl { return Scalar(0); } }; - + #else - + template <typename Scalar> struct polygamma_impl { EIGEN_DEVICE_FUNC @@ -1005,7 +1019,7 @@ struct polygamma_impl { Scalar zero = 0.0, one = 1.0; Scalar nplus = n + one; const Scalar nan = NumTraits<Scalar>::quiet_NaN(); - + // Check that n is an integer if (numext::floor(n) != n) { return nan; @@ -1021,7 +1035,7 @@ struct polygamma_impl { } } }; - + #endif // EIGEN_HAS_C99_MATH /************************************************************************************************ |