diff options
author | Mehdi Goli <mehdi.goli@codeplay.com> | 2019-06-28 10:08:23 +0100 |
---|---|---|
committer | Mehdi Goli <mehdi.goli@codeplay.com> | 2019-06-28 10:08:23 +0100 |
commit | 7d08fa805a38f9ebb9e0e487c4e2d23d32a0fcde (patch) | |
tree | fbff4d80b6b373dcd53632de4c1fab5c393bdd64 /unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | |
parent | 16a56b2dddbfaf2d4b81d62be5e3139f12783ac8 (diff) |
[SYCL] This PR adds the minimum modifications to the Eigen unsupported module required to run it on devices supporting SYCL.
* Abstracting the pointer type so that both SYCL memory and pointer can be captured.
* Converting SYCL virtual pointer to SYCL device memory in Eigen evaluator class.
* Binding SYCL placeholder accessor to command group handler by using bind method in Eigen evaluator node.
* Adding SYCL macro for controlling loop unrolling.
* Modifying the TensorDeviceSycl.h and SYCL executor method to adopt the above changes.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 228 |
1 files changed, 128 insertions, 100 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 7f0f4acbc..1d48b5eed 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -34,6 +34,9 @@ struct TensorEvaluator typedef typename Derived::Dimensions Dimensions; typedef Derived XprType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; + typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType; + typedef StorageMemory<Scalar, Device> Storage; + typedef typename Storage::Type EvaluatorPointerType; // NumDimensions is -1 for variable dim tensors static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ? @@ -60,16 +63,17 @@ struct TensorEvaluator TensorBlockWriter; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m) + : m_data(device.get((const_cast<TensorPointerType>(m.data())))), + m_dims(m.dimensions()), + m_device(device) { } - // Used for accessor extraction in SYCL Managed TensorMap: - const Derived& derived() const { return m_impl; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) { if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) { - m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize()); + m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar)); return false; } return true; @@ -78,14 +82,12 @@ struct TensorEvaluator EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - eigen_assert(m_data); + eigen_assert(m_data != NULL); return m_data[index]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - typename internal::traits<Derived>::template MakePointer<Scalar>::RefType - coeffRef(Index index) { - eigen_assert(m_data); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) { + eigen_assert(m_data != NULL); return m_data[index]; } @@ -114,7 +116,7 @@ struct TensorEvaluator } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const { - eigen_assert(m_data); + eigen_assert(m_data != NULL); if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { return m_data[m_dims.IndexOfColMajor(coords)]; } else { @@ -122,10 +124,9 @@ struct TensorEvaluator } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - typename internal::traits<Derived>::template MakePointer<Scalar>::RefType + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(const array<DenseIndex, NumCoords>& coords) { - eigen_assert(m_data); + eigen_assert(m_data != NULL); if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { return m_data[m_dims.IndexOfColMajor(coords)]; } else { @@ -152,16 +153,18 @@ struct TensorEvaluator TensorBlockWriter::Run(block, m_data); } - EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<Scalar>::Type data() const { return m_data; } - - /// required by sycl in order to construct sycl buffer from raw pointer - const Device& device() const{return m_device;} + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_data.bind(cgh); + } +#endif protected: - typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data; + EvaluatorPointerType m_data; Dimensions m_dims; - const Device& m_device; - const Derived& m_impl; + const Device m_device; }; namespace { @@ -184,6 +187,13 @@ Eigen::half loadConstant(const Eigen::half* address) { return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x))); } #endif +#ifdef EIGEN_USE_SYCL +// overload of load constant should be implemented here based on range access +template <cl::sycl::access::mode AcMd, typename T> +T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) { + return *address; +} +#endif } @@ -197,7 +207,9 @@ struct TensorEvaluator<const Derived, Device> typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename Derived::Dimensions Dimensions; typedef const Derived XprType; - + typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType; + typedef StorageMemory<const Scalar, Device> Storage; + typedef typename Storage::Type EvaluatorPointerType; // NumDimensions is -1 for variable dim tensors static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ? @@ -221,18 +233,15 @@ struct TensorEvaluator<const Derived, Device> typename internal::remove_const<Scalar>::type, Index, NumCoords, Layout> TensorBlockReader; - // Used for accessor extraction in SYCL Managed TensorMap: - const Derived& derived() const { return m_impl; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m) + : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) { - m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar)); + m_device.memcpy((void*)(m_device.get(data)),m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar)); return false; } return true; @@ -241,13 +250,8 @@ struct TensorEvaluator<const Derived, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - eigen_assert(m_data); -#ifndef __SYCL_DEVICE_ONLY__ + eigen_assert(m_data != NULL); return loadConstant(m_data+index); -#else - CoeffReturnType tmp = m_data[index]; - return tmp; -#endif } template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -269,7 +273,7 @@ struct TensorEvaluator<const Derived, Device> } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const { - eigen_assert(m_data); + eigen_assert(m_data != NULL); const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords) : m_dims.IndexOfRowMajor(coords); return loadConstant(m_data+index); @@ -288,16 +292,17 @@ struct TensorEvaluator<const Derived, Device> TensorBlockReader::Run(block, m_data); } - EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data() const { return m_data; } - - /// added for sycl in order to construct the buffer from the sycl device - const Device& device() const{return m_device;} - + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_data.bind(cgh); + } +#endif protected: - typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data; + EvaluatorPointerType m_data; Dimensions m_dims; - const Device& m_device; - const Derived& m_impl; + const Device m_device; }; @@ -310,16 +315,6 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> { typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType; - enum { - IsAligned = true, - PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess, - BlockAccess = false, - PreferBlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper() @@ -331,10 +326,26 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; + typedef StorageMemory<CoeffReturnType, Device> Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = true, + PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess + #ifdef EIGEN_USE_SYCL + && (PacketType<CoeffReturnType, Device>::size >1) + #endif + , + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const @@ -354,13 +365,14 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> PacketType<CoeffReturnType, Device>::size); } - EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } - - /// required by sycl in order to extract the accessor - const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; } - /// required by sycl in order to extract the accessor - NullaryOp functor() const { return m_functor; } + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_argImpl.bind(cgh); + } +#endif private: const NullaryOp m_functor; @@ -401,14 +413,15 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device> typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - + typedef StorageMemory<CoeffReturnType, Device> Storage; + typedef typename Storage::Type EvaluatorPointerType; static const int NumDims = internal::array_size<Dimensions>::value; typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout> TensorBlock; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { m_argImpl.evalSubExprsIfNeeded(NULL); return true; } @@ -456,16 +469,18 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device> arg_block.data()); } - EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<ArgType, Device> & impl() const { return m_argImpl; } - /// added for sycl in order to construct the buffer from sycl device - UnaryOp functor() const { return m_functor; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const{ + m_argImpl.bind(cgh); + } +#endif private: - const Device& m_device; + const Device m_device; const UnaryOp m_functor; TensorEvaluator<ArgType, Device> m_argImpl; }; @@ -509,6 +524,8 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions; + typedef StorageMemory<CoeffReturnType, Device> Storage; + typedef typename Storage::Type EvaluatorPointerType; static const int NumDims = internal::array_size< typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value; @@ -524,7 +541,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg return m_leftImpl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { m_leftImpl.evalSubExprsIfNeeded(NULL); m_rightImpl.evalSubExprsIfNeeded(NULL); return true; @@ -576,16 +593,17 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg right_block.block_strides(), right_block.data()); } - EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; } - /// required by sycl in order to extract the accessor - BinaryOp functor() const { return m_functor; } + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + #ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_leftImpl.bind(cgh); + m_rightImpl.bind(cgh); + } + #endif private: - const Device& m_device; + const Device m_device; const BinaryOp m_functor; TensorEvaluator<LeftArgType, Device> m_leftImpl; TensorEvaluator<RightArgType, Device> m_rightImpl; @@ -639,6 +657,8 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions; + typedef StorageMemory<CoeffReturnType, Device> Storage; + typedef typename Storage::Type EvaluatorPointerType; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { @@ -646,7 +666,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, return m_arg1Impl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { m_arg1Impl.evalSubExprsIfNeeded(NULL); m_arg2Impl.evalSubExprsIfNeeded(NULL); m_arg3Impl.evalSubExprsIfNeeded(NULL); @@ -679,14 +699,16 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<Arg1Type, Device> & arg1Impl() const { return m_arg1Impl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_arg1Impl.bind(cgh); + m_arg2Impl.bind(cgh); + m_arg3Impl.bind(cgh); + } +#endif private: const TernaryOp m_functor; @@ -731,6 +753,8 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType> typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions; + typedef StorageMemory<CoeffReturnType, Device> Storage; + typedef typename Storage::Type EvaluatorPointerType; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { @@ -738,7 +762,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType> return m_condImpl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { m_condImpl.evalSubExprsIfNeeded(NULL); m_thenImpl.evalSubExprsIfNeeded(NULL); m_elseImpl.evalSubExprsIfNeeded(NULL); @@ -757,13 +781,15 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType> template<int LoadMode> EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { - internal::Selector<PacketSize> select; - for (Index i = 0; i < PacketSize; ++i) { - select.select[i] = m_condImpl.coeff(index+i); - } - return internal::pblend(select, - m_thenImpl.template packet<LoadMode>(index), - m_elseImpl.template packet<LoadMode>(index)); + internal::Selector<PacketSize> select; + EIGEN_UNROLL_LOOP + for (Index i = 0; i < PacketSize; ++i) { + select.select[i] = m_condImpl.coeff(index+i); + } + return internal::pblend(select, + m_thenImpl.template packet<LoadMode>(index), + m_elseImpl.template packet<LoadMode>(index)); + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost @@ -773,14 +799,16 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType> .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_condImpl.bind(cgh); + m_thenImpl.bind(cgh); + m_elseImpl.bind(cgh); + } +#endif private: TensorEvaluator<IfArgType, Device> m_condImpl; TensorEvaluator<ThenArgType, Device> m_thenImpl; |