diff options
author | Mehdi Goli <mehdi.goli@codeplay.com> | 2016-11-29 15:30:42 +0000 |
---|---|---|
committer | Mehdi Goli <mehdi.goli@codeplay.com> | 2016-11-29 15:30:42 +0000 |
commit | 577ce78085d2e09675abb5976ab3026235de8eec (patch) | |
tree | b88f8db6290c625fd35a72594e816b8ff4094e15 /unsupported/Eigen | |
parent | 02080e2b673c17302872a05e0fac8c20ac756b44 (diff) |
Adding TensorShuffling backend for sycl; adding TensorReshaping backend for sycl; cleaning up the sycl backend.
Diffstat (limited to 'unsupported/Eigen')
11 files changed, 382 insertions, 287 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h index c0d94b4eb..bcaf542e2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -31,7 +31,6 @@ auto get_sycl_supported_devices()->decltype(cl::sycl::device::get_devices()){ ++it; } } - printf("Device size %ld\n", devices.size()); return devices; } #define ConvertToActualTypeSycl(T, buf_acc) reinterpret_cast<typename cl::sycl::global_ptr<T>::pointer_t>((&(*buf_acc.get_pointer()))) @@ -93,11 +92,6 @@ struct QueueInterface { } } - EIGEN_STRONG_INLINE void deallocate_all() const { - std::lock_guard<std::mutex> lock(mutex_); - buffer_map.clear(); - } - EIGEN_STRONG_INLINE std::map<const uint8_t *, cl::sycl::buffer<uint8_t,1>>::iterator find_buffer(const void* ptr) const { std::lock_guard<std::mutex> lock(mutex_); auto it1 = buffer_map.find(static_cast<const uint8_t*>(ptr)); @@ -118,10 +112,11 @@ struct QueueInterface { // underlying stream device. EIGEN_STRONG_INLINE bool ok() const { if (!exception_caught_) { - m_queue.throw_asynchronous(); + m_queue.wait_and_throw(); } return !exception_caught_; } + // destructor ~QueueInterface() { buffer_map.clear(); } }; @@ -186,7 +181,7 @@ struct SyclDevice { auto dst_acc =it2->second.template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh); cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), TensorSycl::internal::MemCopyFunctor<T>(src_acc, dst_acc, rng, 0, offset)); }); - sycl_queue().throw_asynchronous(); + synchronize(); } /// The memcpyHostToDevice is used to copy the device only pointer to a host pointer. Using the device @@ -217,7 +212,7 @@ struct SyclDevice { auto dst_acc =dest_buf.template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh); cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), TensorSycl::internal::MemCopyFunctor<T>(src_acc, dst_acc, rng, 0, offset)); }); - sycl_queue().throw_asynchronous(); + synchronize(); } /// returning the sycl queue EIGEN_STRONG_INLINE cl::sycl::queue& sycl_queue() const { return m_queue_stream->m_queue;} @@ -235,13 +230,13 @@ struct SyclDevice { } }); }); - sycl_queue().throw_asynchronous(); + synchronize(); } /// No need for sycl it should act the same as CPU version EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; } - /// There is no need to synchronise the buffer in sycl as it is automatically handled by sycl runtime scheduler. + EIGEN_STRONG_INLINE void synchronize() const { - sycl_queue().wait_and_throw(); + sycl_queue().wait_and_throw(); //pass } // This function checks if the runtime recorded an error for the // underlying stream device. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 615559d44..25ce471f9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -168,12 +168,12 @@ template <typename Idx> struct IndexPair { #ifdef EIGEN_HAS_SFINAE namespace internal { - template<typename IndexType, Index... Is> + template<typename IndexType, typename Index, Index... Is> EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array<Index, sizeof...(Is)> customIndices2Array(IndexType& idx, numeric_list<Index, Is...>) { return { idx[Is]... }; } - template<typename IndexType> + template<typename IndexType, typename Index> EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array<Index, 0> customIndices2Array(IndexType&, numeric_list<Index>) { return array<Index, 0>(); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h index 00f8b70ed..abb8420a6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h @@ -81,7 +81,7 @@ static void run(BufferTOut& bufOut, BufferTIn& bufI, const Eigen::SyclDevice& de }); }; dev.sycl_queue().submit(f); - dev.sycl_queue().throw_asynchronous(); + dev.synchronize(); /* At this point, you could queue::wait_and_throw() to ensure that * errors are caught quickly. However, this would likely impact @@ -173,7 +173,7 @@ struct FullReducer<Self, Op, const Eigen::SyclDevice, Vectorizable> { tmp_global_accessor.get_pointer()[0]+=InnerMostDimReducer<decltype(device_self_evaluator), Op, false>::reduce(device_self_evaluator, static_cast<typename DevExpr::Index>(red_factor*(rng)), static_cast<typename DevExpr::Index>(remaining), const_cast<Op&>(functor)); }); }); - dev.sycl_queue().throw_asynchronous(); + dev.synchronize(); /// This is used to recursively reduce the tmp value to an element of 1; syclGenericBufferReducer<CoeffReturnType,HostExpr>::run(out_buffer, temp_global_buffer,dev, GRange, outTileSize); @@ -237,7 +237,7 @@ struct InnerReducer<Self, Op, const Eigen::SyclDevice> { // } // }); }); - dev.sycl_queue().throw_asynchronous(); + dev.synchronize(); return false; } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 113c060e3..edc9dd3f3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -117,7 +117,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) + : m_impl(op.expression(), device), m_shuffle(op.shufflePermutation()) { const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); const Shuffle& shuffle = op.shufflePermutation(); @@ -187,6 +187,11 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + // required by sycl + EIGEN_STRONG_INLINE const Shuffle& shufflePermutation() const {return m_shuffle;} + // required by sycl + EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const {return m_impl;} + protected: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { Index inputIndex = 0; @@ -206,11 +211,12 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> return inputIndex + index * m_inputStrides[NumDims - 1]; } } - Dimensions m_dimensions; array<Index, NumDims> m_outputStrides; array<Index, NumDims> m_inputStrides; TensorEvaluator<ArgType, Device> m_impl; + /// required by sycl + Shuffle m_shuffle; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h index 0336c9866..cc13ca963 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h @@ -124,6 +124,20 @@ KERNELBROKERCONVERTSLICEOP(const) KERNELBROKERCONVERTSLICEOP() #undef KERNELBROKERCONVERTSLICEOP + +#define KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(OPEXPR, CVQual)\ +template<typename Param, typename XprType>\ +struct ConvertToDeviceExpression<CVQual OPEXPR <Param, XprType> >{\ + typedef CVQual OPEXPR<Param, typename ConvertToDeviceExpression<XprType>::Type> Type;\ +}; + +KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorReshapingOp, const) +KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorReshapingOp, ) + +KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorShufflingOp, const) +KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorShufflingOp, ) +#undef KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP + } // namespace internal } // namespace TensorSycl } // namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h index 3d3142996..4433fec01 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h @@ -31,7 +31,6 @@ template <typename PtrType, size_t N, typename... Params> struct EvalToLHSConstructor { PtrType expr; EvalToLHSConstructor(const utility::tuple::Tuple<Params...> &t) : expr(ConvertToActualTypeSycl(typename Eigen::internal::remove_all<PtrType>::type, utility::tuple::get<N>(t))) {} - //EvalToLHSConstructor(const utility::tuple::Tuple<Params...> &t): expr((&(*(utility::tuple::get<N>(t).get_pointer())))) {} }; /// \struct ExprConstructor is used to reconstruct the expression on the device and @@ -57,8 +56,6 @@ CVQual PlaceHolder<CVQual TensorMap<T, Options_, MakePointer_>, N>, Params...>{\ : expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get<N>(t)), fd.dimensions())){}\ }; -//: expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {} - TENSORMAP(const) TENSORMAP() @@ -198,7 +195,6 @@ CVQual PlaceHolder<CVQual TensorForcedEvalOp<DevExpr>, N>, Params...> {\ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\ : expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get<N>(t)), fd.dimensions())) {}\ }; -//: expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {} FORCEDEVAL(const) FORCEDEVAL() @@ -224,7 +220,6 @@ CVQual PlaceHolder<CVQual TensorReductionOp<OP, Dim, DevExpr>, N>, Params...> {\ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\ :expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get<N>(t)), fd.dimensions())) {}\ }; -//: expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {} SYCLREDUCTIONEXPR(const) SYCLREDUCTIONEXPR() @@ -249,6 +244,26 @@ SYCLSLICEOPEXPR() #undef SYCLSLICEOPEXPR +#define SYCLRESHAPEANDSHUFFLEOPEXPRCONST(OPEXPR, CVQual)\ +template<typename Param, typename OrigXprType, typename XprType, typename... Params>\ +struct ExprConstructor<CVQual OPEXPR <Param, OrigXprType> , CVQual OPEXPR <Param, XprType>, Params... >{\ + typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\ + typedef CVQual OPEXPR <Param, typename my_xpr_type::Type> Type ;\ + my_xpr_type xprExpr;\ + Type expr;\ + template <typename FuncDetector>\ + ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ + : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.param()) {}\ +}; + +SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorReshapingOp, const) +SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorReshapingOp, ) + +SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, const) +SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, ) +#undef SYCLRESHAPEANDSHUFFLEOPEXPRCONST + + /// template deduction for \ref ExprConstructor struct template <typename OrigExpr, typename IndexExpr, typename FuncD, typename... Params> auto createDeviceExpression(FuncD &funcD, const utility::tuple::Tuple<Params...> &t) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h index 06e2d5ae0..f5ef05e36 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h @@ -43,172 +43,193 @@ template <typename Evaluator> struct ExtractAccessor; struct AccessorConstructor{ - template<typename Arg> static inline auto getTuple(cl::sycl::handler& cgh, Arg eval) + template<typename Arg> static inline auto getTuple(cl::sycl::handler& cgh, const Arg& eval) -> decltype(ExtractAccessor<Arg>::getTuple(cgh, eval)) { return ExtractAccessor<Arg>::getTuple(cgh, eval); } - template<typename Arg1, typename Arg2> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1, Arg2 eval2) + template<typename Arg1, typename Arg2> static inline auto getTuple(cl::sycl::handler& cgh, const Arg1& eval1, const Arg2& eval2) -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2))) { return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2)); } - template<typename Arg1, typename Arg2, typename Arg3> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1 , Arg2 eval2 , Arg3 eval3) + template<typename Arg1, typename Arg2, typename Arg3> static inline auto getTuple(cl::sycl::handler& cgh, const Arg1& eval1 , const Arg2& eval2 , const Arg3& eval3) -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)))) { return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3))); } - template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, Arg eval) + template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, const Arg& eval) -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor<AcM>(cgh,eval.data()))){ return utility::tuple::make_tuple(eval.device().template get_sycl_accessor<AcM>(cgh,eval.data())); } }; /// specialisation of the \ref ExtractAccessor struct when the node type is -/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp and const TensorBroadcastingOp -template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){ - return AccessorConstructor::getTuple(cgh, eval.impl()); - } +/// TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp +#define SYCLUNARYCATEGORYEXTACC(CVQual)\ +template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual UnaryCategory<OP, RHSExpr>, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual UnaryCategory<OP, RHSExpr>, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\ + return AccessorConstructor::getTuple(cgh, eval.impl());\ + }\ }; -/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp -template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> > -: ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorCwiseBinaryOp -template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ - return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); - } -}; +SYCLUNARYCATEGORYEXTACC(const) +SYCLUNARYCATEGORYEXTACC() +#undef SYCLUNARYCATEGORYEXTACC + + /// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseBinaryOp -template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > -: ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{}; +#define SYCLBINARYCATEGORYEXTACC(CVQual)\ +template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){\ + return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());\ + }\ +}; + +SYCLBINARYCATEGORYEXTACC(const) +SYCLBINARYCATEGORYEXTACC() +#undef SYCLBINARYCATEGORYEXTACC /// specialisation of the \ref ExtractAccessor struct when the node type is /// const TensorCwiseTernaryOp -template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){ - return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl()); - } +#define SYCLTERNARYCATEGORYEXTACC(CVQual)\ +template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){\ + return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl());\ + }\ }; -/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseTernaryOp -template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{}; +SYCLTERNARYCATEGORYEXTACC(const) +SYCLTERNARYCATEGORYEXTACC() +#undef SYCLTERNARYCATEGORYEXTACC -/// specialisation of the \ref ExtractAccessor struct when the node type is -/// const TensorCwiseSelectOp. This is a special case where there is no OP -template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){ - return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl()); - } -}; /// specialisation of the \ref ExtractAccessor struct when the node type is /// TensorCwiseSelectOp. This is a special case where there is no OP -template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorAssignOp -template <typename LHSExpr, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ - return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); - } +#define SYCLSELECTOPEXTACC(CVQual)\ +template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){\ + return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl());\ + }\ }; +SYCLSELECTOPEXTACC(const) +SYCLSELECTOPEXTACC() +#undef SYCLSELECTOPEXTACC + /// specialisation of the \ref ExtractAccessor struct when the node type is TensorAssignOp -template <typename LHSExpr, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{}; +#define SYCLTENSORASSIGNOPEXTACC(CVQual)\ +template <typename LHSExpr, typename RHSExpr, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorAssignOp<LHSExpr, RHSExpr>, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){\ + return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());\ + }\ +}; + + SYCLTENSORASSIGNOPEXTACC(const) + SYCLTENSORASSIGNOPEXTACC() + #undef SYCLTENSORASSIGNOPEXTACC /// specialisation of the \ref ExtractAccessor struct when the node type is const TensorMap #define TENSORMAPEXPR(CVQual, ACCType)\ template <typename PlainObjectType, int Options_, typename Dev>\ struct ExtractAccessor<TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> eval)\ + static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev>& eval)\ -> decltype(AccessorConstructor::template getAccessor<ACCType>(cgh, eval)){\ return AccessorConstructor::template getAccessor<ACCType>(cgh, eval);\ }\ }; + TENSORMAPEXPR(const, cl::sycl::access::mode::read) TENSORMAPEXPR(, cl::sycl::access::mode::read_write) #undef TENSORMAPEXPR -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorForcedEvalOp -template <typename Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> eval) - -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){ - return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval); - } -}; - /// specialisation of the \ref ExtractAccessor struct when the node type is TensorForcedEvalOp -template <typename Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<TensorForcedEvalOp<Expr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorEvalToOp -template <typename Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<const TensorEvalToOp<Expr>, Dev> eval) - -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){ - return utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl())); - } +#define SYCLFORCEDEVALEXTACC(CVQual)\ +template <typename Expr, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual TensorForcedEvalOp<Expr>, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorForcedEvalOp<Expr>, Dev>& eval)\ + -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){\ + return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval);\ + }\ }; +SYCLFORCEDEVALEXTACC(const) +SYCLFORCEDEVALEXTACC() +#undef SYCLFORCEDEVALEXTACC + + /// specialisation of the \ref ExtractAccessor struct when the node type is TensorEvalToOp -template <typename Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<TensorEvalToOp<Expr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorReductionOp -template <typename OP, typename Dim, typename Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> eval) - -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){ - return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval); - } +#define SYCLEVALTOEXTACC(CVQual)\ +template <typename Expr, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual TensorEvalToOp<Expr>, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<CVQual TensorEvalToOp<Expr>, Dev>& eval)\ + -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){\ + return utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()));\ + }\ }; +SYCLEVALTOEXTACC(const) +SYCLEVALTOEXTACC() +#undef SYCLEVALTOEXTACC + /// specialisation of the \ref ExtractAccessor struct when the node type is TensorReductionOp -template <typename OP, typename Dim, typename Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<TensorReductionOp<OP, Dim, Expr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> >{}; +#define SYCLREDUCTIONEXTACC(CVQual)\ +template <typename OP, typename Dim, typename Expr, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual TensorReductionOp<OP, Dim, Expr>, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorReductionOp<OP, Dim, Expr>, Dev>& eval)\ + -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){\ + return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval);\ + }\ +}; +SYCLREDUCTIONEXTACC(const) +SYCLREDUCTIONEXTACC() +#undef SYCLREDUCTIONEXTACC /// specialisation of the \ref ExtractAccessor struct when the node type is /// const TensorSlicingOp. This is a special case where there is no OP -template <typename StartIndices, typename Sizes, typename XprType, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){ - return AccessorConstructor::getTuple(cgh, eval.impl()); - } +#define SYCLSLICEOPEXTACC(CVQual)\ +template <typename StartIndices, typename Sizes, typename XprType, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorSlicingOp<StartIndices, Sizes, XprType>, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\ + return AccessorConstructor::getTuple(cgh, eval.impl());\ + }\ +}; + +SYCLSLICEOPEXTACC(const) +SYCLSLICEOPEXTACC() +#undef SYCLSLICEOPEXTACC + +#define RESHAPEANDSHUFFOPEXTRACC(OPEXPR, CVQual)\ +template<typename Param, typename XprType, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual OPEXPR<Param, XprType>, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual OPEXPR<Param, XprType>, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\ + return AccessorConstructor::getTuple(cgh, eval.impl());\ + }\ }; +// tensor reshaping +RESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, const) +RESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, ) +/// Tensor shuffling +RESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, const) +RESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, ) +#undef RESHAPEANDSHUFFOPEXTRACC -template <typename StartIndices, typename Sizes, typename XprType, typename Dev> -struct ExtractAccessor<TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> > -:ExtractAccessor<TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> >{}; /// template deduction for \ref ExtractAccessor template <typename Evaluator> -auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr) --> decltype(ExtractAccessor<Evaluator>::getTuple(cgh, expr)) { - return ExtractAccessor<Evaluator>::getTuple(cgh, expr); +auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& eval) +-> decltype(ExtractAccessor<Evaluator>::getTuple(cgh, eval)) { + return ExtractAccessor<Evaluator>::getTuple(cgh, eval); } } /// namespace TensorSycl diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h index 382f0cb50..5bc57b59a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h @@ -36,152 +36,164 @@ namespace internal { template <typename Evaluator> struct FunctorExtractor{ typedef typename Evaluator::Dimensions Dimensions; const Dimensions m_dimensions; - const Dimensions& dimensions() const { return m_dimensions; } + EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } FunctorExtractor(const Evaluator& expr) : m_dimensions(expr.dimensions()) {} }; - /// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp, and const TensorBroadcastingOp -template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > { - FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; - OP func; - FunctorExtractor(const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev>& expr) - : rhsExpr(expr.impl()), func(expr.functor()) {} +/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, and TensorBroadcastingOp +#define SYCLEXTRFUNCUNARY(CVQual)\ +template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>\ +struct FunctorExtractor<TensorEvaluator<CVQual UnaryCategory<OP, RHSExpr>, Dev> > {\ + FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;\ + OP func;\ + FunctorExtractor(const TensorEvaluator<CVQual UnaryCategory<OP, RHSExpr>, Dev>& expr)\ + : rhsExpr(expr.impl()), func(expr.functor()) {}\ }; -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, and TensorBroadcastingOp -template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> > -: FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> >{}; -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorCwiseBinaryOp -template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > { - FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr; - FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; - OP func; - FunctorExtractor(const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& expr) - : lhsExpr(expr.left_impl()),rhsExpr(expr.right_impl()),func(expr.functor()) {} -}; +SYCLEXTRFUNCUNARY(const) +SYCLEXTRFUNCUNARY() +#undef SYCLEXTRFUNCUNARY /// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorCwiseBinaryOp -template <template <class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > -: FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{}; +/// TensorCwiseBinaryOp +#define SYCLEXTRFUNCBIINARY(CVQual)\ +template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>\ +struct FunctorExtractor<TensorEvaluator<CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {\ + FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;\ + FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;\ + OP func;\ + FunctorExtractor(const TensorEvaluator<CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& expr)\ + : lhsExpr(expr.left_impl()),rhsExpr(expr.right_impl()),func(expr.functor()) {}\ +}; -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorCwiseTernaryOp -template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,typename Dev> -struct FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > { - FunctorExtractor<TensorEvaluator<Arg1Expr, Dev> > arg1Expr; - FunctorExtractor<TensorEvaluator<Arg2Expr, Dev> > arg2Expr; - FunctorExtractor<TensorEvaluator<Arg3Expr, Dev> > arg3Expr; - OP func; - FunctorExtractor(const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& expr) - : arg1Expr(expr.arg1Impl()), arg2Expr(expr.arg2Impl()), arg3Expr(expr.arg3Impl()), func(expr.functor()) {} +SYCLEXTRFUNCBIINARY(const) +SYCLEXTRFUNCBIINARY() +#undef SYCLEXTRFUNCBIINARY + +/// specialisation of the \ref FunctorExtractor struct when the node type is TensorCwiseTernaryOp +#define SYCLEXTRFUNCTERNARY(CVQual)\ +template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,typename Dev>\ +struct FunctorExtractor<TensorEvaluator<CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {\ + FunctorExtractor<TensorEvaluator<Arg1Expr, Dev> > arg1Expr;\ + FunctorExtractor<TensorEvaluator<Arg2Expr, Dev> > arg2Expr;\ + FunctorExtractor<TensorEvaluator<Arg3Expr, Dev> > arg3Expr;\ + OP func;\ + FunctorExtractor(const TensorEvaluator<CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& expr)\ + : arg1Expr(expr.arg1Impl()), arg2Expr(expr.arg2Impl()), arg3Expr(expr.arg3Impl()), func(expr.functor()) {}\ }; -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// TensorCwiseTernaryOp -template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> -struct FunctorExtractor<TensorEvaluator< TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > -:FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{}; +SYCLEXTRFUNCTERNARY(const) +SYCLEXTRFUNCTERNARY() +#undef SYCLEXTRFUNCTERNARY /// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated. -template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> -struct FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > { - FunctorExtractor<TensorEvaluator<IfExpr, Dev> > ifExpr; - FunctorExtractor<TensorEvaluator<ThenExpr, Dev> > thenExpr; - FunctorExtractor<TensorEvaluator<ElseExpr, Dev> > elseExpr; - FunctorExtractor(const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& expr) - : ifExpr(expr.cond_impl()), thenExpr(expr.then_impl()), elseExpr(expr.else_impl()) {} +/// TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated. +#define SYCLEXTRFUNCSELECTOP(CVQual)\ +template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>\ +struct FunctorExtractor< TensorEvaluator<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {\ + FunctorExtractor<TensorEvaluator<IfExpr, Dev> > ifExpr;\ + FunctorExtractor<TensorEvaluator<ThenExpr, Dev> > thenExpr;\ + FunctorExtractor<TensorEvaluator<ElseExpr, Dev> > elseExpr;\ + FunctorExtractor(const TensorEvaluator<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& expr)\ + : ifExpr(expr.cond_impl()), thenExpr(expr.then_impl()), elseExpr(expr.else_impl()) {}\ }; -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated -template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > -:FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {}; +SYCLEXTRFUNCSELECTOP(const) +SYCLEXTRFUNCSELECTOP() +#undef SYCLEXTRFUNCSELECTOP /// specialisation of the \ref FunctorExtractor struct when the node type is /// const TensorAssignOp. This is an specialisation without OP so it has to be separated. -template <typename LHSExpr, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > { - FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr; - FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; - FunctorExtractor(const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr) - : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {} +#define SYCLEXTRFUNCASSIGNOP(CVQual)\ +template <typename LHSExpr, typename RHSExpr, typename Dev>\ +struct FunctorExtractor<TensorEvaluator<CVQual TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {\ + FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;\ + FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;\ + FunctorExtractor(const TensorEvaluator<CVQual TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr)\ + : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {}\ }; +SYCLEXTRFUNCASSIGNOP(const) +SYCLEXTRFUNCASSIGNOP() +#undef SYCLEXTRFUNCASSIGNOP /// specialisation of the \ref FunctorExtractor struct when the node type is -/// TensorAssignOp. This is an specialisation without OP so it has to be separated. -template <typename LHSExpr, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> > -:FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{}; - - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorEvalToOp, This is an specialisation without OP so it has to be separated. -template <typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > { - FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; - FunctorExtractor(const TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev>& expr) - : rhsExpr(expr.impl()) {} +/// TensorEvalToOp, This is an specialisation without OP so it has to be separated. +#define SYCLEXTRFUNCEVALTOOP(CVQual)\ +template <typename RHSExpr, typename Dev>\ +struct FunctorExtractor<TensorEvaluator<CVQual TensorEvalToOp<RHSExpr>, Dev> > {\ + FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;\ + FunctorExtractor(const TensorEvaluator<CVQual TensorEvalToOp<RHSExpr>, Dev>& expr)\ + : rhsExpr(expr.impl()) {}\ }; -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// TensorEvalToOp. This is a specialisation without OP so it has to be separated. -template <typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<TensorEvalToOp<RHSExpr>, Dev> > -: FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {}; +SYCLEXTRFUNCEVALTOOP(const) +SYCLEXTRFUNCEVALTOOP() +#undef SYCLEXTRFUNCEVALTOOP template<typename Dim, size_t NumOutputDim> struct DimConstr { template<typename InDim> - static inline Dim getDim(InDim dims ) {return dims;} + static EIGEN_STRONG_INLINE Dim getDim(InDim dims ) {return dims;} }; template<typename Dim> struct DimConstr<Dim, 0> { template<typename InDim> - static inline Dim getDim(InDim dims ) {return Dim(static_cast<Dim>(dims.TotalSize()));} + static EIGEN_STRONG_INLINE Dim getDim(InDim dims ) {return Dim(static_cast<Dim>(dims.TotalSize()));} }; -template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device> -struct FunctorExtractor<TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{ - typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Evaluator; - typedef typename Eigen::internal::conditional<Evaluator::NumOutputDims==0, DSizes<typename Evaluator::Index, 1>, typename Evaluator::Dimensions >::type Dimensions; - const Dimensions m_dimensions; - const Dimensions& dimensions() const { return m_dimensions; } - FunctorExtractor(const TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>& expr) - : m_dimensions(DimConstr<Dimensions, Evaluator::NumOutputDims>::getDim(expr.dimensions())) {} +#define SYCLEXTRFUNCREDUCTIONOP(CVQual)\ +template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>\ +struct FunctorExtractor<TensorEvaluator<CVQual TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{\ + typedef TensorEvaluator<CVQual TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Evaluator;\ + typedef typename Eigen::internal::conditional<Evaluator::NumOutputDims==0, DSizes<typename Evaluator::Index, 1>, typename Evaluator::Dimensions >::type Dimensions;\ + const Dimensions m_dimensions;\ + EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }\ + FunctorExtractor(const TensorEvaluator<CVQual TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>& expr)\ + : m_dimensions(DimConstr<Dimensions, Evaluator::NumOutputDims>::getDim(expr.dimensions())) {}\ }; -template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device> -struct FunctorExtractor<TensorEvaluator<TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>> -: FunctorExtractor<TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{}; +SYCLEXTRFUNCREDUCTIONOP(const) +SYCLEXTRFUNCREDUCTIONOP() +#undef SYCLEXTRFUNCREDUCTIONOP /// specialisation of the \ref FunctorExtractor struct when the node type is /// const TensorSlicingOp. This is an specialisation without OP so it has to be separated. -template <typename StartIndices, typename Sizes, typename XprType, typename Dev> -struct FunctorExtractor<TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> > { - FunctorExtractor<TensorEvaluator<XprType, Dev> > xprExpr; - const StartIndices m_offsets; - const Sizes m_dimensions; - FunctorExtractor(const TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, XprType>, Dev>& expr) - : xprExpr(expr.impl()), m_offsets(expr.startIndices()), m_dimensions(expr.dimensions()) {} - EIGEN_STRONG_INLINE const StartIndices& startIndices() const {return m_offsets;} - EIGEN_STRONG_INLINE const Sizes& dimensions() const {return m_dimensions;} +#define SYCLEXTRFUNCTSLICEOP(CVQual)\ +template <typename StartIndices, typename Sizes, typename XprType, typename Dev>\ +struct FunctorExtractor<TensorEvaluator<CVQual TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> > {\ + FunctorExtractor<TensorEvaluator<XprType, Dev> > xprExpr;\ + const StartIndices m_offsets;\ + const Sizes m_dimensions;\ + FunctorExtractor(const TensorEvaluator<CVQual TensorSlicingOp<StartIndices, Sizes, XprType>, Dev>& expr)\ + : xprExpr(expr.impl()), m_offsets(expr.startIndices()), m_dimensions(expr.dimensions()) {}\ + EIGEN_STRONG_INLINE const StartIndices& startIndices() const {return m_offsets;}\ + EIGEN_STRONG_INLINE const Sizes& dimensions() const {return m_dimensions;}\ }; -template <typename StartIndices, typename Sizes, typename XprType, typename Dev> -struct FunctorExtractor<TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> > -:FunctorExtractor<TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> > {}; +SYCLEXTRFUNCTSLICEOP(const) +SYCLEXTRFUNCTSLICEOP() +#undef SYCLEXTRFUNCTSLICEOP + +// Had to separate reshapeOP otherwise it will be mistaken by UnaryCategory +#define SYCLRESHAPEANDSHUFFLEOPFUNCEXT(OPEXPR, FUNCCALL, CVQual)\ +template<typename Param, typename XprType, typename Dev>\ +struct FunctorExtractor<Eigen::TensorEvaluator<CVQual Eigen::OPEXPR<Param, XprType>, Dev> > {\ + FunctorExtractor<Eigen::TensorEvaluator<XprType, Dev> > xprExpr;\ + const Param m_param;\ + EIGEN_STRONG_INLINE const Param& param() const { return m_param; }\ + FunctorExtractor(const Eigen::TensorEvaluator<CVQual Eigen::OPEXPR<Param, XprType>, Dev>& expr)\ + : xprExpr(expr.impl()), m_param(expr.FUNCCALL) {}\ +}; + +SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorReshapingOp, dimensions(), const) +SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorReshapingOp, dimensions(), ) + +SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorShufflingOp, shufflePermutation(), const) +SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorShufflingOp, shufflePermutation(), ) +#undef SYCLRESHAPEOPEXPR + /// template deduction function for FunctorExtractor template <typename Evaluator> auto inline extractFunctors(const Evaluator& evaluator)-> FunctorExtractor<Evaluator> { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h index 54d2a8bdd..a548aab29 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h @@ -44,77 +44,97 @@ struct CategoryCount<Arg,Args...>{ }; /// specialisation of the \ref LeafCount struct when the node type is const TensorMap -template <typename PlainObjectType, int Options_, template <class> class MakePointer_> -struct LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> > { - static const size_t Count =1; +#define SYCLTENSORMAPLEAFCOUNT(CVQual)\ +template <typename PlainObjectType, int Options_, template <class> class MakePointer_>\ +struct LeafCount<CVQual TensorMap<PlainObjectType, Options_, MakePointer_> > {\ + static const size_t Count =1;\ }; -/// specialisation of the \ref LeafCount struct when the node type is TensorMap -template <typename PlainObjectType, int Options_, template <class> class MakePointer_> -struct LeafCount<TensorMap<PlainObjectType, Options_, MakePointer_> > :LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> >{}; +SYCLTENSORMAPLEAFCOUNT(const) +SYCLTENSORMAPLEAFCOUNT() +#undef SYCLTENSORMAPLEAFCOUNT -// const TensorCwiseUnaryOp, const TensorCwiseNullaryOp, const TensorCwiseBinaryOp, const TensorCwiseTernaryOp, and Const TensorBroadcastingOp -template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr> -struct LeafCount<const CategoryExpr<OP, RHSExpr...> >: CategoryCount<RHSExpr...> {}; -// TensorCwiseUnaryOp, TensorCwiseNullaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, and TensorBroadcastingOp -template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr> -struct LeafCount<CategoryExpr<OP, RHSExpr...> > :LeafCount<const CategoryExpr<OP, RHSExpr...> >{}; +// TensorCwiseUnaryOp, TensorCwiseNullaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, and TensorBroadcastingOp +#define SYCLCATEGORYLEAFCOUNT(CVQual)\ +template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr>\ +struct LeafCount<CVQual CategoryExpr<OP, RHSExpr...> >: CategoryCount<RHSExpr...> {}; + +SYCLCATEGORYLEAFCOUNT(const) +SYCLCATEGORYLEAFCOUNT() +#undef SYCLCATEGORYLEAFCOUNT /// specialisation of the \ref LeafCount struct when the node type is const TensorSelectOp is an exception -template <typename IfExpr, typename ThenExpr, typename ElseExpr> -struct LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > : CategoryCount<IfExpr, ThenExpr, ElseExpr> {}; -/// specialisation of the \ref LeafCount struct when the node type is TensorSelectOp -template <typename IfExpr, typename ThenExpr, typename ElseExpr> -struct LeafCount<TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >: LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > {}; +#define SYCLSELECTOPLEAFCOUNT(CVQual)\ +template <typename IfExpr, typename ThenExpr, typename ElseExpr>\ +struct LeafCount<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > : CategoryCount<IfExpr, ThenExpr, ElseExpr> {}; + +SYCLSELECTOPLEAFCOUNT(const) +SYCLSELECTOPLEAFCOUNT() +#undef SYCLSELECTOPLEAFCOUNT -/// specialisation of the \ref LeafCount struct when the node type is const TensorAssignOp -template <typename LHSExpr, typename RHSExpr> -struct LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >: CategoryCount<LHSExpr,RHSExpr> {}; +/// specialisation of the \ref LeafCount struct when the node type is TensorAssignOp +#define SYCLLEAFCOUNTASSIGNOP(CVQual)\ +template <typename LHSExpr, typename RHSExpr>\ +struct LeafCount<CVQual TensorAssignOp<LHSExpr, RHSExpr> >: CategoryCount<LHSExpr,RHSExpr> {}; -/// specialisation of the \ref LeafCount struct when the node type is -/// TensorAssignOp is an exception. It is not the same as Unary -template <typename LHSExpr, typename RHSExpr> -struct LeafCount<TensorAssignOp<LHSExpr, RHSExpr> > :LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >{}; +SYCLLEAFCOUNTASSIGNOP(const) +SYCLLEAFCOUNTASSIGNOP() +#undef SYCLLEAFCOUNTASSIGNOP /// specialisation of the \ref LeafCount struct when the node type is const TensorForcedEvalOp -template <typename Expr> -struct LeafCount<const TensorForcedEvalOp<Expr> > { - static const size_t Count =1; +#define SYCLFORCEDEVALLEAFCOUNT(CVQual)\ +template <typename Expr>\ +struct LeafCount<CVQual TensorForcedEvalOp<Expr> > {\ + static const size_t Count =1;\ }; -/// specialisation of the \ref LeafCount struct when the node type is TensorForcedEvalOp -template <typename Expr> -struct LeafCount<TensorForcedEvalOp<Expr> >: LeafCount<const TensorForcedEvalOp<Expr> > {}; +SYCLFORCEDEVALLEAFCOUNT(const) +SYCLFORCEDEVALLEAFCOUNT() +#undef SYCLFORCEDEVALLEAFCOUNT -/// specialisation of the \ref LeafCount struct when the node type is const TensorEvalToOp -template <typename Expr> -struct LeafCount<const TensorEvalToOp<Expr> > { - static const size_t Count = 1 + CategoryCount<Expr>::Count; +/// specialisation of the \ref LeafCount struct when the node type is TensorEvalToOp +#define EVALTOLEAFCOUNT(CVQual)\ +template <typename Expr>\ +struct LeafCount<CVQual TensorEvalToOp<Expr> > {\ + static const size_t Count = 1 + CategoryCount<Expr>::Count;\ }; +EVALTOLEAFCOUNT(const) +EVALTOLEAFCOUNT() +#undef EVALTOLEAFCOUNT + /// specialisation of the \ref LeafCount struct when the node type is const TensorReductionOp -template <typename OP, typename Dim, typename Expr> -struct LeafCount<const TensorReductionOp<OP, Dim, Expr> > { - static const size_t Count =1; +#define REDUCTIONLEAFCOUNT(CVQual)\ +template <typename OP, typename Dim, typename Expr>\ +struct LeafCount<CVQual TensorReductionOp<OP, Dim, Expr> > {\ + static const size_t Count =1;\ }; -/// specialisation of the \ref LeafCount struct when the node type is TensorReductionOp -template <typename OP, typename Dim, typename Expr> -struct LeafCount<TensorReductionOp<OP, Dim, Expr> >: LeafCount<const TensorReductionOp<OP, Dim, Expr> >{}; +REDUCTIONLEAFCOUNT(const) +REDUCTIONLEAFCOUNT() +#undef REDUCTIONLEAFCOUNT -/// specialisation of the \ref LeafCount struct when the node type is const TensorSlicingOp -template <typename StartIndices, typename Sizes, typename XprType> -struct LeafCount<const TensorSlicingOp<StartIndices, Sizes, XprType> >:CategoryCount<XprType>{}; +/// specialisation of the \ref LeafCount struct when the node type is TensorSlicingOp +#define SLICEOPLEAFCOUNT(CVQual)\ +template <typename StartIndices, typename Sizes, typename XprType>\ +struct LeafCount<CVQual TensorSlicingOp<StartIndices, Sizes, XprType> >:CategoryCount<XprType>{}; -/// specialisation of the \ref LeafCount struct when the node type is TensorSlicingOp -template <typename StartIndices, typename Sizes, typename XprType> -struct LeafCount<TensorSlicingOp<StartIndices, Sizes, XprType> > -: LeafCount<const TensorSlicingOp<StartIndices, Sizes, XprType> >{}; +SLICEOPLEAFCOUNT(const) +SLICEOPLEAFCOUNT() +#undef SLICEOPLEAFCOUNT + +#define RESHAPEANDSHUFFLELEAFCOUNT(OPEXPR, CVQual)\ +template<typename Param, typename XprType>\ +struct LeafCount<CVQual OPEXPR<Param, XprType> >:CategoryCount<XprType>{}; + +RESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, const) +RESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, ) + +RESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, const) +RESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, ) +#undef RESHAPEANDSHUFFLELEAFCOUNT -/// specialisation of the \ref LeafCount struct when the node type is TensorEvalToOp -template <typename Expr> -struct LeafCount<TensorEvalToOp<Expr> >: LeafCount<const TensorEvalToOp<Expr> >{}; } /// namespace TensorSycl } /// namespace internal diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h index 5a2df7807..bb042ade2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h @@ -180,6 +180,18 @@ SLICEOPEXPR(const) SLICEOPEXPR() #undef SLICEOPEXPR +#define RESHAPEANDSHUFFLEOPPLH(OPEXP , CVQual)\ +template<typename Param, typename XprType, size_t N>\ +struct PlaceHolderExpression<CVQual OPEXP<Param, XprType>, N > {\ + typedef CVQual OPEXP<Param, typename CalculateIndex<N, XprType>::ArgType> Type;\ +}; + +RESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, const) +RESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, ) + +RESHAPEANDSHUFFLEOPPLH(TensorShufflingOp, const) +RESHAPEANDSHUFFLEOPPLH(TensorShufflingOp,) +#undef RESHAPEANDSHUFFLEOPPLH /// template deduction for \ref PlaceHolderExpression struct template <typename Expr> diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h index 306250f7c..f259f03c4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h @@ -54,7 +54,7 @@ void run(Expr &expr, Dev &dev) { } }); }); - dev.sycl_queue().throw_asynchronous(); + dev.synchronize(); } evaluator.cleanup(); |