From 577ce78085d2e09675abb5976ab3026235de8eec Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Tue, 29 Nov 2016 15:30:42 +0000 Subject: Adding TensorShuffling backend for sycl; adding TensorReshaping backend for sycl; cleaning up the sycl backend. --- .../Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 19 +- unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 4 +- .../Eigen/CXX11/src/Tensor/TensorReductionSycl.h | 6 +- .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 10 +- .../Tensor/TensorSyclConvertToDeviceExpression.h | 14 ++ .../CXX11/src/Tensor/TensorSyclExprConstructor.h | 25 ++- .../CXX11/src/Tensor/TensorSyclExtractAccessor.h | 235 +++++++++++---------- .../CXX11/src/Tensor/TensorSyclExtractFunctors.h | 222 ++++++++++--------- .../Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h | 120 ++++++----- .../CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h | 12 ++ unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h | 2 +- 11 files changed, 382 insertions(+), 287 deletions(-) (limited to 'unsupported/Eigen') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h index c0d94b4eb..bcaf542e2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -31,7 +31,6 @@ auto get_sycl_supported_devices()->decltype(cl::sycl::device::get_devices()){ ++it; } } - printf("Device size %ld\n", devices.size()); return devices; } #define ConvertToActualTypeSycl(T, buf_acc) reinterpret_cast::pointer_t>((&(*buf_acc.get_pointer()))) @@ -93,11 +92,6 @@ struct QueueInterface { } } - EIGEN_STRONG_INLINE void deallocate_all() const { - std::lock_guard lock(mutex_); - buffer_map.clear(); - } - EIGEN_STRONG_INLINE std::map>::iterator find_buffer(const void* ptr) const { std::lock_guard lock(mutex_); auto it1 = buffer_map.find(static_cast(ptr)); @@ -118,10 +112,11 @@ struct QueueInterface { // underlying stream device. EIGEN_STRONG_INLINE bool ok() const { if (!exception_caught_) { - m_queue.throw_asynchronous(); + m_queue.wait_and_throw(); } return !exception_caught_; } + // destructor ~QueueInterface() { buffer_map.clear(); } }; @@ -186,7 +181,7 @@ struct SyclDevice { auto dst_acc =it2->second.template get_access(cgh); cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), TensorSycl::internal::MemCopyFunctor(src_acc, dst_acc, rng, 0, offset)); }); - sycl_queue().throw_asynchronous(); + synchronize(); } /// The memcpyHostToDevice is used to copy the device only pointer to a host pointer. Using the device @@ -217,7 +212,7 @@ struct SyclDevice { auto dst_acc =dest_buf.template get_access(cgh); cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), TensorSycl::internal::MemCopyFunctor(src_acc, dst_acc, rng, 0, offset)); }); - sycl_queue().throw_asynchronous(); + synchronize(); } /// returning the sycl queue EIGEN_STRONG_INLINE cl::sycl::queue& sycl_queue() const { return m_queue_stream->m_queue;} @@ -235,13 +230,13 @@ struct SyclDevice { } }); }); - sycl_queue().throw_asynchronous(); + synchronize(); } /// No need for sycl it should act the same as CPU version EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; } - /// There is no need to synchronise the buffer in sycl as it is automatically handled by sycl runtime scheduler. + EIGEN_STRONG_INLINE void synchronize() const { - sycl_queue().wait_and_throw(); + sycl_queue().wait_and_throw(); //pass } // This function checks if the runtime recorded an error for the // underlying stream device. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 615559d44..25ce471f9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -168,12 +168,12 @@ template struct IndexPair { #ifdef EIGEN_HAS_SFINAE namespace internal { - template + template EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array customIndices2Array(IndexType& idx, numeric_list) { return { idx[Is]... }; } - template + template EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array customIndices2Array(IndexType&, numeric_list) { return array(); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h index 00f8b70ed..abb8420a6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h @@ -81,7 +81,7 @@ static void run(BufferTOut& bufOut, BufferTIn& bufI, const Eigen::SyclDevice& de }); }; dev.sycl_queue().submit(f); - dev.sycl_queue().throw_asynchronous(); + dev.synchronize(); /* At this point, you could queue::wait_and_throw() to ensure that * errors are caught quickly. However, this would likely impact @@ -173,7 +173,7 @@ struct FullReducer { tmp_global_accessor.get_pointer()[0]+=InnerMostDimReducer::reduce(device_self_evaluator, static_cast(red_factor*(rng)), static_cast(remaining), const_cast(functor)); }); }); - dev.sycl_queue().throw_asynchronous(); + dev.synchronize(); /// This is used to recursively reduce the tmp value to an element of 1; syclGenericBufferReducer::run(out_buffer, temp_global_buffer,dev, GRange, outTileSize); @@ -237,7 +237,7 @@ struct InnerReducer { // } // }); }); - dev.sycl_queue().throw_asynchronous(); + dev.synchronize(); return false; } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 113c060e3..edc9dd3f3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -117,7 +117,7 @@ struct TensorEvaluator, Device> }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) + : m_impl(op.expression(), device), m_shuffle(op.shufflePermutation()) { const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); const Shuffle& shuffle = op.shufflePermutation(); @@ -187,6 +187,11 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + // required by sycl + EIGEN_STRONG_INLINE const Shuffle& shufflePermutation() const {return m_shuffle;} + // required by sycl + EIGEN_STRONG_INLINE const TensorEvaluator& impl() const {return m_impl;} + protected: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { Index inputIndex = 0; @@ -206,11 +211,12 @@ struct TensorEvaluator, Device> return inputIndex + index * m_inputStrides[NumDims - 1]; } } - Dimensions m_dimensions; array m_outputStrides; array m_inputStrides; TensorEvaluator m_impl; + /// required by sycl + Shuffle m_shuffle; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h index 0336c9866..cc13ca963 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h @@ -124,6 +124,20 @@ KERNELBROKERCONVERTSLICEOP(const) KERNELBROKERCONVERTSLICEOP() #undef KERNELBROKERCONVERTSLICEOP + +#define KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(OPEXPR, CVQual)\ +template\ +struct ConvertToDeviceExpression >{\ + typedef CVQual OPEXPR::Type> Type;\ +}; + +KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorReshapingOp, const) +KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorReshapingOp, ) + +KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorShufflingOp, const) +KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorShufflingOp, ) +#undef KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP + } // namespace internal } // namespace TensorSycl } // namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h index 3d3142996..4433fec01 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h @@ -31,7 +31,6 @@ template struct EvalToLHSConstructor { PtrType expr; EvalToLHSConstructor(const utility::tuple::Tuple &t) : expr(ConvertToActualTypeSycl(typename Eigen::internal::remove_all::type, utility::tuple::get(t))) {} - //EvalToLHSConstructor(const utility::tuple::Tuple &t): expr((&(*(utility::tuple::get(t).get_pointer())))) {} }; /// \struct ExprConstructor is used to reconstruct the expression on the device and @@ -57,8 +56,6 @@ CVQual PlaceHolder, N>, Params...>{\ : expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get(t)), fd.dimensions())){}\ }; -//: expr(Type((&(*(utility::tuple::get(t).get_pointer()))), fd.dimensions())) {} - TENSORMAP(const) TENSORMAP() @@ -198,7 +195,6 @@ CVQual PlaceHolder, N>, Params...> {\ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t)\ : expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get(t)), fd.dimensions())) {}\ }; -//: expr(Type((&(*(utility::tuple::get(t).get_pointer()))), fd.dimensions())) {} FORCEDEVAL(const) FORCEDEVAL() @@ -224,7 +220,6 @@ CVQual PlaceHolder, N>, Params...> {\ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t)\ :expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get(t)), fd.dimensions())) {}\ }; -//: expr(Type((&(*(utility::tuple::get(t).get_pointer()))), fd.dimensions())) {} SYCLREDUCTIONEXPR(const) SYCLREDUCTIONEXPR() @@ -249,6 +244,26 @@ SYCLSLICEOPEXPR() #undef SYCLSLICEOPEXPR +#define SYCLRESHAPEANDSHUFFLEOPEXPRCONST(OPEXPR, CVQual)\ +template\ +struct ExprConstructor , CVQual OPEXPR , Params... >{\ + typedef ExprConstructor my_xpr_type;\ + typedef CVQual OPEXPR Type ;\ + my_xpr_type xprExpr;\ + Type expr;\ + template \ + ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple &t)\ + : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.param()) {}\ +}; + +SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorReshapingOp, const) +SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorReshapingOp, ) + +SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, const) +SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, ) +#undef SYCLRESHAPEANDSHUFFLEOPEXPRCONST + + /// template deduction for \ref ExprConstructor struct template auto createDeviceExpression(FuncD &funcD, const utility::tuple::Tuple &t) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h index 06e2d5ae0..f5ef05e36 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h @@ -43,172 +43,193 @@ template struct ExtractAccessor; struct AccessorConstructor{ - template static inline auto getTuple(cl::sycl::handler& cgh, Arg eval) + template static inline auto getTuple(cl::sycl::handler& cgh, const Arg& eval) -> decltype(ExtractAccessor::getTuple(cgh, eval)) { return ExtractAccessor::getTuple(cgh, eval); } - template static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1, Arg2 eval2) + template static inline auto getTuple(cl::sycl::handler& cgh, const Arg1& eval1, const Arg2& eval2) -> decltype(utility::tuple::append(ExtractAccessor::getTuple(cgh, eval1), ExtractAccessor::getTuple(cgh, eval2))) { return utility::tuple::append(ExtractAccessor::getTuple(cgh, eval1), ExtractAccessor::getTuple(cgh, eval2)); } - template static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1 , Arg2 eval2 , Arg3 eval3) + template static inline auto getTuple(cl::sycl::handler& cgh, const Arg1& eval1 , const Arg2& eval2 , const Arg3& eval3) -> decltype(utility::tuple::append(ExtractAccessor::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor::getTuple(cgh, eval2), ExtractAccessor::getTuple(cgh, eval3)))) { return utility::tuple::append(ExtractAccessor::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor::getTuple(cgh, eval2), ExtractAccessor::getTuple(cgh, eval3))); } - template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, Arg eval) + template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, const Arg& eval) -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor(cgh,eval.data()))){ return utility::tuple::make_tuple(eval.device().template get_sycl_accessor(cgh,eval.data())); } }; /// specialisation of the \ref ExtractAccessor struct when the node type is -/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp and const TensorBroadcastingOp -template class UnaryCategory, typename OP, typename RHSExpr, typename Dev> -struct ExtractAccessor, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){ - return AccessorConstructor::getTuple(cgh, eval.impl()); - } +/// TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp +#define SYCLUNARYCATEGORYEXTACC(CVQual)\ +template class UnaryCategory, typename OP, typename RHSExpr, typename Dev>\ +struct ExtractAccessor, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\ + return AccessorConstructor::getTuple(cgh, eval.impl());\ + }\ }; -/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp -template class UnaryCategory, typename OP, typename RHSExpr, typename Dev> -struct ExtractAccessor, Dev> > -: ExtractAccessor, Dev> > {}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorCwiseBinaryOp -template class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> -struct ExtractAccessor, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ - return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); - } -}; +SYCLUNARYCATEGORYEXTACC(const) +SYCLUNARYCATEGORYEXTACC() +#undef SYCLUNARYCATEGORYEXTACC + + /// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseBinaryOp -template class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> -struct ExtractAccessor, Dev> > -: ExtractAccessor, Dev> >{}; +#define SYCLBINARYCATEGORYEXTACC(CVQual)\ +template class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>\ +struct ExtractAccessor, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){\ + return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());\ + }\ +}; + +SYCLBINARYCATEGORYEXTACC(const) +SYCLBINARYCATEGORYEXTACC() +#undef SYCLBINARYCATEGORYEXTACC /// specialisation of the \ref ExtractAccessor struct when the node type is /// const TensorCwiseTernaryOp -template class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> -struct ExtractAccessor, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){ - return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl()); - } +#define SYCLTERNARYCATEGORYEXTACC(CVQual)\ +template class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>\ +struct ExtractAccessor, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){\ + return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl());\ + }\ }; -/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseTernaryOp -template class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> -struct ExtractAccessor, Dev> > -: ExtractAccessor, Dev> >{}; +SYCLTERNARYCATEGORYEXTACC(const) +SYCLTERNARYCATEGORYEXTACC() +#undef SYCLTERNARYCATEGORYEXTACC -/// specialisation of the \ref ExtractAccessor struct when the node type is -/// const TensorCwiseSelectOp. This is a special case where there is no OP -template -struct ExtractAccessor, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){ - return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl()); - } -}; /// specialisation of the \ref ExtractAccessor struct when the node type is /// TensorCwiseSelectOp. This is a special case where there is no OP -template -struct ExtractAccessor, Dev> > -: ExtractAccessor, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorAssignOp -template -struct ExtractAccessor, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ - return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); - } +#define SYCLSELECTOPEXTACC(CVQual)\ +template \ +struct ExtractAccessor, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){\ + return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl());\ + }\ }; +SYCLSELECTOPEXTACC(const) +SYCLSELECTOPEXTACC() +#undef SYCLSELECTOPEXTACC + /// specialisation of the \ref ExtractAccessor struct when the node type is TensorAssignOp -template -struct ExtractAccessor, Dev> > -: ExtractAccessor, Dev> >{}; +#define SYCLTENSORASSIGNOPEXTACC(CVQual)\ +template \ +struct ExtractAccessor, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){\ + return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());\ + }\ +}; + + SYCLTENSORASSIGNOPEXTACC(const) + SYCLTENSORASSIGNOPEXTACC() + #undef SYCLTENSORASSIGNOPEXTACC /// specialisation of the \ref ExtractAccessor struct when the node type is const TensorMap #define TENSORMAPEXPR(CVQual, ACCType)\ template \ struct ExtractAccessor, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator, Dev> eval)\ + static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator, Dev>& eval)\ -> decltype(AccessorConstructor::template getAccessor(cgh, eval)){\ return AccessorConstructor::template getAccessor(cgh, eval);\ }\ }; + TENSORMAPEXPR(const, cl::sycl::access::mode::read) TENSORMAPEXPR(, cl::sycl::access::mode::read_write) #undef TENSORMAPEXPR -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorForcedEvalOp -template -struct ExtractAccessor, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) - -> decltype(AccessorConstructor::template getAccessor(cgh, eval)){ - return AccessorConstructor::template getAccessor(cgh, eval); - } -}; - /// specialisation of the \ref ExtractAccessor struct when the node type is TensorForcedEvalOp -template -struct ExtractAccessor, Dev> > -: ExtractAccessor, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorEvalToOp -template -struct ExtractAccessor, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator, Dev> eval) - -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){ - return utility::tuple::append(AccessorConstructor::template getAccessor(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl())); - } +#define SYCLFORCEDEVALEXTACC(CVQual)\ +template \ +struct ExtractAccessor, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ + -> decltype(AccessorConstructor::template getAccessor(cgh, eval)){\ + return AccessorConstructor::template getAccessor(cgh, eval);\ + }\ }; +SYCLFORCEDEVALEXTACC(const) +SYCLFORCEDEVALEXTACC() +#undef SYCLFORCEDEVALEXTACC + + /// specialisation of the \ref ExtractAccessor struct when the node type is TensorEvalToOp -template -struct ExtractAccessor, Dev> > -: ExtractAccessor, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorReductionOp -template -struct ExtractAccessor, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) - -> decltype(AccessorConstructor::template getAccessor(cgh, eval)){ - return AccessorConstructor::template getAccessor(cgh, eval); - } +#define SYCLEVALTOEXTACC(CVQual)\ +template \ +struct ExtractAccessor, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator, Dev>& eval)\ + -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){\ + return utility::tuple::append(AccessorConstructor::template getAccessor(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()));\ + }\ }; +SYCLEVALTOEXTACC(const) +SYCLEVALTOEXTACC() +#undef SYCLEVALTOEXTACC + /// specialisation of the \ref ExtractAccessor struct when the node type is TensorReductionOp -template -struct ExtractAccessor, Dev> > -: ExtractAccessor, Dev> >{}; +#define SYCLREDUCTIONEXTACC(CVQual)\ +template \ +struct ExtractAccessor, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ + -> decltype(AccessorConstructor::template getAccessor(cgh, eval)){\ + return AccessorConstructor::template getAccessor(cgh, eval);\ + }\ +}; +SYCLREDUCTIONEXTACC(const) +SYCLREDUCTIONEXTACC() +#undef SYCLREDUCTIONEXTACC /// specialisation of the \ref ExtractAccessor struct when the node type is /// const TensorSlicingOp. This is a special case where there is no OP -template -struct ExtractAccessor, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){ - return AccessorConstructor::getTuple(cgh, eval.impl()); - } +#define SYCLSLICEOPEXTACC(CVQual)\ +template \ +struct ExtractAccessor, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\ + return AccessorConstructor::getTuple(cgh, eval.impl());\ + }\ +}; + +SYCLSLICEOPEXTACC(const) +SYCLSLICEOPEXTACC() +#undef SYCLSLICEOPEXTACC + +#define RESHAPEANDSHUFFOPEXTRACC(OPEXPR, CVQual)\ +template\ +struct ExtractAccessor, Dev> > {\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\ + return AccessorConstructor::getTuple(cgh, eval.impl());\ + }\ }; +// tensor reshaping +RESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, const) +RESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, ) +/// Tensor shuffling +RESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, const) +RESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, ) +#undef RESHAPEANDSHUFFOPEXTRACC -template -struct ExtractAccessor, Dev> > -:ExtractAccessor, Dev> >{}; /// template deduction for \ref ExtractAccessor template -auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr) --> decltype(ExtractAccessor::getTuple(cgh, expr)) { - return ExtractAccessor::getTuple(cgh, expr); +auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& eval) +-> decltype(ExtractAccessor::getTuple(cgh, eval)) { + return ExtractAccessor::getTuple(cgh, eval); } } /// namespace TensorSycl diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h index 382f0cb50..5bc57b59a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h @@ -36,152 +36,164 @@ namespace internal { template struct FunctorExtractor{ typedef typename Evaluator::Dimensions Dimensions; const Dimensions m_dimensions; - const Dimensions& dimensions() const { return m_dimensions; } + EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } FunctorExtractor(const Evaluator& expr) : m_dimensions(expr.dimensions()) {} }; - /// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp, and const TensorBroadcastingOp -template