diff options
author | Mehdi Goli <mehdi.goli@example.com> | 2016-12-01 13:02:27 +0000 |
---|---|---|
committer | Mehdi Goli <mehdi.goli@example.com> | 2016-12-01 13:02:27 +0000 |
commit | 79aa2b784ecc26d6a8ef6fb2b2b053f4ad81593b (patch) | |
tree | 626e91024c30ad3caa510ca2e06548dbd6ffadce /unsupported/Eigen | |
parent | a70393fd02fb56f432c6258ab1744e6d299797e3 (diff) |
Adding sycl backend for TensorPadding.h; disabling __uint128 for sycl in TensorIntDiv.h; disabling cache size for sycl in TensorDeviceDefault.h; adding sycl backend for StrideSliceOP; removing sycl compiler warning for creating an array of size 0 in CXX11Meta.h; cleaning up the sycl backend code.
Diffstat (limited to 'unsupported/Eigen')
13 files changed, 216 insertions, 80 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h index 9d141395b..ccaaa6cb2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h @@ -45,7 +45,7 @@ struct DefaultDevice { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { -#ifndef __CUDA_ARCH__ +#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__) // Running on the host CPU return l1CacheSize(); #else @@ -55,7 +55,7 @@ struct DefaultDevice { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { -#ifndef __CUDA_ARCH__ +#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__) // Running single threaded on the host CPU return l3CacheSize(); #else diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h index 1fd00d4f6..40dd5d81a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -17,6 +17,32 @@ namespace Eigen { + #define ConvertToActualTypeSycl(Scalar, buf_acc) reinterpret_cast<typename cl::sycl::global_ptr<Scalar>::pointer_t>((&(*buf_acc.get_pointer()))) + + template <typename Scalar> class MemCopyFunctor { + public: + typedef cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read, cl::sycl::access::target::global_buffer> read_accessor; + typedef cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer> write_accessor; + + MemCopyFunctor(read_accessor src_acc, write_accessor dst_acc, size_t rng, size_t i, size_t offset): m_src_acc(src_acc), m_dst_acc(dst_acc), m_rng(rng), m_i(i), m_offset(offset) {} + + void operator()(cl::sycl::nd_item<1> itemID) { + auto src_ptr = ConvertToActualTypeSycl(Scalar, m_src_acc); + auto dst_ptr = ConvertToActualTypeSycl(Scalar, m_dst_acc); + auto globalid = 
itemID.get_global_linear_id(); + if (globalid < m_rng) { + dst_ptr[globalid + m_i] = src_ptr[globalid + m_offset]; + } + } + + private: + read_accessor m_src_acc; + write_accessor m_dst_acc; + size_t m_rng; + size_t m_i; + size_t m_offset; + }; + EIGEN_STRONG_INLINE auto get_sycl_supported_devices()->decltype(cl::sycl::device::get_devices()){ auto devices = cl::sycl::device::get_devices(); std::vector<cl::sycl::device>::iterator it =devices.begin(); @@ -33,7 +59,6 @@ EIGEN_STRONG_INLINE auto get_sycl_supported_devices()->decltype(cl::sycl::device } return devices; } -#define ConvertToActualTypeSycl(T, buf_acc) reinterpret_cast<typename cl::sycl::global_ptr<T>::pointer_t>((&(*buf_acc.get_pointer()))) struct QueueInterface { /// class members: @@ -170,30 +195,6 @@ struct SyclDevice { // some runtime conditions that can be applied here EIGEN_STRONG_INLINE bool isDeviceSuitable() const { return true; } - template <typename T> class MemCopyFunctor { - public: - typedef cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read, cl::sycl::access::target::global_buffer> read_accessor; - typedef cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer> write_accessor; - - MemCopyFunctor(read_accessor src_acc, write_accessor dst_acc, size_t rng, size_t i, size_t offset): m_src_acc(src_acc), m_dst_acc(dst_acc), m_rng(rng), m_i(i), m_offset(offset) {} - - void operator()(cl::sycl::nd_item<1> itemID) { - auto src_ptr = ConvertToActualTypeSycl(T, m_src_acc); - auto dst_ptr = ConvertToActualTypeSycl(T, m_dst_acc); - auto globalid = itemID.get_global_linear_id(); - if (globalid < m_rng) { - dst_ptr[globalid + m_i] = src_ptr[globalid + m_offset]; - } - } - - private: - read_accessor m_src_acc; - write_accessor m_dst_acc; - size_t m_rng; - size_t m_i; - size_t m_offset; - }; - /// the memcpy function template<typename T> EIGEN_STRONG_INLINE void memcpy(void *dst, const T *src, size_t n) const { auto it1 = 
m_queue_stream->find_buffer((void*)src); @@ -260,6 +261,17 @@ struct SyclDevice { }); synchronize(); } + + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { + // FIXME + return 48*1024; + } + + EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + // We won't try to take advantage of the l2 cache for the time being, and + // there is no l3 cache on cuda devices. + return firstLevelCacheSize(); + } /// No need for sycl it should act the same as CPU version EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index eea25ac33..485a082e2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -124,7 +124,7 @@ namespace { template <typename T> struct DividerHelper<64, T> { static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { -#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) +#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__) return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); #else const uint64_t shift = 1ULL << log_div; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index 284f29345..d582ccbe1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -723,7 +723,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_device(device), m_strides(op.strides()) + : m_impl(op.expression(), device), m_device(device), m_strides(op.strides()), 
m_exprStartIndices(op.startIndices()), m_exprStopIndices(op.stopIndices()) { // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped; @@ -828,6 +828,15 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, return NULL; } + //use by sycl + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& exprStartIndices() const { return m_exprStartIndices; } + //use by sycl + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& exprStopIndices() const { return m_exprStopIndices; } + //use by sycl + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& strides() const { return m_strides; } + /// used by sycl + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const{return m_impl;} + protected: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { @@ -862,6 +871,10 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, DSizes<Index, NumDims> m_offsets; // offset in a flattened shape const Strides m_strides; std::size_t m_block_total_size_max; + //use by sycl + const StartIndices m_exprStartIndices; + //use by sycl + const StopIndices m_exprStopIndices; }; // Eval as lvalue diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index 647bcf108..a8e255246 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -200,6 +200,13 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + /// used by sycl + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PaddingDimensions& padding() const { return m_padding; } + /// used by sycl + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& padding_value() const { return m_paddingValue; } + /// used by sycl + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const{return m_impl;} + private: EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim( Index index, int dim_index) const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h index abb8420a6..48c5f9a47 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h @@ -200,9 +200,6 @@ struct InnerReducer<Self, Op, const Eigen::SyclDevice> { /// creating the shared memory for calculating reduction. /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can /// recursively apply reduction on it in order to reduce the whole. - // Dims dims= self.xprDims(); - //Op functor = reducer; - dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange); dev.sycl_queue().submit([&](cl::sycl::handler &cgh) { // create a tuple of accessors from Evaluator @@ -214,28 +211,6 @@ struct InnerReducer<Self, Op, const Eigen::SyclDevice> { TensorSycl::internal::ReductionFunctor<HostExpr, PlaceHolderExpr, FunctorExpr, Tuple_of_Acc, Dims, Op, typename Self::Index> (output_accessor, functors, tuple_of_accessors, self.xprDims(), reducer, range)); - - // [=](cl::sycl::nd_item<1> itemID) { - // typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr; - // auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); - /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour - /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the - /// calculated result to its parent kernel. 
While the latter is automatically detected through our device expression generator. The former is created here. - // const auto device_self_expr= TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor); - /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is - /// the device_evaluator is detectable and recognisable on the device. - // typedef Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice> DeviceSelf; - // auto device_self_evaluator = Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice>(device_self_expr, Eigen::DefaultDevice()); - // auto output_accessor_ptr =ConvertToActualTypeSycl(typename DeviceSelf::CoeffReturnType, output_accessor); - /// const cast added as a naive solution to solve the qualifier drop error - // auto globalid=itemID.get_global_linear_id(); - // if (globalid< range) { - // typename DeviceSelf::CoeffReturnType accum = functor.initialize(); - // GenericDimReducer<DeviceSelf::NumReducedDims-1, DeviceSelf, Op>::reduce(device_self_evaluator, device_self_evaluator.firstInput(static_cast<typename DevExpr::Index>(globalid)),const_cast<Op&>(functor), &accum); - // functor.finalize(accum); - // output_accessor_ptr[globalid]= accum; - // } - // }); }); dev.synchronize(); return false; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h index cc13ca963..e940c8a9d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h @@ -125,18 +125,31 @@ KERNELBROKERCONVERTSLICEOP() #undef KERNELBROKERCONVERTSLICEOP -#define KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(OPEXPR, CVQual)\ +#define KERNELBROKERCONVERTERSLICESTRIDEOP(CVQual)\ +template<typename StartIndices, typename 
StopIndices, typename Strides, typename XprType>\ +struct ConvertToDeviceExpression<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >{\ + typedef CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, typename ConvertToDeviceExpression<XprType>::Type> Type;\ +}; + +KERNELBROKERCONVERTERSLICESTRIDEOP(const) +KERNELBROKERCONVERTERSLICESTRIDEOP() +#undef KERNELBROKERCONVERTERSLICESTRIDEOP + +#define KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(OPEXPR, CVQual)\ template<typename Param, typename XprType>\ struct ConvertToDeviceExpression<CVQual OPEXPR <Param, XprType> >{\ typedef CVQual OPEXPR<Param, typename ConvertToDeviceExpression<XprType>::Type> Type;\ }; -KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorReshapingOp, const) -KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorReshapingOp, ) +KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorPaddingOp, const) +KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorPaddingOp, ) + +KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorReshapingOp, const) +KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorReshapingOp, ) -KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorShufflingOp, const) -KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorShufflingOp, ) -#undef KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP +KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorShufflingOp, const) +KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorShufflingOp, ) +#undef KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP } // namespace internal } // namespace TensorSycl diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h index 4433fec01..7b15f93fe 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h @@ -231,7 +231,7 @@ SYCLREDUCTIONEXPR() template<typename StartIndices, typename Sizes, typename OrigXprType, typename XprType, 
typename... Params>\ struct ExprConstructor<CVQual TensorSlicingOp <StartIndices, Sizes, OrigXprType> , CVQual TensorSlicingOp<StartIndices, Sizes, XprType>, Params... >{\ typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\ - typedef CVQual TensorSlicingOp<StartIndices, Sizes, typename my_xpr_type::Type> Type ;\ + typedef CVQual TensorSlicingOp<StartIndices, Sizes, typename my_xpr_type::Type> Type;\ my_xpr_type xprExpr;\ Type expr;\ template <typename FuncDetector>\ @@ -244,6 +244,22 @@ SYCLSLICEOPEXPR() #undef SYCLSLICEOPEXPR +#define SYCLSLICESTRIDEOPEXPR(CVQual)\ +template<typename StartIndices, typename StopIndices, typename Strides, typename OrigXprType, typename XprType, typename... Params>\ +struct ExprConstructor<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, OrigXprType>, CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Params... >{\ + typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\ + typedef CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, typename my_xpr_type::Type> Type;\ + my_xpr_type xprExpr;\ + Type expr;\ + template <typename FuncDetector>\ + ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ + : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.startIndices(), funcD.stopIndices(),funcD.strides()) {}\ +}; + +SYCLSLICESTRIDEOPEXPR(const) +SYCLSLICESTRIDEOPEXPR() +#undef SYCLSLICESTRIDEOPEXPR + #define SYCLRESHAPEANDSHUFFLEOPEXPRCONST(OPEXPR, CVQual)\ template<typename Param, typename OrigXprType, typename XprType, typename... Params>\ struct ExprConstructor<CVQual OPEXPR <Param, OrigXprType> , CVQual OPEXPR <Param, XprType>, Params... 
>{\ @@ -263,6 +279,23 @@ SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, const) SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, ) #undef SYCLRESHAPEANDSHUFFLEOPEXPRCONST +#define SYCLPADDINGOPEXPRCONST(OPEXPR, CVQual)\ +template<typename Param, typename OrigXprType, typename XprType, typename... Params>\ +struct ExprConstructor<CVQual OPEXPR <Param, OrigXprType> , CVQual OPEXPR <Param, XprType>, Params... >{\ + typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\ + typedef CVQual OPEXPR <Param, typename my_xpr_type::Type> Type ;\ + my_xpr_type xprExpr;\ + Type expr;\ + template <typename FuncDetector>\ + ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ + : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.param() , funcD.scalar_param()) {}\ +}; + +SYCLPADDINGOPEXPRCONST(TensorPaddingOp, const) +SYCLPADDINGOPEXPRCONST(TensorPaddingOp, ) +#undef SYCLPADDINGOPEXPRCONST + + /// template deduction for \ref ExprConstructor struct template <typename OrigExpr, typename IndexExpr, typename FuncD, typename... 
Params> diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h index f5ef05e36..dc8356cf4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h @@ -209,7 +209,21 @@ SYCLSLICEOPEXTACC(const) SYCLSLICEOPEXTACC() #undef SYCLSLICEOPEXTACC -#define RESHAPEANDSHUFFOPEXTRACC(OPEXPR, CVQual)\ +#define SYCLSLICESTRIDEOPEXTACC(CVQual)\ +template<typename StartIndices, typename StopIndices, typename Strides, typename XprType, typename Dev>\ +struct ExtractAccessor<TensorEvaluator<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Dev> >{\ + static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Dev>& eval)\ + -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\ + return AccessorConstructor::getTuple(cgh, eval.impl());\ + }\ +}; + +SYCLSLICESTRIDEOPEXTACC(const) +SYCLSLICESTRIDEOPEXTACC() +#undef SYCLSLICESTRIDEOPEXTACC + + +#define PADDINGRESHAPEANDSHUFFOPEXTRACC(OPEXPR, CVQual)\ template<typename Param, typename XprType, typename Dev>\ struct ExtractAccessor<TensorEvaluator<CVQual OPEXPR<Param, XprType>, Dev> > {\ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual OPEXPR<Param, XprType>, Dev>& eval)\ @@ -217,13 +231,17 @@ struct ExtractAccessor<TensorEvaluator<CVQual OPEXPR<Param, XprType>, Dev> > {\ return AccessorConstructor::getTuple(cgh, eval.impl());\ }\ }; + +// tensor padding +PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorPaddingOp, const) +PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorPaddingOp, ) // tensor reshaping -RESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, const) -RESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, ) +PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, const) +PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, ) /// Tensor shuffling 
-RESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, const) -RESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, ) -#undef RESHAPEANDSHUFFOPEXTRACC +PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, const) +PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, ) +#undef PADDINGRESHAPEANDSHUFFOPEXTRACC /// template deduction for \ref ExtractAccessor template <typename Evaluator> diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h index 5bc57b59a..1293b14e2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h @@ -176,6 +176,24 @@ SYCLEXTRFUNCTSLICEOP(const) SYCLEXTRFUNCTSLICEOP() #undef SYCLEXTRFUNCTSLICEOP +#define SYCLEXTRFUNCTSLICESTRIDEOP(CVQual)\ +template<typename StartIndices, typename StopIndices, typename Strides, typename XprType, typename Dev>\ +struct FunctorExtractor<TensorEvaluator<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Dev> >{\ + FunctorExtractor<TensorEvaluator<XprType, Dev> > xprExpr;\ + const StartIndices m_startIndices;\ + const StopIndices m_stopIndices;\ + const Strides m_strides;\ + FunctorExtractor(const TensorEvaluator<CVQual TensorStridingSlicingOp<StartIndices, StopIndices,Strides, XprType>, Dev>& expr)\ + : xprExpr(expr.impl()), m_startIndices(expr.exprStartIndices()), m_stopIndices(expr.exprStopIndices()), m_strides(expr.strides()) {}\ + EIGEN_STRONG_INLINE const StartIndices& startIndices() const { return m_startIndices; }\ + EIGEN_STRONG_INLINE const StartIndices& stopIndices() const { return m_stopIndices; }\ + EIGEN_STRONG_INLINE const StartIndices& strides() const { return m_strides; }\ +}; + +SYCLEXTRFUNCTSLICESTRIDEOP(const) +SYCLEXTRFUNCTSLICESTRIDEOP() +#undef SYCLEXTRFUNCTSLICESTRIDEOP + // Had to separate reshapeOP otherwise it will be mistaken by UnaryCategory #define SYCLRESHAPEANDSHUFFLEOPFUNCEXT(OPEXPR, FUNCCALL, CVQual)\ 
template<typename Param, typename XprType, typename Dev>\ @@ -192,7 +210,25 @@ SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorReshapingOp, dimensions(), ) SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorShufflingOp, shufflePermutation(), const) SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorShufflingOp, shufflePermutation(), ) -#undef SYCLRESHAPEOPEXPR +#undef SYCLRESHAPEANDSHUFFLEOPFUNCEXT + +// Had to separate reshapeOP otherwise it will be mistaken by UnaryCategory +#define PADDINGOPFUNCEXT(OPEXPR, FUNCCALL, SCALARFUNCCALL, CVQual)\ +template<typename Param, typename XprType, typename Dev>\ +struct FunctorExtractor<Eigen::TensorEvaluator<CVQual Eigen::OPEXPR<Param, XprType>, Dev> > {\ + FunctorExtractor<Eigen::TensorEvaluator<XprType, Dev> > xprExpr;\ + const Param m_param;\ + typedef typename Eigen::TensorEvaluator<CVQual Eigen::OPEXPR<Param, XprType>, Dev>::Scalar Scalar;\ + const Scalar m_scalar_param;\ + EIGEN_STRONG_INLINE const Param& param() const { return m_param; }\ + EIGEN_STRONG_INLINE const Scalar& scalar_param() const { return m_scalar_param; }\ + FunctorExtractor(const Eigen::TensorEvaluator<CVQual Eigen::OPEXPR<Param, XprType>, Dev>& expr)\ + : xprExpr(expr.impl()), m_param(expr.FUNCCALL), m_scalar_param(expr.SCALARFUNCCALL) {}\ +}; + +PADDINGOPFUNCEXT(TensorPaddingOp, padding(), padding_value(), const) +PADDINGOPFUNCEXT(TensorPaddingOp, padding(), padding_value(), ) +#undef PADDINGOPFUNCEXT /// template deduction function for FunctorExtractor template <typename Evaluator> diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h index a548aab29..5d392218e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h @@ -124,17 +124,27 @@ SLICEOPLEAFCOUNT(const) SLICEOPLEAFCOUNT() #undef SLICEOPLEAFCOUNT -#define RESHAPEANDSHUFFLELEAFCOUNT(OPEXPR, CVQual)\ +#define SLICESTRIDEOPLEAFCOUNT(CVQual)\ +template<typename StartIndices, typename 
StopIndices, typename Strides, typename XprType>\ +struct LeafCount<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >:CategoryCount<XprType>{}; + +SLICESTRIDEOPLEAFCOUNT(const) +SLICESTRIDEOPLEAFCOUNT() +#undef SLICESTRIDEOPLEAFCOUNT + +#define PADDINGRESHAPEANDSHUFFLELEAFCOUNT(OPEXPR, CVQual)\ template<typename Param, typename XprType>\ struct LeafCount<CVQual OPEXPR<Param, XprType> >:CategoryCount<XprType>{}; -RESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, const) -RESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, ) +PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorPaddingOp, const) +PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorPaddingOp, ) -RESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, const) -RESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, ) -#undef RESHAPEANDSHUFFLELEAFCOUNT +PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, const) +PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, ) +PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, const) +PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, ) +#undef PADDINGRESHAPEANDSHUFFLELEAFCOUNT } /// namespace TensorSycl } /// namespace internal diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h index bb042ade2..e1dbd0c6c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h @@ -180,18 +180,32 @@ SLICEOPEXPR(const) SLICEOPEXPR() #undef SLICEOPEXPR -#define RESHAPEANDSHUFFLEOPPLH(OPEXP , CVQual)\ + +#define SYCLSLICESTRIDEOPPLH(CVQual)\ +template<typename StartIndices, typename StopIndices, typename Strides, typename XprType, size_t N>\ +struct PlaceHolderExpression<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, N> {\ + typedef CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, typename CalculateIndex<N, XprType>::ArgType> Type;\ +}; + +SYCLSLICESTRIDEOPPLH(const) 
+SYCLSLICESTRIDEOPPLH() +#undef SYCLSLICESTRIDEOPPLH + +#define PADDINGRESHAPEANDSHUFFLEOPPLH(OPEXP , CVQual)\ template<typename Param, typename XprType, size_t N>\ struct PlaceHolderExpression<CVQual OPEXP<Param, XprType>, N > {\ typedef CVQual OPEXP<Param, typename CalculateIndex<N, XprType>::ArgType> Type;\ }; -RESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, const) -RESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, ) +PADDINGRESHAPEANDSHUFFLEOPPLH(TensorPaddingOp, const) +PADDINGRESHAPEANDSHUFFLEOPPLH(TensorPaddingOp,) + +PADDINGRESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, const) +PADDINGRESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, ) -RESHAPEANDSHUFFLEOPPLH(TensorShufflingOp, const) -RESHAPEANDSHUFFLEOPPLH(TensorShufflingOp,) -#undef RESHAPEANDSHUFFLEOPPLH +PADDINGRESHAPEANDSHUFFLEOPPLH(TensorShufflingOp, const) +PADDINGRESHAPEANDSHUFFLEOPPLH(TensorShufflingOp,) +#undef PADDINGRESHAPEANDSHUFFLEOPPLH /// template deduction for \ref PlaceHolderExpression struct template <typename Expr> diff --git a/unsupported/Eigen/CXX11/src/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/util/CXX11Meta.h index 197fddab6..e7c1a1bae 100644 --- a/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/util/CXX11Meta.h @@ -49,6 +49,11 @@ struct numeric_list { static constexpr std::size_t count = sizeof...(nn); const T values[count] = {nn...}; }; +template<typename T> +struct numeric_list<T>{ + static constexpr std::size_t count = 0; + //Array of size zero strictly forbiden in ISO C++ +}; #endif |