diff options
author | Mehdi Goli <mehdi.goli@codeplay.com> | 2016-11-04 18:18:19 +0000 |
---|---|---|
committer | Mehdi Goli <mehdi.goli@codeplay.com> | 2016-11-04 18:18:19 +0000 |
commit | 0ebe3808ca8b2c96d9d77024ba8d4d0bdfb7e23c (patch) | |
tree | 1358b27b6a27cb89b3665016ec651f6081babfef /unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | |
parent | 0585b2965d06cc2c57be35844bd2d0d56e6334ac (diff) |
Removed the sycl include from Eigen/Core and moved it to Unsupported/Eigen/CXX11/Tensor; added TensorReduction for sycl (full reduction and partial reduction); added TensorReduction test case for sycl (full reduction and partial reduction); fixed the tile size on TensorSyclRun.h based on the device max work group size;
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 81 |
1 files changed, 59 insertions, 22 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index d34ff98b0..367bccf63 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// Copyright (C) 2016 Mehdi Goli, Codeplay Software Ltd <eigen@codeplay.com> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -20,8 +21,8 @@ namespace Eigen { */ namespace internal { -template<typename Op, typename Dims, typename XprType> -struct traits<TensorReductionOp<Op, Dims, XprType> > + template<typename Op, typename Dims, typename XprType,template <class> class MakePointer_ > + struct traits<TensorReductionOp<Op, Dims, XprType, MakePointer_> > : traits<XprType> { typedef traits<XprType> XprTraits; @@ -31,18 +32,24 @@ struct traits<TensorReductionOp<Op, Dims, XprType> > typedef typename XprType::Nested Nested; static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value; static const int Layout = XprTraits::Layout; + + template <class T> struct MakePointer { + // Intermediate typedef to workaround MSVC issue. + typedef MakePointer_<T> MakePointerT; + typedef typename MakePointerT::Type Type; + }; }; -template<typename Op, typename Dims, typename XprType> -struct eval<TensorReductionOp<Op, Dims, XprType>, Eigen::Dense> +template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_> +struct eval<TensorReductionOp<Op, Dims, XprType, MakePointer_>, Eigen::Dense> { - typedef const TensorReductionOp<Op, Dims, XprType>& type; + typedef const TensorReductionOp<Op, Dims, XprType, MakePointer_>& type; }; -template<typename Op, typename Dims, typename XprType> -struct nested<TensorReductionOp<Op, Dims, XprType>, 1, typename eval<TensorReductionOp<Op, Dims, XprType> >::type> +template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_> +struct nested<TensorReductionOp<Op, Dims, XprType, MakePointer_>, 1, typename eval<TensorReductionOp<Op, Dims, XprType, MakePointer_> >::type> { - typedef TensorReductionOp<Op, Dims, XprType> type; + typedef TensorReductionOp<Op, Dims, XprType, MakePointer_> type; }; @@ -339,8 +346,8 @@ __global__ void OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnTy } // end namespace internal -template <typename Op, typename Dims, typename XprType> -class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType>, ReadOnlyAccessors> { +template <typename Op, typename Dims, typename XprType, template <class> class MakePointer_> +class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType, MakePointer_>, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits<TensorReductionOp>::Scalar Scalar; typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; @@ -371,18 +378,19 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType> // Eval as rvalue -template<typename Op, typename Dims, typename ArgType, typename Device> -struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> +template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device> +struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> { - typedef TensorReductionOp<Op, Dims, ArgType> XprType; + typedef TensorReductionOp<Op, Dims, ArgType, MakePointer_> XprType; typedef typename XprType::Index Index; + typedef ArgType ChildType; typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; static const int NumInputDims = internal::array_size<InputDimensions>::value; static const int NumReducedDims = internal::array_size<Dims>::value; static const int NumOutputDims = NumInputDims - NumReducedDims; typedef typename internal::conditional<NumOutputDims==0, Sizes<>, DSizes<Index, NumOutputDims> >::type Dimensions; typedef typename XprType::Scalar Scalar; - typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> Self; + typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Self; static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess; typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; @@ -401,7 +409,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> static const bool RunningFullReduction = (NumOutputDims==0); EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device) + : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device), m_xpr_dims(op.dims()) { EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), @@ -471,25 +479,35 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool evalSubExprsIfNeeded(CoeffReturnType* data) { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool evalSubExprsIfNeeded(typename MakePointer_<CoeffReturnType>::Type data) { m_impl.evalSubExprsIfNeeded(NULL); // Use the FullReducer if possible. - if (RunningFullReduction && + if ((RunningFullReduction && RunningOnSycl) ||(RunningFullReduction && internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation && ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || - !RunningOnGPU)) { + !RunningOnGPU))) { bool need_assign = false; if (!data) { m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType))); data = m_result; need_assign = true; } - Op reducer(m_reducer); internal::FullReducer<Self, Op, Device>::run(*this, reducer, m_device, data); return need_assign; } + else if(RunningOnSycl){ + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + m_result = data; + } + Op reducer(m_reducer); + internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve); + return (m_result != NULL); + } // Attempt to use an optimized reduction. else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) { @@ -572,7 +590,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - if ((RunningFullReduction || RunningOnGPU) && m_result) { + if ((RunningOnSycl || RunningFullReduction || RunningOnGPU) && m_result) { return *(m_result + index); } Op reducer(m_reducer); @@ -644,7 +662,20 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> } } - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + /// required by sycl in order to extract the output accessor +#ifndef EIGEN_USE_SYCL + EIGEN_DEVICE_FUNC typename MakePointer_<Scalar>::Type data() const { return NULL; } +#else + EIGEN_DEVICE_FUNC typename MakePointer_<Scalar>::Type data() const { + return m_result; } +#endif + /// required by sycl in order to extract the accessor + const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} + /// added for sycl in order to re-construct the reduction eval on the device for the sub-kernel + const Dims& xprDims() const {return m_xpr_dims;} + private: template <int, typename, typename> friend struct internal::GenericDimReducer; @@ -737,12 +768,18 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> // For full reductions #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value; + static const bool RunningOnSycl=false; +#elif defined(EIGEN_USE_SYCL) +static const bool RunningOnSycl = internal::is_same<typename internal::remove_all<Device>::type, Eigen::SyclDevice>::value; +static const bool RunningOnGPU = false; #else static const bool RunningOnGPU = false; + static const bool RunningOnSycl=false; #endif - CoeffReturnType* m_result; + typename MakePointer_<CoeffReturnType>::Type m_result; const Device& m_device; + const Dims& m_xpr_dims; }; } // end namespace Eigen |