diff options
author | Mehdi Goli <mehdi.goli@codeplay.com> | 2019-06-28 10:08:23 +0100 |
---|---|---|
committer | Mehdi Goli <mehdi.goli@codeplay.com> | 2019-06-28 10:08:23 +0100 |
commit | 7d08fa805a38f9ebb9e0e487c4e2d23d32a0fcde (patch) | |
tree | fbff4d80b6b373dcd53632de4c1fab5c393bdd64 /unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | |
parent | 16a56b2dddbfaf2d4b81d62be5e3139f12783ac8 (diff) |
[SYCL] This PR adds the minimum modifications to the Eigen unsupported module required to run it on devices supporting SYCL.
* Abstracting the pointer type so that both SYCL memory and pointer can be captured.
* Converting SYCL virtual pointer to SYCL device memory in Eigen evaluator class.
* Binding SYCL placeholder accessor to command group handler by using bind method in Eigen evaluator node.
* Adding SYCL macro for controlling loop unrolling.
* Modifying the TensorDeviceSycl.h and SYCL executor method to adopt the above changes.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorScan.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 31 |
1 files changed, 23 insertions, 8 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 64f10d0a4..44156126d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -86,12 +86,15 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> { typedef TensorScanOp<Op, ArgType> XprType; typedef typename XprType::Index Index; + typedef const ArgType ChildType; static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; typedef DSizes<Index, NumDims> Dimensions; typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self; + typedef StorageMemory<Scalar, Device> Storage; + typedef typename Storage::Type EvaluatorPointerType; enum { IsAligned = false, @@ -110,7 +113,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> { m_exclusive(op.exclusive()), m_accumulator(op.accumulator()), m_size(m_impl.dimensions()[op.axis()]), - m_stride(1), + m_stride(1), m_consume_dim(op.axis()), m_output(NULL) { // Accumulating a scalar isn't supported. @@ -142,6 +145,10 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> { return m_stride; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& consume_dim() const { + return m_consume_dim; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const { return m_size; } @@ -162,7 +169,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> { return m_device; } - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { m_impl.evalSubExprsIfNeeded(NULL); ScanLauncher<Self, Op, Device> launcher; if (data) { @@ -171,7 +178,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> { } const Index total_size = internal::array_prod(dimensions()); - m_output = static_cast<CoeffReturnType*>(m_device.allocate(total_size * sizeof(Scalar))); + m_output = static_cast<EvaluatorPointerType>(m_device.get((Scalar*) m_device.allocate_temp(total_size * sizeof(Scalar)))); launcher(*this, m_output); return true; } @@ -181,7 +188,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> { return internal::ploadt<PacketReturnType, LoadMode>(m_output + index); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return m_output; } @@ -196,21 +203,29 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - if (m_output != NULL) { - m_device.deallocate(m_output); + if (m_output) { + m_device.deallocate_temp(m_output); m_output = NULL; } m_impl.cleanup(); } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + m_output.bind(cgh); + } +#endif protected: TensorEvaluator<ArgType, Device> m_impl; - const Device& m_device; + const Device EIGEN_DEVICE_REF m_device; const bool m_exclusive; Op m_accumulator; const Index m_size; Index m_stride; - CoeffReturnType* m_output; + Index m_consume_dim; + EvaluatorPointerType m_output; }; // CPU implementation of scan |