From 7d08fa805a38f9ebb9e0e487c4e2d23d32a0fcde Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Fri, 28 Jun 2019 10:08:23 +0100 Subject: [SYCL] This PR adds the minimum modifications to the Eigen unsupported module required to run it on devices supporting SYCL. * Abstracting the pointer type so that both SYCL memory and pointer can be captured. * Converting SYCL virtual pointer to SYCL device memory in Eigen evaluator class. * Binding SYCL placeholder accessor to command group handler by using bind method in Eigen evaluator node. * Adding SYCL macro for controlling loop unrolling. * Modifying the TensorDeviceSycl.h and SYCL executor method to adopt the above changes. --- .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 33 +++++++++++++++++----- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index c102a43fb..10bdbc6a0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -37,7 +37,7 @@ struct traits > : public traits struct eval, Eigen::Dense> { - typedef const TensorBroadcastingOp& type; + typedef const TensorBroadcastingOp EIGEN_DEVICE_REF type; }; template @@ -105,7 +105,11 @@ struct TensorEvaluator, Device> typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; static const int PacketSize = PacketType::size; + protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator wont be standard layout; bool isCopy, nByOne, oneByN; + public: + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; enum { IsAligned = true, @@ -205,7 +209,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { m_impl.evalSubExprsIfNeeded(NULL); return true; } @@ -238,6 +242,7 @@ struct TensorEvaluator, Device> // TODO: attempt to speed this up. The integer divisions and modulo are slow EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexColMajor(Index index) const { Index inputIndex = 0; + EIGEN_UNROLL_LOOP for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; if (internal::index_statically_eq(i, 1)) { @@ -272,6 +277,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexRowMajor(Index index) const { Index inputIndex = 0; + EIGEN_UNROLL_LOOP for (int i = 0; i < NumDims - 1; ++i) { const Index idx = index / m_outputStrides[i]; if (internal::index_statically_eq(i, 1)) { @@ -376,6 +382,7 @@ struct TensorEvaluator, Device> values[0] = m_impl.coeff(inputIndex); return internal::pload1(values); } else { + EIGEN_UNROLL_LOOP for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) { if (outputOffset + cur < m_outputStrides[endDim]) { values[i] = m_impl.coeff(inputIndex); @@ -410,6 +417,7 @@ struct TensorEvaluator, Device> return m_impl.template packet(inputIndex); } else { EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + EIGEN_UNROLL_LOOP for (int i = 0; i < PacketSize; ++i) { if (inputIndex > m_inputStrides[dim]-1) { inputIndex = 0; @@ -441,6 +449,7 @@ struct TensorEvaluator, Device> values[0] = m_impl.coeff(inputIndex); return internal::pload1(values); } else { + EIGEN_UNROLL_LOOP for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) { if (outputOffset + cur < m_outputStrides[dim]) { values[i] = m_impl.coeff(inputIndex); @@ -465,6 +474,7 @@ struct TensorEvaluator, Device> const Index originalIndex = index; Index inputIndex = 0; + EIGEN_UNROLL_LOOP for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; if (internal::index_statically_eq(i, 1)) { @@ -500,6 +510,7 @@ struct TensorEvaluator, Device> } else { EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; values[0] = m_impl.coeff(inputIndex); + EIGEN_UNROLL_LOOP for (int i = 1; i < PacketSize; ++i) { if (innermostLoc + i < m_impl.dimensions()[0]) { values[i] = m_impl.coeff(inputIndex+i); @@ -521,6 +532,7 @@ struct TensorEvaluator, Device> const Index originalIndex = index; Index inputIndex = 0; + EIGEN_UNROLL_LOOP for (int i = 0; i < NumDims - 1; ++i) { const Index idx = index / m_outputStrides[i]; if (internal::index_statically_eq(i, 1)) { @@ -556,6 +568,7 @@ struct TensorEvaluator, Device> } else { EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; values[0] = m_impl.coeff(inputIndex); + EIGEN_UNROLL_LOOP for (int i = 1; i < PacketSize; ++i) { if (innermostLoc + i < m_impl.dimensions()[NumDims-1]) { values[i] = m_impl.coeff(inputIndex+i); @@ -572,6 +585,7 @@ struct TensorEvaluator, Device> costPerCoeff(bool vectorized) const { double compute_cost = TensorOpCost::AddCost(); if (!isCopy && NumDims > 0) { + EIGEN_UNROLL_LOOP for (int i = NumDims - 1; i > 0; --i) { compute_cost += TensorOpCost::DivCost(); if (internal::index_statically_eq(i, 1)) { @@ -845,12 +859,17 @@ struct TensorEvaluator, Device> } } - EIGEN_DEVICE_FUNC typename Eigen::internal::traits::PointerType data() const { return NULL; } + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } const TensorEvaluator& impl() const { return m_impl; } Broadcast functor() const { return m_broadcast; } - + #ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } + #endif private: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void BroadcastBlock( const Dimensions& input_block_sizes, @@ -874,9 +893,9 @@ struct TensorEvaluator, Device> BroadcastTensorBlockReader::Run(&broadcast_block, input_block.data()); } - protected: - const Device& m_device; - const Broadcast m_broadcast; +protected: + const Device EIGEN_DEVICE_REF m_device; + const typename internal::remove_reference::type m_broadcast; Dimensions m_dimensions; array m_outputStrides; array m_inputStrides; -- cgit v1.2.3