aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
diff options
context:
space:
mode:
authorGravatar Mehdi Goli <mehdi.goli@codeplay.com>2019-06-28 10:08:23 +0100
committerGravatar Mehdi Goli <mehdi.goli@codeplay.com>2019-06-28 10:08:23 +0100
commit7d08fa805a38f9ebb9e0e487c4e2d23d32a0fcde (patch)
treefbff4d80b6b373dcd53632de4c1fab5c393bdd64 /unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
parent16a56b2dddbfaf2d4b81d62be5e3139f12783ac8 (diff)
[SYCL] This PR adds the minimum modifications to the Eigen unsupported module required to run it on devices supporting SYCL.
* Abstracting the pointer type so that both SYCL memory and pointer can be captured. * Converting SYCL virtual pointer to SYCL device memory in Eigen evaluator class. * Binding SYCL placeholder accessor to command group handler by using bind method in Eigen evaluator node. * Adding SYCL macro for controlling loop unrolling. * Modifying the TensorDeviceSycl.h and SYCL executor method to adopt the above changes.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorScan.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorScan.h31
1 files changed, 23 insertions, 8 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
index 64f10d0a4..44156126d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -86,12 +86,15 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
typedef TensorScanOp<Op, ArgType> XprType;
typedef typename XprType::Index Index;
+ typedef const ArgType ChildType;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self;
+ typedef StorageMemory<Scalar, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
enum {
IsAligned = false,
@@ -110,7 +113,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
m_exclusive(op.exclusive()),
m_accumulator(op.accumulator()),
m_size(m_impl.dimensions()[op.axis()]),
- m_stride(1),
+ m_stride(1), m_consume_dim(op.axis()),
m_output(NULL) {
// Accumulating a scalar isn't supported.
@@ -142,6 +145,10 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
return m_stride;
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& consume_dim() const {
+ return m_consume_dim;
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const {
return m_size;
}
@@ -162,7 +169,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
return m_device;
}
- EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL);
ScanLauncher<Self, Op, Device> launcher;
if (data) {
@@ -171,7 +178,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
}
const Index total_size = internal::array_prod(dimensions());
- m_output = static_cast<CoeffReturnType*>(m_device.allocate(total_size * sizeof(Scalar)));
+ m_output = static_cast<EvaluatorPointerType>(m_device.get((Scalar*) m_device.allocate_temp(total_size * sizeof(Scalar))));
launcher(*this, m_output);
return true;
}
@@ -181,7 +188,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const
{
return m_output;
}
@@ -196,21 +203,29 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- if (m_output != NULL) {
- m_device.deallocate(m_output);
+ if (m_output) {
+ m_device.deallocate_temp(m_output);
m_output = NULL;
}
m_impl.cleanup();
}
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_impl.bind(cgh);
+ m_output.bind(cgh);
+ }
+#endif
protected:
TensorEvaluator<ArgType, Device> m_impl;
- const Device& m_device;
+ const Device EIGEN_DEVICE_REF m_device;
const bool m_exclusive;
Op m_accumulator;
const Index m_size;
Index m_stride;
- CoeffReturnType* m_output;
+ Index m_consume_dim;
+ EvaluatorPointerType m_output;
};
// CPU implementation of scan