path: root/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
author    Mehdi Goli <mehdi.goli@codeplay.com>  2019-06-28 10:08:23 +0100
committer Mehdi Goli <mehdi.goli@codeplay.com>  2019-06-28 10:08:23 +0100
commit 7d08fa805a38f9ebb9e0e487c4e2d23d32a0fcde (patch)
tree   fbff4d80b6b373dcd53632de4c1fab5c393bdd64 /unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
parent 16a56b2dddbfaf2d4b81d62be5e3139f12783ac8 (diff)
[SYCL] This PR adds the minimum modifications to the Eigen unsupported module required to run it on devices supporting SYCL.
* Abstracting the pointer type so that both SYCL memory and raw pointers can be captured.
* Converting SYCL virtual pointers to SYCL device memory in the Eigen evaluator class.
* Binding SYCL placeholder accessors to the command group handler by using the bind method in the Eigen evaluator node.
* Adding a SYCL macro for controlling loop unrolling.
* Modifying TensorDeviceSycl.h and the SYCL executor method to adopt the above changes.
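The first and third bullets are the heart of the patch: an evaluator can no longer assume its data handle is a raw CoeffReturnType*, because on SYCL it is a placeholder accessor that must be bound to a command group handler before the kernel is launched. Below is a minimal, self-contained sketch of that pattern; the HostDevice, FakeSyclDevice, PlaceholderAccessor, and CommandGroupHandler types are hypothetical stand-ins, and only the StorageMemory/EvaluatorPointerType/bind names mirror the diff.

#include <cstddef>
#include <iostream>
#include <type_traits>

// Hypothetical stand-ins for a host device, a SYCL-like device, and a
// command group handler (cl::sycl::handler in real SYCL); none of these
// are the actual Eigen or SYCL types.
struct HostDevice {};
struct FakeSyclDevice {};
struct CommandGroupHandler {};

// A SYCL-style placeholder accessor: created without a handler and bound
// to one just before the kernel is launched.
template <typename T>
struct PlaceholderAccessor {
  T* virtual_ptr = nullptr;
  void bind(CommandGroupHandler&) { std::cout << "accessor bound\n"; }
  T& operator[](std::size_t i) const { return virtual_ptr[i]; }
};

// The pointer-type abstraction: a raw pointer on host devices, a
// placeholder accessor on the SYCL device.
template <typename T, typename Device>
struct StorageMemory { typedef T* Type; };

template <typename T>
struct StorageMemory<T, FakeSyclDevice> {
  typedef PlaceholderAccessor<T> Type;
};

// An evaluator-like node stores the abstracted pointer type and exposes
// bind() so an executor can bind every accessor before launching.
template <typename T, typename Device>
struct Evaluator {
  typedef typename StorageMemory<T, Device>::Type EvaluatorPointerType;
  EvaluatorPointerType m_data;
  void bind(CommandGroupHandler& cgh) { m_data.bind(cgh); }
};

int main() {
  // Host path: the abstraction degenerates to a plain pointer.
  static_assert(
      std::is_same<StorageMemory<float, HostDevice>::Type, float*>::value,
      "host storage is a raw pointer");

  // SYCL-like path: bind the placeholder accessor before "launching".
  Evaluator<float, FakeSyclDevice> eval;
  CommandGroupHandler cgh;
  eval.bind(cgh);
  return 0;
}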
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h | 94
1 file changed, 55 insertions, 39 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 5352c8f7b..8f6e987b3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -37,7 +37,7 @@ struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprTyp
template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{
- typedef const TensorReshapingOp<NewDimensions, XprType>& type;
+ typedef const TensorReshapingOp<NewDimensions, XprType>EIGEN_DEVICE_REF type;
};
template<typename NewDimensions, typename XprType>
@@ -106,6 +106,9 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
+ typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
static const int NumOutputDims = internal::array_size<Dimensions>::value;
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
@@ -168,7 +171,7 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
return m_impl.evalSubExprsIfNeeded(data);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
@@ -326,10 +329,18 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
}
}
- EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return const_cast<Scalar*>(m_impl.data()); }
+ EIGEN_DEVICE_FUNC typename Storage::Type data() const {
+ return constCast(m_impl.data());
+ }
EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+ #ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_impl.bind(cgh);
+ }
+ #endif
protected:
TensorEvaluator<ArgType, Device> m_impl;
NewDimensions m_dimensions;
@@ -404,7 +415,7 @@ struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<Xp
template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{
- typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
+ typedef const TensorSlicingOp<StartIndices, Sizes, XprType>EIGEN_DEVICE_REF type;
};
template<typename StartIndices, typename Sizes, typename XprType>
@@ -488,7 +499,7 @@ template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice> {
// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_SYCL
-template <typename Index> struct MemcpyTriggerForSlicing<Index, const Eigen::SyclDevice> {
+template <typename Index> struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice> {
EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
};
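As the comment above notes, starting a device-side memcpy has a high fixed cost, so the trigger is templated on the device to let each backend pick its own cut-off. A sketch of that specialization pattern (the FakeGpuDevice type and the host behavior are illustrative assumptions, not the actual Eigen defaults):

// Generic trigger: on the host, memcpy is cheap, so always take it.
template <typename Index, typename Device>
struct Trigger {
  bool operator()(Index) const { return true; }
};

struct FakeGpuDevice {};  // hypothetical stand-in for a GPU-like device

// Accelerator specialization: high launch overhead, so only bulk-copy
// large contiguous runs (threshold value mirrors the diff).
template <typename Index>
struct Trigger<Index, FakeGpuDevice> {
  bool operator()(Index val) const { return val > 4 * 1024 * 1024; }
};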
@@ -508,6 +519,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef Sizes Dimensions;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
+ typedef typename Storage::Type EvaluatorPointerType;
enum {
// Alignment can't be guaranteed at compile time since it depends on the
@@ -575,7 +589,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL);
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
&& data && m_impl.data()
@@ -599,10 +613,10 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
// Use memcpy if it's going to be faster than using the regular evaluation.
const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
if (trigger(contiguous_values)) {
- Scalar* src = (Scalar*)m_impl.data();
+ EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
Index offset = srcCoeff(i);
- m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
+ m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src+offset), contiguous_values * sizeof(Scalar));
}
return false;
}
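The new m_device.get(...) calls in this hunk are where the second bullet of the commit message lands: on SYCL the evaluator's pointer is a virtual one, and get resolves it to real device memory before the raw memcpy, while on host devices get can be the identity. A self-contained sketch of that idiom (the pointer-map inside FakeSyclDevice is a hypothetical stand-in, not the actual SYCL virtual-pointer machinery):

#include <cstring>
#include <unordered_map>

struct HostDevice {
  // Host pointers are already real memory: get() is the identity.
  template <typename T> T* get(T* ptr) const { return ptr; }
  void memcpy(void* dst, const void* src, std::size_t n) const {
    std::memcpy(dst, src, n);
  }
};

struct FakeSyclDevice {
  // Hypothetical virtual-to-real mapping, standing in for the SYCL
  // virtual-pointer mechanism.
  std::unordered_map<const void*, void*> mapping;
  void* get(const void* virtual_ptr) const { return mapping.at(virtual_ptr); }
  void memcpy(void* dst, const void* src, std::size_t n) const {
    std::memcpy(dst, src, n);  // real SYCL would enqueue a device copy
  }
};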
@@ -637,6 +651,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
Index inputIndices[] = {0, 0};
Index indices[] = {index, index + packetSize - 1};
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+ EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) {
const Index idx0 = indices[0] / m_fastOutputStrides[i];
const Index idx1 = indices[1] / m_fastOutputStrides[i];
@@ -648,6 +663,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
inputIndices[0] += (indices[0] + m_offsets[0]);
inputIndices[1] += (indices[1] + m_offsets[0]);
} else {
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) {
const Index idx0 = indices[0] / m_fastOutputStrides[i];
const Index idx1 = indices[1] / m_fastOutputStrides[i];
@@ -667,6 +683,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
values[0] = m_impl.coeff(inputIndices[0]);
values[packetSize-1] = m_impl.coeff(inputIndices[1]);
+ EIGEN_UNROLL_LOOP
for (int i = 1; i < packetSize-1; ++i) {
values[i] = coeff(index+i);
}
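The EIGEN_UNROLL_LOOP annotations added throughout these hunks are the commit's fourth bullet: a macro that lets Eigen control loop unrolling per toolchain instead of relying on each compiler's defaults. A sketch of the macro pattern on a srcCoeff-style loop (the pragma spelling and the macro body are illustrative assumptions, not Eigen's actual definition):

// Illustrative definition: expand to an unroll hint where supported,
// to nothing elsewhere.
#if defined(__clang__)
#define EIGEN_UNROLL_LOOP _Pragma("unroll")
#else
#define EIGEN_UNROLL_LOOP
#endif

// Small, fixed trip count: exactly the kind of index-arithmetic loop the
// diff annotates (compare the srcCoeff bodies later in the diff).
template <int NumDims>
long srcCoeff(long index, const long (&outputStrides)[NumDims],
              const long (&inputStrides)[NumDims],
              const long (&offsets)[NumDims]) {
  long inputIndex = 0;
  EIGEN_UNROLL_LOOP
  for (int i = NumDims - 1; i >= 0; --i) {
    const long idx = index / outputStrides[i];
    inputIndex += idx * inputStrides[i] + offsets[i];
    index -= idx * outputStrides[i];
  }
  return inputIndex;
}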
@@ -698,8 +715,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
m_impl.block(&input_block);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const {
- Scalar* result = const_cast<Scalar*>(m_impl.data());
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
+ typename Storage::Type result = constCast(m_impl.data());
if (result) {
Index offset = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@@ -733,19 +750,19 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
}
return NULL;
}
- /// used by sycl
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const{
- return m_impl;
- }
- /// used by sycl
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& startIndices() const{
- return m_offsets;
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_impl.bind(cgh);
}
+#endif
+
protected:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
{
Index inputIndex = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+ EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_fastOutputStrides[i];
inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
@@ -753,6 +770,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
}
inputIndex += (index + m_offsets[0]);
} else {
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) {
const Index idx = index / m_fastOutputStrides[i];
inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
@@ -767,7 +785,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
array<Index, NumDims> m_inputStrides;
TensorEvaluator<ArgType, Device> m_impl;
- const Device& m_device;
+ const Device EIGEN_DEVICE_REF m_device;
Dimensions m_dimensions;
bool m_is_identity;
const StartIndices m_offsets;
@@ -829,6 +847,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
Index inputIndices[] = {0, 0};
Index indices[] = {index, index + packetSize - 1};
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+ EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) {
const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
@@ -840,6 +859,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
inputIndices[0] += (indices[0] + this->m_offsets[0]);
inputIndices[1] += (indices[1] + this->m_offsets[0]);
} else {
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) {
const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
@@ -859,6 +879,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
this->m_impl.coeffRef(inputIndices[0]) = values[0];
this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
+ EIGEN_UNROLL_LOOP
for (int i = 1; i < packetSize-1; ++i) {
this->coeffRef(index+i) = values[i];
}
@@ -892,7 +913,7 @@ struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprTyp
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
{
- typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type;
+ typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>EIGEN_DEVICE_REF type;
};
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
@@ -969,6 +990,8 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
typedef Strides Dimensions;
enum {
@@ -985,8 +1008,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device),
m_device(device),
- m_strides(op.strides()), m_exprStartIndices(op.startIndices()),
- m_exprStopIndices(op.stopIndices())
+ m_strides(op.strides())
{
// Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
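The constructor's comment about degenerate intervals means that reversed or out-of-range start/stop pairs are clamped into the valid range so the corresponding slice dimension collapses to zero instead of producing an invalid extent. A simplified sketch of that clamping (positive strides only; this hypothetical helper is not the evaluator's actual code, which also handles negative strides):

#include <algorithm>
#include <cstddef>
#include <vector>

// Clamp each start/stop pair into [0, dim] so that degenerate requests
// (start >= stop, or indices out of range) yield a zero-sized dimension.
std::vector<long> clampedSliceSizes(const std::vector<long>& start,
                                    const std::vector<long>& stop,
                                    const std::vector<long>& dims) {
  std::vector<long> sizes(dims.size());
  for (std::size_t i = 0; i < dims.size(); ++i) {
    const long lo = std::min(std::max(start[i], 0L), dims[i]);
    const long hi = std::min(std::max(stop[i], 0L), dims[i]);
    sizes[i] = std::max(hi - lo, 0L);  // zero if the interval is degenerate
  }
  return sizes;
}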
@@ -1069,7 +1091,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
@@ -1091,30 +1113,28 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
return NULL;
}
-
- //use by sycl
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& exprStartIndices() const { return m_exprStartIndices; }
- //use by sycl
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& exprStopIndices() const { return m_exprStopIndices; }
- //use by sycl
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& strides() const { return m_strides; }
- /// used by sycl
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const{return m_impl;}
-
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_impl.bind(cgh);
+ }
+#endif
protected:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
{
Index inputIndex = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+ EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i >= 0; --i) {
const Index idx = index / m_fastOutputStrides[i];
inputIndex += idx * m_inputStrides[i] + m_offsets[i];
index -= idx * m_outputStrides[i];
}
} else {
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims; ++i) {
const Index idx = index / m_fastOutputStrides[i];
inputIndex += idx * m_inputStrides[i] + m_offsets[i];
@@ -1125,7 +1145,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
}
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
-#ifndef __SYCL_DEVICE_ONLY__
+#ifndef SYCL_DEVICE_ONLY
return numext::maxi(min, numext::mini(max,value));
#else
return cl::sycl::clamp(value, min, max);
@@ -1137,15 +1157,11 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
array<Index, NumDims> m_inputStrides;
bool m_is_identity;
TensorEvaluator<ArgType, Device> m_impl;
- const Device& m_device;
+ const Device EIGEN_DEVICE_REF m_device;
DSizes<Index, NumDims> m_startIndices; // clamped startIndices
DSizes<Index, NumDims> m_dimensions;
DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
const Strides m_strides;
- //use by sycl
- const StartIndices m_exprStartIndices;
- //use by sycl
- const StopIndices m_exprStopIndices;
};
// Eval as lvalue