Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h               |  2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBase.h                 | 24
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h                | 30
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h          |  6
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h     |  4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h            |  4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h             | 38
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h           |  4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h  |  2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h            |  4
10 files changed, 57 insertions(+), 61 deletions(-)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index f1f877c16..bcaf5c97f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -187,7 +187,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(TensorBlock* block) {
if (TensorEvaluator<LeftArgType, Device>::RawAccess &&
- m_leftImpl.data() != nullptr) {
+ m_leftImpl.data() != NULL) {
TensorBlock left_block(block->first_coeff_index(), block->block_sizes(),
block->tensor_strides(), block->tensor_strides(),
m_leftImpl.data() + block->first_coeff_index());
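The one-line change above swaps the C++11 keyword nullptr for the NULL macro, which sets the theme for this patch: keeping the Tensor module buildable with pre-C++11 compilers. A minimal sketch (not Eigen code) of the portability difference:

```cpp
// Compiles under -std=c++03 as well as C++11; 'nullptr' needs C++11.
#include <cstddef>  // defines NULL

float* find_buffer() { return NULL; }  // hypothetical helper

int main() {
  float* data = find_buffer();
  if (data != NULL) {  // C++03-portable null check
    *data = 1.0f;
  }
  return 0;
}
```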
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 7cc71d99e..9b9d330c1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -538,8 +538,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
// Fourier transforms
template <int FFTDataType, int FFTDirection, typename FFT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>
- fft(const FFT& fft) const {
- return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), fft);
+ fft(const FFT& dims) const {
+ return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), dims);
}
// Scan.
@@ -723,8 +723,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
template <typename Broadcast> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorBroadcastingOp<const Broadcast, const Derived>
- broadcast(const Broadcast& broadcast) const {
- return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), broadcast);
+ broadcast(const Broadcast& bcast) const {
+ return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), bcast);
}
template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -832,8 +832,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
}
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorShufflingOp<const Shuffle, const Derived>
- shuffle(const Shuffle& shuffle) const {
- return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
+ shuffle(const Shuffle& shfl) const {
+ return TensorShufflingOp<const Shuffle, const Derived>(derived(), shfl);
}
template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorStridingOp<const Strides, const Derived>
@@ -1030,13 +1030,13 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorShufflingOp<const Shuffle, const Derived>
- shuffle(const Shuffle& shuffle) const {
- return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
+ shuffle(const Shuffle& shfl) const {
+ return TensorShufflingOp<const Shuffle, const Derived>(derived(), shfl);
}
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorShufflingOp<const Shuffle, Derived>
- shuffle(const Shuffle& shuffle) {
- return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle);
+ shuffle(const Shuffle& shfl) {
+ return TensorShufflingOp<const Shuffle, Derived>(derived(), shfl);
}
template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1052,8 +1052,8 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
// Select the device on which to evaluate the expression.
template <typename DeviceType>
- TensorDevice<Derived, DeviceType> device(const DeviceType& device) {
- return TensorDevice<Derived, DeviceType>(device, derived());
+ TensorDevice<Derived, DeviceType> device(const DeviceType& dev) {
+ return TensorDevice<Derived, DeviceType>(dev, derived());
}
protected:
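Each rename in TensorBase.h (fft -> dims, broadcast -> bcast, shuffle -> shfl, device -> dev) removes a parameter whose name shadowed the enclosing member function; that pattern draws shadowing diagnostics on some compilers and makes the function name unusable inside its own body. A minimal sketch (not Eigen code) of the pattern:

```cpp
struct TensorLike {
  // Before: the parameter 'shuffle' hides the member function 'shuffle'
  // inside the body, which some compilers flag as shadowing.
  //   int shuffle(int shuffle) const { return shuffle; }

  // After: an abbreviated parameter name keeps both names visible.
  int shuffle(int shfl) const { return shfl; }
};

int main() {
  TensorLike t;
  return t.shuffle(0);
}
```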
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index 84cf6d216..21a6b66e8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -60,7 +60,7 @@ struct cond<RowMajor> {
* - kSkewedInnerDims: 100 blocks of size 100x1 (or 1x100 depending on a column
* or row major layout)
*/
-enum class TensorBlockShapeType {
+enum TensorBlockShapeType {
kUniformAllDims,
kSkewedInnerDims,
};
@@ -89,7 +89,7 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements(
// policy if block shapes/sizes conflict).
*block_shape = resources[0].block_shape;
*block_total_size = resources[0].block_total_size;
- for (int i = 1; i < resources.size(); ++i) {
+ for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims &&
*block_shape != TensorBlockShapeType::kSkewedInnerDims) {
*block_shape = TensorBlockShapeType::kSkewedInnerDims;
@@ -152,11 +152,11 @@ struct TensorBlockCopyOp {
const Scalar* src_base = &src_data[src_index];
Scalar* dst_base = &dst_data[dst_index];
- using Src = const Eigen::Array<Scalar, Dynamic, 1>;
- using Dst = Eigen::Array<Scalar, Dynamic, 1>;
+ typedef const Eigen::Array<Scalar, Dynamic, 1> Src;
+ typedef Eigen::Array<Scalar, Dynamic, 1> Dst;
- using SrcMap = Eigen::Map<Src, 0, InnerStride<>>;
- using DstMap = Eigen::Map<Dst, 0, InnerStride<>>;
+ typedef Eigen::Map<Src, 0, InnerStride<> > SrcMap;
+ typedef Eigen::Map<Dst, 0, InnerStride<> > DstMap;
const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride));
DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride));
@@ -401,13 +401,13 @@ struct TensorBlockCwiseBinaryOp {
const StorageIndex left_stride, const LeftScalar* left_data,
const StorageIndex right_index, const StorageIndex right_stride,
const RightScalar* right_data) {
- using Lhs = const Eigen::Array<LeftScalar, Dynamic, 1>;
- using Rhs = const Eigen::Array<RightScalar, Dynamic, 1>;
- using Out = Eigen::Array<OutputScalar, Dynamic, 1>;
+ typedef const Eigen::Array<LeftScalar, Dynamic, 1> Lhs;
+ typedef const Eigen::Array<RightScalar, Dynamic, 1> Rhs;
+ typedef Eigen::Array<OutputScalar, Dynamic, 1> Out;
- using LhsMap = Eigen::Map<Lhs, 0, InnerStride<>>;
- using RhsMap = Eigen::Map<Rhs, 0, InnerStride<>>;
- using OutMap = Eigen::Map<Out, 0, InnerStride<>>;
+ typedef Eigen::Map<Lhs, 0, InnerStride<> > LhsMap;
+ typedef Eigen::Map<Rhs, 0, InnerStride<> > RhsMap;
+ typedef Eigen::Map<Out, 0, InnerStride<> > OutMap;
const LeftScalar* lhs_base = &left_data[left_index];
const RightScalar* rhs_base = &right_data[right_index];
@@ -501,7 +501,7 @@ struct TensorBlockCwiseBinaryIO {
if (size == 1) {
continue;
}
- auto& state = block_iter_state[num_squeezed_dims];
+ BlockIteratorState& state = block_iter_state[num_squeezed_dims];
state.output_stride = block_strides[dim];
state.left_stride = left_strides[dim];
state.right_stride = right_strides[dim];
@@ -523,7 +523,7 @@ struct TensorBlockCwiseBinaryIO {
right_stride, right_data);
// Update index.
for (int j = 0; j < num_squeezed_dims; ++j) {
- auto& state = block_iter_state[j];
+ BlockIteratorState& state = block_iter_state[j];
if (++state.count < state.size) {
output_index += state.output_stride;
left_index += state.left_stride;
@@ -747,7 +747,7 @@ class TensorBlockMapper {
// block dimension sizes based on "square" dimension size target.
const size_t dim_size_target = static_cast<const size_t>(
std::pow(static_cast<float>(min_target_size),
- 1.0 / static_cast<float>(block_dim_sizes.rank())));
+ 1.0f / static_cast<float>(block_dim_sizes.rank())));
for (size_t i = 0; i < block_dim_sizes.rank(); ++i) {
// TODO(andydavis) Adjust the inner most 'block_dim_size' to make it
// a multiple of the packet size. Note that reducing
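TensorBlock.h collects most of the C++03 back-ports in one place: enum class becomes a plain enum, using aliases become typedefs, auto& becomes an explicit reference, the loop index adopts the container's unsigned size_type (fixing a signed/unsigned comparison), nested template arguments gain the '> >' spacing that C++03 parsing requires, and 1.0 becomes 1.0f to avoid an implicit double-to-float conversion. A minimal sketch (not Eigen code) of those spellings:

```cpp
#include <vector>

template <typename T> struct Map { T value; };

// C++11 alias (what the patch removes):
//   using IntMaps = std::vector<Map<int>>;
// C++03 equivalent: a typedef, with a space between the closing angle
// brackets so '>>' is not lexed as the right-shift operator.
typedef std::vector<Map<int> > IntMaps;

int main() {
  IntMaps maps(3);
  // C++03 has no 'auto'; an explicit size_type index also avoids the
  // signed/unsigned comparison warning fixed in the loop above.
  for (IntMaps::size_type i = 0; i < maps.size(); ++i) {
    Map<int>& m = maps[i];  // explicit reference instead of 'auto&'
    m.value = 0;
  }
  return 0;
}
```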
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 0d3ca966c..a07e32db0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -527,8 +527,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
typedef TensorEvalToOp<const KernelArgType> EvalTo;
EvalTo evalToTmp(local, m_kernelArg);
- const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value;
- internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
+ const bool Vectorize = internal::IsVectorizable<Device, KernelArgType>::value;
+ internal::TensorExecutor<const EvalTo, Device, Vectorize>::run(evalToTmp, m_device);
m_kernel = local;
m_local_kernel = true;
@@ -786,7 +786,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
};
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device)
- : m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
+ : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
{
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
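The constructor change above moves m_kernelImpl ahead of m_kernelArg so the initializer list matches the order in which the members are declared. Members are always initialized in declaration order no matter how the list is written, so a mismatched list earns -Wreorder warnings and can hide bugs when one initializer reads another member. A minimal sketch (not Eigen code):

```cpp
struct Evaluator {
  int m_kernelImpl;
  int m_kernelArg;

  // The list order matches the declaration order above. Listing
  // m_kernelArg first would still initialize m_kernelImpl first,
  // and GCC/Clang would warn under -Wreorder.
  Evaluator(int impl, int arg) : m_kernelImpl(impl), m_kernelArg(arg) {}
};

int main() { return Evaluator(1, 2).m_kernelArg == 2 ? 0 : 1; }
```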
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index cc134228a..3e3665efb 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -102,7 +102,7 @@ class Allocator {
// Build a thread pool device on top of an existing pool of threads.
struct ThreadPoolDevice {
// The ownership of the thread pool remains with the caller.
- ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr)
+ ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = NULL)
: pool_(pool), num_threads_(num_cores), allocator_(allocator) { }
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
@@ -282,7 +282,7 @@ struct ThreadPoolDevice {
// Convenience wrapper for parallelFor that does not align blocks.
void parallelFor(Index n, const TensorOpCost& cost,
std::function<void(Index, Index)> f) const {
- parallelFor(n, cost, nullptr, std::move(f));
+ parallelFor(n, cost, NULL, std::move(f));
}
// Thread pool accessor.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 8f7a81575..028902fea 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -126,7 +126,7 @@ struct TensorEvaluator
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
- std::vector<internal::TensorOpResourceRequirements>* resources) const {}
+ std::vector<internal::TensorOpResourceRequirements>*) const {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
assert(m_data != NULL);
@@ -255,7 +255,7 @@ struct TensorEvaluator<const Derived, Device>
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
- std::vector<internal::TensorOpResourceRequirements>* resources) const {}
+ std::vector<internal::TensorOpResourceRequirements>*) const {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
assert(m_data != NULL);
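Dropping the parameter name from these empty stubs documents that the argument is intentionally ignored and silences -Wunused-parameter without casts or macros. A minimal sketch (not Eigen code):

```cpp
#include <vector>

struct Requirements {};  // stand-in for TensorOpResourceRequirements

struct Evaluator {
  // The pointer is required by the interface but unused in this default
  // implementation, so the parameter is left unnamed.
  void getResourceRequirements(std::vector<Requirements>*) const {}
};

int main() {
  std::vector<Requirements> reqs;
  Evaluator().getResourceRequirements(&reqs);
  return 0;
}
```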
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 17008917a..0294aa62e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -36,7 +36,7 @@ template <typename Expression, typename Device, bool Vectorizable,
bool Tileable>
class TensorExecutor {
public:
- using StorageIndex = typename Expression::Index;
+ typedef typename Expression::Index StorageIndex;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(const Expression& expr,
@@ -60,7 +60,7 @@ template <typename Expression>
class TensorExecutor<Expression, DefaultDevice, /*Vectorizable*/ true,
/*Tileable*/ false> {
public:
- using StorageIndex = typename Expression::Index;
+ typedef typename Expression::Index StorageIndex;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(const Expression& expr,
@@ -102,21 +102,19 @@ template <typename Expression, bool Vectorizable>
class TensorExecutor<Expression, DefaultDevice, Vectorizable,
/*Tileable*/ true> {
public:
- using Scalar = typename traits<Expression>::Scalar;
- using ScalarNoConst = typename remove_const<Scalar>::type;
+ typedef typename traits<Expression>::Scalar Scalar;
+ typedef typename remove_const<Scalar>::type ScalarNoConst;
- using Evaluator = TensorEvaluator<Expression, DefaultDevice>;
- using StorageIndex = typename traits<Expression>::Index;
+ typedef TensorEvaluator<Expression, DefaultDevice> Evaluator;
+ typedef typename traits<Expression>::Index StorageIndex;
static const int NumDims = traits<Expression>::NumDimensions;
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(const Expression& expr,
const DefaultDevice& device = DefaultDevice()) {
- using TensorBlock =
- TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout>;
- using TensorBlockMapper = TensorBlockMapper<ScalarNoConst, StorageIndex,
- NumDims, Evaluator::Layout>;
+ typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock;
+ typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
Evaluator evaluator(expr, device);
Index total_size = array_prod(evaluator.dimensions());
@@ -221,7 +219,7 @@ struct EvalRange<Evaluator, StorageIndex, /*Vectorizable*/ true> {
template <typename Expression, bool Vectorizable, bool Tileable>
class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
public:
- using StorageIndex = typename Expression::Index;
+ typedef typename Expression::Index StorageIndex;
static EIGEN_STRONG_INLINE void run(const Expression& expr,
const ThreadPoolDevice& device) {
@@ -229,7 +227,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
typedef EvalRange<Evaluator, StorageIndex, Vectorizable> EvalRange;
Evaluator evaluator(expr, device);
- const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr);
+ const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
if (needs_assign) {
const StorageIndex PacketSize =
Vectorizable
@@ -249,20 +247,18 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
template <typename Expression, bool Vectorizable>
class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ true> {
public:
- using Scalar = typename traits<Expression>::Scalar;
- using ScalarNoConst = typename remove_const<Scalar>::type;
+ typedef typename traits<Expression>::Scalar Scalar;
+ typedef typename remove_const<Scalar>::type ScalarNoConst;
- using Evaluator = TensorEvaluator<Expression, ThreadPoolDevice>;
- using StorageIndex = typename traits<Expression>::Index;
+ typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
+ typedef typename traits<Expression>::Index StorageIndex;
static const int NumDims = traits<Expression>::NumDimensions;
static EIGEN_STRONG_INLINE void run(const Expression& expr,
const ThreadPoolDevice& device) {
- using TensorBlock =
- TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout>;
- using TensorBlockMapper =
- TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout>;
+ typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock;
+ typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
Evaluator evaluator(expr, device);
StorageIndex total_size = array_prod(evaluator.dimensions());
@@ -275,7 +271,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
return;
}
- const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr);
+ const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
if (needs_assign) {
TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
Index block_total_size = 0;
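Beyond the mechanical using-to-typedef rewrites in TensorExecutor.h, two idioms are worth noting: 'typename' is still required when the aliased type depends on a template parameter, and the local aliases deliberately reuse the template names (typedef TensorBlock<...> TensorBlock;), which is legal because a block-scope typedef simply hides the class template inside that function. A minimal sketch (not Eigen code) of both:

```cpp
template <typename T> struct Block {
  typedef T Scalar;
  Scalar value;
};

template <typename Expression>
struct Executor {
  // Dependent name: 'typename' is mandatory, and 'typedef' replaces the
  // C++11 'using' alias removed by the patch.
  typedef typename Expression::Index StorageIndex;

  static StorageIndex run() {
    // A block-scope typedef may reuse the template's own name; inside
    // this function 'Block' now means 'Block<StorageIndex>'.
    typedef Block<StorageIndex> Block;
    Block b;
    b.value = StorageIndex(0);
    return b.value;
  }
};

struct Expr { typedef long Index; };

int main() { return static_cast<int>(Executor<Expr>::run()); }
```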
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index a456f308b..2778bf5ec 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -124,8 +124,8 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
}
typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
EvalTo evalToTmp(m_buffer, m_op);
- const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value;
- internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device);
+ const bool Vectorize = internal::IsVectorizable<Device, const ArgType>::value;
+ internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, Vectorize>::run(evalToTmp, m_device);
return true;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
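Here, as in TensorConvolution.h above, the local PacketAccess becomes Vectorize. A plausible reading (an assumption, not confirmed by this diff) is that the old name shadowed the compile-time PacketAccess flag that evaluator classes expose in their enums; a minimal sketch (not Eigen code) of that reading:

```cpp
struct EvaluatorLike {
  enum { PacketAccess = true };  // compile-time flag, as in the evaluators

  bool evaluate() const {
    // A local 'const bool PacketAccess = ...;' would hide the enum
    // member above; the renamed local keeps the two names distinct.
    const bool Vectorize = PacketAccess;
    return Vectorize;
  }
};

int main() { return EvaluatorLike().evaluate() ? 0 : 1; }
```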
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index ec1dc0fab..0dd524a30 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -98,7 +98,7 @@ template<typename XprType> class TensorForcedEvalOp;
template<typename ExpressionType, typename DeviceType> class TensorDevice;
template<typename Derived, typename Device> struct TensorEvaluator;
-class NoOpOutputKernel;
+struct NoOpOutputKernel;
struct DefaultDevice;
struct ThreadPoolDevice;
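NoOpOutputKernel is defined as a struct, so the forward declaration now uses the matching class-key; Clang flags such mismatches under -Wmismatched-tags and MSVC warns with C4099. A minimal sketch (not Eigen code; the body here is hypothetical):

```cpp
// Forward declaration and definition should agree on the class-key.
struct NoOpOutputKernel;  // was 'class NoOpOutputKernel;'

struct NoOpOutputKernel {
  // Hypothetical no-op body standing in for the real kernel.
  template <typename Output> void operator()(Output&) const {}
};

int main() {
  NoOpOutputKernel kernel;
  int out = 0;
  kernel(out);
  return 0;
}
```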
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 0fc49255d..e25dd9cf8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -61,8 +61,8 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shuffle)
- : m_xpr(expr), m_shuffle(shuffle) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shfl)
+ : m_xpr(expr), m_shuffle(shfl) {}
EIGEN_DEVICE_FUNC
const Shuffle& shufflePermutation() const { return m_shuffle; }