// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H
#define EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H

namespace Eigen {

// The MakePointer class is a container for the address space of a pointer,
// on the host and on the device. On the host side it simply yields a plain
// T* pointer; when EIGEN_USE_SYCL is defined, a specialisation of MakePointer
// constructs the SYCL buffer with a map_allocator to T* m_data on the host,
// so that the data can be used on the device.
template<typename T> struct MakePointer {
  typedef T* Type;
  typedef const T* ConstType;
};
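
// Illustrative sketch (not part of the original header): on a plain host
// build the nested typedefs resolve to ordinary raw pointers, e.g.
//
//   static_assert(std::is_same<MakePointer<float>::Type, float*>::value, "");
//   static_assert(std::is_same<MakePointer<float>::ConstType, const float*>::value, "");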

template <typename T>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* constCast(const T* data) {
  return const_cast<T*>(data);
}

// The StorageMemory class is a container for the device-specific pointer
// used to refer to data inside a TensorEvaluator. While a TensorExpression
// is a device-agnostic type and relies on the MakePointer class for type
// conversion, a TensorEvaluator can be specialized for a device, so it is
// possible to construct different types of temporary storage memory in
// TensorEvaluator for different devices by specializing the following
// StorageMemory class.
template<typename T, typename device> struct StorageMemory: MakePointer <T> {};
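
// Illustrative sketch (assumption, not from the original header): for devices
// without a dedicated specialisation, StorageMemory simply inherits the raw
// pointer types from MakePointer, e.g.
//
//   static_assert(std::is_same<StorageMemory<float, DefaultDevice>::Type, float*>::value, "");
//
// whereas the SYCL specialisation further below replaces the raw pointer with
// a TensorSycl RangeAccess accessor type.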

namespace internal {
template<typename A, typename B> struct Pointer_type_promotion {
  static const bool val=false;
};
template<typename A> struct Pointer_type_promotion<A, A> {
  static const bool val = true;
};
template<typename A, typename B> struct TypeConversion {
  typedef A* type;
};
}
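
// Illustrative note (hypothetical instantiations): Pointer_type_promotion
// reports whether the two scalar types match, and TypeConversion names the
// pointer type of its first argument in this primary template, e.g.
//
//   internal::Pointer_type_promotion<float, float>::val    // true
//   internal::Pointer_type_promotion<float, double>::val   // false
//   internal::TypeConversion<float, double>::type          // float*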


template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap;
template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor;
template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize;
template<typename PlainObjectType> class TensorRef;
template<typename Derived, int AccessLevel> class TensorBase;

template<typename NullaryOp, typename PlainObjectType> class TensorCwiseNullaryOp;
template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp;
template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp;
template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> class TensorCwiseTernaryOp;
template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp;
template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_ = MakePointer > class TensorReductionOp;
template<typename XprType> class TensorIndexTupleOp;
template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp;
template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp;
template<typename Dimensions, typename LeftXprType, typename RightXprType, typename OutputKernelType> class TensorContractionOp;
template<typename TargetType, typename XprType> class TensorConversionOp;
template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp;
template<typename FFT, typename XprType, int FFTDataType, int FFTDirection> class TensorFFTOp;
template<typename PatchDim, typename XprType> class TensorPatchOp;
template<DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorImagePatchOp;
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorVolumePatchOp;
template<typename Broadcast, typename XprType> class TensorBroadcastingOp;
template<DenseIndex DimId, typename XprType> class TensorChippingOp;
template<typename NewDimensions, typename XprType> class TensorReshapingOp;
template<typename XprType> class TensorLayoutSwapOp;
template<typename StartIndices, typename Sizes, typename XprType> class TensorSlicingOp;
template<typename ReverseDimensions, typename XprType> class TensorReverseOp;
template<typename PaddingDimensions, typename XprType> class TensorPaddingOp;
template<typename Shuffle, typename XprType> class TensorShufflingOp;
template<typename Strides, typename XprType> class TensorStridingOp;
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> class TensorStridingSlicingOp;
template<typename Strides, typename XprType> class TensorInflationOp;
template<typename Generator, typename XprType> class TensorGeneratorOp;
template<typename LeftXprType, typename RightXprType> class TensorAssignOp;
template<typename Op, typename XprType> class TensorScanOp;
template<typename Dims, typename XprType> class TensorTraceOp;

template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;

template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp;
template<typename XprType> class TensorForcedEvalOp;

template<typename ExpressionType, typename DeviceType> class TensorDevice;
template<typename ExpressionType, typename DeviceType, typename DoneCallback> class TensorAsyncDevice;
template<typename Derived, typename Device> struct TensorEvaluator;

struct NoOpOutputKernel;

struct DefaultDevice;
struct ThreadPoolDevice;
struct GpuDevice;
struct SyclDevice;

#ifdef EIGEN_USE_SYCL

template <typename T> struct MakeSYCLPointer {
  typedef Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T> Type;
};

template <typename T>
EIGEN_STRONG_INLINE const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>&
constCast(const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>& data) {
  return data;
}

template <typename T>
struct StorageMemory<T, SyclDevice> : MakeSYCLPointer<T> {};
template <typename T>
struct StorageMemory<T, const SyclDevice> : StorageMemory<T, SyclDevice> {};

namespace TensorSycl {
namespace internal {
template <typename Evaluator, typename Op> class GenericNondeterministicReducer;
}
}
#endif


enum FFTResultType {
  RealPart = 0,
  ImagPart = 1,
  BothParts = 2
};

enum FFTDirection {
    FFT_FORWARD = 0,
    FFT_REVERSE = 1
};


namespace internal {

template <typename Device, typename Expression>
struct IsVectorizable {
  static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
};

template <typename Expression>
struct IsVectorizable<GpuDevice, Expression> {
  static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess &&
                            TensorEvaluator<Expression, GpuDevice>::IsAligned;
};
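
// Illustrative sketch (assumption): these traits are what the executors
// declared below consult when choosing a packet (SIMD) code path, e.g.
//
//   bool vec = internal::IsVectorizable<DefaultDevice, SomeTensorExpr>::value;
//
// where SomeTensorExpr is a hypothetical tensor expression type. On GpuDevice
// the evaluator must additionally report IsAligned for the trait to be true.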

// Tiled evaluation strategy.
enum TiledEvaluation {
  Off = 0,    // tiled evaluation is not supported
  On = 1,     // still work in progress (see TensorBlock.h)
};

template <typename Device, typename Expression>
struct IsTileable {
  // Check that block evaluation is supported and it's a preferred option (at
  // least one sub-expression has much faster block evaluation, e.g.
  // broadcasting).
  static const bool BlockAccess =
      TensorEvaluator<Expression, Device>::BlockAccess &&
      TensorEvaluator<Expression, Device>::PreferBlockAccess;

  static const TiledEvaluation value =
      BlockAccess ? TiledEvaluation::On : TiledEvaluation::Off;
};
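
// Illustrative sketch (assumption): block (tiled) evaluation is selected only
// when the evaluator both supports and prefers it, e.g.
//
//   internal::TiledEvaluation tiling =
//       internal::IsTileable<DefaultDevice, SomeTensorExpr>::value;
//   // tiling == TiledEvaluation::On only if both BlockAccess and
//   // PreferBlockAccess are true for the evaluator.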

template <typename Expression, typename Device,
          bool Vectorizable      = IsVectorizable<Device, Expression>::value,
          TiledEvaluation Tiling = IsTileable<Device, Expression>::value>
class TensorExecutor;

template <typename Expression, typename Device, typename DoneCallback,
          bool Vectorizable = IsVectorizable<Device, Expression>::value,
          TiledEvaluation Tiling = IsTileable<Device, Expression>::value>
class TensorAsyncExecutor;
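
// Illustrative note (hypothetical example): thanks to the defaulted template
// parameters above, a device assignment such as
//
//   Eigen::Tensor<float, 2> out(2, 3), in(2, 3);
//   out.device(Eigen::DefaultDevice()) = in + in;
//
// is dispatched through TensorExecutor<..., DefaultDevice, Vectorizable,
// Tiling>, with the last two parameters deduced from the IsVectorizable and
// IsTileable traits declared above.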


}  // end namespace internal

}  // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H