author    Eugene Zhulenev <ezhulenev@google.com>  2018-08-01 11:59:04 -0700
committer Eugene Zhulenev <ezhulenev@google.com>  2018-08-01 11:59:04 -0700
commit    385b3ff12f1dd41a096908a0103873a768a8597d (patch)
tree      3d465cbc2c37c43adbe1b01bba35bb47eb92f48b /unsupported/Eigen/CXX11
parent    83c0a16baf5ecac6288cd9b74536a82de8985b31 (diff)
parent    17221115c9f7e382c84c5d053f885470e904f4a4 (diff)
Merged latest changes from upstream/eigen
Diffstat (limited to 'unsupported/Eigen/CXX11')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/Tensor.h                          |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h                    |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h              |  13
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h                  |   4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h             |   4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h               | 107
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h       |   7
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h     | 122
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h                |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h               |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h                  |  16
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h             |   6
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h                 |   9
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h          |   8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h                |  14
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h                    |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h                 |  24
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h                  |  14
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h                |   8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h                  |   4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h                 |   4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h                |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h                 |   8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h                 |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMap.h                       |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h                  |   4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h                   |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h                     |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h                 |  16
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h                   |   4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorScan.h                      |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h                 |   8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h                  |   4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h               |   2
-rw-r--r--  unsupported/Eigen/CXX11/src/util/CXX11Meta.h                         |  74
35 files changed, 266 insertions, 240 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
index e3f6e37f0..aed71b265 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -112,7 +112,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
- EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
{
// The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
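One recurring change in this merge swaps `inline` for `EIGEN_STRONG_INLINE` on hot accessors like `coeff`. A minimal sketch of what that hint typically expands to (based on Eigen's usual macro definition; verify against Eigen's Macros.h), with illustrative names:

```cpp
// Sketch, assuming EIGEN_STRONG_INLINE maps to a stronger hint where the
// compiler honors one and to plain inline elsewhere.
#if defined(_MSC_VER)
#define STRONG_INLINE_SKETCH __forceinline
#else
#define STRONG_INLINE_SKETCH inline
#endif

template <typename Scalar>
STRONG_INLINE_SKETCH const Scalar& coeff_sketch(const Scalar* data, long i) {
  return data[i];  // hot accessor: the stronger hint avoids call overhead
}

int main() {
  float v[3] = {1.f, 2.f, 3.f};
  return coeff_sketch(v, 2) == 3.f ? 0 : 1;
}
```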
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index 199ddb123..f1f877c16 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -98,7 +98,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename TensorEvaluator<RightArgType, Device>::Dimensions Dimensions;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static const int NumDims = XprType::NumDims;
enum {
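The other recurring change replaces `internal::unpacket_traits<PacketReturnType>::size` with the device-aware `PacketType<CoeffReturnType, Device>::size`. The sketch below (hypothetical trait names, not Eigen's) illustrates why packet size is better queried per device: the packet type selected for the same scalar can differ between devices.

```cpp
// Minimal sketch of a device-aware packet trait. All names are illustrative.
#include <iostream>

struct CpuDevice {};
struct GpuDevice {};

template <typename Scalar, typename Device>
struct PacketTypeSketch {  // generic fallback: no vectorization, size 1
  using type = Scalar;
  static const int size = 1;
};

template <>
struct PacketTypeSketch<float, CpuDevice> {  // e.g. a 4-wide SIMD packet
  struct Packet4f { float v[4]; };
  using type = Packet4f;
  static const int size = 4;
};

int main() {
  // An evaluator asks the device-aware trait, mirroring the diff's
  // `PacketType<CoeffReturnType, Device>::size`.
  std::cout << PacketTypeSketch<float, CpuDevice>::size << "\n";   // 4
  std::cout << PacketTypeSketch<float, GpuDevice>::size << "\n";   // 1
  return 0;
}
```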
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index a851e7f55..b6dbe5a22 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -104,7 +104,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
bool isCopy= false, nByOne = false, oneByN = false;
enum {
@@ -306,7 +306,13 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
if (isCopy) {
+ #ifdef EIGEN_GPU_COMPILE_PHASE
+ // See PR 437: on NVIDIA P100 and K20m we observed a x3-4 speed up by enforcing
+ // unaligned loads here. The reason is unclear though.
+ return m_impl.template packet<Unaligned>(index);
+ #else
return m_impl.template packet<LoadMode>(index);
+ #endif
} else if (oneByN && !nByOne) {
return packetNByOne<LoadMode>(index);
} else if (!oneByN && nByOne) {
@@ -318,7 +324,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
}
} else {
if (isCopy) {
+ #ifdef EIGEN_GPU_COMPILE_PHASE
+ // See above.
+ return m_impl.template packet<Unaligned>(index);
+ #else
return m_impl.template packet<LoadMode>(index);
+ #endif
} else if (oneByN && !nByOne) {
return packetOneByN<LoadMode>(index);
} else if (!oneByN && nByOne) {
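A self-contained sketch of the load-mode switch introduced above, with illustrative names; only the `EIGEN_GPU_COMPILE_PHASE` guard matches the diff:

```cpp
// The packet load mode is chosen at preprocessing time depending on whether
// we are in the GPU compile phase (per the diff's comment, forcing unaligned
// loads there was measured ~3-4x faster on P100/K20m for the copy case).
#include <cstdio>

enum { Aligned = 0, Unaligned = 1 };

struct Source {
  template <int Mode>
  float packet(int i) const {             // stand-in for evaluator::packet<Mode>
    return static_cast<float>(i + Mode);  // Mode only to show which path ran
  }
};

template <int LoadMode>
float load(const Source& s, int i) {
#ifdef EIGEN_GPU_COMPILE_PHASE
  return s.template packet<Unaligned>(i);  // device: force unaligned loads
#else
  return s.template packet<LoadMode>(i);   // host: honor the caller's mode
#endif
}

int main() {
  Source s;
  std::printf("%f\n", load<Aligned>(s, 41));
  return 0;
}
```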
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index a0d039e64..7579ab507 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -138,7 +138,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
@@ -417,7 +417,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = false,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
index 9f0321880..27c92d8f6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
@@ -251,7 +251,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
- const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+ const int packetSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + packetSize - 1 < dimensions().TotalSize());
@@ -354,7 +354,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketReturnType& x)
{
- const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+ const int packetSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize());
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 12cfa8df3..e1649fb47 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -177,9 +177,9 @@ struct NoOpOutputKernel {
*/
template <typename Index, typename Scalar>
EIGEN_ALWAYS_INLINE void operator()(
- const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
- const TensorContractionParams& params, Index i, Index j, Index num_rows,
- Index num_cols) const {}
+ const OutputKernel::OutputMapper<Index, Scalar>& /*output_mapper*/,
+ const TensorContractionParams& /*params*/, Index /*i*/,
+ Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {}
};
template<typename Indices, typename LhsXprType, typename RhsXprType, typename OutputKernelType = const NoOpOutputKernel>
@@ -239,7 +239,7 @@ struct TensorContractionEvaluatorBase
enum {
IsAligned = true,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -468,42 +468,58 @@ struct TensorContractionEvaluatorBase
}
}
- EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const {
- if (this->m_lhs_inner_dim_contiguous) {
- if (this->m_rhs_inner_dim_contiguous) {
- if (this->m_rhs_inner_dim_reordered) {
- static_cast<const Derived*>(this)->template evalProduct<true, true, true, Unaligned>(buffer);
- }
- else {
- static_cast<const Derived*>(this)->template evalProduct<true, true, false, Unaligned>(buffer);
- }
- }
- else {
- if (this->m_rhs_inner_dim_reordered) {
- static_cast<const Derived*>(this)->template evalProduct<true, false, true, Unaligned>(buffer);
- }
- else {
- static_cast<const Derived*>(this)->template evalProduct<true, false, false, Unaligned>(buffer);
- }
- }
+#define TENSOR_CONTRACTION_DISPATCH(METHOD, ALIGNMENT, ARGS) \
+ if (this->m_lhs_inner_dim_contiguous) { \
+ if (this->m_rhs_inner_dim_contiguous) { \
+ if (this->m_rhs_inner_dim_reordered) { \
+ METHOD<true, true, true, ALIGNMENT>ARGS; \
+ } \
+ else { \
+ METHOD<true, true, false, ALIGNMENT>ARGS; \
+ } \
+ } \
+ else { \
+ if (this->m_rhs_inner_dim_reordered) { \
+ METHOD<true, false, true, ALIGNMENT>ARGS; \
+ } \
+ else { \
+ METHOD<true, false, false, ALIGNMENT>ARGS; \
+ } \
+ } \
+ } \
+ else { \
+ if (this->m_rhs_inner_dim_contiguous) { \
+ if (this->m_rhs_inner_dim_reordered) { \
+ METHOD<false, true, true, ALIGNMENT>ARGS; \
+ } \
+ else { \
+ METHOD<false, true, false, ALIGNMENT>ARGS; \
+ } \
+ } \
+ else { \
+ if (this->m_rhs_inner_dim_reordered) { \
+ METHOD<false, false, true, ALIGNMENT>ARGS; \
+ } \
+ else { \
+ METHOD<false, false, false, ALIGNMENT>ARGS; \
+ } \
+ } \
}
- else {
- if (this->m_rhs_inner_dim_contiguous) {
- if (this->m_rhs_inner_dim_reordered) {
- static_cast<const Derived*>(this)->template evalProduct<false, true, true, Unaligned>(buffer);
- }
- else {
- static_cast<const Derived*>(this)->template evalProduct<false, true, false, Unaligned>(buffer);
- }
- }
- else {
- if (this->m_rhs_inner_dim_reordered) {
- static_cast<const Derived*>(this)->template evalProduct<false, false, true, Unaligned>(buffer);
- }
- else {
- static_cast<const Derived*>(this)->template evalProduct<false, false, false, Unaligned>(buffer);
- }
- }
+
+ EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const {
+ static_cast<const Derived*>(this)->template evalProduct<Unaligned>(buffer);
+ }
+
+ template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous,
+ bool rhs_inner_dim_reordered, int Alignment>
+ void evalProductSequential(Scalar* buffer) const {
+ if (this->m_j_size == 1) {
+ this->template evalGemv<lhs_inner_dim_contiguous,
+ rhs_inner_dim_contiguous, rhs_inner_dim_reordered,
+ Alignment>(buffer);
+ } else {
+ this->template evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous,
+ rhs_inner_dim_reordered, Alignment>(buffer);
}
}
@@ -624,7 +640,7 @@ struct TensorContractionEvaluatorBase
OutputMapper output(buffer, m);
// Sizes of the blocks to load in cache. See the Goto paper for details.
- internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, 1);
+ internal::TensorContractionBlocking<LhsScalar, RhsScalar, Index, internal::ShardByCol> blocking(k, m, n, 1);
const Index kc = blocking.kc();
const Index mc = numext::mini(m, blocking.mc());
const Index nc = numext::mini(n, blocking.nc());
@@ -977,14 +993,9 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
Base(op, device) { }
- template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
- EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const {
- if (this->m_j_size == 1) {
- this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
- return;
- }
-
- this->template evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
+ template <int Alignment>
+ void evalProduct(Scalar* buffer) const {
+ TENSOR_CONTRACTION_DISPATCH(this->template evalProductSequential, Alignment, (buffer));
}
};
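`TENSOR_CONTRACTION_DISPATCH` folds the three runtime booleans (`m_lhs_inner_dim_contiguous`, `m_rhs_inner_dim_contiguous`, `m_rhs_inner_dim_reordered`) into an eight-way compile-time dispatch, so `evalProduct` itself needs only the alignment template parameter. A standalone analogue with two flags and illustrative names:

```cpp
// Two runtime flags are flattened into four compile-time instantiations, so
// each kernel body can specialize on the flags instead of branching inside.
#include <cstdio>

#define DISPATCH(METHOD, ARGS)             \
  if (flag_a) {                            \
    if (flag_b) METHOD<true, true> ARGS;   \
    else        METHOD<true, false> ARGS;  \
  } else {                                 \
    if (flag_b) METHOD<false, true> ARGS;  \
    else        METHOD<false, false> ARGS; \
  }

struct Kernel {
  bool flag_a, flag_b;

  template <bool A, bool B>
  void run(int x) const {
    std::printf("run<%d,%d>(%d)\n", int(A), int(B), x);
  }

  // Mirrors evalProduct: one runtime entry point, many static kernels.
  void dispatch(int x) const { DISPATCH(run, (x)) }
};

int main() {
  Kernel k{true, false};
  k.dispatch(7);  // instantiates and calls run<true,false>
  return 0;
}
```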
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
index 8c1af1da8..cf281192c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
@@ -21,13 +21,10 @@ enum {
// Default Blocking Strategy
-template <typename LhsMapper, typename RhsMapper, typename Index, int ShardingType=ShardByCol>
+template <typename LhsScalar, typename RhsScalar, typename Index, int ShardingType=ShardByCol>
class TensorContractionBlocking {
public:
- typedef typename LhsMapper::Scalar LhsScalar;
- typedef typename RhsMapper::Scalar RhsScalar;
-
/*
adding EIGEN_DEVICE_FUNC unconditionally to 'TensorContractionBlocking' constructor in `TensorContractionBlocking.h`
requires adding EIGEN_DEVICE_FUNC to `computeProductBlockingSizes` in `GeneralBlockPanelKernel.h`
@@ -41,7 +38,7 @@ class TensorContractionBlocking {
../Eigen/src/Core/products/GeneralBlockPanelKernel.h(57): error #2901:
dynamic initialization is not supported for function-scope static variables within a __device__/__global__ function
*/
-
+
#if !defined(EIGEN_HIPCC)
EIGEN_DEVICE_FUNC
#endif
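Narrowing `TensorContractionBlocking` from mapper types to scalar types reflects what the blocking computation actually needs: element sizes to derive cache-block dimensions. A simplified, hypothetical sketch (not Eigen's real heuristics):

```cpp
// Block sizes depend only on the scalar types, not on the mapper types.
#include <algorithm>
#include <cstdio>

template <typename LhsScalar, typename RhsScalar, typename Index>
class BlockingSketch {
 public:
  BlockingSketch(Index k, Index m, Index n, Index num_threads) {
    // Toy heuristic: keep one LHS and one RHS block within a 256 KiB budget.
    const Index budget = 256 * 1024;
    kc_ = std::min<Index>(
        k, budget / Index(8 * (sizeof(LhsScalar) + sizeof(RhsScalar))));
    mc_ = std::min<Index>(m, 8 * num_threads);
    nc_ = std::min<Index>(n, 8 * num_threads);
  }
  Index kc() const { return kc_; }
  Index mc() const { return mc_; }
  Index nc() const { return nc_; }

 private:
  Index kc_, mc_, nc_;
};

int main() {
  BlockingSketch<float, float, long> blocking(4096, 512, 512, /*threads=*/2);
  std::printf("kc=%ld mc=%ld nc=%ld\n", blocking.kc(), blocking.mc(),
              blocking.nc());
  return 0;
}
```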
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
index 8b86d7aaf..1d145c4b1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@@ -71,8 +71,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
TensorEvaluator(const XprType& op, const Device& device) :
Base(op, device) {}
- template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous,
- bool rhs_inner_dim_reordered, int Alignment>
+ template <int Alignment>
void evalProduct(Scalar* buffer) const {
const Index m = this->m_i_size;
const Index n = this->m_j_size;
@@ -96,39 +95,6 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
}
#endif
- typedef
- typename internal::remove_const<typename EvalLeftArgType::Scalar>::type
- LhsScalar;
- typedef
- typename internal::remove_const<typename EvalRightArgType::Scalar>::type
- RhsScalar;
- typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits;
- typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
- typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
- typedef internal::TensorContractionInputMapper<
- LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t,
- contract_t, internal::packet_traits<LhsScalar>::size,
- lhs_inner_dim_contiguous, false, Unaligned>
- LhsMapper;
- typedef internal::TensorContractionInputMapper<
- RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t,
- contract_t, internal::packet_traits<RhsScalar>::size,
- rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned>
- RhsMapper;
- typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
- typedef internal::gemm_pack_lhs<LhsScalar, Index,
- typename LhsMapper::SubMapper, Traits::mr,
- Traits::LhsProgress, ColMajor>
- LhsPacker;
- typedef internal::gemm_pack_rhs<
- RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor>
- RhsPacker;
- typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
- Traits::mr, Traits::nr, false, false>
- GebpKernel;
-
-
-
// Compute a set of algorithm parameters:
// - kernel block sizes (bm, bn, bk)
// - task grain sizes (number of kernels executed per task: gm, gn)
@@ -158,14 +124,14 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// Again, we don't know number of threads yet, so we use 2.
Index bm, bn, bk;
if (shard_by_col) {
- internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
+ internal::TensorContractionBlocking<LhsScalar, RhsScalar, Index,
internal::ShardByCol>
blocking(k, m, n, 2);
bm = blocking.mc();
bn = blocking.nc();
bk = blocking.kc();
} else {
- internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
+ internal::TensorContractionBlocking<LhsScalar, RhsScalar, Index,
internal::ShardByRow>
blocking(k, m, n, 2);
bm = blocking.mc();
@@ -187,29 +153,22 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
if (n == 1) num_threads = 1;
if (num_threads == 1) {
- // The single-threaded algorithm should be faster in this case.
- if (n == 1)
- this->template evalGemv<lhs_inner_dim_contiguous,
- rhs_inner_dim_contiguous,
- rhs_inner_dim_reordered, Alignment>(buffer);
- else
- this->template evalGemm<lhs_inner_dim_contiguous,
- rhs_inner_dim_contiguous,
- rhs_inner_dim_reordered, Alignment>(buffer);
+ TENSOR_CONTRACTION_DISPATCH(this->template evalProductSequential,
+ Unaligned, (buffer));
return;
}
// Now that we know number of threads, recalculate sharding and blocking.
shard_by_col = shardByCol(m, n, num_threads);
if (shard_by_col) {
- internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
+ internal::TensorContractionBlocking<LhsScalar, RhsScalar, Index,
internal::ShardByCol>
blocking(k, m, n, num_threads);
bm = blocking.mc();
bn = blocking.nc();
bk = blocking.kc();
} else {
- internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
+ internal::TensorContractionBlocking<LhsScalar, RhsScalar, Index,
internal::ShardByRow>
blocking(k, m, n, num_threads);
bm = blocking.mc();
@@ -257,34 +216,55 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// more important in this case.
if ((shard_by_col ? nm : nn) == 1) parallel_pack = false;
- LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides,
- this->m_i_strides, this->m_left_contracting_strides,
- this->m_k_strides);
+ #define CONTEXT_ARGS \
+ (this, num_threads, buffer, m, n, k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, \
+ nn0, shard_by_col, parallel_pack) \
+ .run()
+
+ TENSOR_CONTRACTION_DISPATCH(Context, Alignment, CONTEXT_ARGS);
- RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides,
- this->m_j_strides, this->m_right_contracting_strides,
- this->m_k_strides);
+#undef CONTEXT_ARGS
- Context<LhsPacker, RhsPacker, GebpKernel, LhsMapper, RhsMapper,
- OutputMapper>(this, num_threads, lhs, rhs, buffer, m, n,
- k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, nn0,
- shard_by_col, parallel_pack)
- .run();
}
// Context coordinates a single parallel gemm operation.
- template <typename LhsPacker, typename RhsPacker, typename GebpKernel,
- typename LhsMapper, typename RhsMapper, typename OutputMapper>
+ template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous,
+ bool rhs_inner_dim_reordered, int Alignment>
class Context {
public:
- Context(const Self* self, int num_threads, LhsMapper& lhs,
- RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm,
- Index bn, Index bk, Index nm, Index nn, Index nk, Index gm,
- Index gn, Index nm0, Index nn0, bool shard_by_col,
+ typedef internal::TensorContractionInputMapper<
+ LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t,
+ contract_t, internal::packet_traits<LhsScalar>::size,
+ lhs_inner_dim_contiguous, false, Unaligned>
+ LhsMapper;
+ typedef internal::TensorContractionInputMapper<
+ RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t,
+ contract_t, internal::packet_traits<RhsScalar>::size,
+ rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned>
+ RhsMapper;
+ typedef internal::gemm_pack_lhs<LhsScalar, Index,
+ typename LhsMapper::SubMapper, Traits::mr,
+ Traits::LhsProgress, ColMajor>
+ LhsPacker;
+ typedef internal::gemm_pack_rhs<
+ RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor>
+ RhsPacker;
+ typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
+ typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
+ Traits::mr, Traits::nr, false, false>
+ GebpKernel;
+
+ Context(const Self* self, int num_threads, Scalar* buffer, Index tm, Index tn,
+ Index tk, Index bm, Index bn, Index bk, Index nm, Index nn, Index nk,
+ Index gm, Index gn, Index nm0, Index nn0, bool shard_by_col,
bool parallel_pack)
: device_(self->m_device),
- lhs_(lhs),
- rhs_(rhs),
+ lhs_(self->m_leftImpl, self->m_left_nocontract_strides,
+ self->m_i_strides, self->m_left_contracting_strides,
+ self->m_k_strides),
+ rhs_(self->m_rightImpl, self->m_right_nocontract_strides,
+ self->m_j_strides, self->m_right_contracting_strides,
+ self->m_k_strides),
buffer_(buffer),
output_(buffer, tm),
output_kernel_(self->m_output_kernel),
@@ -337,7 +317,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
divup<size_t>(bm_ * bk_ * sizeof(LhsScalar), align) * align;
size_t rhs_size =
divup<size_t>(bn_ * bk_ * sizeof(RhsScalar), align) * align;
- packed_mem_ = static_cast<char*>(internal::aligned_malloc(
+ packed_mem_ = static_cast<char*>(device_.allocate(
(nm0_ * lhs_size + nn0_ * rhs_size) * std::min<size_t>(nk_, P - 1)));
char* mem = static_cast<char*>(packed_mem_);
for (Index x = 0; x < numext::mini<Index>(nk_, P - 1); x++) {
@@ -359,7 +339,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m];
delete[] state_kernel_[x];
}
- internal::aligned_free(packed_mem_);
+ device_.deallocate(packed_mem_);
}
void run() {
@@ -376,8 +356,8 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
private:
Notification done_;
const Device& device_;
- LhsMapper& lhs_;
- RhsMapper& rhs_;
+ LhsMapper lhs_;
+ RhsMapper rhs_;
Scalar* const buffer_;
OutputMapper output_;
OutputKernelType output_kernel_;
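Two related changes to `Context` here: it now builds and owns its input mappers by value (constructed from `self` in the initializer list), and its packed scratch memory goes through `device_.allocate`/`device_.deallocate` instead of `internal::aligned_malloc`/`aligned_free`. An illustrative RAII sketch of the allocation side (names are stand-ins):

```cpp
#include <cstddef>
#include <cstdio>
#include <cstdlib>

struct DeviceSketch {
  void* allocate(std::size_t n) const { return std::malloc(n); }
  void deallocate(void* p) const { std::free(p); }
};

class ContextSketch {
 public:
  ContextSketch(const DeviceSketch& device, std::size_t packed_bytes)
      : device_(device),
        packed_mem_(static_cast<char*>(device_.allocate(packed_bytes))) {}
  ~ContextSketch() { device_.deallocate(packed_mem_); }  // freed via the device

 private:
  const DeviceSketch& device_;
  char* packed_mem_;  // packed LHS/RHS blocks live in device-managed memory
};

int main() {
  DeviceSketch device;
  ContextSketch ctx(device, 1 << 20);  // 1 MiB of packed scratch
  std::printf("ok\n");
  return 0;
}
```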
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
index e0cbbb315..a7751eee1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -190,7 +190,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename PacketType<SrcType, Device>::type PacketSourceType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = false,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 1ec5819a7..0d3ca966c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -302,7 +302,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
index 0e4db46de..47b5a5a5e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
@@ -87,11 +87,11 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
typedef typename internal::remove_const<typename ArgType::Scalar>::type Scalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = false,
- PacketAccess = (internal::packet_traits<Scalar>::size > 1),
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
Layout = TensorEvaluator<XprType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -112,7 +112,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
return false;
} else {
m_result = static_cast<CoeffReturnType*>(
- m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
+ m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
evalTo(m_result);
return true;
}
@@ -120,7 +120,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
if (m_result != NULL) {
- m_device.deallocate(m_result);
+ m_device.deallocate_temp(m_result);
m_result = NULL;
}
}
@@ -249,11 +249,11 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
typedef typename XprType::Scalar Scalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = false,
- PacketAccess = (internal::packet_traits<Scalar>::size > 1),
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
Layout = TensorEvaluator<LhsXprType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -273,7 +273,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
evalTo(data);
return false;
} else {
- m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
+ m_result = static_cast<Scalar *>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
evalTo(m_result);
return true;
}
@@ -281,7 +281,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
if (m_result != NULL) {
- m_device.deallocate(m_result);
+ m_device.deallocate_temp(m_result);
m_result = NULL;
}
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
index 5c1c68912..8cb95f731 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
@@ -21,6 +21,12 @@ struct DefaultDevice {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
internal::aligned_free(buffer);
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const {
+ return allocate(num_bytes);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const {
+ deallocate(buffer);
+ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
::memcpy(dst, src, n);
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
index 0c036833f..b490433db 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
@@ -207,6 +207,15 @@ struct GpuDevice {
stream_->deallocate(buffer);
}
+ EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const {
+ return stream_->allocate(num_bytes);
+ }
+
+ EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const {
+ stream_->deallocate(buffer);
+ }
+
+
EIGEN_STRONG_INLINE void* scratchpad() const {
return stream_->scratchpad();
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 90fd99027..5a16ebe50 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -105,6 +105,14 @@ struct ThreadPoolDevice {
internal::aligned_free(buffer);
}
+ EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const {
+ return allocate(num_bytes);
+ }
+
+ EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const {
+ deallocate(buffer);
+ }
+
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
::memcpy(dst, src, n);
}
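All three devices gain `allocate_temp`/`deallocate_temp`, which for now simply forward to `allocate`/`deallocate`. Presumably the separate entry points let a device specialize temporary allocations later (e.g. with a scratch pool) without touching call sites such as `TensorForcedEval`, `TensorCustomOp`, and `TensorReduction`; that motivation is an inference, not stated in the diff. A sketch:

```cpp
// Temporaries get dedicated hooks that forward to the regular allocator by
// default. The pooling idea in the comment is an assumption, not the diff.
#include <cstddef>
#include <cstdlib>

struct DefaultDeviceSketch {
  void* allocate(std::size_t n) const { return std::malloc(n); }
  void deallocate(void* p) const { std::free(p); }

  // New: temporary allocations go through their own entry points.
  void* allocate_temp(std::size_t n) const { return allocate(n); }
  void deallocate_temp(void* p) const { deallocate(p); }
};

int main() {
  DefaultDeviceSketch d;
  void* buf = d.allocate_temp(256);  // e.g. a forced-eval scratch buffer
  d.deallocate_temp(buf);
  return 0;
}
```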
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
index 5ca47cca7..4f973a5b7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -41,7 +41,7 @@ template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
struct fixed_size_tensor_index_linearization_helper
{
template <typename Dimensions> EIGEN_DEVICE_FUNC
- static inline Index run(array<Index, NumIndices> const& indices,
+ static EIGEN_STRONG_INLINE Index run(array<Index, NumIndices> const& indices,
const Dimensions& dimensions)
{
return array_get<RowMajor ? n - 1 : (NumIndices - n)>(indices) +
@@ -54,7 +54,7 @@ template<typename Index, std::size_t NumIndices, bool RowMajor>
struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
{
template <typename Dimensions> EIGEN_DEVICE_FUNC
- static inline Index run(array<Index, NumIndices> const&, const Dimensions&)
+ static EIGEN_STRONG_INLINE Index run(array<Index, NumIndices> const&, const Dimensions&)
{
return 0;
}
@@ -64,7 +64,7 @@ template<typename Index, std::size_t n>
struct fixed_size_tensor_index_extraction_helper
{
template <typename Dimensions> EIGEN_DEVICE_FUNC
- static inline Index run(const Index index,
+ static EIGEN_STRONG_INLINE Index run(const Index index,
const Dimensions& dimensions)
{
const Index mult = (index == n-1) ? 1 : 0;
@@ -77,7 +77,7 @@ template<typename Index>
struct fixed_size_tensor_index_extraction_helper<Index, 0>
{
template <typename Dimensions> EIGEN_DEVICE_FUNC
- static inline Index run(const Index,
+ static EIGEN_STRONG_INLINE Index run(const Index,
const Dimensions&)
{
return 0;
@@ -421,20 +421,20 @@ template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::si
template <typename Dims1, typename Dims2, size_t n, size_t m>
struct sizes_match_below_dim {
- static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) {
return false;
}
};
template <typename Dims1, typename Dims2, size_t n>
struct sizes_match_below_dim<Dims1, Dims2, n, n> {
- static EIGEN_DEVICE_FUNC inline bool run(Dims1& dims1, Dims2& dims2) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) {
return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &
sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2);
}
};
template <typename Dims1, typename Dims2>
struct sizes_match_below_dim<Dims1, Dims2, 0, 0> {
- static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) {
return true;
}
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index af39daa91..256d499f2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -102,7 +102,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
typedef typename XprType::Index Index;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index f9a1bd68c..8f7a81575 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -33,6 +33,7 @@ struct TensorEvaluator
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename Derived::Dimensions Dimensions;
typedef Derived XprType;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
// NumDimensions is -1 for variable dim tensors
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
@@ -40,7 +41,7 @@ struct TensorEvaluator
enum {
IsAligned = Derived::IsAligned,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
Layout = Derived::Layout,
CoordAccess = NumCoords > 0,
@@ -121,7 +122,7 @@ struct TensorEvaluator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
- internal::unpacket_traits<PacketReturnType>::size);
+ PacketType<CoeffReturnType, Device>::size);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
@@ -188,10 +189,11 @@ struct TensorEvaluator<const Derived, Device>
// NumDimensions is -1 for variable dim tensors
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
internal::traits<Derived>::NumDimensions : 0;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = Derived::IsAligned,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
Layout = Derived::Layout,
CoordAccess = NumCoords > 0,
@@ -249,7 +251,7 @@ struct TensorEvaluator<const Derived, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
- internal::unpacket_traits<PacketReturnType>::size);
+ PacketType<CoeffReturnType, Device>::size);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
@@ -300,7 +302,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
@@ -322,7 +324,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
costPerCoeff(bool vectorized) const {
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
- internal::unpacket_traits<PacketReturnType>::size);
+ PacketType<CoeffReturnType, Device>::size);
}
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
@@ -367,7 +369,7 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
@@ -445,7 +447,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
static const int NumDims = internal::array_size<
@@ -574,7 +576,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
@@ -644,7 +646,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
enum {
IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & TensorEvaluator<ElseArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess &
- internal::packet_traits<Scalar>::HasBlend,
+ PacketType<Scalar, Device>::HasBlend,
BlockAccess = false,
Layout = TensorEvaluator<IfArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -665,7 +667,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
typedef typename XprType::Index Index;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index ac5afd891..17008917a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -39,7 +39,7 @@ class TensorExecutor {
using StorageIndex = typename Expression::Index;
EIGEN_DEVICE_FUNC
- static inline void run(const Expression& expr,
+ static EIGEN_STRONG_INLINE void run(const Expression& expr,
const Device& device = Device()) {
TensorEvaluator<Expression, Device> evaluator(expr, device);
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
@@ -63,7 +63,7 @@ class TensorExecutor<Expression, DefaultDevice, /*Vectorizable*/ true,
using StorageIndex = typename Expression::Index;
EIGEN_DEVICE_FUNC
- static inline void run(const Expression& expr,
+ static EIGEN_STRONG_INLINE void run(const Expression& expr,
const DefaultDevice& device = DefaultDevice()) {
TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device);
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
@@ -111,7 +111,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
static const int NumDims = traits<Expression>::NumDimensions;
EIGEN_DEVICE_FUNC
- static inline void run(const Expression& expr,
+ static EIGEN_STRONG_INLINE void run(const Expression& expr,
const DefaultDevice& device = DefaultDevice()) {
using TensorBlock =
TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout>;
@@ -223,7 +223,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
public:
using StorageIndex = typename Expression::Index;
- static inline void run(const Expression& expr,
+ static EIGEN_STRONG_INLINE void run(const Expression& expr,
const ThreadPoolDevice& device) {
typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
typedef EvalRange<Evaluator, StorageIndex, Vectorizable> EvalRange;
@@ -257,7 +257,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
static const int NumDims = traits<Expression>::NumDimensions;
- static inline void run(const Expression& expr,
+ static EIGEN_STRONG_INLINE void run(const Expression& expr,
const ThreadPoolDevice& device) {
using TensorBlock =
TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout>;
@@ -376,7 +376,7 @@ EigenMetaKernel(Evaluator eval, StorageIndex size) {
/*static*/
template <typename Expression, bool Vectorizable, bool Tileable>
-inline void TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable>::run(
+EIGEN_STRONG_INLINE void TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable>::run(
const Expression& expr, const GpuDevice& device) {
TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
@@ -405,7 +405,7 @@ inline void TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable>::run(
template <typename Expression, bool Vectorizable>
class TensorExecutor<Expression, SyclDevice, Vectorizable> {
public:
- static inline void run(const Expression &expr, const SyclDevice &device) {
+ static EIGEN_STRONG_INLINE void run(const Expression &expr, const SyclDevice &device) {
// call TensorSYCL module
TensorSycl::run(expr, device);
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index fdb31928f..a456f308b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -93,11 +93,11 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = true,
- PacketAccess = (PacketSize > 1),
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = true
@@ -115,7 +115,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
#endif
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
const Index numValues = internal::array_prod(m_impl.dimensions());
- m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType));
+ m_buffer = (CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType));
// Should initialize the memory in case we're dealing with non POD types.
if (NumTraits<CoeffReturnType>::RequireInitialization) {
for (Index i = 0; i < numValues; ++i) {
@@ -129,7 +129,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
return true;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_device.deallocate(m_buffer);
+ m_device.deallocate_temp(m_buffer);
m_buffer = NULL;
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index 3d0e4035a..7ecd4d1ac 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -20,7 +20,7 @@ namespace internal {
template <typename Scalar>
struct scalar_mod_op {
EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {}
- EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a % m_divisor; }
const Scalar m_divisor;
};
template <typename Scalar>
@@ -34,7 +34,7 @@ struct functor_traits<scalar_mod_op<Scalar> >
template <typename Scalar>
struct scalar_mod2_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op)
- EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; }
};
template <typename Scalar>
struct functor_traits<scalar_mod2_op<Scalar> >
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
index fa269b8c6..97c8d4a02 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
@@ -90,7 +90,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
enum {
IsAligned = false,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -137,7 +137,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
- const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+ const int packetSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
index 4987b898b..39759b6c3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
@@ -241,7 +241,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
typedef TensorEvaluator<ArgType, Device> Impl;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = false,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
index 8810d78cf..98ad661ca 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
@@ -75,10 +75,10 @@ template<DenseIndex n> struct NumTraits<type2index<n> >
MulCost = 1
};
- EIGEN_DEVICE_FUNC static inline Real epsilon() { return 0; }
- EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return 0; }
- EIGEN_DEVICE_FUNC static inline Real highest() { return n; }
- EIGEN_DEVICE_FUNC static inline Real lowest() { return n; }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real dummy_precision() { return 0; }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real highest() { return n; }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real lowest() { return n; }
};
namespace internal {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
index 6147fbdf1..64f2ad81f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
@@ -85,7 +85,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
index a8e55757e..d1cc0593f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
@@ -150,6 +150,7 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
{
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
+ eigen_assert(internal::all((Eigen::NumTraits<Index>::highest() >= otherIndices)...));
if (PlainObjectType::Options&RowMajor) {
const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
return m_data[index];
@@ -237,6 +238,7 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
{
static_assert(sizeof...(otherIndices) + 2 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
+ eigen_assert(internal::all((Eigen::NumTraits<Index>::highest() >= otherIndices)...));
const std::size_t NumDims = sizeof...(otherIndices) + 2;
if (PlainObjectType::Options&RowMajor) {
const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumDims>{{firstIndex, secondIndex, otherIndices...}});
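The new `eigen_assert(internal::all((Eigen::NumTraits<Index>::highest() >= otherIndices)...))` checks every variadic index against the representable range in a single pack expansion. A self-contained sketch with a stand-in for `internal::all`:

```cpp
// `all_of_sketch` stands in for internal::all: it folds a pack of booleans
// with logical AND, so every extra index is checked against Index's range.
#include <cassert>
#include <limits>

inline bool all_of_sketch() { return true; }
template <typename... Bools>
inline bool all_of_sketch(bool b, Bools... rest) {
  return b && all_of_sketch(rest...);
}

template <typename Index, typename... IndexTypes>
void check_indices(IndexTypes... otherIndices) {
  assert(all_of_sketch((std::numeric_limits<Index>::max() >= otherIndices)...));
}

int main() {
  check_indices<int>(1, 2, 3);  // passes: all indices fit in int
  return 0;
}
```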
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 3747bff9e..2630311b8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -617,7 +617,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
- const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+ const int packetSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));
@@ -814,7 +814,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
return;
}
- const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+ const int packetSize = PacketType<CoeffReturnType, Device>::size;
Index inputIndices[] = {0, 0};
Index indices[] = {index, index + packetSize - 1};
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index ffa22f31e..aa1db3c73 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -91,7 +91,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = true,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
index 950ac32af..a0a1ad8f4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
@@ -88,7 +88,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 05c0990dc..c41783106 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -472,7 +472,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = false,
@@ -596,7 +596,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
!RunningOnGPU))) {
bool need_assign = false;
if (!data) {
- m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType)));
+ m_result = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType)));
data = m_result;
need_assign = true;
}
@@ -608,7 +608,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
if (!data) {
- data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+ data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
m_result = data;
}
Op reducer(m_reducer);
@@ -632,7 +632,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
if (!data) {
if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) {
- data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+ data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
m_result = data;
}
else {
@@ -642,7 +642,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
Op reducer(m_reducer);
if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
if (m_result) {
- m_device.deallocate(m_result);
+ m_device.deallocate_temp(m_result);
m_result = NULL;
}
return true;
@@ -665,7 +665,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
if (!data) {
if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) {
- data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+ data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
m_result = data;
}
else {
@@ -675,7 +675,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
Op reducer(m_reducer);
if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
if (m_result) {
- m_device.deallocate(m_result);
+ m_device.deallocate_temp(m_result);
m_result = NULL;
}
return true;
@@ -690,7 +690,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
if (m_result) {
- m_device.deallocate(m_result);
+ m_device.deallocate_temp(m_result);
m_result = NULL;
}
}
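
The allocate_temp/deallocate_temp hunks above move the reduction evaluator's scratch buffers onto a dedicated device entry point; per the diffstat, TensorDeviceDefault.h, TensorDeviceGpu.h and TensorDeviceThreadPool.h gain these methods in the same merge. Every allocate_temp here is matched by a deallocate_temp in either the reducer path or cleanup(), so a device may back the pair with a cheap scratch arena. A sketch of the contract, assuming the stock devices simply forward to allocate/deallocate (MyDevice is a hypothetical example, not Eigen code):

#include <cstddef>
#include <new>

struct MyDevice {
  void* allocate(std::size_t num_bytes) const { return ::operator new(num_bytes); }
  void deallocate(void* buffer) const { ::operator delete(buffer); }

  // Default behaviour mirrors the plain allocator; a custom device could
  // hand out arena-backed memory here, because temporary buffers never
  // outlive the evaluator that requested them.
  void* allocate_temp(std::size_t num_bytes) const { return allocate(num_bytes); }
  void deallocate_temp(void* buffer) const { deallocate(buffer); }
};
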
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
index bb2768ab1..9193bdd8e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
@@ -108,7 +108,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = false,
@@ -266,7 +266,7 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions() const { return this->m_dimensions; }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
index 39717efaa..b1135f297 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -95,7 +95,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
enum {
IsAligned = false,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 3add9dac0..77f47bf64 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -108,11 +108,11 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = false,
- PacketAccess = (internal::packet_traits<Scalar>::size > 1),
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -405,11 +405,11 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = false,
- PacketAccess = (internal::packet_traits<Scalar>::size > 1),
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
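
Beyond the PacketSize rename, the TensorScan and TensorShuffling hunks also re-key the PacketAccess flag on PacketType<CoeffReturnType, Device>::size. For the shuffling evaluators this tightens correctness: the old predicate consulted internal::packet_traits<Scalar>, i.e. the host traits for the scalar type, which need not agree with the packet the evaluator actually reads and writes (PacketReturnType is derived from CoeffReturnType and the device). A sketch of the predicate factored out on its own (PacketAccessFor is a hypothetical name for illustration):

#include <unsupported/Eigen/CXX11/Tensor>

// Vectorized access is only worthwhile when the device's packet for this
// coefficient type holds more than one element.
template <typename CoeffReturnType, typename Device>
struct PacketAccessFor {
  static const bool value =
      Eigen::PacketType<CoeffReturnType, Device>::size > 1;
};

// Usage mirrors the enum entries in the evaluators above:
enum { ShufflePacketAccess =
           PacketAccessFor<float, Eigen::DefaultDevice>::value };
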
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
index c09513c10..4b69072f2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
@@ -107,7 +107,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
@@ -287,7 +287,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
{
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
index ef199bfb6..3c7d8bbc0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
@@ -194,7 +194,7 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
IsAligned = false,
diff --git a/unsupported/Eigen/CXX11/src/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/util/CXX11Meta.h
index 8de3bbcab..6c95d0a6c 100644
--- a/unsupported/Eigen/CXX11/src/util/CXX11Meta.h
+++ b/unsupported/Eigen/CXX11/src/util/CXX11Meta.h
@@ -104,9 +104,9 @@ template<> struct h_skip_helper_type<0>
template<int n>
struct h_skip {
template<typename T, T... ii>
- constexpr static inline typename h_skip_helper_numeric<T, n, ii...>::type helper(numeric_list<T, ii...>) { return typename h_skip_helper_numeric<T, n, ii...>::type(); }
+ constexpr static EIGEN_STRONG_INLINE typename h_skip_helper_numeric<T, n, ii...>::type helper(numeric_list<T, ii...>) { return typename h_skip_helper_numeric<T, n, ii...>::type(); }
template<typename... tt>
- constexpr static inline typename h_skip_helper_type<n, tt...>::type helper(type_list<tt...>) { return typename h_skip_helper_type<n, tt...>::type(); }
+ constexpr static EIGEN_STRONG_INLINE typename h_skip_helper_type<n, tt...>::type helper(type_list<tt...>) { return typename h_skip_helper_type<n, tt...>::type(); }
};
template<int n, typename a> struct skip { typedef decltype(h_skip<n>::helper(a())) type; };
@@ -268,7 +268,7 @@ template<
typename Reducer
> struct reduce<Reducer>
{
- EIGEN_DEVICE_FUNC constexpr static inline int run() { return Reducer::Identity; }
+ EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE int run() { return Reducer::Identity; }
};
template<
@@ -276,7 +276,7 @@ template<
typename A
> struct reduce<Reducer, A>
{
- EIGEN_DEVICE_FUNC constexpr static inline A run(A a) { return a; }
+ EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE A run(A a) { return a; }
};
template<
@@ -285,7 +285,7 @@ template<
typename... Ts
> struct reduce<Reducer, A, Ts...>
{
- EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, Ts... ts) -> decltype(Reducer::run(a, reduce<Reducer, Ts...>::run(ts...))) {
+ EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, Ts... ts) -> decltype(Reducer::run(a, reduce<Reducer, Ts...>::run(ts...))) {
return Reducer::run(a, reduce<Reducer, Ts...>::run(ts...));
}
};
@@ -293,29 +293,29 @@ template<
/* generic binary operations */
struct sum_op {
- template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; }
+ template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a + b) { return a + b; }
static constexpr int Identity = 0;
};
struct product_op {
- template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; }
+ template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a * b) { return a * b; }
static constexpr int Identity = 1;
};
-struct logical_and_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a && b) { return a && b; } };
-struct logical_or_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a || b) { return a || b; } };
+struct logical_and_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a && b) { return a && b; } };
+struct logical_or_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a || b) { return a || b; } };
-struct equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a == b) { return a == b; } };
-struct not_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a != b) { return a != b; } };
-struct lesser_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a < b) { return a < b; } };
-struct lesser_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a <= b) { return a <= b; } };
-struct greater_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a > b) { return a > b; } };
-struct greater_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a >= b) { return a >= b; } };
+struct equal_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a == b) { return a == b; } };
+struct not_equal_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a != b) { return a != b; } };
+struct lesser_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a < b) { return a < b; } };
+struct lesser_equal_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a <= b) { return a <= b; } };
+struct greater_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a > b) { return a > b; } };
+struct greater_equal_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a >= b) { return a >= b; } };
/* generic unary operations */
-struct not_op { template<typename A> constexpr static inline auto run(A a) -> decltype(!a) { return !a; } };
-struct negation_op { template<typename A> constexpr static inline auto run(A a) -> decltype(-a) { return -a; } };
-struct greater_equal_zero_op { template<typename A> constexpr static inline auto run(A a) -> decltype(a >= 0) { return a >= 0; } };
+struct not_op { template<typename A> constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(!a) { return !a; } };
+struct negation_op { template<typename A> constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(-a) { return -a; } };
+struct greater_equal_zero_op { template<typename A> constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(a >= 0) { return a >= 0; } };
/* reductions for lists */
@@ -324,13 +324,13 @@ struct greater_equal_zero_op { template<typename A> constexpr static inline auto
// together in front... (13.0 doesn't work with array_prod/array_reduce/... anyway, but 13.1
// does...
template<typename... Ts>
-EIGEN_DEVICE_FUNC constexpr inline decltype(reduce<product_op, Ts...>::run((*((Ts*)0))...)) arg_prod(Ts... ts)
+EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE decltype(reduce<product_op, Ts...>::run((*((Ts*)0))...)) arg_prod(Ts... ts)
{
return reduce<product_op, Ts...>::run(ts...);
}
template<typename... Ts>
-constexpr inline decltype(reduce<sum_op, Ts...>::run((*((Ts*)0))...)) arg_sum(Ts... ts)
+constexpr EIGEN_STRONG_INLINE decltype(reduce<sum_op, Ts...>::run((*((Ts*)0))...)) arg_sum(Ts... ts)
{
return reduce<sum_op, Ts...>::run(ts...);
}
@@ -338,13 +338,13 @@ constexpr inline decltype(reduce<sum_op, Ts...>::run((*((Ts*)0))...)) arg_sum(Ts
/* reverse arrays */
template<typename Array, int... n>
-constexpr inline Array h_array_reverse(Array arr, numeric_list<int, n...>)
+constexpr EIGEN_STRONG_INLINE Array h_array_reverse(Array arr, numeric_list<int, n...>)
{
return {{array_get<sizeof...(n) - n - 1>(arr)...}};
}
template<typename T, std::size_t N>
-constexpr inline array<T, N> array_reverse(array<T, N> arr)
+constexpr EIGEN_STRONG_INLINE array<T, N> array_reverse(array<T, N> arr)
{
return h_array_reverse(arr, typename gen_numeric_list<int, N>::type());
}
@@ -359,7 +359,7 @@ constexpr inline array<T, N> array_reverse(array<T, N> arr)
// an infinite loop)
template<typename Reducer, typename T, std::size_t N, std::size_t n = N - 1>
struct h_array_reduce {
- EIGEN_DEVICE_FUNC constexpr static inline auto run(array<T, N> arr, T identity) -> decltype(Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr)))
+ EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(array<T, N> arr, T identity) -> decltype(Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr)))
{
return Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr));
}
@@ -368,7 +368,7 @@ struct h_array_reduce {
template<typename Reducer, typename T, std::size_t N>
struct h_array_reduce<Reducer, T, N, 0>
{
- EIGEN_DEVICE_FUNC constexpr static inline T run(const array<T, N>& arr, T)
+ EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE T run(const array<T, N>& arr, T)
{
return array_get<0>(arr);
}
@@ -377,14 +377,14 @@ struct h_array_reduce<Reducer, T, N, 0>
template<typename Reducer, typename T>
struct h_array_reduce<Reducer, T, 0>
{
- EIGEN_DEVICE_FUNC constexpr static inline T run(const array<T, 0>&, T identity)
+ EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE T run(const array<T, 0>&, T identity)
{
return identity;
}
};
template<typename Reducer, typename T, std::size_t N>
-EIGEN_DEVICE_FUNC constexpr inline auto array_reduce(const array<T, N>& arr, T identity) -> decltype(h_array_reduce<Reducer, T, N>::run(arr, identity))
+EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_reduce(const array<T, N>& arr, T identity) -> decltype(h_array_reduce<Reducer, T, N>::run(arr, identity))
{
return h_array_reduce<Reducer, T, N>::run(arr, identity);
}
@@ -392,13 +392,13 @@ EIGEN_DEVICE_FUNC constexpr inline auto array_reduce(const array<T, N>& arr, T i
/* standard array reductions */
template<typename T, std::size_t N>
-EIGEN_DEVICE_FUNC constexpr inline auto array_sum(const array<T, N>& arr) -> decltype(array_reduce<sum_op, T, N>(arr, static_cast<T>(0)))
+EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_sum(const array<T, N>& arr) -> decltype(array_reduce<sum_op, T, N>(arr, static_cast<T>(0)))
{
return array_reduce<sum_op, T, N>(arr, static_cast<T>(0));
}
template<typename T, std::size_t N>
-EIGEN_DEVICE_FUNC constexpr inline auto array_prod(const array<T, N>& arr) -> decltype(array_reduce<product_op, T, N>(arr, static_cast<T>(1)))
+EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_prod(const array<T, N>& arr) -> decltype(array_reduce<product_op, T, N>(arr, static_cast<T>(1)))
{
return array_reduce<product_op, T, N>(arr, static_cast<T>(1));
}
@@ -414,13 +414,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) {
/* zip an array */
template<typename Op, typename A, typename B, std::size_t N, int... n>
-constexpr inline array<decltype(Op::run(A(), B())),N> h_array_zip(array<A, N> a, array<B, N> b, numeric_list<int, n...>)
+constexpr EIGEN_STRONG_INLINE array<decltype(Op::run(A(), B())),N> h_array_zip(array<A, N> a, array<B, N> b, numeric_list<int, n...>)
{
return array<decltype(Op::run(A(), B())),N>{{ Op::run(array_get<n>(a), array_get<n>(b))... }};
}
template<typename Op, typename A, typename B, std::size_t N>
-constexpr inline array<decltype(Op::run(A(), B())),N> array_zip(array<A, N> a, array<B, N> b)
+constexpr EIGEN_STRONG_INLINE array<decltype(Op::run(A(), B())),N> array_zip(array<A, N> a, array<B, N> b)
{
return h_array_zip<Op>(a, b, typename gen_numeric_list<int, N>::type());
}
@@ -428,13 +428,13 @@ constexpr inline array<decltype(Op::run(A(), B())),N> array_zip(array<A, N> a, a
/* zip an array and reduce the result */
template<typename Reducer, typename Op, typename A, typename B, std::size_t N, int... n>
-constexpr inline auto h_array_zip_and_reduce(array<A, N> a, array<B, N> b, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...))
+constexpr EIGEN_STRONG_INLINE auto h_array_zip_and_reduce(array<A, N> a, array<B, N> b, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...))
{
return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...);
}
template<typename Reducer, typename Op, typename A, typename B, std::size_t N>
-constexpr inline auto array_zip_and_reduce(array<A, N> a, array<B, N> b) -> decltype(h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type()))
+constexpr EIGEN_STRONG_INLINE auto array_zip_and_reduce(array<A, N> a, array<B, N> b) -> decltype(h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type()))
{
return h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type());
}
@@ -442,13 +442,13 @@ constexpr inline auto array_zip_and_reduce(array<A, N> a, array<B, N> b) -> decl
/* apply stuff to an array */
template<typename Op, typename A, std::size_t N, int... n>
-constexpr inline array<decltype(Op::run(A())),N> h_array_apply(array<A, N> a, numeric_list<int, n...>)
+constexpr EIGEN_STRONG_INLINE array<decltype(Op::run(A())),N> h_array_apply(array<A, N> a, numeric_list<int, n...>)
{
return array<decltype(Op::run(A())),N>{{ Op::run(array_get<n>(a))... }};
}
template<typename Op, typename A, std::size_t N>
-constexpr inline array<decltype(Op::run(A())),N> array_apply(array<A, N> a)
+constexpr EIGEN_STRONG_INLINE array<decltype(Op::run(A())),N> array_apply(array<A, N> a)
{
return h_array_apply<Op>(a, typename gen_numeric_list<int, N>::type());
}
@@ -456,13 +456,13 @@ constexpr inline array<decltype(Op::run(A())),N> array_apply(array<A, N> a)
/* apply stuff to an array and reduce */
template<typename Reducer, typename Op, typename A, std::size_t N, int... n>
-constexpr inline auto h_array_apply_and_reduce(array<A, N> arr, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...))
+constexpr EIGEN_STRONG_INLINE auto h_array_apply_and_reduce(array<A, N> arr, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...))
{
return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...);
}
template<typename Reducer, typename Op, typename A, std::size_t N>
-constexpr inline auto array_apply_and_reduce(array<A, N> a) -> decltype(h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type()))
+constexpr EIGEN_STRONG_INLINE auto array_apply_and_reduce(array<A, N> a) -> decltype(h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type()))
{
return h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type());
}
@@ -476,7 +476,7 @@ template<int n>
struct h_repeat
{
template<typename t, int... ii>
- constexpr static inline array<t, n> run(t v, numeric_list<int, ii...>)
+ constexpr static EIGEN_STRONG_INLINE array<t, n> run(t v, numeric_list<int, ii...>)
{
return {{ typename id_numeric<int, ii, t>::type(v)... }};
}
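
The inline-to-EIGEN_STRONG_INLINE substitutions throughout CXX11Meta.h trade the bare keyword for Eigen's portable strong-inlining macro, so these one-line constexpr helpers receive a stronger inlining hint on compilers that honor one. The macro is defined in Eigen/src/Core/util/Macros.h along the following lines (paraphrased here as an illustration; consult the header for the authoritative form):

// Approximate shape of EIGEN_STRONG_INLINE:
#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
  #define EIGEN_STRONG_INLINE __forceinline
#else
  #define EIGEN_STRONG_INLINE inline
#endif

// Being constexpr, the helpers above remain usable in constant
// expressions regardless of how the macro expands, e.g.:
//   constexpr int p = Eigen::internal::arg_prod(2, 3, 4);  // p == 24
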