author     Eugene Zhulenev <ezhulenev@google.com>  2018-08-27 14:34:07 -0700
committer  Eugene Zhulenev <ezhulenev@google.com>  2018-08-27 14:34:07 -0700
commit  c144bb355b74f4600156284e8202fcf9c0c135d8 (patch)
tree    3e35d145c624b544906a25a447e07104960cd77e /unsupported/Eigen/CXX11/src/Tensor
parent  35d90e89600ff2524ec8bdd4ef4b95dd7c78b656 (diff)
parent  57472886764ff71ad45338c6538649f7a8fa3d0e (diff)
Merge with upstream eigen/default
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h                 2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBase.h                   4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h                121
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h           9
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h           14
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h              27
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h      54
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h            72
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h              17
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h    8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h              30
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h              2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h               4
13 files changed, 136 insertions, 228 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index 9ec1ec726..06bf422c5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -189,7 +189,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(TensorBlock* block) {
if (TensorEvaluator<LeftArgType, Device>::RawAccess &&
- m_leftImpl.data() != nullptr) {
+ m_leftImpl.data() != NULL) {
TensorBlock left_block(block->first_coeff_index(), block->block_sizes(),
block->tensor_strides(), block->tensor_strides(),
m_leftImpl.data() + block->first_coeff_index());
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index ab3731952..9b9d330c1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -200,9 +200,9 @@ class TensorBase<Derived, ReadOnlyAccessors>
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived>
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived>
sigmoid() const {
- return unaryExpr(internal::scalar_sigmoid_op<Scalar>());
+ return unaryExpr(internal::scalar_logistic_op<Scalar>());
}
EIGEN_DEVICE_FUNC
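
The public sigmoid() API is unchanged by this rename: it now builds a TensorCwiseUnaryOp over internal::scalar_logistic_op instead of the removed internal::scalar_sigmoid_op. A minimal usage sketch:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>
    int main() {
      Eigen::Tensor<float, 1> t(3);
      t.setValues({-1.0f, 0.0f, 1.0f});
      // Computes 1 / (1 + exp(-x)) coefficient-wise via scalar_logistic_op.
      Eigen::Tensor<float, 1> s = t.sigmoid();
      std::cout << s << std::endl;  // approx. 0.2689 0.5 0.7311
      return 0;
    }
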
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index f111964dd..6d90af2d3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -62,7 +62,7 @@ struct cond<RowMajor> {
*/
enum TensorBlockShapeType {
kUniformAllDims,
- kSkewedInnerDims,
+ kSkewedInnerDims
};
struct TensorOpResourceRequirements {
@@ -73,7 +73,7 @@ struct TensorOpResourceRequirements {
// expression tree (like reductions) to communicate resources
// requirements based on local state (like the total number of reductions
// to be computed).
- TensorOpResourceRequirements(internal::TensorBlockShapeType shape,
+ TensorOpResourceRequirements(TensorBlockShapeType shape,
const Index size)
: block_shape(shape), block_total_size(size) {}
};
@@ -90,9 +90,9 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements(
*block_shape = resources[0].block_shape;
*block_total_size = resources[0].block_total_size;
for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
- if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims &&
- *block_shape != TensorBlockShapeType::kSkewedInnerDims) {
- *block_shape = TensorBlockShapeType::kSkewedInnerDims;
+ if (resources[i].block_shape == kSkewedInnerDims &&
+ *block_shape != kSkewedInnerDims) {
+ *block_shape = kSkewedInnerDims;
}
*block_total_size =
numext::maxi(*block_total_size, resources[i].block_total_size);
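
A sketch of the merge semantics above (this is internal API, shown only for illustration): a kSkewedInnerDims request from any sub-expression wins over kUniformAllDims, and the largest size budget is kept.

    // Hypothetical illustration of MergeResourceRequirements.
    using namespace Eigen;
    using namespace Eigen::internal;
    std::vector<TensorOpResourceRequirements> resources;
    resources.push_back(TensorOpResourceRequirements(kUniformAllDims, 1024));
    resources.push_back(TensorOpResourceRequirements(kSkewedInnerDims, 4096));
    TensorBlockShapeType shape;
    Index total_size;
    MergeResourceRequirements(resources, &shape, &total_size);
    // shape == kSkewedInnerDims, total_size == 4096
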
@@ -152,11 +152,11 @@ struct TensorBlockCopyOp {
const Scalar* src_base = &src_data[src_index];
Scalar* dst_base = &dst_data[dst_index];
- typedef const Eigen::Array<Scalar, Dynamic, 1> Src;
- typedef Eigen::Array<Scalar, Dynamic, 1> Dst;
+ typedef const Array<Scalar, Dynamic, 1> Src;
+ typedef Array<Scalar, Dynamic, 1> Dst;
- typedef Eigen::Map<Src, 0, InnerStride<>> SrcMap;
- typedef Eigen::Map<Dst, 0, InnerStride<>> DstMap;
+ typedef Map<Src, 0, InnerStride<> > SrcMap;
+ typedef Map<Dst, 0, InnerStride<> > DstMap;
const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride));
DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride));
@@ -178,10 +178,8 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout,
bool BlockRead>
class TensorBlockIO {
public:
- typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
- TensorBlock;
- typedef typename internal::TensorBlockCopyOp<Scalar, StorageIndex>
- TensorBlockCopyOp;
+ typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+ typedef TensorBlockCopyOp<Scalar, StorageIndex> BlockCopyOp;
protected:
struct BlockIteratorState {
@@ -194,7 +192,7 @@ class TensorBlockIO {
};
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy(
- const TensorBlock& block, StorageIndex first_coeff_index,
+ const Block& block, StorageIndex first_coeff_index,
const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data,
Scalar* dst_data) {
@@ -214,11 +212,11 @@ class TensorBlockIO {
num_size_one_inner_dims, NumDims - num_size_one_inner_dims - 1);
const StorageIndex block_dim_for_tensor_stride1_dim =
NumDims == 0 ? 1 : tensor_to_block_dim_map[tensor_stride1_dim];
- Index block_inner_dim_size =
+ StorageIndex block_inner_dim_size =
NumDims == 0 ? 1
: block.block_sizes()[block_dim_for_tensor_stride1_dim];
- for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
- const int dim = cond<Layout>()(i, NumDims - i - 1);
+ for (Index i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
+ const Index dim = cond<Layout>()(i, NumDims - i - 1);
const StorageIndex block_stride =
block.block_strides()[tensor_to_block_dim_map[dim]];
if (block_inner_dim_size == block_stride &&
@@ -260,8 +258,8 @@ class TensorBlockIO {
// Initialize block iterator state. Squeeze away any dimension of size 1.
int num_squeezed_dims = 0;
- for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
- const int dim = cond<Layout>()(i + 1, NumDims - i - 2);
+ for (Index i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
+ const Index dim = cond<Layout>()(i + 1, NumDims - i - 2);
const StorageIndex size = block.block_sizes()[tensor_to_block_dim_map[dim]];
if (size == 1) {
continue;
@@ -290,8 +288,8 @@ class TensorBlockIO {
const StorageIndex block_total_size =
NumDims == 0 ? 1 : block.block_sizes().TotalSize();
for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) {
- TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
- dst_data, inputIndex, input_stride, src_data);
+ BlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
+ dst_data, inputIndex, input_stride, src_data);
// Update index.
for (int j = 0; j < num_squeezed_dims; ++j) {
if (++block_iter_state[j].count < block_iter_state[j].size) {
@@ -320,13 +318,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
Layout, /*BlockRead=*/true> {
public:
- typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
- TensorBlock;
- typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true>
- Base;
+ typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+ typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> Base;
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- TensorBlock* block, const Scalar* src_data) {
+ Block* block, const Scalar* src_data) {
array<StorageIndex, NumDims> tensor_to_block_dim_map;
for (int i = 0; i < NumDims; ++i) {
tensor_to_block_dim_map[i] = i;
@@ -336,7 +332,7 @@ class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
}
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- TensorBlock* block, StorageIndex first_coeff_index,
+ Block* block, StorageIndex first_coeff_index,
const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) {
Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map,
@@ -357,13 +353,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
Layout, /*BlockRead=*/false> {
public:
- typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
- TensorBlock;
- typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false>
- Base;
+ typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+ typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> Base;
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- const TensorBlock& block, Scalar* dst_data) {
+ const Block& block, Scalar* dst_data) {
array<StorageIndex, NumDims> tensor_to_block_dim_map;
for (int i = 0; i < NumDims; ++i) {
tensor_to_block_dim_map[i] = i;
@@ -373,7 +367,7 @@ class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
}
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- const TensorBlock& block, StorageIndex first_coeff_index,
+ const Block& block, StorageIndex first_coeff_index,
const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) {
Base::Copy(block, first_coeff_index, tensor_to_block_dim_map,
@@ -542,13 +536,13 @@ struct TensorBlockCwiseBinaryOp {
const StorageIndex left_stride, const LeftScalar* left_data,
const StorageIndex right_index, const StorageIndex right_stride,
const RightScalar* right_data) {
- typedef const Eigen::Array<LeftScalar, Dynamic, 1> Lhs;
- typedef const Eigen::Array<RightScalar, Dynamic, 1> Rhs;
- typedef Eigen::Array<OutputScalar, Dynamic, 1> Out;
+ typedef const Array<LeftScalar, Dynamic, 1> Lhs;
+ typedef const Array<RightScalar, Dynamic, 1> Rhs;
+ typedef Array<OutputScalar, Dynamic, 1> Out;
- typedef Eigen::Map<Lhs, 0, InnerStride<>> LhsMap;
- typedef Eigen::Map<Rhs, 0, InnerStride<>> RhsMap;
- typedef Eigen::Map<Out, 0, InnerStride<>> OutMap;
+ typedef Map<Lhs, 0, InnerStride<> > LhsMap;
+ typedef Map<Rhs, 0, InnerStride<> > RhsMap;
+ typedef Map<Out, 0, InnerStride<> > OutMap;
const LeftScalar* lhs_base = &left_data[left_index];
const RightScalar* rhs_base = &right_data[right_index];
@@ -558,8 +552,7 @@ struct TensorBlockCwiseBinaryOp {
const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride));
OutMap out(out_base, num_coeff, InnerStride<>(output_stride));
- out =
- Eigen::CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
+ out = CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
}
};
@@ -575,8 +568,7 @@ struct TensorBlockCwiseBinaryOp {
template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar,
int NumDims, int Layout>
struct TensorBlockCwiseBinaryIO {
- typedef typename internal::TensorBlock<OutputScalar, StorageIndex, NumDims,
- Layout>::Dimensions Dimensions;
+ typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims, Layout>::Dimensions Dimensions;
struct BlockIteratorState {
StorageIndex output_stride, output_span;
@@ -642,7 +634,7 @@ struct TensorBlockCwiseBinaryIO {
if (size == 1) {
continue;
}
- auto& state = block_iter_state[num_squeezed_dims];
+ BlockIteratorState& state = block_iter_state[num_squeezed_dims];
state.output_stride = block_strides[dim];
state.left_stride = left_strides[dim];
state.right_stride = right_strides[dim];
@@ -664,7 +656,7 @@ struct TensorBlockCwiseBinaryIO {
right_stride, right_data);
// Update index.
for (int j = 0; j < num_squeezed_dims; ++j) {
- auto& state = block_iter_state[j];
+ BlockIteratorState& state = block_iter_state[j];
if (++state.count < state.size) {
output_index += state.output_stride;
left_index += state.left_stride;
@@ -768,15 +760,14 @@ struct TensorBlockView {
template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
class TensorBlockMapper {
public:
- typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
- TensorBlock;
+ typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
typedef DSizes<StorageIndex, NumDims> Dimensions;
TensorBlockMapper(const Dimensions& dims,
const TensorBlockShapeType block_shape,
Index min_target_size)
: m_dimensions(dims),
- m_block_dim_sizes(BlockDimensions(dims, block_shape, min_target_size)) {
+ m_block_dim_sizes(BlockDimensions(dims, block_shape, internal::convert_index<StorageIndex>(min_target_size))) {
// Calculate block counts by dimension and total block count.
DSizes<StorageIndex, NumDims> block_count;
for (Index i = 0; i < block_count.rank(); ++i) {
@@ -804,7 +795,7 @@ class TensorBlockMapper {
}
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
StorageIndex first_coeff_index = 0;
DSizes<StorageIndex, NumDims> coords;
@@ -852,8 +843,7 @@ class TensorBlockMapper {
}
}
- return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
- data);
+ return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
@@ -868,8 +858,8 @@ class TensorBlockMapper {
private:
static Dimensions BlockDimensions(const Dimensions& tensor_dims,
const TensorBlockShapeType block_shape,
- Index min_target_size) {
- min_target_size = numext::maxi<Index>(1, min_target_size);
+ StorageIndex min_target_size) {
+ min_target_size = numext::maxi<StorageIndex>(1, min_target_size);
// If tensor fully fits into the target size, we'll treat it as a single block.
Dimensions block_dim_sizes = tensor_dims;
@@ -883,12 +873,12 @@ class TensorBlockMapper {
block_dim_sizes[i] = 1;
}
} else if (block_dim_sizes.TotalSize() > min_target_size) {
- if (block_shape == TensorBlockShapeType::kUniformAllDims) {
+ if (block_shape == kUniformAllDims) {
// Tensor will not fit within 'min_target_size' budget: calculate tensor
// block dimension sizes based on "square" dimension size target.
- const Index dim_size_target = static_cast<Index>(
- std::pow(static_cast<float>(min_target_size),
- 1.0 / static_cast<float>(block_dim_sizes.rank())));
+ const StorageIndex dim_size_target = internal::convert_index<StorageIndex>(
+ std::pow(static_cast<float>(min_target_size),
+ 1.0f / static_cast<float>(block_dim_sizes.rank())));
for (Index i = 0; i < block_dim_sizes.rank(); ++i) {
// TODO(andydavis) Adjust the inner most 'block_dim_size' to make it
// a multiple of the packet size. Note that reducing
@@ -913,7 +903,7 @@ class TensorBlockMapper {
total_size = total_size_other_dims * block_dim_sizes[dim];
}
}
- } else if (block_shape == TensorBlockShapeType::kSkewedInnerDims) {
+ } else if (block_shape == kSkewedInnerDims) {
StorageIndex coeff_to_allocate = min_target_size;
for (int i = 0; i < NumDims; ++i) {
const int dim = cond<Layout>()(i, NumDims - i - 1);
@@ -929,8 +919,9 @@ class TensorBlockMapper {
}
}
- eigen_assert(block_dim_sizes.TotalSize() >=
- numext::mini<Index>(min_target_size, tensor_dims.TotalSize()));
+ eigen_assert(
+ block_dim_sizes.TotalSize() >=
+ numext::mini<Index>(min_target_size, tensor_dims.TotalSize()));
return block_dim_sizes;
}
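
As a worked example of the kUniformAllDims math above (numbers only, same computation as the code): with min_target_size = 1024 and a rank-2 tensor, dim_size_target = pow(1024, 1/2) = 32, so block sizing starts from a 32 x 32 (= 1024 coefficient) block before the per-dimension clamping and redistribution in the loop.

    #include <cmath>
    #include <cstdio>
    int main() {
      const float min_target_size = 1024.0f;
      const int rank = 2;
      const long dim_size_target = static_cast<long>(
          std::pow(min_target_size, 1.0f / static_cast<float>(rank)));
      std::printf("dim_size_target = %ld\n", dim_size_target);  // prints 32
      return 0;
    }
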
@@ -957,8 +948,7 @@ class TensorBlockMapper {
template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
class TensorSliceBlockMapper {
public:
- typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
- TensorBlock;
+ typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
typedef DSizes<StorageIndex, NumDims> Dimensions;
TensorSliceBlockMapper(const Dimensions& tensor_dims,
@@ -974,7 +964,7 @@ class TensorSliceBlockMapper {
m_total_block_count(1) {
// Calculate block counts by dimension and total block count.
DSizes<StorageIndex, NumDims> block_count;
- for (size_t i = 0; i < block_count.rank(); ++i) {
+ for (Index i = 0; i < block_count.rank(); ++i) {
block_count[i] = divup(m_tensor_slice_extents[i], m_block_dim_sizes[i]);
}
m_total_block_count = array_prod(block_count);
@@ -999,7 +989,7 @@ class TensorSliceBlockMapper {
}
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
StorageIndex first_coeff_index = 0;
DSizes<StorageIndex, NumDims> coords;
@@ -1056,8 +1046,7 @@ class TensorSliceBlockMapper {
}
}
- return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
- data);
+ return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 5e812b04d..02d061a9c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -105,7 +105,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
- bool isCopy= false, nByOne = false, oneByN = false;
+ bool isCopy, nByOne, oneByN;
enum {
IsAligned = true,
@@ -134,9 +134,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
const Device& device)
- : m_device(device),
- m_broadcast(op.broadcast()),
- m_impl(op.expression(), device) {
+ : isCopy(false), nByOne(false), oneByN(false),
+ m_device(device), m_broadcast(op.broadcast()), m_impl(op.expression(), device)
+ {
+
// The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
// and store the result in a scalar. Instead one should reshape the scalar into an N-D
// tensor with N >= 1 of 1 element first and then broadcast.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index c459fc649..f0f61fade 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -152,13 +152,7 @@ struct TensorContractionParams {
// 1. Elementwise Relu transformation following Conv2D.
// 2. AddBias to the Conv2D output channels dimension.
//
-// See expected implementation in NoOpOutputKernel.
-struct OutputKernel {
- template <typename Index, typename Scalar>
- using OutputMapper = internal::blas_data_mapper<Scalar, Index, ColMajor>;
-};
-
-// Output kernel that does absolutely nothing.
+// The NoOpOutputKernel implements an output kernel that does absolutely nothing.
struct NoOpOutputKernel {
/**
* Tensor contraction evaluator calls this kernel after finishing each block
@@ -177,7 +171,7 @@ struct NoOpOutputKernel {
*/
template <typename Index, typename Scalar>
EIGEN_ALWAYS_INLINE void operator()(
- const OutputKernel::OutputMapper<Index, Scalar>& /*output_mapper*/,
+ const internal::blas_data_mapper<Scalar, Index, ColMajor>& /*output_mapper*/,
const TensorContractionParams& /*params*/, Index /*i*/,
Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {}
};
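
A sketch of a user-defined output kernel matching the operator() signature above (ReluOutputKernel is illustrative, not part of this patch; assumes <unsupported/Eigen/CXX11/Tensor> is included):

    struct ReluOutputKernel {
      template <typename Index, typename Scalar>
      EIGEN_ALWAYS_INLINE void operator()(
          const Eigen::internal::blas_data_mapper<Scalar, Index, Eigen::ColMajor>&
              output_mapper,
          const Eigen::TensorContractionParams& /*params*/, Index /*i*/,
          Index /*j*/, Index num_rows, Index num_cols) const {
        // Clamp each coefficient of the finished output block at zero.
        for (Index c = 0; c < num_cols; ++c)
          for (Index r = 0; r < num_rows; ++r)
            output_mapper(r, c) = Eigen::numext::maxi(Scalar(0), output_mapper(r, c));
      }
    };
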
@@ -354,7 +348,7 @@ struct TensorContractionEvaluatorBase
// dimensions and right non-contracting dimensions.
m_lhs_inner_dim_contiguous = true;
int dim_idx = 0;
- unsigned int nocontract_idx = 0;
+ Index nocontract_idx = 0;
for (int i = 0; i < LDims; i++) {
// find if we are contracting on index i of left tensor
@@ -667,7 +661,7 @@ struct TensorContractionEvaluatorBase
// call gebp (matrix kernel)
// The parameters here are copied from Eigen's GEMM implementation
- const auto output_mapper = output.getSubMapper(i2, j2);
+ const OutputMapper output_mapper = output.getSubMapper(i2, j2);
gebp(output_mapper, blockA, blockB, actual_mc, actual_kc, actual_nc,
Scalar(1), -1, -1, 0, 0);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
index d71b2e34b..6ee3827f3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
@@ -88,6 +88,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+ typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
enum {
IsAligned = false,
@@ -107,12 +108,12 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
if (data) {
evalTo(data);
return false;
} else {
- m_result = static_cast<CoeffReturnType*>(
+ m_result = static_cast<PointerT>(
m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
evalTo(m_result);
return true;
@@ -140,23 +141,22 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return m_result; }
+ EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
#ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
#endif
protected:
- EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
- TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(
- data, m_dimensions);
+ EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
+ TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(data, m_dimensions);
m_op.func().eval(m_op.expression(), result, m_device);
}
Dimensions m_dimensions;
const ArgType m_op;
const Device& m_device;
- CoeffReturnType* m_result;
+ PointerT m_result;
};
@@ -251,6 +251,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+ typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
enum {
IsAligned = false,
@@ -270,12 +271,12 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
if (data) {
evalTo(data);
return false;
} else {
- m_result = static_cast<Scalar *>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
+ m_result = static_cast<PointerT>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType)));
evalTo(m_result);
return true;
}
@@ -302,22 +303,22 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC typename internal::traits<XprType>::PointerType data() const { return m_result; }
+ EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
#ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
#endif
protected:
- EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
- TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions);
+ EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
+ TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(data, m_dimensions);
m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
}
Dimensions m_dimensions;
const XprType m_op;
const Device& m_device;
- CoeffReturnType* m_result;
+ PointerT m_result;
};
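
On the default device, PointerT still resolves to a raw pointer; a compile-time sketch (assuming the PointerType declaration added in TensorForwardDeclarations.h below):

    #include <type_traits>
    #include <unsupported/Eigen/CXX11/Tensor>
    int main() {
      typedef Eigen::PointerType<float, Eigen::DefaultDevice>::Type PointerT;
      static_assert(std::is_same<PointerT, float*>::value,
                    "PointerType falls back to MakePointer<T>::Type, i.e. T*");
      return 0;
    }
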
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index cc134228a..6fc6688d3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -12,56 +12,6 @@
namespace Eigen {
-// Barrier is an object that allows one or more threads to wait until
-// Notify has been called a specified number of times.
-class Barrier {
- public:
- Barrier(unsigned int count) : state_(count << 1), notified_(false) {
- eigen_assert(((count << 1) >> 1) == count);
- }
- ~Barrier() {
- eigen_assert((state_>>1) == 0);
- }
-
- void Notify() {
- unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;
- if (v != 1) {
- eigen_assert(((v + 2) & ~1) != 0);
- return; // either count has not dropped to 0, or waiter is not waiting
- }
- std::unique_lock<std::mutex> l(mu_);
- eigen_assert(!notified_);
- notified_ = true;
- cv_.notify_all();
- }
-
- void Wait() {
- unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel);
- if ((v >> 1) == 0) return;
- std::unique_lock<std::mutex> l(mu_);
- while (!notified_) {
- cv_.wait(l);
- }
- }
-
- private:
- std::mutex mu_;
- std::condition_variable cv_;
- std::atomic<unsigned int> state_; // low bit is waiter flag
- bool notified_;
-};
-
-
-// Notification is an object that allows a user to to wait for another
-// thread to signal a notification that an event has occurred.
-//
-// Multiple threads can wait on the same Notification object,
-// but only one caller must call Notify() on the object.
-struct Notification : Barrier {
- Notification() : Barrier(1) {};
-};
-
-
// Runs an arbitrary function and then calls Notify() on the passed in
// Notification.
template <typename Function, typename... Args> struct FunctionWrapperWithNotification
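
The Barrier and Notification classes removed here are not deleted outright; upstream they presumably moved into the ThreadPool module headers. A usage sketch of the removed Barrier API, under that assumption:

    #include <thread>
    #include <unsupported/Eigen/CXX11/ThreadPool>  // Barrier's new home, presumably
    int main() {
      Eigen::Barrier barrier(2);
      std::thread t1([&barrier] { /* ... produce ... */ barrier.Notify(); });
      std::thread t2([&barrier] { /* ... produce ... */ barrier.Notify(); });
      barrier.Wait();  // blocks until both Notify() calls have happened
      t1.join();
      t2.join();
      return 0;
    }
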
@@ -102,7 +52,7 @@ class Allocator {
// Build a thread pool device on top of an existing pool of threads.
struct ThreadPoolDevice {
// The ownership of the thread pool remains with the caller.
- ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr)
+ ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = NULL)
: pool_(pool), num_threads_(num_cores), allocator_(allocator) { }
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
@@ -282,7 +232,7 @@ struct ThreadPoolDevice {
// Convenience wrapper for parallelFor that does not align blocks.
void parallelFor(Index n, const TensorOpCost& cost,
std::function<void(Index, Index)> f) const {
- parallelFor(n, cost, nullptr, std::move(f));
+ parallelFor(n, cost, NULL, std::move(f));
}
// Thread pool accessor.
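
A usage sketch of the convenience wrapper above (the cost constants are illustrative):

    #define EIGEN_USE_THREADS
    #include <unsupported/Eigen/CXX11/Tensor>
    int main() {
      Eigen::ThreadPool pool(4);
      Eigen::ThreadPoolDevice device(&pool, /*num_cores=*/4);
      const Eigen::Index n = 1 << 20;
      // Rough per-coefficient cost estimate; guides block granularity.
      Eigen::TensorOpCost cost(/*bytes_loaded=*/8, /*bytes_stored=*/8,
                               /*compute_cycles=*/4);
      device.parallelFor(n, cost, [](Eigen::Index first, Eigen::Index last) {
        // process coefficients in [first, last)
      });
      return 0;
    }
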
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
index 4f973a5b7..ce91bc2a6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -32,12 +32,12 @@ namespace Eigen {
// Boilerplate code
namespace internal {
-template<std::size_t n, typename Dimension> struct dget {
+template<std::ptrdiff_t n, typename Dimension> struct dget {
static const std::ptrdiff_t value = get<n, Dimension>::value;
};
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor>
struct fixed_size_tensor_index_linearization_helper
{
template <typename Dimensions> EIGEN_DEVICE_FUNC
@@ -50,7 +50,7 @@ struct fixed_size_tensor_index_linearization_helper
}
};
-template<typename Index, std::size_t NumIndices, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor>
struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
{
template <typename Dimensions> EIGEN_DEVICE_FUNC
@@ -60,7 +60,7 @@ struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMaj
}
};
-template<typename Index, std::size_t n>
+template<typename Index, std::ptrdiff_t n>
struct fixed_size_tensor_index_extraction_helper
{
template <typename Dimensions> EIGEN_DEVICE_FUNC
@@ -94,7 +94,7 @@ struct Sizes {
typedef internal::numeric_list<std::ptrdiff_t, Indices...> Base;
const Base t = Base();
static const std::ptrdiff_t total_size = internal::arg_prod(Indices...);
- static const size_t count = Base::count;
+ static const ptrdiff_t count = Base::count;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const {
return Base::count;
@@ -121,16 +121,16 @@ struct Sizes {
return *this;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::ptrdiff_t index) const {
return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count>::run(index, t);
}
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
+ ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, t);
}
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
+ ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, t);
}
};
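
A sketch of the fixed-size Sizes API touched here, now consistently using signed std::ptrdiff_t instead of std::size_t:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cstddef>
    #include <cstdio>
    int main() {
      Eigen::Sizes<2, 3, 4> s;
      std::ptrdiff_t r = s.rank();  // 3
      std::ptrdiff_t d = s[1];      // 3
      Eigen::array<Eigen::DenseIndex, 3> idx = {{1, 2, 3}};
      // Column-major linearization: 1 + 2*2 + 3*(2*3) = 23.
      std::ptrdiff_t lin = s.IndexOfColMajor(idx);
      std::printf("%td %td %td\n", r, d, lin);
      return 0;
    }
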
@@ -144,25 +144,25 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<Indi
#else
-template <std::size_t n>
+template <std::ptrdiff_t n>
struct non_zero_size {
- typedef internal::type2val<std::size_t, n> type;
+ typedef internal::type2val<std::ptrdiff_t, n> type;
};
template <>
struct non_zero_size<0> {
typedef internal::null_type type;
};
-template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0, std::size_t V5=0> struct Sizes {
+template <std::ptrdiff_t V1=0, std::ptrdiff_t V2=0, std::ptrdiff_t V3=0, std::ptrdiff_t V4=0, std::ptrdiff_t V5=0> struct Sizes {
typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type, typename non_zero_size<V3>::type, typename non_zero_size<V4>::type, typename non_zero_size<V5>::type >::type Base;
- static const size_t count = Base::count;
- static const std::size_t total_size = internal::arg_prod<Base>::value;
+ static const std::ptrdiff_t count = Base::count;
+ static const std::ptrdiff_t total_size = internal::arg_prod<Base>::value;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t rank() const {
return count;
}
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t TotalSize() {
return internal::arg_prod<Base>::value;
}
@@ -178,7 +178,7 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
#if EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { }
- explicit Sizes(std::initializer_list<std::size_t>) {
+ explicit Sizes(std::initializer_list<std::ptrdiff_t>) {
// todo: add assertion
}
#else
@@ -213,18 +213,18 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
}
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
+ ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *reinterpret_cast<const Base*>(this));
}
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
+ ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *reinterpret_cast<const Base*>(this));
}
};
namespace internal {
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
+template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
return Sizes<V1, V2, V3, V4, V5>::total_size;
}
}
@@ -233,7 +233,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2,
// Boilerplate
namespace internal {
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor>
struct tensor_index_linearization_helper
{
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -245,7 +245,7 @@ struct tensor_index_linearization_helper
}
};
-template<typename Index, std::size_t NumIndices, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor>
struct tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
{
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -264,7 +264,7 @@ struct DSizes : array<DenseIndex, NumDims> {
typedef array<DenseIndex, NumDims> Base;
static const int count = NumDims;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const {
return NumDims;
}
@@ -298,7 +298,7 @@ struct DSizes : array<DenseIndex, NumDims> {
}
}
#else
- template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
+ template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
EIGEN_DEVICE_FUNC DSizes(const Sizes<V1, V2, V3, V4, V5>& a) {
for (int i = 0 ; i < NumDims; ++i) {
(*this)[i] = a[i];
@@ -359,7 +359,7 @@ struct DSizes : array<DenseIndex, NumDims> {
// Boilerplate
namespace internal {
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor>
struct tensor_vsize_index_linearization_helper
{
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -371,7 +371,7 @@ struct tensor_vsize_index_linearization_helper
}
};
-template<typename Index, std::size_t NumIndices, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor>
struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor>
{
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -386,10 +386,10 @@ struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor>
namespace internal {
template <typename DenseIndex, int NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > {
- static const size_t value = NumDims;
+ static const ptrdiff_t value = NumDims;
};
template <typename DenseIndex, int NumDims> struct array_size<DSizes<DenseIndex, NumDims> > {
- static const size_t value = NumDims;
+ static const ptrdiff_t value = NumDims;
};
#ifndef EIGEN_EMULATE_CXX11_META_H
template <typename std::ptrdiff_t... Indices> struct array_size<const Sizes<Indices...> > {
@@ -399,33 +399,33 @@ template <typename std::ptrdiff_t... Indices> struct array_size<Sizes<Indices...
static const std::ptrdiff_t value = Sizes<Indices...>::count;
};
template <std::ptrdiff_t n, typename std::ptrdiff_t... Indices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<Indices...>&) {
- return get<n, internal::numeric_list<std::size_t, Indices...> >::value;
+ return get<n, internal::numeric_list<std::ptrdiff_t, Indices...> >::value;
}
template <std::ptrdiff_t n> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) {
eigen_assert(false && "should never be called");
return -1;
}
#else
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > {
- static const size_t value = Sizes<V1,V2,V3,V4,V5>::count;
+template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > {
+ static const ptrdiff_t value = Sizes<V1,V2,V3,V4,V5>::count;
};
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > {
- static const size_t value = Sizes<V1,V2,V3,V4,V5>::count;
+template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > {
+ static const ptrdiff_t value = Sizes<V1,V2,V3,V4,V5>::count;
};
-template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>&) {
+template <std::ptrdiff_t n, std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<V1,V2,V3,V4,V5>&) {
return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value;
}
#endif
-template <typename Dims1, typename Dims2, size_t n, size_t m>
+template <typename Dims1, typename Dims2, ptrdiff_t n, ptrdiff_t m>
struct sizes_match_below_dim {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) {
return false;
}
};
-template <typename Dims1, typename Dims2, size_t n>
+template <typename Dims1, typename Dims2, ptrdiff_t n>
struct sizes_match_below_dim<Dims1, Dims2, n, n> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) {
return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index d9b61dc70..ba5ab1396 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -133,7 +133,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
if (needs_assign) {
// Size tensor blocks to fit in cache (or requested target block size).
Index block_total_size = numext::mini(cache_size, total_size);
- TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
+ TensorBlockShapeType block_shape = kSkewedInnerDims;
// Query expression tree for desired block size/shape.
std::vector<TensorOpResourceRequirements> resources;
evaluator.getResourceRequirements(&resources);
@@ -229,12 +229,8 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
typedef EvalRange<Evaluator, StorageIndex, Vectorizable> EvalRange;
Evaluator evaluator(expr, device);
- const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr);
+ const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
if (needs_assign) {
- const StorageIndex PacketSize =
- Vectorizable
- ? unpacket_traits<typename Evaluator::PacketReturnType>::size
- : 1;
const StorageIndex size = array_prod(evaluator.dimensions());
device.parallelFor(size, evaluator.costPerCoeff(Vectorizable),
EvalRange::alignBlockSize,
@@ -259,12 +255,11 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
static EIGEN_STRONG_INLINE void run(const Expression& expr,
const ThreadPoolDevice& device) {
- typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock;
typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
Evaluator evaluator(expr, device);
- StorageIndex total_size = array_prod(evaluator.dimensions());
- StorageIndex cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
+ Index total_size = array_prod(evaluator.dimensions());
+ Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
if (total_size < cache_size) {
// TODO(andydavis) Reduce block management overhead for small tensors.
internal::TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
@@ -273,9 +268,9 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
return;
}
- const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr);
+ const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
if (needs_assign) {
- TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
+ TensorBlockShapeType block_shape = kSkewedInnerDims;
Index block_total_size = 0;
// Query expression tree for desired block size/shape.
std::vector<internal::TensorOpResourceRequirements> resources;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index b7a0193fe..04a8b953d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -24,6 +24,14 @@ template<typename T> struct MakePointer {
typedef T ScalarType;
};
+// The PointerType class is a container for the device-specific pointer
+// used to refer to a pointer in the TensorEvaluator class. While the TensorExpression
+// is a device-agnostic type and needs the MakePointer class for type conversion,
+// the TensorEvaluator class can be specialized for a device, hence it is possible
+// to construct different types of temporary storage memory in TensorEvaluator
+// for different devices by specializing the following PointerType class.
+template<typename T, typename Device> struct PointerType : MakePointer<T>{};
+
namespace internal{
template<typename A, typename B> struct Pointer_type_promotion {
static const bool val=false;
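
A hypothetical sketch of the extension point this enables (MyDevice is an illustrative tag, not part of this patch):

    #include <unsupported/Eigen/CXX11/Tensor>
    struct MyDevice;  // hypothetical device tag
    namespace Eigen {
    // Evaluator temporaries for MyDevice would use this Type instead of the
    // MakePointer<T>::Type (T*) default inherited by the primary template.
    template <typename T>
    struct PointerType<T, MyDevice> {
      typedef T* Type;  // a real backend could substitute a fancy pointer here
    };
    }  // namespace Eigen
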
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index 7ecd4d1ac..cd666c173 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -54,36 +54,6 @@ struct functor_traits<scalar_fmod_op<Scalar> > {
PacketAccess = false };
};
-
-/** \internal
- * \brief Template functor to compute the sigmoid of a scalar
- * \sa class CwiseUnaryOp, ArrayBase::sigmoid()
- */
-template <typename T>
-struct scalar_sigmoid_op {
- EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
- const T one = T(1);
- return one / (one + numext::exp(-x));
- }
-
- template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Packet packetOp(const Packet& x) const {
- const Packet one = pset1<Packet>(T(1));
- return pdiv(one, padd(one, pexp(pnegate(x))));
- }
-};
-
-template <typename T>
-struct functor_traits<scalar_sigmoid_op<T> > {
- enum {
- Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
- PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
- packet_traits<T>::HasNegate && packet_traits<T>::HasExp
- };
-};
-
-
template<typename Reducer, typename Device>
struct reducer_traits {
enum {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
index 98ad661ca..3f7d26b18 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
@@ -84,7 +84,7 @@ template<DenseIndex n> struct NumTraits<type2index<n> >
namespace internal {
template <typename T>
EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) {
- val = new_val;
+ val = internal::convert_index<T>(new_val);
}
template <DenseIndex n>
EIGEN_DEVICE_FUNC void update_value(type2index<n>& val, DenseIndex new_val) {
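
The point of the change (a sketch): the stored value type T may be narrower than DenseIndex, and internal::convert_index performs the debug-asserted narrowing cast:

    #include <unsupported/Eigen/CXX11/Tensor>
    int main() {
      Eigen::DenseIndex big = 42;
      // Checked narrowing from DenseIndex (ptrdiff_t) to int.
      int small = Eigen::internal::convert_index<int>(big);
      return small == 42 ? 0 : 1;
    }
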
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index a32743677..2f765acb7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -527,7 +527,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
{
- for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
+ for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) {
eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
}
@@ -985,7 +985,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
// Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped;
m_is_identity = true;
- for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
+ for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) {
if (m_strides[i] != 1 || op.startIndices()[i] != 0 ||
op.stopIndices()[i] != (m_impl.dimensions()[i] - 1)) {
m_is_identity = false;