diff options
author | Eugene Zhulenev <ezhulenev@google.com> | 2018-08-27 14:34:07 -0700 |
---|---|---|
committer | Eugene Zhulenev <ezhulenev@google.com> | 2018-08-27 14:34:07 -0700 |
commit | c144bb355b74f4600156284e8202fcf9c0c135d8 (patch) | |
tree | 3e35d145c624b544906a25a447e07104960cd77e /unsupported/Eigen/CXX11/src/Tensor | |
parent | 35d90e89600ff2524ec8bdd4ef4b95dd7c78b656 (diff) | |
parent | 57472886764ff71ad45338c6538649f7a8fa3d0e (diff) |
Merge with upstream eigen/default
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
13 files changed, 136 insertions, 228 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index 9ec1ec726..06bf422c5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -189,7 +189,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(TensorBlock* block) { if (TensorEvaluator<LeftArgType, Device>::RawAccess && - m_leftImpl.data() != nullptr) { + m_leftImpl.data() != NULL) { TensorBlock left_block(block->first_coeff_index(), block->block_sizes(), block->tensor_strides(), block->tensor_strides(), m_leftImpl.data() + block->first_coeff_index()); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index ab3731952..9b9d330c1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -200,9 +200,9 @@ class TensorBase<Derived, ReadOnlyAccessors> } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived> sigmoid() const { - return unaryExpr(internal::scalar_sigmoid_op<Scalar>()); + return unaryExpr(internal::scalar_logistic_op<Scalar>()); } EIGEN_DEVICE_FUNC diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h index f111964dd..6d90af2d3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h @@ -62,7 +62,7 @@ struct cond<RowMajor> { */ enum TensorBlockShapeType { kUniformAllDims, - kSkewedInnerDims, + kSkewedInnerDims }; struct TensorOpResourceRequirements { @@ -73,7 +73,7 @@ struct TensorOpResourceRequirements { // expression tree (like reductions) to communicate resources // requirements based on local state (like the total number of reductions // to be computed). - TensorOpResourceRequirements(internal::TensorBlockShapeType shape, + TensorOpResourceRequirements(TensorBlockShapeType shape, const Index size) : block_shape(shape), block_total_size(size) {} }; @@ -90,9 +90,9 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements( *block_shape = resources[0].block_shape; *block_total_size = resources[0].block_total_size; for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) { - if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims && - *block_shape != TensorBlockShapeType::kSkewedInnerDims) { - *block_shape = TensorBlockShapeType::kSkewedInnerDims; + if (resources[i].block_shape == kSkewedInnerDims && + *block_shape != kSkewedInnerDims) { + *block_shape = kSkewedInnerDims; } *block_total_size = numext::maxi(*block_total_size, resources[i].block_total_size); @@ -152,11 +152,11 @@ struct TensorBlockCopyOp { const Scalar* src_base = &src_data[src_index]; Scalar* dst_base = &dst_data[dst_index]; - typedef const Eigen::Array<Scalar, Dynamic, 1> Src; - typedef Eigen::Array<Scalar, Dynamic, 1> Dst; + typedef const Array<Scalar, Dynamic, 1> Src; + typedef Array<Scalar, Dynamic, 1> Dst; - typedef Eigen::Map<Src, 0, InnerStride<>> SrcMap; - typedef Eigen::Map<Dst, 0, InnerStride<>> DstMap; + typedef Map<Src, 0, InnerStride<> > SrcMap; + typedef Map<Dst, 0, InnerStride<> > DstMap; const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride)); DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride)); @@ -178,10 +178,8 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout, bool BlockRead> class TensorBlockIO { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; - typedef typename internal::TensorBlockCopyOp<Scalar, StorageIndex> - TensorBlockCopyOp; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; + typedef TensorBlockCopyOp<Scalar, StorageIndex> BlockCopyOp; protected: struct BlockIteratorState { @@ -194,7 +192,7 @@ class TensorBlockIO { }; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy( - const TensorBlock& block, StorageIndex first_coeff_index, + const Block& block, StorageIndex first_coeff_index, const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data, Scalar* dst_data) { @@ -214,11 +212,11 @@ class TensorBlockIO { num_size_one_inner_dims, NumDims - num_size_one_inner_dims - 1); const StorageIndex block_dim_for_tensor_stride1_dim = NumDims == 0 ? 1 : tensor_to_block_dim_map[tensor_stride1_dim]; - Index block_inner_dim_size = + StorageIndex block_inner_dim_size = NumDims == 0 ? 1 : block.block_sizes()[block_dim_for_tensor_stride1_dim]; - for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) { - const int dim = cond<Layout>()(i, NumDims - i - 1); + for (Index i = num_size_one_inner_dims + 1; i < NumDims; ++i) { + const Index dim = cond<Layout>()(i, NumDims - i - 1); const StorageIndex block_stride = block.block_strides()[tensor_to_block_dim_map[dim]]; if (block_inner_dim_size == block_stride && @@ -260,8 +258,8 @@ class TensorBlockIO { // Initialize block iterator state. Squeeze away any dimension of size 1. int num_squeezed_dims = 0; - for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) { - const int dim = cond<Layout>()(i + 1, NumDims - i - 2); + for (Index i = num_size_one_inner_dims; i < NumDims - 1; ++i) { + const Index dim = cond<Layout>()(i + 1, NumDims - i - 2); const StorageIndex size = block.block_sizes()[tensor_to_block_dim_map[dim]]; if (size == 1) { continue; @@ -290,8 +288,8 @@ class TensorBlockIO { const StorageIndex block_total_size = NumDims == 0 ? 1 : block.block_sizes().TotalSize(); for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) { - TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride, - dst_data, inputIndex, input_stride, src_data); + BlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride, + dst_data, inputIndex, input_stride, src_data); // Update index. for (int j = 0; j < num_squeezed_dims; ++j) { if (++block_iter_state[j].count < block_iter_state[j].size) { @@ -320,13 +318,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; - typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> - Base; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; + typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> Base; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - TensorBlock* block, const Scalar* src_data) { + Block* block, const Scalar* src_data) { array<StorageIndex, NumDims> tensor_to_block_dim_map; for (int i = 0; i < NumDims; ++i) { tensor_to_block_dim_map[i] = i; @@ -336,7 +332,7 @@ class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims, } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - TensorBlock* block, StorageIndex first_coeff_index, + Block* block, StorageIndex first_coeff_index, const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) { Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map, @@ -357,13 +353,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; - typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> - Base; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; + typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> Base; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const TensorBlock& block, Scalar* dst_data) { + const Block& block, Scalar* dst_data) { array<StorageIndex, NumDims> tensor_to_block_dim_map; for (int i = 0; i < NumDims; ++i) { tensor_to_block_dim_map[i] = i; @@ -373,7 +367,7 @@ class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims, } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const TensorBlock& block, StorageIndex first_coeff_index, + const Block& block, StorageIndex first_coeff_index, const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) { Base::Copy(block, first_coeff_index, tensor_to_block_dim_map, @@ -542,13 +536,13 @@ struct TensorBlockCwiseBinaryOp { const StorageIndex left_stride, const LeftScalar* left_data, const StorageIndex right_index, const StorageIndex right_stride, const RightScalar* right_data) { - typedef const Eigen::Array<LeftScalar, Dynamic, 1> Lhs; - typedef const Eigen::Array<RightScalar, Dynamic, 1> Rhs; - typedef Eigen::Array<OutputScalar, Dynamic, 1> Out; + typedef const Array<LeftScalar, Dynamic, 1> Lhs; + typedef const Array<RightScalar, Dynamic, 1> Rhs; + typedef Array<OutputScalar, Dynamic, 1> Out; - typedef Eigen::Map<Lhs, 0, InnerStride<>> LhsMap; - typedef Eigen::Map<Rhs, 0, InnerStride<>> RhsMap; - typedef Eigen::Map<Out, 0, InnerStride<>> OutMap; + typedef Map<Lhs, 0, InnerStride<> > LhsMap; + typedef Map<Rhs, 0, InnerStride<> > RhsMap; + typedef Map<Out, 0, InnerStride<> > OutMap; const LeftScalar* lhs_base = &left_data[left_index]; const RightScalar* rhs_base = &right_data[right_index]; @@ -558,8 +552,7 @@ struct TensorBlockCwiseBinaryOp { const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride)); OutMap out(out_base, num_coeff, InnerStride<>(output_stride)); - out = - Eigen::CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor); + out = CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor); } }; @@ -575,8 +568,7 @@ struct TensorBlockCwiseBinaryOp { template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar, int NumDims, int Layout> struct TensorBlockCwiseBinaryIO { - typedef typename internal::TensorBlock<OutputScalar, StorageIndex, NumDims, - Layout>::Dimensions Dimensions; + typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims, Layout>::Dimensions Dimensions; struct BlockIteratorState { StorageIndex output_stride, output_span; @@ -642,7 +634,7 @@ struct TensorBlockCwiseBinaryIO { if (size == 1) { continue; } - auto& state = block_iter_state[num_squeezed_dims]; + BlockIteratorState& state = block_iter_state[num_squeezed_dims]; state.output_stride = block_strides[dim]; state.left_stride = left_strides[dim]; state.right_stride = right_strides[dim]; @@ -664,7 +656,7 @@ struct TensorBlockCwiseBinaryIO { right_stride, right_data); // Update index. for (int j = 0; j < num_squeezed_dims; ++j) { - auto& state = block_iter_state[j]; + BlockIteratorState& state = block_iter_state[j]; if (++state.count < state.size) { output_index += state.output_stride; left_index += state.left_stride; @@ -768,15 +760,14 @@ struct TensorBlockView { template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorBlockMapper { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; typedef DSizes<StorageIndex, NumDims> Dimensions; TensorBlockMapper(const Dimensions& dims, const TensorBlockShapeType block_shape, Index min_target_size) : m_dimensions(dims), - m_block_dim_sizes(BlockDimensions(dims, block_shape, min_target_size)) { + m_block_dim_sizes(BlockDimensions(dims, block_shape, internal::convert_index<StorageIndex>(min_target_size))) { // Calculate block counts by dimension and total block count. DSizes<StorageIndex, NumDims> block_count; for (Index i = 0; i < block_count.rank(); ++i) { @@ -804,7 +795,7 @@ class TensorBlockMapper { } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block GetBlockForIndex(StorageIndex block_index, Scalar* data) const { StorageIndex first_coeff_index = 0; DSizes<StorageIndex, NumDims> coords; @@ -852,8 +843,7 @@ class TensorBlockMapper { } } - return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides, - data); + return Block(first_coeff_index, sizes, strides, m_tensor_strides, data); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const { @@ -868,8 +858,8 @@ class TensorBlockMapper { private: static Dimensions BlockDimensions(const Dimensions& tensor_dims, const TensorBlockShapeType block_shape, - Index min_target_size) { - min_target_size = numext::maxi<Index>(1, min_target_size); + StorageIndex min_target_size) { + min_target_size = numext::maxi<StorageIndex>(1, min_target_size); // If tensor fully fits into the target size, we'll treat it a single block. Dimensions block_dim_sizes = tensor_dims; @@ -883,12 +873,12 @@ class TensorBlockMapper { block_dim_sizes[i] = 1; } } else if (block_dim_sizes.TotalSize() > min_target_size) { - if (block_shape == TensorBlockShapeType::kUniformAllDims) { + if (block_shape == kUniformAllDims) { // Tensor will not fit within 'min_target_size' budget: calculate tensor // block dimension sizes based on "square" dimension size target. - const Index dim_size_target = static_cast<Index>( - std::pow(static_cast<float>(min_target_size), - 1.0 / static_cast<float>(block_dim_sizes.rank()))); + const StorageIndex dim_size_target = internal::convert_index<StorageIndex>( + std::pow(static_cast<float>(min_target_size), + 1.0f / static_cast<float>(block_dim_sizes.rank()))); for (Index i = 0; i < block_dim_sizes.rank(); ++i) { // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it // a multiple of the packet size. Note that reducing @@ -913,7 +903,7 @@ class TensorBlockMapper { total_size = total_size_other_dims * block_dim_sizes[dim]; } } - } else if (block_shape == TensorBlockShapeType::kSkewedInnerDims) { + } else if (block_shape == kSkewedInnerDims) { StorageIndex coeff_to_allocate = min_target_size; for (int i = 0; i < NumDims; ++i) { const int dim = cond<Layout>()(i, NumDims - i - 1); @@ -929,8 +919,9 @@ class TensorBlockMapper { } } - eigen_assert(block_dim_sizes.TotalSize() >= - numext::mini<Index>(min_target_size, tensor_dims.TotalSize())); + eigen_assert( + block_dim_sizes.TotalSize() >= + numext::mini<Index>(min_target_size, tensor_dims.TotalSize())); return block_dim_sizes; } @@ -957,8 +948,7 @@ class TensorBlockMapper { template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorSliceBlockMapper { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; typedef DSizes<StorageIndex, NumDims> Dimensions; TensorSliceBlockMapper(const Dimensions& tensor_dims, @@ -974,7 +964,7 @@ class TensorSliceBlockMapper { m_total_block_count(1) { // Calculate block counts by dimension and total block count. DSizes<StorageIndex, NumDims> block_count; - for (size_t i = 0; i < block_count.rank(); ++i) { + for (Index i = 0; i < block_count.rank(); ++i) { block_count[i] = divup(m_tensor_slice_extents[i], m_block_dim_sizes[i]); } m_total_block_count = array_prod(block_count); @@ -999,7 +989,7 @@ class TensorSliceBlockMapper { } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block GetBlockForIndex(StorageIndex block_index, Scalar* data) const { StorageIndex first_coeff_index = 0; DSizes<StorageIndex, NumDims> coords; @@ -1056,8 +1046,7 @@ class TensorSliceBlockMapper { } } - return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides, - data); + return Block(first_coeff_index, sizes, strides, m_tensor_strides, data); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 5e812b04d..02d061a9c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -105,7 +105,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; - bool isCopy= false, nByOne = false, oneByN = false; + bool isCopy, nByOne, oneByN; enum { IsAligned = true, @@ -134,9 +134,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_device(device), - m_broadcast(op.broadcast()), - m_impl(op.expression(), device) { + : isCopy(false), nByOne(false), oneByN(false), + m_device(device), m_broadcast(op.broadcast()), m_impl(op.expression(), device) + { + // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar // and store the result in a scalar. Instead one should reshape the scalar into a a N-D // tensor with N >= 1 of 1 element first and then broadcast. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index c459fc649..f0f61fade 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -152,13 +152,7 @@ struct TensorContractionParams { // 1. Elementwise Relu transformation following Conv2D. // 2. AddBias to the Conv2D output channels dimension. // -// See expected implementation in NoOpOutputKernel. -struct OutputKernel { - template <typename Index, typename Scalar> - using OutputMapper = internal::blas_data_mapper<Scalar, Index, ColMajor>; -}; - -// Output kernel that does absolutely nothing. +// The NoOpOutputKernel implements an output kernel that does absolutely nothing. struct NoOpOutputKernel { /** * Tensor contraction evaluator calls this kernel after finishing each block @@ -177,7 +171,7 @@ struct NoOpOutputKernel { */ template <typename Index, typename Scalar> EIGEN_ALWAYS_INLINE void operator()( - const OutputKernel::OutputMapper<Index, Scalar>& /*output_mapper*/, + const internal::blas_data_mapper<Scalar, Index, ColMajor>& /*output_mapper*/, const TensorContractionParams& /*params*/, Index /*i*/, Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {} }; @@ -354,7 +348,7 @@ struct TensorContractionEvaluatorBase // dimensions and right non-contracting dimensions. m_lhs_inner_dim_contiguous = true; int dim_idx = 0; - unsigned int nocontract_idx = 0; + Index nocontract_idx = 0; for (int i = 0; i < LDims; i++) { // find if we are contracting on index i of left tensor @@ -667,7 +661,7 @@ struct TensorContractionEvaluatorBase // call gebp (matrix kernel) // The parameters here are copied from Eigen's GEMM implementation - const auto output_mapper = output.getSubMapper(i2, j2); + const OutputMapper output_mapper = output.getSubMapper(i2, j2); gebp(output_mapper, blockA, blockB, actual_mc, actual_kc, actual_nc, Scalar(1), -1, -1, 0, 0); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h index d71b2e34b..6ee3827f3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -88,6 +88,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; + typedef typename PointerType<CoeffReturnType, Device>::Type PointerT; enum { IsAligned = false, @@ -107,12 +108,12 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) { if (data) { evalTo(data); return false; } else { - m_result = static_cast<CoeffReturnType*>( + m_result = static_cast<PointerT>( m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); evalTo(m_result); return true; @@ -140,23 +141,22 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return m_result; } + EIGEN_DEVICE_FUNC PointerT data() const { return m_result; } #ifdef EIGEN_USE_SYCL EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; } #endif protected: - EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { - TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result( - data, m_dimensions); + EIGEN_DEVICE_FUNC void evalTo(PointerT data) { + TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(data, m_dimensions); m_op.func().eval(m_op.expression(), result, m_device); } Dimensions m_dimensions; const ArgType m_op; const Device& m_device; - CoeffReturnType* m_result; + PointerT m_result; }; @@ -251,6 +251,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; + typedef typename PointerType<CoeffReturnType, Device>::Type PointerT; enum { IsAligned = false, @@ -270,12 +271,12 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) { if (data) { evalTo(data); return false; } else { - m_result = static_cast<Scalar *>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); + m_result = static_cast<PointerT>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType))); evalTo(m_result); return true; } @@ -302,22 +303,22 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC typename internal::traits<XprType>::PointerType data() const { return m_result; } + EIGEN_DEVICE_FUNC PointerT data() const { return m_result; } #ifdef EIGEN_USE_SYCL EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; } #endif protected: - EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { - TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions); + EIGEN_DEVICE_FUNC void evalTo(PointerT data) { + TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(data, m_dimensions); m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); } Dimensions m_dimensions; const XprType m_op; const Device& m_device; - CoeffReturnType* m_result; + PointerT m_result; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index cc134228a..6fc6688d3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -12,56 +12,6 @@ namespace Eigen { -// Barrier is an object that allows one or more threads to wait until -// Notify has been called a specified number of times. -class Barrier { - public: - Barrier(unsigned int count) : state_(count << 1), notified_(false) { - eigen_assert(((count << 1) >> 1) == count); - } - ~Barrier() { - eigen_assert((state_>>1) == 0); - } - - void Notify() { - unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2; - if (v != 1) { - eigen_assert(((v + 2) & ~1) != 0); - return; // either count has not dropped to 0, or waiter is not waiting - } - std::unique_lock<std::mutex> l(mu_); - eigen_assert(!notified_); - notified_ = true; - cv_.notify_all(); - } - - void Wait() { - unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel); - if ((v >> 1) == 0) return; - std::unique_lock<std::mutex> l(mu_); - while (!notified_) { - cv_.wait(l); - } - } - - private: - std::mutex mu_; - std::condition_variable cv_; - std::atomic<unsigned int> state_; // low bit is waiter flag - bool notified_; -}; - - -// Notification is an object that allows a user to to wait for another -// thread to signal a notification that an event has occurred. -// -// Multiple threads can wait on the same Notification object, -// but only one caller must call Notify() on the object. -struct Notification : Barrier { - Notification() : Barrier(1) {}; -}; - - // Runs an arbitrary function and then calls Notify() on the passed in // Notification. template <typename Function, typename... Args> struct FunctionWrapperWithNotification @@ -102,7 +52,7 @@ class Allocator { // Build a thread pool device on top the an existing pool of threads. struct ThreadPoolDevice { // The ownership of the thread pool remains with the caller. - ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr) + ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = NULL) : pool_(pool), num_threads_(num_cores), allocator_(allocator) { } EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { @@ -282,7 +232,7 @@ struct ThreadPoolDevice { // Convenience wrapper for parallelFor that does not align blocks. void parallelFor(Index n, const TensorOpCost& cost, std::function<void(Index, Index)> f) const { - parallelFor(n, cost, nullptr, std::move(f)); + parallelFor(n, cost, NULL, std::move(f)); } // Thread pool accessor. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 4f973a5b7..ce91bc2a6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -32,12 +32,12 @@ namespace Eigen { // Boilerplate code namespace internal { -template<std::size_t n, typename Dimension> struct dget { +template<std::ptrdiff_t n, typename Dimension> struct dget { static const std::ptrdiff_t value = get<n, Dimension>::value; }; -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor> struct fixed_size_tensor_index_linearization_helper { template <typename Dimensions> EIGEN_DEVICE_FUNC @@ -50,7 +50,7 @@ struct fixed_size_tensor_index_linearization_helper } }; -template<typename Index, std::size_t NumIndices, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor> struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> { template <typename Dimensions> EIGEN_DEVICE_FUNC @@ -60,7 +60,7 @@ struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMaj } }; -template<typename Index, std::size_t n> +template<typename Index, std::ptrdiff_t n> struct fixed_size_tensor_index_extraction_helper { template <typename Dimensions> EIGEN_DEVICE_FUNC @@ -94,7 +94,7 @@ struct Sizes { typedef internal::numeric_list<std::ptrdiff_t, Indices...> Base; const Base t = Base(); static const std::ptrdiff_t total_size = internal::arg_prod(Indices...); - static const size_t count = Base::count; + static const ptrdiff_t count = Base::count; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const { return Base::count; @@ -121,16 +121,16 @@ struct Sizes { return *this; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::ptrdiff_t index) const { return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count>::run(index, t); } template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { + ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, t); } template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { + ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, t); } }; @@ -144,25 +144,25 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<Indi #else -template <std::size_t n> +template <std::ptrdiff_t n> struct non_zero_size { - typedef internal::type2val<std::size_t, n> type; + typedef internal::type2val<std::ptrdiff_t, n> type; }; template <> struct non_zero_size<0> { typedef internal::null_type type; }; -template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0, std::size_t V5=0> struct Sizes { +template <std::ptrdiff_t V1=0, std::ptrdiff_t V2=0, std::ptrdiff_t V3=0, std::ptrdiff_t V4=0, std::ptrdiff_t V5=0> struct Sizes { typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type, typename non_zero_size<V3>::type, typename non_zero_size<V4>::type, typename non_zero_size<V5>::type >::type Base; - static const size_t count = Base::count; - static const std::size_t total_size = internal::arg_prod<Base>::value; + static const std::ptrdiff_t count = Base::count; + static const std::ptrdiff_t total_size = internal::arg_prod<Base>::value; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t rank() const { return count; } - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t TotalSize() { return internal::arg_prod<Base>::value; } @@ -178,7 +178,7 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0 #if EIGEN_HAS_VARIADIC_TEMPLATES template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { } - explicit Sizes(std::initializer_list<std::size_t>) { + explicit Sizes(std::initializer_list<std::ptrdiff_t>) { // todo: add assertion } #else @@ -213,18 +213,18 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0 } template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { + ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *reinterpret_cast<const Base*>(this)); } template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { + ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *reinterpret_cast<const Base*>(this)); } }; namespace internal { -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) { +template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) { return Sizes<V1, V2, V3, V4, V5>::total_size; } } @@ -233,7 +233,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, // Boilerplate namespace internal { -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor> struct tensor_index_linearization_helper { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -245,7 +245,7 @@ struct tensor_index_linearization_helper } }; -template<typename Index, std::size_t NumIndices, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor> struct tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -264,7 +264,7 @@ struct DSizes : array<DenseIndex, NumDims> { typedef array<DenseIndex, NumDims> Base; static const int count = NumDims; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumDims; } @@ -298,7 +298,7 @@ struct DSizes : array<DenseIndex, NumDims> { } } #else - template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> + template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> EIGEN_DEVICE_FUNC DSizes(const Sizes<V1, V2, V3, V4, V5>& a) { for (int i = 0 ; i < NumDims; ++i) { (*this)[i] = a[i]; @@ -359,7 +359,7 @@ struct DSizes : array<DenseIndex, NumDims> { // Boilerplate namespace internal { -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor> struct tensor_vsize_index_linearization_helper { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -371,7 +371,7 @@ struct tensor_vsize_index_linearization_helper } }; -template<typename Index, std::size_t NumIndices, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor> struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor> { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -386,10 +386,10 @@ struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor> namespace internal { template <typename DenseIndex, int NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > { - static const size_t value = NumDims; + static const ptrdiff_t value = NumDims; }; template <typename DenseIndex, int NumDims> struct array_size<DSizes<DenseIndex, NumDims> > { - static const size_t value = NumDims; + static const ptrdiff_t value = NumDims; }; #ifndef EIGEN_EMULATE_CXX11_META_H template <typename std::ptrdiff_t... Indices> struct array_size<const Sizes<Indices...> > { @@ -399,33 +399,33 @@ template <typename std::ptrdiff_t... Indices> struct array_size<Sizes<Indices... static const std::ptrdiff_t value = Sizes<Indices...>::count; }; template <std::ptrdiff_t n, typename std::ptrdiff_t... Indices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<Indices...>&) { - return get<n, internal::numeric_list<std::size_t, Indices...> >::value; + return get<n, internal::numeric_list<std::ptrdiff_t, Indices...> >::value; } template <std::ptrdiff_t n> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) { eigen_assert(false && "should never be called"); return -1; } #else -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > { - static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; +template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > { + static const ptrdiff_t value = Sizes<V1,V2,V3,V4,V5>::count; }; -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > { - static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; +template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > { + static const ptrdiff_t value = Sizes<V1,V2,V3,V4,V5>::count; }; -template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>&) { +template <std::ptrdiff_t n, std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<V1,V2,V3,V4,V5>&) { return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value; } #endif -template <typename Dims1, typename Dims2, size_t n, size_t m> +template <typename Dims1, typename Dims2, ptrdiff_t n, ptrdiff_t m> struct sizes_match_below_dim { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) { return false; } }; -template <typename Dims1, typename Dims2, size_t n> +template <typename Dims1, typename Dims2, ptrdiff_t n> struct sizes_match_below_dim<Dims1, Dims2, n, n> { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) { return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) & diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index d9b61dc70..ba5ab1396 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -133,7 +133,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable, if (needs_assign) { // Size tensor blocks to fit in cache (or requested target block size). Index block_total_size = numext::mini(cache_size, total_size); - TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims; + TensorBlockShapeType block_shape = kSkewedInnerDims; // Query expression tree for desired block size/shape. std::vector<TensorOpResourceRequirements> resources; evaluator.getResourceRequirements(&resources); @@ -229,12 +229,8 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> { typedef EvalRange<Evaluator, StorageIndex, Vectorizable> EvalRange; Evaluator evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const StorageIndex PacketSize = - Vectorizable - ? unpacket_traits<typename Evaluator::PacketReturnType>::size - : 1; const StorageIndex size = array_prod(evaluator.dimensions()); device.parallelFor(size, evaluator.costPerCoeff(Vectorizable), EvalRange::alignBlockSize, @@ -259,12 +255,11 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr static EIGEN_STRONG_INLINE void run(const Expression& expr, const ThreadPoolDevice& device) { - typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock; typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper; Evaluator evaluator(expr, device); - StorageIndex total_size = array_prod(evaluator.dimensions()); - StorageIndex cache_size = device.firstLevelCacheSize() / sizeof(Scalar); + Index total_size = array_prod(evaluator.dimensions()); + Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar); if (total_size < cache_size) { // TODO(andydavis) Reduce block management overhead for small tensors. internal::TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, @@ -273,9 +268,9 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr return; } - const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims; + TensorBlockShapeType block_shape = kSkewedInnerDims; Index block_total_size = 0; // Query expression tree for desired block size/shape. std::vector<internal::TensorOpResourceRequirements> resources; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index b7a0193fe..04a8b953d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -24,6 +24,14 @@ template<typename T> struct MakePointer { typedef T ScalarType; }; +// The PointerType class is a container of the device specefic pointer +// used for refering to a Pointer on TensorEvaluator class. While the TensorExpression +// is a device-agnostic type and need MakePointer class for type conversion, +// the TensorEvaluator calss can be specialized for a device, hence it is possible +// to construct different types of temproray storage memory in TensorEvaluator +// for different devices by specializing the following PointerType class. +template<typename T, typename Device> struct PointerType : MakePointer<T>{}; + namespace internal{ template<typename A, typename B> struct Pointer_type_promotion { static const bool val=false; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 7ecd4d1ac..cd666c173 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -54,36 +54,6 @@ struct functor_traits<scalar_fmod_op<Scalar> > { PacketAccess = false }; }; - -/** \internal - * \brief Template functor to compute the sigmoid of a scalar - * \sa class CwiseUnaryOp, ArrayBase::sigmoid() - */ -template <typename T> -struct scalar_sigmoid_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { - const T one = T(1); - return one / (one + numext::exp(-x)); - } - - template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Packet packetOp(const Packet& x) const { - const Packet one = pset1<Packet>(T(1)); - return pdiv(one, padd(one, pexp(pnegate(x)))); - } -}; - -template <typename T> -struct functor_traits<scalar_sigmoid_op<T> > { - enum { - Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6, - PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv && - packet_traits<T>::HasNegate && packet_traits<T>::HasExp - }; -}; - - template<typename Reducer, typename Device> struct reducer_traits { enum { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h index 98ad661ca..3f7d26b18 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -84,7 +84,7 @@ template<DenseIndex n> struct NumTraits<type2index<n> > namespace internal { template <typename T> EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) { - val = new_val; + val = internal::convert_index<T>(new_val); } template <DenseIndex n> EIGEN_DEVICE_FUNC void update_value(type2index<n>& val, DenseIndex new_val) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index a32743677..2f765acb7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -527,7 +527,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) { - for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { + for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) { eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]); } @@ -985,7 +985,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped; m_is_identity = true; - for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { + for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) { if (m_strides[i] != 1 || op.startIndices()[i] != 0 || op.stopIndices()[i] != (m_impl.dimensions()[i] - 1)) { m_is_identity = false; |