From 2918f85ba976dbfbf72f7d4c1961a577f5850148 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 9 Dec 2019 16:19:38 -0800 Subject: Do not use std::vector in getResourceRequirements --- unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h | 9 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h | 34 -------------- unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h | 54 ++++++++++++++++++++++ .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 13 +++--- .../Eigen/CXX11/src/Tensor/TensorChipping.h | 15 +++--- .../Eigen/CXX11/src/Tensor/TensorConversion.h | 6 +-- unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 6 +-- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 39 +++++++++------- .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 29 ++++-------- .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 6 ++- .../Eigen/CXX11/src/Tensor/TensorGenerator.h | 10 ++-- .../Eigen/CXX11/src/Tensor/TensorImagePatch.h | 8 ---- .../Eigen/CXX11/src/Tensor/TensorMorphing.h | 21 ++++----- unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 15 +++--- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 9 ---- unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h | 13 +++--- .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 17 +++---- 17 files changed, 153 insertions(+), 151 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index d7795a00d..c4f6f86e8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -208,10 +208,11 @@ struct TensorEvaluator, Device> TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { - m_leftImpl.getResourceRequirements(resources); - m_rightImpl.getResourceRequirements(resources); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + return 
internal::TensorBlockV2ResourceRequirements::merge( + m_leftImpl.getResourceRequirements(), + m_rightImpl.getResourceRequirements()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlockV2( diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h index 447da9121..ba11bf7a8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h @@ -65,40 +65,6 @@ enum TensorBlockShapeType { kSkewedInnerDims }; -struct TensorOpResourceRequirements { - TensorBlockShapeType block_shape; - Index block_total_size; - // TODO(andydavis) Add 'target_num_threads' to support communication of - // thread-resource requirements. This will allow ops deep in the - // expression tree (like reductions) to communicate resources - // requirements based on local state (like the total number of reductions - // to be computed). - TensorOpResourceRequirements(TensorBlockShapeType shape, - const Index size) - : block_shape(shape), block_total_size(size) {} -}; - -// Tries to merge multiple resource requirements. -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MergeResourceRequirements( - const std::vector& resources, - TensorBlockShapeType* block_shape, Index* block_total_size) { - if (resources.empty()) { - return; - } - // TODO(andydavis) Implement different policies (i.e. revert to a default - // policy if block shapes/sizes conflict). 
- *block_shape = resources[0].block_shape; - *block_total_size = resources[0].block_total_size; - for (std::vector::size_type i = 1; i < resources.size(); ++i) { - if (resources[i].block_shape == kSkewedInnerDims && - *block_shape != kSkewedInnerDims) { - *block_shape = kSkewedInnerDims; - } - *block_total_size = - numext::maxi(*block_total_size, resources[i].block_total_size); - } -} - /** * \class TensorBlock * \ingroup CXX11_Tensor_Module diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h index 221f8e843..6cacf1cc1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h @@ -57,6 +57,60 @@ EIGEN_STRONG_INLINE DSizes strides( return strides(DSizes(sizes)); } +// -------------------------------------------------------------------------- // + +// Tensor block shape type defines the shape preference for the blocks +// extracted from the larger tensor. +// +// Example: blocks of 100 elements from the large 100x100 tensor: +// - tensor: 100x100 +// - target_block_size: 100 +// +// TensorBlockV2ShapeType: +// - kUniformAllDims: 100 blocks of size 10x10 +// - kSkewedInnerDims: 100 blocks of size 100x1 (or 1x100 depending on a column +// or row major layout) +enum class TensorBlockV2ShapeType { kUniformAllDims, kSkewedInnerDims }; + +struct TensorBlockV2ResourceRequirements { + TensorBlockV2ShapeType shape_type; + size_t size; + + TensorBlockShapeType shapeV1() const { + return shape_type == TensorBlockV2ShapeType::kUniformAllDims + ? 
internal::kUniformAllDims + : internal::kSkewedInnerDims; + } + + static TensorBlockV2ResourceRequirements + merge(const TensorBlockV2ResourceRequirements &lhs, + const TensorBlockV2ResourceRequirements &rhs) { + return {merge(lhs.shape_type, rhs.shape_type), merge(rhs.size, lhs.size)}; + } + + // This is a resource requirement that should be returned from expressions + // that do not have any block evaluation preference (e.g. default tensor + // expression with raw buffer access). + static TensorBlockV2ResourceRequirements any() { + return {TensorBlockV2ShapeType::kUniformAllDims, 1}; + } + +private: + using Requirements = TensorBlockV2ResourceRequirements; + + static size_t merge(size_t lhs_size, size_t rhs_size) { + return numext::maxi(lhs_size, rhs_size); + } + + static TensorBlockV2ShapeType merge(TensorBlockV2ShapeType lhs, + TensorBlockV2ShapeType rhs) { + return (lhs == TensorBlockV2ShapeType::kSkewedInnerDims || + rhs == TensorBlockV2ShapeType::kSkewedInnerDims) + ? TensorBlockV2ShapeType::kSkewedInnerDims + : TensorBlockV2ShapeType::kUniformAllDims; + } +}; + // -------------------------------------------------------------------------- // // TensorBlockDescriptor specifies a block offset within a tensor and the block // sizes along each of the tensor dimensions. 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 80162ad12..454b0f752 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -616,17 +616,16 @@ struct TensorEvaluator, Device> TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large // tensors. But this might need further tuning. - Eigen::Index block_total_size_max = numext::maxi( + const size_t target_block_size = numext::maxi( 1, m_device.firstLevelCacheSize() / sizeof(Scalar)); - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, block_total_size_max)); - - m_impl.getResourceRequirements(resources); + return internal::TensorBlockV2ResourceRequirements::merge( + {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size}, + m_impl.getResourceRequirements()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index 098110217..9b835c4de 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -294,13 +294,14 @@ struct TensorEvaluator, Device> TensorOpCost(0, 0, cost, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { - Eigen::Index block_total_size_max = numext::maxi( - 1, m_device.lastLevelCacheSize() / sizeof(Scalar)); - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, block_total_size_max)); - 
m_impl.getResourceRequirements(resources); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + const size_t target_block_size = + numext::maxi(1, m_device.lastLevelCacheSize() / sizeof(Scalar)); + + return internal::TensorBlockV2ResourceRequirements::merge( + {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size}, + m_impl.getResourceRequirements()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index 027322582..f9f90ec02 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -397,9 +397,9 @@ struct TensorEvaluator, Device> } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { - m_impl.getResourceRequirements(resources); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + return m_impl.getResourceRequirements(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index e6bb56136..2cbc4e878 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -164,9 +164,9 @@ struct TensorEvaluator, Device> internal::pstoret(m_buffer + i, m_impl.template packet::IsAligned ? 
Aligned : Unaligned>(i)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { - m_impl.getResourceRequirements(resources); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + return m_impl.getResourceRequirements(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlockV2( diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index c0314499d..4085ad314 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -149,8 +149,10 @@ struct TensorEvaluator PacketType::size); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector*) const {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + return internal::TensorBlockV2ResourceRequirements::any(); + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch, @@ -320,8 +322,10 @@ struct TensorEvaluator PacketType::size); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector*) const {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + return internal::TensorBlockV2ResourceRequirements::any(); + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch, @@ -517,9 +521,9 @@ struct TensorEvaluator, Device> TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { - m_argImpl.getResourceRequirements(resources); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + return 
m_argImpl.getResourceRequirements(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 @@ -655,10 +659,11 @@ struct TensorEvaluator* resources) const { - m_leftImpl.getResourceRequirements(resources); - m_rightImpl.getResourceRequirements(resources); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + return internal::TensorBlockV2ResourceRequirements::merge( + m_leftImpl.getResourceRequirements(), + m_rightImpl.getResourceRequirements()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 @@ -934,11 +939,13 @@ struct TensorEvaluator .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { - m_condImpl.getResourceRequirements(resources); - m_thenImpl.getResourceRequirements(resources); - m_elseImpl.getResourceRequirements(resources); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + return internal::TensorBlockV2ResourceRequirements::merge( + m_condImpl.getResourceRequirements(), + internal::TensorBlockV2ResourceRequirements::merge( + m_thenImpl.getResourceRequirements(), + m_elseImpl.getResourceRequirements())); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index ca056e96e..db123d8a4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -182,25 +182,18 @@ class TensorExecutor resources; - evaluator.getResourceRequirements(&resources); - MergeResourceRequirements(resources, &block_shape, &block_total_size); + const TensorBlockV2ResourceRequirements requirements = + evaluator.getResourceRequirements(); - TensorBlockMapper block_mapper( - TensorBlockDimensions(evaluator.dimensions()), block_shape, - block_total_size); - 
block_total_size = block_mapper.block_dims_total_size(); + const TensorBlockMapper block_mapper( + TensorBlockDimensions(evaluator.dimensions()), requirements.shapeV1(), + requirements.size); // Share scratch memory allocator between all blocks. TensorBlockScratch scratch(device); @@ -268,14 +261,10 @@ template TensorExecutorTilingContext GetTensorExecutorTilingContext( const ThreadPoolDevice& device, const Evaluator& evaluator, bool allocate_buffer = true) { - // Prefer blocks skewed toward inner dimension. - TensorBlockShapeType block_shape = kSkewedInnerDims; - Index block_total_size = 0; - // Query expression tree for desired block size/shape. - std::vector resources; - evaluator.getResourceRequirements(&resources); - MergeResourceRequirements(resources, &block_shape, &block_total_size); + const TensorBlockV2ResourceRequirements requirements = + evaluator.getResourceRequirements(); + int num_threads = device.numThreads(); // Estimate minimum block size based on cost. @@ -285,7 +274,7 @@ TensorExecutorTilingContext GetTensorExecutorTilingContext( TensorBlockMapper block_mapper( typename TensorBlockMapper::Dimensions(evaluator.dimensions()), - block_shape, block_size); + requirements.shapeV1(), block_size); block_size = block_mapper.block_dims_total_size(); const size_t align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 8d17d4b76..95fa4f509 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -176,8 +176,10 @@ struct TensorEvaluator, Device> return internal::ploadt(m_buffer + index); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector*) const {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + return internal::TensorBlockV2ResourceRequirements::any(); + } 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h index 77fa32dc7..bed7a1b00 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -167,12 +167,12 @@ struct TensorEvaluator, Device> return rslt; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { - Eigen::Index block_total_size_max = numext::maxi( + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + const size_t target_block_size = numext::maxi( 1, m_device.firstLevelCacheSize() / sizeof(Scalar)); - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, block_total_size_max)); + return {internal::TensorBlockV2ShapeType::kSkewedInnerDims, + target_block_size}; } struct BlockIteratorState { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h index 76bed5526..959e77e01 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -540,14 +540,6 @@ struct TensorEvaluator, Device> TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { - Eigen::Index block_total_size_max = numext::maxi( - 1, m_device.lastLevelCacheSize() / sizeof(Scalar)); - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, block_total_size_max)); - } - protected: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h 
b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index d3628f94e..7299cdcdb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -198,10 +198,9 @@ struct TensorEvaluator, Device> return m_impl.costPerCoeff(vectorized); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector*) const { - // TODO(ezhulenev): If we'll ever support block evaluation without raw - // access we'll need to get requirements from `m_impl`. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + return internal::TensorBlockV2ResourceRequirements::any(); } // required in block(OutputTensorBlock* output_block) const @@ -636,13 +635,13 @@ struct TensorEvaluator, Devi return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { - Eigen::Index block_total_size_max = numext::maxi( - 1, m_device.lastLevelCacheSize() / sizeof(Scalar)); - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, block_total_size_max)); - m_impl.getResourceRequirements(resources); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + const size_t target_block_size = + numext::maxi(1, m_device.lastLevelCacheSize() / sizeof(Scalar)); + return internal::TensorBlockV2ResourceRequirements::merge( + {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size}, + m_impl.getResourceRequirements()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index 4d1a09ada..0d7444730 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ 
-227,14 +227,13 @@ struct TensorEvaluator, Device return cost; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { - Eigen::Index block_total_size_max = numext::maxi( - 1, m_device.lastLevelCacheSize() / sizeof(Scalar)); - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, block_total_size_max)); - - m_impl.getResourceRequirements(resources); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + const size_t target_block_size = + numext::maxi(1, m_device.lastLevelCacheSize() / sizeof(Scalar)); + return internal::TensorBlockV2ResourceRequirements::merge( + {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size}, + m_impl.getResourceRequirements()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 0bb1e643e..c600c319d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -905,15 +905,6 @@ struct TensorReductionEvaluatorBase* resources) const { - Eigen::Index block_total_size_max = numext::maxi( - 1, m_device.lastLevelCacheSize() / sizeof(Scalar)); - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, block_total_size_max)); - m_impl.getResourceRequirements(resources); - } - EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; } EIGEN_DEVICE_FUNC const TensorEvaluator& impl() const { return m_impl; } EIGEN_DEVICE_FUNC const Device& device() const { return m_device; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h index c5830da0a..0d18cfc36 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ 
-248,12 +248,13 @@ struct TensorEvaluator, Device return rslt; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { - Eigen::Index block_total_size_max = numext::maxi( - 1, m_device.lastLevelCacheSize() / sizeof(Scalar)); - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, block_total_size_max)); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { + const size_t target_block_size = + numext::maxi(1, m_device.lastLevelCacheSize() / sizeof(Scalar)); + return internal::TensorBlockV2ResourceRequirements::merge( + {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size}, + m_impl.getResourceRequirements()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 655fd91e8..42bca8172 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -244,18 +244,19 @@ struct TensorEvaluator, Device> return PacketLoader::PacketAccess>::Run(*this, index); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { static const int inner_dim = Layout == static_cast(ColMajor) ? 0 : NumDims - 1; - const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim; - Eigen::Index block_total_size_max = numext::maxi( + const size_t target_block_size = numext::maxi( 1, m_device.firstLevelCacheSize() / sizeof(Scalar)); - resources->push_back(internal::TensorOpResourceRequirements( - inner_dim_shuffled ? 
internal::kUniformAllDims - : internal::kSkewedInnerDims, - block_total_size_max)); + + const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim; + return {inner_dim_shuffled + ? internal::TensorBlockV2ShapeType::kUniformAllDims + : internal::TensorBlockV2ShapeType::kSkewedInnerDims, + target_block_size}; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 -- cgit v1.2.3