diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 13 |
1 files changed, 6 insertions, 7 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 80162ad12..454b0f752 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -616,17 +616,16 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large // tensors. But this might need further tuning. - Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>( + const size_t target_block_size = numext::maxi<size_t>( 1, m_device.firstLevelCacheSize() / sizeof(Scalar)); - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, block_total_size_max)); - - m_impl.getResourceRequirements(resources); + return internal::TensorBlockV2ResourceRequirements::merge( + {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size}, + m_impl.getResourceRequirements()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 |