diff options
author | Eugene Zhulenev <ezhulenev@google.com> | 2019-11-12 10:12:28 -0800 |
---|---|---|
committer | Eugene Zhulenev <ezhulenev@google.com> | 2019-11-12 10:12:28 -0800 |
commit | 13c3327f5cf829fd9d04a2ab46861e722cd74ca0 (patch) | |
tree | 20bd1a5f361023db822298696efbcff7378ab4a7 /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | |
parent | 71aa53dd6dfdc497324d9e87f59c4ba820191856 (diff) |
Remove legacy block evaluation support
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 202 |
1 file changed, 0 insertions, 202 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 0fb0a9227..9926046b9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -159,70 +159,6 @@ class TensorExecutor<Expression, DefaultDevice, /*Vectorizable=*/true, */ template <typename Expression, bool Vectorizable> class TensorExecutor<Expression, DefaultDevice, Vectorizable, - /*Tiling=*/TiledEvaluation::Legacy> { - public: - typedef typename traits<Expression>::Scalar Scalar; - typedef typename remove_const<Scalar>::type ScalarNoConst; - - typedef TensorEvaluator<Expression, DefaultDevice> Evaluator; - typedef typename traits<Expression>::Index StorageIndex; - - static const int NumDims = traits<Expression>::NumDimensions; - - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(const Expression& expr, - const DefaultDevice& device = DefaultDevice()) { - typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock; - typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper; - typedef typename TensorBlock::Dimensions TensorBlockDimensions; - - Evaluator evaluator(expr, device); - Index total_size = array_prod(evaluator.dimensions()); - Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar); - - if (total_size < cache_size - && !ExpressionHasTensorBroadcastingOp<Expression>::value) { - // TODO(andydavis) Reduce block management overhead for small tensors. - internal::TensorExecutor<Expression, DefaultDevice, Vectorizable, /*Tiling=*/TiledEvaluation::Off>::run(expr,device); - evaluator.cleanup(); - return; - } - - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) { - // Size tensor blocks to fit in cache (or requested target block size). 
- Index block_total_size = numext::mini(cache_size, total_size); - TensorBlockShapeType block_shape = kSkewedInnerDims; - // Query expression tree for desired block size/shape. - std::vector<TensorOpResourceRequirements> resources; - evaluator.getResourceRequirements(&resources); - MergeResourceRequirements(resources, &block_shape, &block_total_size); - - TensorBlockMapper block_mapper( - TensorBlockDimensions(evaluator.dimensions()), block_shape, - block_total_size); - block_total_size = block_mapper.block_dims_total_size(); - - ScalarNoConst* data = static_cast<ScalarNoConst*>( - device.allocate(block_total_size * sizeof(Scalar))); - - const StorageIndex total_block_count = block_mapper.total_block_count(); - for (StorageIndex i = 0; i < total_block_count; ++i) { - TensorBlock block = block_mapper.GetBlockForIndex(i, data); - evaluator.evalBlock(&block); - } - device.deallocate(data); - } - evaluator.cleanup(); - } -}; - -/** - * Process all the data with a single cpu thread, using blocks of data. By - * sizing a block to fit L1 cache we get better cache performance. 
- */ -template <typename Expression, bool Vectorizable> -class TensorExecutor<Expression, DefaultDevice, Vectorizable, /*Tiling=*/TiledEvaluation::On> { public: typedef typename traits<Expression>::Scalar Scalar; @@ -448,59 +384,6 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tiling> { template <typename Expression, bool Vectorizable> class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, - /*Tiling=*/TiledEvaluation::Legacy> { - public: - typedef typename traits<Expression>::Index StorageIndex; - typedef typename traits<Expression>::Scalar Scalar; - typedef typename remove_const<Scalar>::type ScalarNoConst; - - static const int NumDims = traits<Expression>::NumDimensions; - - typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator; - typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> BlockMapper; - typedef TensorExecutorTilingContext<BlockMapper> TilingContext; - - static EIGEN_STRONG_INLINE void run(const Expression& expr, - const ThreadPoolDevice& device) { - Evaluator evaluator(expr, device); - Index total_size = array_prod(evaluator.dimensions()); - Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar); - - if (total_size < cache_size && - !ExpressionHasTensorBroadcastingOp<Expression>::value) { - // TODO(andydavis) Reduce block management overhead for small tensors. 
- internal::TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, - /*Tiling=*/TiledEvaluation::Off>::run(expr, - device); - evaluator.cleanup(); - return; - } - - const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr); - if (needs_assign) { - const TilingContext tiling = - internal::GetTensorExecutorTilingContext<Evaluator, BlockMapper, - Vectorizable>(device, evaluator); - - device.parallelFor( - tiling.block_mapper.total_block_count(), tiling.cost, - [=, &device, &evaluator, &tiling](StorageIndex firstIdx, - StorageIndex lastIdx) { - ScalarNoConst* thread_buf = - tiling.template GetCurrentThreadBuffer<ScalarNoConst>(device); - for (StorageIndex i = firstIdx; i < lastIdx; ++i) { - auto block = tiling.block_mapper.GetBlockForIndex(i, thread_buf); - evaluator.evalBlock(&block); - } - }); - device.deallocate(tiling.buffer); - } - evaluator.cleanup(); - } -}; - -template <typename Expression, bool Vectorizable> -class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tiling=*/TiledEvaluation::On> { public: typedef typename traits<Expression>::Index IndexType; @@ -605,91 +488,6 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback, template <typename Expression, typename DoneCallback, bool Vectorizable> class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback, - Vectorizable, /*Tileable*/ TiledEvaluation::Legacy> { - public: - typedef typename traits<Expression>::Index StorageIndex; - typedef typename traits<Expression>::Scalar Scalar; - typedef typename remove_const<Scalar>::type ScalarNoConst; - - static const int NumDims = traits<Expression>::NumDimensions; - - typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator; - typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, - Evaluator::Layout> - BlockMapper; - typedef TensorExecutorTilingContext<BlockMapper> TilingContext; - - static EIGEN_STRONG_INLINE void runAsync(const Expression& expr, - const ThreadPoolDevice& device, - DoneCallback 
done) { - TensorAsyncExecutorContext* const ctx = - new TensorAsyncExecutorContext(expr, device, std::move(done)); - - Index total_size = array_prod(ctx->evaluator.dimensions()); - Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar); - - if (total_size < cache_size && - !ExpressionHasTensorBroadcastingOp<Expression>::value) { - auto delete_ctx = [ctx]() { delete ctx; }; - internal::TensorAsyncExecutor< - Expression, ThreadPoolDevice, decltype(delete_ctx), Vectorizable, - /*Tileable*/ TiledEvaluation::Off>::runAsync(expr, device, std::move(delete_ctx)); - return; - } - - const auto on_eval_subexprs = [ctx, &device](bool need_assign) -> void { - if (!need_assign) { - delete ctx; - return; - } - - ctx->tiling = - GetTensorExecutorTilingContext<Evaluator, BlockMapper, Vectorizable>( - device, ctx->evaluator); - - auto eval_block = [ctx](StorageIndex firstIdx, StorageIndex lastIdx) { - ScalarNoConst* thread_buf = - ctx->tiling.template GetCurrentThreadBuffer<ScalarNoConst>( - ctx->device); - for (StorageIndex i = firstIdx; i < lastIdx; ++i) { - auto block = ctx->tiling.block_mapper.GetBlockForIndex(i, thread_buf); - ctx->evaluator.evalBlock(&block); - } - }; - device.parallelForAsync(ctx->tiling.block_mapper.total_block_count(), - ctx->tiling.cost, eval_block, - [ctx]() { delete ctx; }); - }; - - ctx->evaluator.evalSubExprsIfNeededAsync(nullptr, on_eval_subexprs); - } - - private: - struct TensorAsyncExecutorContext { - TensorAsyncExecutorContext(const Expression& expr, - const ThreadPoolDevice& thread_pool, - DoneCallback done) - : device(thread_pool), - evaluator(expr, thread_pool), - on_done(std::move(done)) {} - - ~TensorAsyncExecutorContext() { - device.deallocate(tiling.buffer); - evaluator.cleanup(); - on_done(); - } - - const ThreadPoolDevice& device; - Evaluator evaluator; - TilingContext tiling; - - private: - DoneCallback on_done; - }; -}; - -template <typename Expression, typename DoneCallback, bool Vectorizable> -class 
TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback, Vectorizable, /*Tileable*/ TiledEvaluation::On> { public: typedef typename traits<Expression>::Index IndexType; |