path: root/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
author    Eugene Zhulenev <ezhulenev@google.com> 2019-11-12 10:12:28 -0800
committer Eugene Zhulenev <ezhulenev@google.com> 2019-11-12 10:12:28 -0800
commit 13c3327f5cf829fd9d04a2ab46861e722cd74ca0 (patch)
tree   20bd1a5f361023db822298696efbcff7378ab4a7 /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
parent 71aa53dd6dfdc497324d9e87f59c4ba820191856 (diff)
Remove legacy block evaluation support
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 202
1 file changed, 0 insertions, 202 deletions
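
For context, the specializations removed below are the TiledEvaluation::Legacy code paths; after this patch a tensor expression assignment is dispatched only to the TiledEvaluation::Off or TiledEvaluation::On executors, with the tiling mode selected internally from the evaluator's traits. The following is a minimal, self-contained sketch of code that exercises TensorExecutor on both the default and thread-pool devices; tensor shapes and the thread count are arbitrary illustration values, not taken from the patch.

#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  // Any coefficient-wise expression assignment is evaluated through
  // internal::TensorExecutor for the target device.
  Eigen::Tensor<float, 2> a(256, 256), b(256, 256);
  a.setRandom();

  // DefaultDevice path: single-threaded TensorExecutor::run().
  b = a * 2.0f + a;

  // ThreadPoolDevice path: the multi-threaded executor specialization.
  Eigen::ThreadPool pool(4);
  Eigen::ThreadPoolDevice device(&pool, /*num_cores=*/4);
  b.device(device) = a * 2.0f + a;

  return 0;
}
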
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 0fb0a9227..9926046b9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -159,70 +159,6 @@ class TensorExecutor<Expression, DefaultDevice, /*Vectorizable=*/true,
*/
template <typename Expression, bool Vectorizable>
class TensorExecutor<Expression, DefaultDevice, Vectorizable,
- /*Tiling=*/TiledEvaluation::Legacy> {
- public:
- typedef typename traits<Expression>::Scalar Scalar;
- typedef typename remove_const<Scalar>::type ScalarNoConst;
-
- typedef TensorEvaluator<Expression, DefaultDevice> Evaluator;
- typedef typename traits<Expression>::Index StorageIndex;
-
- static const int NumDims = traits<Expression>::NumDimensions;
-
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(const Expression& expr,
- const DefaultDevice& device = DefaultDevice()) {
- typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock;
- typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
- typedef typename TensorBlock::Dimensions TensorBlockDimensions;
-
- Evaluator evaluator(expr, device);
- Index total_size = array_prod(evaluator.dimensions());
- Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
-
- if (total_size < cache_size
- && !ExpressionHasTensorBroadcastingOp<Expression>::value) {
- // TODO(andydavis) Reduce block management overhead for small tensors.
- internal::TensorExecutor<Expression, DefaultDevice, Vectorizable, /*Tiling=*/TiledEvaluation::Off>::run(expr,device);
- evaluator.cleanup();
- return;
- }
-
- const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
- if (needs_assign) {
- // Size tensor blocks to fit in cache (or requested target block size).
- Index block_total_size = numext::mini(cache_size, total_size);
- TensorBlockShapeType block_shape = kSkewedInnerDims;
- // Query expression tree for desired block size/shape.
- std::vector<TensorOpResourceRequirements> resources;
- evaluator.getResourceRequirements(&resources);
- MergeResourceRequirements(resources, &block_shape, &block_total_size);
-
- TensorBlockMapper block_mapper(
- TensorBlockDimensions(evaluator.dimensions()), block_shape,
- block_total_size);
- block_total_size = block_mapper.block_dims_total_size();
-
- ScalarNoConst* data = static_cast<ScalarNoConst*>(
- device.allocate(block_total_size * sizeof(Scalar)));
-
- const StorageIndex total_block_count = block_mapper.total_block_count();
- for (StorageIndex i = 0; i < total_block_count; ++i) {
- TensorBlock block = block_mapper.GetBlockForIndex(i, data);
- evaluator.evalBlock(&block);
- }
- device.deallocate(data);
- }
- evaluator.cleanup();
- }
-};
-
-/**
- * Process all the data with a single cpu thread, using blocks of data. By
- * sizing a block to fit L1 cache we get better cache performance.
- */
-template <typename Expression, bool Vectorizable>
-class TensorExecutor<Expression, DefaultDevice, Vectorizable,
/*Tiling=*/TiledEvaluation::On> {
public:
typedef typename traits<Expression>::Scalar Scalar;
@@ -448,59 +384,6 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tiling> {
template <typename Expression, bool Vectorizable>
class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
- /*Tiling=*/TiledEvaluation::Legacy> {
- public:
- typedef typename traits<Expression>::Index StorageIndex;
- typedef typename traits<Expression>::Scalar Scalar;
- typedef typename remove_const<Scalar>::type ScalarNoConst;
-
- static const int NumDims = traits<Expression>::NumDimensions;
-
- typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
- typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> BlockMapper;
- typedef TensorExecutorTilingContext<BlockMapper> TilingContext;
-
- static EIGEN_STRONG_INLINE void run(const Expression& expr,
- const ThreadPoolDevice& device) {
- Evaluator evaluator(expr, device);
- Index total_size = array_prod(evaluator.dimensions());
- Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
-
- if (total_size < cache_size &&
- !ExpressionHasTensorBroadcastingOp<Expression>::value) {
- // TODO(andydavis) Reduce block management overhead for small tensors.
- internal::TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
- /*Tiling=*/TiledEvaluation::Off>::run(expr,
- device);
- evaluator.cleanup();
- return;
- }
-
- const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr);
- if (needs_assign) {
- const TilingContext tiling =
- internal::GetTensorExecutorTilingContext<Evaluator, BlockMapper,
- Vectorizable>(device, evaluator);
-
- device.parallelFor(
- tiling.block_mapper.total_block_count(), tiling.cost,
- [=, &device, &evaluator, &tiling](StorageIndex firstIdx,
- StorageIndex lastIdx) {
- ScalarNoConst* thread_buf =
- tiling.template GetCurrentThreadBuffer<ScalarNoConst>(device);
- for (StorageIndex i = firstIdx; i < lastIdx; ++i) {
- auto block = tiling.block_mapper.GetBlockForIndex(i, thread_buf);
- evaluator.evalBlock(&block);
- }
- });
- device.deallocate(tiling.buffer);
- }
- evaluator.cleanup();
- }
-};
-
-template <typename Expression, bool Vectorizable>
-class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
/*Tiling=*/TiledEvaluation::On> {
public:
typedef typename traits<Expression>::Index IndexType;
@@ -605,91 +488,6 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
template <typename Expression, typename DoneCallback, bool Vectorizable>
class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
- Vectorizable, /*Tileable*/ TiledEvaluation::Legacy> {
- public:
- typedef typename traits<Expression>::Index StorageIndex;
- typedef typename traits<Expression>::Scalar Scalar;
- typedef typename remove_const<Scalar>::type ScalarNoConst;
-
- static const int NumDims = traits<Expression>::NumDimensions;
-
- typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
- typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims,
- Evaluator::Layout>
- BlockMapper;
- typedef TensorExecutorTilingContext<BlockMapper> TilingContext;
-
- static EIGEN_STRONG_INLINE void runAsync(const Expression& expr,
- const ThreadPoolDevice& device,
- DoneCallback done) {
- TensorAsyncExecutorContext* const ctx =
- new TensorAsyncExecutorContext(expr, device, std::move(done));
-
- Index total_size = array_prod(ctx->evaluator.dimensions());
- Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
-
- if (total_size < cache_size &&
- !ExpressionHasTensorBroadcastingOp<Expression>::value) {
- auto delete_ctx = [ctx]() { delete ctx; };
- internal::TensorAsyncExecutor<
- Expression, ThreadPoolDevice, decltype(delete_ctx), Vectorizable,
- /*Tileable*/ TiledEvaluation::Off>::runAsync(expr, device, std::move(delete_ctx));
- return;
- }
-
- const auto on_eval_subexprs = [ctx, &device](bool need_assign) -> void {
- if (!need_assign) {
- delete ctx;
- return;
- }
-
- ctx->tiling =
- GetTensorExecutorTilingContext<Evaluator, BlockMapper, Vectorizable>(
- device, ctx->evaluator);
-
- auto eval_block = [ctx](StorageIndex firstIdx, StorageIndex lastIdx) {
- ScalarNoConst* thread_buf =
- ctx->tiling.template GetCurrentThreadBuffer<ScalarNoConst>(
- ctx->device);
- for (StorageIndex i = firstIdx; i < lastIdx; ++i) {
- auto block = ctx->tiling.block_mapper.GetBlockForIndex(i, thread_buf);
- ctx->evaluator.evalBlock(&block);
- }
- };
- device.parallelForAsync(ctx->tiling.block_mapper.total_block_count(),
- ctx->tiling.cost, eval_block,
- [ctx]() { delete ctx; });
- };
-
- ctx->evaluator.evalSubExprsIfNeededAsync(nullptr, on_eval_subexprs);
- }
-
- private:
- struct TensorAsyncExecutorContext {
- TensorAsyncExecutorContext(const Expression& expr,
- const ThreadPoolDevice& thread_pool,
- DoneCallback done)
- : device(thread_pool),
- evaluator(expr, thread_pool),
- on_done(std::move(done)) {}
-
- ~TensorAsyncExecutorContext() {
- device.deallocate(tiling.buffer);
- evaluator.cleanup();
- on_done();
- }
-
- const ThreadPoolDevice& device;
- Evaluator evaluator;
- TilingContext tiling;
-
- private:
- DoneCallback on_done;
- };
-};
-
-template <typename Expression, typename DoneCallback, bool Vectorizable>
-class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
Vectorizable, /*Tileable*/ TiledEvaluation::On> {
public:
typedef typename traits<Expression>::Index IndexType;