diff options
| Field | Value | Date |
|---|---|---|
| author | Eugene Zhulenev <ezhulenev@google.com> | 2018-09-14 16:55:05 -0700 |
| committer | Eugene Zhulenev <ezhulenev@google.com> | 2018-09-14 16:55:05 -0700 |
| commit | 48633757235b522b37e60022dfbe8813c6cdfdc2 (patch) | |
| tree | 09a63ff38bf2409f7042ec3a00ce29cdd18324f2 /unsupported/Eigen | |
| parent | 281e631839436221f8fdd3b18334c866070399dd (diff) | |

Explicitly construct tensor block dimensions from evaluator dimensions
Diffstat (limited to 'unsupported/Eigen')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 5 |
| -rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 4 |

2 files changed, 5 insertions, 4 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index ba5ab1396..bfe1f97b8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -282,8 +282,9 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
       TensorOpCost cost = evaluator.costPerCoeff(Vectorizable);
       double taskSize = TensorCostModel<ThreadPoolDevice>::taskSize(1, cost);
       size_t block_size = static_cast<size_t>(1.0 / taskSize);
-      TensorBlockMapper block_mapper(evaluator.dimensions(), block_shape,
-                                     block_size);
+      TensorBlockMapper block_mapper(
+          typename TensorBlockMapper::Dimensions(evaluator.dimensions()),
+          block_shape, block_size);
       block_size = block_mapper.block_dims_total_size();
       const size_t aligned_blocksize =
           EIGEN_MAX_ALIGN_BYTES *
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 3a405f5e6..4263f11a6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -976,7 +976,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
         // find that scattered reads are not worth supporting in
         // TensorSliceBlockMapper.
         TensorSliceBlockMapper block_mapper(
-            input_tensor_dims, tensor_slice_offsets, tensor_slice_extents,
+            typename TensorSliceBlockMapper::Dimensions(input_tensor_dims),
+            tensor_slice_offsets, tensor_slice_extents,
             target_input_block_sizes, DimensionList<Index, NumInputDims>());
 
         const Index num_outputs_to_update =
@@ -1232,7 +1233,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
       } else if (!first_preserved_dim_allocated) {
         // TODO(andydavis) Include output block size in this L1 working set
         // calculation.
-        const Index allocated = max_coeff_count - coeff_to_allocate;
         const Index alloc_size = numext::maxi(
             static_cast<Index>(1), coeff_to_allocate / reducer_overhead);
         (*target_input_block_sizes)[dim] =
```