path: root/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
author     Eugene Zhulenev <ezhulenev@google.com>  2019-10-02 12:44:06 -0700
committer  Eugene Zhulenev <ezhulenev@google.com>  2019-10-02 12:44:06 -0700
commit     60ae24ee1a6c16114de456d77fcfba6f5a1160ca (patch)
tree       7b9d5463018055571a5050ca31a8d3df12a3e6fc /unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
parent     6e40454a6e6cc57c07c7340148657c985ca6c928 (diff)
Add block evaluation to TensorReshaping/TensorCasting/TensorPadding/TensorSelect
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h  254
1 file changed, 244 insertions(+), 10 deletions(-)
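For context, the evaluator changed below backs the expression created by Tensor::pad(). A minimal usage sketch of such a padded expression, assuming the usual pad() API of the unsupported Tensor module (the example itself is not part of this change):

#include <unsupported/Eigen/CXX11/Tensor>
#include <utility>

int main() {
  Eigen::Tensor<float, 2> input(2, 3);
  input.setConstant(1.0f);

  // One (pad_before, pad_after) pair per dimension.
  Eigen::array<std::pair<int, int>, 2> paddings;
  paddings[0] = std::make_pair(1, 1);  // rows: 1 before, 1 after
  paddings[1] = std::make_pair(0, 2);  // cols: 0 before, 2 after

  // pad() pads with 0 by default; pad(paddings, value) takes an explicit
  // padding value. Assigning to a Tensor evaluates the expression, which is
  // where the block evaluation path added below applies.
  Eigen::Tensor<float, 2> padded = input.pad(paddings);
  // padded has dimensions (1 + 2 + 1) x (0 + 3 + 2) = 4 x 5.
  return 0;
}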
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index 7b9ad7374..be2449ebd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -96,22 +96,29 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
typedef typename Storage::Type EvaluatorPointerType;
enum {
- IsAligned = true,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- BlockAccess = false,
- BlockAccessV2 = false,
- PreferBlockAccess = false,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = true,
- RawAccess = false
+ IsAligned = true,
+ PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
+ BlockAccess = false,
+ BlockAccessV2 = TensorEvaluator<ArgType, Device>::RawAccess,
+ PreferBlockAccess = true,
+ Layout = TensorEvaluator<ArgType, Device>::Layout,
+ CoordAccess = true,
+ RawAccess = false
};
+ typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
+
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
- typedef internal::TensorBlockNotImplemented TensorBlockV2;
+ typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+ typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
+ Layout, Index>
+ TensorBlockV2;
//===--------------------------------------------------------------------===//
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value())
+ : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device)
{
// The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead
// to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector
@@ -212,6 +219,214 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
return cost;
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
+ std::vector<internal::TensorOpResourceRequirements>* resources) const {
+ Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
+ 1, m_device.lastLevelCacheSize() / sizeof(Scalar));
+ resources->push_back(internal::TensorOpResourceRequirements(
+ internal::kSkewedInnerDims, block_total_size_max));
+
+ m_impl.getResourceRequirements(resources);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
+ blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const {
+ eigen_assert(m_impl.data() != NULL);
+
+ // Check if we can reuse the `desc` destination buffer, or allocate a new scratch buffer.
+ ScalarNoConst* materialized_output =
+ desc.template destination<ScalarNoConst, Layout>();
+
+ bool materialized_in_output;
+ if (materialized_output != NULL) {
+ desc.DropDestinationBuffer();
+ materialized_in_output = true;
+
+ } else {
+ const size_t materialized_output_size = desc.size() * sizeof(Scalar);
+ void* output_scratch_mem = scratch.allocate(materialized_output_size);
+ materialized_output = static_cast<ScalarNoConst*>(output_scratch_mem);
+ materialized_in_output = false;
+ }
+
+ static const bool IsColMajor = Layout == static_cast<int>(ColMajor);
+
+ Index offset = desc.offset();
+
+ // Compute offsets in the output tensor corresponding to the desc.offset().
+ DSizes<Index, NumDims> output_offsets;
+ for (int i = NumDims - 1; i > 0; --i) {
+ const int dim = IsColMajor ? i : NumDims - i - 1;
+ const int stride_dim = IsColMajor ? dim : dim + 1;
+ output_offsets[dim] = offset / m_outputStrides[stride_dim];
+ offset -= output_offsets[dim] * m_outputStrides[stride_dim];
+ }
+ output_offsets[IsColMajor ? 0 : NumDims - 1] = offset;
+
+ // Offsets in the input corresponding to output offsets.
+ DSizes<Index, NumDims> input_offsets = output_offsets;
+ for (int i = 0; i < NumDims; ++i) {
+ const int dim = IsColMajor ? i : NumDims - i - 1;
+ input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
+ }
+
+ // Compute the offset in the input buffer (at this point it might be invalid
+ // and point outside of the input buffer, because we don't check for negative
+ // offsets; it will be corrected in the block iteration loop below).
+ Index input_offset = 0;
+ for (int i = 0; i < NumDims; ++i) {
+ const int dim = IsColMajor ? i : NumDims - i - 1;
+ input_offset += input_offsets[dim] * m_inputStrides[dim];
+ }
+
+ // The destination buffer and the scratch buffer are both indexed from 0 and
+ // have the same dimensions as the requested block (for the destination
+ // buffer this property is guaranteed by `desc.destination()`).
+ Index output_offset = 0;
+ const DSizes<Index, NumDims> output_strides =
+ internal::strides<Layout>(desc.dimensions());
+
+ // NOTE(ezhulenev): We initialize block iteration state for `NumDims - 1`
+ // dimensions, skipping the innermost dimension. In theory it should be
+ // possible to squeeze matching innermost dimensions, however in practice
+ // that did not show any improvement in benchmarks. Also, in practice the
+ // first outer dimension usually has padding, which will prevent squeezing.
+
+ // Initialize output block iterator state. Dimensions in this array are
+ // always in innermost -> outermost order (col-major layout).
+ array<BlockIteratorState, NumDims - 1> it;
+ for (Index i = 0; i < NumDims - 1; ++i) {
+ const Index dim = IsColMajor ? i + 1 : NumDims - i - 2;
+ it[i].count = 0;
+ it[i].size = desc.dimension(dim);
+
+ it[i].input_stride = m_inputStrides[dim];
+ it[i].input_span = it[i].input_stride * (it[i].size - 1);
+
+ it[i].output_stride = output_strides[dim];
+ it[i].output_span = it[i].output_stride * (it[i].size - 1);
+ }
+
+ const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;
+
+ // Total output size.
+ const Index output_size = desc.size();
+
+ // We will fill inner dimension of this size in the output. It might be
+ // larger than the inner dimension in the input, so we might have to pad
+ // before/after we copy values from the input inner dimension.
+ const Index output_inner_dim_size = desc.dimension(inner_dim_idx);
+
+ // How many values to fill with padding BEFORE reading from the input inner
+ // dimension.
+ const Index output_inner_pad_before_size =
+ input_offsets[inner_dim_idx] < 0
+ ? numext::mini(numext::abs(input_offsets[inner_dim_idx]),
+ output_inner_dim_size)
+ : 0;
+
+ // How many values we can actually copy from the input inner dimension.
+ const Index output_inner_copy_size = numext::mini(
+ // Want to copy from input.
+ (output_inner_dim_size - output_inner_pad_before_size),
+ // Can copy from input.
+ (static_cast<Index>(m_impl.dimensions()[inner_dim_idx]) -
+ numext::maxi(input_offsets[inner_dim_idx], Index(0))));
+
+ // How many values to fill with padding AFTER reading from the input inner
+ // dimension.
+ const Index output_inner_pad_after_size =
+ (output_inner_dim_size - output_inner_copy_size -
+ output_inner_pad_before_size);
+
+ // Sanity check: the sum of all sizes must be equal to the output size.
+ eigen_assert(output_inner_dim_size ==
+ (output_inner_pad_before_size + output_inner_copy_size +
+ output_inner_pad_after_size));
+
+ // Keep track of current coordinates and padding in the output.
+ DSizes<Index, NumDims> output_coord = output_offsets;
+ DSizes<Index, NumDims> output_padded;
+ for (int i = 0; i < NumDims; ++i) {
+ const int dim = IsColMajor ? i : NumDims - i - 1;
+ output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
+ }
+
+ typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;
+
+ // Iterate copying data from `m_impl.data()` to the output buffer.
+ for (Index size = 0; size < output_size; size += output_inner_dim_size) {
+ // Detect if we are in the padded region (exclude innermost dimension).
+ bool is_padded = false;
+ for (int j = 1; j < NumDims; ++j) {
+ const int dim = IsColMajor ? j : NumDims - j - 1;
+ is_padded = output_padded[dim];
+ if (is_padded) break;
+ }
+
+ if (is_padded) {
+ // Fill with padding value.
+ LinCopy::template Run<LinCopy::Kind::FillLinear>(
+ typename LinCopy::Dst(output_offset, 1, materialized_output),
+ typename LinCopy::Src(0, 0, &m_paddingValue),
+ output_inner_dim_size);
+
+ } else {
+ { // Fill with padding before copying from input inner dimension.
+ const Index out = output_offset;
+
+ LinCopy::template Run<LinCopy::Kind::FillLinear>(
+ typename LinCopy::Dst(out, 1, materialized_output),
+ typename LinCopy::Src(0, 0, &m_paddingValue),
+ output_inner_pad_before_size);
+ }
+
+ { // Copy data from input inner dimension.
+ const Index out = output_offset + output_inner_pad_before_size;
+ const Index in = input_offset + output_inner_pad_before_size;
+
+ LinCopy::template Run<LinCopy::Kind::Linear>(
+ typename LinCopy::Dst(out, 1, materialized_output),
+ typename LinCopy::Src(in, 1, m_impl.data()),
+ output_inner_copy_size);
+ }
+
+ { // Fill with padding after copying from input inner dimension.
+ const Index out = output_offset + output_inner_pad_before_size +
+ output_inner_copy_size;
+
+ LinCopy::template Run<LinCopy::Kind::FillLinear>(
+ typename LinCopy::Dst(out, 1, materialized_output),
+ typename LinCopy::Src(0, 0, &m_paddingValue),
+ output_inner_pad_after_size);
+ }
+ }
+
+ for (int j = 0; j < NumDims - 1; ++j) {
+ const int dim = IsColMajor ? j + 1 : NumDims - j - 2;
+
+ if (++it[j].count < it[j].size) {
+ input_offset += it[j].input_stride;
+ output_offset += it[j].output_stride;
+ output_coord[dim] += 1;
+ output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
+ break;
+ }
+ it[j].count = 0;
+ input_offset -= it[j].input_span;
+ output_offset -= it[j].output_span;
+ output_coord[dim] -= it[j].size - 1;
+ output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
+ }
+ }
+
+ return TensorBlockV2(materialized_in_output
+ ? internal::TensorBlockKind::kMaterializedInOutput
+ : internal::TensorBlockKind::kMaterializedInScratch,
+ materialized_output,
+ desc.dimensions());
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
#ifdef EIGEN_USE_SYCL
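To make the index arithmetic in blockV2() above easier to follow, the following standalone sketch (hypothetical dimensions and names, column-major layout) reproduces its two per-block computations: decomposing the linear desc.offset() into per-dimension output coordinates using the output strides, and splitting the innermost output dimension into pad-before / copy / pad-after segments:

#include <algorithm>
#include <array>
#include <iostream>

int main() {
  constexpr int kNumDims = 3;

  // Hypothetical padded-output dimensions 6x5x4 and their column-major strides.
  const std::array<long, kNumDims> output_strides = {1, 6, 30};

  // (1) Decompose the linear block offset into per-dimension coordinates,
  //     outermost dimension first, as in blockV2().
  long offset = 30;
  std::array<long, kNumDims> output_offsets;
  for (int i = kNumDims - 1; i > 0; --i) {
    output_offsets[i] = offset / output_strides[i];
    offset -= output_offsets[i] * output_strides[i];
  }
  output_offsets[0] = offset;  // offset 30 -> coordinates {0, 0, 1}

  // (2) Split the innermost output dimension. Assume 2 elements of padding
  //     before the input along dimension 0, an input extent of 3, and a
  //     block inner dimension of 6.
  const long pad_before = 2, input_dim = 3, block_inner_size = 6;
  const long input_offset = output_offsets[0] - pad_before;  // may be negative

  const long inner_pad_before =
      input_offset < 0 ? std::min(-input_offset, block_inner_size) : 0;
  const long inner_copy = std::min(block_inner_size - inner_pad_before,
                                   input_dim - std::max(input_offset, 0L));
  const long inner_pad_after = block_inner_size - inner_pad_before - inner_copy;

  // Prints "2 3 1": pad 2 values, copy 3 values from the input, pad 1 more.
  std::cout << inner_pad_before << " " << inner_copy << " " << inner_pad_after
            << "\n";
  return 0;
}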
@@ -222,6 +437,23 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
#endif
private:
+ struct BlockIteratorState {
+ BlockIteratorState()
+ : count(0),
+ size(0),
+ input_stride(0),
+ input_span(0),
+ output_stride(0),
+ output_span(0) {}
+
+ Index count;
+ Index size;
+ Index input_stride;
+ Index input_span;
+ Index output_stride;
+ Index output_span;
+ };
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
Index index, int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
@@ -410,6 +642,8 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
PaddingDimensions m_padding;
Scalar m_paddingValue;
+
+ const Device EIGEN_DEVICE_REF m_device;
};
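Finally, a standalone sketch (hypothetical names and dimensions, not Eigen API) of the odometer-style iteration implemented by the loop over BlockIteratorState above: after each contiguous inner-dimension copy, the first outer dimension with room left advances by its stride, and every exhausted dimension before it is rewound by its accumulated span:

#include <array>
#include <iostream>

struct IterState {
  long count = 0, size = 0, stride = 0, span = 0;
};

int main() {
  constexpr int kNumDims = 3;
  const std::array<long, kNumDims> dims = {4, 3, 2};      // block dimensions
  const std::array<long, kNumDims> strides = {1, 4, 12};  // column-major strides

  // Iteration state for the NumDims - 1 outer dimensions only; the innermost
  // dimension is handled by one contiguous copy of dims[0] elements.
  std::array<IterState, kNumDims - 1> it;
  for (int i = 0; i < kNumDims - 1; ++i) {
    it[i].size = dims[i + 1];
    it[i].stride = strides[i + 1];
    it[i].span = it[i].stride * (it[i].size - 1);
  }

  long offset = 0;
  const long total = dims[0] * dims[1] * dims[2];
  for (long done = 0; done < total; done += dims[0]) {
    std::cout << "copy " << dims[0] << " elements at offset " << offset << "\n";

    // Advance the outer dimensions like an odometer: bump the first one with
    // room left, rewinding the exhausted ones before it.
    for (int j = 0; j < kNumDims - 1; ++j) {
      if (++it[j].count < it[j].size) {
        offset += it[j].stride;
        break;
      }
      it[j].count = 0;
      offset -= it[j].span;
    }
  }
  return 0;
}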