path: root/unsupported/Eigen/CXX11
author    Eugene Zhulenev <ezhulenev@google.com>  2019-12-10 11:58:30 -0800
committer Eugene Zhulenev <ezhulenev@google.com>  2019-12-10 14:31:44 -0800
commit    dbca11e8805ec07660d8f966a1884ad0be302f15 (patch)
tree      9da1438132a9a40de7ca3abafec2e559eb0449e3 /unsupported/Eigen/CXX11
parent    c49f0d851ab77c9e4d782b453b4b0428bce903d3 (diff)
Remove TensorBlock.h and old TensorBlock/BlockMapper
Diffstat (limited to 'unsupported/Eigen/CXX11')
-rw-r--r--  unsupported/Eigen/CXX11/Tensor                          |   1
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h       |   8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h        | 305
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h      | 168
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h     |  51
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h    |   7
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h     |  41
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h    |   3
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h   |   3
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h     |  14
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h      |   9
11 files changed, 179 insertions, 431 deletions
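
Note on the change: the hunks below replace the removed Scalar-templated TensorBlockMapper (total_block_count() / GetBlockForIndex(i, data)) with TensorBlockV2Mapper (blockCount() / blockDescriptor(i)), whose descriptors carry only an offset and per-dimension sizes. As orientation before reading the patch, here is a minimal consumer sketch of the new mapper; it is not part of the patch, the 100x100 geometry and 1024-coefficient budget are illustrative assumptions, and the offset()/dimensions() accessors on TensorBlockDescriptor come from TensorBlockV2.h rather than from the hunks shown here.

    #include <iostream>
    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      using namespace Eigen;

      // Hypothetical tensor geometry and per-block coefficient budget.
      DSizes<Index, 2> dims(100, 100);

      internal::TensorBlockV2ResourceRequirements req;
      req.shape_type = internal::TensorBlockV2ShapeType::kSkewedInnerDims;
      req.size = 1024;

      // Unlike the removed mapper, TensorBlockV2Mapper has no Scalar template
      // parameter: blocks are described, not materialized, by the mapper.
      internal::TensorBlockV2Mapper<2, ColMajor, Index> mapper(dims, req);

      for (Index i = 0; i < mapper.blockCount(); ++i) {
        internal::TensorBlockDescriptor<2, Index> desc = mapper.blockDescriptor(i);
        std::cout << "block " << i << ": offset " << desc.offset() << ", "
                  << desc.dimensions().TotalSize() << " coefficients\n";
      }
      return 0;
    }
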
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index f8a62253c..10786048e 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -97,7 +97,6 @@ typedef unsigned __int64 uint64_t;
#include "src/Tensor/TensorGlobalFunctions.h"
#include "src/Tensor/TensorBase.h"
-#include "src/Tensor/TensorBlock.h"
#include "src/Tensor/TensorBlockV2.h"
#include "src/Tensor/TensorEvaluator.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index c4f6f86e8..22d672aa4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -116,20 +116,12 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess
};
- typedef typename internal::TensorBlock<
- typename internal::remove_const<Scalar>::type, Index, NumDims, Layout>
- TensorBlock;
-
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlockV2
RightTensorBlock;
-
- typedef internal::TensorBlockAssignment<
- Scalar, NumDims, typename RightTensorBlock::XprType, Index>
- TensorBlockAssignment;
//===--------------------------------------------------------------------===//
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
deleted file mode 100644
index ba11bf7a8..000000000
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ /dev/null
@@ -1,305 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2018 Andy Davis <andydavis@google.com>
-// Copyright (C) 2018 Eugene Zhulenev <ezhulenev@google.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
-#define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
-
-namespace Eigen {
-namespace internal {
-
-namespace {
-
-// Helper template to choose between ColMajor and RowMajor values.
-template <int Layout>
-struct cond;
-
-template <>
-struct cond<ColMajor> {
- template <typename T>
- EIGEN_STRONG_INLINE const T& operator()(const T& col,
- const T& /*row*/) const {
- return col;
- }
-};
-
-template <>
-struct cond<RowMajor> {
- template <typename T>
- EIGEN_STRONG_INLINE const T& operator()(const T& /*col*/,
- const T& row) const {
- return row;
- }
-};
-
-} // namespace
-
-/**
- * \enum TensorBlockShapeType
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block shape type.
- *
- * Tensor block shape type defines the shape preference for the blocks
- * extracted from the larger tensor.
- *
- * Example:
- *
- * We want to extract blocks of 100 elements from the large 100x100 tensor:
- * - tensor: 100x100
- * - target_block_size: 100
- *
- * TensorBlockShapeType:
- * - kUniformAllDims: 100 blocks of size 10x10
- * - kSkewedInnerDims: 100 blocks of size 100x1 (or 1x100 depending on a column
- * or row major layout)
- */
-enum TensorBlockShapeType {
- kUniformAllDims,
- kSkewedInnerDims
-};
-
-/**
- * \class TensorBlock
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block class.
- *
- * This class represents a tensor block specified by the index of the
- * first block coefficient, and the size of the block in each dimension.
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
-class TensorBlock {
- public:
- typedef DSizes<StorageIndex, NumDims> Dimensions;
-
- TensorBlock(const StorageIndex first_coeff_index, const Dimensions& block_sizes,
- const Dimensions& block_strides, const Dimensions& tensor_strides,
- Scalar* data)
- : m_first_coeff_index(first_coeff_index),
- m_block_sizes(block_sizes),
- m_block_strides(block_strides),
- m_tensor_strides(tensor_strides),
- m_data(data) {}
-
- StorageIndex first_coeff_index() const { return m_first_coeff_index; }
-
- const Dimensions& block_sizes() const { return m_block_sizes; }
-
- const Dimensions& block_strides() const { return m_block_strides; }
-
- const Dimensions& tensor_strides() const { return m_tensor_strides; }
-
- Scalar* data() { return m_data; }
-
- const Scalar* data() const { return m_data; }
-
- private:
- StorageIndex m_first_coeff_index;
- Dimensions m_block_sizes;
- Dimensions m_block_strides;
- Dimensions m_tensor_strides;
- Scalar* m_data; // Not owned.
-};
-
-/**
- * \class TensorBlockMapper
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block mapper class.
- *
- * This class is responsible for iterating over the blocks of a tensor.
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
-class TensorBlockMapper {
- public:
- typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
- typedef DSizes<StorageIndex, NumDims> Dimensions;
-
- TensorBlockMapper() {}
- TensorBlockMapper(const Dimensions& dims,
- const TensorBlockShapeType block_shape,
- Index min_target_size)
- : m_dimensions(dims),
- m_block_dim_sizes(BlockDimensions(dims, block_shape, convert_index<StorageIndex>(min_target_size))) {
- // Calculate block counts by dimension and total block count.
- DSizes<StorageIndex, NumDims> block_count;
- for (Index i = 0; i < block_count.rank(); ++i) {
- block_count[i] = divup(m_dimensions[i], m_block_dim_sizes[i]);
- }
- m_total_block_count = array_prod(block_count);
-
- // Calculate block strides (used for enumerating blocks).
- if (NumDims > 0) {
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_block_strides[0] = 1;
- m_tensor_strides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_block_strides[i] = m_block_strides[i - 1] * block_count[i - 1];
- m_tensor_strides[i] = m_tensor_strides[i - 1] * m_dimensions[i - 1];
- }
- } else {
- m_block_strides[NumDims - 1] = 1;
- m_tensor_strides[NumDims - 1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_block_strides[i] = m_block_strides[i + 1] * block_count[i + 1];
- m_tensor_strides[i] = m_tensor_strides[i + 1] * m_dimensions[i + 1];
- }
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
- GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
- StorageIndex first_coeff_index = 0;
- DSizes<StorageIndex, NumDims> coords;
- DSizes<StorageIndex, NumDims> sizes;
- DSizes<StorageIndex, NumDims> strides;
- if (NumDims > 0) {
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const StorageIndex idx = block_index / m_block_strides[i];
- coords[i] = idx * m_block_dim_sizes[i];
- sizes[i] =
- numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]);
- block_index -= idx * m_block_strides[i];
- first_coeff_index += coords[i] * m_tensor_strides[i];
- }
- coords[0] = block_index * m_block_dim_sizes[0];
- sizes[0] =
- numext::mini((m_dimensions[0] - coords[0]), m_block_dim_sizes[0]);
- first_coeff_index += coords[0] * m_tensor_strides[0];
-
- strides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- strides[i] = strides[i - 1] * sizes[i - 1];
- }
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- const StorageIndex idx = block_index / m_block_strides[i];
- coords[i] = idx * m_block_dim_sizes[i];
- sizes[i] =
- numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]);
- block_index -= idx * m_block_strides[i];
- first_coeff_index += coords[i] * m_tensor_strides[i];
- }
- coords[NumDims - 1] = block_index * m_block_dim_sizes[NumDims - 1];
- sizes[NumDims - 1] =
- numext::mini((m_dimensions[NumDims - 1] - coords[NumDims - 1]),
- m_block_dim_sizes[NumDims - 1]);
- first_coeff_index +=
- coords[NumDims - 1] * m_tensor_strides[NumDims - 1];
-
- strides[NumDims - 1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- strides[i] = strides[i + 1] * sizes[i + 1];
- }
- }
- }
-
- return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
- return m_total_block_count;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex
- block_dims_total_size() const {
- return m_block_dim_sizes.TotalSize();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions&
- block_dim_sizes() const {
- return m_block_dim_sizes;
- }
-
- private:
- static Dimensions BlockDimensions(const Dimensions& tensor_dims,
- const TensorBlockShapeType block_shape,
- StorageIndex min_target_size) {
- min_target_size = numext::maxi<StorageIndex>(1, min_target_size);
-
- // If the tensor fully fits into the target size, we'll treat it as a single block.
- Dimensions block_dim_sizes = tensor_dims;
-
- if (tensor_dims.TotalSize() == 0) {
- // Corner case: one of the dimensions is zero. Logic below is too complex
- // to handle this case on a general basis, just use unit block size.
- // Note: we must not yield blocks with zero dimensions (recipe for
- // overflows/underflows, divisions by zero and NaNs later).
- for (int i = 0; i < NumDims; ++i) {
- block_dim_sizes[i] = 1;
- }
- } else if (block_dim_sizes.TotalSize() > min_target_size) {
- if (block_shape == kUniformAllDims) {
- // Tensor will not fit within 'min_target_size' budget: calculate tensor
- // block dimension sizes based on "square" dimension size target.
- const StorageIndex dim_size_target = convert_index<StorageIndex>(
- std::pow(static_cast<float>(min_target_size),
- 1.0f / static_cast<float>(block_dim_sizes.rank())));
- for (Index i = 0; i < block_dim_sizes.rank(); ++i) {
- // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it
- // a multiple of the packet size. Note that reducing
- // 'block_dim_size' in this manner can increase the number of
- // blocks, and so will amplify any per-block overhead.
- block_dim_sizes[i] = numext::mini(dim_size_target, tensor_dims[i]);
- }
- // Add any un-allocated coefficients to inner dimension(s).
- StorageIndex total_size = block_dim_sizes.TotalSize();
- for (int i = 0; i < NumDims; ++i) {
- const int dim = cond<Layout>()(i, NumDims - i - 1);
- if (block_dim_sizes[dim] < tensor_dims[dim]) {
- const StorageIndex total_size_other_dims =
- total_size / block_dim_sizes[dim];
- const StorageIndex alloc_avail =
- divup<StorageIndex>(min_target_size, total_size_other_dims);
- if (alloc_avail == block_dim_sizes[dim]) {
- // Insufficient excess coefficients to allocate.
- break;
- }
- block_dim_sizes[dim] = numext::mini(tensor_dims[dim], alloc_avail);
- total_size = total_size_other_dims * block_dim_sizes[dim];
- }
- }
- } else if (block_shape == kSkewedInnerDims) {
- StorageIndex coeff_to_allocate = min_target_size;
- for (int i = 0; i < NumDims; ++i) {
- const int dim = cond<Layout>()(i, NumDims - i - 1);
- block_dim_sizes[dim] =
- numext::mini(coeff_to_allocate, tensor_dims[dim]);
- coeff_to_allocate = divup(
- coeff_to_allocate,
- numext::maxi(static_cast<StorageIndex>(1), block_dim_sizes[dim]));
- }
- eigen_assert(coeff_to_allocate == 1);
- } else {
- eigen_assert(false); // someone added new block shape type
- }
- }
-
- eigen_assert(
- block_dim_sizes.TotalSize() >=
- numext::mini<Index>(min_target_size, tensor_dims.TotalSize()));
-
- return block_dim_sizes;
- }
-
- Dimensions m_dimensions;
- Dimensions m_block_dim_sizes;
- Dimensions m_block_strides;
- Dimensions m_tensor_strides;
- StorageIndex m_total_block_count;
-};
-
-} // namespace internal
-
-} // namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
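
The documentation removed with this header (the 100x100 tensor split under a 100-coefficient budget) still describes the surviving TensorBlockV2ShapeType. A small sketch of that example against the replacement mapper follows; it is illustrative only, and the expected 10x10 / 100x1 splits are taken from the removed comment above rather than re-verified here.

    #include <iostream>
    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      using namespace Eigen;
      DSizes<Index, 2> dims(100, 100);  // the 100x100 tensor from the removed docs

      internal::TensorBlockV2ResourceRequirements uniform, skewed;
      uniform.shape_type = internal::TensorBlockV2ShapeType::kUniformAllDims;
      uniform.size = 100;  // target_block_size = 100 coefficients
      skewed.shape_type = internal::TensorBlockV2ShapeType::kSkewedInnerDims;
      skewed.size = 100;

      internal::TensorBlockV2Mapper<2, ColMajor, Index> u(dims, uniform);
      internal::TensorBlockV2Mapper<2, ColMajor, Index> s(dims, skewed);

      // Per the removed comment: kUniformAllDims -> 100 blocks of 10x10,
      // kSkewedInnerDims -> 100 blocks of 100x1 in a column-major layout.
      std::cout << "uniform: " << u.blockDimensions()[0] << "x"
                << u.blockDimensions()[1] << " (" << u.blockCount() << " blocks)\n";
      std::cout << "skewed:  " << s.blockDimensions()[0] << "x"
                << s.blockDimensions()[1] << " (" << s.blockCount() << " blocks)\n";
      return 0;
    }
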
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
index f8814bc8c..029180ca5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
@@ -76,12 +76,6 @@ struct TensorBlockV2ResourceRequirements {
TensorBlockV2ShapeType shape_type;
size_t size;
- TensorBlockShapeType shapeV1() const {
- return shape_type == TensorBlockV2ShapeType::kUniformAllDims
- ? internal::kUniformAllDims
- : internal::kSkewedInnerDims;
- }
-
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements
merge(const TensorBlockV2ResourceRequirements &lhs,
@@ -275,6 +269,168 @@ class TensorBlockDescriptor {
};
// -------------------------------------------------------------------------- //
+// TensorBlockMapper is responsible for iterating over the blocks of a tensor.
+
+template <int NumDims, int Layout, typename IndexType = Eigen::Index>
+class TensorBlockV2Mapper {
+ typedef TensorBlockDescriptor<NumDims, IndexType> BlockDescriptor;
+
+ public:
+ typedef DSizes<IndexType, NumDims> Dimensions;
+
+ TensorBlockV2Mapper() = default;
+ TensorBlockV2Mapper(const DSizes<IndexType, NumDims>& dimensions,
+ const TensorBlockV2ResourceRequirements& requirements)
+ : m_tensor_dimensions(dimensions), m_requirements(requirements) {
+ // Initialize `m_block_dimensions`.
+ InitializeBlockDimensions();
+
+ // Calculate block counts by dimension and total block count.
+ DSizes<IndexType, NumDims> block_count;
+ for (int i = 0; i < NumDims; ++i) {
+ block_count[i] = divup(m_tensor_dimensions[i], m_block_dimensions[i]);
+ }
+ m_total_block_count = array_prod(block_count);
+
+ // Calculate block strides (used for enumerating blocks).
+ m_tensor_strides = strides<Layout>(m_tensor_dimensions);
+ m_block_strides = strides<Layout>(block_count);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockCount() const {
+ return m_total_block_count;
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockTotalSize() const {
+ return m_block_dimensions.TotalSize();
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DSizes<IndexType, NumDims>&
+ blockDimensions() const {
+ return m_block_dimensions;
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ BlockDescriptor blockDescriptor(IndexType block_index) const {
+ static const bool isColMajor = Layout == static_cast<int>(ColMajor);
+
+ IndexType offset = 0;
+ DSizes<IndexType, NumDims> dimensions;
+
+ if (NumDims == 0) return BlockDescriptor(offset, dimensions);
+
+ // Iterate outer -> inner dimensions.
+ for (int i = NumDims - 1; i >= 0; --i) {
+ const int dim = isColMajor ? i : NumDims - i - 1;
+
+ const IndexType idx = block_index / m_block_strides[dim];
+ block_index -= idx * m_block_strides[dim];
+
+ const IndexType coord = idx * m_block_dimensions[dim];
+ dimensions[dim] = numext::mini(m_tensor_dimensions[dim] - coord,
+ m_block_dimensions[dim]);
+ offset += coord * m_tensor_strides[dim];
+ }
+
+ return {offset, dimensions};
+ }
+
+ private:
+ void InitializeBlockDimensions() {
+ // Requested block shape and size.
+ const TensorBlockV2ShapeType shape_type = m_requirements.shape_type;
+ const IndexType target_block_size =
+ numext::maxi<IndexType>(1, static_cast<IndexType>(m_requirements.size));
+
+ // Corner case: one of the dimensions is zero. Logic below is too complex
+ // to handle this case on a general basis, just use unit block size.
+ // Note: we must not yield blocks with zero dimensions (recipe for
+ // overflows/underflows, divisions by zero and NaNs later).
+ if (m_tensor_dimensions.TotalSize() == 0) {
+ for (int i = 0; i < NumDims; ++i) {
+ m_block_dimensions[i] = 1;
+ }
+ return;
+ }
+
+ // If tensor fits into a target block size, evaluate it as a single block.
+ if (m_tensor_dimensions.TotalSize() <= target_block_size) {
+ m_block_dimensions = m_tensor_dimensions;
+ return;
+ }
+
+ static const bool isColMajor = Layout == static_cast<int>(ColMajor);
+
+ // Block shape skewed towards inner dimension.
+ if (shape_type == TensorBlockV2ShapeType::kSkewedInnerDims) {
+ IndexType coeff_to_allocate = target_block_size;
+
+ for (int i = 0; i < NumDims; ++i) {
+ const int dim = isColMajor ? i : NumDims - i - 1;
+ m_block_dimensions[dim] =
+ numext::mini(coeff_to_allocate, m_tensor_dimensions[dim]);
+ coeff_to_allocate = divup(
+ coeff_to_allocate,
+ numext::maxi(static_cast<IndexType>(1), m_block_dimensions[dim]));
+ }
+ eigen_assert(coeff_to_allocate == 1);
+
+ } else if (shape_type == TensorBlockV2ShapeType::kUniformAllDims) {
+ // Tensor will not fit within 'target_block_size' budget: calculate tensor
+ // block dimension sizes based on "square" dimension size target.
+ const IndexType dim_size_target = convert_index<IndexType>(
+ std::pow(static_cast<float>(target_block_size),
+ 1.0f / static_cast<float>(m_block_dimensions.rank())));
+
+ for (int i = 0; i < NumDims; ++i) {
+ // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it
+ // a multiple of the packet size. Note that reducing
+ // 'block_dim_size' in this manner can increase the number of
+ // blocks, and so will amplify any per-block overhead.
+ m_block_dimensions[i] =
+ numext::mini(dim_size_target, m_tensor_dimensions[i]);
+ }
+
+ // Add any un-allocated coefficients to inner dimension(s).
+ IndexType total_size = m_block_dimensions.TotalSize();
+ for (int i = 0; i < NumDims; ++i) {
+ const int dim = isColMajor ? i : NumDims - i - 1;
+
+ if (m_block_dimensions[dim] < m_tensor_dimensions[dim]) {
+ const IndexType total_size_other_dims =
+ total_size / m_block_dimensions[dim];
+ const IndexType alloc_avail =
+ divup<IndexType>(target_block_size, total_size_other_dims);
+ if (alloc_avail == m_block_dimensions[dim]) {
+ // Insufficient excess coefficients to allocate.
+ break;
+ }
+ m_block_dimensions[dim] =
+ numext::mini(m_tensor_dimensions[dim], alloc_avail);
+ total_size = total_size_other_dims * m_block_dimensions[dim];
+ }
+ }
+
+ } else {
+ eigen_assert(false); // unknown block shape
+ }
+
+ eigen_assert(m_block_dimensions.TotalSize() >=
+ numext::mini<IndexType>(target_block_size,
+ m_tensor_dimensions.TotalSize()));
+ }
+
+ DSizes<IndexType, NumDims> m_tensor_dimensions;
+ TensorBlockV2ResourceRequirements m_requirements;
+
+ DSizes<IndexType, NumDims> m_block_dimensions;
+ IndexType m_total_block_count;
+
+ DSizes<IndexType, NumDims> m_tensor_strides;
+ DSizes<IndexType, NumDims> m_block_strides;
+};
+
+// -------------------------------------------------------------------------- //
// TensorBlockScratchAllocator is responsible for allocating temporary buffers
// for block evaluation (output or input block materialization). Given that
// Eigen expression traversal order is deterministic, all temporary allocations
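
As a worked (hypothetical) illustration of the TensorBlockV2Mapper routines added above: take a 10x10 column-major tensor mapped with 5x5 blocks. Then block_count = (2, 2), m_block_strides = (1, 2) and m_tensor_strides = (1, 10), and blockDescriptor(3) decomposes outer to inner: dim 1 gives idx = 3 / 2 = 1, so coord = 5, size 5 and offset += 5 * 10 = 50; dim 0 then gives idx = 1, coord = 5, size 5 and offset += 5 * 1, yielding a descriptor with offset 55 and dimensions (5, 5), i.e. the block whose first coefficient sits at (5, 5). Likewise, InitializeBlockDimensions() with kSkewedInnerDims, a 4x5x6 tensor and a 20-coefficient target assigns min(20, 4) = 4 to the innermost dimension, divup(20, 4) = 5 to the next and 1 to the last, giving 4x5x1 blocks of exactly 20 coefficients. These numbers are chosen for illustration and are not part of the patch.
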
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index 9b835c4de..268c3246a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -447,13 +447,6 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
RawAccess = false
};
- typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
-
- typedef internal::TensorBlock<ScalarNoConst, Index, NumInputDims, Layout>
- InputTensorBlock;
- typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
- OutputTensorBlock;
-
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
//===--------------------------------------------------------------------===//
@@ -506,50 +499,6 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
}
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
- const OutputTensorBlock& output_block) {
- // Calculate input block sizes.
- const DSizes<Index, NumDims>& output_block_sizes =
- output_block.block_sizes();
- const DSizes<Index, NumDims>& output_block_strides =
- output_block.block_strides();
- const Index chip_dim = this->m_dim.actualDim();
- DSizes<Index, NumInputDims> input_block_sizes;
- DSizes<Index, NumInputDims> input_block_strides;
- for (Index i = 0; i < NumInputDims; ++i) {
- if (i < chip_dim) {
- input_block_sizes[i] = output_block_sizes[i];
- input_block_strides[i] = output_block_strides[i];
- } else if (i > chip_dim) {
- input_block_sizes[i] = output_block_sizes[i - 1];
- input_block_strides[i] = output_block_strides[i - 1];
- } else {
- input_block_sizes[i] = 1;
- }
- }
- // Fix up input_block_stride for chip dimension.
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- if (chip_dim == 0) {
- input_block_strides[chip_dim] = 1;
- } else {
- input_block_strides[chip_dim] =
- input_block_strides[chip_dim - 1] * input_block_sizes[chip_dim - 1];
- }
- } else {
- if (chip_dim == NumInputDims - 1) {
- input_block_strides[chip_dim] = 1;
- } else {
- input_block_strides[chip_dim] =
- input_block_strides[chip_dim + 1] * input_block_sizes[chip_dim + 1];
- }
- }
- // Write input block.
- this->m_impl.writeBlock(InputTensorBlock(
- this->srcCoeff(output_block.first_coeff_index()), input_block_sizes,
- input_block_strides, this->m_inputStrides,
- const_cast<ScalarNoConst*>(output_block.data())));
- }
-
template <typename TensorBlockV2>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2(
const TensorBlockDesc& desc, const TensorBlockV2& block) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 4085ad314..613a8347d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -471,8 +471,6 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static const int NumDims = internal::array_size<Dimensions>::value;
- typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
- TensorBlock;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
@@ -593,11 +591,6 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
static const int NumDims = internal::array_size<
typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
- typedef internal::TensorBlock<
- typename internal::remove_const<Scalar>::type, Index, NumDims,
- TensorEvaluator<LeftArgType, Device>::Layout>
- TensorBlock;
-
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index db123d8a4..7b7b670ed 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -172,9 +172,8 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(const Expression& expr,
const DefaultDevice& device = DefaultDevice()) {
- typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock;
- typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
- typedef typename TensorBlock::Dimensions TensorBlockDimensions;
+ typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, StorageIndex>
+ TensorBlockMapper;
typedef internal::TensorBlockDescriptor<NumDims, StorageIndex>
TensorBlockDesc;
@@ -192,17 +191,15 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
evaluator.getResourceRequirements();
const TensorBlockMapper block_mapper(
- TensorBlockDimensions(evaluator.dimensions()), requirements.shapeV1(),
- requirements.size);
+ typename TensorBlockDesc::Dimensions(evaluator.dimensions()),
+ requirements);
// Share scratch memory allocator between all blocks.
TensorBlockScratch scratch(device);
- const StorageIndex total_block_count = block_mapper.total_block_count();
+ const StorageIndex total_block_count = block_mapper.blockCount();
for (StorageIndex i = 0; i < total_block_count; ++i) {
- TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
-
- TensorBlockDesc desc(block.first_coeff_index(), block.block_sizes());
+ TensorBlockDesc desc = block_mapper.blockDescriptor(i);
evaluator.evalBlockV2(desc, scratch);
scratch.reset();
}
@@ -226,8 +223,6 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
template <typename TensorBlockMapper>
struct TensorExecutorTilingContext {
- typedef typename TensorBlockMapper::Block TensorBlock;
-
TensorExecutorTilingContext() : buffer(nullptr) {}
TensorExecutorTilingContext(const TensorBlockMapper& b_mapper,
const TensorOpCost& b_cost, void* b_buffer,
@@ -274,9 +269,9 @@ TensorExecutorTilingContext<TensorBlockMapper> GetTensorExecutorTilingContext(
TensorBlockMapper block_mapper(
typename TensorBlockMapper::Dimensions(evaluator.dimensions()),
- requirements.shapeV1(), block_size);
+ requirements);
- block_size = block_mapper.block_dims_total_size();
+ block_size = block_mapper.blockTotalSize();
const size_t align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1);
const size_t aligned_blocksize =
align *
@@ -382,9 +377,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
static const int NumDims = traits<Expression>::NumDimensions;
typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
- typedef TensorBlockMapper<ScalarNoConst, IndexType, NumDims,
- Evaluator::Layout>
- BlockMapper;
+ typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
typedef TensorExecutorTilingContext<BlockMapper> TilingContext;
typedef internal::TensorBlockDescriptor<NumDims, IndexType>
@@ -408,14 +401,13 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
TensorBlockScratch scratch(device);
for (IndexType block_idx = firstBlockIdx; block_idx < lastBlockIdx; ++block_idx) {
- auto block = tiling.block_mapper.GetBlockForIndex(block_idx, nullptr);
- TensorBlockDesc desc(block.first_coeff_index(), block.block_sizes());
+ TensorBlockDesc desc = tiling.block_mapper.blockDescriptor(block_idx);
evaluator.evalBlockV2(desc, scratch);
scratch.reset();
}
};
- device.parallelFor(tiling.block_mapper.total_block_count(), tiling.cost,
+ device.parallelFor(tiling.block_mapper.blockCount(), tiling.cost,
eval_block);
}
evaluator.cleanup();
@@ -486,9 +478,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
static const int NumDims = traits<Expression>::NumDimensions;
typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
- typedef TensorBlockMapper<ScalarNoConst, IndexType, NumDims,
- Evaluator::Layout>
- BlockMapper;
+ typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
typedef TensorExecutorTilingContext<BlockMapper> TilingContext;
typedef internal::TensorBlockDescriptor<NumDims, IndexType> TensorBlockDesc;
@@ -518,14 +508,13 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
for (IndexType block_idx = firstBlockIdx; block_idx < lastBlockIdx;
++block_idx) {
- auto block =
- ctx->tiling.block_mapper.GetBlockForIndex(block_idx, nullptr);
- TensorBlockDesc desc(block.first_coeff_index(), block.block_sizes());
+ TensorBlockDesc desc =
+ ctx->tiling.block_mapper.blockDescriptor(block_idx);
ctx->evaluator.evalBlockV2(desc, scratch);
scratch.reset();
}
};
- ctx->device.parallelForAsync(ctx->tiling.block_mapper.total_block_count(),
+ ctx->device.parallelForAsync(ctx->tiling.block_mapper.blockCount(),
ctx->tiling.cost, eval_block, [ctx]() { delete ctx; });
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
index bed7a1b00..2bbbbcf37 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
@@ -102,9 +102,6 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
typedef internal::TensorIntDivisor<Index> IndexDivisor;
- typedef internal::TensorBlock<CoeffReturnType, Index, NumDims, Layout>
- TensorBlock;
-
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
index 959e77e01..5010d5c95 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
@@ -238,9 +238,6 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
RawAccess = false
};
- typedef internal::TensorBlock<Scalar, Index, NumDims, Layout>
- OutputTensorBlock;
-
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
//===--------------------------------------------------------------------===//
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 7299cdcdb..7697add4b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -465,9 +465,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
- typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout> TensorBlock;
- typedef typename TensorBlock::Dimensions TensorBlockDimensions;
-
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
@@ -757,9 +754,6 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
- typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout> TensorBlock;
- typedef typename TensorBlock::Dimensions TensorBlockDimensions;
-
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
@@ -829,14 +823,6 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
}
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
- const TensorBlock& block) {
- this->m_impl.writeBlock(TensorBlock(
- this->srcCoeff(block.first_coeff_index()), block.block_sizes(),
- block.block_strides(), TensorBlockDimensions(this->m_inputStrides),
- const_cast<ScalarNoConst*>(block.data())));
- }
-
template<typename TensorBlockV2>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2(
const TensorBlockDesc& desc, const TensorBlockV2& block) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
index 0d18cfc36..68699351b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
@@ -124,10 +124,6 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
typedef internal::TensorIntDivisor<Index> IndexDivisor;
- typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
- typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
- OutputTensorBlock;
-
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
@@ -252,9 +248,8 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
const size_t target_block_size =
numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
- return internal::TensorBlockV2ResourceRequirements::merge(
- {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size},
- m_impl.getResourceRequirements());
+ return {internal::TensorBlockV2ShapeType::kSkewedInnerDims,
+ target_block_size};
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2