Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h  258
1 file changed, 152 insertions(+), 106 deletions(-)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
index 1f89c4e4a..c85c4c6c8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
@@ -70,91 +70,89 @@ class TensorBlockDescriptor {
// If we evaluate a Tensor assignment, and the expression on the left already has
// a memory buffer, then we might do performance optimization, and evaluate
- // the root expression directly into the memory, or maybe use it as temporary
- // storage for some of the subexpressions, to avoid dynamic memory allocation.
+ // the root expression directly into the final output memory. Sometimes it's
+ // possible to reuse it for materializing subexpressions inside an expression
+ // tree, to avoid dynamic memory allocation.
//
- // This is a type erased storage, because passing Scalar type through all the
- // expression evaluation layers it way too many templates. Also it should be
- // possible to use this destination as a temp buffer for materializing
- // expressions with type, not matching the final output.
+ // The pointer type of the underlying storage is erased, because passing
+ // the Scalar type through all the expression evaluation layers would require
+ // far too many templates. In practice, the destination buffer type should
+ // always match the evaluated expression scalar type.
class DestinationBuffer {
public:
+ enum DestinationBufferKind {
+ // Destination buffer is not defined (`m_data` == nullptr).
+ kEmpty,
+
+ // The tensor block defined by the owning tensor block descriptor can fit
+ // contiguously into the destination buffer. In this case it's safe to
+ // materialize the tensor block in the destination buffer, wrap it in a
+ // TensorMap, and use it to build an Eigen expression on top of it.
+ kContiguous,
+
+ // Destination buffer strides do not match the strides of the contiguously
+ // stored block, and it's impossible to define a TensorMap over this
+ // buffer. However, if we are evaluating the root of an expression tree, we
+ // can still materialize the output into this destination, because we can
+ // guarantee that no one will ever access it through the block API.
+ //
+ // In theory it is possible to build a valid TensorStriding<TensorMap>
+ // expression on top of this destination buffer, however it has
+ // inefficient coeff/packet access, and defeats the purpose of the fast
+ // block evaluation API.
+ kStrided
+ };
+
template <typename Scalar>
Scalar* data() const {
+ eigen_assert(m_data_type_size == sizeof(Scalar));
return static_cast<Scalar*>(m_data);
}
- template <typename Scalar>
- Dimensions dimensions() const {
- Dimensions dimensions;
- for (int i = 0; i < NumDims; ++i) {
- eigen_assert(m_dimensions[i] % sizeof(Scalar) == 0);
- dimensions[i] = m_dimensions[i] / sizeof(Scalar);
- }
- return dimensions;
- }
-
- template <typename Scalar>
- Dimensions strides() const {
- Dimensions strides;
- for (int i = 0; i < NumDims; ++i) {
- eigen_assert(m_strides[i] % sizeof(Scalar) == 0);
- strides[i] = m_strides[i] / sizeof(Scalar);
- }
- return strides;
- }
-
- // Returns true if the tensor block corresponding to `desc` fits into the
- // contiguous block of memory defined by `*this`.
- template <typename Scalar, int Layout>
- bool fitsContiguously(const TensorBlockDescriptor& desc) const {
- if (m_data == NULL) return false;
-
- const Dimensions& desc_dims = desc.dimensions();
- const Dimensions& dst_dims = dimensions<Scalar>();
-
- if (!dimensions_match(desc_dims, dst_dims)) return false;
-
- const Dimensions& desc_strides = internal::strides<Layout>(desc_dims);
- const Dimensions& dst_strides = strides<Scalar>();
-
- // Compare strides ignoring dimensions of size `1`.
- for (int i = 0; i < NumDims; ++i) {
- if (desc_dims[i] == 1) continue;
- if (desc_strides[i] != dst_strides[i]) return false;
- }
-
- return true;
- }
+ const Dimensions& strides() const { return m_strides; }
+ const DestinationBufferKind& kind() const { return m_kind; }
private:
friend class TensorBlockDescriptor;
- DestinationBuffer() : m_data(NULL), m_total_dst_bytes(0) {}
+ DestinationBuffer() : m_data(NULL), m_data_type_size(0), m_kind(kEmpty) {}
template <typename Scalar>
- DestinationBuffer(Scalar* data, const Dimensions& dimensions,
- const Dimensions& strides, size_t total_dst_bytes)
+ DestinationBuffer(Scalar* data, const Dimensions& strides,
+ DestinationBufferKind kind)
: m_data(static_cast<void*>(data)),
- m_dimensions(dimensions),
+ m_data_type_size(sizeof(Scalar)),
m_strides(strides),
- m_total_dst_bytes(total_dst_bytes) {
- // TODO(ezhulenev): Benchmark template meta-unroll for this loop.
+ m_kind(kind) {}
+
+ template <int Layout, typename Scalar>
+ static DestinationBuffer make(const TensorBlockDescriptor& desc,
+ Scalar* data, const Dimensions& strides) {
+ return DestinationBuffer(data, strides, kind<Layout>(desc, strides));
+ }
+
+ template <int Layout>
+ static DestinationBufferKind kind(const TensorBlockDescriptor& desc,
+ const Dimensions& strides) {
+ const Dimensions& desc_dims = desc.dimensions();
+ const Dimensions& desc_strides = internal::strides<Layout>(desc_dims);
for (int i = 0; i < NumDims; ++i) {
- m_dimensions[i] *= sizeof(Scalar);
- m_strides[i] *= sizeof(Scalar);
+ if (desc_dims[i] == 1) continue;
+ if (desc_strides[i] != strides[i]) return kStrided;
}
+ return kContiguous;
}
+ // The storage pointer is type-erased to reduce template bloat, but we still
+ // keep the size of the underlying element type for error checking.
void* m_data;
- Dimensions m_dimensions;
+ size_t m_data_type_size;
+
+ // Destination buffer dimensions always match the dimensions of the tensor
+ // block descriptor it belongs to; its strides, however, might be different.
Dimensions m_strides;
- // Total size of the memory buffer at the destination (typically the total
- // size of the left hand side of an assignment expression). This can be the
- // same as `array_prod(m_dimensions)` if the assignment target has just a
- // single block, but typically it's a larger number.
- size_t m_total_dst_bytes;
+ DestinationBufferKind m_kind;
};
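
For reference, the classification that DestinationBuffer::kind() performs above can be sketched as a small standalone function. This is a minimal illustration, assuming column-major layout and plain std::array types instead of Eigen's Dimensions; the names ClassifyDestination and DestinationKind are illustrative and not part of this patch.

    #include <array>
    #include <cstddef>

    enum class DestinationKind { Empty, Contiguous, Strided };

    // Decide whether a destination buffer with the given strides can hold the
    // block densely (Contiguous) or only with gaps (Strided). Dimensions of
    // size 1 never produce an offset, so their strides are ignored, exactly as
    // in DestinationBuffer::kind() above.
    template <std::size_t NumDims>
    DestinationKind ClassifyDestination(
        const std::array<std::ptrdiff_t, NumDims>& block_dims,
        const std::array<std::ptrdiff_t, NumDims>& dst_strides) {
      // Strides of the same block stored densely in column-major order.
      std::array<std::ptrdiff_t, NumDims> dense_strides;
      std::ptrdiff_t stride = 1;
      for (std::size_t i = 0; i < NumDims; ++i) {
        dense_strides[i] = stride;
        stride *= block_dims[i];
      }
      for (std::size_t i = 0; i < NumDims; ++i) {
        if (block_dims[i] == 1) continue;
        if (dense_strides[i] != dst_strides[i]) return DestinationKind::Strided;
      }
      return DestinationKind::Contiguous;
    }

For example, a 2x3 block written into a 2x3 slice of a 5x3 destination tensor has destination strides {1, 5} while the dense block strides are {1, 2}, so it is classified as Strided; writing into a standalone 2x3 buffer gives strides {1, 2} and is Contiguous.
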
TensorBlockDescriptor(const IndexType offset, const Dimensions& dimensions,
@@ -173,40 +171,31 @@ class TensorBlockDescriptor {
IndexType dimension(int index) const { return m_dimensions[index]; }
IndexType size() const { return array_prod<IndexType>(m_dimensions); }
- template <typename Scalar>
- void AddDestinationBuffer(Scalar* dst_base, const Dimensions& dst_strides,
- size_t total_dst_bytes) {
+ const DestinationBuffer& destination() const { return m_destination; }
+
+ template <int Layout, typename Scalar>
+ void AddDestinationBuffer(Scalar* dst_base, const Dimensions& dst_strides) {
+ eigen_assert(dst_base != NULL);
m_destination =
- DestinationBuffer(dst_base, m_dimensions, dst_strides, total_dst_bytes);
+ DestinationBuffer::template make<Layout>(*this, dst_base, dst_strides);
}
- template <typename Scalar, typename DstStridesIndexType>
+ template <int Layout, typename Scalar, typename DstStridesIndexType>
void AddDestinationBuffer(
- Scalar* dst_base, const DSizes<DstStridesIndexType, NumDims>& dst_strides,
- size_t total_dst_bytes) {
+ Scalar* dst_base,
+ const DSizes<DstStridesIndexType, NumDims>& dst_strides) {
// DSizes constructor will do index type promotion if it's safe.
- AddDestinationBuffer(dst_base, Dimensions(dst_strides), total_dst_bytes);
+ AddDestinationBuffer<Layout>(dst_base, Dimensions(dst_strides));
}
TensorBlockDescriptor& DropDestinationBuffer() {
m_destination.m_data = NULL;
+ m_destination.m_kind = DestinationBuffer::kEmpty;
return *this;
}
- bool HasDestinationBuffer() const { return m_destination.m_data != NULL; }
-
- const DestinationBuffer& GetDestinationBuffer() const {
- return m_destination;
- }
-
- // Returns a non-nullptr pointer to a destination buffer memory if this
- // block has a contiguous destination buffer.
- template <typename Scalar, int Layout>
- Scalar* destination() const {
- if (m_destination.template fitsContiguously<Scalar, Layout>(*this)) {
- return m_destination.template data<Scalar>();
- }
- return NULL;
+ bool HasDestinationBuffer() const {
+ return m_destination.kind() != DestinationBuffer::kEmpty;
}
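
A hedged sketch of how the Layout-templated AddDestinationBuffer is meant to be called: an assignment evaluator with raw access to the output of the left-hand side can attach that memory to every block descriptor it produces. The LhsEvaluator interface (data()/dimensions()) and the helper name AttachDestination are assumptions about the surrounding code, not part of this file.

    // Attach the raw lhs output memory to a block descriptor, so blocks whose
    // strides line up with the output can be materialized directly into it.
    template <int Layout, typename LhsEvaluator, typename TensorBlockDesc>
    void AttachDestination(LhsEvaluator& lhs, TensorBlockDesc& desc) {
      if (lhs.data() != NULL) {
        desc.template AddDestinationBuffer<Layout>(
            /*dst_base=*/lhs.data() + desc.offset(),
            /*dst_strides=*/internal::strides<Layout>(lhs.dimensions()));
      }
    }

The descriptor itself decides whether the attached buffer ends up as kContiguous or kStrided, so the caller does not need to reason about block shapes.
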
// Returns a copy of `*this` with updated offset.
@@ -404,6 +393,80 @@ class TensorMaterializedBlock {
typedef internal::TensorBlockDescriptor<NumDims, IndexType> TensorBlockDesc;
+ // TensorMaterializedBlock can be backed by different types of storage:
+ //
+ // (1) Contiguous block of memory allocated with scratch allocator.
+ // (2) Contiguous block of memory reused from tensor block descriptor
+ // destination buffer.
+ // (3) Strided block of memory reused from tensor block descriptor
+ // destination buffer.
+ //
+ class Storage {
+ public:
+ Scalar* data() const { return m_data; }
+ const Dimensions& dimensions() const { return m_dimensions; }
+ const Dimensions& strides() const { return m_strides; }
+
+ TensorMaterializedBlock AsTensorMaterializedBlock() const {
+ return TensorMaterializedBlock(
+ m_materialized_in_output
+ ? internal::TensorBlockKind::kMaterializedInOutput
+ : internal::TensorBlockKind::kMaterializedInScratch,
+ m_data, m_dimensions, !m_strided_storage);
+ }
+
+ private:
+ friend class TensorMaterializedBlock;
+
+ Storage(Scalar* data, const Dimensions& dimensions,
+ const Dimensions& strides, bool materialized_in_output,
+ bool strided_storage)
+ : m_data(data),
+ m_dimensions(dimensions),
+ m_strides(strides),
+ m_materialized_in_output(materialized_in_output),
+ m_strided_storage(strided_storage) {}
+
+ Scalar* m_data;
+ Dimensions m_dimensions;
+ Dimensions m_strides;
+ bool m_materialized_in_output;
+ bool m_strided_storage;
+ };
+
+ // Creates storage for a materialized block, either from the block descriptor
+ // destination buffer, or by allocating a new buffer with the scratch allocator.
+ template <typename TensorBlockScratch>
+ EIGEN_STRONG_INLINE static Storage prepareStorage(
+ TensorBlockDesc& desc, TensorBlockScratch& scratch,
+ bool allow_strided_storage = false) {
+ // Try to reuse destination as an output block buffer.
+ typedef typename TensorBlockDesc::DestinationBuffer DestinationBuffer;
+
+ if (desc.destination().kind() == DestinationBuffer::kContiguous) {
+ Scalar* buffer = desc.destination().template data<Scalar>();
+ desc.DropDestinationBuffer();
+ return Storage(buffer, desc.dimensions(),
+ internal::strides<Layout>(desc.dimensions()),
+ /*materialized_in_output=*/true,
+ /*strided_storage=*/false);
+
+ } else if (desc.destination().kind() == DestinationBuffer::kStrided &&
+ allow_strided_storage) {
+ Scalar* buffer = desc.destination().template data<Scalar>();
+ desc.DropDestinationBuffer();
+ return Storage(buffer, desc.dimensions(), desc.destination().strides(),
+ /*materialized_in_output=*/true, /*strided_storage=*/true);
+
+ } else {
+ void* mem = scratch.allocate(desc.size() * sizeof(Scalar));
+ return Storage(static_cast<Scalar*>(mem), desc.dimensions(),
+ internal::strides<Layout>(desc.dimensions()),
+ /*materialized_in_output=*/false,
+ /*strided_storage=*/false);
+ }
+ }
+
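
A hedged sketch of the call pattern prepareStorage is designed for: a block evaluator asks for storage, writes the block into it, and wraps the result. FillValue is an illustrative name and not Eigen API; the real callers live in the individual tensor block evaluators.

    // Materialize a constant-valued block, preferring the contiguous
    // destination buffer and falling back to scratch memory. Strided
    // destinations are not requested here, so the linear writes below are
    // safe; with allow_strided_storage=true the writes would have to follow
    // storage.strides() instead.
    template <typename Block, typename Scalar, typename TensorBlockDesc,
              typename TensorBlockScratch>
    Block FillValue(TensorBlockDesc& desc, TensorBlockScratch& scratch,
                    const Scalar& value) {
      typename Block::Storage storage =
          Block::prepareStorage(desc, scratch, /*allow_strided_storage=*/false);
      Scalar* dst = storage.data();
      const Eigen::Index n = static_cast<Eigen::Index>(desc.size());
      for (Eigen::Index i = 0; i < n; ++i) dst[i] = value;
      return storage.AsTensorMaterializedBlock();
    }
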
// Creates a materialized block for the given descriptor from a memory buffer.
template <typename DataDimensions, typename TensorBlockScratch>
EIGEN_STRONG_INLINE static TensorMaterializedBlock materialize(
@@ -448,19 +511,8 @@ class TensorMaterializedBlock {
block_start, desc.dimensions());
} else {
- // Try to reuse destination as an output block buffer.
- Scalar* block_buffer = desc.template destination<Scalar, Layout>();
- bool materialized_in_output;
-
- if (block_buffer != NULL) {
- desc.DropDestinationBuffer();
- materialized_in_output = true;
-
- } else {
- materialized_in_output = false;
- void* mem = scratch.allocate(desc.size() * sizeof(Scalar));
- block_buffer = static_cast<Scalar*>(mem);
- }
+ // Reuse destination buffer or allocate new buffer with scratch allocator.
+ const Storage storage = prepareStorage(desc, scratch);
typedef internal::TensorBlockIOV2<Scalar, IndexType, NumDims, Layout>
TensorBlockIO;
@@ -469,17 +521,11 @@ class TensorMaterializedBlock {
TensorBlockIOSrc src(internal::strides<Layout>(Dimensions(data_dims)),
data, desc.offset());
- TensorBlockIODst dst(desc.dimensions(),
- internal::strides<Layout>(desc.dimensions()),
- block_buffer);
+ TensorBlockIODst dst(storage.dimensions(), storage.strides(),
+ storage.data());
TensorBlockIO::Copy(dst, src);
-
- return TensorMaterializedBlock(
- materialized_in_output
- ? internal::TensorBlockKind::kMaterializedInOutput
- : internal::TensorBlockKind::kMaterializedInScratch,
- block_buffer, desc.dimensions());
+ return storage.AsTensorMaterializedBlock();
}
}
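
Finally, a hedged sketch of the typical materialize() caller: a leaf evaluator with raw access to the underlying tensor data forwards its buffer and dimensions, and the helper above decides whether the block is copied into the descriptor's destination (kMaterializedInOutput) or into scratch memory (kMaterializedInScratch). The evaluator shape below (member names, blockV2 signature) is an assumption about the surrounding code, not something defined in this file.

    // Minimal leaf-evaluator-like wrapper around materialize(); `TensorBlock`
    // is assumed to be the matching TensorMaterializedBlock instantiation.
    template <typename TensorBlock, typename Scalar, typename Dimensions>
    struct LeafEvaluatorSketch {
      const Scalar* m_data;   // raw tensor storage
      Dimensions m_dims;      // full tensor dimensions

      template <typename TensorBlockDesc, typename TensorBlockScratch>
      TensorBlock blockV2(TensorBlockDesc& desc,
                          TensorBlockScratch& scratch) const {
        eigen_assert(m_data != NULL);
        return TensorBlock::materialize(m_data, m_dims, desc, scratch);
      }
    };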