diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h | 112 |
1 files changed, 70 insertions, 42 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h index b8c592543..099d7cd57 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h @@ -45,6 +45,12 @@ EIGEN_ALWAYS_INLINE DSizes<IndexType, NumDims> strides( return strides; } +template<int Layout, typename IndexType, size_t NumDims> +EIGEN_ALWAYS_INLINE DSizes<IndexType, NumDims> strides( + const Eigen::array<IndexType, NumDims>& dimensions) { + return strides<Layout>(DSizes<IndexType, NumDims>(dimensions)); +} + #if EIGEN_HAS_CXX11 template <int Layout, std::ptrdiff_t... Indices> EIGEN_STRONG_INLINE DSizes<std::ptrdiff_t, sizeof...(Indices)> strides( @@ -78,23 +84,24 @@ class TensorBlockDescriptor { return static_cast<Scalar*>(m_data); } - private: - friend class TensorBlockDescriptor; - - DestinationBuffer() : m_data(NULL), m_total_dst_bytes(0) {} + template <typename Scalar> + Dimensions dimensions() const { + Dimensions dimensions; + for (int i = 0; i < NumDims; ++i) { + eigen_assert(m_dimensions[i] % sizeof(Scalar) == 0); + dimensions[i] = m_dimensions[i] / sizeof(Scalar); + } + return dimensions; + } template <typename Scalar> - DestinationBuffer(Scalar* data, const Dimensions& dimensions, - const Dimensions& strides, size_t total_dst_bytes) - : m_data(static_cast<void*>(data)), - m_dimensions(dimensions), - m_strides(strides), - m_total_dst_bytes(total_dst_bytes) { - // TODO(ezhulenev): Benchmark template meta-unroll for this loop. + Dimensions strides() const { + Dimensions strides; for (int i = 0; i < NumDims; ++i) { - m_dimensions[i] *= sizeof(Scalar); - m_strides[i] *= sizeof(Scalar); + eigen_assert(m_strides[i] % sizeof(Scalar) == 0); + strides[i] = m_strides[i] / sizeof(Scalar); } + return strides; } // Returns true if the tensor block corresponding to `desc` fits into the @@ -109,29 +116,34 @@ class TensorBlockDescriptor { if (!dimensions_match(desc_dims, dst_dims)) return false; const Dimensions& desc_strides = internal::strides<Layout>(desc_dims); - const Dimensions& dst_strides = internal::strides<Layout>(dst_dims); + const Dimensions& dst_strides = strides<Scalar>(); - return dimensions_match(desc_strides, dst_strides); - } - - template <typename Scalar> - Dimensions dimensions() const { - Dimensions dimensions; + // Compare strides ignoring dimensions of size `1`. for (int i = 0; i < NumDims; ++i) { - eigen_assert(m_dimensions[i] % sizeof(Scalar) == 0); - dimensions[i] = m_dimensions[i] / sizeof(Scalar); + if (desc_dims[i] == 1) continue; + if (desc_strides[i] != dst_strides[i]) return false; } - return dimensions; + + return true; } + private: + friend class TensorBlockDescriptor; + + DestinationBuffer() : m_data(NULL), m_total_dst_bytes(0) {} + template <typename Scalar> - Dimensions strides() const { - Dimensions strides; + DestinationBuffer(Scalar* data, const Dimensions& dimensions, + const Dimensions& strides, size_t total_dst_bytes) + : m_data(static_cast<void*>(data)), + m_dimensions(dimensions), + m_strides(strides), + m_total_dst_bytes(total_dst_bytes) { + // TODO(ezhulenev): Benchmark template meta-unroll for this loop. for (int i = 0; i < NumDims; ++i) { - eigen_assert(m_strides[i] % sizeof(Scalar) == 0); - strides[i] = m_strides[i] / sizeof(Scalar); + m_dimensions[i] *= sizeof(Scalar); + m_strides[i] *= sizeof(Scalar); } - return strides; } void* m_data; @@ -181,6 +193,12 @@ class TensorBlockDescriptor { return *this; } + bool HasDestinationBuffer() const { return m_destination.m_data != NULL; } + + const DestinationBuffer& GetDestinationBuffer() const { + return m_destination; + } + // Returns a non-nullptr pointer to a destination buffer memory if this // block has a contiguous destination buffer. template <typename Scalar, int Layout> @@ -191,6 +209,11 @@ class TensorBlockDescriptor { return NULL; } + // Returns a copy of `*this` with updated offset. + TensorBlockDescriptor WithOffset(IndexType offset) const { + return TensorBlockDescriptor(offset, m_dimensions, m_destination); + } + private: // Offset and dimensions are immutable after construction. Block descriptor // can only be mutated by adding or dropping destination. @@ -294,18 +317,12 @@ enum TensorBlockKind { // Tensor block that was materialized directly into the final output memory // buffer. For example if the left side of an assignment is a Tensor, we can - // directly materialize the block in the destination memory. The block - // expression is still a valid Tensor expression, and can be used to build - // lazy expressions. + // directly materialize the block in the destination memory. + // + // If strides in the output buffer do not match tensor block strides, the + // Tensor expression will be invalid, and should not be used by + // TensorBlockAssign or for constructing another block expression. kMaterializedInOutput - - // TODO(ezhulenev): If we know that we are evaluating a block, for the root of - // the expression tree, it might be beneficial to do an assignment to the - // output memory buffer, even if it will be impossible to construct a valid - // block expression after that (e.g. output memory buffer has strides not - // compatible with TensorMap). This might be a performance optimization for - // uniformly shaped blocks, because for blocks skewed towards inner dimension - // `kMaterializedInOutput` should always work. }; #if !EIGEN_HAS_CXX11 } // namespace TensorBlockKind @@ -346,6 +363,11 @@ struct XprScalar<void> { // Tensor), or a memory buffer allocated with scratch allocator, and in this // case the scratch allocator will deallocate it at the end of block based // expression execution. +// +// If the block was evaluated directly into the output buffer, and strides in +// the output buffer do not match block strides, the TensorMap expression will +// be invalid, and should never be used in block assignment or any other tensor +// expression. template <typename Scalar, int NumDims, int Layout, typename IndexType = Eigen::Index> @@ -358,11 +380,12 @@ class TensorMaterializedBlock { typedef TensorMap<const Tensor<Scalar, NumDims, Layout> > XprType; TensorMaterializedBlock(TensorBlockKind kind, const Scalar* data, - const Dimensions& dimensions) + const Dimensions& dimensions, bool valid_expr = true) : m_kind(kind), m_data(data), m_dimensions(dimensions), - m_expr(m_data, m_dimensions) { + m_expr(m_data, m_dimensions), + m_valid_expr(valid_expr) { eigen_assert(m_kind == internal::TensorBlockKind::kView || m_kind == internal::TensorBlockKind::kMaterializedInScratch || m_kind == internal::TensorBlockKind::kMaterializedInOutput); @@ -372,7 +395,10 @@ class TensorMaterializedBlock { // NOTE(ezhulenev): Returning XprType by value like in other block types // causes asan failures. The theory is that XprType::Nested doesn't work // properly for TensorMap. - const XprType& expr() const { return m_expr; } + const XprType& expr() const { + eigen_assert(m_valid_expr); + return m_expr; + } const Scalar* data() const { return m_data; } void cleanup() {} @@ -427,6 +453,7 @@ class TensorMaterializedBlock { bool materialized_in_output; if (block_buffer != NULL) { + desc.DropDestinationBuffer(); materialized_in_output = true; } else { @@ -461,6 +488,7 @@ class TensorMaterializedBlock { const Scalar* m_data; Dimensions m_dimensions; XprType m_expr; + bool m_valid_expr; }; // -------------------------------------------------------------------------- // |