diff options
author | 2019-10-10 10:56:58 -0700 | |
---|---|---|
committer | 2019-10-10 10:56:58 -0700 | |
commit | a411e9f344a354673b72a490433cf3cc2148ddf1 (patch) | |
tree | 65d0e152a0cc6649ecb8b67c0579386475dbaf53 /unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h | |
parent | b03eb63d7cb869cc4486ac393fad75fbcc36027f (diff) |
Block evaluation for TensorGenerator + TensorReverse + fixed bug in tensor reverse op
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h | 81 |
1 files changed, 79 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h index 639e1dbb0..38d0bf7d3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -94,7 +94,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device> IsAligned = false, PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), BlockAccess = true, - BlockAccessV2 = false, + BlockAccessV2 = true, PreferBlockAccess = true, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be implemented @@ -107,7 +107,12 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device> TensorBlock; //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// - typedef internal::TensorBlockNotImplemented TensorBlockV2; + typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; + + typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims, + Layout, Index> + TensorBlockV2; //===--------------------------------------------------------------------===// EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) @@ -232,6 +237,78 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device> } } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 + blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const { + static const bool is_col_major = + static_cast<int>(Layout) == static_cast<int>(ColMajor); + + // Compute spatial coordinates for the first block element. + array<Index, NumDims> coords; + extract_coordinates(desc.offset(), coords); + array<Index, NumDims> initial_coords = coords; + + // Try to reuse destination as an output block buffer. + CoeffReturnType* block_buffer = + desc.template destination<CoeffReturnType, Layout>(); + bool materialized_in_output; + + if (block_buffer != NULL) { + materialized_in_output = true; + + } else { + materialized_in_output = false; + void* mem = scratch.allocate(desc.size() * sizeof(CoeffReturnType)); + block_buffer = static_cast<CoeffReturnType*>(mem); + } + + // Offset in the output block buffer. + Index offset = 0; + + // Initialize output block iterator state. Dimension in this array are + // always in inner_most -> outer_most order (col major layout). + array<BlockIteratorState, NumDims> it; + for (int i = 0; i < NumDims; ++i) { + const int dim = is_col_major ? i : NumDims - 1 - i; + it[i].size = desc.dimension(dim); + it[i].stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].stride); + it[i].span = it[i].stride * (it[i].size - 1); + it[i].count = 0; + } + eigen_assert(it[0].stride == 1); + + while (it[NumDims - 1].count < it[NumDims - 1].size) { + // Generate data for the inner-most dimension. + for (Index i = 0; i < it[0].size; ++i) { + *(block_buffer + offset + i) = m_generator(coords); + coords[is_col_major ? 0 : NumDims - 1]++; + } + coords[is_col_major ? 0 : NumDims - 1] = + initial_coords[is_col_major ? 0 : NumDims - 1]; + + // For the 1d tensor we need to generate only one inner-most dimension. + if (NumDims == 1) break; + + // Update offset. + for (Index i = 1; i < NumDims; ++i) { + if (++it[i].count < it[i].size) { + offset += it[i].stride; + coords[is_col_major ? i : NumDims - 1 - i]++; + break; + } + if (i != NumDims - 1) it[i].count = 0; + coords[is_col_major ? i : NumDims - 1 - i] = + initial_coords[is_col_major ? i : NumDims - 1 - i]; + offset -= it[i].span; + } + } + + return TensorBlockV2( + materialized_in_output + ? internal::TensorBlockKind::kMaterializedInOutput + : internal::TensorBlockKind::kMaterializedInScratch, + block_buffer, desc.dimensions()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { // TODO(rmlarsen): This is just a placeholder. Define interface to make |