author     Eugene Zhulenev <ezhulenev@google.com>  2019-09-24 15:17:35 -0700
committer  Eugene Zhulenev <ezhulenev@google.com>  2019-09-24 15:17:35 -0700
commit     c97b208468ccb2e6414fb4086ed997b5f1903d90 (patch)
tree       e2c6e8de3d37feaf634de201c29e2bb6b80c7461 /unsupported/test
parent     ef9dfee7bdc8e0d82c9b7ddf9414ef99d866d7ba (diff)
Add new TensorBlock api implementation + tests
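
The tests exercise the new block API roughly as follows (a minimal sketch distilled
from VerifyBlockEvaluator in cxx11_tensor_block_eval.cpp below; `expr`, `d`, `offset`
and `sizes` are placeholders, not part of the commit):

    // Scratch allocator and evaluator for a tensor expression on DefaultDevice `d`.
    internal::TensorBlockScratchAllocator<DefaultDevice> scratch(d);
    auto eval = TensorEvaluator<const decltype(expr), DefaultDevice>(expr, d);

    // Describe a block by the linear index of its first coefficient and its sizes.
    internal::TensorBlockDescriptor<NumDims, Index> desc(offset, sizes);

    // Evaluate the block, use its expression, then release temporary buffers.
    auto tensor_block = eval.blockV2(desc, scratch);
    auto b_expr = tensor_block.expr();
    // ... assign b_expr to a destination tensor via TensorAssignOp/TensorExecutor ...
    tensor_block.cleanup();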
Diffstat (limited to 'unsupported/test')
-rw-r--r--  unsupported/test/cxx11_tensor_block_eval.cpp   339
-rw-r--r--  unsupported/test/cxx11_tensor_block_io.cpp     438
2 files changed, 777 insertions, 0 deletions
diff --git a/unsupported/test/cxx11_tensor_block_eval.cpp b/unsupported/test/cxx11_tensor_block_eval.cpp
new file mode 100644
index 000000000..e85b81141
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_block_eval.cpp
@@ -0,0 +1,339 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// clang-format off
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+// clang-format on
+
+using Eigen::internal::TensorBlockDescriptor;
+using Eigen::internal::TensorExecutor;
+
+// -------------------------------------------------------------------------- //
+// Utility functions to generate random tensors, blocks, and evaluate them.
+
+template <int NumDims>
+static DSizes<Index, NumDims> RandomDims(Index min, Index max) {
+ DSizes<Index, NumDims> dims;
+ for (int i = 0; i < NumDims; ++i) {
+ dims[i] = internal::random<Index>(min, max);
+ }
+ return dims;
+}
+
+// Block offsets and extents allow constructing a TensorSlicingOp corresponding
+// to a TensorBlockDescriptor.
+template <int NumDims>
+struct TensorBlockParams {
+ DSizes<Index, NumDims> offsets;
+ DSizes<Index, NumDims> sizes;
+ TensorBlockDescriptor<NumDims, Index> desc;
+};
+
+template <int Layout, int NumDims>
+static TensorBlockParams<NumDims> RandomBlock(DSizes<Index, NumDims> dims,
+ Index min, Index max) {
+ // Choose random offsets and sizes along all tensor dimensions.
+ DSizes<Index, NumDims> offsets(RandomDims<NumDims>(min, max));
+ DSizes<Index, NumDims> sizes(RandomDims<NumDims>(min, max));
+
+ // Make sure that offset + size do not overflow dims.
+ for (int i = 0; i < NumDims; ++i) {
+ offsets[i] = numext::mini(dims[i] - 1, offsets[i]);
+ sizes[i] = numext::mini(sizes[i], dims[i] - offsets[i]);
+ }
+
+ Index offset = 0;
+ DSizes<Index, NumDims> strides = Eigen::internal::strides<Layout>(dims);
+ for (int i = 0; i < NumDims; ++i) {
+ offset += strides[i] * offsets[i];
+ }
+
+ return {offsets, sizes, TensorBlockDescriptor<NumDims, Index>(offset, sizes)};
+}
+
+// Generate a block with sizes skewed towards the inner dimensions. This type of
+// block is required for evaluating broadcast expressions.
+template <int Layout, int NumDims>
+static TensorBlockParams<NumDims> SkewedInnerBlock(
+ DSizes<Index, NumDims> dims) {
+ using BlockMapper = internal::TensorBlockMapper<int, Index, NumDims, Layout>;
+ BlockMapper block_mapper(dims,
+ internal::TensorBlockShapeType::kSkewedInnerDims,
+ internal::random<Index>(1, dims.TotalSize()));
+
+ Index total_blocks = block_mapper.total_block_count();
+ Index block_index = internal::random<Index>(0, total_blocks - 1);
+ auto block = block_mapper.GetBlockForIndex(block_index, nullptr);
+ DSizes<Index, NumDims> sizes = block.block_sizes();
+
+ auto strides = internal::strides<Layout>(dims);
+ DSizes<Index, NumDims> offsets;
+
+ // Compute offsets for the first block coefficient.
+ Index index = block.first_coeff_index();
+ if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+ for (int i = NumDims - 1; i > 0; --i) {
+ const Index idx = index / strides[i];
+ index -= idx * strides[i];
+ offsets[i] = idx;
+ }
+ offsets[0] = index;
+ } else {
+ for (int i = 0; i < NumDims - 1; ++i) {
+ const Index idx = index / strides[i];
+ index -= idx * strides[i];
+ offsets[i] = idx;
+ }
+ offsets[NumDims - 1] = index;
+ }
+
+ auto desc = TensorBlockDescriptor<NumDims>(block.first_coeff_index(), sizes);
+ return {offsets, sizes, desc};
+}
+
+template <int NumDims>
+static TensorBlockParams<NumDims> FixedSizeBlock(DSizes<Index, NumDims> dims) {
+ DSizes<Index, NumDims> offsets;
+ for (int i = 0; i < NumDims; ++i) offsets[i] = 0;
+
+ return {offsets, dims, TensorBlockDescriptor<NumDims, Index>(0, dims)};
+}
+
+// -------------------------------------------------------------------------- //
+// Verify that block expression evaluation produces the same result as a
+// TensorSliceOp (reading a tensor block is the same as taking a tensor slice).
+
+template <typename T, int NumDims, int Layout, typename Expression,
+ typename GenBlockParams>
+static void VerifyBlockEvaluator(Expression expr, GenBlockParams gen_block) {
+ using Device = DefaultDevice;
+ auto d = Device();
+
+ // Scratch memory allocator for block evaluation.
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+ TensorBlockScratch scratch(d);
+
+ // TensorEvaluator is needed to produce tensor blocks of the expression.
+ auto eval = TensorEvaluator<const decltype(expr), Device>(expr, d);
+
+ // Choose random offsets and sizes, and build a TensorBlockDescriptor.
+ TensorBlockParams<NumDims> block_params = gen_block();
+
+ // Evaluate TensorBlock expression into a tensor.
+ Tensor<T, NumDims, Layout> block(block_params.desc.dimensions());
+
+ // Optionally use this tensor as the block descriptor's destination buffer.
+ Tensor<T, NumDims, Layout> dst(block_params.desc.dimensions());
+ if (internal::random<bool>()) {
+ block_params.desc.template AddDestinationBuffer(
+ dst.data(), internal::strides<Layout>(dst.dimensions()),
+ dst.dimensions().TotalSize() * sizeof(T));
+ }
+
+ auto tensor_block = eval.blockV2(block_params.desc, scratch);
+ auto b_expr = tensor_block.expr();
+
+ // We explicitly disable vectorization and tiling to run a plain coefficient-wise
+ // assignment loop, which is trivial and therefore trusted to be correct.
+ using BlockAssign = TensorAssignOp<decltype(block), const decltype(b_expr)>;
+ using BlockExecutor = TensorExecutor<const BlockAssign, Device, false,
+ internal::TiledEvaluation::Off>;
+ BlockExecutor::run(BlockAssign(block, b_expr), d);
+
+ // Cleanup temporary buffers owned by a tensor block.
+ tensor_block.cleanup();
+
+ // Compute a Tensor slice corresponding to a Tensor block.
+ Tensor<T, NumDims, Layout> slice(block_params.desc.dimensions());
+ auto s_expr = expr.slice(block_params.offsets, block_params.sizes);
+
+ // Explicitly use coefficient assignment to evaluate slice expression.
+ using SliceAssign = TensorAssignOp<decltype(slice), const decltype(s_expr)>;
+ using SliceExecutor = TensorExecutor<const SliceAssign, Device, false,
+ internal::TiledEvaluation::Off>;
+ SliceExecutor::run(SliceAssign(slice, s_expr), d);
+
+ // Tensor block and tensor slice must be the same.
+ for (Index i = 0; i < block.dimensions().TotalSize(); ++i) {
+ VERIFY_IS_EQUAL(block.coeff(i), slice.coeff(i));
+ }
+}
+
+// -------------------------------------------------------------------------- //
+
+template <typename T, int NumDims, int Layout>
+static void test_eval_tensor_block() {
+ DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
+ Tensor<T, NumDims, Layout> input(dims);
+ input.setRandom();
+
+ // Identity tensor expression transformation.
+ VerifyBlockEvaluator<T, NumDims, Layout>(
+ input, [&dims]() { return RandomBlock<Layout>(dims, 10, 20); });
+}
+
+template <typename T, int NumDims, int Layout>
+static void test_eval_tensor_unary_expr_block() {
+ DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
+ Tensor<T, NumDims, Layout> input(dims);
+ input.setRandom();
+
+ VerifyBlockEvaluator<T, NumDims, Layout>(
+ input.square(), [&dims]() { return RandomBlock<Layout>(dims, 10, 20); });
+}
+
+template <typename T, int NumDims, int Layout>
+static void test_eval_tensor_binary_expr_block() {
+ DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
+ Tensor<T, NumDims, Layout> lhs(dims), rhs(dims);
+ lhs.setRandom();
+ rhs.setRandom();
+
+ VerifyBlockEvaluator<T, NumDims, Layout>(
+ lhs + rhs, [&dims]() { return RandomBlock<Layout>(dims, 10, 20); });
+}
+
+template <typename T, int NumDims, int Layout>
+static void test_eval_tensor_binary_with_unary_expr_block() {
+ DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
+ Tensor<T, NumDims, Layout> lhs(dims), rhs(dims);
+ lhs.setRandom();
+ rhs.setRandom();
+
+ VerifyBlockEvaluator<T, NumDims, Layout>(
+ (lhs.square() + rhs.square()).sqrt(),
+ [&dims]() { return RandomBlock<Layout>(dims, 10, 20); });
+}
+
+template <typename T, int NumDims, int Layout>
+static void test_eval_tensor_broadcast() {
+ DSizes<Index, NumDims> dims = RandomDims<NumDims>(1, 10);
+ Tensor<T, NumDims, Layout> input(dims);
+ input.setRandom();
+
+ DSizes<Index, NumDims> bcast = RandomDims<NumDims>(1, 5);
+
+ DSizes<Index, NumDims> bcasted_dims;
+ for (int i = 0; i < NumDims; ++i) bcasted_dims[i] = dims[i] * bcast[i];
+
+ VerifyBlockEvaluator<T, NumDims, Layout>(
+ input.broadcast(bcast),
+ [&bcasted_dims]() { return SkewedInnerBlock<Layout>(bcasted_dims); });
+
+ VerifyBlockEvaluator<T, NumDims, Layout>(
+ input.broadcast(bcast),
+ [&bcasted_dims]() { return FixedSizeBlock(bcasted_dims); });
+
+ // Check that desc.destination() memory is not shared between two broadcast
+ // materializations.
+ VerifyBlockEvaluator<T, NumDims, Layout>(
+ input.broadcast(bcast) + input.square().broadcast(bcast),
+ [&bcasted_dims]() { return SkewedInnerBlock<Layout>(bcasted_dims); });
+}
+
+// -------------------------------------------------------------------------- //
+// Verify that assigning a block to a Tensor expression produces the same result
+// as an assignment to a TensorSliceOp (writing a block is identical to assigning
+// one tensor to a slice of another tensor).
+
+template <typename T, int NumDims, int Layout, typename Expression,
+ typename GenBlockParams>
+static void VerifyBlockAssignment(Tensor<T, NumDims, Layout>& tensor,
+ Expression expr, GenBlockParams gen_block) {
+ using Device = DefaultDevice;
+ auto d = Device();
+
+ // We use the tensor evaluator as a target for block and slice assignments.
+ auto eval = TensorEvaluator<decltype(expr), Device>(expr, d);
+
+ // Generate a random block, or choose a block that covers the full expression.
+ TensorBlockParams<NumDims> block_params = gen_block();
+
+ // Generate random data of the selected block size.
+ Tensor<T, NumDims, Layout> block(block_params.desc.dimensions());
+ block.setRandom();
+
+ // ************************************************************************ //
+ // (1) Assignment from a block.
+
+ // Construct a materialized block from a randomly generated block tensor.
+ internal::TensorMaterializedBlock<T, NumDims, Layout> blk(
+ internal::TensorBlockKind::kView, block.data(), block.dimensions());
+
+ // Reset all underlying tensor values to zero.
+ tensor.setZero();
+
+ // Use evaluator to write block into a tensor.
+ eval.writeBlockV2(block_params.desc, blk);
+
+ // Make a copy of the result after assignment.
+ Tensor<T, NumDims, Layout> block_assigned = tensor;
+
+ // ************************************************************************ //
+ // (2) Assignment to a slice
+
+ // Reset all underlying tensor values to zero.
+ tensor.setZero();
+
+ // Assign the block to a slice of the original expression.
+ auto s_expr = expr.slice(block_params.offsets, block_params.sizes);
+
+ // Explicitly use coefficient assignment to evaluate slice expression.
+ using SliceAssign = TensorAssignOp<decltype(s_expr), const decltype(block)>;
+ using SliceExecutor = TensorExecutor<const SliceAssign, Device, false,
+ internal::TiledEvaluation::Off>;
+ SliceExecutor::run(SliceAssign(s_expr, block), d);
+
+ // Make a copy of the result after assignment.
+ Tensor<T, NumDims, Layout> slice_assigned = tensor;
+
+ for (Index i = 0; i < tensor.dimensions().TotalSize(); ++i) {
+ VERIFY_IS_EQUAL(block_assigned.coeff(i), slice_assigned.coeff(i));
+ }
+}
+
+// -------------------------------------------------------------------------- //
+
+template <typename T, int NumDims, int Layout>
+static void test_assign_tensor_block() {
+ DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
+ Tensor<T, NumDims, Layout> tensor(dims);
+
+ TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
+
+ VerifyBlockAssignment<T, NumDims, Layout>(
+ tensor, map, [&dims]() { return RandomBlock<Layout>(dims, 10, 20); });
+ VerifyBlockAssignment<T, NumDims, Layout>(
+ tensor, map, [&dims]() { return FixedSizeBlock(dims); });
+}
+
+// -------------------------------------------------------------------------- //
+
+//#define CALL_SUBTESTS(NAME) CALL_SUBTEST((NAME<float, 2, RowMajor>()))
+
+#define CALL_SUBTESTS(NAME) \
+ CALL_SUBTEST((NAME<float, 1, RowMajor>())); \
+ CALL_SUBTEST((NAME<float, 2, RowMajor>())); \
+ CALL_SUBTEST((NAME<float, 4, RowMajor>())); \
+ CALL_SUBTEST((NAME<float, 5, RowMajor>())); \
+ CALL_SUBTEST((NAME<float, 1, ColMajor>())); \
+ CALL_SUBTEST((NAME<float, 2, ColMajor>())); \
+ CALL_SUBTEST((NAME<float, 4, ColMajor>())); \
+ CALL_SUBTEST((NAME<float, 5, ColMajor>()))
+
+EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) {
+ // clang-format off
+ CALL_SUBTESTS(test_eval_tensor_block);
+ CALL_SUBTESTS(test_eval_tensor_unary_expr_block);
+ CALL_SUBTESTS(test_eval_tensor_binary_expr_block);
+ CALL_SUBTESTS(test_eval_tensor_binary_with_unary_expr_block);
+ CALL_SUBTESTS(test_eval_tensor_broadcast);
+
+ CALL_SUBTESTS(test_assign_tensor_block);
+ // clang-format on
+}
diff --git a/unsupported/test/cxx11_tensor_block_io.cpp b/unsupported/test/cxx11_tensor_block_io.cpp
new file mode 100644
index 000000000..8a03c7dd4
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_block_io.cpp
@@ -0,0 +1,438 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// clang-format off
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+// clang-format on
+
+#include <algorithm>  // std::shuffle
+#include <random>     // std::mt19937
+
+// -------------------------------------------------------------------------- //
+// A set of tests for TensorBlockIO: copying data between tensor blocks.
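+//
+// The tests below drive internal::TensorBlockIOV2 through its Dst/Src/Copy API:
+//
+//   IODst dst(dst_dims, dst_strides, dst_data, dst_offset);
+//   IOSrc src(src_strides, src_data, src_offset);
+//   TensorBlockIO::Copy(dst, src /*, optional dst_to_src_dim_map */);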
+
+template <int NumDims>
+static DSizes<Index, NumDims> RandomDims(Index min, Index max) {
+ DSizes<Index, NumDims> dims;
+ for (int i = 0; i < NumDims; ++i) {
+ dims[i] = internal::random<Index>(min, max);
+ }
+ return dims;
+}
+
+static internal::TensorBlockShapeType RandomBlockShape() {
+ return internal::random<bool>() ? internal::kUniformAllDims
+ : internal::kSkewedInnerDims;
+}
+
+template <int NumDims>
+static Index RandomTargetBlockSize(const DSizes<Index, NumDims>& dims) {
+ return internal::random<Index>(1, dims.TotalSize());
+}
+
+template <int Layout, int NumDims>
+static Index GetInputIndex(Index output_index,
+ const array<Index, NumDims>& output_to_input_dim_map,
+ const array<Index, NumDims>& input_strides,
+ const array<Index, NumDims>& output_strides) {
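+ // Delinearize `output_index` one dimension at a time (outer to inner for the
+ // given Layout) using the output strides, remap each dimension through
+ // `output_to_input_dim_map`, and re-linearize with the input strides.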
+ Index input_index = 0;
+ if (Layout == ColMajor) {
+ for (int i = NumDims - 1; i > 0; --i) {
+ const Index idx = output_index / output_strides[i];
+ input_index += idx * input_strides[output_to_input_dim_map[i]];
+ output_index -= idx * output_strides[i];
+ }
+ return input_index +
+ output_index * input_strides[output_to_input_dim_map[0]];
+ } else {
+ for (int i = 0; i < NumDims - 1; ++i) {
+ const Index idx = output_index / output_strides[i];
+ input_index += idx * input_strides[output_to_input_dim_map[i]];
+ output_index -= idx * output_strides[i];
+ }
+ return input_index +
+ output_index * input_strides[output_to_input_dim_map[NumDims - 1]];
+ }
+}
+
+template <typename T, int NumDims, int Layout>
+static void test_block_io_copy_data_from_source_to_target() {
+ using TensorBlockIO = internal::TensorBlockIOV2<T, Index, NumDims, Layout>;
+ using IODst = typename TensorBlockIO::Dst;
+ using IOSrc = typename TensorBlockIO::Src;
+
+ // Generate a random input Tensor.
+ DSizes<Index, NumDims> dims = RandomDims<NumDims>(1, 30);
+ Tensor<T, NumDims, Layout> input(dims);
+ input.setRandom();
+
+ // Write data to an output Tensor.
+ Tensor<T, NumDims, Layout> output(dims);
+
+ // Construct a tensor block mapper.
+ using TensorBlockMapper =
+ internal::TensorBlockMapper<T, Index, NumDims, Layout>;
+ TensorBlockMapper block_mapper(dims, RandomBlockShape(),
+ RandomTargetBlockSize(dims));
+
+ // We will copy data from input to output through this buffer.
+ Tensor<T, NumDims, Layout> block(block_mapper.block_dim_sizes());
+
+ // Precompute strides for TensorBlockIO::Copy.
+ auto input_strides = internal::strides<Layout>(dims);
+ auto output_strides = internal::strides<Layout>(dims);
+
+ const T* input_data = input.data();
+ T* output_data = output.data();
+ T* block_data = block.data();
+
+ for (int i = 0; i < block_mapper.total_block_count(); ++i) {
+ using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
+ TensorBlock blk = block_mapper.GetBlockForIndex(i, block_data);
+
+ auto blk_dims = blk.block_sizes();
+ auto blk_strides = internal::strides<Layout>(blk_dims);
+
+ {
+ // Read from input into a block buffer.
+ IODst dst(blk_dims, blk_strides, block_data, 0);
+ IOSrc src(input_strides, input_data, blk.first_coeff_index());
+
+ TensorBlockIO::Copy(dst, src);
+ }
+
+ {
+ // Write from block buffer to output.
+ IODst dst(blk_dims, output_strides, output_data, blk.first_coeff_index());
+ IOSrc src(blk_strides, block_data, 0);
+
+ TensorBlockIO::Copy(dst, src);
+ }
+ }
+
+ for (int i = 0; i < dims.TotalSize(); ++i) {
+ VERIFY_IS_EQUAL(input_data[i], output_data[i]);
+ }
+}
+
+template <typename T, int NumDims, int Layout>
+static void test_block_io_copy_using_reordered_dimensions() {
+ // Generate a random input Tensor.
+ DSizes<Index, NumDims> dims = RandomDims<NumDims>(1, 30);
+ Tensor<T, NumDims, Layout> input(dims);
+ input.setRandom();
+
+ // Create a random dimension re-ordering/shuffle.
+ std::vector<int> shuffle;
+
+ for (int i = 0; i < NumDims; ++i) shuffle.push_back(i);
+ std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937(g_seed));
+
+ DSizes<Index, NumDims> output_tensor_dims;
+ DSizes<Index, NumDims> input_to_output_dim_map;
+ DSizes<Index, NumDims> output_to_input_dim_map;
+ for (Index i = 0; i < NumDims; ++i) {
+ output_tensor_dims[shuffle[i]] = dims[i];
+ input_to_output_dim_map[i] = shuffle[i];
+ output_to_input_dim_map[shuffle[i]] = i;
+ }
+
+ // Write data to an output Tensor.
+ Tensor<T, NumDims, Layout> output(output_tensor_dims);
+
+ // Construct a tensor block mapper.
+ // NOTE: Tensor block mapper works with shuffled dimensions.
+ using TensorBlockMapper =
+ internal::TensorBlockMapper<T, Index, NumDims, Layout>;
+ TensorBlockMapper block_mapper(output_tensor_dims, RandomBlockShape(),
+ RandomTargetBlockSize(output_tensor_dims));
+
+ // We will copy data from input to output through this buffer.
+ Tensor<T, NumDims, Layout> block(block_mapper.block_dim_sizes());
+
+ // Precompute strides for TensorBlockIO::Copy.
+ auto input_strides = internal::strides<Layout>(dims);
+ auto output_strides = internal::strides<Layout>(output_tensor_dims);
+
+ const T* input_data = input.data();
+ T* output_data = output.data();
+ T* block_data = block.data();
+
+ for (Index i = 0; i < block_mapper.total_block_count(); ++i) {
+ using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
+ TensorBlock blk = block_mapper.GetBlockForIndex(i, block_data);
+
+ const Index first_coeff_index = GetInputIndex<Layout, NumDims>(
+ blk.first_coeff_index(), output_to_input_dim_map, input_strides,
+ output_strides);
+
+ // NOTE: Block dimensions are in the same order as output dimensions.
+
+ using TensorBlockIO = internal::TensorBlockIOV2<T, Index, NumDims, Layout>;
+ using IODst = typename TensorBlockIO::Dst;
+ using IOSrc = typename TensorBlockIO::Src;
+
+ auto blk_dims = blk.block_sizes();
+ auto blk_strides = internal::strides<Layout>(blk_dims);
+
+ {
+ // Read from input into a block buffer.
+ IODst dst(blk_dims, blk_strides, block_data, 0);
+ IOSrc src(input_strides, input_data, first_coeff_index);
+
+ TensorBlockIO::Copy(dst, src,
+ /*dst_to_src_dim_map=*/output_to_input_dim_map);
+ }
+
+ {
+ // We need to convert block dimensions from output to input order.
+ auto dst_dims = blk_dims;
+ for (int out_dim = 0; out_dim < NumDims; ++out_dim) {
+ dst_dims[output_to_input_dim_map[out_dim]] = blk_dims[out_dim];
+ }
+
+ // Write from block buffer to output.
+ IODst dst(dst_dims, input_strides, output_data, first_coeff_index);
+ IOSrc src(blk_strides, block_data, 0);
+
+ TensorBlockIO::Copy(dst, src,
+ /*dst_to_src_dim_map=*/input_to_output_dim_map);
+ }
+ }
+
+ for (Index i = 0; i < dims.TotalSize(); ++i) {
+ VERIFY_IS_EQUAL(input_data[i], output_data[i]);
+ }
+}
+
+// This is the special case of reading data with reordering when the dimensions
+// before and after reordering are the same. Squeezing reads along the inner
+// dimensions is illegal in this case, because we reorder the innermost dimension.
+template <int Layout>
+static void test_block_io_copy_using_reordered_dimensions_do_not_squeeze() {
+ DSizes<Index, 3> tensor_dims(7, 9, 7);
+ DSizes<Index, 3> block_dims = tensor_dims;
+
+ DSizes<Index, 3> block_to_tensor_dim;
+ block_to_tensor_dim[0] = 2;
+ block_to_tensor_dim[1] = 1;
+ block_to_tensor_dim[2] = 0;
+
+ auto tensor_strides = internal::strides<Layout>(tensor_dims);
+ auto block_strides = internal::strides<Layout>(block_dims);
+
+ Tensor<float, 3, Layout> block(block_dims);
+ Tensor<float, 3, Layout> tensor(tensor_dims);
+ tensor.setRandom();
+
+ float* tensor_data = tensor.data();
+ float* block_data = block.data();
+
+ typedef internal::TensorBlock<float, Index, 3, Layout> TensorBlock;
+ TensorBlock blk(0, block_dims, block_strides, tensor_strides, block_data);
+
+ using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 3, Layout>;
+ using IODst = typename TensorBlockIO::Dst;
+ using IOSrc = typename TensorBlockIO::Src;
+
+ // Read from a tensor into a block.
+ IODst dst(blk.block_sizes(), block_strides, block_data, 0);
+ IOSrc src(tensor_strides, tensor_data, blk.first_coeff_index());
+
+ TensorBlockIO::Copy(dst, src,
+ /*dst_to_src_dim_map=*/block_to_tensor_dim);
+
+ TensorMap<Tensor<float, 3, Layout> > block_tensor(block_data, block_dims);
+ TensorMap<Tensor<float, 3, Layout> > tensor_tensor(tensor_data, tensor_dims);
+
+ for (Index d0 = 0; d0 < tensor_dims[0]; ++d0) {
+ for (Index d1 = 0; d1 < tensor_dims[1]; ++d1) {
+ for (Index d2 = 0; d2 < tensor_dims[2]; ++d2) {
+ float block_value = block_tensor(d2, d1, d0);
+ float tensor_value = tensor_tensor(d0, d1, d2);
+ VERIFY_IS_EQUAL(block_value, tensor_value);
+ }
+ }
+ }
+}
+
+// This is the special case of reading data with reordering when the dimensions
+// before and after reordering are the same. Squeezing reads is allowed in this
+// case because we only reorder the outer dimensions.
+template <int Layout>
+static void test_block_io_copy_using_reordered_dimensions_squeeze() {
+ DSizes<Index, 4> tensor_dims(7, 5, 9, 9);
+ DSizes<Index, 4> block_dims = tensor_dims;
+
+ DSizes<Index, 4> block_to_tensor_dim;
+ block_to_tensor_dim[0] = 0;
+ block_to_tensor_dim[1] = 1;
+ block_to_tensor_dim[2] = 3;
+ block_to_tensor_dim[3] = 2;
+
+ auto tensor_strides = internal::strides<Layout>(tensor_dims);
+ auto block_strides = internal::strides<Layout>(block_dims);
+
+ Tensor<float, 4, Layout> block(block_dims);
+ Tensor<float, 4, Layout> tensor(tensor_dims);
+ tensor.setRandom();
+
+ float* tensor_data = tensor.data();
+ float* block_data = block.data();
+
+ typedef internal::TensorBlock<float, Index, 4, Layout> TensorBlock;
+ TensorBlock blk(0, block_dims, block_strides, tensor_strides, block_data);
+
+ using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 4, Layout>;
+ using IODst = typename TensorBlockIO::Dst;
+ using IOSrc = typename TensorBlockIO::Src;
+
+ // Read from a tensor into a block.
+ IODst dst(blk.block_sizes(), block_strides, block_data, 0);
+ IOSrc src(tensor_strides, tensor_data, blk.first_coeff_index());
+
+ TensorBlockIO::Copy(dst, src,
+ /*dst_to_src_dim_map=*/block_to_tensor_dim);
+
+ TensorMap<Tensor<float, 4, Layout> > block_tensor(block_data, block_dims);
+ TensorMap<Tensor<float, 4, Layout> > tensor_tensor(tensor_data, tensor_dims);
+
+ for (Index d0 = 0; d0 < tensor_dims[0]; ++d0) {
+ for (Index d1 = 0; d1 < tensor_dims[1]; ++d1) {
+ for (Index d2 = 0; d2 < tensor_dims[2]; ++d2) {
+ for (Index d3 = 0; d3 < tensor_dims[3]; ++d3) {
+ float block_value = block_tensor(d0, d1, d3, d2);
+ float tensor_value = tensor_tensor(d0, d1, d2, d3);
+ VERIFY_IS_EQUAL(block_value, tensor_value);
+ }
+ }
+ }
+ }
+}
+
+template <int Layout>
+static void test_block_io_zero_stride() {
+ DSizes<Index, 5> rnd_dims = RandomDims<5>(1, 30);
+
+ DSizes<Index, 5> input_tensor_dims = rnd_dims;
+ input_tensor_dims[0] = 1;
+ input_tensor_dims[2] = 1;
+ input_tensor_dims[4] = 1;
+
+ Tensor<float, 5, Layout> input(input_tensor_dims);
+ input.setRandom();
+
+ DSizes<Index, 5> output_tensor_dims = rnd_dims;
+
+ auto input_tensor_strides = internal::strides<Layout>(input_tensor_dims);
+ auto output_tensor_strides = internal::strides<Layout>(output_tensor_dims);
+
+ auto input_tensor_strides_with_zeros = input_tensor_strides;
+ input_tensor_strides_with_zeros[0] = 0;
+ input_tensor_strides_with_zeros[2] = 0;
+ input_tensor_strides_with_zeros[4] = 0;
+
+ Tensor<float, 5, Layout> output(output_tensor_dims);
+ output.setRandom();
+
+ using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 5, Layout>;
+ using IODst = typename TensorBlockIO::Dst;
+ using IOSrc = typename TensorBlockIO::Src;
+
+ // Write data from input to output with broadcasting in dims [0, 2, 4].
+ IODst dst(output_tensor_dims, output_tensor_strides, output.data(), 0);
+ IOSrc src(input_tensor_strides_with_zeros, input.data(), 0);
+ TensorBlockIO::Copy(dst, src);
+
+ for (int i = 0; i < output_tensor_dims[0]; ++i) {
+ for (int j = 0; j < output_tensor_dims[1]; ++j) {
+ for (int k = 0; k < output_tensor_dims[2]; ++k) {
+ for (int l = 0; l < output_tensor_dims[3]; ++l) {
+ for (int m = 0; m < output_tensor_dims[4]; ++m) {
+ float input_value = input(0, j, 0, l, 0);
+ float output_value = output(i, j, k, l, m);
+ VERIFY_IS_EQUAL(input_value, output_value);
+ }
+ }
+ }
+ }
+ }
+}
+
+template <int Layout>
+static void test_block_io_squeeze_ones() {
+ using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 5, Layout>;
+ using IODst = typename TensorBlockIO::Dst;
+ using IOSrc = typename TensorBlockIO::Src;
+
+ // Total size > 1.
+ {
+ DSizes<Index, 5> block_sizes(1, 2, 1, 2, 1);
+ auto strides = internal::strides<Layout>(block_sizes);
+
+ // Create a random input tensor.
+ Tensor<float, 5> input(block_sizes);
+ input.setRandom();
+
+ Tensor<float, 5> output(block_sizes);
+
+ IODst dst(block_sizes, strides, output.data(), 0);
+ IOSrc src(strides, input.data());
+ TensorBlockIO::Copy(dst, src);
+
+ for (Index i = 0; i < block_sizes.TotalSize(); ++i) {
+ VERIFY_IS_EQUAL(output.data()[i], input.data()[i]);
+ }
+ }
+
+ // Total size == 1.
+ {
+ DSizes<Index, 5> block_sizes(1, 1, 1, 1, 1);
+ auto strides = internal::strides<Layout>(block_sizes);
+
+ // Create a random input tensor.
+ Tensor<float, 5> input(block_sizes);
+ input.setRandom();
+
+ Tensor<float, 5> output(block_sizes);
+
+ IODst dst(block_sizes, strides, output.data(), 0);
+ IOSrc src(strides, input.data());
+ TensorBlockIO::Copy(dst, src);
+
+ for (Index i = 0; i < block_sizes.TotalSize(); ++i) {
+ VERIFY_IS_EQUAL(output.data()[i], input.data()[i]);
+ }
+ }
+}
+
+#define CALL_SUBTESTS(NAME) \
+ CALL_SUBTEST((NAME<float, 1, RowMajor>())); \
+ CALL_SUBTEST((NAME<float, 2, RowMajor>())); \
+ CALL_SUBTEST((NAME<float, 4, RowMajor>())); \
+ CALL_SUBTEST((NAME<float, 5, RowMajor>())); \
+ CALL_SUBTEST((NAME<float, 1, ColMajor>())); \
+ CALL_SUBTEST((NAME<float, 2, ColMajor>())); \
+ CALL_SUBTEST((NAME<float, 4, ColMajor>())); \
+ CALL_SUBTEST((NAME<float, 5, ColMajor>()))
+
+EIGEN_DECLARE_TEST(cxx11_tensor_block_io) {
+ // clang-format off
+ CALL_SUBTESTS(test_block_io_copy_data_from_source_to_target);
+ CALL_SUBTESTS(test_block_io_copy_using_reordered_dimensions);
+
+ CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_do_not_squeeze<RowMajor>());
+ CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_do_not_squeeze<ColMajor>());
+
+ CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_squeeze<RowMajor>());
+ CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_squeeze<ColMajor>());
+
+ CALL_SUBTEST(test_block_io_zero_stride<RowMajor>());
+ CALL_SUBTEST(test_block_io_zero_stride<ColMajor>());
+
+ CALL_SUBTEST(test_block_io_squeeze_ones<RowMajor>());
+ CALL_SUBTEST(test_block_io_squeeze_ones<ColMajor>());
+ // clang-format on
+}
\ No newline at end of file