aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h46
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h3
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h8
-rw-r--r--unsupported/test/cxx11_tensor_block_eval.cpp71
4 files changed, 95 insertions, 33 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index bf7522682..d1e4c82d2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -111,22 +111,28 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = true,
- BlockAccessV2 = false,
+ BlockAccessV2 = true,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = true
};
- typedef typename internal::TensorBlock<
- CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
- TensorBlock;
- typedef typename internal::TensorBlockReader<
- CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
- TensorBlockReader;
+ static const int NumDims = internal::traits<ArgType>::NumDimensions;
+
+ typedef typename internal::TensorBlock<CoeffReturnType, Index, NumDims, Layout> TensorBlock;
+ typedef typename internal::TensorBlockReader<CoeffReturnType, Index, NumDims, Layout> TensorBlockReader;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
- typedef internal::TensorBlockNotImplemented TensorBlockV2;
+ typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+ typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
+ ArgTensorBlock;
+
+ typedef internal::TensorBlockAssignment<
+ Scalar, NumDims, typename ArgTensorBlock::XprType, Index>
+ TensorBlockAssignment;
//===--------------------------------------------------------------------===//
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -164,6 +170,30 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
m_impl.block(&eval_to_block);
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlockV2(
+ TensorBlockDesc& desc, TensorBlockScratch& scratch) {
+ // Add `m_buffer` as destination buffer to the block descriptor.
+ desc.AddDestinationBuffer(
+ /*dst_base=*/m_buffer + desc.offset(),
+ /*dst_strides=*/internal::strides<Layout>(m_impl.dimensions()),
+ /*total_dst_bytes=*/
+ (internal::array_prod(m_impl.dimensions())
+ * sizeof(Scalar)));
+
+ ArgTensorBlock block = m_impl.blockV2(desc, scratch);
+
+ // If block was evaluated into a destination buffer, there is no need to do
+ // an assignment.
+ if (block.kind() != internal::TensorBlockKind::kMaterializedInOutput) {
+ TensorBlockAssignment::Run(
+ TensorBlockAssignment::target(
+ desc.dimensions(), internal::strides<Layout>(m_impl.dimensions()),
+ m_buffer, desc.offset()),
+ block.expr());
+ }
+ block.cleanup();
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index a7cb8dc97..97ac96db1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -238,7 +238,8 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
typedef typename TensorBlock::Dimensions TensorBlockDimensions;
- typedef internal::TensorBlockDescriptor<NumDims> TensorBlockDesc;
+ typedef internal::TensorBlockDescriptor<NumDims, StorageIndex>
+ TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<DefaultDevice>
TensorBlockScratch;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index 489b915ac..f3907be6e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -231,7 +231,11 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const {
- eigen_assert(m_impl.data() != NULL);
+ // If one of the dimensions is zero, return empty block view.
+ if (desc.size() == 0) {
+ return TensorBlockV2(internal::TensorBlockKind::kView, NULL,
+ desc.dimensions());
+ }
// Check if we can reuse `desc` destination, or allocate new scratch buffer.
ScalarNoConst* materialized_output =
@@ -385,6 +389,8 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
const Index out = output_offset + output_inner_pad_before_size;
const Index in = input_offset + output_inner_pad_before_size;
+ eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);
+
LinCopy::template Run<LinCopy::Kind::Linear>(
typename LinCopy::Dst(out, 1, materialized_output),
typename LinCopy::Src(in, 1, m_impl.data()),
diff --git a/unsupported/test/cxx11_tensor_block_eval.cpp b/unsupported/test/cxx11_tensor_block_eval.cpp
index 75252362c..1dc0a9e2c 100644
--- a/unsupported/test/cxx11_tensor_block_eval.cpp
+++ b/unsupported/test/cxx11_tensor_block_eval.cpp
@@ -131,6 +131,7 @@ static void VerifyBlockEvaluator(Expression expr, GenBlockParams gen_block) {
// TensorEvaluator is needed to produce tensor blocks of the expression.
auto eval = TensorEvaluator<const decltype(expr), Device>(expr, d);
+ eval.evalSubExprsIfNeeded(nullptr);
// Choose random offsets, sizes and a TensorBlockDescriptor.
TensorBlockParams<NumDims> block_params = gen_block();
@@ -266,29 +267,6 @@ static void test_eval_tensor_reshape() {
[&shuffled]() { return SkewedInnerBlock<Layout>(shuffled); });
}
-template <typename T, int Layout>
-static void test_eval_tensor_reshape_with_bcast() {
- Index dim = internal::random<Index>(1, 100);
-
- Tensor<T, 2, Layout> lhs(1, dim);
- Tensor<T, 2, Layout> rhs(dim, 1);
- lhs.setRandom();
- rhs.setRandom();
-
- auto reshapeLhs = NByOne(dim);
- auto reshapeRhs = OneByM(dim);
-
- auto bcastLhs = OneByM(dim);
- auto bcastRhs = NByOne(dim);
-
- DSizes<Index, 2> dims(dim, dim);
-
- VerifyBlockEvaluator<T, 2, Layout>(
- lhs.reshape(reshapeLhs).broadcast(bcastLhs) +
- rhs.reshape(reshapeRhs).broadcast(bcastRhs),
- [dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
-}
-
template <typename T, int NumDims, int Layout>
static void test_eval_tensor_cast() {
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
@@ -355,6 +333,52 @@ static void test_eval_tensor_padding() {
[&padded_dims]() { return SkewedInnerBlock<Layout>(padded_dims); });
}
+template <typename T, int Layout>
+static void test_eval_tensor_reshape_with_bcast() {
+ Index dim = internal::random<Index>(1, 100);
+
+ Tensor<T, 2, Layout> lhs(1, dim);
+ Tensor<T, 2, Layout> rhs(dim, 1);
+ lhs.setRandom();
+ rhs.setRandom();
+
+ auto reshapeLhs = NByOne(dim);
+ auto reshapeRhs = OneByM(dim);
+
+ auto bcastLhs = OneByM(dim);
+ auto bcastRhs = NByOne(dim);
+
+ DSizes<Index, 2> dims(dim, dim);
+
+ VerifyBlockEvaluator<T, 2, Layout>(
+ lhs.reshape(reshapeLhs).broadcast(bcastLhs) +
+ rhs.reshape(reshapeRhs).broadcast(bcastRhs),
+ [dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
+}
+
+template <typename T, int Layout>
+static void test_eval_tensor_forced_eval() {
+ Index dim = internal::random<Index>(1, 100);
+
+ Tensor<T, 2, Layout> lhs(dim, 1);
+ Tensor<T, 2, Layout> rhs(1, dim);
+ lhs.setRandom();
+ rhs.setRandom();
+
+ auto bcastLhs = OneByM(dim);
+ auto bcastRhs = NByOne(dim);
+
+ DSizes<Index, 2> dims(dim, dim);
+
+ VerifyBlockEvaluator<T, 2, Layout>(
+ (lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims),
+ [dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
+
+ VerifyBlockEvaluator<T, 2, Layout>(
+ (lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims),
+ [dims]() { return RandomBlock<Layout, 2>(dims, 1, 50); });
+}
+
// -------------------------------------------------------------------------- //
// Verify that assigning block to a Tensor expression produces the same result
// as an assignment to TensorSliceOp (writing a block is identical to
@@ -482,6 +506,7 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) {
CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_padding);
CALL_SUBTESTS_LAYOUTS(test_eval_tensor_reshape_with_bcast);
+ CALL_SUBTESTS_LAYOUTS(test_eval_tensor_forced_eval);
CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor);
CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor_reshape);