diff options
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 46 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 3 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 8 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_block_eval.cpp | 71 |
4 files changed, 95 insertions, 33 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index bf7522682..d1e4c82d2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -111,22 +111,28 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device> IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = true, - BlockAccessV2 = false, + BlockAccessV2 = true, PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = true }; - typedef typename internal::TensorBlock< - CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout> - TensorBlock; - typedef typename internal::TensorBlockReader< - CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout> - TensorBlockReader; + static const int NumDims = internal::traits<ArgType>::NumDimensions; + + typedef typename internal::TensorBlock<CoeffReturnType, Index, NumDims, Layout> TensorBlock; + typedef typename internal::TensorBlockReader<CoeffReturnType, Index, NumDims, Layout> TensorBlockReader; //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// - typedef internal::TensorBlockNotImplemented TensorBlockV2; + typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; + + typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2 + ArgTensorBlock; + + typedef internal::TensorBlockAssignment< + Scalar, NumDims, typename ArgTensorBlock::XprType, Index> + TensorBlockAssignment; //===--------------------------------------------------------------------===// EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) @@ -164,6 +170,30 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device> m_impl.block(&eval_to_block); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlockV2( + TensorBlockDesc& desc, TensorBlockScratch& scratch) { + // Add `m_buffer` as destination buffer to the block descriptor. + desc.AddDestinationBuffer( + /*dst_base=*/m_buffer + desc.offset(), + /*dst_strides=*/internal::strides<Layout>(m_impl.dimensions()), + /*total_dst_bytes=*/ + (internal::array_prod(m_impl.dimensions()) + * sizeof(Scalar))); + + ArgTensorBlock block = m_impl.blockV2(desc, scratch); + + // If block was evaluated into a destination buffer, there is no need to do + // an assignment. + if (block.kind() != internal::TensorBlockKind::kMaterializedInOutput) { + TensorBlockAssignment::Run( + TensorBlockAssignment::target( + desc.dimensions(), internal::strides<Layout>(m_impl.dimensions()), + m_buffer, desc.offset()), + block.expr()); + } + block.cleanup(); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index a7cb8dc97..97ac96db1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -238,7 +238,8 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable, typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper; typedef typename TensorBlock::Dimensions TensorBlockDimensions; - typedef internal::TensorBlockDescriptor<NumDims> TensorBlockDesc; + typedef internal::TensorBlockDescriptor<NumDims, StorageIndex> + TensorBlockDesc; typedef internal::TensorBlockScratchAllocator<DefaultDevice> TensorBlockScratch; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index 489b915ac..f3907be6e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -231,7 +231,11 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const { - eigen_assert(m_impl.data() != NULL); + // If one of the dimensions is zero, return empty block view. + if (desc.size() == 0) { + return TensorBlockV2(internal::TensorBlockKind::kView, NULL, + desc.dimensions()); + } // Check if we can reuse `desc` destination, or allocate new scratch buffer. ScalarNoConst* materialized_output = @@ -385,6 +389,8 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device const Index out = output_offset + output_inner_pad_before_size; const Index in = input_offset + output_inner_pad_before_size; + eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL); + LinCopy::template Run<LinCopy::Kind::Linear>( typename LinCopy::Dst(out, 1, materialized_output), typename LinCopy::Src(in, 1, m_impl.data()), diff --git a/unsupported/test/cxx11_tensor_block_eval.cpp b/unsupported/test/cxx11_tensor_block_eval.cpp index 75252362c..1dc0a9e2c 100644 --- a/unsupported/test/cxx11_tensor_block_eval.cpp +++ b/unsupported/test/cxx11_tensor_block_eval.cpp @@ -131,6 +131,7 @@ static void VerifyBlockEvaluator(Expression expr, GenBlockParams gen_block) { // TensorEvaluator is needed to produce tensor blocks of the expression. auto eval = TensorEvaluator<const decltype(expr), Device>(expr, d); + eval.evalSubExprsIfNeeded(nullptr); // Choose a random offsets, sizes and TensorBlockDescriptor. TensorBlockParams<NumDims> block_params = gen_block(); @@ -266,29 +267,6 @@ static void test_eval_tensor_reshape() { [&shuffled]() { return SkewedInnerBlock<Layout>(shuffled); }); } -template <typename T, int Layout> -static void test_eval_tensor_reshape_with_bcast() { - Index dim = internal::random<Index>(1, 100); - - Tensor<T, 2, Layout> lhs(1, dim); - Tensor<T, 2, Layout> rhs(dim, 1); - lhs.setRandom(); - rhs.setRandom(); - - auto reshapeLhs = NByOne(dim); - auto reshapeRhs = OneByM(dim); - - auto bcastLhs = OneByM(dim); - auto bcastRhs = NByOne(dim); - - DSizes<Index, 2> dims(dim, dim); - - VerifyBlockEvaluator<T, 2, Layout>( - lhs.reshape(reshapeLhs).broadcast(bcastLhs) + - rhs.reshape(reshapeRhs).broadcast(bcastRhs), - [dims]() { return SkewedInnerBlock<Layout, 2>(dims); }); -} - template <typename T, int NumDims, int Layout> static void test_eval_tensor_cast() { DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20); @@ -355,6 +333,52 @@ static void test_eval_tensor_padding() { [&padded_dims]() { return SkewedInnerBlock<Layout>(padded_dims); }); } +template <typename T, int Layout> +static void test_eval_tensor_reshape_with_bcast() { + Index dim = internal::random<Index>(1, 100); + + Tensor<T, 2, Layout> lhs(1, dim); + Tensor<T, 2, Layout> rhs(dim, 1); + lhs.setRandom(); + rhs.setRandom(); + + auto reshapeLhs = NByOne(dim); + auto reshapeRhs = OneByM(dim); + + auto bcastLhs = OneByM(dim); + auto bcastRhs = NByOne(dim); + + DSizes<Index, 2> dims(dim, dim); + + VerifyBlockEvaluator<T, 2, Layout>( + lhs.reshape(reshapeLhs).broadcast(bcastLhs) + + rhs.reshape(reshapeRhs).broadcast(bcastRhs), + [dims]() { return SkewedInnerBlock<Layout, 2>(dims); }); +} + +template <typename T, int Layout> +static void test_eval_tensor_forced_eval() { + Index dim = internal::random<Index>(1, 100); + + Tensor<T, 2, Layout> lhs(dim, 1); + Tensor<T, 2, Layout> rhs(1, dim); + lhs.setRandom(); + rhs.setRandom(); + + auto bcastLhs = OneByM(dim); + auto bcastRhs = NByOne(dim); + + DSizes<Index, 2> dims(dim, dim); + + VerifyBlockEvaluator<T, 2, Layout>( + (lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims), + [dims]() { return SkewedInnerBlock<Layout, 2>(dims); }); + + VerifyBlockEvaluator<T, 2, Layout>( + (lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims), + [dims]() { return RandomBlock<Layout, 2>(dims, 1, 50); }); +} + // -------------------------------------------------------------------------- // // Verify that assigning block to a Tensor expression produces the same result // as an assignment to TensorSliceOp (writing a block is is identical to @@ -482,6 +506,7 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) { CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_padding); CALL_SUBTESTS_LAYOUTS(test_eval_tensor_reshape_with_bcast); + CALL_SUBTESTS_LAYOUTS(test_eval_tensor_forced_eval); CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor); CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor_reshape); |