diff options
author | Eugene Zhulenev <ezhulenev@google.com> | 2019-03-04 16:02:22 -0800 |
---|---|---|
committer | Eugene Zhulenev <ezhulenev@google.com> | 2019-03-04 16:02:22 -0800 |
commit | b95941e5c2cf8886a54e510be662cf4ecadc4f6f (patch) | |
tree | f53e57bf540a63b4b533b2d8113b9d02c3dbcb62 /unsupported | |
parent | 694084ecbd12c5183a8ff0604d04971d043abfff (diff) |
Add tiled evaluation for TensorForcedEvalOp
Diffstat (limited to 'unsupported')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h | 25 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_executor.cpp | 39 |
2 files changed, 58 insertions, 6 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index 78068be35..74b905329 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -90,14 +90,21 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
 
   enum {
-    IsAligned = true,
-    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
-    BlockAccess = false,
+    IsAligned         = true,
+    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
+    BlockAccess       = internal::is_arithmetic<CoeffReturnType>::value,
     PreferBlockAccess = false,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    RawAccess = true
+    Layout            = TensorEvaluator<ArgType, Device>::Layout,
+    RawAccess         = true
   };
 
+  typedef typename internal::TensorBlock<
+      CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
+      TensorBlock;
+  typedef typename internal::TensorBlockReader<
+      CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
+      TensorBlockReader;
+
   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
   /// op_ is used for sycl
       : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL)
@@ -139,6 +146,14 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
     return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
+      std::vector<internal::TensorOpResourceRequirements>*) const {}
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
+    assert(m_buffer != NULL);
+    TensorBlockReader::Run(block, m_buffer);
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
   }
diff --git a/unsupported/test/cxx11_tensor_executor.cpp b/unsupported/test/cxx11_tensor_executor.cpp
index 18c87b35e..608306613 100644
--- a/unsupported/test/cxx11_tensor_executor.cpp
+++ b/unsupported/test/cxx11_tensor_executor.cpp
@@ -452,6 +452,38 @@ static void test_execute_slice_lvalue(Device d)
   }
 }
 
+template <typename T, int NumDims, typename Device, bool Vectorizable,
+          bool Tileable, int Layout>
+static void test_execute_broadcasting_of_forced_eval(Device d)
+{
+  static constexpr int Options = 0 | Layout;
+
+  auto dims = RandomDims<NumDims>(1, 10);
+  Tensor<T, NumDims, Options, Index> src(dims);
+  src.setRandom();
+
+  const auto broadcasts = RandomDims<NumDims>(1, 7);
+  const auto expr = src.square().eval().broadcast(broadcasts);
+
+  // We assume that broadcasting on a default device is tested and correct, so
+  // we can rely on it to verify correctness of tensor executor and tiling.
+  Tensor<T, NumDims, Options, Index> golden;
+  golden = expr;
+
+  // Now do the broadcasting using configured tensor executor.
+  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
+
+  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
+  using Executor =
+      internal::TensorExecutor<const Assign, Device, Vectorizable, Tileable>;
+
+  Executor::run(Assign(dst, expr), d);
+
+  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
+    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
+  }
+}
+
 #define CALL_SUBTEST_PART(PART) \
   CALL_SUBTEST_##PART
 
@@ -528,8 +560,13 @@ EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
   CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 4);
   CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 5);
 
+  CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 2);
+  CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 3);
+  CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 4);
+  CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 5);
+
   // Force CMake to split this test.
-  // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11
+  // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11;12
 }
 
 #undef CALL_SUBTEST_COMBINATIONS
```