From fbd7ed6ff73eca76aa6e0691228d26098ad9c19e Mon Sep 17 00:00:00 2001
From: Igor Babuschkin <igor@babuschk.in>
Date: Thu, 2 Jun 2016 13:35:47 +0100
Subject: Add tensor scan op

This is the initial implementation of a generic scan operation.
Based on this, cumsum and cumprod methods have been added to TensorBase.
---
 unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 197 ++++++++++++++++++++++++
 1 file changed, 197 insertions(+)
 create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorScan.h

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
new file mode 100644
index 000000000..031dbf6f2
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -0,0 +1,197 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Igor Babuschkin <igor@babuschk.in>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
+#define EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
+namespace Eigen {
+
+namespace internal {
+template <typename Op, typename XprType>
+struct traits<TensorScanOp<Op, XprType> >
+    : public traits<XprType> {
+  typedef typename XprType::Scalar Scalar;
+  typedef traits<XprType> XprTraits;
+  typedef typename XprTraits::StorageKind StorageKind;
+  typedef typename XprType::Nested Nested;
+  typedef typename remove_reference<Nested>::type _Nested;
+  static const int NumDimensions = XprTraits::NumDimensions;
+  static const int Layout = XprTraits::Layout;
+};
+
+template <typename Op, typename XprType>
+struct eval<TensorScanOp<Op, XprType>, Eigen::Dense>
+{
+  typedef const TensorScanOp<Op, XprType>& type;
+};
+
+template <typename Op, typename XprType>
+struct nested<TensorScanOp<Op, XprType>, 1,
+              typename eval<TensorScanOp<Op, XprType> >::type>
+{
+  typedef TensorScanOp<Op, XprType> type;
+};
+} // end namespace internal
+
+/** \class TensorScan
+  * \ingroup CXX11_Tensor_Module
+  *
+  * \brief Tensor scan class.
+  *
+  */
+
+template <typename Op, typename XprType>
+class TensorScanOp
+    : public TensorBase<TensorScanOp<Op, XprType>, ReadOnlyAccessors> {
+public:
+  typedef typename Eigen::internal::traits<TensorScanOp>::Scalar Scalar;
+  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename Eigen::internal::nested<TensorScanOp>::type Nested;
+  typedef typename Eigen::internal::traits<TensorScanOp>::StorageKind StorageKind;
+  typedef typename Eigen::internal::traits<TensorScanOp>::Index Index;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp(
+      const XprType& expr, const Index& axis, const Op& op = Op())
+      : m_expr(expr), m_axis(axis), m_accumulator(op) {}
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  const Index axis() const { return m_axis; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  const XprType& expression() const { return m_expr; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  const Op accumulator() const { return m_accumulator; }
+
+protected:
+  typename XprType::Nested m_expr;
+  const Index m_axis;
+  const Op m_accumulator;
+};
+
+// Eval as rvalue
+template <typename Op, typename ArgType, typename Device>
+struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
+
+  typedef TensorScanOp<Op, ArgType> XprType;
+  typedef typename XprType::Index Index;
+  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
+  typedef DSizes<Index, NumDims> Dimensions;
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+
+  enum {
+    IsAligned = false,
+    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
+    BlockAccess = false,
+    Layout = TensorEvaluator<ArgType, Device>::Layout,
+    CoordAccess = false,
+    RawAccess = true
+  };
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
+                                                        const Device& device)
+      : m_impl(op.expression(), device),
+        m_device(device),
+        m_axis(op.axis()),
+        m_accumulator(op.accumulator()),
+        m_dimensions(m_impl.dimensions()),
+        m_size(m_dimensions[m_axis]),
+        m_stride(1),
+        m_output(NULL) {
+
+    // Accumulating a scalar isn't supported.
+    EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
+    eigen_assert(m_axis >= 0 && m_axis < NumDims);
+
+    // Compute stride of scan axis
+    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      for (int i = 0; i < m_axis; ++i) {
+        m_stride = m_stride * m_dimensions[i];
+      }
+    } else {
+      for (int i = NumDims - 1; i > m_axis; --i) {
+        m_stride = m_stride * m_dimensions[i];
+      }
+    }
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
+    return m_dimensions;
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
+    m_impl.evalSubExprsIfNeeded(NULL);
+    if (data) {
+      accumulateTo(data);
+      return false;
+    } else {
+      m_output = static_cast<CoeffReturnType*>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
+      accumulateTo(m_output);
+      return true;
+    }
+  }
+
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
+    return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const
+  {
+    return m_output;
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
+  {
+    return m_output[index];
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+    if (m_output != NULL) {
+      m_device.deallocate(m_output);
+      m_output = NULL;
+    }
+    m_impl.cleanup();
+  }
+
+protected:
+  TensorEvaluator<ArgType, Device> m_impl;
+  const Device& m_device;
+  const Index m_axis;
+  Op m_accumulator;
+  const Dimensions& m_dimensions;
+  const Index& m_size;
+  Index m_stride;
+  CoeffReturnType* m_output;
+
+  // TODO(ibab) Parallelize this single-threaded implementation if desired
+  EIGEN_DEVICE_FUNC void accumulateTo(Scalar* data) {
+    // We fix the index along the scan axis to 0 and perform a
+    // scan per remaining entry. The iteration is split into two nested
+    // loops to avoid an integer division by keeping track of idx1 and idx2.
+    for (Index idx1 = 0; idx1 < dimensions().TotalSize() / m_size; idx1 += m_stride) {
+      for (Index idx2 = 0; idx2 < m_stride; idx2++) {
+        // Calculate the starting offset for the scan
+        Index offset = idx1 * m_size + idx2;
+
+        // Compute the prefix sum along the axis, starting at the calculated offset
+        CoeffReturnType accum = m_accumulator.initialize();
+        for (Index idx3 = 0; idx3 < m_size; idx3++) {
+          Index curr = offset + idx3 * m_stride;
+          m_accumulator.reduce(m_impl.coeff(curr), &accum);
+          data[curr] = m_accumulator.finalize(accum);
+        }
+      }
+    }
+  }
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
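A minimal usage sketch for context (not part of the patch above): it exercises the cumsum/cumprod methods that the commit message says were added to TensorBase on top of this op. Method names follow the commit message; the expected values in the comments assume the inclusive scan semantics implemented by accumulateTo() above.

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      // A 2x3 float tensor (column-major by default).
      Eigen::Tensor<float, 2> t(2, 3);
      t.setValues({{1, 2, 3},
                   {4, 5, 6}});

      // Inclusive prefix sum along axis 0:
      // csum(i, j) = t(0, j) + ... + t(i, j).
      Eigen::Tensor<float, 2> csum = t.cumsum(0);   // {{1, 2, 3}, {5, 7, 9}}

      // Inclusive prefix product along axis 1:
      // cprod(i, j) = t(i, 0) * ... * t(i, j).
      Eigen::Tensor<float, 2> cprod = t.cumprod(1); // {{1, 2, 6}, {4, 20, 120}}

      std::cout << csum << "\n\n" << cprod << "\n";
      return 0;
    }

Both methods would return a TensorScanOp parameterized with the matching reducer, whose initialize()/reduce()/finalize() triple is exactly the interface that accumulateTo() invokes once per element along the scan axis.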