// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Ke Yang
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H

namespace Eigen {

/** \class TensorInflation
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor inflation class.
  *
  * Inflation inserts stride-1 zero-valued coefficients between consecutive
  * coefficients of the input along each dimension, so a dimension of size d
  * inflated with stride s has size (d - 1) * s + 1.
  */
namespace internal {
template<typename Strides, typename XprType>
struct traits<TensorInflationOp<Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename Strides, typename XprType>
struct eval<TensorInflationOp<Strides, XprType>, Eigen::Dense>
{
  typedef const TensorInflationOp<Strides, XprType>& type;
};

template<typename Strides, typename XprType>
struct nested<TensorInflationOp<Strides, XprType>, 1, typename eval<TensorInflationOp<Strides, XprType> >::type>
{
  typedef TensorInflationOp<Strides, XprType> type;
};

}  // end namespace internal

template<typename Strides, typename XprType>
class TensorInflationOp : public TensorBase<TensorInflationOp<Strides, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorInflationOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorInflationOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorInflationOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorInflationOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorInflationOp(const XprType& expr, const Strides& strides)
      : m_xpr(expr), m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const Strides m_strides;
};

// Eval as rvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
{
  typedef TensorInflationOp<Strides, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_strides(op.strides())
  {
    m_dimensions = m_impl.dimensions();
    // Expand each dimension to the inflated dimension.
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] = (m_dimensions[i] - 1) * op.strides()[i] + 1;
    }

    // Remember the strides for fast division.
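    // (TensorIntDivisor precomputes the constants needed to replace the
    // repeated integer divisions in getInputIndex() below with cheaper
    // multiply/shift sequences.)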
    for (int i = 0; i < NumDims; ++i) {
      m_fastStrides[i] = internal::TensorIntDivisor<Index>(m_strides[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_outputStrides[0] = 1;
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }
    } else {  // RowMajor
      m_outputStrides[NumDims-1] = 1;
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  // Computes the input index given the output index. Returns true if the output
  // index doesn't fall into a hole.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool getInputIndex(Index index, Index* inputIndex) const
  {
    eigen_assert(index < dimensions().TotalSize());
    *inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (idx != idx / m_fastStrides[i] * m_strides[i]) {
          return false;
        }
        *inputIndex += idx / m_strides[i] * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (index != index / m_fastStrides[0] * m_strides[0]) {
        return false;
      }
      *inputIndex += index / m_strides[0];
      return true;
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i];
        if (idx != idx / m_fastStrides[i] * m_strides[i]) {
          return false;
        }
        *inputIndex += idx / m_strides[i] * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (index != index / m_fastStrides[NumDims-1] * m_strides[NumDims-1]) {
        return false;
      }
      *inputIndex += index / m_strides[NumDims - 1];
    }
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    Index inputIndex = 0;
    if (getInputIndex(index, &inputIndex)) {
      return m_impl.coeff(inputIndex);
    } else {
      return Scalar(0);
    }
  }

  // TODO(yangke): optimize this function so that we can detect and produce
  // all-zero packets
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double compute_cost = NumDims * (3 * TensorOpCost::DivCost<Index>() +
                                           3 * TensorOpCost::MulCost<Index>() +
                                           2 * TensorOpCost::AddCost<Index>());
    const double input_size = m_impl.dimensions().TotalSize();
    const double output_size = m_dimensions.TotalSize();
    if (output_size == 0)
      return TensorOpCost();
    return m_impl.costPerCoeff(vectorized) +
        TensorOpCost(sizeof(CoeffReturnType) * input_size / output_size, 0,
                     compute_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
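  // (this evaluator owns no device buffers itself, see data() above, so it
  // only forwards the handler to the wrapped input evaluator)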
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Strides m_strides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastStrides;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
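// A minimal usage sketch (illustrative, not part of the Eigen sources): the
// expression is normally created through TensorBase::inflate(). Inflating a
// 2x3 tensor with strides {2, 2} yields a 3x5 tensor whose original
// coefficients sit at even indices, with zeros filling the holes in between.
//
//   Eigen::Tensor<float, 2> input(2, 3);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> strides{{2, 2}};
//   Eigen::Tensor<float, 2> inflated = input.inflate(strides);
//   // inflated.dimension(0) == 3, inflated.dimension(1) == 5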