// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2014 Benoit Steiner // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H #define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H namespace Eigen { /** \class TensorEvaluator * \ingroup CXX11_Tensor_Module * * \brief The tensor evaluator classes. * * These classes are responsible for the evaluation of the tensor expression. * * TODO: add support for more types of expressions, in particular expressions * leading to lvalues (slicing, reshaping, etc...) */ // Generic evaluator template struct TensorEvaluator { typedef typename Derived::Index Index; typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; typedef typename Derived::Dimensions Dimensions; typedef Derived XprType; // NumDimensions is -1 for variable dim tensors static const int NumCoords = internal::traits::NumDimensions > 0 ? internal::traits::NumDimensions : 0; enum { IsAligned = Derived::IsAligned, PacketAccess = (internal::unpacket_traits::size > 1), Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) : m_data(const_cast::template MakePointer::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m) { } // Used for accessor extraction in SYCL Managed TensorMap: const Derived& derived() const { return m_impl; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) { if (dest) { m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize()); return false; } return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { eigen_assert(m_data); return m_data[index]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::template MakePointer::RefType coeffRef(Index index) { eigen_assert(m_data); return m_data[index]; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return internal::ploadt(m_data + index); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) { return internal::pstoret(m_data + index, x); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const { eigen_assert(m_data); if (static_cast(Layout) == static_cast(ColMajor)) { return m_data[m_dims.IndexOfColMajor(coords)]; } else { return m_data[m_dims.IndexOfRowMajor(coords)]; } } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::template MakePointer::RefType coeffRef(const array& coords) { eigen_assert(m_data); if (static_cast(Layout) == static_cast(ColMajor)) { return m_data[m_dims.IndexOfColMajor(coords)]; } else { return m_data[m_dims.IndexOfRowMajor(coords)]; } } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, internal::unpacket_traits::size); } EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } /// required by sycl in order to construct sycl buffer from raw pointer const Device& device() const{return m_device;} protected: typename internal::traits::template MakePointer::Type m_data; Dimensions m_dims; const Device& m_device; const Derived& m_impl; }; namespace { template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T loadConstant(const T* address) { return *address; } // Use the texture cache on CUDA devices whenever possible #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float loadConstant(const float* address) { return __ldg(address); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double loadConstant(const double* address) { return __ldg(address); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half loadConstant(const Eigen::half* address) { return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x))); } #endif } // Default evaluator for rvalues template struct TensorEvaluator { typedef typename Derived::Index Index; typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; typedef typename Derived::Dimensions Dimensions; typedef const Derived XprType; // NumDimensions is -1 for variable dim tensors static const int NumCoords = internal::traits::NumDimensions > 0 ? internal::traits::NumDimensions : 0; enum { IsAligned = Derived::IsAligned, PacketAccess = (internal::unpacket_traits::size > 1), Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true }; // Used for accessor extraction in SYCL Managed TensorMap: const Derived& derived() const { return m_impl; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { if (!NumTraits::type>::RequireInitialization && data) { m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar)); return false; } return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { eigen_assert(m_data); #ifndef __SYCL_DEVICE_ONLY__ return loadConstant(m_data+index); #else CoeffReturnType tmp = m_data[index]; return tmp; #endif } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return internal::ploadt_ro(m_data + index); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const { eigen_assert(m_data); const Index index = (static_cast(Layout) == static_cast(ColMajor)) ? m_dims.IndexOfColMajor(coords) : m_dims.IndexOfRowMajor(coords); return loadConstant(m_data+index); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, internal::unpacket_traits::size); } EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } /// added for sycl in order to construct the buffer from the sycl device const Device& device() const{return m_device;} protected: typename internal::traits::template MakePointer::Type m_data; Dimensions m_dims; const Device& m_device; const Derived& m_impl; }; // -------------------- CwiseNullaryOp -------------------- template struct TensorEvaluator, Device> { typedef TensorCwiseNullaryOp XprType; enum { IsAligned = true, PacketAccess = internal::functor_traits::PacketAccess, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper() { } typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; static const int PacketSize = internal::unpacket_traits::size; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_wrapper(m_functor, index); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return m_wrapper.template packetOp(m_functor, index); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, internal::unpacket_traits::size); } EIGEN_DEVICE_FUNC typename Eigen::internal::traits::PointerType data() const { return NULL; } /// required by sycl in order to extract the accessor const TensorEvaluator& impl() const { return m_argImpl; } /// required by sycl in order to extract the accessor NullaryOp functor() const { return m_functor; } private: const NullaryOp m_functor; TensorEvaluator m_argImpl; const internal::nullary_wrapper m_wrapper; }; // -------------------- CwiseUnaryOp -------------------- template struct TensorEvaluator, Device> { typedef TensorCwiseUnaryOp XprType; enum { IsAligned = TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device) { } typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; static const int PacketSize = internal::unpacket_traits::size; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { m_argImpl.evalSubExprsIfNeeded(NULL); return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_argImpl.cleanup(); } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(m_argImpl.coeff(index)); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return m_functor.packetOp(m_argImpl.template packet(index)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { const double functor_cost = internal::functor_traits::Cost; return m_argImpl.costPerCoeff(vectorized) + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); } EIGEN_DEVICE_FUNC typename Eigen::internal::traits::PointerType data() const { return NULL; } /// required by sycl in order to extract the accessor const TensorEvaluator & impl() const { return m_argImpl; } /// added for sycl in order to construct the buffer from sycl device UnaryOp functor() const { return m_functor; } private: const UnaryOp m_functor; TensorEvaluator m_argImpl; }; // -------------------- CwiseBinaryOp -------------------- template struct TensorEvaluator, Device> { typedef TensorCwiseBinaryOp XprType; enum { IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : m_functor(op.functor()), m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device) { EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); } typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; static const int PacketSize = internal::unpacket_traits::size; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { // TODO: use right impl instead if right impl dimensions are known at compile time. return m_leftImpl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { m_leftImpl.evalSubExprsIfNeeded(NULL); m_rightImpl.evalSubExprsIfNeeded(NULL); return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_leftImpl.cleanup(); m_rightImpl.cleanup(); } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index)); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return m_functor.packetOp(m_leftImpl.template packet(index), m_rightImpl.template packet(index)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { const double functor_cost = internal::functor_traits::Cost; return m_leftImpl.costPerCoeff(vectorized) + m_rightImpl.costPerCoeff(vectorized) + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); } EIGEN_DEVICE_FUNC typename Eigen::internal::traits::PointerType data() const { return NULL; } /// required by sycl in order to extract the accessor const TensorEvaluator& left_impl() const { return m_leftImpl; } /// required by sycl in order to extract the accessor const TensorEvaluator& right_impl() const { return m_rightImpl; } /// required by sycl in order to extract the accessor BinaryOp functor() const { return m_functor; } private: const BinaryOp m_functor; TensorEvaluator m_leftImpl; TensorEvaluator m_rightImpl; }; // -------------------- CwiseTernaryOp -------------------- template struct TensorEvaluator, Device> { typedef TensorCwiseTernaryOp XprType; enum { IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : m_functor(op.functor()), m_arg1Impl(op.arg1Expression(), device), m_arg2Impl(op.arg2Expression(), device), m_arg3Impl(op.arg3Expression(), device) { EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, typename internal::traits::StorageKind>::value), STORAGE_KIND_MUST_MATCH) EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, typename internal::traits::StorageKind>::value), STORAGE_KIND_MUST_MATCH) EIGEN_STATIC_ASSERT((internal::is_same::Index, typename internal::traits::Index>::value), STORAGE_INDEX_MUST_MATCH) EIGEN_STATIC_ASSERT((internal::is_same::Index, typename internal::traits::Index>::value), STORAGE_INDEX_MUST_MATCH) eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); } typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; static const int PacketSize = internal::unpacket_traits::size; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { // TODO: use arg2 or arg3 dimensions if they are known at compile time. return m_arg1Impl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { m_arg1Impl.evalSubExprsIfNeeded(NULL); m_arg2Impl.evalSubExprsIfNeeded(NULL); m_arg3Impl.evalSubExprsIfNeeded(NULL); return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_arg1Impl.cleanup(); m_arg2Impl.cleanup(); m_arg3Impl.cleanup(); } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return m_functor.packetOp(m_arg1Impl.template packet(index), m_arg2Impl.template packet(index), m_arg3Impl.template packet(index)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { const double functor_cost = internal::functor_traits::Cost; return m_arg1Impl.costPerCoeff(vectorized) + m_arg2Impl.costPerCoeff(vectorized) + m_arg3Impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); } EIGEN_DEVICE_FUNC typename Eigen::internal::traits::PointerType data() const { return NULL; } /// required by sycl in order to extract the accessor const TensorEvaluator & arg1Impl() const { return m_arg1Impl; } /// required by sycl in order to extract the accessor const TensorEvaluator& arg2Impl() const { return m_arg2Impl; } /// required by sycl in order to extract the accessor const TensorEvaluator& arg3Impl() const { return m_arg3Impl; } private: const TernaryOp m_functor; TensorEvaluator m_arg1Impl; TensorEvaluator m_arg2Impl; TensorEvaluator m_arg3Impl; }; // -------------------- SelectOp -------------------- template struct TensorEvaluator, Device> { typedef TensorSelectOp XprType; typedef typename XprType::Scalar Scalar; enum { IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & internal::packet_traits::HasBlend, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : m_condImpl(op.ifExpression(), device), m_thenImpl(op.thenExpression(), device), m_elseImpl(op.elseExpression(), device) { EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions())); eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions())); } typedef typename XprType::Index Index; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; static const int PacketSize = internal::unpacket_traits::size; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { // TODO: use then or else impl instead if they happen to be known at compile time. return m_condImpl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { m_condImpl.evalSubExprsIfNeeded(NULL); m_thenImpl.evalSubExprsIfNeeded(NULL); m_elseImpl.evalSubExprsIfNeeded(NULL); return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_condImpl.cleanup(); m_thenImpl.cleanup(); m_elseImpl.cleanup(); } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index); } template EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { internal::Selector select; for (Index i = 0; i < PacketSize; ++i) { select.select[i] = m_condImpl.coeff(index+i); } return internal::pblend(select, m_thenImpl.template packet(index), m_elseImpl.template packet(index)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { return m_condImpl.costPerCoeff(vectorized) + m_thenImpl.costPerCoeff(vectorized) .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits::PointerType data() const { return NULL; } /// required by sycl in order to extract the accessor const TensorEvaluator & cond_impl() const { return m_condImpl; } /// required by sycl in order to extract the accessor const TensorEvaluator& then_impl() const { return m_thenImpl; } /// required by sycl in order to extract the accessor const TensorEvaluator& else_impl() const { return m_elseImpl; } private: TensorEvaluator m_condImpl; TensorEvaluator m_thenImpl; TensorEvaluator m_elseImpl; }; } // end namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H