From 7402fea0a8e63e3ea248257047c584afee8f8bde Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Fri, 16 May 2014 15:08:05 -0700
Subject: Vectorized the evaluation of tensor expressions (using SSE, AVX,
 NEON, ...)

Added the ability to parallelize the evaluation of a tensor expression
over multiple cpu cores.
Added the ability to offload the evaluation of a tensor expression to a
GPU.
---
 unsupported/Eigen/CXX11/src/Tensor/Tensor.h         |   8 +-
 unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h   | 145 ++++++++++++++++++-
 unsupported/Eigen/CXX11/src/Tensor/TensorBase.h     |  12 ++
 unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h   |  83 +++++++++++
 .../Eigen/CXX11/src/Tensor/TensorDeviceType.h       |  56 ++++++++
 .../Eigen/CXX11/src/Tensor/TensorDimensions.h       |  14 +-
 .../Eigen/CXX11/src/Tensor/TensorEvaluator.h        |  54 +++++--
 unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h     |  27 ++--
 .../Eigen/CXX11/src/Tensor/TensorFixedSize.h        |  10 +-
 .../CXX11/src/Tensor/TensorForwardDeclarations.h    |   4 +-
 unsupported/Eigen/CXX11/src/Tensor/TensorMap.h      | 158 +++++++++++++++++++--
 unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h  |  19 ---
 12 files changed, 524 insertions(+), 66 deletions(-)
 create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
 create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
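A usage sketch of the new device API (dimensions and variable names are made
up; assumes a C++11 compiler, EIGEN_USE_THREADS / EIGEN_USE_GPU defined for
the respective paths, and nvcc for the gpu one):

    Eigen::Tensor<float, 2> A(30, 40), B(30, 40), C(30, 40);
    // ... fill A and B ...

    C = A + B;                           // evaluated on a single cpu core

    Eigen::ThreadPoolDevice threads(4);  // partition the work over 4 cores
    C.device(threads) = A + B;

    // The gpu path expects the coefficients to live in device memory,
    // e.g. TensorMaps over cudaMalloc'ed buffers (gpu_a, gpu_b, gpu_c
    // below are hypothetical such maps):
    Eigen::GpuDevice gpu;
    gpu_c.device(gpu) = gpu_a - gpu_b;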
diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
index f5c027d1c..d8ff3f584 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -75,9 +75,15 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_> >
     typedef typename internal::traits<Tensor>::StorageKind StorageKind;
     typedef typename internal::traits<Tensor>::Index Index;
     typedef Scalar_ Scalar;
-    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename internal::packet_traits<Scalar>::type Packet;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     typedef typename Base::CoeffReturnType CoeffReturnType;
+    typedef typename Base::PacketReturnType PacketReturnType;
+
+    enum {
+      IsAligned = bool(EIGEN_ALIGN),
+      PacketAccess = true,
+    };
 
     static const int Options = Options_;
     static const std::size_t NumIndices = NumIndices_;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index f1df827f9..e69ff6188 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -10,6 +10,9 @@
 #ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
 #define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
 
+#ifdef EIGEN_USE_THREADS
+#include <future>
+#endif
 
 namespace Eigen {
 
 /** \class TensorAssign
   * \ingroup CXX11_Tensor_Module
   *
   * \brief The tensor assignment class.
   *
   * This class is responsible for triggering the evaluation of the expressions.
   */
 namespace internal {
 
-template<typename Derived1, typename Derived2>
+// Default strategy: the expressions are evaluated with a single cpu thread.
+template<typename Derived1, typename Derived2, bool Vectorizable = TensorEvaluator<Derived1>::PacketAccess & TensorEvaluator<Derived2>::PacketAccess>
 struct TensorAssign
 {
   typedef typename Derived1::Index Index;
@@ -38,13 +42,150 @@ struct TensorAssign
   EIGEN_DEVICE_FUNC
   static inline void run(Derived1& dst, const Derived2& src)
   {
     TensorEvaluator<Derived1> evalDst(dst);
     TensorEvaluator<Derived2> evalSrc(src);
     const Index size = dst.size();
-    for(Index i = 0; i < size; ++i) {
+    for (Index i = 0; i < size; ++i) {
       evalDst.coeffRef(i) = evalSrc.coeff(i);
     }
   }
 };
 
+
+template<typename Derived1, typename Derived2>
+struct TensorAssign<Derived1, Derived2, true>
+{
+  typedef typename Derived1::Index Index;
+  EIGEN_DEVICE_FUNC
+  static inline void run(Derived1& dst, const Derived2& src)
+  {
+    TensorEvaluator<Derived1> evalDst(dst);
+    TensorEvaluator<Derived2> evalSrc(src);
+    const Index size = dst.size();
+
+    static const int LhsStoreMode = TensorEvaluator<Derived1>::IsAligned ? Aligned : Unaligned;
+    static const int RhsLoadMode = TensorEvaluator<Derived2>::IsAligned ? Aligned : Unaligned;
+    static const int PacketSize = unpacket_traits<typename TensorEvaluator<Derived1>::PacketReturnType>::size;
+    const Index VectorizedSize = (size / PacketSize) * PacketSize;
+
+    for (Index i = 0; i < VectorizedSize; i += PacketSize) {
+      evalDst.template writePacket<LhsStoreMode>(i, evalSrc.template packet<RhsLoadMode>(i));
+    }
+    for (Index i = VectorizedSize; i < size; ++i) {
+      evalDst.coeffRef(i) = evalSrc.coeff(i);
+    }
+  }
+};
+
+
+// Multicore strategy: the index space is partitioned and each core is assigned to a partition.
+#ifdef EIGEN_USE_THREADS
+template <typename LhsEval, typename RhsEval, typename Index, bool Vectorizable = LhsEval::PacketAccess & RhsEval::PacketAccess>
+struct EvalRange {
+  static void run(LhsEval& dst, const RhsEval& src, const Index first, const Index last) {
+    eigen_assert(last > first);
+    for (Index i = first; i < last; ++i) {
+      dst.coeffRef(i) = src.coeff(i);
+    }
+  }
+};
+
+template <typename LhsEval, typename RhsEval, typename Index>
+struct EvalRange<LhsEval, RhsEval, Index, true> {
+  static void run(LhsEval& dst, const RhsEval& src, const Index first, const Index last) {
+    eigen_assert(last > first);
+
+    Index i = first;
+    static const int PacketSize = unpacket_traits<typename LhsEval::PacketReturnType>::size;
+    if (last - first > PacketSize) {
+      static const int LhsStoreMode = LhsEval::IsAligned ? Aligned : Unaligned;
+      static const int RhsLoadMode = RhsEval::IsAligned ? Aligned : Unaligned;
+      eigen_assert(first % PacketSize == 0);
+      Index lastPacket = last - (last % PacketSize);
+      for (; i < lastPacket; i += PacketSize) {
+        dst.template writePacket<LhsStoreMode>(i, src.template packet<RhsLoadMode>(i));
+      }
+    }
+
+    for (; i < last; ++i) {
+      dst.coeffRef(i) = src.coeff(i);
+    }
+  }
+};
+
+template<typename Derived1, typename Derived2>
+struct TensorAssignMultiThreaded
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1& dst, const Derived2& src, const ThreadPoolDevice& device)
+  {
+    TensorEvaluator<Derived1> evalDst(dst);
+    TensorEvaluator<Derived2> evalSrc(src);
+    const Index size = dst.size();
+
+    static const bool Vectorizable = TensorEvaluator<Derived1>::PacketAccess & TensorEvaluator<Derived2>::PacketAccess;
+    static const int PacketSize = Vectorizable ? unpacket_traits<typename TensorEvaluator<Derived1>::PacketReturnType>::size : 1;
+
+    int blocksz = static_cast<int>(ceil(static_cast<float>(size)/device.numThreads()) + PacketSize - 1);
+    const Index blocksize = std::max<Index>(PacketSize, (blocksz - (blocksz % PacketSize)));
+    const Index numblocks = size / blocksize;
+
+    std::vector<std::future<void> > results;
+    results.reserve(numblocks);
+    for (int i = 0; i < numblocks; ++i) {
+      results.push_back(std::async(std::launch::async, &EvalRange<TensorEvaluator<Derived1>, TensorEvaluator<Derived2>, Index>::run, evalDst, evalSrc, i*blocksize, (i+1)*blocksize));
+    }
+
+    for (int i = 0; i < numblocks; ++i) {
+      results[i].get();
+    }
+
+    if (numblocks * blocksize < size) {
+      EvalRange<TensorEvaluator<Derived1>, TensorEvaluator<Derived2>, Index>::run(evalDst, evalSrc, numblocks * blocksize, size);
+    }
+  }
+};
+#endif
+
+
+// GPU: the evaluation of the expressions is offloaded to a GPU.
+#ifdef EIGEN_USE_GPU
+template <typename LhsEvaluator, typename RhsEvaluator>
+__global__ void EigenMetaKernelNoCheck(LhsEvaluator evalDst, const RhsEvaluator evalSrc) {
+  const int index = blockIdx.x * blockDim.x + threadIdx.x;
+  evalDst.coeffRef(index) = evalSrc.coeff(index);
+}
+template <typename LhsEvaluator, typename RhsEvaluator>
+__global__ void EigenMetaKernelPeel(LhsEvaluator evalDst, const RhsEvaluator evalSrc, int peel_start_offset, int size) {
+  const int index = peel_start_offset + blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < size) {
+    evalDst.coeffRef(index) = evalSrc.coeff(index);
+  }
+}
+
+template<typename Derived1, typename Derived2>
+struct TensorAssignGpu
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1& dst, const Derived2& src, const GpuDevice& device)
+  {
+    TensorEvaluator<Derived1> evalDst(dst);
+    TensorEvaluator<Derived2> evalSrc(src);
+    const Index size = dst.size();
+    const int block_size = std::min<Index>(size, 32*32);
+    const int num_blocks = size / block_size;
+    EigenMetaKernelNoCheck<TensorEvaluator<Derived1>, TensorEvaluator<Derived2> > <<<num_blocks, block_size, 0, device.stream()>>>(evalDst, evalSrc);
+
+    const int remaining_items = size % block_size;
+    if (remaining_items > 0) {
+      const int peel_start_offset = num_blocks * block_size;
+      const int peel_block_size = std::min<Index>(size, 32);
+      const int peel_num_blocks = (remaining_items + peel_block_size - 1) / peel_block_size;
+      EigenMetaKernelPeel<TensorEvaluator<Derived1>, TensorEvaluator<Derived2> > <<<peel_num_blocks, peel_block_size, 0, device.stream()>>>(evalDst, evalSrc, peel_start_offset, size);
+    }
+  }
+};
+#endif
+
 } // end namespace internal
 
 } // end namespace Eigen
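To make the partitioning in TensorAssignMultiThreaded concrete, here is the
block-size arithmetic traced by hand for one illustrative configuration (the
numbers are made up):

    // size = 1000 coefficients, device.numThreads() = 4, PacketSize = 4
    // blocksz   = ceil(1000 / 4.0) + 4 - 1 = 253
    // blocksize = max(4, 253 - 253 % 4)    = 252   (a multiple of PacketSize)
    // numblocks = 1000 / 252               = 3
    //
    // Three std::async tasks then cover [0,252), [252,504) and [504,756),
    // and the trailing EvalRange call handles the remainder [756,1000).

Rounding each partition up to a multiple of PacketSize is what guarantees the
eigen_assert(first % PacketSize == 0) in the vectorized EvalRange. On the gpu
side, note that both kernels are launched on device.stream(), so the
assignment is asynchronous with respect to the host; a caller that needs the
result immediately has to synchronize that stream (e.g. with
cudaStreamSynchronize).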
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 9c7783aaf..fa1bd3498 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -28,6 +28,7 @@ class TensorBase
     typedef typename internal::traits<Derived>::Scalar Scalar;
     typedef typename internal::traits<Derived>::Index Index;
     typedef Scalar CoeffReturnType;
+    typedef typename internal::packet_traits<Scalar>::type PacketReturnType;
 
     Derived& setZero() {
       return setConstant(Scalar(0));
@@ -83,6 +84,17 @@ class TensorBase
       return TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
     }
 
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const Derived, const OtherDerived>
+    operator-(const OtherDerived& other) const {
+      return TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+    }
+
+    template <typename DeviceType>
+    TensorDevice<Derived, DeviceType> device(const DeviceType& device) {
+      return TensorDevice<Derived, DeviceType>(device, derived());
+    }
+
   protected:
     template <typename OtherDerived> friend class TensorBase;
     EIGEN_DEVICE_FUNC
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
new file mode 100644
index 000000000..71890e187
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
@@ -0,0 +1,83 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
+#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
+
+namespace Eigen {
+
+/** \class TensorDevice
+  * \ingroup CXX11_Tensor_Module
+  *
+  * \brief Pseudo expression providing an operator = that will evaluate its argument
+  * on the specified computing 'device' (GPU, thread pool, ...)
+  *
+  * Example:
+  *    C.device(EIGEN_GPU) = A + B;
+  *
+  * Todo: thread pools.
+  * Todo: operator +=, -=, *= and so on.
+  */
+
+template <typename ExpressionType, typename DeviceType> class TensorDevice {
+  public:
+    TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
+      internal::TensorAssign<ExpressionType, const OtherDerived>::run(m_expression, other);
+      return *this;
+    }
+
+  protected:
+    const DeviceType& m_device;
+    ExpressionType& m_expression;
+};
+
+
+#ifdef EIGEN_USE_THREADS
+template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPoolDevice> {
+  public:
+    TensorDevice(const ThreadPoolDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
+      internal::TensorAssignMultiThreaded<ExpressionType, const OtherDerived>::run(m_expression, other, m_device);
+      return *this;
+    }
+
+  protected:
+    const ThreadPoolDevice& m_device;
+    ExpressionType& m_expression;
+};
+#endif
+
+
+#ifdef EIGEN_USE_GPU
+template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
+{
+  public:
+    TensorDevice(const GpuDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
+      internal::TensorAssignGpu<ExpressionType, const OtherDerived>::run(m_expression, other, m_device);
+      return *this;
+    }
+
+  protected:
+    const GpuDevice& m_device;
+    ExpressionType& m_expression;
+};
+#endif
+
+
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
new file mode 100644
index 000000000..ded6ca604
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
@@ -0,0 +1,56 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
+#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
+
+namespace Eigen {
+
+// Default device for the machine (typically a single cpu core)
+struct DefaultDevice {
+};
+
+
+// Multiple cpu cores
+// We should really use a thread pool here but first we need to find a portable thread pool library.
+#ifdef EIGEN_USE_THREADS
+struct ThreadPoolDevice {
+  ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : /*pool_(pool), */num_threads_(num_cores) { }
+  size_t numThreads() const { return num_threads_; }
+  /*ThreadPool* threadPool() const { return pool_; }*/
+
+ private:
+  // todo: NUMA, ...
+  size_t num_threads_;
+  /*ThreadPool* pool_;*/
+};
+#endif
+
+
+// GPU offloading
+#ifdef EIGEN_USE_GPU
+struct GpuDevice {
+  // todo: support for multiple gpus
+  GpuDevice() {
+    cudaStreamCreate(&stream_);
+  }
+  ~GpuDevice() {
+    cudaStreamDestroy(stream_);
+  }
+  const cudaStream_t& stream() const { return stream_; }
+
+ private:
+  cudaStream_t stream_;
+};
+#endif
+
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
index bd3bd5aca..43e9d6550 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -79,16 +79,16 @@ struct Sizes : internal::numeric_list<std::size_t, Indices...>
   Sizes() { }
   template <typename DenseIndex>
-  explicit Sizes(const array<DenseIndex, Base::count>&/* indices*/) {
+  explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
     // todo: add assertion
   }
 #ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-  explicit Sizes(std::initializer_list<std::size_t>/* l*/) {
+  explicit Sizes(std::initializer_list<std::size_t> /*l*/) {
     // todo: add assertion
   }
 #endif
 
-  template <typename T> Sizes& operator = (const T&/* other*/) {
+  template <typename T> Sizes& operator = (const T& /*other*/) {
     // add assertion failure if the size of other is different
     return *this;
   }
@@ -119,7 +119,7 @@ template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct Sizes
-  static const size_t TotalSize() {
+  static size_t TotalSize() {
     return internal::arg_prod<V1, V2, V3, V4, V5>::value;
   }
 
@@ -181,14 +181,11 @@ template <typename DenseIndex, std::size_t NumDims>
 struct DSizes : array<DenseIndex, NumDims> {
   typedef array<DenseIndex, NumDims> Base;
 
-  size_t TotalSize() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() const {
     return internal::array_prod(*static_cast<const Base*>(this));
   }
 
   DSizes() { }
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-  //  explicit DSizes(std::initializer_list<DenseIndex> l) : Base(l) { }
-#endif
   explicit DSizes(const array<DenseIndex, NumDims>& a) : Base(a) { }
 
   DSizes& operator = (const array<DenseIndex, NumDims>& other) {
@@ -203,7 +200,6 @@ struct DSizes : array<DenseIndex, NumDims> {
   size_t IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const {
     return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this));
   }
-
 };
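For reference, the two linearizations implemented by IndexOfColMajor and
IndexOfRowMajor, written out for a hypothetical 2x3x4 tensor and indices
(i0, i1, i2):

    // col-major (Eigen's default): index = i0 + 2 * (i1 + 3 * i2)  -- first index moves fastest
    // row-major:                   index = i2 + 4 * (i1 + 3 * i0)  -- last index moves fastest
    //
    // e.g. (i0,i1,i2) = (1,2,3): 1 + 2*(2 + 3*3) = 23 and 3 + 4*(2 + 3*1) = 23,
    // the last of the 24 coefficients in either layout.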
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index b0dbca041..3ce924dc3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -29,32 +29,38 @@ struct TensorEvaluator
 {
   typedef typename Derived::Index Index;
   typedef typename Derived::Scalar Scalar;
-  typedef typename Derived::Scalar& CoeffReturnType;
+  typedef typename Derived::Packet Packet;
+  typedef typename Derived::Scalar CoeffReturnType;
+  typedef typename Derived::Packet PacketReturnType;
+
+  enum {
+    IsAligned = Derived::IsAligned,
+    PacketAccess = Derived::PacketAccess,
+  };
 
   TensorEvaluator(Derived& m)
       : m_data(const_cast<Scalar*>(m.data()))
   { }
 
-  CoeffReturnType coeff(Index index) const {
+  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const {
     return m_data[index];
   }
 
-  Scalar& coeffRef(Index index) {
+  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) {
     return m_data[index];
   }
 
-  // to do: vectorized evaluation.
-  /* template<int LoadMode>
+  template<int LoadMode>
   PacketReturnType packet(Index index) const
   {
-    return ploadt<PacketScalar, LoadMode>(m_data + index);
+    return internal::ploadt<Packet, LoadMode>(m_data + index);
   }
 
-  template<int StoreMode>
-  void writePacket(Index index, const PacketScalar& x)
+  template<int StoreMode>
+  void writePacket(Index index, const Packet& x)
   {
-    return pstoret<Scalar, PacketScalar, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
-  }*/
+    return internal::pstoret<Scalar, Packet, StoreMode>(m_data + index, x);
+  }
 
  protected:
   Scalar* m_data;
@@ -70,6 +76,11 @@ struct TensorEvaluator<TensorCwiseUnaryOp<UnaryOp, ArgType> >
 {
   typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
 
+  enum {
+    IsAligned = TensorEvaluator<ArgType>::IsAligned,
+    PacketAccess = TensorEvaluator<ArgType>::PacketAccess & internal::functor_traits<UnaryOp>::PacketAccess,
+  };
+
   TensorEvaluator(const XprType& op)
     : m_functor(op.functor()),
       m_argImpl(op.nestedExpression())
   { }
 
   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename XprType::PacketReturnType PacketReturnType;
 
-  CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
   {
     return m_functor(m_argImpl.coeff(index));
   }
 
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
+  {
+    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
+  }
+
  private:
   const UnaryOp m_functor;
   TensorEvaluator<ArgType> m_argImpl;
@@ -96,6 +114,12 @@ struct TensorEvaluator<TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> >
 {
   typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
 
+  enum {
+    IsAligned = TensorEvaluator<LeftArgType>::IsAligned & TensorEvaluator<RightArgType>::IsAligned,
+    PacketAccess = TensorEvaluator<LeftArgType>::PacketAccess & TensorEvaluator<RightArgType>::PacketAccess &
+                   internal::functor_traits<BinaryOp>::PacketAccess,
+  };
+
   TensorEvaluator(const XprType& op)
     : m_functor(op.functor()),
       m_leftImpl(op.lhsExpression()),
       m_rightImpl(op.rhsExpression())
   { }
 
   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename XprType::PacketReturnType PacketReturnType;
 
-  CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
   {
     return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
   }
+
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
+  {
+    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
+  }
 
  private:
   const BinaryOp m_functor;
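The LoadMode/StoreMode template arguments map directly onto Eigen's existing
packet primitives. A minimal standalone use of those primitives (a sketch; the
packet type depends on the target, e.g. 4 floats with SSE, and degenerates to
a scalar "packet" of size 1 without vectorization):

    #include <Eigen/Core>
    using namespace Eigen;

    typedef internal::packet_traits<float>::type Packet;
    static const int PacketSize = internal::unpacket_traits<Packet>::size;

    float data[16] = {0};                                  // large enough for any packet here
    Packet p = internal::ploadt<Packet, Unaligned>(data);  // LoadMode = Unaligned
    internal::pstoret<float, Packet, Unaligned>(data, p);  // StoreMode = Unaligned

Passing Aligned instead selects the cheaper aligned instructions, which is why
the evaluators only advertise IsAligned when the underlying storage is known
to be suitably aligned.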
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
index aa875dc31..e32077f6e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
@@ -33,6 +33,9 @@ struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> >
   typedef typename result_of<
                     UnaryOp(typename XprType::Scalar)
                    >::type Scalar;
+  typedef typename result_of<
+                    UnaryOp(typename XprType::Packet)
+                   >::type Packet;
   typedef typename XprType::Nested XprTypeNested;
   typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
 };
@@ -57,14 +60,16 @@ class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType> >
 {
   public:
-    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar;
-    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-    typedef typename XprType::CoeffReturnType CoeffReturnType;
-    typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested;
-    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind;
-    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index;
-
-    inline TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar;
+    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Packet Packet;
+    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
+    typedef typename XprType::CoeffReturnType CoeffReturnType;
+    typedef typename XprType::PacketReturnType PacketReturnType;
+    typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested;
+    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind;
+    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index;
+
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
       : m_xpr(xpr), m_functor(func) {}
 
     EIGEN_DEVICE_FUNC
@@ -92,6 +97,7 @@ struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
                                       typename RhsXprType::Scalar
                                     )
                                   >::type Scalar;
+  typedef typename internal::packet_traits<Scalar>::type Packet;
   typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
                                         typename traits<RhsXprType>::StorageKind>::ret StorageKind;
   typedef typename promote_index_type<typename traits<LhsXprType>::Index,
@@ -123,14 +129,17 @@ class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
 {
   public:
     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Scalar Scalar;
+    typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Packet Packet;
     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
     typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType,
                                                     typename RhsXprType::CoeffReturnType>::ret CoeffReturnType;
+    typedef typename internal::promote_storage_type<typename LhsXprType::PacketReturnType,
+                                                    typename RhsXprType::PacketReturnType>::ret PacketReturnType;
     typedef typename Eigen::internal::nested<TensorCwiseBinaryOp>::type Nested;
     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::StorageKind StorageKind;
     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Index Index;
 
-    inline TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp())
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp())
       : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_functor(func) {}
 
     EIGEN_DEVICE_FUNC
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
index 953880123..dcc7ccd65 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
@@ -33,11 +33,17 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_> >
     typedef typename internal::traits<TensorFixedSize>::StorageKind StorageKind;
     typedef typename internal::traits<TensorFixedSize>::Index Index;
     typedef Scalar_ Scalar;
-    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename internal::packet_traits<Scalar>::type Packet;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     typedef typename Base::CoeffReturnType CoeffReturnType;
 
-    static const int Options = Options_;
+    static const int Options = Options_;
+
+    enum {
+      IsAligned = bool(EIGEN_ALIGN),
+      PacketAccess = true,
+    };
+
     typedef Dimensions_ Dimensions;
     static const std::size_t NumIndices = Dimensions::count;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index e8a2125c4..09b0fe66d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -14,12 +14,14 @@ namespace Eigen {
 
 template<typename Scalar_, std::size_t NumIndices_, int Options_ = 0> class Tensor;
 template<typename Scalar_, typename Dimensions, int Options_ = 0> class TensorFixedSize;
 template<typename PlainObjectType> class TensorMap;
 template<typename Derived> class TensorBase;
 
 template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp;
 template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp;
 
+template<typename ExpressionType, typename DeviceType> class TensorDevice;
+
 // Move to internal?
 template<typename Derived> struct TensorEvaluator;
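An expression's PacketAccess is anded with internal::functor_traits<Op>::PacketAccess,
so a functor only vectorizes if it both provides a packetOp and advertises it.
The shape of such a functor, modeled on Eigen's own scalar_sum_op (a sketch,
not part of this patch; my_plus_op is hypothetical):

    template <typename Scalar> struct my_plus_op {
      Scalar operator()(const Scalar& a, const Scalar& b) const { return a + b; }
      template <typename Packet>
      Packet packetOp(const Packet& a, const Packet& b) const {
        return Eigen::internal::padd(a, b);      // vectorized path
      }
    };

    namespace Eigen { namespace internal {
    template <typename Scalar> struct functor_traits<my_plus_op<Scalar> > {
      enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = true };
    };
    }}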
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
index bb0b39c5a..3fc9c5335 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
@@ -22,16 +22,16 @@
 template<typename PlainObjectType> class TensorMap : public TensorBase<TensorMap<PlainObjectType> >
 {
   public:
     typedef TensorMap<PlainObjectType> Self;
     typedef typename PlainObjectType::Base Base;
     typedef typename Eigen::internal::nested<Self>::type Nested;
     typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
     typedef typename internal::traits<PlainObjectType>::Index Index;
     typedef typename internal::traits<PlainObjectType>::Scalar Scalar;
-    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename internal::packet_traits<Scalar>::type Packet;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     typedef typename Base::CoeffReturnType CoeffReturnType;
@@ -43,13 +43,12 @@
     inline TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(array<DenseIndex, 1>({{firstDimension}})) {
@@ -65,7 +64,7 @@
 #endif
 
-    inline TensorMap(PointerArgType dataPtr, const array<DenseIndex, PlainObjectType::NumIndices>& dimensions)
+    inline TensorMap(PointerArgType dataPtr, const array<Index, PlainObjectType::NumIndices>& dimensions)
       : m_data(dataPtr), m_dimensions(dimensions)
     { }
 
@@ -80,12 +79,97 @@
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, PlainObjectType::NumIndices>& indices) const
+    {
+      //  eigen_assert(checkIndexRange(indices));
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = m_dimensions.IndexOfRowMajor(indices);
+        return m_data[index];
+      } else {
+        const Index index = m_dimensions.IndexOfColMajor(indices);
+        return m_data[index];
+      }
+    }
+
+#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
+    template<typename... IndexTypes> EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const
+    {
+      static_assert(sizeof...(otherIndices) + 1 == PlainObjectType::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = m_dimensions.IndexOfRowMajor(array<Index, PlainObjectType::NumIndices>{{firstIndex, otherIndices...}});
+        return m_data[index];
+      } else {
+        const Index index = m_dimensions.IndexOfColMajor(array<Index, PlainObjectType::NumIndices>{{firstIndex, otherIndices...}});
+        return m_data[index];
+      }
+    }
+#else
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const
     {
       eigen_internal_assert(index >= 0 && index < size());
       return m_data[index];
     }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i1 + i0 * m_dimensions[1];
+        return m_data[index];
+      } else {
+        const Index index = i0 + i1 * m_dimensions[0];
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0);
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
+        return m_data[index];
+      }
+    }
+#endif
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, PlainObjectType::NumIndices>& indices)
+    {
+      //  eigen_assert(checkIndexRange(indices));
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = m_dimensions.IndexOfRowMajor(indices);
+        return m_data[index];
+      } else {
+        const Index index = m_dimensions.IndexOfColMajor(indices);
+        return m_data[index];
+      }
+    }
 
 #ifdef EIGEN_HAS_VARIADIC_TEMPLATES
     template<typename... IndexTypes> EIGEN_DEVICE_FUNC
@@ -100,8 +184,60 @@
       return m_data[index];
     }
-#endif
+#else
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar& operator()(Index index)
     {
       eigen_internal_assert(index >= 0 && index < size());
       return m_data[index];
     }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i1 + i0 * m_dimensions[1];
+        return m_data[index];
+      } else {
+        const Index index = i0 + i1 * m_dimensions[0];
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0);
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
+        return m_data[index];
+      }
+    }
+#endif
+
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     Self& operator=(const OtherDerived& other)
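A small usage example for the accessors added above (buffer contents are made
up; assumes the module's array alias, i.e. std::array on a C++11 compiler):

    float data[6] = {0, 1, 2, 3, 4, 5};
    Eigen::array<Eigen::DenseIndex, 2> dims;
    dims[0] = 2; dims[1] = 3;

    Eigen::TensorMap<Eigen::Tensor<float, 2> > map(data, dims);
    float x = map(1, 2);   // column-major by default: data[1 + 2 * 2] = data[5]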
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
index efcb39559..64098343e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
@@ -72,9 +72,6 @@ class TensorStorage
     TensorStorage() { }
     TensorStorage(const TensorStorage& other) : Base_(other) { }
-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    //  TensorStorage(TensorStorage&&) = default;
-#endif
     TensorStorage(internal::constructor_without_unaligned_array_assert) : Base_(internal::constructor_without_unaligned_array_assert()) {}
     TensorStorage(DenseIndex size, const array<DenseIndex, NumIndices_>& dimensions) : Base_(size, dimensions) {}
 
@@ -111,22 +108,6 @@ class TensorStorage
     ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); }
     void swap(Self_& other) { std::swap(m_data,other.m_data); std::swap(m_dimensions,other.m_dimensions); }
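The TensorDevice / TensorAssign* machinery in this patch is ordinary dispatch
through partial specialization; the same pattern in miniature, stripped of
everything tensor-specific (a self-contained sketch, all names hypothetical):

    #include <cstdio>

    struct DefaultDevice {};
    struct ThreadPoolDevice { int cores; };

    template <typename Expr, typename Device> struct Assign {         // default strategy
      static void run(Expr&) { std::printf("single threaded\n"); }
    };
    template <typename Expr> struct Assign<Expr, ThreadPoolDevice> {  // multicore strategy
      static void run(Expr&) { std::printf("multi threaded\n"); }
    };

    template <typename Expr, typename Device> struct DeviceProxy {    // what device() returns
      Expr& expr;
      template <typename Other> DeviceProxy& operator=(const Other&) {
        Assign<Expr, Device>::run(expr);                              // resolved at compile time
        return *this;
      }
    };

Because the strategy is encoded in a type, choosing the evaluation backend
costs nothing at runtime.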