author    | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-05-16 15:08:05 -0700
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-05-16 15:08:05 -0700
commit    | 7402fea0a8e63e3ea248257047c584afee8f8bde (patch)
tree      | 429aee7ea314c579ed62c1c5e1ff84850b14370a
parent    | 0320f7e3a71406b9a03d1bab0d168fd76e63d457 (diff)
Vectorized the evaluation of tensor expressions (using SSE, AVX, NEON, ...).
Added the ability to parallelize the evaluation of a tensor expression over multiple CPU cores.
Added the ability to offload the evaluation of a tensor expression to a GPU.
17 files changed, 720 insertions(+), 66 deletions(-)
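In user code, the new functionality surfaces as a device() modifier on the assignment target: the right-hand-side expression is unchanged, and the device argument picks the evaluation strategy. A minimal usage sketch pieced together from the tests added below (the thread count and tensor dimensions are arbitrary):

#define EIGEN_USE_THREADS  // opt into the multicore evaluation strategy

#include <Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 3> a(Eigen::array<ptrdiff_t, 3>(2, 3, 7));
  Eigen::Tensor<float, 3> b(Eigen::array<ptrdiff_t, 3>(2, 3, 7));
  Eigen::Tensor<float, 3> c(Eigen::array<ptrdiff_t, 3>(2, 3, 7));
  a.setRandom();
  b.setRandom();

  // Default device: evaluated on the calling thread, vectorized when
  // both sides report PacketAccess.
  c = a + b;

  // Thread pool device: the index space is partitioned across 4 cores.
  Eigen::ThreadPoolDevice pool(4);
  c.device(pool) = a + b;
  return 0;
}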
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index f2b18ef31..323d9edff 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -31,6 +31,7 @@
 #include "Eigen/Core"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h"
@@ -39,6 +40,7 @@
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/Tensor.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
index f5c027d1c..d8ff3f584 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -75,9 +75,15 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_> >
     typedef typename internal::traits<Self>::StorageKind StorageKind;
     typedef typename internal::traits<Self>::Index Index;
     typedef Scalar_ Scalar;
-    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename internal::packet_traits<Scalar>::type Packet;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     typedef typename Base::CoeffReturnType CoeffReturnType;
+    typedef typename Base::PacketReturnType PacketReturnType;
+
+    enum {
+      IsAligned = bool(EIGEN_ALIGN),
+      PacketAccess = true,
+    };
     static const int Options = Options_;
     static const std::size_t NumIndices = NumIndices_;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index f1df827f9..e69ff6188 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -10,6 +10,9 @@
 #ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
 #define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
+#ifdef EIGEN_USE_THREADS
+#include <future>
+#endif
 namespace Eigen {
@@ -28,7 +31,8 @@ namespace Eigen {
  */
 namespace internal {
-template<typename Derived1, typename Derived2>
+// Default strategy: the expressions are evaluated with a single cpu thread.
+template<typename Derived1, typename Derived2, bool Vectorizable = TensorEvaluator<Derived1>::PacketAccess & TensorEvaluator<Derived2>::PacketAccess>
 struct TensorAssign
 {
   typedef typename Derived1::Index Index;
@@ -38,13 +42,150 @@ struct TensorAssign
     TensorEvaluator<Derived1> evalDst(dst);
     TensorEvaluator<Derived2> evalSrc(src);
     const Index size = dst.size();
-    for(Index i = 0; i < size; ++i) {
+    for (Index i = 0; i < size; ++i) {
       evalDst.coeffRef(i) = evalSrc.coeff(i);
     }
   }
 };
+
+
+template<typename Derived1, typename Derived2>
+struct TensorAssign<Derived1, Derived2, true>
+{
+  typedef typename Derived1::Index Index;
+  EIGEN_DEVICE_FUNC
+  static inline void run(Derived1& dst, const Derived2& src)
+  {
+    TensorEvaluator<Derived1> evalDst(dst);
+    TensorEvaluator<Derived2> evalSrc(src);
+    const Index size = dst.size();
+
+    static const int LhsStoreMode = TensorEvaluator<Derived1>::IsAligned ? Aligned : Unaligned;
+    static const int RhsLoadMode = TensorEvaluator<Derived2>::IsAligned ? Aligned : Unaligned;
+    static const int PacketSize = unpacket_traits<typename TensorEvaluator<Derived1>::PacketReturnType>::size;
+    static const int VectorizedSize = (size / PacketSize) * PacketSize;
+
+    for (Index i = 0; i < VectorizedSize; i += PacketSize) {
+      evalDst.template writePacket<LhsStoreMode>(i, evalSrc.template packet<RhsLoadMode>(i));
+    }
+    for (Index i = VectorizedSize; i < size; ++i) {
+      evalDst.coeffRef(i) = evalSrc.coeff(i);
+    }
+  }
+};
+
+
+// Multicore strategy: the index space is partitioned and each core is assigned to a partition
+#ifdef EIGEN_USE_THREADS
+template <typename LhsEval, typename RhsEval, typename Index, bool Vectorizable = LhsEval::PacketAccess & RhsEval::PacketAccess>
+struct EvalRange {
+  static void run(LhsEval& dst, const RhsEval& src, const Index first, const Index last) {
+    eigen_assert(last > first);
+    for (Index i = first; i < last; ++i) {
+      dst.coeffRef(i) = src.coeff(i);
+    }
+  }
+};
+
+template <typename LhsEval, typename RhsEval, typename Index>
+struct EvalRange<LhsEval, RhsEval, Index, true> {
+  static void run(LhsEval& dst, const RhsEval& src, const Index first, const Index last) {
+    eigen_assert(last > first);
+
+    Index i = first;
+    static const int PacketSize = unpacket_traits<typename LhsEval::PacketReturnType>::size;
+    if (last - first > PacketSize) {
+      static const int LhsStoreMode = LhsEval::IsAligned ? Aligned : Unaligned;
+      static const int RhsLoadMode = RhsEval::IsAligned ? Aligned : Unaligned;
+      eigen_assert(first % PacketSize == 0);
+      Index lastPacket = last - (last % PacketSize);
+      for (; i < lastPacket; i += PacketSize) {
+        dst.template writePacket<LhsStoreMode>(i, src.template packet<RhsLoadMode>(i));
+      }
+    }
+
+    for (; i < last; ++i) {
+      dst.coeffRef(i) = src.coeff(i);
+    }
+  }
+};
+
+template<typename Derived1, typename Derived2>
+struct TensorAssignMultiThreaded
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1& dst, const Derived2& src, const ThreadPoolDevice& device)
+  {
+    TensorEvaluator<Derived1> evalDst(dst);
+    TensorEvaluator<Derived2> evalSrc(src);
+    const Index size = dst.size();
+
+    static const bool Vectorizable = TensorEvaluator<Derived1>::PacketAccess & TensorEvaluator<Derived2>::PacketAccess;
+    static const int PacketSize = Vectorizable ? unpacket_traits<typename TensorEvaluator<Derived1>::PacketReturnType>::size : 1;
+
+    int blocksz = static_cast<int>(ceil(static_cast<float>(size)/device.numThreads()) + PacketSize - 1);
+    const Index blocksize = std::max<Index>(PacketSize, (blocksz - (blocksz % PacketSize)));
+    const Index numblocks = size / blocksize;
+
+    Index i = 0;
+    vector<std::future<void> > results;
+    results.reserve(numblocks);
+    for (int i = 0; i < numblocks; ++i) {
+      results.push_back(std::async(std::launch::async, &EvalRange<TensorEvaluator<Derived1>, TensorEvaluator<Derived2>, Index>::run, evalDst, evalSrc, i*blocksize, (i+1)*blocksize));
+    }
+
+    for (int i = 0; i < numblocks; ++i) {
+      results[i].get();
+    }
+
+    if (numblocks * blocksize < size) {
+      EvalRange<TensorEvaluator<Derived1>, TensorEvaluator<Derived2>, Index>::run(evalDst, evalSrc, numblocks * blocksize, size);
+    }
+  }
+};
+#endif
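TensorAssignMultiThreaded rounds each thread's share of the index space up to a multiple of the packet size, so every std::async task starts on a packet boundary (EvalRange asserts this); whatever is left over runs on the calling thread. A standalone sketch of that partitioning arithmetic, with hypothetical numbers:

#include <algorithm>
#include <cmath>
#include <cstdio>

// Mirrors TensorAssignMultiThreaded's block computation (standalone version
// for illustration; the sizes below are made up).
int main() {
  const int size = 1000;      // total number of coefficients
  const int num_threads = 3;  // device.numThreads()
  const int PacketSize = 8;   // e.g. AVX packets of float

  int blocksz = static_cast<int>(std::ceil(static_cast<float>(size) / num_threads) + PacketSize - 1);
  const int blocksize = std::max(PacketSize, blocksz - (blocksz % PacketSize));
  const int numblocks = size / blocksize;

  // Each block [i*blocksize, (i+1)*blocksize) starts on a packet boundary;
  // the remainder [numblocks*blocksize, size) is evaluated sequentially.
  std::printf("blocksize=%d numblocks=%d remainder=%d\n",
              blocksize, numblocks, size - numblocks * blocksize);
  return 0;  // prints: blocksize=336 numblocks=2 remainder=328
}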
+// GPU: the evaluation of the expressions is offloaded to a GPU.
+#ifdef EIGEN_USE_GPU
+template <typename LhsEvaluator, typename RhsEvaluator>
+__global__ void EigenMetaKernelNoCheck(LhsEvaluator evalDst, const RhsEvaluator evalSrc) {
+  const int index = blockIdx.x * blockDim.x + threadIdx.x;
+  evalDst.coeffRef(index) = evalSrc.coeff(index);
+}
+template <typename LhsEvaluator, typename RhsEvaluator>
+__global__ void EigenMetaKernelPeel(LhsEvaluator evalDst, const RhsEvaluator evalSrc, int peel_start_offset, int size) {
+  const int index = peel_start_offset + blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < size) {
+    evalDst.coeffRef(index) = evalSrc.coeff(index);
+  }
+}
+
+template<typename Derived1, typename Derived2>
+struct TensorAssignGpu
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1& dst, const Derived2& src, const GpuDevice& device)
+  {
+    TensorEvaluator<Derived1> evalDst(dst);
+    TensorEvaluator<Derived2> evalSrc(src);
+    const Index size = dst.size();
+    const int block_size = std::min<int>(size, 32*32);
+    const int num_blocks = size / block_size;
+    EigenMetaKernelNoCheck<TensorEvaluator<Derived1>, TensorEvaluator<Derived2> > <<<num_blocks, block_size, 0, device.stream()>>>(evalDst, evalSrc);
+
+    const int remaining_items = size % block_size;
+    if (remaining_items > 0) {
+      const int peel_start_offset = num_blocks * block_size;
+      const int peel_block_size = std::min<int>(size, 32);
+      const int peel_num_blocks = (remaining_items + peel_block_size - 1) / peel_block_size;
+      EigenMetaKernelPeel<TensorEvaluator<Derived1>, TensorEvaluator<Derived2> > <<<peel_num_blocks, peel_block_size, 0, device.stream()>>>(evalDst, evalSrc, peel_start_offset, size);
+    }
+  }
+};
+#endif
+
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 9c7783aaf..fa1bd3498 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -28,6 +28,7 @@ class TensorBase
     typedef typename internal::traits<Derived>::Scalar Scalar;
     typedef typename internal::traits<Derived>::Index Index;
     typedef Scalar CoeffReturnType;
+    typedef typename internal::packet_traits<Scalar>::type PacketReturnType;
     Derived& setZero() {
       return setConstant(Scalar(0));
@@ -83,6 +84,17 @@ class TensorBase
       return TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
     }
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const Derived, const OtherDerived>
+    operator-(const OtherDerived& other) const {
+      return TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+    }
+
+    template <typename DeviceType>
+    TensorDevice<Derived, DeviceType> device(const DeviceType& device) {
+      return TensorDevice<Derived, DeviceType>(device, derived());
+    }
+
   protected:
     template <typename OtherDerived> friend class TensorBase;
     EIGEN_DEVICE_FUNC
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
new file mode 100644
index 000000000..71890e187
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
@@ -0,0 +1,83 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
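TensorAssignGpu covers the index space with two launches: a bounds-check-free kernel over the largest prefix that divides evenly into blocks, then a small "peel" kernel that re-checks bounds for the tail. A standalone CUDA sketch of the same decomposition (the kernel names and the scalar body are illustrative stand-ins, not the commit's evaluator machinery):

#include <algorithm>
#include <cuda_runtime.h>

__global__ void bulk_kernel(float* out, const float* in) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;  // always in range
  out[i] = in[i] * 3.14f;
}

__global__ void peel_kernel(float* out, const float* in, int start, int size) {
  const int i = start + blockIdx.x * blockDim.x + threadIdx.x;
  if (i < size) out[i] = in[i] * 3.14f;  // only the tail needs the check
}

void launch(float* out, const float* in, int size, cudaStream_t stream) {
  const int block_size = std::min(size, 32 * 32);  // up to 1024 threads/block
  const int num_blocks = size / block_size;        // bulk part, no bounds checks
  bulk_kernel<<<num_blocks, block_size, 0, stream>>>(out, in);

  const int remaining = size % block_size;
  if (remaining > 0) {
    const int start = num_blocks * block_size;
    const int peel_block = std::min(size, 32);
    const int peel_blocks = (remaining + peel_block - 1) / peel_block;
    peel_kernel<<<peel_blocks, peel_block, 0, stream>>>(out, in, start, size);
  }
}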
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
+#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
+
+namespace Eigen {
+
+/** \class TensorDevice
+  * \ingroup CXX11_Tensor_Module
+  *
+  * \brief Pseudo expression providing an operator = that will evaluate its argument
+  * on the specified computing 'device' (GPU, thread pool, ...)
+  *
+  * Example:
+  *    C.device(EIGEN_GPU) = A + B;
+  *
+  * Todo: thread pools.
+  * Todo: operator +=, -=, *= and so on.
+  */
+
+template <typename ExpressionType, typename DeviceType> class TensorDevice {
+  public:
+    TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
+      internal::TensorAssign<ExpressionType, const OtherDerived>::run(m_expression, other);
+      return *this;
+    }
+
+  protected:
+    const DeviceType& m_device;
+    ExpressionType& m_expression;
+};
+
+
+#ifdef EIGEN_USE_THREADS
+template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPoolDevice> {
+  public:
+    TensorDevice(const ThreadPoolDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
+      internal::TensorAssignMultiThreaded<ExpressionType, const OtherDerived>::run(m_expression, other, m_device);
+      return *this;
+    }
+
+  protected:
+    const ThreadPoolDevice& m_device;
+    ExpressionType& m_expression;
+};
+#endif
+
+
+#ifdef EIGEN_USE_GPU
+template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
+{
+  public:
+    TensorDevice(const GpuDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
+      internal::TensorAssignGpu<ExpressionType, const OtherDerived>::run(m_expression, other, m_device);
+      return *this;
+    }
+
+  protected:
+    const GpuDevice& m_device;
+    ExpressionType& m_expression;
+};
+#endif
+
+
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
new file mode 100644
index 000000000..ded6ca604
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
@@ -0,0 +1,56 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
+#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
+
+
+namespace Eigen {
+
+// Default device for the machine (typically a single cpu core)
+struct DefaultDevice {
+};
+
+
+// Multiple cpu cores
+// We should really use a thread pool here but first we need to find a portable thread pool library.
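TensorDevice is the glue between TensorBase::device() and the strategies in TensorAssign.h: it holds a reference to the destination, and its operator= is where evaluation actually happens, with the device type selecting the single-threaded, multithreaded, or GPU path at compile time. A stripped-down sketch of the same pattern; all names here are hypothetical, and it dispatches on overloads where the commit uses partial specializations of TensorDevice:

#include <cstddef>

// Hypothetical stand-ins for the real device types.
struct SingleCore {};
struct MultiCore { std::size_t cores; };

template <typename Expr, typename Device>
class DeviceBound {
 public:
  DeviceBound(const Device& d, Expr& e) : device_(d), expr_(e) {}

  // Assignment triggers evaluation; the Device type picks the strategy.
  template <typename Other>
  DeviceBound& operator=(const Other& other) {
    assign(other, device_);
    return *this;
  }

 private:
  template <typename Other>
  void assign(const Other&, const SingleCore&) { /* scalar/packet loop */ }
  template <typename Other>
  void assign(const Other&, const MultiCore&) { /* partition + std::async */ }

  const Device& device_;
  Expr& expr_;
};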
+#ifdef EIGEN_USE_THREADS
+struct ThreadPoolDevice {
+  ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : /*pool_(pool), */num_threads_(num_cores) { }
+  size_t numThreads() const { return num_threads_; }
+  /*ThreadPool* threadPool() const { return pool_; }*/
+
+ private:
+  // todo: NUMA, ...
+  size_t num_threads_;
+  /*ThreadPool* pool_;*/
+};
+#endif
+
+
+// GPU offloading
+#ifdef EIGEN_USE_GPU
+struct GpuDevice {
+  // todo: support for multiple gpu;
+  GpuDevice() {
+    cudaStreamCreate(&stream_);
+  }
+  ~GpuDevice() {
+    cudaStreamDestroy(stream_);
+  }
+  const cudaStream_t& stream() const { return stream_; }
+
+ private:
+  cudaStream_t stream_;
+};
+#endif
+
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
index bd3bd5aca..43e9d6550 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -79,16 +79,16 @@ struct Sizes : internal::numeric_list<std::size_t, Indices...> {
   Sizes() { }
   template <typename DenseIndex>
-  explicit Sizes(const array<DenseIndex, Base::count>&/* indices*/) {
+  explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
     // todo: add assertion
   }
 #ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-  explicit Sizes(std::initializer_list<std::size_t>/* l*/) {
+  explicit Sizes(std::initializer_list<std::size_t> /*l*/) {
     // todo: add assertion
   }
 #endif
-  template <typename T> Sizes& operator = (const T&/* other*/) {
+  template <typename T> Sizes& operator = (const T& /*other*/) {
     // add assertion failure if the size of other is different
     return *this;
   }
@@ -119,7 +119,7 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
   static const size_t count = Base::count;
   static const std::size_t total_size = internal::arg_prod<Base>::value;
-  static const size_t TotalSize() {
+  static size_t TotalSize() {
     return internal::arg_prod<Base>::value;
   }
@@ -181,14 +181,11 @@ template <typename DenseIndex, std::size_t NumDims>
 struct DSizes : array<DenseIndex, NumDims> {
   typedef array<DenseIndex, NumDims> Base;
-  size_t TotalSize() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() const {
     return internal::array_prod(*static_cast<const Base*>(this));
   }
   DSizes() { }
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-  // explicit DSizes(std::initializer_list<DenseIndex> l) : Base(l) { }
-#endif
   explicit DSizes(const array<DenseIndex, NumDims>& a) : Base(a) { }
   DSizes& operator = (const array<DenseIndex, NumDims>& other) {
@@ -203,7 +200,6 @@ struct DSizes : array<DenseIndex, NumDims> {
   size_t IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const {
     return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this));
   }
-
 };
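The DSizes::IndexOfRowMajor/IndexOfColMajor helpers touched above (and used by the new TensorMap::operator() overloads further down) flatten a multi-index into a linear offset. A small hand-rolled illustration of the two layouts for a 2x3x7 tensor, not the tensor_index_linearization_helper itself:

#include <array>
#include <cassert>

// Linearize (i0, i1, i2) into a 2x3x7 buffer in both storage orders.
int main() {
  const std::array<int, 3> dims = {{2, 3, 7}};
  const std::array<int, 3> idx = {{1, 2, 3}};

  // Column-major: the first index varies fastest.
  const int col_major = idx[0] + dims[0] * (idx[1] + dims[1] * idx[2]);
  // Row-major: the last index varies fastest.
  const int row_major = idx[2] + dims[2] * (idx[1] + dims[1] * idx[0]);

  assert(col_major == 1 + 2 * (2 + 3 * 3));  // 23
  assert(row_major == 3 + 7 * (2 + 3 * 1));  // 38
  return 0;
}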
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index b0dbca041..3ce924dc3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -29,32 +29,38 @@ struct TensorEvaluator
 {
   typedef typename Derived::Index Index;
   typedef typename Derived::Scalar Scalar;
-  typedef typename Derived::Scalar& CoeffReturnType;
+  typedef typename Derived::Packet Packet;
+  typedef typename Derived::Scalar CoeffReturnType;
+  typedef typename Derived::Packet PacketReturnType;
+
+  enum {
+    IsAligned = Derived::IsAligned,
+    PacketAccess = Derived::PacketAccess,
+  };
   TensorEvaluator(Derived& m)
       : m_data(const_cast<Scalar*>(m.data()))
   { }
-  CoeffReturnType coeff(Index index) const {
+  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const {
     return m_data[index];
   }
-  Scalar& coeffRef(Index index) {
+  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) {
    return m_data[index];
   }
-  // to do: vectorized evaluation.
-  /* template<int LoadMode>
+  template<int LoadMode>
   PacketReturnType packet(Index index) const
   {
-    return ploadt<PacketScalar, LoadMode>(m_data + index);
+    return internal::ploadt<Packet, LoadMode>(m_data + index);
   }
-  template<int StoreMode>
-  void writePacket(Index index, const PacketScalar& x)
+  template <int StoreMode>
+  void writePacket(Index index, const Packet& x)
   {
-    return pstoret<Scalar, PacketScalar, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
-  }*/
+    return internal::pstoret<Scalar, Packet, StoreMode>(m_data + index, x);
+  }
 protected:
   Scalar* m_data;
@@ -70,6 +76,11 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType> >
 {
   typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
+  enum {
+    IsAligned = TensorEvaluator<ArgType>::IsAligned,
+    PacketAccess = TensorEvaluator<ArgType>::PacketAccess & internal::functor_traits<UnaryOp>::PacketAccess,
+  };
+
   TensorEvaluator(const XprType& op)
     : m_functor(op.functor()),
       m_argImpl(op.nestedExpression())
@@ -77,12 +88,19 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType> >
   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename XprType::PacketReturnType PacketReturnType;
-  CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
   {
     return m_functor(m_argImpl.coeff(index));
   }
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
+  {
+    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
+  }
+
 private:
   const UnaryOp m_functor;
   TensorEvaluator<ArgType> m_argImpl;
@@ -96,6 +114,12 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
 {
   typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
+  enum {
+    IsAligned = TensorEvaluator<LeftArgType>::IsAligned & TensorEvaluator<RightArgType>::IsAligned,
+    PacketAccess = TensorEvaluator<LeftArgType>::PacketAccess & TensorEvaluator<RightArgType>::PacketAccess &
+                   internal::functor_traits<BinaryOp>::PacketAccess,
+  };
+
   TensorEvaluator(const XprType& op)
     : m_functor(op.functor()),
       m_leftImpl(op.lhsExpression()),
@@ -104,11 +128,17 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename XprType::PacketReturnType PacketReturnType;
-  CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
   {
     return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
   }
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
+  {
+    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
+  }
 private:
   const BinaryOp m_functor;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
index aa875dc31..e32077f6e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
@@ -33,6 +33,9 @@ struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> >
   typedef typename result_of<
                      UnaryOp(typename XprType::Scalar)
                    >::type Scalar;
+  typedef typename result_of<
+                     UnaryOp(typename XprType::Packet)
+                   >::type Packet;
   typedef typename XprType::Nested XprTypeNested;
   typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
 };
@@ -57,14 +60,16 @@ template<typename UnaryOp, typename XprType>
 class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType> >
 {
   public:
-    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar;
-    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-    typedef typename XprType::CoeffReturnType CoeffReturnType;
-    typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested;
-    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind;
-    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index;
-
-    inline TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar;
+    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Packet Packet;
+    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
+    typedef typename XprType::CoeffReturnType CoeffReturnType;
+    typedef typename XprType::PacketReturnType PacketReturnType;
+    typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested;
+    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind;
+    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index;
+
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
       : m_xpr(xpr), m_functor(func) {}
     EIGEN_DEVICE_FUNC
@@ -92,6 +97,7 @@ struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
                        typename RhsXprType::Scalar
                      )
                    >::type Scalar;
+  typedef typename internal::packet_traits<Scalar>::type Packet;
   typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
                                         typename traits<RhsXprType>::StorageKind>::ret StorageKind;
   typedef typename promote_index_type<typename traits<LhsXprType>::Index,
@@ -123,14 +129,17 @@ class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsX
 {
   public:
     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Scalar Scalar;
+    typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Packet Packet;
     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
     typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType,
                                                     typename RhsXprType::CoeffReturnType>::ret CoeffReturnType;
+    typedef typename internal::promote_storage_type<typename LhsXprType::PacketReturnType,
+                                                    typename RhsXprType::PacketReturnType>::ret PacketReturnType;
     typedef typename Eigen::internal::nested<TensorCwiseBinaryOp>::type Nested;
     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::StorageKind StorageKind;
     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Index Index;
-    inline TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp())
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp())
      : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_functor(func) {}
     EIGEN_DEVICE_FUNC
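With Packet/PacketReturnType plumbed through the expression classes, a packet<LoadMode>(i) call on the root evaluator recurses down the expression tree: leaf evaluators load a packet straight from memory, and each cwise node applies its functor's packetOp to its children's packets. A toy sketch of that recursion, using plain arrays and a hypothetical 4-wide packet rather than Eigen's evaluators:

#include <cstddef>

// A toy 4-wide packet standing in for internal::packet_traits<float>::type.
struct Packet4f { float v[4]; };

static Packet4f pload4(const float* p) {  // leaf: load a packet from memory
  Packet4f r = {{p[0], p[1], p[2], p[3]}};
  return r;
}

struct Leaf {
  const float* data;
  Packet4f packet(std::size_t i) const { return pload4(data + i); }
};

template <typename L, typename R>
struct AddNode {  // cwise node: combine the children's packets
  L lhs;
  R rhs;
  Packet4f packet(std::size_t i) const {
    Packet4f a = lhs.packet(i), b = rhs.packet(i), r;
    for (int k = 0; k < 4; ++k) r.v[k] = a.v[k] + b.v[k];  // functor's packetOp
    return r;
  }
};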
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
index 953880123..dcc7ccd65 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
@@ -33,11 +33,17 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
     typedef typename internal::traits<Self>::StorageKind StorageKind;
     typedef typename internal::traits<Self>::Index Index;
     typedef Scalar_ Scalar;
-    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename internal::packet_traits<Scalar>::type Packet;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     typedef typename Base::CoeffReturnType CoeffReturnType;
-    static const int Options = Options_;
+    static const int Options = Options_;
+
+    enum {
+      IsAligned = bool(EIGEN_ALIGN),
+      PacketAccess = true,
+    };
+
     typedef Dimensions_ Dimensions;
     static const std::size_t NumIndices = Dimensions::count;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index e8a2125c4..09b0fe66d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -14,12 +14,14 @@ namespace Eigen {
 template<typename Scalar_, std::size_t NumIndices_, int Options_ = 0> class Tensor;
 template<typename Scalar_, typename Dimensions, int Options_ = 0> class TensorFixedSize;
-template<typename PlainObjectType> class TensorMap;
+template<typename PlainObjectType, int Options_ = Unaligned> class TensorMap;
 template<typename Derived> class TensorBase;
 template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp;
 template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp;
+template<typename ExpressionType, typename DeviceType> class TensorDevice;
+
 // Move to internal?
 template<typename Derived> struct TensorEvaluator;
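The forward declaration above gives TensorMap an Options parameter defaulting to Unaligned; passing Aligned promises that the wrapped pointer meets the platform's packet alignment, which the TensorMap diff below turns into IsAligned and hence into aligned packet loads and stores. A hedged usage sketch (the buffer names are hypothetical, and aligned_buf must genuinely be packet-aligned):

#include <Eigen/CXX11/Tensor>

void example(float* aligned_buf, float* arbitrary_buf) {
  // Default: no alignment assumption; the evaluator uses unaligned packet ops.
  Eigen::TensorMap<Eigen::Tensor<float, 3> > a(
      arbitrary_buf, Eigen::array<ptrdiff_t, 3>(2, 3, 7));

  // Opt in to aligned packet access.
  Eigen::TensorMap<Eigen::Tensor<float, 3>, Eigen::Aligned> b(
      aligned_buf, Eigen::array<ptrdiff_t, 3>(2, 3, 7));
}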
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
index bb0b39c5a..3fc9c5335 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
@@ -22,16 +22,16 @@ template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Strid
  *
  */
-template<typename PlainObjectType> class TensorMap : public TensorBase<TensorMap<PlainObjectType> >
+template<typename PlainObjectType, int Options_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_> >
 {
   public:
-    typedef TensorMap<PlainObjectType> Self;
+    typedef TensorMap<PlainObjectType, Options_> Self;
     typedef typename PlainObjectType::Base Base;
     typedef typename Eigen::internal::nested<Self>::type Nested;
     typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
     typedef typename internal::traits<PlainObjectType>::Index Index;
     typedef typename internal::traits<PlainObjectType>::Scalar Scalar;
-    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename internal::packet_traits<Scalar>::type Packet;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     typedef typename Base::CoeffReturnType CoeffReturnType;
@@ -43,13 +43,12 @@ template<typename PlainObjectType> class TensorMap : public TensorBase<TensorMap
     typedef Scalar* PointerType;
     typedef PointerType PointerArgType;
-    // Fixed size plain object type only
-    /* EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr) : m_data(dataPtr) {
-      // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
-      //EIGEN_STATIC_ASSERT(1 == PlainObjectType::NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      // todo: add assert to ensure we don't screw up here.
-    }*/
+    static const int Options = Options_;
+
+    enum {
+      IsAligned = bool(EIGEN_ALIGN) && ((int(Options_)&Aligned)==Aligned),
+      PacketAccess = true,
+    };
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(array<DenseIndex, PlainObjectType::NumIndices>({{firstDimension}})) {
@@ -65,7 +64,7 @@ template<typename PlainObjectType> class TensorMap : public TensorBase<TensorMap
     }
 #endif
-    inline TensorMap(PointerArgType dataPtr, const array<Index, PlainObjectType::NumIndices>& dimensions)
+    inline TensorMap(PointerArgType dataPtr, const array<Index, PlainObjectType::NumIndices>& dimensions)
       : m_data(dataPtr), m_dimensions(dimensions)
     { }
@@ -81,11 +80,96 @@ template<typename PlainObjectType> class TensorMap : public TensorBase<TensorMap
     EIGEN_STRONG_INLINE const Scalar* data() const { return m_data; }
     EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, PlainObjectType::NumIndices>& indices) const
+    {
+      //  eigen_assert(checkIndexRange(indices));
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = m_dimensions.IndexOfRowMajor(indices);
+        return m_data[index];
+      } else {
+        const Index index = m_dimensions.IndexOfColMajor(indices);
+        return m_data[index];
+      }
+    }
+
+#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
+    template<typename... IndexTypes> EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const
+    {
+      static_assert(sizeof...(otherIndices) + 1 == PlainObjectType::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = m_dimensions.IndexOfRowMajor(array<Index, PlainObjectType::NumIndices>{{firstIndex, otherIndices...}});
+        return m_data[index];
+      } else {
+        const Index index = m_dimensions.IndexOfColMajor(array<Index, PlainObjectType::NumIndices>{{firstIndex, otherIndices...}});
+        return m_data[index];
+      }
+    }
+#else
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const
+    {
+      eigen_internal_assert(index >= 0 && index < size());
+      return m_data[index];
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i1 + i0 * m_dimensions[0];
+        return m_data[index];
+      } else {
+        const Index index = i0 + i1 * m_dimensions[0];
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i2 + m_dimensions[1] * (i1 + m_dimensions[0] * i0);
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
+        return m_data[index];
+      }
+    }
+#endif
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, PlainObjectType::NumIndices>& indices)
+    {
+      //  eigen_assert(checkIndexRange(indices));
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = m_dimensions.IndexOfRowMajor(indices);
+        return m_data[index];
+      } else {
+        const Index index = m_dimensions.IndexOfColMajor(indices);
+        return m_data[index];
+      }
+    }
 #ifdef EIGEN_HAS_VARIADIC_TEMPLATES
     template<typename... IndexTypes> EIGEN_DEVICE_FUNC
@@ -100,8 +184,60 @@ template<typename PlainObjectType> class TensorMap : public TensorBase<TensorMap
         return m_data[index];
       }
     }
+#else
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index index)
+    {
+      eigen_internal_assert(index >= 0 && index < size());
+      return m_data[index];
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i1 + i0 * m_dimensions[0];
+        return m_data[index];
+      } else {
+        const Index index = i0 + i1 * m_dimensions[0];
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i2 + m_dimensions[1] * (i1 + m_dimensions[0] * i0);
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
+        return m_data[index];
+      }
+    }
 #endif
+
     template<typename OtherDerived> EIGEN_DEVICE_FUNC
     Self& operator=(const OtherDerived& other)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
index efcb39559..64098343e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
@@ -72,9 +72,6 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, void>
     TensorStorage() { }
     TensorStorage(const TensorStorage<T, NumIndices_, Dynamic, Options_, void>& other) : Base_(other) { }
-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    //  TensorStorage(TensorStorage<T, NumIndices_, Dynamic, Options_, void>&&) = default;
-#endif
     TensorStorage(internal::constructor_without_unaligned_array_assert) : Base_(internal::constructor_without_unaligned_array_assert()) {}
     TensorStorage(DenseIndex size, const array<DenseIndex, NumIndices_>& dimensions) : Base_(size, dimensions) {}
@@ -111,22 +108,6 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_nu
       return *this;
     }
-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    /* TensorStorage(Self_&& other)
-      : m_data(std::move(other.m_data)), m_dimensions(std::move(other.m_dimensions))
-    {
-      other.m_data = nullptr;
-    }
-
-    Self_& operator=(Self_&& other)
-    {
-      using std::swap;
-      swap(m_data, other.m_data);
-      swap(m_dimensions, other.m_dimensions);
-      return *this;
-    }*/
-#endif
-
     ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); }
     void swap(Self_& other) { std::swap(m_data,other.m_data); std::swap(m_dimensions,other.m_dimensions); }
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 31583d3ca..abc3375e5 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -104,4 +104,7 @@ if(EIGEN_TEST_CXX11)
   ei_add_test(cxx11_tensor_assign "-std=c++0x")
   ei_add_test(cxx11_tensor_expr "-std=c++0x")
   ei_add_test(cxx11_tensor_map "-std=c++0x")
+  ei_add_test(cxx11_tensor_device "-std=c++0x")
+#  ei_add_test(cxx11_tensor_fixed_size "-std=c++0x")
+  ei_add_test(cxx11_tensor_thread_pool "-std=c++0x")
 endif()
diff --git a/unsupported/test/cxx11_tensor_device.cpp b/unsupported/test/cxx11_tensor_device.cpp
new file mode 100644
index 000000000..9eb1d0420
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_device.cpp
@@ -0,0 +1,126 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_device
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+// Context for evaluation on cpu
+struct CPUContext {
+  CPUContext(const Eigen::Tensor<float, 3>& in1, Eigen::Tensor<float, 3>& in2, Eigen::Tensor<float, 3>& out) : in1_(in1), in2_(in2), out_(out) { }
+
+  const Eigen::Tensor<float, 3>& in1() const { return in1_; }
+  const Eigen::Tensor<float, 3>& in2() const { return in2_; }
+  Eigen::TensorDevice<Eigen::Tensor<float, 3>, Eigen::DefaultDevice> out() { return TensorDevice<Eigen::Tensor<float, 3>, Eigen::DefaultDevice>(cpu_device_, out_); }
+
+ private:
+  const Eigen::Tensor<float, 3>& in1_;
+  const Eigen::Tensor<float, 3>& in2_;
+  Eigen::Tensor<float, 3>& out_;
+
+  Eigen::DefaultDevice cpu_device_;
+};
+
+
+// Context for evaluation on GPU
+struct GPUContext {
+  GPUContext(const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1, Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2, Eigen::TensorMap<Eigen::Tensor<float, 3> >& out) : in1_(in1), in2_(in2), out_(out) { }
+
+  const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1() const { return in1_; }
+  const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2() const { return in2_; }
+  Eigen::TensorDevice<Eigen::TensorMap<Eigen::Tensor<float, 3> >, Eigen::GpuDevice> out() { return TensorDevice<Eigen::TensorMap<Eigen::Tensor<float, 3> >, Eigen::GpuDevice>(gpu_device_, out_); }
+
+ private:
+  const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1_;
+  const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2_;
+  Eigen::TensorMap<Eigen::Tensor<float, 3> >& out_;
+  Eigen::GpuDevice gpu_device_;
+};
+
+
+// The actual expression to evaluate
+template <typename Context>
+static void test_contextual_eval(Context* context)
+{
+  context->out() = context->in1() + context->in2() * 3.14f;
+}
+
+static void test_cpu() {
+  Eigen::Tensor<float, 3> in1(Eigen::array<int, 3>(2,3,7));
+  Eigen::Tensor<float, 3> in2(Eigen::array<int, 3>(2,3,7));
+  Eigen::Tensor<float, 3> out(Eigen::array<int, 3>(2,3,7));
+
+  in1.setRandom();
+  in2.setRandom();
+  CPUContext context(in1, in2, out);
+  test_contextual_eval(&context);
+
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * 3.14f);
+      }
+    }
+  }
+}
+
+static void test_gpu() {
+  Eigen::Tensor<float, 3> in1(Eigen::array<int, 3>(2,3,7));
+  Eigen::Tensor<float, 3> in2(Eigen::array<int, 3>(2,3,7));
+  Eigen::Tensor<float, 3> out(Eigen::array<int, 3>(2,3,7));
+  in1.setRandom();
+  in2.setRandom();
+
+  std::size_t in1_bytes = in1.size() * sizeof(float);
+  std::size_t in2_bytes = in2.size() * sizeof(float);
+  std::size_t out_bytes = out.size() * sizeof(float);
+
+  float* d_in1;
+  float* d_in2;
+  float* d_out;
+  cudaMalloc((void**)(&d_in1), in1_bytes);
+  cudaMalloc((void**)(&d_in2), in2_bytes);
+  cudaMalloc((void**)(&d_out), out_bytes);
+
+  cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice);
+  cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice);
+
+  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<int, 3>(2,3,7));
+  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<int, 3>(2,3,7));
+  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(2,3,7));
+
+  GPUContext context(gpu_in1, gpu_in2, gpu_out);
+  test_contextual_eval(&context);
+
+  cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost);
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * 3.14f);
+      }
+    }
+  }
+}
+
+
+
+void test_cxx11_tensor_device()
+{
+  CALL_SUBTEST(test_cpu());
+  CALL_SUBTEST(test_gpu());
+}
diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp
index c1d74d881..214f6951d 100644
--- a/unsupported/test/cxx11_tensor_fixed_size.cpp
+++ b/unsupported/test/cxx11_tensor_fixed_size.cpp
@@ -159,9 +159,37 @@ static void test_3d()
 }
+static void test_array()
+{
+  TensorFixedSize<float, Sizes<2, 3, 7> > mat1;
+  float val = 0.0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        mat1(array<ptrdiff_t, 3>(i,j,k)) = val;
+        val += 1.0;
+      }
+    }
+  }
+
+  TensorFixedSize<float, Sizes<2, 3, 7> > mat3;
+  mat3 = mat1.cwisePow(3.5f);
+
+  val = 0.0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        VERIFY_IS_APPROX(mat3(array<ptrdiff_t, 3>(i,j,k)), powf(val, 3.5f));
+        val += 1.0;
+      }
+    }
+  }
+}
+
 void test_cxx11_tensor_fixed_size()
 {
   CALL_SUBTEST(test_1d());
   CALL_SUBTEST(test_2d());
   CALL_SUBTEST(test_3d());
+  CALL_SUBTEST(test_array());
 }
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
new file mode 100644
index 000000000..c9de71da3
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -0,0 +1,37 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+
+
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_cxx11_tensor_thread_pool()
+{
+  Eigen::Tensor<float, 3> in1(Eigen::array<ptrdiff_t, 3>(2,3,7));
+  Eigen::Tensor<float, 3> in2(Eigen::array<ptrdiff_t, 3>(2,3,7));
+  Eigen::Tensor<float, 3> out(Eigen::array<ptrdiff_t, 3>(2,3,7));
+
+  in1.setRandom();
+  in2.setRandom();
+
+  Eigen::ThreadPoolDevice thread_pool_device(3);
+  out.device(thread_pool_device) = in1 + in2 * 3.14;
+
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        VERIFY_IS_APPROX(out(Eigen::array<ptrdiff_t, 3>(i,j,k)), in1(Eigen::array<ptrdiff_t, 3>(i,j,k)) + in2(Eigen::array<ptrdiff_t, 3>(i,j,k)) * 3.14f);
+      }
+    }
+  }
+}
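One caveat worth noting about the GPU test above: it cudaMalloc's three device buffers but never frees them (the CUDA stream, by contrast, is released by GpuDevice's destructor). A real application would add the teardown the test omits, once the result has been copied back:

// After cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost):
cudaFree(d_in1);
cudaFree(d_in2);
cudaFree(d_out);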