diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-06-04 09:21:48 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-06-04 09:21:48 -0700 |
commit | 6fa6cdd2b988da98cbdd2b1a5fd2fd3b9d56a4b1 (patch) | |
tree | 195d19a0318e92323a6148570c7e68831c3c77b2 /unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h | |
parent | 736267cf6b17832a571acf7e34ca07c7f55907ee (diff) |
Added support for tensor contractions
Updated expression evaluation mechanism to also compute the size of the tensor result
Misc fixes and improvements.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h | 5 |
1 file changed, 2 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index e69ff6188..da1eb62cb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -53,7 +53,6 @@ template<typename Derived1, typename Derived2> struct TensorAssign<Derived1, Derived2, true> { typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC static inline void run(Derived1& dst, const Derived2& src) { TensorEvaluator<Derived1> evalDst(dst); @@ -63,7 +62,7 @@ struct TensorAssign<Derived1, Derived2, true> static const int LhsStoreMode = TensorEvaluator<Derived1>::IsAligned ? Aligned : Unaligned; static const int RhsLoadMode = TensorEvaluator<Derived2>::IsAligned ? Aligned : Unaligned; static const int PacketSize = unpacket_traits<typename TensorEvaluator<Derived1>::PacketReturnType>::size; - static const int VectorizedSize = (size / PacketSize) * PacketSize; + const int VectorizedSize = (size / PacketSize) * PacketSize; for (Index i = 0; i < VectorizedSize; i += PacketSize) { evalDst.template writePacket<LhsStoreMode>(i, evalSrc.template packet<RhsLoadMode>(i)); @@ -148,7 +147,7 @@ struct TensorAssignMultiThreaded // GPU: the evaluation of the expressions is offloaded to a GPU. -#ifdef EIGEN_USE_GPU +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) template <typename LhsEvaluator, typename RhsEvaluator> __global__ void EigenMetaKernelNoCheck(LhsEvaluator evalDst, const RhsEvaluator evalSrc) { const int index = blockIdx.x * blockDim.x + threadIdx.x; |