about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2014-06-04 09:21:48 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2014-06-04 09:21:48 -0700
commit: 6fa6cdd2b988da98cbdd2b1a5fd2fd3b9d56a4b1 (patch)
tree: 195d19a0318e92323a6148570c7e68831c3c77b2 /unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
parent: 736267cf6b17832a571acf7e34ca07c7f55907ee (diff)
Added support for tensor contractions.
Updated expression evaluation mechanism to also compute the size of the tensor result. Misc fixes and improvements.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h | 5
1 files changed, 2 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index e69ff6188..da1eb62cb 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -53,7 +53,6 @@ template<typename Derived1, typename Derived2>
struct TensorAssign<Derived1, Derived2, true>
{
typedef typename Derived1::Index Index;
- EIGEN_DEVICE_FUNC
static inline void run(Derived1& dst, const Derived2& src)
{
TensorEvaluator<Derived1> evalDst(dst);
@@ -63,7 +62,7 @@ struct TensorAssign<Derived1, Derived2, true>
static const int LhsStoreMode = TensorEvaluator<Derived1>::IsAligned ? Aligned : Unaligned;
static const int RhsLoadMode = TensorEvaluator<Derived2>::IsAligned ? Aligned : Unaligned;
static const int PacketSize = unpacket_traits<typename TensorEvaluator<Derived1>::PacketReturnType>::size;
- static const int VectorizedSize = (size / PacketSize) * PacketSize;
+ const int VectorizedSize = (size / PacketSize) * PacketSize;
for (Index i = 0; i < VectorizedSize; i += PacketSize) {
evalDst.template writePacket<LhsStoreMode>(i, evalSrc.template packet<RhsLoadMode>(i));
@@ -148,7 +147,7 @@ struct TensorAssignMultiThreaded
// GPU: the evaluation of the expressions is offloaded to a GPU.
-#ifdef EIGEN_USE_GPU
+#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
template <typename LhsEvaluator, typename RhsEvaluator>
__global__ void EigenMetaKernelNoCheck(LhsEvaluator evalDst, const RhsEvaluator evalSrc) {
const int index = blockIdx.x * blockDim.x + threadIdx.x;