Make it possible for a vectorized tensor expression to be executed in a CUDA kernel.

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-11-11 15:22:50 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-11-11 15:22:50 -0800
commit: 7f1c29fb0c26e92f31446926c441e13b0f6aec68 (patch)
tree: 3deb8c9f5c1d0cdc97d75720cdb5e743d3e1411b /unsupported
parent: 4f471146fbb22dab20896ae2b1c31d0f549cd10e (diff)
1 file changed, 2 insertions, 1 deletion
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index bc06ca1f0..956672771 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -50,6 +50,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
 {
  public:
   typedef typename Expression::Index Index;
+  EIGEN_DEVICE_FUNC
   static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice())
   {
     TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device);
@@ -57,7 +58,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
     if (needs_assign)
     {
       const Index size = array_prod(evaluator.dimensions());
-      static const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
+      const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
       const Index VectorizedSize = (size / PacketSize) * PacketSize;
 
       for (Index i = 0; i < VectorizedSize; i += PacketSize) {
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2015-11-11 15:22:50 -0800
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2015-11-11 15:22:50 -0800
commit	7f1c29fb0c26e92f31446926c441e13b0f6aec68 (patch)
tree	3deb8c9f5c1d0cdc97d75720cdb5e743d3e1411b /unsupported
parent	4f471146fbb22dab20896ae2b1c31d0f549cd10e (diff)