about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-11-11 15:22:50 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-11-11 15:22:50 -0800
commit 7f1c29fb0c26e92f31446926c441e13b0f6aec68 (patch)
tree 3deb8c9f5c1d0cdc97d75720cdb5e743d3e1411b /unsupported
parent 4f471146fbb22dab20896ae2b1c31d0f549cd10e (diff)
Make it possible for a vectorized tensor expression to be executed in a CUDA kernel.
Diffstat (limited to 'unsupported')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h 3
1 files changed, 2 insertions, 1 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index bc06ca1f0..956672771 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -50,6 +50,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
{
public:
typedef typename Expression::Index Index;
+ EIGEN_DEVICE_FUNC
static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice())
{
TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device);
@@ -57,7 +58,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
if (needs_assign)
{
const Index size = array_prod(evaluator.dimensions());
- static const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
+ const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
const Index VectorizedSize = (size / PacketSize) * PacketSize;
for (Index i = 0; i < VectorizedSize; i += PacketSize) {