about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2014-11-18 14:32:41 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2014-11-18 14:32:41 -0800
commit b33cf92878a57ec86d5e5715e7cde3a0cd360fd6 (patch)
tree 9c880ab065237dcfa0ccfb160c4f8183e9733e5e /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
parent 1d3c8306f87b284c26180be6eac13dc8d4aa1b52 (diff)
Fixed the evaluation of expressions involving tensors of 2 or 3 elements on CUDA devices.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h 5
1 files changed, 2 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 4fa8e83ef..f27f643c1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -168,11 +168,10 @@ __launch_bounds__(1024)
const Index PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
const Index vectorized_step_size = step_size * PacketSize;
const Index vectorized_size = (size / PacketSize) * PacketSize;
- Index i = first_index * PacketSize;
- for ( ; i < vectorized_size; i += vectorized_step_size) {
+ for (Index i = first_index * PacketSize; i < vectorized_size; i += vectorized_step_size) {
eval.evalPacket(i);
}
- for ( ; i < size; i += step_size) {
+ for (Index i = vectorized_size + first_index; i < size; i += step_size) {
eval.evalScalar(i);
}
}