diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-11-18 14:32:41 -0800 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-11-18 14:32:41 -0800 |
commit | b33cf92878a57ec86d5e5715e7cde3a0cd360fd6 (patch) | |
tree | 9c880ab065237dcfa0ccfb160c4f8183e9733e5e /unsupported/Eigen | |
parent | 1d3c8306f87b284c26180be6eac13dc8d4aa1b52 (diff) |
Fixed the evaluation of expressions involving tensors of 2 or 3 elements on CUDA devices.
Diffstat (limited to 'unsupported/Eigen')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 5 |
1 file changed, 2 insertions, 3 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 4fa8e83ef..f27f643c1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -168,11 +168,10 @@ __launch_bounds__(1024)
   const Index PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
   const Index vectorized_step_size = step_size * PacketSize;
   const Index vectorized_size = (size / PacketSize) * PacketSize;
-  Index i = first_index * PacketSize;
-  for ( ; i < vectorized_size; i += vectorized_step_size) {
+  for (Index i = first_index * PacketSize; i < vectorized_size; i += vectorized_step_size) {
     eval.evalPacket(i);
   }
-  for ( ; i < size; i += step_size) {
+  for (Index i = vectorized_size + first_index; i < size; i += step_size) {
     eval.evalScalar(i);
   }
 }
```