diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-10-04 08:52:13 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-10-04 08:52:13 -0700 |
commit | 6af5ac7e2749bdea7a31323855ef3b4333b91c3e (patch) | |
tree | bb563ba9fee6752ebff448e25588ffb45b4c1d62 /unsupported | |
parent | 2f6d1607c84bd828e77a44465e0dccfd3524d7a6 (diff) |
Cleanup the cuda executor code.
Diffstat (limited to 'unsupported')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 7 |
1 files changed, 1 insertions, 6 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index a116bf17f..0cac7b179 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -234,16 +234,11 @@ struct EigenMetaKernelEval<Evaluator, Index, true> {
 template <typename Evaluator, typename Index>
 __global__ void
 __launch_bounds__(1024)
-EigenMetaKernel(Evaluator memcopied_eval, Index size) {
+EigenMetaKernel(Evaluator eval, Index size) {
 
   const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
   const Index step_size = blockDim.x * gridDim.x;
 
-  // Cuda memcopies the kernel arguments. That's fine for POD, but for more
-  // complex types such as evaluators we should really conform to the C++
-  // standard and call a proper copy constructor.
-  Evaluator eval(memcopied_eval);
-
   const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned;
   EigenMetaKernelEval<Evaluator, Index, vectorizable>::run(eval, first_index, size, step_size);
 }
```