diff options
author | 2015-04-21 16:15:45 -0700 | |
---|---|---|
committer | 2015-04-21 16:15:45 -0700 | |
commit | dfa991cbae98cde7db5aef5ff1bb4b3d51cc362b (patch) | |
tree | 993241653d26567b1904263f48a7e275bae08806 /unsupported | |
parent | e7094883610137e784e845c6e60c3ea920a91deb (diff) |
Make sure that the copy constructor of the evaluator is always called before launching the evaluation of a tensor expression on a CUDA device.
Diffstat (limited to 'unsupported')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 12 |
1 file changed, 10 insertions, 2 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index bb2f8b977..02e1667b9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -157,7 +157,11 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
 template <typename Evaluator, typename Index>
 __global__ void
 __launch_bounds__(1024)
-EigenMetaKernel_NonVectorizable(Evaluator eval, Index size) {
+EigenMetaKernel_NonVectorizable(Evaluator memcopied_eval, Index size) {
+  // Cuda memcopies the kernel arguments. That's fine for POD, but for more
+  // complex types such as evaluators we should really conform to the C++
+  // standard and call a proper copy constructor.
+  Evaluator eval(memcopied_eval);
 
   const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
   const Index step_size = blockDim.x * gridDim.x;
@@ -171,7 +175,11 @@ EigenMetaKernel_NonVectorizable(Evaluator eval, Index size) {
 template <typename Evaluator, typename Index>
 __global__ void
 __launch_bounds__(1024)
-EigenMetaKernel_Vectorizable(Evaluator eval, Index size) {
+EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
+  // Cuda memcopies the kernel arguments. That's fine for POD, but for more
+  // complex types such as evaluators we should really conform to the C++
+  // standard and call a proper copy constructor.
+  Evaluator eval(memcopied_eval);
 
   const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
   const Index step_size = blockDim.x * gridDim.x;
```