Make sure that the copy constructor of the evaluator is always called before starting the evaluation of a tensor expression on a CUDA device.

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-04-21 16:15:45 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-04-21 16:15:45 -0700
commit: dfa991cbae98cde7db5aef5ff1bb4b3d51cc362b (patch)
tree: 993241653d26567b1904263f48a7e275bae08806 /unsupported
parent: e7094883610137e784e845c6e60c3ea920a91deb (diff)
1 file changed, 10 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index bb2f8b977..02e1667b9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -157,7 +157,11 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
 template <typename Evaluator, typename Index>
 __global__ void
 __launch_bounds__(1024)
-EigenMetaKernel_NonVectorizable(Evaluator eval, Index size) {
+EigenMetaKernel_NonVectorizable(Evaluator memcopied_eval, Index size) {
+  // Cuda memcopies the kernel arguments. That's fine for POD, but for more
+  // complex types such as evaluators we should really conform to the C++
+  // standard and call a proper copy constructor.
+  Evaluator eval(memcopied_eval);
 
   const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
   const Index step_size = blockDim.x * gridDim.x;
@@ -171,7 +175,11 @@ EigenMetaKernel_NonVectorizable(Evaluator eval, Index size) {
 template <typename Evaluator, typename Index>
 __global__ void
 __launch_bounds__(1024)
-EigenMetaKernel_Vectorizable(Evaluator eval, Index size) {
+EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
+  // Cuda memcopies the kernel arguments. That's fine for POD, but for more
+  // complex types such as evaluators we should really conform to the C++
+  // standard and call a proper copy constructor.
+  Evaluator eval(memcopied_eval);
 
   const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
   const Index step_size = blockDim.x * gridDim.x;
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2015-04-21 16:15:45 -0700
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2015-04-21 16:15:45 -0700
commit	dfa991cbae98cde7db5aef5ff1bb4b3d51cc362b (patch)
tree	993241653d26567b1904263f48a7e275bae08806 /unsupported
parent	e7094883610137e784e845c6e60c3ea920a91deb (diff)