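Misc improvements and optimizations: pass an explicit PacketAccess flag (from internal::IsVectorizable) to TensorExecutor when evaluating the convolution kernel, make TensorEvalTo's evalSubExprsIfNeeded forward m_buffer to the wrapped expression and return its result, and drop the unneeded `static` from the PacketAccess constant in TensorForcedEval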

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-07-01 13:59:11 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-07-01 13:59:11 -0700
commit: 1e911b276c6a5ed85b036abf07d848994d6dc144 (patch)
tree: 321af6ab7a12ade43302c4de152e439ffa589f63
parent: 4ed213f97b621db9dff44291040109e42a701fdb (diff)
3 files changed, 8 insertions, 6 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 07cba649b..88db9d410 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -510,7 +510,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
       Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
       typedef TensorEvalToOp<const KernelArgType> EvalTo;
       EvalTo evalToTmp(local, m_kernelArg);
-      internal::TensorExecutor<const EvalTo, Device>::run(evalToTmp, m_device);
+      const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value;
+      internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
 
       m_kernel = local;
       m_local_kernel = true;
@@ -815,7 +816,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
       Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
       typedef TensorEvalToOp<const KernelArgType> EvalTo;
       EvalTo evalToTmp(local, m_kernelArg);
-      internal::TensorExecutor<const EvalTo, GpuDevice>::run(evalToTmp, m_device);
+      const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value;
+      internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device);
 
       m_kernel = local;
       m_local_kernel = true;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index 883e6cab1..ff4373f59 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -113,9 +113,9 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
 
   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) {
+    eigen_assert(scalar == NULL);
+    return m_impl.evalSubExprsIfNeeded(m_buffer);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index d2eae35fd..d253b70f3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -116,7 +116,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
     }
     typedef TensorEvalToOp<const ArgType> EvalTo;
     EvalTo evalToTmp(m_buffer, m_op);
-    static const bool PacketAccess = internal::IsVectorizable<Device, ArgType>::value;
+    const bool PacketAccess = internal::IsVectorizable<Device, ArgType>::value;
     internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
     m_impl.cleanup();
     return true;
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2015-07-01 13:59:11 -0700
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2015-07-01 13:59:11 -0700
commit	1e911b276c6a5ed85b036abf07d848994d6dc144 (patch)
tree	321af6ab7a12ade43302c4de152e439ffa589f63
parent	4ed213f97b621db9dff44291040109e42a701fdb (diff)