diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-07-01 13:59:11 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-07-01 13:59:11 -0700 |
commit | 1e911b276c6a5ed85b036abf07d848994d6dc144 (patch) | |
tree | 321af6ab7a12ade43302c4de152e439ffa589f63 | |
parent | 4ed213f97b621db9dff44291040109e42a701fdb (diff) |
Misc improvements and optimizations
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h | 6 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 6 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h | 2 |
3 files changed, 8 insertions, 6 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 07cba649b..88db9d410 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -510,7 +510,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr Scalar* local = (Scalar*)m_device.allocate(kernel_sz); typedef TensorEvalToOp<const KernelArgType> EvalTo; EvalTo evalToTmp(local, m_kernelArg); - internal::TensorExecutor<const EvalTo, Device>::run(evalToTmp, m_device); + const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value; + internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device); m_kernel = local; m_local_kernel = true; @@ -815,7 +816,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr Scalar* local = (Scalar*)m_device.allocate(kernel_sz); typedef TensorEvalToOp<const KernelArgType> EvalTo; EvalTo evalToTmp(local, m_kernelArg); - internal::TensorExecutor<const EvalTo, GpuDevice>::run(evalToTmp, m_device); + const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value; + internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device); m_kernel = local; m_local_kernel = true; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index 883e6cab1..ff4373f59 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -113,9 +113,9 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device> EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) { + eigen_assert(scalar == NULL); + return m_impl.evalSubExprsIfNeeded(m_buffer); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index d2eae35fd..d253b70f3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -116,7 +116,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> } typedef TensorEvalToOp<const ArgType> EvalTo; EvalTo evalToTmp(m_buffer, m_op); - static const bool PacketAccess = internal::IsVectorizable<Device, ArgType>::value; + const bool PacketAccess = internal::IsVectorizable<Device, ArgType>::value; internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device); m_impl.cleanup(); return true; |