diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 07cba649b..88db9d410 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -510,7 +510,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr Scalar* local = (Scalar*)m_device.allocate(kernel_sz); typedef TensorEvalToOp<const KernelArgType> EvalTo; EvalTo evalToTmp(local, m_kernelArg); - internal::TensorExecutor<const EvalTo, Device>::run(evalToTmp, m_device); + const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value; + internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device); m_kernel = local; m_local_kernel = true; @@ -815,7 +816,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr Scalar* local = (Scalar*)m_device.allocate(kernel_sz); typedef TensorEvalToOp<const KernelArgType> EvalTo; EvalTo evalToTmp(local, m_kernelArg); - internal::TensorExecutor<const EvalTo, GpuDevice>::run(evalToTmp, m_device); + const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value; + internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device); m_kernel = local; m_local_kernel = true; |