about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
diff options
context:
space:
mode:
author Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-07-01 13:59:11 -0700
committer Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-07-01 13:59:11 -0700
commit 1e911b276c6a5ed85b036abf07d848994d6dc144 (patch)
tree 321af6ab7a12ade43302c4de152e439ffa589f63 /unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
parent 4ed213f97b621db9dff44291040109e42a701fdb (diff)
Misc improvements and optimizations
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h 6
1 files changed, 4 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 07cba649b..88db9d410 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -510,7 +510,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
typedef TensorEvalToOp<const KernelArgType> EvalTo;
EvalTo evalToTmp(local, m_kernelArg);
- internal::TensorExecutor<const EvalTo, Device>::run(evalToTmp, m_device);
+ const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value;
+ internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
m_kernel = local;
m_local_kernel = true;
@@ -815,7 +816,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
typedef TensorEvalToOp<const KernelArgType> EvalTo;
EvalTo evalToTmp(local, m_kernelArg);
- internal::TensorExecutor<const EvalTo, GpuDevice>::run(evalToTmp, m_device);
+ const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value;
+ internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device);
m_kernel = local;
m_local_kernel = true;