about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-07-01 11:32:04 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-07-01 11:32:04 -0700
commit 925d0d375a05f09c3e3abb91f29fc2513bb25d0d (patch)
tree c88a75d40e6e005ea6b6c0630c74a2707a8d7455 /unsupported/Eigen/CXX11/src/Tensor
parent 44eedd89159d75b319bc714775b58bb54eb92927 (diff)
Enabled the vectorized evaluation of several tensor expressions that was previously disabled by mistake
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h 6
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h 6
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h 4
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h 11
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h 2
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h 21
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h 4
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h 10
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h 4
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h 4
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h 4
11 files changed, 42 insertions, 34 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index 3b99ef069..2ef5ff205 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -106,8 +106,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
{
typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign;
Assign assign(*this, other);
- static const bool Vectorize = TensorEvaluator<const Assign, DefaultDevice>::PacketAccess;
- internal::TensorExecutor<const Assign, DefaultDevice, Vectorize>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
@@ -117,8 +116,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
{
typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign;
Assign assign(*this, other);
- static const bool Vectorize = TensorEvaluator<const Assign, DefaultDevice>::PacketAccess;
- internal::TensorExecutor<const Assign, DefaultDevice, Vectorize>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
index 6979fb4ec..759e8208f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
@@ -88,7 +88,7 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX
{
typedef TensorAssignOp<TensorConcatenationOp, const TensorConcatenationOp> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
@@ -98,7 +98,7 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX
{
typedef TensorAssignOp<TensorConcatenationOp, const OtherDerived> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
@@ -334,7 +334,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize());
EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
- PacketReturnType rslt = internal::pstore<PacketReturnType>(values, x);
+ internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
for (int i = 0; i < packetSize; ++i) {
coeffRef(index+i) = values[i];
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 6b8f71b96..07cba649b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -510,7 +510,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
typedef TensorEvalToOp<const KernelArgType> EvalTo;
EvalTo evalToTmp(local, m_kernelArg);
- internal::TensorExecutor<const EvalTo, Device, TensorEvaluator<KernelArgType, Device>::PacketAccess>::run(evalToTmp, m_device);
+ internal::TensorExecutor<const EvalTo, Device>::run(evalToTmp, m_device);
m_kernel = local;
m_local_kernel = true;
@@ -815,7 +815,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
typedef TensorEvalToOp<const KernelArgType> EvalTo;
EvalTo evalToTmp(local, m_kernelArg);
- internal::TensorExecutor<const EvalTo, GpuDevice, TensorEvaluator<KernelArgType, GpuDevice>::PacketAccess>::run(evalToTmp, m_device);
+ internal::TensorExecutor<const EvalTo, GpuDevice>::run(evalToTmp, m_device);
m_kernel = local;
m_local_kernel = true;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 6ea588e4b..24606b0c8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -22,13 +22,8 @@ namespace Eigen {
*/
namespace internal {
-template <typename Device, typename Expression>
-struct IsVectorizable {
- static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
-};
-
// Default strategy: the expression is evaluated with a single cpu thread.
-template<typename Expression, typename Device = DefaultDevice, bool Vectorizable = IsVectorizable<Device, Expression>::value>
+template<typename Expression, typename Device, bool Vectorizable>
class TensorExecutor
{
public:
@@ -198,10 +193,6 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
}
}
-template <typename Expression>
-struct IsVectorizable<GpuDevice, Expression> {
- static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess && TensorEvaluator<Expression, GpuDevice>::IsAligned;
-};
template<typename Expression>
class TensorExecutor<Expression, GpuDevice, false>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index 41a36cb75..bd32249b6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -116,7 +116,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
}
typedef TensorEvalToOp<const ArgType> EvalTo;
EvalTo evalToTmp(m_buffer, m_op);
- internal::TensorExecutor<const EvalTo, Device, TensorEvaluator<ArgType, Device>::PacketAccess>::run(evalToTmp, m_device);
+ internal::TensorExecutor<const EvalTo, Device>::run(evalToTmp, m_device);
m_impl.cleanup();
return true;
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index 7df8d1453..b3bc16bc4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -51,8 +51,27 @@ template<typename XprType> class TensorForcedEvalOp;
template<typename ExpressionType, typename DeviceType> class TensorDevice;
template<typename Derived, typename Device> struct TensorEvaluator;
+class DefaultDevice;
+class ThreadPoolDevice;
+class GpuDevice;
+
namespace internal {
-template<typename Expression, typename Device, bool Vectorizable> class TensorExecutor;
+
+template <typename Device, typename Expression>
+struct IsVectorizable {
+ static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
+};
+
+template <typename Expression>
+struct IsVectorizable<GpuDevice, Expression> {
+ static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess &&
+ TensorEvaluator<Expression, GpuDevice>::IsAligned;
+};
+
+template <typename Expression, typename Device,
+ bool Vectorizable = IsVectorizable<Device, Expression>::value>
+class TensorExecutor;
+
} // end namespace internal
} // end namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
index 054ecf7b5..ee66ae192 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
@@ -90,7 +90,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
{
typedef TensorAssignOp<TensorLayoutSwapOp, const TensorLayoutSwapOp> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
@@ -100,7 +100,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
{
typedef TensorAssignOp<TensorLayoutSwapOp, const OtherDerived> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index fa1e6931c..15e004ee9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -78,7 +78,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
{
typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
@@ -88,7 +88,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
{
typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
@@ -262,7 +262,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
{
typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
@@ -271,7 +271,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
{
typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
@@ -411,7 +411,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
{
const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+packetSize-1 < dimensions().TotalSize());
+ eigen_assert(index+packetSize-1 < array_prod(dimensions()));
Index inputIndices[] = {0, 0};
Index indices[] = {index, index + packetSize - 1};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
index 16bef2ad3..52f95b2a2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
@@ -80,7 +80,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
{
typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
@@ -90,7 +90,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
{
typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 1012ecd69..02f73dd37 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -78,7 +78,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
{
typedef TensorAssignOp<TensorShufflingOp, const TensorShufflingOp> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
@@ -88,7 +88,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
{
typedef TensorAssignOp<TensorShufflingOp, const OtherDerived> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
index 00cb8e373..dd913fbae 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
@@ -78,7 +78,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
{
typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
@@ -88,7 +88,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
{
typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign;
Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}