aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h40
1 files changed, 31 insertions, 9 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
index a2f1f71f5..860a6949a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -164,14 +164,14 @@ class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprT
};
template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool run(Eval& impl, Scalar*) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) {
impl.evalSubExprsIfNeeded(NULL);
return true;
}
};
template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool run(Eval& impl, Scalar* data) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) {
return impl.evalSubExprsIfNeeded(data);
}
};
@@ -193,7 +193,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
enum {
IsAligned = false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess && internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
+ PacketAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
};
@@ -224,11 +224,9 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
- const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
- const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
- PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
- SrcCoeffRatio, TgtCoeffRatio> converter(m_impl);
- return converter.template packet<LoadMode>(index);
+ const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
+ internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
+ return PacketConv<LoadMode, Vectorizable>::run(m_impl, index);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
@@ -249,7 +247,31 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
protected:
- TensorEvaluator<ArgType, Device> m_impl;
+ template <int LoadMode, bool ActuallyVectorize>
+ struct PacketConv {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ internal::scalar_cast_op<SrcType, TargetType> converter;
+ EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+ for (int i = 0; i < PacketSize; ++i) {
+ values[i] = converter(impl.coeff(index+i));
+ }
+ PacketReturnType rslt = internal::pload<PacketReturnType>(values);
+ return rslt;
+ }
+ };
+
+ template <int LoadMode>
+ struct PacketConv<LoadMode, true> {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
+ const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
+ PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
+ SrcCoeffRatio, TgtCoeffRatio> converter(impl);
+ return converter.template packet<LoadMode>(index);
+ }
+ };
+
+ TensorEvaluator<ArgType, Device> m_impl;
};
} // end namespace Eigen