aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported
diff options
context:
space:
mode:
authorGravatar Eugene Zhulenev <ezhulenev@google.com>2019-02-08 15:13:24 -0800
committerGravatar Eugene Zhulenev <ezhulenev@google.com>2019-02-08 15:13:24 -0800
commit1e36166ed1cd9a2e6fd5a946e2ec418406963a1a (patch)
tree4060b6dc8972c741d70a538bc1a6aa23b533cd13 /unsupported
parent953ca5ba2f007650944017bff423582afeaf0696 (diff)
Optimize TensorConversion evaluator: do not convert same type
Diffstat (limited to 'unsupported')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h99
1 files changed, 69 insertions, 30 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
index 1f613d3c7..27c9d4a20 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -32,7 +32,7 @@ struct traits<TensorConversionOp<TargetType, XprType> >
static const int NumDimensions = traits<XprType>::NumDimensions;
static const int Layout = traits<XprType>::Layout;
enum { Flags = 0 };
- typedef typename TypeConversion<Scalar, typename traits<XprType>::PointerType>::type PointerType;
+ typedef typename TypeConversion<Scalar, typename traits<XprType>::PointerType>::type PointerType;
};
template<typename TargetType, typename XprType>
@@ -177,6 +177,68 @@ template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eva
}
};
+namespace internal {
+
+template <typename SrcType, typename TargetType, bool IsSameT>
+struct CoeffConv {
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ internal::scalar_cast_op<SrcType, TargetType> converter;
+ return converter(impl.coeff(index));
+ }
+};
+
+template <typename SrcType, typename TargetType>
+struct CoeffConv<SrcType, TargetType, true> {
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ return impl.coeff(index);
+ }
+};
+
+template <typename SrcPacket, typename TargetPacket, int LoadMode, bool ActuallyVectorize, bool IsSameT>
+struct PacketConv {
+ typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
+ typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
+
+ static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;
+
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ internal::scalar_cast_op<SrcType, TargetType> converter;
+ EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
+ for (int i = 0; i < PacketSize; ++i) {
+ values[i] = converter(impl.coeff(index+i));
+ }
+ TargetPacket rslt = internal::pload<TargetPacket>(values);
+ return rslt;
+ }
+};
+
+template <typename SrcPacket, typename TargetPacket, int LoadMode, bool IsSameT>
+struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
+ typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
+ typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
+
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
+ const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
+ PacketConverter<TensorEvaluator<ArgType, Device>, SrcPacket, TargetPacket,
+ SrcCoeffRatio, TgtCoeffRatio> converter(impl);
+ return converter.template packet<LoadMode>(index);
+ }
+};
+
+template <typename SrcPacket, typename TargetPacket, int LoadMode, bool ActuallyVectorize>
+struct PacketConv<SrcPacket, TargetPacket, LoadMode, ActuallyVectorize, true> {
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ return impl.template packet<LoadMode>(index);
+ }
+};
+
+} // namespace internal
// Eval as rvalue
template<typename TargetType, typename ArgType, typename Device>
@@ -191,6 +253,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename PacketType<SrcType, Device>::type PacketSourceType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+ static const bool IsSameType = internal::is_same<TargetType, SrcType>::value;
enum {
IsAligned = false,
@@ -210,7 +273,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data)
{
- return ConversionSubExprEval<internal::is_same<TargetType, SrcType>::value, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data);
+ return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
@@ -220,8 +283,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
{
- internal::scalar_cast_op<SrcType, TargetType> converter;
- return converter(m_impl.coeff(index));
+ return internal::CoeffConv<SrcType, TargetType, IsSameType>::run(m_impl,index);
}
template<int LoadMode>
@@ -229,7 +291,8 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
{
const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
- return PacketConv<LoadMode, Vectorizable>::run(m_impl, index);
+ return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode,
+ Vectorizable, IsSameType>::run(m_impl, index);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
@@ -252,31 +315,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
/// required by sycl in order to extract the sycl accessor
const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
- protected:
- template <int LoadMode, bool ActuallyVectorize>
- struct PacketConv {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
- internal::scalar_cast_op<SrcType, TargetType> converter;
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = converter(impl.coeff(index+i));
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
- };
-
- template <int LoadMode>
- struct PacketConv<LoadMode, true> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
- const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
- const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
- PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
- SrcCoeffRatio, TgtCoeffRatio> converter(impl);
- return converter.template packet<LoadMode>(index);
- }
- };
-
+ protected:
TensorEvaluator<ArgType, Device> m_impl;
};