diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-10-01 20:38:22 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-10-01 20:38:22 -0700 |
commit | 7caaf6453b7b1f58d953729380d596b2d9b27835 (patch) | |
tree | 59003efb25e7119af4da63b0371efb53daddbcf8 /unsupported/Eigen/CXX11/src/Tensor | |
parent | 1c236f4c9ae78cc58156eebe3b2bb43588897af4 (diff) |
Added support for tensor reductions and concatenations
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
5 files changed, 535 insertions, 1 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 2da8f8cc8..2f7c9ecda 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -204,12 +204,40 @@ class TensorBase<Derived, ReadOnlyAccessors> return TensorSelectOp<const Derived, const ThenDerived, const ElseDerived>(derived(), thenTensor.derived(), elseTensor.derived()); } + // Reductions. + template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp<internal::SumReducer<Scalar>, const Dims, const Derived> + sum(const Dims& dims) const { + return TensorReductionOp<internal::SumReducer<Scalar>, const Dims, const Derived>(derived(), dims, internal::SumReducer<Scalar>()); + } + template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp<internal::MaxReducer<Scalar>, const Dims, const Derived> + maximum(const Dims& dims) const { + return TensorReductionOp<internal::MaxReducer<Scalar>, const Dims, const Derived>(derived(), dims, internal::MaxReducer<Scalar>()); + } + template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp<internal::MinReducer<Scalar>, const Dims, const Derived> + minimum(const Dims& dims) const { + return TensorReductionOp<internal::MinReducer<Scalar>, const Dims, const Derived>(derived(), dims, internal::MinReducer<Scalar>()); + } + template <typename Reducer, typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp<Reducer, const Dims, const Derived> + reduce(const Dims& dims, const Reducer& reducer) const { + return TensorReductionOp<Reducer, const Dims, const Derived>(derived(), dims, reducer); + } + template <typename Broadcast> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorBroadcastingOp<const Broadcast, const Derived> broadcast(const Broadcast& broadcast) const { return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), broadcast); } + template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorConcatenationOp<Axis, const Derived, const OtherDerived> + concatenate(const OtherDerived& other, Axis axis) const { + return TensorConcatenationOp<Axis, const Derived, const OtherDerived>(derived(), other.derived(), axis); + } + // Morphing operators. template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorReshapingOp<const NewDimensions, const Derived> diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h new file mode 100644 index 000000000..b8e43f484 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -0,0 +1,217 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H + +namespace Eigen { + +/** \class TensorConcatenationOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor concatenation class. + * + * + */ +namespace internal { +template<typename Axis, typename LhsXprType, typename RhsXprType> +struct traits<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename promote_storage_type<typename LhsXprType::Scalar, + typename RhsXprType::Scalar>::ret Scalar; + typedef typename packet_traits<Scalar>::type Packet; + typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind, + typename traits<RhsXprType>::StorageKind>::ret StorageKind; + typedef typename promote_index_type<typename traits<LhsXprType>::Index, + typename traits<RhsXprType>::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference<LhsNested>::type _LhsNested; + typedef typename remove_reference<RhsNested>::type _RhsNested; + enum { Flags = 0 }; +}; + +template<typename Axis, typename LhsXprType, typename RhsXprType> +struct eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, Eigen::Dense> +{ + typedef const TensorConcatenationOp<Axis, LhsXprType, RhsXprType>& type; +}; + +template<typename Axis, typename LhsXprType, typename RhsXprType> +struct nested<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, 1, typename eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >::type> +{ + typedef TensorConcatenationOp<Axis, LhsXprType, RhsXprType> type; +}; + +} // end namespace internal + + +template<typename Axis, typename LhsXprType, typename RhsXprType> +class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, WriteAccessors> +{ + public: + typedef typename internal::traits<TensorConcatenationOp>::Scalar Scalar; + typedef typename internal::traits<TensorConcatenationOp>::Packet Packet; + typedef typename internal::traits<TensorConcatenationOp>::StorageKind StorageKind; + typedef typename internal::traits<TensorConcatenationOp>::Index Index; + typedef typename internal::nested<TensorConcatenationOp>::type Nested; + typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType, + typename RhsXprType::CoeffReturnType>::ret CoeffReturnType; + typedef typename internal::promote_storage_type<typename LhsXprType::PacketReturnType, + typename RhsXprType::PacketReturnType>::ret PacketReturnType; + typedef typename NumTraits<Scalar>::Real RealScalar; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp(const LhsXprType& lhs, const RhsXprType& rhs, Axis axis) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all<typename LhsXprType::Nested>::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all<typename RhsXprType::Nested>::type& + rhsExpression() const { return m_rhs_xpr; } + + EIGEN_DEVICE_FUNC Axis axis() const { return m_axis; } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const Axis m_axis; +}; + + +// Eval as rvalue +template<typename Axis, typename LeftArgType, typename RightArgType, typename Device> +struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> +{ + typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value; + static const int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value; + typedef DSizes<Index, NumDims> Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketReturnType PacketReturnType; + enum { + IsAligned = false, + PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess, + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis()) + { + EIGEN_STATIC_ASSERT(NumDims == RightNumDims, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(0 <= m_axis && m_axis < NumDims); + const Dimensions& lhs_dims = m_leftImpl.dimensions(); + const Dimensions& rhs_dims = m_rightImpl.dimensions(); + int i = 0; + for (; i < m_axis; ++i) { + eigen_assert(lhs_dims[i] > 0); + eigen_assert(lhs_dims[i] == rhs_dims[i]); + m_dimensions[i] = lhs_dims[i]; + } + eigen_assert(lhs_dims[i] > 0); // Now i == m_axis. + eigen_assert(rhs_dims[i] > 0); + m_dimensions[i] = lhs_dims[i] + rhs_dims[i]; + for (++i; i < NumDims; ++i) { + eigen_assert(lhs_dims[i] > 0); + eigen_assert(lhs_dims[i] == rhs_dims[i]); + m_dimensions[i] = lhs_dims[i]; + } + + m_leftStrides[0] = 1; + m_rightStrides[0] = 1; + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_leftStrides[i] = m_leftStrides[i-1] * lhs_dims[i-1]; + m_rightStrides[i] = m_rightStrides[i-1] * rhs_dims[i-1]; + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear? + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) + { + m_leftImpl.evalSubExprsIfNeeded(NULL); + m_rightImpl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() + { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + } + + // TODO(phli): attempt to speed this up. The integer divisions and modulo are slow. + // See CL/76180724 comments for more ideas. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + // Collect dimension-wise indices (subs). + array<Index, NumDims> subs; + for (int i = NumDims - 1; i > 0; --i) { + subs[i] = index / m_outputStrides[i]; + index -= subs[i] * m_outputStrides[i]; + } + subs[0] = index; + + const Dimensions& left_dims = m_leftImpl.dimensions(); + if (subs[m_axis] < left_dims[m_axis]) { + Index left_index = subs[0]; + for (int i = 1; i < NumDims; ++i) { + left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; + } + return m_leftImpl.coeff(left_index); + } else { + subs[m_axis] -= left_dims[m_axis]; + const Dimensions& right_dims = m_rightImpl.dimensions(); + Index right_index = subs[0]; + for (int i = 1; i < NumDims; ++i) { + right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; + } + return m_rightImpl.coeff(right_index); + } + } + + // TODO(phli): Add a real vectorization. + template<int LoadMode> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + static const int packetSize = internal::unpacket_traits<PacketReturnType>::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + packetSize - 1 < dimensions().TotalSize()); + + EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload<PacketReturnType>(values); + return rslt; + } + + Scalar* data() const { return NULL; } + + protected: + const Axis m_axis; + Dimensions m_dimensions; + array<Index, NumDims> m_outputStrides; + array<Index, NumDims> m_leftStrides; + array<Index, NumDims> m_rightStrides; + TensorEvaluator<LeftArgType, Device> m_leftImpl; + TensorEvaluator<RightArgType, Device> m_rightImpl; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index afbcc9486..bc67586a4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -21,8 +21,9 @@ template<typename NullaryOp, typename PlainObjectType> class TensorCwiseNullaryO template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp; template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp; template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp; -template<typename XprType> class TensorReductionOp; template<typename Broadcast, typename XprType> class TensorBroadcastingOp; +template<typename Op, typename Dims, typename XprType> class TensorReductionOp; +template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp; template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp; template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp; template<typename NewDimensions, typename XprType> class TensorReshapingOp; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h new file mode 100644 index 000000000..92984336c --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -0,0 +1,62 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H +#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H + +namespace Eigen { +namespace internal { + +// Standard reduction functors +template <typename T> struct SumReducer +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SumReducer() : m_sum(0) { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) { + m_sum += t; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const { + return m_sum; + } + + private: + T m_sum; +}; + +template <typename T> struct MaxReducer +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MaxReducer() : m_max((std::numeric_limits<T>::min)()) { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) { + if (t > m_max) { m_max = t; } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const { + return m_max; + } + + private: + T m_max; +}; + +template <typename T> struct MinReducer +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MinReducer() : m_min((std::numeric_limits<T>::max)()) { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) { + if (t < m_min) { m_min = t; } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const { + return m_min; + } + + private: + T m_min; +}; + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h new file mode 100644 index 000000000..eef992106 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -0,0 +1,226 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H + +namespace Eigen { + +/** \class TensorReduction + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reduction class. + * + */ + +namespace internal { +template<typename Op, typename Dims, typename XprType> +struct traits<TensorReductionOp<Op, Dims, XprType> > + : traits<XprType> +{ + typedef typename traits<XprType>::Scalar Scalar; + typedef typename internal::packet_traits<Scalar>::type Packet; + typedef typename traits<XprType>::StorageKind StorageKind; + typedef typename traits<XprType>::Index Index; + typedef typename XprType::Nested Nested; +}; + +template<typename Op, typename Dims, typename XprType> +struct eval<TensorReductionOp<Op, Dims, XprType>, Eigen::Dense> +{ + typedef const TensorReductionOp<Op, Dims, XprType>& type; +}; + +template<typename Op, typename Dims, typename XprType> +struct nested<TensorReductionOp<Op, Dims, XprType>, 1, typename eval<TensorReductionOp<Op, Dims, XprType> >::type> +{ + typedef TensorReductionOp<Op, Dims, XprType> type; +}; + +} // end namespace internal + + +template <typename Op, typename Dims, typename XprType> +class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType>, ReadOnlyAccessors> { + public: + typedef typename Eigen::internal::traits<TensorReductionOp>::Scalar Scalar; + typedef typename Eigen::internal::traits<TensorReductionOp>::Packet Packet; + typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename Eigen::internal::nested<TensorReductionOp>::type Nested; + typedef typename Eigen::internal::traits<TensorReductionOp>::StorageKind StorageKind; + typedef typename Eigen::internal::traits<TensorReductionOp>::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims) + { } + TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const XprType& expression() const { return m_expr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Dims& dims() const { return m_dims; } + const Op& reducer() const { return m_reducer; } + + protected: + typename XprType::Nested m_expr; + const Dims m_dims; + const Op m_reducer; +}; + + +// Eval as rvalue +template<typename Op, typename Dims, typename ArgType, typename Device> +struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> +{ + typedef TensorReductionOp<Op, Dims, ArgType> XprType; + typedef typename XprType::Index Index; + static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; + static const int NumReducedDims = internal::array_size<Dims>::value; + static const int NumDims = (NumInputDims==NumReducedDims) ? 1 : NumInputDims - NumReducedDims; + typedef DSizes<Index, NumDims> Dimensions; + typedef typename XprType::Scalar Scalar; + + enum { + IsAligned = false, + PacketAccess = false, // The code isn't vectorized properly yet + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_reducer(op.reducer()) + { + EIGEN_STATIC_ASSERT(NumInputDims >= NumReducedDims, YOU_MADE_A_PROGRAMMING_MISTAKE); + + array<bool, NumInputDims> reduced; + for (int i = 0; i < NumInputDims; ++i) { + reduced[i] = false; + } + for (int i = 0; i < NumReducedDims; ++i) { + eigen_assert(op.dims()[i] >= 0); + eigen_assert(op.dims()[i] < NumInputDims); + reduced[op.dims()[i]] = true; + } + + const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); + int outputIndex = 0; + int reduceIndex = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (reduced[i]) { + m_reducedDims[reduceIndex] = input_dims[i]; + ++reduceIndex; + } else { + m_dimensions[outputIndex] = input_dims[i]; + ++outputIndex; + } + } + + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + + array<Index, NumInputDims> strides; + strides[0] = 1; + for (int i = 1; i < NumInputDims; ++i) { + strides[i] = strides[i-1] * input_dims[i-1]; + } + outputIndex = 0; + reduceIndex = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (reduced[i]) { + m_reducedStrides[reduceIndex] = strides[i]; + ++reduceIndex; + } else { + m_preservedStrides[outputIndex] = strides[i]; + ++outputIndex; + } + } + + // Special case for full reductions + if (NumInputDims == NumReducedDims) { + m_dimensions[0] = 1; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketReturnType PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + Op reducer(m_reducer); + reduce(firstInput(index), 0, reducer); + return reducer.finalize(); + } + + // TODO(bsteiner): provide a more efficient implementation. + template<int LoadMode> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const int packetSize = internal::unpacket_traits<PacketReturnType>::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + packetSize - 1 < dimensions().TotalSize()); + + EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload<PacketReturnType>(values); + return rslt; + } + + Scalar* data() const { return NULL; } + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { + Index startInput = 0; + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + startInput += idx * m_preservedStrides[i]; + index -= idx * m_outputStrides[i]; + } + startInput += index * m_preservedStrides[0]; + return startInput; + } + + EIGEN_DEVICE_FUNC void reduce(Index firstIndex, int DimIndex, Op& reducer) const { + for (int j = 0; j < m_reducedDims[DimIndex]; ++j) { + const Index input = firstIndex + j * m_reducedStrides[DimIndex]; + if (DimIndex < NumReducedDims-1) { + reduce(input, DimIndex+1, reducer); + } else { + reducer.reduce(m_impl.coeff(input)); + } + } + } + + Dimensions m_dimensions; + array<Index, NumDims> m_outputStrides; + array<Index, NumDims> m_preservedStrides; + array<Index, NumReducedDims> m_reducedStrides; + array<Index, NumReducedDims> m_reducedDims; + Op m_reducer; + TensorEvaluator<ArgType, Device> m_impl; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H |