From f697df723798779bc29d9f7299bb5398767d5db0 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 14 Jan 2015 15:38:48 -0800 Subject: Improved support for RowMajor tensors Misc fixes and API cleanups. --- .../Eigen/CXX11/src/Tensor/TensorChipping.h | 208 ++++++++++++++++----- 1 file changed, 161 insertions(+), 47 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index b862a8fd3..bc336e488 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -21,34 +21,61 @@ namespace Eigen { */ namespace internal { -template +template struct traits > : public traits { typedef typename XprType::Scalar Scalar; - typedef typename internal::packet_traits::type Packet; - typedef typename traits::StorageKind StorageKind; - typedef typename traits::Index Index; + typedef traits XprTraits; + typedef typename packet_traits::type Packet; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions - 1; + static const int Layout = XprTraits::Layout; }; -template +template struct eval, Eigen::Dense> { typedef const TensorChippingOp& type; }; -template +template struct nested, 1, typename eval >::type> { typedef TensorChippingOp type; }; +template +struct DimensionId +{ + DimensionId(DenseIndex dim) { + eigen_assert(dim == DimId); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { + return DimId; + } +}; +template <> +struct DimensionId +{ + DimensionId(DenseIndex dim) : actual_dim(dim) { + eigen_assert(dim >= 0); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { + return actual_dim; + } + private: + const DenseIndex actual_dim; +}; + + } // end namespace internal -template +template class TensorChippingOp : public TensorBase > { public: @@ -61,34 +88,39 @@ class TensorChippingOp : public TensorBase > typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset) - : m_xpr(expr), m_offset(offset) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim) + : m_xpr(expr), m_offset(offset), m_dim(dim) { + } - EIGEN_DEVICE_FUNC - const Index offset() const { return m_offset; } + EIGEN_DEVICE_FUNC + const Index offset() const { return m_offset; } + EIGEN_DEVICE_FUNC + const Index dim() const { return m_dim.actualDim(); } - EIGEN_DEVICE_FUNC - const typename internal::remove_all::type& - expression() const { return m_xpr; } + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp Assign; - Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); - return *this; - } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + static const bool Vectorize = TensorEvaluator::PacketAccess; + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } protected: typename XprType::Nested m_xpr; const Index m_offset; + const internal::DimensionId m_dim; }; // Eval as rvalue -template +template struct TensorEvaluator, Device> { typedef TensorChippingOp XprType; @@ -96,41 +128,50 @@ struct TensorEvaluator, Device> static const int NumDims = NumInputDims-1; typedef typename XprType::Index Index; typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; enum { // Alignment can't be guaranteed at compile time since it depends on the // slice offsets. IsAligned = false, - PacketAccess = false, // not yet implemented + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_device(device) + : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device) { // We could also support the case where NumInputDims==1 if needed. EIGEN_STATIC_ASSERT(NumInputDims >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT(NumInputDims > DimId, YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(NumInputDims > m_dim.actualDim()); const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); int j = 0; for (int i = 0; i < NumInputDims; ++i) { - if (i != DimId) { + if (i != m_dim.actualDim()) { m_dimensions[j] = input_dims[i]; ++j; } } - m_stride = 1; - m_inputStride = 1; - for (int i = 0; i < DimId; ++i) { - m_stride *= input_dims[i]; - m_inputStride *= input_dims[i]; - } - m_inputStride *= input_dims[DimId]; - m_inputOffset = m_stride * op.offset(); + m_stride = 1; + m_inputStride = 1; + if (Layout == ColMajor) { + for (int i = 0; i < m_dim.actualDim(); ++i) { + m_stride *= input_dims[i]; + m_inputStride *= input_dims[i]; + } + } else { + for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) { + m_stride *= input_dims[i]; + m_inputStride *= input_dims[i]; + } + } + m_inputStride *= input_dims[m_dim.actualDim()]; + m_inputOffset = m_stride * op.offset(); } - typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketReturnType PacketReturnType; @@ -150,16 +191,52 @@ struct TensorEvaluator, Device> return m_impl.coeff(srcCoeff(index)); } - /* to be done template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - }*/ + if ((Layout == ColMajor && m_dim.actualDim() == 0) || + (Layout == RowMajor && m_dim.actualDim() == NumInputDims-1)) { + // m_stride is equal to 1, so let's avoid the integer division. + eigen_assert(m_stride == 1); + Index inputIndex = index * m_inputStride + m_inputOffset; + EIGEN_ALIGN_DEFAULT typename internal::remove_const::type values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = m_impl.coeff(inputIndex); + inputIndex += m_inputStride; + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } else if ((Layout == ColMajor && m_dim.actualDim() == NumInputDims - 1) || + (Layout == RowMajor && m_dim.actualDim() == 0)) { + // m_stride is aways greater than index, so let's avoid the integer division. + eigen_assert(m_stride > index); + return m_impl.template packet(index + m_inputOffset); + } else { + const Index idx = index / m_stride; + const Index rem = index - idx * m_stride; + if (rem + packetSize <= m_stride) { + Index inputIndex = idx * m_inputStride + m_inputOffset + rem; + return m_impl.template packet(inputIndex); + } else { + // Cross the stride boundary. Fallback to slow path. + EIGEN_ALIGN_DEFAULT typename internal::remove_const::type values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index); + ++index; + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { Scalar* result = m_impl.data(); - if (DimId == NumDims && result) { + if (m_dim.actualDim() == NumDims && result) { return result + m_inputOffset; } else { return NULL; @@ -170,11 +247,13 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { Index inputIndex; - if (DimId == 0) { + if ((Layout == ColMajor && m_dim.actualDim() == 0) || + (Layout == RowMajor && m_dim.actualDim() == NumInputDims-1)) { // m_stride is equal to 1, so let's avoid the integer division. eigen_assert(m_stride == 1); inputIndex = index * m_inputStride + m_inputOffset; - } else if (DimId == NumInputDims-1) { + } else if ((Layout == ColMajor && m_dim.actualDim() == NumInputDims-1) || + (Layout == RowMajor && m_dim.actualDim() == 0)) { // m_stride is aways greater than index, so let's avoid the integer division. eigen_assert(m_stride > index); inputIndex = index + m_inputOffset; @@ -192,12 +271,13 @@ struct TensorEvaluator, Device> Index m_inputOffset; Index m_inputStride; TensorEvaluator m_impl; + const internal::DimensionId m_dim; const Device& m_device; }; // Eval as lvalue -template +template struct TensorEvaluator, Device> : public TensorEvaluator, Device> { @@ -207,17 +287,17 @@ struct TensorEvaluator, Device> static const int NumDims = NumInputDims-1; typedef typename XprType::Index Index; typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; enum { IsAligned = false, - PacketAccess = false, + PacketAccess = TensorEvaluator::PacketAccess, }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) { } - typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketReturnType PacketReturnType; @@ -226,11 +306,45 @@ struct TensorEvaluator, Device> return this->m_impl.coeffRef(this->srcCoeff(index)); } - /* to be done template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) { - } */ + static const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + + if ((this->Layout == ColMajor && this->m_dim.actualDim() == 0) || + (this->Layout == RowMajor && this->m_dim.actualDim() == NumInputDims-1)) { + // m_stride is equal to 1, so let's avoid the integer division. + eigen_assert(this->m_stride == 1); + EIGEN_ALIGN_DEFAULT typename internal::remove_const::type values[packetSize]; + internal::pstore(values, x); + Index inputIndex = index * this->m_inputStride + this->m_inputOffset; + for (int i = 0; i < packetSize; ++i) { + this->m_impl.coeffRef(inputIndex) = values[i]; + inputIndex += this->m_inputStride; + } + } else if ((this->Layout == ColMajor && this->m_dim.actualDim() == NumInputDims-1) || + (this->Layout == RowMajor && this->m_dim.actualDim() == 0)) { + // m_stride is aways greater than index, so let's avoid the integer division. + eigen_assert(this->m_stride > index); + this->m_impl.template writePacket(index + this->m_inputOffset, x); + } else { + const Index idx = index / this->m_stride; + const Index rem = index - idx * this->m_stride; + if (rem + packetSize <= this->m_stride) { + const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem; + this->m_impl.template writePacket(inputIndex, x); + } else { + // Cross stride boundary. Fallback to slow path. + EIGEN_ALIGN_DEFAULT typename internal::remove_const::type values[packetSize]; + internal::pstore(values, x); + for (int i = 0; i < packetSize; ++i) { + this->coeffRef(index) = values[i]; + ++index; + } + } + } + } }; -- cgit v1.2.3