diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 415 |
1 file changed, 33 insertions, 382 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index eda93a1de..f070ba61e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -21,365 +21,12 @@ namespace Eigen { */ namespace internal { -enum { - Rhs = 0, - Lhs = 1, -}; - -/* - * Implementation of the Eigen blas_data_mapper class for tensors. - */ -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - int packet_size, bool inner_dim_contiguous> -class SimpleTensorContractionMapper { - public: - EIGEN_DEVICE_FUNC - SimpleTensorContractionMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) : - m_tensor(tensor), - m_nocontract_strides(nocontract_strides), - m_ij_strides(ij_strides), - m_contract_strides(contract_strides), - m_k_strides(k_strides) { } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void prefetch(Index /*i*/) { } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar operator()(Index row) const { - // column major assumption - return operator()(row, 0); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const { - return m_tensor.coeff(computeIndex(row, col)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const { - const bool left = (side == Lhs); - Index nocontract_val = left ? 
row : col; - Index linidx = 0; - for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) { - const Index idx = nocontract_val / m_ij_strides[i]; - linidx += idx * m_nocontract_strides[i]; - nocontract_val -= idx * m_ij_strides[i]; - } - if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) { - if (side == Lhs && inner_dim_contiguous) { - eigen_assert(m_nocontract_strides[0] == 1); - linidx += nocontract_val; - } else { - linidx += nocontract_val * m_nocontract_strides[0]; - } - } - - Index contract_val = left ? col : row; - for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) { - const Index idx = contract_val / m_k_strides[i]; - linidx += idx * m_contract_strides[i]; - contract_val -= idx * m_k_strides[i]; - } - - if(array_size<contract_t>::value > 0) { - if (side == Rhs && inner_dim_contiguous) { - eigen_assert(m_contract_strides[0] == 1); - linidx += contract_val; - } else { - linidx += contract_val * m_contract_strides[0]; - } - } - - return linidx; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE IndexPair<Index> computeIndexPair(Index row, Index col, const Index distance) const { - const bool left = (side == Lhs); - Index nocontract_val[2] = {left ? row : col, left ? 
row + distance : col}; - Index linidx[2] = {0, 0}; - for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) { - const Index idx0 = nocontract_val[0] / m_ij_strides[i]; - const Index idx1 = nocontract_val[1] / m_ij_strides[i]; - linidx[0] += idx0 * m_nocontract_strides[i]; - linidx[1] += idx1 * m_nocontract_strides[i]; - nocontract_val[0] -= idx0 * m_ij_strides[i]; - nocontract_val[1] -= idx1 * m_ij_strides[i]; - } - if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) { - if (side == Lhs && inner_dim_contiguous) { - eigen_assert(m_nocontract_strides[0] == 1); - linidx[0] += nocontract_val[0]; - linidx[1] += nocontract_val[1]; - } else { - linidx[0] += nocontract_val[0] * m_nocontract_strides[0]; - linidx[1] += nocontract_val[1] * m_nocontract_strides[0]; - } - } - - Index contract_val[2] = {left ? col : row, left ? col : row + distance}; - for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) { - const Index idx0 = contract_val[0] / m_k_strides[i]; - const Index idx1 = contract_val[1] / m_k_strides[i]; - linidx[0] += idx0 * m_contract_strides[i]; - linidx[1] += idx1 * m_contract_strides[i]; - contract_val[0] -= idx0 * m_k_strides[i]; - contract_val[1] -= idx1 * m_k_strides[i]; - } - - if (side == Rhs && inner_dim_contiguous) { - eigen_assert(m_contract_strides[0] == 1); - linidx[0] += contract_val[0]; - linidx[1] += contract_val[1]; - } else { - linidx[0] += contract_val[0] * m_contract_strides[0]; - linidx[1] += contract_val[1] * m_contract_strides[0]; - } - return IndexPair<Index>(linidx[0], linidx[1]); - } - - Index firstAligned(Index size) const { - return size; - } - Index stride() const { - return 1; - } - - protected: - const Tensor m_tensor; - const nocontract_t m_nocontract_strides; - const nocontract_t m_ij_strides; - const contract_t m_contract_strides; - const contract_t m_k_strides; -}; - - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename 
nocontract_t, typename contract_t, - int packet_size, bool inner_dim_contiguous, - bool inner_dim_reordered, int Alignment> - class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous> -{ - public: - typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous> ParentMapper; - - EIGEN_DEVICE_FUNC - BaseTensorContractionMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) : - ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } - - typedef typename packet_traits<Scalar>::type Packet; - typedef typename packet_traits<Scalar>::half HalfPacket; - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { - // whole method makes column major assumption - - // don't need to add offsets for now (because operator handles that) - // current code assumes packet size must be a multiple of 2 - EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - - if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) { - const Index index = this->computeIndex(i, j); - eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1); - return this->m_tensor.template packet<Alignment>(index); - } - - const IndexPair<Index> indexPair = this->computeIndexPair(i, j, packet_size - 1); - const Index first = indexPair.first; - const Index last = indexPair.second; - - // We can always do optimized packet reads from left hand side right now, because - // the vertical matrix dimension on the left hand side is never contracting. - // On the right hand side we need to check if the contracting dimensions may have - // been shuffled first. 
- if (Tensor::PacketAccess && - (side == Lhs || internal::array_size<contract_t>::value <= 1 || !inner_dim_reordered) && - (last - first) == (packet_size - 1)) { - - return this->m_tensor.template packet<Alignment>(first); - } - - EIGEN_ALIGN_MAX Scalar data[packet_size]; - - data[0] = this->m_tensor.coeff(first); - for (Index k = 1; k < packet_size - 1; k += 2) { - const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1); - data[k] = this->m_tensor.coeff(internal_pair.first); - data[k + 1] = this->m_tensor.coeff(internal_pair.second); - } - data[packet_size - 1] = this->m_tensor.coeff(last); - - return pload<Packet>(data); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { - // whole method makes column major assumption - - // don't need to add offsets for now (because operator handles that) - const Index half_packet_size = unpacket_traits<HalfPacket>::size; - if (half_packet_size == packet_size) { - return loadPacket(i, j); - } - EIGEN_ALIGN_MAX Scalar data[half_packet_size]; - for (Index k = 0; k < half_packet_size; k++) { - data[k] = operator()(i + k, j); - } - return pload<HalfPacket>(data); - } -}; - - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - bool inner_dim_contiguous, - bool inner_dim_reordered, int Alignment> -class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous> -{ - public: - typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous> ParentMapper; - - EIGEN_DEVICE_FUNC - BaseTensorContractionMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const 
contract_t& k_strides) : - ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } - - typedef typename packet_traits<Scalar>::type Packet; - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { - EIGEN_ALIGN_MAX Scalar data[1]; - data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); - return pload<typename packet_traits<Scalar>::type>(data); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const { - return loadPacket(i, j); - } -}; - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - int packet_size, - bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> -class TensorContractionInputMapper; - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - int packet_size, - bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> -class TensorContractionSubMapper { - public: - typedef typename packet_traits<Scalar>::type Packet; - typedef typename packet_traits<Scalar>::half HalfPacket; - - typedef TensorContractionInputMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper; - typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self; - typedef Self LinearMapper; - - EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) - : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { - return m_base_mapper(i + m_vert_offset, m_horiz_offset); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { - return m_base_mapper(i + 
m_vert_offset, j + m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { - return m_base_mapper.loadPacket(i + m_vert_offset, m_horiz_offset); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { - return m_base_mapper.loadPacket(i + m_vert_offset, j + m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { - return m_base_mapper.loadHalfPacket(i + m_vert_offset, m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { - m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { - return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); - } - - template <typename PacketT, int AlignmentType> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const { - EIGEN_STATIC_ASSERT((internal::is_same<PacketT, Packet>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((AlignmentType == Aligned || Alignment == Unaligned), YOU_MADE_A_PROGRAMMING_MISTAKE); - return loadPacket(i); - } - - template <typename Packet> - EIGEN_DEVICE_FUNC bool aligned(Index) const { - return false; - } - - private: - const ParentMapper& m_base_mapper; - const Index m_vert_offset; - const Index m_horiz_offset; -}; - - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - int packet_size, - bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> -class TensorContractionInputMapper - : public BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> { - - public: - typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, 
Alignment> Base; - typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper; - typedef SubMapper VectorMapper; - - EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) - : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { - return SubMapper(*this, i, j); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { - return VectorMapper(*this, i, j); - } -}; - - - template<typename Dimensions, typename LhsXprType, typename RhsXprType> struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> > { // Type promotion to handle the case where the types of the lhs and the rhs are different. 
typedef typename internal::promote_storage_type<typename LhsXprType::Scalar, typename RhsXprType::Scalar>::ret Scalar; - typedef typename internal::packet_traits<Scalar>::type Packet; typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind, typename traits<RhsXprType>::StorageKind>::ret StorageKind; typedef typename promote_index_type<typename traits<LhsXprType>::Index, @@ -428,11 +75,8 @@ class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXp { public: typedef typename Eigen::internal::traits<TensorContractionOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorContractionOp>::Packet Packet; typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType, typename RhsXprType::CoeffReturnType>::ret CoeffReturnType; - typedef typename internal::promote_storage_type<typename LhsXprType::PacketReturnType, - typename RhsXprType::PacketReturnType>::ret PacketReturnType; typedef typename Eigen::internal::nested<TensorContractionOp>::type Nested; typedef typename Eigen::internal::traits<TensorContractionOp>::StorageKind StorageKind; typedef typename Eigen::internal::traits<TensorContractionOp>::Index Index; @@ -470,16 +114,16 @@ struct TensorContractionEvaluatorBase typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::Packet Packet; typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; enum { IsAligned = true, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), + PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1), Layout = TensorEvaluator<LeftArgType, Device>::Layout, CoordAccess = false, // to be implemented + RawAccess = true }; // Most of the code 
is assuming that both input tensors are ColMajor. If the @@ -498,8 +142,6 @@ struct TensorContractionEvaluatorBase static const int ContractDims = internal::array_size<Indices>::value; static const int NumDims = max_n_1<LDims + RDims - 2 * ContractDims>::size; - typedef array<Index, LDims> left_dim_mapper_t; - typedef array<Index, RDims> right_dim_mapper_t; typedef array<Index, ContractDims> contract_t; typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t; typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t; @@ -546,8 +188,21 @@ struct TensorContractionEvaluatorBase // We need to flip all the pairs of contracting indices as well as // reversing the dimensions. for (int i = 0; i < ContractDims; i++) { - eval_op_indices[i].first = LDims - 1 - op.indices()[i].second; - eval_op_indices[i].second = RDims - 1 - op.indices()[i].first; + eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second; + eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first; + } + } + + // Check for duplicate axes and make sure the first index in eval_op_indices + // is increasing. 
Using O(n^2) sorting is OK since ContractDims is small + for (int i = 0; i < ContractDims; i++) { + for (int j = i + 1; j < ContractDims; j++) { + eigen_assert(eval_op_indices[j].first != eval_op_indices[i].first && + eval_op_indices[j].second != eval_op_indices[i].second && + "contraction axes should be unique"); + if (eval_op_indices[j].first < eval_op_indices[i].first) { + numext::swap(eval_op_indices[j], eval_op_indices[i]); + } } } @@ -731,7 +386,7 @@ struct TensorContractionEvaluatorBase } template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - void evalGemv(Scalar* buffer) const { + EIGEN_DEVICE_FUNC void evalGemv(Scalar* buffer) const { const Index rows = m_i_size; const Index cols = m_k_size; @@ -739,19 +394,21 @@ struct TensorContractionEvaluatorBase typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar; typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; - const Index lhs_packet_size = internal::packet_traits<LhsScalar>::size; - const Index rhs_packet_size = internal::packet_traits<RhsScalar>::size; + const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size; + const Index rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size; + const int lhs_alignment = LeftEvaluator::IsAligned ? Aligned : Unaligned; + const int rhs_alignment = RightEvaluator::IsAligned ? 
Aligned : Unaligned; typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t, contract_t, lhs_packet_size, lhs_inner_dim_contiguous, - false, Unaligned> LhsMapper; + false, lhs_alignment> LhsMapper; typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t, contract_t, rhs_packet_size, rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, Unaligned> RhsMapper; + rhs_inner_dim_reordered, rhs_alignment> RhsMapper; LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides, m_left_contracting_strides, m_k_strides); @@ -784,11 +441,11 @@ struct TensorContractionEvaluatorBase } template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { - return internal::ploadt<Packet, LoadMode>(m_result + index); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { + return internal::ploadt<PacketReturnType, LoadMode>(m_result + index); } - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { return m_result; } protected: // Prevent assignment @@ -829,10 +486,9 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::Packet Packet; typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; enum { Layout = TensorEvaluator<LeftArgType, Device>::Layout, @@ -853,9 +509,6 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value; static const int 
ContractDims = internal::array_size<Indices>::value; - typedef array<Index, LDims> left_dim_mapper_t; - typedef array<Index, RDims> right_dim_mapper_t; - typedef array<Index, ContractDims> contract_t; typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t; typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t; @@ -870,7 +523,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT Base(op, device) { } template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - void evalProduct(Scalar* buffer) const { + EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const { if (this->m_j_size == 1) { this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); return; @@ -904,8 +557,8 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; - const Index lhs_packet_size = internal::packet_traits<LhsScalar>::size; - const Index rhs_packet_size = internal::packet_traits<RhsScalar>::size; + const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size; + const Index rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size; typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t, @@ -936,10 +589,8 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT OutputMapper output(buffer, m); - typedef typename internal::gemm_blocking_space<ColMajor, LhsScalar, RhsScalar, Dynamic, Dynamic, Dynamic> BlockingType; - // Sizes of the blocks to load in cache. See the Goto paper for details. 
- BlockingType blocking(m, n, k, 1, true); + internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, 1); const Index kc = blocking.kc(); const Index mc = numext::mini(m, blocking.mc()); const Index nc = numext::mini(n, blocking.nc()); |