diff options
Diffstat (limited to 'Eigen/src/Core/ProductEvaluators.h')
-rw-r--r-- | Eigen/src/Core/ProductEvaluators.h | 191 |
1 files changed, 107 insertions, 84 deletions
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index d9fd888cf..63faca822 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -35,22 +35,28 @@ struct evaluator<Product<Lhs, Rhs, Options> > EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} }; -// Catch scalar * ( A * B ) and transform it to (A*scalar) * B +// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" // TODO we should apply that rule only if that's really helpful -template<typename Lhs, typename Rhs, typename Scalar> -struct evaluator_assume_aliasing<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > > +template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1> +struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>, + const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, + const Product<Lhs, Rhs, DefaultProduct> > > { static const bool value = true; }; -template<typename Lhs, typename Rhs, typename Scalar> -struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > > - : public evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > +template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1> +struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>, + const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, + const Product<Lhs, Rhs, DefaultProduct> > > + : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > { - typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > XprType; - typedef evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > Base; - + typedef 
CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>, + const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, + const Product<Lhs, Rhs, DefaultProduct> > XprType; + typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base; + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) - : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs()) + : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {} }; @@ -122,13 +128,17 @@ protected: PlainObject m_result; }; +// The following three shortcuts are enabled only if the scalar types match exactly. +// TODO: we could enable them for different scalar types when the product is not vectorized. + // Dense = Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar>, Dense2Dense, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> { typedef Product<Lhs,Rhs,Options> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &) { // FIXME shall we handle nested_eval here? 
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs()); @@ -137,11 +147,12 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal // Dense += Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar>, Dense2Dense, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> { typedef Product<Lhs,Rhs,Options> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &) + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &) { // FIXME shall we handle nested_eval here? generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs()); @@ -150,11 +161,12 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op< // Dense -= Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar>, Dense2Dense, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> { typedef Product<Lhs,Rhs,Options> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &) + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &) { // FIXME shall we handle nested_eval here? 
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs()); @@ -165,55 +177,57 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op< // Dense ?= scalar * Product // TODO we should apply that rule if that's really helpful // for instance, this is not good for inner products -template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis> -struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>, - const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense, Scalar> +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain> +struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>, + const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense> { - typedef CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>, - const Product<Lhs,Rhs,DefaultProduct> > SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) + typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, + const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>, + const Product<Lhs,Rhs,DefaultProduct> > SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) { - call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func); + call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func); } }; //---------------------------------------- // Catch "Dense ?= xpr + Product<>" expression to save one temporary // FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct -// TODO enable it for "Dense ?= xpr - Product<>" as well. 
template<typename OtherXpr, typename Lhs, typename Rhs> -struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar>, const OtherXpr, +struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr, const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > { static const bool value = true; }; -template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2> -struct assignment_from_xpr_plus_product +template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2> +struct assignment_from_xpr_op_product { - typedef CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, const ProductType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const Func1& func) + template<typename SrcXprType, typename InitialFunc> + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/) { - call_assignment_no_alias(dst, src.lhs(), func); + call_assignment_no_alias(dst, src.lhs(), Func1()); call_assignment_no_alias(dst, src.rhs(), Func2()); } }; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, - const Product<Lhs,Rhs,DefaultProduct> >, internal::assign_op<Scalar>, Dense2Dense> - : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::assign_op<Scalar>, internal::add_assign_op<Scalar> > -{}; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, - const Product<Lhs,Rhs,DefaultProduct> >, internal::add_assign_op<Scalar>, Dense2Dense> - : 
assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::add_assign_op<Scalar>, internal::add_assign_op<Scalar> > -{}; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, - const Product<Lhs,Rhs,DefaultProduct> >, internal::sub_assign_op<Scalar>, Dense2Dense> - : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::sub_assign_op<Scalar>, internal::sub_assign_op<Scalar> > -{}; +#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \ + template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \ + struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \ + const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \ + : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \ + {} + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op); + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op); + //---------------------------------------- template<typename Lhs, typename Rhs> @@ -243,7 +257,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct> // Column major result template<typename Dst, typename Lhs, typename Rhs, typename 
Func> -EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) +void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) { evaluator<Rhs> rhsEval(rhs); typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs); @@ -251,12 +265,12 @@ EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, cons // FIXME not very good if rhs is real and lhs complex while alpha is real too const Index cols = dst.cols(); for (Index j=0; j<cols; ++j) - func(dst.col(j), rhsEval.coeff(0,j) * actual_lhs); + func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs); } // Row major result template<typename Dst, typename Lhs, typename Rhs, typename Func> -EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) +void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) { evaluator<Lhs> lhsEval(lhs); typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs); @@ -264,7 +278,7 @@ EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, cons // FIXME not very good if lhs is real and rhs complex while alpha is real too const Index rows = dst.rows(); for (Index i=0; i<rows; ++i) - func(dst.row(i), lhsEval.coeff(i,0) * actual_rhs); + func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs); } template<typename Lhs, typename Rhs> @@ -319,19 +333,19 @@ struct generic_product_impl_base typedef typename Product<Lhs,Rhs>::Scalar Scalar; template<typename Dst> - static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } template<typename Dst> - static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, 
const Lhs& lhs, const Rhs& rhs) { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } template<typename Dst> - static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } template<typename Dst> - static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } }; @@ -345,7 +359,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType; template<typename Dest> - static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { internal::gemv_dense_selector<Side, (int(MatrixType::Flags)&RowMajorBit) ? 
RowMajor : ColMajor, @@ -360,25 +374,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> typedef typename Product<Lhs,Rhs>::Scalar Scalar; template<typename Dst> - static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // Same as: dst.noalias() = lhs.lazyProduct(rhs); // but easier on the compiler side - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<Scalar>()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>()); } template<typename Dst> - static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() += lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<Scalar>()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>()); } template<typename Dst> - static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() -= lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<Scalar>()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>()); } // template<typename Dst> @@ -423,6 +437,18 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost); EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); +#if 0 + std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n"; + std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n"; + std::cerr << "LhsAlignment= " << LhsAlignment << "\n"; + std::cerr << 
"RhsAlignment= " << RhsAlignment << "\n"; + std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n"; + std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n"; + std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n"; + std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n"; + std::cerr << "Alignment= " << Alignment << "\n"; + std::cerr << "Flags= " << Flags << "\n"; +#endif } // Everything below here is taken from CoeffBasedProduct.h @@ -473,15 +499,12 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value, - CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % RhsVecPacketSize) == 0) ), - - CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % LhsVecPacketSize) == 0) ), + CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1), + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 - : (RhsRowMajor && !CanVectorizeLhs), + : (bool(RhsRowMajor) && !CanVectorizeLhs), Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) | (EvalToRowMajor ? RowMajorBit : 0) @@ -492,8 +515,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), - Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) - : CanVectorizeRhs ? 
(RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) + Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) + : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) : 0, /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside @@ -519,8 +542,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, */ EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const { - const Index row = RowsAtCompileTime == 1 ? 0 : index; - const Index col = RowsAtCompileTime == 1 ? index : 0; + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0; return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } @@ -538,8 +561,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, template<int LoadMode, typename PacketType> const PacketType packet(Index index) const { - const Index row = RowsAtCompileTime == 1 ? 0 : index; - const Index col = RowsAtCompileTime == 1 ? index : 0; + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 
index : 0; return packet<LoadMode,PacketType>(row,col); } @@ -579,7 +602,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res); + res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res); } }; @@ -589,7 +612,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res); + res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res); } }; @@ -598,7 +621,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { - res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col)); + res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col)); } }; @@ -607,7 +630,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { - res = pmul(lhs.template 
packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, col))); + res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col))); } }; @@ -616,7 +639,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) { - res = pset1<Packet>(0); + res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); } }; @@ -625,7 +648,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) { - res = pset1<Packet>(0); + res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); } }; @@ -634,7 +657,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - res = pset1<Packet>(0); + res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); for(Index i = 0; i < innerDim; ++i) res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res); } @@ -645,7 +668,7 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - res = pset1<Packet>(0); + res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); for(Index i = 0; i < innerDim; ++i) res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); } @@ -730,7 +753,7 @@ template<typename MatrixType, typename DiagonalType, typename Derived, int Produ struct diagonal_product_evaluator_base : evaluator_base<Derived> { - typedef typename scalar_product_traits<typename MatrixType::Scalar, typename 
DiagonalType::Scalar>::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar; public: enum { CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost, |