From c31ead8a15e7ddc91b4d02962164639f7ce9f4dc Mon Sep 17 00:00:00 2001 From: Chip-Kerchner Date: Wed, 24 Feb 2021 17:40:34 -0600 Subject: Having forward template function declarations in a P10 file causes bad code in certain situations. --- Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h | 46 +++++++++++++++++++++++ Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h | 46 ----------------------- 2 files changed, 46 insertions(+), 46 deletions(-) (limited to 'Eigen/src/Core/arch') diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h index 87b60c22c..a1799c061 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h @@ -2,6 +2,52 @@ namespace Eigen { namespace internal { +template +EIGEN_STRONG_INLINE void gemm_extra_col( + const DataMapper& res, + const Scalar *lhs_base, + const Scalar *rhs_base, + Index depth, + Index strideA, + Index offsetA, + Index row, + Index col, + Index remaining_rows, + Index remaining_cols, + const Packet& pAlpha); + +template +EIGEN_STRONG_INLINE void gemm_extra_row( + const DataMapper& res, + const Scalar *lhs_base, + const Scalar *rhs_base, + Index depth, + Index strideA, + Index offsetA, + Index row, + Index col, + Index cols, + Index remaining_rows, + const Packet& pAlpha, + const Packet& pMask); + +template +EIGEN_STRONG_INLINE void gemm_unrolled_col( + const DataMapper& res, + const Scalar *lhs_base, + const Scalar *rhs_base, + Index depth, + Index strideA, + Index offsetA, + Index& row, + Index rows, + Index col, + Index remaining_cols, + const Packet& pAlpha); + +template +EIGEN_STRONG_INLINE Packet bmask(const int remaining_rows); + const static Packet16uc p16uc_SETCOMPLEX32_FIRST = { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h index 888f7cd08..bfee9ee92 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h @@ -238,52 +238,6 @@ EIGEN_STRONG_INLINE void ploadRhsMMA(const double *rhs, _ __builtin_vsx_assemble_pair(&rhsV, (__vector unsigned char)(*(((Packet2d *)rhs) + 1)), (__vector unsigned char)(*((Packet2d *)rhs))); } -template -EIGEN_STRONG_INLINE void gemm_extra_col( - const DataMapper& res, - const Scalar *lhs_base, - const Scalar *rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index row, - Index col, - Index remaining_rows, - Index remaining_cols, - const Packet& pAlpha); - -template -EIGEN_STRONG_INLINE void gemm_extra_row( - const DataMapper& res, - const Scalar *lhs_base, - const Scalar *rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index row, - Index col, - Index cols, - Index remaining_rows, - const Packet& pAlpha, - const Packet& pMask); - -template -EIGEN_STRONG_INLINE void gemm_unrolled_col( - const DataMapper& res, - const Scalar *lhs_base, - const Scalar *rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index& row, - Index rows, - Index col, - Index remaining_cols, - const Packet& pAlpha); - -template -EIGEN_STRONG_INLINE Packet bmask(const int remaining_rows); - #define MICRO_MMA_DST \ __vector_quad *accZero0, __vector_quad *accZero1, __vector_quad *accZero2, \ __vector_quad *accZero3, __vector_quad *accZero4, __vector_quad *accZero5, \ -- cgit v1.2.3