diff options
author | Chip-Kerchner <chip.kerchner@ibm.com> | 2021-02-24 17:40:34 -0600 |
---|---|---|
committer | Chip Kerchner <chip.kerchner@ibm.com> | 2021-02-24 23:43:30 +0000 |
commit | c31ead8a15e7ddc91b4d02962164639f7ce9f4dc (patch) | |
tree | 5458ad068f40998451565a882973767f15879261 /Eigen/src/Core/arch | |
parent | f44197fabdfeb6008fba543e0a12959dc20735ae (diff) |
Having forward template function declarations in a P10 (MMA) file causes bad code generation in certain situations, so move them from MatrixProductMMA.h to MatrixProductCommon.h.
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h | 46 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h | 46 |
2 files changed, 46 insertions, 46 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h index 87b60c22c..a1799c061 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h @@ -2,6 +2,52 @@ namespace Eigen { namespace internal { +template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows> +EIGEN_STRONG_INLINE void gemm_extra_col( + const DataMapper& res, + const Scalar *lhs_base, + const Scalar *rhs_base, + Index depth, + Index strideA, + Index offsetA, + Index row, + Index col, + Index remaining_rows, + Index remaining_cols, + const Packet& pAlpha); + +template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows> +EIGEN_STRONG_INLINE void gemm_extra_row( + const DataMapper& res, + const Scalar *lhs_base, + const Scalar *rhs_base, + Index depth, + Index strideA, + Index offsetA, + Index row, + Index col, + Index cols, + Index remaining_rows, + const Packet& pAlpha, + const Packet& pMask); + +template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols> +EIGEN_STRONG_INLINE void gemm_unrolled_col( + const DataMapper& res, + const Scalar *lhs_base, + const Scalar *rhs_base, + Index depth, + Index strideA, + Index offsetA, + Index& row, + Index rows, + Index col, + Index remaining_cols, + const Packet& pAlpha); + +template<typename Packet> +EIGEN_STRONG_INLINE Packet bmask(const int remaining_rows); + const static Packet16uc p16uc_SETCOMPLEX32_FIRST = { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h index 888f7cd08..bfee9ee92 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h @@ -238,52 +238,6 @@ EIGEN_STRONG_INLINE void ploadRhsMMA<double, __vector_pair>(const double *rhs, _ 
__builtin_vsx_assemble_pair(&rhsV, (__vector unsigned char)(*(((Packet2d *)rhs) + 1)), (__vector unsigned char)(*((Packet2d *)rhs))); } -template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows> -EIGEN_STRONG_INLINE void gemm_extra_col( - const DataMapper& res, - const Scalar *lhs_base, - const Scalar *rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index row, - Index col, - Index remaining_rows, - Index remaining_cols, - const Packet& pAlpha); - -template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows> -EIGEN_STRONG_INLINE void gemm_extra_row( - const DataMapper& res, - const Scalar *lhs_base, - const Scalar *rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index row, - Index col, - Index cols, - Index remaining_rows, - const Packet& pAlpha, - const Packet& pMask); - -template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols> -EIGEN_STRONG_INLINE void gemm_unrolled_col( - const DataMapper& res, - const Scalar *lhs_base, - const Scalar *rhs_base, - Index depth, - Index strideA, - Index offsetA, - Index& row, - Index rows, - Index col, - Index remaining_cols, - const Packet& pAlpha); - -template<typename Packet> -EIGEN_STRONG_INLINE Packet bmask(const int remaining_rows); - #define MICRO_MMA_DST \ __vector_quad *accZero0, __vector_quad *accZero1, __vector_quad *accZero2, \ __vector_quad *accZero3, __vector_quad *accZero4, __vector_quad *accZero5, \ |