about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch
diff options
context:
space:
mode:
author: Chip-Kerchner <chip.kerchner@ibm.com> 2021-02-24 17:40:34 -0600
committer: Chip Kerchner <chip.kerchner@ibm.com> 2021-02-24 23:43:30 +0000
commit: c31ead8a15e7ddc91b4d02962164639f7ce9f4dc (patch)
tree: 5458ad068f40998451565a882973767f15879261 /Eigen/src/Core/arch
parent: f44197fabdfeb6008fba543e0a12959dc20735ae (diff)
Having forward template function declarations in a P10 file causes bad code in certain situations.
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h | 46
-rw-r--r-- Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h | 46
2 files changed, 46 insertions, 46 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
index 87b60c22c..a1799c061 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
@@ -2,6 +2,52 @@ namespace Eigen {
namespace internal {
+template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
+EIGEN_STRONG_INLINE void gemm_extra_col(
+ const DataMapper& res,
+ const Scalar *lhs_base,
+ const Scalar *rhs_base,
+ Index depth,
+ Index strideA,
+ Index offsetA,
+ Index row,
+ Index col,
+ Index remaining_rows,
+ Index remaining_cols,
+ const Packet& pAlpha);
+
+template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
+EIGEN_STRONG_INLINE void gemm_extra_row(
+ const DataMapper& res,
+ const Scalar *lhs_base,
+ const Scalar *rhs_base,
+ Index depth,
+ Index strideA,
+ Index offsetA,
+ Index row,
+ Index col,
+ Index cols,
+ Index remaining_rows,
+ const Packet& pAlpha,
+ const Packet& pMask);
+
+template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols>
+EIGEN_STRONG_INLINE void gemm_unrolled_col(
+ const DataMapper& res,
+ const Scalar *lhs_base,
+ const Scalar *rhs_base,
+ Index depth,
+ Index strideA,
+ Index offsetA,
+ Index& row,
+ Index rows,
+ Index col,
+ Index remaining_cols,
+ const Packet& pAlpha);
+
+template<typename Packet>
+EIGEN_STRONG_INLINE Packet bmask(const int remaining_rows);
+
const static Packet16uc p16uc_SETCOMPLEX32_FIRST = { 0, 1, 2, 3,
16, 17, 18, 19,
4, 5, 6, 7,
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
index 888f7cd08..bfee9ee92 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
@@ -238,52 +238,6 @@ EIGEN_STRONG_INLINE void ploadRhsMMA<double, __vector_pair>(const double *rhs, _
__builtin_vsx_assemble_pair(&rhsV, (__vector unsigned char)(*(((Packet2d *)rhs) + 1)), (__vector unsigned char)(*((Packet2d *)rhs)));
}
-template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
-EIGEN_STRONG_INLINE void gemm_extra_col(
- const DataMapper& res,
- const Scalar *lhs_base,
- const Scalar *rhs_base,
- Index depth,
- Index strideA,
- Index offsetA,
- Index row,
- Index col,
- Index remaining_rows,
- Index remaining_cols,
- const Packet& pAlpha);
-
-template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
-EIGEN_STRONG_INLINE void gemm_extra_row(
- const DataMapper& res,
- const Scalar *lhs_base,
- const Scalar *rhs_base,
- Index depth,
- Index strideA,
- Index offsetA,
- Index row,
- Index col,
- Index cols,
- Index remaining_rows,
- const Packet& pAlpha,
- const Packet& pMask);
-
-template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols>
-EIGEN_STRONG_INLINE void gemm_unrolled_col(
- const DataMapper& res,
- const Scalar *lhs_base,
- const Scalar *rhs_base,
- Index depth,
- Index strideA,
- Index offsetA,
- Index& row,
- Index rows,
- Index col,
- Index remaining_cols,
- const Packet& pAlpha);
-
-template<typename Packet>
-EIGEN_STRONG_INLINE Packet bmask(const int remaining_rows);
-
#define MICRO_MMA_DST \
__vector_quad *accZero0, __vector_quad *accZero1, __vector_quad *accZero2, \
__vector_quad *accZero3, __vector_quad *accZero4, __vector_quad *accZero5, \