diff options
author | Everton Constantino <everton.constantino@ibm.com> | 2020-05-20 14:01:02 -0300 |
---|---|---|
committer | Everton Constantino <everton.constantino@ibm.com> | 2020-09-02 18:21:36 -0300 |
commit | 6fe88a3c9db27c00a3817e391cf70116451bf046 (patch) | |
tree | 4d62e610f6fdb0c3f5f571f70cf1f984fbeff907 /Eigen/src/Core/util | |
parent | 6568856275de8bfcdd74e1de8fdf8656aca5ddb4 (diff) |
MatrixProduct enhancements:
- Changes to Altivec/MatrixProduct
Adapting code to gcc 10.
Generic code style and performance enhancements.
Adding PanelMode support.
Adding stride/offset support.
Enabling float64, std::complex<float> and std::complex<double>.
Fixing lack of symm_pack.
Enabling mixedtypes.
- Adding std::complex tests to blasutil.
- Adding an implementation of storePacketBlock when Incr != 1.
Diffstat (limited to 'Eigen/src/Core/util')
-rwxr-xr-x | Eigen/src/Core/util/BlasUtil.h | 71 |
1 files changed, 71 insertions, 0 deletions
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 01e647f17..a90e57446 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -391,6 +391,77 @@ public: return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride); } + // storePacketBlock_helper defines how the values inside a PacketBlock are accessed and written out one coefficient at a time through the mapper's operator(): packets idx-1 down to 0 are stored first via the spbh member (the empty idx == -1 specializations end the recursion), then lane l of packet idx is written at (i+l, j+idx). The std::complex specializations are essentially required because those packets are read through their .v member as interleaved values, v[2*l] being the real part and v[2*l+1] the imaginary part. + template<typename SubPacket, typename ScalarT, int n, int idx> + struct storePacketBlock_helper + { + storePacketBlock_helper<SubPacket, ScalarT, n, idx-1> spbh; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const { + spbh.store(sup, i,j,block); + for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) + { + ScalarT *v = &sup->operator()(i+l, j+idx); + *v = block.packet[idx][l]; + } + } + }; + + template<typename SubPacket, int n, int idx> + struct storePacketBlock_helper<SubPacket, std::complex<float>, n, idx> + { + storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const { + spbh.store(sup,i,j,block); + for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) + { + std::complex<float> *v = &sup->operator()(i+l, j+idx); + v->real(block.packet[idx].v[2*l+0]); + v->imag(block.packet[idx].v[2*l+1]); + } + } + }; + + template<typename SubPacket, int n, int idx> + struct storePacketBlock_helper<SubPacket, std::complex<double>, n, idx> + { + storePacketBlock_helper<SubPacket, std::complex<double>, n, idx-1> spbh; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const { + 
spbh.store(sup,i,j,block); + for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) + { + std::complex<double> *v = &sup->operator()(i+l, j+idx); + v->real(block.packet[idx].v[2*l+0]); + v->imag(block.packet[idx].v[2*l+1]); + } + } + }; + + template<typename SubPacket, typename ScalarT, int n> + struct storePacketBlock_helper<SubPacket, ScalarT, n, -1> + { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { + } + }; + + template<typename SubPacket, int n> + struct storePacketBlock_helper<SubPacket, std::complex<float>, n, -1> + { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { + } + }; + + template<typename SubPacket, int n> + struct storePacketBlock_helper<SubPacket, std::complex<double>, n, -1> + { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { + } + }; + // This function stores a PacketBlock on m_data by delegating to storePacketBlock_helper, starting from packet n-1. This approach is really quite slow compared to Incr=1 and should be avoided when possible. + template<typename SubPacket, int n> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n>&block) const { + storePacketBlock_helper<SubPacket, Scalar, n, n-1> spb; + spb.store(this, i,j,block); + } protected: Scalar* EIGEN_RESTRICT m_data; const Index m_stride; |