aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/util
diff options
context:
space:
mode:
authorGravatar Everton Constantino <everton.constantino@ibm.com>2020-05-20 14:01:02 -0300
committerGravatar Everton Constantino <everton.constantino@ibm.com>2020-09-02 18:21:36 -0300
commit6fe88a3c9db27c00a3817e391cf70116451bf046 (patch)
tree4d62e610f6fdb0c3f5f571f70cf1f984fbeff907 /Eigen/src/Core/util
parent6568856275de8bfcdd74e1de8fdf8656aca5ddb4 (diff)
MatrixProduct enhancements:
- Changes to Altivec/MatrixProduct: Adapting code to gcc 10. Generic code style and performance enhancements. Adding PanelMode support. Adding stride/offset support. Enabling float64, std::complex<float> and std::complex<double>. Fixing lack of symm_pack. Enabling mixedtypes. - Adding std::complex tests to blasutil. - Adding an implementation of storePacketBlock when Incr != 1.
Diffstat (limited to 'Eigen/src/Core/util')
-rwxr-xr-xEigen/src/Core/util/BlasUtil.h71
1 files changed, 71 insertions, 0 deletions
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 01e647f17..a90e57446 100755
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -391,6 +391,77 @@ public:
return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
}
+ // storePacketBlock_helper<SubPacket, ScalarT, n, idx> writes packets 0..idx of a PacketBlock to the mapper one element at a time through operator(); it is specialized per scalar type because the complex types read their packet elements differently.
+ template<typename SubPacket, typename ScalarT, int n, int idx>
+ struct storePacketBlock_helper // primary template: used for every ScalarT that has no dedicated specialization
+ {
+ storePacketBlock_helper<SubPacket, ScalarT, n, idx-1> spbh; // helper for the preceding packet (idx-1); the chain ends at the idx == -1 specializations
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
+ spbh.store(sup, i,j,block); // packets 0..idx-1 are written before packet idx
+ for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) // one iteration per element held by the packet
+ {
+ ScalarT *v = &sup->operator()(i+l, j+idx); // destination: mapper element (i+l, j+idx)
+ *v = block.packet[idx][l]; // NOTE(review): subscripts the packet directly with [l]; assumes SubPacket supports operator[] - confirm for every target that instantiates this
+ }
+ }
+ };
+
+ template<typename SubPacket, int n, int idx>
+ struct storePacketBlock_helper<SubPacket, std::complex<float>, n, idx> // std::complex<float> variant: elements are read from the packet's .v member as (real, imag) pairs
+ {
+ storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh; // helper for the preceding packet (idx-1)
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
+ spbh.store(sup,i,j,block); // packets 0..idx-1 are written before packet idx
+ for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) // one iteration per complex element held by the packet
+ {
+ std::complex<float> *v = &sup->operator()(i+l, j+idx); // destination: mapper element (i+l, j+idx)
+ v->real(block.packet[idx].v[2*l+0]); // real part comes from position 2*l of .v
+ v->imag(block.packet[idx].v[2*l+1]); // imaginary part comes from position 2*l+1 of .v
+ }
+ }
+ };
+
+ template<typename SubPacket, int n, int idx>
+ struct storePacketBlock_helper<SubPacket, std::complex<double>, n, idx> // std::complex<double> variant: elements are read from the packet's .v member as (real, imag) pairs
+ {
+ storePacketBlock_helper<SubPacket, std::complex<double>, n, idx-1> spbh; // helper for the preceding packet (idx-1)
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
+ spbh.store(sup,i,j,block); // packets 0..idx-1 are written before packet idx
+ for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) // one iteration per complex element held by the packet
+ {
+ std::complex<double> *v = &sup->operator()(i+l, j+idx); // destination: mapper element (i+l, j+idx)
+ v->real(block.packet[idx].v[2*l+0]); // real part comes from position 2*l of .v
+ v->imag(block.packet[idx].v[2*l+1]); // imaginary part comes from position 2*l+1 of .v
+ }
+ }
+ };
+
+ template<typename SubPacket, typename ScalarT, int n>
+ struct storePacketBlock_helper<SubPacket, ScalarT, n, -1> // recursion terminator for the generic helper: idx == -1 means no packet is left
+ {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { // all parameters unnamed: nothing is read or written
+ }
+ };
+
+ template<typename SubPacket, int n>
+ struct storePacketBlock_helper<SubPacket, std::complex<float>, n, -1> // recursion terminator for std::complex<float>; without it idx == -1 would match both the complex<float> specialization and the generic -1 terminator
+ {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { // all parameters unnamed: nothing is read or written
+ }
+ };
+
+ template<typename SubPacket, int n>
+ struct storePacketBlock_helper<SubPacket, std::complex<double>, n, -1> // recursion terminator for std::complex<double>; without it idx == -1 would match both the complex<double> specialization and the generic -1 terminator
+ {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { // all parameters unnamed: nothing is read or written
+ }
+ };
+ // Stores the n packets of a PacketBlock on m_data starting at (i, j), one element at a time via storePacketBlock_helper; this approach is really quite slow compared to Incr=1 and should be avoided when possible.
+ template<typename SubPacket, int n>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n>&block) const {
+ storePacketBlock_helper<SubPacket, Scalar, n, n-1> spb; // start from the last packet (n-1); the helper chains down to packet 0, selecting the specialization for this mapper's Scalar
+ spb.store(this, i,j,block); // the helper writes through this mapper's operator()
+ }
protected:
Scalar* EIGEN_RESTRICT m_data;
const Index m_stride;