diff options
author | 2014-03-27 16:03:03 -0700 | |
---|---|---|
committer | 2014-03-27 16:03:03 -0700 | |
commit | ee866790967ab4ab11a62987dd21bac66237cba9 (patch) | |
tree | bec58bf7d0cd24b8845ec1d1bb641a3c5b7699c3 | |
parent | 729363114f0c7ea8ff3f8ddd8f6a83335b0f3909 (diff) |
Introduced pscatter/pgather packet primitives. They will be used to optimize the loop peeling code of the block-panel matrix multiplication kernel.
-rwxr-xr-x | Eigen/src/Core/GenericPacketMath.h | 6 |
-rw-r--r-- | test/packetmath.cpp | 39 |
2 files changed, 43 insertions, 2 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index f9ddf4718..03e7f410c 100755 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -217,6 +217,12 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore( /** \internal copy the packet \a from to \a *to, (un-aligned store) */ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) +{ (*to) = from; } + + template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, int /*stride*/) + { return ploadu<Packet>(from); } + + template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, int /*stride*/) { (*to) = from; } /** \internal tries to do cache prefetching of \a addr */ diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 735af7017..7deefe890 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -356,8 +356,38 @@ template<typename Scalar> void packetmath_complex() internal::pstore(pval,internal::pcplxflip(internal::pload<Packet>(data1))); VERIFY(areApprox(ref, pval, PacketSize) && "pcplxflip"); } - - +} + +template<typename Scalar> void packetmath_scatter_gather() { + typedef typename internal::packet_traits<Scalar>::type Packet; + typedef typename NumTraits<Scalar>::Real RealScalar; + const int PacketSize = internal::packet_traits<Scalar>::size; + Scalar data1[PacketSize]; + RealScalar refvalue = 0; + for (int i=0; i<PacketSize; ++i) { + data1[i] = internal::random<Scalar>()/RealScalar(PacketSize); + } + Scalar buffer[PacketSize*11]; + memset(buffer, 0, 11*sizeof(Packet)); + Packet packet = internal::pload<Packet>(data1); + internal::pscatter<Scalar, Packet>(buffer, packet, 11); + + for (int i = 0; i < PacketSize*11; ++i) { + if ((i%11) == 0) { + VERIFY(isApproxAbs(buffer[i], data1[i/11], refvalue)); + } else { + VERIFY(isApproxAbs(buffer[i], Scalar(0), 
refvalue)); + } + } + + for (int i=0; i<PacketSize*7; ++i) { + buffer[i] = internal::random<Scalar>()/RealScalar(PacketSize); + } + packet = internal::pgather<Scalar, Packet>(buffer, 7); + internal::pstore(data1, packet); + for (int i = 0; i < PacketSize; ++i) { + VERIFY(isApproxAbs(data1[i], buffer[i*7], refvalue)); + } } void test_packetmath() @@ -378,5 +408,10 @@ void test_packetmath() CALL_SUBTEST_1( packetmath_complex<std::complex<float> >() ); CALL_SUBTEST_2( packetmath_complex<std::complex<double> >() ); + + CALL_SUBTEST_1( packetmath_scatter_gather<float>() ); + CALL_SUBTEST_2( packetmath_scatter_gather<double>() ); + CALL_SUBTEST_3( packetmath_scatter_gather<std::complex<float> >() ); + CALL_SUBTEST_3( packetmath_scatter_gather<std::complex<double> >() ); } } |