aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2014-03-27 16:03:03 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2014-03-27 16:03:03 -0700
commit: ee866790967ab4ab11a62987dd21bac66237cba9 (patch)
tree: bec58bf7d0cd24b8845ec1d1bb641a3c5b7699c3
parent: 729363114f0c7ea8ff3f8ddd8f6a83335b0f3909 (diff)
Introduced pscatter/pgather packet primitives. They will be used to optimize the loop peeling code of the block-panel matrix multiplication kernel.
-rwxr-xr-x Eigen/src/Core/GenericPacketMath.h 6
-rw-r--r-- test/packetmath.cpp 39
2 files changed, 43 insertions, 2 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index f9ddf4718..03e7f410c 100755
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -217,6 +217,12 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(
/** \internal copy the packet \a from to \a *to, (un-aligned store) */
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
+{ (*to) = from; }
+
+ /** \internal generic fallback for a strided load: returns ploadu<Packet>(from) and does not use the stride argument.
+   * NOTE(review): presumably only adequate when Packet holds a single Scalar; vectorized back-ends would need their own version -- confirm. */
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, int /*stride*/)
+ { return ploadu<Packet>(from); }
+
+ /** \internal generic fallback for a strided store: assigns \a from to \a *to and does not use the stride argument.
+   * NOTE(review): presumably only adequate when Packet holds a single Scalar; vectorized back-ends would need their own version -- confirm. */
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, int /*stride*/)
{ (*to) = from; }
/** \internal tries to do cache prefetching of \a addr */
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 735af7017..7deefe890 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -356,8 +356,38 @@ template<typename Scalar> void packetmath_complex()
internal::pstore(pval,internal::pcplxflip(internal::pload<Packet>(data1)));
VERIFY(areApprox(ref, pval, PacketSize) && "pcplxflip");
}
-
-
+}
+
+// Checks pscatter/pgather against plain strided indexing: scattering a packet
+// with stride 11 must write element i to buffer[i*11] and leave every other
+// slot at zero; gathering with stride 7 must read element i from buffer[i*7].
+template<typename Scalar> void packetmath_scatter_gather() {
+  typedef typename internal::packet_traits<Scalar>::type Packet;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  const int PacketSize = internal::packet_traits<Scalar>::size;
+  // data1 is a plain array with no alignment attribute, so it is only ever
+  // accessed through the unaligned ploadu/pstoreu primitives below.
+  Scalar data1[PacketSize];
+  // Tolerance passed to isApproxAbs; scatter/gather only move values around.
+  RealScalar refvalue = 0;
+  for (int i=0; i<PacketSize; ++i) {
+    data1[i] = internal::random<Scalar>()/RealScalar(PacketSize);
+  }
+  Scalar buffer[PacketSize*11];
+  // Clear the whole array; sizeof(buffer) holds regardless of sizeof(Packet).
+  memset(buffer, 0, sizeof(buffer));
+  Packet packet = internal::ploadu<Packet>(data1);
+  internal::pscatter<Scalar, Packet>(buffer, packet, 11);
+
+  // Every 11th slot must hold the matching packet element, all others zero.
+  for (int i = 0; i < PacketSize*11; ++i) {
+    if ((i%11) == 0) {
+      VERIFY(isApproxAbs(buffer[i], data1[i/11], refvalue));
+    } else {
+      VERIFY(isApproxAbs(buffer[i], Scalar(0), refvalue));
+    }
+  }
+
+  // Refill the part of the buffer that a stride-7 gather reads from.
+  for (int i=0; i<PacketSize*7; ++i) {
+    buffer[i] = internal::random<Scalar>()/RealScalar(PacketSize);
+  }
+  packet = internal::pgather<Scalar, Packet>(buffer, 7);
+  internal::pstoreu(data1, packet);
+  for (int i = 0; i < PacketSize; ++i) {
+    VERIFY(isApproxAbs(data1[i], buffer[i*7], refvalue));
+  }
}
void test_packetmath()
@@ -378,5 +408,10 @@ void test_packetmath()
CALL_SUBTEST_1( packetmath_complex<std::complex<float> >() );
CALL_SUBTEST_2( packetmath_complex<std::complex<double> >() );
+
+ CALL_SUBTEST_1( packetmath_scatter_gather<float>() );
+ CALL_SUBTEST_2( packetmath_scatter_gather<double>() );
+ CALL_SUBTEST_3( packetmath_scatter_gather<std::complex<float> >() );
+ CALL_SUBTEST_3( packetmath_scatter_gather<std::complex<double> >() );
}
}