about | summary | refs | log | tree | commit | diff | homepage
path: root/test/packetmath.cpp
diff options
context:
space:
mode:
author: Rasmus Munk Larsen <rmlarsen@google.com> 2020-04-20 20:16:28 +0000
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2020-04-20 20:16:28 +0000
commit: 2f6ddaa25c605b3fdfb991ebd6c4e945c81f1067 (patch)
tree: a7e0a3067141f79156da0dae03f9a3c9055d40e2 /test/packetmath.cpp
parent: 00f6340153860ffb3e4776a3f42aa851b596a094 (diff)
Add partial vectorization for matrices and tensors of bool. This speeds up boolean operations on Tensors by up to 25x.
Benchmark numbers for the logical AND of two NxN tensors:

name                                      old time/op  new time/op  delta
BM_booleanAnd_1T/3 [using 1 threads]      14.6ns ± 0%  14.4ns ± 0%   -0.96%
BM_booleanAnd_1T/4 [using 1 threads]      20.5ns ±12%   9.0ns ± 0%  -56.07%
BM_booleanAnd_1T/7 [using 1 threads]      41.7ns ± 0%  10.5ns ± 0%  -74.87%
BM_booleanAnd_1T/8 [using 1 threads]      52.1ns ± 0%  10.1ns ± 0%  -80.59%
BM_booleanAnd_1T/10 [using 1 threads]     76.3ns ± 0%  13.8ns ± 0%  -81.87%
BM_booleanAnd_1T/15 [using 1 threads]      167ns ± 0%    16ns ± 0%  -90.45%
BM_booleanAnd_1T/16 [using 1 threads]      188ns ± 0%    16ns ± 0%  -91.57%
BM_booleanAnd_1T/31 [using 1 threads]      667ns ± 0%    34ns ± 0%  -94.83%
BM_booleanAnd_1T/32 [using 1 threads]      710ns ± 0%    35ns ± 0%  -95.01%
BM_booleanAnd_1T/64 [using 1 threads]     2.80µs ± 0%  0.11µs ± 0%  -95.93%
BM_booleanAnd_1T/128 [using 1 threads]    11.2µs ± 0%   0.4µs ± 0%  -96.11%
BM_booleanAnd_1T/256 [using 1 threads]    44.6µs ± 0%   2.5µs ± 0%  -94.31%
BM_booleanAnd_1T/512 [using 1 threads]     178µs ± 0%    10µs ± 0%  -94.35%
BM_booleanAnd_1T/1k [using 1 threads]      717µs ± 0%    78µs ± 1%  -89.07%
BM_booleanAnd_1T/2k [using 1 threads]     2.87ms ± 0%  0.31ms ± 1%  -89.08%
BM_booleanAnd_1T/4k [using 1 threads]     11.7ms ± 0%   1.9ms ± 4%  -83.55%
BM_booleanAnd_1T/10k [using 1 threads]    70.3ms ± 0%  17.2ms ± 4%  -75.48%
Diffstat (limited to 'test/packetmath.cpp')
-rw-r--r-- test/packetmath.cpp 44
1 file changed, 29 insertions, 15 deletions
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 5d38ce6b4..761273b86 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -70,6 +70,23 @@ void test_cast() {
test_cast_helper<FromScalar, FromPacket, ToScalar, ToPacket, CanCast>::run();
}
+template<typename Scalar,typename Packet> void packetmath_boolean()
+{
+ const int PacketSize = internal::unpacket_traits<Packet>::size;
+ const int size = 2*PacketSize;
+ EIGEN_ALIGN_MAX Scalar data1[size];
+ EIGEN_ALIGN_MAX Scalar data2[size];
+ EIGEN_ALIGN_MAX Scalar ref[size];
+
+ for (int i=0; i<size; ++i)
+ {
+ data1[i] = internal::random<Scalar>();
+ }
+ CHECK_CWISE2_IF(true, internal::por, internal::por);
+ CHECK_CWISE2_IF(true, internal::pxor, internal::pxor);
+ CHECK_CWISE2_IF(true, internal::pand, internal::pand);
+}
+
template<typename Scalar,typename Packet> void packetmath()
{
typedef internal::packet_traits<Scalar> PacketTraits;
@@ -338,21 +355,6 @@ template<typename Scalar,typename Packet> void packetmath()
}
{
- for (int i=0; i<PacketSize; ++i)
- {
- data1[i] = internal::random<Scalar>();
- unsigned char v = internal::random<bool>() ? 0xff : 0;
- char* bytes = (char*)(data1+PacketSize+i);
- for(int k=0; k<int(sizeof(Scalar)); ++k) {
- bytes[k] = v;
- }
- }
- CHECK_CWISE2_IF(true, internal::por, internal::por);
- CHECK_CWISE2_IF(true, internal::pxor, internal::pxor);
- CHECK_CWISE2_IF(true, internal::pand, internal::pand);
- CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
- }
- {
for (int i = 0; i < PacketSize; ++i) {
// "if" mask
unsigned char v = internal::random<bool>() ? 0xff : 0;
@@ -377,8 +379,17 @@ template<typename Scalar,typename Packet> void packetmath()
}
CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt);
+
+ for (int i=0; i<size; ++i)
+ {
+ data1[i] = internal::random<Scalar>();
+ }
+ CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
+
+ packetmath_boolean<Scalar, Packet>();
}
+
template<typename Scalar,typename Packet> void packetmath_real()
{
typedef internal::packet_traits<Scalar> PacketTraits;
@@ -807,6 +818,9 @@ EIGEN_DECLARE_TEST(packetmath)
CALL_SUBTEST_11( test::runner<std::complex<float> >::run() );
CALL_SUBTEST_12( test::runner<std::complex<double> >::run() );
CALL_SUBTEST_13(( packetmath<half,internal::packet_traits<half>::type>() ));
+#ifdef EIGEN_PACKET_MATH_SSE_H
+ CALL_SUBTEST_14(( packetmath_boolean<bool,internal::packet_traits<bool>::type>() ));
+#endif
g_first_pass = false;
}
}