aboutsummaryrefslogtreecommitdiffhomepage
path: root/test
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-05-11 13:23:31 -0700
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-05-14 22:39:13 +0000
commit9b411757abd8458f9689b1384c6bf75da9b82357 (patch)
treeeb23c0c65eaa4df204880c94b287fcf1b43e14cd /test
parentd640276d31a7dea9207a68a061a6fa7c9fdf50e5 (diff)
Add missing packet ops for bool, and make it pass the same packet op unit tests as other arithmetic types.
This change also contains a few minor cleanups: 1. Remove packet op pnot, which is not needed for anything other than pcmp_le_or_nan, which can be done in other ways. 2. Remove the "HasInsert" enum, which is no longer needed since we removed the corresponding packet ops. 3. Add faster pselect op for Packet4i when SSE4.1 is supported. Among other things, this makes the fast transposeInPlace() method available for Matrix<bool>. Run on ************** (72 X 2994 MHz CPUs); 2020-05-09T10:51:02.372347913-07:00 CPU: Intel Skylake Xeon with HyperThreading (36 cores) dL1:32KB dL2:1024KB dL3:24MB Benchmark Time(ns) CPU(ns) Iterations ----------------------------------------------------------------------- BM_TransposeInPlace<float>/4 9.77 9.77 71670320 BM_TransposeInPlace<float>/8 21.9 21.9 31929525 BM_TransposeInPlace<float>/16 66.6 66.6 10000000 BM_TransposeInPlace<float>/32 243 243 2879561 BM_TransposeInPlace<float>/59 844 844 829767 BM_TransposeInPlace<float>/64 933 933 750567 BM_TransposeInPlace<float>/128 3944 3945 177405 BM_TransposeInPlace<float>/256 16853 16853 41457 BM_TransposeInPlace<float>/512 204952 204968 3448 BM_TransposeInPlace<float>/1k 1053889 1053861 664 BM_TransposeInPlace<bool>/4 14.4 14.4 48637301 BM_TransposeInPlace<bool>/8 36.0 36.0 19370222 BM_TransposeInPlace<bool>/16 31.5 31.5 22178902 BM_TransposeInPlace<bool>/32 111 111 6272048 BM_TransposeInPlace<bool>/59 626 626 1000000 BM_TransposeInPlace<bool>/64 428 428 1632689 BM_TransposeInPlace<bool>/128 1677 1677 417377 BM_TransposeInPlace<bool>/256 7126 7126 96264 BM_TransposeInPlace<bool>/512 29021 29024 24165 BM_TransposeInPlace<bool>/1k 116321 116330 6068
Diffstat (limited to 'test')
-rw-r--r--test/packetmath.cpp66
1 files changed, 42 insertions, 24 deletions
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index c7732e6e6..e59e9df21 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -10,11 +10,25 @@
#include "packetmath_test_shared.h"
-#define REF_ADD(a,b) ((a)+(b))
-#define REF_SUB(a,b) ((a)-(b))
-#define REF_MUL(a,b) ((a)*(b))
-#define REF_DIV(a,b) ((a)/(b))
-#define REF_ABS_DIFF(a,b) ((a)>(b)?(a)-(b):(b)-(a))
+template <typename T>
+inline T REF_ADD(const T& a, const T& b) { return a + b;}
+template <typename T>
+inline T REF_SUB(const T& a, const T& b) { return a - b;}
+template <typename T>
+inline T REF_MUL(const T& a, const T& b) { return a * b;}
+template <typename T>
+inline T REF_DIV(const T& a, const T& b) { return a / b;}
+template <typename T>
+inline T REF_ABS_DIFF(const T& a, const T& b) { return a>b ? a - b : b-a;}
+
+// Specializations for bool
+template <>
+inline bool REF_ADD(const bool& a, const bool& b) { return a || b;}
+template <>
+inline bool REF_SUB(const bool& a, const bool& b) { return a ^ b;}
+template <>
+inline bool REF_MUL(const bool& a, const bool& b) { return a && b;}
+
template<typename FromScalar, typename FromPacket, typename ToScalar, typename ToPacket, bool CanCast = false>
struct test_cast_helper;
@@ -70,7 +84,8 @@ void test_cast() {
test_cast_helper<FromScalar, FromPacket, ToScalar, ToPacket, CanCast>::run();
}
-template<typename Scalar,typename Packet> void packetmath_boolean()
+template<typename Scalar,typename Packet>
+void packetmath_boolean_mask_ops()
{
const int PacketSize = internal::unpacket_traits<Packet>::size;
const int size = 2*PacketSize;
@@ -82,9 +97,18 @@ template<typename Scalar,typename Packet> void packetmath_boolean()
{
data1[i] = internal::random<Scalar>();
}
- CHECK_CWISE2_IF(true, internal::por, internal::por);
- CHECK_CWISE2_IF(true, internal::pxor, internal::pxor);
- CHECK_CWISE2_IF(true, internal::pand, internal::pand);
+ CHECK_CWISE1(internal::ptrue, internal::ptrue);
+ CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
+ for (int i = 0; i < PacketSize; ++i) {
+ data1[i] = Scalar(i);
+ data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
+ }
+ CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
+}
+
+template<>
+void packetmath_boolean_mask_ops<bool, internal::Packet16b>()
+{
}
template<typename Scalar,typename Packet> void packetmath()
@@ -171,9 +195,6 @@ template<typename Scalar,typename Packet> void packetmath()
CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul);
CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
- CHECK_CWISE1(internal::pnot, internal::pnot);
- CHECK_CWISE1(internal::pzero, internal::pzero);
- CHECK_CWISE1(internal::ptrue, internal::ptrue);
if (PacketTraits::HasNegate)
CHECK_CWISE1(internal::negate, internal::pnegate);
CHECK_CWISE1(numext::conj, internal::pconj);
@@ -252,7 +273,7 @@ template<typename Scalar,typename Packet> void packetmath()
ref[0] = Scalar(1);
for (int i=0; i<PacketSize; ++i)
- ref[0] *= data1[i];
+ ref[0] = REF_MUL(ref[0], data1[i]);
VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul");
for (int i=0; i<PacketSize; ++i)
@@ -272,6 +293,7 @@ template<typename Scalar,typename Packet> void packetmath()
}
}
+
if (PacketTraits::HasBlend) {
Packet thenPacket = internal::pload<Packet>(data1);
Packet elsePacket = internal::pload<Packet>(data2);
@@ -304,26 +326,22 @@ template<typename Scalar,typename Packet> void packetmath()
CHECK_CWISE3_IF(true, internal::pselect, internal::pselect);
}
- {
- for (int i = 0; i < PacketSize; ++i) {
- data1[i] = Scalar(i);
- data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
- }
- CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
- }
-
CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt);
for (int i=0; i<size; ++i)
{
data1[i] = internal::random<Scalar>();
}
- CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
+ CHECK_CWISE1(internal::pzero, internal::pzero);
+ CHECK_CWISE2_IF(true, internal::por, internal::por);
+ CHECK_CWISE2_IF(true, internal::pxor, internal::pxor);
+ CHECK_CWISE2_IF(true, internal::pand, internal::pand);
- packetmath_boolean<Scalar, Packet>();
+ packetmath_boolean_mask_ops<Scalar, Packet>();
}
+
template<typename Scalar,typename Packet> void packetmath_real()
{
typedef internal::packet_traits<Scalar> PacketTraits;
@@ -753,7 +771,7 @@ EIGEN_DECLARE_TEST(packetmath)
CALL_SUBTEST_12( test::runner<std::complex<double> >::run() );
CALL_SUBTEST_13(( packetmath<half,internal::packet_traits<half>::type>() ));
#ifdef EIGEN_PACKET_MATH_SSE_H
- CALL_SUBTEST_14(( packetmath_boolean<bool,internal::packet_traits<bool>::type>() ));
+ CALL_SUBTEST_14(( packetmath<bool,internal::packet_traits<bool>::type>() ));
#endif
g_first_pass = false;
}