aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/functors
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-04-20 20:16:28 +0000
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-04-20 20:16:28 +0000
commit2f6ddaa25c605b3fdfb991ebd6c4e945c81f1067 (patch)
treea7e0a3067141f79156da0dae03f9a3c9055d40e2 /Eigen/src/Core/functors
parent00f6340153860ffb3e4776a3f42aa851b596a094 (diff)
Add partial vectorization for matrices and tensors of bool. This speeds up boolean operations on Tensors by up to 25x.
Benchmark numbers for the logical and of two NxN tensors: name old time/op new time/op delta BM_booleanAnd_1T/3 [using 1 threads] 14.6ns ± 0% 14.4ns ± 0% -0.96% BM_booleanAnd_1T/4 [using 1 threads] 20.5ns ±12% 9.0ns ± 0% -56.07% BM_booleanAnd_1T/7 [using 1 threads] 41.7ns ± 0% 10.5ns ± 0% -74.87% BM_booleanAnd_1T/8 [using 1 threads] 52.1ns ± 0% 10.1ns ± 0% -80.59% BM_booleanAnd_1T/10 [using 1 threads] 76.3ns ± 0% 13.8ns ± 0% -81.87% BM_booleanAnd_1T/15 [using 1 threads] 167ns ± 0% 16ns ± 0% -90.45% BM_booleanAnd_1T/16 [using 1 threads] 188ns ± 0% 16ns ± 0% -91.57% BM_booleanAnd_1T/31 [using 1 threads] 667ns ± 0% 34ns ± 0% -94.83% BM_booleanAnd_1T/32 [using 1 threads] 710ns ± 0% 35ns ± 0% -95.01% BM_booleanAnd_1T/64 [using 1 threads] 2.80µs ± 0% 0.11µs ± 0% -95.93% BM_booleanAnd_1T/128 [using 1 threads] 11.2µs ± 0% 0.4µs ± 0% -96.11% BM_booleanAnd_1T/256 [using 1 threads] 44.6µs ± 0% 2.5µs ± 0% -94.31% BM_booleanAnd_1T/512 [using 1 threads] 178µs ± 0% 10µs ± 0% -94.35% BM_booleanAnd_1T/1k [using 1 threads] 717µs ± 0% 78µs ± 1% -89.07% BM_booleanAnd_1T/2k [using 1 threads] 2.87ms ± 0% 0.31ms ± 1% -89.08% BM_booleanAnd_1T/4k [using 1 threads] 11.7ms ± 0% 1.9ms ± 4% -83.55% BM_booleanAnd_1T/10k [using 1 threads] 70.3ms ± 0% 17.2ms ± 4% -75.48%
Diffstat (limited to 'Eigen/src/Core/functors')
-rw-r--r--Eigen/src/Core/functors/BinaryFunctors.h15
1 files changed, 12 insertions, 3 deletions
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index 0ea40bab0..a2bc58c76 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -382,11 +382,14 @@ struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
struct scalar_boolean_and_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pand(a,b); }
};
template<> struct functor_traits<scalar_boolean_and_op> {
enum {
Cost = NumTraits<bool>::AddCost,
- PacketAccess = false
+ PacketAccess = true
};
};
@@ -398,11 +401,14 @@ template<> struct functor_traits<scalar_boolean_and_op> {
struct scalar_boolean_or_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::por(a,b); }
};
template<> struct functor_traits<scalar_boolean_or_op> {
enum {
Cost = NumTraits<bool>::AddCost,
- PacketAccess = false
+ PacketAccess = true
};
};
@@ -414,11 +420,14 @@ template<> struct functor_traits<scalar_boolean_or_op> {
struct scalar_boolean_xor_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pxor(a,b); }
};
template<> struct functor_traits<scalar_boolean_xor_op> {
enum {
Cost = NumTraits<bool>::AddCost,
- PacketAccess = false
+ PacketAccess = true
};
};