diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2020-04-20 20:16:28 +0000 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2020-04-20 20:16:28 +0000 |
commit | 2f6ddaa25c605b3fdfb991ebd6c4e945c81f1067 (patch) | |
tree | a7e0a3067141f79156da0dae03f9a3c9055d40e2 /Eigen/src/Core/functors | |
parent | 00f6340153860ffb3e4776a3f42aa851b596a094 (diff) |
Add partial vectorization for matrices and tensors of bool. This speeds up boolean operations on Tensors by up to 25x.
Benchmark numbers for the logical AND of two NxN tensors:
name old time/op new time/op delta
BM_booleanAnd_1T/3 [using 1 threads] 14.6ns ± 0% 14.4ns ± 0% -0.96%
BM_booleanAnd_1T/4 [using 1 threads] 20.5ns ±12% 9.0ns ± 0% -56.07%
BM_booleanAnd_1T/7 [using 1 threads] 41.7ns ± 0% 10.5ns ± 0% -74.87%
BM_booleanAnd_1T/8 [using 1 threads] 52.1ns ± 0% 10.1ns ± 0% -80.59%
BM_booleanAnd_1T/10 [using 1 threads] 76.3ns ± 0% 13.8ns ± 0% -81.87%
BM_booleanAnd_1T/15 [using 1 threads] 167ns ± 0% 16ns ± 0% -90.45%
BM_booleanAnd_1T/16 [using 1 threads] 188ns ± 0% 16ns ± 0% -91.57%
BM_booleanAnd_1T/31 [using 1 threads] 667ns ± 0% 34ns ± 0% -94.83%
BM_booleanAnd_1T/32 [using 1 threads] 710ns ± 0% 35ns ± 0% -95.01%
BM_booleanAnd_1T/64 [using 1 threads] 2.80µs ± 0% 0.11µs ± 0% -95.93%
BM_booleanAnd_1T/128 [using 1 threads] 11.2µs ± 0% 0.4µs ± 0% -96.11%
BM_booleanAnd_1T/256 [using 1 threads] 44.6µs ± 0% 2.5µs ± 0% -94.31%
BM_booleanAnd_1T/512 [using 1 threads] 178µs ± 0% 10µs ± 0% -94.35%
BM_booleanAnd_1T/1k [using 1 threads] 717µs ± 0% 78µs ± 1% -89.07%
BM_booleanAnd_1T/2k [using 1 threads] 2.87ms ± 0% 0.31ms ± 1% -89.08%
BM_booleanAnd_1T/4k [using 1 threads] 11.7ms ± 0% 1.9ms ± 4% -83.55%
BM_booleanAnd_1T/10k [using 1 threads] 70.3ms ± 0% 17.2ms ± 4% -75.48%
Diffstat (limited to 'Eigen/src/Core/functors')
-rw-r--r-- | Eigen/src/Core/functors/BinaryFunctors.h | 15 |
1 file changed, 12 insertions, 3 deletions
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index 0ea40bab0..a2bc58c76 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -382,11 +382,14 @@ struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
 struct scalar_boolean_and_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pand(a,b); }
 };
 template<> struct functor_traits<scalar_boolean_and_op> {
   enum {
     Cost = NumTraits<bool>::AddCost,
-    PacketAccess = false
+    PacketAccess = true
   };
 };
 
@@ -398,11 +401,14 @@ template<> struct functor_traits<scalar_boolean_and_op> {
 struct scalar_boolean_or_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::por(a,b); }
 };
 template<> struct functor_traits<scalar_boolean_or_op> {
   enum {
     Cost = NumTraits<bool>::AddCost,
-    PacketAccess = false
+    PacketAccess = true
   };
 };
 
@@ -414,11 +420,14 @@ template<> struct functor_traits<scalar_boolean_or_op> {
 struct scalar_boolean_xor_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pxor(a,b); }
 };
 template<> struct functor_traits<scalar_boolean_xor_op> {
   enum {
     Cost = NumTraits<bool>::AddCost,
-    PacketAccess = false
+    PacketAccess = true
   };
 };
 