From 68ff92f78addb3c862cc28bcee2b97ff4679c290 Mon Sep 17 00:00:00 2001
From: Mike Klein
Date: Mon, 26 Mar 2018 13:04:14 -0400
Subject: specialize arm64 allTrue()/anyTrue()

aarch64 added vector-wise add/mul/min/max instructions.
We can use min and max to implement allTrue() and anyTrue(),
respectively.

(This CL is mostly so I don't forget these intrinsics exist.)

In assembly, these actually compile to two instructions, the
folding operation into a vector register, then a move from the
vector register to a general purpose register.

Change-Id: Ia6a999ac250740de765e871094e911979a8711c7
Reviewed-on: https://skia-review.googlesource.com/116482
Reviewed-by: Chris Dalton
Commit-Queue: Mike Klein
---
 src/opts/SkNx_neon.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'src')

diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index f8170ef962..52247c8f66 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -112,12 +112,20 @@ public:
     }
 
     AI bool allTrue() const {
+    #if defined(__aarch64__)
+        return 0 != vminv_u32(vreinterpret_u32_f32(fVec));
+    #else
         auto v = vreinterpret_u32_f32(fVec);
         return vget_lane_u32(v,0) && vget_lane_u32(v,1);
+    #endif
     }
     AI bool anyTrue() const {
+    #if defined(__aarch64__)
+        return 0 != vmaxv_u32(vreinterpret_u32_f32(fVec));
+    #else
         auto v = vreinterpret_u32_f32(fVec);
         return vget_lane_u32(v,0) || vget_lane_u32(v,1);
+    #endif
     }
 
     AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
@@ -229,14 +237,22 @@ public:
     }
 
     AI bool allTrue() const {
+    #if defined(__aarch64__)
+        return 0 != vminvq_u32(vreinterpretq_u32_f32(fVec));
+    #else
         auto v = vreinterpretq_u32_f32(fVec);
         return vgetq_lane_u32(v,0) && vgetq_lane_u32(v,1)
             && vgetq_lane_u32(v,2) && vgetq_lane_u32(v,3);
+    #endif
     }
     AI bool anyTrue() const {
+    #if defined(__aarch64__)
+        return 0 != vmaxvq_u32(vreinterpretq_u32_f32(fVec));
+    #else
         auto v = vreinterpretq_u32_f32(fVec);
         return vgetq_lane_u32(v,0) || vgetq_lane_u32(v,1)
             || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3);
+    #endif
     }
 
     AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
-- 
cgit v1.2.3