From f0348c2413c5c72820a42749879d41c6dd4ab16c Mon Sep 17 00:00:00 2001
From: Mike Klein
Date: Thu, 3 Nov 2016 14:43:48 -0400
Subject: Implement SkNx_fma() for Sk4f on ARMv8.

I was looking at the disassembly of matrix_4x5() and noticed it didn't
have any FMAs. This makes things that call SkNx_fma() actually use the
FMA instruction.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=4400
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Change-Id: Ia353a77b0ca14385a43b564997b05586f9472996
Reviewed-on: https://skia-review.googlesource.com/4400
Reviewed-by: Matt Sarett
Commit-Queue: Mike Klein
---
 src/opts/SkNx_neon.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/opts')

diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index b5d89891d1..c85d583ea2 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -218,6 +218,12 @@ public:
     float32x4_t fVec;
 };
 
+#if defined(SK_CPU_ARM64)
+    AI static Sk4f SkNx_fma(const Sk4f& f, const Sk4f& m, const Sk4f& a) {
+        return vfmaq_f32(a.fVec, f.fVec, m.fVec);
+    }
+#endif
+
 // It's possible that for our current use cases, representing this as
 // half a uint16x8_t might be better than representing it as a uint16x4_t.
 // It'd make conversion to Sk4b one step simpler.
-- 
cgit v1.2.3