diff options
author | Mike Klein <mtklein@chromium.org> | 2018-03-10 11:34:53 -0500 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2018-03-14 02:38:52 +0000 |
commit | d8853ec0fd01ecd45c225aee1f22b3d342dcacd7 (patch) | |
tree | 671a3ec019016f1f42565b7f99ab3ecf9e140a60 | |
parent | ba321b601782b4dd7cd060506dcc222ccdaac408 (diff) |
exact divide by 255 with NEON
Change-Id: Ib121eb0d5af1f22f48f517fe909112a77d92032e
Reviewed-on: https://skia-review.googlesource.com/113666
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
-rw-r--r-- | src/opts/SkRasterPipeline_opts.h | 5 |
-rw-r--r-- | tests/MathTest.cpp | 20 |
2 files changed, 25 insertions, 0 deletions
```diff
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 212a5d16aa..73624d1b02 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -2487,6 +2487,11 @@ static void start_pipeline(const size_t x0, const size_t y0,
 SI U16 div255(U16 v) {
 #if 0
     return (v+127)/255; // The ideal rounding divide by 255.
+#elif 1 && defined(__ARM_NEON)
+    // With NEON we can compute (v+127)/255 as (v + ((v+128)>>8) + 128)>>8
+    // just as fast as we can do the approximation below, so might as well be correct!
+    // First we compute v + ((v+128)>>8), then one more round of (...+128)>>8 to finish up.
+    return vrshrq_n_u16(vrsraq_n_u16(v, v, 8), 8);
 #else
     return (v+255)/256; // A good approximation of (v+127)/255.
 #endif
diff --git a/tests/MathTest.cpp b/tests/MathTest.cpp
index 94b15207f7..565f76f819 100644
--- a/tests/MathTest.cpp
+++ b/tests/MathTest.cpp
@@ -750,3 +750,23 @@ DEF_TEST(DoubleSaturate32, reporter) {
         REPORTER_ASSERT(reporter, r.fExpectedInt == i);
     }
 }
+
+#if defined(__ARM_NEON)
+    #include <arm_neon.h>
+
+    DEF_TEST(NeonU16Div255, r) {
+
+        for (int v = 0; v <= 255*255; v++) {
+            int want = (v + 127)/255;
+
+            uint16x8_t V = vdupq_n_u16(v);
+            int got = vrshrq_n_u16(vrsraq_n_u16(V, V, 8), 8)[0];
+
+            if (got != want) {
+                SkDebugf("%d -> %d, want %d\n", v, got, want);
+            }
+            REPORTER_ASSERT(r, got == want);
+        }
+    }
+
+#endif
```