about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- src/opts/SkRasterPipeline_opts.h 5
-rw-r--r-- tests/MathTest.cpp 20
2 files changed, 25 insertions, 0 deletions
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 212a5d16aa..73624d1b02 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -2487,6 +2487,11 @@ static void start_pipeline(const size_t x0, const size_t y0,
SI U16 div255(U16 v) {
#if 0
return (v+127)/255; // The ideal rounding divide by 255.
+#elif 1 && defined(__ARM_NEON)
+ // With NEON we can compute (v+127)/255 as (v + ((v+128)>>8) + 128)>>8
+ // just as fast as we can do the approximation below, so might as well be correct!
+ // First we compute v + ((v+128)>>8), then one more round of (...+128)>>8 to finish up.
+ return vrshrq_n_u16(vrsraq_n_u16(v, v, 8), 8);
#else
return (v+255)/256; // A good approximation of (v+127)/255.
#endif
diff --git a/tests/MathTest.cpp b/tests/MathTest.cpp
index 94b15207f7..565f76f819 100644
--- a/tests/MathTest.cpp
+++ b/tests/MathTest.cpp
@@ -750,3 +750,23 @@ DEF_TEST(DoubleSaturate32, reporter) {
REPORTER_ASSERT(reporter, r.fExpectedInt == i);
}
}
+
+#if defined(__ARM_NEON)
+ #include <arm_neon.h>
+
+ DEF_TEST(NeonU16Div255, r) {
+ // Check the NEON shift-based expression against (v + 127)/255 for every v in [0, 255*255].
+ for (int v = 0; v <= 255*255; v++) {
+ int want = (v + 127)/255;  // reference value from plain scalar integer arithmetic
+ // Same intrinsic expression as the NEON branch of div255(); only element [0] is compared.
+ uint16x8_t V = vdupq_n_u16(v);
+ int got = vrshrq_n_u16(vrsraq_n_u16(V, V, 8), 8)[0];
+ // Print the mismatching input first so a failed assert below identifies which v disagreed.
+ if (got != want) {
+ SkDebugf("%d -> %d, want %d\n", v, got, want);
+ }
+ REPORTER_ASSERT(r, got == want);
+ }
+ }
+
+#endif