low-hanging bilerp_clamp_8888 wins

 - support sampling bgra too
 - lowp impl

Bug: skia:7810
Change-Id: I21db805483f612024802f2b508c140c42a029c54
Reviewed-on: https://skia-review.googlesource.com/121582
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Florin Malita <fmalita@chromium.org>
author: Mike Klein <mtklein@chromium.org> 2018-04-16 12:56:24 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2018-04-16 23:42:00 +0000
commit: f946b41643e049f3b81f40316373ad22e1518818 (patch)
tree: dcdf90635953d443c8c5c0585f179c85f2cb457e /src/opts
parent: 1d4af54a7b61badf2a7e6c3730b2ef01b9cd1fa2 (diff)
1 files changed, 56 insertions, 3 deletions
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 5a2c5b26a0..8332e4c12e 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -2264,7 +2264,7 @@ STAGE(gauss_a_to_rgba, Ctx::None) {
 }
 
 // A specialized fused image shader for clamp-x, clamp-y, non-sRGB sampling.
-STAGE(bilerp_clamp_8888, SkJumper_GatherCtx* ctx) {
+STAGE(bilerp_clamp_8888, const SkJumper_GatherCtx* ctx) {
     // (cx,cy) are the center of our sample.
     F cx = r,
       cy = g;
@@ -2600,6 +2600,7 @@ SI F floor_(F x) {
     return roundtrip - if_then_else(roundtrip > x, F(1), F(0));
 #endif
 }
+SI F fract(F x) { return x - floor_(x); }  // Fractional part: x minus floor_(x).
 SI F abs_(F x) { return bit_cast<F>( bit_cast<I32>(x) & 0x7fffffff ); }  // Masks off bit 31 of each 32-bit pattern (presumably the float sign bit).
 
 // ~~~~~~ Basic / misc. stages ~~~~~~ //
@@ -3320,6 +3321,59 @@ STAGE_PP(srcover_bgra_8888, const SkJumper_MemoryCtx* ctx) {
     store_8888_(ptr, tail, b,g,r,a);
 }
 
+#if defined(SK_DISABLE_LOWP_BILERP_CLAMP_CLAMP_STAGE)
+    static void(*bilerp_clamp_8888)(void) = nullptr;  // Null stand-in: with this define set, no lowp version of the stage is built.
+#else
+STAGE_GP(bilerp_clamp_8888, const SkJumper_GatherCtx* ctx) {
+    // Lowp bilinear sampler: blends the four pixels around (x,y).  (cx,cy) are the center of our sample.
+    F cx = x,
+      cy = y;
+
+    // All sample points are at the same fractional offset (fx,fy).
+    // They're the 4 corners of a logical 1x1 pixel surrounding (x,y) at (0.5,0.5) offsets.
+    F fx = fract(cx + 0.5f),
+      fy = fract(cy + 0.5f);
+
+    // We'll accumulate the color of all four samples into {r,g,b,a} directly.
+    r = g = b = a = 0;
+
+    for (float dy = -0.5f; dy <= +0.5f; dy += 1.0f)
+    for (float dx = -0.5f; dx <= +0.5f; dx += 1.0f) {
+        // (x,y) are the coordinates of this sample point; these locals shadow the x,y read above.
+        F x = cx + dx,
+          y = cy + dy;
+
+        // ix_and_ptr() will clamp to the image's bounds for us.
+        const uint32_t* ptr;
+        U32 ix = ix_and_ptr(&ptr, ctx, x,y);
+        // Gather the 32-bit pixels at ix; from_8888() splits them into U16 sr,sg,sb,sa.
+        U16 sr,sg,sb,sa;
+        from_8888(gather<U32>(ptr, ix), &sr,&sg,&sb,&sa);
+
+        // In bilinear interpolation, the 4 pixels at +/- 0.5 offsets from the sample pixel center
+        // are combined in direct proportion to their area overlapping that logical query pixel.
+        // At positive offsets, the x-axis contribution to that rectangle is fx,
+        // or (1-fx) at negative x.  Same deal for y.
+        F sx = (dx > 0) ? fx : 1.0f - fx,
+          sy = (dy > 0) ? fy : 1.0f - fy;
+
+        // The sum of the four sx*sy products will add up to 1.0, so we can keep up to
+        // 8 bits of fractional precision here and not worry about overflowing 16-bit.
+        U16 area = cast<U16>(sx * sy * 256);
+        // NOTE(review): if cast<U16> truncates, the four areas can sum to a bit under 256 -- confirm.
+        r += sr * area;
+        g += sg * area;
+        b += sb * area;
+        a += sa * area;
+    }
+    // Remove the 256x scale that area introduced into each accumulated channel.
+    r /= 256;
+    g /= 256;
+    b /= 256;
+    a /= 256;
+}
+#endif
+
 // Now we'll add null stand-ins for stages we haven't implemented in lowp.
 // If a pipeline uses these stages, it'll boot it out of lowp into highp.
 
@@ -3360,8 +3414,7 @@ static NotImplemented
         alter_2pt_conical_unswap,
         mask_2pt_conical_nan,
         mask_2pt_conical_degenerates,
-        apply_vector_mask,
-        bilerp_clamp_8888;
+        apply_vector_mask;
 
 #endif//defined(JUMPER_IS_SCALAR) controlling whether we build lowp stages
 }  // namespace lowp
author	Mike Klein <mtklein@chromium.org>	2018-04-16 12:56:24 -0400
committer	Skia Commit-Bot <skia-commit-bot@chromium.org>	2018-04-16 23:42:00 +0000
commit	f946b41643e049f3b81f40316373ad22e1518818 (patch)
tree	dcdf90635953d443c8c5c0585f179c85f2cb457e /src/opts
parent	1d4af54a7b61badf2a7e6c3730b2ef01b9cd1fa2 (diff)