diff options
author | Mike Klein <mtklein@chromium.org> | 2018-04-16 12:56:24 -0400 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2018-04-16 23:42:00 +0000 |
commit | f946b41643e049f3b81f40316373ad22e1518818 (patch) | |
tree | dcdf90635953d443c8c5c0585f179c85f2cb457e /src/opts | |
parent | 1d4af54a7b61badf2a7e6c3730b2ef01b9cd1fa2 (diff) |
low-hanging bilerp_clamp_8888 wins
- support sampling bgra too
- lowp impl
Bug: skia:7810
Change-Id: I21db805483f612024802f2b508c140c42a029c54
Reviewed-on: https://skia-review.googlesource.com/121582
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Florin Malita <fmalita@chromium.org>
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkRasterPipeline_opts.h | 59 |
1 files changed, 56 insertions, 3 deletions
```diff
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 5a2c5b26a0..8332e4c12e 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -2264,7 +2264,7 @@ STAGE(gauss_a_to_rgba, Ctx::None) {
 }
 
 // A specialized fused image shader for clamp-x, clamp-y, non-sRGB sampling.
-STAGE(bilerp_clamp_8888, SkJumper_GatherCtx* ctx) {
+STAGE(bilerp_clamp_8888, const SkJumper_GatherCtx* ctx) {
     // (cx,cy) are the center of our sample.
     F cx = r,
       cy = g;
@@ -2600,6 +2600,7 @@ SI F floor_(F x) {
     return roundtrip - if_then_else(roundtrip > x, F(1), F(0));
 #endif
 }
+SI F fract(F x) { return x - floor_(x); }
 SI F abs_(F x) { return bit_cast<F>( bit_cast<I32>(x) & 0x7fffffff ); }
 
 // ~~~~~~ Basic / misc. stages ~~~~~~ //
@@ -3320,6 +3321,59 @@ STAGE_PP(srcover_bgra_8888, const SkJumper_MemoryCtx* ctx) {
     store_8888_(ptr, tail, b,g,r,a);
 }
 
+#if defined(SK_DISABLE_LOWP_BILERP_CLAMP_CLAMP_STAGE)
+    static void(*bilerp_clamp_8888)(void) = nullptr;
+#else
+STAGE_GP(bilerp_clamp_8888, const SkJumper_GatherCtx* ctx) {
+    // (cx,cy) are the center of our sample.
+    F cx = x,
+      cy = y;
+
+    // All sample points are at the same fractional offset (fx,fy).
+    // They're the 4 corners of a logical 1x1 pixel surrounding (x,y) at (0.5,0.5) offsets.
+    F fx = fract(cx + 0.5f),
+      fy = fract(cy + 0.5f);
+
+    // We'll accumulate the color of all four samples into {r,g,b,a} directly.
+    r = g = b = a = 0;
+
+    for (float dy = -0.5f; dy <= +0.5f; dy += 1.0f)
+    for (float dx = -0.5f; dx <= +0.5f; dx += 1.0f) {
+        // (x,y) are the coordinates of this sample point.
+        F x = cx + dx,
+          y = cy + dy;
+
+        // ix_and_ptr() will clamp to the image's bounds for us.
+        const uint32_t* ptr;
+        U32 ix = ix_and_ptr(&ptr, ctx, x,y);
+
+        U16 sr,sg,sb,sa;
+        from_8888(gather<U32>(ptr, ix), &sr,&sg,&sb,&sa);
+
+        // In bilinear interpolation, the 4 pixels at +/- 0.5 offsets from the sample pixel center
+        // are combined in direct proportion to their area overlapping that logical query pixel.
+        // At positive offsets, the x-axis contribution to that rectangle is fx,
+        // or (1-fx) at negative x. Same deal for y.
+        F sx = (dx > 0) ? fx : 1.0f - fx,
+          sy = (dy > 0) ? fy : 1.0f - fy;
+
+        // The sum of the four sx*sy products will add up to 1.0, so we can keep up to
+        // 8 bits of fractional precision here and not worry about overflowing 16-bit.
+        U16 area = cast<U16>(sx * sy * 256);
+
+        r += sr * area;
+        g += sg * area;
+        b += sb * area;
+        a += sa * area;
+    }
+
+    r /= 256;
+    g /= 256;
+    b /= 256;
+    a /= 256;
+}
+#endif
+
 // Now we'll add null stand-ins for stages we haven't implemented in lowp.
 // If a pipeline uses these stages, it'll boot it out of lowp into highp.
 
@@ -3360,8 +3414,7 @@ static NotImplemented
         alter_2pt_conical_unswap,
         mask_2pt_conical_nan,
         mask_2pt_conical_degenerates,
-        apply_vector_mask,
-        bilerp_clamp_8888;
+        apply_vector_mask;
 
 #endif//defined(JUMPER_IS_SCALAR) controlling whether we build lowp stages
 } // namespace lowp
```