From f946b41643e049f3b81f40316373ad22e1518818 Mon Sep 17 00:00:00 2001
From: Mike Klein
Date: Mon, 16 Apr 2018 12:56:24 -0400
Subject: low-hanging bilerp_clamp_8888 wins

- support sampling bgra too
- lowp impl

Bug: skia:7810
Change-Id: I21db805483f612024802f2b508c140c42a029c54
Reviewed-on: https://skia-review.googlesource.com/121582
Commit-Queue: Mike Klein
Reviewed-by: Florin Malita
---
 src/opts/SkRasterPipeline_opts.h | 59 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 3 deletions(-)

(limited to 'src/opts')

diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 5a2c5b26a0..8332e4c12e 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -2264,7 +2264,7 @@ STAGE(gauss_a_to_rgba, Ctx::None) {
 }
 
 // A specialized fused image shader for clamp-x, clamp-y, non-sRGB sampling.
-STAGE(bilerp_clamp_8888, SkJumper_GatherCtx* ctx) {
+STAGE(bilerp_clamp_8888, const SkJumper_GatherCtx* ctx) {
     // (cx,cy) are the center of our sample.
     F cx = r,
       cy = g;
@@ -2600,6 +2600,7 @@ SI F floor_(F x) {
     return roundtrip - if_then_else(roundtrip > x, F(1), F(0));
 #endif
 }
+SI F fract(F x) { return x - floor_(x); }
 SI F abs_(F x) { return bit_cast<F>( bit_cast<I32>(x) & 0x7fffffff ); }
 
 // ~~~~~~ Basic / misc. stages ~~~~~~ //
@@ -3320,6 +3321,59 @@ STAGE_PP(srcover_bgra_8888, const SkJumper_MemoryCtx* ctx) {
     store_8888_(ptr, tail, b,g,r,a);
 }
 
+#if defined(SK_DISABLE_LOWP_BILERP_CLAMP_CLAMP_STAGE)
+    static void(*bilerp_clamp_8888)(void) = nullptr;
+#else
+STAGE_GP(bilerp_clamp_8888, const SkJumper_GatherCtx* ctx) {
+    // (cx,cy) are the center of our sample.
+    F cx = x,
+      cy = y;
+
+    // All sample points are at the same fractional offset (fx,fy).
+    // They're the 4 corners of a logical 1x1 pixel surrounding (x,y) at (0.5,0.5) offsets.
+    F fx = fract(cx + 0.5f),
+      fy = fract(cy + 0.5f);
+
+    // We'll accumulate the color of all four samples into {r,g,b,a} directly.
+    r = g = b = a = 0;
+
+    for (float dy = -0.5f; dy <= +0.5f; dy += 1.0f)
+    for (float dx = -0.5f; dx <= +0.5f; dx += 1.0f) {
+        // (x,y) are the coordinates of this sample point.
+        F x = cx + dx,
+          y = cy + dy;
+
+        // ix_and_ptr() will clamp to the image's bounds for us.
+        const uint32_t* ptr;
+        U32 ix = ix_and_ptr(&ptr, ctx, x,y);
+
+        U16 sr,sg,sb,sa;
+        from_8888(gather<U32>(ptr, ix), &sr,&sg,&sb,&sa);
+
+        // In bilinear interpolation, the 4 pixels at +/- 0.5 offsets from the sample pixel center
+        // are combined in direct proportion to their area overlapping that logical query pixel.
+        // At positive offsets, the x-axis contribution to that rectangle is fx,
+        // or (1-fx) at negative x. Same deal for y.
+        F sx = (dx > 0) ? fx : 1.0f - fx,
+          sy = (dy > 0) ? fy : 1.0f - fy;
+
+        // The sum of the four sx*sy products will add up to 1.0, so we can keep up to
+        // 8 bits of fractional precision here and not worry about overflowing 16-bit.
+        U16 area = cast<U16>(sx * sy * 256);
+
+        r += sr * area;
+        g += sg * area;
+        b += sb * area;
+        a += sa * area;
+    }
+
+    r /= 256;
+    g /= 256;
+    b /= 256;
+    a /= 256;
+}
+#endif
+
 // Now we'll add null stand-ins for stages we haven't implemented in lowp.
 // If a pipeline uses these stages, it'll boot it out of lowp into highp.
 
@@ -3360,8 +3414,7 @@ static NotImplemented
         alter_2pt_conical_unswap,
         mask_2pt_conical_nan,
         mask_2pt_conical_degenerates,
-        apply_vector_mask,
-        bilerp_clamp_8888;
+        apply_vector_mask;
 
 #endif//defined(JUMPER_IS_SCALAR) controlling whether we build lowp stages
 } // namespace lowp
-- 
cgit v1.2.3