diff options
-rw-r--r-- | bench/SkRasterPipelineBench.cpp | 19 | ||||
-rw-r--r-- | src/core/SkRasterPipeline.cpp | 5 | ||||
-rw-r--r-- | src/core/SkRasterPipeline.h | 7 | ||||
-rw-r--r-- | src/core/SkRasterPipelineBlitter.cpp | 46 | ||||
-rw-r--r-- | src/core/SkSRGB.h | 30 | ||||
-rw-r--r-- | src/opts/SkRasterPipeline_opts.h | 52 |
6 files changed, 79 insertions, 80 deletions
diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp index 15576c41d1..84c0930466 100644 --- a/bench/SkRasterPipelineBench.cpp +++ b/bench/SkRasterPipelineBench.cpp @@ -37,13 +37,22 @@ public: void* dst_ctx = dst; SkRasterPipeline p; - p.append(SkRasterPipeline::load_s_srgb, &src_ctx); + p.append(SkRasterPipeline::load_s_8888, &src_ctx); + p.append(SkRasterPipeline::from_srgb_s); p.append(SkRasterPipeline::scale_u8, &mask_ctx); - p.append(kF16 ? SkRasterPipeline::load_d_f16 - : SkRasterPipeline::load_d_srgb, &dst_ctx); + if (kF16) { + p.append(SkRasterPipeline::load_d_f16, &dst_ctx); + } else { + p.append(SkRasterPipeline::load_d_8888, &dst_ctx); + p.append(SkRasterPipeline::from_srgb_d); + } p.append(SkRasterPipeline::srcover); - p.append(kF16 ? SkRasterPipeline::store_f16 - : SkRasterPipeline::store_srgb, &dst_ctx); + if (kF16) { + p.append(SkRasterPipeline::store_f16, &dst_ctx); + } else { + p.append(SkRasterPipeline::to_srgb); + p.append(SkRasterPipeline::store_8888, &dst_ctx); + } auto compiled = p.compile(); while (loops --> 0) { diff --git a/src/core/SkRasterPipeline.cpp b/src/core/SkRasterPipeline.cpp index 04d187d9e6..e52a2429b0 100644 --- a/src/core/SkRasterPipeline.cpp +++ b/src/core/SkRasterPipeline.cpp @@ -11,6 +11,11 @@ SkRasterPipeline::SkRasterPipeline() {} void SkRasterPipeline::append(StockStage stage, void* ctx) { +#ifdef SK_DEBUG + if (fNum == (int)SK_ARRAY_COUNT(fStages)) { + this->dump(); + } +#endif SkASSERT(fNum < (int)SK_ARRAY_COUNT(fStages)); fStages[fNum++] = { stage, ctx }; } diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h index 0b49af59d1..b66d5bf52b 100644 --- a/src/core/SkRasterPipeline.h +++ b/src/core/SkRasterPipeline.h @@ -56,15 +56,14 @@ #define SK_RASTER_PIPELINE_STAGES(M) \ M(trace) M(registers) \ - M(move_src_dst) M(move_dst_src) M(swap_rb) \ + M(move_src_dst) M(move_dst_src) M(swap_rb) M(swap_drdb) \ M(clamp_0) M(clamp_a) M(clamp_1) \ M(unpremul) M(premul) \ M(from_srgb_s) M(from_srgb_d) M(to_srgb) \ M(constant_color) M(store_f32) \ M(load_s_565) M(load_d_565) M(store_565) \ - M(load_s_srgb) M(load_d_srgb) M(store_srgb) \ M(load_s_f16) M(load_d_f16) M(store_f16) \ - M(load_s_8888) M(store_8888) \ + M(load_s_8888) M(load_d_8888) M(store_8888) \ M(scale_u8) M(scale_constant_float) \ M(lerp_u8) M(lerp_565) M(lerp_constant_float) \ M(dstatop) M(dstin) M(dstout) M(dstover) \ @@ -88,7 +87,7 @@ class SkRasterPipeline { public: // No pipeline may be more than kMaxStages long. - static const int kMaxStages = 32; + static const int kMaxStages = 48; SkRasterPipeline(); diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp index ca6313eccd..480a23c6b3 100644 --- a/src/core/SkRasterPipelineBlitter.cpp +++ b/src/core/SkRasterPipelineBlitter.cpp @@ -159,36 +159,34 @@ void SkRasterPipelineBlitter::append_load_d(SkRasterPipeline* p) const { SkASSERT(supported(fDst.info())); switch (fDst.info().colorType()) { - case kN32_SkColorType: - if (fDst.info().gammaCloseToSRGB()) { - p->append(SkRasterPipeline::load_d_srgb, &fDstPtr); - } - break; - case kRGBA_F16_SkColorType: - p->append(SkRasterPipeline::load_d_f16, &fDstPtr); - break; - case kRGB_565_SkColorType: - p->append(SkRasterPipeline::load_d_565, &fDstPtr); - break; + case kRGB_565_SkColorType: p->append(SkRasterPipeline::load_d_565, &fDstPtr); break; + case kBGRA_8888_SkColorType: + case kRGBA_8888_SkColorType: p->append(SkRasterPipeline::load_d_8888, &fDstPtr); break; + case kRGBA_F16_SkColorType: p->append(SkRasterPipeline::load_d_f16, &fDstPtr); break; default: break; } + if (fDst.info().colorType() == kBGRA_8888_SkColorType) { + p->append(SkRasterPipeline::swap_drdb); + } + if (fDst.info().gammaCloseToSRGB()) { + p->append(SkRasterPipeline::from_srgb_d); + } } void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p) const { SkASSERT(supported(fDst.info())); + if (fDst.info().gammaCloseToSRGB()) { + p->append(SkRasterPipeline::to_srgb); + } + if (fDst.info().colorType() == kBGRA_8888_SkColorType) { + p->append(SkRasterPipeline::swap_rb); + } switch (fDst.info().colorType()) { - case kN32_SkColorType: - if (fDst.info().gammaCloseToSRGB()) { - p->append(SkRasterPipeline::store_srgb, &fDstPtr); - } - break; - case kRGBA_F16_SkColorType: - p->append(SkRasterPipeline::store_f16, &fDstPtr); - break; - case kRGB_565_SkColorType: - p->append(SkRasterPipeline::store_565, &fDstPtr); - break; + case kRGB_565_SkColorType: p->append(SkRasterPipeline::store_565, &fDstPtr); break; + case kBGRA_8888_SkColorType: + case kRGBA_8888_SkColorType: p->append(SkRasterPipeline::store_8888, &fDstPtr); break; + case kRGBA_F16_SkColorType: p->append(SkRasterPipeline::store_f16, &fDstPtr); break; default: break; } } @@ -198,7 +196,9 @@ void SkRasterPipelineBlitter::append_blend(SkRasterPipeline* p) const { } void SkRasterPipelineBlitter::maybe_clamp(SkRasterPipeline* p) const { - if (SkBlendMode_CanOverflow(fBlend)) { p->append(SkRasterPipeline::clamp_a); } + if (SkBlendMode_CanOverflow(fBlend)) { + p->append(SkRasterPipeline::clamp_a); + } } void SkRasterPipelineBlitter::blitH(int x, int y, int w) { diff --git a/src/core/SkSRGB.h b/src/core/SkSRGB.h index d1b22133c6..c182f329de 100644 --- a/src/core/SkSRGB.h +++ b/src/core/SkSRGB.h @@ -29,8 +29,7 @@ static inline V sk_clamp_0_255(const V& x) { return V::Min(V::Max(x, 0.0f), 255.0f); } -// This should probably only be called from sk_linear_to_srgb(). -// It generally doesn't make sense to work with sRGB floats. +// [0.0f, 1.0f] -> [0.0f, 255.xf], for small x. Correct after truncation. template <typename V> static inline V sk_linear_to_srgb_needs_trunc(const V& x) { // Approximation of the sRGB gamma curve (within 1 when scaled to 8-bit pixels). @@ -51,20 +50,37 @@ static inline V sk_linear_to_srgb_needs_trunc(const V& x) { return (x < 0.0048f).thenElse(lo, hi); } +// [0.0f, 1.0f] -> [0.0f, 1.xf], for small x. Correct after rounding. +template <typename V> +static inline V sk_linear_to_srgb_needs_round(const V& x) { + // Tuned to round trip each sRGB byte after rounding. + auto rsqrt = x.rsqrt(), + sqrt = rsqrt.invert(), + ftrt = rsqrt.rsqrt(); + + auto lo = 12.46f * x; + + auto hi = SkNx_fma(V{+0.411192f}, ftrt, + SkNx_fma(V{+0.689206f}, sqrt, + V{-0.0988f})); + return (x < 0.0043f).thenElse(lo, hi); +} + template <int N> static inline SkNx<N,int> sk_linear_to_srgb(const SkNx<N,float>& x) { auto f = sk_linear_to_srgb_needs_trunc(x); return SkNx_cast<int>(sk_clamp_0_255(f)); } + // sRGB -> linear, using math instead of table lookups. -template <int N> -static inline SkNx<N,float> sk_linear_from_srgb_math(const SkNx<N,float>& x) { +template <typename V> +static inline V sk_linear_from_srgb_math(const V& x) { // Non-linear segment of sRGB curve approximated by // l = 0.0025 + 0.6975x^2 + 0.3x^3 - const SkNx<N,float> k0 = 0.0025f, - k2 = 0.6975f, - k3 = 0.3000f; + const V k0 = 0.0025f, + k2 = 0.6975f, + k3 = 0.3000f; auto hi = SkNx_fma(x*x, SkNx_fma(x, k3, k2), k0); // Linear segment of sRGB curve: the normal slope, extended a little further than normal. diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h index 8a261de027..589f388682 100644 --- a/src/opts/SkRasterPipeline_opts.h +++ b/src/opts/SkRasterPipeline_opts.h @@ -288,9 +288,8 @@ STAGE(move_dst_src, true) { a = da; } -STAGE(swap_rb, true) { - SkTSwap(r, b); -} +STAGE(swap_rb, true) { SkTSwap( r, b); } +STAGE(swap_drdb, true) { SkTSwap(dr, db); } STAGE(from_srgb_s, true) { r = sk_linear_from_srgb_math(r); @@ -303,7 +302,9 @@ STAGE(from_srgb_d, true) { db = sk_linear_from_srgb_math(db); } STAGE(to_srgb, true) { - // TODO + r = sk_linear_to_srgb_needs_round(r); + g = sk_linear_to_srgb_needs_round(g); + b = sk_linear_to_srgb_needs_round(b); } // The default shader produces a constant color (from the SkPaint). @@ -478,42 +479,14 @@ STAGE(store_f32, false) { } -// Load 8-bit SkPMColor-order sRGB. -STAGE(load_d_srgb, true) { - auto ptr = *(const uint32_t**)ctx + x; - - auto px = load<kIsTail>(tail, ptr); - auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); }; - dr = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff)); - dg = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff)); - db = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff)); - da = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT )); -} - -STAGE(load_s_srgb, true) { - auto ptr = *(const uint32_t**)ctx + x; - - auto px = load<kIsTail>(tail, ptr); - auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); }; - r = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff)); - g = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff)); - b = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff)); - a = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT )); -} - -STAGE(store_srgb, false) { - auto ptr = *(uint32_t**)ctx + x; - store<kIsTail>(tail, ( sk_linear_to_srgb(r) << SK_R32_SHIFT - | sk_linear_to_srgb(g) << SK_G32_SHIFT - | sk_linear_to_srgb(b) << SK_B32_SHIFT - | SkNx_cast<int>(0.5f + 255.0f * a) << SK_A32_SHIFT), (int*)ptr); -} - STAGE(load_s_8888, true) { auto ptr = *(const uint32_t**)ctx + x; from_8888(load<kIsTail>(tail, ptr), &r, &g, &b, &a); } - +STAGE(load_d_8888, true) { + auto ptr = *(const uint32_t**)ctx + x; + from_8888(load<kIsTail>(tail, ptr), &dr, &dg, &db, &da); +} STAGE(store_8888, false) { auto ptr = *(uint32_t**)ctx + x; store<kIsTail>(tail, ( SkNx_cast<int>(255.0f * r + 0.5f) << 0 @@ -523,9 +496,6 @@ STAGE(store_8888, false) { } RGBA_XFERMODE(clear) { return 0.0f; } -//RGBA_XFERMODE(src) { return s; } // This would be a no-op stage, so we just omit it. -RGBA_XFERMODE(dst) { return d; } - RGBA_XFERMODE(srcatop) { return s*da + d*inv(sa); } RGBA_XFERMODE(srcin) { return s * da; } RGBA_XFERMODE(srcout) { return s * inv(da); } @@ -927,8 +897,8 @@ namespace SK_OPTS_NS { return Memset16{(uint16_t**)dst, SkPackRGB16(src.r() * SK_R16_MASK + 0.5f, src.g() * SK_G16_MASK + 0.5f, src.b() * SK_B16_MASK + 0.5f)}; - case SkRasterPipeline::store_srgb: - return Memset32{(uint32_t**)dst, Sk4f_toS32(src.to4f_pmorder())}; + case SkRasterPipeline::store_8888: + return Memset32{(uint32_t**)dst, Sk4f_toL32(src.to4f())}; case SkRasterPipeline::store_f16: return Memset64{(uint64_t**)dst, src.toF16()}; |