diff options
author | 2017-06-27 16:58:00 -0400 | |
---|---|---|
committer | 2017-06-27 21:20:53 +0000 | |
commit | 279091ef85eec98969b72805bbf613f1c0660380 (patch) | |
tree | 968ef9fa3de43a9ba1ee519609b6e07933939d80 /src/jumper/SkJumper_stages.cpp | |
parent | 7a86987815be3af78fe3801d4f347ebb0c63141a (diff) |
specialize loaders for dst registers, to avoid move/swap stages
Bug: skia:
Change-Id: I75d82ef2226c5f116b7de2208c4e914739414b6d
Reviewed-on: https://skia-review.googlesource.com/20984
Commit-Queue: Mike Reed <reed@google.com>
Reviewed-by: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/jumper/SkJumper_stages.cpp')
-rw-r--r-- | src/jumper/SkJumper_stages.cpp | 75 |
1 file changed, 67 insertions, 8 deletions
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp index 9ca5a08776..e3c76fc31b 100644 --- a/src/jumper/SkJumper_stages.cpp +++ b/src/jumper/SkJumper_stages.cpp @@ -501,6 +501,13 @@ STAGE(clamp_a) { b = min(b, a); } +STAGE(clamp_a_dst) { + da = min(da, 1.0f); + dr = min(dr, da); + dg = min(dg, da); + db = min(db, da); +} + STAGE(set_rgb) { auto rgb = (const float*)ctx; r = rgb[0]; @@ -513,6 +520,12 @@ STAGE(swap_rb) { b = tmp; } +STAGE(swap_rb_dst) { + auto tmp = dr; + dr = db; + db = tmp; +} + STAGE(swap) { auto swap = [](F& v, F& dv) { auto tmp = v; @@ -549,15 +562,21 @@ STAGE(unpremul) { b *= scale; } +SI F from_srgb(F s) { + auto lo = s * (1/12.92f); + auto hi = mad(s*s, mad(s, 0.3000f, 0.6975f), 0.0025f); + return if_then_else(s < 0.055f, lo, hi); +} + STAGE(from_srgb) { - auto fn = [&](F s) { - auto lo = s * (1/12.92f); - auto hi = mad(s*s, mad(s, 0.3000f, 0.6975f), 0.0025f); - return if_then_else(s < 0.055f, lo, hi); - }; - r = fn(r); - g = fn(g); - b = fn(b); + r = from_srgb(r); + g = from_srgb(g); + b = from_srgb(b); +} +STAGE(from_srgb_dst) { + dr = from_srgb(dr); + dg = from_srgb(dg); + db = from_srgb(db); } STAGE(to_srgb) { auto fn = [&](F l) { @@ -780,6 +799,12 @@ STAGE(load_a8) { r = g = b = 0.0f; a = from_byte(load<U8>(ptr, tail)); } +STAGE(load_a8_dst) { + auto ptr = *(const uint8_t**)ctx + x; + + dr = dg = db = 0.0f; + da = from_byte(load<U8>(ptr, tail)); +} STAGE(gather_a8) { const uint8_t* ptr; U32 ix = ix_and_ptr(&ptr, ctx, r,g); @@ -799,6 +824,12 @@ STAGE(load_g8) { r = g = b = from_byte(load<U8>(ptr, tail)); a = 1.0f; } +STAGE(load_g8_dst) { + auto ptr = *(const uint8_t**)ctx + x; + + dr = dg = db = from_byte(load<U8>(ptr, tail)); + da = 1.0f; +} STAGE(gather_g8) { const uint8_t* ptr; U32 ix = ix_and_ptr(&ptr, ctx, r,g); @@ -820,6 +851,12 @@ STAGE(load_565) { from_565(load<U16>(ptr, tail), &r,&g,&b); a = 1.0f; } +STAGE(load_565_dst) { + auto ptr = *(const uint16_t**)ctx + x; + + from_565(load<U16>(ptr, tail), 
&dr,&dg,&db); + da = 1.0f; +} STAGE(gather_565) { const uint16_t* ptr; U32 ix = ix_and_ptr(&ptr, ctx, r,g); @@ -839,6 +876,10 @@ STAGE(load_4444) { auto ptr = *(const uint16_t**)ctx + x; from_4444(load<U16>(ptr, tail), &r,&g,&b,&a); } +STAGE(load_4444_dst) { + auto ptr = *(const uint16_t**)ctx + x; + from_4444(load<U16>(ptr, tail), &dr,&dg,&db,&da); +} STAGE(gather_4444) { const uint16_t* ptr; U32 ix = ix_and_ptr(&ptr, ctx, r,g); @@ -857,6 +898,10 @@ STAGE(load_8888) { auto ptr = *(const uint32_t**)ctx + x; from_8888(load<U32>(ptr, tail), &r,&g,&b,&a); } +STAGE(load_8888_dst) { + auto ptr = *(const uint32_t**)ctx + x; + from_8888(load<U32>(ptr, tail), &dr,&dg,&db,&da); +} STAGE(gather_8888) { const uint32_t* ptr; U32 ix = ix_and_ptr(&ptr, ctx, r,g); @@ -882,6 +927,16 @@ STAGE(load_f16) { b = from_half(B); a = from_half(A); } +STAGE(load_f16_dst) { + auto ptr = *(const uint64_t**)ctx + x; + + U16 R,G,B,A; + load4((const uint16_t*)ptr,tail, &R,&G,&B,&A); + dr = from_half(R); + dg = from_half(G); + db = from_half(B); + da = from_half(A); +} STAGE(gather_f16) { const uint64_t* ptr; U32 ix = ix_and_ptr(&ptr, ctx, r,g); @@ -939,6 +994,10 @@ STAGE(load_f32) { auto ptr = *(const float**)ctx + 4*x; load4(ptr,tail, &r,&g,&b,&a); } +STAGE(load_f32_dst) { + auto ptr = *(const float**)ctx + 4*x; + load4(ptr,tail, &dr,&dg,&db,&da); +} STAGE(store_f32) { auto ptr = *(float**)ctx + 4*x; store4(ptr,tail, r,g,b,a); |