Diffstat (limited to 'src')
-rw-r--r--  src/jumper/SkJumper_stages.cpp       132
-rw-r--r--  src/jumper/SkJumper_stages_lowp.cpp   66
2 files changed, 99 insertions, 99 deletions
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index d192728419..1f9848e1fa 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -47,40 +47,40 @@ static const size_t N = sizeof(F) / sizeof(float);
     // General-purpose registers are also tight, so we put most of those on the stack too.
     // On ARMv7, we do the same so that we can make the r,g,b,a vectors wider.
     struct Params {
-        size_t x, y, tail;
+        size_t dx, dy, tail;
         F dr,dg,db,da;
     };
     using Stage = void(Params*, void** program, F r, F g, F b, F a);
 #else
     // We keep program the second argument, so that it's passed in rsi for load_and_inc().
-    using Stage = void(size_t tail, void** program, size_t x, size_t y, F,F,F,F, F,F,F,F);
+    using Stage = void(size_t tail, void** program, size_t dx, size_t dy, F,F,F,F, F,F,F,F);
 #endif

 MAYBE_MSABI
-extern "C" void WRAP(start_pipeline)(size_t x, size_t y, size_t xlimit, size_t ylimit,
+extern "C" void WRAP(start_pipeline)(size_t dx, size_t dy, size_t xlimit, size_t ylimit,
                                      void** program) {
     auto start = (Stage*)load_and_inc(program);
-    const size_t x0 = x;
-    for (; y < ylimit; y++) {
+    const size_t x0 = dx;
+    for (; dy < ylimit; dy++) {
     #if defined(__i386__) || defined(_M_IX86) || defined(__arm__)
-        Params params = { x0,y,0, 0,0,0,0 };
-        while (params.x + N <= xlimit) {
+        Params params = { x0,dy,0, 0,0,0,0 };
+        while (params.dx + N <= xlimit) {
             start(&params,program, 0,0,0,0);
-            params.x += N;
+            params.dx += N;
         }
-        if (size_t tail = xlimit - params.x) {
+        if (size_t tail = xlimit - params.dx) {
             params.tail = tail;
             start(&params,program, 0,0,0,0);
         }
     #else
-        x = x0;
-        while (x + N <= xlimit) {
-            start(0,program,x,y, 0,0,0,0, 0,0,0,0);
-            x += N;
+        dx = x0;
+        while (dx + N <= xlimit) {
+            start(0,program,dx,dy, 0,0,0,0, 0,0,0,0);
+            dx += N;
         }
-        if (size_t tail = xlimit - x) {
-            start(tail,program,x,y, 0,0,0,0, 0,0,0,0);
+        if (size_t tail = xlimit - dx) {
+            start(tail,program,dx,dy, 0,0,0,0, 0,0,0,0);
         }
     #endif
     }
@@ -88,28 +88,28 @@ extern "C" void WRAP(start_pipeline)(size_t x, size_t y, size_t xlimit, size_t y

 #if defined(__i386__) || defined(_M_IX86) || defined(__arm__)
     #define STAGE(name, ...)                                                  \
-        SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail,        \
+        SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail,      \
                          F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da); \
         extern "C" void WRAP(name)(Params* params, void** program,            \
                                    F r, F g, F b, F a) {                      \
-            name##_k(Ctx{program},params->x,params->y,params->tail, r,g,b,a,  \
+            name##_k(Ctx{program},params->dx,params->dy,params->tail, r,g,b,a,\
                      params->dr, params->dg, params->db, params->da);         \
             auto next = (Stage*)load_and_inc(program);                        \
             next(params,program, r,g,b,a);                                    \
         }                                                                     \
-        SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail,        \
+        SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail,      \
                          F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da)
 #else
     #define STAGE(name, ...)                                                  \
-        SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail,        \
+        SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail,      \
                          F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da); \
-        extern "C" void WRAP(name)(size_t tail, void** program, size_t x, size_t y,   \
+        extern "C" void WRAP(name)(size_t tail, void** program, size_t dx, size_t dy, \
                                    F r, F g, F b, F a, F dr, F dg, F db, F da) { \
-            name##_k(Ctx{program},x,y,tail, r,g,b,a, dr,dg,db,da);            \
+            name##_k(Ctx{program},dx,dy,tail, r,g,b,a, dr,dg,db,da);          \
             auto next = (Stage*)load_and_inc(program);                        \
-            next(tail,program,x,y, r,g,b,a, dr,dg,db,da);                     \
+            next(tail,program,dx,dy, r,g,b,a, dr,dg,db,da);                   \
         }                                                                     \
-        SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail,        \
+        SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail,      \
                          F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da)
 #endif
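The machinery above is effectively a tiny interpreter: `program` is a flat array of stage function pointers interleaved with their contexts, each stage pops its entry and tail-calls the next, and `start_pipeline` runs full batches of N pixels across each row before one final call with `tail` set to the remainder. A self-contained scalar sketch of the same pattern, with a single float standing in for the real F vectors (all names here are illustrative, not the file's SIMD code):

```cpp
#include <cstddef>
#include <cstdio>

// One "lane" of pipeline state instead of the real F vectors.
using Stage = void (*)(size_t tail, void** program, size_t dx, size_t dy, float r);

// Pop the next pointer off the program, advancing it -- same idea as load_and_inc().
static void* load_and_inc(void**& program) { return *program++; }

static void just_return(size_t, void**, size_t, size_t, float) {}

static void seed(size_t tail, void** program, size_t dx, size_t dy, float r) {
    r = (float)dx + 0.5f;                      // pixel-center x, like seed_shader
    auto next = (Stage)load_and_inc(program);
    next(tail, program, dx, dy, r);            // tail-call the next stage
}

static void dump(size_t tail, void** program, size_t dx, size_t dy, float r) {
    std::printf("dx=%zu r=%g tail=%zu\n", dx, r, tail);
    auto next = (Stage)load_and_inc(program);
    next(tail, program, dx, dy, r);
}

int main() {
    void* program[] = { (void*)seed, (void*)dump, (void*)just_return };
    const size_t N = 4, x0 = 0, xlimit = 10;

    // The driver loop from start_pipeline: full batches of N, then one tail call.
    size_t dx = x0;
    for (; dx + N <= xlimit; dx += N) {
        void** p = program;
        ((Stage)load_and_inc(p))(0, p, dx, 0, 0.0f);
    }
    if (size_t tail = xlimit - dx) {
        void** p = program;
        ((Stage)load_and_inc(p))(tail, p, dx, 0, 0.0f);
    }
}
```

A nonzero `tail` is how the last, partial batch tells load/store stages to touch only `tail` pixels instead of a full N.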
@@ -192,10 +192,10 @@ SI void from_8888(U32 _8888, F* r, F* g, F* b, F* a) {
     *a = cast((_8888 >> 24)       ) * (1/255.0f);
 }

-// Used by load_ and store_ stages to get to the right (x,y) starting point of contiguous memory.
+// Used by load_ and store_ stages to get to the right (dx,dy) starting point of contiguous memory.
 template <typename T>
-SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, int x, int y) {
-    return (T*)ctx->pixels + y*ctx->stride + x;
+SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, int dx, int dy) {
+    return (T*)ctx->pixels + dy*ctx->stride + dx;
 }

 // Used by gather_ stages to calculate the base pointer and a vector of indices to load.
@@ -215,22 +215,22 @@ SI U32 ix_and_ptr(T** ptr, const SkJumper_GatherCtx* ctx, F x, F y) {
 // Now finally, normal Stages!

 STAGE(seed_shader, Ctx::None) {
-    // It's important for speed to explicitly cast(x) and cast(y),
+    // It's important for speed to explicitly cast(dx) and cast(dy),
     // which has the effect of splatting them to vectors before converting to floats.
     // On Intel this breaks a data dependency on previous loop iterations' registers.
     float iota[] = { 0.5f,1.5f,2.5f,3.5f,4.5f,5.5f,6.5f,7.5f };

-    r = cast(x) + unaligned_load<F>(iota);
-    g = cast(y) + 0.5f;
+    r = cast(dx) + unaligned_load<F>(iota);
+    g = cast(dy) + 0.5f;
     b = 1.0f;
     a = 0;
     dr = dg = db = da = 0;
 }

 STAGE(dither, const float* rate) {
-    // Get [(x,y), (x+1,y), (x+2,y), ...] loaded up in integer vectors.
+    // Get [(dx,dy), (dx+1,dy), (dx+2,dy), ...] loaded up in integer vectors.
     uint32_t iota[] = {0,1,2,3,4,5,6,7};
-    U32 X = x + unaligned_load<U32>(iota),
-        Y = y;
+    U32 X = dx + unaligned_load<U32>(iota),
+        Y = dy;

     // We're doing 8x8 ordered dithering, see https://en.wikipedia.org/wiki/Ordered_dithering.
     // In this case n=8 and we're using the matrix that looks like 1/64 x [ 0 48 12 60 ... ].
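For reference, that 8x8 threshold can be produced by bit-mixing rather than a table lookup. A scalar sketch, assuming only x and x^y feed the pattern (the vector code does this on whole lanes of X and Y at once, and its exact bit layout may differ):

```cpp
#include <cstdint>

// Mix the low 3 bits of x and x^y into a 6-bit value M in [0,64); this yields
// the Bayer matrix 1/64 x [ 0 48 12 60 ... ] along each row.  Then recenter
// so the dither noise has zero mean over all 64 cells.
static float dither_threshold(uint32_t x, uint32_t y) {
    uint32_t X = x,
             Y = y ^ x;                 // only x and x^y matter to the pattern
    uint32_t M = (Y & 1) << 5 | (X & 1) << 4
               | (Y & 2) << 2 | (X & 2) << 1
               | (Y & 4) >> 1 | (X & 4) >> 2;
    return M * (2/128.0f) - (63/128.0f);   // in [-63/128, +63/128], mean 0
}
```

Spot check: along y = 0 the first four thresholds come out as 0, 48, 12, 60 (in 64ths), matching the matrix named in the comment above.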
@@ -480,7 +480,7 @@ STAGE(luminosity, Ctx::None) {
 }

 STAGE(srcover_rgba_8888, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<uint32_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);

     U32 dst = load<U32>(ptr, tail);
     dr = cast((dst      ) & 0xff);
@@ -679,7 +679,7 @@ STAGE(scale_1_float, const float* c) {
     a = a * *c;
 }
 STAGE(scale_u8, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);

     auto scales = load<U8>(ptr, tail);
     auto c = from_byte(scales);
@@ -690,7 +690,7 @@ STAGE(scale_u8, const SkJumper_MemoryCtx* ctx) {
     a = a * c;
 }
 STAGE(scale_565, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);

     F cr,cg,cb;
     from_565(load<U16>(ptr, tail), &cr, &cg, &cb);
@@ -714,7 +714,7 @@ STAGE(lerp_1_float, const float* c) {
     a = lerp(da, a, *c);
 }
 STAGE(lerp_u8, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);

     auto scales = load<U8>(ptr, tail);
     auto c = from_byte(scales);
@@ -725,7 +725,7 @@ STAGE(lerp_u8, const SkJumper_MemoryCtx* ctx) {
     a = lerp(da, a, c);
 }
 STAGE(lerp_565, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);

     F cr,cg,cb;
     from_565(load<U16>(ptr, tail), &cr, &cg, &cb);
@@ -739,14 +739,14 @@ STAGE(lerp_565, const SkJumper_MemoryCtx* ctx) {
 }

 STAGE(load_tables, const SkJumper_LoadTablesCtx* c) {
-    auto px = load<U32>((const uint32_t*)c->src + x, tail);
+    auto px = load<U32>((const uint32_t*)c->src + dx, tail);
     r = gather(c->r, (px      ) & 0xff);
     g = gather(c->g, (px >>  8) & 0xff);
     b = gather(c->b, (px >> 16) & 0xff);
     a = cast(        (px >> 24)) * (1/255.0f);
 }
 STAGE(load_tables_u16_be, const SkJumper_LoadTablesCtx* c) {
-    auto ptr = (const uint16_t*)c->src + 4*x;
+    auto ptr = (const uint16_t*)c->src + 4*dx;

     U16 R,G,B,A;
     load4(ptr, tail, &R,&G,&B,&A);
@@ -758,7 +758,7 @@ STAGE(load_tables_u16_be, const SkJumper_LoadTablesCtx* c) {
     a = (1/65535.0f) * cast(expand(bswap(A)));
 }
 STAGE(load_tables_rgb_u16_be, const SkJumper_LoadTablesCtx* c) {
-    auto ptr = (const uint16_t*)c->src + 3*x;
+    auto ptr = (const uint16_t*)c->src + 3*dx;

     U16 R,G,B;
     load3(ptr, tail, &R,&G,&B);
@@ -834,13 +834,13 @@ STAGE(lab_to_xyz, Ctx::None) {
 }

 STAGE(load_a8, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);

     r = g = b = 0.0f;
     a = from_byte(load<U8>(ptr, tail));
 }
 STAGE(load_a8_dst, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);

     dr = dg = db = 0.0f;
     da = from_byte(load<U8>(ptr, tail));
@@ -852,20 +852,20 @@ STAGE(gather_a8, const SkJumper_GatherCtx* ctx) {
     a = from_byte(gather(ptr, ix));
 }
 STAGE(store_a8, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<uint8_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<uint8_t>(ctx, dx,dy);

     U8 packed = pack(pack(round(a, 255.0f)));
     store(ptr, packed, tail);
 }

 STAGE(load_g8, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);

     r = g = b = from_byte(load<U8>(ptr, tail));
     a = 1.0f;
 }
 STAGE(load_g8_dst, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);

     dr = dg = db = from_byte(load<U8>(ptr, tail));
     da = 1.0f;
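The scale_ and lerp_ stages above apply coverage `c` in two different ways, and srcover_rgba_8888 folds load + src-over blend + store into a single stage. Per channel, in scalar form (helper names are illustrative):

```cpp
// Per-channel coverage application, with c in [0,1]:
//   scale_* : src' = src * c
//   lerp_*  : src' = dst + (src - dst) * c   (c=0 keeps dst, c=1 keeps src)
static float scale(float src, float c)            { return src * c; }
static float lerp (float dst, float src, float c) { return dst + (src - dst) * c; }

// srcover_rgba_8888: out = src + dst * (1 - src_alpha), per channel,
// with premultiplied-alpha color.
static float srcover(float src, float dst, float src_a) {
    return src + dst * (1.0f - src_a);
}
```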
@@ -878,13 +878,13 @@ STAGE(gather_g8, const SkJumper_GatherCtx* ctx) {
 }

 STAGE(load_565, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);

     from_565(load<U16>(ptr, tail), &r,&g,&b);
     a = 1.0f;
 }
 STAGE(load_565_dst, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);

     from_565(load<U16>(ptr, tail), &dr,&dg,&db);
     da = 1.0f;
@@ -896,7 +896,7 @@ STAGE(gather_565, const SkJumper_GatherCtx* ctx) {
     a = 1.0f;
 }
 STAGE(store_565, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<uint16_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);

     U16 px = pack( round(r, 31.0f) << 11
                  | round(g, 63.0f) <<  5
@@ -905,11 +905,11 @@ STAGE(store_565, const SkJumper_MemoryCtx* ctx) {
 }

 STAGE(load_4444, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
     from_4444(load<U16>(ptr, tail), &r,&g,&b,&a);
 }
 STAGE(load_4444_dst, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
     from_4444(load<U16>(ptr, tail), &dr,&dg,&db,&da);
 }
 STAGE(gather_4444, const SkJumper_GatherCtx* ctx) {
@@ -918,7 +918,7 @@ STAGE(gather_4444, const SkJumper_GatherCtx* ctx) {
     from_4444(gather(ptr, ix), &r,&g,&b,&a);
 }
 STAGE(store_4444, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<uint16_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);
     U16 px = pack( round(r, 15.0f) << 12
                  | round(g, 15.0f) <<  8
                  | round(b, 15.0f) <<  4
@@ -927,11 +927,11 @@ STAGE(store_4444, const SkJumper_MemoryCtx* ctx) {
 }

 STAGE(load_8888, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
     from_8888(load<U32>(ptr, tail), &r,&g,&b,&a);
 }
 STAGE(load_8888_dst, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
     from_8888(load<U32>(ptr, tail), &dr,&dg,&db,&da);
 }
 STAGE(gather_8888, const SkJumper_GatherCtx* ctx) {
@@ -940,7 +940,7 @@ STAGE(gather_8888, const SkJumper_GatherCtx* ctx) {
     from_8888(gather(ptr, ix), &r,&g,&b,&a);
 }
 STAGE(store_8888, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<uint32_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);

     U32 px = round(r, 255.0f)
            | round(g, 255.0f) <<  8
@@ -950,11 +950,11 @@ STAGE(store_8888, const SkJumper_MemoryCtx* ctx) {
 }

 STAGE(load_bgra, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
     from_8888(load<U32>(ptr, tail), &b,&g,&r,&a);
 }
 STAGE(load_bgra_dst, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
     from_8888(load<U32>(ptr, tail), &db,&dg,&dr,&da);
 }
 STAGE(gather_bgra, const SkJumper_GatherCtx* ctx) {
@@ -963,7 +963,7 @@ STAGE(gather_bgra, const SkJumper_GatherCtx* ctx) {
     from_8888(gather(ptr, ix), &b,&g,&r,&a);
 }
 STAGE(store_bgra, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<uint32_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);

     U32 px = round(b, 255.0f)
            | round(g, 255.0f) <<  8
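A scalar sketch of what the from_565 unpacking and store_565 packing above do per pixel, assuming the same round(x, m) = nearest-int(x*m) convention as the diff (these standalone helpers are illustrative, not the file's vector code):

```cpp
#include <cstdint>

// RGB565: r in bits 15..11 (5 bits), g in bits 10..5 (6 bits), b in bits 4..0.
static void rgb_from_565(uint16_t px, float* r, float* g, float* b) {
    *r = ((px >> 11) & 31) * (1/31.0f);   // normalize each field to [0,1]
    *g = ((px >>  5) & 63) * (1/63.0f);
    *b = ( px        & 31) * (1/31.0f);
}

static uint16_t rgb_to_565(float r, float g, float b) {
    auto round_to = [](float x, float m) { return (uint32_t)(x*m + 0.5f); };
    return (uint16_t)( round_to(r, 31.0f) << 11
                     | round_to(g, 63.0f) <<  5
                     | round_to(b, 31.0f) );
}
```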
@@ -973,7 +973,7 @@ STAGE(store_bgra, const SkJumper_MemoryCtx* ctx) {
 }

 STAGE(load_f16, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint64_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint64_t>(ctx, dx,dy);

     U16 R,G,B,A;
     load4((const uint16_t*)ptr,tail, &R,&G,&B,&A);
@@ -983,7 +983,7 @@ STAGE(load_f16, const SkJumper_MemoryCtx* ctx) {
     a = from_half(A);
 }
 STAGE(load_f16_dst, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint64_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<const uint64_t>(ctx, dx,dy);

     U16 R,G,B,A;
     load4((const uint16_t*)ptr,tail, &R,&G,&B,&A);
@@ -1005,7 +1005,7 @@ STAGE(gather_f16, const SkJumper_GatherCtx* ctx) {
     a = from_half(A);
 }
 STAGE(store_f16, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<uint64_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<uint64_t>(ctx, dx,dy);

     store4((uint16_t*)ptr,tail, to_half(r)
                               , to_half(g)
                               , to_half(b)
@@ -1013,7 +1013,7 @@ STAGE(store_f16, const SkJumper_MemoryCtx* ctx) {
 }

 STAGE(load_u16_be, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint16_t>(ctx, 4*x,y);
+    auto ptr = ptr_at_xy<const uint16_t>(ctx, 4*dx,dy);

     U16 R,G,B,A;
     load4(ptr,tail, &R,&G,&B,&A);
@@ -1024,7 +1024,7 @@ STAGE(load_u16_be, const SkJumper_MemoryCtx* ctx) {
     a = (1/65535.0f) * cast(expand(bswap(A)));
 }
 STAGE(load_rgb_u16_be, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const uint16_t>(ctx, 3*x,y);
+    auto ptr = ptr_at_xy<const uint16_t>(ctx, 3*dx,dy);

     U16 R,G,B;
     load3(ptr,tail, &R,&G,&B);
@@ -1035,7 +1035,7 @@ STAGE(load_rgb_u16_be, const SkJumper_MemoryCtx* ctx) {
     a = 1.0f;
 }
 STAGE(store_u16_be, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<uint16_t>(ctx, 4*x,y);
+    auto ptr = ptr_at_xy<uint16_t>(ctx, 4*dx,dy);

     U16 R = bswap(pack(round(r, 65535.0f))),
         G = bswap(pack(round(g, 65535.0f))),
@@ -1046,15 +1046,15 @@ STAGE(store_u16_be, const SkJumper_MemoryCtx* ctx) {
 }

 STAGE(load_f32, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const float>(ctx, 4*x,y);
+    auto ptr = ptr_at_xy<const float>(ctx, 4*dx,dy);
     load4(ptr,tail, &r,&g,&b,&a);
 }
 STAGE(load_f32_dst, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<const float>(ctx, 4*x,y);
+    auto ptr = ptr_at_xy<const float>(ctx, 4*dx,dy);
     load4(ptr,tail, &dr,&dg,&db,&da);
 }
 STAGE(store_f32, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<float>(ctx, 4*x,y);
+    auto ptr = ptr_at_xy<float>(ctx, 4*dx,dy);
     store4(ptr,tail, r,g,b,a);
 }
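The *_u16_be stages above handle interleaved big-endian 16-bit samples: swap each value's bytes, then normalize. A scalar sketch of the per-sample conversion (bswap16 here is a hypothetical stand-in for the file's vector bswap):

```cpp
#include <cstdint>

// Big-endian uint16 sample -> float in [0,1]: byte-swap, then scale.
static uint16_t bswap16(uint16_t v)      { return (uint16_t)((v << 8) | (v >> 8)); }
static float    from_u16_be(uint16_t v)  { return bswap16(v) * (1/65535.0f); }

// And back: scale to [0,65535] with rounding, then byte-swap for storage.
static uint16_t to_u16_be(float x)       { return bswap16((uint16_t)(x*65535.0f + 0.5f)); }
```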
diff --git a/src/jumper/SkJumper_stages_lowp.cpp b/src/jumper/SkJumper_stages_lowp.cpp
index 0882f81cf5..79e3fbd0d7 100644
--- a/src/jumper/SkJumper_stages_lowp.cpp
+++ b/src/jumper/SkJumper_stages_lowp.cpp
@@ -53,7 +53,7 @@ static const size_t N = sizeof(U16) / sizeof(uint16_t);

 // We pass program as the second argument so that load_and_inc() will find it in %rsi on x86-64.
-using Stage = void (ABI*)(size_t tail, void** program, size_t x, size_t y,
+using Stage = void (ABI*)(size_t tail, void** program, size_t dx, size_t dy,
                           U16  r, U16  g, U16  b, U16  a,
                           U16 dr, U16 dg, U16 db, U16 da);

@@ -64,13 +64,13 @@ ABI extern "C" void WRAP(start_pipeline)(const size_t x0,
                                          const size_t ylimit,
                                          void** program) {
     auto start = (Stage)load_and_inc(program);
-    for (size_t y = y0; y < ylimit; y++) {
-        size_t x = x0;
-        for (; x + N <= xlimit; x += N) {
-            start(   0,program,x,y, 0,0,0,0, 0,0,0,0);
+    for (size_t dy = y0; dy < ylimit; dy++) {
+        size_t dx = x0;
+        for (; dx + N <= xlimit; dx += N) {
+            start(   0,program,dx,dy, 0,0,0,0, 0,0,0,0);
         }
-        if (size_t tail = xlimit - x) {
-            start(tail,program,x,y, 0,0,0,0, 0,0,0,0);
+        if (size_t tail = xlimit - dx) {
+            start(tail,program,dx,dy, 0,0,0,0, 0,0,0,0);
         }
     }
 }
@@ -79,17 +79,17 @@ ABI extern "C" void WRAP(just_return)(size_t,void**,size_t,size_t,
                                       U16,U16,U16,U16, U16,U16,U16,U16) {}

 #define STAGE(name, ...)                                                       \
-    SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail,             \
+    SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail,           \
                      U16&  r, U16&  g, U16&  b, U16&  a,                       \
                      U16& dr, U16& dg, U16& db, U16& da);                      \
-    ABI extern "C" void WRAP(name)(size_t tail, void** program, size_t x, size_t y,   \
+    ABI extern "C" void WRAP(name)(size_t tail, void** program, size_t dx, size_t dy, \
                                    U16  r, U16  g, U16  b, U16  a,             \
                                    U16 dr, U16 dg, U16 db, U16 da) {           \
-        name##_k(Ctx{program}, x,y,tail, r,g,b,a, dr,dg,db,da);                \
+        name##_k(Ctx{program}, dx,dy,tail, r,g,b,a, dr,dg,db,da);              \
         auto next = (Stage)load_and_inc(program);                              \
-        next(tail,program,x,y, r,g,b,a, dr,dg,db,da);                          \
+        next(tail,program,dx,dy, r,g,b,a, dr,dg,db,da);                        \
     }                                                                          \
-    SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail,             \
+    SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail,           \
                      U16&  r, U16&  g, U16&  b, U16&  a,                       \
                      U16& dr, U16& dg, U16& db, U16& da)

@@ -244,8 +244,8 @@ STAGE(invert, Ctx::None) {

 // ~~~~~~ Helpers for interacting with memory ~~~~~~ //

 template <typename T>
-SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, size_t x, size_t y) {
-    return (T*)ctx->pixels + y*ctx->stride + x;
+SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, size_t dx, size_t dy) {
+    return (T*)ctx->pixels + dy*ctx->stride + dx;
 }

 template <typename V, typename T>
@@ -369,23 +369,23 @@ SI void store_8888(uint32_t* ptr, size_t tail, U16 r, U16 g, U16 b, U16 a) {
 }

 STAGE(load_8888, const SkJumper_MemoryCtx* ctx) {
-    load_8888(ptr_at_xy<const uint32_t>(ctx, x,y), tail, &r,&g,&b,&a);
+    load_8888(ptr_at_xy<const uint32_t>(ctx, dx,dy), tail, &r,&g,&b,&a);
 }
 STAGE(load_8888_dst, const SkJumper_MemoryCtx* ctx) {
-    load_8888(ptr_at_xy<const uint32_t>(ctx, x,y), tail, &dr,&dg,&db,&da);
+    load_8888(ptr_at_xy<const uint32_t>(ctx, dx,dy), tail, &dr,&dg,&db,&da);
 }
 STAGE(store_8888, const SkJumper_MemoryCtx* ctx) {
-    store_8888(ptr_at_xy<uint32_t>(ctx, x,y), tail, r,g,b,a);
+    store_8888(ptr_at_xy<uint32_t>(ctx, dx,dy), tail, r,g,b,a);
 }

 STAGE(load_bgra, const SkJumper_MemoryCtx* ctx) {
-    load_8888(ptr_at_xy<const uint32_t>(ctx, x,y), tail, &b,&g,&r,&a);
+    load_8888(ptr_at_xy<const uint32_t>(ctx, dx,dy), tail, &b,&g,&r,&a);
 }
 STAGE(load_bgra_dst, const SkJumper_MemoryCtx* ctx) {
-    load_8888(ptr_at_xy<const uint32_t>(ctx, x,y), tail, &db,&dg,&dr,&da);
+    load_8888(ptr_at_xy<const uint32_t>(ctx, dx,dy), tail, &db,&dg,&dr,&da);
 }
 STAGE(store_bgra, const SkJumper_MemoryCtx* ctx) {
-    store_8888(ptr_at_xy<uint32_t>(ctx, x,y), tail, b,g,r,a);
+    store_8888(ptr_at_xy<uint32_t>(ctx, dx,dy), tail, b,g,r,a);
 }

 // ~~~~~~ 16-bit memory loads and stores ~~~~~~ //
@@ -414,15 +414,15 @@ SI void store_565(uint16_t* ptr, size_t tail, U16 r, U16 g, U16 b) {
 }

 STAGE(load_565, const SkJumper_MemoryCtx* ctx) {
-    load_565(ptr_at_xy<const uint16_t>(ctx, x,y), tail, &r,&g,&b);
+    load_565(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &r,&g,&b);
     a = 255;
 }
 STAGE(load_565_dst, const SkJumper_MemoryCtx* ctx) {
-    load_565(ptr_at_xy<const uint16_t>(ctx, x,y), tail, &dr,&dg,&db);
+    load_565(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &dr,&dg,&db);
     da = 255;
 }
 STAGE(store_565, const SkJumper_MemoryCtx* ctx) {
-    store_565(ptr_at_xy<uint16_t>(ctx, x,y), tail, r,g,b);
+    store_565(ptr_at_xy<uint16_t>(ctx, dx,dy), tail, r,g,b);
 }

 // ~~~~~~ 8-bit memory loads and stores ~~~~~~ //
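The lowp scale_ and lerp_ stages in the hunks that follow multiply two 0..255 values inside 16-bit lanes and then need the product divided by 255, with rounding, without leaving integer registers. A scalar sketch of the standard add-and-shift formulation (this file's actual div255 may differ in detail):

```cpp
#include <cstdint>

// Exact round(v / 255.0) for 0 <= v <= 255*255, using only adds and shifts:
// add the rounding bias, then fold the high byte back in twice.
static uint16_t div255(uint32_t v) {
    uint32_t x = v + 128;                  // rounding bias
    return (uint16_t)((x + (x >> 8)) >> 8);
}
```

Spot checks: div255(0) == 0, div255(255*255) == 255, div255(127) == 0, and div255(128) == 1, matching round-to-nearest.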
@@ -436,22 +436,22 @@ SI void store_8(uint8_t* ptr, size_t tail, U16 v) {

 STAGE(load_a8, const SkJumper_MemoryCtx* ctx) {
     r = g = b = 0;
-    a = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+    a = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
 }
 STAGE(load_a8_dst, const SkJumper_MemoryCtx* ctx) {
     dr = dg = db = 0;
-    da = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+    da = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
 }
 STAGE(store_a8, const SkJumper_MemoryCtx* ctx) {
-    store_8(ptr_at_xy<uint8_t>(ctx, x,y), tail, a);
+    store_8(ptr_at_xy<uint8_t>(ctx, dx,dy), tail, a);
 }

 STAGE(load_g8, const SkJumper_MemoryCtx* ctx) {
-    r = g = b = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+    r = g = b = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
     a = 255;
 }
 STAGE(load_g8_dst, const SkJumper_MemoryCtx* ctx) {
-    dr = dg = db = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+    dr = dg = db = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
     da = 255;
 }

 STAGE(luminance_to_alpha, Ctx::None) {
@@ -477,14 +477,14 @@ STAGE(lerp_1_float, const float* f) {
 }

 STAGE(scale_u8, const SkJumper_MemoryCtx* ctx) {
-    U16 c = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+    U16 c = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
     r = div255( r * c );
     g = div255( g * c );
     b = div255( b * c );
     a = div255( a * c );
 }
 STAGE(lerp_u8, const SkJumper_MemoryCtx* ctx) {
-    U16 c = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+    U16 c = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
     r = lerp(dr, r, c);
     g = lerp(dg, g, c);
     b = lerp(db, b, c);
@@ -498,7 +498,7 @@ SI U16 alpha_coverage_from_rgb_coverage(U16 a, U16 da, U16 cr, U16 cg, U16 cb) {
 }
 STAGE(scale_565, const SkJumper_MemoryCtx* ctx) {
     U16 cr,cg,cb;
-    load_565(ptr_at_xy<const uint16_t>(ctx, x,y), tail, &cr,&cg,&cb);
+    load_565(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &cr,&cg,&cb);
     U16 ca = alpha_coverage_from_rgb_coverage(a,da, cr,cg,cb);

     r = div255( r * cr );
@@ -508,7 +508,7 @@ STAGE(scale_565, const SkJumper_MemoryCtx* ctx) {
 }
 STAGE(lerp_565, const SkJumper_MemoryCtx* ctx) {
     U16 cr,cg,cb;
-    load_565(ptr_at_xy<const uint16_t>(ctx, x,y), tail, &cr,&cg,&cb);
+    load_565(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &cr,&cg,&cb);
     U16 ca = alpha_coverage_from_rgb_coverage(a,da, cr,cg,cb);

     r = lerp(dr, r, cr);
@@ -520,7 +520,7 @@ STAGE(lerp_565, const SkJumper_MemoryCtx* ctx) {

 // ~~~~~~ Compound stages ~~~~~~ //

 STAGE(srcover_rgba_8888, const SkJumper_MemoryCtx* ctx) {
-    auto ptr = ptr_at_xy<uint32_t>(ctx, x,y);
+    auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);

     load_8888(ptr, tail, &dr,&dg,&db,&da);
     r = r + div255( dr*inv(a) );
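Putting the lowp pieces together, srcover_rgba_8888 above computes out = src + dst*(255 - src_a)/255 per channel entirely in integer lanes. A scalar sketch using the same helper names as the diff (div255 repeats the earlier sketch; inv(a) = 255 - a is inferred from usage):

```cpp
#include <cstdint>

static uint16_t div255(uint32_t v) {               // as sketched earlier
    uint32_t x = v + 128;
    return (uint16_t)((x + (x >> 8)) >> 8);
}
static uint16_t inv(uint16_t a) { return (uint16_t)(255 - a); }

// src-over in 0..255 fixed point, per channel, premultiplied color:
// out = src + dst * (1 - src_alpha)
static uint16_t srcover(uint16_t src, uint16_t dst, uint16_t src_a) {
    return (uint16_t)(src + div255((uint32_t)dst * inv(src_a)));
}
```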