Diffstat (limited to 'src')
 -rw-r--r--  src/jumper/SkJumper_stages.cpp      | 132
 -rw-r--r--  src/jumper/SkJumper_stages_lowp.cpp |  66
 2 files changed, 99 insertions(+), 99 deletions(-)
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index d192728419..1f9848e1fa 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -47,40 +47,40 @@ static const size_t N = sizeof(F) / sizeof(float);
// General-purpose registers are also tight, so we put most of those on the stack too.
// On ARMv7, we do the same so that we can make the r,g,b,a vectors wider.
struct Params {
- size_t x, y, tail;
+ size_t dx, dy, tail;
F dr,dg,db,da;
};
using Stage = void(Params*, void** program, F r, F g, F b, F a);
#else
// We keep program the second argument, so that it's passed in rsi for load_and_inc().
- using Stage = void(size_t tail, void** program, size_t x, size_t y, F,F,F,F, F,F,F,F);
+ using Stage = void(size_t tail, void** program, size_t dx, size_t dy, F,F,F,F, F,F,F,F);
#endif
MAYBE_MSABI
-extern "C" void WRAP(start_pipeline)(size_t x, size_t y, size_t xlimit, size_t ylimit,
+extern "C" void WRAP(start_pipeline)(size_t dx, size_t dy, size_t xlimit, size_t ylimit,
void** program) {
auto start = (Stage*)load_and_inc(program);
- const size_t x0 = x;
- for (; y < ylimit; y++) {
+ const size_t x0 = dx;
+ for (; dy < ylimit; dy++) {
#if defined(__i386__) || defined(_M_IX86) || defined(__arm__)
- Params params = { x0,y,0, 0,0,0,0 };
- while (params.x + N <= xlimit) {
+ Params params = { x0,dy,0, 0,0,0,0 };
+ while (params.dx + N <= xlimit) {
start(&params,program, 0,0,0,0);
- params.x += N;
+ params.dx += N;
}
- if (size_t tail = xlimit - params.x) {
+ if (size_t tail = xlimit - params.dx) {
params.tail = tail;
start(&params,program, 0,0,0,0);
}
#else
- x = x0;
- while (x + N <= xlimit) {
- start(0,program,x,y, 0,0,0,0, 0,0,0,0);
- x += N;
+ dx = x0;
+ while (dx + N <= xlimit) {
+ start(0,program,dx,dy, 0,0,0,0, 0,0,0,0);
+ dx += N;
}
- if (size_t tail = xlimit - x) {
- start(tail,program,x,y, 0,0,0,0, 0,0,0,0);
+ if (size_t tail = xlimit - dx) {
+ start(tail,program,dx,dy, 0,0,0,0, 0,0,0,0);
}
#endif
}
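
The loop above strip-mines each row: full SIMD strips of N pixels, then one masked call for the remainder. A minimal standalone sketch of that driver shape, with hypothetical names (run_stage, kN) standing in for the real Stage plumbing:

    #include <cstddef>

    constexpr size_t kN = 8;                            // SIMD lane count, e.g. 8 floats with AVX
    void run_stage(size_t tail, size_t dx, size_t dy);  // tail == 0 means a full strip

    void drive(size_t x0, size_t y0, size_t xlimit, size_t ylimit) {
        for (size_t dy = y0; dy < ylimit; dy++) {
            size_t dx = x0;                         // restart each row at the left edge
            for (; dx + kN <= xlimit; dx += kN) {
                run_stage(0, dx, dy);               // full strip of kN pixels
            }
            if (size_t tail = xlimit - dx) {
                run_stage(tail, dx, dy);            // partial strip; stages mask by tail
            }
        }
    }
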
@@ -88,28 +88,28 @@ extern "C" void WRAP(start_pipeline)(size_t x, size_t y, size_t xlimit, size_t y
#if defined(__i386__) || defined(_M_IX86) || defined(__arm__)
#define STAGE(name, ...) \
- SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail, \
+ SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da); \
extern "C" void WRAP(name)(Params* params, void** program, \
F r, F g, F b, F a) { \
- name##_k(Ctx{program},params->x,params->y,params->tail, r,g,b,a, \
+ name##_k(Ctx{program},params->dx,params->dy,params->tail, r,g,b,a, \
params->dr, params->dg, params->db, params->da); \
auto next = (Stage*)load_and_inc(program); \
next(params,program, r,g,b,a); \
} \
- SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail, \
+ SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da)
#else
#define STAGE(name, ...) \
- SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail, \
+ SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da); \
- extern "C" void WRAP(name)(size_t tail, void** program, size_t x, size_t y, \
+ extern "C" void WRAP(name)(size_t tail, void** program, size_t dx, size_t dy, \
F r, F g, F b, F a, F dr, F dg, F db, F da) { \
- name##_k(Ctx{program},x,y,tail, r,g,b,a, dr,dg,db,da); \
+ name##_k(Ctx{program},dx,dy,tail, r,g,b,a, dr,dg,db,da); \
auto next = (Stage*)load_and_inc(program); \
- next(tail,program,x,y, r,g,b,a, dr,dg,db,da); \
+ next(tail,program,dx,dy, r,g,b,a, dr,dg,db,da); \
} \
- SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail, \
+ SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da)
#endif
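
Concretely, on 64-bit targets STAGE(name, ...) expands each stage into a tail-calling wrapper around a user-written kernel. A sketch of the expansion for a hypothetical stage foo, where const Ctx* c stands in for the stage's __VA_ARGS__ context parameter:

    SI void foo_k(const Ctx* c, size_t dx, size_t dy, size_t tail,
                  F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da);
    extern "C" void WRAP(foo)(size_t tail, void** program, size_t dx, size_t dy,
                              F r, F g, F b, F a, F dr, F dg, F db, F da) {
        foo_k(Ctx{program}, dx,dy,tail, r,g,b,a, dr,dg,db,da);  // run this stage's body
        auto next = (Stage*)load_and_inc(program);              // fetch the next stage pointer
        next(tail,program,dx,dy, r,g,b,a, dr,dg,db,da);         // and tail-call it
    }
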
@@ -192,10 +192,10 @@ SI void from_8888(U32 _8888, F* r, F* g, F* b, F* a) {
*a = cast((_8888 >> 24) ) * (1/255.0f);
}
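
A worked example of the unpacking above, with r in the low byte of the packed little-endian RGBA word:

    uint32_t px = 0x80663311;
    float r = ((px      ) & 0xff) * (1/255.0f);  // 0x11 -> ~0.067
    float g = ((px >>  8) & 0xff) * (1/255.0f);  // 0x33 ->  0.2
    float b = ((px >> 16) & 0xff) * (1/255.0f);  // 0x66 ->  0.4
    float a = ((px >> 24)       ) * (1/255.0f);  // 0x80 -> ~0.502
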
-// Used by load_ and store_ stages to get to the right (x,y) starting point of contiguous memory.
+// Used by load_ and store_ stages to get to the right (dx,dy) starting point of contiguous memory.
template <typename T>
-SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, int x, int y) {
- return (T*)ctx->pixels + y*ctx->stride + x;
+SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, int dx, int dy) {
+ return (T*)ctx->pixels + dy*ctx->stride + dx;
}
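
For example, with a 100-pixel stride and 32-bit pixels, (dx,dy) = (3,2) resolves to element 2*100 + 3 = 203 of the buffer. A hypothetical setup (SkJumper_MemoryCtx field order assumed):

    SkJumper_MemoryCtx ctx = { pixels, 100 };        // pixel base pointer, stride in pixels
    uint32_t* p = ptr_at_xy<uint32_t>(&ctx, 3, 2);   // == (uint32_t*)pixels + 203
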
// Used by gather_ stages to calculate the base pointer and a vector of indices to load.
@@ -215,22 +215,22 @@ SI U32 ix_and_ptr(T** ptr, const SkJumper_GatherCtx* ctx, F x, F y) {
// Now finally, normal Stages!
STAGE(seed_shader, Ctx::None) {
- // It's important for speed to explicitly cast(x) and cast(y),
+ // It's important for speed to explicitly cast(dx) and cast(dy),
// which has the effect of splatting them to vectors before converting to floats.
// On Intel this breaks a data dependency on previous loop iterations' registers.
float iota[] = { 0.5f,1.5f,2.5f,3.5f,4.5f,5.5f,6.5f,7.5f };
- r = cast(x) + unaligned_load<F>(iota);
- g = cast(y) + 0.5f;
+ r = cast(dx) + unaligned_load<F>(iota);
+ g = cast(dy) + 0.5f;
b = 1.0f;
a = 0;
dr = dg = db = da = 0;
}
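
So with 8-wide F and, say, dx == 16, seed_shader seeds pixel-center coordinates. A scalar model of what each lane i receives:

    // lane i covers pixel dx+i, sampled at its center
    float x_lane(size_t dx, int i) { return (float)dx + (i + 0.5f); }  // 16.5, 17.5, ... 23.5
    float y_lane(size_t dy)        { return (float)dy + 0.5f; }        // same for every lane
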
STAGE(dither, const float* rate) {
- // Get [(x,y), (x+1,y), (x+2,y), ...] loaded up in integer vectors.
+ // Get [(dx,dy), (dx+1,dy), (dx+2,dy), ...] loaded up in integer vectors.
uint32_t iota[] = {0,1,2,3,4,5,6,7};
- U32 X = x + unaligned_load<U32>(iota),
- Y = y;
+ U32 X = dx + unaligned_load<U32>(iota),
+ Y = dy;
// We're doing 8x8 ordered dithering, see https://en.wikipedia.org/wiki/Ordered_dithering.
// In this case n=8 and we're using the matrix that looks like 1/64 x [ 0 48 12 60 ... ].
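
For reference, that 8x8 matrix can be produced by the standard recursive Bayer construction below; this sketch reproduces the 1/64 x [ 0 48 12 60 ... ] values named above, though the stage's actual bit mixing may be phrased differently:

    // Threshold in [0,64) for pixel (X,Y), from the low 3 bits of each coordinate.
    uint32_t bayer8(uint32_t X, uint32_t Y) {
        uint32_t M = 0;
        for (int bit = 0; bit < 3; bit++) {          // LSBs of X,Y drive the MSBs of M
            uint32_t xb = (X >> bit) & 1,
                     yb = (Y >> bit) & 1;
            M = (M << 2) | ((xb ^ yb) << 1) | yb;
        }
        return M;                                    // e.g. (0,1) -> 48, (0,2) -> 12
    }
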
@@ -480,7 +480,7 @@ STAGE(luminosity, Ctx::None) {
}
STAGE(srcover_rgba_8888, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
U32 dst = load<U32>(ptr, tail);
dr = cast((dst ) & 0xff);
@@ -679,7 +679,7 @@ STAGE(scale_1_float, const float* c) {
a = a * *c;
}
STAGE(scale_u8, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
auto scales = load<U8>(ptr, tail);
auto c = from_byte(scales);
@@ -690,7 +690,7 @@ STAGE(scale_u8, const SkJumper_MemoryCtx* ctx) {
a = a * c;
}
STAGE(scale_565, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
F cr,cg,cb;
from_565(load<U16>(ptr, tail), &cr, &cg, &cb);
@@ -714,7 +714,7 @@ STAGE(lerp_1_float, const float* c) {
a = lerp(da, a, *c);
}
STAGE(lerp_u8, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
auto scales = load<U8>(ptr, tail);
auto c = from_byte(scales);
@@ -725,7 +725,7 @@ STAGE(lerp_u8, const SkJumper_MemoryCtx* ctx) {
a = lerp(da, a, c);
}
STAGE(lerp_565, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
F cr,cg,cb;
from_565(load<U16>(ptr, tail), &cr, &cg, &cb);
@@ -739,14 +739,14 @@ STAGE(lerp_565, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_tables, const SkJumper_LoadTablesCtx* c) {
- auto px = load<U32>((const uint32_t*)c->src + x, tail);
+ auto px = load<U32>((const uint32_t*)c->src + dx, tail);
r = gather(c->r, (px ) & 0xff);
g = gather(c->g, (px >> 8) & 0xff);
b = gather(c->b, (px >> 16) & 0xff);
a = cast( (px >> 24)) * (1/255.0f);
}
STAGE(load_tables_u16_be, const SkJumper_LoadTablesCtx* c) {
- auto ptr = (const uint16_t*)c->src + 4*x;
+ auto ptr = (const uint16_t*)c->src + 4*dx;
U16 R,G,B,A;
load4(ptr, tail, &R,&G,&B,&A);
@@ -758,7 +758,7 @@ STAGE(load_tables_u16_be, const SkJumper_LoadTablesCtx* c) {
a = (1/65535.0f) * cast(expand(bswap(A)));
}
STAGE(load_tables_rgb_u16_be, const SkJumper_LoadTablesCtx* c) {
- auto ptr = (const uint16_t*)c->src + 3*x;
+ auto ptr = (const uint16_t*)c->src + 3*dx;
U16 R,G,B;
load3(ptr, tail, &R,&G,&B);
@@ -834,13 +834,13 @@ STAGE(lab_to_xyz, Ctx::None) {
}
STAGE(load_a8, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
r = g = b = 0.0f;
a = from_byte(load<U8>(ptr, tail));
}
STAGE(load_a8_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
dr = dg = db = 0.0f;
da = from_byte(load<U8>(ptr, tail));
@@ -852,20 +852,20 @@ STAGE(gather_a8, const SkJumper_GatherCtx* ctx) {
a = from_byte(gather(ptr, ix));
}
STAGE(store_a8, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint8_t>(ctx, dx,dy);
U8 packed = pack(pack(round(a, 255.0f)));
store(ptr, packed, tail);
}
STAGE(load_g8, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
r = g = b = from_byte(load<U8>(ptr, tail));
a = 1.0f;
}
STAGE(load_g8_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
dr = dg = db = from_byte(load<U8>(ptr, tail));
da = 1.0f;
@@ -878,13 +878,13 @@ STAGE(gather_g8, const SkJumper_GatherCtx* ctx) {
}
STAGE(load_565, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
from_565(load<U16>(ptr, tail), &r,&g,&b);
a = 1.0f;
}
STAGE(load_565_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
from_565(load<U16>(ptr, tail), &dr,&dg,&db);
da = 1.0f;
@@ -896,7 +896,7 @@ STAGE(gather_565, const SkJumper_GatherCtx* ctx) {
a = 1.0f;
}
STAGE(store_565, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);
U16 px = pack( round(r, 31.0f) << 11
| round(g, 63.0f) << 5
@@ -905,11 +905,11 @@ STAGE(store_565, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_4444, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
from_4444(load<U16>(ptr, tail), &r,&g,&b,&a);
}
STAGE(load_4444_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
from_4444(load<U16>(ptr, tail), &dr,&dg,&db,&da);
}
STAGE(gather_4444, const SkJumper_GatherCtx* ctx) {
@@ -918,7 +918,7 @@ STAGE(gather_4444, const SkJumper_GatherCtx* ctx) {
from_4444(gather(ptr, ix), &r,&g,&b,&a);
}
STAGE(store_4444, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);
U16 px = pack( round(r, 15.0f) << 12
| round(g, 15.0f) << 8
| round(b, 15.0f) << 4
@@ -927,11 +927,11 @@ STAGE(store_4444, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_8888, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
from_8888(load<U32>(ptr, tail), &r,&g,&b,&a);
}
STAGE(load_8888_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
from_8888(load<U32>(ptr, tail), &dr,&dg,&db,&da);
}
STAGE(gather_8888, const SkJumper_GatherCtx* ctx) {
@@ -940,7 +940,7 @@ STAGE(gather_8888, const SkJumper_GatherCtx* ctx) {
from_8888(gather(ptr, ix), &r,&g,&b,&a);
}
STAGE(store_8888, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
U32 px = round(r, 255.0f)
| round(g, 255.0f) << 8
@@ -950,11 +950,11 @@ STAGE(store_8888, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_bgra, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
from_8888(load<U32>(ptr, tail), &b,&g,&r,&a);
}
STAGE(load_bgra_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
from_8888(load<U32>(ptr, tail), &db,&dg,&dr,&da);
}
STAGE(gather_bgra, const SkJumper_GatherCtx* ctx) {
@@ -963,7 +963,7 @@ STAGE(gather_bgra, const SkJumper_GatherCtx* ctx) {
from_8888(gather(ptr, ix), &b,&g,&r,&a);
}
STAGE(store_bgra, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
U32 px = round(b, 255.0f)
| round(g, 255.0f) << 8
@@ -973,7 +973,7 @@ STAGE(store_bgra, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_f16, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint64_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint64_t>(ctx, dx,dy);
U16 R,G,B,A;
load4((const uint16_t*)ptr,tail, &R,&G,&B,&A);
@@ -983,7 +983,7 @@ STAGE(load_f16, const SkJumper_MemoryCtx* ctx) {
a = from_half(A);
}
STAGE(load_f16_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint64_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint64_t>(ctx, dx,dy);
U16 R,G,B,A;
load4((const uint16_t*)ptr,tail, &R,&G,&B,&A);
@@ -1005,7 +1005,7 @@ STAGE(gather_f16, const SkJumper_GatherCtx* ctx) {
a = from_half(A);
}
STAGE(store_f16, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint64_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint64_t>(ctx, dx,dy);
store4((uint16_t*)ptr,tail, to_half(r)
, to_half(g)
, to_half(b)
@@ -1013,7 +1013,7 @@ STAGE(store_f16, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_u16_be, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, 4*x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, 4*dx,dy);
U16 R,G,B,A;
load4(ptr,tail, &R,&G,&B,&A);
@@ -1024,7 +1024,7 @@ STAGE(load_u16_be, const SkJumper_MemoryCtx* ctx) {
a = (1/65535.0f) * cast(expand(bswap(A)));
}
STAGE(load_rgb_u16_be, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, 3*x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, 3*dx,dy);
U16 R,G,B;
load3(ptr,tail, &R,&G,&B);
@@ -1035,7 +1035,7 @@ STAGE(load_rgb_u16_be, const SkJumper_MemoryCtx* ctx) {
a = 1.0f;
}
STAGE(store_u16_be, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint16_t>(ctx, 4*x,y);
+ auto ptr = ptr_at_xy<uint16_t>(ctx, 4*dx,dy);
U16 R = bswap(pack(round(r, 65535.0f))),
G = bswap(pack(round(g, 65535.0f))),
@@ -1046,15 +1046,15 @@ STAGE(store_u16_be, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_f32, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const float>(ctx, 4*x,y);
+ auto ptr = ptr_at_xy<const float>(ctx, 4*dx,dy);
load4(ptr,tail, &r,&g,&b,&a);
}
STAGE(load_f32_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const float>(ctx, 4*x,y);
+ auto ptr = ptr_at_xy<const float>(ctx, 4*dx,dy);
load4(ptr,tail, &dr,&dg,&db,&da);
}
STAGE(store_f32, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<float>(ctx, 4*x,y);
+ auto ptr = ptr_at_xy<float>(ctx, 4*dx,dy);
store4(ptr,tail, r,g,b,a);
}
diff --git a/src/jumper/SkJumper_stages_lowp.cpp b/src/jumper/SkJumper_stages_lowp.cpp
index 0882f81cf5..79e3fbd0d7 100644
--- a/src/jumper/SkJumper_stages_lowp.cpp
+++ b/src/jumper/SkJumper_stages_lowp.cpp
@@ -53,7 +53,7 @@
static const size_t N = sizeof(U16) / sizeof(uint16_t);
// We pass program as the second argument so that load_and_inc() will find it in %rsi on x86-64.
-using Stage = void (ABI*)(size_t tail, void** program, size_t x, size_t y,
+using Stage = void (ABI*)(size_t tail, void** program, size_t dx, size_t dy,
U16 r, U16 g, U16 b, U16 a,
U16 dr, U16 dg, U16 db, U16 da);
@@ -64,13 +64,13 @@ ABI extern "C" void WRAP(start_pipeline)(const size_t x0,
const size_t ylimit,
void** program) {
auto start = (Stage)load_and_inc(program);
- for (size_t y = y0; y < ylimit; y++) {
- size_t x = x0;
- for (; x + N <= xlimit; x += N) {
- start( 0,program,x,y, 0,0,0,0, 0,0,0,0);
+ for (size_t dy = y0; dy < ylimit; dy++) {
+ size_t dx = x0;
+ for (; dx + N <= xlimit; dx += N) {
+ start( 0,program,dx,dy, 0,0,0,0, 0,0,0,0);
}
- if (size_t tail = xlimit - x) {
- start(tail,program,x,y, 0,0,0,0, 0,0,0,0);
+ if (size_t tail = xlimit - dx) {
+ start(tail,program,dx,dy, 0,0,0,0, 0,0,0,0);
}
}
}
@@ -79,17 +79,17 @@ ABI extern "C" void WRAP(just_return)(size_t,void**,size_t,size_t,
U16,U16,U16,U16, U16,U16,U16,U16) {}
#define STAGE(name, ...) \
- SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail, \
+ SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
U16& r, U16& g, U16& b, U16& a, \
U16& dr, U16& dg, U16& db, U16& da); \
- ABI extern "C" void WRAP(name)(size_t tail, void** program, size_t x, size_t y, \
+ ABI extern "C" void WRAP(name)(size_t tail, void** program, size_t dx, size_t dy, \
U16 r, U16 g, U16 b, U16 a, \
U16 dr, U16 dg, U16 db, U16 da) { \
- name##_k(Ctx{program}, x,y,tail, r,g,b,a, dr,dg,db,da); \
+ name##_k(Ctx{program}, dx,dy,tail, r,g,b,a, dr,dg,db,da); \
auto next = (Stage)load_and_inc(program); \
- next(tail,program,x,y, r,g,b,a, dr,dg,db,da); \
+ next(tail,program,dx,dy, r,g,b,a, dr,dg,db,da); \
} \
- SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail, \
+ SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
U16& r, U16& g, U16& b, U16& a, \
U16& dr, U16& dg, U16& db, U16& da)
@@ -244,8 +244,8 @@ STAGE(invert, Ctx::None) {
// ~~~~~~ Helpers for interacting with memory ~~~~~~ //
template <typename T>
-SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, size_t x, size_t y) {
- return (T*)ctx->pixels + y*ctx->stride + x;
+SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, size_t dx, size_t dy) {
+ return (T*)ctx->pixels + dy*ctx->stride + dx;
}
template <typename V, typename T>
@@ -369,23 +369,23 @@ SI void store_8888(uint32_t* ptr, size_t tail, U16 r, U16 g, U16 b, U16 a) {
}
STAGE(load_8888, const SkJumper_MemoryCtx* ctx) {
- load_8888(ptr_at_xy<const uint32_t>(ctx, x,y), tail, &r,&g,&b,&a);
+ load_8888(ptr_at_xy<const uint32_t>(ctx, dx,dy), tail, &r,&g,&b,&a);
}
STAGE(load_8888_dst, const SkJumper_MemoryCtx* ctx) {
- load_8888(ptr_at_xy<const uint32_t>(ctx, x,y), tail, &dr,&dg,&db,&da);
+ load_8888(ptr_at_xy<const uint32_t>(ctx, dx,dy), tail, &dr,&dg,&db,&da);
}
STAGE(store_8888, const SkJumper_MemoryCtx* ctx) {
- store_8888(ptr_at_xy<uint32_t>(ctx, x,y), tail, r,g,b,a);
+ store_8888(ptr_at_xy<uint32_t>(ctx, dx,dy), tail, r,g,b,a);
}
STAGE(load_bgra, const SkJumper_MemoryCtx* ctx) {
- load_8888(ptr_at_xy<const uint32_t>(ctx, x,y), tail, &b,&g,&r,&a);
+ load_8888(ptr_at_xy<const uint32_t>(ctx, dx,dy), tail, &b,&g,&r,&a);
}
STAGE(load_bgra_dst, const SkJumper_MemoryCtx* ctx) {
- load_8888(ptr_at_xy<const uint32_t>(ctx, x,y), tail, &db,&dg,&dr,&da);
+ load_8888(ptr_at_xy<const uint32_t>(ctx, dx,dy), tail, &db,&dg,&dr,&da);
}
STAGE(store_bgra, const SkJumper_MemoryCtx* ctx) {
- store_8888(ptr_at_xy<uint32_t>(ctx, x,y), tail, b,g,r,a);
+ store_8888(ptr_at_xy<uint32_t>(ctx, dx,dy), tail, b,g,r,a);
}
// ~~~~~~ 16-bit memory loads and stores ~~~~~~ //
@@ -414,15 +414,15 @@ SI void store_565(uint16_t* ptr, size_t tail, U16 r, U16 g, U16 b) {
}
STAGE(load_565, const SkJumper_MemoryCtx* ctx) {
- load_565(ptr_at_xy<const uint16_t>(ctx, x,y), tail, &r,&g,&b);
+ load_565(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &r,&g,&b);
a = 255;
}
STAGE(load_565_dst, const SkJumper_MemoryCtx* ctx) {
- load_565(ptr_at_xy<const uint16_t>(ctx, x,y), tail, &dr,&dg,&db);
+ load_565(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &dr,&dg,&db);
da = 255;
}
STAGE(store_565, const SkJumper_MemoryCtx* ctx) {
- store_565(ptr_at_xy<uint16_t>(ctx, x,y), tail, r,g,b);
+ store_565(ptr_at_xy<uint16_t>(ctx, dx,dy), tail, r,g,b);
}
// ~~~~~~ 8-bit memory loads and stores ~~~~~~ //
@@ -436,22 +436,22 @@ SI void store_8(uint8_t* ptr, size_t tail, U16 v) {
STAGE(load_a8, const SkJumper_MemoryCtx* ctx) {
r = g = b = 0;
- a = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+ a = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
}
STAGE(load_a8_dst, const SkJumper_MemoryCtx* ctx) {
dr = dg = db = 0;
- da = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+ da = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
}
STAGE(store_a8, const SkJumper_MemoryCtx* ctx) {
- store_8(ptr_at_xy<uint8_t>(ctx, x,y), tail, a);
+ store_8(ptr_at_xy<uint8_t>(ctx, dx,dy), tail, a);
}
STAGE(load_g8, const SkJumper_MemoryCtx* ctx) {
- r = g = b = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+ r = g = b = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
a = 255;
}
STAGE(load_g8_dst, const SkJumper_MemoryCtx* ctx) {
- dr = dg = db = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+ dr = dg = db = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
da = 255;
}
STAGE(luminance_to_alpha, Ctx::None) {
@@ -477,14 +477,14 @@ STAGE(lerp_1_float, const float* f) {
}
STAGE(scale_u8, const SkJumper_MemoryCtx* ctx) {
- U16 c = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+ U16 c = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
r = div255( r * c );
g = div255( g * c );
b = div255( b * c );
a = div255( a * c );
}
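
These lowp stages renormalize 8-bit products with div255. A scalar model using an exact rounding identity (the formula this file actually uses may be a cheaper approximation):

    uint16_t div255(uint16_t x) {                    // round(x / 255) for x <= 255*255
        return (x + 128 + ((x + 128) >> 8)) >> 8;
    }
    uint16_t scale(uint16_t channel, uint16_t cov) { // e.g. scale(255, 128) == 128
        return div255(channel * cov);
    }
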
STAGE(lerp_u8, const SkJumper_MemoryCtx* ctx) {
- U16 c = load_8(ptr_at_xy<const uint8_t>(ctx, x,y), tail);
+ U16 c = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
r = lerp(dr, r, c);
g = lerp(dg, g, c);
b = lerp(db, b, c);
@@ -498,7 +498,7 @@ SI U16 alpha_coverage_from_rgb_coverage(U16 a, U16 da, U16 cr, U16 cg, U16 cb) {
}
STAGE(scale_565, const SkJumper_MemoryCtx* ctx) {
U16 cr,cg,cb;
- load_565(ptr_at_xy<const uint16_t>(ctx, x,y), tail, &cr,&cg,&cb);
+ load_565(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &cr,&cg,&cb);
U16 ca = alpha_coverage_from_rgb_coverage(a,da, cr,cg,cb);
r = div255( r * cr );
@@ -508,7 +508,7 @@ STAGE(scale_565, const SkJumper_MemoryCtx* ctx) {
}
STAGE(lerp_565, const SkJumper_MemoryCtx* ctx) {
U16 cr,cg,cb;
- load_565(ptr_at_xy<const uint16_t>(ctx, x,y), tail, &cr,&cg,&cb);
+ load_565(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &cr,&cg,&cb);
U16 ca = alpha_coverage_from_rgb_coverage(a,da, cr,cg,cb);
r = lerp(dr, r, cr);
@@ -520,7 +520,7 @@ STAGE(lerp_565, const SkJumper_MemoryCtx* ctx) {
// ~~~~~~ Compound stages ~~~~~~ //
STAGE(srcover_rgba_8888, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
load_8888(ptr, tail, &dr,&dg,&db,&da);
r = r + div255( dr*inv(a) );
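
That last line is the classic srcover blend in fixed point, assuming inv(a) == 255 - a in this file's 0..255 coverage space. Per channel it computes src + dst*(1 - alpha); a scalar sketch:

    uint16_t div255(uint16_t x) { return (x + 128 + ((x + 128) >> 8)) >> 8; }
    uint16_t srcover(uint16_t src, uint16_t dst, uint16_t a) {
        return src + div255(dst * (255 - a));        // src + dst*(1 - alpha)
    }
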