Diffstat (limited to 'src/jumper/SkJumper_stages.cpp')
-rw-r--r--  src/jumper/SkJumper_stages.cpp  132
1 file changed, 66 insertions, 66 deletions
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index d192728419..1f9848e1fa 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -47,40 +47,40 @@ static const size_t N = sizeof(F) / sizeof(float);
// General-purpose registers are also tight, so we put most of those on the stack too.
// On ARMv7, we do the same so that we can make the r,g,b,a vectors wider.
struct Params {
- size_t x, y, tail;
+ size_t dx, dy, tail;
F dr,dg,db,da;
};
using Stage = void(Params*, void** program, F r, F g, F b, F a);
#else
// We keep program the second argument, so that it's passed in rsi for load_and_inc().
- using Stage = void(size_t tail, void** program, size_t x, size_t y, F,F,F,F, F,F,F,F);
+ using Stage = void(size_t tail, void** program, size_t dx, size_t dy, F,F,F,F, F,F,F,F);
#endif
MAYBE_MSABI
-extern "C" void WRAP(start_pipeline)(size_t x, size_t y, size_t xlimit, size_t ylimit,
+extern "C" void WRAP(start_pipeline)(size_t dx, size_t dy, size_t xlimit, size_t ylimit,
void** program) {
auto start = (Stage*)load_and_inc(program);
- const size_t x0 = x;
- for (; y < ylimit; y++) {
+ const size_t x0 = dx;
+ for (; dy < ylimit; dy++) {
#if defined(__i386__) || defined(_M_IX86) || defined(__arm__)
- Params params = { x0,y,0, 0,0,0,0 };
- while (params.x + N <= xlimit) {
+ Params params = { x0,dy,0, 0,0,0,0 };
+ while (params.dx + N <= xlimit) {
start(&params,program, 0,0,0,0);
- params.x += N;
+ params.dx += N;
}
- if (size_t tail = xlimit - params.x) {
+ if (size_t tail = xlimit - params.dx) {
params.tail = tail;
start(&params,program, 0,0,0,0);
}
#else
- x = x0;
- while (x + N <= xlimit) {
- start(0,program,x,y, 0,0,0,0, 0,0,0,0);
- x += N;
+ dx = x0;
+ while (dx + N <= xlimit) {
+ start(0,program,dx,dy, 0,0,0,0, 0,0,0,0);
+ dx += N;
}
- if (size_t tail = xlimit - x) {
- start(tail,program,x,y, 0,0,0,0, 0,0,0,0);
+ if (size_t tail = xlimit - dx) {
+ start(tail,program,dx,dy, 0,0,0,0, 0,0,0,0);
}
#endif
}
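
How this loop is meant to be driven is easiest to see from the caller's side. A minimal sketch, assuming the program array alternates each stage's function pointer with its context pointer (which is what load_and_inc() and Ctx{program} below imply) and that a terminating stage such as just_return ends the chain; the stage names come from this diff, everything else is illustrative:

    // Illustrative only: run a two-stage pipeline over a width x height rectangle.
    // The field order of SkJumper_MemoryCtx ({pixels, stride}, stride in pixels) is assumed.
    SkJumper_MemoryCtx src = { srcPixels, srcStridePixels },
                       dst = { dstPixels, dstStridePixels };
    void* program[] = {
        (void*)WRAP(load_8888),         &src,   // r,g,b,a <- src pixel at (dx,dy)
        (void*)WRAP(srcover_rgba_8888), &dst,   // blend over dst and write back
        (void*)WRAP(just_return),               // assumed terminator stage
    };
    WRAP(start_pipeline)(0,0, width,height, program);
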
@@ -88,28 +88,28 @@ extern "C" void WRAP(start_pipeline)(size_t x, size_t y, size_t xlimit, size_t y
#if defined(__i386__) || defined(_M_IX86) || defined(__arm__)
#define STAGE(name, ...) \
- SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail, \
+ SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da); \
extern "C" void WRAP(name)(Params* params, void** program, \
F r, F g, F b, F a) { \
- name##_k(Ctx{program},params->x,params->y,params->tail, r,g,b,a, \
+ name##_k(Ctx{program},params->dx,params->dy,params->tail, r,g,b,a, \
params->dr, params->dg, params->db, params->da); \
auto next = (Stage*)load_and_inc(program); \
next(params,program, r,g,b,a); \
} \
- SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail, \
+ SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da)
#else
#define STAGE(name, ...) \
- SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail, \
+ SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da); \
- extern "C" void WRAP(name)(size_t tail, void** program, size_t x, size_t y, \
+ extern "C" void WRAP(name)(size_t tail, void** program, size_t dx, size_t dy, \
F r, F g, F b, F a, F dr, F dg, F db, F da) { \
- name##_k(Ctx{program},x,y,tail, r,g,b,a, dr,dg,db,da); \
+ name##_k(Ctx{program},dx,dy,tail, r,g,b,a, dr,dg,db,da); \
auto next = (Stage*)load_and_inc(program); \
- next(tail,program,x,y, r,g,b,a, dr,dg,db,da); \
+ next(tail,program,dx,dy, r,g,b,a, dr,dg,db,da); \
} \
- SI void name##_k(__VA_ARGS__, size_t x, size_t y, size_t tail, \
+ SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
F& r, F& g, F& b, F& a, F& dr, F& dg, F& db, F& da)
#endif
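
Both variants generate the same two pieces for a stage: the extern "C" WRAP(name) trampoline with the platform's calling convention, which unpacks its arguments, runs name##_k, then loads and calls the next stage from program; and the forward-declared name##_k, whose body is supplied by the text after the STAGE(...) line. A hypothetical stage written against the macro (the name is made up; Ctx::None follows the seed_shader usage below):

    // Hypothetical example only: force full coverage.
    // dx, dy, tail and dr,dg,db,da are all in scope here, just unused.
    STAGE(force_opaque_example, Ctx::None) {
        a = 1.0f;
    }
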
@@ -192,10 +192,10 @@ SI void from_8888(U32 _8888, F* r, F* g, F* b, F* a) {
*a = cast((_8888 >> 24) ) * (1/255.0f);
}
-// Used by load_ and store_ stages to get to the right (x,y) starting point of contiguous memory.
+// Used by load_ and store_ stages to get to the right (dx,dy) starting point of contiguous memory.
template <typename T>
-SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, int x, int y) {
- return (T*)ctx->pixels + y*ctx->stride + x;
+SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, int dx, int dy) {
+ return (T*)ctx->pixels + dy*ctx->stride + dx;
}
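
A quick worked example of ptr_at_xy with illustrative numbers: with ctx->stride == 100 (the stride is counted in T-sized pixels, not bytes), ptr_at_xy<uint32_t>(ctx, 7, 3) returns (uint32_t*)ctx->pixels + 3*100 + 7, i.e. the 8th pixel of the 4th row.
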
// Used by gather_ stages to calculate the base pointer and a vector of indices to load.
@@ -215,22 +215,22 @@ SI U32 ix_and_ptr(T** ptr, const SkJumper_GatherCtx* ctx, F x, F y) {
// Now finally, normal Stages!
STAGE(seed_shader, Ctx::None) {
- // It's important for speed to explicitly cast(x) and cast(y),
+ // It's important for speed to explicitly cast(dx) and cast(dy),
// which has the effect of splatting them to vectors before converting to floats.
// On Intel this breaks a data dependency on previous loop iterations' registers.
float iota[] = { 0.5f,1.5f,2.5f,3.5f,4.5f,5.5f,6.5f,7.5f };
- r = cast(x) + unaligned_load<F>(iota);
- g = cast(y) + 0.5f;
+ r = cast(dx) + unaligned_load<F>(iota);
+ g = cast(dy) + 0.5f;
b = 1.0f;
a = 0;
dr = dg = db = da = 0;
}
STAGE(dither, const float* rate) {
- // Get [(x,y), (x+1,y), (x+2,y), ...] loaded up in integer vectors.
+ // Get [(dx,dy), (dx+1,dy), (dx+2,dy), ...] loaded up in integer vectors.
uint32_t iota[] = {0,1,2,3,4,5,6,7};
- U32 X = x + unaligned_load<U32>(iota),
- Y = y;
+ U32 X = dx + unaligned_load<U32>(iota),
+ Y = dy;
// We're doing 8x8 ordered dithering, see https://en.wikipedia.org/wiki/Ordered_dithering.
// In this case n=8 and we're using the matrix that looks like 1/64 x [ 0 48 12 60 ... ].
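
The hunk cuts off before the matrix is built, but the 1/64 x [ 0 48 12 60 ... ] values the comment quotes can be produced by bit-interleaving, which is worth seeing once. A sketch only, not necessarily the elided stage body:

    // Sketch of an 8x8 Bayer threshold: interleave the low three bits of
    // x and x^y.  Reproduces the quoted row: bayer8(0,0)=0, bayer8(1,0)=48,
    // bayer8(2,0)=12, bayer8(3,0)=60, ...
    uint32_t bayer8(uint32_t x, uint32_t y) {
        y ^= x;                                // only x and x^y matter from here
        return (y & 1) << 5 | (x & 1) << 4
             | (y & 2) << 2 | (x & 2) << 1
             | (y & 4) >> 1 | (x & 4) >> 2;    // six bits, 0..63
    }

Scaled by 1/64 and centered around zero, a threshold like this times *rate is presumably what gets added to each channel before the pixel is quantized.
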
@@ -480,7 +480,7 @@ STAGE(luminosity, Ctx::None) {
}
STAGE(srcover_rgba_8888, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
U32 dst = load<U32>(ptr, tail);
dr = cast((dst ) & 0xff);
@@ -679,7 +679,7 @@ STAGE(scale_1_float, const float* c) {
a = a * *c;
}
STAGE(scale_u8, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
auto scales = load<U8>(ptr, tail);
auto c = from_byte(scales);
@@ -690,7 +690,7 @@ STAGE(scale_u8, const SkJumper_MemoryCtx* ctx) {
a = a * c;
}
STAGE(scale_565, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
F cr,cg,cb;
from_565(load<U16>(ptr, tail), &cr, &cg, &cb);
@@ -714,7 +714,7 @@ STAGE(lerp_1_float, const float* c) {
a = lerp(da, a, *c);
}
STAGE(lerp_u8, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
auto scales = load<U8>(ptr, tail);
auto c = from_byte(scales);
@@ -725,7 +725,7 @@ STAGE(lerp_u8, const SkJumper_MemoryCtx* ctx) {
a = lerp(da, a, c);
}
STAGE(lerp_565, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
F cr,cg,cb;
from_565(load<U16>(ptr, tail), &cr, &cg, &cb);
@@ -739,14 +739,14 @@ STAGE(lerp_565, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_tables, const SkJumper_LoadTablesCtx* c) {
- auto px = load<U32>((const uint32_t*)c->src + x, tail);
+ auto px = load<U32>((const uint32_t*)c->src + dx, tail);
r = gather(c->r, (px ) & 0xff);
g = gather(c->g, (px >> 8) & 0xff);
b = gather(c->b, (px >> 16) & 0xff);
a = cast( (px >> 24)) * (1/255.0f);
}
STAGE(load_tables_u16_be, const SkJumper_LoadTablesCtx* c) {
- auto ptr = (const uint16_t*)c->src + 4*x;
+ auto ptr = (const uint16_t*)c->src + 4*dx;
U16 R,G,B,A;
load4(ptr, tail, &R,&G,&B,&A);
@@ -758,7 +758,7 @@ STAGE(load_tables_u16_be, const SkJumper_LoadTablesCtx* c) {
a = (1/65535.0f) * cast(expand(bswap(A)));
}
STAGE(load_tables_rgb_u16_be, const SkJumper_LoadTablesCtx* c) {
- auto ptr = (const uint16_t*)c->src + 3*x;
+ auto ptr = (const uint16_t*)c->src + 3*dx;
U16 R,G,B;
load3(ptr, tail, &R,&G,&B);
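
The bswap/expand/cast chain these _u16_be loaders share is easiest to check with a concrete value: on a little-endian host, a stored big-endian 0x1234 is read back as the native value 0x3412; bswap restores 0x1234, expand widens the 16-bit lanes so cast() can convert them to float, and multiplying by 1/65535 maps the result to 0x1234/65535 ≈ 0.0711 in [0,1].
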
@@ -834,13 +834,13 @@ STAGE(lab_to_xyz, Ctx::None) {
}
STAGE(load_a8, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
r = g = b = 0.0f;
a = from_byte(load<U8>(ptr, tail));
}
STAGE(load_a8_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
dr = dg = db = 0.0f;
da = from_byte(load<U8>(ptr, tail));
@@ -852,20 +852,20 @@ STAGE(gather_a8, const SkJumper_GatherCtx* ctx) {
a = from_byte(gather(ptr, ix));
}
STAGE(store_a8, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint8_t>(ctx, dx,dy);
U8 packed = pack(pack(round(a, 255.0f)));
store(ptr, packed, tail);
}
STAGE(load_g8, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
r = g = b = from_byte(load<U8>(ptr, tail));
a = 1.0f;
}
STAGE(load_g8_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint8_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
dr = dg = db = from_byte(load<U8>(ptr, tail));
da = 1.0f;
@@ -878,13 +878,13 @@ STAGE(gather_g8, const SkJumper_GatherCtx* ctx) {
}
STAGE(load_565, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
from_565(load<U16>(ptr, tail), &r,&g,&b);
a = 1.0f;
}
STAGE(load_565_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
from_565(load<U16>(ptr, tail), &dr,&dg,&db);
da = 1.0f;
@@ -896,7 +896,7 @@ STAGE(gather_565, const SkJumper_GatherCtx* ctx) {
a = 1.0f;
}
STAGE(store_565, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);
U16 px = pack( round(r, 31.0f) << 11
| round(g, 63.0f) << 5
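
As a quick sanity check on this packing (the blue term is cut off by the hunk but presumably mirrors red in the low five bits): opaque red (r=1, g=b=0) rounds to 31<<11 = 0xF800, and a 0.5 mid-gray rounds to 16<<11 | 32<<5 | 16 = 0x8410.
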
@@ -905,11 +905,11 @@ STAGE(store_565, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_4444, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
from_4444(load<U16>(ptr, tail), &r,&g,&b,&a);
}
STAGE(load_4444_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
from_4444(load<U16>(ptr, tail), &dr,&dg,&db,&da);
}
STAGE(gather_4444, const SkJumper_GatherCtx* ctx) {
@@ -918,7 +918,7 @@ STAGE(gather_4444, const SkJumper_GatherCtx* ctx) {
from_4444(gather(ptr, ix), &r,&g,&b,&a);
}
STAGE(store_4444, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint16_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);
U16 px = pack( round(r, 15.0f) << 12
| round(g, 15.0f) << 8
| round(b, 15.0f) << 4
@@ -927,11 +927,11 @@ STAGE(store_4444, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_8888, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
from_8888(load<U32>(ptr, tail), &r,&g,&b,&a);
}
STAGE(load_8888_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
from_8888(load<U32>(ptr, tail), &dr,&dg,&db,&da);
}
STAGE(gather_8888, const SkJumper_GatherCtx* ctx) {
@@ -940,7 +940,7 @@ STAGE(gather_8888, const SkJumper_GatherCtx* ctx) {
from_8888(gather(ptr, ix), &r,&g,&b,&a);
}
STAGE(store_8888, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
U32 px = round(r, 255.0f)
| round(g, 255.0f) << 8
@@ -950,11 +950,11 @@ STAGE(store_8888, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_bgra, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
from_8888(load<U32>(ptr, tail), &b,&g,&r,&a);
}
STAGE(load_bgra_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
from_8888(load<U32>(ptr, tail), &db,&dg,&dr,&da);
}
STAGE(gather_bgra, const SkJumper_GatherCtx* ctx) {
@@ -963,7 +963,7 @@ STAGE(gather_bgra, const SkJumper_GatherCtx* ctx) {
from_8888(gather(ptr, ix), &b,&g,&r,&a);
}
STAGE(store_bgra, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint32_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
U32 px = round(b, 255.0f)
| round(g, 255.0f) << 8
@@ -973,7 +973,7 @@ STAGE(store_bgra, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_f16, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint64_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint64_t>(ctx, dx,dy);
U16 R,G,B,A;
load4((const uint16_t*)ptr,tail, &R,&G,&B,&A);
@@ -983,7 +983,7 @@ STAGE(load_f16, const SkJumper_MemoryCtx* ctx) {
a = from_half(A);
}
STAGE(load_f16_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint64_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<const uint64_t>(ctx, dx,dy);
U16 R,G,B,A;
load4((const uint16_t*)ptr,tail, &R,&G,&B,&A);
@@ -1005,7 +1005,7 @@ STAGE(gather_f16, const SkJumper_GatherCtx* ctx) {
a = from_half(A);
}
STAGE(store_f16, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint64_t>(ctx, x,y);
+ auto ptr = ptr_at_xy<uint64_t>(ctx, dx,dy);
store4((uint16_t*)ptr,tail, to_half(r)
, to_half(g)
, to_half(b)
@@ -1013,7 +1013,7 @@ STAGE(store_f16, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_u16_be, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, 4*x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, 4*dx,dy);
U16 R,G,B,A;
load4(ptr,tail, &R,&G,&B,&A);
@@ -1024,7 +1024,7 @@ STAGE(load_u16_be, const SkJumper_MemoryCtx* ctx) {
a = (1/65535.0f) * cast(expand(bswap(A)));
}
STAGE(load_rgb_u16_be, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const uint16_t>(ctx, 3*x,y);
+ auto ptr = ptr_at_xy<const uint16_t>(ctx, 3*dx,dy);
U16 R,G,B;
load3(ptr,tail, &R,&G,&B);
@@ -1035,7 +1035,7 @@ STAGE(load_rgb_u16_be, const SkJumper_MemoryCtx* ctx) {
a = 1.0f;
}
STAGE(store_u16_be, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<uint16_t>(ctx, 4*x,y);
+ auto ptr = ptr_at_xy<uint16_t>(ctx, 4*dx,dy);
U16 R = bswap(pack(round(r, 65535.0f))),
G = bswap(pack(round(g, 65535.0f))),
@@ -1046,15 +1046,15 @@ STAGE(store_u16_be, const SkJumper_MemoryCtx* ctx) {
}
STAGE(load_f32, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const float>(ctx, 4*x,y);
+ auto ptr = ptr_at_xy<const float>(ctx, 4*dx,dy);
load4(ptr,tail, &r,&g,&b,&a);
}
STAGE(load_f32_dst, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<const float>(ctx, 4*x,y);
+ auto ptr = ptr_at_xy<const float>(ctx, 4*dx,dy);
load4(ptr,tail, &dr,&dg,&db,&da);
}
STAGE(store_f32, const SkJumper_MemoryCtx* ctx) {
- auto ptr = ptr_at_xy<float>(ctx, 4*x,y);
+ auto ptr = ptr_at_xy<float>(ctx, 4*dx,dy);
store4(ptr,tail, r,g,b,a);
}