aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2017-01-07 06:09:43 -0500
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-01-07 22:36:59 +0000
commit1523dbdd7211561957eec466c87af84f1f2bf5c5 (patch)
treed7ce23a2116f3df5c5285d09a415380216ba8653
parent86d55b312a2649d80890ccf75f24571ada0265f1 (diff)
SkXbyak: loop inside, only body
SkXbyak_… 927 …JITCompiled 1x …Interpreted 1.33x …HandWritten 1.97x CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD Change-Id: I486bbc341a38354345bfcf3d6150d1628f83f186 Reviewed-on: https://skia-review.googlesource.com/6726 Reviewed-by: Herb Derby <herb@google.com> Commit-Queue: Mike Klein <mtklein@chromium.org>
-rw-r--r--src/opts/SkXbyak.cpp40
1 files changed, 20 insertions, 20 deletions
diff --git a/src/opts/SkXbyak.cpp b/src/opts/SkXbyak.cpp
index 59863e982f..501d68752a 100644
--- a/src/opts/SkXbyak.cpp
+++ b/src/opts/SkXbyak.cpp
@@ -33,16 +33,15 @@ namespace {
return nullptr;
}
- Pipeline(const SkRasterPipeline::Stage* stages, int n, bool* supported) {
+ Pipeline(const SkRasterPipeline::Stage* stages, int nstages, bool* supported) {
// Set up some register name aliases.
- // y = rsi, tail = rdx;
- auto x = rdi;
+ // y = rsi
+ auto x = rdi, n = rdx;
auto r = ymm0, g = ymm1, b = ymm2, a = ymm3,
dr = ymm4, dg = ymm5, db = ymm6, da = ymm7;
- Xbyak::Label floatOneStorage;
-
- //trap();
+ Xbyak::Label floatOneStorage,
+ start;
// TODO: set up (x+0.5,y+0.5) in (r,g)
vxorps(r,r);
@@ -54,7 +53,14 @@ namespace {
vxorps(db,db);
vxorps(da,da);
- for (int i = 0; i < n; i++) {
+ auto zero = ymm14,
+ one = ymm15;
+ vxorps(zero, zero);
+ vbroadcastss(one, ptr[rip + floatOneStorage]);
+
+ L(start);
+ //trap();
+ for (int i = 0; i < nstages; i++) {
switch(stages[i].stage) {
case SkRasterPipeline::load_f16:
mov(rax, (size_t)stages[i].ctx);
@@ -77,11 +83,9 @@ namespace {
break;
case SkRasterPipeline::unpremul:
- vxorps(ymm8, ymm8); // ymm8: 0
- vcmpeqps(ymm10, ymm8, a); // ymm10: a == 0
- vbroadcastss(ymm9, ptr[rip + floatOneStorage]); // ymm9: 1.0f
- vdivps(ymm11, ymm9, a); // ymm11: 1/a
- vblendvps(ymm10, ymm10, ymm8, ymm11); // ymm10: (a==0) ? 0 : 1/a
+ vcmpeqps(ymm10, zero, a); // ymm10: a == 0
+ vdivps(ymm11, one, a); // ymm11: 1/a
+ vblendvps(ymm10, ymm10, zero, ymm11); // ymm10: (a==0) ? 0 : 1/a
vmulps(r, r, ymm10);
vmulps(g, g, ymm10);
vmulps(b, b, ymm10);
@@ -112,6 +116,9 @@ namespace {
return;
}
}
+ add(x, 8);
+ cmp(x, n);
+ jl(start);
vzeroupper();
ret();
@@ -140,14 +147,7 @@ std::function<void(size_t, size_t, size_t)> SkRasterPipeline::jit() const {
return [pipeline] (size_t x, size_t y, size_t n) {
auto call = pipeline->getCode<void(*)(size_t, size_t, size_t)>();
//printf("fn addr: %p\n", (void*)call);
- while (n >= 8) {
- call(x,y,0);
- x += 8;
- n -= 8;
- }
- if (n) {
- call(x,y,n);
- }
+ call(x,y,n);
};
}
#if 0