aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-01-06 14:54:09 -0500
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-01-06 21:02:18 +0000
commit: 83f532e9b5b81ec60e02f0cd2a04f2ccafaa2bb4 (patch)
tree: 9dbd543a22c29c2ee7edb05dbbda881bbf7d11e0 /src
parent: fa710670328ec80bb553dcce8ff9ef41b60e60e3 (diff)
Add a real SkXbyak bench, implement enough to run it.
CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD

SkXbyak_… 9320 …JITCompiled 1x …Interpreted 1.24x …HandWritten 2.5x

Change-Id: I37d2d255ff32dcce73d29081d506e2d67477af97
Reviewed-on: https://skia-review.googlesource.com/6697
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
Diffstat (limited to 'src')
-rw-r--r-- src/core/SkRasterPipeline.cpp 1
-rw-r--r-- src/opts/SkXbyak.cpp 69
2 files changed, 67 insertions, 3 deletions
diff --git a/src/core/SkRasterPipeline.cpp b/src/core/SkRasterPipeline.cpp
index 8d0840f7fb..13a7caecbd 100644
--- a/src/core/SkRasterPipeline.cpp
+++ b/src/core/SkRasterPipeline.cpp
@@ -29,7 +29,6 @@ void SkRasterPipeline::run(size_t x, size_t y, size_t n) const {
std::function<void(size_t, size_t, size_t)> SkRasterPipeline::compile() const {
#ifdef SK_XBYAK
if (auto fn = this->jit()) {
- SkDebugf("Jitted with xbyak!\n");
return fn;
}
#endif
diff --git a/src/opts/SkXbyak.cpp b/src/opts/SkXbyak.cpp
index 12f5200939..59863e982f 100644
--- a/src/opts/SkXbyak.cpp
+++ b/src/opts/SkXbyak.cpp
@@ -35,12 +35,14 @@ namespace {
Pipeline(const SkRasterPipeline::Stage* stages, int n, bool* supported) {
// Set up some register name aliases.
- //auto x = rdi, y = rsi, tail = rdx;
+ // y = rsi, tail = rdx;
+ auto x = rdi;
auto r = ymm0, g = ymm1, b = ymm2, a = ymm3,
dr = ymm4, dg = ymm5, db = ymm6, da = ymm7;
Xbyak::Label floatOneStorage;
- vbroadcastss(ymm8, ptr[rip + floatOneStorage]);
+
+ //trap();
// TODO: set up (x+0.5,y+0.5) in (r,g)
vxorps(r,r);
@@ -54,6 +56,56 @@ namespace {
for (int i = 0; i < n; i++) {
switch(stages[i].stage) {
+ case SkRasterPipeline::load_f16:
+ mov(rax, (size_t)stages[i].ctx);
+ mov(rax, ptr[rax]);
+
+ vmovdqu(xmm0, ptr[rax+x*8+ 0]);
+ vmovdqu(xmm1, ptr[rax+x*8+16]);
+ vmovdqu(xmm2, ptr[rax+x*8+32]);
+ vmovdqu(xmm3, ptr[rax+x*8+48]);
+
+ vpunpcklwd(xmm8, xmm1, xmm0); vpunpckhwd(xmm0 , xmm1, xmm0);
+ vpunpcklwd(xmm1, xmm3, xmm2); vpunpckhwd(xmm2 , xmm3, xmm2);
+ vpunpcklwd(xmm9, xmm0, xmm8); vpunpckhwd(xmm8 , xmm0, xmm8);
+ vpunpcklwd(xmm3, xmm2, xmm1); vpunpckhwd(xmm10, xmm2, xmm1);
+
+ vpunpcklqdq(xmm0, xmm3, xmm9); vcvtph2ps(ymm0, xmm0);
+ vpunpckhqdq(xmm1, xmm3, xmm9); vcvtph2ps(ymm1, xmm1);
+ vpunpcklqdq(xmm2, xmm10, xmm8); vcvtph2ps(ymm2, xmm2);
+ vpunpckhqdq(xmm3, xmm10, xmm8); vcvtph2ps(ymm3, xmm3);
+ break;
+
+ case SkRasterPipeline::unpremul:
+ // Emit code that scales r,g,b by 1/a, using a scale of 0 where a == 0.
+ vxorps(ymm8, ymm8); // ymm8: 0
+ vcmpeqps(ymm10, ymm8, a); // ymm10: a == 0 (all-ones lanes where true)
+ vbroadcastss(ymm9, ptr[rip + floatOneStorage]); // ymm9: 1.0f
+ vdivps(ymm11, ymm9, a); // ymm11: 1/a
+ // vblendvps(dst, src1, src2, mask) takes src2 in lanes where the mask's sign bit is set
+ // and src1 otherwise, so the a==0 compare must be the mask and 1/a the src1 operand.
+ vblendvps(ymm10, ymm11, ymm8, ymm10); // ymm10: (a==0) ? 0 : 1/a
+ vmulps(r, r, ymm10);
+ vmulps(g, g, ymm10);
+ vmulps(b, b, ymm10);
+ break;
+
+ case SkRasterPipeline::store_f16:
+ mov(rax, (size_t)stages[i].ctx);
+ mov(rax, ptr[rax]);
+
+ vcvtps2ph(xmm8 , ymm0, 4);
+ vcvtps2ph(xmm9 , ymm1, 4);
+ vcvtps2ph(xmm10, ymm2, 4);
+ vcvtps2ph(xmm11, ymm3, 4);
+
+ vpunpcklwd(xmm12, xmm9 , xmm8 );
+ vpunpckhwd(xmm8 , xmm9 , xmm8 );
+ vpunpcklwd(xmm9 , xmm11, xmm10);
+ vpunpckhwd(xmm10, xmm11, xmm10);
+
+ vpunpckldq(xmm11, xmm9 , xmm12); vmovdqu(ptr[rax+x*8+ 0], xmm11);
+ vpunpckhdq(xmm9 , xmm9 , xmm12); vmovdqu(ptr[rax+x*8+16], xmm9 );
+ vpunpckldq(xmm9 , xmm10, xmm8 ); vmovdqu(ptr[rax+x*8+32], xmm9 );
+ vpunpckhdq(xmm8 , xmm10, xmm8 ); vmovdqu(ptr[rax+x*8+48], xmm8 );
+ break;
default:
*supported = false;
@@ -61,6 +113,7 @@ namespace {
}
}
+ vzeroupper();
ret();
L(floatOneStorage); df(1.0f);
}
@@ -69,6 +122,14 @@ namespace {
union { float f; uint32_t x; } pun = {f};
dd(pun.x);
}
+ void dp(void* p) {
+ union { void* p; uint64_t x; } pun = {p};
+ dq(pun.x);
+ }
+
+ void trap() {
+ dw(0x0b0f);
+ }
};
} // namespace
@@ -78,6 +139,7 @@ std::function<void(size_t, size_t, size_t)> SkRasterPipeline::jit() const {
if (auto pipeline = Pipeline::Create(fStages.data(), SkToInt(fStages.size()))) {
return [pipeline] (size_t x, size_t y, size_t n) {
auto call = pipeline->getCode<void(*)(size_t, size_t, size_t)>();
+ //printf("fn addr: %p\n", (void*)call);
while (n >= 8) {
call(x,y,0);
x += 8;
@@ -88,10 +150,13 @@ std::function<void(size_t, size_t, size_t)> SkRasterPipeline::jit() const {
}
};
}
+#if 0
SkDebugf("Cannot yet JIT with xbyak:\n");
this->dump();
+#endif
return nullptr;
} catch(...) {
+ SkDebugf("caught exception\n");
return nullptr;
}
}