about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-02-07 14:32:25 -0500
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-02-07 20:09:37 +0000
commit: a960095e2e591cc8c20ee2007aa9d6cad3c15345 (patch)
tree: f0dd94e2219b89f596efe6976c40aedc93f24bce
parent: 83e939bcb79790f5ae3b28c398fbcf034675a6e5 (diff)
Tack on ret instructions to each SkSplicer stage.
This is just a warm-up refactor to make the existing stages more amenable to running in place without splicing. Hopefully the driver interpreter loop for that will be my next CL.

Change-Id: Ie76868b59af8ff3a10e2cd64f03c3f8d8a5031dd
Reviewed-on: https://skia-review.googlesource.com/8142
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
-rw-r--r-- src/splicer/SkSplicer.cpp 11
-rw-r--r-- src/splicer/SkSplicer_generated.h 66
-rwxr-xr-x src/splicer/build_stages.py 19
3 files changed, 87 insertions, 9 deletions
diff --git a/src/splicer/SkSplicer.cpp b/src/splicer/SkSplicer.cpp
index a25a578943..6a0fc3cdb7 100644
--- a/src/splicer/SkSplicer.cpp
+++ b/src/splicer/SkSplicer.cpp
@@ -49,6 +49,13 @@ namespace {
buf->write(&val, sizeof(val));
}
+ // Splice up to (but not including) the final return instruction in code: writes the first N-1 elements of the array to buf and drops the last one, which is expected to be the stage's trailing return.
+ template <typename T, size_t N>
+ static void splice_until_ret(SkWStream* buf, const T (&code)[N]) {
+ // On all platforms we splice today, return is a single T (byte on x86, u32 on ARM), so omitting exactly one trailing element removes the whole instruction.
+ buf->write(&code, sizeof(T) * (N-1));  // Byte count covers elements [0, N-1); the final element is never written.
+ }
+
#if defined(__aarch64__)
static constexpr int kStride = 4;
static void set_ctx(SkWStream* buf, void* ctx) {
@@ -236,7 +243,7 @@ namespace {
static bool splice(SkWStream* buf, SkRasterPipeline::StockStage st) {
switch (st) {
default: return false;
- #define CASE(st) case SkRasterPipeline::st: splice(buf, kSplice_##st); break
+ #define CASE(st) case SkRasterPipeline::st: splice_until_ret(buf, kSplice_##st); break
CASE(clear);
CASE(plus_);
CASE(srcover);
@@ -310,7 +317,7 @@ namespace {
}
}
- splice(&buf, kSplice_inc_x);
+ splice_until_ret(&buf, kSplice_inc_x);
loop(&buf, loop_start); // Loop back to handle more pixels if not done.
after_loop(&buf);
ret(&buf); // We're done.
diff --git a/src/splicer/SkSplicer_generated.h b/src/splicer/SkSplicer_generated.h
index a7eb82780b..a179e1df00 100644
--- a/src/splicer/SkSplicer_generated.h
+++ b/src/splicer/SkSplicer_generated.h
@@ -15,18 +15,21 @@
static const unsigned int kSplice_inc_x[] = {
0x91001000, // add x0, x0, #0x4
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_clear[] = {
0x6f00e400, // movi v0.2d, #0x0
0x6f00e401, // movi v1.2d, #0x0
0x6f00e402, // movi v2.2d, #0x0
0x6f00e403, // movi v3.2d, #0x0
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_plus_[] = {
0x4e24d400, // fadd v0.4s, v0.4s, v4.4s
0x4e25d421, // fadd v1.4s, v1.4s, v5.4s
0x4e26d442, // fadd v2.4s, v2.4s, v6.4s
0x4e27d463, // fadd v3.4s, v3.4s, v7.4s
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_srcover[] = {
0x4d40c870, // ld1r {v16.4s}, [x3]
@@ -35,6 +38,7 @@ static const unsigned int kSplice_srcover[] = {
0x4e25ce01, // fmla v1.4s, v16.4s, v5.4s
0x4e26ce02, // fmla v2.4s, v16.4s, v6.4s
0x4e27ce03, // fmla v3.4s, v16.4s, v7.4s
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_dstover[] = {
0x4d40c870, // ld1r {v16.4s}, [x3]
@@ -43,6 +47,7 @@ static const unsigned int kSplice_dstover[] = {
0x4e21ce05, // fmla v5.4s, v16.4s, v1.4s
0x4e22ce06, // fmla v6.4s, v16.4s, v2.4s
0x4e23ce07, // fmla v7.4s, v16.4s, v3.4s
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_clamp_0[] = {
0x6f00e410, // movi v16.2d, #0x0
@@ -50,6 +55,7 @@ static const unsigned int kSplice_clamp_0[] = {
0x4e30f421, // fmax v1.4s, v1.4s, v16.4s
0x4e30f442, // fmax v2.4s, v2.4s, v16.4s
0x4e30f463, // fmax v3.4s, v3.4s, v16.4s
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_clamp_1[] = {
0x4d40c870, // ld1r {v16.4s}, [x3]
@@ -57,6 +63,7 @@ static const unsigned int kSplice_clamp_1[] = {
0x4eb0f421, // fmin v1.4s, v1.4s, v16.4s
0x4eb0f442, // fmin v2.4s, v2.4s, v16.4s
0x4eb0f463, // fmin v3.4s, v3.4s, v16.4s
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_clamp_a[] = {
0x4d40c870, // ld1r {v16.4s}, [x3]
@@ -64,6 +71,7 @@ static const unsigned int kSplice_clamp_a[] = {
0x4ea3f400, // fmin v0.4s, v0.4s, v3.4s
0x4ea3f421, // fmin v1.4s, v1.4s, v3.4s
0x4ea3f442, // fmin v2.4s, v2.4s, v3.4s
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_swap[] = {
0x4ea31c70, // mov v16.16b, v3.16b
@@ -78,23 +86,27 @@ static const unsigned int kSplice_swap[] = {
0x4eb21e45, // mov v5.16b, v18.16b
0x4eb11e26, // mov v6.16b, v17.16b
0x4eb01e07, // mov v7.16b, v16.16b
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_move_src_dst[] = {
0x4ea01c04, // mov v4.16b, v0.16b
0x4ea11c25, // mov v5.16b, v1.16b
0x4ea21c46, // mov v6.16b, v2.16b
0x4ea31c67, // mov v7.16b, v3.16b
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_move_dst_src[] = {
0x4ea41c80, // mov v0.16b, v4.16b
0x4ea51ca1, // mov v1.16b, v5.16b
0x4ea61cc2, // mov v2.16b, v6.16b
0x4ea71ce3, // mov v3.16b, v7.16b
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_premul[] = {
0x6e23dc00, // fmul v0.4s, v0.4s, v3.4s
0x6e23dc21, // fmul v1.4s, v1.4s, v3.4s
0x6e23dc42, // fmul v2.4s, v2.4s, v3.4s
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_unpremul[] = {
0x4d40c870, // ld1r {v16.4s}, [x3]
@@ -104,6 +116,7 @@ static const unsigned int kSplice_unpremul[] = {
0x6e20de00, // fmul v0.4s, v16.4s, v0.4s
0x6e21de01, // fmul v1.4s, v16.4s, v1.4s
0x6e22de02, // fmul v2.4s, v16.4s, v2.4s
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_from_srgb[] = {
0x91005068, // add x8, x3, #0x14
@@ -135,6 +148,7 @@ static const unsigned int kSplice_from_srgb[] = {
0x6e761ea0, // bsl v0.16b, v21.16b, v22.16b
0x6e781e41, // bsl v1.16b, v18.16b, v24.16b
0x6e711e62, // bsl v2.16b, v19.16b, v17.16b
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_to_srgb[] = {
0x6ea1d810, // frsqrte v16.4s, v0.4s
@@ -197,6 +211,7 @@ static const unsigned int kSplice_to_srgb[] = {
0x6e701f40, // bsl v0.16b, v26.16b, v16.16b
0x6e721e61, // bsl v1.16b, v19.16b, v18.16b
0x6e741e22, // bsl v2.16b, v17.16b, v20.16b
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_scale_u8[] = {
0xf9400048, // ldr x8, [x2]
@@ -218,6 +233,7 @@ static const unsigned int kSplice_scale_u8[] = {
0x6e21de01, // fmul v1.4s, v16.4s, v1.4s
0x6e22de02, // fmul v2.4s, v16.4s, v2.4s
0x6e23de03, // fmul v3.4s, v16.4s, v3.4s
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_load_tables[] = {
0xa9402849, // ldp x9, x10, [x2]
@@ -271,6 +287,7 @@ static const unsigned int kSplice_load_tables[] = {
0x4e21d863, // scvtf v3.4s, v3.4s
0x6e1c0622, // mov v2.s[3], v17.s[0]
0x4f909063, // fmul v3.4s, v3.4s, v16.s[0]
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_load_8888[] = {
0xf9400048, // ldr x8, [x2]
@@ -293,6 +310,7 @@ static const unsigned int kSplice_load_8888[] = {
0x4f839021, // fmul v1.4s, v1.4s, v3.s[0]
0x4f839042, // fmul v2.4s, v2.4s, v3.s[0]
0x4f839243, // fmul v3.4s, v18.4s, v3.s[0]
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_store_8888[] = {
0xbd400470, // ldr s16, [x3,#4]
@@ -313,6 +331,7 @@ static const unsigned int kSplice_store_8888[] = {
0x4f385610, // shl v16.4s, v16.4s, #24
0x4eb01e30, // orr v16.16b, v17.16b, v16.16b
0x3ca96910, // str q16, [x8,x9]
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_load_f16[] = {
0xf9400048, // ldr x8, [x2]
@@ -322,6 +341,7 @@ static const unsigned int kSplice_load_f16[] = {
0x0e217a21, // fcvtl v1.4s, v17.4h
0x0e217a42, // fcvtl v2.4s, v18.4h
0x0e217a63, // fcvtl v3.4s, v19.4h
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_store_f16[] = {
0xf9400048, // ldr x8, [x2]
@@ -331,6 +351,7 @@ static const unsigned int kSplice_store_f16[] = {
0x8b000d08, // add x8, x8, x0, lsl #3
0x0e216873, // fcvtn v19.4h, v3.4s
0x0c000510, // st4 {v16.4h-v19.4h}, [x8]
+ 0xd65f03c0, // return (synthetic)
};
static const unsigned int kSplice_matrix_3x4[] = {
0xaa0203e8, // mov x8, x2
@@ -358,24 +379,28 @@ static const unsigned int kSplice_matrix_3x4[] = {
0x4eb01e00, // mov v0.16b, v16.16b
0x4eb11e21, // mov v1.16b, v17.16b
0x4eb21e42, // mov v2.16b, v18.16b
+ 0xd65f03c0, // return (synthetic)
};
#elif defined(__ARM_NEON__)
static const unsigned int kSplice_inc_x[] = {
0xe2800002, // add r0, r0, #2
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_clear[] = {
0xf2800010, // vmov.i32 d0, #0
0xf2801010, // vmov.i32 d1, #0
0xf2802010, // vmov.i32 d2, #0
0xf2803010, // vmov.i32 d3, #0
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_plus_[] = {
0xf2000d04, // vadd.f32 d0, d0, d4
0xf2011d05, // vadd.f32 d1, d1, d5
0xf2022d06, // vadd.f32 d2, d2, d6
0xf2033d07, // vadd.f32 d3, d3, d7
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_srcover[] = {
0xf4e30c9f, // vld1.32 {d16[]}, [r3 :32]
@@ -384,6 +409,7 @@ static const unsigned int kSplice_srcover[] = {
0xf2051c30, // vfma.f32 d1, d5, d16
0xf2062c30, // vfma.f32 d2, d6, d16
0xf2073c30, // vfma.f32 d3, d7, d16
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_dstover[] = {
0xf4e30c9f, // vld1.32 {d16[]}, [r3 :32]
@@ -392,6 +418,7 @@ static const unsigned int kSplice_dstover[] = {
0xf2015c30, // vfma.f32 d5, d1, d16
0xf2026c30, // vfma.f32 d6, d2, d16
0xf2037c30, // vfma.f32 d7, d3, d16
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_clamp_0[] = {
0xf2c00010, // vmov.i32 d16, #0
@@ -399,6 +426,7 @@ static const unsigned int kSplice_clamp_0[] = {
0xf2011f20, // vmax.f32 d1, d1, d16
0xf2022f20, // vmax.f32 d2, d2, d16
0xf2033f20, // vmax.f32 d3, d3, d16
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_clamp_1[] = {
0xf4e30c9f, // vld1.32 {d16[]}, [r3 :32]
@@ -406,6 +434,7 @@ static const unsigned int kSplice_clamp_1[] = {
0xf2211f20, // vmin.f32 d1, d1, d16
0xf2222f20, // vmin.f32 d2, d2, d16
0xf2233f20, // vmin.f32 d3, d3, d16
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_clamp_a[] = {
0xf4e30c9f, // vld1.32 {d16[]}, [r3 :32]
@@ -413,6 +442,7 @@ static const unsigned int kSplice_clamp_a[] = {
0xf2200f03, // vmin.f32 d0, d0, d3
0xf2211f03, // vmin.f32 d1, d1, d3
0xf2222f03, // vmin.f32 d2, d2, d3
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_swap[] = {
0xeef00b43, // vmov.f64 d16, d3
@@ -427,23 +457,27 @@ static const unsigned int kSplice_swap[] = {
0xeeb05b62, // vmov.f64 d5, d18
0xeeb06b61, // vmov.f64 d6, d17
0xeeb07b60, // vmov.f64 d7, d16
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_move_src_dst[] = {
0xeeb04b40, // vmov.f64 d4, d0
0xeeb05b41, // vmov.f64 d5, d1
0xeeb06b42, // vmov.f64 d6, d2
0xeeb07b43, // vmov.f64 d7, d3
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_move_dst_src[] = {
0xeeb00b44, // vmov.f64 d0, d4
0xeeb01b45, // vmov.f64 d1, d5
0xeeb02b46, // vmov.f64 d2, d6
0xeeb03b47, // vmov.f64 d3, d7
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_premul[] = {
0xf3000d13, // vmul.f32 d0, d0, d3
0xf3011d13, // vmul.f32 d1, d1, d3
0xf3022d13, // vmul.f32 d2, d2, d3
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_unpremul[] = {
0xed2d8b04, // vpush {d8-d9}
@@ -457,6 +491,7 @@ static const unsigned int kSplice_unpremul[] = {
0xf3011d91, // vmul.f32 d1, d17, d1
0xf3012d92, // vmul.f32 d2, d17, d2
0xecbd8b04, // vpop {d8-d9}
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_from_srgb[] = {
0xed2d8b02, // vpush {d8}
@@ -492,6 +527,7 @@ static const unsigned int kSplice_from_srgb[] = {
0xf31611b2, // vbsl d1, d22, d18
0xf31921b8, // vbsl d2, d25, d24
0xecbd8b02, // vpop {d8}
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_to_srgb[] = {
0xed2d8b02, // vpush {d8}
@@ -559,6 +595,7 @@ static const unsigned int kSplice_to_srgb[] = {
0xf31111b3, // vbsl d1, d17, d19
0xf31221b4, // vbsl d2, d18, d20
0xecbd8b02, // vpop {d8}
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_scale_u8[] = {
0xed2d8b02, // vpush {d8}
@@ -580,6 +617,7 @@ static const unsigned int kSplice_scale_u8[] = {
0xf3003d93, // vmul.f32 d3, d16, d3
0xe28dd008, // add sp, sp, #8
0xecbd8b02, // vpop {d8}
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_load_tables[] = {
0xe92d41f0, // push {r4, r5, r6, r7, r8, lr}
@@ -619,6 +657,7 @@ static const unsigned int kSplice_load_tables[] = {
0xe0887105, // add r7, r8, r5, lsl #2
0xed972a00, // vldr s4, [r7]
0xe8bd41f0, // pop {r4, r5, r6, r7, r8, lr}
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_load_8888[] = {
0xe92d4800, // push {fp, lr}
@@ -643,6 +682,7 @@ static const unsigned int kSplice_load_8888[] = {
0xf2a119c2, // vmul.f32 d1, d17, d2[0]
0xf2a029c2, // vmul.f32 d2, d16, d2[0]
0xe8bd4800, // pop {fp, lr}
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_store_8888[] = {
0xe283c004, // add ip, r3, #4
@@ -668,6 +708,7 @@ static const unsigned int kSplice_store_8888[] = {
0xf26001b2, // vorr d16, d16, d18
0xf26001b1, // vorr d16, d16, d17
0xedcc0b00, // vstr d16, [ip]
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_load_f16[] = {
0xed2d8b04, // vpush {d8-d9}
@@ -685,6 +726,7 @@ static const unsigned int kSplice_load_f16[] = {
0xeeb02b43, // vmov.f64 d2, d3
0xeeb03b49, // vmov.f64 d3, d9
0xecbd8b04, // vpop {d8-d9}
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_store_f16[] = {
0xeef00b41, // vmov.f64 d16, d1
@@ -698,6 +740,7 @@ static const unsigned int kSplice_store_f16[] = {
0xf3f60622, // vcvt.f16.f32 d16, q9
0xe08cc180, // add ip, ip, r0, lsl #3
0xf44c084f, // vst2.16 {d16-d17}, [ip]
+ 0xe12fff1e, // return (synthetic)
};
static const unsigned int kSplice_matrix_3x4[] = {
0xe282c020, // add ip, r2, #32
@@ -735,24 +778,28 @@ static const unsigned int kSplice_matrix_3x4[] = {
0xf22101b1, // vorr d0, d17, d17
0xf22021b0, // vorr d2, d16, d16
0xf22211b2, // vorr d1, d18, d18
+ 0xe12fff1e, // return (synthetic)
};
#else
static const unsigned char kSplice_inc_x[] = {
0x48,0x83,0xc7,0x08, // add $0x8,%rdi
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_clear[] = {
0xc5,0xfc,0x57,0xc0, // vxorps %ymm0,%ymm0,%ymm0
0xc5,0xf4,0x57,0xc9, // vxorps %ymm1,%ymm1,%ymm1
0xc5,0xec,0x57,0xd2, // vxorps %ymm2,%ymm2,%ymm2
0xc5,0xe4,0x57,0xdb, // vxorps %ymm3,%ymm3,%ymm3
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_plus_[] = {
0xc5,0xfc,0x58,0xc4, // vaddps %ymm4,%ymm0,%ymm0
0xc5,0xf4,0x58,0xcd, // vaddps %ymm5,%ymm1,%ymm1
0xc5,0xec,0x58,0xd6, // vaddps %ymm6,%ymm2,%ymm2
0xc5,0xe4,0x58,0xdf, // vaddps %ymm7,%ymm3,%ymm3
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_srcover[] = {
0xc4,0x62,0x7d,0x18,0x01, // vbroadcastss (%rcx),%ymm8
@@ -761,6 +808,7 @@ static const unsigned char kSplice_srcover[] = {
0xc4,0xc2,0x55,0xb8,0xc8, // vfmadd231ps %ymm8,%ymm5,%ymm1
0xc4,0xc2,0x4d,0xb8,0xd0, // vfmadd231ps %ymm8,%ymm6,%ymm2
0xc4,0xc2,0x45,0xb8,0xd8, // vfmadd231ps %ymm8,%ymm7,%ymm3
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_dstover[] = {
0xc4,0x62,0x7d,0x18,0x01, // vbroadcastss (%rcx),%ymm8
@@ -769,6 +817,7 @@ static const unsigned char kSplice_dstover[] = {
0xc4,0xc2,0x75,0xb8,0xe8, // vfmadd231ps %ymm8,%ymm1,%ymm5
0xc4,0xc2,0x6d,0xb8,0xf0, // vfmadd231ps %ymm8,%ymm2,%ymm6
0xc4,0xc2,0x65,0xb8,0xf8, // vfmadd231ps %ymm8,%ymm3,%ymm7
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_clamp_0[] = {
0xc4,0x41,0x3c,0x57,0xc0, // vxorps %ymm8,%ymm8,%ymm8
@@ -776,6 +825,7 @@ static const unsigned char kSplice_clamp_0[] = {
0xc4,0xc1,0x74,0x5f,0xc8, // vmaxps %ymm8,%ymm1,%ymm1
0xc4,0xc1,0x6c,0x5f,0xd0, // vmaxps %ymm8,%ymm2,%ymm2
0xc4,0xc1,0x64,0x5f,0xd8, // vmaxps %ymm8,%ymm3,%ymm3
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_clamp_1[] = {
0xc4,0x62,0x7d,0x18,0x01, // vbroadcastss (%rcx),%ymm8
@@ -783,6 +833,7 @@ static const unsigned char kSplice_clamp_1[] = {
0xc4,0xc1,0x74,0x5d,0xc8, // vminps %ymm8,%ymm1,%ymm1
0xc4,0xc1,0x6c,0x5d,0xd0, // vminps %ymm8,%ymm2,%ymm2
0xc4,0xc1,0x64,0x5d,0xd8, // vminps %ymm8,%ymm3,%ymm3
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_clamp_a[] = {
0xc4,0x62,0x7d,0x18,0x01, // vbroadcastss (%rcx),%ymm8
@@ -790,6 +841,7 @@ static const unsigned char kSplice_clamp_a[] = {
0xc5,0xfc,0x5d,0xc3, // vminps %ymm3,%ymm0,%ymm0
0xc5,0xf4,0x5d,0xcb, // vminps %ymm3,%ymm1,%ymm1
0xc5,0xec,0x5d,0xd3, // vminps %ymm3,%ymm2,%ymm2
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_swap[] = {
0xc5,0x7c,0x28,0xc3, // vmovaps %ymm3,%ymm8
@@ -804,23 +856,27 @@ static const unsigned char kSplice_swap[] = {
0xc5,0x7c,0x29,0xd5, // vmovaps %ymm10,%ymm5
0xc5,0x7c,0x29,0xce, // vmovaps %ymm9,%ymm6
0xc5,0x7c,0x29,0xc7, // vmovaps %ymm8,%ymm7
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_move_src_dst[] = {
0xc5,0xfc,0x28,0xe0, // vmovaps %ymm0,%ymm4
0xc5,0xfc,0x28,0xe9, // vmovaps %ymm1,%ymm5
0xc5,0xfc,0x28,0xf2, // vmovaps %ymm2,%ymm6
0xc5,0xfc,0x28,0xfb, // vmovaps %ymm3,%ymm7
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_move_dst_src[] = {
0xc5,0xfc,0x28,0xc4, // vmovaps %ymm4,%ymm0
0xc5,0xfc,0x28,0xcd, // vmovaps %ymm5,%ymm1
0xc5,0xfc,0x28,0xd6, // vmovaps %ymm6,%ymm2
0xc5,0xfc,0x28,0xdf, // vmovaps %ymm7,%ymm3
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_premul[] = {
0xc5,0xfc,0x59,0xc3, // vmulps %ymm3,%ymm0,%ymm0
0xc5,0xf4,0x59,0xcb, // vmulps %ymm3,%ymm1,%ymm1
0xc5,0xec,0x59,0xd3, // vmulps %ymm3,%ymm2,%ymm2
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_unpremul[] = {
0xc4,0x41,0x3c,0x57,0xc0, // vxorps %ymm8,%ymm8,%ymm8
@@ -831,6 +887,7 @@ static const unsigned char kSplice_unpremul[] = {
0xc5,0xbc,0x59,0xc0, // vmulps %ymm0,%ymm8,%ymm0
0xc5,0xbc,0x59,0xc9, // vmulps %ymm1,%ymm8,%ymm1
0xc5,0xbc,0x59,0xd2, // vmulps %ymm2,%ymm8,%ymm2
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_from_srgb[] = {
0xc4,0x62,0x7d,0x18,0x41,0x1c, // vbroadcastss 0x1c(%rcx),%ymm8
@@ -858,6 +915,7 @@ static const unsigned char kSplice_from_srgb[] = {
0xc4,0x42,0x35,0xa8,0xde, // vfmadd213ps %ymm14,%ymm9,%ymm11
0xc4,0xc1,0x6c,0xc2,0xd2,0x01, // vcmpltps %ymm10,%ymm2,%ymm2
0xc4,0xc3,0x25,0x4a,0xd0,0x20, // vblendvps %ymm2,%ymm8,%ymm11,%ymm2
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_to_srgb[] = {
0xc5,0x7c,0x52,0xc0, // vrsqrtps %ymm0,%ymm8
@@ -893,6 +951,7 @@ static const unsigned char kSplice_to_srgb[] = {
0xc5,0x3c,0x59,0xc2, // vmulps %ymm2,%ymm8,%ymm8
0xc4,0xc1,0x6c,0xc2,0xd2,0x01, // vcmpltps %ymm10,%ymm2,%ymm2
0xc4,0xc3,0x35,0x4a,0xd0,0x20, // vblendvps %ymm2,%ymm8,%ymm9,%ymm2
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_scale_u8[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -904,6 +963,7 @@ static const unsigned char kSplice_scale_u8[] = {
0xc5,0xbc,0x59,0xc9, // vmulps %ymm1,%ymm8,%ymm1
0xc5,0xbc,0x59,0xd2, // vmulps %ymm2,%ymm8,%ymm2
0xc5,0xbc,0x59,0xdb, // vmulps %ymm3,%ymm8,%ymm3
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_load_tables[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -928,6 +988,7 @@ static const unsigned char kSplice_load_tables[] = {
0xc5,0xfc,0x5b,0xdb, // vcvtdq2ps %ymm3,%ymm3
0xc4,0x62,0x7d,0x18,0x41,0x08, // vbroadcastss 0x8(%rcx),%ymm8
0xc4,0xc1,0x64,0x59,0xd8, // vmulps %ymm8,%ymm3,%ymm3
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_load_8888[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -948,6 +1009,7 @@ static const unsigned char kSplice_load_8888[] = {
0xc5,0xe5,0x72,0xd3,0x18, // vpsrld $0x18,%ymm3,%ymm3
0xc5,0xfc,0x5b,0xdb, // vcvtdq2ps %ymm3,%ymm3
0xc4,0xc1,0x64,0x59,0xd8, // vmulps %ymm8,%ymm3,%ymm3
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_store_8888[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -967,6 +1029,7 @@ static const unsigned char kSplice_store_8888[] = {
0xc4,0x41,0x2d,0xeb,0xc0, // vpor %ymm8,%ymm10,%ymm8
0xc4,0x41,0x35,0xeb,0xc0, // vpor %ymm8,%ymm9,%ymm8
0xc5,0x7e,0x7f,0x04,0xb8, // vmovdqu %ymm8,(%rax,%rdi,4)
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_load_f16[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -990,6 +1053,7 @@ static const unsigned char kSplice_load_f16[] = {
0xc4,0xe2,0x7d,0x13,0xd2, // vcvtph2ps %xmm2,%ymm2
0xc4,0xc1,0x39,0x6d,0xda, // vpunpckhqdq %xmm10,%xmm8,%xmm3
0xc4,0xe2,0x7d,0x13,0xdb, // vcvtph2ps %xmm3,%ymm3
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_store_f16[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1009,6 +1073,7 @@ static const unsigned char kSplice_store_f16[] = {
0xc5,0x7a,0x7f,0x4c,0xf8,0x20, // vmovdqu %xmm9,0x20(%rax,%rdi,8)
0xc4,0x41,0x39,0x6a,0xc2, // vpunpckhdq %xmm10,%xmm8,%xmm8
0xc5,0x7a,0x7f,0x44,0xf8,0x30, // vmovdqu %xmm8,0x30(%rax,%rdi,8)
+ 0xc3, // return (synthetic)
};
static const unsigned char kSplice_matrix_3x4[] = {
0xc4,0x62,0x7d,0x18,0x0a, // vbroadcastss (%rdx),%ymm9
@@ -1035,6 +1100,7 @@ static const unsigned char kSplice_matrix_3x4[] = {
0xc5,0x7c,0x29,0xc0, // vmovaps %ymm8,%ymm0
0xc5,0x7c,0x29,0xc9, // vmovaps %ymm9,%ymm1
0xc5,0x7c,0x29,0xd2, // vmovaps %ymm10,%ymm2
+ 0xc3, // return (synthetic)
};
#endif
diff --git a/src/splicer/build_stages.py b/src/splicer/build_stages.py
index e301494f2f..900b47ff6d 100755
--- a/src/splicer/build_stages.py
+++ b/src/splicer/build_stages.py
@@ -40,7 +40,7 @@ subprocess.check_call(['clang++'] + cflags + armv7 +
['-c', 'src/splicer/SkSplicer_stages.cpp'] +
['-o', 'armv7.o'])
-def parse_object_file(dst, dot_o, array_type, done, target=None):
+def parse_object_file(dst, dot_o, array_type, jump, ret, target=None):
cmd = [ objdump, '-d', dot_o]
if target:
cmd += ['--target', target]
@@ -68,13 +68,18 @@ def parse_object_file(dst, dot_o, array_type, done, target=None):
for arg in args:
assert 'rip' not in arg # TODO: detect on aarch64 too
- if code == done:
- print >>dst,'};'
- continue
+ # At the end of every stage function there's a jump to next().
+ # We replace that with a ret to make these stages work with an interpreter.
+ if code == jump:
+ code = ret
+ inst = 'return'
+ args = '(synthetic)'
hexed = ''.join('0x'+x+',' for x in code.split(' '))
print >>dst,' ' + hexed + ' '*(44-len(hexed)) + \
'// ' + inst + ' '*(14-len(inst)) + args
+ if code == ret:
+ print >>dst,'};'
with open('src/splicer/SkSplicer_generated.h', 'w') as f:
print >>f,'''/*
@@ -92,11 +97,11 @@ with open('src/splicer/SkSplicer_generated.h', 'w') as f:
#if defined(__aarch64__)
'''
- parse_object_file(f, 'aarch64.o', 'unsigned int', '14000000')
+ parse_object_file(f, 'aarch64.o', 'unsigned int', '14000000', 'd65f03c0')
print >>f,'\n#elif defined(__ARM_NEON__)\n'
- parse_object_file(f, 'armv7.o', 'unsigned int', 'eafffffe',
+ parse_object_file(f, 'armv7.o', 'unsigned int', 'eafffffe', 'e12fff1e',
target='elf32-littlearm')
print >>f,'\n#else\n'
- parse_object_file(f, 'hsw.o', 'unsigned char', 'e9 00 00 00 00')
+ parse_object_file(f, 'hsw.o', 'unsigned char', 'e9 00 00 00 00', 'c3')
print >>f,'\n#endif\n'
print >>f,'#endif//SkSplicer_generated_DEFINED'