aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/splicer/SkSplicer_generated.h
diff options
context:
space:
mode:
author Mike Klein <mtklein@chromium.org> 2017-01-09 17:21:32 -0500
committer Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-01-10 16:15:00 +0000
commit 8e619a2b4eb31753e6fcb4a9ec494d31ace755da (patch)
tree 366015a3ecea77d92fbd62477e1afc0be48166d6 /src/splicer/SkSplicer_generated.h
parent 1e74cad9b4ed8079433d4e62ab3198d97436f5ec (diff)
SkSplicer: start on arm64
Seems to be working. The jump to loop_start might be a little off, but not by much. Correctness is really still a big TODO.

$ adb shell 'cd /data/local/tmp; ./monobench SkRasterPipeline 200'
SkRasterPipeline_… 200
…f16_compile 1x
…f16_run 1.42x
…srgb_compile 2.21x
…srgb_run 2.59x

Change-Id: I0e1acc6404cf3ce8084d9ef8011cbe0b5f1fd6e3
Reviewed-on: https://skia-review.googlesource.com/6811
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/splicer/SkSplicer_generated.h')
-rw-r--r-- src/splicer/SkSplicer_generated.h 276
1 files changed, 276 insertions, 0 deletions
diff --git a/src/splicer/SkSplicer_generated.h b/src/splicer/SkSplicer_generated.h
index ab045839d6..f5f07a2101 100644
--- a/src/splicer/SkSplicer_generated.h
+++ b/src/splicer/SkSplicer_generated.h
@@ -11,6 +11,279 @@
// This file is generated semi-automatically with this command:
// $ src/splicer/build_stages.py > src/splicer/SkSplicer_generated.h
+#if defined(__aarch64__)
+
+static const unsigned int kSplice_clear[] = { // AArch64 words: zero all 128 bits of v0, v1, v2 and v3.
+ 0x6f00e400, // movi v0.2d, #0x0
+ 0x6f00e401, // movi v1.2d, #0x0
+ 0x6f00e402, // movi v2.2d, #0x0
+ 0x6f00e403, // movi v3.2d, #0x0
+};
+static const unsigned int kSplice_plus[] = { // Lane-wise float add: v0..v3 += v4..v7 (v0 pairs with v4, ..., v3 with v7).
+ 0x4e24d400, // fadd v0.4s, v0.4s, v4.4s
+ 0x4e25d421, // fadd v1.4s, v1.4s, v5.4s
+ 0x4e26d442, // fadd v2.4s, v2.4s, v6.4s
+ 0x4e27d463, // fadd v3.4s, v3.4s, v7.4s
+};
+static const unsigned int kSplice_srcover[] = { // v0..v3 += v4..v7 * (c - v3), c = float broadcast from [x3+4] (presumably 1.0f - TODO confirm).
+ 0x91001068, // add x8, x3, #0x4
+ 0x4d40c910, // ld1r {v16.4s}, [x8]
+ 0x4ea3d610, // fsub v16.4s, v16.4s, v3.4s
+ 0x4e24ce00, // fmla v0.4s, v16.4s, v4.4s
+ 0x4e25ce01, // fmla v1.4s, v16.4s, v5.4s
+ 0x4e26ce02, // fmla v2.4s, v16.4s, v6.4s
+ 0x4e27ce03, // fmla v3.4s, v16.4s, v7.4s -- v3 pairs with v7 like the lanes above; NOTE(review): hand-patched, the stage source fed to build_stages.py presumably needs the same fix
+};
+static const unsigned int kSplice_dstover[] = { // v4..v7 += v0..v3 * (c - v7), c = float broadcast from [x3+4] (presumably 1.0f - TODO confirm).
+ 0x91001068, // add x8, x3, #0x4
+ 0x4d40c910, // ld1r {v16.4s}, [x8]
+ 0x4ea7d610, // fsub v16.4s, v16.4s, v7.4s
+ 0x4e20ce04, // fmla v4.4s, v16.4s, v0.4s
+ 0x4e21ce05, // fmla v5.4s, v16.4s, v1.4s
+ 0x4e22ce06, // fmla v6.4s, v16.4s, v2.4s
+ 0x4e23ce07, // fmla v7.4s, v16.4s, v3.4s -- v7 pairs with v3 like the lanes above; NOTE(review): hand-patched, the stage source fed to build_stages.py presumably needs the same fix
+};
+static const unsigned int kSplice_clamp_0[] = { // v0..v3 = max(v0..v3, 0): v16 is zeroed and used as the lower bound.
+ 0x6f00e410, // movi v16.2d, #0x0
+ 0x4e30f400, // fmax v0.4s, v0.4s, v16.4s
+ 0x4e30f421, // fmax v1.4s, v1.4s, v16.4s
+ 0x4e30f442, // fmax v2.4s, v2.4s, v16.4s
+ 0x4e30f463, // fmax v3.4s, v3.4s, v16.4s
+};
+static const unsigned int kSplice_clamp_1[] = { // v0..v3 = min(v0..v3, c), c = float broadcast from [x3+4] (presumably 1.0f - TODO confirm).
+ 0x91001068, // add x8, x3, #0x4
+ 0x4d40c910, // ld1r {v16.4s}, [x8]
+ 0x4eb0f400, // fmin v0.4s, v0.4s, v16.4s
+ 0x4eb0f421, // fmin v1.4s, v1.4s, v16.4s
+ 0x4eb0f442, // fmin v2.4s, v2.4s, v16.4s
+ 0x4eb0f463, // fmin v3.4s, v3.4s, v16.4s
+};
+static const unsigned int kSplice_clamp_a[] = { // v3 = min(v3, c) with c broadcast from [x3+4]; then v0..v2 = min(v0..v2, v3) using the already-clamped v3.
+ 0x91001068, // add x8, x3, #0x4
+ 0x4d40c910, // ld1r {v16.4s}, [x8]
+ 0x4eb0f463, // fmin v3.4s, v3.4s, v16.4s
+ 0x4ea3f400, // fmin v0.4s, v0.4s, v3.4s
+ 0x4ea3f421, // fmin v1.4s, v1.4s, v3.4s
+ 0x4ea3f442, // fmin v2.4s, v2.4s, v3.4s
+};
+static const unsigned int kSplice_swap[] = { // Exchanges v0..v3 with v4..v7, staging the old v0..v3 in v19..v16 (clobbers v16-v19).
+ 0x4ea31c70, // mov v16.16b, v3.16b
+ 0x4ea21c51, // mov v17.16b, v2.16b
+ 0x4ea11c32, // mov v18.16b, v1.16b
+ 0x4ea01c13, // mov v19.16b, v0.16b
+ 0x4ea41c80, // mov v0.16b, v4.16b
+ 0x4ea51ca1, // mov v1.16b, v5.16b
+ 0x4ea61cc2, // mov v2.16b, v6.16b
+ 0x4ea71ce3, // mov v3.16b, v7.16b
+ 0x4eb31e64, // mov v4.16b, v19.16b
+ 0x4eb21e45, // mov v5.16b, v18.16b
+ 0x4eb11e26, // mov v6.16b, v17.16b
+ 0x4eb01e07, // mov v7.16b, v16.16b
+};
+static const unsigned int kSplice_move_src_dst[] = { // Copies v0..v3 into v4..v7; v0..v3 are left unchanged.
+ 0x4ea01c04, // mov v4.16b, v0.16b
+ 0x4ea11c25, // mov v5.16b, v1.16b
+ 0x4ea21c46, // mov v6.16b, v2.16b
+ 0x4ea31c67, // mov v7.16b, v3.16b
+};
+static const unsigned int kSplice_move_dst_src[] = { // Copies v4..v7 into v0..v3; v4..v7 are left unchanged.
+ 0x4ea41c80, // mov v0.16b, v4.16b
+ 0x4ea51ca1, // mov v1.16b, v5.16b
+ 0x4ea61cc2, // mov v2.16b, v6.16b
+ 0x4ea71ce3, // mov v3.16b, v7.16b
+};
+static const unsigned int kSplice_premul[] = { // v0..v2 *= v3 (v3 itself is not modified; presumably the alpha lane - TODO confirm).
+ 0x6e23dc00, // fmul v0.4s, v0.4s, v3.4s
+ 0x6e23dc21, // fmul v1.4s, v1.4s, v3.4s
+ 0x6e23dc42, // fmul v2.4s, v2.4s, v3.4s
+};
+static const unsigned int kSplice_unpremul[] = { // v16 = c / v3 (c broadcast from [x3+4]), forced to 0 in lanes where v3 == 0; then v0..v2 *= v16.
+ 0x91001068, // add x8, x3, #0x4
+ 0x4d40c910, // ld1r {v16.4s}, [x8]
+ 0x4ea0d871, // fcmeq v17.4s, v3.4s, #0.0
+ 0x6e23fe10, // fdiv v16.4s, v16.4s, v3.4s
+ 0x4e711e10, // bic v16.16b, v16.16b, v17.16b
+ 0x6e20de00, // fmul v0.4s, v16.4s, v0.4s
+ 0x6e21de01, // fmul v1.4s, v16.4s, v1.4s
+ 0x6e22de02, // fmul v2.4s, v16.4s, v2.4s
+};
+static const unsigned int kSplice_from_srgb[] = { // Per lane x of v0..v2: (t > x) ? x*k1 : c10 + (c14 + k0*x)*x*x, with c10=[x3+0x10], c14=[x3+0x14], k0=[x3+24], k1=[x3+28], t=[x3+0x20]; v3 is not touched. Clobbers v16-v24.
+ 0x91005068, // add x8, x3, #0x14
+ 0x4d40c910, // ld1r {v16.4s}, [x8]
+ 0x91004068, // add x8, x3, #0x10
+ 0x4d40c911, // ld1r {v17.4s}, [x8]
+ 0x2d434c72, // ldp s18, s19, [x3,#24]
+ 0x6e22dc54, // fmul v20.4s, v2.4s, v2.4s
+ 0x4eb01e15, // mov v21.16b, v16.16b
+ 0x4eb01e17, // mov v23.16b, v16.16b
+ 0x4f921050, // fmla v16.4s, v2.4s, v18.s[0]
+ 0x4eb11e36, // mov v22.16b, v17.16b
+ 0x4eb11e38, // mov v24.16b, v17.16b
+ 0x4e34ce11, // fmla v17.4s, v16.4s, v20.4s
+ 0x6e20dc10, // fmul v16.4s, v0.4s, v0.4s
+ 0x91008068, // add x8, x3, #0x20
+ 0x4f921015, // fmla v21.4s, v0.4s, v18.s[0]
+ 0x4e30ceb6, // fmla v22.4s, v21.4s, v16.4s
+ 0x4d40c910, // ld1r {v16.4s}, [x8]
+ 0x6e21dc34, // fmul v20.4s, v1.4s, v1.4s
+ 0x4f921037, // fmla v23.4s, v1.4s, v18.s[0]
+ 0x4f939015, // fmul v21.4s, v0.4s, v19.s[0]
+ 0x4f939032, // fmul v18.4s, v1.4s, v19.s[0]
+ 0x4f939053, // fmul v19.4s, v2.4s, v19.s[0]
+ 0x6ea0e600, // fcmgt v0.4s, v16.4s, v0.4s
+ 0x6ea1e601, // fcmgt v1.4s, v16.4s, v1.4s
+ 0x6ea2e602, // fcmgt v2.4s, v16.4s, v2.4s
+ 0x4e34cef8, // fmla v24.4s, v23.4s, v20.4s
+ 0x6e761ea0, // bsl v0.16b, v21.16b, v22.16b
+ 0x6e781e41, // bsl v1.16b, v18.16b, v24.16b
+ 0x6e711e62, // bsl v2.16b, v19.16b, v17.16b
+};
+static const unsigned int kSplice_to_srgb[] = { // Per lane x of v0..v2: (t > x) ? x*k : min(c, a + p*s + q*f), where r = refined rsqrt estimate of x, s = refined reciprocal estimate of r, f = refined rsqrt estimate of r; k=[x3+36], q=[x3+40], p=[x3+44], a=[x3+0x30], t=[x3+0x34], c=[x3+4]. v3 is not touched. Clobbers v16-v29.
+ 0x6ea1d810, // frsqrte v16.4s, v0.4s
+ 0x6ea1d835, // frsqrte v21.4s, v1.4s
+ 0x6e30de17, // fmul v23.4s, v16.4s, v16.4s
+ 0x6ea1d856, // frsqrte v22.4s, v2.4s
+ 0x6e35deb9, // fmul v25.4s, v21.4s, v21.4s
+ 0x4eb7fc17, // frsqrts v23.4s, v0.4s, v23.4s
+ 0x9100c068, // add x8, x3, #0x30
+ 0x6e36deda, // fmul v26.4s, v22.4s, v22.4s
+ 0x4eb9fc39, // frsqrts v25.4s, v1.4s, v25.4s
+ 0x6e37de10, // fmul v16.4s, v16.4s, v23.4s
+ 0x2d44c871, // ldp s17, s18, [x3,#36]
+ 0x4d40c914, // ld1r {v20.4s}, [x8]
+ 0x4ebafc5a, // frsqrts v26.4s, v2.4s, v26.4s
+ 0x6e39deb5, // fmul v21.4s, v21.4s, v25.4s
+ 0x4ea1da17, // frecpe v23.4s, v16.4s
+ 0xbd402c73, // ldr s19, [x3,#44]
+ 0x9100d068, // add x8, x3, #0x34
+ 0x6e3aded6, // fmul v22.4s, v22.4s, v26.4s
+ 0x4ea1dabb, // frecpe v27.4s, v21.4s
+ 0x4e37fe1d, // frecps v29.4s, v16.4s, v23.4s
+ 0x4d40c918, // ld1r {v24.4s}, [x8]
+ 0x4ea1dadc, // frecpe v28.4s, v22.4s
+ 0x6e3ddef7, // fmul v23.4s, v23.4s, v29.4s
+ 0x4e3bfebd, // frecps v29.4s, v21.4s, v27.4s
+ 0x6e3ddf7b, // fmul v27.4s, v27.4s, v29.4s
+ 0x4e3cfedd, // frecps v29.4s, v22.4s, v28.4s
+ 0x6e3ddf9c, // fmul v28.4s, v28.4s, v29.4s
+ 0x4eb41e9d, // mov v29.16b, v20.16b
+ 0x6ea1da19, // frsqrte v25.4s, v16.4s
+ 0x4f9312fd, // fmla v29.4s, v23.4s, v19.s[0]
+ 0x4eb41e97, // mov v23.16b, v20.16b
+ 0x4f91901a, // fmul v26.4s, v0.4s, v17.s[0]
+ 0x4f931377, // fmla v23.4s, v27.4s, v19.s[0]
+ 0x6ea1dabb, // frsqrte v27.4s, v21.4s
+ 0x4f931394, // fmla v20.4s, v28.4s, v19.s[0]
+ 0x4f919033, // fmul v19.4s, v1.4s, v17.s[0]
+ 0x4f919051, // fmul v17.4s, v2.4s, v17.s[0]
+ 0x6ea0e700, // fcmgt v0.4s, v24.4s, v0.4s
+ 0x6ea1e701, // fcmgt v1.4s, v24.4s, v1.4s
+ 0x6ea2e702, // fcmgt v2.4s, v24.4s, v2.4s
+ 0x6e39df38, // fmul v24.4s, v25.4s, v25.4s
+ 0x6ea1dadc, // frsqrte v28.4s, v22.4s
+ 0x4eb8fe10, // frsqrts v16.4s, v16.4s, v24.4s
+ 0x6e3bdf78, // fmul v24.4s, v27.4s, v27.4s
+ 0x4eb8feb5, // frsqrts v21.4s, v21.4s, v24.4s
+ 0x6e3cdf98, // fmul v24.4s, v28.4s, v28.4s
+ 0x91001068, // add x8, x3, #0x4
+ 0x4eb8fed6, // frsqrts v22.4s, v22.4s, v24.4s
+ 0x4d40c918, // ld1r {v24.4s}, [x8]
+ 0x6e30df30, // fmul v16.4s, v25.4s, v16.4s
+ 0x6e35df75, // fmul v21.4s, v27.4s, v21.4s
+ 0x6e36df96, // fmul v22.4s, v28.4s, v22.4s
+ 0x4f92121d, // fmla v29.4s, v16.4s, v18.s[0]
+ 0x4f9212b7, // fmla v23.4s, v21.4s, v18.s[0]
+ 0x4f9212d4, // fmla v20.4s, v22.4s, v18.s[0]
+ 0x4ebdf710, // fmin v16.4s, v24.4s, v29.4s
+ 0x4eb7f712, // fmin v18.4s, v24.4s, v23.4s
+ 0x4eb4f714, // fmin v20.4s, v24.4s, v20.4s
+ 0x6e701f40, // bsl v0.16b, v26.16b, v16.16b
+ 0x6e721e61, // bsl v1.16b, v19.16b, v18.16b
+ 0x6e741e22, // bsl v2.16b, v17.16b, v20.16b
+};
+static const unsigned int kSplice_scale_u8[] = { // Reads 4 bytes at (pointer loaded from [x2]) + x0, widens them to 32-bit lanes, converts to float, multiplies by the float at [x3+12], then v0..v3 *= that vector. Clobbers x8-x10, v16, v17.
+ 0xf9400048, // ldr x8, [x2]
+ 0xbd400c71, // ldr s17, [x3,#12]
+ 0x8b000108, // add x8, x8, x0
+ 0x39400109, // ldrb w9, [x8]
+ 0x3940050a, // ldrb w10, [x8,#1]
+ 0x4e021d30, // mov v16.h[0], w9
+ 0x39400909, // ldrb w9, [x8,#2]
+ 0x39400d08, // ldrb w8, [x8,#3]
+ 0x4e061d50, // mov v16.h[1], w10
+ 0x4e0a1d30, // mov v16.h[2], w9
+ 0x4e0e1d10, // mov v16.h[3], w8
+ 0x2f07b7f0, // bic v16.4h, #0xff, lsl #8
+ 0x2f10a610, // uxtl v16.4s, v16.4h
+ 0x6e21da10, // ucvtf v16.4s, v16.4s
+ 0x4f919210, // fmul v16.4s, v16.4s, v17.s[0]
+ 0x6e20de00, // fmul v0.4s, v16.4s, v0.4s
+ 0x6e21de01, // fmul v1.4s, v16.4s, v1.4s
+ 0x6e22de02, // fmul v2.4s, v16.4s, v2.4s
+ 0x6e23de03, // fmul v3.4s, v16.4s, v3.4s
+};
+static const unsigned int kSplice_load_8888[] = { // Loads 16 bytes at (pointer loaded from [x2]) + x0*4 and splits each 32-bit word into four fields: word & m -> v0, (word>>8) & m -> v1, (word>>16) & m -> v2, word>>24 -> v3, with m broadcast from [x3] (presumably 0xff - TODO confirm); each is converted to float and multiplied by the float at [x3+12]. Clobbers x8, x9, v16-v18.
+ 0xf9400048, // ldr x8, [x2]
+ 0xd37ef409, // lsl x9, x0, #2
+ 0x4d40c860, // ld1r {v0.4s}, [x3]
+ 0xbd400c63, // ldr s3, [x3,#12]
+ 0x3ce96901, // ldr q1, [x8,x9]
+ 0x4e211c02, // and v2.16b, v0.16b, v1.16b
+ 0x6f380430, // ushr v16.4s, v1.4s, #8
+ 0x6f300431, // ushr v17.4s, v1.4s, #16
+ 0x6f280421, // ushr v1.4s, v1.4s, #24
+ 0x4e21d842, // scvtf v2.4s, v2.4s
+ 0x4e301c10, // and v16.16b, v0.16b, v16.16b
+ 0x4e311c11, // and v17.16b, v0.16b, v17.16b
+ 0x4e21d832, // scvtf v18.4s, v1.4s
+ 0x4f839040, // fmul v0.4s, v2.4s, v3.s[0]
+ 0x4e21da01, // scvtf v1.4s, v16.4s
+ 0x4e21da22, // scvtf v2.4s, v17.4s
+ 0x4f839021, // fmul v1.4s, v1.4s, v3.s[0]
+ 0x4f839042, // fmul v2.4s, v2.4s, v3.s[0]
+ 0x4f839243, // fmul v3.4s, v18.4s, v3.s[0]
+};
+static const unsigned int kSplice_store_8888[] = { // Multiplies v0..v3 by the float at [x3+8], converts each with fcvtnu, packs as v0 | v1<<8 | v2<<16 | v3<<24 per 32-bit lane, and stores the 16 bytes at (pointer loaded from [x2]) + x0*4. v0..v3 are not modified; clobbers x8, x9, v16-v18.
+ 0xbd400870, // ldr s16, [x3,#8]
+ 0xf9400048, // ldr x8, [x2]
+ 0xd37ef409, // lsl x9, x0, #2
+ 0x4f909032, // fmul v18.4s, v1.4s, v16.s[0]
+ 0x4f909011, // fmul v17.4s, v0.4s, v16.s[0]
+ 0x6e21aa52, // fcvtnu v18.4s, v18.4s
+ 0x6e21aa31, // fcvtnu v17.4s, v17.4s
+ 0x4f285652, // shl v18.4s, v18.4s, #8
+ 0x4eb11e51, // orr v17.16b, v18.16b, v17.16b
+ 0x4f909052, // fmul v18.4s, v2.4s, v16.s[0]
+ 0x4f909070, // fmul v16.4s, v3.4s, v16.s[0]
+ 0x6e21aa52, // fcvtnu v18.4s, v18.4s
+ 0x6e21aa10, // fcvtnu v16.4s, v16.4s
+ 0x4f305652, // shl v18.4s, v18.4s, #16
+ 0x4eb21e31, // orr v17.16b, v17.16b, v18.16b
+ 0x4f385610, // shl v16.4s, v16.4s, #24
+ 0x4eb01e30, // orr v16.16b, v17.16b, v16.16b
+ 0x3ca96910, // str q16, [x8,x9]
+};
+static const unsigned int kSplice_load_f16[] = { // ld4 de-interleaves four 4x16-bit vectors from (pointer loaded from [x2]) + x0*8 into v16..v19, then fcvtl widens them to 32-bit floats in v0..v3. Clobbers x8, v16-v19.
+ 0xf9400048, // ldr x8, [x2]
+ 0x8b000d08, // add x8, x8, x0, lsl #3
+ 0x0c400510, // ld4 {v16.4h-v19.4h}, [x8]
+ 0x0e217a00, // fcvtl v0.4s, v16.4h
+ 0x0e217a21, // fcvtl v1.4s, v17.4h
+ 0x0e217a42, // fcvtl v2.4s, v18.4h
+ 0x0e217a63, // fcvtl v3.4s, v19.4h
+};
+static const unsigned int kSplice_store_f16[] = { // fcvtn narrows v0..v3 to 16-bit floats in v16..v19, then st4 interleaves and stores them at (pointer loaded from [x2]) + x0*8. v0..v3 are not modified; clobbers x8, v16-v19.
+ 0xf9400048, // ldr x8, [x2]
+ 0x0e216810, // fcvtn v16.4h, v0.4s
+ 0x0e216831, // fcvtn v17.4h, v1.4s
+ 0x0e216852, // fcvtn v18.4h, v2.4s
+ 0x8b000d08, // add x8, x8, x0, lsl #3
+ 0x0e216873, // fcvtn v19.4h, v3.4s
+ 0x0c000510, // st4 {v16.4h-v19.4h}, [x8]
+};
+
+#else
+
static const unsigned char kSplice_clear[] = {
0xc5,0xfc,0x57,0xc0, // vxorps %ymm0, %ymm0, %ymm0
0xc5,0xf4,0x57,0xc9, // vxorps %ymm1, %ymm1, %ymm1
@@ -255,4 +528,7 @@ static const unsigned char kSplice_store_f16[] = {
0xc4,0x41,0x39,0x6a,0xc2, // vpunpckhdq %xmm10, %xmm8, %xmm8
0xc5,0x7a,0x7f,0x44,0xf8,0x30, // vmovdqu %xmm8, 0x30(%rax,%rdi,8)
};
+
+#endif
+
#endif//SkSplicer_generated_DEFINED