about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- bench/SkRasterPipelineBench.cpp 21
-rw-r--r-- src/jumper/SkJumper_generated.S 2709
-rw-r--r-- src/jumper/SkJumper_generated_win.S 1916
-rw-r--r-- src/jumper/SkJumper_stages.cpp 30
-rw-r--r-- tests/ParametricStageTest.cpp 33
5 files changed, 3137 insertions, 1572 deletions
diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp
index 0ae48ee15d..fa2df4b3e1 100644
--- a/bench/SkRasterPipelineBench.cpp
+++ b/bench/SkRasterPipelineBench.cpp
@@ -90,3 +90,24 @@ public:
}
};
DEF_BENCH( return (new SkRasterPipelineLegacyBench); )
+
+class SkRasterPipeline_2dot2 : public Benchmark {  // Benchmark: constant color -> from_2dot2 -> to_2dot2 pipeline.
+public:
+ bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }  // Accepts only the non-rendering backend.
+ const char* onGetName() override {  // Name this benchmark reports.
+ return "SkRasterPipeline_2dot2";
+ }
+
+ void onDraw(int loops, SkCanvas*) override {  // Builds the pipeline once, then runs it `loops` times; the canvas argument is unused.
+ SkColor4f c = { 1.0f, 1.0f, 1.0f, 1.0f };  // Constant input color, all four components 1.0f.
+ SkRasterPipeline p;
+ p.append(SkRasterPipeline::constant_color, &c);  // Stage receives a pointer to c, which stays in scope for every run() below.
+ p.append(SkRasterPipeline::from_2dot2);  // The two stages this benchmark is named for, appended back to back.
+ p.append(SkRasterPipeline::to_2dot2);
+
+ while (loops --> 0) {  // Parses as (loops--) > 0: the body executes `loops` times when loops is positive.
+ p.run(0,N);  // NOTE(review): N is declared outside this hunk; presumably the pixel count per run -- confirm.
+ }
+ }
+};
+DEF_BENCH( return (new SkRasterPipeline_2dot2); )  // Same registration macro form as the SkRasterPipelineLegacyBench line above the class.
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 6b73406fc5..d9540e94ea 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -978,205 +978,186 @@ HIDDEN _sk_from_2dot2_aarch64
.globl _sk_from_2dot2_aarch64
FUNCTION(_sk_from_2dot2_aarch64)
_sk_from_2dot2_aarch64:
- .long 0x6ea1d810 // frsqrte v16.4s, v0.4s
- .long 0x6ea1d832 // frsqrte v18.4s, v1.4s
- .long 0x6e30de15 // fmul v21.4s, v16.4s, v16.4s
- .long 0x6e20dc11 // fmul v17.4s, v0.4s, v0.4s
- .long 0x6ea1d854 // frsqrte v20.4s, v2.4s
- .long 0x6e32de56 // fmul v22.4s, v18.4s, v18.4s
- .long 0x4eb5fc00 // frsqrts v0.4s, v0.4s, v21.4s
- .long 0x6e21dc33 // fmul v19.4s, v1.4s, v1.4s
- .long 0x6e34de97 // fmul v23.4s, v20.4s, v20.4s
- .long 0x4eb6fc21 // frsqrts v1.4s, v1.4s, v22.4s
- .long 0x6e20de00 // fmul v0.4s, v16.4s, v0.4s
- .long 0x4eb7fc55 // frsqrts v21.4s, v2.4s, v23.4s
- .long 0x6e21de41 // fmul v1.4s, v18.4s, v1.4s
- .long 0x6ea1d812 // frsqrte v18.4s, v0.4s
- .long 0x6e35de90 // fmul v16.4s, v20.4s, v21.4s
- .long 0x6ea1d834 // frsqrte v20.4s, v1.4s
- .long 0x6e32de56 // fmul v22.4s, v18.4s, v18.4s
- .long 0x6ea1da15 // frsqrte v21.4s, v16.4s
- .long 0x6e34de97 // fmul v23.4s, v20.4s, v20.4s
- .long 0x4eb6fc00 // frsqrts v0.4s, v0.4s, v22.4s
- .long 0x6e35deb6 // fmul v22.4s, v21.4s, v21.4s
- .long 0x4eb7fc21 // frsqrts v1.4s, v1.4s, v23.4s
- .long 0x6e20de40 // fmul v0.4s, v18.4s, v0.4s
- .long 0x4eb6fe10 // frsqrts v16.4s, v16.4s, v22.4s
- .long 0x6e21de81 // fmul v1.4s, v20.4s, v1.4s
- .long 0x6ea1d812 // frsqrte v18.4s, v0.4s
- .long 0x6e30deb0 // fmul v16.4s, v21.4s, v16.4s
- .long 0x6ea1d834 // frsqrte v20.4s, v1.4s
- .long 0x6e32de56 // fmul v22.4s, v18.4s, v18.4s
- .long 0x6ea1da15 // frsqrte v21.4s, v16.4s
- .long 0x6e34de97 // fmul v23.4s, v20.4s, v20.4s
- .long 0x4eb6fc00 // frsqrts v0.4s, v0.4s, v22.4s
- .long 0x6e35deb6 // fmul v22.4s, v21.4s, v21.4s
- .long 0x4eb7fc21 // frsqrts v1.4s, v1.4s, v23.4s
- .long 0x6e20de40 // fmul v0.4s, v18.4s, v0.4s
- .long 0x4eb6fe10 // frsqrts v16.4s, v16.4s, v22.4s
- .long 0x6e21de81 // fmul v1.4s, v20.4s, v1.4s
- .long 0x6ea1d812 // frsqrte v18.4s, v0.4s
- .long 0x6e30deb0 // fmul v16.4s, v21.4s, v16.4s
- .long 0x6ea1d834 // frsqrte v20.4s, v1.4s
- .long 0x6e32de56 // fmul v22.4s, v18.4s, v18.4s
- .long 0x6ea1da15 // frsqrte v21.4s, v16.4s
- .long 0x6e34de97 // fmul v23.4s, v20.4s, v20.4s
- .long 0x4eb6fc00 // frsqrts v0.4s, v0.4s, v22.4s
- .long 0x6e35deb6 // fmul v22.4s, v21.4s, v21.4s
- .long 0x4eb7fc21 // frsqrts v1.4s, v1.4s, v23.4s
- .long 0x6e20de40 // fmul v0.4s, v18.4s, v0.4s
- .long 0x4eb6fe10 // frsqrts v16.4s, v16.4s, v22.4s
- .long 0x6e21de81 // fmul v1.4s, v20.4s, v1.4s
- .long 0x6ea1d812 // frsqrte v18.4s, v0.4s
- .long 0x6e20dc14 // fmul v20.4s, v0.4s, v0.4s
- .long 0x6e30deb0 // fmul v16.4s, v21.4s, v16.4s
- .long 0x6ea1d835 // frsqrte v21.4s, v1.4s
- .long 0x6e21dc36 // fmul v22.4s, v1.4s, v1.4s
- .long 0x6e32de57 // fmul v23.4s, v18.4s, v18.4s
- .long 0x6e34dc14 // fmul v20.4s, v0.4s, v20.4s
- .long 0x4eb7fc00 // frsqrts v0.4s, v0.4s, v23.4s
- .long 0x6ea1da17 // frsqrte v23.4s, v16.4s
- .long 0x6e34de31 // fmul v17.4s, v17.4s, v20.4s
- .long 0x6e35deb4 // fmul v20.4s, v21.4s, v21.4s
- .long 0x6e36dc36 // fmul v22.4s, v1.4s, v22.4s
- .long 0x4eb4fc21 // frsqrts v1.4s, v1.4s, v20.4s
- .long 0x6e30de14 // fmul v20.4s, v16.4s, v16.4s
- .long 0x6e36de73 // fmul v19.4s, v19.4s, v22.4s
- .long 0x6e37def6 // fmul v22.4s, v23.4s, v23.4s
- .long 0x6e20de40 // fmul v0.4s, v18.4s, v0.4s
- .long 0x6e34de14 // fmul v20.4s, v16.4s, v20.4s
- .long 0x4eb6fe10 // frsqrts v16.4s, v16.4s, v22.4s
- .long 0x6e22dc42 // fmul v2.4s, v2.4s, v2.4s
- .long 0x6e21dea1 // fmul v1.4s, v21.4s, v1.4s
- .long 0x6ea1d812 // frsqrte v18.4s, v0.4s
- .long 0x6e34dc42 // fmul v2.4s, v2.4s, v20.4s
- .long 0x6e30def0 // fmul v16.4s, v23.4s, v16.4s
- .long 0x6ea1d834 // frsqrte v20.4s, v1.4s
- .long 0x6e32de56 // fmul v22.4s, v18.4s, v18.4s
- .long 0x6ea1da15 // frsqrte v21.4s, v16.4s
- .long 0x4eb6fc00 // frsqrts v0.4s, v0.4s, v22.4s
- .long 0x6e34de96 // fmul v22.4s, v20.4s, v20.4s
- .long 0x4eb6fc21 // frsqrts v1.4s, v1.4s, v22.4s
- .long 0x6e35deb6 // fmul v22.4s, v21.4s, v21.4s
- .long 0x4eb6fe10 // frsqrts v16.4s, v16.4s, v22.4s
+ .long 0x52b85f08 // mov w8, #0xc2f80000
+ .long 0x728e6ee8 // movk w8, #0x7377
+ .long 0x4e040d11 // dup v17.4s, w8
+ .long 0x52a7f7e8 // mov w8, #0x3fbf0000
+ .long 0x7297eea8 // movk w8, #0xbf75
+ .long 0x4e040d12 // dup v18.4s, w8
+ .long 0x52a7d688 // mov w8, #0x3eb40000
+ .long 0x72889f28 // movk w8, #0x44f9
+ .long 0x4e040d13 // dup v19.4s, w8
+ .long 0x52a7fb88 // mov w8, #0x3fdc0000
+ .long 0x729d3468 // movk w8, #0xe9a3
+ .long 0x4e040d14 // dup v20.4s, w8
+ .long 0x52a80188 // mov w8, #0x400c0000
+ .long 0x4f03d7fa // movi v26.4s, #0x7f, msl #16
+ .long 0x729999a8 // movk w8, #0xcccd
+ .long 0x4e21d818 // scvtf v24.4s, v0.4s
+ .long 0x4f016690 // movi v16.4s, #0x34, lsl #24
+ .long 0x4e040d15 // dup v21.4s, w8
+ .long 0x52a85e48 // mov w8, #0x42f20000
+ .long 0x4e21d85b // scvtf v27.4s, v2.4s
+ .long 0x4e3a1c00 // and v0.16b, v0.16b, v26.16b
+ .long 0x4e3a1c42 // and v2.16b, v2.16b, v26.16b
+ .long 0x4e3a1c3a // and v26.16b, v1.16b, v26.16b
+ .long 0x72918a28 // movk w8, #0x8c51
+ .long 0x4eb11e3c // mov v28.16b, v17.16b
+ .long 0x4eb11e3d // mov v29.16b, v17.16b
+ .long 0x4e3bce11 // fmla v17.4s, v16.4s, v27.4s
+ .long 0x4e21d821 // scvtf v1.4s, v1.4s
+ .long 0x4f0177e0 // orr v0.4s, #0x3f, lsl #24
+ .long 0x4f0177fa // orr v26.4s, #0x3f, lsl #24
+ .long 0x4f0177e2 // orr v2.4s, #0x3f, lsl #24
+ .long 0x4e040d17 // dup v23.4s, w8
+ .long 0x52a7f7c8 // mov w8, #0x3fbe0000
+ .long 0x4e38ce1c // fmla v28.4s, v16.4s, v24.4s
+ .long 0x4e21ce1d // fmla v29.4s, v16.4s, v1.4s
+ .long 0x4e33d401 // fadd v1.4s, v0.4s, v19.4s
+ .long 0x4e33d750 // fadd v16.4s, v26.4s, v19.4s
+ .long 0x4eb2cc51 // fmls v17.4s, v2.4s, v18.4s
+ .long 0x4e33d442 // fadd v2.4s, v2.4s, v19.4s
+ .long 0x729791a8 // movk w8, #0xbc8d
+ .long 0x4eb2cc1c // fmls v28.4s, v0.4s, v18.4s
+ .long 0x6e21fe80 // fdiv v0.4s, v20.4s, v1.4s
+ .long 0x4eb2cf5d // fmls v29.4s, v26.4s, v18.4s
+ .long 0x6e30fe81 // fdiv v1.4s, v20.4s, v16.4s
+ .long 0x6e22fe82 // fdiv v2.4s, v20.4s, v2.4s
+ .long 0x4e040d16 // dup v22.4s, w8
+ .long 0x52a81348 // mov w8, #0x409a0000
+ .long 0x4ea0d780 // fsub v0.4s, v28.4s, v0.4s
+ .long 0x4ea1d7a1 // fsub v1.4s, v29.4s, v1.4s
+ .long 0x4ea2d622 // fsub v2.4s, v17.4s, v2.4s
+ .long 0x729ebf08 // movk w8, #0xf5f8
+ .long 0x6e35dc00 // fmul v0.4s, v0.4s, v21.4s
+ .long 0x6e35dc21 // fmul v1.4s, v1.4s, v21.4s
+ .long 0x6e35dc42 // fmul v2.4s, v2.4s, v21.4s
+ .long 0x4e040d19 // dup v25.4s, w8
+ .long 0x52a83ba8 // mov w8, #0x41dd0000
+ .long 0x4e219810 // frintm v16.4s, v0.4s
+ .long 0x4e219832 // frintm v18.4s, v1.4s
+ .long 0x4e219854 // frintm v20.4s, v2.4s
+ .long 0x729a5fc8 // movk w8, #0xd2fe
+ .long 0x4e37d411 // fadd v17.4s, v0.4s, v23.4s
+ .long 0x4e37d433 // fadd v19.4s, v1.4s, v23.4s
+ .long 0x4e37d455 // fadd v21.4s, v2.4s, v23.4s
+ .long 0x4eb0d400 // fsub v0.4s, v0.4s, v16.4s
+ .long 0x4eb2d421 // fsub v1.4s, v1.4s, v18.4s
+ .long 0x4eb4d442 // fsub v2.4s, v2.4s, v20.4s
+ .long 0x4e040d18 // dup v24.4s, w8
+ .long 0x4eb6cc11 // fmls v17.4s, v0.4s, v22.4s
+ .long 0x4ea0d720 // fsub v0.4s, v25.4s, v0.4s
+ .long 0x4eb6cc33 // fmls v19.4s, v1.4s, v22.4s
+ .long 0x4ea1d721 // fsub v1.4s, v25.4s, v1.4s
+ .long 0x4eb6cc55 // fmls v21.4s, v2.4s, v22.4s
+ .long 0x4ea2d722 // fsub v2.4s, v25.4s, v2.4s
.long 0xf8408423 // ldr x3, [x1], #8
- .long 0x6e20de40 // fmul v0.4s, v18.4s, v0.4s
- .long 0x6e21de81 // fmul v1.4s, v20.4s, v1.4s
- .long 0x6e30deb0 // fmul v16.4s, v21.4s, v16.4s
- .long 0x6f00e412 // movi v18.2d, #0x0
- .long 0x6e20de20 // fmul v0.4s, v17.4s, v0.4s
- .long 0x6e21de61 // fmul v1.4s, v19.4s, v1.4s
- .long 0x6e30dc42 // fmul v2.4s, v2.4s, v16.4s
- .long 0x4e32f400 // fmax v0.4s, v0.4s, v18.4s
- .long 0x4e32f421 // fmax v1.4s, v1.4s, v18.4s
- .long 0x4e32f442 // fmax v2.4s, v2.4s, v18.4s
+ .long 0x6e20ff00 // fdiv v0.4s, v24.4s, v0.4s
+ .long 0x6e21ff01 // fdiv v1.4s, v24.4s, v1.4s
+ .long 0x6e22ff02 // fdiv v2.4s, v24.4s, v2.4s
+ .long 0x4f02657b // movi v27.4s, #0x4b, lsl #24
+ .long 0x4e20d620 // fadd v0.4s, v17.4s, v0.4s
+ .long 0x4e21d661 // fadd v1.4s, v19.4s, v1.4s
+ .long 0x4e22d6a2 // fadd v2.4s, v21.4s, v2.4s
+ .long 0x6e3bdc00 // fmul v0.4s, v0.4s, v27.4s
+ .long 0x6e3bdc21 // fmul v1.4s, v1.4s, v27.4s
+ .long 0x6e3bdc42 // fmul v2.4s, v2.4s, v27.4s
+ .long 0x6e21a800 // fcvtnu v0.4s, v0.4s
+ .long 0x6e21a821 // fcvtnu v1.4s, v1.4s
+ .long 0x6e21a842 // fcvtnu v2.4s, v2.4s
.long 0xd61f0060 // br x3
HIDDEN _sk_to_2dot2_aarch64
.globl _sk_to_2dot2_aarch64
FUNCTION(_sk_to_2dot2_aarch64)
_sk_to_2dot2_aarch64:
- .long 0x6ea1d810 // frsqrte v16.4s, v0.4s
- .long 0x6e30de13 // fmul v19.4s, v16.4s, v16.4s
- .long 0x6ea1d831 // frsqrte v17.4s, v1.4s
- .long 0x4eb3fc00 // frsqrts v0.4s, v0.4s, v19.4s
- .long 0x6ea1d852 // frsqrte v18.4s, v2.4s
- .long 0x6e31de34 // fmul v20.4s, v17.4s, v17.4s
- .long 0x6e20de00 // fmul v0.4s, v16.4s, v0.4s
- .long 0x6e32de55 // fmul v21.4s, v18.4s, v18.4s
- .long 0x4eb4fc21 // frsqrts v1.4s, v1.4s, v20.4s
- .long 0x6ea1d810 // frsqrte v16.4s, v0.4s
- .long 0x4eb5fc42 // frsqrts v2.4s, v2.4s, v21.4s
- .long 0x6e21de21 // fmul v1.4s, v17.4s, v1.4s
- .long 0x4ea1d811 // frecpe v17.4s, v0.4s
- .long 0x6e30de16 // fmul v22.4s, v16.4s, v16.4s
- .long 0x6e22de42 // fmul v2.4s, v18.4s, v2.4s
- .long 0x6ea1d832 // frsqrte v18.4s, v1.4s
- .long 0x4eb6fc16 // frsqrts v22.4s, v0.4s, v22.4s
- .long 0x4e31fc00 // frecps v0.4s, v0.4s, v17.4s
- .long 0x4ea1d833 // frecpe v19.4s, v1.4s
- .long 0x6e20de20 // fmul v0.4s, v17.4s, v0.4s
- .long 0x6e32de51 // fmul v17.4s, v18.4s, v18.4s
- .long 0x6ea1d854 // frsqrte v20.4s, v2.4s
- .long 0x4eb1fc31 // frsqrts v17.4s, v1.4s, v17.4s
- .long 0x4e33fc21 // frecps v1.4s, v1.4s, v19.4s
- .long 0x6e21de61 // fmul v1.4s, v19.4s, v1.4s
- .long 0x6e34de93 // fmul v19.4s, v20.4s, v20.4s
- .long 0x4eb3fc53 // frsqrts v19.4s, v2.4s, v19.4s
- .long 0x6e36de10 // fmul v16.4s, v16.4s, v22.4s
- .long 0x6e31de51 // fmul v17.4s, v18.4s, v17.4s
- .long 0x6e33de92 // fmul v18.4s, v20.4s, v19.4s
- .long 0x6ea1da13 // frsqrte v19.4s, v16.4s
- .long 0x4ea1d855 // frecpe v21.4s, v2.4s
- .long 0x6e33de76 // fmul v22.4s, v19.4s, v19.4s
- .long 0x4e35fc42 // frecps v2.4s, v2.4s, v21.4s
- .long 0x6ea1da34 // frsqrte v20.4s, v17.4s
- .long 0x4eb6fe10 // frsqrts v16.4s, v16.4s, v22.4s
- .long 0x6e22dea2 // fmul v2.4s, v21.4s, v2.4s
- .long 0x6ea1da55 // frsqrte v21.4s, v18.4s
- .long 0x6e34de96 // fmul v22.4s, v20.4s, v20.4s
- .long 0x6e30de70 // fmul v16.4s, v19.4s, v16.4s
- .long 0x4eb6fe31 // frsqrts v17.4s, v17.4s, v22.4s
- .long 0x6e35deb6 // fmul v22.4s, v21.4s, v21.4s
- .long 0x6ea1da13 // frsqrte v19.4s, v16.4s
- .long 0x4eb6fe52 // frsqrts v18.4s, v18.4s, v22.4s
- .long 0x6e31de91 // fmul v17.4s, v20.4s, v17.4s
- .long 0x6e33de76 // fmul v22.4s, v19.4s, v19.4s
- .long 0x6e32deb2 // fmul v18.4s, v21.4s, v18.4s
- .long 0x6ea1da34 // frsqrte v20.4s, v17.4s
- .long 0x4eb6fe10 // frsqrts v16.4s, v16.4s, v22.4s
- .long 0x6ea1da55 // frsqrte v21.4s, v18.4s
- .long 0x6e34de96 // fmul v22.4s, v20.4s, v20.4s
- .long 0x6e30de70 // fmul v16.4s, v19.4s, v16.4s
- .long 0x4eb6fe31 // frsqrts v17.4s, v17.4s, v22.4s
- .long 0x6e35deb6 // fmul v22.4s, v21.4s, v21.4s
- .long 0x6ea1da13 // frsqrte v19.4s, v16.4s
- .long 0x4eb6fe52 // frsqrts v18.4s, v18.4s, v22.4s
- .long 0x6e31de91 // fmul v17.4s, v20.4s, v17.4s
- .long 0x6e33de76 // fmul v22.4s, v19.4s, v19.4s
- .long 0x6e32deb2 // fmul v18.4s, v21.4s, v18.4s
- .long 0x6ea1da34 // frsqrte v20.4s, v17.4s
- .long 0x4eb6fe10 // frsqrts v16.4s, v16.4s, v22.4s
- .long 0x6ea1da55 // frsqrte v21.4s, v18.4s
- .long 0x6e34de96 // fmul v22.4s, v20.4s, v20.4s
- .long 0x6e30de70 // fmul v16.4s, v19.4s, v16.4s
- .long 0x4eb6fe31 // frsqrts v17.4s, v17.4s, v22.4s
- .long 0x6e35deb6 // fmul v22.4s, v21.4s, v21.4s
- .long 0x6ea1da13 // frsqrte v19.4s, v16.4s
- .long 0x4eb6fe52 // frsqrts v18.4s, v18.4s, v22.4s
- .long 0x6e31de91 // fmul v17.4s, v20.4s, v17.4s
- .long 0x6e33de76 // fmul v22.4s, v19.4s, v19.4s
- .long 0x6e20de00 // fmul v0.4s, v16.4s, v0.4s
- .long 0x6ea1da34 // frsqrte v20.4s, v17.4s
- .long 0x4eb6fe10 // frsqrts v16.4s, v16.4s, v22.4s
- .long 0x6e32deb2 // fmul v18.4s, v21.4s, v18.4s
- .long 0x6e34de96 // fmul v22.4s, v20.4s, v20.4s
- .long 0x6e30de70 // fmul v16.4s, v19.4s, v16.4s
- .long 0x6e21de21 // fmul v1.4s, v17.4s, v1.4s
- .long 0x6ea1da55 // frsqrte v21.4s, v18.4s
- .long 0x4eb6fe31 // frsqrts v17.4s, v17.4s, v22.4s
- .long 0x4ea1da13 // frecpe v19.4s, v16.4s
- .long 0x6e35deb6 // fmul v22.4s, v21.4s, v21.4s
- .long 0x6e31de91 // fmul v17.4s, v20.4s, v17.4s
- .long 0x4e33fe10 // frecps v16.4s, v16.4s, v19.4s
- .long 0x6e22de42 // fmul v2.4s, v18.4s, v2.4s
- .long 0x4eb6fe52 // frsqrts v18.4s, v18.4s, v22.4s
- .long 0x6e30de70 // fmul v16.4s, v19.4s, v16.4s
- .long 0x4ea1da33 // frecpe v19.4s, v17.4s
- .long 0x6e32deb2 // fmul v18.4s, v21.4s, v18.4s
- .long 0x4e33fe31 // frecps v17.4s, v17.4s, v19.4s
- .long 0x6e31de71 // fmul v17.4s, v19.4s, v17.4s
- .long 0x4ea1da53 // frecpe v19.4s, v18.4s
- .long 0x4e33fe52 // frecps v18.4s, v18.4s, v19.4s
+ .long 0x52b85f08 // mov w8, #0xc2f80000
+ .long 0x728e6ee8 // movk w8, #0x7377
+ .long 0x4e040d11 // dup v17.4s, w8
+ .long 0x52a7f7e8 // mov w8, #0x3fbf0000
+ .long 0x7297eea8 // movk w8, #0xbf75
+ .long 0x4e040d12 // dup v18.4s, w8
+ .long 0x52a7d688 // mov w8, #0x3eb40000
+ .long 0x72889f28 // movk w8, #0x44f9
+ .long 0x4e040d13 // dup v19.4s, w8
+ .long 0x52a7fb88 // mov w8, #0x3fdc0000
+ .long 0x729d3468 // movk w8, #0xe9a3
+ .long 0x4e040d14 // dup v20.4s, w8
+ .long 0x52a7dd08 // mov w8, #0x3ee80000
+ .long 0x4f03d7fa // movi v26.4s, #0x7f, msl #16
+ .long 0x729745c8 // movk w8, #0xba2e
+ .long 0x4e21d818 // scvtf v24.4s, v0.4s
+ .long 0x4f016690 // movi v16.4s, #0x34, lsl #24
+ .long 0x4e040d15 // dup v21.4s, w8
+ .long 0x52a85e48 // mov w8, #0x42f20000
+ .long 0x4e21d85b // scvtf v27.4s, v2.4s
+ .long 0x4e3a1c00 // and v0.16b, v0.16b, v26.16b
+ .long 0x4e3a1c42 // and v2.16b, v2.16b, v26.16b
+ .long 0x4e3a1c3a // and v26.16b, v1.16b, v26.16b
+ .long 0x72918a28 // movk w8, #0x8c51
+ .long 0x4eb11e3c // mov v28.16b, v17.16b
+ .long 0x4eb11e3d // mov v29.16b, v17.16b
+ .long 0x4e3bce11 // fmla v17.4s, v16.4s, v27.4s
+ .long 0x4e21d821 // scvtf v1.4s, v1.4s
+ .long 0x4f0177e0 // orr v0.4s, #0x3f, lsl #24
+ .long 0x4f0177fa // orr v26.4s, #0x3f, lsl #24
+ .long 0x4f0177e2 // orr v2.4s, #0x3f, lsl #24
+ .long 0x4e040d17 // dup v23.4s, w8
+ .long 0x52a7f7c8 // mov w8, #0x3fbe0000
+ .long 0x4e38ce1c // fmla v28.4s, v16.4s, v24.4s
+ .long 0x4e21ce1d // fmla v29.4s, v16.4s, v1.4s
+ .long 0x4e33d401 // fadd v1.4s, v0.4s, v19.4s
+ .long 0x4e33d750 // fadd v16.4s, v26.4s, v19.4s
+ .long 0x4eb2cc51 // fmls v17.4s, v2.4s, v18.4s
+ .long 0x4e33d442 // fadd v2.4s, v2.4s, v19.4s
+ .long 0x729791a8 // movk w8, #0xbc8d
+ .long 0x4eb2cc1c // fmls v28.4s, v0.4s, v18.4s
+ .long 0x6e21fe80 // fdiv v0.4s, v20.4s, v1.4s
+ .long 0x4eb2cf5d // fmls v29.4s, v26.4s, v18.4s
+ .long 0x6e30fe81 // fdiv v1.4s, v20.4s, v16.4s
+ .long 0x6e22fe82 // fdiv v2.4s, v20.4s, v2.4s
+ .long 0x4e040d16 // dup v22.4s, w8
+ .long 0x52a81348 // mov w8, #0x409a0000
+ .long 0x4ea0d780 // fsub v0.4s, v28.4s, v0.4s
+ .long 0x4ea1d7a1 // fsub v1.4s, v29.4s, v1.4s
+ .long 0x4ea2d622 // fsub v2.4s, v17.4s, v2.4s
+ .long 0x729ebf08 // movk w8, #0xf5f8
+ .long 0x6e35dc00 // fmul v0.4s, v0.4s, v21.4s
+ .long 0x6e35dc21 // fmul v1.4s, v1.4s, v21.4s
+ .long 0x6e35dc42 // fmul v2.4s, v2.4s, v21.4s
+ .long 0x4e040d19 // dup v25.4s, w8
+ .long 0x52a83ba8 // mov w8, #0x41dd0000
+ .long 0x4e219810 // frintm v16.4s, v0.4s
+ .long 0x4e219832 // frintm v18.4s, v1.4s
+ .long 0x4e219854 // frintm v20.4s, v2.4s
+ .long 0x729a5fc8 // movk w8, #0xd2fe
+ .long 0x4e37d411 // fadd v17.4s, v0.4s, v23.4s
+ .long 0x4e37d433 // fadd v19.4s, v1.4s, v23.4s
+ .long 0x4e37d455 // fadd v21.4s, v2.4s, v23.4s
+ .long 0x4eb0d400 // fsub v0.4s, v0.4s, v16.4s
+ .long 0x4eb2d421 // fsub v1.4s, v1.4s, v18.4s
+ .long 0x4eb4d442 // fsub v2.4s, v2.4s, v20.4s
+ .long 0x4e040d18 // dup v24.4s, w8
+ .long 0x4eb6cc11 // fmls v17.4s, v0.4s, v22.4s
+ .long 0x4ea0d720 // fsub v0.4s, v25.4s, v0.4s
+ .long 0x4eb6cc33 // fmls v19.4s, v1.4s, v22.4s
+ .long 0x4ea1d721 // fsub v1.4s, v25.4s, v1.4s
+ .long 0x4eb6cc55 // fmls v21.4s, v2.4s, v22.4s
+ .long 0x4ea2d722 // fsub v2.4s, v25.4s, v2.4s
.long 0xf8408423 // ldr x3, [x1], #8
- .long 0x6e32de72 // fmul v18.4s, v19.4s, v18.4s
- .long 0x6f00e413 // movi v19.2d, #0x0
- .long 0x6e30dc00 // fmul v0.4s, v0.4s, v16.4s
- .long 0x6e31dc21 // fmul v1.4s, v1.4s, v17.4s
- .long 0x6e32dc42 // fmul v2.4s, v2.4s, v18.4s
- .long 0x4e33f400 // fmax v0.4s, v0.4s, v19.4s
- .long 0x4e33f421 // fmax v1.4s, v1.4s, v19.4s
- .long 0x4e33f442 // fmax v2.4s, v2.4s, v19.4s
+ .long 0x6e20ff00 // fdiv v0.4s, v24.4s, v0.4s
+ .long 0x6e21ff01 // fdiv v1.4s, v24.4s, v1.4s
+ .long 0x6e22ff02 // fdiv v2.4s, v24.4s, v2.4s
+ .long 0x4f02657b // movi v27.4s, #0x4b, lsl #24
+ .long 0x4e20d620 // fadd v0.4s, v17.4s, v0.4s
+ .long 0x4e21d661 // fadd v1.4s, v19.4s, v1.4s
+ .long 0x4e22d6a2 // fadd v2.4s, v21.4s, v2.4s
+ .long 0x6e3bdc00 // fmul v0.4s, v0.4s, v27.4s
+ .long 0x6e3bdc21 // fmul v1.4s, v1.4s, v27.4s
+ .long 0x6e3bdc42 // fmul v2.4s, v2.4s, v27.4s
+ .long 0x6e21a800 // fcvtnu v0.4s, v0.4s
+ .long 0x6e21a821 // fcvtnu v1.4s, v1.4s
+ .long 0x6e21a842 // fcvtnu v2.4s, v2.4s
.long 0xd61f0060 // br x3
HIDDEN _sk_rgb_to_hsl_aarch64
@@ -2415,9 +2396,9 @@ FUNCTION(_sk_gather_i8_aarch64)
_sk_gather_i8_aarch64:
.long 0xaa0103e8 // mov x8, x1
.long 0xf8408429 // ldr x9, [x1], #8
- .long 0xb4000069 // cbz x9, 2034 <sk_gather_i8_aarch64+0x14>
+ .long 0xb4000069 // cbz x9, 1fe8 <sk_gather_i8_aarch64+0x14>
.long 0xaa0903ea // mov x10, x9
- .long 0x14000003 // b 203c <sk_gather_i8_aarch64+0x1c>
+ .long 0x14000003 // b 1ff0 <sk_gather_i8_aarch64+0x1c>
.long 0xf940050a // ldr x10, [x8, #8]
.long 0x91004101 // add x1, x8, #0x10
.long 0xf8410548 // ldr x8, [x10], #16
@@ -3266,7 +3247,7 @@ _sk_linear_gradient_aarch64:
.long 0x4d40c902 // ld1r {v2.4s}, [x8]
.long 0xf9400128 // ldr x8, [x9]
.long 0x4d40c943 // ld1r {v3.4s}, [x10]
- .long 0xb40006c8 // cbz x8, 2c08 <sk_linear_gradient_aarch64+0x100>
+ .long 0xb40006c8 // cbz x8, 2bbc <sk_linear_gradient_aarch64+0x100>
.long 0x6dbf23e9 // stp d9, d8, [sp, #-16]!
.long 0xf9400529 // ldr x9, [x9, #8]
.long 0x6f00e413 // movi v19.2d, #0x0
@@ -3317,9 +3298,9 @@ _sk_linear_gradient_aarch64:
.long 0xd1000508 // sub x8, x8, #0x1
.long 0x6e771fd0 // bsl v16.16b, v30.16b, v23.16b
.long 0x91009129 // add x9, x9, #0x24
- .long 0xb5fffaa8 // cbnz x8, 2b50 <sk_linear_gradient_aarch64+0x48>
+ .long 0xb5fffaa8 // cbnz x8, 2b04 <sk_linear_gradient_aarch64+0x48>
.long 0x6cc123e9 // ldp d9, d8, [sp], #16
- .long 0x14000005 // b 2c18 <sk_linear_gradient_aarch64+0x110>
+ .long 0x14000005 // b 2bcc <sk_linear_gradient_aarch64+0x110>
.long 0x6f00e414 // movi v20.2d, #0x0
.long 0x6f00e412 // movi v18.2d, #0x0
.long 0x6f00e411 // movi v17.2d, #0x0
@@ -4722,207 +4703,251 @@ HIDDEN _sk_from_2dot2_vfp4
.globl _sk_from_2dot2_vfp4
FUNCTION(_sk_from_2dot2_vfp4)
_sk_from_2dot2_vfp4:
- .long 0xf3fb0580 // vrsqrte.f32 d16, d0
+ .long 0xed2d8b0a // vpush {d8-d12}
+ .long 0xf2c70d1f // vmov.i32 d16, #8388607
+ .long 0xeddf3b62 // vldr d19, [pc, #392]
+ .long 0xed9faa71 // vldr s20, [pc, #452]
+ .long 0xf2c34614 // vmov.i32 d20, #872415232
+ .long 0xf2402130 // vand d18, d0, d16
+ .long 0xeddf7b60 // vldr d23, [pc, #384]
+ .long 0xf2c3271f // vorr.i32 d18, #1056964608
+ .long 0xeddfab64 // vldr d26, [pc, #400]
+ .long 0xf2411130 // vand d17, d1, d16
.long 0xe4913004 // ldr r3, [r1], #4
- .long 0xf3fb1581 // vrsqrte.f32 d17, d1
- .long 0xf3fb2582 // vrsqrte.f32 d18, d2
- .long 0xf3403db0 // vmul.f32 d19, d16, d16
- .long 0xf3414db1 // vmul.f32 d20, d17, d17
- .long 0xf3425db2 // vmul.f32 d21, d18, d18
- .long 0xf2603f33 // vrsqrts.f32 d19, d0, d19
- .long 0xf2614f34 // vrsqrts.f32 d20, d1, d20
- .long 0xf2625f35 // vrsqrts.f32 d21, d2, d21
+ .long 0xf2029da3 // vadd.f32 d9, d18, d19
+ .long 0xf2420130 // vand d16, d2, d16
+ .long 0xf2c3171f // vorr.i32 d17, #1056964608
+ .long 0xf2c3071f // vorr.i32 d16, #1056964608
+ .long 0xf201bda3 // vadd.f32 d11, d17, d19
+ .long 0xeeca8a29 // vdiv.f32 s17, s20, s19
+ .long 0xf200cda3 // vadd.f32 d12, d16, d19
+ .long 0xf3fb3600 // vcvt.f32.s32 d19, d0
+ .long 0xee8a8a09 // vdiv.f32 s16, s20, s18
+ .long 0xeeca9a2b // vdiv.f32 s19, s20, s23
+ .long 0xeeca0a2c // vdiv.f32 s1, s20, s25
+ .long 0xee8a9a0b // vdiv.f32 s18, s20, s22
+ .long 0xee8a0a0c // vdiv.f32 s0, s20, s24
+ .long 0xf3fb5602 // vcvt.f32.s32 d21, d2
+ .long 0xf3fb6601 // vcvt.f32.s32 d22, d1
+ .long 0xed9f1a5b // vldr s2, [pc, #364]
+ .long 0xf3433db4 // vmul.f32 d19, d19, d20
+ .long 0xf3422db7 // vmul.f32 d18, d18, d23
+ .long 0xf3455db4 // vmul.f32 d21, d21, d20
+ .long 0xf3464db4 // vmul.f32 d20, d22, d20
+ .long 0xeddf6b49 // vldr d22, [pc, #292]
+ .long 0xf2433da6 // vadd.f32 d19, d19, d22
+ .long 0xf3411db7 // vmul.f32 d17, d17, d23
+ .long 0xf3400db7 // vmul.f32 d16, d16, d23
+ .long 0xf2444da6 // vadd.f32 d20, d20, d22
+ .long 0xf2632da2 // vsub.f32 d18, d19, d18
+ .long 0xeddf3b45 // vldr d19, [pc, #276]
+ .long 0xf2455da6 // vadd.f32 d21, d21, d22
+ .long 0xf2c09010 // vmov.i32 d25, #0
+ .long 0xf2641da1 // vsub.f32 d17, d20, d17
+ .long 0xf2622d88 // vsub.f32 d18, d18, d8
+ .long 0xf2650da0 // vsub.f32 d16, d21, d16
+ .long 0xf2611d89 // vsub.f32 d17, d17, d9
+ .long 0xf3422db3 // vmul.f32 d18, d18, d19
+ .long 0xf2600d80 // vsub.f32 d16, d16, d0
+ .long 0xf3411db3 // vmul.f32 d17, d17, d19
.long 0xf3400db3 // vmul.f32 d16, d16, d19
- .long 0xf3411db4 // vmul.f32 d17, d17, d20
- .long 0xf3422db5 // vmul.f32 d18, d18, d21
- .long 0xf3fb35a0 // vrsqrte.f32 d19, d16
- .long 0xf3fb45a1 // vrsqrte.f32 d20, d17
- .long 0xf3fb55a2 // vrsqrte.f32 d21, d18
- .long 0xf3436db3 // vmul.f32 d22, d19, d19
- .long 0xf3447db4 // vmul.f32 d23, d20, d20
- .long 0xf3458db5 // vmul.f32 d24, d21, d21
- .long 0xf2600fb6 // vrsqrts.f32 d16, d16, d22
- .long 0xf2611fb7 // vrsqrts.f32 d17, d17, d23
- .long 0xf2622fb8 // vrsqrts.f32 d18, d18, d24
- .long 0xf3430db0 // vmul.f32 d16, d19, d16
- .long 0xf3441db1 // vmul.f32 d17, d20, d17
- .long 0xf3452db2 // vmul.f32 d18, d21, d18
- .long 0xf3fb35a0 // vrsqrte.f32 d19, d16
- .long 0xf3fb45a1 // vrsqrte.f32 d20, d17
- .long 0xf3fb55a2 // vrsqrte.f32 d21, d18
- .long 0xf3436db3 // vmul.f32 d22, d19, d19
- .long 0xf3447db4 // vmul.f32 d23, d20, d20
- .long 0xf3458db5 // vmul.f32 d24, d21, d21
- .long 0xf2600fb6 // vrsqrts.f32 d16, d16, d22
- .long 0xf2611fb7 // vrsqrts.f32 d17, d17, d23
- .long 0xf2622fb8 // vrsqrts.f32 d18, d18, d24
- .long 0xf3430db0 // vmul.f32 d16, d19, d16
- .long 0xf3441db1 // vmul.f32 d17, d20, d17
- .long 0xf3452db2 // vmul.f32 d18, d21, d18
- .long 0xf3fb35a0 // vrsqrte.f32 d19, d16
- .long 0xf3fb45a1 // vrsqrte.f32 d20, d17
- .long 0xf3fb55a2 // vrsqrte.f32 d21, d18
- .long 0xf3436db3 // vmul.f32 d22, d19, d19
- .long 0xf3447db4 // vmul.f32 d23, d20, d20
- .long 0xf3458db5 // vmul.f32 d24, d21, d21
- .long 0xf2600fb6 // vrsqrts.f32 d16, d16, d22
- .long 0xf2611fb7 // vrsqrts.f32 d17, d17, d23
- .long 0xf2622fb8 // vrsqrts.f32 d18, d18, d24
- .long 0xf3430db0 // vmul.f32 d16, d19, d16
- .long 0xf3441db1 // vmul.f32 d17, d20, d17
- .long 0xf3452db2 // vmul.f32 d18, d21, d18
- .long 0xf3fb35a0 // vrsqrte.f32 d19, d16
- .long 0xf3fb45a1 // vrsqrte.f32 d20, d17
- .long 0xf3fb55a2 // vrsqrte.f32 d21, d18
- .long 0xf340bdb0 // vmul.f32 d27, d16, d16
- .long 0xf341ddb1 // vmul.f32 d29, d17, d17
- .long 0xf3436db3 // vmul.f32 d22, d19, d19
- .long 0xf3447db4 // vmul.f32 d23, d20, d20
- .long 0xf3458db5 // vmul.f32 d24, d21, d21
- .long 0xf2606fb6 // vrsqrts.f32 d22, d16, d22
- .long 0xf2617fb7 // vrsqrts.f32 d23, d17, d23
- .long 0xf2628fb8 // vrsqrts.f32 d24, d18, d24
- .long 0xf3400dbb // vmul.f32 d16, d16, d27
- .long 0xf3411dbd // vmul.f32 d17, d17, d29
- .long 0xf341bd11 // vmul.f32 d27, d1, d1
+ .long 0xf3fb3722 // vcvt.s32.f32 d19, d18
+ .long 0xf3fb4721 // vcvt.s32.f32 d20, d17
+ .long 0xf3fb5720 // vcvt.s32.f32 d21, d16
+ .long 0xf3fb3623 // vcvt.f32.s32 d19, d19
+ .long 0xf3fb4624 // vcvt.f32.s32 d20, d20
+ .long 0xf3fb5625 // vcvt.f32.s32 d21, d21
+ .long 0xf3636ea2 // vcgt.f32 d22, d19, d18
+ .long 0xf3647ea1 // vcgt.f32 d23, d20, d17
+ .long 0xf3658ea0 // vcgt.f32 d24, d21, d16
+ .long 0xf35a61b9 // vbsl d22, d26, d25
+ .long 0xf35a71b9 // vbsl d23, d26, d25
+ .long 0xf2633da6 // vsub.f32 d19, d19, d22
+ .long 0xeddf6b32 // vldr d22, [pc, #200]
+ .long 0xf2644da7 // vsub.f32 d20, d20, d23
+ .long 0xeddf7b34 // vldr d23, [pc, #208]
+ .long 0xf35a81b9 // vbsl d24, d26, d25
+ .long 0xf2623da3 // vsub.f32 d19, d18, d19
+ .long 0xf2614da4 // vsub.f32 d20, d17, d20
+ .long 0xf2655da8 // vsub.f32 d21, d21, d24
+ .long 0xf2422da7 // vadd.f32 d18, d18, d23
+ .long 0xf2260da3 // vsub.f32 d0, d22, d19
+ .long 0xf2262da4 // vsub.f32 d2, d22, d20
+ .long 0xf2605da5 // vsub.f32 d21, d16, d21
+ .long 0xf2411da7 // vadd.f32 d17, d17, d23
+ .long 0xf2400da7 // vadd.f32 d16, d16, d23
+ .long 0xeec19a20 // vdiv.f32 s19, s2, s1
+ .long 0xee819a00 // vdiv.f32 s18, s2, s0
+ .long 0xeec10a22 // vdiv.f32 s1, s2, s5
+ .long 0xf2268da5 // vsub.f32 d8, d22, d21
+ .long 0xeddf6b23 // vldr d22, [pc, #140]
+ .long 0xee810a02 // vdiv.f32 s0, s2, s4
+ .long 0xeec12a28 // vdiv.f32 s5, s2, s17
+ .long 0xee812a08 // vdiv.f32 s4, s2, s16
.long 0xf3433db6 // vmul.f32 d19, d19, d22
- .long 0xf3444db7 // vmul.f32 d20, d20, d23
- .long 0xf3455db8 // vmul.f32 d21, d21, d24
- .long 0xf34b1db1 // vmul.f32 d17, d27, d17
- .long 0xf3fb65a3 // vrsqrte.f32 d22, d19
- .long 0xf3fb75a4 // vrsqrte.f32 d23, d20
- .long 0xf3fb85a5 // vrsqrte.f32 d24, d21
- .long 0xf3469db6 // vmul.f32 d25, d22, d22
- .long 0xf347adb7 // vmul.f32 d26, d23, d23
- .long 0xf348cdb8 // vmul.f32 d28, d24, d24
- .long 0xf2633fb9 // vrsqrts.f32 d19, d19, d25
- .long 0xf2644fba // vrsqrts.f32 d20, d20, d26
- .long 0xf3429db2 // vmul.f32 d25, d18, d18
- .long 0xf2655fbc // vrsqrts.f32 d21, d21, d28
- .long 0xf340ad10 // vmul.f32 d26, d0, d0
- .long 0xf3422db9 // vmul.f32 d18, d18, d25
- .long 0xf3429d12 // vmul.f32 d25, d2, d2
- .long 0xf3463db3 // vmul.f32 d19, d22, d19
- .long 0xf3474db4 // vmul.f32 d20, d23, d20
- .long 0xf34a0db0 // vmul.f32 d16, d26, d16
- .long 0xf3485db5 // vmul.f32 d21, d24, d21
- .long 0xf3492db2 // vmul.f32 d18, d25, d18
- .long 0xf3400db3 // vmul.f32 d16, d16, d19
- .long 0xf3411db4 // vmul.f32 d17, d17, d20
- .long 0xf3422db5 // vmul.f32 d18, d18, d21
- .long 0xf2c03010 // vmov.i32 d19, #0
- .long 0xf2000fa3 // vmax.f32 d0, d16, d19
- .long 0xf2011fa3 // vmax.f32 d1, d17, d19
- .long 0xf2022fa3 // vmax.f32 d2, d18, d19
+ .long 0xf3444db6 // vmul.f32 d20, d20, d22
+ .long 0xf3455db6 // vmul.f32 d21, d21, d22
+ .long 0xf2622da3 // vsub.f32 d18, d18, d19
+ .long 0xf2611da4 // vsub.f32 d17, d17, d20
+ .long 0xf2600da5 // vsub.f32 d16, d16, d21
+ .long 0xf2c4361b // vmov.i32 d19, #1258291200
+ .long 0xf2422d89 // vadd.f32 d18, d18, d9
+ .long 0xf2411d80 // vadd.f32 d17, d17, d0
+ .long 0xf2400d82 // vadd.f32 d16, d16, d2
+ .long 0xf2c3561f // vmov.i32 d21, #1056964608
+ .long 0xf2c3461f // vmov.i32 d20, #1056964608
+ .long 0xf2425cb3 // vfma.f32 d21, d18, d19
+ .long 0xf2c3261f // vmov.i32 d18, #1056964608
+ .long 0xf2412cb3 // vfma.f32 d18, d17, d19
+ .long 0xf2404cb3 // vfma.f32 d20, d16, d19
+ .long 0xf3bb07a5 // vcvt.u32.f32 d0, d21
+ .long 0xf3bb17a2 // vcvt.u32.f32 d1, d18
+ .long 0xf3bb27a4 // vcvt.u32.f32 d2, d20
+ .long 0xecbd8b0a // vpop {d8-d12}
.long 0xe12fff13 // bx r3
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0xc2f87377 // .word 0xc2f87377
+ .long 0xc2f87377 // .word 0xc2f87377
+ .long 0x400ccccd // .word 0x400ccccd
+ .long 0x400ccccd // .word 0x400ccccd
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x3fdce9a3 // .word 0x3fdce9a3
+ .long 0x41ddd2fe // .word 0x41ddd2fe
HIDDEN _sk_to_2dot2_vfp4
.globl _sk_to_2dot2_vfp4
FUNCTION(_sk_to_2dot2_vfp4)
_sk_to_2dot2_vfp4:
- .long 0xf3fb0580 // vrsqrte.f32 d16, d0
+ .long 0xed2d8b0a // vpush {d8-d12}
+ .long 0xf2c70d1f // vmov.i32 d16, #8388607
+ .long 0xeddf3b62 // vldr d19, [pc, #392]
+ .long 0xed9faa71 // vldr s20, [pc, #452]
+ .long 0xf2c34614 // vmov.i32 d20, #872415232
+ .long 0xf2402130 // vand d18, d0, d16
+ .long 0xeddf7b60 // vldr d23, [pc, #384]
+ .long 0xf2c3271f // vorr.i32 d18, #1056964608
+ .long 0xeddfab64 // vldr d26, [pc, #400]
+ .long 0xf2411130 // vand d17, d1, d16
.long 0xe4913004 // ldr r3, [r1], #4
- .long 0xf3fb1581 // vrsqrte.f32 d17, d1
- .long 0xf3fb3582 // vrsqrte.f32 d19, d2
- .long 0xf3402db0 // vmul.f32 d18, d16, d16
- .long 0xf3414db1 // vmul.f32 d20, d17, d17
- .long 0xf3435db3 // vmul.f32 d21, d19, d19
- .long 0xf2602f32 // vrsqrts.f32 d18, d0, d18
- .long 0xf2614f34 // vrsqrts.f32 d20, d1, d20
- .long 0xf2625f35 // vrsqrts.f32 d21, d2, d21
- .long 0xf3402db2 // vmul.f32 d18, d16, d18
- .long 0xf3411db4 // vmul.f32 d17, d17, d20
- .long 0xf3430db5 // vmul.f32 d16, d19, d21
- .long 0xf3fb35a2 // vrsqrte.f32 d19, d18
- .long 0xf3fb45a1 // vrsqrte.f32 d20, d17
- .long 0xf3fb55a0 // vrsqrte.f32 d21, d16
- .long 0xf3fbc522 // vrecpe.f32 d28, d18
- .long 0xf3436db3 // vmul.f32 d22, d19, d19
- .long 0xf3447db4 // vmul.f32 d23, d20, d20
- .long 0xf3458db5 // vmul.f32 d24, d21, d21
- .long 0xf2626fb6 // vrsqrts.f32 d22, d18, d22
- .long 0xf2617fb7 // vrsqrts.f32 d23, d17, d23
- .long 0xf2608fb8 // vrsqrts.f32 d24, d16, d24
- .long 0xf2422fbc // vrecps.f32 d18, d18, d28
- .long 0xf3433db6 // vmul.f32 d19, d19, d22
- .long 0xf3444db7 // vmul.f32 d20, d20, d23
- .long 0xf3455db8 // vmul.f32 d21, d21, d24
- .long 0xf34c2db2 // vmul.f32 d18, d28, d18
- .long 0xf3fb65a3 // vrsqrte.f32 d22, d19
- .long 0xf3fb75a4 // vrsqrte.f32 d23, d20
- .long 0xf3fb85a5 // vrsqrte.f32 d24, d21
- .long 0xf3469db6 // vmul.f32 d25, d22, d22
- .long 0xf347adb7 // vmul.f32 d26, d23, d23
- .long 0xf348bdb8 // vmul.f32 d27, d24, d24
- .long 0xf2633fb9 // vrsqrts.f32 d19, d19, d25
- .long 0xf2644fba // vrsqrts.f32 d20, d20, d26
- .long 0xf2655fbb // vrsqrts.f32 d21, d21, d27
- .long 0xf3463db3 // vmul.f32 d19, d22, d19
- .long 0xf3474db4 // vmul.f32 d20, d23, d20
- .long 0xf3485db5 // vmul.f32 d21, d24, d21
- .long 0xf3fb65a3 // vrsqrte.f32 d22, d19
- .long 0xf3fb75a4 // vrsqrte.f32 d23, d20
- .long 0xf3fb85a5 // vrsqrte.f32 d24, d21
- .long 0xf3469db6 // vmul.f32 d25, d22, d22
- .long 0xf347adb7 // vmul.f32 d26, d23, d23
- .long 0xf348bdb8 // vmul.f32 d27, d24, d24
- .long 0xf2633fb9 // vrsqrts.f32 d19, d19, d25
- .long 0xf2644fba // vrsqrts.f32 d20, d20, d26
- .long 0xf2655fbb // vrsqrts.f32 d21, d21, d27
- .long 0xf3463db3 // vmul.f32 d19, d22, d19
- .long 0xf3474db4 // vmul.f32 d20, d23, d20
- .long 0xf3485db5 // vmul.f32 d21, d24, d21
- .long 0xf3fb65a3 // vrsqrte.f32 d22, d19
- .long 0xf3fb75a4 // vrsqrte.f32 d23, d20
- .long 0xf3fb85a5 // vrsqrte.f32 d24, d21
- .long 0xf3469db6 // vmul.f32 d25, d22, d22
- .long 0xf347adb7 // vmul.f32 d26, d23, d23
- .long 0xf348bdb8 // vmul.f32 d27, d24, d24
- .long 0xf2633fb9 // vrsqrts.f32 d19, d19, d25
- .long 0xf2644fba // vrsqrts.f32 d20, d20, d26
- .long 0xf2655fbb // vrsqrts.f32 d21, d21, d27
- .long 0xf3463db3 // vmul.f32 d19, d22, d19
- .long 0xf3474db4 // vmul.f32 d20, d23, d20
- .long 0xf3485db5 // vmul.f32 d21, d24, d21
- .long 0xf3fb65a3 // vrsqrte.f32 d22, d19
- .long 0xf3fb75a4 // vrsqrte.f32 d23, d20
- .long 0xf3fb85a5 // vrsqrte.f32 d24, d21
- .long 0xf3432db2 // vmul.f32 d18, d19, d18
- .long 0xf3469db6 // vmul.f32 d25, d22, d22
- .long 0xf347adb7 // vmul.f32 d26, d23, d23
- .long 0xf348bdb8 // vmul.f32 d27, d24, d24
- .long 0xf2639fb9 // vrsqrts.f32 d25, d19, d25
- .long 0xf264afba // vrsqrts.f32 d26, d20, d26
- .long 0xf265bfbb // vrsqrts.f32 d27, d21, d27
- .long 0xf3466db9 // vmul.f32 d22, d22, d25
- .long 0xf3fb9521 // vrecpe.f32 d25, d17
- .long 0xf3477dba // vmul.f32 d23, d23, d26
- .long 0xf3fba520 // vrecpe.f32 d26, d16
- .long 0xf3488dbb // vmul.f32 d24, d24, d27
- .long 0xf2411fb9 // vrecps.f32 d17, d17, d25
- .long 0xf3fbb526 // vrecpe.f32 d27, d22
- .long 0xf3fbd527 // vrecpe.f32 d29, d23
- .long 0xf2400fba // vrecps.f32 d16, d16, d26
- .long 0xf3fbe528 // vrecpe.f32 d30, d24
- .long 0xf2466fbb // vrecps.f32 d22, d22, d27
- .long 0xf2477fbd // vrecps.f32 d23, d23, d29
- .long 0xf2488fbe // vrecps.f32 d24, d24, d30
- .long 0xf3491db1 // vmul.f32 d17, d25, d17
- .long 0xf34a0db0 // vmul.f32 d16, d26, d16
- .long 0xf34b6db6 // vmul.f32 d22, d27, d22
- .long 0xf3441db1 // vmul.f32 d17, d20, d17
- .long 0xf34d3db7 // vmul.f32 d19, d29, d23
- .long 0xf34e4db8 // vmul.f32 d20, d30, d24
- .long 0xf3450db0 // vmul.f32 d16, d21, d16
- .long 0xf3422db6 // vmul.f32 d18, d18, d22
+ .long 0xf2029da3 // vadd.f32 d9, d18, d19
+ .long 0xf2420130 // vand d16, d2, d16
+ .long 0xf2c3171f // vorr.i32 d17, #1056964608
+ .long 0xf2c3071f // vorr.i32 d16, #1056964608
+ .long 0xf201bda3 // vadd.f32 d11, d17, d19
+ .long 0xeeca8a29 // vdiv.f32 s17, s20, s19
+ .long 0xf200cda3 // vadd.f32 d12, d16, d19
+ .long 0xf3fb3600 // vcvt.f32.s32 d19, d0
+ .long 0xee8a8a09 // vdiv.f32 s16, s20, s18
+ .long 0xeeca9a2b // vdiv.f32 s19, s20, s23
+ .long 0xeeca0a2c // vdiv.f32 s1, s20, s25
+ .long 0xee8a9a0b // vdiv.f32 s18, s20, s22
+ .long 0xee8a0a0c // vdiv.f32 s0, s20, s24
+ .long 0xf3fb5602 // vcvt.f32.s32 d21, d2
+ .long 0xf3fb6601 // vcvt.f32.s32 d22, d1
+ .long 0xed9f1a5b // vldr s2, [pc, #364]
+ .long 0xf3433db4 // vmul.f32 d19, d19, d20
+ .long 0xf3422db7 // vmul.f32 d18, d18, d23
+ .long 0xf3455db4 // vmul.f32 d21, d21, d20
+ .long 0xf3464db4 // vmul.f32 d20, d22, d20
+ .long 0xeddf6b49 // vldr d22, [pc, #292]
+ .long 0xf2433da6 // vadd.f32 d19, d19, d22
+ .long 0xf3411db7 // vmul.f32 d17, d17, d23
+ .long 0xf3400db7 // vmul.f32 d16, d16, d23
+ .long 0xf2444da6 // vadd.f32 d20, d20, d22
+ .long 0xf2632da2 // vsub.f32 d18, d19, d18
+ .long 0xeddf3b45 // vldr d19, [pc, #276]
+ .long 0xf2455da6 // vadd.f32 d21, d21, d22
+ .long 0xf2c09010 // vmov.i32 d25, #0
+ .long 0xf2641da1 // vsub.f32 d17, d20, d17
+ .long 0xf2622d88 // vsub.f32 d18, d18, d8
+ .long 0xf2650da0 // vsub.f32 d16, d21, d16
+ .long 0xf2611d89 // vsub.f32 d17, d17, d9
+ .long 0xf3422db3 // vmul.f32 d18, d18, d19
+ .long 0xf2600d80 // vsub.f32 d16, d16, d0
.long 0xf3411db3 // vmul.f32 d17, d17, d19
- .long 0xf3400db4 // vmul.f32 d16, d16, d20
- .long 0xf2c03010 // vmov.i32 d19, #0
- .long 0xf2020fa3 // vmax.f32 d0, d18, d19
- .long 0xf2011fa3 // vmax.f32 d1, d17, d19
- .long 0xf2002fa3 // vmax.f32 d2, d16, d19
+ .long 0xf3400db3 // vmul.f32 d16, d16, d19
+ .long 0xf3fb3722 // vcvt.s32.f32 d19, d18
+ .long 0xf3fb4721 // vcvt.s32.f32 d20, d17
+ .long 0xf3fb5720 // vcvt.s32.f32 d21, d16
+ .long 0xf3fb3623 // vcvt.f32.s32 d19, d19
+ .long 0xf3fb4624 // vcvt.f32.s32 d20, d20
+ .long 0xf3fb5625 // vcvt.f32.s32 d21, d21
+ .long 0xf3636ea2 // vcgt.f32 d22, d19, d18
+ .long 0xf3647ea1 // vcgt.f32 d23, d20, d17
+ .long 0xf3658ea0 // vcgt.f32 d24, d21, d16
+ .long 0xf35a61b9 // vbsl d22, d26, d25
+ .long 0xf35a71b9 // vbsl d23, d26, d25
+ .long 0xf2633da6 // vsub.f32 d19, d19, d22
+ .long 0xeddf6b32 // vldr d22, [pc, #200]
+ .long 0xf2644da7 // vsub.f32 d20, d20, d23
+ .long 0xeddf7b34 // vldr d23, [pc, #208]
+ .long 0xf35a81b9 // vbsl d24, d26, d25
+ .long 0xf2623da3 // vsub.f32 d19, d18, d19
+ .long 0xf2614da4 // vsub.f32 d20, d17, d20
+ .long 0xf2655da8 // vsub.f32 d21, d21, d24
+ .long 0xf2422da7 // vadd.f32 d18, d18, d23
+ .long 0xf2260da3 // vsub.f32 d0, d22, d19
+ .long 0xf2262da4 // vsub.f32 d2, d22, d20
+ .long 0xf2605da5 // vsub.f32 d21, d16, d21
+ .long 0xf2411da7 // vadd.f32 d17, d17, d23
+ .long 0xf2400da7 // vadd.f32 d16, d16, d23
+ .long 0xeec19a20 // vdiv.f32 s19, s2, s1
+ .long 0xee819a00 // vdiv.f32 s18, s2, s0
+ .long 0xeec10a22 // vdiv.f32 s1, s2, s5
+ .long 0xf2268da5 // vsub.f32 d8, d22, d21
+ .long 0xeddf6b23 // vldr d22, [pc, #140]
+ .long 0xee810a02 // vdiv.f32 s0, s2, s4
+ .long 0xeec12a28 // vdiv.f32 s5, s2, s17
+ .long 0xee812a08 // vdiv.f32 s4, s2, s16
+ .long 0xf3433db6 // vmul.f32 d19, d19, d22
+ .long 0xf3444db6 // vmul.f32 d20, d20, d22
+ .long 0xf3455db6 // vmul.f32 d21, d21, d22
+ .long 0xf2622da3 // vsub.f32 d18, d18, d19
+ .long 0xf2611da4 // vsub.f32 d17, d17, d20
+ .long 0xf2600da5 // vsub.f32 d16, d16, d21
+ .long 0xf2c4361b // vmov.i32 d19, #1258291200
+ .long 0xf2422d89 // vadd.f32 d18, d18, d9
+ .long 0xf2411d80 // vadd.f32 d17, d17, d0
+ .long 0xf2400d82 // vadd.f32 d16, d16, d2
+ .long 0xf2c3561f // vmov.i32 d21, #1056964608
+ .long 0xf2c3461f // vmov.i32 d20, #1056964608
+ .long 0xf2425cb3 // vfma.f32 d21, d18, d19
+ .long 0xf2c3261f // vmov.i32 d18, #1056964608
+ .long 0xf2412cb3 // vfma.f32 d18, d17, d19
+ .long 0xf2404cb3 // vfma.f32 d20, d16, d19
+ .long 0xf3bb07a5 // vcvt.u32.f32 d0, d21
+ .long 0xf3bb17a2 // vcvt.u32.f32 d1, d18
+ .long 0xf3bb27a4 // vcvt.u32.f32 d2, d20
+ .long 0xecbd8b0a // vpop {d8-d12}
.long 0xe12fff13 // bx r3
- .long 0xe320f000 // nop {0}
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0xc2f87377 // .word 0xc2f87377
+ .long 0xc2f87377 // .word 0xc2f87377
+ .long 0x3ee8ba2e // .word 0x3ee8ba2e
+ .long 0x3ee8ba2e // .word 0x3ee8ba2e
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x3fdce9a3 // .word 0x3fdce9a3
+ .long 0x41ddd2fe // .word 0x41ddd2fe
HIDDEN _sk_rgb_to_hsl_vfp4
.globl _sk_rgb_to_hsl_vfp4
@@ -7217,7 +7242,7 @@ _sk_linear_gradient_vfp4:
.long 0xe494c00c // ldr ip, [r4], #12
.long 0xf4a41c9f // vld1.32 {d1[]}, [r4 :32]
.long 0xe35c0000 // cmp ip, #0
- .long 0x0a000036 // beq 3080 <sk_linear_gradient_vfp4+0x110>
+ .long 0x0a000036 // beq 3130 <sk_linear_gradient_vfp4+0x110>
.long 0xe59e3004 // ldr r3, [lr, #4]
.long 0xf2c01010 // vmov.i32 d17, #0
.long 0xf2c07010 // vmov.i32 d23, #0
@@ -7267,12 +7292,12 @@ _sk_linear_gradient_vfp4:
.long 0xf26371b3 // vorr d23, d19, d19
.long 0xf26481b4 // vorr d24, d20, d20
.long 0xf26561b5 // vorr d22, d21, d21
- .long 0x1affffd3 // bne 2fbc <sk_linear_gradient_vfp4+0x4c>
+ .long 0x1affffd3 // bne 306c <sk_linear_gradient_vfp4+0x4c>
.long 0xf26c01bc // vorr d16, d28, d28
.long 0xf22b11bb // vorr d1, d27, d27
.long 0xf22a21ba // vorr d2, d26, d26
.long 0xf22931b9 // vorr d3, d25, d25
- .long 0xea000003 // b 3090 <sk_linear_gradient_vfp4+0x120>
+ .long 0xea000003 // b 3140 <sk_linear_gradient_vfp4+0x120>
.long 0xf2c05010 // vmov.i32 d21, #0
.long 0xf2c04010 // vmov.i32 d20, #0
.long 0xf2c03010 // vmov.i32 d19, #0
@@ -8656,85 +8681,232 @@ HIDDEN _sk_from_2dot2_hsw
.globl _sk_from_2dot2_hsw
FUNCTION(_sk_from_2dot2_hsw)
_sk_from_2dot2_hsw:
- .byte 197,124,82,192 // vrsqrtps %ymm0,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,200 // vrsqrtps %ymm8,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 197,252,89,192 // vmulps %ymm0,%ymm0,%ymm0
- .byte 196,65,60,89,208 // vmulps %ymm8,%ymm8,%ymm10
- .byte 196,65,60,89,194 // vmulps %ymm10,%ymm8,%ymm8
- .byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 197,180,89,192 // vmulps %ymm0,%ymm9,%ymm0
- .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
- .byte 196,193,124,95,192 // vmaxps %ymm8,%ymm0,%ymm0
- .byte 197,124,82,201 // vrsqrtps %ymm1,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,209 // vrsqrtps %ymm9,%ymm10
- .byte 196,65,124,82,210 // vrsqrtps %ymm10,%ymm10
- .byte 197,244,89,201 // vmulps %ymm1,%ymm1,%ymm1
- .byte 196,65,52,89,217 // vmulps %ymm9,%ymm9,%ymm11
- .byte 196,65,52,89,203 // vmulps %ymm11,%ymm9,%ymm9
- .byte 196,193,116,89,201 // vmulps %ymm9,%ymm1,%ymm1
- .byte 197,172,89,201 // vmulps %ymm1,%ymm10,%ymm1
- .byte 196,193,116,95,200 // vmaxps %ymm8,%ymm1,%ymm1
- .byte 197,124,82,202 // vrsqrtps %ymm2,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,209 // vrsqrtps %ymm9,%ymm10
- .byte 196,65,124,82,210 // vrsqrtps %ymm10,%ymm10
- .byte 197,236,89,210 // vmulps %ymm2,%ymm2,%ymm2
- .byte 196,65,52,89,217 // vmulps %ymm9,%ymm9,%ymm11
- .byte 196,65,52,89,203 // vmulps %ymm11,%ymm9,%ymm9
- .byte 196,193,108,89,209 // vmulps %ymm9,%ymm2,%ymm2
- .byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
- .byte 196,193,108,95,208 // vmaxps %ymm8,%ymm2,%ymm2
- .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 72,131,236,88 // sub $0x58,%rsp
+ .byte 197,252,17,124,36,32 // vmovups %ymm7,0x20(%rsp)
+ .byte 197,252,17,52,36 // vmovups %ymm6,(%rsp)
+ .byte 197,252,17,108,36,224 // vmovups %ymm5,-0x20(%rsp)
+ .byte 197,252,17,100,36,192 // vmovups %ymm4,-0x40(%rsp)
+ .byte 197,252,17,92,36,160 // vmovups %ymm3,-0x60(%rsp)
+ .byte 197,124,40,225 // vmovaps %ymm1,%ymm12
+ .byte 65,184,205,204,12,64 // mov $0x400ccccd,%r8d
+ .byte 197,124,91,208 // vcvtdq2ps %ymm0,%ymm10
+ .byte 184,0,0,0,52 // mov $0x34000000,%eax
+ .byte 197,121,110,192 // vmovd %eax,%xmm8
+ .byte 196,66,125,88,216 // vpbroadcastd %xmm8,%ymm11
+ .byte 184,255,255,127,0 // mov $0x7fffff,%eax
+ .byte 197,121,110,192 // vmovd %eax,%xmm8
+ .byte 196,194,125,88,216 // vpbroadcastd %xmm8,%ymm3
+ .byte 197,254,127,92,36,128 // vmovdqu %ymm3,-0x80(%rsp)
+ .byte 197,101,219,200 // vpand %ymm0,%ymm3,%ymm9
+ .byte 184,0,0,0,63 // mov $0x3f000000,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,98,125,88,248 // vpbroadcastd %xmm0,%ymm15
+ .byte 196,193,53,235,223 // vpor %ymm15,%ymm9,%ymm3
+ .byte 184,119,115,248,66 // mov $0x42f87377,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,98,125,88,232 // vpbroadcastd %xmm0,%ymm13
+ .byte 196,66,37,170,213 // vfmsub213ps %ymm13,%ymm11,%ymm10
+ .byte 184,117,191,191,63 // mov $0x3fbfbf75,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,98,125,88,200 // vpbroadcastd %xmm0,%ymm9
+ .byte 196,66,101,188,209 // vfnmadd231ps %ymm9,%ymm3,%ymm10
+ .byte 184,163,233,220,63 // mov $0x3fdce9a3,%eax
+ .byte 196,65,124,91,244 // vcvtdq2ps %ymm12,%ymm14
+ .byte 196,66,37,170,245 // vfmsub213ps %ymm13,%ymm11,%ymm14
+ .byte 197,252,91,202 // vcvtdq2ps %ymm2,%ymm1
+ .byte 197,124,40,194 // vmovaps %ymm2,%ymm8
+ .byte 196,194,37,170,205 // vfmsub213ps %ymm13,%ymm11,%ymm1
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0
+ .byte 184,249,68,180,62 // mov $0x3eb444f9,%eax
+ .byte 197,249,110,248 // vmovd %eax,%xmm7
+ .byte 196,226,125,88,255 // vpbroadcastd %xmm7,%ymm7
+ .byte 197,100,88,223 // vaddps %ymm7,%ymm3,%ymm11
+ .byte 196,65,124,94,219 // vdivps %ymm11,%ymm0,%ymm11
+ .byte 196,65,44,92,211 // vsubps %ymm11,%ymm10,%ymm10
+ .byte 196,193,121,110,240 // vmovd %r8d,%xmm6
+ .byte 196,226,125,88,246 // vpbroadcastd %xmm6,%ymm6
+ .byte 196,65,76,89,210 // vmulps %ymm10,%ymm6,%ymm10
+ .byte 196,67,125,8,218,1 // vroundps $0x1,%ymm10,%ymm11
+ .byte 196,65,44,92,219 // vsubps %ymm11,%ymm10,%ymm11
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 184,81,140,242,66 // mov $0x42f28c51,%eax
+ .byte 197,249,110,232 // vmovd %eax,%xmm5
+ .byte 196,226,125,88,237 // vpbroadcastd %xmm5,%ymm5
+ .byte 196,65,84,88,210 // vaddps %ymm10,%ymm5,%ymm10
+ .byte 184,141,188,190,63 // mov $0x3fbebc8d,%eax
+ .byte 197,249,110,224 // vmovd %eax,%xmm4
+ .byte 196,226,125,88,228 // vpbroadcastd %xmm4,%ymm4
+ .byte 196,66,93,188,211 // vfnmadd231ps %ymm11,%ymm4,%ymm10
+ .byte 184,254,210,221,65 // mov $0x41ddd2fe,%eax
+ .byte 197,249,110,216 // vmovd %eax,%xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3
+ .byte 184,248,245,154,64 // mov $0x409af5f8,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2
+ .byte 196,65,108,92,219 // vsubps %ymm11,%ymm2,%ymm11
+ .byte 196,65,100,94,219 // vdivps %ymm11,%ymm3,%ymm11
+ .byte 196,65,44,88,211 // vaddps %ymm11,%ymm10,%ymm10
+ .byte 197,124,16,108,36,128 // vmovups -0x80(%rsp),%ymm13
+ .byte 196,65,20,84,220 // vandps %ymm12,%ymm13,%ymm11
+ .byte 196,65,36,86,223 // vorps %ymm15,%ymm11,%ymm11
+ .byte 196,66,37,188,241 // vfnmadd231ps %ymm9,%ymm11,%ymm14
+ .byte 197,36,88,223 // vaddps %ymm7,%ymm11,%ymm11
+ .byte 196,65,124,94,219 // vdivps %ymm11,%ymm0,%ymm11
+ .byte 196,65,12,92,219 // vsubps %ymm11,%ymm14,%ymm11
+ .byte 196,65,76,89,219 // vmulps %ymm11,%ymm6,%ymm11
+ .byte 196,67,125,8,227,1 // vroundps $0x1,%ymm11,%ymm12
+ .byte 196,65,36,92,228 // vsubps %ymm12,%ymm11,%ymm12
+ .byte 196,65,84,88,219 // vaddps %ymm11,%ymm5,%ymm11
+ .byte 196,66,93,188,220 // vfnmadd231ps %ymm12,%ymm4,%ymm11
+ .byte 196,65,108,92,228 // vsubps %ymm12,%ymm2,%ymm12
+ .byte 196,65,100,94,228 // vdivps %ymm12,%ymm3,%ymm12
+ .byte 196,65,36,88,220 // vaddps %ymm12,%ymm11,%ymm11
+ .byte 196,65,20,84,192 // vandps %ymm8,%ymm13,%ymm8
+ .byte 196,65,60,86,199 // vorps %ymm15,%ymm8,%ymm8
+ .byte 196,194,61,188,201 // vfnmadd231ps %ymm9,%ymm8,%ymm1
+ .byte 197,188,88,255 // vaddps %ymm7,%ymm8,%ymm7
+ .byte 197,252,94,199 // vdivps %ymm7,%ymm0,%ymm0
+ .byte 197,244,92,192 // vsubps %ymm0,%ymm1,%ymm0
+ .byte 197,204,89,192 // vmulps %ymm0,%ymm6,%ymm0
+ .byte 196,227,125,8,200,1 // vroundps $0x1,%ymm0,%ymm1
+ .byte 197,252,92,201 // vsubps %ymm1,%ymm0,%ymm1
+ .byte 197,212,88,192 // vaddps %ymm0,%ymm5,%ymm0
+ .byte 196,226,117,172,224 // vfnmadd213ps %ymm0,%ymm1,%ymm4
+ .byte 197,236,92,193 // vsubps %ymm1,%ymm2,%ymm0
+ .byte 197,228,94,192 // vdivps %ymm0,%ymm3,%ymm0
+ .byte 197,220,88,192 // vaddps %ymm0,%ymm4,%ymm0
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1
+ .byte 196,193,116,89,210 // vmulps %ymm10,%ymm1,%ymm2
+ .byte 196,193,116,89,219 // vmulps %ymm11,%ymm1,%ymm3
+ .byte 197,244,89,224 // vmulps %ymm0,%ymm1,%ymm4
+ .byte 197,253,91,194 // vcvtps2dq %ymm2,%ymm0
+ .byte 197,253,91,203 // vcvtps2dq %ymm3,%ymm1
+ .byte 197,253,91,212 // vcvtps2dq %ymm4,%ymm2
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 197,252,16,92,36,160 // vmovups -0x60(%rsp),%ymm3
+ .byte 197,252,16,100,36,192 // vmovups -0x40(%rsp),%ymm4
+ .byte 197,252,16,108,36,224 // vmovups -0x20(%rsp),%ymm5
+ .byte 197,252,16,52,36 // vmovups (%rsp),%ymm6
+ .byte 197,252,16,124,36,32 // vmovups 0x20(%rsp),%ymm7
+ .byte 72,131,196,88 // add $0x58,%rsp
.byte 255,224 // jmpq *%rax
HIDDEN _sk_to_2dot2_hsw
.globl _sk_to_2dot2_hsw
FUNCTION(_sk_to_2dot2_hsw)
_sk_to_2dot2_hsw:
- .byte 197,252,82,192 // vrsqrtps %ymm0,%ymm0
- .byte 197,124,82,192 // vrsqrtps %ymm0,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,200 // vrsqrtps %ymm8,%ymm9
- .byte 197,252,83,192 // vrcpps %ymm0,%ymm0
- .byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
- .byte 196,65,124,83,193 // vrcpps %ymm9,%ymm8
- .byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
- .byte 196,193,124,95,192 // vmaxps %ymm8,%ymm0,%ymm0
- .byte 197,252,82,201 // vrsqrtps %ymm1,%ymm1
- .byte 197,124,82,201 // vrsqrtps %ymm1,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,209 // vrsqrtps %ymm9,%ymm10
- .byte 197,252,83,201 // vrcpps %ymm1,%ymm1
- .byte 197,180,89,201 // vmulps %ymm1,%ymm9,%ymm1
- .byte 196,65,124,83,202 // vrcpps %ymm10,%ymm9
- .byte 196,193,116,89,201 // vmulps %ymm9,%ymm1,%ymm1
- .byte 196,193,116,95,200 // vmaxps %ymm8,%ymm1,%ymm1
- .byte 197,252,82,210 // vrsqrtps %ymm2,%ymm2
- .byte 197,124,82,202 // vrsqrtps %ymm2,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,209 // vrsqrtps %ymm9,%ymm10
- .byte 197,252,83,210 // vrcpps %ymm2,%ymm2
- .byte 197,180,89,210 // vmulps %ymm2,%ymm9,%ymm2
- .byte 196,65,124,83,202 // vrcpps %ymm10,%ymm9
- .byte 196,193,108,89,209 // vmulps %ymm9,%ymm2,%ymm2
- .byte 196,193,108,95,208 // vmaxps %ymm8,%ymm2,%ymm2
- .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 72,131,236,88 // sub $0x58,%rsp
+ .byte 197,252,17,124,36,32 // vmovups %ymm7,0x20(%rsp)
+ .byte 197,252,17,52,36 // vmovups %ymm6,(%rsp)
+ .byte 197,252,17,108,36,224 // vmovups %ymm5,-0x20(%rsp)
+ .byte 197,252,17,100,36,192 // vmovups %ymm4,-0x40(%rsp)
+ .byte 197,252,17,92,36,160 // vmovups %ymm3,-0x60(%rsp)
+ .byte 197,124,40,225 // vmovaps %ymm1,%ymm12
+ .byte 65,184,46,186,232,62 // mov $0x3ee8ba2e,%r8d
+ .byte 197,124,91,208 // vcvtdq2ps %ymm0,%ymm10
+ .byte 184,0,0,0,52 // mov $0x34000000,%eax
+ .byte 197,121,110,192 // vmovd %eax,%xmm8
+ .byte 196,66,125,88,216 // vpbroadcastd %xmm8,%ymm11
+ .byte 184,255,255,127,0 // mov $0x7fffff,%eax
+ .byte 197,121,110,192 // vmovd %eax,%xmm8
+ .byte 196,194,125,88,216 // vpbroadcastd %xmm8,%ymm3
+ .byte 197,254,127,92,36,128 // vmovdqu %ymm3,-0x80(%rsp)
+ .byte 197,101,219,200 // vpand %ymm0,%ymm3,%ymm9
+ .byte 184,0,0,0,63 // mov $0x3f000000,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,98,125,88,248 // vpbroadcastd %xmm0,%ymm15
+ .byte 196,193,53,235,223 // vpor %ymm15,%ymm9,%ymm3
+ .byte 184,119,115,248,66 // mov $0x42f87377,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,98,125,88,232 // vpbroadcastd %xmm0,%ymm13
+ .byte 196,66,37,170,213 // vfmsub213ps %ymm13,%ymm11,%ymm10
+ .byte 184,117,191,191,63 // mov $0x3fbfbf75,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,98,125,88,200 // vpbroadcastd %xmm0,%ymm9
+ .byte 196,66,101,188,209 // vfnmadd231ps %ymm9,%ymm3,%ymm10
+ .byte 184,163,233,220,63 // mov $0x3fdce9a3,%eax
+ .byte 196,65,124,91,244 // vcvtdq2ps %ymm12,%ymm14
+ .byte 196,66,37,170,245 // vfmsub213ps %ymm13,%ymm11,%ymm14
+ .byte 197,252,91,202 // vcvtdq2ps %ymm2,%ymm1
+ .byte 197,124,40,194 // vmovaps %ymm2,%ymm8
+ .byte 196,194,37,170,205 // vfmsub213ps %ymm13,%ymm11,%ymm1
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0
+ .byte 184,249,68,180,62 // mov $0x3eb444f9,%eax
+ .byte 197,249,110,248 // vmovd %eax,%xmm7
+ .byte 196,226,125,88,255 // vpbroadcastd %xmm7,%ymm7
+ .byte 197,100,88,223 // vaddps %ymm7,%ymm3,%ymm11
+ .byte 196,65,124,94,219 // vdivps %ymm11,%ymm0,%ymm11
+ .byte 196,65,44,92,211 // vsubps %ymm11,%ymm10,%ymm10
+ .byte 196,193,121,110,240 // vmovd %r8d,%xmm6
+ .byte 196,226,125,88,246 // vpbroadcastd %xmm6,%ymm6
+ .byte 196,65,76,89,210 // vmulps %ymm10,%ymm6,%ymm10
+ .byte 196,67,125,8,218,1 // vroundps $0x1,%ymm10,%ymm11
+ .byte 196,65,44,92,219 // vsubps %ymm11,%ymm10,%ymm11
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 184,81,140,242,66 // mov $0x42f28c51,%eax
+ .byte 197,249,110,232 // vmovd %eax,%xmm5
+ .byte 196,226,125,88,237 // vpbroadcastd %xmm5,%ymm5
+ .byte 196,65,84,88,210 // vaddps %ymm10,%ymm5,%ymm10
+ .byte 184,141,188,190,63 // mov $0x3fbebc8d,%eax
+ .byte 197,249,110,224 // vmovd %eax,%xmm4
+ .byte 196,226,125,88,228 // vpbroadcastd %xmm4,%ymm4
+ .byte 196,66,93,188,211 // vfnmadd231ps %ymm11,%ymm4,%ymm10
+ .byte 184,254,210,221,65 // mov $0x41ddd2fe,%eax
+ .byte 197,249,110,216 // vmovd %eax,%xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3
+ .byte 184,248,245,154,64 // mov $0x409af5f8,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2
+ .byte 196,65,108,92,219 // vsubps %ymm11,%ymm2,%ymm11
+ .byte 196,65,100,94,219 // vdivps %ymm11,%ymm3,%ymm11
+ .byte 196,65,44,88,211 // vaddps %ymm11,%ymm10,%ymm10
+ .byte 197,124,16,108,36,128 // vmovups -0x80(%rsp),%ymm13
+ .byte 196,65,20,84,220 // vandps %ymm12,%ymm13,%ymm11
+ .byte 196,65,36,86,223 // vorps %ymm15,%ymm11,%ymm11
+ .byte 196,66,37,188,241 // vfnmadd231ps %ymm9,%ymm11,%ymm14
+ .byte 197,36,88,223 // vaddps %ymm7,%ymm11,%ymm11
+ .byte 196,65,124,94,219 // vdivps %ymm11,%ymm0,%ymm11
+ .byte 196,65,12,92,219 // vsubps %ymm11,%ymm14,%ymm11
+ .byte 196,65,76,89,219 // vmulps %ymm11,%ymm6,%ymm11
+ .byte 196,67,125,8,227,1 // vroundps $0x1,%ymm11,%ymm12
+ .byte 196,65,36,92,228 // vsubps %ymm12,%ymm11,%ymm12
+ .byte 196,65,84,88,219 // vaddps %ymm11,%ymm5,%ymm11
+ .byte 196,66,93,188,220 // vfnmadd231ps %ymm12,%ymm4,%ymm11
+ .byte 196,65,108,92,228 // vsubps %ymm12,%ymm2,%ymm12
+ .byte 196,65,100,94,228 // vdivps %ymm12,%ymm3,%ymm12
+ .byte 196,65,36,88,220 // vaddps %ymm12,%ymm11,%ymm11
+ .byte 196,65,20,84,192 // vandps %ymm8,%ymm13,%ymm8
+ .byte 196,65,60,86,199 // vorps %ymm15,%ymm8,%ymm8
+ .byte 196,194,61,188,201 // vfnmadd231ps %ymm9,%ymm8,%ymm1
+ .byte 197,188,88,255 // vaddps %ymm7,%ymm8,%ymm7
+ .byte 197,252,94,199 // vdivps %ymm7,%ymm0,%ymm0
+ .byte 197,244,92,192 // vsubps %ymm0,%ymm1,%ymm0
+ .byte 197,204,89,192 // vmulps %ymm0,%ymm6,%ymm0
+ .byte 196,227,125,8,200,1 // vroundps $0x1,%ymm0,%ymm1
+ .byte 197,252,92,201 // vsubps %ymm1,%ymm0,%ymm1
+ .byte 197,212,88,192 // vaddps %ymm0,%ymm5,%ymm0
+ .byte 196,226,117,172,224 // vfnmadd213ps %ymm0,%ymm1,%ymm4
+ .byte 197,236,92,193 // vsubps %ymm1,%ymm2,%ymm0
+ .byte 197,228,94,192 // vdivps %ymm0,%ymm3,%ymm0
+ .byte 197,220,88,192 // vaddps %ymm0,%ymm4,%ymm0
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1
+ .byte 196,193,116,89,210 // vmulps %ymm10,%ymm1,%ymm2
+ .byte 196,193,116,89,219 // vmulps %ymm11,%ymm1,%ymm3
+ .byte 197,244,89,224 // vmulps %ymm0,%ymm1,%ymm4
+ .byte 197,253,91,194 // vcvtps2dq %ymm2,%ymm0
+ .byte 197,253,91,203 // vcvtps2dq %ymm3,%ymm1
+ .byte 197,253,91,212 // vcvtps2dq %ymm4,%ymm2
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 197,252,16,92,36,160 // vmovups -0x60(%rsp),%ymm3
+ .byte 197,252,16,100,36,192 // vmovups -0x40(%rsp),%ymm4
+ .byte 197,252,16,108,36,224 // vmovups -0x20(%rsp),%ymm5
+ .byte 197,252,16,52,36 // vmovups (%rsp),%ymm6
+ .byte 197,252,16,124,36,32 // vmovups 0x20(%rsp),%ymm7
+ .byte 72,131,196,88 // add $0x58,%rsp
.byte 255,224 // jmpq *%rax
HIDDEN _sk_rgb_to_hsl_hsw
@@ -8930,7 +9102,7 @@ _sk_scale_u8_hsw:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,56 // jne 11bc <_sk_scale_u8_hsw+0x48>
+ .byte 117,56 // jne 148c <_sk_scale_u8_hsw+0x48>
.byte 197,122,126,0 // vmovq (%rax),%xmm8
.byte 196,66,125,49,192 // vpmovzxbd %xmm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
@@ -8954,9 +9126,9 @@ _sk_scale_u8_hsw:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 11c4 <_sk_scale_u8_hsw+0x50>
+ .byte 117,234 // jne 1494 <_sk_scale_u8_hsw+0x50>
.byte 196,65,249,110,193 // vmovq %r9,%xmm8
- .byte 235,167 // jmp 1188 <_sk_scale_u8_hsw+0x14>
+ .byte 235,167 // jmp 1458 <_sk_scale_u8_hsw+0x14>
HIDDEN _sk_lerp_1_float_hsw
.globl _sk_lerp_1_float_hsw
@@ -8984,7 +9156,7 @@ _sk_lerp_u8_hsw:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,76 // jne 126c <_sk_lerp_u8_hsw+0x5c>
+ .byte 117,76 // jne 153c <_sk_lerp_u8_hsw+0x5c>
.byte 197,122,126,0 // vmovq (%rax),%xmm8
.byte 196,66,125,49,192 // vpmovzxbd %xmm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
@@ -9012,9 +9184,9 @@ _sk_lerp_u8_hsw:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 1274 <_sk_lerp_u8_hsw+0x64>
+ .byte 117,234 // jne 1544 <_sk_lerp_u8_hsw+0x64>
.byte 196,65,249,110,193 // vmovq %r9,%xmm8
- .byte 235,147 // jmp 1224 <_sk_lerp_u8_hsw+0x14>
+ .byte 235,147 // jmp 14f4 <_sk_lerp_u8_hsw+0x14>
HIDDEN _sk_lerp_565_hsw
.globl _sk_lerp_565_hsw
@@ -9023,7 +9195,7 @@ _sk_lerp_565_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,179,0,0,0 // jne 1352 <_sk_lerp_565_hsw+0xc1>
+ .byte 15,133,179,0,0,0 // jne 1622 <_sk_lerp_565_hsw+0xc1>
.byte 196,193,122,111,28,122 // vmovdqu (%r10,%rdi,2),%xmm3
.byte 196,98,125,51,195 // vpmovzxwd %xmm3,%ymm8
.byte 184,0,248,0,0 // mov $0xf800,%eax
@@ -9069,9 +9241,9 @@ _sk_lerp_565_hsw:
.byte 197,225,239,219 // vpxor %xmm3,%xmm3,%xmm3
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,59,255,255,255 // ja 12a5 <_sk_lerp_565_hsw+0x14>
+ .byte 15,135,59,255,255,255 // ja 1575 <_sk_lerp_565_hsw+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,75,0,0,0 // lea 0x4b(%rip),%r9 # 13c0 <_sk_lerp_565_hsw+0x12f>
+ .byte 76,141,13,75,0,0,0 // lea 0x4b(%rip),%r9 # 1690 <_sk_lerp_565_hsw+0x12f>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -9083,7 +9255,7 @@ _sk_lerp_565_hsw:
.byte 196,193,97,196,92,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm3,%xmm3
.byte 196,193,97,196,92,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm3,%xmm3
.byte 196,193,97,196,28,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm3,%xmm3
- .byte 233,231,254,255,255 // jmpq 12a5 <_sk_lerp_565_hsw+0x14>
+ .byte 233,231,254,255,255 // jmpq 1575 <_sk_lerp_565_hsw+0x14>
.byte 102,144 // xchg %ax,%ax
.byte 242,255 // repnz (bad)
.byte 255 // (bad)
@@ -9118,7 +9290,7 @@ _sk_load_tables_hsw:
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
.byte 76,3,8 // add (%rax),%r9
.byte 77,133,192 // test %r8,%r8
- .byte 117,121 // jne 146a <_sk_load_tables_hsw+0x8e>
+ .byte 117,121 // jne 173a <_sk_load_tables_hsw+0x8e>
.byte 196,193,126,111,25 // vmovdqu (%r9),%ymm3
.byte 185,255,0,0,0 // mov $0xff,%ecx
.byte 197,249,110,193 // vmovd %ecx,%xmm0
@@ -9154,7 +9326,7 @@ _sk_load_tables_hsw:
.byte 196,193,249,110,194 // vmovq %r10,%xmm0
.byte 196,226,125,33,192 // vpmovsxbd %xmm0,%ymm0
.byte 196,194,125,140,25 // vpmaskmovd (%r9),%ymm0,%ymm3
- .byte 233,99,255,255,255 // jmpq 13f6 <_sk_load_tables_hsw+0x1a>
+ .byte 233,99,255,255,255 // jmpq 16c6 <_sk_load_tables_hsw+0x1a>
HIDDEN _sk_load_tables_u16_be_hsw
.globl _sk_load_tables_u16_be_hsw
@@ -9164,7 +9336,7 @@ _sk_load_tables_u16_be_hsw:
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,216,0,0,0 // jne 1581 <_sk_load_tables_u16_be_hsw+0xee>
+ .byte 15,133,216,0,0,0 // jne 1851 <_sk_load_tables_u16_be_hsw+0xee>
.byte 196,1,121,16,4,72 // vmovupd (%r8,%r9,2),%xmm8
.byte 196,129,121,16,84,72,16 // vmovupd 0x10(%r8,%r9,2),%xmm2
.byte 196,129,121,16,92,72,32 // vmovupd 0x20(%r8,%r9,2),%xmm3
@@ -9213,29 +9385,29 @@ _sk_load_tables_u16_be_hsw:
.byte 196,1,123,16,4,72 // vmovsd (%r8,%r9,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,85 // je 15e7 <_sk_load_tables_u16_be_hsw+0x154>
+ .byte 116,85 // je 18b7 <_sk_load_tables_u16_be_hsw+0x154>
.byte 196,1,57,22,68,72,8 // vmovhpd 0x8(%r8,%r9,2),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,72 // jb 15e7 <_sk_load_tables_u16_be_hsw+0x154>
+ .byte 114,72 // jb 18b7 <_sk_load_tables_u16_be_hsw+0x154>
.byte 196,129,123,16,84,72,16 // vmovsd 0x10(%r8,%r9,2),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,72 // je 15f4 <_sk_load_tables_u16_be_hsw+0x161>
+ .byte 116,72 // je 18c4 <_sk_load_tables_u16_be_hsw+0x161>
.byte 196,129,105,22,84,72,24 // vmovhpd 0x18(%r8,%r9,2),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,59 // jb 15f4 <_sk_load_tables_u16_be_hsw+0x161>
+ .byte 114,59 // jb 18c4 <_sk_load_tables_u16_be_hsw+0x161>
.byte 196,129,123,16,92,72,32 // vmovsd 0x20(%r8,%r9,2),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,250,254,255,255 // je 14c4 <_sk_load_tables_u16_be_hsw+0x31>
+ .byte 15,132,250,254,255,255 // je 1794 <_sk_load_tables_u16_be_hsw+0x31>
.byte 196,129,97,22,92,72,40 // vmovhpd 0x28(%r8,%r9,2),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,233,254,255,255 // jb 14c4 <_sk_load_tables_u16_be_hsw+0x31>
+ .byte 15,130,233,254,255,255 // jb 1794 <_sk_load_tables_u16_be_hsw+0x31>
.byte 196,1,122,126,76,72,48 // vmovq 0x30(%r8,%r9,2),%xmm9
- .byte 233,221,254,255,255 // jmpq 14c4 <_sk_load_tables_u16_be_hsw+0x31>
+ .byte 233,221,254,255,255 // jmpq 1794 <_sk_load_tables_u16_be_hsw+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,208,254,255,255 // jmpq 14c4 <_sk_load_tables_u16_be_hsw+0x31>
+ .byte 233,208,254,255,255 // jmpq 1794 <_sk_load_tables_u16_be_hsw+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,199,254,255,255 // jmpq 14c4 <_sk_load_tables_u16_be_hsw+0x31>
+ .byte 233,199,254,255,255 // jmpq 1794 <_sk_load_tables_u16_be_hsw+0x31>
HIDDEN _sk_load_tables_rgb_u16_be_hsw
.globl _sk_load_tables_rgb_u16_be_hsw
@@ -9245,7 +9417,7 @@ _sk_load_tables_rgb_u16_be_hsw:
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,127 // lea (%rdi,%rdi,2),%r9
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,207,0,0,0 // jne 16de <_sk_load_tables_rgb_u16_be_hsw+0xe1>
+ .byte 15,133,207,0,0,0 // jne 19ae <_sk_load_tables_rgb_u16_be_hsw+0xe1>
.byte 196,129,122,111,4,72 // vmovdqu (%r8,%r9,2),%xmm0
.byte 196,129,122,111,84,72,12 // vmovdqu 0xc(%r8,%r9,2),%xmm2
.byte 196,129,122,111,76,72,24 // vmovdqu 0x18(%r8,%r9,2),%xmm1
@@ -9292,36 +9464,36 @@ _sk_load_tables_rgb_u16_be_hsw:
.byte 196,129,121,110,4,72 // vmovd (%r8,%r9,2),%xmm0
.byte 196,129,121,196,68,72,4,2 // vpinsrw $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 117,5 // jne 16f7 <_sk_load_tables_rgb_u16_be_hsw+0xfa>
- .byte 233,76,255,255,255 // jmpq 1643 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 117,5 // jne 19c7 <_sk_load_tables_rgb_u16_be_hsw+0xfa>
+ .byte 233,76,255,255,255 // jmpq 1913 <_sk_load_tables_rgb_u16_be_hsw+0x46>
.byte 196,129,121,110,76,72,6 // vmovd 0x6(%r8,%r9,2),%xmm1
.byte 196,1,113,196,68,72,10,2 // vpinsrw $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,26 // jb 1726 <_sk_load_tables_rgb_u16_be_hsw+0x129>
+ .byte 114,26 // jb 19f6 <_sk_load_tables_rgb_u16_be_hsw+0x129>
.byte 196,129,121,110,76,72,12 // vmovd 0xc(%r8,%r9,2),%xmm1
.byte 196,129,113,196,84,72,16,2 // vpinsrw $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 117,10 // jne 172b <_sk_load_tables_rgb_u16_be_hsw+0x12e>
- .byte 233,29,255,255,255 // jmpq 1643 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- .byte 233,24,255,255,255 // jmpq 1643 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 117,10 // jne 19fb <_sk_load_tables_rgb_u16_be_hsw+0x12e>
+ .byte 233,29,255,255,255 // jmpq 1913 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 233,24,255,255,255 // jmpq 1913 <_sk_load_tables_rgb_u16_be_hsw+0x46>
.byte 196,129,121,110,76,72,18 // vmovd 0x12(%r8,%r9,2),%xmm1
.byte 196,1,113,196,76,72,22,2 // vpinsrw $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,26 // jb 175a <_sk_load_tables_rgb_u16_be_hsw+0x15d>
+ .byte 114,26 // jb 1a2a <_sk_load_tables_rgb_u16_be_hsw+0x15d>
.byte 196,129,121,110,76,72,24 // vmovd 0x18(%r8,%r9,2),%xmm1
.byte 196,129,113,196,76,72,28,2 // vpinsrw $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 117,10 // jne 175f <_sk_load_tables_rgb_u16_be_hsw+0x162>
- .byte 233,233,254,255,255 // jmpq 1643 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- .byte 233,228,254,255,255 // jmpq 1643 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 117,10 // jne 1a2f <_sk_load_tables_rgb_u16_be_hsw+0x162>
+ .byte 233,233,254,255,255 // jmpq 1913 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 233,228,254,255,255 // jmpq 1913 <_sk_load_tables_rgb_u16_be_hsw+0x46>
.byte 196,129,121,110,92,72,30 // vmovd 0x1e(%r8,%r9,2),%xmm3
.byte 196,1,97,196,92,72,34,2 // vpinsrw $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,20 // jb 1788 <_sk_load_tables_rgb_u16_be_hsw+0x18b>
+ .byte 114,20 // jb 1a58 <_sk_load_tables_rgb_u16_be_hsw+0x18b>
.byte 196,129,121,110,92,72,36 // vmovd 0x24(%r8,%r9,2),%xmm3
.byte 196,129,97,196,92,72,40,2 // vpinsrw $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
- .byte 233,187,254,255,255 // jmpq 1643 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- .byte 233,182,254,255,255 // jmpq 1643 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 233,187,254,255,255 // jmpq 1913 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 233,182,254,255,255 // jmpq 1913 <_sk_load_tables_rgb_u16_be_hsw+0x46>
HIDDEN _sk_byte_tables_hsw
.globl _sk_byte_tables_hsw
@@ -10084,7 +10256,7 @@ _sk_load_a8_hsw:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,50 // jne 2485 <_sk_load_a8_hsw+0x42>
+ .byte 117,50 // jne 2755 <_sk_load_a8_hsw+0x42>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
@@ -10107,9 +10279,9 @@ _sk_load_a8_hsw:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 248d <_sk_load_a8_hsw+0x4a>
+ .byte 117,234 // jne 275d <_sk_load_a8_hsw+0x4a>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,173 // jmp 2457 <_sk_load_a8_hsw+0x14>
+ .byte 235,173 // jmp 2727 <_sk_load_a8_hsw+0x14>
HIDDEN _sk_gather_a8_hsw
.globl _sk_gather_a8_hsw
@@ -10184,7 +10356,7 @@ _sk_store_a8_hsw:
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 196,65,57,103,192 // vpackuswb %xmm8,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 25c2 <_sk_store_a8_hsw+0x3b>
+ .byte 117,10 // jne 2892 <_sk_store_a8_hsw+0x3b>
.byte 196,65,123,17,4,57 // vmovsd %xmm8,(%r9,%rdi,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -10192,10 +10364,10 @@ _sk_store_a8_hsw:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 25be <_sk_store_a8_hsw+0x37>
+ .byte 119,236 // ja 288e <_sk_store_a8_hsw+0x37>
.byte 196,66,121,48,192 // vpmovzxbw %xmm8,%xmm8
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 2624 <_sk_store_a8_hsw+0x9d>
+ .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 28f4 <_sk_store_a8_hsw+0x9d>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -10206,7 +10378,7 @@ _sk_store_a8_hsw:
.byte 196,67,121,20,68,57,2,4 // vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
.byte 196,67,121,20,68,57,1,2 // vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
.byte 196,67,121,20,4,57,0 // vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- .byte 235,154 // jmp 25be <_sk_store_a8_hsw+0x37>
+ .byte 235,154 // jmp 288e <_sk_store_a8_hsw+0x37>
.byte 247,255 // idiv %edi
.byte 255 // (bad)
.byte 255 // (bad)
@@ -10239,7 +10411,7 @@ _sk_load_g8_hsw:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,60 // jne 268c <_sk_load_g8_hsw+0x4c>
+ .byte 117,60 // jne 295c <_sk_load_g8_hsw+0x4c>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
@@ -10264,9 +10436,9 @@ _sk_load_g8_hsw:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 2694 <_sk_load_g8_hsw+0x54>
+ .byte 117,234 // jne 2964 <_sk_load_g8_hsw+0x54>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,163 // jmp 2654 <_sk_load_g8_hsw+0x14>
+ .byte 235,163 // jmp 2924 <_sk_load_g8_hsw+0x14>
HIDDEN _sk_gather_g8_hsw
.globl _sk_gather_g8_hsw
@@ -10335,9 +10507,9 @@ _sk_gather_i8_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 27a7 <_sk_gather_i8_hsw+0xf>
+ .byte 116,5 // je 2a77 <_sk_gather_i8_hsw+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 27a9 <_sk_gather_i8_hsw+0x11>
+ .byte 235,2 // jmp 2a79 <_sk_gather_i8_hsw+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 65,87 // push %r15
.byte 65,86 // push %r14
@@ -10410,7 +10582,7 @@ _sk_load_565_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,149,0,0,0 // jne 295b <_sk_load_565_hsw+0xa3>
+ .byte 15,133,149,0,0,0 // jne 2c2b <_sk_load_565_hsw+0xa3>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 196,226,125,51,208 // vpmovzxwd %xmm0,%ymm2
.byte 184,0,248,0,0 // mov $0xf800,%eax
@@ -10450,9 +10622,9 @@ _sk_load_565_hsw:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,89,255,255,255 // ja 28cc <_sk_load_565_hsw+0x14>
+ .byte 15,135,89,255,255,255 // ja 2b9c <_sk_load_565_hsw+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 29c8 <_sk_load_565_hsw+0x110>
+ .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 2c98 <_sk_load_565_hsw+0x110>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -10464,12 +10636,12 @@ _sk_load_565_hsw:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,5,255,255,255 // jmpq 28cc <_sk_load_565_hsw+0x14>
+ .byte 233,5,255,255,255 // jmpq 2b9c <_sk_load_565_hsw+0x14>
.byte 144 // nop
.byte 243,255 // repz (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 235,255 // jmp 29cd <_sk_load_565_hsw+0x115>
+ .byte 235,255 // jmp 2c9d <_sk_load_565_hsw+0x115>
.byte 255 // (bad)
.byte 255,227 // jmpq *%rbx
.byte 255 // (bad)
@@ -10596,7 +10768,7 @@ _sk_store_565_hsw:
.byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 2b93 <_sk_store_565_hsw+0x6c>
+ .byte 117,10 // jne 2e63 <_sk_store_565_hsw+0x6c>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -10604,9 +10776,9 @@ _sk_store_565_hsw:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 2b8f <_sk_store_565_hsw+0x68>
+ .byte 119,236 // ja 2e5f <_sk_store_565_hsw+0x68>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 2bf0 <_sk_store_565_hsw+0xc9>
+ .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 2ec0 <_sk_store_565_hsw+0xc9>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -10617,7 +10789,7 @@ _sk_store_565_hsw:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 2b8f <_sk_store_565_hsw+0x68>
+ .byte 235,159 // jmp 2e5f <_sk_store_565_hsw+0x68>
.byte 247,255 // idiv %edi
.byte 255 // (bad)
.byte 255 // (bad)
@@ -10648,7 +10820,7 @@ _sk_load_4444_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,179,0,0,0 // jne 2ccd <_sk_load_4444_hsw+0xc1>
+ .byte 15,133,179,0,0,0 // jne 2f9d <_sk_load_4444_hsw+0xc1>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 196,98,125,51,200 // vpmovzxwd %xmm0,%ymm9
.byte 184,0,240,0,0 // mov $0xf000,%eax
@@ -10694,9 +10866,9 @@ _sk_load_4444_hsw:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,59,255,255,255 // ja 2c20 <_sk_load_4444_hsw+0x14>
+ .byte 15,135,59,255,255,255 // ja 2ef0 <_sk_load_4444_hsw+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,76,0,0,0 // lea 0x4c(%rip),%r9 # 2d3c <_sk_load_4444_hsw+0x130>
+ .byte 76,141,13,76,0,0,0 // lea 0x4c(%rip),%r9 # 300c <_sk_load_4444_hsw+0x130>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -10708,13 +10880,13 @@ _sk_load_4444_hsw:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,231,254,255,255 // jmpq 2c20 <_sk_load_4444_hsw+0x14>
+ .byte 233,231,254,255,255 // jmpq 2ef0 <_sk_load_4444_hsw+0x14>
.byte 15,31,0 // nopl (%rax)
.byte 241 // icebp
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,225 // jmpq ffffffffe2002d44 <_sk_callback_hsw+0xffffffffe1ffeace>
+ .byte 233,255,255,255,225 // jmpq ffffffffe2003014 <_sk_callback_hsw+0xffffffffe1ffeace>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
@@ -10846,7 +11018,7 @@ _sk_store_4444_hsw:
.byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 2f2b <_sk_store_4444_hsw+0x72>
+ .byte 117,10 // jne 31fb <_sk_store_4444_hsw+0x72>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -10854,9 +11026,9 @@ _sk_store_4444_hsw:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 2f27 <_sk_store_4444_hsw+0x6e>
+ .byte 119,236 // ja 31f7 <_sk_store_4444_hsw+0x6e>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 2f88 <_sk_store_4444_hsw+0xcf>
+ .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 3258 <_sk_store_4444_hsw+0xcf>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -10867,7 +11039,7 @@ _sk_store_4444_hsw:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 2f27 <_sk_store_4444_hsw+0x6e>
+ .byte 235,159 // jmp 31f7 <_sk_store_4444_hsw+0x6e>
.byte 247,255 // idiv %edi
.byte 255 // (bad)
.byte 255 // (bad)
@@ -10900,7 +11072,7 @@ _sk_load_8888_hsw:
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
.byte 76,3,8 // add (%rax),%r9
.byte 77,133,192 // test %r8,%r8
- .byte 117,104 // jne 3021 <_sk_load_8888_hsw+0x7d>
+ .byte 117,104 // jne 32f1 <_sk_load_8888_hsw+0x7d>
.byte 196,193,126,111,25 // vmovdqu (%r9),%ymm3
.byte 184,255,0,0,0 // mov $0xff,%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
@@ -10933,7 +11105,7 @@ _sk_load_8888_hsw:
.byte 196,225,249,110,192 // vmovq %rax,%xmm0
.byte 196,226,125,33,192 // vpmovsxbd %xmm0,%ymm0
.byte 196,194,125,140,25 // vpmaskmovd (%r9),%ymm0,%ymm3
- .byte 233,116,255,255,255 // jmpq 2fbe <_sk_load_8888_hsw+0x1a>
+ .byte 233,116,255,255,255 // jmpq 328e <_sk_load_8888_hsw+0x1a>
HIDDEN _sk_gather_8888_hsw
.globl _sk_gather_8888_hsw
@@ -10997,7 +11169,7 @@ _sk_store_8888_hsw:
.byte 196,65,45,235,192 // vpor %ymm8,%ymm10,%ymm8
.byte 196,65,53,235,192 // vpor %ymm8,%ymm9,%ymm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,12 // jne 3144 <_sk_store_8888_hsw+0x74>
+ .byte 117,12 // jne 3414 <_sk_store_8888_hsw+0x74>
.byte 196,65,126,127,1 // vmovdqu %ymm8,(%r9)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,137,193 // mov %r8,%rcx
@@ -11010,7 +11182,7 @@ _sk_store_8888_hsw:
.byte 196,97,249,110,200 // vmovq %rax,%xmm9
.byte 196,66,125,33,201 // vpmovsxbd %xmm9,%ymm9
.byte 196,66,53,142,1 // vpmaskmovd %ymm8,%ymm9,(%r9)
- .byte 235,211 // jmp 313d <_sk_store_8888_hsw+0x6d>
+ .byte 235,211 // jmp 340d <_sk_store_8888_hsw+0x6d>
HIDDEN _sk_load_f16_hsw
.globl _sk_load_f16_hsw
@@ -11019,7 +11191,7 @@ _sk_load_f16_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,139,0 // mov (%rax),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,97 // jne 31d5 <_sk_load_f16_hsw+0x6b>
+ .byte 117,97 // jne 34a5 <_sk_load_f16_hsw+0x6b>
.byte 197,121,16,4,248 // vmovupd (%rax,%rdi,8),%xmm8
.byte 197,249,16,84,248,16 // vmovupd 0x10(%rax,%rdi,8),%xmm2
.byte 197,249,16,92,248,32 // vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -11045,29 +11217,29 @@ _sk_load_f16_hsw:
.byte 197,123,16,4,248 // vmovsd (%rax,%rdi,8),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,79 // je 3234 <_sk_load_f16_hsw+0xca>
+ .byte 116,79 // je 3504 <_sk_load_f16_hsw+0xca>
.byte 197,57,22,68,248,8 // vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,67 // jb 3234 <_sk_load_f16_hsw+0xca>
+ .byte 114,67 // jb 3504 <_sk_load_f16_hsw+0xca>
.byte 197,251,16,84,248,16 // vmovsd 0x10(%rax,%rdi,8),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,68 // je 3241 <_sk_load_f16_hsw+0xd7>
+ .byte 116,68 // je 3511 <_sk_load_f16_hsw+0xd7>
.byte 197,233,22,84,248,24 // vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,56 // jb 3241 <_sk_load_f16_hsw+0xd7>
+ .byte 114,56 // jb 3511 <_sk_load_f16_hsw+0xd7>
.byte 197,251,16,92,248,32 // vmovsd 0x20(%rax,%rdi,8),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,114,255,255,255 // je 318b <_sk_load_f16_hsw+0x21>
+ .byte 15,132,114,255,255,255 // je 345b <_sk_load_f16_hsw+0x21>
.byte 197,225,22,92,248,40 // vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,98,255,255,255 // jb 318b <_sk_load_f16_hsw+0x21>
+ .byte 15,130,98,255,255,255 // jb 345b <_sk_load_f16_hsw+0x21>
.byte 197,122,126,76,248,48 // vmovq 0x30(%rax,%rdi,8),%xmm9
- .byte 233,87,255,255,255 // jmpq 318b <_sk_load_f16_hsw+0x21>
+ .byte 233,87,255,255,255 // jmpq 345b <_sk_load_f16_hsw+0x21>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,74,255,255,255 // jmpq 318b <_sk_load_f16_hsw+0x21>
+ .byte 233,74,255,255,255 // jmpq 345b <_sk_load_f16_hsw+0x21>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,65,255,255,255 // jmpq 318b <_sk_load_f16_hsw+0x21>
+ .byte 233,65,255,255,255 // jmpq 345b <_sk_load_f16_hsw+0x21>
HIDDEN _sk_gather_f16_hsw
.globl _sk_gather_f16_hsw
@@ -11125,7 +11297,7 @@ _sk_store_f16_hsw:
.byte 196,65,57,98,205 // vpunpckldq %xmm13,%xmm8,%xmm9
.byte 196,65,57,106,197 // vpunpckhdq %xmm13,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,27 // jne 3339 <_sk_store_f16_hsw+0x65>
+ .byte 117,27 // jne 3609 <_sk_store_f16_hsw+0x65>
.byte 197,120,17,28,248 // vmovups %xmm11,(%rax,%rdi,8)
.byte 197,120,17,84,248,16 // vmovups %xmm10,0x10(%rax,%rdi,8)
.byte 197,120,17,76,248,32 // vmovups %xmm9,0x20(%rax,%rdi,8)
@@ -11134,22 +11306,22 @@ _sk_store_f16_hsw:
.byte 255,224 // jmpq *%rax
.byte 197,121,214,28,248 // vmovq %xmm11,(%rax,%rdi,8)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,241 // je 3335 <_sk_store_f16_hsw+0x61>
+ .byte 116,241 // je 3605 <_sk_store_f16_hsw+0x61>
.byte 197,121,23,92,248,8 // vmovhpd %xmm11,0x8(%rax,%rdi,8)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,229 // jb 3335 <_sk_store_f16_hsw+0x61>
+ .byte 114,229 // jb 3605 <_sk_store_f16_hsw+0x61>
.byte 197,121,214,84,248,16 // vmovq %xmm10,0x10(%rax,%rdi,8)
- .byte 116,221 // je 3335 <_sk_store_f16_hsw+0x61>
+ .byte 116,221 // je 3605 <_sk_store_f16_hsw+0x61>
.byte 197,121,23,84,248,24 // vmovhpd %xmm10,0x18(%rax,%rdi,8)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,209 // jb 3335 <_sk_store_f16_hsw+0x61>
+ .byte 114,209 // jb 3605 <_sk_store_f16_hsw+0x61>
.byte 197,121,214,76,248,32 // vmovq %xmm9,0x20(%rax,%rdi,8)
- .byte 116,201 // je 3335 <_sk_store_f16_hsw+0x61>
+ .byte 116,201 // je 3605 <_sk_store_f16_hsw+0x61>
.byte 197,121,23,76,248,40 // vmovhpd %xmm9,0x28(%rax,%rdi,8)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,189 // jb 3335 <_sk_store_f16_hsw+0x61>
+ .byte 114,189 // jb 3605 <_sk_store_f16_hsw+0x61>
.byte 197,121,214,68,248,48 // vmovq %xmm8,0x30(%rax,%rdi,8)
- .byte 235,181 // jmp 3335 <_sk_store_f16_hsw+0x61>
+ .byte 235,181 // jmp 3605 <_sk_store_f16_hsw+0x61>
HIDDEN _sk_load_u16_be_hsw
.globl _sk_load_u16_be_hsw
@@ -11159,7 +11331,7 @@ _sk_load_u16_be_hsw:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,189,0,0,0,0 // lea 0x0(,%rdi,4),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,205,0,0,0 // jne 3463 <_sk_load_u16_be_hsw+0xe3>
+ .byte 15,133,205,0,0,0 // jne 3733 <_sk_load_u16_be_hsw+0xe3>
.byte 196,65,121,16,4,64 // vmovupd (%r8,%rax,2),%xmm8
.byte 196,193,121,16,84,64,16 // vmovupd 0x10(%r8,%rax,2),%xmm2
.byte 196,193,121,16,92,64,32 // vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -11208,29 +11380,29 @@ _sk_load_u16_be_hsw:
.byte 196,65,123,16,4,64 // vmovsd (%r8,%rax,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,85 // je 34c9 <_sk_load_u16_be_hsw+0x149>
+ .byte 116,85 // je 3799 <_sk_load_u16_be_hsw+0x149>
.byte 196,65,57,22,68,64,8 // vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,72 // jb 34c9 <_sk_load_u16_be_hsw+0x149>
+ .byte 114,72 // jb 3799 <_sk_load_u16_be_hsw+0x149>
.byte 196,193,123,16,84,64,16 // vmovsd 0x10(%r8,%rax,2),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,72 // je 34d6 <_sk_load_u16_be_hsw+0x156>
+ .byte 116,72 // je 37a6 <_sk_load_u16_be_hsw+0x156>
.byte 196,193,105,22,84,64,24 // vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,59 // jb 34d6 <_sk_load_u16_be_hsw+0x156>
+ .byte 114,59 // jb 37a6 <_sk_load_u16_be_hsw+0x156>
.byte 196,193,123,16,92,64,32 // vmovsd 0x20(%r8,%rax,2),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,5,255,255,255 // je 33b1 <_sk_load_u16_be_hsw+0x31>
+ .byte 15,132,5,255,255,255 // je 3681 <_sk_load_u16_be_hsw+0x31>
.byte 196,193,97,22,92,64,40 // vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,244,254,255,255 // jb 33b1 <_sk_load_u16_be_hsw+0x31>
+ .byte 15,130,244,254,255,255 // jb 3681 <_sk_load_u16_be_hsw+0x31>
.byte 196,65,122,126,76,64,48 // vmovq 0x30(%r8,%rax,2),%xmm9
- .byte 233,232,254,255,255 // jmpq 33b1 <_sk_load_u16_be_hsw+0x31>
+ .byte 233,232,254,255,255 // jmpq 3681 <_sk_load_u16_be_hsw+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,219,254,255,255 // jmpq 33b1 <_sk_load_u16_be_hsw+0x31>
+ .byte 233,219,254,255,255 // jmpq 3681 <_sk_load_u16_be_hsw+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,210,254,255,255 // jmpq 33b1 <_sk_load_u16_be_hsw+0x31>
+ .byte 233,210,254,255,255 // jmpq 3681 <_sk_load_u16_be_hsw+0x31>
HIDDEN _sk_load_rgb_u16_be_hsw
.globl _sk_load_rgb_u16_be_hsw
@@ -11240,7 +11412,7 @@ _sk_load_rgb_u16_be_hsw:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,127 // lea (%rdi,%rdi,2),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,211,0,0,0 // jne 35c4 <_sk_load_rgb_u16_be_hsw+0xe5>
+ .byte 15,133,211,0,0,0 // jne 3894 <_sk_load_rgb_u16_be_hsw+0xe5>
.byte 196,193,122,111,4,64 // vmovdqu (%r8,%rax,2),%xmm0
.byte 196,193,122,111,84,64,12 // vmovdqu 0xc(%r8,%rax,2),%xmm2
.byte 196,193,122,111,76,64,24 // vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -11290,36 +11462,36 @@ _sk_load_rgb_u16_be_hsw:
.byte 196,193,121,110,4,64 // vmovd (%r8,%rax,2),%xmm0
.byte 196,193,121,196,68,64,4,2 // vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 117,5 // jne 35dd <_sk_load_rgb_u16_be_hsw+0xfe>
- .byte 233,72,255,255,255 // jmpq 3525 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 117,5 // jne 38ad <_sk_load_rgb_u16_be_hsw+0xfe>
+ .byte 233,72,255,255,255 // jmpq 37f5 <_sk_load_rgb_u16_be_hsw+0x46>
.byte 196,193,121,110,76,64,6 // vmovd 0x6(%r8,%rax,2),%xmm1
.byte 196,65,113,196,68,64,10,2 // vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,26 // jb 360c <_sk_load_rgb_u16_be_hsw+0x12d>
+ .byte 114,26 // jb 38dc <_sk_load_rgb_u16_be_hsw+0x12d>
.byte 196,193,121,110,76,64,12 // vmovd 0xc(%r8,%rax,2),%xmm1
.byte 196,193,113,196,84,64,16,2 // vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 117,10 // jne 3611 <_sk_load_rgb_u16_be_hsw+0x132>
- .byte 233,25,255,255,255 // jmpq 3525 <_sk_load_rgb_u16_be_hsw+0x46>
- .byte 233,20,255,255,255 // jmpq 3525 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 117,10 // jne 38e1 <_sk_load_rgb_u16_be_hsw+0x132>
+ .byte 233,25,255,255,255 // jmpq 37f5 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,20,255,255,255 // jmpq 37f5 <_sk_load_rgb_u16_be_hsw+0x46>
.byte 196,193,121,110,76,64,18 // vmovd 0x12(%r8,%rax,2),%xmm1
.byte 196,65,113,196,76,64,22,2 // vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,26 // jb 3640 <_sk_load_rgb_u16_be_hsw+0x161>
+ .byte 114,26 // jb 3910 <_sk_load_rgb_u16_be_hsw+0x161>
.byte 196,193,121,110,76,64,24 // vmovd 0x18(%r8,%rax,2),%xmm1
.byte 196,193,113,196,76,64,28,2 // vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 117,10 // jne 3645 <_sk_load_rgb_u16_be_hsw+0x166>
- .byte 233,229,254,255,255 // jmpq 3525 <_sk_load_rgb_u16_be_hsw+0x46>
- .byte 233,224,254,255,255 // jmpq 3525 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 117,10 // jne 3915 <_sk_load_rgb_u16_be_hsw+0x166>
+ .byte 233,229,254,255,255 // jmpq 37f5 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,224,254,255,255 // jmpq 37f5 <_sk_load_rgb_u16_be_hsw+0x46>
.byte 196,193,121,110,92,64,30 // vmovd 0x1e(%r8,%rax,2),%xmm3
.byte 196,65,97,196,92,64,34,2 // vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,20 // jb 366e <_sk_load_rgb_u16_be_hsw+0x18f>
+ .byte 114,20 // jb 393e <_sk_load_rgb_u16_be_hsw+0x18f>
.byte 196,193,121,110,92,64,36 // vmovd 0x24(%r8,%rax,2),%xmm3
.byte 196,193,97,196,92,64,40,2 // vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- .byte 233,183,254,255,255 // jmpq 3525 <_sk_load_rgb_u16_be_hsw+0x46>
- .byte 233,178,254,255,255 // jmpq 3525 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,183,254,255,255 // jmpq 37f5 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,178,254,255,255 // jmpq 37f5 <_sk_load_rgb_u16_be_hsw+0x46>
HIDDEN _sk_store_u16_be_hsw
.globl _sk_store_u16_be_hsw
@@ -11368,7 +11540,7 @@ _sk_store_u16_be_hsw:
.byte 196,65,17,98,200 // vpunpckldq %xmm8,%xmm13,%xmm9
.byte 196,65,17,106,192 // vpunpckhdq %xmm8,%xmm13,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,31 // jne 376e <_sk_store_u16_be_hsw+0xfb>
+ .byte 117,31 // jne 3a3e <_sk_store_u16_be_hsw+0xfb>
.byte 196,1,120,17,28,72 // vmovups %xmm11,(%r8,%r9,2)
.byte 196,1,120,17,84,72,16 // vmovups %xmm10,0x10(%r8,%r9,2)
.byte 196,1,120,17,76,72,32 // vmovups %xmm9,0x20(%r8,%r9,2)
@@ -11377,22 +11549,22 @@ _sk_store_u16_be_hsw:
.byte 255,224 // jmpq *%rax
.byte 196,1,121,214,28,72 // vmovq %xmm11,(%r8,%r9,2)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 376a <_sk_store_u16_be_hsw+0xf7>
+ .byte 116,240 // je 3a3a <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,23,92,72,8 // vmovhpd %xmm11,0x8(%r8,%r9,2)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 376a <_sk_store_u16_be_hsw+0xf7>
+ .byte 114,227 // jb 3a3a <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,214,84,72,16 // vmovq %xmm10,0x10(%r8,%r9,2)
- .byte 116,218 // je 376a <_sk_store_u16_be_hsw+0xf7>
+ .byte 116,218 // je 3a3a <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,23,84,72,24 // vmovhpd %xmm10,0x18(%r8,%r9,2)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 376a <_sk_store_u16_be_hsw+0xf7>
+ .byte 114,205 // jb 3a3a <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,214,76,72,32 // vmovq %xmm9,0x20(%r8,%r9,2)
- .byte 116,196 // je 376a <_sk_store_u16_be_hsw+0xf7>
+ .byte 116,196 // je 3a3a <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,23,76,72,40 // vmovhpd %xmm9,0x28(%r8,%r9,2)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,183 // jb 376a <_sk_store_u16_be_hsw+0xf7>
+ .byte 114,183 // jb 3a3a <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,214,68,72,48 // vmovq %xmm8,0x30(%r8,%r9,2)
- .byte 235,174 // jmp 376a <_sk_store_u16_be_hsw+0xf7>
+ .byte 235,174 // jmp 3a3a <_sk_store_u16_be_hsw+0xf7>
HIDDEN _sk_load_f32_hsw
.globl _sk_load_f32_hsw
@@ -11400,10 +11572,10 @@ FUNCTION(_sk_load_f32_hsw)
_sk_load_f32_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 119,110 // ja 3832 <_sk_load_f32_hsw+0x76>
+ .byte 119,110 // ja 3b02 <_sk_load_f32_hsw+0x76>
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
- .byte 76,141,21,134,0,0,0 // lea 0x86(%rip),%r10 # 385c <_sk_load_f32_hsw+0xa0>
+ .byte 76,141,21,134,0,0,0 // lea 0x86(%rip),%r10 # 3b2c <_sk_load_f32_hsw+0xa0>
.byte 73,99,4,138 // movslq (%r10,%rcx,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -11462,7 +11634,7 @@ _sk_store_f32_hsw:
.byte 196,65,37,20,196 // vunpcklpd %ymm12,%ymm11,%ymm8
.byte 196,65,37,21,220 // vunpckhpd %ymm12,%ymm11,%ymm11
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,55 // jne 38e9 <_sk_store_f32_hsw+0x6d>
+ .byte 117,55 // jne 3bb9 <_sk_store_f32_hsw+0x6d>
.byte 196,67,45,24,225,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
.byte 196,67,61,24,235,1 // vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
.byte 196,67,45,6,201,49 // vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -11475,22 +11647,22 @@ _sk_store_f32_hsw:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,17,20,128 // vmovupd %xmm10,(%r8,%rax,4)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 38e5 <_sk_store_f32_hsw+0x69>
+ .byte 116,240 // je 3bb5 <_sk_store_f32_hsw+0x69>
.byte 196,65,121,17,76,128,16 // vmovupd %xmm9,0x10(%r8,%rax,4)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 38e5 <_sk_store_f32_hsw+0x69>
+ .byte 114,227 // jb 3bb5 <_sk_store_f32_hsw+0x69>
.byte 196,65,121,17,68,128,32 // vmovupd %xmm8,0x20(%r8,%rax,4)
- .byte 116,218 // je 38e5 <_sk_store_f32_hsw+0x69>
+ .byte 116,218 // je 3bb5 <_sk_store_f32_hsw+0x69>
.byte 196,65,121,17,92,128,48 // vmovupd %xmm11,0x30(%r8,%rax,4)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 38e5 <_sk_store_f32_hsw+0x69>
+ .byte 114,205 // jb 3bb5 <_sk_store_f32_hsw+0x69>
.byte 196,67,125,25,84,128,64,1 // vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- .byte 116,195 // je 38e5 <_sk_store_f32_hsw+0x69>
+ .byte 116,195 // je 3bb5 <_sk_store_f32_hsw+0x69>
.byte 196,67,125,25,76,128,80,1 // vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,181 // jb 38e5 <_sk_store_f32_hsw+0x69>
+ .byte 114,181 // jb 3bb5 <_sk_store_f32_hsw+0x69>
.byte 196,67,125,25,68,128,96,1 // vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- .byte 235,171 // jmp 38e5 <_sk_store_f32_hsw+0x69>
+ .byte 235,171 // jmp 3bb5 <_sk_store_f32_hsw+0x69>
HIDDEN _sk_clamp_x_hsw
.globl _sk_clamp_x_hsw
@@ -11755,7 +11927,7 @@ _sk_linear_gradient_hsw:
.byte 196,98,125,24,72,28 // vbroadcastss 0x1c(%rax),%ymm9
.byte 76,139,0 // mov (%rax),%r8
.byte 77,133,192 // test %r8,%r8
- .byte 15,132,143,0,0,0 // je 3d75 <_sk_linear_gradient_hsw+0xb5>
+ .byte 15,132,143,0,0,0 // je 4045 <_sk_linear_gradient_hsw+0xb5>
.byte 72,139,64,8 // mov 0x8(%rax),%rax
.byte 72,131,192,32 // add $0x20,%rax
.byte 196,65,28,87,228 // vxorps %ymm12,%ymm12,%ymm12
@@ -11782,8 +11954,8 @@ _sk_linear_gradient_hsw:
.byte 196,67,13,74,201,208 // vblendvps %ymm13,%ymm9,%ymm14,%ymm9
.byte 72,131,192,36 // add $0x24,%rax
.byte 73,255,200 // dec %r8
- .byte 117,140 // jne 3cff <_sk_linear_gradient_hsw+0x3f>
- .byte 235,17 // jmp 3d86 <_sk_linear_gradient_hsw+0xc6>
+ .byte 117,140 // jne 3fcf <_sk_linear_gradient_hsw+0x3f>
+ .byte 235,17 // jmp 4056 <_sk_linear_gradient_hsw+0xc6>
.byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
.byte 197,236,87,210 // vxorps %ymm2,%ymm2,%ymm2
.byte 197,228,87,219 // vxorps %ymm3,%ymm3,%ymm3
@@ -13298,85 +13470,274 @@ HIDDEN _sk_from_2dot2_avx
.globl _sk_from_2dot2_avx
FUNCTION(_sk_from_2dot2_avx)
_sk_from_2dot2_avx:
- .byte 197,124,82,192 // vrsqrtps %ymm0,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,200 // vrsqrtps %ymm8,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 197,252,89,192 // vmulps %ymm0,%ymm0,%ymm0
- .byte 196,65,60,89,208 // vmulps %ymm8,%ymm8,%ymm10
- .byte 196,65,60,89,194 // vmulps %ymm10,%ymm8,%ymm8
- .byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 197,180,89,192 // vmulps %ymm0,%ymm9,%ymm0
- .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
- .byte 196,193,124,95,192 // vmaxps %ymm8,%ymm0,%ymm0
- .byte 197,124,82,201 // vrsqrtps %ymm1,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,209 // vrsqrtps %ymm9,%ymm10
- .byte 196,65,124,82,210 // vrsqrtps %ymm10,%ymm10
- .byte 197,244,89,201 // vmulps %ymm1,%ymm1,%ymm1
- .byte 196,65,52,89,217 // vmulps %ymm9,%ymm9,%ymm11
- .byte 196,65,52,89,203 // vmulps %ymm11,%ymm9,%ymm9
- .byte 196,193,116,89,201 // vmulps %ymm9,%ymm1,%ymm1
- .byte 197,172,89,201 // vmulps %ymm1,%ymm10,%ymm1
- .byte 196,193,116,95,200 // vmaxps %ymm8,%ymm1,%ymm1
- .byte 197,124,82,202 // vrsqrtps %ymm2,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,209 // vrsqrtps %ymm9,%ymm10
- .byte 196,65,124,82,210 // vrsqrtps %ymm10,%ymm10
- .byte 197,236,89,210 // vmulps %ymm2,%ymm2,%ymm2
- .byte 196,65,52,89,217 // vmulps %ymm9,%ymm9,%ymm11
- .byte 196,65,52,89,203 // vmulps %ymm11,%ymm9,%ymm9
- .byte 196,193,108,89,209 // vmulps %ymm9,%ymm2,%ymm2
- .byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
- .byte 196,193,108,95,208 // vmaxps %ymm8,%ymm2,%ymm2
- .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 72,131,236,88 // sub $0x58,%rsp
+ .byte 197,252,17,124,36,32 // vmovups %ymm7,0x20(%rsp)
+ .byte 197,252,17,52,36 // vmovups %ymm6,(%rsp)
+ .byte 197,252,17,108,36,224 // vmovups %ymm5,-0x20(%rsp)
+ .byte 197,252,17,100,36,192 // vmovups %ymm4,-0x40(%rsp)
+ .byte 197,252,17,92,36,160 // vmovups %ymm3,-0x60(%rsp)
+ .byte 197,252,17,84,36,128 // vmovups %ymm2,-0x80(%rsp)
+ .byte 197,252,40,241 // vmovaps %ymm1,%ymm6
+ .byte 65,184,205,204,12,64 // mov $0x400ccccd,%r8d
+ .byte 197,252,91,200 // vcvtdq2ps %ymm0,%ymm1
+ .byte 184,0,0,0,52 // mov $0x34000000,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,194,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm8
+ .byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
+ .byte 184,255,255,127,0 // mov $0x7fffff,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 197,249,112,210,0 // vpshufd $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,202,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm9
+ .byte 197,180,84,192 // vandps %ymm0,%ymm9,%ymm0
+ .byte 184,0,0,0,63 // mov $0x3f000000,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 197,249,112,210,0 // vpshufd $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,234,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm5
+ .byte 197,252,86,197 // vorps %ymm5,%ymm0,%ymm0
+ .byte 184,119,115,248,66 // mov $0x42f87377,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm10
+ .byte 196,193,116,92,202 // vsubps %ymm10,%ymm1,%ymm1
+ .byte 184,117,191,191,63 // mov $0x3fbfbf75,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,218,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm11
+ .byte 196,193,124,89,211 // vmulps %ymm11,%ymm0,%ymm2
+ .byte 197,244,92,202 // vsubps %ymm2,%ymm1,%ymm1
+ .byte 184,163,233,220,63 // mov $0x3fdce9a3,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,226,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ .byte 184,249,68,180,62 // mov $0x3eb444f9,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,234,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm13
+ .byte 196,193,124,88,197 // vaddps %ymm13,%ymm0,%ymm0
+ .byte 197,156,94,192 // vdivps %ymm0,%ymm12,%ymm0
+ .byte 197,244,92,192 // vsubps %ymm0,%ymm1,%ymm0
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,99,117,24,241,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm14
+ .byte 197,140,89,192 // vmulps %ymm0,%ymm14,%ymm0
+ .byte 196,227,125,8,200,1 // vroundps $0x1,%ymm0,%ymm1
+ .byte 197,252,92,225 // vsubps %ymm1,%ymm0,%ymm4
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 184,81,140,242,66 // mov $0x42f28c51,%eax
+ .byte 197,249,110,200 // vmovd %eax,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,99,117,24,249,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm15
+ .byte 197,132,88,192 // vaddps %ymm0,%ymm15,%ymm0
+ .byte 184,141,188,190,63 // mov $0x3fbebc8d,%eax
+ .byte 197,249,110,200 // vmovd %eax,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,217,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm3
+ .byte 197,228,89,204 // vmulps %ymm4,%ymm3,%ymm1
+ .byte 197,252,92,209 // vsubps %ymm1,%ymm0,%ymm2
+ .byte 184,254,210,221,65 // mov $0x41ddd2fe,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,200,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm1
+ .byte 184,248,245,154,64 // mov $0x409af5f8,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,252,92,228 // vsubps %ymm4,%ymm0,%ymm4
+ .byte 197,244,94,228 // vdivps %ymm4,%ymm1,%ymm4
+ .byte 197,236,88,228 // vaddps %ymm4,%ymm2,%ymm4
+ .byte 197,252,91,214 // vcvtdq2ps %ymm6,%ymm2
+ .byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
+ .byte 197,180,84,246 // vandps %ymm6,%ymm9,%ymm6
+ .byte 197,204,86,245 // vorps %ymm5,%ymm6,%ymm6
+ .byte 196,193,108,92,210 // vsubps %ymm10,%ymm2,%ymm2
+ .byte 196,193,76,89,251 // vmulps %ymm11,%ymm6,%ymm7
+ .byte 197,236,92,215 // vsubps %ymm7,%ymm2,%ymm2
+ .byte 196,193,76,88,245 // vaddps %ymm13,%ymm6,%ymm6
+ .byte 197,156,94,246 // vdivps %ymm6,%ymm12,%ymm6
+ .byte 197,236,92,214 // vsubps %ymm6,%ymm2,%ymm2
+ .byte 197,140,89,210 // vmulps %ymm2,%ymm14,%ymm2
+ .byte 196,227,125,8,242,1 // vroundps $0x1,%ymm2,%ymm6
+ .byte 197,236,92,246 // vsubps %ymm6,%ymm2,%ymm6
+ .byte 197,132,88,210 // vaddps %ymm2,%ymm15,%ymm2
+ .byte 197,228,89,254 // vmulps %ymm6,%ymm3,%ymm7
+ .byte 197,236,92,215 // vsubps %ymm7,%ymm2,%ymm2
+ .byte 197,252,92,246 // vsubps %ymm6,%ymm0,%ymm6
+ .byte 197,244,94,246 // vdivps %ymm6,%ymm1,%ymm6
+ .byte 197,236,88,214 // vaddps %ymm6,%ymm2,%ymm2
+ .byte 197,252,16,124,36,128 // vmovups -0x80(%rsp),%ymm7
+ .byte 197,252,91,247 // vcvtdq2ps %ymm7,%ymm6
+ .byte 196,193,76,89,240 // vmulps %ymm8,%ymm6,%ymm6
+ .byte 197,180,84,255 // vandps %ymm7,%ymm9,%ymm7
+ .byte 197,196,86,237 // vorps %ymm5,%ymm7,%ymm5
+ .byte 196,193,76,92,242 // vsubps %ymm10,%ymm6,%ymm6
+ .byte 196,193,84,89,251 // vmulps %ymm11,%ymm5,%ymm7
+ .byte 197,204,92,247 // vsubps %ymm7,%ymm6,%ymm6
+ .byte 196,193,84,88,237 // vaddps %ymm13,%ymm5,%ymm5
+ .byte 197,156,94,237 // vdivps %ymm5,%ymm12,%ymm5
+ .byte 197,204,92,237 // vsubps %ymm5,%ymm6,%ymm5
+ .byte 197,140,89,237 // vmulps %ymm5,%ymm14,%ymm5
+ .byte 196,227,125,8,245,1 // vroundps $0x1,%ymm5,%ymm6
+ .byte 197,212,92,246 // vsubps %ymm6,%ymm5,%ymm6
+ .byte 197,132,88,237 // vaddps %ymm5,%ymm15,%ymm5
+ .byte 197,228,89,222 // vmulps %ymm6,%ymm3,%ymm3
+ .byte 197,212,92,219 // vsubps %ymm3,%ymm5,%ymm3
+ .byte 197,252,92,198 // vsubps %ymm6,%ymm0,%ymm0
+ .byte 197,244,94,192 // vdivps %ymm0,%ymm1,%ymm0
+ .byte 197,228,88,192 // vaddps %ymm0,%ymm3,%ymm0
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,244,89,220 // vmulps %ymm4,%ymm1,%ymm3
+ .byte 197,244,89,210 // vmulps %ymm2,%ymm1,%ymm2
+ .byte 197,244,89,224 // vmulps %ymm0,%ymm1,%ymm4
+ .byte 197,253,91,195 // vcvtps2dq %ymm3,%ymm0
+ .byte 197,253,91,202 // vcvtps2dq %ymm2,%ymm1
+ .byte 197,253,91,212 // vcvtps2dq %ymm4,%ymm2
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 197,252,16,92,36,160 // vmovups -0x60(%rsp),%ymm3
+ .byte 197,252,16,100,36,192 // vmovups -0x40(%rsp),%ymm4
+ .byte 197,252,16,108,36,224 // vmovups -0x20(%rsp),%ymm5
+ .byte 197,252,16,52,36 // vmovups (%rsp),%ymm6
+ .byte 197,252,16,124,36,32 // vmovups 0x20(%rsp),%ymm7
+ .byte 72,131,196,88 // add $0x58,%rsp
.byte 255,224 // jmpq *%rax
HIDDEN _sk_to_2dot2_avx
.globl _sk_to_2dot2_avx
FUNCTION(_sk_to_2dot2_avx)
_sk_to_2dot2_avx:
- .byte 197,252,82,192 // vrsqrtps %ymm0,%ymm0
- .byte 197,124,82,192 // vrsqrtps %ymm0,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,192 // vrsqrtps %ymm8,%ymm8
- .byte 196,65,124,82,200 // vrsqrtps %ymm8,%ymm9
- .byte 197,252,83,192 // vrcpps %ymm0,%ymm0
- .byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
- .byte 196,65,124,83,193 // vrcpps %ymm9,%ymm8
- .byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
- .byte 196,193,124,95,192 // vmaxps %ymm8,%ymm0,%ymm0
- .byte 197,252,82,201 // vrsqrtps %ymm1,%ymm1
- .byte 197,124,82,201 // vrsqrtps %ymm1,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,209 // vrsqrtps %ymm9,%ymm10
- .byte 197,252,83,201 // vrcpps %ymm1,%ymm1
- .byte 197,180,89,201 // vmulps %ymm1,%ymm9,%ymm1
- .byte 196,65,124,83,202 // vrcpps %ymm10,%ymm9
- .byte 196,193,116,89,201 // vmulps %ymm9,%ymm1,%ymm1
- .byte 196,193,116,95,200 // vmaxps %ymm8,%ymm1,%ymm1
- .byte 197,252,82,210 // vrsqrtps %ymm2,%ymm2
- .byte 197,124,82,202 // vrsqrtps %ymm2,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,201 // vrsqrtps %ymm9,%ymm9
- .byte 196,65,124,82,209 // vrsqrtps %ymm9,%ymm10
- .byte 197,252,83,210 // vrcpps %ymm2,%ymm2
- .byte 197,180,89,210 // vmulps %ymm2,%ymm9,%ymm2
- .byte 196,65,124,83,202 // vrcpps %ymm10,%ymm9
- .byte 196,193,108,89,209 // vmulps %ymm9,%ymm2,%ymm2
- .byte 196,193,108,95,208 // vmaxps %ymm8,%ymm2,%ymm2
- .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 72,131,236,88 // sub $0x58,%rsp
+ .byte 197,252,17,124,36,32 // vmovups %ymm7,0x20(%rsp)
+ .byte 197,252,17,52,36 // vmovups %ymm6,(%rsp)
+ .byte 197,252,17,108,36,224 // vmovups %ymm5,-0x20(%rsp)
+ .byte 197,252,17,100,36,192 // vmovups %ymm4,-0x40(%rsp)
+ .byte 197,252,17,92,36,160 // vmovups %ymm3,-0x60(%rsp)
+ .byte 197,252,17,84,36,128 // vmovups %ymm2,-0x80(%rsp)
+ .byte 197,252,40,241 // vmovaps %ymm1,%ymm6
+ .byte 65,184,46,186,232,62 // mov $0x3ee8ba2e,%r8d
+ .byte 197,252,91,200 // vcvtdq2ps %ymm0,%ymm1
+ .byte 184,0,0,0,52 // mov $0x34000000,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,194,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm8
+ .byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
+ .byte 184,255,255,127,0 // mov $0x7fffff,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 197,249,112,210,0 // vpshufd $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,202,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm9
+ .byte 197,180,84,192 // vandps %ymm0,%ymm9,%ymm0
+ .byte 184,0,0,0,63 // mov $0x3f000000,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 197,249,112,210,0 // vpshufd $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,234,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm5
+ .byte 197,252,86,197 // vorps %ymm5,%ymm0,%ymm0
+ .byte 184,119,115,248,66 // mov $0x42f87377,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm10
+ .byte 196,193,116,92,202 // vsubps %ymm10,%ymm1,%ymm1
+ .byte 184,117,191,191,63 // mov $0x3fbfbf75,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,218,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm11
+ .byte 196,193,124,89,211 // vmulps %ymm11,%ymm0,%ymm2
+ .byte 197,244,92,202 // vsubps %ymm2,%ymm1,%ymm1
+ .byte 184,163,233,220,63 // mov $0x3fdce9a3,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,226,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ .byte 184,249,68,180,62 // mov $0x3eb444f9,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,234,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm13
+ .byte 196,193,124,88,197 // vaddps %ymm13,%ymm0,%ymm0
+ .byte 197,156,94,192 // vdivps %ymm0,%ymm12,%ymm0
+ .byte 197,244,92,192 // vsubps %ymm0,%ymm1,%ymm0
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,99,117,24,241,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm14
+ .byte 197,140,89,192 // vmulps %ymm0,%ymm14,%ymm0
+ .byte 196,227,125,8,200,1 // vroundps $0x1,%ymm0,%ymm1
+ .byte 197,252,92,225 // vsubps %ymm1,%ymm0,%ymm4
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 184,81,140,242,66 // mov $0x42f28c51,%eax
+ .byte 197,249,110,200 // vmovd %eax,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,99,117,24,249,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm15
+ .byte 197,132,88,192 // vaddps %ymm0,%ymm15,%ymm0
+ .byte 184,141,188,190,63 // mov $0x3fbebc8d,%eax
+ .byte 197,249,110,200 // vmovd %eax,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,217,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm3
+ .byte 197,228,89,204 // vmulps %ymm4,%ymm3,%ymm1
+ .byte 197,252,92,209 // vsubps %ymm1,%ymm0,%ymm2
+ .byte 184,254,210,221,65 // mov $0x41ddd2fe,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,200,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm1
+ .byte 184,248,245,154,64 // mov $0x409af5f8,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,252,92,228 // vsubps %ymm4,%ymm0,%ymm4
+ .byte 197,244,94,228 // vdivps %ymm4,%ymm1,%ymm4
+ .byte 197,236,88,228 // vaddps %ymm4,%ymm2,%ymm4
+ .byte 197,252,91,214 // vcvtdq2ps %ymm6,%ymm2
+ .byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
+ .byte 197,180,84,246 // vandps %ymm6,%ymm9,%ymm6
+ .byte 197,204,86,245 // vorps %ymm5,%ymm6,%ymm6
+ .byte 196,193,108,92,210 // vsubps %ymm10,%ymm2,%ymm2
+ .byte 196,193,76,89,251 // vmulps %ymm11,%ymm6,%ymm7
+ .byte 197,236,92,215 // vsubps %ymm7,%ymm2,%ymm2
+ .byte 196,193,76,88,245 // vaddps %ymm13,%ymm6,%ymm6
+ .byte 197,156,94,246 // vdivps %ymm6,%ymm12,%ymm6
+ .byte 197,236,92,214 // vsubps %ymm6,%ymm2,%ymm2
+ .byte 197,140,89,210 // vmulps %ymm2,%ymm14,%ymm2
+ .byte 196,227,125,8,242,1 // vroundps $0x1,%ymm2,%ymm6
+ .byte 197,236,92,246 // vsubps %ymm6,%ymm2,%ymm6
+ .byte 197,132,88,210 // vaddps %ymm2,%ymm15,%ymm2
+ .byte 197,228,89,254 // vmulps %ymm6,%ymm3,%ymm7
+ .byte 197,236,92,215 // vsubps %ymm7,%ymm2,%ymm2
+ .byte 197,252,92,246 // vsubps %ymm6,%ymm0,%ymm6
+ .byte 197,244,94,246 // vdivps %ymm6,%ymm1,%ymm6
+ .byte 197,236,88,214 // vaddps %ymm6,%ymm2,%ymm2
+ .byte 197,252,16,124,36,128 // vmovups -0x80(%rsp),%ymm7
+ .byte 197,252,91,247 // vcvtdq2ps %ymm7,%ymm6
+ .byte 196,193,76,89,240 // vmulps %ymm8,%ymm6,%ymm6
+ .byte 197,180,84,255 // vandps %ymm7,%ymm9,%ymm7
+ .byte 197,196,86,237 // vorps %ymm5,%ymm7,%ymm5
+ .byte 196,193,76,92,242 // vsubps %ymm10,%ymm6,%ymm6
+ .byte 196,193,84,89,251 // vmulps %ymm11,%ymm5,%ymm7
+ .byte 197,204,92,247 // vsubps %ymm7,%ymm6,%ymm6
+ .byte 196,193,84,88,237 // vaddps %ymm13,%ymm5,%ymm5
+ .byte 197,156,94,237 // vdivps %ymm5,%ymm12,%ymm5
+ .byte 197,204,92,237 // vsubps %ymm5,%ymm6,%ymm5
+ .byte 197,140,89,237 // vmulps %ymm5,%ymm14,%ymm5
+ .byte 196,227,125,8,245,1 // vroundps $0x1,%ymm5,%ymm6
+ .byte 197,212,92,246 // vsubps %ymm6,%ymm5,%ymm6
+ .byte 197,132,88,237 // vaddps %ymm5,%ymm15,%ymm5
+ .byte 197,228,89,222 // vmulps %ymm6,%ymm3,%ymm3
+ .byte 197,212,92,219 // vsubps %ymm3,%ymm5,%ymm3
+ .byte 197,252,92,198 // vsubps %ymm6,%ymm0,%ymm0
+ .byte 197,244,94,192 // vdivps %ymm0,%ymm1,%ymm0
+ .byte 197,228,88,192 // vaddps %ymm0,%ymm3,%ymm0
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,244,89,220 // vmulps %ymm4,%ymm1,%ymm3
+ .byte 197,244,89,210 // vmulps %ymm2,%ymm1,%ymm2
+ .byte 197,244,89,224 // vmulps %ymm0,%ymm1,%ymm4
+ .byte 197,253,91,195 // vcvtps2dq %ymm3,%ymm0
+ .byte 197,253,91,202 // vcvtps2dq %ymm2,%ymm1
+ .byte 197,253,91,212 // vcvtps2dq %ymm4,%ymm2
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 197,252,16,92,36,160 // vmovups -0x60(%rsp),%ymm3
+ .byte 197,252,16,100,36,192 // vmovups -0x40(%rsp),%ymm4
+ .byte 197,252,16,108,36,224 // vmovups -0x20(%rsp),%ymm5
+ .byte 197,252,16,52,36 // vmovups (%rsp),%ymm6
+ .byte 197,252,16,124,36,32 // vmovups 0x20(%rsp),%ymm7
+ .byte 72,131,196,88 // add $0x58,%rsp
.byte 255,224 // jmpq *%rax
HIDDEN _sk_rgb_to_hsl_avx
@@ -13588,7 +13949,7 @@ _sk_scale_u8_avx:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,80 // jne 1456 <_sk_scale_u8_avx+0x60>
+ .byte 117,80 // jne 17ee <_sk_scale_u8_avx+0x60>
.byte 197,122,126,0 // vmovq (%rax),%xmm8
.byte 196,66,121,49,200 // vpmovzxbd %xmm8,%xmm9
.byte 196,67,121,4,192,229 // vpermilps $0xe5,%xmm8,%xmm8
@@ -13616,9 +13977,9 @@ _sk_scale_u8_avx:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 145e <_sk_scale_u8_avx+0x68>
+ .byte 117,234 // jne 17f6 <_sk_scale_u8_avx+0x68>
.byte 196,65,249,110,193 // vmovq %r9,%xmm8
- .byte 235,143 // jmp 140a <_sk_scale_u8_avx+0x14>
+ .byte 235,143 // jmp 17a2 <_sk_scale_u8_avx+0x14>
HIDDEN _sk_lerp_1_float_avx
.globl _sk_lerp_1_float_avx
@@ -13650,7 +14011,7 @@ _sk_lerp_u8_avx:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,116 // jne 153e <_sk_lerp_u8_avx+0x84>
+ .byte 117,116 // jne 18d6 <_sk_lerp_u8_avx+0x84>
.byte 197,122,126,0 // vmovq (%rax),%xmm8
.byte 196,66,121,49,200 // vpmovzxbd %xmm8,%xmm9
.byte 196,67,121,4,192,229 // vpermilps $0xe5,%xmm8,%xmm8
@@ -13686,9 +14047,9 @@ _sk_lerp_u8_avx:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 1546 <_sk_lerp_u8_avx+0x8c>
+ .byte 117,234 // jne 18de <_sk_lerp_u8_avx+0x8c>
.byte 196,65,249,110,193 // vmovq %r9,%xmm8
- .byte 233,104,255,255,255 // jmpq 14ce <_sk_lerp_u8_avx+0x14>
+ .byte 233,104,255,255,255 // jmpq 1866 <_sk_lerp_u8_avx+0x14>
HIDDEN _sk_lerp_565_avx
.globl _sk_lerp_565_avx
@@ -13697,7 +14058,7 @@ _sk_lerp_565_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,250,0,0,0 // jne 166e <_sk_lerp_565_avx+0x108>
+ .byte 15,133,250,0,0,0 // jne 1a06 <_sk_lerp_565_avx+0x108>
.byte 196,65,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm8
.byte 197,225,239,219 // vpxor %xmm3,%xmm3,%xmm3
.byte 197,185,105,219 // vpunpckhwd %xmm3,%xmm8,%xmm3
@@ -13756,9 +14117,9 @@ _sk_lerp_565_avx:
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,243,254,255,255 // ja 157a <_sk_lerp_565_avx+0x14>
+ .byte 15,135,243,254,255,255 // ja 1912 <_sk_lerp_565_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 16dc <_sk_lerp_565_avx+0x176>
+ .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 1a74 <_sk_lerp_565_avx+0x176>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -13770,12 +14131,12 @@ _sk_lerp_565_avx:
.byte 196,65,57,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
.byte 196,65,57,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
.byte 196,65,57,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm8,%xmm8
- .byte 233,159,254,255,255 // jmpq 157a <_sk_lerp_565_avx+0x14>
+ .byte 233,159,254,255,255 // jmpq 1912 <_sk_lerp_565_avx+0x14>
.byte 144 // nop
.byte 243,255 // repz (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 235,255 // jmp 16e1 <_sk_lerp_565_avx+0x17b>
+ .byte 235,255 // jmp 1a79 <_sk_lerp_565_avx+0x17b>
.byte 255 // (bad)
.byte 255,227 // jmpq *%rbx
.byte 255 // (bad)
@@ -13808,7 +14169,7 @@ _sk_load_tables_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,0 // mov (%rax),%r8
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,56,2,0,0 // jne 1948 <_sk_load_tables_avx+0x250>
+ .byte 15,133,56,2,0,0 // jne 1ce0 <_sk_load_tables_avx+0x250>
.byte 196,65,124,16,4,184 // vmovups (%r8,%rdi,4),%ymm8
.byte 187,255,0,0,0 // mov $0xff,%ebx
.byte 197,249,110,195 // vmovd %ebx,%xmm0
@@ -13927,9 +14288,9 @@ _sk_load_tables_avx:
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 254,203 // dec %bl
.byte 128,251,6 // cmp $0x6,%bl
- .byte 15,135,185,253,255,255 // ja 1716 <_sk_load_tables_avx+0x1e>
+ .byte 15,135,185,253,255,255 // ja 1aae <_sk_load_tables_avx+0x1e>
.byte 15,182,219 // movzbl %bl,%ebx
- .byte 76,141,13,137,0,0,0 // lea 0x89(%rip),%r9 # 19f0 <_sk_load_tables_avx+0x2f8>
+ .byte 76,141,13,137,0,0,0 // lea 0x89(%rip),%r9 # 1d88 <_sk_load_tables_avx+0x2f8>
.byte 73,99,28,153 // movslq (%r9,%rbx,4),%rbx
.byte 76,1,203 // add %r9,%rbx
.byte 255,227 // jmpq *%rbx
@@ -13952,7 +14313,7 @@ _sk_load_tables_avx:
.byte 196,99,61,12,192,15 // vblendps $0xf,%ymm0,%ymm8,%ymm8
.byte 196,195,57,34,4,184,0 // vpinsrd $0x0,(%r8,%rdi,4),%xmm8,%xmm0
.byte 196,99,61,12,192,15 // vblendps $0xf,%ymm0,%ymm8,%ymm8
- .byte 233,38,253,255,255 // jmpq 1716 <_sk_load_tables_avx+0x1e>
+ .byte 233,38,253,255,255 // jmpq 1aae <_sk_load_tables_avx+0x1e>
.byte 238 // out %al,(%dx)
.byte 255 // (bad)
.byte 255 // (bad)
@@ -13980,7 +14341,7 @@ _sk_load_tables_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,165,2,0,0 // jne 1cc7 <_sk_load_tables_u16_be_avx+0x2bb>
+ .byte 15,133,165,2,0,0 // jne 205f <_sk_load_tables_u16_be_avx+0x2bb>
.byte 196,1,121,16,4,72 // vmovupd (%r8,%r9,2),%xmm8
.byte 196,129,121,16,84,72,16 // vmovupd 0x10(%r8,%r9,2),%xmm2
.byte 196,129,121,16,92,72,32 // vmovupd 0x20(%r8,%r9,2),%xmm3
@@ -14124,29 +14485,29 @@ _sk_load_tables_u16_be_avx:
.byte 196,1,123,16,4,72 // vmovsd (%r8,%r9,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,85 // je 1d2d <_sk_load_tables_u16_be_avx+0x321>
+ .byte 116,85 // je 20c5 <_sk_load_tables_u16_be_avx+0x321>
.byte 196,1,57,22,68,72,8 // vmovhpd 0x8(%r8,%r9,2),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,72 // jb 1d2d <_sk_load_tables_u16_be_avx+0x321>
+ .byte 114,72 // jb 20c5 <_sk_load_tables_u16_be_avx+0x321>
.byte 196,129,123,16,84,72,16 // vmovsd 0x10(%r8,%r9,2),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,72 // je 1d3a <_sk_load_tables_u16_be_avx+0x32e>
+ .byte 116,72 // je 20d2 <_sk_load_tables_u16_be_avx+0x32e>
.byte 196,129,105,22,84,72,24 // vmovhpd 0x18(%r8,%r9,2),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,59 // jb 1d3a <_sk_load_tables_u16_be_avx+0x32e>
+ .byte 114,59 // jb 20d2 <_sk_load_tables_u16_be_avx+0x32e>
.byte 196,129,123,16,92,72,32 // vmovsd 0x20(%r8,%r9,2),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,45,253,255,255 // je 1a3d <_sk_load_tables_u16_be_avx+0x31>
+ .byte 15,132,45,253,255,255 // je 1dd5 <_sk_load_tables_u16_be_avx+0x31>
.byte 196,129,97,22,92,72,40 // vmovhpd 0x28(%r8,%r9,2),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,28,253,255,255 // jb 1a3d <_sk_load_tables_u16_be_avx+0x31>
+ .byte 15,130,28,253,255,255 // jb 1dd5 <_sk_load_tables_u16_be_avx+0x31>
.byte 196,1,122,126,76,72,48 // vmovq 0x30(%r8,%r9,2),%xmm9
- .byte 233,16,253,255,255 // jmpq 1a3d <_sk_load_tables_u16_be_avx+0x31>
+ .byte 233,16,253,255,255 // jmpq 1dd5 <_sk_load_tables_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,3,253,255,255 // jmpq 1a3d <_sk_load_tables_u16_be_avx+0x31>
+ .byte 233,3,253,255,255 // jmpq 1dd5 <_sk_load_tables_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,250,252,255,255 // jmpq 1a3d <_sk_load_tables_u16_be_avx+0x31>
+ .byte 233,250,252,255,255 // jmpq 1dd5 <_sk_load_tables_u16_be_avx+0x31>
HIDDEN _sk_load_tables_rgb_u16_be_avx
.globl _sk_load_tables_rgb_u16_be_avx
@@ -14156,7 +14517,7 @@ _sk_load_tables_rgb_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,127 // lea (%rdi,%rdi,2),%r9
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,146,2,0,0 // jne 1fe7 <_sk_load_tables_rgb_u16_be_avx+0x2a4>
+ .byte 15,133,146,2,0,0 // jne 237f <_sk_load_tables_rgb_u16_be_avx+0x2a4>
.byte 196,129,122,111,4,72 // vmovdqu (%r8,%r9,2),%xmm0
.byte 196,129,122,111,84,72,12 // vmovdqu 0xc(%r8,%r9,2),%xmm2
.byte 196,129,122,111,76,72,24 // vmovdqu 0x18(%r8,%r9,2),%xmm1
@@ -14296,36 +14657,36 @@ _sk_load_tables_rgb_u16_be_avx:
.byte 196,129,121,110,4,72 // vmovd (%r8,%r9,2),%xmm0
.byte 196,129,121,196,68,72,4,2 // vpinsrw $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 117,5 // jne 2000 <_sk_load_tables_rgb_u16_be_avx+0x2bd>
- .byte 233,137,253,255,255 // jmpq 1d89 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 117,5 // jne 2398 <_sk_load_tables_rgb_u16_be_avx+0x2bd>
+ .byte 233,137,253,255,255 // jmpq 2121 <_sk_load_tables_rgb_u16_be_avx+0x46>
.byte 196,129,121,110,76,72,6 // vmovd 0x6(%r8,%r9,2),%xmm1
.byte 196,1,113,196,68,72,10,2 // vpinsrw $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,26 // jb 202f <_sk_load_tables_rgb_u16_be_avx+0x2ec>
+ .byte 114,26 // jb 23c7 <_sk_load_tables_rgb_u16_be_avx+0x2ec>
.byte 196,129,121,110,76,72,12 // vmovd 0xc(%r8,%r9,2),%xmm1
.byte 196,129,113,196,84,72,16,2 // vpinsrw $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 117,10 // jne 2034 <_sk_load_tables_rgb_u16_be_avx+0x2f1>
- .byte 233,90,253,255,255 // jmpq 1d89 <_sk_load_tables_rgb_u16_be_avx+0x46>
- .byte 233,85,253,255,255 // jmpq 1d89 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 23cc <_sk_load_tables_rgb_u16_be_avx+0x2f1>
+ .byte 233,90,253,255,255 // jmpq 2121 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 233,85,253,255,255 // jmpq 2121 <_sk_load_tables_rgb_u16_be_avx+0x46>
.byte 196,129,121,110,76,72,18 // vmovd 0x12(%r8,%r9,2),%xmm1
.byte 196,1,113,196,76,72,22,2 // vpinsrw $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,26 // jb 2063 <_sk_load_tables_rgb_u16_be_avx+0x320>
+ .byte 114,26 // jb 23fb <_sk_load_tables_rgb_u16_be_avx+0x320>
.byte 196,129,121,110,76,72,24 // vmovd 0x18(%r8,%r9,2),%xmm1
.byte 196,129,113,196,76,72,28,2 // vpinsrw $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 117,10 // jne 2068 <_sk_load_tables_rgb_u16_be_avx+0x325>
- .byte 233,38,253,255,255 // jmpq 1d89 <_sk_load_tables_rgb_u16_be_avx+0x46>
- .byte 233,33,253,255,255 // jmpq 1d89 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 2400 <_sk_load_tables_rgb_u16_be_avx+0x325>
+ .byte 233,38,253,255,255 // jmpq 2121 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 233,33,253,255,255 // jmpq 2121 <_sk_load_tables_rgb_u16_be_avx+0x46>
.byte 196,129,121,110,92,72,30 // vmovd 0x1e(%r8,%r9,2),%xmm3
.byte 196,1,97,196,92,72,34,2 // vpinsrw $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,20 // jb 2091 <_sk_load_tables_rgb_u16_be_avx+0x34e>
+ .byte 114,20 // jb 2429 <_sk_load_tables_rgb_u16_be_avx+0x34e>
.byte 196,129,121,110,92,72,36 // vmovd 0x24(%r8,%r9,2),%xmm3
.byte 196,129,97,196,92,72,40,2 // vpinsrw $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
- .byte 233,248,252,255,255 // jmpq 1d89 <_sk_load_tables_rgb_u16_be_avx+0x46>
- .byte 233,243,252,255,255 // jmpq 1d89 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 233,248,252,255,255 // jmpq 2121 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 233,243,252,255,255 // jmpq 2121 <_sk_load_tables_rgb_u16_be_avx+0x46>
HIDDEN _sk_byte_tables_avx
.globl _sk_byte_tables_avx
@@ -15313,7 +15674,7 @@ _sk_load_a8_avx:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,74 // jne 3228 <_sk_load_a8_avx+0x5a>
+ .byte 117,74 // jne 35c0 <_sk_load_a8_avx+0x5a>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,121,49,200 // vpmovzxbd %xmm0,%xmm1
.byte 196,227,121,4,192,229 // vpermilps $0xe5,%xmm0,%xmm0
@@ -15340,9 +15701,9 @@ _sk_load_a8_avx:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 3230 <_sk_load_a8_avx+0x62>
+ .byte 117,234 // jne 35c8 <_sk_load_a8_avx+0x62>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,149 // jmp 31e2 <_sk_load_a8_avx+0x14>
+ .byte 235,149 // jmp 357a <_sk_load_a8_avx+0x14>
HIDDEN _sk_gather_a8_avx
.globl _sk_gather_a8_avx
@@ -15423,7 +15784,7 @@ _sk_store_a8_avx:
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 196,65,57,103,192 // vpackuswb %xmm8,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 3389 <_sk_store_a8_avx+0x42>
+ .byte 117,10 // jne 3721 <_sk_store_a8_avx+0x42>
.byte 196,65,123,17,4,57 // vmovsd %xmm8,(%r9,%rdi,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -15431,10 +15792,10 @@ _sk_store_a8_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 3385 <_sk_store_a8_avx+0x3e>
+ .byte 119,236 // ja 371d <_sk_store_a8_avx+0x3e>
.byte 196,66,121,48,192 // vpmovzxbw %xmm8,%xmm8
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,67,0,0,0 // lea 0x43(%rip),%r8 # 33ec <_sk_store_a8_avx+0xa5>
+ .byte 76,141,5,67,0,0,0 // lea 0x43(%rip),%r8 # 3784 <_sk_store_a8_avx+0xa5>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -15445,7 +15806,7 @@ _sk_store_a8_avx:
.byte 196,67,121,20,68,57,2,4 // vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
.byte 196,67,121,20,68,57,1,2 // vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
.byte 196,67,121,20,4,57,0 // vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- .byte 235,154 // jmp 3385 <_sk_store_a8_avx+0x3e>
+ .byte 235,154 // jmp 371d <_sk_store_a8_avx+0x3e>
.byte 144 // nop
.byte 246,255 // idiv %bh
.byte 255 // (bad)
@@ -15479,7 +15840,7 @@ _sk_load_g8_avx:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,91 // jne 3473 <_sk_load_g8_avx+0x6b>
+ .byte 117,91 // jne 380b <_sk_load_g8_avx+0x6b>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,121,49,200 // vpmovzxbd %xmm0,%xmm1
.byte 196,227,121,4,192,229 // vpermilps $0xe5,%xmm0,%xmm0
@@ -15509,9 +15870,9 @@ _sk_load_g8_avx:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 347b <_sk_load_g8_avx+0x73>
+ .byte 117,234 // jne 3813 <_sk_load_g8_avx+0x73>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,132 // jmp 341c <_sk_load_g8_avx+0x14>
+ .byte 235,132 // jmp 37b4 <_sk_load_g8_avx+0x14>
HIDDEN _sk_gather_g8_avx
.globl _sk_gather_g8_avx
@@ -15586,9 +15947,9 @@ _sk_gather_i8_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 35b2 <_sk_gather_i8_avx+0xf>
+ .byte 116,5 // je 394a <_sk_gather_i8_avx+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 35b4 <_sk_gather_i8_avx+0x11>
+ .byte 235,2 // jmp 394c <_sk_gather_i8_avx+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 65,87 // push %r15
.byte 65,86 // push %r14
@@ -15693,7 +16054,7 @@ _sk_load_565_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,209,0,0,0 // jne 384e <_sk_load_565_avx+0xdf>
+ .byte 15,133,209,0,0,0 // jne 3be6 <_sk_load_565_avx+0xdf>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -15743,9 +16104,9 @@ _sk_load_565_avx:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,29,255,255,255 // ja 3783 <_sk_load_565_avx+0x14>
+ .byte 15,135,29,255,255,255 // ja 3b1b <_sk_load_565_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,75,0,0,0 // lea 0x4b(%rip),%r9 # 38bc <_sk_load_565_avx+0x14d>
+ .byte 76,141,13,75,0,0,0 // lea 0x4b(%rip),%r9 # 3c54 <_sk_load_565_avx+0x14d>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -15757,7 +16118,7 @@ _sk_load_565_avx:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,201,254,255,255 // jmpq 3783 <_sk_load_565_avx+0x14>
+ .byte 233,201,254,255,255 // jmpq 3b1b <_sk_load_565_avx+0x14>
.byte 102,144 // xchg %ax,%ax
.byte 242,255 // repnz (bad)
.byte 255 // (bad)
@@ -15914,7 +16275,7 @@ _sk_store_565_avx:
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 3b07 <_sk_store_565_avx+0x9e>
+ .byte 117,10 // jne 3e9f <_sk_store_565_avx+0x9e>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -15922,9 +16283,9 @@ _sk_store_565_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 3b03 <_sk_store_565_avx+0x9a>
+ .byte 119,236 // ja 3e9b <_sk_store_565_avx+0x9a>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 3b64 <_sk_store_565_avx+0xfb>
+ .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 3efc <_sk_store_565_avx+0xfb>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -15935,7 +16296,7 @@ _sk_store_565_avx:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 3b03 <_sk_store_565_avx+0x9a>
+ .byte 235,159 // jmp 3e9b <_sk_store_565_avx+0x9a>
.byte 247,255 // idiv %edi
.byte 255 // (bad)
.byte 255 // (bad)
@@ -15966,7 +16327,7 @@ _sk_load_4444_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,245,0,0,0 // jne 3c83 <_sk_load_4444_avx+0x103>
+ .byte 15,133,245,0,0,0 // jne 401b <_sk_load_4444_avx+0x103>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -16023,9 +16384,9 @@ _sk_load_4444_avx:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,249,254,255,255 // ja 3b94 <_sk_load_4444_avx+0x14>
+ .byte 15,135,249,254,255,255 // ja 3f2c <_sk_load_4444_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 3cf0 <_sk_load_4444_avx+0x170>
+ .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 4088 <_sk_load_4444_avx+0x170>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -16037,12 +16398,12 @@ _sk_load_4444_avx:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,165,254,255,255 // jmpq 3b94 <_sk_load_4444_avx+0x14>
+ .byte 233,165,254,255,255 // jmpq 3f2c <_sk_load_4444_avx+0x14>
.byte 144 // nop
.byte 243,255 // repz (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 235,255 // jmp 3cf5 <_sk_load_4444_avx+0x175>
+ .byte 235,255 // jmp 408d <_sk_load_4444_avx+0x175>
.byte 255 // (bad)
.byte 255,227 // jmpq *%rbx
.byte 255 // (bad)
@@ -16203,7 +16564,7 @@ _sk_store_4444_avx:
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 3f70 <_sk_store_4444_avx+0xaf>
+ .byte 117,10 // jne 4308 <_sk_store_4444_avx+0xaf>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -16211,9 +16572,9 @@ _sk_store_4444_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 3f6c <_sk_store_4444_avx+0xab>
+ .byte 119,236 // ja 4304 <_sk_store_4444_avx+0xab>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,69,0,0,0 // lea 0x45(%rip),%r8 # 3fd0 <_sk_store_4444_avx+0x10f>
+ .byte 76,141,5,69,0,0,0 // lea 0x45(%rip),%r8 # 4368 <_sk_store_4444_avx+0x10f>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -16224,7 +16585,7 @@ _sk_store_4444_avx:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 3f6c <_sk_store_4444_avx+0xab>
+ .byte 235,159 // jmp 4304 <_sk_store_4444_avx+0xab>
.byte 15,31,0 // nopl (%rax)
.byte 244 // hlt
.byte 255 // (bad)
@@ -16257,7 +16618,7 @@ _sk_load_8888_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,157,0,0,0 // jne 4097 <_sk_load_8888_avx+0xab>
+ .byte 15,133,157,0,0,0 // jne 442f <_sk_load_8888_avx+0xab>
.byte 196,65,124,16,12,186 // vmovups (%r10,%rdi,4),%ymm9
.byte 184,255,0,0,0 // mov $0xff,%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
@@ -16295,9 +16656,9 @@ _sk_load_8888_avx:
.byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,80,255,255,255 // ja 4000 <_sk_load_8888_avx+0x14>
+ .byte 15,135,80,255,255,255 // ja 4398 <_sk_load_8888_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,137,0,0,0 // lea 0x89(%rip),%r9 # 4144 <_sk_load_8888_avx+0x158>
+ .byte 76,141,13,137,0,0,0 // lea 0x89(%rip),%r9 # 44dc <_sk_load_8888_avx+0x158>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -16320,7 +16681,7 @@ _sk_load_8888_avx:
.byte 196,99,53,12,200,15 // vblendps $0xf,%ymm0,%ymm9,%ymm9
.byte 196,195,49,34,4,186,0 // vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
.byte 196,99,53,12,200,15 // vblendps $0xf,%ymm0,%ymm9,%ymm9
- .byte 233,188,254,255,255 // jmpq 4000 <_sk_load_8888_avx+0x14>
+ .byte 233,188,254,255,255 // jmpq 4398 <_sk_load_8888_avx+0x14>
.byte 238 // out %al,(%dx)
.byte 255 // (bad)
.byte 255 // (bad)
@@ -16450,7 +16811,7 @@ _sk_store_8888_avx:
.byte 196,65,45,86,192 // vorpd %ymm8,%ymm10,%ymm8
.byte 196,65,53,86,192 // vorpd %ymm8,%ymm9,%ymm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 4345 <_sk_store_8888_avx+0xa4>
+ .byte 117,10 // jne 46dd <_sk_store_8888_avx+0xa4>
.byte 196,65,124,17,4,185 // vmovups %ymm8,(%r9,%rdi,4)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -16458,9 +16819,9 @@ _sk_store_8888_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 4341 <_sk_store_8888_avx+0xa0>
+ .byte 119,236 // ja 46d9 <_sk_store_8888_avx+0xa0>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,84,0,0,0 // lea 0x54(%rip),%r8 # 43b4 <_sk_store_8888_avx+0x113>
+ .byte 76,141,5,84,0,0,0 // lea 0x54(%rip),%r8 # 474c <_sk_store_8888_avx+0x113>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -16474,7 +16835,7 @@ _sk_store_8888_avx:
.byte 196,67,121,22,68,185,8,2 // vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4)
.byte 196,67,121,22,68,185,4,1 // vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4)
.byte 196,65,121,126,4,185 // vmovd %xmm8,(%r9,%rdi,4)
- .byte 235,143 // jmp 4341 <_sk_store_8888_avx+0xa0>
+ .byte 235,143 // jmp 46d9 <_sk_store_8888_avx+0xa0>
.byte 102,144 // xchg %ax,%ax
.byte 246,255 // idiv %bh
.byte 255 // (bad)
@@ -16509,7 +16870,7 @@ _sk_load_f16_avx:
.byte 197,252,17,124,36,200 // vmovups %ymm7,-0x38(%rsp)
.byte 197,252,17,116,36,168 // vmovups %ymm6,-0x58(%rsp)
.byte 197,252,17,108,36,136 // vmovups %ymm5,-0x78(%rsp)
- .byte 15,133,46,2,0,0 // jne 461e <_sk_load_f16_avx+0x24e>
+ .byte 15,133,46,2,0,0 // jne 49b6 <_sk_load_f16_avx+0x24e>
.byte 197,121,16,4,248 // vmovupd (%rax,%rdi,8),%xmm8
.byte 197,249,16,84,248,16 // vmovupd 0x10(%rax,%rdi,8),%xmm2
.byte 197,249,16,76,248,32 // vmovupd 0x20(%rax,%rdi,8),%xmm1
@@ -16626,29 +16987,29 @@ _sk_load_f16_avx:
.byte 197,123,16,4,248 // vmovsd (%rax,%rdi,8),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,79 // je 467d <_sk_load_f16_avx+0x2ad>
+ .byte 116,79 // je 4a15 <_sk_load_f16_avx+0x2ad>
.byte 197,57,22,68,248,8 // vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,67 // jb 467d <_sk_load_f16_avx+0x2ad>
+ .byte 114,67 // jb 4a15 <_sk_load_f16_avx+0x2ad>
.byte 197,251,16,84,248,16 // vmovsd 0x10(%rax,%rdi,8),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,68 // je 468a <_sk_load_f16_avx+0x2ba>
+ .byte 116,68 // je 4a22 <_sk_load_f16_avx+0x2ba>
.byte 197,233,22,84,248,24 // vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,56 // jb 468a <_sk_load_f16_avx+0x2ba>
+ .byte 114,56 // jb 4a22 <_sk_load_f16_avx+0x2ba>
.byte 197,251,16,76,248,32 // vmovsd 0x20(%rax,%rdi,8),%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,165,253,255,255 // je 4407 <_sk_load_f16_avx+0x37>
+ .byte 15,132,165,253,255,255 // je 479f <_sk_load_f16_avx+0x37>
.byte 197,241,22,76,248,40 // vmovhpd 0x28(%rax,%rdi,8),%xmm1,%xmm1
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,149,253,255,255 // jb 4407 <_sk_load_f16_avx+0x37>
+ .byte 15,130,149,253,255,255 // jb 479f <_sk_load_f16_avx+0x37>
.byte 197,122,126,76,248,48 // vmovq 0x30(%rax,%rdi,8),%xmm9
- .byte 233,138,253,255,255 // jmpq 4407 <_sk_load_f16_avx+0x37>
+ .byte 233,138,253,255,255 // jmpq 479f <_sk_load_f16_avx+0x37>
.byte 197,241,87,201 // vxorpd %xmm1,%xmm1,%xmm1
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,125,253,255,255 // jmpq 4407 <_sk_load_f16_avx+0x37>
+ .byte 233,125,253,255,255 // jmpq 479f <_sk_load_f16_avx+0x37>
.byte 197,241,87,201 // vxorpd %xmm1,%xmm1,%xmm1
- .byte 233,116,253,255,255 // jmpq 4407 <_sk_load_f16_avx+0x37>
+ .byte 233,116,253,255,255 // jmpq 479f <_sk_load_f16_avx+0x37>
HIDDEN _sk_gather_f16_avx
.globl _sk_gather_f16_avx
@@ -16925,7 +17286,7 @@ _sk_store_f16_avx:
.byte 197,113,98,202 // vpunpckldq %xmm2,%xmm1,%xmm9
.byte 197,113,106,194 // vpunpckhdq %xmm2,%xmm1,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,70 // jne 4bde <_sk_store_f16_avx+0x25f>
+ .byte 117,70 // jne 4f76 <_sk_store_f16_avx+0x25f>
.byte 196,65,120,17,28,248 // vmovups %xmm11,(%r8,%rdi,8)
.byte 196,65,120,17,84,248,16 // vmovups %xmm10,0x10(%r8,%rdi,8)
.byte 196,65,120,17,76,248,32 // vmovups %xmm9,0x20(%r8,%rdi,8)
@@ -16941,22 +17302,22 @@ _sk_store_f16_avx:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,214,28,248 // vmovq %xmm11,(%r8,%rdi,8)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,201 // je 4bb3 <_sk_store_f16_avx+0x234>
+ .byte 116,201 // je 4f4b <_sk_store_f16_avx+0x234>
.byte 196,65,121,23,92,248,8 // vmovhpd %xmm11,0x8(%r8,%rdi,8)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,188 // jb 4bb3 <_sk_store_f16_avx+0x234>
+ .byte 114,188 // jb 4f4b <_sk_store_f16_avx+0x234>
.byte 196,65,121,214,84,248,16 // vmovq %xmm10,0x10(%r8,%rdi,8)
- .byte 116,179 // je 4bb3 <_sk_store_f16_avx+0x234>
+ .byte 116,179 // je 4f4b <_sk_store_f16_avx+0x234>
.byte 196,65,121,23,84,248,24 // vmovhpd %xmm10,0x18(%r8,%rdi,8)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,166 // jb 4bb3 <_sk_store_f16_avx+0x234>
+ .byte 114,166 // jb 4f4b <_sk_store_f16_avx+0x234>
.byte 196,65,121,214,76,248,32 // vmovq %xmm9,0x20(%r8,%rdi,8)
- .byte 116,157 // je 4bb3 <_sk_store_f16_avx+0x234>
+ .byte 116,157 // je 4f4b <_sk_store_f16_avx+0x234>
.byte 196,65,121,23,76,248,40 // vmovhpd %xmm9,0x28(%r8,%rdi,8)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,144 // jb 4bb3 <_sk_store_f16_avx+0x234>
+ .byte 114,144 // jb 4f4b <_sk_store_f16_avx+0x234>
.byte 196,65,121,214,68,248,48 // vmovq %xmm8,0x30(%r8,%rdi,8)
- .byte 235,135 // jmp 4bb3 <_sk_store_f16_avx+0x234>
+ .byte 235,135 // jmp 4f4b <_sk_store_f16_avx+0x234>
HIDDEN _sk_load_u16_be_avx
.globl _sk_load_u16_be_avx
@@ -16966,7 +17327,7 @@ _sk_load_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,189,0,0,0,0 // lea 0x0(,%rdi,4),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,5,1,0,0 // jne 4d47 <_sk_load_u16_be_avx+0x11b>
+ .byte 15,133,5,1,0,0 // jne 50df <_sk_load_u16_be_avx+0x11b>
.byte 196,65,121,16,4,64 // vmovupd (%r8,%rax,2),%xmm8
.byte 196,193,121,16,84,64,16 // vmovupd 0x10(%r8,%rax,2),%xmm2
.byte 196,193,121,16,92,64,32 // vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -17025,29 +17386,29 @@ _sk_load_u16_be_avx:
.byte 196,65,123,16,4,64 // vmovsd (%r8,%rax,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,85 // je 4dad <_sk_load_u16_be_avx+0x181>
+ .byte 116,85 // je 5145 <_sk_load_u16_be_avx+0x181>
.byte 196,65,57,22,68,64,8 // vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,72 // jb 4dad <_sk_load_u16_be_avx+0x181>
+ .byte 114,72 // jb 5145 <_sk_load_u16_be_avx+0x181>
.byte 196,193,123,16,84,64,16 // vmovsd 0x10(%r8,%rax,2),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,72 // je 4dba <_sk_load_u16_be_avx+0x18e>
+ .byte 116,72 // je 5152 <_sk_load_u16_be_avx+0x18e>
.byte 196,193,105,22,84,64,24 // vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,59 // jb 4dba <_sk_load_u16_be_avx+0x18e>
+ .byte 114,59 // jb 5152 <_sk_load_u16_be_avx+0x18e>
.byte 196,193,123,16,92,64,32 // vmovsd 0x20(%r8,%rax,2),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,205,254,255,255 // je 4c5d <_sk_load_u16_be_avx+0x31>
+ .byte 15,132,205,254,255,255 // je 4ff5 <_sk_load_u16_be_avx+0x31>
.byte 196,193,97,22,92,64,40 // vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,188,254,255,255 // jb 4c5d <_sk_load_u16_be_avx+0x31>
+ .byte 15,130,188,254,255,255 // jb 4ff5 <_sk_load_u16_be_avx+0x31>
.byte 196,65,122,126,76,64,48 // vmovq 0x30(%r8,%rax,2),%xmm9
- .byte 233,176,254,255,255 // jmpq 4c5d <_sk_load_u16_be_avx+0x31>
+ .byte 233,176,254,255,255 // jmpq 4ff5 <_sk_load_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,163,254,255,255 // jmpq 4c5d <_sk_load_u16_be_avx+0x31>
+ .byte 233,163,254,255,255 // jmpq 4ff5 <_sk_load_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,154,254,255,255 // jmpq 4c5d <_sk_load_u16_be_avx+0x31>
+ .byte 233,154,254,255,255 // jmpq 4ff5 <_sk_load_u16_be_avx+0x31>
HIDDEN _sk_load_rgb_u16_be_avx
.globl _sk_load_rgb_u16_be_avx
@@ -17057,7 +17418,7 @@ _sk_load_rgb_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,127 // lea (%rdi,%rdi,2),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,8,1,0,0 // jne 4edd <_sk_load_rgb_u16_be_avx+0x11a>
+ .byte 15,133,8,1,0,0 // jne 5275 <_sk_load_rgb_u16_be_avx+0x11a>
.byte 196,193,122,111,4,64 // vmovdqu (%r8,%rax,2),%xmm0
.byte 196,193,122,111,84,64,12 // vmovdqu 0xc(%r8,%rax,2),%xmm2
.byte 196,193,122,111,76,64,24 // vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -17116,36 +17477,36 @@ _sk_load_rgb_u16_be_avx:
.byte 196,193,121,110,4,64 // vmovd (%r8,%rax,2),%xmm0
.byte 196,193,121,196,68,64,4,2 // vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 117,5 // jne 4ef6 <_sk_load_rgb_u16_be_avx+0x133>
- .byte 233,19,255,255,255 // jmpq 4e09 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,5 // jne 528e <_sk_load_rgb_u16_be_avx+0x133>
+ .byte 233,19,255,255,255 // jmpq 51a1 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,76,64,6 // vmovd 0x6(%r8,%rax,2),%xmm1
.byte 196,65,113,196,68,64,10,2 // vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,26 // jb 4f25 <_sk_load_rgb_u16_be_avx+0x162>
+ .byte 114,26 // jb 52bd <_sk_load_rgb_u16_be_avx+0x162>
.byte 196,193,121,110,76,64,12 // vmovd 0xc(%r8,%rax,2),%xmm1
.byte 196,193,113,196,84,64,16,2 // vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 117,10 // jne 4f2a <_sk_load_rgb_u16_be_avx+0x167>
- .byte 233,228,254,255,255 // jmpq 4e09 <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,223,254,255,255 // jmpq 4e09 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 52c2 <_sk_load_rgb_u16_be_avx+0x167>
+ .byte 233,228,254,255,255 // jmpq 51a1 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,223,254,255,255 // jmpq 51a1 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,76,64,18 // vmovd 0x12(%r8,%rax,2),%xmm1
.byte 196,65,113,196,76,64,22,2 // vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,26 // jb 4f59 <_sk_load_rgb_u16_be_avx+0x196>
+ .byte 114,26 // jb 52f1 <_sk_load_rgb_u16_be_avx+0x196>
.byte 196,193,121,110,76,64,24 // vmovd 0x18(%r8,%rax,2),%xmm1
.byte 196,193,113,196,76,64,28,2 // vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 117,10 // jne 4f5e <_sk_load_rgb_u16_be_avx+0x19b>
- .byte 233,176,254,255,255 // jmpq 4e09 <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,171,254,255,255 // jmpq 4e09 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 52f6 <_sk_load_rgb_u16_be_avx+0x19b>
+ .byte 233,176,254,255,255 // jmpq 51a1 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,171,254,255,255 // jmpq 51a1 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,92,64,30 // vmovd 0x1e(%r8,%rax,2),%xmm3
.byte 196,65,97,196,92,64,34,2 // vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,20 // jb 4f87 <_sk_load_rgb_u16_be_avx+0x1c4>
+ .byte 114,20 // jb 531f <_sk_load_rgb_u16_be_avx+0x1c4>
.byte 196,193,121,110,92,64,36 // vmovd 0x24(%r8,%rax,2),%xmm3
.byte 196,193,97,196,92,64,40,2 // vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- .byte 233,130,254,255,255 // jmpq 4e09 <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,125,254,255,255 // jmpq 4e09 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,130,254,255,255 // jmpq 51a1 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,125,254,255,255 // jmpq 51a1 <_sk_load_rgb_u16_be_avx+0x46>
HIDDEN _sk_store_u16_be_avx
.globl _sk_store_u16_be_avx
@@ -17195,7 +17556,7 @@ _sk_store_u16_be_avx:
.byte 196,65,17,98,200 // vpunpckldq %xmm8,%xmm13,%xmm9
.byte 196,65,17,106,192 // vpunpckhdq %xmm8,%xmm13,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,31 // jne 508e <_sk_store_u16_be_avx+0x102>
+ .byte 117,31 // jne 5426 <_sk_store_u16_be_avx+0x102>
.byte 196,1,120,17,28,72 // vmovups %xmm11,(%r8,%r9,2)
.byte 196,1,120,17,84,72,16 // vmovups %xmm10,0x10(%r8,%r9,2)
.byte 196,1,120,17,76,72,32 // vmovups %xmm9,0x20(%r8,%r9,2)
@@ -17204,22 +17565,22 @@ _sk_store_u16_be_avx:
.byte 255,224 // jmpq *%rax
.byte 196,1,121,214,28,72 // vmovq %xmm11,(%r8,%r9,2)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 508a <_sk_store_u16_be_avx+0xfe>
+ .byte 116,240 // je 5422 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,23,92,72,8 // vmovhpd %xmm11,0x8(%r8,%r9,2)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 508a <_sk_store_u16_be_avx+0xfe>
+ .byte 114,227 // jb 5422 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,214,84,72,16 // vmovq %xmm10,0x10(%r8,%r9,2)
- .byte 116,218 // je 508a <_sk_store_u16_be_avx+0xfe>
+ .byte 116,218 // je 5422 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,23,84,72,24 // vmovhpd %xmm10,0x18(%r8,%r9,2)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 508a <_sk_store_u16_be_avx+0xfe>
+ .byte 114,205 // jb 5422 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,214,76,72,32 // vmovq %xmm9,0x20(%r8,%r9,2)
- .byte 116,196 // je 508a <_sk_store_u16_be_avx+0xfe>
+ .byte 116,196 // je 5422 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,23,76,72,40 // vmovhpd %xmm9,0x28(%r8,%r9,2)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,183 // jb 508a <_sk_store_u16_be_avx+0xfe>
+ .byte 114,183 // jb 5422 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,214,68,72,48 // vmovq %xmm8,0x30(%r8,%r9,2)
- .byte 235,174 // jmp 508a <_sk_store_u16_be_avx+0xfe>
+ .byte 235,174 // jmp 5422 <_sk_store_u16_be_avx+0xfe>
HIDDEN _sk_load_f32_avx
.globl _sk_load_f32_avx
@@ -17227,10 +17588,10 @@ FUNCTION(_sk_load_f32_avx)
_sk_load_f32_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 119,110 // ja 5152 <_sk_load_f32_avx+0x76>
+ .byte 119,110 // ja 54ea <_sk_load_f32_avx+0x76>
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
- .byte 76,141,21,134,0,0,0 // lea 0x86(%rip),%r10 # 517c <_sk_load_f32_avx+0xa0>
+ .byte 76,141,21,134,0,0,0 // lea 0x86(%rip),%r10 # 5514 <_sk_load_f32_avx+0xa0>
.byte 73,99,4,138 // movslq (%r10,%rcx,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -17289,7 +17650,7 @@ _sk_store_f32_avx:
.byte 196,65,37,20,196 // vunpcklpd %ymm12,%ymm11,%ymm8
.byte 196,65,37,21,220 // vunpckhpd %ymm12,%ymm11,%ymm11
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,55 // jne 5209 <_sk_store_f32_avx+0x6d>
+ .byte 117,55 // jne 55a1 <_sk_store_f32_avx+0x6d>
.byte 196,67,45,24,225,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
.byte 196,67,61,24,235,1 // vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
.byte 196,67,45,6,201,49 // vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -17302,22 +17663,22 @@ _sk_store_f32_avx:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,17,20,128 // vmovupd %xmm10,(%r8,%rax,4)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 5205 <_sk_store_f32_avx+0x69>
+ .byte 116,240 // je 559d <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,76,128,16 // vmovupd %xmm9,0x10(%r8,%rax,4)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 5205 <_sk_store_f32_avx+0x69>
+ .byte 114,227 // jb 559d <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,68,128,32 // vmovupd %xmm8,0x20(%r8,%rax,4)
- .byte 116,218 // je 5205 <_sk_store_f32_avx+0x69>
+ .byte 116,218 // je 559d <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,92,128,48 // vmovupd %xmm11,0x30(%r8,%rax,4)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 5205 <_sk_store_f32_avx+0x69>
+ .byte 114,205 // jb 559d <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,84,128,64,1 // vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- .byte 116,195 // je 5205 <_sk_store_f32_avx+0x69>
+ .byte 116,195 // je 559d <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,76,128,80,1 // vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,181 // jb 5205 <_sk_store_f32_avx+0x69>
+ .byte 114,181 // jb 559d <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,68,128,96,1 // vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- .byte 235,171 // jmp 5205 <_sk_store_f32_avx+0x69>
+ .byte 235,171 // jmp 559d <_sk_store_f32_avx+0x69>
HIDDEN _sk_clamp_x_avx
.globl _sk_clamp_x_avx
@@ -17645,7 +18006,7 @@ _sk_linear_gradient_avx:
.byte 196,226,125,24,88,28 // vbroadcastss 0x1c(%rax),%ymm3
.byte 76,139,0 // mov (%rax),%r8
.byte 77,133,192 // test %r8,%r8
- .byte 15,132,146,0,0,0 // je 57bd <_sk_linear_gradient_avx+0xb8>
+ .byte 15,132,146,0,0,0 // je 5b55 <_sk_linear_gradient_avx+0xb8>
.byte 72,139,64,8 // mov 0x8(%rax),%rax
.byte 72,131,192,32 // add $0x20,%rax
.byte 196,65,28,87,228 // vxorps %ymm12,%ymm12,%ymm12
@@ -17672,8 +18033,8 @@ _sk_linear_gradient_avx:
.byte 196,227,13,74,219,208 // vblendvps %ymm13,%ymm3,%ymm14,%ymm3
.byte 72,131,192,36 // add $0x24,%rax
.byte 73,255,200 // dec %r8
- .byte 117,140 // jne 5747 <_sk_linear_gradient_avx+0x42>
- .byte 235,20 // jmp 57d1 <_sk_linear_gradient_avx+0xcc>
+ .byte 117,140 // jne 5adf <_sk_linear_gradient_avx+0x42>
+ .byte 235,20 // jmp 5b69 <_sk_linear_gradient_avx+0xcc>
.byte 196,65,36,87,219 // vxorps %ymm11,%ymm11,%ymm11
.byte 196,65,44,87,210 // vxorps %ymm10,%ymm10,%ymm10
.byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
@@ -19447,91 +19808,274 @@ HIDDEN _sk_from_2dot2_sse41
.globl _sk_from_2dot2_sse41
FUNCTION(_sk_from_2dot2_sse41)
_sk_from_2dot2_sse41:
- .byte 68,15,40,192 // movaps %xmm0,%xmm8
- .byte 65,15,82,192 // rsqrtps %xmm8,%xmm0
- .byte 15,82,192 // rsqrtps %xmm0,%xmm0
- .byte 15,82,192 // rsqrtps %xmm0,%xmm0
- .byte 68,15,82,200 // rsqrtps %xmm0,%xmm9
- .byte 65,15,82,193 // rsqrtps %xmm9,%xmm0
- .byte 68,15,82,208 // rsqrtps %xmm0,%xmm10
- .byte 69,15,89,192 // mulps %xmm8,%xmm8
+ .byte 15,41,124,36,232 // movaps %xmm7,-0x18(%rsp)
+ .byte 15,41,116,36,216 // movaps %xmm6,-0x28(%rsp)
+ .byte 15,41,108,36,200 // movaps %xmm5,-0x38(%rsp)
+ .byte 15,41,100,36,184 // movaps %xmm4,-0x48(%rsp)
+ .byte 15,41,92,36,168 // movaps %xmm3,-0x58(%rsp)
+ .byte 15,41,84,36,152 // movaps %xmm2,-0x68(%rsp)
+ .byte 15,40,209 // movaps %xmm1,%xmm2
+ .byte 184,205,204,12,64 // mov $0x400ccccd,%eax
+ .byte 15,91,216 // cvtdq2ps %xmm0,%xmm3
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 65,15,89,218 // mulps %xmm10,%xmm3
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 102,68,15,112,193,0 // pshufd $0x0,%xmm1,%xmm8
+ .byte 65,15,84,192 // andps %xmm8,%xmm0
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 102,15,112,201,0 // pshufd $0x0,%xmm1,%xmm1
+ .byte 15,86,193 // orps %xmm1,%xmm0
+ .byte 15,40,241 // movaps %xmm1,%xmm6
+ .byte 15,41,116,36,136 // movaps %xmm6,-0x78(%rsp)
+ .byte 185,119,115,248,66 // mov $0x42f87377,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 65,15,92,219 // subps %xmm11,%xmm3
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 15,40,200 // movaps %xmm0,%xmm1
+ .byte 65,15,89,204 // mulps %xmm12,%xmm1
+ .byte 15,92,217 // subps %xmm1,%xmm3
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 65,15,88,198 // addps %xmm14,%xmm0
+ .byte 65,15,40,205 // movaps %xmm13,%xmm1
+ .byte 15,94,200 // divps %xmm0,%xmm1
+ .byte 15,92,217 // subps %xmm1,%xmm3
+ .byte 102,68,15,110,248 // movd %eax,%xmm15
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 65,15,89,223 // mulps %xmm15,%xmm3
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,15,110,225 // movd %ecx,%xmm4
+ .byte 15,198,228,0 // shufps $0x0,%xmm4,%xmm4
+ .byte 15,40,204 // movaps %xmm4,%xmm1
+ .byte 15,88,203 // addps %xmm3,%xmm1
+ .byte 102,15,58,8,195,1 // roundps $0x1,%xmm3,%xmm0
+ .byte 15,92,216 // subps %xmm0,%xmm3
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,68,15,110,201 // movd %ecx,%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
.byte 65,15,40,193 // movaps %xmm9,%xmm0
- .byte 15,89,192 // mulps %xmm0,%xmm0
- .byte 65,15,89,193 // mulps %xmm9,%xmm0
- .byte 65,15,89,192 // mulps %xmm8,%xmm0
- .byte 65,15,89,194 // mulps %xmm10,%xmm0
- .byte 69,15,87,210 // xorps %xmm10,%xmm10
- .byte 65,15,95,194 // maxps %xmm10,%xmm0
- .byte 68,15,82,193 // rsqrtps %xmm1,%xmm8
- .byte 69,15,82,192 // rsqrtps %xmm8,%xmm8
- .byte 69,15,82,192 // rsqrtps %xmm8,%xmm8
- .byte 69,15,82,200 // rsqrtps %xmm8,%xmm9
- .byte 69,15,82,193 // rsqrtps %xmm9,%xmm8
- .byte 69,15,82,216 // rsqrtps %xmm8,%xmm11
- .byte 15,89,201 // mulps %xmm1,%xmm1
- .byte 69,15,40,193 // movaps %xmm9,%xmm8
- .byte 69,15,89,192 // mulps %xmm8,%xmm8
- .byte 69,15,89,193 // mulps %xmm9,%xmm8
- .byte 68,15,89,193 // mulps %xmm1,%xmm8
- .byte 69,15,89,195 // mulps %xmm11,%xmm8
- .byte 69,15,95,194 // maxps %xmm10,%xmm8
- .byte 15,82,202 // rsqrtps %xmm2,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 68,15,82,217 // rsqrtps %xmm1,%xmm11
- .byte 65,15,82,203 // rsqrtps %xmm11,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 15,89,210 // mulps %xmm2,%xmm2
- .byte 69,15,40,203 // movaps %xmm11,%xmm9
- .byte 69,15,89,201 // mulps %xmm9,%xmm9
- .byte 69,15,89,203 // mulps %xmm11,%xmm9
- .byte 68,15,89,202 // mulps %xmm2,%xmm9
+ .byte 15,89,195 // mulps %xmm3,%xmm0
+ .byte 15,92,200 // subps %xmm0,%xmm1
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 184,248,245,154,64 // mov $0x409af5f8,%eax
+ .byte 102,15,110,248 // movd %eax,%xmm7
+ .byte 15,198,255,0 // shufps $0x0,%xmm7,%xmm7
+ .byte 15,40,239 // movaps %xmm7,%xmm5
+ .byte 15,92,235 // subps %xmm3,%xmm5
+ .byte 102,15,110,193 // movd %ecx,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 15,40,216 // movaps %xmm0,%xmm3
+ .byte 15,94,221 // divps %xmm5,%xmm3
+ .byte 15,88,217 // addps %xmm1,%xmm3
+ .byte 15,91,202 // cvtdq2ps %xmm2,%xmm1
+ .byte 65,15,89,202 // mulps %xmm10,%xmm1
+ .byte 65,15,84,208 // andps %xmm8,%xmm2
+ .byte 15,86,214 // orps %xmm6,%xmm2
+ .byte 65,15,92,203 // subps %xmm11,%xmm1
+ .byte 15,40,234 // movaps %xmm2,%xmm5
+ .byte 65,15,89,236 // mulps %xmm12,%xmm5
+ .byte 15,92,205 // subps %xmm5,%xmm1
+ .byte 65,15,88,214 // addps %xmm14,%xmm2
+ .byte 65,15,40,237 // movaps %xmm13,%xmm5
+ .byte 15,94,234 // divps %xmm2,%xmm5
+ .byte 15,92,205 // subps %xmm5,%xmm1
+ .byte 65,15,89,207 // mulps %xmm15,%xmm1
+ .byte 15,40,236 // movaps %xmm4,%xmm5
+ .byte 15,88,233 // addps %xmm1,%xmm5
+ .byte 102,15,58,8,209,1 // roundps $0x1,%xmm1,%xmm2
+ .byte 15,92,202 // subps %xmm2,%xmm1
+ .byte 65,15,40,209 // movaps %xmm9,%xmm2
+ .byte 15,89,209 // mulps %xmm1,%xmm2
+ .byte 15,92,234 // subps %xmm2,%xmm5
+ .byte 15,40,247 // movaps %xmm7,%xmm6
+ .byte 15,92,241 // subps %xmm1,%xmm6
+ .byte 15,40,208 // movaps %xmm0,%xmm2
+ .byte 15,94,214 // divps %xmm6,%xmm2
+ .byte 15,88,213 // addps %xmm5,%xmm2
+ .byte 15,40,108,36,152 // movaps -0x68(%rsp),%xmm5
+ .byte 15,91,205 // cvtdq2ps %xmm5,%xmm1
+ .byte 65,15,89,202 // mulps %xmm10,%xmm1
+ .byte 68,15,84,197 // andps %xmm5,%xmm8
+ .byte 68,15,86,68,36,136 // orps -0x78(%rsp),%xmm8
+ .byte 65,15,92,203 // subps %xmm11,%xmm1
+ .byte 69,15,89,224 // mulps %xmm8,%xmm12
+ .byte 65,15,92,204 // subps %xmm12,%xmm1
+ .byte 69,15,88,198 // addps %xmm14,%xmm8
+ .byte 69,15,94,232 // divps %xmm8,%xmm13
+ .byte 65,15,92,205 // subps %xmm13,%xmm1
+ .byte 65,15,89,207 // mulps %xmm15,%xmm1
+ .byte 102,15,58,8,233,1 // roundps $0x1,%xmm1,%xmm5
+ .byte 15,88,225 // addps %xmm1,%xmm4
+ .byte 15,92,205 // subps %xmm5,%xmm1
.byte 68,15,89,201 // mulps %xmm1,%xmm9
- .byte 69,15,95,202 // maxps %xmm10,%xmm9
+ .byte 65,15,92,225 // subps %xmm9,%xmm4
+ .byte 15,92,249 // subps %xmm1,%xmm7
+ .byte 15,94,199 // divps %xmm7,%xmm0
+ .byte 15,88,196 // addps %xmm4,%xmm0
+ .byte 102,65,15,110,200 // movd %r8d,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 15,89,217 // mulps %xmm1,%xmm3
+ .byte 15,89,209 // mulps %xmm1,%xmm2
+ .byte 15,89,193 // mulps %xmm1,%xmm0
+ .byte 102,15,91,219 // cvtps2dq %xmm3,%xmm3
+ .byte 102,15,91,202 // cvtps2dq %xmm2,%xmm1
+ .byte 102,15,91,208 // cvtps2dq %xmm0,%xmm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 65,15,40,200 // movaps %xmm8,%xmm1
- .byte 65,15,40,209 // movaps %xmm9,%xmm2
+ .byte 102,15,40,195 // movapd %xmm3,%xmm0
+ .byte 15,40,92,36,168 // movaps -0x58(%rsp),%xmm3
+ .byte 15,40,100,36,184 // movaps -0x48(%rsp),%xmm4
+ .byte 15,40,108,36,200 // movaps -0x38(%rsp),%xmm5
+ .byte 15,40,116,36,216 // movaps -0x28(%rsp),%xmm6
+ .byte 15,40,124,36,232 // movaps -0x18(%rsp),%xmm7
.byte 255,224 // jmpq *%rax
HIDDEN _sk_to_2dot2_sse41
.globl _sk_to_2dot2_sse41
FUNCTION(_sk_to_2dot2_sse41)
_sk_to_2dot2_sse41:
- .byte 68,15,82,192 // rsqrtps %xmm0,%xmm8
- .byte 65,15,82,192 // rsqrtps %xmm8,%xmm0
- .byte 15,82,192 // rsqrtps %xmm0,%xmm0
- .byte 15,82,192 // rsqrtps %xmm0,%xmm0
- .byte 15,82,192 // rsqrtps %xmm0,%xmm0
- .byte 68,15,82,200 // rsqrtps %xmm0,%xmm9
- .byte 69,15,83,192 // rcpps %xmm8,%xmm8
- .byte 68,15,89,192 // mulps %xmm0,%xmm8
- .byte 65,15,83,193 // rcpps %xmm9,%xmm0
- .byte 65,15,89,192 // mulps %xmm8,%xmm0
- .byte 69,15,87,192 // xorps %xmm8,%xmm8
- .byte 65,15,95,192 // maxps %xmm8,%xmm0
- .byte 68,15,82,201 // rsqrtps %xmm1,%xmm9
- .byte 65,15,82,201 // rsqrtps %xmm9,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 68,15,82,209 // rsqrtps %xmm1,%xmm10
- .byte 69,15,83,201 // rcpps %xmm9,%xmm9
+ .byte 15,41,124,36,232 // movaps %xmm7,-0x18(%rsp)
+ .byte 15,41,116,36,216 // movaps %xmm6,-0x28(%rsp)
+ .byte 15,41,108,36,200 // movaps %xmm5,-0x38(%rsp)
+ .byte 15,41,100,36,184 // movaps %xmm4,-0x48(%rsp)
+ .byte 15,41,92,36,168 // movaps %xmm3,-0x58(%rsp)
+ .byte 15,41,84,36,152 // movaps %xmm2,-0x68(%rsp)
+ .byte 15,40,209 // movaps %xmm1,%xmm2
+ .byte 184,46,186,232,62 // mov $0x3ee8ba2e,%eax
+ .byte 15,91,216 // cvtdq2ps %xmm0,%xmm3
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 65,15,89,218 // mulps %xmm10,%xmm3
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 102,68,15,112,193,0 // pshufd $0x0,%xmm1,%xmm8
+ .byte 65,15,84,192 // andps %xmm8,%xmm0
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 102,15,112,201,0 // pshufd $0x0,%xmm1,%xmm1
+ .byte 15,86,193 // orps %xmm1,%xmm0
+ .byte 15,40,241 // movaps %xmm1,%xmm6
+ .byte 15,41,116,36,136 // movaps %xmm6,-0x78(%rsp)
+ .byte 185,119,115,248,66 // mov $0x42f87377,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 65,15,92,219 // subps %xmm11,%xmm3
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 15,40,200 // movaps %xmm0,%xmm1
+ .byte 65,15,89,204 // mulps %xmm12,%xmm1
+ .byte 15,92,217 // subps %xmm1,%xmm3
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 65,15,88,198 // addps %xmm14,%xmm0
+ .byte 65,15,40,205 // movaps %xmm13,%xmm1
+ .byte 15,94,200 // divps %xmm0,%xmm1
+ .byte 15,92,217 // subps %xmm1,%xmm3
+ .byte 102,68,15,110,248 // movd %eax,%xmm15
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 65,15,89,223 // mulps %xmm15,%xmm3
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,15,110,225 // movd %ecx,%xmm4
+ .byte 15,198,228,0 // shufps $0x0,%xmm4,%xmm4
+ .byte 15,40,204 // movaps %xmm4,%xmm1
+ .byte 15,88,203 // addps %xmm3,%xmm1
+ .byte 102,15,58,8,195,1 // roundps $0x1,%xmm3,%xmm0
+ .byte 15,92,216 // subps %xmm0,%xmm3
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,68,15,110,201 // movd %ecx,%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 65,15,40,193 // movaps %xmm9,%xmm0
+ .byte 15,89,195 // mulps %xmm3,%xmm0
+ .byte 15,92,200 // subps %xmm0,%xmm1
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 184,248,245,154,64 // mov $0x409af5f8,%eax
+ .byte 102,15,110,248 // movd %eax,%xmm7
+ .byte 15,198,255,0 // shufps $0x0,%xmm7,%xmm7
+ .byte 15,40,239 // movaps %xmm7,%xmm5
+ .byte 15,92,235 // subps %xmm3,%xmm5
+ .byte 102,15,110,193 // movd %ecx,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 15,40,216 // movaps %xmm0,%xmm3
+ .byte 15,94,221 // divps %xmm5,%xmm3
+ .byte 15,88,217 // addps %xmm1,%xmm3
+ .byte 15,91,202 // cvtdq2ps %xmm2,%xmm1
+ .byte 65,15,89,202 // mulps %xmm10,%xmm1
+ .byte 65,15,84,208 // andps %xmm8,%xmm2
+ .byte 15,86,214 // orps %xmm6,%xmm2
+ .byte 65,15,92,203 // subps %xmm11,%xmm1
+ .byte 15,40,234 // movaps %xmm2,%xmm5
+ .byte 65,15,89,236 // mulps %xmm12,%xmm5
+ .byte 15,92,205 // subps %xmm5,%xmm1
+ .byte 65,15,88,214 // addps %xmm14,%xmm2
+ .byte 65,15,40,237 // movaps %xmm13,%xmm5
+ .byte 15,94,234 // divps %xmm2,%xmm5
+ .byte 15,92,205 // subps %xmm5,%xmm1
+ .byte 65,15,89,207 // mulps %xmm15,%xmm1
+ .byte 15,40,236 // movaps %xmm4,%xmm5
+ .byte 15,88,233 // addps %xmm1,%xmm5
+ .byte 102,15,58,8,209,1 // roundps $0x1,%xmm1,%xmm2
+ .byte 15,92,202 // subps %xmm2,%xmm1
+ .byte 65,15,40,209 // movaps %xmm9,%xmm2
+ .byte 15,89,209 // mulps %xmm1,%xmm2
+ .byte 15,92,234 // subps %xmm2,%xmm5
+ .byte 15,40,247 // movaps %xmm7,%xmm6
+ .byte 15,92,241 // subps %xmm1,%xmm6
+ .byte 15,40,208 // movaps %xmm0,%xmm2
+ .byte 15,94,214 // divps %xmm6,%xmm2
+ .byte 15,88,213 // addps %xmm5,%xmm2
+ .byte 15,40,108,36,152 // movaps -0x68(%rsp),%xmm5
+ .byte 15,91,205 // cvtdq2ps %xmm5,%xmm1
+ .byte 65,15,89,202 // mulps %xmm10,%xmm1
+ .byte 68,15,84,197 // andps %xmm5,%xmm8
+ .byte 68,15,86,68,36,136 // orps -0x78(%rsp),%xmm8
+ .byte 65,15,92,203 // subps %xmm11,%xmm1
+ .byte 69,15,89,224 // mulps %xmm8,%xmm12
+ .byte 65,15,92,204 // subps %xmm12,%xmm1
+ .byte 69,15,88,198 // addps %xmm14,%xmm8
+ .byte 69,15,94,232 // divps %xmm8,%xmm13
+ .byte 65,15,92,205 // subps %xmm13,%xmm1
+ .byte 65,15,89,207 // mulps %xmm15,%xmm1
+ .byte 102,15,58,8,233,1 // roundps $0x1,%xmm1,%xmm5
+ .byte 15,88,225 // addps %xmm1,%xmm4
+ .byte 15,92,205 // subps %xmm5,%xmm1
.byte 68,15,89,201 // mulps %xmm1,%xmm9
- .byte 65,15,83,202 // rcpps %xmm10,%xmm1
- .byte 65,15,89,201 // mulps %xmm9,%xmm1
- .byte 65,15,95,200 // maxps %xmm8,%xmm1
- .byte 68,15,82,202 // rsqrtps %xmm2,%xmm9
- .byte 65,15,82,209 // rsqrtps %xmm9,%xmm2
- .byte 15,82,210 // rsqrtps %xmm2,%xmm2
- .byte 15,82,210 // rsqrtps %xmm2,%xmm2
- .byte 15,82,210 // rsqrtps %xmm2,%xmm2
- .byte 68,15,82,210 // rsqrtps %xmm2,%xmm10
- .byte 69,15,83,201 // rcpps %xmm9,%xmm9
- .byte 68,15,89,202 // mulps %xmm2,%xmm9
- .byte 65,15,83,210 // rcpps %xmm10,%xmm2
- .byte 65,15,89,209 // mulps %xmm9,%xmm2
- .byte 65,15,95,208 // maxps %xmm8,%xmm2
+ .byte 65,15,92,225 // subps %xmm9,%xmm4
+ .byte 15,92,249 // subps %xmm1,%xmm7
+ .byte 15,94,199 // divps %xmm7,%xmm0
+ .byte 15,88,196 // addps %xmm4,%xmm0
+ .byte 102,65,15,110,200 // movd %r8d,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 15,89,217 // mulps %xmm1,%xmm3
+ .byte 15,89,209 // mulps %xmm1,%xmm2
+ .byte 15,89,193 // mulps %xmm1,%xmm0
+ .byte 102,15,91,219 // cvtps2dq %xmm3,%xmm3
+ .byte 102,15,91,202 // cvtps2dq %xmm2,%xmm1
+ .byte 102,15,91,208 // cvtps2dq %xmm0,%xmm2
.byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 102,15,40,195 // movapd %xmm3,%xmm0
+ .byte 15,40,92,36,168 // movaps -0x58(%rsp),%xmm3
+ .byte 15,40,100,36,184 // movaps -0x48(%rsp),%xmm4
+ .byte 15,40,108,36,200 // movaps -0x38(%rsp),%xmm5
+ .byte 15,40,116,36,216 // movaps -0x28(%rsp),%xmm6
+ .byte 15,40,124,36,232 // movaps -0x18(%rsp),%xmm7
.byte 255,224 // jmpq *%rax
HIDDEN _sk_rgb_to_hsl_sse41
@@ -20993,9 +21537,9 @@ _sk_gather_i8_sse41:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 27eb <_sk_gather_i8_sse41+0xf>
+ .byte 116,5 // je 2aed <_sk_gather_i8_sse41+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 27ed <_sk_gather_i8_sse41+0x11>
+ .byte 235,2 // jmp 2aef <_sk_gather_i8_sse41+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 243,15,91,201 // cvttps2dq %xmm1,%xmm1
@@ -22228,7 +22772,7 @@ _sk_linear_gradient_sse41:
.byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
.byte 72,139,8 // mov (%rax),%rcx
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,132,254,0,0,0 // je 3bd6 <_sk_linear_gradient_sse41+0x138>
+ .byte 15,132,254,0,0,0 // je 3ed8 <_sk_linear_gradient_sse41+0x138>
.byte 15,41,100,36,168 // movaps %xmm4,-0x58(%rsp)
.byte 15,41,108,36,184 // movaps %xmm5,-0x48(%rsp)
.byte 15,41,116,36,200 // movaps %xmm6,-0x38(%rsp)
@@ -22278,12 +22822,12 @@ _sk_linear_gradient_sse41:
.byte 15,40,196 // movaps %xmm4,%xmm0
.byte 72,131,192,36 // add $0x24,%rax
.byte 72,255,201 // dec %rcx
- .byte 15,133,65,255,255,255 // jne 3b01 <_sk_linear_gradient_sse41+0x63>
+ .byte 15,133,65,255,255,255 // jne 3e03 <_sk_linear_gradient_sse41+0x63>
.byte 15,40,124,36,216 // movaps -0x28(%rsp),%xmm7
.byte 15,40,116,36,200 // movaps -0x38(%rsp),%xmm6
.byte 15,40,108,36,184 // movaps -0x48(%rsp),%xmm5
.byte 15,40,100,36,168 // movaps -0x58(%rsp),%xmm4
- .byte 235,13 // jmp 3be3 <_sk_linear_gradient_sse41+0x145>
+ .byte 235,13 // jmp 3ee5 <_sk_linear_gradient_sse41+0x145>
.byte 15,87,201 // xorps %xmm1,%xmm1
.byte 15,87,210 // xorps %xmm2,%xmm2
.byte 15,87,219 // xorps %xmm3,%xmm3
@@ -24067,91 +24611,322 @@ HIDDEN _sk_from_2dot2_sse2
.globl _sk_from_2dot2_sse2
FUNCTION(_sk_from_2dot2_sse2)
_sk_from_2dot2_sse2:
- .byte 68,15,40,192 // movaps %xmm0,%xmm8
- .byte 65,15,82,192 // rsqrtps %xmm8,%xmm0
- .byte 15,82,192 // rsqrtps %xmm0,%xmm0
- .byte 15,82,192 // rsqrtps %xmm0,%xmm0
- .byte 68,15,82,200 // rsqrtps %xmm0,%xmm9
- .byte 65,15,82,193 // rsqrtps %xmm9,%xmm0
- .byte 68,15,82,208 // rsqrtps %xmm0,%xmm10
- .byte 69,15,89,192 // mulps %xmm8,%xmm8
- .byte 65,15,40,193 // movaps %xmm9,%xmm0
- .byte 15,89,192 // mulps %xmm0,%xmm0
- .byte 65,15,89,193 // mulps %xmm9,%xmm0
- .byte 65,15,89,192 // mulps %xmm8,%xmm0
- .byte 65,15,89,194 // mulps %xmm10,%xmm0
- .byte 69,15,87,210 // xorps %xmm10,%xmm10
- .byte 65,15,95,194 // maxps %xmm10,%xmm0
- .byte 68,15,82,193 // rsqrtps %xmm1,%xmm8
- .byte 69,15,82,192 // rsqrtps %xmm8,%xmm8
- .byte 69,15,82,192 // rsqrtps %xmm8,%xmm8
- .byte 69,15,82,200 // rsqrtps %xmm8,%xmm9
- .byte 69,15,82,193 // rsqrtps %xmm9,%xmm8
- .byte 69,15,82,216 // rsqrtps %xmm8,%xmm11
- .byte 15,89,201 // mulps %xmm1,%xmm1
- .byte 69,15,40,193 // movaps %xmm9,%xmm8
- .byte 69,15,89,192 // mulps %xmm8,%xmm8
- .byte 69,15,89,193 // mulps %xmm9,%xmm8
- .byte 68,15,89,193 // mulps %xmm1,%xmm8
- .byte 69,15,89,195 // mulps %xmm11,%xmm8
- .byte 69,15,95,194 // maxps %xmm10,%xmm8
- .byte 15,82,202 // rsqrtps %xmm2,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 68,15,82,217 // rsqrtps %xmm1,%xmm11
- .byte 65,15,82,203 // rsqrtps %xmm11,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 15,89,210 // mulps %xmm2,%xmm2
- .byte 69,15,40,203 // movaps %xmm11,%xmm9
- .byte 69,15,89,201 // mulps %xmm9,%xmm9
- .byte 69,15,89,203 // mulps %xmm11,%xmm9
- .byte 68,15,89,202 // mulps %xmm2,%xmm9
- .byte 68,15,89,201 // mulps %xmm1,%xmm9
- .byte 69,15,95,202 // maxps %xmm10,%xmm9
+ .byte 72,131,236,24 // sub $0x18,%rsp
+ .byte 15,41,60,36 // movaps %xmm7,(%rsp)
+ .byte 15,41,116,36,240 // movaps %xmm6,-0x10(%rsp)
+ .byte 15,41,108,36,224 // movaps %xmm5,-0x20(%rsp)
+ .byte 15,41,100,36,208 // movaps %xmm4,-0x30(%rsp)
+ .byte 15,41,92,36,192 // movaps %xmm3,-0x40(%rsp)
+ .byte 15,41,84,36,176 // movaps %xmm2,-0x50(%rsp)
+ .byte 15,40,208 // movaps %xmm0,%xmm2
+ .byte 184,205,204,12,64 // mov $0x400ccccd,%eax
+ .byte 15,91,194 // cvtdq2ps %xmm2,%xmm0
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 15,89,195 // mulps %xmm3,%xmm0
+ .byte 68,15,40,219 // movaps %xmm3,%xmm11
+ .byte 68,15,41,92,36,144 // movaps %xmm11,-0x70(%rsp)
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 102,68,15,112,195,0 // pshufd $0x0,%xmm3,%xmm8
+ .byte 65,15,84,208 // andps %xmm8,%xmm2
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 102,15,112,219,0 // pshufd $0x0,%xmm3,%xmm3
+ .byte 102,15,127,92,36,160 // movdqa %xmm3,-0x60(%rsp)
+ .byte 15,86,211 // orps %xmm3,%xmm2
+ .byte 185,119,115,248,66 // mov $0x42f87377,%ecx
+ .byte 102,15,110,233 // movd %ecx,%xmm5
+ .byte 15,198,237,0 // shufps $0x0,%xmm5,%xmm5
+ .byte 15,92,197 // subps %xmm5,%xmm0
+ .byte 15,41,108,36,128 // movaps %xmm5,-0x80(%rsp)
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 15,40,218 // movaps %xmm2,%xmm3
+ .byte 65,15,89,220 // mulps %xmm12,%xmm3
+ .byte 15,92,195 // subps %xmm3,%xmm0
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 65,15,88,214 // addps %xmm14,%xmm2
+ .byte 65,15,40,221 // movaps %xmm13,%xmm3
+ .byte 15,94,218 // divps %xmm2,%xmm3
+ .byte 15,92,195 // subps %xmm3,%xmm0
+ .byte 102,68,15,110,248 // movd %eax,%xmm15
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 65,15,89,199 // mulps %xmm15,%xmm0
+ .byte 243,15,91,208 // cvttps2dq %xmm0,%xmm2
+ .byte 15,91,210 // cvtdq2ps %xmm2,%xmm2
+ .byte 15,40,216 // movaps %xmm0,%xmm3
+ .byte 15,194,218,1 // cmpltps %xmm2,%xmm3
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 102,68,15,110,208 // movd %eax,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 65,15,84,218 // andps %xmm10,%xmm3
+ .byte 15,92,211 // subps %xmm3,%xmm2
+ .byte 15,40,224 // movaps %xmm0,%xmm4
+ .byte 15,92,226 // subps %xmm2,%xmm4
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,201 // movd %ecx,%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 65,15,88,193 // addps %xmm9,%xmm0
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,15,110,249 // movd %ecx,%xmm7
+ .byte 15,198,255,0 // shufps $0x0,%xmm7,%xmm7
+ .byte 15,40,215 // movaps %xmm7,%xmm2
+ .byte 15,89,212 // mulps %xmm4,%xmm2
+ .byte 15,92,194 // subps %xmm2,%xmm0
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 184,248,245,154,64 // mov $0x409af5f8,%eax
+ .byte 102,15,110,240 // movd %eax,%xmm6
+ .byte 15,198,246,0 // shufps $0x0,%xmm6,%xmm6
+ .byte 15,40,222 // movaps %xmm6,%xmm3
+ .byte 15,92,220 // subps %xmm4,%xmm3
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 15,40,226 // movaps %xmm2,%xmm4
+ .byte 15,94,227 // divps %xmm3,%xmm4
+ .byte 15,88,224 // addps %xmm0,%xmm4
+ .byte 15,91,193 // cvtdq2ps %xmm1,%xmm0
+ .byte 65,15,89,195 // mulps %xmm11,%xmm0
+ .byte 65,15,84,200 // andps %xmm8,%xmm1
+ .byte 68,15,40,92,36,160 // movaps -0x60(%rsp),%xmm11
+ .byte 65,15,86,203 // orps %xmm11,%xmm1
+ .byte 15,92,197 // subps %xmm5,%xmm0
+ .byte 15,40,217 // movaps %xmm1,%xmm3
+ .byte 65,15,89,220 // mulps %xmm12,%xmm3
+ .byte 15,92,195 // subps %xmm3,%xmm0
+ .byte 65,15,88,206 // addps %xmm14,%xmm1
+ .byte 65,15,40,221 // movaps %xmm13,%xmm3
+ .byte 15,94,217 // divps %xmm1,%xmm3
+ .byte 15,92,195 // subps %xmm3,%xmm0
+ .byte 65,15,89,199 // mulps %xmm15,%xmm0
+ .byte 243,15,91,200 // cvttps2dq %xmm0,%xmm1
+ .byte 15,91,201 // cvtdq2ps %xmm1,%xmm1
+ .byte 15,40,216 // movaps %xmm0,%xmm3
+ .byte 15,194,217,1 // cmpltps %xmm1,%xmm3
+ .byte 65,15,84,218 // andps %xmm10,%xmm3
+ .byte 15,92,203 // subps %xmm3,%xmm1
+ .byte 15,40,216 // movaps %xmm0,%xmm3
+ .byte 15,92,217 // subps %xmm1,%xmm3
+ .byte 65,15,88,193 // addps %xmm9,%xmm0
+ .byte 15,40,207 // movaps %xmm7,%xmm1
+ .byte 15,89,203 // mulps %xmm3,%xmm1
+ .byte 15,92,193 // subps %xmm1,%xmm0
+ .byte 15,40,238 // movaps %xmm6,%xmm5
+ .byte 15,92,235 // subps %xmm3,%xmm5
+ .byte 15,40,202 // movaps %xmm2,%xmm1
+ .byte 15,94,205 // divps %xmm5,%xmm1
+ .byte 15,88,200 // addps %xmm0,%xmm1
+ .byte 15,40,92,36,176 // movaps -0x50(%rsp),%xmm3
+ .byte 15,91,195 // cvtdq2ps %xmm3,%xmm0
+ .byte 15,89,68,36,144 // mulps -0x70(%rsp),%xmm0
+ .byte 68,15,84,195 // andps %xmm3,%xmm8
+ .byte 69,15,86,195 // orps %xmm11,%xmm8
+ .byte 15,92,68,36,128 // subps -0x80(%rsp),%xmm0
+ .byte 69,15,89,224 // mulps %xmm8,%xmm12
+ .byte 65,15,92,196 // subps %xmm12,%xmm0
+ .byte 69,15,88,198 // addps %xmm14,%xmm8
+ .byte 69,15,94,232 // divps %xmm8,%xmm13
+ .byte 65,15,92,197 // subps %xmm13,%xmm0
+ .byte 65,15,89,199 // mulps %xmm15,%xmm0
+ .byte 243,15,91,216 // cvttps2dq %xmm0,%xmm3
+ .byte 15,91,219 // cvtdq2ps %xmm3,%xmm3
+ .byte 15,40,232 // movaps %xmm0,%xmm5
+ .byte 15,194,235,1 // cmpltps %xmm3,%xmm5
+ .byte 65,15,84,234 // andps %xmm10,%xmm5
+ .byte 15,92,221 // subps %xmm5,%xmm3
+ .byte 15,40,232 // movaps %xmm0,%xmm5
+ .byte 15,92,235 // subps %xmm3,%xmm5
+ .byte 65,15,88,193 // addps %xmm9,%xmm0
+ .byte 15,89,253 // mulps %xmm5,%xmm7
+ .byte 15,92,199 // subps %xmm7,%xmm0
+ .byte 15,92,245 // subps %xmm5,%xmm6
+ .byte 15,94,214 // divps %xmm6,%xmm2
+ .byte 15,88,208 // addps %xmm0,%xmm2
+ .byte 102,65,15,110,192 // movd %r8d,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 15,89,224 // mulps %xmm0,%xmm4
+ .byte 15,89,200 // mulps %xmm0,%xmm1
+ .byte 15,89,208 // mulps %xmm0,%xmm2
+ .byte 102,15,91,220 // cvtps2dq %xmm4,%xmm3
+ .byte 102,15,91,201 // cvtps2dq %xmm1,%xmm1
+ .byte 102,15,91,210 // cvtps2dq %xmm2,%xmm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 65,15,40,200 // movaps %xmm8,%xmm1
- .byte 65,15,40,209 // movaps %xmm9,%xmm2
+ .byte 102,15,40,195 // movapd %xmm3,%xmm0
+ .byte 15,40,92,36,192 // movaps -0x40(%rsp),%xmm3
+ .byte 15,40,100,36,208 // movaps -0x30(%rsp),%xmm4
+ .byte 15,40,108,36,224 // movaps -0x20(%rsp),%xmm5
+ .byte 15,40,116,36,240 // movaps -0x10(%rsp),%xmm6
+ .byte 15,40,60,36 // movaps (%rsp),%xmm7
+ .byte 72,131,196,24 // add $0x18,%rsp
.byte 255,224 // jmpq *%rax
HIDDEN _sk_to_2dot2_sse2
.globl _sk_to_2dot2_sse2
FUNCTION(_sk_to_2dot2_sse2)
_sk_to_2dot2_sse2:
- .byte 68,15,82,192 // rsqrtps %xmm0,%xmm8
- .byte 65,15,82,192 // rsqrtps %xmm8,%xmm0
- .byte 15,82,192 // rsqrtps %xmm0,%xmm0
- .byte 15,82,192 // rsqrtps %xmm0,%xmm0
- .byte 15,82,192 // rsqrtps %xmm0,%xmm0
- .byte 68,15,82,200 // rsqrtps %xmm0,%xmm9
- .byte 69,15,83,192 // rcpps %xmm8,%xmm8
- .byte 68,15,89,192 // mulps %xmm0,%xmm8
- .byte 65,15,83,193 // rcpps %xmm9,%xmm0
- .byte 65,15,89,192 // mulps %xmm8,%xmm0
- .byte 69,15,87,192 // xorps %xmm8,%xmm8
- .byte 65,15,95,192 // maxps %xmm8,%xmm0
- .byte 68,15,82,201 // rsqrtps %xmm1,%xmm9
- .byte 65,15,82,201 // rsqrtps %xmm9,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 15,82,201 // rsqrtps %xmm1,%xmm1
- .byte 68,15,82,209 // rsqrtps %xmm1,%xmm10
- .byte 69,15,83,201 // rcpps %xmm9,%xmm9
- .byte 68,15,89,201 // mulps %xmm1,%xmm9
- .byte 65,15,83,202 // rcpps %xmm10,%xmm1
- .byte 65,15,89,201 // mulps %xmm9,%xmm1
- .byte 65,15,95,200 // maxps %xmm8,%xmm1
- .byte 68,15,82,202 // rsqrtps %xmm2,%xmm9
- .byte 65,15,82,209 // rsqrtps %xmm9,%xmm2
- .byte 15,82,210 // rsqrtps %xmm2,%xmm2
- .byte 15,82,210 // rsqrtps %xmm2,%xmm2
- .byte 15,82,210 // rsqrtps %xmm2,%xmm2
- .byte 68,15,82,210 // rsqrtps %xmm2,%xmm10
- .byte 69,15,83,201 // rcpps %xmm9,%xmm9
- .byte 68,15,89,202 // mulps %xmm2,%xmm9
- .byte 65,15,83,210 // rcpps %xmm10,%xmm2
- .byte 65,15,89,209 // mulps %xmm9,%xmm2
- .byte 65,15,95,208 // maxps %xmm8,%xmm2
+ .byte 72,131,236,24 // sub $0x18,%rsp
+ .byte 15,41,60,36 // movaps %xmm7,(%rsp)
+ .byte 15,41,116,36,240 // movaps %xmm6,-0x10(%rsp)
+ .byte 15,41,108,36,224 // movaps %xmm5,-0x20(%rsp)
+ .byte 15,41,100,36,208 // movaps %xmm4,-0x30(%rsp)
+ .byte 15,41,92,36,192 // movaps %xmm3,-0x40(%rsp)
+ .byte 15,41,84,36,176 // movaps %xmm2,-0x50(%rsp)
+ .byte 15,40,208 // movaps %xmm0,%xmm2
+ .byte 184,46,186,232,62 // mov $0x3ee8ba2e,%eax
+ .byte 15,91,194 // cvtdq2ps %xmm2,%xmm0
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 15,89,195 // mulps %xmm3,%xmm0
+ .byte 68,15,40,219 // movaps %xmm3,%xmm11
+ .byte 68,15,41,92,36,144 // movaps %xmm11,-0x70(%rsp)
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 102,68,15,112,195,0 // pshufd $0x0,%xmm3,%xmm8
+ .byte 65,15,84,208 // andps %xmm8,%xmm2
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 102,15,112,219,0 // pshufd $0x0,%xmm3,%xmm3
+ .byte 102,15,127,92,36,160 // movdqa %xmm3,-0x60(%rsp)
+ .byte 15,86,211 // orps %xmm3,%xmm2
+ .byte 185,119,115,248,66 // mov $0x42f87377,%ecx
+ .byte 102,15,110,233 // movd %ecx,%xmm5
+ .byte 15,198,237,0 // shufps $0x0,%xmm5,%xmm5
+ .byte 15,92,197 // subps %xmm5,%xmm0
+ .byte 15,41,108,36,128 // movaps %xmm5,-0x80(%rsp)
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 15,40,218 // movaps %xmm2,%xmm3
+ .byte 65,15,89,220 // mulps %xmm12,%xmm3
+ .byte 15,92,195 // subps %xmm3,%xmm0
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 65,15,88,214 // addps %xmm14,%xmm2
+ .byte 65,15,40,221 // movaps %xmm13,%xmm3
+ .byte 15,94,218 // divps %xmm2,%xmm3
+ .byte 15,92,195 // subps %xmm3,%xmm0
+ .byte 102,68,15,110,248 // movd %eax,%xmm15
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 65,15,89,199 // mulps %xmm15,%xmm0
+ .byte 243,15,91,208 // cvttps2dq %xmm0,%xmm2
+ .byte 15,91,210 // cvtdq2ps %xmm2,%xmm2
+ .byte 15,40,216 // movaps %xmm0,%xmm3
+ .byte 15,194,218,1 // cmpltps %xmm2,%xmm3
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 102,68,15,110,208 // movd %eax,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 65,15,84,218 // andps %xmm10,%xmm3
+ .byte 15,92,211 // subps %xmm3,%xmm2
+ .byte 15,40,224 // movaps %xmm0,%xmm4
+ .byte 15,92,226 // subps %xmm2,%xmm4
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,201 // movd %ecx,%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 65,15,88,193 // addps %xmm9,%xmm0
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,15,110,249 // movd %ecx,%xmm7
+ .byte 15,198,255,0 // shufps $0x0,%xmm7,%xmm7
+ .byte 15,40,215 // movaps %xmm7,%xmm2
+ .byte 15,89,212 // mulps %xmm4,%xmm2
+ .byte 15,92,194 // subps %xmm2,%xmm0
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 184,248,245,154,64 // mov $0x409af5f8,%eax
+ .byte 102,15,110,240 // movd %eax,%xmm6
+ .byte 15,198,246,0 // shufps $0x0,%xmm6,%xmm6
+ .byte 15,40,222 // movaps %xmm6,%xmm3
+ .byte 15,92,220 // subps %xmm4,%xmm3
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 15,40,226 // movaps %xmm2,%xmm4
+ .byte 15,94,227 // divps %xmm3,%xmm4
+ .byte 15,88,224 // addps %xmm0,%xmm4
+ .byte 15,91,193 // cvtdq2ps %xmm1,%xmm0
+ .byte 65,15,89,195 // mulps %xmm11,%xmm0
+ .byte 65,15,84,200 // andps %xmm8,%xmm1
+ .byte 68,15,40,92,36,160 // movaps -0x60(%rsp),%xmm11
+ .byte 65,15,86,203 // orps %xmm11,%xmm1
+ .byte 15,92,197 // subps %xmm5,%xmm0
+ .byte 15,40,217 // movaps %xmm1,%xmm3
+ .byte 65,15,89,220 // mulps %xmm12,%xmm3
+ .byte 15,92,195 // subps %xmm3,%xmm0
+ .byte 65,15,88,206 // addps %xmm14,%xmm1
+ .byte 65,15,40,221 // movaps %xmm13,%xmm3
+ .byte 15,94,217 // divps %xmm1,%xmm3
+ .byte 15,92,195 // subps %xmm3,%xmm0
+ .byte 65,15,89,199 // mulps %xmm15,%xmm0
+ .byte 243,15,91,200 // cvttps2dq %xmm0,%xmm1
+ .byte 15,91,201 // cvtdq2ps %xmm1,%xmm1
+ .byte 15,40,216 // movaps %xmm0,%xmm3
+ .byte 15,194,217,1 // cmpltps %xmm1,%xmm3
+ .byte 65,15,84,218 // andps %xmm10,%xmm3
+ .byte 15,92,203 // subps %xmm3,%xmm1
+ .byte 15,40,216 // movaps %xmm0,%xmm3
+ .byte 15,92,217 // subps %xmm1,%xmm3
+ .byte 65,15,88,193 // addps %xmm9,%xmm0
+ .byte 15,40,207 // movaps %xmm7,%xmm1
+ .byte 15,89,203 // mulps %xmm3,%xmm1
+ .byte 15,92,193 // subps %xmm1,%xmm0
+ .byte 15,40,238 // movaps %xmm6,%xmm5
+ .byte 15,92,235 // subps %xmm3,%xmm5
+ .byte 15,40,202 // movaps %xmm2,%xmm1
+ .byte 15,94,205 // divps %xmm5,%xmm1
+ .byte 15,88,200 // addps %xmm0,%xmm1
+ .byte 15,40,92,36,176 // movaps -0x50(%rsp),%xmm3
+ .byte 15,91,195 // cvtdq2ps %xmm3,%xmm0
+ .byte 15,89,68,36,144 // mulps -0x70(%rsp),%xmm0
+ .byte 68,15,84,195 // andps %xmm3,%xmm8
+ .byte 69,15,86,195 // orps %xmm11,%xmm8
+ .byte 15,92,68,36,128 // subps -0x80(%rsp),%xmm0
+ .byte 69,15,89,224 // mulps %xmm8,%xmm12
+ .byte 65,15,92,196 // subps %xmm12,%xmm0
+ .byte 69,15,88,198 // addps %xmm14,%xmm8
+ .byte 69,15,94,232 // divps %xmm8,%xmm13
+ .byte 65,15,92,197 // subps %xmm13,%xmm0
+ .byte 65,15,89,199 // mulps %xmm15,%xmm0
+ .byte 243,15,91,216 // cvttps2dq %xmm0,%xmm3
+ .byte 15,91,219 // cvtdq2ps %xmm3,%xmm3
+ .byte 15,40,232 // movaps %xmm0,%xmm5
+ .byte 15,194,235,1 // cmpltps %xmm3,%xmm5
+ .byte 65,15,84,234 // andps %xmm10,%xmm5
+ .byte 15,92,221 // subps %xmm5,%xmm3
+ .byte 15,40,232 // movaps %xmm0,%xmm5
+ .byte 15,92,235 // subps %xmm3,%xmm5
+ .byte 65,15,88,193 // addps %xmm9,%xmm0
+ .byte 15,89,253 // mulps %xmm5,%xmm7
+ .byte 15,92,199 // subps %xmm7,%xmm0
+ .byte 15,92,245 // subps %xmm5,%xmm6
+ .byte 15,94,214 // divps %xmm6,%xmm2
+ .byte 15,88,208 // addps %xmm0,%xmm2
+ .byte 102,65,15,110,192 // movd %r8d,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 15,89,224 // mulps %xmm0,%xmm4
+ .byte 15,89,200 // mulps %xmm0,%xmm1
+ .byte 15,89,208 // mulps %xmm0,%xmm2
+ .byte 102,15,91,220 // cvtps2dq %xmm4,%xmm3
+ .byte 102,15,91,201 // cvtps2dq %xmm1,%xmm1
+ .byte 102,15,91,210 // cvtps2dq %xmm2,%xmm2
.byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 102,15,40,195 // movapd %xmm3,%xmm0
+ .byte 15,40,92,36,192 // movaps -0x40(%rsp),%xmm3
+ .byte 15,40,100,36,208 // movaps -0x30(%rsp),%xmm4
+ .byte 15,40,108,36,224 // movaps -0x20(%rsp),%xmm5
+ .byte 15,40,116,36,240 // movaps -0x10(%rsp),%xmm6
+ .byte 15,40,60,36 // movaps (%rsp),%xmm7
+ .byte 72,131,196,24 // add $0x18,%rsp
.byte 255,224 // jmpq *%rax
HIDDEN _sk_rgb_to_hsl_sse2
@@ -25765,9 +26540,9 @@ _sk_gather_i8_sse2:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 2a02 <_sk_gather_i8_sse2+0xf>
+ .byte 116,5 // je 2da6 <_sk_gather_i8_sse2+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 2a04 <_sk_gather_i8_sse2+0x11>
+ .byte 235,2 // jmp 2da8 <_sk_gather_i8_sse2+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 243,15,91,201 // cvttps2dq %xmm1,%xmm1
@@ -27107,7 +27882,7 @@ _sk_linear_gradient_sse2:
.byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
.byte 72,139,8 // mov (%rax),%rcx
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,132,15,1,0,0 // je 3fbb <_sk_linear_gradient_sse2+0x149>
+ .byte 15,132,15,1,0,0 // je 435f <_sk_linear_gradient_sse2+0x149>
.byte 72,139,64,8 // mov 0x8(%rax),%rax
.byte 72,131,192,32 // add $0x20,%rax
.byte 69,15,87,192 // xorps %xmm8,%xmm8
@@ -27168,8 +27943,8 @@ _sk_linear_gradient_sse2:
.byte 69,15,86,231 // orps %xmm15,%xmm12
.byte 72,131,192,36 // add $0x24,%rax
.byte 72,255,201 // dec %rcx
- .byte 15,133,8,255,255,255 // jne 3ec1 <_sk_linear_gradient_sse2+0x4f>
- .byte 235,13 // jmp 3fc8 <_sk_linear_gradient_sse2+0x156>
+ .byte 15,133,8,255,255,255 // jne 4265 <_sk_linear_gradient_sse2+0x4f>
+ .byte 235,13 // jmp 436c <_sk_linear_gradient_sse2+0x156>
.byte 15,87,201 // xorps %xmm1,%xmm1
.byte 15,87,210 // xorps %xmm2,%xmm2
.byte 15,87,219 // xorps %xmm3,%xmm3
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 1a6fe0c8fd..0188dd4ed2 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -938,83 +938,230 @@ _sk_to_srgb_hsw LABEL PROC
PUBLIC _sk_from_2dot2_hsw
_sk_from_2dot2_hsw LABEL PROC
- DB 197,124,82,192 ; vrsqrtps %ymm0,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,200 ; vrsqrtps %ymm8,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 197,252,89,192 ; vmulps %ymm0,%ymm0,%ymm0
- DB 196,65,60,89,208 ; vmulps %ymm8,%ymm8,%ymm10
- DB 196,65,60,89,194 ; vmulps %ymm10,%ymm8,%ymm8
- DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 197,180,89,192 ; vmulps %ymm0,%ymm9,%ymm0
- DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
- DB 196,193,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm0
- DB 197,124,82,201 ; vrsqrtps %ymm1,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,209 ; vrsqrtps %ymm9,%ymm10
- DB 196,65,124,82,210 ; vrsqrtps %ymm10,%ymm10
- DB 197,244,89,201 ; vmulps %ymm1,%ymm1,%ymm1
- DB 196,65,52,89,217 ; vmulps %ymm9,%ymm9,%ymm11
- DB 196,65,52,89,203 ; vmulps %ymm11,%ymm9,%ymm9
- DB 196,193,116,89,201 ; vmulps %ymm9,%ymm1,%ymm1
- DB 197,172,89,201 ; vmulps %ymm1,%ymm10,%ymm1
- DB 196,193,116,95,200 ; vmaxps %ymm8,%ymm1,%ymm1
- DB 197,124,82,202 ; vrsqrtps %ymm2,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,209 ; vrsqrtps %ymm9,%ymm10
- DB 196,65,124,82,210 ; vrsqrtps %ymm10,%ymm10
- DB 197,236,89,210 ; vmulps %ymm2,%ymm2,%ymm2
- DB 196,65,52,89,217 ; vmulps %ymm9,%ymm9,%ymm11
- DB 196,65,52,89,203 ; vmulps %ymm11,%ymm9,%ymm9
- DB 196,193,108,89,209 ; vmulps %ymm9,%ymm2,%ymm2
- DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
- DB 196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
- DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 72,129,236,216,0,0,0 ; sub $0xd8,%rsp
+ DB 197,252,17,188,36,160,0,0,0 ; vmovups %ymm7,0xa0(%rsp)
+ DB 197,252,17,180,36,128,0,0,0 ; vmovups %ymm6,0x80(%rsp)
+ DB 197,252,17,108,36,96 ; vmovups %ymm5,0x60(%rsp)
+ DB 197,252,17,100,36,64 ; vmovups %ymm4,0x40(%rsp)
+ DB 197,252,17,92,36,32 ; vmovups %ymm3,0x20(%rsp)
+ DB 197,124,40,225 ; vmovaps %ymm1,%ymm12
+ DB 65,184,205,204,12,64 ; mov $0x400ccccd,%r8d
+ DB 197,124,91,208 ; vcvtdq2ps %ymm0,%ymm10
+ DB 184,0,0,0,52 ; mov $0x34000000,%eax
+ DB 197,121,110,192 ; vmovd %eax,%xmm8
+ DB 196,66,125,88,216 ; vpbroadcastd %xmm8,%ymm11
+ DB 184,255,255,127,0 ; mov $0x7fffff,%eax
+ DB 197,121,110,192 ; vmovd %eax,%xmm8
+ DB 196,194,125,88,216 ; vpbroadcastd %xmm8,%ymm3
+ DB 197,254,127,28,36 ; vmovdqu %ymm3,(%rsp)
+ DB 197,101,219,200 ; vpand %ymm0,%ymm3,%ymm9
+ DB 184,0,0,0,63 ; mov $0x3f000000,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,98,125,88,248 ; vpbroadcastd %xmm0,%ymm15
+ DB 196,193,53,235,223 ; vpor %ymm15,%ymm9,%ymm3
+ DB 184,119,115,248,66 ; mov $0x42f87377,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,98,125,88,232 ; vpbroadcastd %xmm0,%ymm13
+ DB 196,66,37,170,213 ; vfmsub213ps %ymm13,%ymm11,%ymm10
+ DB 184,117,191,191,63 ; mov $0x3fbfbf75,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,98,125,88,200 ; vpbroadcastd %xmm0,%ymm9
+ DB 196,66,101,188,209 ; vfnmadd231ps %ymm9,%ymm3,%ymm10
+ DB 184,163,233,220,63 ; mov $0x3fdce9a3,%eax
+ DB 196,65,124,91,244 ; vcvtdq2ps %ymm12,%ymm14
+ DB 196,66,37,170,245 ; vfmsub213ps %ymm13,%ymm11,%ymm14
+ DB 197,252,91,202 ; vcvtdq2ps %ymm2,%ymm1
+ DB 197,124,40,194 ; vmovaps %ymm2,%ymm8
+ DB 196,194,37,170,205 ; vfmsub213ps %ymm13,%ymm11,%ymm1
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 184,249,68,180,62 ; mov $0x3eb444f9,%eax
+ DB 197,249,110,248 ; vmovd %eax,%xmm7
+ DB 196,226,125,88,255 ; vpbroadcastd %xmm7,%ymm7
+ DB 197,100,88,223 ; vaddps %ymm7,%ymm3,%ymm11
+ DB 196,65,124,94,219 ; vdivps %ymm11,%ymm0,%ymm11
+ DB 196,65,44,92,211 ; vsubps %ymm11,%ymm10,%ymm10
+ DB 196,193,121,110,240 ; vmovd %r8d,%xmm6
+ DB 196,226,125,88,246 ; vpbroadcastd %xmm6,%ymm6
+ DB 196,65,76,89,210 ; vmulps %ymm10,%ymm6,%ymm10
+ DB 196,67,125,8,218,1 ; vroundps $0x1,%ymm10,%ymm11
+ DB 196,65,44,92,219 ; vsubps %ymm11,%ymm10,%ymm11
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 184,81,140,242,66 ; mov $0x42f28c51,%eax
+ DB 197,249,110,232 ; vmovd %eax,%xmm5
+ DB 196,226,125,88,237 ; vpbroadcastd %xmm5,%ymm5
+ DB 196,65,84,88,210 ; vaddps %ymm10,%ymm5,%ymm10
+ DB 184,141,188,190,63 ; mov $0x3fbebc8d,%eax
+ DB 197,249,110,224 ; vmovd %eax,%xmm4
+ DB 196,226,125,88,228 ; vpbroadcastd %xmm4,%ymm4
+ DB 196,66,93,188,211 ; vfnmadd231ps %ymm11,%ymm4,%ymm10
+ DB 184,254,210,221,65 ; mov $0x41ddd2fe,%eax
+ DB 197,249,110,216 ; vmovd %eax,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 184,248,245,154,64 ; mov $0x409af5f8,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 196,65,108,92,219 ; vsubps %ymm11,%ymm2,%ymm11
+ DB 196,65,100,94,219 ; vdivps %ymm11,%ymm3,%ymm11
+ DB 196,65,44,88,211 ; vaddps %ymm11,%ymm10,%ymm10
+ DB 197,124,16,44,36 ; vmovups (%rsp),%ymm13
+ DB 196,65,20,84,220 ; vandps %ymm12,%ymm13,%ymm11
+ DB 196,65,36,86,223 ; vorps %ymm15,%ymm11,%ymm11
+ DB 196,66,37,188,241 ; vfnmadd231ps %ymm9,%ymm11,%ymm14
+ DB 197,36,88,223 ; vaddps %ymm7,%ymm11,%ymm11
+ DB 196,65,124,94,219 ; vdivps %ymm11,%ymm0,%ymm11
+ DB 196,65,12,92,219 ; vsubps %ymm11,%ymm14,%ymm11
+ DB 196,65,76,89,219 ; vmulps %ymm11,%ymm6,%ymm11
+ DB 196,67,125,8,227,1 ; vroundps $0x1,%ymm11,%ymm12
+ DB 196,65,36,92,228 ; vsubps %ymm12,%ymm11,%ymm12
+ DB 196,65,84,88,219 ; vaddps %ymm11,%ymm5,%ymm11
+ DB 196,66,93,188,220 ; vfnmadd231ps %ymm12,%ymm4,%ymm11
+ DB 196,65,108,92,228 ; vsubps %ymm12,%ymm2,%ymm12
+ DB 196,65,100,94,228 ; vdivps %ymm12,%ymm3,%ymm12
+ DB 196,65,36,88,220 ; vaddps %ymm12,%ymm11,%ymm11
+ DB 196,65,20,84,192 ; vandps %ymm8,%ymm13,%ymm8
+ DB 196,65,60,86,199 ; vorps %ymm15,%ymm8,%ymm8
+ DB 196,194,61,188,201 ; vfnmadd231ps %ymm9,%ymm8,%ymm1
+ DB 197,188,88,255 ; vaddps %ymm7,%ymm8,%ymm7
+ DB 197,252,94,199 ; vdivps %ymm7,%ymm0,%ymm0
+ DB 197,244,92,192 ; vsubps %ymm0,%ymm1,%ymm0
+ DB 197,204,89,192 ; vmulps %ymm0,%ymm6,%ymm0
+ DB 196,227,125,8,200,1 ; vroundps $0x1,%ymm0,%ymm1
+ DB 197,252,92,201 ; vsubps %ymm1,%ymm0,%ymm1
+ DB 197,212,88,192 ; vaddps %ymm0,%ymm5,%ymm0
+ DB 196,226,117,172,224 ; vfnmadd213ps %ymm0,%ymm1,%ymm4
+ DB 197,236,92,193 ; vsubps %ymm1,%ymm2,%ymm0
+ DB 197,228,94,192 ; vdivps %ymm0,%ymm3,%ymm0
+ DB 197,220,88,192 ; vaddps %ymm0,%ymm4,%ymm0
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 196,193,116,89,210 ; vmulps %ymm10,%ymm1,%ymm2
+ DB 196,193,116,89,219 ; vmulps %ymm11,%ymm1,%ymm3
+ DB 197,244,89,224 ; vmulps %ymm0,%ymm1,%ymm4
+ DB 197,253,91,194 ; vcvtps2dq %ymm2,%ymm0
+ DB 197,253,91,203 ; vcvtps2dq %ymm3,%ymm1
+ DB 197,253,91,212 ; vcvtps2dq %ymm4,%ymm2
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 197,252,16,92,36,32 ; vmovups 0x20(%rsp),%ymm3
+ DB 197,252,16,100,36,64 ; vmovups 0x40(%rsp),%ymm4
+ DB 197,252,16,108,36,96 ; vmovups 0x60(%rsp),%ymm5
+ DB 197,252,16,180,36,128,0,0,0 ; vmovups 0x80(%rsp),%ymm6
+ DB 197,252,16,188,36,160,0,0,0 ; vmovups 0xa0(%rsp),%ymm7
+ DB 72,129,196,216,0,0,0 ; add $0xd8,%rsp
DB 255,224 ; jmpq *%rax
PUBLIC _sk_to_2dot2_hsw
_sk_to_2dot2_hsw LABEL PROC
- DB 197,252,82,192 ; vrsqrtps %ymm0,%ymm0
- DB 197,124,82,192 ; vrsqrtps %ymm0,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,200 ; vrsqrtps %ymm8,%ymm9
- DB 197,252,83,192 ; vrcpps %ymm0,%ymm0
- DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
- DB 196,65,124,83,193 ; vrcpps %ymm9,%ymm8
- DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
- DB 196,193,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm0
- DB 197,252,82,201 ; vrsqrtps %ymm1,%ymm1
- DB 197,124,82,201 ; vrsqrtps %ymm1,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,209 ; vrsqrtps %ymm9,%ymm10
- DB 197,252,83,201 ; vrcpps %ymm1,%ymm1
- DB 197,180,89,201 ; vmulps %ymm1,%ymm9,%ymm1
- DB 196,65,124,83,202 ; vrcpps %ymm10,%ymm9
- DB 196,193,116,89,201 ; vmulps %ymm9,%ymm1,%ymm1
- DB 196,193,116,95,200 ; vmaxps %ymm8,%ymm1,%ymm1
- DB 197,252,82,210 ; vrsqrtps %ymm2,%ymm2
- DB 197,124,82,202 ; vrsqrtps %ymm2,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,209 ; vrsqrtps %ymm9,%ymm10
- DB 197,252,83,210 ; vrcpps %ymm2,%ymm2
- DB 197,180,89,210 ; vmulps %ymm2,%ymm9,%ymm2
- DB 196,65,124,83,202 ; vrcpps %ymm10,%ymm9
- DB 196,193,108,89,209 ; vmulps %ymm9,%ymm2,%ymm2
- DB 196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
- DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 72,129,236,216,0,0,0 ; sub $0xd8,%rsp
+ DB 197,252,17,188,36,160,0,0,0 ; vmovups %ymm7,0xa0(%rsp)
+ DB 197,252,17,180,36,128,0,0,0 ; vmovups %ymm6,0x80(%rsp)
+ DB 197,252,17,108,36,96 ; vmovups %ymm5,0x60(%rsp)
+ DB 197,252,17,100,36,64 ; vmovups %ymm4,0x40(%rsp)
+ DB 197,252,17,92,36,32 ; vmovups %ymm3,0x20(%rsp)
+ DB 197,124,40,225 ; vmovaps %ymm1,%ymm12
+ DB 65,184,46,186,232,62 ; mov $0x3ee8ba2e,%r8d
+ DB 197,124,91,208 ; vcvtdq2ps %ymm0,%ymm10
+ DB 184,0,0,0,52 ; mov $0x34000000,%eax
+ DB 197,121,110,192 ; vmovd %eax,%xmm8
+ DB 196,66,125,88,216 ; vpbroadcastd %xmm8,%ymm11
+ DB 184,255,255,127,0 ; mov $0x7fffff,%eax
+ DB 197,121,110,192 ; vmovd %eax,%xmm8
+ DB 196,194,125,88,216 ; vpbroadcastd %xmm8,%ymm3
+ DB 197,254,127,28,36 ; vmovdqu %ymm3,(%rsp)
+ DB 197,101,219,200 ; vpand %ymm0,%ymm3,%ymm9
+ DB 184,0,0,0,63 ; mov $0x3f000000,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,98,125,88,248 ; vpbroadcastd %xmm0,%ymm15
+ DB 196,193,53,235,223 ; vpor %ymm15,%ymm9,%ymm3
+ DB 184,119,115,248,66 ; mov $0x42f87377,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,98,125,88,232 ; vpbroadcastd %xmm0,%ymm13
+ DB 196,66,37,170,213 ; vfmsub213ps %ymm13,%ymm11,%ymm10
+ DB 184,117,191,191,63 ; mov $0x3fbfbf75,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,98,125,88,200 ; vpbroadcastd %xmm0,%ymm9
+ DB 196,66,101,188,209 ; vfnmadd231ps %ymm9,%ymm3,%ymm10
+ DB 184,163,233,220,63 ; mov $0x3fdce9a3,%eax
+ DB 196,65,124,91,244 ; vcvtdq2ps %ymm12,%ymm14
+ DB 196,66,37,170,245 ; vfmsub213ps %ymm13,%ymm11,%ymm14
+ DB 197,252,91,202 ; vcvtdq2ps %ymm2,%ymm1
+ DB 197,124,40,194 ; vmovaps %ymm2,%ymm8
+ DB 196,194,37,170,205 ; vfmsub213ps %ymm13,%ymm11,%ymm1
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 184,249,68,180,62 ; mov $0x3eb444f9,%eax
+ DB 197,249,110,248 ; vmovd %eax,%xmm7
+ DB 196,226,125,88,255 ; vpbroadcastd %xmm7,%ymm7
+ DB 197,100,88,223 ; vaddps %ymm7,%ymm3,%ymm11
+ DB 196,65,124,94,219 ; vdivps %ymm11,%ymm0,%ymm11
+ DB 196,65,44,92,211 ; vsubps %ymm11,%ymm10,%ymm10
+ DB 196,193,121,110,240 ; vmovd %r8d,%xmm6
+ DB 196,226,125,88,246 ; vpbroadcastd %xmm6,%ymm6
+ DB 196,65,76,89,210 ; vmulps %ymm10,%ymm6,%ymm10
+ DB 196,67,125,8,218,1 ; vroundps $0x1,%ymm10,%ymm11
+ DB 196,65,44,92,219 ; vsubps %ymm11,%ymm10,%ymm11
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 184,81,140,242,66 ; mov $0x42f28c51,%eax
+ DB 197,249,110,232 ; vmovd %eax,%xmm5
+ DB 196,226,125,88,237 ; vpbroadcastd %xmm5,%ymm5
+ DB 196,65,84,88,210 ; vaddps %ymm10,%ymm5,%ymm10
+ DB 184,141,188,190,63 ; mov $0x3fbebc8d,%eax
+ DB 197,249,110,224 ; vmovd %eax,%xmm4
+ DB 196,226,125,88,228 ; vpbroadcastd %xmm4,%ymm4
+ DB 196,66,93,188,211 ; vfnmadd231ps %ymm11,%ymm4,%ymm10
+ DB 184,254,210,221,65 ; mov $0x41ddd2fe,%eax
+ DB 197,249,110,216 ; vmovd %eax,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 184,248,245,154,64 ; mov $0x409af5f8,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 196,65,108,92,219 ; vsubps %ymm11,%ymm2,%ymm11
+ DB 196,65,100,94,219 ; vdivps %ymm11,%ymm3,%ymm11
+ DB 196,65,44,88,211 ; vaddps %ymm11,%ymm10,%ymm10
+ DB 197,124,16,44,36 ; vmovups (%rsp),%ymm13
+ DB 196,65,20,84,220 ; vandps %ymm12,%ymm13,%ymm11
+ DB 196,65,36,86,223 ; vorps %ymm15,%ymm11,%ymm11
+ DB 196,66,37,188,241 ; vfnmadd231ps %ymm9,%ymm11,%ymm14
+ DB 197,36,88,223 ; vaddps %ymm7,%ymm11,%ymm11
+ DB 196,65,124,94,219 ; vdivps %ymm11,%ymm0,%ymm11
+ DB 196,65,12,92,219 ; vsubps %ymm11,%ymm14,%ymm11
+ DB 196,65,76,89,219 ; vmulps %ymm11,%ymm6,%ymm11
+ DB 196,67,125,8,227,1 ; vroundps $0x1,%ymm11,%ymm12
+ DB 196,65,36,92,228 ; vsubps %ymm12,%ymm11,%ymm12
+ DB 196,65,84,88,219 ; vaddps %ymm11,%ymm5,%ymm11
+ DB 196,66,93,188,220 ; vfnmadd231ps %ymm12,%ymm4,%ymm11
+ DB 196,65,108,92,228 ; vsubps %ymm12,%ymm2,%ymm12
+ DB 196,65,100,94,228 ; vdivps %ymm12,%ymm3,%ymm12
+ DB 196,65,36,88,220 ; vaddps %ymm12,%ymm11,%ymm11
+ DB 196,65,20,84,192 ; vandps %ymm8,%ymm13,%ymm8
+ DB 196,65,60,86,199 ; vorps %ymm15,%ymm8,%ymm8
+ DB 196,194,61,188,201 ; vfnmadd231ps %ymm9,%ymm8,%ymm1
+ DB 197,188,88,255 ; vaddps %ymm7,%ymm8,%ymm7
+ DB 197,252,94,199 ; vdivps %ymm7,%ymm0,%ymm0
+ DB 197,244,92,192 ; vsubps %ymm0,%ymm1,%ymm0
+ DB 197,204,89,192 ; vmulps %ymm0,%ymm6,%ymm0
+ DB 196,227,125,8,200,1 ; vroundps $0x1,%ymm0,%ymm1
+ DB 197,252,92,201 ; vsubps %ymm1,%ymm0,%ymm1
+ DB 197,212,88,192 ; vaddps %ymm0,%ymm5,%ymm0
+ DB 196,226,117,172,224 ; vfnmadd213ps %ymm0,%ymm1,%ymm4
+ DB 197,236,92,193 ; vsubps %ymm1,%ymm2,%ymm0
+ DB 197,228,94,192 ; vdivps %ymm0,%ymm3,%ymm0
+ DB 197,220,88,192 ; vaddps %ymm0,%ymm4,%ymm0
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 196,193,116,89,210 ; vmulps %ymm10,%ymm1,%ymm2
+ DB 196,193,116,89,219 ; vmulps %ymm11,%ymm1,%ymm3
+ DB 197,244,89,224 ; vmulps %ymm0,%ymm1,%ymm4
+ DB 197,253,91,194 ; vcvtps2dq %ymm2,%ymm0
+ DB 197,253,91,203 ; vcvtps2dq %ymm3,%ymm1
+ DB 197,253,91,212 ; vcvtps2dq %ymm4,%ymm2
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 197,252,16,92,36,32 ; vmovups 0x20(%rsp),%ymm3
+ DB 197,252,16,100,36,64 ; vmovups 0x40(%rsp),%ymm4
+ DB 197,252,16,108,36,96 ; vmovups 0x60(%rsp),%ymm5
+ DB 197,252,16,180,36,128,0,0,0 ; vmovups 0x80(%rsp),%ymm6
+ DB 197,252,16,188,36,160,0,0,0 ; vmovups 0xa0(%rsp),%ymm7
+ DB 72,129,196,216,0,0,0 ; add $0xd8,%rsp
DB 255,224 ; jmpq *%rax
PUBLIC _sk_rgb_to_hsl_hsw
@@ -1204,7 +1351,7 @@ _sk_scale_u8_hsw LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,56 ; jne 126b <_sk_scale_u8_hsw+0x48>
+ DB 117,56 ; jne 155f <_sk_scale_u8_hsw+0x48>
DB 197,122,126,0 ; vmovq (%rax),%xmm8
DB 196,66,125,49,192 ; vpmovzxbd %xmm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
@@ -1228,9 +1375,9 @@ _sk_scale_u8_hsw LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 1273 <_sk_scale_u8_hsw+0x50>
+ DB 117,234 ; jne 1567 <_sk_scale_u8_hsw+0x50>
DB 196,65,249,110,193 ; vmovq %r9,%xmm8
- DB 235,167 ; jmp 1237 <_sk_scale_u8_hsw+0x14>
+ DB 235,167 ; jmp 152b <_sk_scale_u8_hsw+0x14>
PUBLIC _sk_lerp_1_float_hsw
_sk_lerp_1_float_hsw LABEL PROC
@@ -1254,7 +1401,7 @@ _sk_lerp_u8_hsw LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,76 ; jne 131b <_sk_lerp_u8_hsw+0x5c>
+ DB 117,76 ; jne 160f <_sk_lerp_u8_hsw+0x5c>
DB 197,122,126,0 ; vmovq (%rax),%xmm8
DB 196,66,125,49,192 ; vpmovzxbd %xmm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
@@ -1282,16 +1429,16 @@ _sk_lerp_u8_hsw LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 1323 <_sk_lerp_u8_hsw+0x64>
+ DB 117,234 ; jne 1617 <_sk_lerp_u8_hsw+0x64>
DB 196,65,249,110,193 ; vmovq %r9,%xmm8
- DB 235,147 ; jmp 12d3 <_sk_lerp_u8_hsw+0x14>
+ DB 235,147 ; jmp 15c7 <_sk_lerp_u8_hsw+0x14>
PUBLIC _sk_lerp_565_hsw
_sk_lerp_565_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,179,0,0,0 ; jne 1401 <_sk_lerp_565_hsw+0xc1>
+ DB 15,133,179,0,0,0 ; jne 16f5 <_sk_lerp_565_hsw+0xc1>
DB 196,193,122,111,28,122 ; vmovdqu (%r10,%rdi,2),%xmm3
DB 196,98,125,51,195 ; vpmovzxwd %xmm3,%ymm8
DB 184,0,248,0,0 ; mov $0xf800,%eax
@@ -1337,9 +1484,9 @@ _sk_lerp_565_hsw LABEL PROC
DB 197,225,239,219 ; vpxor %xmm3,%xmm3,%xmm3
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,59,255,255,255 ; ja 1354 <_sk_lerp_565_hsw+0x14>
+ DB 15,135,59,255,255,255 ; ja 1648 <_sk_lerp_565_hsw+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 1470 <_sk_lerp_565_hsw+0x130>
+ DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 1764 <_sk_lerp_565_hsw+0x130>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -1351,13 +1498,13 @@ _sk_lerp_565_hsw LABEL PROC
DB 196,193,97,196,92,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm3,%xmm3
DB 196,193,97,196,92,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm3,%xmm3
DB 196,193,97,196,28,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm3,%xmm3
- DB 233,231,254,255,255 ; jmpq 1354 <_sk_lerp_565_hsw+0x14>
+ DB 233,231,254,255,255 ; jmpq 1648 <_sk_lerp_565_hsw+0x14>
DB 15,31,0 ; nopl (%rax)
DB 241 ; icebp
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,225 ; jmpq ffffffffe2001478 <_sk_callback_hsw+0xffffffffe1ffd152>
+ DB 233,255,255,255,225 ; jmpq ffffffffe200176c <_sk_callback_hsw+0xffffffffe1ffd152>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -1382,7 +1529,7 @@ _sk_load_tables_hsw LABEL PROC
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
DB 76,3,8 ; add (%rax),%r9
DB 77,133,192 ; test %r8,%r8
- DB 117,121 ; jne 151a <_sk_load_tables_hsw+0x8e>
+ DB 117,121 ; jne 180e <_sk_load_tables_hsw+0x8e>
DB 196,193,126,111,25 ; vmovdqu (%r9),%ymm3
DB 185,255,0,0,0 ; mov $0xff,%ecx
DB 197,249,110,193 ; vmovd %ecx,%xmm0
@@ -1418,7 +1565,7 @@ _sk_load_tables_hsw LABEL PROC
DB 196,193,249,110,194 ; vmovq %r10,%xmm0
DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0
DB 196,194,125,140,25 ; vpmaskmovd (%r9),%ymm0,%ymm3
- DB 233,99,255,255,255 ; jmpq 14a6 <_sk_load_tables_hsw+0x1a>
+ DB 233,99,255,255,255 ; jmpq 179a <_sk_load_tables_hsw+0x1a>
PUBLIC _sk_load_tables_u16_be_hsw
_sk_load_tables_u16_be_hsw LABEL PROC
@@ -1426,7 +1573,7 @@ _sk_load_tables_u16_be_hsw LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,216,0,0,0 ; jne 1631 <_sk_load_tables_u16_be_hsw+0xee>
+ DB 15,133,216,0,0,0 ; jne 1925 <_sk_load_tables_u16_be_hsw+0xee>
DB 196,1,121,16,4,72 ; vmovupd (%r8,%r9,2),%xmm8
DB 196,129,121,16,84,72,16 ; vmovupd 0x10(%r8,%r9,2),%xmm2
DB 196,129,121,16,92,72,32 ; vmovupd 0x20(%r8,%r9,2),%xmm3
@@ -1475,29 +1622,29 @@ _sk_load_tables_u16_be_hsw LABEL PROC
DB 196,1,123,16,4,72 ; vmovsd (%r8,%r9,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 1697 <_sk_load_tables_u16_be_hsw+0x154>
+ DB 116,85 ; je 198b <_sk_load_tables_u16_be_hsw+0x154>
DB 196,1,57,22,68,72,8 ; vmovhpd 0x8(%r8,%r9,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 1697 <_sk_load_tables_u16_be_hsw+0x154>
+ DB 114,72 ; jb 198b <_sk_load_tables_u16_be_hsw+0x154>
DB 196,129,123,16,84,72,16 ; vmovsd 0x10(%r8,%r9,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 16a4 <_sk_load_tables_u16_be_hsw+0x161>
+ DB 116,72 ; je 1998 <_sk_load_tables_u16_be_hsw+0x161>
DB 196,129,105,22,84,72,24 ; vmovhpd 0x18(%r8,%r9,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 16a4 <_sk_load_tables_u16_be_hsw+0x161>
+ DB 114,59 ; jb 1998 <_sk_load_tables_u16_be_hsw+0x161>
DB 196,129,123,16,92,72,32 ; vmovsd 0x20(%r8,%r9,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,250,254,255,255 ; je 1574 <_sk_load_tables_u16_be_hsw+0x31>
+ DB 15,132,250,254,255,255 ; je 1868 <_sk_load_tables_u16_be_hsw+0x31>
DB 196,129,97,22,92,72,40 ; vmovhpd 0x28(%r8,%r9,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,233,254,255,255 ; jb 1574 <_sk_load_tables_u16_be_hsw+0x31>
+ DB 15,130,233,254,255,255 ; jb 1868 <_sk_load_tables_u16_be_hsw+0x31>
DB 196,1,122,126,76,72,48 ; vmovq 0x30(%r8,%r9,2),%xmm9
- DB 233,221,254,255,255 ; jmpq 1574 <_sk_load_tables_u16_be_hsw+0x31>
+ DB 233,221,254,255,255 ; jmpq 1868 <_sk_load_tables_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,208,254,255,255 ; jmpq 1574 <_sk_load_tables_u16_be_hsw+0x31>
+ DB 233,208,254,255,255 ; jmpq 1868 <_sk_load_tables_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,199,254,255,255 ; jmpq 1574 <_sk_load_tables_u16_be_hsw+0x31>
+ DB 233,199,254,255,255 ; jmpq 1868 <_sk_load_tables_u16_be_hsw+0x31>
PUBLIC _sk_load_tables_rgb_u16_be_hsw
_sk_load_tables_rgb_u16_be_hsw LABEL PROC
@@ -1505,7 +1652,7 @@ _sk_load_tables_rgb_u16_be_hsw LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,127 ; lea (%rdi,%rdi,2),%r9
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,207,0,0,0 ; jne 178e <_sk_load_tables_rgb_u16_be_hsw+0xe1>
+ DB 15,133,207,0,0,0 ; jne 1a82 <_sk_load_tables_rgb_u16_be_hsw+0xe1>
DB 196,129,122,111,4,72 ; vmovdqu (%r8,%r9,2),%xmm0
DB 196,129,122,111,84,72,12 ; vmovdqu 0xc(%r8,%r9,2),%xmm2
DB 196,129,122,111,76,72,24 ; vmovdqu 0x18(%r8,%r9,2),%xmm1
@@ -1552,36 +1699,36 @@ _sk_load_tables_rgb_u16_be_hsw LABEL PROC
DB 196,129,121,110,4,72 ; vmovd (%r8,%r9,2),%xmm0
DB 196,129,121,196,68,72,4,2 ; vpinsrw $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 17a7 <_sk_load_tables_rgb_u16_be_hsw+0xfa>
- DB 233,76,255,255,255 ; jmpq 16f3 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 117,5 ; jne 1a9b <_sk_load_tables_rgb_u16_be_hsw+0xfa>
+ DB 233,76,255,255,255 ; jmpq 19e7 <_sk_load_tables_rgb_u16_be_hsw+0x46>
DB 196,129,121,110,76,72,6 ; vmovd 0x6(%r8,%r9,2),%xmm1
DB 196,1,113,196,68,72,10,2 ; vpinsrw $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 17d6 <_sk_load_tables_rgb_u16_be_hsw+0x129>
+ DB 114,26 ; jb 1aca <_sk_load_tables_rgb_u16_be_hsw+0x129>
DB 196,129,121,110,76,72,12 ; vmovd 0xc(%r8,%r9,2),%xmm1
DB 196,129,113,196,84,72,16,2 ; vpinsrw $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 17db <_sk_load_tables_rgb_u16_be_hsw+0x12e>
- DB 233,29,255,255,255 ; jmpq 16f3 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- DB 233,24,255,255,255 ; jmpq 16f3 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 1acf <_sk_load_tables_rgb_u16_be_hsw+0x12e>
+ DB 233,29,255,255,255 ; jmpq 19e7 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 233,24,255,255,255 ; jmpq 19e7 <_sk_load_tables_rgb_u16_be_hsw+0x46>
DB 196,129,121,110,76,72,18 ; vmovd 0x12(%r8,%r9,2),%xmm1
DB 196,1,113,196,76,72,22,2 ; vpinsrw $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 180a <_sk_load_tables_rgb_u16_be_hsw+0x15d>
+ DB 114,26 ; jb 1afe <_sk_load_tables_rgb_u16_be_hsw+0x15d>
DB 196,129,121,110,76,72,24 ; vmovd 0x18(%r8,%r9,2),%xmm1
DB 196,129,113,196,76,72,28,2 ; vpinsrw $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 180f <_sk_load_tables_rgb_u16_be_hsw+0x162>
- DB 233,233,254,255,255 ; jmpq 16f3 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- DB 233,228,254,255,255 ; jmpq 16f3 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 1b03 <_sk_load_tables_rgb_u16_be_hsw+0x162>
+ DB 233,233,254,255,255 ; jmpq 19e7 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 233,228,254,255,255 ; jmpq 19e7 <_sk_load_tables_rgb_u16_be_hsw+0x46>
DB 196,129,121,110,92,72,30 ; vmovd 0x1e(%r8,%r9,2),%xmm3
DB 196,1,97,196,92,72,34,2 ; vpinsrw $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 1838 <_sk_load_tables_rgb_u16_be_hsw+0x18b>
+ DB 114,20 ; jb 1b2c <_sk_load_tables_rgb_u16_be_hsw+0x18b>
DB 196,129,121,110,92,72,36 ; vmovd 0x24(%r8,%r9,2),%xmm3
DB 196,129,97,196,92,72,40,2 ; vpinsrw $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
- DB 233,187,254,255,255 ; jmpq 16f3 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- DB 233,182,254,255,255 ; jmpq 16f3 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 233,187,254,255,255 ; jmpq 19e7 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 233,182,254,255,255 ; jmpq 19e7 <_sk_load_tables_rgb_u16_be_hsw+0x46>
PUBLIC _sk_byte_tables_hsw
_sk_byte_tables_hsw LABEL PROC
@@ -2320,7 +2467,7 @@ _sk_load_a8_hsw LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,50 ; jne 2535 <_sk_load_a8_hsw+0x42>
+ DB 117,50 ; jne 2829 <_sk_load_a8_hsw+0x42>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
@@ -2343,9 +2490,9 @@ _sk_load_a8_hsw LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 253d <_sk_load_a8_hsw+0x4a>
+ DB 117,234 ; jne 2831 <_sk_load_a8_hsw+0x4a>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,173 ; jmp 2507 <_sk_load_a8_hsw+0x14>
+ DB 235,173 ; jmp 27fb <_sk_load_a8_hsw+0x14>
PUBLIC _sk_gather_a8_hsw
_sk_gather_a8_hsw LABEL PROC
@@ -2416,7 +2563,7 @@ _sk_store_a8_hsw LABEL PROC
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 2672 <_sk_store_a8_hsw+0x3b>
+ DB 117,10 ; jne 2966 <_sk_store_a8_hsw+0x3b>
DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2424,10 +2571,10 @@ _sk_store_a8_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 266e <_sk_store_a8_hsw+0x37>
+ DB 119,236 ; ja 2962 <_sk_store_a8_hsw+0x37>
DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 26d4 <_sk_store_a8_hsw+0x9d>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 29c8 <_sk_store_a8_hsw+0x9d>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -2438,7 +2585,7 @@ _sk_store_a8_hsw LABEL PROC
DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- DB 235,154 ; jmp 266e <_sk_store_a8_hsw+0x37>
+ DB 235,154 ; jmp 2962 <_sk_store_a8_hsw+0x37>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2469,7 +2616,7 @@ _sk_load_g8_hsw LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,60 ; jne 273c <_sk_load_g8_hsw+0x4c>
+ DB 117,60 ; jne 2a30 <_sk_load_g8_hsw+0x4c>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
@@ -2494,9 +2641,9 @@ _sk_load_g8_hsw LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 2744 <_sk_load_g8_hsw+0x54>
+ DB 117,234 ; jne 2a38 <_sk_load_g8_hsw+0x54>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,163 ; jmp 2704 <_sk_load_g8_hsw+0x14>
+ DB 235,163 ; jmp 29f8 <_sk_load_g8_hsw+0x14>
PUBLIC _sk_gather_g8_hsw
_sk_gather_g8_hsw LABEL PROC
@@ -2561,9 +2708,9 @@ _sk_gather_i8_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 2857 <_sk_gather_i8_hsw+0xf>
+ DB 116,5 ; je 2b4b <_sk_gather_i8_hsw+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 2859 <_sk_gather_i8_hsw+0x11>
+ DB 235,2 ; jmp 2b4d <_sk_gather_i8_hsw+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 65,87 ; push %r15
DB 65,86 ; push %r14
@@ -2634,7 +2781,7 @@ _sk_load_565_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,149,0,0,0 ; jne 2a0b <_sk_load_565_hsw+0xa3>
+ DB 15,133,149,0,0,0 ; jne 2cff <_sk_load_565_hsw+0xa3>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 196,226,125,51,208 ; vpmovzxwd %xmm0,%ymm2
DB 184,0,248,0,0 ; mov $0xf800,%eax
@@ -2674,9 +2821,9 @@ _sk_load_565_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,89,255,255,255 ; ja 297c <_sk_load_565_hsw+0x14>
+ DB 15,135,89,255,255,255 ; ja 2c70 <_sk_load_565_hsw+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 2a78 <_sk_load_565_hsw+0x110>
+ DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 2d6c <_sk_load_565_hsw+0x110>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -2688,12 +2835,12 @@ _sk_load_565_hsw LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,5,255,255,255 ; jmpq 297c <_sk_load_565_hsw+0x14>
+ DB 233,5,255,255,255 ; jmpq 2c70 <_sk_load_565_hsw+0x14>
DB 144 ; nop
DB 243,255 ; repz (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 235,255 ; jmp 2a7d <_sk_load_565_hsw+0x115>
+ DB 235,255 ; jmp 2d71 <_sk_load_565_hsw+0x115>
DB 255 ; (bad)
DB 255,227 ; jmpq *%rbx
DB 255 ; (bad)
@@ -2816,7 +2963,7 @@ _sk_store_565_hsw LABEL PROC
DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 2c43 <_sk_store_565_hsw+0x6c>
+ DB 117,10 ; jne 2f37 <_sk_store_565_hsw+0x6c>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2824,9 +2971,9 @@ _sk_store_565_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 2c3f <_sk_store_565_hsw+0x68>
+ DB 119,236 ; ja 2f33 <_sk_store_565_hsw+0x68>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2ca0 <_sk_store_565_hsw+0xc9>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2f94 <_sk_store_565_hsw+0xc9>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -2837,7 +2984,7 @@ _sk_store_565_hsw LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 2c3f <_sk_store_565_hsw+0x68>
+ DB 235,159 ; jmp 2f33 <_sk_store_565_hsw+0x68>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2866,7 +3013,7 @@ _sk_load_4444_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,179,0,0,0 ; jne 2d7d <_sk_load_4444_hsw+0xc1>
+ DB 15,133,179,0,0,0 ; jne 3071 <_sk_load_4444_hsw+0xc1>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 196,98,125,51,200 ; vpmovzxwd %xmm0,%ymm9
DB 184,0,240,0,0 ; mov $0xf000,%eax
@@ -2912,9 +3059,9 @@ _sk_load_4444_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,59,255,255,255 ; ja 2cd0 <_sk_load_4444_hsw+0x14>
+ DB 15,135,59,255,255,255 ; ja 2fc4 <_sk_load_4444_hsw+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 2dec <_sk_load_4444_hsw+0x130>
+ DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 30e0 <_sk_load_4444_hsw+0x130>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -2926,13 +3073,13 @@ _sk_load_4444_hsw LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,231,254,255,255 ; jmpq 2cd0 <_sk_load_4444_hsw+0x14>
+ DB 233,231,254,255,255 ; jmpq 2fc4 <_sk_load_4444_hsw+0x14>
DB 15,31,0 ; nopl (%rax)
DB 241 ; icebp
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,225 ; jmpq ffffffffe2002df4 <_sk_callback_hsw+0xffffffffe1ffeace>
+ DB 233,255,255,255,225 ; jmpq ffffffffe20030e8 <_sk_callback_hsw+0xffffffffe1ffeace>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -3060,7 +3207,7 @@ _sk_store_4444_hsw LABEL PROC
DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 2fdb <_sk_store_4444_hsw+0x72>
+ DB 117,10 ; jne 32cf <_sk_store_4444_hsw+0x72>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -3068,9 +3215,9 @@ _sk_store_4444_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 2fd7 <_sk_store_4444_hsw+0x6e>
+ DB 119,236 ; ja 32cb <_sk_store_4444_hsw+0x6e>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 3038 <_sk_store_4444_hsw+0xcf>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 332c <_sk_store_4444_hsw+0xcf>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -3081,7 +3228,7 @@ _sk_store_4444_hsw LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 2fd7 <_sk_store_4444_hsw+0x6e>
+ DB 235,159 ; jmp 32cb <_sk_store_4444_hsw+0x6e>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -3112,7 +3259,7 @@ _sk_load_8888_hsw LABEL PROC
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
DB 76,3,8 ; add (%rax),%r9
DB 77,133,192 ; test %r8,%r8
- DB 117,104 ; jne 30d1 <_sk_load_8888_hsw+0x7d>
+ DB 117,104 ; jne 33c5 <_sk_load_8888_hsw+0x7d>
DB 196,193,126,111,25 ; vmovdqu (%r9),%ymm3
DB 184,255,0,0,0 ; mov $0xff,%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
@@ -3145,7 +3292,7 @@ _sk_load_8888_hsw LABEL PROC
DB 196,225,249,110,192 ; vmovq %rax,%xmm0
DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0
DB 196,194,125,140,25 ; vpmaskmovd (%r9),%ymm0,%ymm3
- DB 233,116,255,255,255 ; jmpq 306e <_sk_load_8888_hsw+0x1a>
+ DB 233,116,255,255,255 ; jmpq 3362 <_sk_load_8888_hsw+0x1a>
PUBLIC _sk_gather_8888_hsw
_sk_gather_8888_hsw LABEL PROC
@@ -3205,7 +3352,7 @@ _sk_store_8888_hsw LABEL PROC
DB 196,65,45,235,192 ; vpor %ymm8,%ymm10,%ymm8
DB 196,65,53,235,192 ; vpor %ymm8,%ymm9,%ymm8
DB 77,133,192 ; test %r8,%r8
- DB 117,12 ; jne 31f4 <_sk_store_8888_hsw+0x74>
+ DB 117,12 ; jne 34e8 <_sk_store_8888_hsw+0x74>
DB 196,65,126,127,1 ; vmovdqu %ymm8,(%r9)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,137,193 ; mov %r8,%rcx
@@ -3218,14 +3365,14 @@ _sk_store_8888_hsw LABEL PROC
DB 196,97,249,110,200 ; vmovq %rax,%xmm9
DB 196,66,125,33,201 ; vpmovsxbd %xmm9,%ymm9
DB 196,66,53,142,1 ; vpmaskmovd %ymm8,%ymm9,(%r9)
- DB 235,211 ; jmp 31ed <_sk_store_8888_hsw+0x6d>
+ DB 235,211 ; jmp 34e1 <_sk_store_8888_hsw+0x6d>
PUBLIC _sk_load_f16_hsw
_sk_load_f16_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,139,0 ; mov (%rax),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 117,97 ; jne 3285 <_sk_load_f16_hsw+0x6b>
+ DB 117,97 ; jne 3579 <_sk_load_f16_hsw+0x6b>
DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8
DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2
DB 197,249,16,92,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -3251,29 +3398,29 @@ _sk_load_f16_hsw LABEL PROC
DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,79 ; je 32e4 <_sk_load_f16_hsw+0xca>
+ DB 116,79 ; je 35d8 <_sk_load_f16_hsw+0xca>
DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,67 ; jb 32e4 <_sk_load_f16_hsw+0xca>
+ DB 114,67 ; jb 35d8 <_sk_load_f16_hsw+0xca>
DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,68 ; je 32f1 <_sk_load_f16_hsw+0xd7>
+ DB 116,68 ; je 35e5 <_sk_load_f16_hsw+0xd7>
DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,56 ; jb 32f1 <_sk_load_f16_hsw+0xd7>
+ DB 114,56 ; jb 35e5 <_sk_load_f16_hsw+0xd7>
DB 197,251,16,92,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,114,255,255,255 ; je 323b <_sk_load_f16_hsw+0x21>
+ DB 15,132,114,255,255,255 ; je 352f <_sk_load_f16_hsw+0x21>
DB 197,225,22,92,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,98,255,255,255 ; jb 323b <_sk_load_f16_hsw+0x21>
+ DB 15,130,98,255,255,255 ; jb 352f <_sk_load_f16_hsw+0x21>
DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9
- DB 233,87,255,255,255 ; jmpq 323b <_sk_load_f16_hsw+0x21>
+ DB 233,87,255,255,255 ; jmpq 352f <_sk_load_f16_hsw+0x21>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,74,255,255,255 ; jmpq 323b <_sk_load_f16_hsw+0x21>
+ DB 233,74,255,255,255 ; jmpq 352f <_sk_load_f16_hsw+0x21>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,65,255,255,255 ; jmpq 323b <_sk_load_f16_hsw+0x21>
+ DB 233,65,255,255,255 ; jmpq 352f <_sk_load_f16_hsw+0x21>
PUBLIC _sk_gather_f16_hsw
_sk_gather_f16_hsw LABEL PROC
@@ -3327,7 +3474,7 @@ _sk_store_f16_hsw LABEL PROC
DB 196,65,57,98,205 ; vpunpckldq %xmm13,%xmm8,%xmm9
DB 196,65,57,106,197 ; vpunpckhdq %xmm13,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,27 ; jne 33e9 <_sk_store_f16_hsw+0x65>
+ DB 117,27 ; jne 36dd <_sk_store_f16_hsw+0x65>
DB 197,120,17,28,248 ; vmovups %xmm11,(%rax,%rdi,8)
DB 197,120,17,84,248,16 ; vmovups %xmm10,0x10(%rax,%rdi,8)
DB 197,120,17,76,248,32 ; vmovups %xmm9,0x20(%rax,%rdi,8)
@@ -3336,22 +3483,22 @@ _sk_store_f16_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 197,121,214,28,248 ; vmovq %xmm11,(%rax,%rdi,8)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,241 ; je 33e5 <_sk_store_f16_hsw+0x61>
+ DB 116,241 ; je 36d9 <_sk_store_f16_hsw+0x61>
DB 197,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%rax,%rdi,8)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,229 ; jb 33e5 <_sk_store_f16_hsw+0x61>
+ DB 114,229 ; jb 36d9 <_sk_store_f16_hsw+0x61>
DB 197,121,214,84,248,16 ; vmovq %xmm10,0x10(%rax,%rdi,8)
- DB 116,221 ; je 33e5 <_sk_store_f16_hsw+0x61>
+ DB 116,221 ; je 36d9 <_sk_store_f16_hsw+0x61>
DB 197,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%rax,%rdi,8)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,209 ; jb 33e5 <_sk_store_f16_hsw+0x61>
+ DB 114,209 ; jb 36d9 <_sk_store_f16_hsw+0x61>
DB 197,121,214,76,248,32 ; vmovq %xmm9,0x20(%rax,%rdi,8)
- DB 116,201 ; je 33e5 <_sk_store_f16_hsw+0x61>
+ DB 116,201 ; je 36d9 <_sk_store_f16_hsw+0x61>
DB 197,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%rax,%rdi,8)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,189 ; jb 33e5 <_sk_store_f16_hsw+0x61>
+ DB 114,189 ; jb 36d9 <_sk_store_f16_hsw+0x61>
DB 197,121,214,68,248,48 ; vmovq %xmm8,0x30(%rax,%rdi,8)
- DB 235,181 ; jmp 33e5 <_sk_store_f16_hsw+0x61>
+ DB 235,181 ; jmp 36d9 <_sk_store_f16_hsw+0x61>
PUBLIC _sk_load_u16_be_hsw
_sk_load_u16_be_hsw LABEL PROC
@@ -3359,7 +3506,7 @@ _sk_load_u16_be_hsw LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,205,0,0,0 ; jne 3513 <_sk_load_u16_be_hsw+0xe3>
+ DB 15,133,205,0,0,0 ; jne 3807 <_sk_load_u16_be_hsw+0xe3>
DB 196,65,121,16,4,64 ; vmovupd (%r8,%rax,2),%xmm8
DB 196,193,121,16,84,64,16 ; vmovupd 0x10(%r8,%rax,2),%xmm2
DB 196,193,121,16,92,64,32 ; vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -3408,29 +3555,29 @@ _sk_load_u16_be_hsw LABEL PROC
DB 196,65,123,16,4,64 ; vmovsd (%r8,%rax,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 3579 <_sk_load_u16_be_hsw+0x149>
+ DB 116,85 ; je 386d <_sk_load_u16_be_hsw+0x149>
DB 196,65,57,22,68,64,8 ; vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 3579 <_sk_load_u16_be_hsw+0x149>
+ DB 114,72 ; jb 386d <_sk_load_u16_be_hsw+0x149>
DB 196,193,123,16,84,64,16 ; vmovsd 0x10(%r8,%rax,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 3586 <_sk_load_u16_be_hsw+0x156>
+ DB 116,72 ; je 387a <_sk_load_u16_be_hsw+0x156>
DB 196,193,105,22,84,64,24 ; vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 3586 <_sk_load_u16_be_hsw+0x156>
+ DB 114,59 ; jb 387a <_sk_load_u16_be_hsw+0x156>
DB 196,193,123,16,92,64,32 ; vmovsd 0x20(%r8,%rax,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,5,255,255,255 ; je 3461 <_sk_load_u16_be_hsw+0x31>
+ DB 15,132,5,255,255,255 ; je 3755 <_sk_load_u16_be_hsw+0x31>
DB 196,193,97,22,92,64,40 ; vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,244,254,255,255 ; jb 3461 <_sk_load_u16_be_hsw+0x31>
+ DB 15,130,244,254,255,255 ; jb 3755 <_sk_load_u16_be_hsw+0x31>
DB 196,65,122,126,76,64,48 ; vmovq 0x30(%r8,%rax,2),%xmm9
- DB 233,232,254,255,255 ; jmpq 3461 <_sk_load_u16_be_hsw+0x31>
+ DB 233,232,254,255,255 ; jmpq 3755 <_sk_load_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,219,254,255,255 ; jmpq 3461 <_sk_load_u16_be_hsw+0x31>
+ DB 233,219,254,255,255 ; jmpq 3755 <_sk_load_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,210,254,255,255 ; jmpq 3461 <_sk_load_u16_be_hsw+0x31>
+ DB 233,210,254,255,255 ; jmpq 3755 <_sk_load_u16_be_hsw+0x31>
PUBLIC _sk_load_rgb_u16_be_hsw
_sk_load_rgb_u16_be_hsw LABEL PROC
@@ -3438,7 +3585,7 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,127 ; lea (%rdi,%rdi,2),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,211,0,0,0 ; jne 3674 <_sk_load_rgb_u16_be_hsw+0xe5>
+ DB 15,133,211,0,0,0 ; jne 3968 <_sk_load_rgb_u16_be_hsw+0xe5>
DB 196,193,122,111,4,64 ; vmovdqu (%r8,%rax,2),%xmm0
DB 196,193,122,111,84,64,12 ; vmovdqu 0xc(%r8,%rax,2),%xmm2
DB 196,193,122,111,76,64,24 ; vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -3488,36 +3635,36 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
DB 196,193,121,110,4,64 ; vmovd (%r8,%rax,2),%xmm0
DB 196,193,121,196,68,64,4,2 ; vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 368d <_sk_load_rgb_u16_be_hsw+0xfe>
- DB 233,72,255,255,255 ; jmpq 35d5 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,5 ; jne 3981 <_sk_load_rgb_u16_be_hsw+0xfe>
+ DB 233,72,255,255,255 ; jmpq 38c9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,76,64,6 ; vmovd 0x6(%r8,%rax,2),%xmm1
DB 196,65,113,196,68,64,10,2 ; vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 36bc <_sk_load_rgb_u16_be_hsw+0x12d>
+ DB 114,26 ; jb 39b0 <_sk_load_rgb_u16_be_hsw+0x12d>
DB 196,193,121,110,76,64,12 ; vmovd 0xc(%r8,%rax,2),%xmm1
DB 196,193,113,196,84,64,16,2 ; vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 36c1 <_sk_load_rgb_u16_be_hsw+0x132>
- DB 233,25,255,255,255 ; jmpq 35d5 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,20,255,255,255 ; jmpq 35d5 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 39b5 <_sk_load_rgb_u16_be_hsw+0x132>
+ DB 233,25,255,255,255 ; jmpq 38c9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,20,255,255,255 ; jmpq 38c9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,76,64,18 ; vmovd 0x12(%r8,%rax,2),%xmm1
DB 196,65,113,196,76,64,22,2 ; vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 36f0 <_sk_load_rgb_u16_be_hsw+0x161>
+ DB 114,26 ; jb 39e4 <_sk_load_rgb_u16_be_hsw+0x161>
DB 196,193,121,110,76,64,24 ; vmovd 0x18(%r8,%rax,2),%xmm1
DB 196,193,113,196,76,64,28,2 ; vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 36f5 <_sk_load_rgb_u16_be_hsw+0x166>
- DB 233,229,254,255,255 ; jmpq 35d5 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,224,254,255,255 ; jmpq 35d5 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 39e9 <_sk_load_rgb_u16_be_hsw+0x166>
+ DB 233,229,254,255,255 ; jmpq 38c9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,224,254,255,255 ; jmpq 38c9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,92,64,30 ; vmovd 0x1e(%r8,%rax,2),%xmm3
DB 196,65,97,196,92,64,34,2 ; vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 371e <_sk_load_rgb_u16_be_hsw+0x18f>
+ DB 114,20 ; jb 3a12 <_sk_load_rgb_u16_be_hsw+0x18f>
DB 196,193,121,110,92,64,36 ; vmovd 0x24(%r8,%rax,2),%xmm3
DB 196,193,97,196,92,64,40,2 ; vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- DB 233,183,254,255,255 ; jmpq 35d5 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,178,254,255,255 ; jmpq 35d5 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,183,254,255,255 ; jmpq 38c9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,178,254,255,255 ; jmpq 38c9 <_sk_load_rgb_u16_be_hsw+0x46>
PUBLIC _sk_store_u16_be_hsw
_sk_store_u16_be_hsw LABEL PROC
@@ -3564,7 +3711,7 @@ _sk_store_u16_be_hsw LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 381e <_sk_store_u16_be_hsw+0xfb>
+ DB 117,31 ; jne 3b12 <_sk_store_u16_be_hsw+0xfb>
DB 196,1,120,17,28,72 ; vmovups %xmm11,(%r8,%r9,2)
DB 196,1,120,17,84,72,16 ; vmovups %xmm10,0x10(%r8,%r9,2)
DB 196,1,120,17,76,72,32 ; vmovups %xmm9,0x20(%r8,%r9,2)
@@ -3573,31 +3720,31 @@ _sk_store_u16_be_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,1,121,214,28,72 ; vmovq %xmm11,(%r8,%r9,2)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 381a <_sk_store_u16_be_hsw+0xf7>
+ DB 116,240 ; je 3b0e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,92,72,8 ; vmovhpd %xmm11,0x8(%r8,%r9,2)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 381a <_sk_store_u16_be_hsw+0xf7>
+ DB 114,227 ; jb 3b0e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,84,72,16 ; vmovq %xmm10,0x10(%r8,%r9,2)
- DB 116,218 ; je 381a <_sk_store_u16_be_hsw+0xf7>
+ DB 116,218 ; je 3b0e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,84,72,24 ; vmovhpd %xmm10,0x18(%r8,%r9,2)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 381a <_sk_store_u16_be_hsw+0xf7>
+ DB 114,205 ; jb 3b0e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,76,72,32 ; vmovq %xmm9,0x20(%r8,%r9,2)
- DB 116,196 ; je 381a <_sk_store_u16_be_hsw+0xf7>
+ DB 116,196 ; je 3b0e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,76,72,40 ; vmovhpd %xmm9,0x28(%r8,%r9,2)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 381a <_sk_store_u16_be_hsw+0xf7>
+ DB 114,183 ; jb 3b0e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,68,72,48 ; vmovq %xmm8,0x30(%r8,%r9,2)
- DB 235,174 ; jmp 381a <_sk_store_u16_be_hsw+0xf7>
+ DB 235,174 ; jmp 3b0e <_sk_store_u16_be_hsw+0xf7>
PUBLIC _sk_load_f32_hsw
_sk_load_f32_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 119,110 ; ja 38e2 <_sk_load_f32_hsw+0x76>
+ DB 119,110 ; ja 3bd6 <_sk_load_f32_hsw+0x76>
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
- DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 390c <_sk_load_f32_hsw+0xa0>
+ DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 3c00 <_sk_load_f32_hsw+0xa0>
DB 73,99,4,138 ; movslq (%r10,%rcx,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -3654,7 +3801,7 @@ _sk_store_f32_hsw LABEL PROC
DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8
DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11
DB 72,133,201 ; test %rcx,%rcx
- DB 117,55 ; jne 3999 <_sk_store_f32_hsw+0x6d>
+ DB 117,55 ; jne 3c8d <_sk_store_f32_hsw+0x6d>
DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -3667,22 +3814,22 @@ _sk_store_f32_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 3995 <_sk_store_f32_hsw+0x69>
+ DB 116,240 ; je 3c89 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 3995 <_sk_store_f32_hsw+0x69>
+ DB 114,227 ; jb 3c89 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4)
- DB 116,218 ; je 3995 <_sk_store_f32_hsw+0x69>
+ DB 116,218 ; je 3c89 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 3995 <_sk_store_f32_hsw+0x69>
+ DB 114,205 ; jb 3c89 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- DB 116,195 ; je 3995 <_sk_store_f32_hsw+0x69>
+ DB 116,195 ; je 3c89 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,181 ; jb 3995 <_sk_store_f32_hsw+0x69>
+ DB 114,181 ; jb 3c89 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- DB 235,171 ; jmp 3995 <_sk_store_f32_hsw+0x69>
+ DB 235,171 ; jmp 3c89 <_sk_store_f32_hsw+0x69>
PUBLIC _sk_clamp_x_hsw
_sk_clamp_x_hsw LABEL PROC
@@ -3923,7 +4070,7 @@ _sk_linear_gradient_hsw LABEL PROC
DB 196,98,125,24,72,28 ; vbroadcastss 0x1c(%rax),%ymm9
DB 76,139,0 ; mov (%rax),%r8
DB 77,133,192 ; test %r8,%r8
- DB 15,132,143,0,0,0 ; je 3e25 <_sk_linear_gradient_hsw+0xb5>
+ DB 15,132,143,0,0,0 ; je 4119 <_sk_linear_gradient_hsw+0xb5>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 196,65,28,87,228 ; vxorps %ymm12,%ymm12,%ymm12
@@ -3950,8 +4097,8 @@ _sk_linear_gradient_hsw LABEL PROC
DB 196,67,13,74,201,208 ; vblendvps %ymm13,%ymm9,%ymm14,%ymm9
DB 72,131,192,36 ; add $0x24,%rax
DB 73,255,200 ; dec %r8
- DB 117,140 ; jne 3daf <_sk_linear_gradient_hsw+0x3f>
- DB 235,17 ; jmp 3e36 <_sk_linear_gradient_hsw+0xc6>
+ DB 117,140 ; jne 40a3 <_sk_linear_gradient_hsw+0x3f>
+ DB 235,17 ; jmp 412a <_sk_linear_gradient_hsw+0xc6>
DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
DB 197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2
DB 197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
@@ -5375,83 +5522,272 @@ _sk_to_srgb_avx LABEL PROC
PUBLIC _sk_from_2dot2_avx
_sk_from_2dot2_avx LABEL PROC
- DB 197,124,82,192 ; vrsqrtps %ymm0,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,200 ; vrsqrtps %ymm8,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 197,252,89,192 ; vmulps %ymm0,%ymm0,%ymm0
- DB 196,65,60,89,208 ; vmulps %ymm8,%ymm8,%ymm10
- DB 196,65,60,89,194 ; vmulps %ymm10,%ymm8,%ymm8
- DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 197,180,89,192 ; vmulps %ymm0,%ymm9,%ymm0
- DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
- DB 196,193,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm0
- DB 197,124,82,201 ; vrsqrtps %ymm1,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,209 ; vrsqrtps %ymm9,%ymm10
- DB 196,65,124,82,210 ; vrsqrtps %ymm10,%ymm10
- DB 197,244,89,201 ; vmulps %ymm1,%ymm1,%ymm1
- DB 196,65,52,89,217 ; vmulps %ymm9,%ymm9,%ymm11
- DB 196,65,52,89,203 ; vmulps %ymm11,%ymm9,%ymm9
- DB 196,193,116,89,201 ; vmulps %ymm9,%ymm1,%ymm1
- DB 197,172,89,201 ; vmulps %ymm1,%ymm10,%ymm1
- DB 196,193,116,95,200 ; vmaxps %ymm8,%ymm1,%ymm1
- DB 197,124,82,202 ; vrsqrtps %ymm2,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,209 ; vrsqrtps %ymm9,%ymm10
- DB 196,65,124,82,210 ; vrsqrtps %ymm10,%ymm10
- DB 197,236,89,210 ; vmulps %ymm2,%ymm2,%ymm2
- DB 196,65,52,89,217 ; vmulps %ymm9,%ymm9,%ymm11
- DB 196,65,52,89,203 ; vmulps %ymm11,%ymm9,%ymm9
- DB 196,193,108,89,209 ; vmulps %ymm9,%ymm2,%ymm2
- DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
- DB 196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
- DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 72,129,236,216,0,0,0 ; sub $0xd8,%rsp
+ DB 197,252,17,188,36,160,0,0,0 ; vmovups %ymm7,0xa0(%rsp)
+ DB 197,252,17,180,36,128,0,0,0 ; vmovups %ymm6,0x80(%rsp)
+ DB 197,252,17,108,36,96 ; vmovups %ymm5,0x60(%rsp)
+ DB 197,252,17,100,36,64 ; vmovups %ymm4,0x40(%rsp)
+ DB 197,252,17,92,36,32 ; vmovups %ymm3,0x20(%rsp)
+ DB 197,252,17,20,36 ; vmovups %ymm2,(%rsp)
+ DB 197,252,40,241 ; vmovaps %ymm1,%ymm6
+ DB 65,184,205,204,12,64 ; mov $0x400ccccd,%r8d
+ DB 197,252,91,200 ; vcvtdq2ps %ymm0,%ymm1
+ DB 184,0,0,0,52 ; mov $0x34000000,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,194,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm8
+ DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
+ DB 184,255,255,127,0 ; mov $0x7fffff,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,202,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm9
+ DB 197,180,84,192 ; vandps %ymm0,%ymm9,%ymm0
+ DB 184,0,0,0,63 ; mov $0x3f000000,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,234,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm5
+ DB 197,252,86,197 ; vorps %ymm5,%ymm0,%ymm0
+ DB 184,119,115,248,66 ; mov $0x42f87377,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm10
+ DB 196,193,116,92,202 ; vsubps %ymm10,%ymm1,%ymm1
+ DB 184,117,191,191,63 ; mov $0x3fbfbf75,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,218,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm11
+ DB 196,193,124,89,211 ; vmulps %ymm11,%ymm0,%ymm2
+ DB 197,244,92,202 ; vsubps %ymm2,%ymm1,%ymm1
+ DB 184,163,233,220,63 ; mov $0x3fdce9a3,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,226,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ DB 184,249,68,180,62 ; mov $0x3eb444f9,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,234,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm13
+ DB 196,193,124,88,197 ; vaddps %ymm13,%ymm0,%ymm0
+ DB 197,156,94,192 ; vdivps %ymm0,%ymm12,%ymm0
+ DB 197,244,92,192 ; vsubps %ymm0,%ymm1,%ymm0
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,241,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm14
+ DB 197,140,89,192 ; vmulps %ymm0,%ymm14,%ymm0
+ DB 196,227,125,8,200,1 ; vroundps $0x1,%ymm0,%ymm1
+ DB 197,252,92,225 ; vsubps %ymm1,%ymm0,%ymm4
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 184,81,140,242,66 ; mov $0x42f28c51,%eax
+ DB 197,249,110,200 ; vmovd %eax,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,249,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm15
+ DB 197,132,88,192 ; vaddps %ymm0,%ymm15,%ymm0
+ DB 184,141,188,190,63 ; mov $0x3fbebc8d,%eax
+ DB 197,249,110,200 ; vmovd %eax,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,217,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm3
+ DB 197,228,89,204 ; vmulps %ymm4,%ymm3,%ymm1
+ DB 197,252,92,209 ; vsubps %ymm1,%ymm0,%ymm2
+ DB 184,254,210,221,65 ; mov $0x41ddd2fe,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,200,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm1
+ DB 184,248,245,154,64 ; mov $0x409af5f8,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,252,92,228 ; vsubps %ymm4,%ymm0,%ymm4
+ DB 197,244,94,228 ; vdivps %ymm4,%ymm1,%ymm4
+ DB 197,236,88,228 ; vaddps %ymm4,%ymm2,%ymm4
+ DB 197,252,91,214 ; vcvtdq2ps %ymm6,%ymm2
+ DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
+ DB 197,180,84,246 ; vandps %ymm6,%ymm9,%ymm6
+ DB 197,204,86,245 ; vorps %ymm5,%ymm6,%ymm6
+ DB 196,193,108,92,210 ; vsubps %ymm10,%ymm2,%ymm2
+ DB 196,193,76,89,251 ; vmulps %ymm11,%ymm6,%ymm7
+ DB 197,236,92,215 ; vsubps %ymm7,%ymm2,%ymm2
+ DB 196,193,76,88,245 ; vaddps %ymm13,%ymm6,%ymm6
+ DB 197,156,94,246 ; vdivps %ymm6,%ymm12,%ymm6
+ DB 197,236,92,214 ; vsubps %ymm6,%ymm2,%ymm2
+ DB 197,140,89,210 ; vmulps %ymm2,%ymm14,%ymm2
+ DB 196,227,125,8,242,1 ; vroundps $0x1,%ymm2,%ymm6
+ DB 197,236,92,246 ; vsubps %ymm6,%ymm2,%ymm6
+ DB 197,132,88,210 ; vaddps %ymm2,%ymm15,%ymm2
+ DB 197,228,89,254 ; vmulps %ymm6,%ymm3,%ymm7
+ DB 197,236,92,215 ; vsubps %ymm7,%ymm2,%ymm2
+ DB 197,252,92,246 ; vsubps %ymm6,%ymm0,%ymm6
+ DB 197,244,94,246 ; vdivps %ymm6,%ymm1,%ymm6
+ DB 197,236,88,214 ; vaddps %ymm6,%ymm2,%ymm2
+ DB 197,252,16,60,36 ; vmovups (%rsp),%ymm7
+ DB 197,252,91,247 ; vcvtdq2ps %ymm7,%ymm6
+ DB 196,193,76,89,240 ; vmulps %ymm8,%ymm6,%ymm6
+ DB 197,180,84,255 ; vandps %ymm7,%ymm9,%ymm7
+ DB 197,196,86,237 ; vorps %ymm5,%ymm7,%ymm5
+ DB 196,193,76,92,242 ; vsubps %ymm10,%ymm6,%ymm6
+ DB 196,193,84,89,251 ; vmulps %ymm11,%ymm5,%ymm7
+ DB 197,204,92,247 ; vsubps %ymm7,%ymm6,%ymm6
+ DB 196,193,84,88,237 ; vaddps %ymm13,%ymm5,%ymm5
+ DB 197,156,94,237 ; vdivps %ymm5,%ymm12,%ymm5
+ DB 197,204,92,237 ; vsubps %ymm5,%ymm6,%ymm5
+ DB 197,140,89,237 ; vmulps %ymm5,%ymm14,%ymm5
+ DB 196,227,125,8,245,1 ; vroundps $0x1,%ymm5,%ymm6
+ DB 197,212,92,246 ; vsubps %ymm6,%ymm5,%ymm6
+ DB 197,132,88,237 ; vaddps %ymm5,%ymm15,%ymm5
+ DB 197,228,89,222 ; vmulps %ymm6,%ymm3,%ymm3
+ DB 197,212,92,219 ; vsubps %ymm3,%ymm5,%ymm3
+ DB 197,252,92,198 ; vsubps %ymm6,%ymm0,%ymm0
+ DB 197,244,94,192 ; vdivps %ymm0,%ymm1,%ymm0
+ DB 197,228,88,192 ; vaddps %ymm0,%ymm3,%ymm0
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,244,89,220 ; vmulps %ymm4,%ymm1,%ymm3
+ DB 197,244,89,210 ; vmulps %ymm2,%ymm1,%ymm2
+ DB 197,244,89,224 ; vmulps %ymm0,%ymm1,%ymm4
+ DB 197,253,91,195 ; vcvtps2dq %ymm3,%ymm0
+ DB 197,253,91,202 ; vcvtps2dq %ymm2,%ymm1
+ DB 197,253,91,212 ; vcvtps2dq %ymm4,%ymm2
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 197,252,16,92,36,32 ; vmovups 0x20(%rsp),%ymm3
+ DB 197,252,16,100,36,64 ; vmovups 0x40(%rsp),%ymm4
+ DB 197,252,16,108,36,96 ; vmovups 0x60(%rsp),%ymm5
+ DB 197,252,16,180,36,128,0,0,0 ; vmovups 0x80(%rsp),%ymm6
+ DB 197,252,16,188,36,160,0,0,0 ; vmovups 0xa0(%rsp),%ymm7
+ DB 72,129,196,216,0,0,0 ; add $0xd8,%rsp
DB 255,224 ; jmpq *%rax
PUBLIC _sk_to_2dot2_avx
_sk_to_2dot2_avx LABEL PROC
- DB 197,252,82,192 ; vrsqrtps %ymm0,%ymm0
- DB 197,124,82,192 ; vrsqrtps %ymm0,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,192 ; vrsqrtps %ymm8,%ymm8
- DB 196,65,124,82,200 ; vrsqrtps %ymm8,%ymm9
- DB 197,252,83,192 ; vrcpps %ymm0,%ymm0
- DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
- DB 196,65,124,83,193 ; vrcpps %ymm9,%ymm8
- DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
- DB 196,193,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm0
- DB 197,252,82,201 ; vrsqrtps %ymm1,%ymm1
- DB 197,124,82,201 ; vrsqrtps %ymm1,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,209 ; vrsqrtps %ymm9,%ymm10
- DB 197,252,83,201 ; vrcpps %ymm1,%ymm1
- DB 197,180,89,201 ; vmulps %ymm1,%ymm9,%ymm1
- DB 196,65,124,83,202 ; vrcpps %ymm10,%ymm9
- DB 196,193,116,89,201 ; vmulps %ymm9,%ymm1,%ymm1
- DB 196,193,116,95,200 ; vmaxps %ymm8,%ymm1,%ymm1
- DB 197,252,82,210 ; vrsqrtps %ymm2,%ymm2
- DB 197,124,82,202 ; vrsqrtps %ymm2,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
- DB 196,65,124,82,209 ; vrsqrtps %ymm9,%ymm10
- DB 197,252,83,210 ; vrcpps %ymm2,%ymm2
- DB 197,180,89,210 ; vmulps %ymm2,%ymm9,%ymm2
- DB 196,65,124,83,202 ; vrcpps %ymm10,%ymm9
- DB 196,193,108,89,209 ; vmulps %ymm9,%ymm2,%ymm2
- DB 196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
- DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 72,129,236,216,0,0,0 ; sub $0xd8,%rsp
+ DB 197,252,17,188,36,160,0,0,0 ; vmovups %ymm7,0xa0(%rsp)
+ DB 197,252,17,180,36,128,0,0,0 ; vmovups %ymm6,0x80(%rsp)
+ DB 197,252,17,108,36,96 ; vmovups %ymm5,0x60(%rsp)
+ DB 197,252,17,100,36,64 ; vmovups %ymm4,0x40(%rsp)
+ DB 197,252,17,92,36,32 ; vmovups %ymm3,0x20(%rsp)
+ DB 197,252,17,20,36 ; vmovups %ymm2,(%rsp)
+ DB 197,252,40,241 ; vmovaps %ymm1,%ymm6
+ DB 65,184,46,186,232,62 ; mov $0x3ee8ba2e,%r8d
+ DB 197,252,91,200 ; vcvtdq2ps %ymm0,%ymm1
+ DB 184,0,0,0,52 ; mov $0x34000000,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,194,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm8
+ DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
+ DB 184,255,255,127,0 ; mov $0x7fffff,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,202,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm9
+ DB 197,180,84,192 ; vandps %ymm0,%ymm9,%ymm0
+ DB 184,0,0,0,63 ; mov $0x3f000000,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,234,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm5
+ DB 197,252,86,197 ; vorps %ymm5,%ymm0,%ymm0
+ DB 184,119,115,248,66 ; mov $0x42f87377,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm10
+ DB 196,193,116,92,202 ; vsubps %ymm10,%ymm1,%ymm1
+ DB 184,117,191,191,63 ; mov $0x3fbfbf75,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,218,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm11
+ DB 196,193,124,89,211 ; vmulps %ymm11,%ymm0,%ymm2
+ DB 197,244,92,202 ; vsubps %ymm2,%ymm1,%ymm1
+ DB 184,163,233,220,63 ; mov $0x3fdce9a3,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,226,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ DB 184,249,68,180,62 ; mov $0x3eb444f9,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,234,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm13
+ DB 196,193,124,88,197 ; vaddps %ymm13,%ymm0,%ymm0
+ DB 197,156,94,192 ; vdivps %ymm0,%ymm12,%ymm0
+ DB 197,244,92,192 ; vsubps %ymm0,%ymm1,%ymm0
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,241,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm14
+ DB 197,140,89,192 ; vmulps %ymm0,%ymm14,%ymm0
+ DB 196,227,125,8,200,1 ; vroundps $0x1,%ymm0,%ymm1
+ DB 197,252,92,225 ; vsubps %ymm1,%ymm0,%ymm4
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 184,81,140,242,66 ; mov $0x42f28c51,%eax
+ DB 197,249,110,200 ; vmovd %eax,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,249,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm15
+ DB 197,132,88,192 ; vaddps %ymm0,%ymm15,%ymm0
+ DB 184,141,188,190,63 ; mov $0x3fbebc8d,%eax
+ DB 197,249,110,200 ; vmovd %eax,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,217,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm3
+ DB 197,228,89,204 ; vmulps %ymm4,%ymm3,%ymm1
+ DB 197,252,92,209 ; vsubps %ymm1,%ymm0,%ymm2
+ DB 184,254,210,221,65 ; mov $0x41ddd2fe,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,200,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm1
+ DB 184,248,245,154,64 ; mov $0x409af5f8,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,252,92,228 ; vsubps %ymm4,%ymm0,%ymm4
+ DB 197,244,94,228 ; vdivps %ymm4,%ymm1,%ymm4
+ DB 197,236,88,228 ; vaddps %ymm4,%ymm2,%ymm4
+ DB 197,252,91,214 ; vcvtdq2ps %ymm6,%ymm2
+ DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
+ DB 197,180,84,246 ; vandps %ymm6,%ymm9,%ymm6
+ DB 197,204,86,245 ; vorps %ymm5,%ymm6,%ymm6
+ DB 196,193,108,92,210 ; vsubps %ymm10,%ymm2,%ymm2
+ DB 196,193,76,89,251 ; vmulps %ymm11,%ymm6,%ymm7
+ DB 197,236,92,215 ; vsubps %ymm7,%ymm2,%ymm2
+ DB 196,193,76,88,245 ; vaddps %ymm13,%ymm6,%ymm6
+ DB 197,156,94,246 ; vdivps %ymm6,%ymm12,%ymm6
+ DB 197,236,92,214 ; vsubps %ymm6,%ymm2,%ymm2
+ DB 197,140,89,210 ; vmulps %ymm2,%ymm14,%ymm2
+ DB 196,227,125,8,242,1 ; vroundps $0x1,%ymm2,%ymm6
+ DB 197,236,92,246 ; vsubps %ymm6,%ymm2,%ymm6
+ DB 197,132,88,210 ; vaddps %ymm2,%ymm15,%ymm2
+ DB 197,228,89,254 ; vmulps %ymm6,%ymm3,%ymm7
+ DB 197,236,92,215 ; vsubps %ymm7,%ymm2,%ymm2
+ DB 197,252,92,246 ; vsubps %ymm6,%ymm0,%ymm6
+ DB 197,244,94,246 ; vdivps %ymm6,%ymm1,%ymm6
+ DB 197,236,88,214 ; vaddps %ymm6,%ymm2,%ymm2
+ DB 197,252,16,60,36 ; vmovups (%rsp),%ymm7
+ DB 197,252,91,247 ; vcvtdq2ps %ymm7,%ymm6
+ DB 196,193,76,89,240 ; vmulps %ymm8,%ymm6,%ymm6
+ DB 197,180,84,255 ; vandps %ymm7,%ymm9,%ymm7
+ DB 197,196,86,237 ; vorps %ymm5,%ymm7,%ymm5
+ DB 196,193,76,92,242 ; vsubps %ymm10,%ymm6,%ymm6
+ DB 196,193,84,89,251 ; vmulps %ymm11,%ymm5,%ymm7
+ DB 197,204,92,247 ; vsubps %ymm7,%ymm6,%ymm6
+ DB 196,193,84,88,237 ; vaddps %ymm13,%ymm5,%ymm5
+ DB 197,156,94,237 ; vdivps %ymm5,%ymm12,%ymm5
+ DB 197,204,92,237 ; vsubps %ymm5,%ymm6,%ymm5
+ DB 197,140,89,237 ; vmulps %ymm5,%ymm14,%ymm5
+ DB 196,227,125,8,245,1 ; vroundps $0x1,%ymm5,%ymm6
+ DB 197,212,92,246 ; vsubps %ymm6,%ymm5,%ymm6
+ DB 197,132,88,237 ; vaddps %ymm5,%ymm15,%ymm5
+ DB 197,228,89,222 ; vmulps %ymm6,%ymm3,%ymm3
+ DB 197,212,92,219 ; vsubps %ymm3,%ymm5,%ymm3
+ DB 197,252,92,198 ; vsubps %ymm6,%ymm0,%ymm0
+ DB 197,244,94,192 ; vdivps %ymm0,%ymm1,%ymm0
+ DB 197,228,88,192 ; vaddps %ymm0,%ymm3,%ymm0
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,244,89,220 ; vmulps %ymm4,%ymm1,%ymm3
+ DB 197,244,89,210 ; vmulps %ymm2,%ymm1,%ymm2
+ DB 197,244,89,224 ; vmulps %ymm0,%ymm1,%ymm4
+ DB 197,253,91,195 ; vcvtps2dq %ymm3,%ymm0
+ DB 197,253,91,202 ; vcvtps2dq %ymm2,%ymm1
+ DB 197,253,91,212 ; vcvtps2dq %ymm4,%ymm2
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 197,252,16,92,36,32 ; vmovups 0x20(%rsp),%ymm3
+ DB 197,252,16,100,36,64 ; vmovups 0x40(%rsp),%ymm4
+ DB 197,252,16,108,36,96 ; vmovups 0x60(%rsp),%ymm5
+ DB 197,252,16,180,36,128,0,0,0 ; vmovups 0x80(%rsp),%ymm6
+ DB 197,252,16,188,36,160,0,0,0 ; vmovups 0xa0(%rsp),%ymm7
+ DB 72,129,196,216,0,0,0 ; add $0xd8,%rsp
DB 255,224 ; jmpq *%rax
PUBLIC _sk_rgb_to_hsl_avx
@@ -5655,7 +5991,7 @@ _sk_scale_u8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,80 ; jne 14ff <_sk_scale_u8_avx+0x60>
+ DB 117,80 ; jne 18bb <_sk_scale_u8_avx+0x60>
DB 197,122,126,0 ; vmovq (%rax),%xmm8
DB 196,66,121,49,200 ; vpmovzxbd %xmm8,%xmm9
DB 196,67,121,4,192,229 ; vpermilps $0xe5,%xmm8,%xmm8
@@ -5683,9 +6019,9 @@ _sk_scale_u8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 1507 <_sk_scale_u8_avx+0x68>
+ DB 117,234 ; jne 18c3 <_sk_scale_u8_avx+0x68>
DB 196,65,249,110,193 ; vmovq %r9,%xmm8
- DB 235,143 ; jmp 14b3 <_sk_scale_u8_avx+0x14>
+ DB 235,143 ; jmp 186f <_sk_scale_u8_avx+0x14>
PUBLIC _sk_lerp_1_float_avx
_sk_lerp_1_float_avx LABEL PROC
@@ -5713,7 +6049,7 @@ _sk_lerp_u8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,116 ; jne 15e7 <_sk_lerp_u8_avx+0x84>
+ DB 117,116 ; jne 19a3 <_sk_lerp_u8_avx+0x84>
DB 197,122,126,0 ; vmovq (%rax),%xmm8
DB 196,66,121,49,200 ; vpmovzxbd %xmm8,%xmm9
DB 196,67,121,4,192,229 ; vpermilps $0xe5,%xmm8,%xmm8
@@ -5749,16 +6085,16 @@ _sk_lerp_u8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 15ef <_sk_lerp_u8_avx+0x8c>
+ DB 117,234 ; jne 19ab <_sk_lerp_u8_avx+0x8c>
DB 196,65,249,110,193 ; vmovq %r9,%xmm8
- DB 233,104,255,255,255 ; jmpq 1577 <_sk_lerp_u8_avx+0x14>
+ DB 233,104,255,255,255 ; jmpq 1933 <_sk_lerp_u8_avx+0x14>
PUBLIC _sk_lerp_565_avx
_sk_lerp_565_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,250,0,0,0 ; jne 1717 <_sk_lerp_565_avx+0x108>
+ DB 15,133,250,0,0,0 ; jne 1ad3 <_sk_lerp_565_avx+0x108>
DB 196,65,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm8
DB 197,225,239,219 ; vpxor %xmm3,%xmm3,%xmm3
DB 197,185,105,219 ; vpunpckhwd %xmm3,%xmm8,%xmm3
@@ -5817,9 +6153,9 @@ _sk_lerp_565_avx LABEL PROC
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,243,254,255,255 ; ja 1623 <_sk_lerp_565_avx+0x14>
+ DB 15,135,243,254,255,255 ; ja 19df <_sk_lerp_565_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,73,0,0,0 ; lea 0x49(%rip),%r9 # 1784 <_sk_lerp_565_avx+0x175>
+ DB 76,141,13,73,0,0,0 ; lea 0x49(%rip),%r9 # 1b40 <_sk_lerp_565_avx+0x175>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -5831,7 +6167,7 @@ _sk_lerp_565_avx LABEL PROC
DB 196,65,57,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
DB 196,65,57,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
DB 196,65,57,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm8,%xmm8
- DB 233,159,254,255,255 ; jmpq 1623 <_sk_lerp_565_avx+0x14>
+ DB 233,159,254,255,255 ; jmpq 19df <_sk_lerp_565_avx+0x14>
DB 244 ; hlt
DB 255 ; (bad)
DB 255 ; (bad)
@@ -5867,7 +6203,7 @@ _sk_load_tables_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,0 ; mov (%rax),%r8
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,56,2,0,0 ; jne 19f0 <_sk_load_tables_avx+0x250>
+ DB 15,133,56,2,0,0 ; jne 1dac <_sk_load_tables_avx+0x250>
DB 196,65,124,16,4,184 ; vmovups (%r8,%rdi,4),%ymm8
DB 187,255,0,0,0 ; mov $0xff,%ebx
DB 197,249,110,195 ; vmovd %ebx,%xmm0
@@ -5986,9 +6322,9 @@ _sk_load_tables_avx LABEL PROC
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 254,203 ; dec %bl
DB 128,251,6 ; cmp $0x6,%bl
- DB 15,135,185,253,255,255 ; ja 17be <_sk_load_tables_avx+0x1e>
+ DB 15,135,185,253,255,255 ; ja 1b7a <_sk_load_tables_avx+0x1e>
DB 15,182,219 ; movzbl %bl,%ebx
- DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 1a98 <_sk_load_tables_avx+0x2f8>
+ DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 1e54 <_sk_load_tables_avx+0x2f8>
DB 73,99,28,153 ; movslq (%r9,%rbx,4),%rbx
DB 76,1,203 ; add %r9,%rbx
DB 255,227 ; jmpq *%rbx
@@ -6011,7 +6347,7 @@ _sk_load_tables_avx LABEL PROC
DB 196,99,61,12,192,15 ; vblendps $0xf,%ymm0,%ymm8,%ymm8
DB 196,195,57,34,4,184,0 ; vpinsrd $0x0,(%r8,%rdi,4),%xmm8,%xmm0
DB 196,99,61,12,192,15 ; vblendps $0xf,%ymm0,%ymm8,%ymm8
- DB 233,38,253,255,255 ; jmpq 17be <_sk_load_tables_avx+0x1e>
+ DB 233,38,253,255,255 ; jmpq 1b7a <_sk_load_tables_avx+0x1e>
DB 238 ; out %al,(%dx)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -6037,7 +6373,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,165,2,0,0 ; jne 1d6f <_sk_load_tables_u16_be_avx+0x2bb>
+ DB 15,133,165,2,0,0 ; jne 212b <_sk_load_tables_u16_be_avx+0x2bb>
DB 196,1,121,16,4,72 ; vmovupd (%r8,%r9,2),%xmm8
DB 196,129,121,16,84,72,16 ; vmovupd 0x10(%r8,%r9,2),%xmm2
DB 196,129,121,16,92,72,32 ; vmovupd 0x20(%r8,%r9,2),%xmm3
@@ -6181,29 +6517,29 @@ _sk_load_tables_u16_be_avx LABEL PROC
DB 196,1,123,16,4,72 ; vmovsd (%r8,%r9,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 1dd5 <_sk_load_tables_u16_be_avx+0x321>
+ DB 116,85 ; je 2191 <_sk_load_tables_u16_be_avx+0x321>
DB 196,1,57,22,68,72,8 ; vmovhpd 0x8(%r8,%r9,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 1dd5 <_sk_load_tables_u16_be_avx+0x321>
+ DB 114,72 ; jb 2191 <_sk_load_tables_u16_be_avx+0x321>
DB 196,129,123,16,84,72,16 ; vmovsd 0x10(%r8,%r9,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 1de2 <_sk_load_tables_u16_be_avx+0x32e>
+ DB 116,72 ; je 219e <_sk_load_tables_u16_be_avx+0x32e>
DB 196,129,105,22,84,72,24 ; vmovhpd 0x18(%r8,%r9,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 1de2 <_sk_load_tables_u16_be_avx+0x32e>
+ DB 114,59 ; jb 219e <_sk_load_tables_u16_be_avx+0x32e>
DB 196,129,123,16,92,72,32 ; vmovsd 0x20(%r8,%r9,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,45,253,255,255 ; je 1ae5 <_sk_load_tables_u16_be_avx+0x31>
+ DB 15,132,45,253,255,255 ; je 1ea1 <_sk_load_tables_u16_be_avx+0x31>
DB 196,129,97,22,92,72,40 ; vmovhpd 0x28(%r8,%r9,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,28,253,255,255 ; jb 1ae5 <_sk_load_tables_u16_be_avx+0x31>
+ DB 15,130,28,253,255,255 ; jb 1ea1 <_sk_load_tables_u16_be_avx+0x31>
DB 196,1,122,126,76,72,48 ; vmovq 0x30(%r8,%r9,2),%xmm9
- DB 233,16,253,255,255 ; jmpq 1ae5 <_sk_load_tables_u16_be_avx+0x31>
+ DB 233,16,253,255,255 ; jmpq 1ea1 <_sk_load_tables_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,3,253,255,255 ; jmpq 1ae5 <_sk_load_tables_u16_be_avx+0x31>
+ DB 233,3,253,255,255 ; jmpq 1ea1 <_sk_load_tables_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,250,252,255,255 ; jmpq 1ae5 <_sk_load_tables_u16_be_avx+0x31>
+ DB 233,250,252,255,255 ; jmpq 1ea1 <_sk_load_tables_u16_be_avx+0x31>
PUBLIC _sk_load_tables_rgb_u16_be_avx
_sk_load_tables_rgb_u16_be_avx LABEL PROC
@@ -6211,7 +6547,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,127 ; lea (%rdi,%rdi,2),%r9
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,146,2,0,0 ; jne 208f <_sk_load_tables_rgb_u16_be_avx+0x2a4>
+ DB 15,133,146,2,0,0 ; jne 244b <_sk_load_tables_rgb_u16_be_avx+0x2a4>
DB 196,129,122,111,4,72 ; vmovdqu (%r8,%r9,2),%xmm0
DB 196,129,122,111,84,72,12 ; vmovdqu 0xc(%r8,%r9,2),%xmm2
DB 196,129,122,111,76,72,24 ; vmovdqu 0x18(%r8,%r9,2),%xmm1
@@ -6351,36 +6687,36 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
DB 196,129,121,110,4,72 ; vmovd (%r8,%r9,2),%xmm0
DB 196,129,121,196,68,72,4,2 ; vpinsrw $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 20a8 <_sk_load_tables_rgb_u16_be_avx+0x2bd>
- DB 233,137,253,255,255 ; jmpq 1e31 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 117,5 ; jne 2464 <_sk_load_tables_rgb_u16_be_avx+0x2bd>
+ DB 233,137,253,255,255 ; jmpq 21ed <_sk_load_tables_rgb_u16_be_avx+0x46>
DB 196,129,121,110,76,72,6 ; vmovd 0x6(%r8,%r9,2),%xmm1
DB 196,1,113,196,68,72,10,2 ; vpinsrw $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 20d7 <_sk_load_tables_rgb_u16_be_avx+0x2ec>
+ DB 114,26 ; jb 2493 <_sk_load_tables_rgb_u16_be_avx+0x2ec>
DB 196,129,121,110,76,72,12 ; vmovd 0xc(%r8,%r9,2),%xmm1
DB 196,129,113,196,84,72,16,2 ; vpinsrw $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 20dc <_sk_load_tables_rgb_u16_be_avx+0x2f1>
- DB 233,90,253,255,255 ; jmpq 1e31 <_sk_load_tables_rgb_u16_be_avx+0x46>
- DB 233,85,253,255,255 ; jmpq 1e31 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 2498 <_sk_load_tables_rgb_u16_be_avx+0x2f1>
+ DB 233,90,253,255,255 ; jmpq 21ed <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 233,85,253,255,255 ; jmpq 21ed <_sk_load_tables_rgb_u16_be_avx+0x46>
DB 196,129,121,110,76,72,18 ; vmovd 0x12(%r8,%r9,2),%xmm1
DB 196,1,113,196,76,72,22,2 ; vpinsrw $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 210b <_sk_load_tables_rgb_u16_be_avx+0x320>
+ DB 114,26 ; jb 24c7 <_sk_load_tables_rgb_u16_be_avx+0x320>
DB 196,129,121,110,76,72,24 ; vmovd 0x18(%r8,%r9,2),%xmm1
DB 196,129,113,196,76,72,28,2 ; vpinsrw $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 2110 <_sk_load_tables_rgb_u16_be_avx+0x325>
- DB 233,38,253,255,255 ; jmpq 1e31 <_sk_load_tables_rgb_u16_be_avx+0x46>
- DB 233,33,253,255,255 ; jmpq 1e31 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 24cc <_sk_load_tables_rgb_u16_be_avx+0x325>
+ DB 233,38,253,255,255 ; jmpq 21ed <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 233,33,253,255,255 ; jmpq 21ed <_sk_load_tables_rgb_u16_be_avx+0x46>
DB 196,129,121,110,92,72,30 ; vmovd 0x1e(%r8,%r9,2),%xmm3
DB 196,1,97,196,92,72,34,2 ; vpinsrw $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 2139 <_sk_load_tables_rgb_u16_be_avx+0x34e>
+ DB 114,20 ; jb 24f5 <_sk_load_tables_rgb_u16_be_avx+0x34e>
DB 196,129,121,110,92,72,36 ; vmovd 0x24(%r8,%r9,2),%xmm3
DB 196,129,97,196,92,72,40,2 ; vpinsrw $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
- DB 233,248,252,255,255 ; jmpq 1e31 <_sk_load_tables_rgb_u16_be_avx+0x46>
- DB 233,243,252,255,255 ; jmpq 1e31 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 233,248,252,255,255 ; jmpq 21ed <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 233,243,252,255,255 ; jmpq 21ed <_sk_load_tables_rgb_u16_be_avx+0x46>
PUBLIC _sk_byte_tables_avx
_sk_byte_tables_avx LABEL PROC
@@ -7344,7 +7680,7 @@ _sk_load_a8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,74 ; jne 32d0 <_sk_load_a8_avx+0x5a>
+ DB 117,74 ; jne 368c <_sk_load_a8_avx+0x5a>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1
DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0
@@ -7371,9 +7707,9 @@ _sk_load_a8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 32d8 <_sk_load_a8_avx+0x62>
+ DB 117,234 ; jne 3694 <_sk_load_a8_avx+0x62>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,149 ; jmp 328a <_sk_load_a8_avx+0x14>
+ DB 235,149 ; jmp 3646 <_sk_load_a8_avx+0x14>
PUBLIC _sk_gather_a8_avx
_sk_gather_a8_avx LABEL PROC
@@ -7450,7 +7786,7 @@ _sk_store_a8_avx LABEL PROC
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 3431 <_sk_store_a8_avx+0x42>
+ DB 117,10 ; jne 37ed <_sk_store_a8_avx+0x42>
DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7458,10 +7794,10 @@ _sk_store_a8_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 342d <_sk_store_a8_avx+0x3e>
+ DB 119,236 ; ja 37e9 <_sk_store_a8_avx+0x3e>
DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 3494 <_sk_store_a8_avx+0xa5>
+ DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 3850 <_sk_store_a8_avx+0xa5>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -7472,7 +7808,7 @@ _sk_store_a8_avx LABEL PROC
DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- DB 235,154 ; jmp 342d <_sk_store_a8_avx+0x3e>
+ DB 235,154 ; jmp 37e9 <_sk_store_a8_avx+0x3e>
DB 144 ; nop
DB 246,255 ; idiv %bh
DB 255 ; (bad)
@@ -7504,7 +7840,7 @@ _sk_load_g8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,91 ; jne 351b <_sk_load_g8_avx+0x6b>
+ DB 117,91 ; jne 38d7 <_sk_load_g8_avx+0x6b>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1
DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0
@@ -7534,9 +7870,9 @@ _sk_load_g8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 3523 <_sk_load_g8_avx+0x73>
+ DB 117,234 ; jne 38df <_sk_load_g8_avx+0x73>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,132 ; jmp 34c4 <_sk_load_g8_avx+0x14>
+ DB 235,132 ; jmp 3880 <_sk_load_g8_avx+0x14>
PUBLIC _sk_gather_g8_avx
_sk_gather_g8_avx LABEL PROC
@@ -7607,9 +7943,9 @@ _sk_gather_i8_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 365a <_sk_gather_i8_avx+0xf>
+ DB 116,5 ; je 3a16 <_sk_gather_i8_avx+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 365c <_sk_gather_i8_avx+0x11>
+ DB 235,2 ; jmp 3a18 <_sk_gather_i8_avx+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 65,87 ; push %r15
DB 65,86 ; push %r14
@@ -7712,7 +8048,7 @@ _sk_load_565_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,209,0,0,0 ; jne 38f6 <_sk_load_565_avx+0xdf>
+ DB 15,133,209,0,0,0 ; jne 3cb2 <_sk_load_565_avx+0xdf>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -7762,9 +8098,9 @@ _sk_load_565_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,29,255,255,255 ; ja 382b <_sk_load_565_avx+0x14>
+ DB 15,135,29,255,255,255 ; ja 3be7 <_sk_load_565_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3964 <_sk_load_565_avx+0x14d>
+ DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3d20 <_sk_load_565_avx+0x14d>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -7776,7 +8112,7 @@ _sk_load_565_avx LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,201,254,255,255 ; jmpq 382b <_sk_load_565_avx+0x14>
+ DB 233,201,254,255,255 ; jmpq 3be7 <_sk_load_565_avx+0x14>
DB 102,144 ; xchg %ax,%ax
DB 242,255 ; repnz (bad)
DB 255 ; (bad)
@@ -7929,7 +8265,7 @@ _sk_store_565_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 3baf <_sk_store_565_avx+0x9e>
+ DB 117,10 ; jne 3f6b <_sk_store_565_avx+0x9e>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7937,9 +8273,9 @@ _sk_store_565_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 3bab <_sk_store_565_avx+0x9a>
+ DB 119,236 ; ja 3f67 <_sk_store_565_avx+0x9a>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 3c0c <_sk_store_565_avx+0xfb>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 3fc8 <_sk_store_565_avx+0xfb>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -7950,7 +8286,7 @@ _sk_store_565_avx LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 3bab <_sk_store_565_avx+0x9a>
+ DB 235,159 ; jmp 3f67 <_sk_store_565_avx+0x9a>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -7979,7 +8315,7 @@ _sk_load_4444_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,245,0,0,0 ; jne 3d2b <_sk_load_4444_avx+0x103>
+ DB 15,133,245,0,0,0 ; jne 40e7 <_sk_load_4444_avx+0x103>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -8036,9 +8372,9 @@ _sk_load_4444_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,249,254,255,255 ; ja 3c3c <_sk_load_4444_avx+0x14>
+ DB 15,135,249,254,255,255 ; ja 3ff8 <_sk_load_4444_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 3d98 <_sk_load_4444_avx+0x170>
+ DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 4154 <_sk_load_4444_avx+0x170>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -8050,12 +8386,12 @@ _sk_load_4444_avx LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,165,254,255,255 ; jmpq 3c3c <_sk_load_4444_avx+0x14>
+ DB 233,165,254,255,255 ; jmpq 3ff8 <_sk_load_4444_avx+0x14>
DB 144 ; nop
DB 243,255 ; repz (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 235,255 ; jmp 3d9d <_sk_load_4444_avx+0x175>
+ DB 235,255 ; jmp 4159 <_sk_load_4444_avx+0x175>
DB 255 ; (bad)
DB 255,227 ; jmpq *%rbx
DB 255 ; (bad)
@@ -8212,7 +8548,7 @@ _sk_store_4444_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 4018 <_sk_store_4444_avx+0xaf>
+ DB 117,10 ; jne 43d4 <_sk_store_4444_avx+0xaf>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -8220,9 +8556,9 @@ _sk_store_4444_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 4014 <_sk_store_4444_avx+0xab>
+ DB 119,236 ; ja 43d0 <_sk_store_4444_avx+0xab>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 4078 <_sk_store_4444_avx+0x10f>
+ DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 4434 <_sk_store_4444_avx+0x10f>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -8233,7 +8569,7 @@ _sk_store_4444_avx LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 4014 <_sk_store_4444_avx+0xab>
+ DB 235,159 ; jmp 43d0 <_sk_store_4444_avx+0xab>
DB 15,31,0 ; nopl (%rax)
DB 244 ; hlt
DB 255 ; (bad)
@@ -8264,7 +8600,7 @@ _sk_load_8888_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,157,0,0,0 ; jne 413f <_sk_load_8888_avx+0xab>
+ DB 15,133,157,0,0,0 ; jne 44fb <_sk_load_8888_avx+0xab>
DB 196,65,124,16,12,186 ; vmovups (%r10,%rdi,4),%ymm9
DB 184,255,0,0,0 ; mov $0xff,%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
@@ -8302,9 +8638,9 @@ _sk_load_8888_avx LABEL PROC
DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,80,255,255,255 ; ja 40a8 <_sk_load_8888_avx+0x14>
+ DB 15,135,80,255,255,255 ; ja 4464 <_sk_load_8888_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 41ec <_sk_load_8888_avx+0x158>
+ DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 45a8 <_sk_load_8888_avx+0x158>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -8327,7 +8663,7 @@ _sk_load_8888_avx LABEL PROC
DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
DB 196,195,49,34,4,186,0 ; vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
- DB 233,188,254,255,255 ; jmpq 40a8 <_sk_load_8888_avx+0x14>
+ DB 233,188,254,255,255 ; jmpq 4464 <_sk_load_8888_avx+0x14>
DB 238 ; out %al,(%dx)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -8453,7 +8789,7 @@ _sk_store_8888_avx LABEL PROC
DB 196,65,45,86,192 ; vorpd %ymm8,%ymm10,%ymm8
DB 196,65,53,86,192 ; vorpd %ymm8,%ymm9,%ymm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 43ed <_sk_store_8888_avx+0xa4>
+ DB 117,10 ; jne 47a9 <_sk_store_8888_avx+0xa4>
DB 196,65,124,17,4,185 ; vmovups %ymm8,(%r9,%rdi,4)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -8461,9 +8797,9 @@ _sk_store_8888_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 43e9 <_sk_store_8888_avx+0xa0>
+ DB 119,236 ; ja 47a5 <_sk_store_8888_avx+0xa0>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,84,0,0,0 ; lea 0x54(%rip),%r8 # 445c <_sk_store_8888_avx+0x113>
+ DB 76,141,5,84,0,0,0 ; lea 0x54(%rip),%r8 # 4818 <_sk_store_8888_avx+0x113>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -8477,7 +8813,7 @@ _sk_store_8888_avx LABEL PROC
DB 196,67,121,22,68,185,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4)
DB 196,67,121,22,68,185,4,1 ; vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4)
DB 196,65,121,126,4,185 ; vmovd %xmm8,(%r9,%rdi,4)
- DB 235,143 ; jmp 43e9 <_sk_store_8888_avx+0xa0>
+ DB 235,143 ; jmp 47a5 <_sk_store_8888_avx+0xa0>
DB 102,144 ; xchg %ax,%ax
DB 246,255 ; idiv %bh
DB 255 ; (bad)
@@ -8511,7 +8847,7 @@ _sk_load_f16_avx LABEL PROC
DB 197,252,17,124,36,64 ; vmovups %ymm7,0x40(%rsp)
DB 197,252,17,116,36,32 ; vmovups %ymm6,0x20(%rsp)
DB 197,252,17,44,36 ; vmovups %ymm5,(%rsp)
- DB 15,133,49,2,0,0 ; jne 46cc <_sk_load_f16_avx+0x254>
+ DB 15,133,49,2,0,0 ; jne 4a88 <_sk_load_f16_avx+0x254>
DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8
DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2
DB 197,249,16,76,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm1
@@ -8629,29 +8965,29 @@ _sk_load_f16_avx LABEL PROC
DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,79 ; je 472b <_sk_load_f16_avx+0x2b3>
+ DB 116,79 ; je 4ae7 <_sk_load_f16_avx+0x2b3>
DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,67 ; jb 472b <_sk_load_f16_avx+0x2b3>
+ DB 114,67 ; jb 4ae7 <_sk_load_f16_avx+0x2b3>
DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,68 ; je 4738 <_sk_load_f16_avx+0x2c0>
+ DB 116,68 ; je 4af4 <_sk_load_f16_avx+0x2c0>
DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,56 ; jb 4738 <_sk_load_f16_avx+0x2c0>
+ DB 114,56 ; jb 4af4 <_sk_load_f16_avx+0x2c0>
DB 197,251,16,76,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,162,253,255,255 ; je 44b2 <_sk_load_f16_avx+0x3a>
+ DB 15,132,162,253,255,255 ; je 486e <_sk_load_f16_avx+0x3a>
DB 197,241,22,76,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm1,%xmm1
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,146,253,255,255 ; jb 44b2 <_sk_load_f16_avx+0x3a>
+ DB 15,130,146,253,255,255 ; jb 486e <_sk_load_f16_avx+0x3a>
DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9
- DB 233,135,253,255,255 ; jmpq 44b2 <_sk_load_f16_avx+0x3a>
+ DB 233,135,253,255,255 ; jmpq 486e <_sk_load_f16_avx+0x3a>
DB 197,241,87,201 ; vxorpd %xmm1,%xmm1,%xmm1
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,122,253,255,255 ; jmpq 44b2 <_sk_load_f16_avx+0x3a>
+ DB 233,122,253,255,255 ; jmpq 486e <_sk_load_f16_avx+0x3a>
DB 197,241,87,201 ; vxorpd %xmm1,%xmm1,%xmm1
- DB 233,113,253,255,255 ; jmpq 44b2 <_sk_load_f16_avx+0x3a>
+ DB 233,113,253,255,255 ; jmpq 486e <_sk_load_f16_avx+0x3a>
PUBLIC _sk_gather_f16_avx
_sk_gather_f16_avx LABEL PROC
@@ -8924,7 +9260,7 @@ _sk_store_f16_avx LABEL PROC
DB 197,113,98,202 ; vpunpckldq %xmm2,%xmm1,%xmm9
DB 197,113,106,194 ; vpunpckhdq %xmm2,%xmm1,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,79 ; jne 4ca2 <_sk_store_f16_avx+0x271>
+ DB 117,79 ; jne 505e <_sk_store_f16_avx+0x271>
DB 196,65,120,17,28,248 ; vmovups %xmm11,(%r8,%rdi,8)
DB 196,65,120,17,84,248,16 ; vmovups %xmm10,0x10(%r8,%rdi,8)
DB 196,65,120,17,76,248,32 ; vmovups %xmm9,0x20(%r8,%rdi,8)
@@ -8940,22 +9276,22 @@ _sk_store_f16_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,214,28,248 ; vmovq %xmm11,(%r8,%rdi,8)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,192 ; je 4c6e <_sk_store_f16_avx+0x23d>
+ DB 116,192 ; je 502a <_sk_store_f16_avx+0x23d>
DB 196,65,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%r8,%rdi,8)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,179 ; jb 4c6e <_sk_store_f16_avx+0x23d>
+ DB 114,179 ; jb 502a <_sk_store_f16_avx+0x23d>
DB 196,65,121,214,84,248,16 ; vmovq %xmm10,0x10(%r8,%rdi,8)
- DB 116,170 ; je 4c6e <_sk_store_f16_avx+0x23d>
+ DB 116,170 ; je 502a <_sk_store_f16_avx+0x23d>
DB 196,65,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%r8,%rdi,8)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,157 ; jb 4c6e <_sk_store_f16_avx+0x23d>
+ DB 114,157 ; jb 502a <_sk_store_f16_avx+0x23d>
DB 196,65,121,214,76,248,32 ; vmovq %xmm9,0x20(%r8,%rdi,8)
- DB 116,148 ; je 4c6e <_sk_store_f16_avx+0x23d>
+ DB 116,148 ; je 502a <_sk_store_f16_avx+0x23d>
DB 196,65,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%r8,%rdi,8)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,135 ; jb 4c6e <_sk_store_f16_avx+0x23d>
+ DB 114,135 ; jb 502a <_sk_store_f16_avx+0x23d>
DB 196,65,121,214,68,248,48 ; vmovq %xmm8,0x30(%r8,%rdi,8)
- DB 233,123,255,255,255 ; jmpq 4c6e <_sk_store_f16_avx+0x23d>
+ DB 233,123,255,255,255 ; jmpq 502a <_sk_store_f16_avx+0x23d>
PUBLIC _sk_load_u16_be_avx
_sk_load_u16_be_avx LABEL PROC
@@ -8963,7 +9299,7 @@ _sk_load_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,5,1,0,0 ; jne 4e0e <_sk_load_u16_be_avx+0x11b>
+ DB 15,133,5,1,0,0 ; jne 51ca <_sk_load_u16_be_avx+0x11b>
DB 196,65,121,16,4,64 ; vmovupd (%r8,%rax,2),%xmm8
DB 196,193,121,16,84,64,16 ; vmovupd 0x10(%r8,%rax,2),%xmm2
DB 196,193,121,16,92,64,32 ; vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -9022,29 +9358,29 @@ _sk_load_u16_be_avx LABEL PROC
DB 196,65,123,16,4,64 ; vmovsd (%r8,%rax,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 4e74 <_sk_load_u16_be_avx+0x181>
+ DB 116,85 ; je 5230 <_sk_load_u16_be_avx+0x181>
DB 196,65,57,22,68,64,8 ; vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 4e74 <_sk_load_u16_be_avx+0x181>
+ DB 114,72 ; jb 5230 <_sk_load_u16_be_avx+0x181>
DB 196,193,123,16,84,64,16 ; vmovsd 0x10(%r8,%rax,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 4e81 <_sk_load_u16_be_avx+0x18e>
+ DB 116,72 ; je 523d <_sk_load_u16_be_avx+0x18e>
DB 196,193,105,22,84,64,24 ; vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 4e81 <_sk_load_u16_be_avx+0x18e>
+ DB 114,59 ; jb 523d <_sk_load_u16_be_avx+0x18e>
DB 196,193,123,16,92,64,32 ; vmovsd 0x20(%r8,%rax,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,205,254,255,255 ; je 4d24 <_sk_load_u16_be_avx+0x31>
+ DB 15,132,205,254,255,255 ; je 50e0 <_sk_load_u16_be_avx+0x31>
DB 196,193,97,22,92,64,40 ; vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,188,254,255,255 ; jb 4d24 <_sk_load_u16_be_avx+0x31>
+ DB 15,130,188,254,255,255 ; jb 50e0 <_sk_load_u16_be_avx+0x31>
DB 196,65,122,126,76,64,48 ; vmovq 0x30(%r8,%rax,2),%xmm9
- DB 233,176,254,255,255 ; jmpq 4d24 <_sk_load_u16_be_avx+0x31>
+ DB 233,176,254,255,255 ; jmpq 50e0 <_sk_load_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,163,254,255,255 ; jmpq 4d24 <_sk_load_u16_be_avx+0x31>
+ DB 233,163,254,255,255 ; jmpq 50e0 <_sk_load_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,154,254,255,255 ; jmpq 4d24 <_sk_load_u16_be_avx+0x31>
+ DB 233,154,254,255,255 ; jmpq 50e0 <_sk_load_u16_be_avx+0x31>
PUBLIC _sk_load_rgb_u16_be_avx
_sk_load_rgb_u16_be_avx LABEL PROC
@@ -9052,7 +9388,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,127 ; lea (%rdi,%rdi,2),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,8,1,0,0 ; jne 4fa4 <_sk_load_rgb_u16_be_avx+0x11a>
+ DB 15,133,8,1,0,0 ; jne 5360 <_sk_load_rgb_u16_be_avx+0x11a>
DB 196,193,122,111,4,64 ; vmovdqu (%r8,%rax,2),%xmm0
DB 196,193,122,111,84,64,12 ; vmovdqu 0xc(%r8,%rax,2),%xmm2
DB 196,193,122,111,76,64,24 ; vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -9111,36 +9447,36 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 196,193,121,110,4,64 ; vmovd (%r8,%rax,2),%xmm0
DB 196,193,121,196,68,64,4,2 ; vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 4fbd <_sk_load_rgb_u16_be_avx+0x133>
- DB 233,19,255,255,255 ; jmpq 4ed0 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,5 ; jne 5379 <_sk_load_rgb_u16_be_avx+0x133>
+ DB 233,19,255,255,255 ; jmpq 528c <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,76,64,6 ; vmovd 0x6(%r8,%rax,2),%xmm1
DB 196,65,113,196,68,64,10,2 ; vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 4fec <_sk_load_rgb_u16_be_avx+0x162>
+ DB 114,26 ; jb 53a8 <_sk_load_rgb_u16_be_avx+0x162>
DB 196,193,121,110,76,64,12 ; vmovd 0xc(%r8,%rax,2),%xmm1
DB 196,193,113,196,84,64,16,2 ; vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 4ff1 <_sk_load_rgb_u16_be_avx+0x167>
- DB 233,228,254,255,255 ; jmpq 4ed0 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,223,254,255,255 ; jmpq 4ed0 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 53ad <_sk_load_rgb_u16_be_avx+0x167>
+ DB 233,228,254,255,255 ; jmpq 528c <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,223,254,255,255 ; jmpq 528c <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,76,64,18 ; vmovd 0x12(%r8,%rax,2),%xmm1
DB 196,65,113,196,76,64,22,2 ; vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 5020 <_sk_load_rgb_u16_be_avx+0x196>
+ DB 114,26 ; jb 53dc <_sk_load_rgb_u16_be_avx+0x196>
DB 196,193,121,110,76,64,24 ; vmovd 0x18(%r8,%rax,2),%xmm1
DB 196,193,113,196,76,64,28,2 ; vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 5025 <_sk_load_rgb_u16_be_avx+0x19b>
- DB 233,176,254,255,255 ; jmpq 4ed0 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,171,254,255,255 ; jmpq 4ed0 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 53e1 <_sk_load_rgb_u16_be_avx+0x19b>
+ DB 233,176,254,255,255 ; jmpq 528c <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,171,254,255,255 ; jmpq 528c <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,92,64,30 ; vmovd 0x1e(%r8,%rax,2),%xmm3
DB 196,65,97,196,92,64,34,2 ; vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 504e <_sk_load_rgb_u16_be_avx+0x1c4>
+ DB 114,20 ; jb 540a <_sk_load_rgb_u16_be_avx+0x1c4>
DB 196,193,121,110,92,64,36 ; vmovd 0x24(%r8,%rax,2),%xmm3
DB 196,193,97,196,92,64,40,2 ; vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- DB 233,130,254,255,255 ; jmpq 4ed0 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,125,254,255,255 ; jmpq 4ed0 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,130,254,255,255 ; jmpq 528c <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,125,254,255,255 ; jmpq 528c <_sk_load_rgb_u16_be_avx+0x46>
PUBLIC _sk_store_u16_be_avx
_sk_store_u16_be_avx LABEL PROC
@@ -9188,7 +9524,7 @@ _sk_store_u16_be_avx LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 5155 <_sk_store_u16_be_avx+0x102>
+ DB 117,31 ; jne 5511 <_sk_store_u16_be_avx+0x102>
DB 196,1,120,17,28,72 ; vmovups %xmm11,(%r8,%r9,2)
DB 196,1,120,17,84,72,16 ; vmovups %xmm10,0x10(%r8,%r9,2)
DB 196,1,120,17,76,72,32 ; vmovups %xmm9,0x20(%r8,%r9,2)
@@ -9197,31 +9533,31 @@ _sk_store_u16_be_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,1,121,214,28,72 ; vmovq %xmm11,(%r8,%r9,2)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 5151 <_sk_store_u16_be_avx+0xfe>
+ DB 116,240 ; je 550d <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,92,72,8 ; vmovhpd %xmm11,0x8(%r8,%r9,2)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 5151 <_sk_store_u16_be_avx+0xfe>
+ DB 114,227 ; jb 550d <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,84,72,16 ; vmovq %xmm10,0x10(%r8,%r9,2)
- DB 116,218 ; je 5151 <_sk_store_u16_be_avx+0xfe>
+ DB 116,218 ; je 550d <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,84,72,24 ; vmovhpd %xmm10,0x18(%r8,%r9,2)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 5151 <_sk_store_u16_be_avx+0xfe>
+ DB 114,205 ; jb 550d <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,76,72,32 ; vmovq %xmm9,0x20(%r8,%r9,2)
- DB 116,196 ; je 5151 <_sk_store_u16_be_avx+0xfe>
+ DB 116,196 ; je 550d <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,76,72,40 ; vmovhpd %xmm9,0x28(%r8,%r9,2)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 5151 <_sk_store_u16_be_avx+0xfe>
+ DB 114,183 ; jb 550d <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,68,72,48 ; vmovq %xmm8,0x30(%r8,%r9,2)
- DB 235,174 ; jmp 5151 <_sk_store_u16_be_avx+0xfe>
+ DB 235,174 ; jmp 550d <_sk_store_u16_be_avx+0xfe>
PUBLIC _sk_load_f32_avx
_sk_load_f32_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 119,110 ; ja 5219 <_sk_load_f32_avx+0x76>
+ DB 119,110 ; ja 55d5 <_sk_load_f32_avx+0x76>
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
- DB 76,141,21,135,0,0,0 ; lea 0x87(%rip),%r10 # 5244 <_sk_load_f32_avx+0xa1>
+ DB 76,141,21,135,0,0,0 ; lea 0x87(%rip),%r10 # 5600 <_sk_load_f32_avx+0xa1>
DB 73,99,4,138 ; movslq (%r10,%rcx,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -9280,7 +9616,7 @@ _sk_store_f32_avx LABEL PROC
DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8
DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11
DB 72,133,201 ; test %rcx,%rcx
- DB 117,55 ; jne 52d1 <_sk_store_f32_avx+0x6d>
+ DB 117,55 ; jne 568d <_sk_store_f32_avx+0x6d>
DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -9293,22 +9629,22 @@ _sk_store_f32_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 52cd <_sk_store_f32_avx+0x69>
+ DB 116,240 ; je 5689 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 52cd <_sk_store_f32_avx+0x69>
+ DB 114,227 ; jb 5689 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4)
- DB 116,218 ; je 52cd <_sk_store_f32_avx+0x69>
+ DB 116,218 ; je 5689 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 52cd <_sk_store_f32_avx+0x69>
+ DB 114,205 ; jb 5689 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- DB 116,195 ; je 52cd <_sk_store_f32_avx+0x69>
+ DB 116,195 ; je 5689 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,181 ; jb 52cd <_sk_store_f32_avx+0x69>
+ DB 114,181 ; jb 5689 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- DB 235,171 ; jmp 52cd <_sk_store_f32_avx+0x69>
+ DB 235,171 ; jmp 5689 <_sk_store_f32_avx+0x69>
PUBLIC _sk_clamp_x_avx
_sk_clamp_x_avx LABEL PROC
@@ -9612,7 +9948,7 @@ _sk_linear_gradient_avx LABEL PROC
DB 196,226,125,24,88,28 ; vbroadcastss 0x1c(%rax),%ymm3
DB 76,139,0 ; mov (%rax),%r8
DB 77,133,192 ; test %r8,%r8
- DB 15,132,146,0,0,0 ; je 5885 <_sk_linear_gradient_avx+0xb8>
+ DB 15,132,146,0,0,0 ; je 5c41 <_sk_linear_gradient_avx+0xb8>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 196,65,28,87,228 ; vxorps %ymm12,%ymm12,%ymm12
@@ -9639,8 +9975,8 @@ _sk_linear_gradient_avx LABEL PROC
DB 196,227,13,74,219,208 ; vblendvps %ymm13,%ymm3,%ymm14,%ymm3
DB 72,131,192,36 ; add $0x24,%rax
DB 73,255,200 ; dec %r8
- DB 117,140 ; jne 580f <_sk_linear_gradient_avx+0x42>
- DB 235,20 ; jmp 5899 <_sk_linear_gradient_avx+0xcc>
+ DB 117,140 ; jne 5bcb <_sk_linear_gradient_avx+0x42>
+ DB 235,20 ; jmp 5c55 <_sk_linear_gradient_avx+0xcc>
DB 196,65,36,87,219 ; vxorps %ymm11,%ymm11,%ymm11
DB 196,65,44,87,210 ; vxorps %ymm10,%ymm10,%ymm10
DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
@@ -11327,89 +11663,276 @@ _sk_to_srgb_sse41 LABEL PROC
PUBLIC _sk_from_2dot2_sse41
_sk_from_2dot2_sse41 LABEL PROC
- DB 68,15,40,192 ; movaps %xmm0,%xmm8
- DB 65,15,82,192 ; rsqrtps %xmm8,%xmm0
- DB 15,82,192 ; rsqrtps %xmm0,%xmm0
- DB 15,82,192 ; rsqrtps %xmm0,%xmm0
- DB 68,15,82,200 ; rsqrtps %xmm0,%xmm9
- DB 65,15,82,193 ; rsqrtps %xmm9,%xmm0
- DB 68,15,82,208 ; rsqrtps %xmm0,%xmm10
- DB 69,15,89,192 ; mulps %xmm8,%xmm8
+ DB 72,131,236,120 ; sub $0x78,%rsp
+ DB 15,41,124,36,96 ; movaps %xmm7,0x60(%rsp)
+ DB 15,41,116,36,80 ; movaps %xmm6,0x50(%rsp)
+ DB 15,41,108,36,64 ; movaps %xmm5,0x40(%rsp)
+ DB 15,41,100,36,48 ; movaps %xmm4,0x30(%rsp)
+ DB 15,41,92,36,32 ; movaps %xmm3,0x20(%rsp)
+ DB 15,41,84,36,16 ; movaps %xmm2,0x10(%rsp)
+ DB 15,40,209 ; movaps %xmm1,%xmm2
+ DB 184,205,204,12,64 ; mov $0x400ccccd,%eax
+ DB 15,91,216 ; cvtdq2ps %xmm0,%xmm3
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 65,15,89,218 ; mulps %xmm10,%xmm3
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 102,68,15,112,193,0 ; pshufd $0x0,%xmm1,%xmm8
+ DB 65,15,84,192 ; andps %xmm8,%xmm0
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 102,15,112,201,0 ; pshufd $0x0,%xmm1,%xmm1
+ DB 15,86,193 ; orps %xmm1,%xmm0
+ DB 15,40,241 ; movaps %xmm1,%xmm6
+ DB 15,41,52,36 ; movaps %xmm6,(%rsp)
+ DB 185,119,115,248,66 ; mov $0x42f87377,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 65,15,92,219 ; subps %xmm11,%xmm3
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 15,40,200 ; movaps %xmm0,%xmm1
+ DB 65,15,89,204 ; mulps %xmm12,%xmm1
+ DB 15,92,217 ; subps %xmm1,%xmm3
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 65,15,88,198 ; addps %xmm14,%xmm0
+ DB 65,15,40,205 ; movaps %xmm13,%xmm1
+ DB 15,94,200 ; divps %xmm0,%xmm1
+ DB 15,92,217 ; subps %xmm1,%xmm3
+ DB 102,68,15,110,248 ; movd %eax,%xmm15
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 65,15,89,223 ; mulps %xmm15,%xmm3
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,15,110,225 ; movd %ecx,%xmm4
+ DB 15,198,228,0 ; shufps $0x0,%xmm4,%xmm4
+ DB 15,40,204 ; movaps %xmm4,%xmm1
+ DB 15,88,203 ; addps %xmm3,%xmm1
+ DB 102,15,58,8,195,1 ; roundps $0x1,%xmm3,%xmm0
+ DB 15,92,216 ; subps %xmm0,%xmm3
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,68,15,110,201 ; movd %ecx,%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
DB 65,15,40,193 ; movaps %xmm9,%xmm0
- DB 15,89,192 ; mulps %xmm0,%xmm0
- DB 65,15,89,193 ; mulps %xmm9,%xmm0
- DB 65,15,89,192 ; mulps %xmm8,%xmm0
- DB 65,15,89,194 ; mulps %xmm10,%xmm0
- DB 69,15,87,210 ; xorps %xmm10,%xmm10
- DB 65,15,95,194 ; maxps %xmm10,%xmm0
- DB 68,15,82,193 ; rsqrtps %xmm1,%xmm8
- DB 69,15,82,192 ; rsqrtps %xmm8,%xmm8
- DB 69,15,82,192 ; rsqrtps %xmm8,%xmm8
- DB 69,15,82,200 ; rsqrtps %xmm8,%xmm9
- DB 69,15,82,193 ; rsqrtps %xmm9,%xmm8
- DB 69,15,82,216 ; rsqrtps %xmm8,%xmm11
- DB 15,89,201 ; mulps %xmm1,%xmm1
- DB 69,15,40,193 ; movaps %xmm9,%xmm8
- DB 69,15,89,192 ; mulps %xmm8,%xmm8
- DB 69,15,89,193 ; mulps %xmm9,%xmm8
- DB 68,15,89,193 ; mulps %xmm1,%xmm8
- DB 69,15,89,195 ; mulps %xmm11,%xmm8
- DB 69,15,95,194 ; maxps %xmm10,%xmm8
- DB 15,82,202 ; rsqrtps %xmm2,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 68,15,82,217 ; rsqrtps %xmm1,%xmm11
- DB 65,15,82,203 ; rsqrtps %xmm11,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 15,89,210 ; mulps %xmm2,%xmm2
- DB 69,15,40,203 ; movaps %xmm11,%xmm9
- DB 69,15,89,201 ; mulps %xmm9,%xmm9
- DB 69,15,89,203 ; mulps %xmm11,%xmm9
- DB 68,15,89,202 ; mulps %xmm2,%xmm9
- DB 68,15,89,201 ; mulps %xmm1,%xmm9
- DB 69,15,95,202 ; maxps %xmm10,%xmm9
- DB 72,173 ; lods %ds:(%rsi),%rax
- DB 65,15,40,200 ; movaps %xmm8,%xmm1
+ DB 15,89,195 ; mulps %xmm3,%xmm0
+ DB 15,92,200 ; subps %xmm0,%xmm1
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 184,248,245,154,64 ; mov $0x409af5f8,%eax
+ DB 102,15,110,248 ; movd %eax,%xmm7
+ DB 15,198,255,0 ; shufps $0x0,%xmm7,%xmm7
+ DB 15,40,239 ; movaps %xmm7,%xmm5
+ DB 15,92,235 ; subps %xmm3,%xmm5
+ DB 102,15,110,193 ; movd %ecx,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,40,216 ; movaps %xmm0,%xmm3
+ DB 15,94,221 ; divps %xmm5,%xmm3
+ DB 15,88,217 ; addps %xmm1,%xmm3
+ DB 15,91,202 ; cvtdq2ps %xmm2,%xmm1
+ DB 65,15,89,202 ; mulps %xmm10,%xmm1
+ DB 65,15,84,208 ; andps %xmm8,%xmm2
+ DB 15,86,214 ; orps %xmm6,%xmm2
+ DB 65,15,92,203 ; subps %xmm11,%xmm1
+ DB 15,40,234 ; movaps %xmm2,%xmm5
+ DB 65,15,89,236 ; mulps %xmm12,%xmm5
+ DB 15,92,205 ; subps %xmm5,%xmm1
+ DB 65,15,88,214 ; addps %xmm14,%xmm2
+ DB 65,15,40,237 ; movaps %xmm13,%xmm5
+ DB 15,94,234 ; divps %xmm2,%xmm5
+ DB 15,92,205 ; subps %xmm5,%xmm1
+ DB 65,15,89,207 ; mulps %xmm15,%xmm1
+ DB 15,40,236 ; movaps %xmm4,%xmm5
+ DB 15,88,233 ; addps %xmm1,%xmm5
+ DB 102,15,58,8,209,1 ; roundps $0x1,%xmm1,%xmm2
+ DB 15,92,202 ; subps %xmm2,%xmm1
DB 65,15,40,209 ; movaps %xmm9,%xmm2
+ DB 15,89,209 ; mulps %xmm1,%xmm2
+ DB 15,92,234 ; subps %xmm2,%xmm5
+ DB 15,40,247 ; movaps %xmm7,%xmm6
+ DB 15,92,241 ; subps %xmm1,%xmm6
+ DB 15,40,208 ; movaps %xmm0,%xmm2
+ DB 15,94,214 ; divps %xmm6,%xmm2
+ DB 15,88,213 ; addps %xmm5,%xmm2
+ DB 15,40,108,36,16 ; movaps 0x10(%rsp),%xmm5
+ DB 15,91,205 ; cvtdq2ps %xmm5,%xmm1
+ DB 65,15,89,202 ; mulps %xmm10,%xmm1
+ DB 68,15,84,197 ; andps %xmm5,%xmm8
+ DB 68,15,86,4,36 ; orps (%rsp),%xmm8
+ DB 65,15,92,203 ; subps %xmm11,%xmm1
+ DB 69,15,89,224 ; mulps %xmm8,%xmm12
+ DB 65,15,92,204 ; subps %xmm12,%xmm1
+ DB 69,15,88,198 ; addps %xmm14,%xmm8
+ DB 69,15,94,232 ; divps %xmm8,%xmm13
+ DB 65,15,92,205 ; subps %xmm13,%xmm1
+ DB 65,15,89,207 ; mulps %xmm15,%xmm1
+ DB 102,15,58,8,233,1 ; roundps $0x1,%xmm1,%xmm5
+ DB 15,88,225 ; addps %xmm1,%xmm4
+ DB 15,92,205 ; subps %xmm5,%xmm1
+ DB 68,15,89,201 ; mulps %xmm1,%xmm9
+ DB 65,15,92,225 ; subps %xmm9,%xmm4
+ DB 15,92,249 ; subps %xmm1,%xmm7
+ DB 15,94,199 ; divps %xmm7,%xmm0
+ DB 15,88,196 ; addps %xmm4,%xmm0
+ DB 102,65,15,110,200 ; movd %r8d,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 15,89,217 ; mulps %xmm1,%xmm3
+ DB 15,89,209 ; mulps %xmm1,%xmm2
+ DB 15,89,193 ; mulps %xmm1,%xmm0
+ DB 102,15,91,219 ; cvtps2dq %xmm3,%xmm3
+ DB 102,15,91,202 ; cvtps2dq %xmm2,%xmm1
+ DB 102,15,91,208 ; cvtps2dq %xmm0,%xmm2
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 102,15,40,195 ; movapd %xmm3,%xmm0
+ DB 15,40,92,36,32 ; movaps 0x20(%rsp),%xmm3
+ DB 15,40,100,36,48 ; movaps 0x30(%rsp),%xmm4
+ DB 15,40,108,36,64 ; movaps 0x40(%rsp),%xmm5
+ DB 15,40,116,36,80 ; movaps 0x50(%rsp),%xmm6
+ DB 15,40,124,36,96 ; movaps 0x60(%rsp),%xmm7
+ DB 72,131,196,120 ; add $0x78,%rsp
DB 255,224 ; jmpq *%rax
PUBLIC _sk_to_2dot2_sse41
_sk_to_2dot2_sse41 LABEL PROC
- DB 68,15,82,192 ; rsqrtps %xmm0,%xmm8
- DB 65,15,82,192 ; rsqrtps %xmm8,%xmm0
- DB 15,82,192 ; rsqrtps %xmm0,%xmm0
- DB 15,82,192 ; rsqrtps %xmm0,%xmm0
- DB 15,82,192 ; rsqrtps %xmm0,%xmm0
- DB 68,15,82,200 ; rsqrtps %xmm0,%xmm9
- DB 69,15,83,192 ; rcpps %xmm8,%xmm8
- DB 68,15,89,192 ; mulps %xmm0,%xmm8
- DB 65,15,83,193 ; rcpps %xmm9,%xmm0
- DB 65,15,89,192 ; mulps %xmm8,%xmm0
- DB 69,15,87,192 ; xorps %xmm8,%xmm8
- DB 65,15,95,192 ; maxps %xmm8,%xmm0
- DB 68,15,82,201 ; rsqrtps %xmm1,%xmm9
- DB 65,15,82,201 ; rsqrtps %xmm9,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 68,15,82,209 ; rsqrtps %xmm1,%xmm10
- DB 69,15,83,201 ; rcpps %xmm9,%xmm9
+ DB 72,131,236,120 ; sub $0x78,%rsp
+ DB 15,41,124,36,96 ; movaps %xmm7,0x60(%rsp)
+ DB 15,41,116,36,80 ; movaps %xmm6,0x50(%rsp)
+ DB 15,41,108,36,64 ; movaps %xmm5,0x40(%rsp)
+ DB 15,41,100,36,48 ; movaps %xmm4,0x30(%rsp)
+ DB 15,41,92,36,32 ; movaps %xmm3,0x20(%rsp)
+ DB 15,41,84,36,16 ; movaps %xmm2,0x10(%rsp)
+ DB 15,40,209 ; movaps %xmm1,%xmm2
+ DB 184,46,186,232,62 ; mov $0x3ee8ba2e,%eax
+ DB 15,91,216 ; cvtdq2ps %xmm0,%xmm3
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 65,15,89,218 ; mulps %xmm10,%xmm3
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 102,68,15,112,193,0 ; pshufd $0x0,%xmm1,%xmm8
+ DB 65,15,84,192 ; andps %xmm8,%xmm0
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 102,15,112,201,0 ; pshufd $0x0,%xmm1,%xmm1
+ DB 15,86,193 ; orps %xmm1,%xmm0
+ DB 15,40,241 ; movaps %xmm1,%xmm6
+ DB 15,41,52,36 ; movaps %xmm6,(%rsp)
+ DB 185,119,115,248,66 ; mov $0x42f87377,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 65,15,92,219 ; subps %xmm11,%xmm3
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 15,40,200 ; movaps %xmm0,%xmm1
+ DB 65,15,89,204 ; mulps %xmm12,%xmm1
+ DB 15,92,217 ; subps %xmm1,%xmm3
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 65,15,88,198 ; addps %xmm14,%xmm0
+ DB 65,15,40,205 ; movaps %xmm13,%xmm1
+ DB 15,94,200 ; divps %xmm0,%xmm1
+ DB 15,92,217 ; subps %xmm1,%xmm3
+ DB 102,68,15,110,248 ; movd %eax,%xmm15
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 65,15,89,223 ; mulps %xmm15,%xmm3
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,15,110,225 ; movd %ecx,%xmm4
+ DB 15,198,228,0 ; shufps $0x0,%xmm4,%xmm4
+ DB 15,40,204 ; movaps %xmm4,%xmm1
+ DB 15,88,203 ; addps %xmm3,%xmm1
+ DB 102,15,58,8,195,1 ; roundps $0x1,%xmm3,%xmm0
+ DB 15,92,216 ; subps %xmm0,%xmm3
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,68,15,110,201 ; movd %ecx,%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 65,15,40,193 ; movaps %xmm9,%xmm0
+ DB 15,89,195 ; mulps %xmm3,%xmm0
+ DB 15,92,200 ; subps %xmm0,%xmm1
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 184,248,245,154,64 ; mov $0x409af5f8,%eax
+ DB 102,15,110,248 ; movd %eax,%xmm7
+ DB 15,198,255,0 ; shufps $0x0,%xmm7,%xmm7
+ DB 15,40,239 ; movaps %xmm7,%xmm5
+ DB 15,92,235 ; subps %xmm3,%xmm5
+ DB 102,15,110,193 ; movd %ecx,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,40,216 ; movaps %xmm0,%xmm3
+ DB 15,94,221 ; divps %xmm5,%xmm3
+ DB 15,88,217 ; addps %xmm1,%xmm3
+ DB 15,91,202 ; cvtdq2ps %xmm2,%xmm1
+ DB 65,15,89,202 ; mulps %xmm10,%xmm1
+ DB 65,15,84,208 ; andps %xmm8,%xmm2
+ DB 15,86,214 ; orps %xmm6,%xmm2
+ DB 65,15,92,203 ; subps %xmm11,%xmm1
+ DB 15,40,234 ; movaps %xmm2,%xmm5
+ DB 65,15,89,236 ; mulps %xmm12,%xmm5
+ DB 15,92,205 ; subps %xmm5,%xmm1
+ DB 65,15,88,214 ; addps %xmm14,%xmm2
+ DB 65,15,40,237 ; movaps %xmm13,%xmm5
+ DB 15,94,234 ; divps %xmm2,%xmm5
+ DB 15,92,205 ; subps %xmm5,%xmm1
+ DB 65,15,89,207 ; mulps %xmm15,%xmm1
+ DB 15,40,236 ; movaps %xmm4,%xmm5
+ DB 15,88,233 ; addps %xmm1,%xmm5
+ DB 102,15,58,8,209,1 ; roundps $0x1,%xmm1,%xmm2
+ DB 15,92,202 ; subps %xmm2,%xmm1
+ DB 65,15,40,209 ; movaps %xmm9,%xmm2
+ DB 15,89,209 ; mulps %xmm1,%xmm2
+ DB 15,92,234 ; subps %xmm2,%xmm5
+ DB 15,40,247 ; movaps %xmm7,%xmm6
+ DB 15,92,241 ; subps %xmm1,%xmm6
+ DB 15,40,208 ; movaps %xmm0,%xmm2
+ DB 15,94,214 ; divps %xmm6,%xmm2
+ DB 15,88,213 ; addps %xmm5,%xmm2
+ DB 15,40,108,36,16 ; movaps 0x10(%rsp),%xmm5
+ DB 15,91,205 ; cvtdq2ps %xmm5,%xmm1
+ DB 65,15,89,202 ; mulps %xmm10,%xmm1
+ DB 68,15,84,197 ; andps %xmm5,%xmm8
+ DB 68,15,86,4,36 ; orps (%rsp),%xmm8
+ DB 65,15,92,203 ; subps %xmm11,%xmm1
+ DB 69,15,89,224 ; mulps %xmm8,%xmm12
+ DB 65,15,92,204 ; subps %xmm12,%xmm1
+ DB 69,15,88,198 ; addps %xmm14,%xmm8
+ DB 69,15,94,232 ; divps %xmm8,%xmm13
+ DB 65,15,92,205 ; subps %xmm13,%xmm1
+ DB 65,15,89,207 ; mulps %xmm15,%xmm1
+ DB 102,15,58,8,233,1 ; roundps $0x1,%xmm1,%xmm5
+ DB 15,88,225 ; addps %xmm1,%xmm4
+ DB 15,92,205 ; subps %xmm5,%xmm1
DB 68,15,89,201 ; mulps %xmm1,%xmm9
- DB 65,15,83,202 ; rcpps %xmm10,%xmm1
- DB 65,15,89,201 ; mulps %xmm9,%xmm1
- DB 65,15,95,200 ; maxps %xmm8,%xmm1
- DB 68,15,82,202 ; rsqrtps %xmm2,%xmm9
- DB 65,15,82,209 ; rsqrtps %xmm9,%xmm2
- DB 15,82,210 ; rsqrtps %xmm2,%xmm2
- DB 15,82,210 ; rsqrtps %xmm2,%xmm2
- DB 15,82,210 ; rsqrtps %xmm2,%xmm2
- DB 68,15,82,210 ; rsqrtps %xmm2,%xmm10
- DB 69,15,83,201 ; rcpps %xmm9,%xmm9
- DB 68,15,89,202 ; mulps %xmm2,%xmm9
- DB 65,15,83,210 ; rcpps %xmm10,%xmm2
- DB 65,15,89,209 ; mulps %xmm9,%xmm2
- DB 65,15,95,208 ; maxps %xmm8,%xmm2
- DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 65,15,92,225 ; subps %xmm9,%xmm4
+ DB 15,92,249 ; subps %xmm1,%xmm7
+ DB 15,94,199 ; divps %xmm7,%xmm0
+ DB 15,88,196 ; addps %xmm4,%xmm0
+ DB 102,65,15,110,200 ; movd %r8d,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 15,89,217 ; mulps %xmm1,%xmm3
+ DB 15,89,209 ; mulps %xmm1,%xmm2
+ DB 15,89,193 ; mulps %xmm1,%xmm0
+ DB 102,15,91,219 ; cvtps2dq %xmm3,%xmm3
+ DB 102,15,91,202 ; cvtps2dq %xmm2,%xmm1
+ DB 102,15,91,208 ; cvtps2dq %xmm0,%xmm2
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 102,15,40,195 ; movapd %xmm3,%xmm0
+ DB 15,40,92,36,32 ; movaps 0x20(%rsp),%xmm3
+ DB 15,40,100,36,48 ; movaps 0x30(%rsp),%xmm4
+ DB 15,40,108,36,64 ; movaps 0x40(%rsp),%xmm5
+ DB 15,40,116,36,80 ; movaps 0x50(%rsp),%xmm6
+ DB 15,40,124,36,96 ; movaps 0x60(%rsp),%xmm7
+ DB 72,131,196,120 ; add $0x78,%rsp
DB 255,224 ; jmpq *%rax
PUBLIC _sk_rgb_to_hsl_sse41
@@ -12819,9 +13342,9 @@ _sk_gather_i8_sse41 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 28a1 <_sk_gather_i8_sse41+0xf>
+ DB 116,5 ; je 2baf <_sk_gather_i8_sse41+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 28a3 <_sk_gather_i8_sse41+0x11>
+ DB 235,2 ; jmp 2bb1 <_sk_gather_i8_sse41+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1
@@ -13996,7 +14519,7 @@ _sk_linear_gradient_sse41 LABEL PROC
DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
DB 72,139,8 ; mov (%rax),%rcx
DB 72,133,201 ; test %rcx,%rcx
- DB 15,132,4,1,0,0 ; je 3c92 <_sk_linear_gradient_sse41+0x13e>
+ DB 15,132,4,1,0,0 ; je 3fa0 <_sk_linear_gradient_sse41+0x13e>
DB 72,131,236,88 ; sub $0x58,%rsp
DB 15,41,36,36 ; movaps %xmm4,(%rsp)
DB 15,41,108,36,16 ; movaps %xmm5,0x10(%rsp)
@@ -14047,13 +14570,13 @@ _sk_linear_gradient_sse41 LABEL PROC
DB 15,40,196 ; movaps %xmm4,%xmm0
DB 72,131,192,36 ; add $0x24,%rax
DB 72,255,201 ; dec %rcx
- DB 15,133,65,255,255,255 ; jne 3bba <_sk_linear_gradient_sse41+0x66>
+ DB 15,133,65,255,255,255 ; jne 3ec8 <_sk_linear_gradient_sse41+0x66>
DB 15,40,124,36,48 ; movaps 0x30(%rsp),%xmm7
DB 15,40,116,36,32 ; movaps 0x20(%rsp),%xmm6
DB 15,40,108,36,16 ; movaps 0x10(%rsp),%xmm5
DB 15,40,36,36 ; movaps (%rsp),%xmm4
DB 72,131,196,88 ; add $0x58,%rsp
- DB 235,13 ; jmp 3c9f <_sk_linear_gradient_sse41+0x14b>
+ DB 235,13 ; jmp 3fad <_sk_linear_gradient_sse41+0x14b>
DB 15,87,201 ; xorps %xmm1,%xmm1
DB 15,87,210 ; xorps %xmm2,%xmm2
DB 15,87,219 ; xorps %xmm3,%xmm3
@@ -15744,89 +16267,320 @@ _sk_to_srgb_sse2 LABEL PROC
PUBLIC _sk_from_2dot2_sse2
_sk_from_2dot2_sse2 LABEL PROC
- DB 68,15,40,192 ; movaps %xmm0,%xmm8
- DB 65,15,82,192 ; rsqrtps %xmm8,%xmm0
- DB 15,82,192 ; rsqrtps %xmm0,%xmm0
- DB 15,82,192 ; rsqrtps %xmm0,%xmm0
- DB 68,15,82,200 ; rsqrtps %xmm0,%xmm9
- DB 65,15,82,193 ; rsqrtps %xmm9,%xmm0
- DB 68,15,82,208 ; rsqrtps %xmm0,%xmm10
- DB 69,15,89,192 ; mulps %xmm8,%xmm8
- DB 65,15,40,193 ; movaps %xmm9,%xmm0
- DB 15,89,192 ; mulps %xmm0,%xmm0
- DB 65,15,89,193 ; mulps %xmm9,%xmm0
- DB 65,15,89,192 ; mulps %xmm8,%xmm0
- DB 65,15,89,194 ; mulps %xmm10,%xmm0
- DB 69,15,87,210 ; xorps %xmm10,%xmm10
- DB 65,15,95,194 ; maxps %xmm10,%xmm0
- DB 68,15,82,193 ; rsqrtps %xmm1,%xmm8
- DB 69,15,82,192 ; rsqrtps %xmm8,%xmm8
- DB 69,15,82,192 ; rsqrtps %xmm8,%xmm8
- DB 69,15,82,200 ; rsqrtps %xmm8,%xmm9
- DB 69,15,82,193 ; rsqrtps %xmm9,%xmm8
- DB 69,15,82,216 ; rsqrtps %xmm8,%xmm11
- DB 15,89,201 ; mulps %xmm1,%xmm1
- DB 69,15,40,193 ; movaps %xmm9,%xmm8
- DB 69,15,89,192 ; mulps %xmm8,%xmm8
- DB 69,15,89,193 ; mulps %xmm9,%xmm8
- DB 68,15,89,193 ; mulps %xmm1,%xmm8
- DB 69,15,89,195 ; mulps %xmm11,%xmm8
- DB 69,15,95,194 ; maxps %xmm10,%xmm8
- DB 15,82,202 ; rsqrtps %xmm2,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 68,15,82,217 ; rsqrtps %xmm1,%xmm11
- DB 65,15,82,203 ; rsqrtps %xmm11,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 15,89,210 ; mulps %xmm2,%xmm2
- DB 69,15,40,203 ; movaps %xmm11,%xmm9
- DB 69,15,89,201 ; mulps %xmm9,%xmm9
- DB 69,15,89,203 ; mulps %xmm11,%xmm9
- DB 68,15,89,202 ; mulps %xmm2,%xmm9
- DB 68,15,89,201 ; mulps %xmm1,%xmm9
- DB 69,15,95,202 ; maxps %xmm10,%xmm9
+ DB 72,129,236,152,0,0,0 ; sub $0x98,%rsp
+ DB 15,41,188,36,128,0,0,0 ; movaps %xmm7,0x80(%rsp)
+ DB 15,41,116,36,112 ; movaps %xmm6,0x70(%rsp)
+ DB 15,41,108,36,96 ; movaps %xmm5,0x60(%rsp)
+ DB 15,41,100,36,80 ; movaps %xmm4,0x50(%rsp)
+ DB 15,41,92,36,64 ; movaps %xmm3,0x40(%rsp)
+ DB 15,41,84,36,48 ; movaps %xmm2,0x30(%rsp)
+ DB 15,40,208 ; movaps %xmm0,%xmm2
+ DB 184,205,204,12,64 ; mov $0x400ccccd,%eax
+ DB 15,91,194 ; cvtdq2ps %xmm2,%xmm0
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 15,89,195 ; mulps %xmm3,%xmm0
+ DB 68,15,40,219 ; movaps %xmm3,%xmm11
+ DB 68,15,41,92,36,16 ; movaps %xmm11,0x10(%rsp)
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 102,68,15,112,195,0 ; pshufd $0x0,%xmm3,%xmm8
+ DB 65,15,84,208 ; andps %xmm8,%xmm2
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 102,15,112,219,0 ; pshufd $0x0,%xmm3,%xmm3
+ DB 102,15,127,92,36,32 ; movdqa %xmm3,0x20(%rsp)
+ DB 15,86,211 ; orps %xmm3,%xmm2
+ DB 185,119,115,248,66 ; mov $0x42f87377,%ecx
+ DB 102,15,110,233 ; movd %ecx,%xmm5
+ DB 15,198,237,0 ; shufps $0x0,%xmm5,%xmm5
+ DB 15,92,197 ; subps %xmm5,%xmm0
+ DB 15,41,44,36 ; movaps %xmm5,(%rsp)
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 15,40,218 ; movaps %xmm2,%xmm3
+ DB 65,15,89,220 ; mulps %xmm12,%xmm3
+ DB 15,92,195 ; subps %xmm3,%xmm0
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 65,15,88,214 ; addps %xmm14,%xmm2
+ DB 65,15,40,221 ; movaps %xmm13,%xmm3
+ DB 15,94,218 ; divps %xmm2,%xmm3
+ DB 15,92,195 ; subps %xmm3,%xmm0
+ DB 102,68,15,110,248 ; movd %eax,%xmm15
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 65,15,89,199 ; mulps %xmm15,%xmm0
+ DB 243,15,91,208 ; cvttps2dq %xmm0,%xmm2
+ DB 15,91,210 ; cvtdq2ps %xmm2,%xmm2
+ DB 15,40,216 ; movaps %xmm0,%xmm3
+ DB 15,194,218,1 ; cmpltps %xmm2,%xmm3
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 102,68,15,110,208 ; movd %eax,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 65,15,84,218 ; andps %xmm10,%xmm3
+ DB 15,92,211 ; subps %xmm3,%xmm2
+ DB 15,40,224 ; movaps %xmm0,%xmm4
+ DB 15,92,226 ; subps %xmm2,%xmm4
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,201 ; movd %ecx,%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 65,15,88,193 ; addps %xmm9,%xmm0
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,15,110,249 ; movd %ecx,%xmm7
+ DB 15,198,255,0 ; shufps $0x0,%xmm7,%xmm7
+ DB 15,40,215 ; movaps %xmm7,%xmm2
+ DB 15,89,212 ; mulps %xmm4,%xmm2
+ DB 15,92,194 ; subps %xmm2,%xmm0
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 184,248,245,154,64 ; mov $0x409af5f8,%eax
+ DB 102,15,110,240 ; movd %eax,%xmm6
+ DB 15,198,246,0 ; shufps $0x0,%xmm6,%xmm6
+ DB 15,40,222 ; movaps %xmm6,%xmm3
+ DB 15,92,220 ; subps %xmm4,%xmm3
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 15,40,226 ; movaps %xmm2,%xmm4
+ DB 15,94,227 ; divps %xmm3,%xmm4
+ DB 15,88,224 ; addps %xmm0,%xmm4
+ DB 15,91,193 ; cvtdq2ps %xmm1,%xmm0
+ DB 65,15,89,195 ; mulps %xmm11,%xmm0
+ DB 65,15,84,200 ; andps %xmm8,%xmm1
+ DB 68,15,40,92,36,32 ; movaps 0x20(%rsp),%xmm11
+ DB 65,15,86,203 ; orps %xmm11,%xmm1
+ DB 15,92,197 ; subps %xmm5,%xmm0
+ DB 15,40,217 ; movaps %xmm1,%xmm3
+ DB 65,15,89,220 ; mulps %xmm12,%xmm3
+ DB 15,92,195 ; subps %xmm3,%xmm0
+ DB 65,15,88,206 ; addps %xmm14,%xmm1
+ DB 65,15,40,221 ; movaps %xmm13,%xmm3
+ DB 15,94,217 ; divps %xmm1,%xmm3
+ DB 15,92,195 ; subps %xmm3,%xmm0
+ DB 65,15,89,199 ; mulps %xmm15,%xmm0
+ DB 243,15,91,200 ; cvttps2dq %xmm0,%xmm1
+ DB 15,91,201 ; cvtdq2ps %xmm1,%xmm1
+ DB 15,40,216 ; movaps %xmm0,%xmm3
+ DB 15,194,217,1 ; cmpltps %xmm1,%xmm3
+ DB 65,15,84,218 ; andps %xmm10,%xmm3
+ DB 15,92,203 ; subps %xmm3,%xmm1
+ DB 15,40,216 ; movaps %xmm0,%xmm3
+ DB 15,92,217 ; subps %xmm1,%xmm3
+ DB 65,15,88,193 ; addps %xmm9,%xmm0
+ DB 15,40,207 ; movaps %xmm7,%xmm1
+ DB 15,89,203 ; mulps %xmm3,%xmm1
+ DB 15,92,193 ; subps %xmm1,%xmm0
+ DB 15,40,238 ; movaps %xmm6,%xmm5
+ DB 15,92,235 ; subps %xmm3,%xmm5
+ DB 15,40,202 ; movaps %xmm2,%xmm1
+ DB 15,94,205 ; divps %xmm5,%xmm1
+ DB 15,88,200 ; addps %xmm0,%xmm1
+ DB 15,40,92,36,48 ; movaps 0x30(%rsp),%xmm3
+ DB 15,91,195 ; cvtdq2ps %xmm3,%xmm0
+ DB 15,89,68,36,16 ; mulps 0x10(%rsp),%xmm0
+ DB 68,15,84,195 ; andps %xmm3,%xmm8
+ DB 69,15,86,195 ; orps %xmm11,%xmm8
+ DB 15,92,4,36 ; subps (%rsp),%xmm0
+ DB 69,15,89,224 ; mulps %xmm8,%xmm12
+ DB 65,15,92,196 ; subps %xmm12,%xmm0
+ DB 69,15,88,198 ; addps %xmm14,%xmm8
+ DB 69,15,94,232 ; divps %xmm8,%xmm13
+ DB 65,15,92,197 ; subps %xmm13,%xmm0
+ DB 65,15,89,199 ; mulps %xmm15,%xmm0
+ DB 243,15,91,216 ; cvttps2dq %xmm0,%xmm3
+ DB 15,91,219 ; cvtdq2ps %xmm3,%xmm3
+ DB 15,40,232 ; movaps %xmm0,%xmm5
+ DB 15,194,235,1 ; cmpltps %xmm3,%xmm5
+ DB 65,15,84,234 ; andps %xmm10,%xmm5
+ DB 15,92,221 ; subps %xmm5,%xmm3
+ DB 15,40,232 ; movaps %xmm0,%xmm5
+ DB 15,92,235 ; subps %xmm3,%xmm5
+ DB 65,15,88,193 ; addps %xmm9,%xmm0
+ DB 15,89,253 ; mulps %xmm5,%xmm7
+ DB 15,92,199 ; subps %xmm7,%xmm0
+ DB 15,92,245 ; subps %xmm5,%xmm6
+ DB 15,94,214 ; divps %xmm6,%xmm2
+ DB 15,88,208 ; addps %xmm0,%xmm2
+ DB 102,65,15,110,192 ; movd %r8d,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,89,224 ; mulps %xmm0,%xmm4
+ DB 15,89,200 ; mulps %xmm0,%xmm1
+ DB 15,89,208 ; mulps %xmm0,%xmm2
+ DB 102,15,91,220 ; cvtps2dq %xmm4,%xmm3
+ DB 102,15,91,201 ; cvtps2dq %xmm1,%xmm1
+ DB 102,15,91,210 ; cvtps2dq %xmm2,%xmm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 65,15,40,200 ; movaps %xmm8,%xmm1
- DB 65,15,40,209 ; movaps %xmm9,%xmm2
+ DB 102,15,40,195 ; movapd %xmm3,%xmm0
+ DB 15,40,92,36,64 ; movaps 0x40(%rsp),%xmm3
+ DB 15,40,100,36,80 ; movaps 0x50(%rsp),%xmm4
+ DB 15,40,108,36,96 ; movaps 0x60(%rsp),%xmm5
+ DB 15,40,116,36,112 ; movaps 0x70(%rsp),%xmm6
+ DB 15,40,188,36,128,0,0,0 ; movaps 0x80(%rsp),%xmm7
+ DB 72,129,196,152,0,0,0 ; add $0x98,%rsp
DB 255,224 ; jmpq *%rax
PUBLIC _sk_to_2dot2_sse2
_sk_to_2dot2_sse2 LABEL PROC
- DB 68,15,82,192 ; rsqrtps %xmm0,%xmm8
- DB 65,15,82,192 ; rsqrtps %xmm8,%xmm0
- DB 15,82,192 ; rsqrtps %xmm0,%xmm0
- DB 15,82,192 ; rsqrtps %xmm0,%xmm0
- DB 15,82,192 ; rsqrtps %xmm0,%xmm0
- DB 68,15,82,200 ; rsqrtps %xmm0,%xmm9
- DB 69,15,83,192 ; rcpps %xmm8,%xmm8
- DB 68,15,89,192 ; mulps %xmm0,%xmm8
- DB 65,15,83,193 ; rcpps %xmm9,%xmm0
- DB 65,15,89,192 ; mulps %xmm8,%xmm0
- DB 69,15,87,192 ; xorps %xmm8,%xmm8
- DB 65,15,95,192 ; maxps %xmm8,%xmm0
- DB 68,15,82,201 ; rsqrtps %xmm1,%xmm9
- DB 65,15,82,201 ; rsqrtps %xmm9,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 15,82,201 ; rsqrtps %xmm1,%xmm1
- DB 68,15,82,209 ; rsqrtps %xmm1,%xmm10
- DB 69,15,83,201 ; rcpps %xmm9,%xmm9
- DB 68,15,89,201 ; mulps %xmm1,%xmm9
- DB 65,15,83,202 ; rcpps %xmm10,%xmm1
- DB 65,15,89,201 ; mulps %xmm9,%xmm1
- DB 65,15,95,200 ; maxps %xmm8,%xmm1
- DB 68,15,82,202 ; rsqrtps %xmm2,%xmm9
- DB 65,15,82,209 ; rsqrtps %xmm9,%xmm2
- DB 15,82,210 ; rsqrtps %xmm2,%xmm2
- DB 15,82,210 ; rsqrtps %xmm2,%xmm2
- DB 15,82,210 ; rsqrtps %xmm2,%xmm2
- DB 68,15,82,210 ; rsqrtps %xmm2,%xmm10
- DB 69,15,83,201 ; rcpps %xmm9,%xmm9
- DB 68,15,89,202 ; mulps %xmm2,%xmm9
- DB 65,15,83,210 ; rcpps %xmm10,%xmm2
- DB 65,15,89,209 ; mulps %xmm9,%xmm2
- DB 65,15,95,208 ; maxps %xmm8,%xmm2
+ DB 72,129,236,152,0,0,0 ; sub $0x98,%rsp
+ DB 15,41,188,36,128,0,0,0 ; movaps %xmm7,0x80(%rsp)
+ DB 15,41,116,36,112 ; movaps %xmm6,0x70(%rsp)
+ DB 15,41,108,36,96 ; movaps %xmm5,0x60(%rsp)
+ DB 15,41,100,36,80 ; movaps %xmm4,0x50(%rsp)
+ DB 15,41,92,36,64 ; movaps %xmm3,0x40(%rsp)
+ DB 15,41,84,36,48 ; movaps %xmm2,0x30(%rsp)
+ DB 15,40,208 ; movaps %xmm0,%xmm2
+ DB 184,46,186,232,62 ; mov $0x3ee8ba2e,%eax
+ DB 15,91,194 ; cvtdq2ps %xmm2,%xmm0
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 15,89,195 ; mulps %xmm3,%xmm0
+ DB 68,15,40,219 ; movaps %xmm3,%xmm11
+ DB 68,15,41,92,36,16 ; movaps %xmm11,0x10(%rsp)
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 102,68,15,112,195,0 ; pshufd $0x0,%xmm3,%xmm8
+ DB 65,15,84,208 ; andps %xmm8,%xmm2
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 102,15,112,219,0 ; pshufd $0x0,%xmm3,%xmm3
+ DB 102,15,127,92,36,32 ; movdqa %xmm3,0x20(%rsp)
+ DB 15,86,211 ; orps %xmm3,%xmm2
+ DB 185,119,115,248,66 ; mov $0x42f87377,%ecx
+ DB 102,15,110,233 ; movd %ecx,%xmm5
+ DB 15,198,237,0 ; shufps $0x0,%xmm5,%xmm5
+ DB 15,92,197 ; subps %xmm5,%xmm0
+ DB 15,41,44,36 ; movaps %xmm5,(%rsp)
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 15,40,218 ; movaps %xmm2,%xmm3
+ DB 65,15,89,220 ; mulps %xmm12,%xmm3
+ DB 15,92,195 ; subps %xmm3,%xmm0
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 65,15,88,214 ; addps %xmm14,%xmm2
+ DB 65,15,40,221 ; movaps %xmm13,%xmm3
+ DB 15,94,218 ; divps %xmm2,%xmm3
+ DB 15,92,195 ; subps %xmm3,%xmm0
+ DB 102,68,15,110,248 ; movd %eax,%xmm15
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 65,15,89,199 ; mulps %xmm15,%xmm0
+ DB 243,15,91,208 ; cvttps2dq %xmm0,%xmm2
+ DB 15,91,210 ; cvtdq2ps %xmm2,%xmm2
+ DB 15,40,216 ; movaps %xmm0,%xmm3
+ DB 15,194,218,1 ; cmpltps %xmm2,%xmm3
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 102,68,15,110,208 ; movd %eax,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 65,15,84,218 ; andps %xmm10,%xmm3
+ DB 15,92,211 ; subps %xmm3,%xmm2
+ DB 15,40,224 ; movaps %xmm0,%xmm4
+ DB 15,92,226 ; subps %xmm2,%xmm4
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,201 ; movd %ecx,%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 65,15,88,193 ; addps %xmm9,%xmm0
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,15,110,249 ; movd %ecx,%xmm7
+ DB 15,198,255,0 ; shufps $0x0,%xmm7,%xmm7
+ DB 15,40,215 ; movaps %xmm7,%xmm2
+ DB 15,89,212 ; mulps %xmm4,%xmm2
+ DB 15,92,194 ; subps %xmm2,%xmm0
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 184,248,245,154,64 ; mov $0x409af5f8,%eax
+ DB 102,15,110,240 ; movd %eax,%xmm6
+ DB 15,198,246,0 ; shufps $0x0,%xmm6,%xmm6
+ DB 15,40,222 ; movaps %xmm6,%xmm3
+ DB 15,92,220 ; subps %xmm4,%xmm3
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 15,40,226 ; movaps %xmm2,%xmm4
+ DB 15,94,227 ; divps %xmm3,%xmm4
+ DB 15,88,224 ; addps %xmm0,%xmm4
+ DB 15,91,193 ; cvtdq2ps %xmm1,%xmm0
+ DB 65,15,89,195 ; mulps %xmm11,%xmm0
+ DB 65,15,84,200 ; andps %xmm8,%xmm1
+ DB 68,15,40,92,36,32 ; movaps 0x20(%rsp),%xmm11
+ DB 65,15,86,203 ; orps %xmm11,%xmm1
+ DB 15,92,197 ; subps %xmm5,%xmm0
+ DB 15,40,217 ; movaps %xmm1,%xmm3
+ DB 65,15,89,220 ; mulps %xmm12,%xmm3
+ DB 15,92,195 ; subps %xmm3,%xmm0
+ DB 65,15,88,206 ; addps %xmm14,%xmm1
+ DB 65,15,40,221 ; movaps %xmm13,%xmm3
+ DB 15,94,217 ; divps %xmm1,%xmm3
+ DB 15,92,195 ; subps %xmm3,%xmm0
+ DB 65,15,89,199 ; mulps %xmm15,%xmm0
+ DB 243,15,91,200 ; cvttps2dq %xmm0,%xmm1
+ DB 15,91,201 ; cvtdq2ps %xmm1,%xmm1
+ DB 15,40,216 ; movaps %xmm0,%xmm3
+ DB 15,194,217,1 ; cmpltps %xmm1,%xmm3
+ DB 65,15,84,218 ; andps %xmm10,%xmm3
+ DB 15,92,203 ; subps %xmm3,%xmm1
+ DB 15,40,216 ; movaps %xmm0,%xmm3
+ DB 15,92,217 ; subps %xmm1,%xmm3
+ DB 65,15,88,193 ; addps %xmm9,%xmm0
+ DB 15,40,207 ; movaps %xmm7,%xmm1
+ DB 15,89,203 ; mulps %xmm3,%xmm1
+ DB 15,92,193 ; subps %xmm1,%xmm0
+ DB 15,40,238 ; movaps %xmm6,%xmm5
+ DB 15,92,235 ; subps %xmm3,%xmm5
+ DB 15,40,202 ; movaps %xmm2,%xmm1
+ DB 15,94,205 ; divps %xmm5,%xmm1
+ DB 15,88,200 ; addps %xmm0,%xmm1
+ DB 15,40,92,36,48 ; movaps 0x30(%rsp),%xmm3
+ DB 15,91,195 ; cvtdq2ps %xmm3,%xmm0
+ DB 15,89,68,36,16 ; mulps 0x10(%rsp),%xmm0
+ DB 68,15,84,195 ; andps %xmm3,%xmm8
+ DB 69,15,86,195 ; orps %xmm11,%xmm8
+ DB 15,92,4,36 ; subps (%rsp),%xmm0
+ DB 69,15,89,224 ; mulps %xmm8,%xmm12
+ DB 65,15,92,196 ; subps %xmm12,%xmm0
+ DB 69,15,88,198 ; addps %xmm14,%xmm8
+ DB 69,15,94,232 ; divps %xmm8,%xmm13
+ DB 65,15,92,197 ; subps %xmm13,%xmm0
+ DB 65,15,89,199 ; mulps %xmm15,%xmm0
+ DB 243,15,91,216 ; cvttps2dq %xmm0,%xmm3
+ DB 15,91,219 ; cvtdq2ps %xmm3,%xmm3
+ DB 15,40,232 ; movaps %xmm0,%xmm5
+ DB 15,194,235,1 ; cmpltps %xmm3,%xmm5
+ DB 65,15,84,234 ; andps %xmm10,%xmm5
+ DB 15,92,221 ; subps %xmm5,%xmm3
+ DB 15,40,232 ; movaps %xmm0,%xmm5
+ DB 15,92,235 ; subps %xmm3,%xmm5
+ DB 65,15,88,193 ; addps %xmm9,%xmm0
+ DB 15,89,253 ; mulps %xmm5,%xmm7
+ DB 15,92,199 ; subps %xmm7,%xmm0
+ DB 15,92,245 ; subps %xmm5,%xmm6
+ DB 15,94,214 ; divps %xmm6,%xmm2
+ DB 15,88,208 ; addps %xmm0,%xmm2
+ DB 102,65,15,110,192 ; movd %r8d,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,89,224 ; mulps %xmm0,%xmm4
+ DB 15,89,200 ; mulps %xmm0,%xmm1
+ DB 15,89,208 ; mulps %xmm0,%xmm2
+ DB 102,15,91,220 ; cvtps2dq %xmm4,%xmm3
+ DB 102,15,91,201 ; cvtps2dq %xmm1,%xmm1
+ DB 102,15,91,210 ; cvtps2dq %xmm2,%xmm2
DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 102,15,40,195 ; movapd %xmm3,%xmm0
+ DB 15,40,92,36,64 ; movaps 0x40(%rsp),%xmm3
+ DB 15,40,100,36,80 ; movaps 0x50(%rsp),%xmm4
+ DB 15,40,108,36,96 ; movaps 0x60(%rsp),%xmm5
+ DB 15,40,116,36,112 ; movaps 0x70(%rsp),%xmm6
+ DB 15,40,188,36,128,0,0,0 ; movaps 0x80(%rsp),%xmm7
+ DB 72,129,196,152,0,0,0 ; add $0x98,%rsp
DB 255,224 ; jmpq *%rax
PUBLIC _sk_rgb_to_hsl_sse2
@@ -17388,9 +18142,9 @@ _sk_gather_i8_sse2 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 2aa7 <_sk_gather_i8_sse2+0xf>
+ DB 116,5 ; je 2e63 <_sk_gather_i8_sse2+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 2aa9 <_sk_gather_i8_sse2+0x11>
+ DB 235,2 ; jmp 2e65 <_sk_gather_i8_sse2+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1
@@ -18672,7 +19426,7 @@ _sk_linear_gradient_sse2 LABEL PROC
DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
DB 72,139,8 ; mov (%rax),%rcx
DB 72,133,201 ; test %rcx,%rcx
- DB 15,132,15,1,0,0 ; je 4060 <_sk_linear_gradient_sse2+0x149>
+ DB 15,132,15,1,0,0 ; je 441c <_sk_linear_gradient_sse2+0x149>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 69,15,87,192 ; xorps %xmm8,%xmm8
@@ -18733,8 +19487,8 @@ _sk_linear_gradient_sse2 LABEL PROC
DB 69,15,86,231 ; orps %xmm15,%xmm12
DB 72,131,192,36 ; add $0x24,%rax
DB 72,255,201 ; dec %rcx
- DB 15,133,8,255,255,255 ; jne 3f66 <_sk_linear_gradient_sse2+0x4f>
- DB 235,13 ; jmp 406d <_sk_linear_gradient_sse2+0x156>
+ DB 15,133,8,255,255,255 ; jne 4322 <_sk_linear_gradient_sse2+0x4f>
+ DB 235,13 ; jmp 4429 <_sk_linear_gradient_sse2+0x156>
DB 15,87,201 ; xorps %xmm1,%xmm1
DB 15,87,210 ; xorps %xmm2,%xmm2
DB 15,87,219 ; xorps %xmm3,%xmm3
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index ea0e56fde6..26e26676c6 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -480,32 +480,14 @@ STAGE(to_srgb) {
}
STAGE(from_2dot2) {
- auto fn = [](F x) {
- // x^(141/64) = x^(2.20312) is a great approximation of the true value, x^(2.2).
- // (note: x^(35/16) = x^(2.1875) is an okay one as well and would be quicker)
- F x16 = rsqrt(rsqrt(rsqrt(rsqrt(x)))), // x^(1/16) = x^(4/64);
- x64 = rsqrt(rsqrt(x16)); // x^(1/64)
-
- // 141/64 = 128/64 + 12/64 + 1/64
- return max((x*x) * (x16*x16*x16) * x64, 0);
- };
- r = fn(r);
- g = fn(g);
- b = fn(b);
+ r = approx_powf(r, C(2.2f)); // Exponent 2.2 is applied to r, g and b via approx_powf; a is not assigned in this stage.
+ g = approx_powf(g, C(2.2f)); // NOTE(review): the removed fn clamped its result with max(..., 0); these calls do not.
+ b = approx_powf(b, C(2.2f)); // TODO confirm approx_powf never yields a negative or NaN value for the inputs this stage sees.
}
STAGE(to_2dot2) {
- auto fn = [](F x) {
- // x^(29/64) is a very good approximation of the true value, x^(1/2.2).
- F x2 = rsqrt(x), // x^(-1/2)
- x32 = rsqrt(rsqrt(rsqrt(rsqrt(x2)))), // x^(-1/32)
- x64 = rsqrt(x32); // x^(+1/64)
-
- // 29/64 = 32/64 - 2/64 - 1/64
- return max(rcp(x2) * x32 * rcp(x64), 0);
- };
- r = fn(r);
- g = fn(g);
- b = fn(b);
+ r = approx_powf(r, C(1/2.2f)); // Exponent 1/2.2 is applied to r, g and b via approx_powf; a is not assigned in this stage.
+ g = approx_powf(g, C(1/2.2f)); // NOTE(review): the removed fn clamped its result with max(..., 0); these calls do not.
+ b = approx_powf(b, C(1/2.2f)); // TODO confirm approx_powf never yields a negative or NaN value for the inputs this stage sees.
}
STAGE(rgb_to_hsl) {
diff --git a/tests/ParametricStageTest.cpp b/tests/ParametricStageTest.cpp
index 5810148454..0185abcb36 100644
--- a/tests/ParametricStageTest.cpp
+++ b/tests/ParametricStageTest.cpp
@@ -73,3 +73,36 @@ DEF_TEST(Parametric_inv_1dot8, r) { check_error(r, 1/510.0f, 1/1.8f); }
DEF_TEST(Parametric_inv_2dot0, r) { check_error(r, 1/510.0f, 1/2.0f); }
DEF_TEST(Parametric_inv_2dot2, r) { check_error(r, 1/510.0f, 1/2.2f); }
DEF_TEST(Parametric_inv_2dot4, r) { check_error(r, 1/510.0f, 1/2.4f); }
+
+// As above, checking that the stage implements gamma within limit: runs the 256 values i/255 through load_f32 -> stage -> store_f32 and reports (ERRORF) every output whose absolute difference from powf(input, gamma) exceeds limit.
+static void check_error(skiatest::Reporter* r, float limit,
+ float gamma, SkRasterPipeline::StockStage stage) {
+
+ // We expect the gamma will only be applied to R,G,B, leaving A alone (every 4th float, i%4 == 3, is checked as alpha below).
+ // So this isn't quite exhaustive (values that land in an alpha slot never exercise the gamma curve), but it's pretty good.
+ float in[256], out[256];
+ for (int i = 0; i < 256; i++) {
+ in [i] = i / 255.0f;
+ out[i] = 0.0f; // Not likely important. Just being tidy.
+ }
+
+ const float* ip = in;
+ float* op = out;
+
+ SkRasterPipeline p;
+ p.append(SkRasterPipeline::load_f32, &ip);
+ p.append(stage);
+ p.append(SkRasterPipeline::store_f32, &op);
+ p.run(0, 256/4); // 256 floats / 4; presumably load_f32 and store_f32 move 4 floats (R,G,B,A) per pixel -- TODO confirm.
+
+ for (int i = 0; i < 256; i++) {
+ float want = powf(i/255.0f, (i%4) == 3 ? 1.0f // Exponent 1 for alpha slots: the value should come back unchanged.
+ : gamma);
+ float err = fabsf(out[i] - want);
+ if (err > limit) {
+ ERRORF(r, "At %d, error was %g (got %g, want %g)", i, err, out[i], want);
+ }
+ }
+}
+DEF_TEST(from_2dot2, r) { check_error(r, 1/510.f, 2.2f, SkRasterPipeline::from_2dot2); } // Limit 1/510 is half of one 1/255 step; expected curve is x^2.2.
+DEF_TEST( to_2dot2, r) { check_error(r, 1/510.f, 1/2.2f,SkRasterPipeline:: to_2dot2); } // Same limit; expected curve is x^(1/2.2).