aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2017-04-17 19:32:05 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-04-19 17:43:58 +0000
commit44375176c06f00682518a03d4983554ca8fb5b6a (patch)
tree553fc077c2060173f3022e9831978f23704bd2b6
parent8f2911f8407b5e151768690ed40ecedde6cd7ad8 (diff)
jumper, parametric_{r,g,b,a}
I've tried a couple of ideas for approx_powf(): 1) accumulate integer powers of x, then 4th roots, then 16th roots 2) continue 1) all the way to 256th roots 3) decompose into pow2 and log2, exploiting IEEE float layout 4) slightly tune constants used in 3) 5) accumulate integer powers of x, then 3+4) with different tuning 6) follow a source online, basically 5 with finesse 7) a new source quoting and improving on the method in 6). 7) seems perfect, enough that maybe we can explore improving its speed at cost of precision. Might be nice to get rid of those divides. If we allow a small tolerance (2-5) in our tests, we could use the very simple fast forms from 3) (e.g. PS 5). I wish I had some images to look at! Anything involving roots seems to be subverted by poor rsqrt precision. This change of course affects the pipelines created by the tests for exponential and full parametric gamma curves. What's less obvious is that it also means SkJumper can now for the first time run the pipeline created by the mixed gamma curves test. This means we now need to relax our tolerance for the table-based channel, just like we did when implementing table_{r,g,b,a}. This took me an embarrassingly long time to figure out. *face palm* Change-Id: I451ee3c970a0a4a4e285f8aa8f6ef709a654d247 Reviewed-on: https://skia-review.googlesource.com/13656 Commit-Queue: Mike Klein <mtklein@chromium.org> Reviewed-by: Matt Sarett <msarett@google.com> Reviewed-by: Herb Derby <herb@google.com>
-rw-r--r--src/jumper/SkJumper.cpp1
-rw-r--r--src/jumper/SkJumper.h5
-rw-r--r--src/jumper/SkJumper_generated.S2680
-rw-r--r--src/jumper/SkJumper_generated_win.S1942
-rw-r--r--src/jumper/SkJumper_stages.cpp35
-rw-r--r--src/jumper/SkJumper_vectors.h2
-rw-r--r--tests/ColorSpaceXformTest.cpp5
7 files changed, 4245 insertions, 425 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
index 7c72e85711..89fda93206 100644
--- a/src/jumper/SkJumper.cpp
+++ b/src/jumper/SkJumper.cpp
@@ -92,6 +92,7 @@ static K kConstants = {
M(byte_tables) \
M(byte_tables_rgb) \
M(table_r) M(table_g) M(table_b) M(table_a) \
+ M(parametric_r) M(parametric_g) M(parametric_b) M(parametric_a) \
M(load_a8) \
M(gather_a8) \
M(store_a8) \
diff --git a/src/jumper/SkJumper.h b/src/jumper/SkJumper.h
index 2f0db4e819..d4ab9684a4 100644
--- a/src/jumper/SkJumper.h
+++ b/src/jumper/SkJumper.h
@@ -96,4 +96,9 @@ struct SkJumper_TableCtx {
int size;
};
+// This should line up with the memory layout of SkColorSpaceTransferFn.
+struct SkJumper_ParametricTransferFunction {
+ float G, A,B,C,D,E,F;
+};
+
#endif//SkJumper_DEFINED
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 8ac3441ee4..f12e5e252d 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -1897,6 +1897,318 @@ _sk_table_a_aarch64:
.long 0x6e1c0623 // mov v3.s[3], v17.s[0]
.long 0xd61f0060 // br x3
+HIDDEN _sk_parametric_r_aarch64
+.globl _sk_parametric_r_aarch64
+FUNCTION(_sk_parametric_r_aarch64)
+_sk_parametric_r_aarch64:
+ .long 0xf9400028 // ldr x8, [x1]
+ .long 0x4f016696 // movi v22.4s, #0x34, lsl #24
+ .long 0x91004109 // add x9, x8, #0x10
+ .long 0x9100610a // add x10, x8, #0x18
+ .long 0x4d40c933 // ld1r {v19.4s}, [x9]
+ .long 0xaa0803e9 // mov x9, x8
+ .long 0xbd400d12 // ldr s18, [x8, #12]
+ .long 0x4d40c950 // ld1r {v16.4s}, [x10]
+ .long 0x4ddfc931 // ld1r {v17.4s}, [x9], #4
+ .long 0x9100210a // add x10, x8, #0x8
+ .long 0x4d40c954 // ld1r {v20.4s}, [x10]
+ .long 0x4f921010 // fmla v16.4s, v0.4s, v18.s[0]
+ .long 0xbd400135 // ldr s21, [x9]
+ .long 0x52b85fc9 // mov w9, #0xc2fe0000
+ .long 0x4e040d37 // dup v23.4s, w9
+ .long 0x52a80629 // mov w9, #0x40310000
+ .long 0x72922549 // movk w9, #0x912a
+ .long 0x4f951014 // fmla v20.4s, v0.4s, v21.s[0]
+ .long 0x6e20e660 // fcmge v0.4s, v19.4s, v0.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a7f7e9 // mov w9, #0x3fbf0000
+ .long 0x4f03d7f2 // movi v18.4s, #0x7f, msl #16
+ .long 0x7297eea9 // movk w9, #0xbf75
+ .long 0x4e21da95 // scvtf v21.4s, v20.4s
+ .long 0x4e321e92 // and v18.16b, v20.16b, v18.16b
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a7d689 // mov w9, #0x3eb40000
+ .long 0x72889f29 // movk w9, #0x44f9
+ .long 0x4e35ced7 // fmla v23.4s, v22.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7fb89 // mov w9, #0x3fdc0000
+ .long 0x4e33d6f3 // fadd v19.4s, v23.4s, v19.4s
+ .long 0x729d3469 // movk w9, #0xe9a3
+ .long 0x4f0177f2 // orr v18.4s, #0x3f, lsl #24
+ .long 0x4eb4ce53 // fmls v19.4s, v18.4s, v20.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a85e49 // mov w9, #0x42f20000
+ .long 0x72918a29 // movk w9, #0x8c51
+ .long 0x4e35d652 // fadd v18.4s, v18.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7f7c9 // mov w9, #0x3fbe0000
+ .long 0x729791a9 // movk w9, #0xbc8d
+ .long 0x6e32fe92 // fdiv v18.4s, v20.4s, v18.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a81349 // mov w9, #0x409a0000
+ .long 0x4eb2d672 // fsub v18.4s, v19.4s, v18.4s
+ .long 0x729ebf09 // movk w9, #0xf5f8
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a83ba9 // mov w9, #0x41dd0000
+ .long 0x4e219a32 // frintm v18.4s, v17.4s
+ .long 0x729a5fc9 // movk w9, #0xd2fe
+ .long 0x4e35d635 // fadd v21.4s, v17.4s, v21.4s
+ .long 0x4eb2d631 // fsub v17.4s, v17.4s, v18.4s
+ .long 0x4eb4ce35 // fmls v21.4s, v17.4s, v20.4s
+ .long 0x4eb1d671 // fsub v17.4s, v19.4s, v17.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x91005108 // add x8, x8, #0x14
+ .long 0x6e31fe71 // fdiv v17.4s, v19.4s, v17.4s
+ .long 0x4e31d6b1 // fadd v17.4s, v21.4s, v17.4s
+ .long 0x4d40c915 // ld1r {v21.4s}, [x8]
+ .long 0x4f026572 // movi v18.4s, #0x4b, lsl #24
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x6e21aa31 // fcvtnu v17.4s, v17.4s
+ .long 0xf9400423 // ldr x3, [x1, #8]
+ .long 0x4e35d631 // fadd v17.4s, v17.4s, v21.4s
+ .long 0x6f00e414 // movi v20.2d, #0x0
+ .long 0x6e711e00 // bsl v0.16b, v16.16b, v17.16b
+ .long 0x4f03f613 // fmov v19.4s, #1.000000000000000000e+00
+ .long 0x4e34f400 // fmax v0.4s, v0.4s, v20.4s
+ .long 0x4eb3f400 // fmin v0.4s, v0.4s, v19.4s
+ .long 0x91004021 // add x1, x1, #0x10
+ .long 0xd61f0060 // br x3
+
+HIDDEN _sk_parametric_g_aarch64
+.globl _sk_parametric_g_aarch64
+FUNCTION(_sk_parametric_g_aarch64)
+_sk_parametric_g_aarch64:
+ .long 0xf9400028 // ldr x8, [x1]
+ .long 0x4f016696 // movi v22.4s, #0x34, lsl #24
+ .long 0x91004109 // add x9, x8, #0x10
+ .long 0x9100610a // add x10, x8, #0x18
+ .long 0x4d40c933 // ld1r {v19.4s}, [x9]
+ .long 0xaa0803e9 // mov x9, x8
+ .long 0xbd400d12 // ldr s18, [x8, #12]
+ .long 0x4d40c950 // ld1r {v16.4s}, [x10]
+ .long 0x4ddfc931 // ld1r {v17.4s}, [x9], #4
+ .long 0x9100210a // add x10, x8, #0x8
+ .long 0x4d40c954 // ld1r {v20.4s}, [x10]
+ .long 0x4f921030 // fmla v16.4s, v1.4s, v18.s[0]
+ .long 0xbd400135 // ldr s21, [x9]
+ .long 0x52b85fc9 // mov w9, #0xc2fe0000
+ .long 0x4e040d37 // dup v23.4s, w9
+ .long 0x52a80629 // mov w9, #0x40310000
+ .long 0x72922549 // movk w9, #0x912a
+ .long 0x4f951034 // fmla v20.4s, v1.4s, v21.s[0]
+ .long 0x6e21e661 // fcmge v1.4s, v19.4s, v1.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a7f7e9 // mov w9, #0x3fbf0000
+ .long 0x4f03d7f2 // movi v18.4s, #0x7f, msl #16
+ .long 0x7297eea9 // movk w9, #0xbf75
+ .long 0x4e21da95 // scvtf v21.4s, v20.4s
+ .long 0x4e321e92 // and v18.16b, v20.16b, v18.16b
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a7d689 // mov w9, #0x3eb40000
+ .long 0x72889f29 // movk w9, #0x44f9
+ .long 0x4e35ced7 // fmla v23.4s, v22.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7fb89 // mov w9, #0x3fdc0000
+ .long 0x4e33d6f3 // fadd v19.4s, v23.4s, v19.4s
+ .long 0x729d3469 // movk w9, #0xe9a3
+ .long 0x4f0177f2 // orr v18.4s, #0x3f, lsl #24
+ .long 0x4eb4ce53 // fmls v19.4s, v18.4s, v20.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a85e49 // mov w9, #0x42f20000
+ .long 0x72918a29 // movk w9, #0x8c51
+ .long 0x4e35d652 // fadd v18.4s, v18.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7f7c9 // mov w9, #0x3fbe0000
+ .long 0x729791a9 // movk w9, #0xbc8d
+ .long 0x6e32fe92 // fdiv v18.4s, v20.4s, v18.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a81349 // mov w9, #0x409a0000
+ .long 0x4eb2d672 // fsub v18.4s, v19.4s, v18.4s
+ .long 0x729ebf09 // movk w9, #0xf5f8
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a83ba9 // mov w9, #0x41dd0000
+ .long 0x4e219a32 // frintm v18.4s, v17.4s
+ .long 0x729a5fc9 // movk w9, #0xd2fe
+ .long 0x4e35d635 // fadd v21.4s, v17.4s, v21.4s
+ .long 0x4eb2d631 // fsub v17.4s, v17.4s, v18.4s
+ .long 0x4eb4ce35 // fmls v21.4s, v17.4s, v20.4s
+ .long 0x4eb1d671 // fsub v17.4s, v19.4s, v17.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x91005108 // add x8, x8, #0x14
+ .long 0x6e31fe71 // fdiv v17.4s, v19.4s, v17.4s
+ .long 0x4e31d6b1 // fadd v17.4s, v21.4s, v17.4s
+ .long 0x4d40c915 // ld1r {v21.4s}, [x8]
+ .long 0x4f026572 // movi v18.4s, #0x4b, lsl #24
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x6e21aa31 // fcvtnu v17.4s, v17.4s
+ .long 0xf9400423 // ldr x3, [x1, #8]
+ .long 0x4e35d631 // fadd v17.4s, v17.4s, v21.4s
+ .long 0x6f00e414 // movi v20.2d, #0x0
+ .long 0x6e711e01 // bsl v1.16b, v16.16b, v17.16b
+ .long 0x4f03f613 // fmov v19.4s, #1.000000000000000000e+00
+ .long 0x4e34f421 // fmax v1.4s, v1.4s, v20.4s
+ .long 0x4eb3f421 // fmin v1.4s, v1.4s, v19.4s
+ .long 0x91004021 // add x1, x1, #0x10
+ .long 0xd61f0060 // br x3
+
+HIDDEN _sk_parametric_b_aarch64
+.globl _sk_parametric_b_aarch64
+FUNCTION(_sk_parametric_b_aarch64)
+_sk_parametric_b_aarch64:
+ .long 0xf9400028 // ldr x8, [x1]
+ .long 0x4f016696 // movi v22.4s, #0x34, lsl #24
+ .long 0x91004109 // add x9, x8, #0x10
+ .long 0x9100610a // add x10, x8, #0x18
+ .long 0x4d40c933 // ld1r {v19.4s}, [x9]
+ .long 0xaa0803e9 // mov x9, x8
+ .long 0xbd400d12 // ldr s18, [x8, #12]
+ .long 0x4d40c950 // ld1r {v16.4s}, [x10]
+ .long 0x4ddfc931 // ld1r {v17.4s}, [x9], #4
+ .long 0x9100210a // add x10, x8, #0x8
+ .long 0x4d40c954 // ld1r {v20.4s}, [x10]
+ .long 0x4f921050 // fmla v16.4s, v2.4s, v18.s[0]
+ .long 0xbd400135 // ldr s21, [x9]
+ .long 0x52b85fc9 // mov w9, #0xc2fe0000
+ .long 0x4e040d37 // dup v23.4s, w9
+ .long 0x52a80629 // mov w9, #0x40310000
+ .long 0x72922549 // movk w9, #0x912a
+ .long 0x4f951054 // fmla v20.4s, v2.4s, v21.s[0]
+ .long 0x6e22e662 // fcmge v2.4s, v19.4s, v2.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a7f7e9 // mov w9, #0x3fbf0000
+ .long 0x4f03d7f2 // movi v18.4s, #0x7f, msl #16
+ .long 0x7297eea9 // movk w9, #0xbf75
+ .long 0x4e21da95 // scvtf v21.4s, v20.4s
+ .long 0x4e321e92 // and v18.16b, v20.16b, v18.16b
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a7d689 // mov w9, #0x3eb40000
+ .long 0x72889f29 // movk w9, #0x44f9
+ .long 0x4e35ced7 // fmla v23.4s, v22.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7fb89 // mov w9, #0x3fdc0000
+ .long 0x4e33d6f3 // fadd v19.4s, v23.4s, v19.4s
+ .long 0x729d3469 // movk w9, #0xe9a3
+ .long 0x4f0177f2 // orr v18.4s, #0x3f, lsl #24
+ .long 0x4eb4ce53 // fmls v19.4s, v18.4s, v20.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a85e49 // mov w9, #0x42f20000
+ .long 0x72918a29 // movk w9, #0x8c51
+ .long 0x4e35d652 // fadd v18.4s, v18.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7f7c9 // mov w9, #0x3fbe0000
+ .long 0x729791a9 // movk w9, #0xbc8d
+ .long 0x6e32fe92 // fdiv v18.4s, v20.4s, v18.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a81349 // mov w9, #0x409a0000
+ .long 0x4eb2d672 // fsub v18.4s, v19.4s, v18.4s
+ .long 0x729ebf09 // movk w9, #0xf5f8
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a83ba9 // mov w9, #0x41dd0000
+ .long 0x4e219a32 // frintm v18.4s, v17.4s
+ .long 0x729a5fc9 // movk w9, #0xd2fe
+ .long 0x4e35d635 // fadd v21.4s, v17.4s, v21.4s
+ .long 0x4eb2d631 // fsub v17.4s, v17.4s, v18.4s
+ .long 0x4eb4ce35 // fmls v21.4s, v17.4s, v20.4s
+ .long 0x4eb1d671 // fsub v17.4s, v19.4s, v17.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x91005108 // add x8, x8, #0x14
+ .long 0x6e31fe71 // fdiv v17.4s, v19.4s, v17.4s
+ .long 0x4e31d6b1 // fadd v17.4s, v21.4s, v17.4s
+ .long 0x4d40c915 // ld1r {v21.4s}, [x8]
+ .long 0x4f026572 // movi v18.4s, #0x4b, lsl #24
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x6e21aa31 // fcvtnu v17.4s, v17.4s
+ .long 0xf9400423 // ldr x3, [x1, #8]
+ .long 0x4e35d631 // fadd v17.4s, v17.4s, v21.4s
+ .long 0x6f00e414 // movi v20.2d, #0x0
+ .long 0x6e711e02 // bsl v2.16b, v16.16b, v17.16b
+ .long 0x4f03f613 // fmov v19.4s, #1.000000000000000000e+00
+ .long 0x4e34f442 // fmax v2.4s, v2.4s, v20.4s
+ .long 0x4eb3f442 // fmin v2.4s, v2.4s, v19.4s
+ .long 0x91004021 // add x1, x1, #0x10
+ .long 0xd61f0060 // br x3
+
+HIDDEN _sk_parametric_a_aarch64
+.globl _sk_parametric_a_aarch64
+FUNCTION(_sk_parametric_a_aarch64)
+_sk_parametric_a_aarch64:
+ .long 0xf9400028 // ldr x8, [x1]
+ .long 0x4f016696 // movi v22.4s, #0x34, lsl #24
+ .long 0x91004109 // add x9, x8, #0x10
+ .long 0x9100610a // add x10, x8, #0x18
+ .long 0x4d40c933 // ld1r {v19.4s}, [x9]
+ .long 0xaa0803e9 // mov x9, x8
+ .long 0xbd400d12 // ldr s18, [x8, #12]
+ .long 0x4d40c950 // ld1r {v16.4s}, [x10]
+ .long 0x4ddfc931 // ld1r {v17.4s}, [x9], #4
+ .long 0x9100210a // add x10, x8, #0x8
+ .long 0x4d40c954 // ld1r {v20.4s}, [x10]
+ .long 0x4f921070 // fmla v16.4s, v3.4s, v18.s[0]
+ .long 0xbd400135 // ldr s21, [x9]
+ .long 0x52b85fc9 // mov w9, #0xc2fe0000
+ .long 0x4e040d37 // dup v23.4s, w9
+ .long 0x52a80629 // mov w9, #0x40310000
+ .long 0x72922549 // movk w9, #0x912a
+ .long 0x4f951074 // fmla v20.4s, v3.4s, v21.s[0]
+ .long 0x6e23e663 // fcmge v3.4s, v19.4s, v3.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a7f7e9 // mov w9, #0x3fbf0000
+ .long 0x4f03d7f2 // movi v18.4s, #0x7f, msl #16
+ .long 0x7297eea9 // movk w9, #0xbf75
+ .long 0x4e21da95 // scvtf v21.4s, v20.4s
+ .long 0x4e321e92 // and v18.16b, v20.16b, v18.16b
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a7d689 // mov w9, #0x3eb40000
+ .long 0x72889f29 // movk w9, #0x44f9
+ .long 0x4e35ced7 // fmla v23.4s, v22.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7fb89 // mov w9, #0x3fdc0000
+ .long 0x4e33d6f3 // fadd v19.4s, v23.4s, v19.4s
+ .long 0x729d3469 // movk w9, #0xe9a3
+ .long 0x4f0177f2 // orr v18.4s, #0x3f, lsl #24
+ .long 0x4eb4ce53 // fmls v19.4s, v18.4s, v20.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a85e49 // mov w9, #0x42f20000
+ .long 0x72918a29 // movk w9, #0x8c51
+ .long 0x4e35d652 // fadd v18.4s, v18.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7f7c9 // mov w9, #0x3fbe0000
+ .long 0x729791a9 // movk w9, #0xbc8d
+ .long 0x6e32fe92 // fdiv v18.4s, v20.4s, v18.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a81349 // mov w9, #0x409a0000
+ .long 0x4eb2d672 // fsub v18.4s, v19.4s, v18.4s
+ .long 0x729ebf09 // movk w9, #0xf5f8
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a83ba9 // mov w9, #0x41dd0000
+ .long 0x4e219a32 // frintm v18.4s, v17.4s
+ .long 0x729a5fc9 // movk w9, #0xd2fe
+ .long 0x4e35d635 // fadd v21.4s, v17.4s, v21.4s
+ .long 0x4eb2d631 // fsub v17.4s, v17.4s, v18.4s
+ .long 0x4eb4ce35 // fmls v21.4s, v17.4s, v20.4s
+ .long 0x4eb1d671 // fsub v17.4s, v19.4s, v17.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x91005108 // add x8, x8, #0x14
+ .long 0x6e31fe71 // fdiv v17.4s, v19.4s, v17.4s
+ .long 0x4e31d6b1 // fadd v17.4s, v21.4s, v17.4s
+ .long 0x4d40c915 // ld1r {v21.4s}, [x8]
+ .long 0x4f026572 // movi v18.4s, #0x4b, lsl #24
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x6e21aa31 // fcvtnu v17.4s, v17.4s
+ .long 0xf9400423 // ldr x3, [x1, #8]
+ .long 0x4e35d631 // fadd v17.4s, v17.4s, v21.4s
+ .long 0x6f00e414 // movi v20.2d, #0x0
+ .long 0x6e711e03 // bsl v3.16b, v16.16b, v17.16b
+ .long 0x4f03f613 // fmov v19.4s, #1.000000000000000000e+00
+ .long 0x4e34f463 // fmax v3.4s, v3.4s, v20.4s
+ .long 0x4eb3f463 // fmin v3.4s, v3.4s, v19.4s
+ .long 0x91004021 // add x1, x1, #0x10
+ .long 0xd61f0060 // br x3
+
HIDDEN _sk_load_a8_aarch64
.globl _sk_load_a8_aarch64
FUNCTION(_sk_load_a8_aarch64)
@@ -2049,9 +2361,9 @@ FUNCTION(_sk_gather_i8_aarch64)
_sk_gather_i8_aarch64:
.long 0xaa0103e8 // mov x8, x1
.long 0xf8408429 // ldr x9, [x1], #8
- .long 0xb4000069 // cbz x9, 1ae0 <sk_gather_i8_aarch64+0x14>
+ .long 0xb4000069 // cbz x9, 1f70 <sk_gather_i8_aarch64+0x14>
.long 0xaa0903ea // mov x10, x9
- .long 0x14000003 // b 1ae8 <sk_gather_i8_aarch64+0x1c>
+ .long 0x14000003 // b 1f78 <sk_gather_i8_aarch64+0x1c>
.long 0xf940050a // ldr x10, [x8, #8]
.long 0x91004101 // add x1, x8, #0x10
.long 0xf8410548 // ldr x8, [x10], #16
@@ -2900,7 +3212,7 @@ _sk_linear_gradient_aarch64:
.long 0x4d40c902 // ld1r {v2.4s}, [x8]
.long 0xf9400128 // ldr x8, [x9]
.long 0x4d40c943 // ld1r {v3.4s}, [x10]
- .long 0xb40006c8 // cbz x8, 26b4 <sk_linear_gradient_aarch64+0x100>
+ .long 0xb40006c8 // cbz x8, 2b44 <sk_linear_gradient_aarch64+0x100>
.long 0x6dbf23e9 // stp d9, d8, [sp, #-16]!
.long 0xf9400529 // ldr x9, [x9, #8]
.long 0x6f00e413 // movi v19.2d, #0x0
@@ -2951,9 +3263,9 @@ _sk_linear_gradient_aarch64:
.long 0xd1000508 // sub x8, x8, #0x1
.long 0x6e771fd0 // bsl v16.16b, v30.16b, v23.16b
.long 0x91009129 // add x9, x9, #0x24
- .long 0xb5fffaa8 // cbnz x8, 25fc <sk_linear_gradient_aarch64+0x48>
+ .long 0xb5fffaa8 // cbnz x8, 2a8c <sk_linear_gradient_aarch64+0x48>
.long 0x6cc123e9 // ldp d9, d8, [sp], #16
- .long 0x14000005 // b 26c4 <sk_linear_gradient_aarch64+0x110>
+ .long 0x14000005 // b 2b54 <sk_linear_gradient_aarch64+0x110>
.long 0x6f00e414 // movi v20.2d, #0x0
.long 0x6f00e412 // movi v18.2d, #0x0
.long 0x6f00e411 // movi v17.2d, #0x0
@@ -5234,6 +5546,386 @@ _sk_table_a_vfp4:
.long 0xe8bd4010 // pop {r4, lr}
.long 0xe12fff1c // bx ip
+HIDDEN _sk_parametric_r_vfp4
+.globl _sk_parametric_r_vfp4
+FUNCTION(_sk_parametric_r_vfp4)
+_sk_parametric_r_vfp4:
+ .long 0xe92d4800 // push {fp, lr}
+ .long 0xed2d8b06 // vpush {d8-d10}
+ .long 0xe591e000 // ldr lr, [r1]
+ .long 0xeddf3b43 // vldr d19, [pc, #268]
+ .long 0xed9f8a52 // vldr s16, [pc, #328]
+ .long 0xe1a0300e // mov r3, lr
+ .long 0xeddf4b46 // vldr d20, [pc, #280]
+ .long 0xf4e30c9d // vld1.32 {d16[]}, [r3 :32]!
+ .long 0xe591c004 // ldr ip, [r1, #4]
+ .long 0xe2811008 // add r1, r1, #8
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3008 // add r3, lr, #8
+ .long 0xf4e32c9f // vld1.32 {d18[]}, [r3 :32]
+ .long 0xe28e300c // add r3, lr, #12
+ .long 0xf2412c90 // vfma.f32 d18, d17, d0
+ .long 0xf2c71d1f // vmov.i32 d17, #8388607
+ .long 0xf24211b1 // vand d17, d18, d17
+ .long 0xf2c3171f // vorr.i32 d17, #1056964608
+ .long 0xf3fb2622 // vcvt.f32.s32 d18, d18
+ .long 0xf2019da3 // vadd.f32 d9, d17, d19
+ .long 0xf2c33614 // vmov.i32 d19, #872415232
+ .long 0xf3422db3 // vmul.f32 d18, d18, d19
+ .long 0xeddf3b32 // vldr d19, [pc, #200]
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2422da3 // vadd.f32 d18, d18, d19
+ .long 0xeddf3b30 // vldr d19, [pc, #192]
+ .long 0xed9f8a3c // vldr s16, [pc, #240]
+ .long 0xf3411db3 // vmul.f32 d17, d17, d19
+ .long 0xf2c03010 // vmov.i32 d19, #0
+ .long 0xf2422da4 // vadd.f32 d18, d18, d20
+ .long 0xeddf4b2f // vldr d20, [pc, #188]
+ .long 0xf2621da1 // vsub.f32 d17, d18, d17
+ .long 0xf2611d8a // vsub.f32 d17, d17, d10
+ .long 0xf3400db1 // vmul.f32 d16, d16, d17
+ .long 0xf3fb1720 // vcvt.s32.f32 d17, d16
+ .long 0xf3fb1621 // vcvt.f32.s32 d17, d17
+ .long 0xf3612ea0 // vcgt.f32 d18, d17, d16
+ .long 0xf35421b3 // vbsl d18, d20, d19
+ .long 0xeddf4b2d // vldr d20, [pc, #180]
+ .long 0xf2611da2 // vsub.f32 d17, d17, d18
+ .long 0xeddf2b27 // vldr d18, [pc, #156]
+ .long 0xf2601da1 // vsub.f32 d17, d16, d17
+ .long 0xf2400da4 // vadd.f32 d16, d16, d20
+ .long 0xf2229da1 // vsub.f32 d9, d18, d17
+ .long 0xeddf2b25 // vldr d18, [pc, #148]
+ .long 0xf3411db2 // vmul.f32 d17, d17, d18
+ .long 0xf2c3261f // vmov.i32 d18, #1056964608
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2600da1 // vsub.f32 d16, d16, d17
+ .long 0xf2c4161b // vmov.i32 d17, #1258291200
+ .long 0xf2400d8a // vadd.f32 d16, d16, d10
+ .long 0xf2402cb1 // vfma.f32 d18, d16, d17
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3018 // add r3, lr, #24
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3010 // add r3, lr, #16
+ .long 0xf2401c90 // vfma.f32 d17, d16, d0
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3014 // add r3, lr, #20
+ .long 0xf3400e80 // vcge.f32 d16, d16, d0
+ .long 0xf4e34c9f // vld1.32 {d20[]}, [r3 :32]
+ .long 0xf3fb27a2 // vcvt.u32.f32 d18, d18
+ .long 0xf2442da2 // vadd.f32 d18, d20, d18
+ .long 0xf35101b2 // vbsl d16, d17, d18
+ .long 0xf2c71f10 // vmov.f32 d17, #1
+ .long 0xf2400fa3 // vmax.f32 d16, d16, d19
+ .long 0xf2200fa1 // vmin.f32 d0, d16, d17
+ .long 0xecbd8b06 // vpop {d8-d10}
+ .long 0xe8bd4800 // pop {fp, lr}
+ .long 0xe12fff1c // bx ip
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x3fdce9a3 // .word 0x3fdce9a3
+ .long 0x41ddd2fe // .word 0x41ddd2fe
+
+HIDDEN _sk_parametric_g_vfp4
+.globl _sk_parametric_g_vfp4
+FUNCTION(_sk_parametric_g_vfp4)
+_sk_parametric_g_vfp4:
+ .long 0xe92d4800 // push {fp, lr}
+ .long 0xed2d8b06 // vpush {d8-d10}
+ .long 0xe591e000 // ldr lr, [r1]
+ .long 0xeddf3b43 // vldr d19, [pc, #268]
+ .long 0xed9f8a52 // vldr s16, [pc, #328]
+ .long 0xe1a0300e // mov r3, lr
+ .long 0xeddf4b46 // vldr d20, [pc, #280]
+ .long 0xf4e30c9d // vld1.32 {d16[]}, [r3 :32]!
+ .long 0xe591c004 // ldr ip, [r1, #4]
+ .long 0xe2811008 // add r1, r1, #8
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3008 // add r3, lr, #8
+ .long 0xf4e32c9f // vld1.32 {d18[]}, [r3 :32]
+ .long 0xe28e300c // add r3, lr, #12
+ .long 0xf2412c91 // vfma.f32 d18, d17, d1
+ .long 0xf2c71d1f // vmov.i32 d17, #8388607
+ .long 0xf24211b1 // vand d17, d18, d17
+ .long 0xf2c3171f // vorr.i32 d17, #1056964608
+ .long 0xf3fb2622 // vcvt.f32.s32 d18, d18
+ .long 0xf2019da3 // vadd.f32 d9, d17, d19
+ .long 0xf2c33614 // vmov.i32 d19, #872415232
+ .long 0xf3422db3 // vmul.f32 d18, d18, d19
+ .long 0xeddf3b32 // vldr d19, [pc, #200]
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2422da3 // vadd.f32 d18, d18, d19
+ .long 0xeddf3b30 // vldr d19, [pc, #192]
+ .long 0xed9f8a3c // vldr s16, [pc, #240]
+ .long 0xf3411db3 // vmul.f32 d17, d17, d19
+ .long 0xf2c03010 // vmov.i32 d19, #0
+ .long 0xf2422da4 // vadd.f32 d18, d18, d20
+ .long 0xeddf4b2f // vldr d20, [pc, #188]
+ .long 0xf2621da1 // vsub.f32 d17, d18, d17
+ .long 0xf2611d8a // vsub.f32 d17, d17, d10
+ .long 0xf3400db1 // vmul.f32 d16, d16, d17
+ .long 0xf3fb1720 // vcvt.s32.f32 d17, d16
+ .long 0xf3fb1621 // vcvt.f32.s32 d17, d17
+ .long 0xf3612ea0 // vcgt.f32 d18, d17, d16
+ .long 0xf35421b3 // vbsl d18, d20, d19
+ .long 0xeddf4b2d // vldr d20, [pc, #180]
+ .long 0xf2611da2 // vsub.f32 d17, d17, d18
+ .long 0xeddf2b27 // vldr d18, [pc, #156]
+ .long 0xf2601da1 // vsub.f32 d17, d16, d17
+ .long 0xf2400da4 // vadd.f32 d16, d16, d20
+ .long 0xf2229da1 // vsub.f32 d9, d18, d17
+ .long 0xeddf2b25 // vldr d18, [pc, #148]
+ .long 0xf3411db2 // vmul.f32 d17, d17, d18
+ .long 0xf2c3261f // vmov.i32 d18, #1056964608
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2600da1 // vsub.f32 d16, d16, d17
+ .long 0xf2c4161b // vmov.i32 d17, #1258291200
+ .long 0xf2400d8a // vadd.f32 d16, d16, d10
+ .long 0xf2402cb1 // vfma.f32 d18, d16, d17
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3018 // add r3, lr, #24
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3010 // add r3, lr, #16
+ .long 0xf2401c91 // vfma.f32 d17, d16, d1
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3014 // add r3, lr, #20
+ .long 0xf3400e81 // vcge.f32 d16, d16, d1
+ .long 0xf4e34c9f // vld1.32 {d20[]}, [r3 :32]
+ .long 0xf3fb27a2 // vcvt.u32.f32 d18, d18
+ .long 0xf2442da2 // vadd.f32 d18, d20, d18
+ .long 0xf35101b2 // vbsl d16, d17, d18
+ .long 0xf2c71f10 // vmov.f32 d17, #1
+ .long 0xf2400fa3 // vmax.f32 d16, d16, d19
+ .long 0xf2201fa1 // vmin.f32 d1, d16, d17
+ .long 0xecbd8b06 // vpop {d8-d10}
+ .long 0xe8bd4800 // pop {fp, lr}
+ .long 0xe12fff1c // bx ip
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x3fdce9a3 // .word 0x3fdce9a3
+ .long 0x41ddd2fe // .word 0x41ddd2fe
+
+HIDDEN _sk_parametric_b_vfp4
+.globl _sk_parametric_b_vfp4
+FUNCTION(_sk_parametric_b_vfp4)
+_sk_parametric_b_vfp4:
+ .long 0xe92d4800 // push {fp, lr}
+ .long 0xed2d8b06 // vpush {d8-d10}
+ .long 0xe591e000 // ldr lr, [r1]
+ .long 0xeddf3b43 // vldr d19, [pc, #268]
+ .long 0xed9f8a52 // vldr s16, [pc, #328]
+ .long 0xe1a0300e // mov r3, lr
+ .long 0xeddf4b46 // vldr d20, [pc, #280]
+ .long 0xf4e30c9d // vld1.32 {d16[]}, [r3 :32]!
+ .long 0xe591c004 // ldr ip, [r1, #4]
+ .long 0xe2811008 // add r1, r1, #8
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3008 // add r3, lr, #8
+ .long 0xf4e32c9f // vld1.32 {d18[]}, [r3 :32]
+ .long 0xe28e300c // add r3, lr, #12
+ .long 0xf2412c92 // vfma.f32 d18, d17, d2
+ .long 0xf2c71d1f // vmov.i32 d17, #8388607
+ .long 0xf24211b1 // vand d17, d18, d17
+ .long 0xf2c3171f // vorr.i32 d17, #1056964608
+ .long 0xf3fb2622 // vcvt.f32.s32 d18, d18
+ .long 0xf2019da3 // vadd.f32 d9, d17, d19
+ .long 0xf2c33614 // vmov.i32 d19, #872415232
+ .long 0xf3422db3 // vmul.f32 d18, d18, d19
+ .long 0xeddf3b32 // vldr d19, [pc, #200]
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2422da3 // vadd.f32 d18, d18, d19
+ .long 0xeddf3b30 // vldr d19, [pc, #192]
+ .long 0xed9f8a3c // vldr s16, [pc, #240]
+ .long 0xf3411db3 // vmul.f32 d17, d17, d19
+ .long 0xf2c03010 // vmov.i32 d19, #0
+ .long 0xf2422da4 // vadd.f32 d18, d18, d20
+ .long 0xeddf4b2f // vldr d20, [pc, #188]
+ .long 0xf2621da1 // vsub.f32 d17, d18, d17
+ .long 0xf2611d8a // vsub.f32 d17, d17, d10
+ .long 0xf3400db1 // vmul.f32 d16, d16, d17
+ .long 0xf3fb1720 // vcvt.s32.f32 d17, d16
+ .long 0xf3fb1621 // vcvt.f32.s32 d17, d17
+ .long 0xf3612ea0 // vcgt.f32 d18, d17, d16
+ .long 0xf35421b3 // vbsl d18, d20, d19
+ .long 0xeddf4b2d // vldr d20, [pc, #180]
+ .long 0xf2611da2 // vsub.f32 d17, d17, d18
+ .long 0xeddf2b27 // vldr d18, [pc, #156]
+ .long 0xf2601da1 // vsub.f32 d17, d16, d17
+ .long 0xf2400da4 // vadd.f32 d16, d16, d20
+ .long 0xf2229da1 // vsub.f32 d9, d18, d17
+ .long 0xeddf2b25 // vldr d18, [pc, #148]
+ .long 0xf3411db2 // vmul.f32 d17, d17, d18
+ .long 0xf2c3261f // vmov.i32 d18, #1056964608
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2600da1 // vsub.f32 d16, d16, d17
+ .long 0xf2c4161b // vmov.i32 d17, #1258291200
+ .long 0xf2400d8a // vadd.f32 d16, d16, d10
+ .long 0xf2402cb1 // vfma.f32 d18, d16, d17
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3018 // add r3, lr, #24
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3010 // add r3, lr, #16
+ .long 0xf2401c92 // vfma.f32 d17, d16, d2
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3014 // add r3, lr, #20
+ .long 0xf3400e82 // vcge.f32 d16, d16, d2
+ .long 0xf4e34c9f // vld1.32 {d20[]}, [r3 :32]
+ .long 0xf3fb27a2 // vcvt.u32.f32 d18, d18
+ .long 0xf2442da2 // vadd.f32 d18, d20, d18
+ .long 0xf35101b2 // vbsl d16, d17, d18
+ .long 0xf2c71f10 // vmov.f32 d17, #1
+ .long 0xf2400fa3 // vmax.f32 d16, d16, d19
+ .long 0xf2202fa1 // vmin.f32 d2, d16, d17
+ .long 0xecbd8b06 // vpop {d8-d10}
+ .long 0xe8bd4800 // pop {fp, lr}
+ .long 0xe12fff1c // bx ip
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x3fdce9a3 // .word 0x3fdce9a3
+ .long 0x41ddd2fe // .word 0x41ddd2fe
+
+HIDDEN _sk_parametric_a_vfp4
+.globl _sk_parametric_a_vfp4
+FUNCTION(_sk_parametric_a_vfp4)
+_sk_parametric_a_vfp4:
+ .long 0xe92d4800 // push {fp, lr}
+ .long 0xed2d8b06 // vpush {d8-d10}
+ .long 0xe591e000 // ldr lr, [r1]
+ .long 0xeddf3b43 // vldr d19, [pc, #268]
+ .long 0xed9f8a52 // vldr s16, [pc, #328]
+ .long 0xe1a0300e // mov r3, lr
+ .long 0xeddf4b46 // vldr d20, [pc, #280]
+ .long 0xf4e30c9d // vld1.32 {d16[]}, [r3 :32]!
+ .long 0xe591c004 // ldr ip, [r1, #4]
+ .long 0xe2811008 // add r1, r1, #8
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3008 // add r3, lr, #8
+ .long 0xf4e32c9f // vld1.32 {d18[]}, [r3 :32]
+ .long 0xe28e300c // add r3, lr, #12
+ .long 0xf2412c93 // vfma.f32 d18, d17, d3
+ .long 0xf2c71d1f // vmov.i32 d17, #8388607
+ .long 0xf24211b1 // vand d17, d18, d17
+ .long 0xf2c3171f // vorr.i32 d17, #1056964608
+ .long 0xf3fb2622 // vcvt.f32.s32 d18, d18
+ .long 0xf2019da3 // vadd.f32 d9, d17, d19
+ .long 0xf2c33614 // vmov.i32 d19, #872415232
+ .long 0xf3422db3 // vmul.f32 d18, d18, d19
+ .long 0xeddf3b32 // vldr d19, [pc, #200]
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2422da3 // vadd.f32 d18, d18, d19
+ .long 0xeddf3b30 // vldr d19, [pc, #192]
+ .long 0xed9f8a3c // vldr s16, [pc, #240]
+ .long 0xf3411db3 // vmul.f32 d17, d17, d19
+ .long 0xf2c03010 // vmov.i32 d19, #0
+ .long 0xf2422da4 // vadd.f32 d18, d18, d20
+ .long 0xeddf4b2f // vldr d20, [pc, #188]
+ .long 0xf2621da1 // vsub.f32 d17, d18, d17
+ .long 0xf2611d8a // vsub.f32 d17, d17, d10
+ .long 0xf3400db1 // vmul.f32 d16, d16, d17
+ .long 0xf3fb1720 // vcvt.s32.f32 d17, d16
+ .long 0xf3fb1621 // vcvt.f32.s32 d17, d17
+ .long 0xf3612ea0 // vcgt.f32 d18, d17, d16
+ .long 0xf35421b3 // vbsl d18, d20, d19
+ .long 0xeddf4b2d // vldr d20, [pc, #180]
+ .long 0xf2611da2 // vsub.f32 d17, d17, d18
+ .long 0xeddf2b27 // vldr d18, [pc, #156]
+ .long 0xf2601da1 // vsub.f32 d17, d16, d17
+ .long 0xf2400da4 // vadd.f32 d16, d16, d20
+ .long 0xf2229da1 // vsub.f32 d9, d18, d17
+ .long 0xeddf2b25 // vldr d18, [pc, #148]
+ .long 0xf3411db2 // vmul.f32 d17, d17, d18
+ .long 0xf2c3261f // vmov.i32 d18, #1056964608
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2600da1 // vsub.f32 d16, d16, d17
+ .long 0xf2c4161b // vmov.i32 d17, #1258291200
+ .long 0xf2400d8a // vadd.f32 d16, d16, d10
+ .long 0xf2402cb1 // vfma.f32 d18, d16, d17
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3018 // add r3, lr, #24
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3010 // add r3, lr, #16
+ .long 0xf2401c93 // vfma.f32 d17, d16, d3
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3014 // add r3, lr, #20
+ .long 0xf3400e83 // vcge.f32 d16, d16, d3
+ .long 0xf4e34c9f // vld1.32 {d20[]}, [r3 :32]
+ .long 0xf3fb27a2 // vcvt.u32.f32 d18, d18
+ .long 0xf2442da2 // vadd.f32 d18, d20, d18
+ .long 0xf35101b2 // vbsl d16, d17, d18
+ .long 0xf2c71f10 // vmov.f32 d17, #1
+ .long 0xf2400fa3 // vmax.f32 d16, d16, d19
+ .long 0xf2203fa1 // vmin.f32 d3, d16, d17
+ .long 0xecbd8b06 // vpop {d8-d10}
+ .long 0xe8bd4800 // pop {fp, lr}
+ .long 0xe12fff1c // bx ip
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x3fdce9a3 // .word 0x3fdce9a3
+ .long 0x41ddd2fe // .word 0x41ddd2fe
+
HIDDEN _sk_load_a8_vfp4
.globl _sk_load_a8_vfp4
FUNCTION(_sk_load_a8_vfp4)
@@ -6414,7 +7106,7 @@ _sk_linear_gradient_vfp4:
.long 0xe494c00c // ldr ip, [r4], #12
.long 0xf4a41c9f // vld1.32 {d1[]}, [r4 :32]
.long 0xe35c0000 // cmp ip, #0
- .long 0x0a000036 // beq 2a18 <sk_linear_gradient_vfp4+0x110>
+ .long 0x0a000036 // beq 2fb8 <sk_linear_gradient_vfp4+0x110>
.long 0xe59e3004 // ldr r3, [lr, #4]
.long 0xf2c01010 // vmov.i32 d17, #0
.long 0xf2c07010 // vmov.i32 d23, #0
@@ -6464,12 +7156,12 @@ _sk_linear_gradient_vfp4:
.long 0xf26371b3 // vorr d23, d19, d19
.long 0xf26481b4 // vorr d24, d20, d20
.long 0xf26561b5 // vorr d22, d21, d21
- .long 0x1affffd3 // bne 2954 <sk_linear_gradient_vfp4+0x4c>
+ .long 0x1affffd3 // bne 2ef4 <sk_linear_gradient_vfp4+0x4c>
.long 0xf26c01bc // vorr d16, d28, d28
.long 0xf22b11bb // vorr d1, d27, d27
.long 0xf22a21ba // vorr d2, d26, d26
.long 0xf22931b9 // vorr d3, d25, d25
- .long 0xea000003 // b 2a28 <sk_linear_gradient_vfp4+0x120>
+ .long 0xea000003 // b 2fc8 <sk_linear_gradient_vfp4+0x120>
.long 0xf2c05010 // vmov.i32 d21, #0
.long 0xf2c04010 // vmov.i32 d20, #0
.long 0xf2c03010 // vmov.i32 d19, #0
@@ -8887,6 +9579,342 @@ _sk_table_a_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_parametric_r_hsw
+.globl _sk_parametric_r_hsw
+FUNCTION(_sk_parametric_r_hsw)
+_sk_parametric_r_hsw:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8
+ .byte 196,65,124,194,192,2 // vcmpleps %ymm8,%ymm0,%ymm8
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10
+ .byte 196,66,125,168,202 // vfmadd213ps %ymm10,%ymm0,%ymm9
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11
+ .byte 196,66,125,168,211 // vfmadd213ps %ymm11,%ymm0,%ymm10
+ .byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,98,125,88,224 // vpbroadcastd %xmm0,%ymm12
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,98,125,88,232 // vpbroadcastd %xmm0,%ymm13
+ .byte 196,66,37,186,236 // vfmsub231ps %ymm12,%ymm11,%ymm13
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0
+ .byte 196,65,125,219,210 // vpand %ymm10,%ymm0,%ymm10
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0
+ .byte 197,45,235,208 // vpor %ymm0,%ymm10,%ymm10
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0
+ .byte 197,20,88,216 // vaddps %ymm0,%ymm13,%ymm11
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,98,125,88,224 // vpbroadcastd %xmm0,%ymm12
+ .byte 196,66,45,172,227 // vfnmadd213ps %ymm11,%ymm10,%ymm12
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,98,125,88,216 // vpbroadcastd %xmm0,%ymm11
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0
+ .byte 197,172,88,192 // vaddps %ymm0,%ymm10,%ymm0
+ .byte 197,164,94,192 // vdivps %ymm0,%ymm11,%ymm0
+ .byte 197,156,92,192 // vsubps %ymm0,%ymm12,%ymm0
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
+ .byte 197,44,89,216 // vmulps %ymm0,%ymm10,%ymm11
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,98,125,88,224 // vpbroadcastd %xmm0,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0
+ .byte 196,65,124,88,219 // vaddps %ymm11,%ymm0,%ymm11
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,98,125,88,232 // vpbroadcastd %xmm0,%ymm13
+ .byte 196,66,45,172,235 // vfnmadd213ps %ymm11,%ymm10,%ymm13
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,98,125,88,216 // vpbroadcastd %xmm0,%ymm11
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0
+ .byte 196,193,124,92,194 // vsubps %ymm10,%ymm0,%ymm0
+ .byte 197,164,94,192 // vdivps %ymm0,%ymm11,%ymm0
+ .byte 197,148,88,192 // vaddps %ymm0,%ymm13,%ymm0
+ .byte 197,156,89,192 // vmulps %ymm0,%ymm12,%ymm0
+ .byte 197,253,91,192 // vcvtps2dq %ymm0,%ymm0
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
+ .byte 196,193,124,88,194 // vaddps %ymm10,%ymm0,%ymm0
+ .byte 196,195,125,74,193,128 // vblendvps %ymm8,%ymm9,%ymm0,%ymm0
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
+ .byte 196,65,124,95,192 // vmaxps %ymm8,%ymm0,%ymm8
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0
+ .byte 197,188,93,192 // vminps %ymm0,%ymm8,%ymm0
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_g_hsw
+.globl _sk_parametric_g_hsw
+FUNCTION(_sk_parametric_g_hsw)
+_sk_parametric_g_hsw:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8
+ .byte 196,65,116,194,192,2 // vcmpleps %ymm8,%ymm1,%ymm8
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10
+ .byte 196,66,117,168,202 // vfmadd213ps %ymm10,%ymm1,%ymm9
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11
+ .byte 196,66,117,168,211 // vfmadd213ps %ymm11,%ymm1,%ymm10
+ .byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,98,125,88,225 // vpbroadcastd %xmm1,%ymm12
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,98,125,88,233 // vpbroadcastd %xmm1,%ymm13
+ .byte 196,66,37,186,236 // vfmsub231ps %ymm12,%ymm11,%ymm13
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1
+ .byte 196,65,117,219,210 // vpand %ymm10,%ymm1,%ymm10
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1
+ .byte 197,45,235,209 // vpor %ymm1,%ymm10,%ymm10
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1
+ .byte 197,20,88,217 // vaddps %ymm1,%ymm13,%ymm11
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,98,125,88,225 // vpbroadcastd %xmm1,%ymm12
+ .byte 196,66,45,172,227 // vfnmadd213ps %ymm11,%ymm10,%ymm12
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,98,125,88,217 // vpbroadcastd %xmm1,%ymm11
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1
+ .byte 197,172,88,201 // vaddps %ymm1,%ymm10,%ymm1
+ .byte 197,164,94,201 // vdivps %ymm1,%ymm11,%ymm1
+ .byte 197,156,92,201 // vsubps %ymm1,%ymm12,%ymm1
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
+ .byte 197,44,89,217 // vmulps %ymm1,%ymm10,%ymm11
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,98,125,88,225 // vpbroadcastd %xmm1,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1
+ .byte 196,65,116,88,219 // vaddps %ymm11,%ymm1,%ymm11
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,98,125,88,233 // vpbroadcastd %xmm1,%ymm13
+ .byte 196,66,45,172,235 // vfnmadd213ps %ymm11,%ymm10,%ymm13
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,98,125,88,217 // vpbroadcastd %xmm1,%ymm11
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1
+ .byte 196,193,116,92,202 // vsubps %ymm10,%ymm1,%ymm1
+ .byte 197,164,94,201 // vdivps %ymm1,%ymm11,%ymm1
+ .byte 197,148,88,201 // vaddps %ymm1,%ymm13,%ymm1
+ .byte 197,156,89,201 // vmulps %ymm1,%ymm12,%ymm1
+ .byte 197,253,91,201 // vcvtps2dq %ymm1,%ymm1
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
+ .byte 196,193,116,88,202 // vaddps %ymm10,%ymm1,%ymm1
+ .byte 196,195,117,74,201,128 // vblendvps %ymm8,%ymm9,%ymm1,%ymm1
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
+ .byte 196,65,116,95,192 // vmaxps %ymm8,%ymm1,%ymm8
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 197,249,110,200 // vmovd %eax,%xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1
+ .byte 197,188,93,201 // vminps %ymm1,%ymm8,%ymm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_b_hsw
+.globl _sk_parametric_b_hsw
+FUNCTION(_sk_parametric_b_hsw)
+_sk_parametric_b_hsw:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8
+ .byte 196,65,108,194,192,2 // vcmpleps %ymm8,%ymm2,%ymm8
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10
+ .byte 196,66,109,168,202 // vfmadd213ps %ymm10,%ymm2,%ymm9
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11
+ .byte 196,66,109,168,211 // vfmadd213ps %ymm11,%ymm2,%ymm10
+ .byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,98,125,88,226 // vpbroadcastd %xmm2,%ymm12
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,98,125,88,234 // vpbroadcastd %xmm2,%ymm13
+ .byte 196,66,37,186,236 // vfmsub231ps %ymm12,%ymm11,%ymm13
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2
+ .byte 196,65,109,219,210 // vpand %ymm10,%ymm2,%ymm10
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2
+ .byte 197,45,235,210 // vpor %ymm2,%ymm10,%ymm10
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2
+ .byte 197,20,88,218 // vaddps %ymm2,%ymm13,%ymm11
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,98,125,88,226 // vpbroadcastd %xmm2,%ymm12
+ .byte 196,66,45,172,227 // vfnmadd213ps %ymm11,%ymm10,%ymm12
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,98,125,88,218 // vpbroadcastd %xmm2,%ymm11
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2
+ .byte 197,172,88,210 // vaddps %ymm2,%ymm10,%ymm2
+ .byte 197,164,94,210 // vdivps %ymm2,%ymm11,%ymm2
+ .byte 197,156,92,210 // vsubps %ymm2,%ymm12,%ymm2
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
+ .byte 197,44,89,218 // vmulps %ymm2,%ymm10,%ymm11
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,98,125,88,226 // vpbroadcastd %xmm2,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2
+ .byte 196,65,108,88,219 // vaddps %ymm11,%ymm2,%ymm11
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,98,125,88,234 // vpbroadcastd %xmm2,%ymm13
+ .byte 196,66,45,172,235 // vfnmadd213ps %ymm11,%ymm10,%ymm13
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,98,125,88,218 // vpbroadcastd %xmm2,%ymm11
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2
+ .byte 196,193,108,92,210 // vsubps %ymm10,%ymm2,%ymm2
+ .byte 197,164,94,210 // vdivps %ymm2,%ymm11,%ymm2
+ .byte 197,148,88,210 // vaddps %ymm2,%ymm13,%ymm2
+ .byte 197,156,89,210 // vmulps %ymm2,%ymm12,%ymm2
+ .byte 197,253,91,210 // vcvtps2dq %ymm2,%ymm2
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
+ .byte 196,193,108,88,210 // vaddps %ymm10,%ymm2,%ymm2
+ .byte 196,195,109,74,209,128 // vblendvps %ymm8,%ymm9,%ymm2,%ymm2
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
+ .byte 196,65,108,95,192 // vmaxps %ymm8,%ymm2,%ymm8
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2
+ .byte 197,188,93,210 // vminps %ymm2,%ymm8,%ymm2
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_a_hsw
+.globl _sk_parametric_a_hsw
+FUNCTION(_sk_parametric_a_hsw)
+_sk_parametric_a_hsw:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8
+ .byte 196,65,100,194,192,2 // vcmpleps %ymm8,%ymm3,%ymm8
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10
+ .byte 196,66,101,168,202 // vfmadd213ps %ymm10,%ymm3,%ymm9
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11
+ .byte 196,66,101,168,211 // vfmadd213ps %ymm11,%ymm3,%ymm10
+ .byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,98,125,88,227 // vpbroadcastd %xmm3,%ymm12
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,98,125,88,235 // vpbroadcastd %xmm3,%ymm13
+ .byte 196,66,37,186,236 // vfmsub231ps %ymm12,%ymm11,%ymm13
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3
+ .byte 196,65,101,219,210 // vpand %ymm10,%ymm3,%ymm10
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3
+ .byte 197,45,235,211 // vpor %ymm3,%ymm10,%ymm10
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3
+ .byte 197,20,88,219 // vaddps %ymm3,%ymm13,%ymm11
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,98,125,88,227 // vpbroadcastd %xmm3,%ymm12
+ .byte 196,66,45,172,227 // vfnmadd213ps %ymm11,%ymm10,%ymm12
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,98,125,88,219 // vpbroadcastd %xmm3,%ymm11
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3
+ .byte 197,172,88,219 // vaddps %ymm3,%ymm10,%ymm3
+ .byte 197,164,94,219 // vdivps %ymm3,%ymm11,%ymm3
+ .byte 197,156,92,219 // vsubps %ymm3,%ymm12,%ymm3
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
+ .byte 197,44,89,219 // vmulps %ymm3,%ymm10,%ymm11
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,98,125,88,227 // vpbroadcastd %xmm3,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3
+ .byte 196,65,100,88,219 // vaddps %ymm11,%ymm3,%ymm11
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,98,125,88,235 // vpbroadcastd %xmm3,%ymm13
+ .byte 196,66,45,172,235 // vfnmadd213ps %ymm11,%ymm10,%ymm13
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,98,125,88,219 // vpbroadcastd %xmm3,%ymm11
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3
+ .byte 196,193,100,92,218 // vsubps %ymm10,%ymm3,%ymm3
+ .byte 197,164,94,219 // vdivps %ymm3,%ymm11,%ymm3
+ .byte 197,148,88,219 // vaddps %ymm3,%ymm13,%ymm3
+ .byte 197,156,89,219 // vmulps %ymm3,%ymm12,%ymm3
+ .byte 197,253,91,219 // vcvtps2dq %ymm3,%ymm3
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
+ .byte 196,193,100,88,218 // vaddps %ymm10,%ymm3,%ymm3
+ .byte 196,195,101,74,217,128 // vblendvps %ymm8,%ymm9,%ymm3,%ymm3
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
+ .byte 196,65,100,95,192 // vmaxps %ymm8,%ymm3,%ymm8
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 197,249,110,216 // vmovd %eax,%xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3
+ .byte 197,188,93,219 // vminps %ymm3,%ymm8,%ymm3
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
HIDDEN _sk_load_a8_hsw
.globl _sk_load_a8_hsw
FUNCTION(_sk_load_a8_hsw)
@@ -8896,7 +9924,7 @@ _sk_load_a8_hsw:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,50 // jne 1d78 <_sk_load_a8_hsw+0x42>
+ .byte 117,50 // jne 23a8 <_sk_load_a8_hsw+0x42>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
@@ -8919,9 +9947,9 @@ _sk_load_a8_hsw:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 1d80 <_sk_load_a8_hsw+0x4a>
+ .byte 117,234 // jne 23b0 <_sk_load_a8_hsw+0x4a>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,173 // jmp 1d4a <_sk_load_a8_hsw+0x14>
+ .byte 235,173 // jmp 237a <_sk_load_a8_hsw+0x14>
HIDDEN _sk_gather_a8_hsw
.globl _sk_gather_a8_hsw
@@ -8996,7 +10024,7 @@ _sk_store_a8_hsw:
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 196,65,57,103,192 // vpackuswb %xmm8,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 1eb5 <_sk_store_a8_hsw+0x3b>
+ .byte 117,10 // jne 24e5 <_sk_store_a8_hsw+0x3b>
.byte 196,65,123,17,4,57 // vmovsd %xmm8,(%r9,%rdi,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -9004,10 +10032,10 @@ _sk_store_a8_hsw:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 1eb1 <_sk_store_a8_hsw+0x37>
+ .byte 119,236 // ja 24e1 <_sk_store_a8_hsw+0x37>
.byte 196,66,121,48,192 // vpmovzxbw %xmm8,%xmm8
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,67,0,0,0 // lea 0x43(%rip),%r8 # 1f18 <_sk_store_a8_hsw+0x9e>
+ .byte 76,141,5,67,0,0,0 // lea 0x43(%rip),%r8 # 2548 <_sk_store_a8_hsw+0x9e>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -9018,7 +10046,7 @@ _sk_store_a8_hsw:
.byte 196,67,121,20,68,57,2,4 // vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
.byte 196,67,121,20,68,57,1,2 // vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
.byte 196,67,121,20,4,57,0 // vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- .byte 235,154 // jmp 1eb1 <_sk_store_a8_hsw+0x37>
+ .byte 235,154 // jmp 24e1 <_sk_store_a8_hsw+0x37>
.byte 144 // nop
.byte 246,255 // idiv %bh
.byte 255 // (bad)
@@ -9052,7 +10080,7 @@ _sk_load_g8_hsw:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,60 // jne 1f80 <_sk_load_g8_hsw+0x4c>
+ .byte 117,60 // jne 25b0 <_sk_load_g8_hsw+0x4c>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
@@ -9077,9 +10105,9 @@ _sk_load_g8_hsw:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 1f88 <_sk_load_g8_hsw+0x54>
+ .byte 117,234 // jne 25b8 <_sk_load_g8_hsw+0x54>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,163 // jmp 1f48 <_sk_load_g8_hsw+0x14>
+ .byte 235,163 // jmp 2578 <_sk_load_g8_hsw+0x14>
HIDDEN _sk_gather_g8_hsw
.globl _sk_gather_g8_hsw
@@ -9148,9 +10176,9 @@ _sk_gather_i8_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 209b <_sk_gather_i8_hsw+0xf>
+ .byte 116,5 // je 26cb <_sk_gather_i8_hsw+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 209d <_sk_gather_i8_hsw+0x11>
+ .byte 235,2 // jmp 26cd <_sk_gather_i8_hsw+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 65,87 // push %r15
.byte 65,86 // push %r14
@@ -9223,7 +10251,7 @@ _sk_load_565_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,149,0,0,0 // jne 224f <_sk_load_565_hsw+0xa3>
+ .byte 15,133,149,0,0,0 // jne 287f <_sk_load_565_hsw+0xa3>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 196,226,125,51,208 // vpmovzxwd %xmm0,%ymm2
.byte 184,0,248,0,0 // mov $0xf800,%eax
@@ -9263,9 +10291,9 @@ _sk_load_565_hsw:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,89,255,255,255 // ja 21c0 <_sk_load_565_hsw+0x14>
+ .byte 15,135,89,255,255,255 // ja 27f0 <_sk_load_565_hsw+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 22bc <_sk_load_565_hsw+0x110>
+ .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 28ec <_sk_load_565_hsw+0x110>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -9277,12 +10305,12 @@ _sk_load_565_hsw:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,5,255,255,255 // jmpq 21c0 <_sk_load_565_hsw+0x14>
+ .byte 233,5,255,255,255 // jmpq 27f0 <_sk_load_565_hsw+0x14>
.byte 144 // nop
.byte 243,255 // repz (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 235,255 // jmp 22c1 <_sk_load_565_hsw+0x115>
+ .byte 235,255 // jmp 28f1 <_sk_load_565_hsw+0x115>
.byte 255 // (bad)
.byte 255,227 // jmpq *%rbx
.byte 255 // (bad)
@@ -9409,7 +10437,7 @@ _sk_store_565_hsw:
.byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 2487 <_sk_store_565_hsw+0x6c>
+ .byte 117,10 // jne 2ab7 <_sk_store_565_hsw+0x6c>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -9417,9 +10445,9 @@ _sk_store_565_hsw:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 2483 <_sk_store_565_hsw+0x68>
+ .byte 119,236 // ja 2ab3 <_sk_store_565_hsw+0x68>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 24e4 <_sk_store_565_hsw+0xc9>
+ .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 2b14 <_sk_store_565_hsw+0xc9>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -9430,7 +10458,7 @@ _sk_store_565_hsw:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 2483 <_sk_store_565_hsw+0x68>
+ .byte 235,159 // jmp 2ab3 <_sk_store_565_hsw+0x68>
.byte 247,255 // idiv %edi
.byte 255 // (bad)
.byte 255 // (bad)
@@ -9461,7 +10489,7 @@ _sk_load_4444_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,179,0,0,0 // jne 25c1 <_sk_load_4444_hsw+0xc1>
+ .byte 15,133,179,0,0,0 // jne 2bf1 <_sk_load_4444_hsw+0xc1>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 196,98,125,51,200 // vpmovzxwd %xmm0,%ymm9
.byte 184,0,240,0,0 // mov $0xf000,%eax
@@ -9507,9 +10535,9 @@ _sk_load_4444_hsw:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,59,255,255,255 // ja 2514 <_sk_load_4444_hsw+0x14>
+ .byte 15,135,59,255,255,255 // ja 2b44 <_sk_load_4444_hsw+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,76,0,0,0 // lea 0x4c(%rip),%r9 # 2630 <_sk_load_4444_hsw+0x130>
+ .byte 76,141,13,76,0,0,0 // lea 0x4c(%rip),%r9 # 2c60 <_sk_load_4444_hsw+0x130>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -9521,13 +10549,13 @@ _sk_load_4444_hsw:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,231,254,255,255 // jmpq 2514 <_sk_load_4444_hsw+0x14>
+ .byte 233,231,254,255,255 // jmpq 2b44 <_sk_load_4444_hsw+0x14>
.byte 15,31,0 // nopl (%rax)
.byte 241 // icebp
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,225 // jmpq ffffffffe2002638 <_sk_callback_hsw+0xffffffffe1ffeace>
+ .byte 233,255,255,255,225 // jmpq ffffffffe2002c68 <_sk_callback_hsw+0xffffffffe1ffeace>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
@@ -9659,7 +10687,7 @@ _sk_store_4444_hsw:
.byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 281f <_sk_store_4444_hsw+0x72>
+ .byte 117,10 // jne 2e4f <_sk_store_4444_hsw+0x72>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -9667,9 +10695,9 @@ _sk_store_4444_hsw:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 281b <_sk_store_4444_hsw+0x6e>
+ .byte 119,236 // ja 2e4b <_sk_store_4444_hsw+0x6e>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 287c <_sk_store_4444_hsw+0xcf>
+ .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 2eac <_sk_store_4444_hsw+0xcf>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -9680,7 +10708,7 @@ _sk_store_4444_hsw:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 281b <_sk_store_4444_hsw+0x6e>
+ .byte 235,159 // jmp 2e4b <_sk_store_4444_hsw+0x6e>
.byte 247,255 // idiv %edi
.byte 255 // (bad)
.byte 255 // (bad)
@@ -9713,7 +10741,7 @@ _sk_load_8888_hsw:
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
.byte 76,3,8 // add (%rax),%r9
.byte 77,133,192 // test %r8,%r8
- .byte 117,104 // jne 2915 <_sk_load_8888_hsw+0x7d>
+ .byte 117,104 // jne 2f45 <_sk_load_8888_hsw+0x7d>
.byte 196,193,126,111,25 // vmovdqu (%r9),%ymm3
.byte 184,255,0,0,0 // mov $0xff,%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
@@ -9746,7 +10774,7 @@ _sk_load_8888_hsw:
.byte 196,225,249,110,192 // vmovq %rax,%xmm0
.byte 196,226,125,33,192 // vpmovsxbd %xmm0,%ymm0
.byte 196,194,125,140,25 // vpmaskmovd (%r9),%ymm0,%ymm3
- .byte 233,116,255,255,255 // jmpq 28b2 <_sk_load_8888_hsw+0x1a>
+ .byte 233,116,255,255,255 // jmpq 2ee2 <_sk_load_8888_hsw+0x1a>
HIDDEN _sk_gather_8888_hsw
.globl _sk_gather_8888_hsw
@@ -9810,7 +10838,7 @@ _sk_store_8888_hsw:
.byte 196,65,45,235,192 // vpor %ymm8,%ymm10,%ymm8
.byte 196,65,53,235,192 // vpor %ymm8,%ymm9,%ymm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,12 // jne 2a38 <_sk_store_8888_hsw+0x74>
+ .byte 117,12 // jne 3068 <_sk_store_8888_hsw+0x74>
.byte 196,65,126,127,1 // vmovdqu %ymm8,(%r9)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,137,193 // mov %r8,%rcx
@@ -9823,7 +10851,7 @@ _sk_store_8888_hsw:
.byte 196,97,249,110,200 // vmovq %rax,%xmm9
.byte 196,66,125,33,201 // vpmovsxbd %xmm9,%ymm9
.byte 196,66,53,142,1 // vpmaskmovd %ymm8,%ymm9,(%r9)
- .byte 235,211 // jmp 2a31 <_sk_store_8888_hsw+0x6d>
+ .byte 235,211 // jmp 3061 <_sk_store_8888_hsw+0x6d>
HIDDEN _sk_load_f16_hsw
.globl _sk_load_f16_hsw
@@ -9832,7 +10860,7 @@ _sk_load_f16_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,139,0 // mov (%rax),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,97 // jne 2ac9 <_sk_load_f16_hsw+0x6b>
+ .byte 117,97 // jne 30f9 <_sk_load_f16_hsw+0x6b>
.byte 197,121,16,4,248 // vmovupd (%rax,%rdi,8),%xmm8
.byte 197,249,16,84,248,16 // vmovupd 0x10(%rax,%rdi,8),%xmm2
.byte 197,249,16,92,248,32 // vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -9858,29 +10886,29 @@ _sk_load_f16_hsw:
.byte 197,123,16,4,248 // vmovsd (%rax,%rdi,8),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,79 // je 2b28 <_sk_load_f16_hsw+0xca>
+ .byte 116,79 // je 3158 <_sk_load_f16_hsw+0xca>
.byte 197,57,22,68,248,8 // vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,67 // jb 2b28 <_sk_load_f16_hsw+0xca>
+ .byte 114,67 // jb 3158 <_sk_load_f16_hsw+0xca>
.byte 197,251,16,84,248,16 // vmovsd 0x10(%rax,%rdi,8),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,68 // je 2b35 <_sk_load_f16_hsw+0xd7>
+ .byte 116,68 // je 3165 <_sk_load_f16_hsw+0xd7>
.byte 197,233,22,84,248,24 // vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,56 // jb 2b35 <_sk_load_f16_hsw+0xd7>
+ .byte 114,56 // jb 3165 <_sk_load_f16_hsw+0xd7>
.byte 197,251,16,92,248,32 // vmovsd 0x20(%rax,%rdi,8),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,114,255,255,255 // je 2a7f <_sk_load_f16_hsw+0x21>
+ .byte 15,132,114,255,255,255 // je 30af <_sk_load_f16_hsw+0x21>
.byte 197,225,22,92,248,40 // vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,98,255,255,255 // jb 2a7f <_sk_load_f16_hsw+0x21>
+ .byte 15,130,98,255,255,255 // jb 30af <_sk_load_f16_hsw+0x21>
.byte 197,122,126,76,248,48 // vmovq 0x30(%rax,%rdi,8),%xmm9
- .byte 233,87,255,255,255 // jmpq 2a7f <_sk_load_f16_hsw+0x21>
+ .byte 233,87,255,255,255 // jmpq 30af <_sk_load_f16_hsw+0x21>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,74,255,255,255 // jmpq 2a7f <_sk_load_f16_hsw+0x21>
+ .byte 233,74,255,255,255 // jmpq 30af <_sk_load_f16_hsw+0x21>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,65,255,255,255 // jmpq 2a7f <_sk_load_f16_hsw+0x21>
+ .byte 233,65,255,255,255 // jmpq 30af <_sk_load_f16_hsw+0x21>
HIDDEN _sk_gather_f16_hsw
.globl _sk_gather_f16_hsw
@@ -9938,7 +10966,7 @@ _sk_store_f16_hsw:
.byte 196,65,57,98,205 // vpunpckldq %xmm13,%xmm8,%xmm9
.byte 196,65,57,106,197 // vpunpckhdq %xmm13,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,27 // jne 2c2d <_sk_store_f16_hsw+0x65>
+ .byte 117,27 // jne 325d <_sk_store_f16_hsw+0x65>
.byte 197,120,17,28,248 // vmovups %xmm11,(%rax,%rdi,8)
.byte 197,120,17,84,248,16 // vmovups %xmm10,0x10(%rax,%rdi,8)
.byte 197,120,17,76,248,32 // vmovups %xmm9,0x20(%rax,%rdi,8)
@@ -9947,22 +10975,22 @@ _sk_store_f16_hsw:
.byte 255,224 // jmpq *%rax
.byte 197,121,214,28,248 // vmovq %xmm11,(%rax,%rdi,8)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,241 // je 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 116,241 // je 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,23,92,248,8 // vmovhpd %xmm11,0x8(%rax,%rdi,8)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,229 // jb 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 114,229 // jb 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,214,84,248,16 // vmovq %xmm10,0x10(%rax,%rdi,8)
- .byte 116,221 // je 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 116,221 // je 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,23,84,248,24 // vmovhpd %xmm10,0x18(%rax,%rdi,8)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,209 // jb 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 114,209 // jb 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,214,76,248,32 // vmovq %xmm9,0x20(%rax,%rdi,8)
- .byte 116,201 // je 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 116,201 // je 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,23,76,248,40 // vmovhpd %xmm9,0x28(%rax,%rdi,8)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,189 // jb 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 114,189 // jb 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,214,68,248,48 // vmovq %xmm8,0x30(%rax,%rdi,8)
- .byte 235,181 // jmp 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 235,181 // jmp 3259 <_sk_store_f16_hsw+0x61>
HIDDEN _sk_load_u16_be_hsw
.globl _sk_load_u16_be_hsw
@@ -9972,7 +11000,7 @@ _sk_load_u16_be_hsw:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,189,0,0,0,0 // lea 0x0(,%rdi,4),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,205,0,0,0 // jne 2d57 <_sk_load_u16_be_hsw+0xe3>
+ .byte 15,133,205,0,0,0 // jne 3387 <_sk_load_u16_be_hsw+0xe3>
.byte 196,65,121,16,4,64 // vmovupd (%r8,%rax,2),%xmm8
.byte 196,193,121,16,84,64,16 // vmovupd 0x10(%r8,%rax,2),%xmm2
.byte 196,193,121,16,92,64,32 // vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -10021,29 +11049,29 @@ _sk_load_u16_be_hsw:
.byte 196,65,123,16,4,64 // vmovsd (%r8,%rax,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,85 // je 2dbd <_sk_load_u16_be_hsw+0x149>
+ .byte 116,85 // je 33ed <_sk_load_u16_be_hsw+0x149>
.byte 196,65,57,22,68,64,8 // vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,72 // jb 2dbd <_sk_load_u16_be_hsw+0x149>
+ .byte 114,72 // jb 33ed <_sk_load_u16_be_hsw+0x149>
.byte 196,193,123,16,84,64,16 // vmovsd 0x10(%r8,%rax,2),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,72 // je 2dca <_sk_load_u16_be_hsw+0x156>
+ .byte 116,72 // je 33fa <_sk_load_u16_be_hsw+0x156>
.byte 196,193,105,22,84,64,24 // vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,59 // jb 2dca <_sk_load_u16_be_hsw+0x156>
+ .byte 114,59 // jb 33fa <_sk_load_u16_be_hsw+0x156>
.byte 196,193,123,16,92,64,32 // vmovsd 0x20(%r8,%rax,2),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,5,255,255,255 // je 2ca5 <_sk_load_u16_be_hsw+0x31>
+ .byte 15,132,5,255,255,255 // je 32d5 <_sk_load_u16_be_hsw+0x31>
.byte 196,193,97,22,92,64,40 // vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,244,254,255,255 // jb 2ca5 <_sk_load_u16_be_hsw+0x31>
+ .byte 15,130,244,254,255,255 // jb 32d5 <_sk_load_u16_be_hsw+0x31>
.byte 196,65,122,126,76,64,48 // vmovq 0x30(%r8,%rax,2),%xmm9
- .byte 233,232,254,255,255 // jmpq 2ca5 <_sk_load_u16_be_hsw+0x31>
+ .byte 233,232,254,255,255 // jmpq 32d5 <_sk_load_u16_be_hsw+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,219,254,255,255 // jmpq 2ca5 <_sk_load_u16_be_hsw+0x31>
+ .byte 233,219,254,255,255 // jmpq 32d5 <_sk_load_u16_be_hsw+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,210,254,255,255 // jmpq 2ca5 <_sk_load_u16_be_hsw+0x31>
+ .byte 233,210,254,255,255 // jmpq 32d5 <_sk_load_u16_be_hsw+0x31>
HIDDEN _sk_load_rgb_u16_be_hsw
.globl _sk_load_rgb_u16_be_hsw
@@ -10053,7 +11081,7 @@ _sk_load_rgb_u16_be_hsw:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,127 // lea (%rdi,%rdi,2),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,211,0,0,0 // jne 2eb8 <_sk_load_rgb_u16_be_hsw+0xe5>
+ .byte 15,133,211,0,0,0 // jne 34e8 <_sk_load_rgb_u16_be_hsw+0xe5>
.byte 196,193,122,111,4,64 // vmovdqu (%r8,%rax,2),%xmm0
.byte 196,193,122,111,84,64,12 // vmovdqu 0xc(%r8,%rax,2),%xmm2
.byte 196,193,122,111,76,64,24 // vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -10103,36 +11131,36 @@ _sk_load_rgb_u16_be_hsw:
.byte 196,193,121,110,4,64 // vmovd (%r8,%rax,2),%xmm0
.byte 196,193,121,196,68,64,4,2 // vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 117,5 // jne 2ed1 <_sk_load_rgb_u16_be_hsw+0xfe>
- .byte 233,72,255,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 117,5 // jne 3501 <_sk_load_rgb_u16_be_hsw+0xfe>
+ .byte 233,72,255,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
.byte 196,193,121,110,76,64,6 // vmovd 0x6(%r8,%rax,2),%xmm1
.byte 196,65,113,196,68,64,10,2 // vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,26 // jb 2f00 <_sk_load_rgb_u16_be_hsw+0x12d>
+ .byte 114,26 // jb 3530 <_sk_load_rgb_u16_be_hsw+0x12d>
.byte 196,193,121,110,76,64,12 // vmovd 0xc(%r8,%rax,2),%xmm1
.byte 196,193,113,196,84,64,16,2 // vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 117,10 // jne 2f05 <_sk_load_rgb_u16_be_hsw+0x132>
- .byte 233,25,255,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
- .byte 233,20,255,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 117,10 // jne 3535 <_sk_load_rgb_u16_be_hsw+0x132>
+ .byte 233,25,255,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,20,255,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
.byte 196,193,121,110,76,64,18 // vmovd 0x12(%r8,%rax,2),%xmm1
.byte 196,65,113,196,76,64,22,2 // vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,26 // jb 2f34 <_sk_load_rgb_u16_be_hsw+0x161>
+ .byte 114,26 // jb 3564 <_sk_load_rgb_u16_be_hsw+0x161>
.byte 196,193,121,110,76,64,24 // vmovd 0x18(%r8,%rax,2),%xmm1
.byte 196,193,113,196,76,64,28,2 // vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 117,10 // jne 2f39 <_sk_load_rgb_u16_be_hsw+0x166>
- .byte 233,229,254,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
- .byte 233,224,254,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 117,10 // jne 3569 <_sk_load_rgb_u16_be_hsw+0x166>
+ .byte 233,229,254,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,224,254,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
.byte 196,193,121,110,92,64,30 // vmovd 0x1e(%r8,%rax,2),%xmm3
.byte 196,65,97,196,92,64,34,2 // vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,20 // jb 2f62 <_sk_load_rgb_u16_be_hsw+0x18f>
+ .byte 114,20 // jb 3592 <_sk_load_rgb_u16_be_hsw+0x18f>
.byte 196,193,121,110,92,64,36 // vmovd 0x24(%r8,%rax,2),%xmm3
.byte 196,193,97,196,92,64,40,2 // vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- .byte 233,183,254,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
- .byte 233,178,254,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,183,254,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,178,254,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
HIDDEN _sk_store_u16_be_hsw
.globl _sk_store_u16_be_hsw
@@ -10181,7 +11209,7 @@ _sk_store_u16_be_hsw:
.byte 196,65,17,98,200 // vpunpckldq %xmm8,%xmm13,%xmm9
.byte 196,65,17,106,192 // vpunpckhdq %xmm8,%xmm13,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,31 // jne 3062 <_sk_store_u16_be_hsw+0xfb>
+ .byte 117,31 // jne 3692 <_sk_store_u16_be_hsw+0xfb>
.byte 196,1,120,17,28,72 // vmovups %xmm11,(%r8,%r9,2)
.byte 196,1,120,17,84,72,16 // vmovups %xmm10,0x10(%r8,%r9,2)
.byte 196,1,120,17,76,72,32 // vmovups %xmm9,0x20(%r8,%r9,2)
@@ -10190,22 +11218,22 @@ _sk_store_u16_be_hsw:
.byte 255,224 // jmpq *%rax
.byte 196,1,121,214,28,72 // vmovq %xmm11,(%r8,%r9,2)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 116,240 // je 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,23,92,72,8 // vmovhpd %xmm11,0x8(%r8,%r9,2)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 114,227 // jb 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,214,84,72,16 // vmovq %xmm10,0x10(%r8,%r9,2)
- .byte 116,218 // je 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 116,218 // je 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,23,84,72,24 // vmovhpd %xmm10,0x18(%r8,%r9,2)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 114,205 // jb 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,214,76,72,32 // vmovq %xmm9,0x20(%r8,%r9,2)
- .byte 116,196 // je 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 116,196 // je 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,23,76,72,40 // vmovhpd %xmm9,0x28(%r8,%r9,2)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,183 // jb 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 114,183 // jb 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,214,68,72,48 // vmovq %xmm8,0x30(%r8,%r9,2)
- .byte 235,174 // jmp 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 235,174 // jmp 368e <_sk_store_u16_be_hsw+0xf7>
HIDDEN _sk_load_f32_hsw
.globl _sk_load_f32_hsw
@@ -10213,10 +11241,10 @@ FUNCTION(_sk_load_f32_hsw)
_sk_load_f32_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 119,110 // ja 3126 <_sk_load_f32_hsw+0x76>
+ .byte 119,110 // ja 3756 <_sk_load_f32_hsw+0x76>
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
- .byte 76,141,21,134,0,0,0 // lea 0x86(%rip),%r10 # 3150 <_sk_load_f32_hsw+0xa0>
+ .byte 76,141,21,134,0,0,0 // lea 0x86(%rip),%r10 # 3780 <_sk_load_f32_hsw+0xa0>
.byte 73,99,4,138 // movslq (%r10,%rcx,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -10275,7 +11303,7 @@ _sk_store_f32_hsw:
.byte 196,65,37,20,196 // vunpcklpd %ymm12,%ymm11,%ymm8
.byte 196,65,37,21,220 // vunpckhpd %ymm12,%ymm11,%ymm11
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,55 // jne 31dd <_sk_store_f32_hsw+0x6d>
+ .byte 117,55 // jne 380d <_sk_store_f32_hsw+0x6d>
.byte 196,67,45,24,225,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
.byte 196,67,61,24,235,1 // vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
.byte 196,67,45,6,201,49 // vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -10288,22 +11316,22 @@ _sk_store_f32_hsw:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,17,20,128 // vmovupd %xmm10,(%r8,%rax,4)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 116,240 // je 3809 <_sk_store_f32_hsw+0x69>
.byte 196,65,121,17,76,128,16 // vmovupd %xmm9,0x10(%r8,%rax,4)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 114,227 // jb 3809 <_sk_store_f32_hsw+0x69>
.byte 196,65,121,17,68,128,32 // vmovupd %xmm8,0x20(%r8,%rax,4)
- .byte 116,218 // je 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 116,218 // je 3809 <_sk_store_f32_hsw+0x69>
.byte 196,65,121,17,92,128,48 // vmovupd %xmm11,0x30(%r8,%rax,4)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 114,205 // jb 3809 <_sk_store_f32_hsw+0x69>
.byte 196,67,125,25,84,128,64,1 // vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- .byte 116,195 // je 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 116,195 // je 3809 <_sk_store_f32_hsw+0x69>
.byte 196,67,125,25,76,128,80,1 // vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,181 // jb 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 114,181 // jb 3809 <_sk_store_f32_hsw+0x69>
.byte 196,67,125,25,68,128,96,1 // vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- .byte 235,171 // jmp 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 235,171 // jmp 3809 <_sk_store_f32_hsw+0x69>
HIDDEN _sk_clamp_x_hsw
.globl _sk_clamp_x_hsw
@@ -10568,7 +11596,7 @@ _sk_linear_gradient_hsw:
.byte 196,98,125,24,72,28 // vbroadcastss 0x1c(%rax),%ymm9
.byte 76,139,0 // mov (%rax),%r8
.byte 77,133,192 // test %r8,%r8
- .byte 15,132,143,0,0,0 // je 3669 <_sk_linear_gradient_hsw+0xb5>
+ .byte 15,132,143,0,0,0 // je 3c99 <_sk_linear_gradient_hsw+0xb5>
.byte 72,139,64,8 // mov 0x8(%rax),%rax
.byte 72,131,192,32 // add $0x20,%rax
.byte 196,65,28,87,228 // vxorps %ymm12,%ymm12,%ymm12
@@ -10595,8 +11623,8 @@ _sk_linear_gradient_hsw:
.byte 196,67,13,74,201,208 // vblendvps %ymm13,%ymm9,%ymm14,%ymm9
.byte 72,131,192,36 // add $0x24,%rax
.byte 73,255,200 // dec %r8
- .byte 117,140 // jne 35f3 <_sk_linear_gradient_hsw+0x3f>
- .byte 235,17 // jmp 367a <_sk_linear_gradient_hsw+0xc6>
+ .byte 117,140 // jne 3c23 <_sk_linear_gradient_hsw+0x3f>
+ .byte 235,17 // jmp 3caa <_sk_linear_gradient_hsw+0xc6>
.byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
.byte 197,236,87,210 // vxorps %ymm2,%ymm2,%ymm2
.byte 197,228,87,219 // vxorps %ymm3,%ymm3,%ymm3
@@ -13611,6 +14639,418 @@ _sk_table_a_avx:
.byte 65,95 // pop %r15
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_parametric_r_avx
+.globl _sk_parametric_r_avx
+FUNCTION(_sk_parametric_r_avx)
+_sk_parametric_r_avx: // Per lane of %ymm0 (8 floats): x <= p[0x10] ? p[0xc]*x + p[0x18] : approx_pow(p[0x4]*x + p[0x8], p[0x0]) + p[0x14], then clamp to [0,1]. p is the pointer fetched by the first lods; presumably the transfer-function parameter struct -- confirm against SkJumper.h.
+ .byte 72,173 // lods %ds:(%rsi),%rax -- %rax = p, the next 8-byte word of the stream at %rsi
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8 -- threshold p[0x10]
+ .byte 196,65,124,194,192,2 // vcmpleps %ymm8,%ymm0,%ymm8 -- %ymm8 = lane mask (x <= threshold)
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9 -- linear slope p[0xc]
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10 -- linear offset p[0x18]
+ .byte 197,52,89,200 // vmulps %ymm0,%ymm9,%ymm9
+ .byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9 -- %ymm9 = linear branch p[0xc]*x + p[0x18]
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10 -- scale p[0x4]
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11 -- bias p[0x8]
+ .byte 197,172,89,192 // vmulps %ymm0,%ymm10,%ymm0
+ .byte 196,65,124,88,219 // vaddps %ymm11,%ymm0,%ymm11 -- %ymm11 = y = p[0x4]*x + p[0x8], the pow base
+ .byte 196,65,124,91,211 // vcvtdq2ps %ymm11,%ymm10 -- approx log2(y) starts: y's IEEE-754 bits read as int32, converted to float
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d -- 0x34000000 = 2^-23 as a float
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 -- vmovd + lane-0 shuffle + vinsertf128 = splat the scalar to all 8 lanes (idiom repeated below)
+ .byte 197,44,89,208 // vmulps %ymm0,%ymm10,%ymm10 -- bits * 2^-23
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d -- 127.0f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,44,92,208 // vsubps %ymm0,%ymm10,%ymm10 -- e = bits*2^-23 - 127, a coarse log2(y)
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d -- mask of the 23 mantissa bits
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 197,249,112,192,0 // vpshufd $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 196,65,124,84,219 // vandps %ymm11,%ymm0,%ymm11 -- keep only y's mantissa bits
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d -- bit pattern of 0.5f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 197,249,112,192,0 // vpshufd $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,36,86,216 // vorps %ymm0,%ymm11,%ymm11 -- m = mantissa under 0.5's exponent, so m is in [0.5,1)
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d -- ~2.774485f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,44,88,208 // vaddps %ymm0,%ymm10,%ymm10 -- e + 2.774485
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d -- ~1.49803f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,164,89,192 // vmulps %ymm0,%ymm11,%ymm0
+ .byte 197,44,92,208 // vsubps %ymm0,%ymm10,%ymm10 -- ... - 1.49803*m
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d -- ~1.72588f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,99,125,24,224,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm12
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d -- ~0.35209f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,164,88,192 // vaddps %ymm0,%ymm11,%ymm0
+ .byte 197,156,94,192 // vdivps %ymm0,%ymm12,%ymm0 -- 1.72588 / (0.35209 + m)
+ .byte 197,172,92,192 // vsubps %ymm0,%ymm10,%ymm0 -- %ymm0 = approx log2(y)
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10 -- exponent p[0x0]
+ .byte 197,44,89,216 // vmulps %ymm0,%ymm10,%ymm11 -- t = p[0x0] * log2(y); approx 2^t follows
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10 -- floor(t) (rounding mode 1 = toward -inf)
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10 -- f = t - floor(t), the fractional part
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d -- 8388608.0f = 2^23
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,99,125,24,224,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d -- ~121.274f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 196,65,124,88,219 // vaddps %ymm11,%ymm0,%ymm11 -- t + 121.274
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d -- ~1.49013f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 196,193,124,89,194 // vmulps %ymm10,%ymm0,%ymm0
+ .byte 197,36,92,216 // vsubps %ymm0,%ymm11,%ymm11 -- ... - 1.49013*f
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d -- ~27.728f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,99,125,24,232,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm13
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d -- ~4.8425f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 196,193,124,92,194 // vsubps %ymm10,%ymm0,%ymm0
+ .byte 197,148,94,192 // vdivps %ymm0,%ymm13,%ymm0 -- 27.728 / (4.8425 - f)
+ .byte 197,164,88,192 // vaddps %ymm0,%ymm11,%ymm0
+ .byte 197,156,89,192 // vmulps %ymm0,%ymm12,%ymm0 -- scale by 2^23 so the sum lines up with the float exponent/mantissa fields
+ .byte 197,253,91,192 // vcvtps2dq %ymm0,%ymm0 -- to int32; those bits, used as a float from here on, are ~2^t = y^p[0x0]
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10 -- additive term p[0x14]
+ .byte 196,193,124,88,194 // vaddps %ymm10,%ymm0,%ymm0 -- pow branch = y^p[0x0] + p[0x14]
+ .byte 196,195,125,74,193,128 // vblendvps %ymm8,%ymm9,%ymm0,%ymm0 -- take the linear branch where the mask is set, else the pow branch
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8 -- zero
+ .byte 196,65,124,95,192 // vmaxps %ymm8,%ymm0,%ymm8 -- clamp below: max(result, 0)
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax -- 1.0f (overwrites p; it is not needed again)
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,188,93,192 // vminps %ymm0,%ymm8,%ymm0 -- clamp above: %ymm0 = min(..., 1)
+ .byte 72,173 // lods %ds:(%rsi),%rax -- fetch the next pointer from the stream at %rsi
+ .byte 255,224 // jmpq *%rax -- and jump to it
+
+HIDDEN _sk_parametric_g_avx
+.globl _sk_parametric_g_avx
+FUNCTION(_sk_parametric_g_avx)
+_sk_parametric_g_avx: // Per lane of %ymm1 (8 floats): x <= p[0x10] ? p[0xc]*x + p[0x18] : approx_pow(p[0x4]*x + p[0x8], p[0x0]) + p[0x14], then clamp to [0,1]. p is the pointer fetched by the first lods; presumably the transfer-function parameter struct -- confirm against SkJumper.h.
+ .byte 72,173 // lods %ds:(%rsi),%rax -- %rax = p, the next 8-byte word of the stream at %rsi
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8 -- threshold p[0x10]
+ .byte 196,65,116,194,192,2 // vcmpleps %ymm8,%ymm1,%ymm8 -- %ymm8 = lane mask (x <= threshold)
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9 -- linear slope p[0xc]
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10 -- linear offset p[0x18]
+ .byte 197,52,89,201 // vmulps %ymm1,%ymm9,%ymm9
+ .byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9 -- %ymm9 = linear branch p[0xc]*x + p[0x18]
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10 -- scale p[0x4]
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11 -- bias p[0x8]
+ .byte 197,172,89,201 // vmulps %ymm1,%ymm10,%ymm1
+ .byte 196,65,116,88,219 // vaddps %ymm11,%ymm1,%ymm11 -- %ymm11 = y = p[0x4]*x + p[0x8], the pow base
+ .byte 196,65,124,91,211 // vcvtdq2ps %ymm11,%ymm10 -- approx log2(y) starts: y's IEEE-754 bits read as int32, converted to float
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d -- 0x34000000 = 2^-23 as a float
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 -- vmovd + lane-0 shuffle + vinsertf128 = splat the scalar to all 8 lanes (idiom repeated below)
+ .byte 197,44,89,209 // vmulps %ymm1,%ymm10,%ymm10 -- bits * 2^-23
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d -- 127.0f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,44,92,209 // vsubps %ymm1,%ymm10,%ymm10 -- e = bits*2^-23 - 127, a coarse log2(y)
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d -- mask of the 23 mantissa bits
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 197,249,112,201,0 // vpshufd $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 196,65,116,84,219 // vandps %ymm11,%ymm1,%ymm11 -- keep only y's mantissa bits
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d -- bit pattern of 0.5f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 197,249,112,201,0 // vpshufd $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,36,86,217 // vorps %ymm1,%ymm11,%ymm11 -- m = mantissa under 0.5's exponent, so m is in [0.5,1)
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d -- ~2.774485f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,44,88,209 // vaddps %ymm1,%ymm10,%ymm10 -- e + 2.774485
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d -- ~1.49803f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,164,89,201 // vmulps %ymm1,%ymm11,%ymm1
+ .byte 197,44,92,209 // vsubps %ymm1,%ymm10,%ymm10 -- ... - 1.49803*m
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d -- ~1.72588f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,99,117,24,225,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm12
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d -- ~0.35209f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,164,88,201 // vaddps %ymm1,%ymm11,%ymm1
+ .byte 197,156,94,201 // vdivps %ymm1,%ymm12,%ymm1 -- 1.72588 / (0.35209 + m)
+ .byte 197,172,92,201 // vsubps %ymm1,%ymm10,%ymm1 -- %ymm1 = approx log2(y)
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10 -- exponent p[0x0]
+ .byte 197,44,89,217 // vmulps %ymm1,%ymm10,%ymm11 -- t = p[0x0] * log2(y); approx 2^t follows
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10 -- floor(t) (rounding mode 1 = toward -inf)
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10 -- f = t - floor(t), the fractional part
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d -- 8388608.0f = 2^23
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,99,117,24,225,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d -- ~121.274f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 196,65,116,88,219 // vaddps %ymm11,%ymm1,%ymm11 -- t + 121.274
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d -- ~1.49013f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 196,193,116,89,202 // vmulps %ymm10,%ymm1,%ymm1
+ .byte 197,36,92,217 // vsubps %ymm1,%ymm11,%ymm11 -- ... - 1.49013*f
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d -- ~27.728f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,99,117,24,233,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm13
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d -- ~4.8425f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 196,193,116,92,202 // vsubps %ymm10,%ymm1,%ymm1
+ .byte 197,148,94,201 // vdivps %ymm1,%ymm13,%ymm1 -- 27.728 / (4.8425 - f)
+ .byte 197,164,88,201 // vaddps %ymm1,%ymm11,%ymm1
+ .byte 197,156,89,201 // vmulps %ymm1,%ymm12,%ymm1 -- scale by 2^23 so the sum lines up with the float exponent/mantissa fields
+ .byte 197,253,91,201 // vcvtps2dq %ymm1,%ymm1 -- to int32; those bits, used as a float from here on, are ~2^t = y^p[0x0]
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10 -- additive term p[0x14]
+ .byte 196,193,116,88,202 // vaddps %ymm10,%ymm1,%ymm1 -- pow branch = y^p[0x0] + p[0x14]
+ .byte 196,195,117,74,201,128 // vblendvps %ymm8,%ymm9,%ymm1,%ymm1 -- take the linear branch where the mask is set, else the pow branch
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8 -- zero
+ .byte 196,65,116,95,192 // vmaxps %ymm8,%ymm1,%ymm8 -- clamp below: max(result, 0)
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax -- 1.0f (overwrites p; it is not needed again)
+ .byte 197,249,110,200 // vmovd %eax,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,188,93,201 // vminps %ymm1,%ymm8,%ymm1 -- clamp above: %ymm1 = min(..., 1)
+ .byte 72,173 // lods %ds:(%rsi),%rax -- fetch the next pointer from the stream at %rsi
+ .byte 255,224 // jmpq *%rax -- and jump to it
+
+HIDDEN _sk_parametric_b_avx
+.globl _sk_parametric_b_avx
+FUNCTION(_sk_parametric_b_avx)
+_sk_parametric_b_avx: // Per lane of %ymm2 (8 floats): x <= p[0x10] ? p[0xc]*x + p[0x18] : approx_pow(p[0x4]*x + p[0x8], p[0x0]) + p[0x14], then clamp to [0,1]. p is the pointer fetched by the first lods; presumably the transfer-function parameter struct -- confirm against SkJumper.h.
+ .byte 72,173 // lods %ds:(%rsi),%rax -- %rax = p, the next 8-byte word of the stream at %rsi
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8 -- threshold p[0x10]
+ .byte 196,65,108,194,192,2 // vcmpleps %ymm8,%ymm2,%ymm8 -- %ymm8 = lane mask (x <= threshold)
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9 -- linear slope p[0xc]
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10 -- linear offset p[0x18]
+ .byte 197,52,89,202 // vmulps %ymm2,%ymm9,%ymm9
+ .byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9 -- %ymm9 = linear branch p[0xc]*x + p[0x18]
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10 -- scale p[0x4]
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11 -- bias p[0x8]
+ .byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
+ .byte 196,65,108,88,219 // vaddps %ymm11,%ymm2,%ymm11 -- %ymm11 = y = p[0x4]*x + p[0x8], the pow base
+ .byte 196,65,124,91,211 // vcvtdq2ps %ymm11,%ymm10 -- approx log2(y) starts: y's IEEE-754 bits read as int32, converted to float
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d -- 0x34000000 = 2^-23 as a float
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 -- vmovd + lane-0 shuffle + vinsertf128 = splat the scalar to all 8 lanes (idiom repeated below)
+ .byte 197,44,89,210 // vmulps %ymm2,%ymm10,%ymm10 -- bits * 2^-23
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d -- 127.0f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,44,92,210 // vsubps %ymm2,%ymm10,%ymm10 -- e = bits*2^-23 - 127, a coarse log2(y)
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d -- mask of the 23 mantissa bits
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 197,249,112,210,0 // vpshufd $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 196,65,108,84,219 // vandps %ymm11,%ymm2,%ymm11 -- keep only y's mantissa bits
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d -- bit pattern of 0.5f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 197,249,112,210,0 // vpshufd $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,36,86,218 // vorps %ymm2,%ymm11,%ymm11 -- m = mantissa under 0.5's exponent, so m is in [0.5,1)
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d -- ~2.774485f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,44,88,210 // vaddps %ymm2,%ymm10,%ymm10 -- e + 2.774485
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d -- ~1.49803f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,164,89,210 // vmulps %ymm2,%ymm11,%ymm2
+ .byte 197,44,92,210 // vsubps %ymm2,%ymm10,%ymm10 -- ... - 1.49803*m
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d -- ~1.72588f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,226,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d -- ~0.35209f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,164,88,210 // vaddps %ymm2,%ymm11,%ymm2
+ .byte 197,156,94,210 // vdivps %ymm2,%ymm12,%ymm2 -- 1.72588 / (0.35209 + m)
+ .byte 197,172,92,210 // vsubps %ymm2,%ymm10,%ymm2 -- %ymm2 = approx log2(y)
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10 -- exponent p[0x0]
+ .byte 197,44,89,218 // vmulps %ymm2,%ymm10,%ymm11 -- t = p[0x0] * log2(y); approx 2^t follows
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10 -- floor(t) (rounding mode 1 = toward -inf)
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10 -- f = t - floor(t), the fractional part
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d -- 8388608.0f = 2^23
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,226,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d -- ~121.274f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 196,65,108,88,219 // vaddps %ymm11,%ymm2,%ymm11 -- t + 121.274
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d -- ~1.49013f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 196,193,108,89,210 // vmulps %ymm10,%ymm2,%ymm2
+ .byte 197,36,92,218 // vsubps %ymm2,%ymm11,%ymm11 -- ... - 1.49013*f
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d -- ~27.728f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,234,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm13
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d -- ~4.8425f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 196,193,108,92,210 // vsubps %ymm10,%ymm2,%ymm2
+ .byte 197,148,94,210 // vdivps %ymm2,%ymm13,%ymm2 -- 27.728 / (4.8425 - f)
+ .byte 197,164,88,210 // vaddps %ymm2,%ymm11,%ymm2
+ .byte 197,156,89,210 // vmulps %ymm2,%ymm12,%ymm2 -- scale by 2^23 so the sum lines up with the float exponent/mantissa fields
+ .byte 197,253,91,210 // vcvtps2dq %ymm2,%ymm2 -- to int32; those bits, used as a float from here on, are ~2^t = y^p[0x0]
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10 -- additive term p[0x14]
+ .byte 196,193,108,88,210 // vaddps %ymm10,%ymm2,%ymm2 -- pow branch = y^p[0x0] + p[0x14]
+ .byte 196,195,109,74,209,128 // vblendvps %ymm8,%ymm9,%ymm2,%ymm2 -- take the linear branch where the mask is set, else the pow branch
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8 -- zero
+ .byte 196,65,108,95,192 // vmaxps %ymm8,%ymm2,%ymm8 -- clamp below: max(result, 0)
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax -- 1.0f (overwrites p; it is not needed again)
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,188,93,210 // vminps %ymm2,%ymm8,%ymm2 -- clamp above: %ymm2 = min(..., 1)
+ .byte 72,173 // lods %ds:(%rsi),%rax -- fetch the next pointer from the stream at %rsi
+ .byte 255,224 // jmpq *%rax -- and jump to it
+
+HIDDEN _sk_parametric_a_avx
+.globl _sk_parametric_a_avx
+FUNCTION(_sk_parametric_a_avx)
+_sk_parametric_a_avx: // Per lane of %ymm3 (8 floats): x <= p[0x10] ? p[0xc]*x + p[0x18] : approx_pow(p[0x4]*x + p[0x8], p[0x0]) + p[0x14], then clamp to [0,1]. p is the pointer fetched by the first lods; presumably the transfer-function parameter struct -- confirm against SkJumper.h.
+ .byte 72,173 // lods %ds:(%rsi),%rax -- %rax = p, the next 8-byte word of the stream at %rsi
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8 -- threshold p[0x10]
+ .byte 196,65,100,194,192,2 // vcmpleps %ymm8,%ymm3,%ymm8 -- %ymm8 = lane mask (x <= threshold)
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9 -- linear slope p[0xc]
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10 -- linear offset p[0x18]
+ .byte 197,52,89,203 // vmulps %ymm3,%ymm9,%ymm9
+ .byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9 -- %ymm9 = linear branch p[0xc]*x + p[0x18]
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10 -- scale p[0x4]
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11 -- bias p[0x8]
+ .byte 197,172,89,219 // vmulps %ymm3,%ymm10,%ymm3
+ .byte 196,65,100,88,219 // vaddps %ymm11,%ymm3,%ymm11 -- %ymm11 = y = p[0x4]*x + p[0x8], the pow base
+ .byte 196,65,124,91,211 // vcvtdq2ps %ymm11,%ymm10 -- approx log2(y) starts: y's IEEE-754 bits read as int32, converted to float
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d -- 0x34000000 = 2^-23 as a float
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 -- vmovd + lane-0 shuffle + vinsertf128 = splat the scalar to all 8 lanes (idiom repeated below)
+ .byte 197,44,89,211 // vmulps %ymm3,%ymm10,%ymm10 -- bits * 2^-23
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d -- 127.0f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,44,92,211 // vsubps %ymm3,%ymm10,%ymm10 -- e = bits*2^-23 - 127, a coarse log2(y)
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d -- mask of the 23 mantissa bits
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 197,249,112,219,0 // vpshufd $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 196,65,100,84,219 // vandps %ymm11,%ymm3,%ymm11 -- keep only y's mantissa bits
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d -- bit pattern of 0.5f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 197,249,112,219,0 // vpshufd $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,36,86,219 // vorps %ymm3,%ymm11,%ymm11 -- m = mantissa under 0.5's exponent, so m is in [0.5,1)
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d -- ~2.774485f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,44,88,211 // vaddps %ymm3,%ymm10,%ymm10 -- e + 2.774485
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d -- ~1.49803f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,164,89,219 // vmulps %ymm3,%ymm11,%ymm3
+ .byte 197,44,92,211 // vsubps %ymm3,%ymm10,%ymm10 -- ... - 1.49803*m
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d -- ~1.72588f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,99,101,24,227,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm12
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d -- ~0.35209f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,164,88,219 // vaddps %ymm3,%ymm11,%ymm3
+ .byte 197,156,94,219 // vdivps %ymm3,%ymm12,%ymm3 -- 1.72588 / (0.35209 + m)
+ .byte 197,172,92,219 // vsubps %ymm3,%ymm10,%ymm3 -- %ymm3 = approx log2(y)
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10 -- exponent p[0x0]
+ .byte 197,44,89,219 // vmulps %ymm3,%ymm10,%ymm11 -- t = p[0x0] * log2(y); approx 2^t follows
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10 -- floor(t) (rounding mode 1 = toward -inf)
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10 -- f = t - floor(t), the fractional part
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d -- 8388608.0f = 2^23
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,99,101,24,227,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d -- ~121.274f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 196,65,100,88,219 // vaddps %ymm11,%ymm3,%ymm11 -- t + 121.274
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d -- ~1.49013f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 196,193,100,89,218 // vmulps %ymm10,%ymm3,%ymm3
+ .byte 197,36,92,219 // vsubps %ymm3,%ymm11,%ymm11 -- ... - 1.49013*f
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d -- ~27.728f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,99,101,24,235,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm13
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d -- ~4.8425f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 196,193,100,92,218 // vsubps %ymm10,%ymm3,%ymm3
+ .byte 197,148,94,219 // vdivps %ymm3,%ymm13,%ymm3 -- 27.728 / (4.8425 - f)
+ .byte 197,164,88,219 // vaddps %ymm3,%ymm11,%ymm3
+ .byte 197,156,89,219 // vmulps %ymm3,%ymm12,%ymm3 -- scale by 2^23 so the sum lines up with the float exponent/mantissa fields
+ .byte 197,253,91,219 // vcvtps2dq %ymm3,%ymm3 -- to int32; those bits, used as a float from here on, are ~2^t = y^p[0x0]
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10 -- additive term p[0x14]
+ .byte 196,193,100,88,218 // vaddps %ymm10,%ymm3,%ymm3 -- pow branch = y^p[0x0] + p[0x14]
+ .byte 196,195,101,74,217,128 // vblendvps %ymm8,%ymm9,%ymm3,%ymm3 -- take the linear branch where the mask is set, else the pow branch
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8 -- zero
+ .byte 196,65,100,95,192 // vmaxps %ymm8,%ymm3,%ymm8 -- clamp below: max(result, 0)
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax -- 1.0f (overwrites p; it is not needed again)
+ .byte 197,249,110,216 // vmovd %eax,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,188,93,219 // vminps %ymm3,%ymm8,%ymm3 -- clamp above: %ymm3 = min(..., 1)
+ .byte 72,173 // lods %ds:(%rsi),%rax -- fetch the next pointer from the stream at %rsi
+ .byte 255,224 // jmpq *%rax -- and jump to it
+
HIDDEN _sk_load_a8_avx
.globl _sk_load_a8_avx
FUNCTION(_sk_load_a8_avx)
@@ -13620,7 +15060,7 @@ _sk_load_a8_avx:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,74 // jne 2912 <_sk_load_a8_avx+0x5a>
+ .byte 117,74 // jne 310a <_sk_load_a8_avx+0x5a>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,121,49,200 // vpmovzxbd %xmm0,%xmm1
.byte 196,227,121,4,192,229 // vpermilps $0xe5,%xmm0,%xmm0
@@ -13647,9 +15087,9 @@ _sk_load_a8_avx:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 291a <_sk_load_a8_avx+0x62>
+ .byte 117,234 // jne 3112 <_sk_load_a8_avx+0x62>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,149 // jmp 28cc <_sk_load_a8_avx+0x14>
+ .byte 235,149 // jmp 30c4 <_sk_load_a8_avx+0x14>
HIDDEN _sk_gather_a8_avx
.globl _sk_gather_a8_avx
@@ -13730,7 +15170,7 @@ _sk_store_a8_avx:
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 196,65,57,103,192 // vpackuswb %xmm8,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 2a73 <_sk_store_a8_avx+0x42>
+ .byte 117,10 // jne 326b <_sk_store_a8_avx+0x42>
.byte 196,65,123,17,4,57 // vmovsd %xmm8,(%r9,%rdi,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -13738,10 +15178,10 @@ _sk_store_a8_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 2a6f <_sk_store_a8_avx+0x3e>
+ .byte 119,236 // ja 3267 <_sk_store_a8_avx+0x3e>
.byte 196,66,121,48,192 // vpmovzxbw %xmm8,%xmm8
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,69,0,0,0 // lea 0x45(%rip),%r8 # 2ad8 <_sk_store_a8_avx+0xa7>
+ .byte 76,141,5,69,0,0,0 // lea 0x45(%rip),%r8 # 32d0 <_sk_store_a8_avx+0xa7>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -13752,7 +15192,7 @@ _sk_store_a8_avx:
.byte 196,67,121,20,68,57,2,4 // vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
.byte 196,67,121,20,68,57,1,2 // vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
.byte 196,67,121,20,4,57,0 // vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- .byte 235,154 // jmp 2a6f <_sk_store_a8_avx+0x3e>
+ .byte 235,154 // jmp 3267 <_sk_store_a8_avx+0x3e>
.byte 15,31,0 // nopl (%rax)
.byte 244 // hlt
.byte 255 // (bad)
@@ -13787,7 +15227,7 @@ _sk_load_g8_avx:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,91 // jne 2b5f <_sk_load_g8_avx+0x6b>
+ .byte 117,91 // jne 3357 <_sk_load_g8_avx+0x6b>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,121,49,200 // vpmovzxbd %xmm0,%xmm1
.byte 196,227,121,4,192,229 // vpermilps $0xe5,%xmm0,%xmm0
@@ -13817,9 +15257,9 @@ _sk_load_g8_avx:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 2b67 <_sk_load_g8_avx+0x73>
+ .byte 117,234 // jne 335f <_sk_load_g8_avx+0x73>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,132 // jmp 2b08 <_sk_load_g8_avx+0x14>
+ .byte 235,132 // jmp 3300 <_sk_load_g8_avx+0x14>
HIDDEN _sk_gather_g8_avx
.globl _sk_gather_g8_avx
@@ -13894,9 +15334,9 @@ _sk_gather_i8_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 2c9e <_sk_gather_i8_avx+0xf>
+ .byte 116,5 // je 3496 <_sk_gather_i8_avx+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 2ca0 <_sk_gather_i8_avx+0x11>
+ .byte 235,2 // jmp 3498 <_sk_gather_i8_avx+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 65,87 // push %r15
.byte 65,86 // push %r14
@@ -14001,7 +15441,7 @@ _sk_load_565_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,209,0,0,0 // jne 2f3a <_sk_load_565_avx+0xdf>
+ .byte 15,133,209,0,0,0 // jne 3732 <_sk_load_565_avx+0xdf>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -14051,9 +15491,9 @@ _sk_load_565_avx:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,29,255,255,255 // ja 2e6f <_sk_load_565_avx+0x14>
+ .byte 15,135,29,255,255,255 // ja 3667 <_sk_load_565_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,75,0,0,0 // lea 0x4b(%rip),%r9 # 2fa8 <_sk_load_565_avx+0x14d>
+ .byte 76,141,13,75,0,0,0 // lea 0x4b(%rip),%r9 # 37a0 <_sk_load_565_avx+0x14d>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -14065,7 +15505,7 @@ _sk_load_565_avx:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,201,254,255,255 // jmpq 2e6f <_sk_load_565_avx+0x14>
+ .byte 233,201,254,255,255 // jmpq 3667 <_sk_load_565_avx+0x14>
.byte 102,144 // xchg %ax,%ax
.byte 242,255 // repnz (bad)
.byte 255 // (bad)
@@ -14222,7 +15662,7 @@ _sk_store_565_avx:
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 31f3 <_sk_store_565_avx+0x9e>
+ .byte 117,10 // jne 39eb <_sk_store_565_avx+0x9e>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -14230,9 +15670,9 @@ _sk_store_565_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 31ef <_sk_store_565_avx+0x9a>
+ .byte 119,236 // ja 39e7 <_sk_store_565_avx+0x9a>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 3250 <_sk_store_565_avx+0xfb>
+ .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 3a48 <_sk_store_565_avx+0xfb>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -14243,7 +15683,7 @@ _sk_store_565_avx:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 31ef <_sk_store_565_avx+0x9a>
+ .byte 235,159 // jmp 39e7 <_sk_store_565_avx+0x9a>
.byte 247,255 // idiv %edi
.byte 255 // (bad)
.byte 255 // (bad)
@@ -14274,7 +15714,7 @@ _sk_load_4444_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,245,0,0,0 // jne 336f <_sk_load_4444_avx+0x103>
+ .byte 15,133,245,0,0,0 // jne 3b67 <_sk_load_4444_avx+0x103>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -14331,9 +15771,9 @@ _sk_load_4444_avx:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,249,254,255,255 // ja 3280 <_sk_load_4444_avx+0x14>
+ .byte 15,135,249,254,255,255 // ja 3a78 <_sk_load_4444_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 33dc <_sk_load_4444_avx+0x170>
+ .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 3bd4 <_sk_load_4444_avx+0x170>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -14345,12 +15785,12 @@ _sk_load_4444_avx:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,165,254,255,255 // jmpq 3280 <_sk_load_4444_avx+0x14>
+ .byte 233,165,254,255,255 // jmpq 3a78 <_sk_load_4444_avx+0x14>
.byte 144 // nop
.byte 243,255 // repz (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 235,255 // jmp 33e1 <_sk_load_4444_avx+0x175>
+ .byte 235,255 // jmp 3bd9 <_sk_load_4444_avx+0x175>
.byte 255 // (bad)
.byte 255,227 // jmpq *%rbx
.byte 255 // (bad)
@@ -14511,7 +15951,7 @@ _sk_store_4444_avx:
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 365c <_sk_store_4444_avx+0xaf>
+ .byte 117,10 // jne 3e54 <_sk_store_4444_avx+0xaf>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -14519,9 +15959,9 @@ _sk_store_4444_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 3658 <_sk_store_4444_avx+0xab>
+ .byte 119,236 // ja 3e50 <_sk_store_4444_avx+0xab>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,69,0,0,0 // lea 0x45(%rip),%r8 # 36bc <_sk_store_4444_avx+0x10f>
+ .byte 76,141,5,69,0,0,0 // lea 0x45(%rip),%r8 # 3eb4 <_sk_store_4444_avx+0x10f>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -14532,7 +15972,7 @@ _sk_store_4444_avx:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 3658 <_sk_store_4444_avx+0xab>
+ .byte 235,159 // jmp 3e50 <_sk_store_4444_avx+0xab>
.byte 15,31,0 // nopl (%rax)
.byte 244 // hlt
.byte 255 // (bad)
@@ -14565,7 +16005,7 @@ _sk_load_8888_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,157,0,0,0 // jne 3783 <_sk_load_8888_avx+0xab>
+ .byte 15,133,157,0,0,0 // jne 3f7b <_sk_load_8888_avx+0xab>
.byte 196,65,124,16,12,186 // vmovups (%r10,%rdi,4),%ymm9
.byte 184,255,0,0,0 // mov $0xff,%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
@@ -14603,9 +16043,9 @@ _sk_load_8888_avx:
.byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,80,255,255,255 // ja 36ec <_sk_load_8888_avx+0x14>
+ .byte 15,135,80,255,255,255 // ja 3ee4 <_sk_load_8888_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,137,0,0,0 // lea 0x89(%rip),%r9 # 3830 <_sk_load_8888_avx+0x158>
+ .byte 76,141,13,137,0,0,0 // lea 0x89(%rip),%r9 # 4028 <_sk_load_8888_avx+0x158>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -14628,7 +16068,7 @@ _sk_load_8888_avx:
.byte 196,99,53,12,200,15 // vblendps $0xf,%ymm0,%ymm9,%ymm9
.byte 196,195,49,34,4,186,0 // vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
.byte 196,99,53,12,200,15 // vblendps $0xf,%ymm0,%ymm9,%ymm9
- .byte 233,188,254,255,255 // jmpq 36ec <_sk_load_8888_avx+0x14>
+ .byte 233,188,254,255,255 // jmpq 3ee4 <_sk_load_8888_avx+0x14>
.byte 238 // out %al,(%dx)
.byte 255 // (bad)
.byte 255 // (bad)
@@ -14758,7 +16198,7 @@ _sk_store_8888_avx:
.byte 196,65,45,86,192 // vorpd %ymm8,%ymm10,%ymm8
.byte 196,65,53,86,192 // vorpd %ymm8,%ymm9,%ymm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 3a31 <_sk_store_8888_avx+0xa4>
+ .byte 117,10 // jne 4229 <_sk_store_8888_avx+0xa4>
.byte 196,65,124,17,4,185 // vmovups %ymm8,(%r9,%rdi,4)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -14766,9 +16206,9 @@ _sk_store_8888_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 3a2d <_sk_store_8888_avx+0xa0>
+ .byte 119,236 // ja 4225 <_sk_store_8888_avx+0xa0>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,84,0,0,0 // lea 0x54(%rip),%r8 # 3aa0 <_sk_store_8888_avx+0x113>
+ .byte 76,141,5,84,0,0,0 // lea 0x54(%rip),%r8 # 4298 <_sk_store_8888_avx+0x113>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -14782,7 +16222,7 @@ _sk_store_8888_avx:
.byte 196,67,121,22,68,185,8,2 // vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4)
.byte 196,67,121,22,68,185,4,1 // vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4)
.byte 196,65,121,126,4,185 // vmovd %xmm8,(%r9,%rdi,4)
- .byte 235,143 // jmp 3a2d <_sk_store_8888_avx+0xa0>
+ .byte 235,143 // jmp 4225 <_sk_store_8888_avx+0xa0>
.byte 102,144 // xchg %ax,%ax
.byte 246,255 // idiv %bh
.byte 255 // (bad)
@@ -14814,7 +16254,7 @@ _sk_load_f16_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,139,0 // mov (%rax),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,17,1,0,0 // jne 3bdb <_sk_load_f16_avx+0x11f>
+ .byte 15,133,17,1,0,0 // jne 43d3 <_sk_load_f16_avx+0x11f>
.byte 197,121,16,4,248 // vmovupd (%rax,%rdi,8),%xmm8
.byte 197,249,16,84,248,16 // vmovupd 0x10(%rax,%rdi,8),%xmm2
.byte 197,249,16,92,248,32 // vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -14876,29 +16316,29 @@ _sk_load_f16_avx:
.byte 197,123,16,4,248 // vmovsd (%rax,%rdi,8),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,79 // je 3c3a <_sk_load_f16_avx+0x17e>
+ .byte 116,79 // je 4432 <_sk_load_f16_avx+0x17e>
.byte 197,57,22,68,248,8 // vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,67 // jb 3c3a <_sk_load_f16_avx+0x17e>
+ .byte 114,67 // jb 4432 <_sk_load_f16_avx+0x17e>
.byte 197,251,16,84,248,16 // vmovsd 0x10(%rax,%rdi,8),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,68 // je 3c47 <_sk_load_f16_avx+0x18b>
+ .byte 116,68 // je 443f <_sk_load_f16_avx+0x18b>
.byte 197,233,22,84,248,24 // vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,56 // jb 3c47 <_sk_load_f16_avx+0x18b>
+ .byte 114,56 // jb 443f <_sk_load_f16_avx+0x18b>
.byte 197,251,16,92,248,32 // vmovsd 0x20(%rax,%rdi,8),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,194,254,255,255 // je 3ae1 <_sk_load_f16_avx+0x25>
+ .byte 15,132,194,254,255,255 // je 42d9 <_sk_load_f16_avx+0x25>
.byte 197,225,22,92,248,40 // vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,178,254,255,255 // jb 3ae1 <_sk_load_f16_avx+0x25>
+ .byte 15,130,178,254,255,255 // jb 42d9 <_sk_load_f16_avx+0x25>
.byte 197,122,126,76,248,48 // vmovq 0x30(%rax,%rdi,8),%xmm9
- .byte 233,167,254,255,255 // jmpq 3ae1 <_sk_load_f16_avx+0x25>
+ .byte 233,167,254,255,255 // jmpq 42d9 <_sk_load_f16_avx+0x25>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,154,254,255,255 // jmpq 3ae1 <_sk_load_f16_avx+0x25>
+ .byte 233,154,254,255,255 // jmpq 42d9 <_sk_load_f16_avx+0x25>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,145,254,255,255 // jmpq 3ae1 <_sk_load_f16_avx+0x25>
+ .byte 233,145,254,255,255 // jmpq 42d9 <_sk_load_f16_avx+0x25>
HIDDEN _sk_gather_f16_avx
.globl _sk_gather_f16_avx
@@ -15042,7 +16482,7 @@ _sk_store_f16_avx:
.byte 196,65,17,98,200 // vpunpckldq %xmm8,%xmm13,%xmm9
.byte 196,65,17,106,192 // vpunpckhdq %xmm8,%xmm13,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,31 // jne 3ed0 <_sk_store_f16_avx+0xd2>
+ .byte 117,31 // jne 46c8 <_sk_store_f16_avx+0xd2>
.byte 196,65,120,17,28,248 // vmovups %xmm11,(%r8,%rdi,8)
.byte 196,65,120,17,84,248,16 // vmovups %xmm10,0x10(%r8,%rdi,8)
.byte 196,65,120,17,76,248,32 // vmovups %xmm9,0x20(%r8,%rdi,8)
@@ -15051,22 +16491,22 @@ _sk_store_f16_avx:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,214,28,248 // vmovq %xmm11,(%r8,%rdi,8)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 3ecc <_sk_store_f16_avx+0xce>
+ .byte 116,240 // je 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,23,92,248,8 // vmovhpd %xmm11,0x8(%r8,%rdi,8)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 3ecc <_sk_store_f16_avx+0xce>
+ .byte 114,227 // jb 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,214,84,248,16 // vmovq %xmm10,0x10(%r8,%rdi,8)
- .byte 116,218 // je 3ecc <_sk_store_f16_avx+0xce>
+ .byte 116,218 // je 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,23,84,248,24 // vmovhpd %xmm10,0x18(%r8,%rdi,8)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 3ecc <_sk_store_f16_avx+0xce>
+ .byte 114,205 // jb 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,214,76,248,32 // vmovq %xmm9,0x20(%r8,%rdi,8)
- .byte 116,196 // je 3ecc <_sk_store_f16_avx+0xce>
+ .byte 116,196 // je 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,23,76,248,40 // vmovhpd %xmm9,0x28(%r8,%rdi,8)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,183 // jb 3ecc <_sk_store_f16_avx+0xce>
+ .byte 114,183 // jb 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,214,68,248,48 // vmovq %xmm8,0x30(%r8,%rdi,8)
- .byte 235,174 // jmp 3ecc <_sk_store_f16_avx+0xce>
+ .byte 235,174 // jmp 46c4 <_sk_store_f16_avx+0xce>
HIDDEN _sk_load_u16_be_avx
.globl _sk_load_u16_be_avx
@@ -15076,7 +16516,7 @@ _sk_load_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,189,0,0,0,0 // lea 0x0(,%rdi,4),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,5,1,0,0 // jne 4039 <_sk_load_u16_be_avx+0x11b>
+ .byte 15,133,5,1,0,0 // jne 4831 <_sk_load_u16_be_avx+0x11b>
.byte 196,65,121,16,4,64 // vmovupd (%r8,%rax,2),%xmm8
.byte 196,193,121,16,84,64,16 // vmovupd 0x10(%r8,%rax,2),%xmm2
.byte 196,193,121,16,92,64,32 // vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -15135,29 +16575,29 @@ _sk_load_u16_be_avx:
.byte 196,65,123,16,4,64 // vmovsd (%r8,%rax,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,85 // je 409f <_sk_load_u16_be_avx+0x181>
+ .byte 116,85 // je 4897 <_sk_load_u16_be_avx+0x181>
.byte 196,65,57,22,68,64,8 // vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,72 // jb 409f <_sk_load_u16_be_avx+0x181>
+ .byte 114,72 // jb 4897 <_sk_load_u16_be_avx+0x181>
.byte 196,193,123,16,84,64,16 // vmovsd 0x10(%r8,%rax,2),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,72 // je 40ac <_sk_load_u16_be_avx+0x18e>
+ .byte 116,72 // je 48a4 <_sk_load_u16_be_avx+0x18e>
.byte 196,193,105,22,84,64,24 // vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,59 // jb 40ac <_sk_load_u16_be_avx+0x18e>
+ .byte 114,59 // jb 48a4 <_sk_load_u16_be_avx+0x18e>
.byte 196,193,123,16,92,64,32 // vmovsd 0x20(%r8,%rax,2),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,205,254,255,255 // je 3f4f <_sk_load_u16_be_avx+0x31>
+ .byte 15,132,205,254,255,255 // je 4747 <_sk_load_u16_be_avx+0x31>
.byte 196,193,97,22,92,64,40 // vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,188,254,255,255 // jb 3f4f <_sk_load_u16_be_avx+0x31>
+ .byte 15,130,188,254,255,255 // jb 4747 <_sk_load_u16_be_avx+0x31>
.byte 196,65,122,126,76,64,48 // vmovq 0x30(%r8,%rax,2),%xmm9
- .byte 233,176,254,255,255 // jmpq 3f4f <_sk_load_u16_be_avx+0x31>
+ .byte 233,176,254,255,255 // jmpq 4747 <_sk_load_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,163,254,255,255 // jmpq 3f4f <_sk_load_u16_be_avx+0x31>
+ .byte 233,163,254,255,255 // jmpq 4747 <_sk_load_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,154,254,255,255 // jmpq 3f4f <_sk_load_u16_be_avx+0x31>
+ .byte 233,154,254,255,255 // jmpq 4747 <_sk_load_u16_be_avx+0x31>
HIDDEN _sk_load_rgb_u16_be_avx
.globl _sk_load_rgb_u16_be_avx
@@ -15167,7 +16607,7 @@ _sk_load_rgb_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,127 // lea (%rdi,%rdi,2),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,8,1,0,0 // jne 41cf <_sk_load_rgb_u16_be_avx+0x11a>
+ .byte 15,133,8,1,0,0 // jne 49c7 <_sk_load_rgb_u16_be_avx+0x11a>
.byte 196,193,122,111,4,64 // vmovdqu (%r8,%rax,2),%xmm0
.byte 196,193,122,111,84,64,12 // vmovdqu 0xc(%r8,%rax,2),%xmm2
.byte 196,193,122,111,76,64,24 // vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -15226,36 +16666,36 @@ _sk_load_rgb_u16_be_avx:
.byte 196,193,121,110,4,64 // vmovd (%r8,%rax,2),%xmm0
.byte 196,193,121,196,68,64,4,2 // vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 117,5 // jne 41e8 <_sk_load_rgb_u16_be_avx+0x133>
- .byte 233,19,255,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,5 // jne 49e0 <_sk_load_rgb_u16_be_avx+0x133>
+ .byte 233,19,255,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,76,64,6 // vmovd 0x6(%r8,%rax,2),%xmm1
.byte 196,65,113,196,68,64,10,2 // vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,26 // jb 4217 <_sk_load_rgb_u16_be_avx+0x162>
+ .byte 114,26 // jb 4a0f <_sk_load_rgb_u16_be_avx+0x162>
.byte 196,193,121,110,76,64,12 // vmovd 0xc(%r8,%rax,2),%xmm1
.byte 196,193,113,196,84,64,16,2 // vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 117,10 // jne 421c <_sk_load_rgb_u16_be_avx+0x167>
- .byte 233,228,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,223,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 4a14 <_sk_load_rgb_u16_be_avx+0x167>
+ .byte 233,228,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,223,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,76,64,18 // vmovd 0x12(%r8,%rax,2),%xmm1
.byte 196,65,113,196,76,64,22,2 // vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,26 // jb 424b <_sk_load_rgb_u16_be_avx+0x196>
+ .byte 114,26 // jb 4a43 <_sk_load_rgb_u16_be_avx+0x196>
.byte 196,193,121,110,76,64,24 // vmovd 0x18(%r8,%rax,2),%xmm1
.byte 196,193,113,196,76,64,28,2 // vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 117,10 // jne 4250 <_sk_load_rgb_u16_be_avx+0x19b>
- .byte 233,176,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,171,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 4a48 <_sk_load_rgb_u16_be_avx+0x19b>
+ .byte 233,176,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,171,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,92,64,30 // vmovd 0x1e(%r8,%rax,2),%xmm3
.byte 196,65,97,196,92,64,34,2 // vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,20 // jb 4279 <_sk_load_rgb_u16_be_avx+0x1c4>
+ .byte 114,20 // jb 4a71 <_sk_load_rgb_u16_be_avx+0x1c4>
.byte 196,193,121,110,92,64,36 // vmovd 0x24(%r8,%rax,2),%xmm3
.byte 196,193,97,196,92,64,40,2 // vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- .byte 233,130,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,125,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,130,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,125,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
HIDDEN _sk_store_u16_be_avx
.globl _sk_store_u16_be_avx
@@ -15305,7 +16745,7 @@ _sk_store_u16_be_avx:
.byte 196,65,17,98,200 // vpunpckldq %xmm8,%xmm13,%xmm9
.byte 196,65,17,106,192 // vpunpckhdq %xmm8,%xmm13,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,31 // jne 4380 <_sk_store_u16_be_avx+0x102>
+ .byte 117,31 // jne 4b78 <_sk_store_u16_be_avx+0x102>
.byte 196,1,120,17,28,72 // vmovups %xmm11,(%r8,%r9,2)
.byte 196,1,120,17,84,72,16 // vmovups %xmm10,0x10(%r8,%r9,2)
.byte 196,1,120,17,76,72,32 // vmovups %xmm9,0x20(%r8,%r9,2)
@@ -15314,22 +16754,22 @@ _sk_store_u16_be_avx:
.byte 255,224 // jmpq *%rax
.byte 196,1,121,214,28,72 // vmovq %xmm11,(%r8,%r9,2)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 116,240 // je 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,23,92,72,8 // vmovhpd %xmm11,0x8(%r8,%r9,2)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 114,227 // jb 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,214,84,72,16 // vmovq %xmm10,0x10(%r8,%r9,2)
- .byte 116,218 // je 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 116,218 // je 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,23,84,72,24 // vmovhpd %xmm10,0x18(%r8,%r9,2)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 114,205 // jb 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,214,76,72,32 // vmovq %xmm9,0x20(%r8,%r9,2)
- .byte 116,196 // je 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 116,196 // je 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,23,76,72,40 // vmovhpd %xmm9,0x28(%r8,%r9,2)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,183 // jb 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 114,183 // jb 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,214,68,72,48 // vmovq %xmm8,0x30(%r8,%r9,2)
- .byte 235,174 // jmp 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 235,174 // jmp 4b74 <_sk_store_u16_be_avx+0xfe>
HIDDEN _sk_load_f32_avx
.globl _sk_load_f32_avx
@@ -15337,10 +16777,10 @@ FUNCTION(_sk_load_f32_avx)
_sk_load_f32_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 119,110 // ja 4444 <_sk_load_f32_avx+0x76>
+ .byte 119,110 // ja 4c3c <_sk_load_f32_avx+0x76>
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
- .byte 76,141,21,132,0,0,0 // lea 0x84(%rip),%r10 # 446c <_sk_load_f32_avx+0x9e>
+ .byte 76,141,21,132,0,0,0 // lea 0x84(%rip),%r10 # 4c64 <_sk_load_f32_avx+0x9e>
.byte 73,99,4,138 // movslq (%r10,%rcx,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -15399,7 +16839,7 @@ _sk_store_f32_avx:
.byte 196,65,37,20,196 // vunpcklpd %ymm12,%ymm11,%ymm8
.byte 196,65,37,21,220 // vunpckhpd %ymm12,%ymm11,%ymm11
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,55 // jne 44f9 <_sk_store_f32_avx+0x6d>
+ .byte 117,55 // jne 4cf1 <_sk_store_f32_avx+0x6d>
.byte 196,67,45,24,225,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
.byte 196,67,61,24,235,1 // vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
.byte 196,67,45,6,201,49 // vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -15412,22 +16852,22 @@ _sk_store_f32_avx:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,17,20,128 // vmovupd %xmm10,(%r8,%rax,4)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 44f5 <_sk_store_f32_avx+0x69>
+ .byte 116,240 // je 4ced <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,76,128,16 // vmovupd %xmm9,0x10(%r8,%rax,4)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 44f5 <_sk_store_f32_avx+0x69>
+ .byte 114,227 // jb 4ced <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,68,128,32 // vmovupd %xmm8,0x20(%r8,%rax,4)
- .byte 116,218 // je 44f5 <_sk_store_f32_avx+0x69>
+ .byte 116,218 // je 4ced <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,92,128,48 // vmovupd %xmm11,0x30(%r8,%rax,4)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 44f5 <_sk_store_f32_avx+0x69>
+ .byte 114,205 // jb 4ced <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,84,128,64,1 // vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- .byte 116,195 // je 44f5 <_sk_store_f32_avx+0x69>
+ .byte 116,195 // je 4ced <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,76,128,80,1 // vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,181 // jb 44f5 <_sk_store_f32_avx+0x69>
+ .byte 114,181 // jb 4ced <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,68,128,96,1 // vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- .byte 235,171 // jmp 44f5 <_sk_store_f32_avx+0x69>
+ .byte 235,171 // jmp 4ced <_sk_store_f32_avx+0x69>
HIDDEN _sk_clamp_x_avx
.globl _sk_clamp_x_avx
@@ -15755,7 +17195,7 @@ _sk_linear_gradient_avx:
.byte 196,226,125,24,88,28 // vbroadcastss 0x1c(%rax),%ymm3
.byte 76,139,0 // mov (%rax),%r8
.byte 77,133,192 // test %r8,%r8
- .byte 15,132,146,0,0,0 // je 4aad <_sk_linear_gradient_avx+0xb8>
+ .byte 15,132,146,0,0,0 // je 52a5 <_sk_linear_gradient_avx+0xb8>
.byte 72,139,64,8 // mov 0x8(%rax),%rax
.byte 72,131,192,32 // add $0x20,%rax
.byte 196,65,28,87,228 // vxorps %ymm12,%ymm12,%ymm12
@@ -15782,8 +17222,8 @@ _sk_linear_gradient_avx:
.byte 196,227,13,74,219,208 // vblendvps %ymm13,%ymm3,%ymm14,%ymm3
.byte 72,131,192,36 // add $0x24,%rax
.byte 73,255,200 // dec %r8
- .byte 117,140 // jne 4a37 <_sk_linear_gradient_avx+0x42>
- .byte 235,20 // jmp 4ac1 <_sk_linear_gradient_avx+0xcc>
+ .byte 117,140 // jne 522f <_sk_linear_gradient_avx+0x42>
+ .byte 235,20 // jmp 52b9 <_sk_linear_gradient_avx+0xcc>
.byte 196,65,36,87,219 // vxorps %ymm11,%ymm11,%ymm11
.byte 196,65,44,87,210 // vxorps %ymm10,%ymm10,%ymm10
.byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
@@ -18479,6 +19919,400 @@ _sk_table_a_sse41:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_parametric_r_sse41
+.globl _sk_parametric_r_sse41
+FUNCTION(_sk_parametric_r_sse41)
+_sk_parametric_r_sse41:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,64,16 // movss 0x10(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 243,68,15,16,72,12 // movss 0xc(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 68,15,89,200 // mulps %xmm0,%xmm9
+ .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 68,15,89,208 // mulps %xmm0,%xmm10
+ .byte 65,15,194,192,2 // cmpleps %xmm8,%xmm0
+ .byte 243,68,15,16,64,24 // movss 0x18(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 69,15,88,200 // addps %xmm8,%xmm9
+ .byte 243,68,15,16,0 // movss (%rax),%xmm8
+ .byte 243,68,15,16,88,8 // movss 0x8(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,211 // addps %xmm11,%xmm10
+ .byte 69,15,91,218 // cvtdq2ps %xmm10,%xmm11
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,89,227 // mulps %xmm11,%xmm12
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 102,69,15,112,219,0 // pshufd $0x0,%xmm11,%xmm11
+ .byte 102,69,15,219,218 // pand %xmm10,%xmm11
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 102,69,15,112,210,0 // pshufd $0x0,%xmm10,%xmm10
+ .byte 102,69,15,235,211 // por %xmm11,%xmm10
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,220 // addps %xmm12,%xmm11
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,89,226 // mulps %xmm10,%xmm12
+ .byte 69,15,92,220 // subps %xmm12,%xmm11
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,88,234 // addps %xmm10,%xmm13
+ .byte 69,15,94,229 // divps %xmm13,%xmm12
+ .byte 69,15,92,220 // subps %xmm12,%xmm11
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 69,15,89,195 // mulps %xmm11,%xmm8
+ .byte 102,69,15,58,8,216,1 // roundps $0x1,%xmm8,%xmm11
+ .byte 185,0,0,0,75 // mov $0x4b000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,224 // addps %xmm8,%xmm12
+ .byte 69,15,92,195 // subps %xmm11,%xmm8
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,89,216 // mulps %xmm8,%xmm11
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,92,232 // subps %xmm8,%xmm13
+ .byte 69,15,94,221 // divps %xmm13,%xmm11
+ .byte 69,15,88,220 // addps %xmm12,%xmm11
+ .byte 69,15,89,218 // mulps %xmm10,%xmm11
+ .byte 102,69,15,91,211 // cvtps2dq %xmm11,%xmm10
+ .byte 243,68,15,16,64,20 // movss 0x14(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 69,15,88,194 // addps %xmm10,%xmm8
+ .byte 102,69,15,56,20,193 // blendvps %xmm0,%xmm9,%xmm8
+ .byte 15,87,192 // xorps %xmm0,%xmm0
+ .byte 68,15,95,192 // maxps %xmm0,%xmm8
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 102,15,110,192 // movd %eax,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 68,15,93,192 // minps %xmm0,%xmm8
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 65,15,40,192 // movaps %xmm8,%xmm0
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_g_sse41
+.globl _sk_parametric_g_sse41
+FUNCTION(_sk_parametric_g_sse41)
+_sk_parametric_g_sse41:
+ .byte 68,15,40,192 // movaps %xmm0,%xmm8
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,80,16 // movss 0x10(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 243,68,15,16,72,12 // movss 0xc(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 68,15,89,201 // mulps %xmm1,%xmm9
+ .byte 243,68,15,16,88,4 // movss 0x4(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 68,15,89,217 // mulps %xmm1,%xmm11
+ .byte 15,40,193 // movaps %xmm1,%xmm0
+ .byte 65,15,194,194,2 // cmpleps %xmm10,%xmm0
+ .byte 243,15,16,72,24 // movss 0x18(%rax),%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 68,15,88,201 // addps %xmm1,%xmm9
+ .byte 243,68,15,16,32 // movss (%rax),%xmm12
+ .byte 243,15,16,72,8 // movss 0x8(%rax),%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 68,15,88,217 // addps %xmm1,%xmm11
+ .byte 69,15,91,211 // cvtdq2ps %xmm11,%xmm10
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,234 // mulps %xmm10,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 68,15,92,233 // subps %xmm1,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 102,68,15,112,209,0 // pshufd $0x0,%xmm1,%xmm10
+ .byte 102,69,15,219,211 // pand %xmm11,%xmm10
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 102,68,15,112,217,0 // pshufd $0x0,%xmm1,%xmm11
+ .byte 102,69,15,235,218 // por %xmm10,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,88,213 // addps %xmm13,%xmm10
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 65,15,89,203 // mulps %xmm11,%xmm1
+ .byte 68,15,92,209 // subps %xmm1,%xmm10
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 65,15,88,203 // addps %xmm11,%xmm1
+ .byte 68,15,94,233 // divps %xmm1,%xmm13
+ .byte 69,15,92,213 // subps %xmm13,%xmm10
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,89,226 // mulps %xmm10,%xmm12
+ .byte 102,69,15,58,8,220,1 // roundps $0x1,%xmm12,%xmm11
+ .byte 185,0,0,0,75 // mov $0x4b000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,88,236 // addps %xmm12,%xmm13
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 65,15,89,204 // mulps %xmm12,%xmm1
+ .byte 68,15,92,233 // subps %xmm1,%xmm13
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 65,15,92,204 // subps %xmm12,%xmm1
+ .byte 68,15,94,217 // divps %xmm1,%xmm11
+ .byte 69,15,88,221 // addps %xmm13,%xmm11
+ .byte 69,15,89,218 // mulps %xmm10,%xmm11
+ .byte 102,69,15,91,211 // cvtps2dq %xmm11,%xmm10
+ .byte 243,15,16,72,20 // movss 0x14(%rax),%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 65,15,88,202 // addps %xmm10,%xmm1
+ .byte 102,65,15,56,20,201 // blendvps %xmm0,%xmm9,%xmm1
+ .byte 15,87,192 // xorps %xmm0,%xmm0
+ .byte 15,95,200 // maxps %xmm0,%xmm1
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 102,15,110,192 // movd %eax,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 15,93,200 // minps %xmm0,%xmm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 65,15,40,192 // movaps %xmm8,%xmm0
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_b_sse41
+.globl _sk_parametric_b_sse41
+FUNCTION(_sk_parametric_b_sse41)
+_sk_parametric_b_sse41:
+ .byte 68,15,40,192 // movaps %xmm0,%xmm8
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,80,16 // movss 0x10(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 243,68,15,16,72,12 // movss 0xc(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 68,15,89,202 // mulps %xmm2,%xmm9
+ .byte 243,68,15,16,88,4 // movss 0x4(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 68,15,89,218 // mulps %xmm2,%xmm11
+ .byte 15,40,194 // movaps %xmm2,%xmm0
+ .byte 65,15,194,194,2 // cmpleps %xmm10,%xmm0
+ .byte 243,15,16,80,24 // movss 0x18(%rax),%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 68,15,88,202 // addps %xmm2,%xmm9
+ .byte 243,68,15,16,32 // movss (%rax),%xmm12
+ .byte 243,15,16,80,8 // movss 0x8(%rax),%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 68,15,88,218 // addps %xmm2,%xmm11
+ .byte 69,15,91,211 // cvtdq2ps %xmm11,%xmm10
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,234 // mulps %xmm10,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 68,15,92,234 // subps %xmm2,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 102,68,15,112,210,0 // pshufd $0x0,%xmm2,%xmm10
+ .byte 102,69,15,219,211 // pand %xmm11,%xmm10
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 102,68,15,112,218,0 // pshufd $0x0,%xmm2,%xmm11
+ .byte 102,69,15,235,218 // por %xmm10,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,88,213 // addps %xmm13,%xmm10
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 65,15,89,211 // mulps %xmm11,%xmm2
+ .byte 68,15,92,210 // subps %xmm2,%xmm10
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 65,15,88,211 // addps %xmm11,%xmm2
+ .byte 68,15,94,234 // divps %xmm2,%xmm13
+ .byte 69,15,92,213 // subps %xmm13,%xmm10
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,89,226 // mulps %xmm10,%xmm12
+ .byte 102,69,15,58,8,220,1 // roundps $0x1,%xmm12,%xmm11
+ .byte 185,0,0,0,75 // mov $0x4b000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,88,236 // addps %xmm12,%xmm13
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 65,15,89,212 // mulps %xmm12,%xmm2
+ .byte 68,15,92,234 // subps %xmm2,%xmm13
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 65,15,92,212 // subps %xmm12,%xmm2
+ .byte 68,15,94,218 // divps %xmm2,%xmm11
+ .byte 69,15,88,221 // addps %xmm13,%xmm11
+ .byte 69,15,89,218 // mulps %xmm10,%xmm11
+ .byte 102,69,15,91,211 // cvtps2dq %xmm11,%xmm10
+ .byte 243,15,16,80,20 // movss 0x14(%rax),%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 65,15,88,210 // addps %xmm10,%xmm2
+ .byte 102,65,15,56,20,209 // blendvps %xmm0,%xmm9,%xmm2
+ .byte 15,87,192 // xorps %xmm0,%xmm0
+ .byte 15,95,208 // maxps %xmm0,%xmm2
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 102,15,110,192 // movd %eax,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 15,93,208 // minps %xmm0,%xmm2
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 65,15,40,192 // movaps %xmm8,%xmm0
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_a_sse41
+.globl _sk_parametric_a_sse41
+FUNCTION(_sk_parametric_a_sse41)
+_sk_parametric_a_sse41:
+ .byte 68,15,40,192 // movaps %xmm0,%xmm8
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,80,16 // movss 0x10(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 243,68,15,16,72,12 // movss 0xc(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 68,15,89,203 // mulps %xmm3,%xmm9
+ .byte 243,68,15,16,88,4 // movss 0x4(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 68,15,89,219 // mulps %xmm3,%xmm11
+ .byte 15,40,195 // movaps %xmm3,%xmm0
+ .byte 65,15,194,194,2 // cmpleps %xmm10,%xmm0
+ .byte 243,15,16,88,24 // movss 0x18(%rax),%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 68,15,88,203 // addps %xmm3,%xmm9
+ .byte 243,68,15,16,32 // movss (%rax),%xmm12
+ .byte 243,15,16,88,8 // movss 0x8(%rax),%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 68,15,88,219 // addps %xmm3,%xmm11
+ .byte 69,15,91,211 // cvtdq2ps %xmm11,%xmm10
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,234 // mulps %xmm10,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 68,15,92,235 // subps %xmm3,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 102,68,15,112,211,0 // pshufd $0x0,%xmm3,%xmm10
+ .byte 102,69,15,219,211 // pand %xmm11,%xmm10
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 102,68,15,112,219,0 // pshufd $0x0,%xmm3,%xmm11
+ .byte 102,69,15,235,218 // por %xmm10,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,88,213 // addps %xmm13,%xmm10
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 65,15,89,219 // mulps %xmm11,%xmm3
+ .byte 68,15,92,211 // subps %xmm3,%xmm10
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 65,15,88,219 // addps %xmm11,%xmm3
+ .byte 68,15,94,235 // divps %xmm3,%xmm13
+ .byte 69,15,92,213 // subps %xmm13,%xmm10
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,89,226 // mulps %xmm10,%xmm12
+ .byte 102,69,15,58,8,220,1 // roundps $0x1,%xmm12,%xmm11
+ .byte 185,0,0,0,75 // mov $0x4b000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,88,236 // addps %xmm12,%xmm13
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 65,15,89,220 // mulps %xmm12,%xmm3
+ .byte 68,15,92,235 // subps %xmm3,%xmm13
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 65,15,92,220 // subps %xmm12,%xmm3
+ .byte 68,15,94,219 // divps %xmm3,%xmm11
+ .byte 69,15,88,221 // addps %xmm13,%xmm11
+ .byte 69,15,89,218 // mulps %xmm10,%xmm11
+ .byte 102,69,15,91,211 // cvtps2dq %xmm11,%xmm10
+ .byte 243,15,16,88,20 // movss 0x14(%rax),%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 65,15,88,218 // addps %xmm10,%xmm3
+ .byte 102,65,15,56,20,217 // blendvps %xmm0,%xmm9,%xmm3
+ .byte 15,87,192 // xorps %xmm0,%xmm0
+ .byte 15,95,216 // maxps %xmm0,%xmm3
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 102,15,110,192 // movd %eax,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 15,93,216 // minps %xmm0,%xmm3
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 65,15,40,192 // movaps %xmm8,%xmm0
+ .byte 255,224 // jmpq *%rax
+
HIDDEN _sk_load_a8_sse41
.globl _sk_load_a8_sse41
FUNCTION(_sk_load_a8_sse41)
@@ -18615,9 +20449,9 @@ _sk_gather_i8_sse41:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 2054 <_sk_gather_i8_sse41+0xf>
+ .byte 116,5 // je 26ed <_sk_gather_i8_sse41+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 2056 <_sk_gather_i8_sse41+0x11>
+ .byte 235,2 // jmp 26ef <_sk_gather_i8_sse41+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 243,15,91,201 // cvttps2dq %xmm1,%xmm1
@@ -19769,7 +21603,7 @@ _sk_linear_gradient_sse41:
.byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
.byte 72,139,8 // mov (%rax),%rcx
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,132,254,0,0,0 // je 327c <_sk_linear_gradient_sse41+0x138>
+ .byte 15,132,254,0,0,0 // je 3915 <_sk_linear_gradient_sse41+0x138>
.byte 15,41,100,36,168 // movaps %xmm4,-0x58(%rsp)
.byte 15,41,108,36,184 // movaps %xmm5,-0x48(%rsp)
.byte 15,41,116,36,200 // movaps %xmm6,-0x38(%rsp)
@@ -19819,12 +21653,12 @@ _sk_linear_gradient_sse41:
.byte 15,40,196 // movaps %xmm4,%xmm0
.byte 72,131,192,36 // add $0x24,%rax
.byte 72,255,201 // dec %rcx
- .byte 15,133,65,255,255,255 // jne 31a7 <_sk_linear_gradient_sse41+0x63>
+ .byte 15,133,65,255,255,255 // jne 3840 <_sk_linear_gradient_sse41+0x63>
.byte 15,40,124,36,216 // movaps -0x28(%rsp),%xmm7
.byte 15,40,116,36,200 // movaps -0x38(%rsp),%xmm6
.byte 15,40,108,36,184 // movaps -0x48(%rsp),%xmm5
.byte 15,40,100,36,168 // movaps -0x58(%rsp),%xmm4
- .byte 235,13 // jmp 3289 <_sk_linear_gradient_sse41+0x145>
+ .byte 235,13 // jmp 3922 <_sk_linear_gradient_sse41+0x145>
.byte 15,87,201 // xorps %xmm1,%xmm1
.byte 15,87,210 // xorps %xmm2,%xmm2
.byte 15,87,219 // xorps %xmm3,%xmm3
@@ -22628,6 +24462,422 @@ _sk_table_a_sse2:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_parametric_r_sse2
+.globl _sk_parametric_r_sse2
+FUNCTION(_sk_parametric_r_sse2)
+_sk_parametric_r_sse2:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,72,16 // movss 0x10(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 243,68,15,16,64,12 // movss 0xc(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 68,15,89,192 // mulps %xmm0,%xmm8
+ .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 68,15,89,208 // mulps %xmm0,%xmm10
+ .byte 65,15,194,193,2 // cmpleps %xmm9,%xmm0
+ .byte 243,68,15,16,72,24 // movss 0x18(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,88,193 // addps %xmm9,%xmm8
+ .byte 243,68,15,16,8 // movss (%rax),%xmm9
+ .byte 243,68,15,16,88,8 // movss 0x8(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,211 // addps %xmm11,%xmm10
+ .byte 69,15,91,218 // cvtdq2ps %xmm10,%xmm11
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,92,235 // subps %xmm11,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 102,69,15,112,227,0 // pshufd $0x0,%xmm11,%xmm12
+ .byte 102,69,15,219,226 // pand %xmm10,%xmm12
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 102,69,15,112,218,0 // pshufd $0x0,%xmm10,%xmm11
+ .byte 102,69,15,235,220 // por %xmm12,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,229 // addps %xmm13,%xmm12
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,89,211 // mulps %xmm11,%xmm10
+ .byte 69,15,92,226 // subps %xmm10,%xmm12
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 185,0,0,128,63 // mov $0x3f800000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,249 // movd %ecx,%xmm15
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,88,243 // addps %xmm11,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,92,229 // subps %xmm13,%xmm12
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,89,204 // mulps %xmm12,%xmm9
+ .byte 243,69,15,91,217 // cvttps2dq %xmm9,%xmm11
+ .byte 69,15,91,219 // cvtdq2ps %xmm11,%xmm11
+ .byte 69,15,40,225 // movaps %xmm9,%xmm12
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 69,15,88,249 // addps %xmm9,%xmm15
+ .byte 69,15,40,233 // movaps %xmm9,%xmm13
+ .byte 69,15,194,235,1 // cmpltps %xmm11,%xmm13
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,84,234 // andps %xmm10,%xmm13
+ .byte 69,15,87,201 // xorps %xmm9,%xmm9
+ .byte 69,15,92,221 // subps %xmm13,%xmm11
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 102,69,15,110,216 // movd %r8d,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,236 // mulps %xmm12,%xmm13
+ .byte 69,15,92,253 // subps %xmm13,%xmm15
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,92,244 // subps %xmm12,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,88,239 // addps %xmm15,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 102,69,15,91,221 // cvtps2dq %xmm13,%xmm11
+ .byte 243,68,15,16,96,20 // movss 0x14(%rax),%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,227 // addps %xmm11,%xmm12
+ .byte 68,15,84,192 // andps %xmm0,%xmm8
+ .byte 65,15,85,196 // andnps %xmm12,%xmm0
+ .byte 65,15,86,192 // orps %xmm8,%xmm0
+ .byte 65,15,95,193 // maxps %xmm9,%xmm0
+ .byte 65,15,93,194 // minps %xmm10,%xmm0
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_g_sse2
+.globl _sk_parametric_g_sse2
+FUNCTION(_sk_parametric_g_sse2)
+_sk_parametric_g_sse2:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,72,16 // movss 0x10(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 243,68,15,16,64,12 // movss 0xc(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 68,15,89,193 // mulps %xmm1,%xmm8
+ .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 68,15,89,209 // mulps %xmm1,%xmm10
+ .byte 65,15,194,201,2 // cmpleps %xmm9,%xmm1
+ .byte 243,68,15,16,72,24 // movss 0x18(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,88,193 // addps %xmm9,%xmm8
+ .byte 243,68,15,16,8 // movss (%rax),%xmm9
+ .byte 243,68,15,16,88,8 // movss 0x8(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,211 // addps %xmm11,%xmm10
+ .byte 69,15,91,218 // cvtdq2ps %xmm10,%xmm11
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,92,235 // subps %xmm11,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 102,69,15,112,227,0 // pshufd $0x0,%xmm11,%xmm12
+ .byte 102,69,15,219,226 // pand %xmm10,%xmm12
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 102,69,15,112,218,0 // pshufd $0x0,%xmm10,%xmm11
+ .byte 102,69,15,235,220 // por %xmm12,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,229 // addps %xmm13,%xmm12
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,89,211 // mulps %xmm11,%xmm10
+ .byte 69,15,92,226 // subps %xmm10,%xmm12
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 185,0,0,128,63 // mov $0x3f800000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,249 // movd %ecx,%xmm15
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,88,243 // addps %xmm11,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,92,229 // subps %xmm13,%xmm12
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,89,204 // mulps %xmm12,%xmm9
+ .byte 243,69,15,91,217 // cvttps2dq %xmm9,%xmm11
+ .byte 69,15,91,219 // cvtdq2ps %xmm11,%xmm11
+ .byte 69,15,40,225 // movaps %xmm9,%xmm12
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 69,15,88,249 // addps %xmm9,%xmm15
+ .byte 69,15,40,233 // movaps %xmm9,%xmm13
+ .byte 69,15,194,235,1 // cmpltps %xmm11,%xmm13
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,84,234 // andps %xmm10,%xmm13
+ .byte 69,15,87,201 // xorps %xmm9,%xmm9
+ .byte 69,15,92,221 // subps %xmm13,%xmm11
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 102,69,15,110,216 // movd %r8d,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,236 // mulps %xmm12,%xmm13
+ .byte 69,15,92,253 // subps %xmm13,%xmm15
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,92,244 // subps %xmm12,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,88,239 // addps %xmm15,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 102,69,15,91,221 // cvtps2dq %xmm13,%xmm11
+ .byte 243,68,15,16,96,20 // movss 0x14(%rax),%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,227 // addps %xmm11,%xmm12
+ .byte 68,15,84,193 // andps %xmm1,%xmm8
+ .byte 65,15,85,204 // andnps %xmm12,%xmm1
+ .byte 65,15,86,200 // orps %xmm8,%xmm1
+ .byte 65,15,95,201 // maxps %xmm9,%xmm1
+ .byte 65,15,93,202 // minps %xmm10,%xmm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_b_sse2
+.globl _sk_parametric_b_sse2
+FUNCTION(_sk_parametric_b_sse2)
+_sk_parametric_b_sse2:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,72,16 // movss 0x10(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 243,68,15,16,64,12 // movss 0xc(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 68,15,89,194 // mulps %xmm2,%xmm8
+ .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 68,15,89,210 // mulps %xmm2,%xmm10
+ .byte 65,15,194,209,2 // cmpleps %xmm9,%xmm2
+ .byte 243,68,15,16,72,24 // movss 0x18(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,88,193 // addps %xmm9,%xmm8
+ .byte 243,68,15,16,8 // movss (%rax),%xmm9
+ .byte 243,68,15,16,88,8 // movss 0x8(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,211 // addps %xmm11,%xmm10
+ .byte 69,15,91,218 // cvtdq2ps %xmm10,%xmm11
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,92,235 // subps %xmm11,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 102,69,15,112,227,0 // pshufd $0x0,%xmm11,%xmm12
+ .byte 102,69,15,219,226 // pand %xmm10,%xmm12
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 102,69,15,112,218,0 // pshufd $0x0,%xmm10,%xmm11
+ .byte 102,69,15,235,220 // por %xmm12,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,229 // addps %xmm13,%xmm12
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,89,211 // mulps %xmm11,%xmm10
+ .byte 69,15,92,226 // subps %xmm10,%xmm12
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 185,0,0,128,63 // mov $0x3f800000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,249 // movd %ecx,%xmm15
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,88,243 // addps %xmm11,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,92,229 // subps %xmm13,%xmm12
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,89,204 // mulps %xmm12,%xmm9
+ .byte 243,69,15,91,217 // cvttps2dq %xmm9,%xmm11
+ .byte 69,15,91,219 // cvtdq2ps %xmm11,%xmm11
+ .byte 69,15,40,225 // movaps %xmm9,%xmm12
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 69,15,88,249 // addps %xmm9,%xmm15
+ .byte 69,15,40,233 // movaps %xmm9,%xmm13
+ .byte 69,15,194,235,1 // cmpltps %xmm11,%xmm13
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,84,234 // andps %xmm10,%xmm13
+ .byte 69,15,87,201 // xorps %xmm9,%xmm9
+ .byte 69,15,92,221 // subps %xmm13,%xmm11
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 102,69,15,110,216 // movd %r8d,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,236 // mulps %xmm12,%xmm13
+ .byte 69,15,92,253 // subps %xmm13,%xmm15
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,92,244 // subps %xmm12,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,88,239 // addps %xmm15,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 102,69,15,91,221 // cvtps2dq %xmm13,%xmm11
+ .byte 243,68,15,16,96,20 // movss 0x14(%rax),%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,227 // addps %xmm11,%xmm12
+ .byte 68,15,84,194 // andps %xmm2,%xmm8
+ .byte 65,15,85,212 // andnps %xmm12,%xmm2
+ .byte 65,15,86,208 // orps %xmm8,%xmm2
+ .byte 65,15,95,209 // maxps %xmm9,%xmm2
+ .byte 65,15,93,210 // minps %xmm10,%xmm2
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_a_sse2 // Stage: applies a piecewise curve to the four float lanes in %xmm3 (presumably the alpha channel, per the _a suffix -- confirm).
+.globl _sk_parametric_a_sse2
+FUNCTION(_sk_parametric_a_sse2) // Reads seven floats at offsets 0x0..0x18 from the pointer fetched by the first lods; their field names are not visible here.
+_sk_parametric_a_sse2: // Per lane: x <= [0x10] ? [0xc]*x + [0x18] : approx_pow([0x4]*x + [0x8], [0x0]) + [0x14], then clamped to [0,1].
+ .byte 72,173 // lods %ds:(%rsi),%rax -- %rax = next qword from the stream at %rsi; used below as the parameter pointer
+ .byte 243,68,15,16,72,16 // movss 0x10(%rax),%xmm9 -- threshold that selects between the two branches
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 243,68,15,16,64,12 // movss 0xc(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 68,15,89,195 // mulps %xmm3,%xmm8 -- xmm8 = [0xc]*x
+ .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 68,15,89,211 // mulps %xmm3,%xmm10 -- xmm10 = [0x4]*x
+ .byte 65,15,194,217,2 // cmpleps %xmm9,%xmm3 -- xmm3 becomes the lane mask (x <= [0x10]); x itself is not needed after this
+ .byte 243,68,15,16,72,24 // movss 0x18(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,88,193 // addps %xmm9,%xmm8 -- xmm8 = [0xc]*x + [0x18] (linear branch)
+ .byte 243,68,15,16,8 // movss (%rax),%xmm9 -- exponent [0x0]; broadcast further down
+ .byte 243,68,15,16,88,8 // movss 0x8(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,211 // addps %xmm11,%xmm10 -- xmm10 = [0x4]*x + [0x8] (base of the power)
+ .byte 69,15,91,218 // cvtdq2ps %xmm10,%xmm11 -- log2 approximation starts: the float bits of the base are read as int32 and converted to float
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx -- 2^-23 as a float
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx -- 127.0f
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,92,235 // subps %xmm11,%xmm13 -- xmm13 = bits*2^-23 - 127
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx -- mask of the 23 mantissa bits
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 102,69,15,112,227,0 // pshufd $0x0,%xmm11,%xmm12
+ .byte 102,69,15,219,226 // pand %xmm10,%xmm12
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx -- 0.5f
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 102,69,15,112,218,0 // pshufd $0x0,%xmm10,%xmm11
+ .byte 102,69,15,235,220 // por %xmm12,%xmm11 -- xmm11 = m: mantissa bits of the base combined with the exponent bits of 0.5
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx -- ~2.7745f
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,229 // addps %xmm13,%xmm12
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx -- ~1.4980f
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,89,211 // mulps %xmm11,%xmm10
+ .byte 69,15,92,226 // subps %xmm10,%xmm12 -- xmm12 -= 1.4980*m
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx -- ~1.7259f
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx -- ~0.3521f
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 185,0,0,128,63 // mov $0x3f800000,%ecx -- 1.0f; used for the floor correction and as the upper clamp bound
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d -- 2^23 as a float; consumed at the end of the pow2 approximation
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx -- ~121.274f
+ .byte 102,68,15,110,249 // movd %ecx,%xmm15
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,88,243 // addps %xmm11,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13 -- xmm13 = 1.7259/(0.3521 + m)
+ .byte 69,15,92,229 // subps %xmm13,%xmm12 -- xmm12 = approximate log2 of the base
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,89,204 // mulps %xmm12,%xmm9 -- xmm9 = p = [0x0] * log2(base)
+ .byte 243,69,15,91,217 // cvttps2dq %xmm9,%xmm11 -- pow2 approximation starts: truncate p toward zero
+ .byte 69,15,91,219 // cvtdq2ps %xmm11,%xmm11
+ .byte 69,15,40,225 // movaps %xmm9,%xmm12
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 69,15,88,249 // addps %xmm9,%xmm15 -- xmm15 = 121.274 + p
+ .byte 69,15,40,233 // movaps %xmm9,%xmm13
+ .byte 69,15,194,235,1 // cmpltps %xmm11,%xmm13 -- lanes where p < trunc(p)
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,84,234 // andps %xmm10,%xmm13 -- 1.0 in those lanes, 0 elsewhere
+ .byte 69,15,87,201 // xorps %xmm9,%xmm9 -- xmm9 = 0, the lower clamp bound used at the end
+ .byte 69,15,92,221 // subps %xmm13,%xmm11 -- xmm11 = floor(p)
+ .byte 69,15,92,227 // subps %xmm11,%xmm12 -- xmm12 = f = p - floor(p)
+ .byte 102,69,15,110,216 // movd %r8d,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx -- ~1.4901f
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,236 // mulps %xmm12,%xmm13
+ .byte 69,15,92,253 // subps %xmm13,%xmm15 -- xmm15 -= 1.4901*f
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx -- ~27.728f
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx -- ~4.8425f
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,92,244 // subps %xmm12,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13 -- xmm13 = 27.728/(4.8425 - f)
+ .byte 69,15,88,239 // addps %xmm15,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13 -- scale by 2^23
+ .byte 102,69,15,91,221 // cvtps2dq %xmm13,%xmm11 -- round to int32; the resulting bits are consumed as a float by the addps below
+ .byte 243,68,15,16,96,20 // movss 0x14(%rax),%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,227 // addps %xmm11,%xmm12 -- xmm12 = [0x14] + approximate power (power branch)
+ .byte 68,15,84,195 // andps %xmm3,%xmm8 -- keep the linear branch where the mask is set
+ .byte 65,15,85,220 // andnps %xmm12,%xmm3 -- keep the power branch where the mask is clear
+ .byte 65,15,86,216 // orps %xmm8,%xmm3 -- merge the two branches into xmm3
+ .byte 65,15,95,217 // maxps %xmm9,%xmm3 -- clamp below at 0
+ .byte 65,15,93,218 // minps %xmm10,%xmm3 -- clamp above at 1.0
+ .byte 72,173 // lods %ds:(%rsi),%rax -- fetch the next qword from the stream at %rsi
+ .byte 255,224 // jmpq *%rax -- and jump to it
+
HIDDEN _sk_load_a8_sse2
.globl _sk_load_a8_sse2
FUNCTION(_sk_load_a8_sse2)
@@ -22796,9 +25046,9 @@ _sk_gather_i8_sse2:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 21e0 <_sk_gather_i8_sse2+0xf>
+ .byte 116,5 // je 2914 <_sk_gather_i8_sse2+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 21e2 <_sk_gather_i8_sse2+0x11>
+ .byte 235,2 // jmp 2916 <_sk_gather_i8_sse2+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 243,15,91,201 // cvttps2dq %xmm1,%xmm1
@@ -24053,7 +26303,7 @@ _sk_linear_gradient_sse2:
.byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
.byte 72,139,8 // mov (%rax),%rcx
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,132,15,1,0,0 // je 35ca <_sk_linear_gradient_sse2+0x149>
+ .byte 15,132,15,1,0,0 // je 3cfe <_sk_linear_gradient_sse2+0x149>
.byte 72,139,64,8 // mov 0x8(%rax),%rax
.byte 72,131,192,32 // add $0x20,%rax
.byte 69,15,87,192 // xorps %xmm8,%xmm8
@@ -24114,8 +26364,8 @@ _sk_linear_gradient_sse2:
.byte 69,15,86,231 // orps %xmm15,%xmm12
.byte 72,131,192,36 // add $0x24,%rax
.byte 72,255,201 // dec %rcx
- .byte 15,133,8,255,255,255 // jne 34d0 <_sk_linear_gradient_sse2+0x4f>
- .byte 235,13 // jmp 35d7 <_sk_linear_gradient_sse2+0x156>
+ .byte 15,133,8,255,255,255 // jne 3c04 <_sk_linear_gradient_sse2+0x4f>
+ .byte 235,13 // jmp 3d0b <_sk_linear_gradient_sse2+0x156>
.byte 15,87,201 // xorps %xmm1,%xmm1
.byte 15,87,210 // xorps %xmm2,%xmm2
.byte 15,87,219 // xorps %xmm3,%xmm3
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index eb77b6d5a5..d8f82efef0 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -1357,7 +1357,7 @@ _sk_lerp_565_hsw LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,225 ; jmpq ffffffffe2001478 <_sk_callback_hsw+0xffffffffe1ffd85e>
+ DB 233,255,255,255,225 ; jmpq ffffffffe2001478 <_sk_callback_hsw+0xffffffffe1ffd22e>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -1934,6 +1934,334 @@ _sk_table_a_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_hsw ; Stage: applies a piecewise curve to the eight float lanes in %ymm0 (presumably the red channel, per the _r suffix -- confirm).
+_sk_parametric_r_hsw LABEL PROC ; Per lane: x <= [0x10] ? [0xc]*x + [0x18] : approx_pow([0x4]*x + [0x8], [0x0]) + [0x14], clamped to [0,1]; [n] is the float at offset n from the pointer fetched by the first lods.
+ DB 72,173 ; lods %ds:(%rsi),%rax -- %rax = next qword from the stream at %rsi; used below as the parameter pointer
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 -- threshold that selects between the two branches
+ DB 196,65,124,194,192,2 ; vcmpleps %ymm8,%ymm0,%ymm8 -- ymm8 = lane mask (x <= [0x10])
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 196,66,125,168,202 ; vfmadd213ps %ymm10,%ymm0,%ymm9 -- ymm9 = [0xc]*x + [0x18] (linear branch)
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 196,66,125,168,211 ; vfmadd213ps %ymm11,%ymm0,%ymm10 -- ymm10 = [0x4]*x + [0x8] (base of the power); x is not read again, ymm0 is scratch from here on
+ DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 -- log2 approximation starts: the float bits of the base are read as int32 and converted to float
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d -- 2^-23 as a float
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d -- 127.0f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,232 ; vpbroadcastd %xmm0,%ymm13
+ DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = bits*2^-23 - 127
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d -- mask of the 23 mantissa bits
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 196,65,125,219,210 ; vpand %ymm10,%ymm0,%ymm10
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d -- 0.5f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 197,45,235,208 ; vpor %ymm0,%ymm10,%ymm10 -- ymm10 = m: mantissa bits of the base combined with the exponent bits of 0.5
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d -- ~2.7745f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 197,20,88,216 ; vaddps %ymm0,%ymm13,%ymm11
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d -- ~1.4980f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12
+ DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.4980*m
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d -- ~1.7259f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,216 ; vpbroadcastd %xmm0,%ymm11
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d -- ~0.3521f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 197,172,88,192 ; vaddps %ymm0,%ymm10,%ymm0
+ DB 197,164,94,192 ; vdivps %ymm0,%ymm11,%ymm0 -- ymm0 = 1.7259/(0.3521 + m)
+ DB 197,156,92,192 ; vsubps %ymm0,%ymm12,%ymm0 -- ymm0 = approximate log2 of the base
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 -- exponent [0x0]
+ DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11 -- ymm11 = p = [0x0] * log2(base)
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 -- pow2 approximation starts: ymm10 = floor(p) (rounding mode 1 rounds down)
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = p - floor(p)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- 2^23 as a float
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d -- ~121.274f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11 -- ymm11 = 121.274 + p
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d -- ~1.4901f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,232 ; vpbroadcastd %xmm0,%ymm13
+ DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.4901*f
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d -- ~27.728f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,216 ; vpbroadcastd %xmm0,%ymm11
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d -- ~4.8425f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 196,193,124,92,194 ; vsubps %ymm10,%ymm0,%ymm0
+ DB 197,164,94,192 ; vdivps %ymm0,%ymm11,%ymm0 -- ymm0 = 27.728/(4.8425 - f)
+ DB 197,148,88,192 ; vaddps %ymm0,%ymm13,%ymm0
+ DB 197,156,89,192 ; vmulps %ymm0,%ymm12,%ymm0 -- scale by 2^23
+ DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0 -- round to int32; the resulting bits are consumed as a float by the vaddps below
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0 -- ymm0 = approximate power + [0x14] (power branch)
+ DB 196,195,125,74,193,128 ; vblendvps %ymm8,%ymm9,%ymm0,%ymm0 -- take the linear branch (ymm9) where the mask is set, else keep the power branch
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm8 -- clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- 1.0f; overwrites %rax, the parameter pointer is not read again
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 197,188,93,192 ; vminps %ymm0,%ymm8,%ymm0 -- clamp above at 1.0; result left in ymm0
+ DB 72,173 ; lods %ds:(%rsi),%rax -- fetch the next qword from the stream at %rsi
+ DB 255,224 ; jmpq *%rax -- and jump to it
+
+PUBLIC _sk_parametric_g_hsw ; Stage: applies a piecewise curve to the eight float lanes in %ymm1 (presumably the green channel, per the _g suffix -- confirm).
+_sk_parametric_g_hsw LABEL PROC ; Per lane: x <= [0x10] ? [0xc]*x + [0x18] : approx_pow([0x4]*x + [0x8], [0x0]) + [0x14], clamped to [0,1]; [n] is the float at offset n from the pointer fetched by the first lods.
+ DB 72,173 ; lods %ds:(%rsi),%rax -- %rax = next qword from the stream at %rsi; used below as the parameter pointer
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 -- threshold that selects between the two branches
+ DB 196,65,116,194,192,2 ; vcmpleps %ymm8,%ymm1,%ymm8 -- ymm8 = lane mask (x <= [0x10])
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 196,66,117,168,202 ; vfmadd213ps %ymm10,%ymm1,%ymm9 -- ymm9 = [0xc]*x + [0x18] (linear branch)
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 196,66,117,168,211 ; vfmadd213ps %ymm11,%ymm1,%ymm10 -- ymm10 = [0x4]*x + [0x8] (base of the power); x is not read again, ymm1 is scratch from here on
+ DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 -- log2 approximation starts: the float bits of the base are read as int32 and converted to float
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d -- 2^-23 as a float
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d -- 127.0f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,233 ; vpbroadcastd %xmm1,%ymm13
+ DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = bits*2^-23 - 127
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d -- mask of the 23 mantissa bits
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 196,65,117,219,210 ; vpand %ymm10,%ymm1,%ymm10
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d -- 0.5f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 197,45,235,209 ; vpor %ymm1,%ymm10,%ymm10 -- ymm10 = m: mantissa bits of the base combined with the exponent bits of 0.5
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d -- ~2.7745f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 197,20,88,217 ; vaddps %ymm1,%ymm13,%ymm11
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d -- ~1.4980f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12
+ DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.4980*m
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d -- ~1.7259f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,217 ; vpbroadcastd %xmm1,%ymm11
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d -- ~0.3521f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 197,172,88,201 ; vaddps %ymm1,%ymm10,%ymm1
+ DB 197,164,94,201 ; vdivps %ymm1,%ymm11,%ymm1 -- ymm1 = 1.7259/(0.3521 + m)
+ DB 197,156,92,201 ; vsubps %ymm1,%ymm12,%ymm1 -- ymm1 = approximate log2 of the base
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 -- exponent [0x0]
+ DB 197,44,89,217 ; vmulps %ymm1,%ymm10,%ymm11 -- ymm11 = p = [0x0] * log2(base)
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 -- pow2 approximation starts: ymm10 = floor(p) (rounding mode 1 rounds down)
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = p - floor(p)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- 2^23 as a float
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d -- ~121.274f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11 -- ymm11 = 121.274 + p
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d -- ~1.4901f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,233 ; vpbroadcastd %xmm1,%ymm13
+ DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.4901*f
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d -- ~27.728f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,217 ; vpbroadcastd %xmm1,%ymm11
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d -- ~4.8425f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 196,193,116,92,202 ; vsubps %ymm10,%ymm1,%ymm1
+ DB 197,164,94,201 ; vdivps %ymm1,%ymm11,%ymm1 -- ymm1 = 27.728/(4.8425 - f)
+ DB 197,148,88,201 ; vaddps %ymm1,%ymm13,%ymm1
+ DB 197,156,89,201 ; vmulps %ymm1,%ymm12,%ymm1 -- scale by 2^23
+ DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1 -- round to int32; the resulting bits are consumed as a float by the vaddps below
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1 -- ymm1 = approximate power + [0x14] (power branch)
+ DB 196,195,117,74,201,128 ; vblendvps %ymm8,%ymm9,%ymm1,%ymm1 -- take the linear branch (ymm9) where the mask is set, else keep the power branch
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,116,95,192 ; vmaxps %ymm8,%ymm1,%ymm8 -- clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- 1.0f; overwrites %rax, the parameter pointer is not read again
+ DB 197,249,110,200 ; vmovd %eax,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 197,188,93,201 ; vminps %ymm1,%ymm8,%ymm1 -- clamp above at 1.0; result left in ymm1
+ DB 72,173 ; lods %ds:(%rsi),%rax -- fetch the next qword from the stream at %rsi
+ DB 255,224 ; jmpq *%rax -- and jump to it
+
+PUBLIC _sk_parametric_b_hsw ; Stage: applies a piecewise curve to the eight float lanes in %ymm2 (presumably the blue channel, per the _b suffix -- confirm).
+_sk_parametric_b_hsw LABEL PROC ; Per lane: x <= [0x10] ? [0xc]*x + [0x18] : approx_pow([0x4]*x + [0x8], [0x0]) + [0x14], clamped to [0,1]; [n] is the float at offset n from the pointer fetched by the first lods.
+ DB 72,173 ; lods %ds:(%rsi),%rax -- %rax = next qword from the stream at %rsi; used below as the parameter pointer
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 -- threshold that selects between the two branches
+ DB 196,65,108,194,192,2 ; vcmpleps %ymm8,%ymm2,%ymm8 -- ymm8 = lane mask (x <= [0x10])
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 196,66,109,168,202 ; vfmadd213ps %ymm10,%ymm2,%ymm9 -- ymm9 = [0xc]*x + [0x18] (linear branch)
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 196,66,109,168,211 ; vfmadd213ps %ymm11,%ymm2,%ymm10 -- ymm10 = [0x4]*x + [0x8] (base of the power); x is not read again, ymm2 is scratch from here on
+ DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 -- log2 approximation starts: the float bits of the base are read as int32 and converted to float
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d -- 2^-23 as a float
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d -- 127.0f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,234 ; vpbroadcastd %xmm2,%ymm13
+ DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = bits*2^-23 - 127
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d -- mask of the 23 mantissa bits
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 196,65,109,219,210 ; vpand %ymm10,%ymm2,%ymm10
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d -- 0.5f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 197,45,235,210 ; vpor %ymm2,%ymm10,%ymm10 -- ymm10 = m: mantissa bits of the base combined with the exponent bits of 0.5
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d -- ~2.7745f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 197,20,88,218 ; vaddps %ymm2,%ymm13,%ymm11
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d -- ~1.4980f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12
+ DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.4980*m
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d -- ~1.7259f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,218 ; vpbroadcastd %xmm2,%ymm11
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d -- ~0.3521f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 197,172,88,210 ; vaddps %ymm2,%ymm10,%ymm2
+ DB 197,164,94,210 ; vdivps %ymm2,%ymm11,%ymm2 -- ymm2 = 1.7259/(0.3521 + m)
+ DB 197,156,92,210 ; vsubps %ymm2,%ymm12,%ymm2 -- ymm2 = approximate log2 of the base
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 -- exponent [0x0]
+ DB 197,44,89,218 ; vmulps %ymm2,%ymm10,%ymm11 -- ymm11 = p = [0x0] * log2(base)
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 -- pow2 approximation starts: ymm10 = floor(p) (rounding mode 1 rounds down)
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = p - floor(p)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- 2^23 as a float
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d -- ~121.274f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11 -- ymm11 = 121.274 + p
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d -- ~1.4901f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,234 ; vpbroadcastd %xmm2,%ymm13
+ DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.4901*f
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d -- ~27.728f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,218 ; vpbroadcastd %xmm2,%ymm11
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d -- ~4.8425f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 196,193,108,92,210 ; vsubps %ymm10,%ymm2,%ymm2
+ DB 197,164,94,210 ; vdivps %ymm2,%ymm11,%ymm2 -- ymm2 = 27.728/(4.8425 - f)
+ DB 197,148,88,210 ; vaddps %ymm2,%ymm13,%ymm2
+ DB 197,156,89,210 ; vmulps %ymm2,%ymm12,%ymm2 -- scale by 2^23
+ DB 197,253,91,210 ; vcvtps2dq %ymm2,%ymm2 -- round to int32; the resulting bits are consumed as a float by the vaddps below
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,108,88,210 ; vaddps %ymm10,%ymm2,%ymm2 -- ymm2 = approximate power + [0x14] (power branch)
+ DB 196,195,109,74,209,128 ; vblendvps %ymm8,%ymm9,%ymm2,%ymm2 -- take the linear branch (ymm9) where the mask is set, else keep the power branch
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,108,95,192 ; vmaxps %ymm8,%ymm2,%ymm8 -- clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- 1.0f; overwrites %rax, the parameter pointer is not read again
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 197,188,93,210 ; vminps %ymm2,%ymm8,%ymm2 -- clamp above at 1.0; result left in ymm2
+ DB 72,173 ; lods %ds:(%rsi),%rax -- fetch the next qword from the stream at %rsi
+ DB 255,224 ; jmpq *%rax -- and jump to it
+
+PUBLIC _sk_parametric_a_hsw ; Stage: applies a piecewise curve to the eight float lanes in %ymm3 (presumably the alpha channel, per the _a suffix -- confirm).
+_sk_parametric_a_hsw LABEL PROC ; Per lane: x <= [0x10] ? [0xc]*x + [0x18] : approx_pow([0x4]*x + [0x8], [0x0]) + [0x14], clamped to [0,1]; [n] is the float at offset n from the pointer fetched by the first lods.
+ DB 72,173 ; lods %ds:(%rsi),%rax -- %rax = next qword from the stream at %rsi; used below as the parameter pointer
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 -- threshold that selects between the two branches
+ DB 196,65,100,194,192,2 ; vcmpleps %ymm8,%ymm3,%ymm8 -- ymm8 = lane mask (x <= [0x10])
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 196,66,101,168,202 ; vfmadd213ps %ymm10,%ymm3,%ymm9 -- ymm9 = [0xc]*x + [0x18] (linear branch)
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 196,66,101,168,211 ; vfmadd213ps %ymm11,%ymm3,%ymm10 -- ymm10 = [0x4]*x + [0x8] (base of the power); x is not read again, ymm3 is scratch from here on
+ DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 -- log2 approximation starts: the float bits of the base are read as int32 and converted to float
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d -- 2^-23 as a float
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d -- 127.0f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,235 ; vpbroadcastd %xmm3,%ymm13
+ DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = bits*2^-23 - 127
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d -- mask of the 23 mantissa bits
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 196,65,101,219,210 ; vpand %ymm10,%ymm3,%ymm10
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d -- 0.5f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 197,45,235,211 ; vpor %ymm3,%ymm10,%ymm10 -- ymm10 = m: mantissa bits of the base combined with the exponent bits of 0.5
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d -- ~2.7745f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 197,20,88,219 ; vaddps %ymm3,%ymm13,%ymm11
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d -- ~1.4980f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12
+ DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.4980*m
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d -- ~1.7259f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,219 ; vpbroadcastd %xmm3,%ymm11
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d -- ~0.3521f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 197,172,88,219 ; vaddps %ymm3,%ymm10,%ymm3
+ DB 197,164,94,219 ; vdivps %ymm3,%ymm11,%ymm3 -- ymm3 = 1.7259/(0.3521 + m)
+ DB 197,156,92,219 ; vsubps %ymm3,%ymm12,%ymm3 -- ymm3 = approximate log2 of the base
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 -- exponent [0x0]
+ DB 197,44,89,219 ; vmulps %ymm3,%ymm10,%ymm11 -- ymm11 = p = [0x0] * log2(base)
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 -- pow2 approximation starts: ymm10 = floor(p) (rounding mode 1 rounds down)
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = p - floor(p)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- 2^23 as a float
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d -- ~121.274f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11 -- ymm11 = 121.274 + p
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d -- ~1.4901f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,235 ; vpbroadcastd %xmm3,%ymm13
+ DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.4901*f
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d -- ~27.728f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,219 ; vpbroadcastd %xmm3,%ymm11
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d -- ~4.8425f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 196,193,100,92,218 ; vsubps %ymm10,%ymm3,%ymm3
+ DB 197,164,94,219 ; vdivps %ymm3,%ymm11,%ymm3 -- ymm3 = 27.728/(4.8425 - f)
+ DB 197,148,88,219 ; vaddps %ymm3,%ymm13,%ymm3
+ DB 197,156,89,219 ; vmulps %ymm3,%ymm12,%ymm3 -- scale by 2^23
+ DB 197,253,91,219 ; vcvtps2dq %ymm3,%ymm3 -- round to int32; the resulting bits are consumed as a float by the vaddps below
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,100,88,218 ; vaddps %ymm10,%ymm3,%ymm3 -- ymm3 = approximate power + [0x14] (power branch)
+ DB 196,195,101,74,217,128 ; vblendvps %ymm8,%ymm9,%ymm3,%ymm3 -- take the linear branch (ymm9) where the mask is set, else keep the power branch
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,100,95,192 ; vmaxps %ymm8,%ymm3,%ymm8 -- clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- 1.0f; overwrites %rax, the parameter pointer is not read again
+ DB 197,249,110,216 ; vmovd %eax,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 197,188,93,219 ; vminps %ymm3,%ymm8,%ymm3 -- clamp above at 1.0; result left in ymm3
+ DB 72,173 ; lods %ds:(%rsi),%rax -- fetch the next qword from the stream at %rsi
+ DB 255,224 ; jmpq *%rax -- and jump to it
+
PUBLIC _sk_load_a8_hsw
_sk_load_a8_hsw LABEL PROC
DB 73,137,200 ; mov %rcx,%r8
@@ -1941,7 +2269,7 @@ _sk_load_a8_hsw LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,50 ; jne 1e28 <_sk_load_a8_hsw+0x42>
+ DB 117,50 ; jne 2458 <_sk_load_a8_hsw+0x42>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
@@ -1964,9 +2292,9 @@ _sk_load_a8_hsw LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 1e30 <_sk_load_a8_hsw+0x4a>
+ DB 117,234 ; jne 2460 <_sk_load_a8_hsw+0x4a>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,173 ; jmp 1dfa <_sk_load_a8_hsw+0x14>
+ DB 235,173 ; jmp 242a <_sk_load_a8_hsw+0x14>
PUBLIC _sk_gather_a8_hsw
_sk_gather_a8_hsw LABEL PROC
@@ -2037,7 +2365,7 @@ _sk_store_a8_hsw LABEL PROC
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 1f65 <_sk_store_a8_hsw+0x3b>
+ DB 117,10 ; jne 2595 <_sk_store_a8_hsw+0x3b>
DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2045,10 +2373,10 @@ _sk_store_a8_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 1f61 <_sk_store_a8_hsw+0x37>
+ DB 119,236 ; ja 2591 <_sk_store_a8_hsw+0x37>
DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 1fc8 <_sk_store_a8_hsw+0x9e>
+ DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 25f8 <_sk_store_a8_hsw+0x9e>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -2059,7 +2387,7 @@ _sk_store_a8_hsw LABEL PROC
DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- DB 235,154 ; jmp 1f61 <_sk_store_a8_hsw+0x37>
+ DB 235,154 ; jmp 2591 <_sk_store_a8_hsw+0x37>
DB 144 ; nop
DB 246,255 ; idiv %bh
DB 255 ; (bad)
@@ -2091,7 +2419,7 @@ _sk_load_g8_hsw LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,60 ; jne 2030 <_sk_load_g8_hsw+0x4c>
+ DB 117,60 ; jne 2660 <_sk_load_g8_hsw+0x4c>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
@@ -2116,9 +2444,9 @@ _sk_load_g8_hsw LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 2038 <_sk_load_g8_hsw+0x54>
+ DB 117,234 ; jne 2668 <_sk_load_g8_hsw+0x54>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,163 ; jmp 1ff8 <_sk_load_g8_hsw+0x14>
+ DB 235,163 ; jmp 2628 <_sk_load_g8_hsw+0x14>
PUBLIC _sk_gather_g8_hsw
_sk_gather_g8_hsw LABEL PROC
@@ -2183,9 +2511,9 @@ _sk_gather_i8_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 214b <_sk_gather_i8_hsw+0xf>
+ DB 116,5 ; je 277b <_sk_gather_i8_hsw+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 214d <_sk_gather_i8_hsw+0x11>
+ DB 235,2 ; jmp 277d <_sk_gather_i8_hsw+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 65,87 ; push %r15
DB 65,86 ; push %r14
@@ -2256,7 +2584,7 @@ _sk_load_565_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,149,0,0,0 ; jne 22ff <_sk_load_565_hsw+0xa3>
+ DB 15,133,149,0,0,0 ; jne 292f <_sk_load_565_hsw+0xa3>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 196,226,125,51,208 ; vpmovzxwd %xmm0,%ymm2
DB 184,0,248,0,0 ; mov $0xf800,%eax
@@ -2296,9 +2624,9 @@ _sk_load_565_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,89,255,255,255 ; ja 2270 <_sk_load_565_hsw+0x14>
+ DB 15,135,89,255,255,255 ; ja 28a0 <_sk_load_565_hsw+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 236c <_sk_load_565_hsw+0x110>
+ DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 299c <_sk_load_565_hsw+0x110>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -2310,12 +2638,12 @@ _sk_load_565_hsw LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,5,255,255,255 ; jmpq 2270 <_sk_load_565_hsw+0x14>
+ DB 233,5,255,255,255 ; jmpq 28a0 <_sk_load_565_hsw+0x14>
DB 144 ; nop
DB 243,255 ; repz (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 235,255 ; jmp 2371 <_sk_load_565_hsw+0x115>
+ DB 235,255 ; jmp 29a1 <_sk_load_565_hsw+0x115>
DB 255 ; (bad)
DB 255,227 ; jmpq *%rbx
DB 255 ; (bad)
@@ -2438,7 +2766,7 @@ _sk_store_565_hsw LABEL PROC
DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 2537 <_sk_store_565_hsw+0x6c>
+ DB 117,10 ; jne 2b67 <_sk_store_565_hsw+0x6c>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2446,9 +2774,9 @@ _sk_store_565_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 2533 <_sk_store_565_hsw+0x68>
+ DB 119,236 ; ja 2b63 <_sk_store_565_hsw+0x68>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2594 <_sk_store_565_hsw+0xc9>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2bc4 <_sk_store_565_hsw+0xc9>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -2459,7 +2787,7 @@ _sk_store_565_hsw LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 2533 <_sk_store_565_hsw+0x68>
+ DB 235,159 ; jmp 2b63 <_sk_store_565_hsw+0x68>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2488,7 +2816,7 @@ _sk_load_4444_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,179,0,0,0 ; jne 2671 <_sk_load_4444_hsw+0xc1>
+ DB 15,133,179,0,0,0 ; jne 2ca1 <_sk_load_4444_hsw+0xc1>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 196,98,125,51,200 ; vpmovzxwd %xmm0,%ymm9
DB 184,0,240,0,0 ; mov $0xf000,%eax
@@ -2534,9 +2862,9 @@ _sk_load_4444_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,59,255,255,255 ; ja 25c4 <_sk_load_4444_hsw+0x14>
+ DB 15,135,59,255,255,255 ; ja 2bf4 <_sk_load_4444_hsw+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 26e0 <_sk_load_4444_hsw+0x130>
+ DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 2d10 <_sk_load_4444_hsw+0x130>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -2548,13 +2876,13 @@ _sk_load_4444_hsw LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,231,254,255,255 ; jmpq 25c4 <_sk_load_4444_hsw+0x14>
+ DB 233,231,254,255,255 ; jmpq 2bf4 <_sk_load_4444_hsw+0x14>
DB 15,31,0 ; nopl (%rax)
DB 241 ; icebp
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,225 ; jmpq ffffffffe20026e8 <_sk_callback_hsw+0xffffffffe1ffeace>
+ DB 233,255,255,255,225 ; jmpq ffffffffe2002d18 <_sk_callback_hsw+0xffffffffe1ffeace>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2682,7 +3010,7 @@ _sk_store_4444_hsw LABEL PROC
DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 28cf <_sk_store_4444_hsw+0x72>
+ DB 117,10 ; jne 2eff <_sk_store_4444_hsw+0x72>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2690,9 +3018,9 @@ _sk_store_4444_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 28cb <_sk_store_4444_hsw+0x6e>
+ DB 119,236 ; ja 2efb <_sk_store_4444_hsw+0x6e>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 292c <_sk_store_4444_hsw+0xcf>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2f5c <_sk_store_4444_hsw+0xcf>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -2703,7 +3031,7 @@ _sk_store_4444_hsw LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 28cb <_sk_store_4444_hsw+0x6e>
+ DB 235,159 ; jmp 2efb <_sk_store_4444_hsw+0x6e>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2734,7 +3062,7 @@ _sk_load_8888_hsw LABEL PROC
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
DB 76,3,8 ; add (%rax),%r9
DB 77,133,192 ; test %r8,%r8
- DB 117,104 ; jne 29c5 <_sk_load_8888_hsw+0x7d>
+ DB 117,104 ; jne 2ff5 <_sk_load_8888_hsw+0x7d>
DB 196,193,126,111,25 ; vmovdqu (%r9),%ymm3
DB 184,255,0,0,0 ; mov $0xff,%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
@@ -2767,7 +3095,7 @@ _sk_load_8888_hsw LABEL PROC
DB 196,225,249,110,192 ; vmovq %rax,%xmm0
DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0
DB 196,194,125,140,25 ; vpmaskmovd (%r9),%ymm0,%ymm3
- DB 233,116,255,255,255 ; jmpq 2962 <_sk_load_8888_hsw+0x1a>
+ DB 233,116,255,255,255 ; jmpq 2f92 <_sk_load_8888_hsw+0x1a>
PUBLIC _sk_gather_8888_hsw
_sk_gather_8888_hsw LABEL PROC
@@ -2827,7 +3155,7 @@ _sk_store_8888_hsw LABEL PROC
DB 196,65,45,235,192 ; vpor %ymm8,%ymm10,%ymm8
DB 196,65,53,235,192 ; vpor %ymm8,%ymm9,%ymm8
DB 77,133,192 ; test %r8,%r8
- DB 117,12 ; jne 2ae8 <_sk_store_8888_hsw+0x74>
+ DB 117,12 ; jne 3118 <_sk_store_8888_hsw+0x74>
DB 196,65,126,127,1 ; vmovdqu %ymm8,(%r9)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,137,193 ; mov %r8,%rcx
@@ -2840,14 +3168,14 @@ _sk_store_8888_hsw LABEL PROC
DB 196,97,249,110,200 ; vmovq %rax,%xmm9
DB 196,66,125,33,201 ; vpmovsxbd %xmm9,%ymm9
DB 196,66,53,142,1 ; vpmaskmovd %ymm8,%ymm9,(%r9)
- DB 235,211 ; jmp 2ae1 <_sk_store_8888_hsw+0x6d>
+ DB 235,211 ; jmp 3111 <_sk_store_8888_hsw+0x6d>
PUBLIC _sk_load_f16_hsw
_sk_load_f16_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,139,0 ; mov (%rax),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 117,97 ; jne 2b79 <_sk_load_f16_hsw+0x6b>
+ DB 117,97 ; jne 31a9 <_sk_load_f16_hsw+0x6b>
DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8
DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2
DB 197,249,16,92,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -2873,29 +3201,29 @@ _sk_load_f16_hsw LABEL PROC
DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,79 ; je 2bd8 <_sk_load_f16_hsw+0xca>
+ DB 116,79 ; je 3208 <_sk_load_f16_hsw+0xca>
DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,67 ; jb 2bd8 <_sk_load_f16_hsw+0xca>
+ DB 114,67 ; jb 3208 <_sk_load_f16_hsw+0xca>
DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,68 ; je 2be5 <_sk_load_f16_hsw+0xd7>
+ DB 116,68 ; je 3215 <_sk_load_f16_hsw+0xd7>
DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,56 ; jb 2be5 <_sk_load_f16_hsw+0xd7>
+ DB 114,56 ; jb 3215 <_sk_load_f16_hsw+0xd7>
DB 197,251,16,92,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,114,255,255,255 ; je 2b2f <_sk_load_f16_hsw+0x21>
+ DB 15,132,114,255,255,255 ; je 315f <_sk_load_f16_hsw+0x21>
DB 197,225,22,92,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,98,255,255,255 ; jb 2b2f <_sk_load_f16_hsw+0x21>
+ DB 15,130,98,255,255,255 ; jb 315f <_sk_load_f16_hsw+0x21>
DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9
- DB 233,87,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21>
+ DB 233,87,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,74,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21>
+ DB 233,74,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,65,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21>
+ DB 233,65,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21>
PUBLIC _sk_gather_f16_hsw
_sk_gather_f16_hsw LABEL PROC
@@ -2949,7 +3277,7 @@ _sk_store_f16_hsw LABEL PROC
DB 196,65,57,98,205 ; vpunpckldq %xmm13,%xmm8,%xmm9
DB 196,65,57,106,197 ; vpunpckhdq %xmm13,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,27 ; jne 2cdd <_sk_store_f16_hsw+0x65>
+ DB 117,27 ; jne 330d <_sk_store_f16_hsw+0x65>
DB 197,120,17,28,248 ; vmovups %xmm11,(%rax,%rdi,8)
DB 197,120,17,84,248,16 ; vmovups %xmm10,0x10(%rax,%rdi,8)
DB 197,120,17,76,248,32 ; vmovups %xmm9,0x20(%rax,%rdi,8)
@@ -2958,22 +3286,22 @@ _sk_store_f16_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 197,121,214,28,248 ; vmovq %xmm11,(%rax,%rdi,8)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,241 ; je 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 116,241 ; je 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%rax,%rdi,8)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,229 ; jb 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 114,229 ; jb 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,214,84,248,16 ; vmovq %xmm10,0x10(%rax,%rdi,8)
- DB 116,221 ; je 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 116,221 ; je 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%rax,%rdi,8)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,209 ; jb 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 114,209 ; jb 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,214,76,248,32 ; vmovq %xmm9,0x20(%rax,%rdi,8)
- DB 116,201 ; je 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 116,201 ; je 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%rax,%rdi,8)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,189 ; jb 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 114,189 ; jb 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,214,68,248,48 ; vmovq %xmm8,0x30(%rax,%rdi,8)
- DB 235,181 ; jmp 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 235,181 ; jmp 3309 <_sk_store_f16_hsw+0x61>
PUBLIC _sk_load_u16_be_hsw
_sk_load_u16_be_hsw LABEL PROC
@@ -2981,7 +3309,7 @@ _sk_load_u16_be_hsw LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,205,0,0,0 ; jne 2e07 <_sk_load_u16_be_hsw+0xe3>
+ DB 15,133,205,0,0,0 ; jne 3437 <_sk_load_u16_be_hsw+0xe3>
DB 196,65,121,16,4,64 ; vmovupd (%r8,%rax,2),%xmm8
DB 196,193,121,16,84,64,16 ; vmovupd 0x10(%r8,%rax,2),%xmm2
DB 196,193,121,16,92,64,32 ; vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -3030,29 +3358,29 @@ _sk_load_u16_be_hsw LABEL PROC
DB 196,65,123,16,4,64 ; vmovsd (%r8,%rax,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 2e6d <_sk_load_u16_be_hsw+0x149>
+ DB 116,85 ; je 349d <_sk_load_u16_be_hsw+0x149>
DB 196,65,57,22,68,64,8 ; vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 2e6d <_sk_load_u16_be_hsw+0x149>
+ DB 114,72 ; jb 349d <_sk_load_u16_be_hsw+0x149>
DB 196,193,123,16,84,64,16 ; vmovsd 0x10(%r8,%rax,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 2e7a <_sk_load_u16_be_hsw+0x156>
+ DB 116,72 ; je 34aa <_sk_load_u16_be_hsw+0x156>
DB 196,193,105,22,84,64,24 ; vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 2e7a <_sk_load_u16_be_hsw+0x156>
+ DB 114,59 ; jb 34aa <_sk_load_u16_be_hsw+0x156>
DB 196,193,123,16,92,64,32 ; vmovsd 0x20(%r8,%rax,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,5,255,255,255 ; je 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 15,132,5,255,255,255 ; je 3385 <_sk_load_u16_be_hsw+0x31>
DB 196,193,97,22,92,64,40 ; vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,244,254,255,255 ; jb 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 15,130,244,254,255,255 ; jb 3385 <_sk_load_u16_be_hsw+0x31>
DB 196,65,122,126,76,64,48 ; vmovq 0x30(%r8,%rax,2),%xmm9
- DB 233,232,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 233,232,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,219,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 233,219,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,210,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 233,210,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31>
PUBLIC _sk_load_rgb_u16_be_hsw
_sk_load_rgb_u16_be_hsw LABEL PROC
@@ -3060,7 +3388,7 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,127 ; lea (%rdi,%rdi,2),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,211,0,0,0 ; jne 2f68 <_sk_load_rgb_u16_be_hsw+0xe5>
+ DB 15,133,211,0,0,0 ; jne 3598 <_sk_load_rgb_u16_be_hsw+0xe5>
DB 196,193,122,111,4,64 ; vmovdqu (%r8,%rax,2),%xmm0
DB 196,193,122,111,84,64,12 ; vmovdqu 0xc(%r8,%rax,2),%xmm2
DB 196,193,122,111,76,64,24 ; vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -3110,36 +3438,36 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
DB 196,193,121,110,4,64 ; vmovd (%r8,%rax,2),%xmm0
DB 196,193,121,196,68,64,4,2 ; vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 2f81 <_sk_load_rgb_u16_be_hsw+0xfe>
- DB 233,72,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,5 ; jne 35b1 <_sk_load_rgb_u16_be_hsw+0xfe>
+ DB 233,72,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,76,64,6 ; vmovd 0x6(%r8,%rax,2),%xmm1
DB 196,65,113,196,68,64,10,2 ; vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 2fb0 <_sk_load_rgb_u16_be_hsw+0x12d>
+ DB 114,26 ; jb 35e0 <_sk_load_rgb_u16_be_hsw+0x12d>
DB 196,193,121,110,76,64,12 ; vmovd 0xc(%r8,%rax,2),%xmm1
DB 196,193,113,196,84,64,16,2 ; vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 2fb5 <_sk_load_rgb_u16_be_hsw+0x132>
- DB 233,25,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,20,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 35e5 <_sk_load_rgb_u16_be_hsw+0x132>
+ DB 233,25,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,20,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,76,64,18 ; vmovd 0x12(%r8,%rax,2),%xmm1
DB 196,65,113,196,76,64,22,2 ; vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 2fe4 <_sk_load_rgb_u16_be_hsw+0x161>
+ DB 114,26 ; jb 3614 <_sk_load_rgb_u16_be_hsw+0x161>
DB 196,193,121,110,76,64,24 ; vmovd 0x18(%r8,%rax,2),%xmm1
DB 196,193,113,196,76,64,28,2 ; vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 2fe9 <_sk_load_rgb_u16_be_hsw+0x166>
- DB 233,229,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,224,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 3619 <_sk_load_rgb_u16_be_hsw+0x166>
+ DB 233,229,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,224,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,92,64,30 ; vmovd 0x1e(%r8,%rax,2),%xmm3
DB 196,65,97,196,92,64,34,2 ; vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 3012 <_sk_load_rgb_u16_be_hsw+0x18f>
+ DB 114,20 ; jb 3642 <_sk_load_rgb_u16_be_hsw+0x18f>
DB 196,193,121,110,92,64,36 ; vmovd 0x24(%r8,%rax,2),%xmm3
DB 196,193,97,196,92,64,40,2 ; vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- DB 233,183,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,178,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,183,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,178,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
PUBLIC _sk_store_u16_be_hsw
_sk_store_u16_be_hsw LABEL PROC
@@ -3186,7 +3514,7 @@ _sk_store_u16_be_hsw LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 3112 <_sk_store_u16_be_hsw+0xfb>
+ DB 117,31 ; jne 3742 <_sk_store_u16_be_hsw+0xfb>
DB 196,1,120,17,28,72 ; vmovups %xmm11,(%r8,%r9,2)
DB 196,1,120,17,84,72,16 ; vmovups %xmm10,0x10(%r8,%r9,2)
DB 196,1,120,17,76,72,32 ; vmovups %xmm9,0x20(%r8,%r9,2)
@@ -3195,31 +3523,31 @@ _sk_store_u16_be_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,1,121,214,28,72 ; vmovq %xmm11,(%r8,%r9,2)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 116,240 ; je 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,92,72,8 ; vmovhpd %xmm11,0x8(%r8,%r9,2)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 114,227 ; jb 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,84,72,16 ; vmovq %xmm10,0x10(%r8,%r9,2)
- DB 116,218 ; je 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 116,218 ; je 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,84,72,24 ; vmovhpd %xmm10,0x18(%r8,%r9,2)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 114,205 ; jb 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,76,72,32 ; vmovq %xmm9,0x20(%r8,%r9,2)
- DB 116,196 ; je 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 116,196 ; je 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,76,72,40 ; vmovhpd %xmm9,0x28(%r8,%r9,2)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 114,183 ; jb 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,68,72,48 ; vmovq %xmm8,0x30(%r8,%r9,2)
- DB 235,174 ; jmp 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 235,174 ; jmp 373e <_sk_store_u16_be_hsw+0xf7>
PUBLIC _sk_load_f32_hsw
_sk_load_f32_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 119,110 ; ja 31d6 <_sk_load_f32_hsw+0x76>
+ DB 119,110 ; ja 3806 <_sk_load_f32_hsw+0x76>
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
- DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 3200 <_sk_load_f32_hsw+0xa0>
+ DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 3830 <_sk_load_f32_hsw+0xa0>
DB 73,99,4,138 ; movslq (%r10,%rcx,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -3276,7 +3604,7 @@ _sk_store_f32_hsw LABEL PROC
DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8
DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11
DB 72,133,201 ; test %rcx,%rcx
- DB 117,55 ; jne 328d <_sk_store_f32_hsw+0x6d>
+ DB 117,55 ; jne 38bd <_sk_store_f32_hsw+0x6d>
DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -3289,22 +3617,22 @@ _sk_store_f32_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 3289 <_sk_store_f32_hsw+0x69>
+ DB 116,240 ; je 38b9 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 3289 <_sk_store_f32_hsw+0x69>
+ DB 114,227 ; jb 38b9 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4)
- DB 116,218 ; je 3289 <_sk_store_f32_hsw+0x69>
+ DB 116,218 ; je 38b9 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 3289 <_sk_store_f32_hsw+0x69>
+ DB 114,205 ; jb 38b9 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- DB 116,195 ; je 3289 <_sk_store_f32_hsw+0x69>
+ DB 116,195 ; je 38b9 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,181 ; jb 3289 <_sk_store_f32_hsw+0x69>
+ DB 114,181 ; jb 38b9 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- DB 235,171 ; jmp 3289 <_sk_store_f32_hsw+0x69>
+ DB 235,171 ; jmp 38b9 <_sk_store_f32_hsw+0x69>
PUBLIC _sk_clamp_x_hsw
_sk_clamp_x_hsw LABEL PROC
@@ -3545,7 +3873,7 @@ _sk_linear_gradient_hsw LABEL PROC
DB 196,98,125,24,72,28 ; vbroadcastss 0x1c(%rax),%ymm9
DB 76,139,0 ; mov (%rax),%r8
DB 77,133,192 ; test %r8,%r8
- DB 15,132,143,0,0,0 ; je 3719 <_sk_linear_gradient_hsw+0xb5>
+ DB 15,132,143,0,0,0 ; je 3d49 <_sk_linear_gradient_hsw+0xb5>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 196,65,28,87,228 ; vxorps %ymm12,%ymm12,%ymm12
@@ -3572,8 +3900,8 @@ _sk_linear_gradient_hsw LABEL PROC
DB 196,67,13,74,201,208 ; vblendvps %ymm13,%ymm9,%ymm14,%ymm9
DB 72,131,192,36 ; add $0x24,%rax
DB 73,255,200 ; dec %r8
- DB 117,140 ; jne 36a3 <_sk_linear_gradient_hsw+0x3f>
- DB 235,17 ; jmp 372a <_sk_linear_gradient_hsw+0xc6>
+ DB 117,140 ; jne 3cd3 <_sk_linear_gradient_hsw+0x3f>
+ DB 235,17 ; jmp 3d5a <_sk_linear_gradient_hsw+0xc6>
DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
DB 197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2
DB 197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
@@ -6463,6 +6791,410 @@ _sk_table_a_avx LABEL PROC
DB 65,95 ; pop %r15
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_avx
+_sk_parametric_r_avx LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
+ DB 196,65,124,194,192,2 ; vcmpleps %ymm8,%ymm0,%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 197,52,89,200 ; vmulps %ymm0,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 197,172,89,192 ; vmulps %ymm0,%ymm10,%ymm0
+ DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,44,89,208 ; vmulps %ymm0,%ymm10,%ymm10
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,44,92,208 ; vsubps %ymm0,%ymm10,%ymm10
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 197,249,112,192,0 ; vpshufd $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,65,124,84,219 ; vandps %ymm11,%ymm0,%ymm11
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 197,249,112,192,0 ; vpshufd $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,36,86,216 ; vorps %ymm0,%ymm11,%ymm11
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,44,88,208 ; vaddps %ymm0,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,164,89,192 ; vmulps %ymm0,%ymm11,%ymm0
+ DB 197,44,92,208 ; vsubps %ymm0,%ymm10,%ymm10
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,99,125,24,224,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,164,88,192 ; vaddps %ymm0,%ymm11,%ymm0
+ DB 197,156,94,192 ; vdivps %ymm0,%ymm12,%ymm0
+ DB 197,172,92,192 ; vsubps %ymm0,%ymm10,%ymm0
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
+ DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,99,125,24,224,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,193,124,89,194 ; vmulps %ymm10,%ymm0,%ymm0
+ DB 197,36,92,216 ; vsubps %ymm0,%ymm11,%ymm11
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,99,125,24,232,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,193,124,92,194 ; vsubps %ymm10,%ymm0,%ymm0
+ DB 197,148,94,192 ; vdivps %ymm0,%ymm13,%ymm0
+ DB 197,164,88,192 ; vaddps %ymm0,%ymm11,%ymm0
+ DB 197,156,89,192 ; vmulps %ymm0,%ymm12,%ymm0
+ DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0
+ DB 196,195,125,74,193,128 ; vblendvps %ymm8,%ymm9,%ymm0,%ymm0
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,188,93,192 ; vminps %ymm0,%ymm8,%ymm0
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_g_avx
+_sk_parametric_g_avx LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
+ DB 196,65,116,194,192,2 ; vcmpleps %ymm8,%ymm1,%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 197,52,89,201 ; vmulps %ymm1,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 197,172,89,201 ; vmulps %ymm1,%ymm10,%ymm1
+ DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,44,89,209 ; vmulps %ymm1,%ymm10,%ymm10
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,44,92,209 ; vsubps %ymm1,%ymm10,%ymm10
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 197,249,112,201,0 ; vpshufd $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,65,116,84,219 ; vandps %ymm11,%ymm1,%ymm11
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 197,249,112,201,0 ; vpshufd $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,36,86,217 ; vorps %ymm1,%ymm11,%ymm11
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,44,88,209 ; vaddps %ymm1,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,164,89,201 ; vmulps %ymm1,%ymm11,%ymm1
+ DB 197,44,92,209 ; vsubps %ymm1,%ymm10,%ymm10
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,225,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,164,88,201 ; vaddps %ymm1,%ymm11,%ymm1
+ DB 197,156,94,201 ; vdivps %ymm1,%ymm12,%ymm1
+ DB 197,172,92,201 ; vsubps %ymm1,%ymm10,%ymm1
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
+ DB 197,44,89,217 ; vmulps %ymm1,%ymm10,%ymm11
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,225,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,193,116,89,202 ; vmulps %ymm10,%ymm1,%ymm1
+ DB 197,36,92,217 ; vsubps %ymm1,%ymm11,%ymm11
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,233,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,193,116,92,202 ; vsubps %ymm10,%ymm1,%ymm1
+ DB 197,148,94,201 ; vdivps %ymm1,%ymm13,%ymm1
+ DB 197,164,88,201 ; vaddps %ymm1,%ymm11,%ymm1
+ DB 197,156,89,201 ; vmulps %ymm1,%ymm12,%ymm1
+ DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1
+ DB 196,195,117,74,201,128 ; vblendvps %ymm8,%ymm9,%ymm1,%ymm1
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,116,95,192 ; vmaxps %ymm8,%ymm1,%ymm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 197,249,110,200 ; vmovd %eax,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,188,93,201 ; vminps %ymm1,%ymm8,%ymm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_b_avx
+_sk_parametric_b_avx LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
+ DB 196,65,108,194,192,2 ; vcmpleps %ymm8,%ymm2,%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 197,52,89,202 ; vmulps %ymm2,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
+ DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,44,89,210 ; vmulps %ymm2,%ymm10,%ymm10
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,44,92,210 ; vsubps %ymm2,%ymm10,%ymm10
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,65,108,84,219 ; vandps %ymm11,%ymm2,%ymm11
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,36,86,218 ; vorps %ymm2,%ymm11,%ymm11
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,44,88,210 ; vaddps %ymm2,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,164,89,210 ; vmulps %ymm2,%ymm11,%ymm2
+ DB 197,44,92,210 ; vsubps %ymm2,%ymm10,%ymm10
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,226,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,164,88,210 ; vaddps %ymm2,%ymm11,%ymm2
+ DB 197,156,94,210 ; vdivps %ymm2,%ymm12,%ymm2
+ DB 197,172,92,210 ; vsubps %ymm2,%ymm10,%ymm2
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
+ DB 197,44,89,218 ; vmulps %ymm2,%ymm10,%ymm11
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,226,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,193,108,89,210 ; vmulps %ymm10,%ymm2,%ymm2
+ DB 197,36,92,218 ; vsubps %ymm2,%ymm11,%ymm11
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,234,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,193,108,92,210 ; vsubps %ymm10,%ymm2,%ymm2
+ DB 197,148,94,210 ; vdivps %ymm2,%ymm13,%ymm2
+ DB 197,164,88,210 ; vaddps %ymm2,%ymm11,%ymm2
+ DB 197,156,89,210 ; vmulps %ymm2,%ymm12,%ymm2
+ DB 197,253,91,210 ; vcvtps2dq %ymm2,%ymm2
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,108,88,210 ; vaddps %ymm10,%ymm2,%ymm2
+ DB 196,195,109,74,209,128 ; vblendvps %ymm8,%ymm9,%ymm2,%ymm2
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,108,95,192 ; vmaxps %ymm8,%ymm2,%ymm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,188,93,210 ; vminps %ymm2,%ymm8,%ymm2
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_a_avx
+_sk_parametric_a_avx LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
+ DB 196,65,100,194,192,2 ; vcmpleps %ymm8,%ymm3,%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 197,52,89,203 ; vmulps %ymm3,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 197,172,89,219 ; vmulps %ymm3,%ymm10,%ymm3
+ DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,44,89,211 ; vmulps %ymm3,%ymm10,%ymm10
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,44,92,211 ; vsubps %ymm3,%ymm10,%ymm10
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 197,249,112,219,0 ; vpshufd $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,65,100,84,219 ; vandps %ymm11,%ymm3,%ymm11
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 197,249,112,219,0 ; vpshufd $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,36,86,219 ; vorps %ymm3,%ymm11,%ymm11
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,44,88,211 ; vaddps %ymm3,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,164,89,219 ; vmulps %ymm3,%ymm11,%ymm3
+ DB 197,44,92,211 ; vsubps %ymm3,%ymm10,%ymm10
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,99,101,24,227,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,164,88,219 ; vaddps %ymm3,%ymm11,%ymm3
+ DB 197,156,94,219 ; vdivps %ymm3,%ymm12,%ymm3
+ DB 197,172,92,219 ; vsubps %ymm3,%ymm10,%ymm3
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
+ DB 197,44,89,219 ; vmulps %ymm3,%ymm10,%ymm11
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,99,101,24,227,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,193,100,89,218 ; vmulps %ymm10,%ymm3,%ymm3
+ DB 197,36,92,219 ; vsubps %ymm3,%ymm11,%ymm11
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,99,101,24,235,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,193,100,92,218 ; vsubps %ymm10,%ymm3,%ymm3
+ DB 197,148,94,219 ; vdivps %ymm3,%ymm13,%ymm3
+ DB 197,164,88,219 ; vaddps %ymm3,%ymm11,%ymm3
+ DB 197,156,89,219 ; vmulps %ymm3,%ymm12,%ymm3
+ DB 197,253,91,219 ; vcvtps2dq %ymm3,%ymm3
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,100,88,218 ; vaddps %ymm10,%ymm3,%ymm3
+ DB 196,195,101,74,217,128 ; vblendvps %ymm8,%ymm9,%ymm3,%ymm3
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,100,95,192 ; vmaxps %ymm8,%ymm3,%ymm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 197,249,110,216 ; vmovd %eax,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,188,93,219 ; vminps %ymm3,%ymm8,%ymm3
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
PUBLIC _sk_load_a8_avx
_sk_load_a8_avx LABEL PROC
DB 73,137,200 ; mov %rcx,%r8
@@ -6470,7 +7202,7 @@ _sk_load_a8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,74 ; jne 29ba <_sk_load_a8_avx+0x5a>
+ DB 117,74 ; jne 31b2 <_sk_load_a8_avx+0x5a>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1
DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0
@@ -6497,9 +7229,9 @@ _sk_load_a8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 29c2 <_sk_load_a8_avx+0x62>
+ DB 117,234 ; jne 31ba <_sk_load_a8_avx+0x62>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,149 ; jmp 2974 <_sk_load_a8_avx+0x14>
+ DB 235,149 ; jmp 316c <_sk_load_a8_avx+0x14>
PUBLIC _sk_gather_a8_avx
_sk_gather_a8_avx LABEL PROC
@@ -6576,7 +7308,7 @@ _sk_store_a8_avx LABEL PROC
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 2b1b <_sk_store_a8_avx+0x42>
+ DB 117,10 ; jne 3313 <_sk_store_a8_avx+0x42>
DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -6584,10 +7316,10 @@ _sk_store_a8_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 2b17 <_sk_store_a8_avx+0x3e>
+ DB 119,236 ; ja 330f <_sk_store_a8_avx+0x3e>
DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 2b80 <_sk_store_a8_avx+0xa7>
+ DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3378 <_sk_store_a8_avx+0xa7>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -6598,7 +7330,7 @@ _sk_store_a8_avx LABEL PROC
DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- DB 235,154 ; jmp 2b17 <_sk_store_a8_avx+0x3e>
+ DB 235,154 ; jmp 330f <_sk_store_a8_avx+0x3e>
DB 15,31,0 ; nopl (%rax)
DB 244 ; hlt
DB 255 ; (bad)
@@ -6631,7 +7363,7 @@ _sk_load_g8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,91 ; jne 2c07 <_sk_load_g8_avx+0x6b>
+ DB 117,91 ; jne 33ff <_sk_load_g8_avx+0x6b>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1
DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0
@@ -6661,9 +7393,9 @@ _sk_load_g8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 2c0f <_sk_load_g8_avx+0x73>
+ DB 117,234 ; jne 3407 <_sk_load_g8_avx+0x73>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,132 ; jmp 2bb0 <_sk_load_g8_avx+0x14>
+ DB 235,132 ; jmp 33a8 <_sk_load_g8_avx+0x14>
PUBLIC _sk_gather_g8_avx
_sk_gather_g8_avx LABEL PROC
@@ -6734,9 +7466,9 @@ _sk_gather_i8_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 2d46 <_sk_gather_i8_avx+0xf>
+ DB 116,5 ; je 353e <_sk_gather_i8_avx+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 2d48 <_sk_gather_i8_avx+0x11>
+ DB 235,2 ; jmp 3540 <_sk_gather_i8_avx+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 65,87 ; push %r15
DB 65,86 ; push %r14
@@ -6839,7 +7571,7 @@ _sk_load_565_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,209,0,0,0 ; jne 2fe2 <_sk_load_565_avx+0xdf>
+ DB 15,133,209,0,0,0 ; jne 37da <_sk_load_565_avx+0xdf>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -6889,9 +7621,9 @@ _sk_load_565_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,29,255,255,255 ; ja 2f17 <_sk_load_565_avx+0x14>
+ DB 15,135,29,255,255,255 ; ja 370f <_sk_load_565_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3050 <_sk_load_565_avx+0x14d>
+ DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3848 <_sk_load_565_avx+0x14d>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -6903,7 +7635,7 @@ _sk_load_565_avx LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,201,254,255,255 ; jmpq 2f17 <_sk_load_565_avx+0x14>
+ DB 233,201,254,255,255 ; jmpq 370f <_sk_load_565_avx+0x14>
DB 102,144 ; xchg %ax,%ax
DB 242,255 ; repnz (bad)
DB 255 ; (bad)
@@ -7056,7 +7788,7 @@ _sk_store_565_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 329b <_sk_store_565_avx+0x9e>
+ DB 117,10 ; jne 3a93 <_sk_store_565_avx+0x9e>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7064,9 +7796,9 @@ _sk_store_565_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 3297 <_sk_store_565_avx+0x9a>
+ DB 119,236 ; ja 3a8f <_sk_store_565_avx+0x9a>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 32f8 <_sk_store_565_avx+0xfb>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 3af0 <_sk_store_565_avx+0xfb>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -7077,7 +7809,7 @@ _sk_store_565_avx LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 3297 <_sk_store_565_avx+0x9a>
+ DB 235,159 ; jmp 3a8f <_sk_store_565_avx+0x9a>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -7106,7 +7838,7 @@ _sk_load_4444_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,245,0,0,0 ; jne 3417 <_sk_load_4444_avx+0x103>
+ DB 15,133,245,0,0,0 ; jne 3c0f <_sk_load_4444_avx+0x103>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -7163,9 +7895,9 @@ _sk_load_4444_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,249,254,255,255 ; ja 3328 <_sk_load_4444_avx+0x14>
+ DB 15,135,249,254,255,255 ; ja 3b20 <_sk_load_4444_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 3484 <_sk_load_4444_avx+0x170>
+ DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 3c7c <_sk_load_4444_avx+0x170>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -7177,12 +7909,12 @@ _sk_load_4444_avx LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,165,254,255,255 ; jmpq 3328 <_sk_load_4444_avx+0x14>
+ DB 233,165,254,255,255 ; jmpq 3b20 <_sk_load_4444_avx+0x14>
DB 144 ; nop
DB 243,255 ; repz (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 235,255 ; jmp 3489 <_sk_load_4444_avx+0x175>
+ DB 235,255 ; jmp 3c81 <_sk_load_4444_avx+0x175>
DB 255 ; (bad)
DB 255,227 ; jmpq *%rbx
DB 255 ; (bad)
@@ -7339,7 +8071,7 @@ _sk_store_4444_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 3704 <_sk_store_4444_avx+0xaf>
+ DB 117,10 ; jne 3efc <_sk_store_4444_avx+0xaf>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7347,9 +8079,9 @@ _sk_store_4444_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 3700 <_sk_store_4444_avx+0xab>
+ DB 119,236 ; ja 3ef8 <_sk_store_4444_avx+0xab>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3764 <_sk_store_4444_avx+0x10f>
+ DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3f5c <_sk_store_4444_avx+0x10f>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -7360,7 +8092,7 @@ _sk_store_4444_avx LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 3700 <_sk_store_4444_avx+0xab>
+ DB 235,159 ; jmp 3ef8 <_sk_store_4444_avx+0xab>
DB 15,31,0 ; nopl (%rax)
DB 244 ; hlt
DB 255 ; (bad)
@@ -7391,7 +8123,7 @@ _sk_load_8888_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,157,0,0,0 ; jne 382b <_sk_load_8888_avx+0xab>
+ DB 15,133,157,0,0,0 ; jne 4023 <_sk_load_8888_avx+0xab>
DB 196,65,124,16,12,186 ; vmovups (%r10,%rdi,4),%ymm9
DB 184,255,0,0,0 ; mov $0xff,%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
@@ -7429,9 +8161,9 @@ _sk_load_8888_avx LABEL PROC
DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,80,255,255,255 ; ja 3794 <_sk_load_8888_avx+0x14>
+ DB 15,135,80,255,255,255 ; ja 3f8c <_sk_load_8888_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 38d8 <_sk_load_8888_avx+0x158>
+ DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 40d0 <_sk_load_8888_avx+0x158>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -7454,7 +8186,7 @@ _sk_load_8888_avx LABEL PROC
DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
DB 196,195,49,34,4,186,0 ; vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
- DB 233,188,254,255,255 ; jmpq 3794 <_sk_load_8888_avx+0x14>
+ DB 233,188,254,255,255 ; jmpq 3f8c <_sk_load_8888_avx+0x14>
DB 238 ; out %al,(%dx)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -7580,7 +8312,7 @@ _sk_store_8888_avx LABEL PROC
DB 196,65,45,86,192 ; vorpd %ymm8,%ymm10,%ymm8
DB 196,65,53,86,192 ; vorpd %ymm8,%ymm9,%ymm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 3ad9 <_sk_store_8888_avx+0xa4>
+ DB 117,10 ; jne 42d1 <_sk_store_8888_avx+0xa4>
DB 196,65,124,17,4,185 ; vmovups %ymm8,(%r9,%rdi,4)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7588,9 +8320,9 @@ _sk_store_8888_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 3ad5 <_sk_store_8888_avx+0xa0>
+ DB 119,236 ; ja 42cd <_sk_store_8888_avx+0xa0>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,84,0,0,0 ; lea 0x54(%rip),%r8 # 3b48 <_sk_store_8888_avx+0x113>
+ DB 76,141,5,84,0,0,0 ; lea 0x54(%rip),%r8 # 4340 <_sk_store_8888_avx+0x113>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -7604,7 +8336,7 @@ _sk_store_8888_avx LABEL PROC
DB 196,67,121,22,68,185,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4)
DB 196,67,121,22,68,185,4,1 ; vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4)
DB 196,65,121,126,4,185 ; vmovd %xmm8,(%r9,%rdi,4)
- DB 235,143 ; jmp 3ad5 <_sk_store_8888_avx+0xa0>
+ DB 235,143 ; jmp 42cd <_sk_store_8888_avx+0xa0>
DB 102,144 ; xchg %ax,%ax
DB 246,255 ; idiv %bh
DB 255 ; (bad)
@@ -7634,7 +8366,7 @@ _sk_load_f16_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,139,0 ; mov (%rax),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,17,1,0,0 ; jne 3c83 <_sk_load_f16_avx+0x11f>
+ DB 15,133,17,1,0,0 ; jne 447b <_sk_load_f16_avx+0x11f>
DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8
DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2
DB 197,249,16,92,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -7696,29 +8428,29 @@ _sk_load_f16_avx LABEL PROC
DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,79 ; je 3ce2 <_sk_load_f16_avx+0x17e>
+ DB 116,79 ; je 44da <_sk_load_f16_avx+0x17e>
DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,67 ; jb 3ce2 <_sk_load_f16_avx+0x17e>
+ DB 114,67 ; jb 44da <_sk_load_f16_avx+0x17e>
DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,68 ; je 3cef <_sk_load_f16_avx+0x18b>
+ DB 116,68 ; je 44e7 <_sk_load_f16_avx+0x18b>
DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,56 ; jb 3cef <_sk_load_f16_avx+0x18b>
+ DB 114,56 ; jb 44e7 <_sk_load_f16_avx+0x18b>
DB 197,251,16,92,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,194,254,255,255 ; je 3b89 <_sk_load_f16_avx+0x25>
+ DB 15,132,194,254,255,255 ; je 4381 <_sk_load_f16_avx+0x25>
DB 197,225,22,92,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,178,254,255,255 ; jb 3b89 <_sk_load_f16_avx+0x25>
+ DB 15,130,178,254,255,255 ; jb 4381 <_sk_load_f16_avx+0x25>
DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9
- DB 233,167,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25>
+ DB 233,167,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,154,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25>
+ DB 233,154,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,145,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25>
+ DB 233,145,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25>
PUBLIC _sk_gather_f16_avx
_sk_gather_f16_avx LABEL PROC
@@ -7858,7 +8590,7 @@ _sk_store_f16_avx LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 3f78 <_sk_store_f16_avx+0xd2>
+ DB 117,31 ; jne 4770 <_sk_store_f16_avx+0xd2>
DB 196,65,120,17,28,248 ; vmovups %xmm11,(%r8,%rdi,8)
DB 196,65,120,17,84,248,16 ; vmovups %xmm10,0x10(%r8,%rdi,8)
DB 196,65,120,17,76,248,32 ; vmovups %xmm9,0x20(%r8,%rdi,8)
@@ -7867,22 +8599,22 @@ _sk_store_f16_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,214,28,248 ; vmovq %xmm11,(%r8,%rdi,8)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 3f74 <_sk_store_f16_avx+0xce>
+ DB 116,240 ; je 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%r8,%rdi,8)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 3f74 <_sk_store_f16_avx+0xce>
+ DB 114,227 ; jb 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,214,84,248,16 ; vmovq %xmm10,0x10(%r8,%rdi,8)
- DB 116,218 ; je 3f74 <_sk_store_f16_avx+0xce>
+ DB 116,218 ; je 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%r8,%rdi,8)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 3f74 <_sk_store_f16_avx+0xce>
+ DB 114,205 ; jb 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,214,76,248,32 ; vmovq %xmm9,0x20(%r8,%rdi,8)
- DB 116,196 ; je 3f74 <_sk_store_f16_avx+0xce>
+ DB 116,196 ; je 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%r8,%rdi,8)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 3f74 <_sk_store_f16_avx+0xce>
+ DB 114,183 ; jb 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,214,68,248,48 ; vmovq %xmm8,0x30(%r8,%rdi,8)
- DB 235,174 ; jmp 3f74 <_sk_store_f16_avx+0xce>
+ DB 235,174 ; jmp 476c <_sk_store_f16_avx+0xce>
PUBLIC _sk_load_u16_be_avx
_sk_load_u16_be_avx LABEL PROC
@@ -7890,7 +8622,7 @@ _sk_load_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,5,1,0,0 ; jne 40e1 <_sk_load_u16_be_avx+0x11b>
+ DB 15,133,5,1,0,0 ; jne 48d9 <_sk_load_u16_be_avx+0x11b>
DB 196,65,121,16,4,64 ; vmovupd (%r8,%rax,2),%xmm8
DB 196,193,121,16,84,64,16 ; vmovupd 0x10(%r8,%rax,2),%xmm2
DB 196,193,121,16,92,64,32 ; vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -7949,29 +8681,29 @@ _sk_load_u16_be_avx LABEL PROC
DB 196,65,123,16,4,64 ; vmovsd (%r8,%rax,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 4147 <_sk_load_u16_be_avx+0x181>
+ DB 116,85 ; je 493f <_sk_load_u16_be_avx+0x181>
DB 196,65,57,22,68,64,8 ; vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 4147 <_sk_load_u16_be_avx+0x181>
+ DB 114,72 ; jb 493f <_sk_load_u16_be_avx+0x181>
DB 196,193,123,16,84,64,16 ; vmovsd 0x10(%r8,%rax,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 4154 <_sk_load_u16_be_avx+0x18e>
+ DB 116,72 ; je 494c <_sk_load_u16_be_avx+0x18e>
DB 196,193,105,22,84,64,24 ; vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 4154 <_sk_load_u16_be_avx+0x18e>
+ DB 114,59 ; jb 494c <_sk_load_u16_be_avx+0x18e>
DB 196,193,123,16,92,64,32 ; vmovsd 0x20(%r8,%rax,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,205,254,255,255 ; je 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 15,132,205,254,255,255 ; je 47ef <_sk_load_u16_be_avx+0x31>
DB 196,193,97,22,92,64,40 ; vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,188,254,255,255 ; jb 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 15,130,188,254,255,255 ; jb 47ef <_sk_load_u16_be_avx+0x31>
DB 196,65,122,126,76,64,48 ; vmovq 0x30(%r8,%rax,2),%xmm9
- DB 233,176,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 233,176,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,163,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 233,163,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,154,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 233,154,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31>
PUBLIC _sk_load_rgb_u16_be_avx
_sk_load_rgb_u16_be_avx LABEL PROC
@@ -7979,7 +8711,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,127 ; lea (%rdi,%rdi,2),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,8,1,0,0 ; jne 4277 <_sk_load_rgb_u16_be_avx+0x11a>
+ DB 15,133,8,1,0,0 ; jne 4a6f <_sk_load_rgb_u16_be_avx+0x11a>
DB 196,193,122,111,4,64 ; vmovdqu (%r8,%rax,2),%xmm0
DB 196,193,122,111,84,64,12 ; vmovdqu 0xc(%r8,%rax,2),%xmm2
DB 196,193,122,111,76,64,24 ; vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -8038,36 +8770,36 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 196,193,121,110,4,64 ; vmovd (%r8,%rax,2),%xmm0
DB 196,193,121,196,68,64,4,2 ; vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 4290 <_sk_load_rgb_u16_be_avx+0x133>
- DB 233,19,255,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,5 ; jne 4a88 <_sk_load_rgb_u16_be_avx+0x133>
+ DB 233,19,255,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,76,64,6 ; vmovd 0x6(%r8,%rax,2),%xmm1
DB 196,65,113,196,68,64,10,2 ; vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 42bf <_sk_load_rgb_u16_be_avx+0x162>
+ DB 114,26 ; jb 4ab7 <_sk_load_rgb_u16_be_avx+0x162>
DB 196,193,121,110,76,64,12 ; vmovd 0xc(%r8,%rax,2),%xmm1
DB 196,193,113,196,84,64,16,2 ; vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 42c4 <_sk_load_rgb_u16_be_avx+0x167>
- DB 233,228,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,223,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 4abc <_sk_load_rgb_u16_be_avx+0x167>
+ DB 233,228,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,223,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,76,64,18 ; vmovd 0x12(%r8,%rax,2),%xmm1
DB 196,65,113,196,76,64,22,2 ; vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 42f3 <_sk_load_rgb_u16_be_avx+0x196>
+ DB 114,26 ; jb 4aeb <_sk_load_rgb_u16_be_avx+0x196>
DB 196,193,121,110,76,64,24 ; vmovd 0x18(%r8,%rax,2),%xmm1
DB 196,193,113,196,76,64,28,2 ; vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 42f8 <_sk_load_rgb_u16_be_avx+0x19b>
- DB 233,176,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,171,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 4af0 <_sk_load_rgb_u16_be_avx+0x19b>
+ DB 233,176,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,171,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,92,64,30 ; vmovd 0x1e(%r8,%rax,2),%xmm3
DB 196,65,97,196,92,64,34,2 ; vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 4321 <_sk_load_rgb_u16_be_avx+0x1c4>
+ DB 114,20 ; jb 4b19 <_sk_load_rgb_u16_be_avx+0x1c4>
DB 196,193,121,110,92,64,36 ; vmovd 0x24(%r8,%rax,2),%xmm3
DB 196,193,97,196,92,64,40,2 ; vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- DB 233,130,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,125,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,130,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,125,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
PUBLIC _sk_store_u16_be_avx
_sk_store_u16_be_avx LABEL PROC
@@ -8115,7 +8847,7 @@ _sk_store_u16_be_avx LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 4428 <_sk_store_u16_be_avx+0x102>
+ DB 117,31 ; jne 4c20 <_sk_store_u16_be_avx+0x102>
DB 196,1,120,17,28,72 ; vmovups %xmm11,(%r8,%r9,2)
DB 196,1,120,17,84,72,16 ; vmovups %xmm10,0x10(%r8,%r9,2)
DB 196,1,120,17,76,72,32 ; vmovups %xmm9,0x20(%r8,%r9,2)
@@ -8124,31 +8856,31 @@ _sk_store_u16_be_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,1,121,214,28,72 ; vmovq %xmm11,(%r8,%r9,2)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 116,240 ; je 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,92,72,8 ; vmovhpd %xmm11,0x8(%r8,%r9,2)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 114,227 ; jb 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,84,72,16 ; vmovq %xmm10,0x10(%r8,%r9,2)
- DB 116,218 ; je 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 116,218 ; je 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,84,72,24 ; vmovhpd %xmm10,0x18(%r8,%r9,2)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 114,205 ; jb 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,76,72,32 ; vmovq %xmm9,0x20(%r8,%r9,2)
- DB 116,196 ; je 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 116,196 ; je 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,76,72,40 ; vmovhpd %xmm9,0x28(%r8,%r9,2)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 114,183 ; jb 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,68,72,48 ; vmovq %xmm8,0x30(%r8,%r9,2)
- DB 235,174 ; jmp 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 235,174 ; jmp 4c1c <_sk_store_u16_be_avx+0xfe>
PUBLIC _sk_load_f32_avx
_sk_load_f32_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 119,110 ; ja 44ec <_sk_load_f32_avx+0x76>
+ DB 119,110 ; ja 4ce4 <_sk_load_f32_avx+0x76>
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
- DB 76,141,21,132,0,0,0 ; lea 0x84(%rip),%r10 # 4514 <_sk_load_f32_avx+0x9e>
+ DB 76,141,21,132,0,0,0 ; lea 0x84(%rip),%r10 # 4d0c <_sk_load_f32_avx+0x9e>
DB 73,99,4,138 ; movslq (%r10,%rcx,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -8205,7 +8937,7 @@ _sk_store_f32_avx LABEL PROC
DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8
DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11
DB 72,133,201 ; test %rcx,%rcx
- DB 117,55 ; jne 45a1 <_sk_store_f32_avx+0x6d>
+ DB 117,55 ; jne 4d99 <_sk_store_f32_avx+0x6d>
DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -8218,22 +8950,22 @@ _sk_store_f32_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 459d <_sk_store_f32_avx+0x69>
+ DB 116,240 ; je 4d95 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 459d <_sk_store_f32_avx+0x69>
+ DB 114,227 ; jb 4d95 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4)
- DB 116,218 ; je 459d <_sk_store_f32_avx+0x69>
+ DB 116,218 ; je 4d95 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 459d <_sk_store_f32_avx+0x69>
+ DB 114,205 ; jb 4d95 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- DB 116,195 ; je 459d <_sk_store_f32_avx+0x69>
+ DB 116,195 ; je 4d95 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,181 ; jb 459d <_sk_store_f32_avx+0x69>
+ DB 114,181 ; jb 4d95 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- DB 235,171 ; jmp 459d <_sk_store_f32_avx+0x69>
+ DB 235,171 ; jmp 4d95 <_sk_store_f32_avx+0x69>
PUBLIC _sk_clamp_x_avx
_sk_clamp_x_avx LABEL PROC
@@ -8537,7 +9269,7 @@ _sk_linear_gradient_avx LABEL PROC
DB 196,226,125,24,88,28 ; vbroadcastss 0x1c(%rax),%ymm3
DB 76,139,0 ; mov (%rax),%r8
DB 77,133,192 ; test %r8,%r8
- DB 15,132,146,0,0,0 ; je 4b55 <_sk_linear_gradient_avx+0xb8>
+ DB 15,132,146,0,0,0 ; je 534d <_sk_linear_gradient_avx+0xb8>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 196,65,28,87,228 ; vxorps %ymm12,%ymm12,%ymm12
@@ -8564,8 +9296,8 @@ _sk_linear_gradient_avx LABEL PROC
DB 196,227,13,74,219,208 ; vblendvps %ymm13,%ymm3,%ymm14,%ymm3
DB 72,131,192,36 ; add $0x24,%rax
DB 73,255,200 ; dec %r8
- DB 117,140 ; jne 4adf <_sk_linear_gradient_avx+0x42>
- DB 235,20 ; jmp 4b69 <_sk_linear_gradient_avx+0xcc>
+ DB 117,140 ; jne 52d7 <_sk_linear_gradient_avx+0x42>
+ DB 235,20 ; jmp 5361 <_sk_linear_gradient_avx+0xcc>
DB 196,65,36,87,219 ; vxorps %ymm11,%ymm11,%ymm11
DB 196,65,44,87,210 ; vxorps %ymm10,%ymm10,%ymm10
DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
@@ -11142,6 +11874,392 @@ _sk_table_a_sse41 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_sse41
+_sk_parametric_r_sse41 LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 243,68,15,16,64,16 ; movss 0x10(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,200 ; mulps %xmm0,%xmm9
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,208 ; mulps %xmm0,%xmm10
+ DB 65,15,194,192,2 ; cmpleps %xmm8,%xmm0
+ DB 243,68,15,16,64,24 ; movss 0x18(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 69,15,88,200 ; addps %xmm8,%xmm9
+ DB 243,68,15,16,0 ; movss (%rax),%xmm8
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,227 ; mulps %xmm11,%xmm12
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,227 ; subps %xmm11,%xmm12
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,219,0 ; pshufd $0x0,%xmm11,%xmm11
+ DB 102,69,15,219,218 ; pand %xmm10,%xmm11
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,210,0 ; pshufd $0x0,%xmm10,%xmm10
+ DB 102,69,15,235,211 ; por %xmm11,%xmm10
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,220 ; addps %xmm12,%xmm11
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12
+ DB 69,15,92,220 ; subps %xmm12,%xmm11
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,234 ; addps %xmm10,%xmm13
+ DB 69,15,94,229 ; divps %xmm13,%xmm12
+ DB 69,15,92,220 ; subps %xmm12,%xmm11
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 69,15,89,195 ; mulps %xmm11,%xmm8
+ DB 102,69,15,58,8,216,1 ; roundps $0x1,%xmm8,%xmm11
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,224 ; addps %xmm8,%xmm12
+ DB 69,15,92,195 ; subps %xmm11,%xmm8
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,89,216 ; mulps %xmm8,%xmm11
+ DB 69,15,92,227 ; subps %xmm11,%xmm12
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,92,232 ; subps %xmm8,%xmm13
+ DB 69,15,94,221 ; divps %xmm13,%xmm11
+ DB 69,15,88,220 ; addps %xmm12,%xmm11
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10
+ DB 243,68,15,16,64,20 ; movss 0x14(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 69,15,88,194 ; addps %xmm10,%xmm8
+ DB 102,69,15,56,20,193 ; blendvps %xmm0,%xmm9,%xmm8
+ DB 15,87,192 ; xorps %xmm0,%xmm0
+ DB 68,15,95,192 ; maxps %xmm0,%xmm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 68,15,93,192 ; minps %xmm0,%xmm8
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_g_sse41
+_sk_parametric_g_sse41 LABEL PROC
+ DB 68,15,40,192 ; movaps %xmm0,%xmm8
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,201 ; mulps %xmm1,%xmm9
+ DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 68,15,89,217 ; mulps %xmm1,%xmm11
+ DB 15,40,193 ; movaps %xmm1,%xmm0
+ DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0
+ DB 243,15,16,72,24 ; movss 0x18(%rax),%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 68,15,88,201 ; addps %xmm1,%xmm9
+ DB 243,68,15,16,32 ; movss (%rax),%xmm12
+ DB 243,15,16,72,8 ; movss 0x8(%rax),%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 68,15,88,217 ; addps %xmm1,%xmm11
+ DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,234 ; mulps %xmm10,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 68,15,92,233 ; subps %xmm1,%xmm13
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 102,68,15,112,209,0 ; pshufd $0x0,%xmm1,%xmm10
+ DB 102,69,15,219,211 ; pand %xmm11,%xmm10
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 102,68,15,112,217,0 ; pshufd $0x0,%xmm1,%xmm11
+ DB 102,69,15,235,218 ; por %xmm10,%xmm11
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,88,213 ; addps %xmm13,%xmm10
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,89,203 ; mulps %xmm11,%xmm1
+ DB 68,15,92,209 ; subps %xmm1,%xmm10
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,88,203 ; addps %xmm11,%xmm1
+ DB 68,15,94,233 ; divps %xmm1,%xmm13
+ DB 69,15,92,213 ; subps %xmm13,%xmm10
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12
+ DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,236 ; addps %xmm12,%xmm13
+ DB 69,15,92,227 ; subps %xmm11,%xmm12
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,89,204 ; mulps %xmm12,%xmm1
+ DB 68,15,92,233 ; subps %xmm1,%xmm13
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,92,204 ; subps %xmm12,%xmm1
+ DB 68,15,94,217 ; divps %xmm1,%xmm11
+ DB 69,15,88,221 ; addps %xmm13,%xmm11
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10
+ DB 243,15,16,72,20 ; movss 0x14(%rax),%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,88,202 ; addps %xmm10,%xmm1
+ DB 102,65,15,56,20,201 ; blendvps %xmm0,%xmm9,%xmm1
+ DB 15,87,192 ; xorps %xmm0,%xmm0
+ DB 15,95,200 ; maxps %xmm0,%xmm1
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,93,200 ; minps %xmm0,%xmm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_b_sse41
+_sk_parametric_b_sse41 LABEL PROC
+ DB 68,15,40,192 ; movaps %xmm0,%xmm8
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,202 ; mulps %xmm2,%xmm9
+ DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 68,15,89,218 ; mulps %xmm2,%xmm11
+ DB 15,40,194 ; movaps %xmm2,%xmm0
+ DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0
+ DB 243,15,16,80,24 ; movss 0x18(%rax),%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 68,15,88,202 ; addps %xmm2,%xmm9
+ DB 243,68,15,16,32 ; movss (%rax),%xmm12
+ DB 243,15,16,80,8 ; movss 0x8(%rax),%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 68,15,88,218 ; addps %xmm2,%xmm11
+ DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,234 ; mulps %xmm10,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 68,15,92,234 ; subps %xmm2,%xmm13
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 102,68,15,112,210,0 ; pshufd $0x0,%xmm2,%xmm10
+ DB 102,69,15,219,211 ; pand %xmm11,%xmm10
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 102,68,15,112,218,0 ; pshufd $0x0,%xmm2,%xmm11
+ DB 102,69,15,235,218 ; por %xmm10,%xmm11
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,88,213 ; addps %xmm13,%xmm10
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,89,211 ; mulps %xmm11,%xmm2
+ DB 68,15,92,210 ; subps %xmm2,%xmm10
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,88,211 ; addps %xmm11,%xmm2
+ DB 68,15,94,234 ; divps %xmm2,%xmm13
+ DB 69,15,92,213 ; subps %xmm13,%xmm10
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12
+ DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,236 ; addps %xmm12,%xmm13
+ DB 69,15,92,227 ; subps %xmm11,%xmm12
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,89,212 ; mulps %xmm12,%xmm2
+ DB 68,15,92,234 ; subps %xmm2,%xmm13
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,92,212 ; subps %xmm12,%xmm2
+ DB 68,15,94,218 ; divps %xmm2,%xmm11
+ DB 69,15,88,221 ; addps %xmm13,%xmm11
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10
+ DB 243,15,16,80,20 ; movss 0x14(%rax),%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,88,210 ; addps %xmm10,%xmm2
+ DB 102,65,15,56,20,209 ; blendvps %xmm0,%xmm9,%xmm2
+ DB 15,87,192 ; xorps %xmm0,%xmm0
+ DB 15,95,208 ; maxps %xmm0,%xmm2
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,93,208 ; minps %xmm0,%xmm2
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_a_sse41
+_sk_parametric_a_sse41 LABEL PROC
+ DB 68,15,40,192 ; movaps %xmm0,%xmm8
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,203 ; mulps %xmm3,%xmm9
+ DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 68,15,89,219 ; mulps %xmm3,%xmm11
+ DB 15,40,195 ; movaps %xmm3,%xmm0
+ DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0
+ DB 243,15,16,88,24 ; movss 0x18(%rax),%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 68,15,88,203 ; addps %xmm3,%xmm9
+ DB 243,68,15,16,32 ; movss (%rax),%xmm12
+ DB 243,15,16,88,8 ; movss 0x8(%rax),%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 68,15,88,219 ; addps %xmm3,%xmm11
+ DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,234 ; mulps %xmm10,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 68,15,92,235 ; subps %xmm3,%xmm13
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 102,68,15,112,211,0 ; pshufd $0x0,%xmm3,%xmm10
+ DB 102,69,15,219,211 ; pand %xmm11,%xmm10
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 102,68,15,112,219,0 ; pshufd $0x0,%xmm3,%xmm11
+ DB 102,69,15,235,218 ; por %xmm10,%xmm11
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,88,213 ; addps %xmm13,%xmm10
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,89,219 ; mulps %xmm11,%xmm3
+ DB 68,15,92,211 ; subps %xmm3,%xmm10
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,88,219 ; addps %xmm11,%xmm3
+ DB 68,15,94,235 ; divps %xmm3,%xmm13
+ DB 69,15,92,213 ; subps %xmm13,%xmm10
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12
+ DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,236 ; addps %xmm12,%xmm13
+ DB 69,15,92,227 ; subps %xmm11,%xmm12
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,89,220 ; mulps %xmm12,%xmm3
+ DB 68,15,92,235 ; subps %xmm3,%xmm13
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,92,220 ; subps %xmm12,%xmm3
+ DB 68,15,94,219 ; divps %xmm3,%xmm11
+ DB 69,15,88,221 ; addps %xmm13,%xmm11
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10
+ DB 243,15,16,88,20 ; movss 0x14(%rax),%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,88,218 ; addps %xmm10,%xmm3
+ DB 102,65,15,56,20,217 ; blendvps %xmm0,%xmm9,%xmm3
+ DB 15,87,192 ; xorps %xmm0,%xmm0
+ DB 15,95,216 ; maxps %xmm0,%xmm3
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,93,216 ; minps %xmm0,%xmm3
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0
+ DB 255,224 ; jmpq *%rax
+
PUBLIC _sk_load_a8_sse41
_sk_load_a8_sse41 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -11266,9 +12384,9 @@ _sk_gather_i8_sse41 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 210a <_sk_gather_i8_sse41+0xf>
+ DB 116,5 ; je 27a3 <_sk_gather_i8_sse41+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 210c <_sk_gather_i8_sse41+0x11>
+ DB 235,2 ; jmp 27a5 <_sk_gather_i8_sse41+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1
@@ -12362,7 +13480,7 @@ _sk_linear_gradient_sse41 LABEL PROC
DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
DB 72,139,8 ; mov (%rax),%rcx
DB 72,133,201 ; test %rcx,%rcx
- DB 15,132,4,1,0,0 ; je 3338 <_sk_linear_gradient_sse41+0x13e>
+ DB 15,132,4,1,0,0 ; je 39d1 <_sk_linear_gradient_sse41+0x13e>
DB 72,131,236,88 ; sub $0x58,%rsp
DB 15,41,36,36 ; movaps %xmm4,(%rsp)
DB 15,41,108,36,16 ; movaps %xmm5,0x10(%rsp)
@@ -12413,13 +13531,13 @@ _sk_linear_gradient_sse41 LABEL PROC
DB 15,40,196 ; movaps %xmm4,%xmm0
DB 72,131,192,36 ; add $0x24,%rax
DB 72,255,201 ; dec %rcx
- DB 15,133,65,255,255,255 ; jne 3260 <_sk_linear_gradient_sse41+0x66>
+ DB 15,133,65,255,255,255 ; jne 38f9 <_sk_linear_gradient_sse41+0x66>
DB 15,40,124,36,48 ; movaps 0x30(%rsp),%xmm7
DB 15,40,116,36,32 ; movaps 0x20(%rsp),%xmm6
DB 15,40,108,36,16 ; movaps 0x10(%rsp),%xmm5
DB 15,40,36,36 ; movaps (%rsp),%xmm4
DB 72,131,196,88 ; add $0x58,%rsp
- DB 235,13 ; jmp 3345 <_sk_linear_gradient_sse41+0x14b>
+ DB 235,13 ; jmp 39de <_sk_linear_gradient_sse41+0x14b>
DB 15,87,201 ; xorps %xmm1,%xmm1
DB 15,87,210 ; xorps %xmm2,%xmm2
DB 15,87,219 ; xorps %xmm3,%xmm3
@@ -15098,6 +16216,414 @@ _sk_table_a_sse2 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_sse2
+_sk_parametric_r_sse2 LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,192 ; mulps %xmm0,%xmm8
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,208 ; mulps %xmm0,%xmm10
+ DB 65,15,194,193,2 ; cmpleps %xmm9,%xmm0
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,84,234 ; andps %xmm10,%xmm13
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9
+ DB 69,15,92,221 ; subps %xmm13,%xmm11
+ DB 69,15,92,227 ; subps %xmm11,%xmm12
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12
+ DB 68,15,84,192 ; andps %xmm0,%xmm8
+ DB 65,15,85,196 ; andnps %xmm12,%xmm0
+ DB 65,15,86,192 ; orps %xmm8,%xmm0
+ DB 65,15,95,193 ; maxps %xmm9,%xmm0
+ DB 65,15,93,194 ; minps %xmm10,%xmm0
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_g_sse2
+_sk_parametric_g_sse2 LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,193 ; mulps %xmm1,%xmm8
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,209 ; mulps %xmm1,%xmm10
+ DB 65,15,194,201,2 ; cmpleps %xmm9,%xmm1
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,84,234 ; andps %xmm10,%xmm13
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9
+ DB 69,15,92,221 ; subps %xmm13,%xmm11
+ DB 69,15,92,227 ; subps %xmm11,%xmm12
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12
+ DB 68,15,84,193 ; andps %xmm1,%xmm8
+ DB 65,15,85,204 ; andnps %xmm12,%xmm1
+ DB 65,15,86,200 ; orps %xmm8,%xmm1
+ DB 65,15,95,201 ; maxps %xmm9,%xmm1
+ DB 65,15,93,202 ; minps %xmm10,%xmm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_b_sse2
+_sk_parametric_b_sse2 LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,194 ; mulps %xmm2,%xmm8
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,210 ; mulps %xmm2,%xmm10
+ DB 65,15,194,209,2 ; cmpleps %xmm9,%xmm2
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,84,234 ; andps %xmm10,%xmm13
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9
+ DB 69,15,92,221 ; subps %xmm13,%xmm11
+ DB 69,15,92,227 ; subps %xmm11,%xmm12
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12
+ DB 68,15,84,194 ; andps %xmm2,%xmm8
+ DB 65,15,85,212 ; andnps %xmm12,%xmm2
+ DB 65,15,86,208 ; orps %xmm8,%xmm2
+ DB 65,15,95,209 ; maxps %xmm9,%xmm2
+ DB 65,15,93,210 ; minps %xmm10,%xmm2
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_a_sse2 ; Generated SSE2 stage: applies the parametric transfer function to the 4 float lanes in %xmm3 (presumably alpha, cf. STAGE(parametric_a)). Field letters G,A,B,C,D,E,F in the notes below are inferred by matching against the C++ parametric() in this patch.
+_sk_parametric_a_sse2 LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax -- fetch next 8-byte word of the stage program into rax; used below as base pointer of the parameter block
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 -- scalar at +0x10 (threshold, ctx->D)
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 -- broadcast to all 4 lanes
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8 -- scalar at +0xc (ctx->C)
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,195 ; mulps %xmm3,%xmm8 -- xmm8 = C*v
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10 -- scalar at +0x4 (ctx->A)
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,211 ; mulps %xmm3,%xmm10 -- xmm10 = A*v
+ DB 65,15,194,217,2 ; cmpleps %xmm9,%xmm3 -- xmm3 becomes the lane mask (v <= D); v itself is not needed after this
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9 -- scalar at +0x18 (ctx->F)
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8 -- xmm8 = C*v + F (linear segment)
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9 -- scalar at +0x0 (exponent, ctx->G); broadcast later
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11 -- scalar at +0x8 (ctx->B)
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10 -- xmm10 = x = A*v + B, the base passed to approx_powf
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 -- approx_log2 begins: x's bit pattern converted as int32 to float
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx -- 0x34000000 = 2^-23 as float
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx -- 0x42fe0000 = 127.0f
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13 -- xmm13 = e = bits * 2^-23 - 127
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx -- mask of the 23 mantissa bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12 -- xmm12 = mantissa bits of x
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx -- 0x3f000000 = bit pattern of 0.5f
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11 -- xmm11 = m
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx -- ~2.774485010f
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12 -- xmm12 = e + 2.774485010
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx -- ~1.498030302f
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12 -- xmm12 -= 1.498030302 * m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx -- ~1.725879990f
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx -- ~0.3520887068f
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx -- 1.0f (upper clamp bound, also used for the floor fix-up)
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- 0x4b000000 = 2^23 as float, used by approx_pow2 below
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx -- ~121.2740575f
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14 -- xmm14 = 0.3520887068 + m
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12 -- xmm12 = approx_log2(x)
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 -- broadcast G
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9 -- xmm9 = p = approx_log2(x) * G, the argument of approx_pow2
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 -- floor(p) begins: truncate toward zero...
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 -- ...and convert back to float
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15 -- xmm15 = p + 121.2740575
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 -- mask of lanes where p < trunc(p)
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 -- broadcast 1.0f
+ DB 69,15,84,234 ; andps %xmm10,%xmm13 -- 1.0f in those lanes, 0 elsewhere
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9 -- xmm9 = 0 (lower clamp bound)
+ DB 69,15,92,221 ; subps %xmm13,%xmm11 -- xmm11 = floor(p)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 -- xmm12 = f = p - floor(p)
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 -- xmm11 = 2^23 in all lanes
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx -- ~1.490129070f
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15 -- xmm15 -= 1.490129070 * f
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx -- ~27.72802330f
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx -- ~4.84252568f
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14 -- xmm14 = 4.84252568 - f
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 -- scale by 2^23
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 -- round to int32; these bits are consumed as floats next (the bit_cast in approx_pow2)
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12 -- scalar at +0x14 (ctx->E)
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12 -- xmm12 = approx_powf(x, G) + E (curve segment)
+ DB 68,15,84,195 ; andps %xmm3,%xmm8 -- keep linear segment where mask is set
+ DB 65,15,85,220 ; andnps %xmm12,%xmm3 -- keep curve segment where mask is clear
+ DB 65,15,86,216 ; orps %xmm8,%xmm3 -- xmm3 = per-lane selection of the two segments
+ DB 65,15,95,217 ; maxps %xmm9,%xmm3 -- clamp below at 0
+ DB 65,15,93,218 ; minps %xmm10,%xmm3 -- clamp above at 1.0f
+ DB 72,173 ; lods %ds:(%rsi),%rax -- fetch the next stage's address from the program
+ DB 255,224 ; jmpq *%rax -- tail-jump to it
+
PUBLIC _sk_load_a8_sse2
_sk_load_a8_sse2 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -15254,9 +16780,9 @@ _sk_gather_i8_sse2 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 2285 <_sk_gather_i8_sse2+0xf>
+ DB 116,5 ; je 29b9 <_sk_gather_i8_sse2+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 2287 <_sk_gather_i8_sse2+0x11>
+ DB 235,2 ; jmp 29bb <_sk_gather_i8_sse2+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1
@@ -16453,7 +17979,7 @@ _sk_linear_gradient_sse2 LABEL PROC
DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
DB 72,139,8 ; mov (%rax),%rcx
DB 72,133,201 ; test %rcx,%rcx
- DB 15,132,15,1,0,0 ; je 366f <_sk_linear_gradient_sse2+0x149>
+ DB 15,132,15,1,0,0 ; je 3da3 <_sk_linear_gradient_sse2+0x149>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 69,15,87,192 ; xorps %xmm8,%xmm8
@@ -16514,8 +18040,8 @@ _sk_linear_gradient_sse2 LABEL PROC
DB 69,15,86,231 ; orps %xmm15,%xmm12
DB 72,131,192,36 ; add $0x24,%rax
DB 72,255,201 ; dec %rcx
- DB 15,133,8,255,255,255 ; jne 3575 <_sk_linear_gradient_sse2+0x4f>
- DB 235,13 ; jmp 367c <_sk_linear_gradient_sse2+0x156>
+ DB 15,133,8,255,255,255 ; jne 3ca9 <_sk_linear_gradient_sse2+0x4f>
+ DB 235,13 ; jmp 3db0 <_sk_linear_gradient_sse2+0x156>
DB 15,87,201 ; xorps %xmm1,%xmm1
DB 15,87,210 ; xorps %xmm2,%xmm2
DB 15,87,219 ; xorps %xmm3,%xmm3
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index b366cf5b12..a7f6d8036e 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -671,6 +671,40 @@ STAGE(table_g) { g = table(g, ctx); }
STAGE(table_b) { b = table(b, ctx); }
STAGE(table_a) { a = table(a, ctx); }
+// See http://www.machinedlearnings.com/2011/06/fast-approximate-logarithm-exponential.html.
+SI F approx_log2(F x) {  // Lane-wise approximation of log2(x) computed from x's float bit pattern.  NOTE(review): no special handling of x <= 0 is visible here; presumably only positive inputs are meaningful.
+ // For positive x, its bits read as an integer and scaled by 2^-23 give (biased exponent + mantissa fraction); removing the bias of 127 makes e a fair approximation of log2(x) in its own right...
+ F e = cast(bit_cast<U32>(x)) * C(1.0f / (1<<23)) - 127.0_f;
+
+ // ... but using the mantissa to refine its error is _much_ better.  m is x's 23 mantissa bits placed under the exponent bits of 0.5f (0x3f000000), i.e. a value in [0.5, 1).
+ F m = bit_cast<F>((bit_cast<U32>(x) & 0x007fffff_i) | 0x3f000000_i);
+ return e  // Refinement: a rational function of m.  NOTE(review): the constants presumably come from the article linked above this function.
+ + 2.774485010_f
+ - 1.498030302_f * m
+ - 1.725879990_f / (0.3520887068_f + m);
+}
+SI F approx_pow2(F x) {  // Lane-wise approximation of 2^x: the result's float bit pattern is built arithmetically and then reinterpreted as a float.
+ F f = fract(x);  // fractional part of x, used only by the correction terms below
+ return bit_cast<F>(round(C(1.0f * (1<<23)),  // scale factor 2^23.  NOTE(review): round() is defined elsewhere; presumably it multiplies its two arguments and rounds to an integer vector.
+ x + 121.2740575_f
+ - 1.490129070_f * f
+ + 27.72802330_f / (4.84252568_f - f)));  // rational correction in f; the divisor stays positive assuming f is in [0,1)
+}
+
+// Approximates pow(x, g) on every lane as 2^(g * log2(x)), chaining
+// approx_log2() and approx_pow2().  g is one scalar exponent shared by all lanes.
+SI F approx_powf(F x, float g) {
+    F scaled_log = approx_log2(x) * g;
+    return approx_pow2(scaled_log);
+}
+
+// Evaluates a piecewise transfer function on every lane of v and clamps to [0,1]:
+//     where v <= D :  mad(C, v, F)
+//     otherwise    :  approx_powf(mad(A, v, B), G) + E
+// Both pieces are computed for all lanes; if_then_else() picks per lane.
+SI F parametric(F v, const SkJumper_ParametricTransferFunction* ctx) {
+    F linear = mad(ctx->C, v, ctx->F);
+    F curve  = approx_powf(mad(ctx->A, v, ctx->B), ctx->G) + ctx->E;
+    F picked = if_then_else(v <= ctx->D, linear, curve);
+
+    // Clamp to [0,1], with argument order mattering to handle NaN.
+    return min(max(picked, 0), 1.0_f);
+}
+STAGE(parametric_r) { r = parametric(r, ctx); }  // One stage per channel: each body reassigns only its own channel through parametric().
+STAGE(parametric_g) { g = parametric(g, ctx); }
+STAGE(parametric_b) { b = parametric(b, ctx); }
+STAGE(parametric_a) { a = parametric(a, ctx); }  // NOTE(review): r/g/b/a and ctx come from the STAGE macro (not visible here); ctx is presumably this channel's SkJumper_ParametricTransferFunction.
+
STAGE(load_a8) {
auto ptr = *(const uint8_t**)ctx + x;
@@ -954,7 +988,6 @@ STAGE(save_xy) {
// Whether bilinear or bicubic, all sample points are at the same fractional offset (fx,fy).
// They're either the 4 corners of a logical 1x1 pixel or the 16 corners of a 3x3 grid
// surrounding (x,y) at (0.5,0.5) off-center.
- auto fract = [](F v) { return v - floor_(v); };
F fx = fract(r + 0.5_f),
fy = fract(g + 0.5_f);
diff --git a/src/jumper/SkJumper_vectors.h b/src/jumper/SkJumper_vectors.h
index f47dd115ef..590fe9c077 100644
--- a/src/jumper/SkJumper_vectors.h
+++ b/src/jumper/SkJumper_vectors.h
@@ -626,4 +626,6 @@ SI U16 bswap(U16 x) {
#endif
}
+// Per-lane fractional part: v minus floor_(v).
+SI F fract(F v) {
+    F whole = floor_(v);
+    return v - whole;
+}
+
#endif//SkJumper_vectors_DEFINED
diff --git a/tests/ColorSpaceXformTest.cpp b/tests/ColorSpaceXformTest.cpp
index 81fdc37caf..83317d95e4 100644
--- a/tests/ColorSpaceXformTest.cpp
+++ b/tests/ColorSpaceXformTest.cpp
@@ -252,6 +252,9 @@ DEF_TEST(ColorSpaceXform_NonMatchingGamma, r) {
gammas->fType[0] = SkGammas::Type::kValue_Type;
gammas->fData[0].fValue = 1.2f;
+ // See ColorSpaceXform_TableGamma... we've decided to allow some tolerance
+ // for SkJumper's implementation of tables.
+ const int tolerance = 12;
gammas->fType[1] = SkGammas::Type::kTable_Type;
gammas->fData[1].fTable.fSize = tableSize;
gammas->fData[1].fTable.fOffset = 0;
@@ -260,7 +263,7 @@ DEF_TEST(ColorSpaceXform_NonMatchingGamma, r) {
gammas->fData[2].fParamOffset = sizeof(float) * tableSize;
test_identity_xform(r, gammas, true);
- test_identity_xform_A2B(r, kNonStandard_SkGammaNamed, gammas);
+ test_identity_xform_A2B(r, kNonStandard_SkGammaNamed, gammas, tolerance);
}
DEF_TEST(ColorSpaceXform_A2BCLUT, r) {