about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- src/jumper/SkJumper.cpp 1
-rw-r--r-- src/jumper/SkJumper.h 5
-rw-r--r-- src/jumper/SkJumper_generated.S 2680
-rw-r--r-- src/jumper/SkJumper_generated_win.S 1942
-rw-r--r-- src/jumper/SkJumper_stages.cpp 35
-rw-r--r-- src/jumper/SkJumper_vectors.h 2
-rw-r--r-- tests/ColorSpaceXformTest.cpp 5
7 files changed, 4245 insertions, 425 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
index 7c72e85711..89fda93206 100644
--- a/src/jumper/SkJumper.cpp
+++ b/src/jumper/SkJumper.cpp
@@ -92,6 +92,7 @@ static K kConstants = {
M(byte_tables) \
M(byte_tables_rgb) \
M(table_r) M(table_g) M(table_b) M(table_a) \
+ M(parametric_r) M(parametric_g) M(parametric_b) M(parametric_a) \
M(load_a8) \
M(gather_a8) \
M(store_a8) \
diff --git a/src/jumper/SkJumper.h b/src/jumper/SkJumper.h
index 2f0db4e819..d4ab9684a4 100644
--- a/src/jumper/SkJumper.h
+++ b/src/jumper/SkJumper.h
@@ -96,4 +96,9 @@ struct SkJumper_TableCtx {
int size;
};
+// Seven consecutive floats; this should line up with the memory layout of SkColorSpaceTransferFn.
+struct SkJumper_ParametricTransferFunction {
+ float G, A,B,C,D,E,F;  // order is load-bearing: the generated parametric_{r,g,b,a} stages in this diff read these at byte offsets 0,4,8,12,16,20,24 and appear to evaluate x <= D ? C*x + F : (A*x + B)^G + E (power approximated), clamped to [0,1]
+};
+
#endif//SkJumper_DEFINED
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 8ac3441ee4..f12e5e252d 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -1897,6 +1897,318 @@ _sk_table_a_aarch64:
.long 0x6e1c0623 // mov v3.s[3], v17.s[0]
.long 0xd61f0060 // br x3
+HIDDEN _sk_parametric_r_aarch64
+.globl _sk_parametric_r_aarch64
+FUNCTION(_sk_parametric_r_aarch64)
+_sk_parametric_r_aarch64:
+ .long 0xf9400028 // ldr x8, [x1]
+ .long 0x4f016696 // movi v22.4s, #0x34, lsl #24
+ .long 0x91004109 // add x9, x8, #0x10
+ .long 0x9100610a // add x10, x8, #0x18
+ .long 0x4d40c933 // ld1r {v19.4s}, [x9]
+ .long 0xaa0803e9 // mov x9, x8
+ .long 0xbd400d12 // ldr s18, [x8, #12]
+ .long 0x4d40c950 // ld1r {v16.4s}, [x10]
+ .long 0x4ddfc931 // ld1r {v17.4s}, [x9], #4
+ .long 0x9100210a // add x10, x8, #0x8
+ .long 0x4d40c954 // ld1r {v20.4s}, [x10]
+ .long 0x4f921010 // fmla v16.4s, v0.4s, v18.s[0]
+ .long 0xbd400135 // ldr s21, [x9]
+ .long 0x52b85fc9 // mov w9, #0xc2fe0000
+ .long 0x4e040d37 // dup v23.4s, w9
+ .long 0x52a80629 // mov w9, #0x40310000
+ .long 0x72922549 // movk w9, #0x912a
+ .long 0x4f951014 // fmla v20.4s, v0.4s, v21.s[0]
+ .long 0x6e20e660 // fcmge v0.4s, v19.4s, v0.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a7f7e9 // mov w9, #0x3fbf0000
+ .long 0x4f03d7f2 // movi v18.4s, #0x7f, msl #16
+ .long 0x7297eea9 // movk w9, #0xbf75
+ .long 0x4e21da95 // scvtf v21.4s, v20.4s
+ .long 0x4e321e92 // and v18.16b, v20.16b, v18.16b
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a7d689 // mov w9, #0x3eb40000
+ .long 0x72889f29 // movk w9, #0x44f9
+ .long 0x4e35ced7 // fmla v23.4s, v22.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7fb89 // mov w9, #0x3fdc0000
+ .long 0x4e33d6f3 // fadd v19.4s, v23.4s, v19.4s
+ .long 0x729d3469 // movk w9, #0xe9a3
+ .long 0x4f0177f2 // orr v18.4s, #0x3f, lsl #24
+ .long 0x4eb4ce53 // fmls v19.4s, v18.4s, v20.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a85e49 // mov w9, #0x42f20000
+ .long 0x72918a29 // movk w9, #0x8c51
+ .long 0x4e35d652 // fadd v18.4s, v18.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7f7c9 // mov w9, #0x3fbe0000
+ .long 0x729791a9 // movk w9, #0xbc8d
+ .long 0x6e32fe92 // fdiv v18.4s, v20.4s, v18.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a81349 // mov w9, #0x409a0000
+ .long 0x4eb2d672 // fsub v18.4s, v19.4s, v18.4s
+ .long 0x729ebf09 // movk w9, #0xf5f8
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a83ba9 // mov w9, #0x41dd0000
+ .long 0x4e219a32 // frintm v18.4s, v17.4s
+ .long 0x729a5fc9 // movk w9, #0xd2fe
+ .long 0x4e35d635 // fadd v21.4s, v17.4s, v21.4s
+ .long 0x4eb2d631 // fsub v17.4s, v17.4s, v18.4s
+ .long 0x4eb4ce35 // fmls v21.4s, v17.4s, v20.4s
+ .long 0x4eb1d671 // fsub v17.4s, v19.4s, v17.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x91005108 // add x8, x8, #0x14
+ .long 0x6e31fe71 // fdiv v17.4s, v19.4s, v17.4s
+ .long 0x4e31d6b1 // fadd v17.4s, v21.4s, v17.4s
+ .long 0x4d40c915 // ld1r {v21.4s}, [x8]
+ .long 0x4f026572 // movi v18.4s, #0x4b, lsl #24
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x6e21aa31 // fcvtnu v17.4s, v17.4s
+ .long 0xf9400423 // ldr x3, [x1, #8]
+ .long 0x4e35d631 // fadd v17.4s, v17.4s, v21.4s
+ .long 0x6f00e414 // movi v20.2d, #0x0
+ .long 0x6e711e00 // bsl v0.16b, v16.16b, v17.16b
+ .long 0x4f03f613 // fmov v19.4s, #1.000000000000000000e+00
+ .long 0x4e34f400 // fmax v0.4s, v0.4s, v20.4s
+ .long 0x4eb3f400 // fmin v0.4s, v0.4s, v19.4s
+ .long 0x91004021 // add x1, x1, #0x10
+ .long 0xd61f0060 // br x3
+
+HIDDEN _sk_parametric_g_aarch64
+.globl _sk_parametric_g_aarch64
+FUNCTION(_sk_parametric_g_aarch64)
+_sk_parametric_g_aarch64:
+ .long 0xf9400028 // ldr x8, [x1]
+ .long 0x4f016696 // movi v22.4s, #0x34, lsl #24
+ .long 0x91004109 // add x9, x8, #0x10
+ .long 0x9100610a // add x10, x8, #0x18
+ .long 0x4d40c933 // ld1r {v19.4s}, [x9]
+ .long 0xaa0803e9 // mov x9, x8
+ .long 0xbd400d12 // ldr s18, [x8, #12]
+ .long 0x4d40c950 // ld1r {v16.4s}, [x10]
+ .long 0x4ddfc931 // ld1r {v17.4s}, [x9], #4
+ .long 0x9100210a // add x10, x8, #0x8
+ .long 0x4d40c954 // ld1r {v20.4s}, [x10]
+ .long 0x4f921030 // fmla v16.4s, v1.4s, v18.s[0]
+ .long 0xbd400135 // ldr s21, [x9]
+ .long 0x52b85fc9 // mov w9, #0xc2fe0000
+ .long 0x4e040d37 // dup v23.4s, w9
+ .long 0x52a80629 // mov w9, #0x40310000
+ .long 0x72922549 // movk w9, #0x912a
+ .long 0x4f951034 // fmla v20.4s, v1.4s, v21.s[0]
+ .long 0x6e21e661 // fcmge v1.4s, v19.4s, v1.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a7f7e9 // mov w9, #0x3fbf0000
+ .long 0x4f03d7f2 // movi v18.4s, #0x7f, msl #16
+ .long 0x7297eea9 // movk w9, #0xbf75
+ .long 0x4e21da95 // scvtf v21.4s, v20.4s
+ .long 0x4e321e92 // and v18.16b, v20.16b, v18.16b
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a7d689 // mov w9, #0x3eb40000
+ .long 0x72889f29 // movk w9, #0x44f9
+ .long 0x4e35ced7 // fmla v23.4s, v22.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7fb89 // mov w9, #0x3fdc0000
+ .long 0x4e33d6f3 // fadd v19.4s, v23.4s, v19.4s
+ .long 0x729d3469 // movk w9, #0xe9a3
+ .long 0x4f0177f2 // orr v18.4s, #0x3f, lsl #24
+ .long 0x4eb4ce53 // fmls v19.4s, v18.4s, v20.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a85e49 // mov w9, #0x42f20000
+ .long 0x72918a29 // movk w9, #0x8c51
+ .long 0x4e35d652 // fadd v18.4s, v18.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7f7c9 // mov w9, #0x3fbe0000
+ .long 0x729791a9 // movk w9, #0xbc8d
+ .long 0x6e32fe92 // fdiv v18.4s, v20.4s, v18.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a81349 // mov w9, #0x409a0000
+ .long 0x4eb2d672 // fsub v18.4s, v19.4s, v18.4s
+ .long 0x729ebf09 // movk w9, #0xf5f8
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a83ba9 // mov w9, #0x41dd0000
+ .long 0x4e219a32 // frintm v18.4s, v17.4s
+ .long 0x729a5fc9 // movk w9, #0xd2fe
+ .long 0x4e35d635 // fadd v21.4s, v17.4s, v21.4s
+ .long 0x4eb2d631 // fsub v17.4s, v17.4s, v18.4s
+ .long 0x4eb4ce35 // fmls v21.4s, v17.4s, v20.4s
+ .long 0x4eb1d671 // fsub v17.4s, v19.4s, v17.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x91005108 // add x8, x8, #0x14
+ .long 0x6e31fe71 // fdiv v17.4s, v19.4s, v17.4s
+ .long 0x4e31d6b1 // fadd v17.4s, v21.4s, v17.4s
+ .long 0x4d40c915 // ld1r {v21.4s}, [x8]
+ .long 0x4f026572 // movi v18.4s, #0x4b, lsl #24
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x6e21aa31 // fcvtnu v17.4s, v17.4s
+ .long 0xf9400423 // ldr x3, [x1, #8]
+ .long 0x4e35d631 // fadd v17.4s, v17.4s, v21.4s
+ .long 0x6f00e414 // movi v20.2d, #0x0
+ .long 0x6e711e01 // bsl v1.16b, v16.16b, v17.16b
+ .long 0x4f03f613 // fmov v19.4s, #1.000000000000000000e+00
+ .long 0x4e34f421 // fmax v1.4s, v1.4s, v20.4s
+ .long 0x4eb3f421 // fmin v1.4s, v1.4s, v19.4s
+ .long 0x91004021 // add x1, x1, #0x10
+ .long 0xd61f0060 // br x3
+
+HIDDEN _sk_parametric_b_aarch64
+.globl _sk_parametric_b_aarch64
+FUNCTION(_sk_parametric_b_aarch64)
+_sk_parametric_b_aarch64:
+ .long 0xf9400028 // ldr x8, [x1]
+ .long 0x4f016696 // movi v22.4s, #0x34, lsl #24
+ .long 0x91004109 // add x9, x8, #0x10
+ .long 0x9100610a // add x10, x8, #0x18
+ .long 0x4d40c933 // ld1r {v19.4s}, [x9]
+ .long 0xaa0803e9 // mov x9, x8
+ .long 0xbd400d12 // ldr s18, [x8, #12]
+ .long 0x4d40c950 // ld1r {v16.4s}, [x10]
+ .long 0x4ddfc931 // ld1r {v17.4s}, [x9], #4
+ .long 0x9100210a // add x10, x8, #0x8
+ .long 0x4d40c954 // ld1r {v20.4s}, [x10]
+ .long 0x4f921050 // fmla v16.4s, v2.4s, v18.s[0]
+ .long 0xbd400135 // ldr s21, [x9]
+ .long 0x52b85fc9 // mov w9, #0xc2fe0000
+ .long 0x4e040d37 // dup v23.4s, w9
+ .long 0x52a80629 // mov w9, #0x40310000
+ .long 0x72922549 // movk w9, #0x912a
+ .long 0x4f951054 // fmla v20.4s, v2.4s, v21.s[0]
+ .long 0x6e22e662 // fcmge v2.4s, v19.4s, v2.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a7f7e9 // mov w9, #0x3fbf0000
+ .long 0x4f03d7f2 // movi v18.4s, #0x7f, msl #16
+ .long 0x7297eea9 // movk w9, #0xbf75
+ .long 0x4e21da95 // scvtf v21.4s, v20.4s
+ .long 0x4e321e92 // and v18.16b, v20.16b, v18.16b
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a7d689 // mov w9, #0x3eb40000
+ .long 0x72889f29 // movk w9, #0x44f9
+ .long 0x4e35ced7 // fmla v23.4s, v22.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7fb89 // mov w9, #0x3fdc0000
+ .long 0x4e33d6f3 // fadd v19.4s, v23.4s, v19.4s
+ .long 0x729d3469 // movk w9, #0xe9a3
+ .long 0x4f0177f2 // orr v18.4s, #0x3f, lsl #24
+ .long 0x4eb4ce53 // fmls v19.4s, v18.4s, v20.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a85e49 // mov w9, #0x42f20000
+ .long 0x72918a29 // movk w9, #0x8c51
+ .long 0x4e35d652 // fadd v18.4s, v18.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7f7c9 // mov w9, #0x3fbe0000
+ .long 0x729791a9 // movk w9, #0xbc8d
+ .long 0x6e32fe92 // fdiv v18.4s, v20.4s, v18.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a81349 // mov w9, #0x409a0000
+ .long 0x4eb2d672 // fsub v18.4s, v19.4s, v18.4s
+ .long 0x729ebf09 // movk w9, #0xf5f8
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a83ba9 // mov w9, #0x41dd0000
+ .long 0x4e219a32 // frintm v18.4s, v17.4s
+ .long 0x729a5fc9 // movk w9, #0xd2fe
+ .long 0x4e35d635 // fadd v21.4s, v17.4s, v21.4s
+ .long 0x4eb2d631 // fsub v17.4s, v17.4s, v18.4s
+ .long 0x4eb4ce35 // fmls v21.4s, v17.4s, v20.4s
+ .long 0x4eb1d671 // fsub v17.4s, v19.4s, v17.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x91005108 // add x8, x8, #0x14
+ .long 0x6e31fe71 // fdiv v17.4s, v19.4s, v17.4s
+ .long 0x4e31d6b1 // fadd v17.4s, v21.4s, v17.4s
+ .long 0x4d40c915 // ld1r {v21.4s}, [x8]
+ .long 0x4f026572 // movi v18.4s, #0x4b, lsl #24
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x6e21aa31 // fcvtnu v17.4s, v17.4s
+ .long 0xf9400423 // ldr x3, [x1, #8]
+ .long 0x4e35d631 // fadd v17.4s, v17.4s, v21.4s
+ .long 0x6f00e414 // movi v20.2d, #0x0
+ .long 0x6e711e02 // bsl v2.16b, v16.16b, v17.16b
+ .long 0x4f03f613 // fmov v19.4s, #1.000000000000000000e+00
+ .long 0x4e34f442 // fmax v2.4s, v2.4s, v20.4s
+ .long 0x4eb3f442 // fmin v2.4s, v2.4s, v19.4s
+ .long 0x91004021 // add x1, x1, #0x10
+ .long 0xd61f0060 // br x3
+
+HIDDEN _sk_parametric_a_aarch64
+.globl _sk_parametric_a_aarch64
+FUNCTION(_sk_parametric_a_aarch64)
+_sk_parametric_a_aarch64:
+ .long 0xf9400028 // ldr x8, [x1]
+ .long 0x4f016696 // movi v22.4s, #0x34, lsl #24
+ .long 0x91004109 // add x9, x8, #0x10
+ .long 0x9100610a // add x10, x8, #0x18
+ .long 0x4d40c933 // ld1r {v19.4s}, [x9]
+ .long 0xaa0803e9 // mov x9, x8
+ .long 0xbd400d12 // ldr s18, [x8, #12]
+ .long 0x4d40c950 // ld1r {v16.4s}, [x10]
+ .long 0x4ddfc931 // ld1r {v17.4s}, [x9], #4
+ .long 0x9100210a // add x10, x8, #0x8
+ .long 0x4d40c954 // ld1r {v20.4s}, [x10]
+ .long 0x4f921070 // fmla v16.4s, v3.4s, v18.s[0]
+ .long 0xbd400135 // ldr s21, [x9]
+ .long 0x52b85fc9 // mov w9, #0xc2fe0000
+ .long 0x4e040d37 // dup v23.4s, w9
+ .long 0x52a80629 // mov w9, #0x40310000
+ .long 0x72922549 // movk w9, #0x912a
+ .long 0x4f951074 // fmla v20.4s, v3.4s, v21.s[0]
+ .long 0x6e23e663 // fcmge v3.4s, v19.4s, v3.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a7f7e9 // mov w9, #0x3fbf0000
+ .long 0x4f03d7f2 // movi v18.4s, #0x7f, msl #16
+ .long 0x7297eea9 // movk w9, #0xbf75
+ .long 0x4e21da95 // scvtf v21.4s, v20.4s
+ .long 0x4e321e92 // and v18.16b, v20.16b, v18.16b
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a7d689 // mov w9, #0x3eb40000
+ .long 0x72889f29 // movk w9, #0x44f9
+ .long 0x4e35ced7 // fmla v23.4s, v22.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7fb89 // mov w9, #0x3fdc0000
+ .long 0x4e33d6f3 // fadd v19.4s, v23.4s, v19.4s
+ .long 0x729d3469 // movk w9, #0xe9a3
+ .long 0x4f0177f2 // orr v18.4s, #0x3f, lsl #24
+ .long 0x4eb4ce53 // fmls v19.4s, v18.4s, v20.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a85e49 // mov w9, #0x42f20000
+ .long 0x72918a29 // movk w9, #0x8c51
+ .long 0x4e35d652 // fadd v18.4s, v18.4s, v21.4s
+ .long 0x4e040d35 // dup v21.4s, w9
+ .long 0x52a7f7c9 // mov w9, #0x3fbe0000
+ .long 0x729791a9 // movk w9, #0xbc8d
+ .long 0x6e32fe92 // fdiv v18.4s, v20.4s, v18.4s
+ .long 0x4e040d34 // dup v20.4s, w9
+ .long 0x52a81349 // mov w9, #0x409a0000
+ .long 0x4eb2d672 // fsub v18.4s, v19.4s, v18.4s
+ .long 0x729ebf09 // movk w9, #0xf5f8
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x52a83ba9 // mov w9, #0x41dd0000
+ .long 0x4e219a32 // frintm v18.4s, v17.4s
+ .long 0x729a5fc9 // movk w9, #0xd2fe
+ .long 0x4e35d635 // fadd v21.4s, v17.4s, v21.4s
+ .long 0x4eb2d631 // fsub v17.4s, v17.4s, v18.4s
+ .long 0x4eb4ce35 // fmls v21.4s, v17.4s, v20.4s
+ .long 0x4eb1d671 // fsub v17.4s, v19.4s, v17.4s
+ .long 0x4e040d33 // dup v19.4s, w9
+ .long 0x91005108 // add x8, x8, #0x14
+ .long 0x6e31fe71 // fdiv v17.4s, v19.4s, v17.4s
+ .long 0x4e31d6b1 // fadd v17.4s, v21.4s, v17.4s
+ .long 0x4d40c915 // ld1r {v21.4s}, [x8]
+ .long 0x4f026572 // movi v18.4s, #0x4b, lsl #24
+ .long 0x6e32de31 // fmul v17.4s, v17.4s, v18.4s
+ .long 0x6e21aa31 // fcvtnu v17.4s, v17.4s
+ .long 0xf9400423 // ldr x3, [x1, #8]
+ .long 0x4e35d631 // fadd v17.4s, v17.4s, v21.4s
+ .long 0x6f00e414 // movi v20.2d, #0x0
+ .long 0x6e711e03 // bsl v3.16b, v16.16b, v17.16b
+ .long 0x4f03f613 // fmov v19.4s, #1.000000000000000000e+00
+ .long 0x4e34f463 // fmax v3.4s, v3.4s, v20.4s
+ .long 0x4eb3f463 // fmin v3.4s, v3.4s, v19.4s
+ .long 0x91004021 // add x1, x1, #0x10
+ .long 0xd61f0060 // br x3
+
HIDDEN _sk_load_a8_aarch64
.globl _sk_load_a8_aarch64
FUNCTION(_sk_load_a8_aarch64)
@@ -2049,9 +2361,9 @@ FUNCTION(_sk_gather_i8_aarch64)
_sk_gather_i8_aarch64:
.long 0xaa0103e8 // mov x8, x1
.long 0xf8408429 // ldr x9, [x1], #8
- .long 0xb4000069 // cbz x9, 1ae0 <sk_gather_i8_aarch64+0x14>
+ .long 0xb4000069 // cbz x9, 1f70 <sk_gather_i8_aarch64+0x14>
.long 0xaa0903ea // mov x10, x9
- .long 0x14000003 // b 1ae8 <sk_gather_i8_aarch64+0x1c>
+ .long 0x14000003 // b 1f78 <sk_gather_i8_aarch64+0x1c>
.long 0xf940050a // ldr x10, [x8, #8]
.long 0x91004101 // add x1, x8, #0x10
.long 0xf8410548 // ldr x8, [x10], #16
@@ -2900,7 +3212,7 @@ _sk_linear_gradient_aarch64:
.long 0x4d40c902 // ld1r {v2.4s}, [x8]
.long 0xf9400128 // ldr x8, [x9]
.long 0x4d40c943 // ld1r {v3.4s}, [x10]
- .long 0xb40006c8 // cbz x8, 26b4 <sk_linear_gradient_aarch64+0x100>
+ .long 0xb40006c8 // cbz x8, 2b44 <sk_linear_gradient_aarch64+0x100>
.long 0x6dbf23e9 // stp d9, d8, [sp, #-16]!
.long 0xf9400529 // ldr x9, [x9, #8]
.long 0x6f00e413 // movi v19.2d, #0x0
@@ -2951,9 +3263,9 @@ _sk_linear_gradient_aarch64:
.long 0xd1000508 // sub x8, x8, #0x1
.long 0x6e771fd0 // bsl v16.16b, v30.16b, v23.16b
.long 0x91009129 // add x9, x9, #0x24
- .long 0xb5fffaa8 // cbnz x8, 25fc <sk_linear_gradient_aarch64+0x48>
+ .long 0xb5fffaa8 // cbnz x8, 2a8c <sk_linear_gradient_aarch64+0x48>
.long 0x6cc123e9 // ldp d9, d8, [sp], #16
- .long 0x14000005 // b 26c4 <sk_linear_gradient_aarch64+0x110>
+ .long 0x14000005 // b 2b54 <sk_linear_gradient_aarch64+0x110>
.long 0x6f00e414 // movi v20.2d, #0x0
.long 0x6f00e412 // movi v18.2d, #0x0
.long 0x6f00e411 // movi v17.2d, #0x0
@@ -5234,6 +5546,386 @@ _sk_table_a_vfp4:
.long 0xe8bd4010 // pop {r4, lr}
.long 0xe12fff1c // bx ip
+HIDDEN _sk_parametric_r_vfp4
+.globl _sk_parametric_r_vfp4
+FUNCTION(_sk_parametric_r_vfp4)
+_sk_parametric_r_vfp4:
+ .long 0xe92d4800 // push {fp, lr}
+ .long 0xed2d8b06 // vpush {d8-d10}
+ .long 0xe591e000 // ldr lr, [r1]
+ .long 0xeddf3b43 // vldr d19, [pc, #268]
+ .long 0xed9f8a52 // vldr s16, [pc, #328]
+ .long 0xe1a0300e // mov r3, lr
+ .long 0xeddf4b46 // vldr d20, [pc, #280]
+ .long 0xf4e30c9d // vld1.32 {d16[]}, [r3 :32]!
+ .long 0xe591c004 // ldr ip, [r1, #4]
+ .long 0xe2811008 // add r1, r1, #8
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3008 // add r3, lr, #8
+ .long 0xf4e32c9f // vld1.32 {d18[]}, [r3 :32]
+ .long 0xe28e300c // add r3, lr, #12
+ .long 0xf2412c90 // vfma.f32 d18, d17, d0
+ .long 0xf2c71d1f // vmov.i32 d17, #8388607
+ .long 0xf24211b1 // vand d17, d18, d17
+ .long 0xf2c3171f // vorr.i32 d17, #1056964608
+ .long 0xf3fb2622 // vcvt.f32.s32 d18, d18
+ .long 0xf2019da3 // vadd.f32 d9, d17, d19
+ .long 0xf2c33614 // vmov.i32 d19, #872415232
+ .long 0xf3422db3 // vmul.f32 d18, d18, d19
+ .long 0xeddf3b32 // vldr d19, [pc, #200]
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2422da3 // vadd.f32 d18, d18, d19
+ .long 0xeddf3b30 // vldr d19, [pc, #192]
+ .long 0xed9f8a3c // vldr s16, [pc, #240]
+ .long 0xf3411db3 // vmul.f32 d17, d17, d19
+ .long 0xf2c03010 // vmov.i32 d19, #0
+ .long 0xf2422da4 // vadd.f32 d18, d18, d20
+ .long 0xeddf4b2f // vldr d20, [pc, #188]
+ .long 0xf2621da1 // vsub.f32 d17, d18, d17
+ .long 0xf2611d8a // vsub.f32 d17, d17, d10
+ .long 0xf3400db1 // vmul.f32 d16, d16, d17
+ .long 0xf3fb1720 // vcvt.s32.f32 d17, d16
+ .long 0xf3fb1621 // vcvt.f32.s32 d17, d17
+ .long 0xf3612ea0 // vcgt.f32 d18, d17, d16
+ .long 0xf35421b3 // vbsl d18, d20, d19
+ .long 0xeddf4b2d // vldr d20, [pc, #180]
+ .long 0xf2611da2 // vsub.f32 d17, d17, d18
+ .long 0xeddf2b27 // vldr d18, [pc, #156]
+ .long 0xf2601da1 // vsub.f32 d17, d16, d17
+ .long 0xf2400da4 // vadd.f32 d16, d16, d20
+ .long 0xf2229da1 // vsub.f32 d9, d18, d17
+ .long 0xeddf2b25 // vldr d18, [pc, #148]
+ .long 0xf3411db2 // vmul.f32 d17, d17, d18
+ .long 0xf2c3261f // vmov.i32 d18, #1056964608
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2600da1 // vsub.f32 d16, d16, d17
+ .long 0xf2c4161b // vmov.i32 d17, #1258291200
+ .long 0xf2400d8a // vadd.f32 d16, d16, d10
+ .long 0xf2402cb1 // vfma.f32 d18, d16, d17
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3018 // add r3, lr, #24
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3010 // add r3, lr, #16
+ .long 0xf2401c90 // vfma.f32 d17, d16, d0
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3014 // add r3, lr, #20
+ .long 0xf3400e80 // vcge.f32 d16, d16, d0
+ .long 0xf4e34c9f // vld1.32 {d20[]}, [r3 :32]
+ .long 0xf3fb27a2 // vcvt.u32.f32 d18, d18
+ .long 0xf2442da2 // vadd.f32 d18, d20, d18
+ .long 0xf35101b2 // vbsl d16, d17, d18
+ .long 0xf2c71f10 // vmov.f32 d17, #1
+ .long 0xf2400fa3 // vmax.f32 d16, d16, d19
+ .long 0xf2200fa1 // vmin.f32 d0, d16, d17
+ .long 0xecbd8b06 // vpop {d8-d10}
+ .long 0xe8bd4800 // pop {fp, lr}
+ .long 0xe12fff1c // bx ip
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x3fdce9a3 // .word 0x3fdce9a3
+ .long 0x41ddd2fe // .word 0x41ddd2fe
+
+HIDDEN _sk_parametric_g_vfp4
+.globl _sk_parametric_g_vfp4
+FUNCTION(_sk_parametric_g_vfp4)
+_sk_parametric_g_vfp4:
+ .long 0xe92d4800 // push {fp, lr}
+ .long 0xed2d8b06 // vpush {d8-d10}
+ .long 0xe591e000 // ldr lr, [r1]
+ .long 0xeddf3b43 // vldr d19, [pc, #268]
+ .long 0xed9f8a52 // vldr s16, [pc, #328]
+ .long 0xe1a0300e // mov r3, lr
+ .long 0xeddf4b46 // vldr d20, [pc, #280]
+ .long 0xf4e30c9d // vld1.32 {d16[]}, [r3 :32]!
+ .long 0xe591c004 // ldr ip, [r1, #4]
+ .long 0xe2811008 // add r1, r1, #8
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3008 // add r3, lr, #8
+ .long 0xf4e32c9f // vld1.32 {d18[]}, [r3 :32]
+ .long 0xe28e300c // add r3, lr, #12
+ .long 0xf2412c91 // vfma.f32 d18, d17, d1
+ .long 0xf2c71d1f // vmov.i32 d17, #8388607
+ .long 0xf24211b1 // vand d17, d18, d17
+ .long 0xf2c3171f // vorr.i32 d17, #1056964608
+ .long 0xf3fb2622 // vcvt.f32.s32 d18, d18
+ .long 0xf2019da3 // vadd.f32 d9, d17, d19
+ .long 0xf2c33614 // vmov.i32 d19, #872415232
+ .long 0xf3422db3 // vmul.f32 d18, d18, d19
+ .long 0xeddf3b32 // vldr d19, [pc, #200]
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2422da3 // vadd.f32 d18, d18, d19
+ .long 0xeddf3b30 // vldr d19, [pc, #192]
+ .long 0xed9f8a3c // vldr s16, [pc, #240]
+ .long 0xf3411db3 // vmul.f32 d17, d17, d19
+ .long 0xf2c03010 // vmov.i32 d19, #0
+ .long 0xf2422da4 // vadd.f32 d18, d18, d20
+ .long 0xeddf4b2f // vldr d20, [pc, #188]
+ .long 0xf2621da1 // vsub.f32 d17, d18, d17
+ .long 0xf2611d8a // vsub.f32 d17, d17, d10
+ .long 0xf3400db1 // vmul.f32 d16, d16, d17
+ .long 0xf3fb1720 // vcvt.s32.f32 d17, d16
+ .long 0xf3fb1621 // vcvt.f32.s32 d17, d17
+ .long 0xf3612ea0 // vcgt.f32 d18, d17, d16
+ .long 0xf35421b3 // vbsl d18, d20, d19
+ .long 0xeddf4b2d // vldr d20, [pc, #180]
+ .long 0xf2611da2 // vsub.f32 d17, d17, d18
+ .long 0xeddf2b27 // vldr d18, [pc, #156]
+ .long 0xf2601da1 // vsub.f32 d17, d16, d17
+ .long 0xf2400da4 // vadd.f32 d16, d16, d20
+ .long 0xf2229da1 // vsub.f32 d9, d18, d17
+ .long 0xeddf2b25 // vldr d18, [pc, #148]
+ .long 0xf3411db2 // vmul.f32 d17, d17, d18
+ .long 0xf2c3261f // vmov.i32 d18, #1056964608
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2600da1 // vsub.f32 d16, d16, d17
+ .long 0xf2c4161b // vmov.i32 d17, #1258291200
+ .long 0xf2400d8a // vadd.f32 d16, d16, d10
+ .long 0xf2402cb1 // vfma.f32 d18, d16, d17
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3018 // add r3, lr, #24
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3010 // add r3, lr, #16
+ .long 0xf2401c91 // vfma.f32 d17, d16, d1
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3014 // add r3, lr, #20
+ .long 0xf3400e81 // vcge.f32 d16, d16, d1
+ .long 0xf4e34c9f // vld1.32 {d20[]}, [r3 :32]
+ .long 0xf3fb27a2 // vcvt.u32.f32 d18, d18
+ .long 0xf2442da2 // vadd.f32 d18, d20, d18
+ .long 0xf35101b2 // vbsl d16, d17, d18
+ .long 0xf2c71f10 // vmov.f32 d17, #1
+ .long 0xf2400fa3 // vmax.f32 d16, d16, d19
+ .long 0xf2201fa1 // vmin.f32 d1, d16, d17
+ .long 0xecbd8b06 // vpop {d8-d10}
+ .long 0xe8bd4800 // pop {fp, lr}
+ .long 0xe12fff1c // bx ip
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x3fdce9a3 // .word 0x3fdce9a3
+ .long 0x41ddd2fe // .word 0x41ddd2fe
+
+HIDDEN _sk_parametric_b_vfp4
+.globl _sk_parametric_b_vfp4
+FUNCTION(_sk_parametric_b_vfp4)
+_sk_parametric_b_vfp4:
+ .long 0xe92d4800 // push {fp, lr}
+ .long 0xed2d8b06 // vpush {d8-d10}
+ .long 0xe591e000 // ldr lr, [r1]
+ .long 0xeddf3b43 // vldr d19, [pc, #268]
+ .long 0xed9f8a52 // vldr s16, [pc, #328]
+ .long 0xe1a0300e // mov r3, lr
+ .long 0xeddf4b46 // vldr d20, [pc, #280]
+ .long 0xf4e30c9d // vld1.32 {d16[]}, [r3 :32]!
+ .long 0xe591c004 // ldr ip, [r1, #4]
+ .long 0xe2811008 // add r1, r1, #8
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3008 // add r3, lr, #8
+ .long 0xf4e32c9f // vld1.32 {d18[]}, [r3 :32]
+ .long 0xe28e300c // add r3, lr, #12
+ .long 0xf2412c92 // vfma.f32 d18, d17, d2
+ .long 0xf2c71d1f // vmov.i32 d17, #8388607
+ .long 0xf24211b1 // vand d17, d18, d17
+ .long 0xf2c3171f // vorr.i32 d17, #1056964608
+ .long 0xf3fb2622 // vcvt.f32.s32 d18, d18
+ .long 0xf2019da3 // vadd.f32 d9, d17, d19
+ .long 0xf2c33614 // vmov.i32 d19, #872415232
+ .long 0xf3422db3 // vmul.f32 d18, d18, d19
+ .long 0xeddf3b32 // vldr d19, [pc, #200]
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2422da3 // vadd.f32 d18, d18, d19
+ .long 0xeddf3b30 // vldr d19, [pc, #192]
+ .long 0xed9f8a3c // vldr s16, [pc, #240]
+ .long 0xf3411db3 // vmul.f32 d17, d17, d19
+ .long 0xf2c03010 // vmov.i32 d19, #0
+ .long 0xf2422da4 // vadd.f32 d18, d18, d20
+ .long 0xeddf4b2f // vldr d20, [pc, #188]
+ .long 0xf2621da1 // vsub.f32 d17, d18, d17
+ .long 0xf2611d8a // vsub.f32 d17, d17, d10
+ .long 0xf3400db1 // vmul.f32 d16, d16, d17
+ .long 0xf3fb1720 // vcvt.s32.f32 d17, d16
+ .long 0xf3fb1621 // vcvt.f32.s32 d17, d17
+ .long 0xf3612ea0 // vcgt.f32 d18, d17, d16
+ .long 0xf35421b3 // vbsl d18, d20, d19
+ .long 0xeddf4b2d // vldr d20, [pc, #180]
+ .long 0xf2611da2 // vsub.f32 d17, d17, d18
+ .long 0xeddf2b27 // vldr d18, [pc, #156]
+ .long 0xf2601da1 // vsub.f32 d17, d16, d17
+ .long 0xf2400da4 // vadd.f32 d16, d16, d20
+ .long 0xf2229da1 // vsub.f32 d9, d18, d17
+ .long 0xeddf2b25 // vldr d18, [pc, #148]
+ .long 0xf3411db2 // vmul.f32 d17, d17, d18
+ .long 0xf2c3261f // vmov.i32 d18, #1056964608
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2600da1 // vsub.f32 d16, d16, d17
+ .long 0xf2c4161b // vmov.i32 d17, #1258291200
+ .long 0xf2400d8a // vadd.f32 d16, d16, d10
+ .long 0xf2402cb1 // vfma.f32 d18, d16, d17
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3018 // add r3, lr, #24
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3010 // add r3, lr, #16
+ .long 0xf2401c92 // vfma.f32 d17, d16, d2
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3014 // add r3, lr, #20
+ .long 0xf3400e82 // vcge.f32 d16, d16, d2
+ .long 0xf4e34c9f // vld1.32 {d20[]}, [r3 :32]
+ .long 0xf3fb27a2 // vcvt.u32.f32 d18, d18
+ .long 0xf2442da2 // vadd.f32 d18, d20, d18
+ .long 0xf35101b2 // vbsl d16, d17, d18
+ .long 0xf2c71f10 // vmov.f32 d17, #1
+ .long 0xf2400fa3 // vmax.f32 d16, d16, d19
+ .long 0xf2202fa1 // vmin.f32 d2, d16, d17
+ .long 0xecbd8b06 // vpop {d8-d10}
+ .long 0xe8bd4800 // pop {fp, lr}
+ .long 0xe12fff1c // bx ip
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x3fdce9a3 // .word 0x3fdce9a3
+ .long 0x41ddd2fe // .word 0x41ddd2fe
+
+HIDDEN _sk_parametric_a_vfp4
+.globl _sk_parametric_a_vfp4
+FUNCTION(_sk_parametric_a_vfp4)
+_sk_parametric_a_vfp4:
+ .long 0xe92d4800 // push {fp, lr}
+ .long 0xed2d8b06 // vpush {d8-d10}
+ .long 0xe591e000 // ldr lr, [r1]
+ .long 0xeddf3b43 // vldr d19, [pc, #268]
+ .long 0xed9f8a52 // vldr s16, [pc, #328]
+ .long 0xe1a0300e // mov r3, lr
+ .long 0xeddf4b46 // vldr d20, [pc, #280]
+ .long 0xf4e30c9d // vld1.32 {d16[]}, [r3 :32]!
+ .long 0xe591c004 // ldr ip, [r1, #4]
+ .long 0xe2811008 // add r1, r1, #8
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3008 // add r3, lr, #8
+ .long 0xf4e32c9f // vld1.32 {d18[]}, [r3 :32]
+ .long 0xe28e300c // add r3, lr, #12
+ .long 0xf2412c93 // vfma.f32 d18, d17, d3
+ .long 0xf2c71d1f // vmov.i32 d17, #8388607
+ .long 0xf24211b1 // vand d17, d18, d17
+ .long 0xf2c3171f // vorr.i32 d17, #1056964608
+ .long 0xf3fb2622 // vcvt.f32.s32 d18, d18
+ .long 0xf2019da3 // vadd.f32 d9, d17, d19
+ .long 0xf2c33614 // vmov.i32 d19, #872415232
+ .long 0xf3422db3 // vmul.f32 d18, d18, d19
+ .long 0xeddf3b32 // vldr d19, [pc, #200]
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2422da3 // vadd.f32 d18, d18, d19
+ .long 0xeddf3b30 // vldr d19, [pc, #192]
+ .long 0xed9f8a3c // vldr s16, [pc, #240]
+ .long 0xf3411db3 // vmul.f32 d17, d17, d19
+ .long 0xf2c03010 // vmov.i32 d19, #0
+ .long 0xf2422da4 // vadd.f32 d18, d18, d20
+ .long 0xeddf4b2f // vldr d20, [pc, #188]
+ .long 0xf2621da1 // vsub.f32 d17, d18, d17
+ .long 0xf2611d8a // vsub.f32 d17, d17, d10
+ .long 0xf3400db1 // vmul.f32 d16, d16, d17
+ .long 0xf3fb1720 // vcvt.s32.f32 d17, d16
+ .long 0xf3fb1621 // vcvt.f32.s32 d17, d17
+ .long 0xf3612ea0 // vcgt.f32 d18, d17, d16
+ .long 0xf35421b3 // vbsl d18, d20, d19
+ .long 0xeddf4b2d // vldr d20, [pc, #180]
+ .long 0xf2611da2 // vsub.f32 d17, d17, d18
+ .long 0xeddf2b27 // vldr d18, [pc, #156]
+ .long 0xf2601da1 // vsub.f32 d17, d16, d17
+ .long 0xf2400da4 // vadd.f32 d16, d16, d20
+ .long 0xf2229da1 // vsub.f32 d9, d18, d17
+ .long 0xeddf2b25 // vldr d18, [pc, #148]
+ .long 0xf3411db2 // vmul.f32 d17, d17, d18
+ .long 0xf2c3261f // vmov.i32 d18, #1056964608
+ .long 0xeec8aa29 // vdiv.f32 s21, s16, s19
+ .long 0xee88aa09 // vdiv.f32 s20, s16, s18
+ .long 0xf2600da1 // vsub.f32 d16, d16, d17
+ .long 0xf2c4161b // vmov.i32 d17, #1258291200
+ .long 0xf2400d8a // vadd.f32 d16, d16, d10
+ .long 0xf2402cb1 // vfma.f32 d18, d16, d17
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3018 // add r3, lr, #24
+ .long 0xf4e31c9f // vld1.32 {d17[]}, [r3 :32]
+ .long 0xe28e3010 // add r3, lr, #16
+ .long 0xf2401c93 // vfma.f32 d17, d16, d3
+ .long 0xf4e30c9f // vld1.32 {d16[]}, [r3 :32]
+ .long 0xe28e3014 // add r3, lr, #20
+ .long 0xf3400e83 // vcge.f32 d16, d16, d3
+ .long 0xf4e34c9f // vld1.32 {d20[]}, [r3 :32]
+ .long 0xf3fb27a2 // vcvt.u32.f32 d18, d18
+ .long 0xf2442da2 // vadd.f32 d18, d20, d18
+ .long 0xf35101b2 // vbsl d16, d17, d18
+ .long 0xf2c71f10 // vmov.f32 d17, #1
+ .long 0xf2400fa3 // vmax.f32 d16, d16, d19
+ .long 0xf2203fa1 // vmin.f32 d3, d16, d17
+ .long 0xecbd8b06 // vpop {d8-d10}
+ .long 0xe8bd4800 // pop {fp, lr}
+ .long 0xe12fff1c // bx ip
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0x3eb444f9 // .word 0x3eb444f9
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0xc2fe0000 // .word 0xc2fe0000
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x3fbfbf75 // .word 0x3fbfbf75
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x4031912a // .word 0x4031912a
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x3f800000 // .word 0x3f800000
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x409af5f8 // .word 0x409af5f8
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x3fbebc8d // .word 0x3fbebc8d
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x42f28c51 // .word 0x42f28c51
+ .long 0x3fdce9a3 // .word 0x3fdce9a3
+ .long 0x41ddd2fe // .word 0x41ddd2fe
+
HIDDEN _sk_load_a8_vfp4
.globl _sk_load_a8_vfp4
FUNCTION(_sk_load_a8_vfp4)
@@ -6414,7 +7106,7 @@ _sk_linear_gradient_vfp4:
.long 0xe494c00c // ldr ip, [r4], #12
.long 0xf4a41c9f // vld1.32 {d1[]}, [r4 :32]
.long 0xe35c0000 // cmp ip, #0
- .long 0x0a000036 // beq 2a18 <sk_linear_gradient_vfp4+0x110>
+ .long 0x0a000036 // beq 2fb8 <sk_linear_gradient_vfp4+0x110>
.long 0xe59e3004 // ldr r3, [lr, #4]
.long 0xf2c01010 // vmov.i32 d17, #0
.long 0xf2c07010 // vmov.i32 d23, #0
@@ -6464,12 +7156,12 @@ _sk_linear_gradient_vfp4:
.long 0xf26371b3 // vorr d23, d19, d19
.long 0xf26481b4 // vorr d24, d20, d20
.long 0xf26561b5 // vorr d22, d21, d21
- .long 0x1affffd3 // bne 2954 <sk_linear_gradient_vfp4+0x4c>
+ .long 0x1affffd3 // bne 2ef4 <sk_linear_gradient_vfp4+0x4c>
.long 0xf26c01bc // vorr d16, d28, d28
.long 0xf22b11bb // vorr d1, d27, d27
.long 0xf22a21ba // vorr d2, d26, d26
.long 0xf22931b9 // vorr d3, d25, d25
- .long 0xea000003 // b 2a28 <sk_linear_gradient_vfp4+0x120>
+ .long 0xea000003 // b 2fc8 <sk_linear_gradient_vfp4+0x120>
.long 0xf2c05010 // vmov.i32 d21, #0
.long 0xf2c04010 // vmov.i32 d20, #0
.long 0xf2c03010 // vmov.i32 d19, #0
@@ -8887,6 +9579,342 @@ _sk_table_a_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_parametric_r_hsw // Stage: piecewise parametric transfer function applied to the 8 float lanes of ymm0 (called x below).
+.globl _sk_parametric_r_hsw // Coefficients are 7 consecutive floats at rax+0x00..0x18. NOTE(review): presumably SkJumper_ParametricTransferFunction {G,A,B,C,D,E,F} -- confirm against the caller.
+FUNCTION(_sk_parametric_r_hsw) // Result per lane: (x <= D) ? C*x + F : (A*x + B)^G + E, with the power computed via approximate log2/exp2, then clamped to [0,1].
+_sk_parametric_r_hsw: // Writes rax, r8d, ymm0 and ymm8-ymm13; rsi is advanced by the two lods instructions.
+ .byte 72,173 // lods %ds:(%rsi),%rax -- rax = qword at (%rsi), rsi += 8 (DF assumed clear); rax is the base for the coefficient loads below
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8 -- ymm8 = broadcast float [rax+0x10] (D)
+ .byte 196,65,124,194,192,2 // vcmpleps %ymm8,%ymm0,%ymm8 -- ymm8 = lane mask (x <= D); predicate imm 2 = LE
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9 -- ymm9 = broadcast [rax+0x0c] (C)
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10 -- ymm10 = broadcast [rax+0x18] (F)
+ .byte 196,66,125,168,202 // vfmadd213ps %ymm10,%ymm0,%ymm9 -- ymm9 = x*C + F (linear segment, kept for the final blend)
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10 -- ymm10 = broadcast [rax+0x04] (A)
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11 -- ymm11 = broadcast [rax+0x08] (B)
+ .byte 196,66,125,168,211 // vfmadd213ps %ymm11,%ymm0,%ymm10 -- ymm10 = x*A + B (base of the power segment)
+ .byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11 -- ymm11 = float(int32 bit pattern of ymm10): start of the approximate log2
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d -- r8d = bits of 2^-23
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- ymm0 becomes scratch from here on; x is not read again
+ .byte 196,98,125,88,224 // vpbroadcastd %xmm0,%ymm12 -- ymm12 = 2^-23 in every lane
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d -- r8d = bits of 127.0f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the constant in xmm0
+ .byte 196,98,125,88,232 // vpbroadcastd %xmm0,%ymm13 -- ymm13 = 127.0f in every lane
+ .byte 196,66,37,186,236 // vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = ymm11*2^-23 - 127 (exponent estimate)
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d -- r8d = 0x007fffff, mask of the low 23 bits (float mantissa field)
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the mask in xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0 -- ymm0 = mantissa mask in every lane
+ .byte 196,65,125,219,210 // vpand %ymm10,%ymm0,%ymm10 -- ymm10 = low 23 bits of (x*A + B)
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d -- r8d = bits of 0.5f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the constant in xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0 -- ymm0 = 0.5f bit pattern in every lane
+ .byte 197,45,235,208 // vpor %ymm0,%ymm10,%ymm10 -- ymm10 = m: the mantissa bits with the bit pattern of 0.5f OR'd in
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d -- r8d = bits of ~2.774485f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the constant in xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0 -- ymm0 = ~2.774485f in every lane
+ .byte 197,20,88,216 // vaddps %ymm0,%ymm13,%ymm11 -- ymm11 = ymm13 + 2.774485
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d -- r8d = bits of ~1.498030f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the constant in xmm0
+ .byte 196,98,125,88,224 // vpbroadcastd %xmm0,%ymm12 -- ymm12 = ~1.498030f in every lane
+ .byte 196,66,45,172,227 // vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.498030*m
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d -- r8d = bits of ~1.725880f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the constant in xmm0
+ .byte 196,98,125,88,216 // vpbroadcastd %xmm0,%ymm11 -- ymm11 = ~1.725880f in every lane
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d -- r8d = bits of ~0.352089f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the constant in xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0 -- ymm0 = ~0.352089f in every lane
+ .byte 197,172,88,192 // vaddps %ymm0,%ymm10,%ymm0 -- ymm0 = m + 0.352089
+ .byte 197,164,94,192 // vdivps %ymm0,%ymm11,%ymm0 -- ymm0 = 1.725880 / (m + 0.352089)
+ .byte 197,156,92,192 // vsubps %ymm0,%ymm12,%ymm0 -- ymm0 = ymm12 - ymm0 = approximate log2(x*A + B)
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10 -- ymm10 = broadcast [rax+0x00] (G)
+ .byte 197,44,89,216 // vmulps %ymm0,%ymm10,%ymm11 -- ymm11 = G * log2 estimate (exponent fed to the approximate exp2)
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10 -- ymm10 = floor(ymm11); rounding mode 1 = toward -infinity
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = ymm11 - floor(ymm11) (fractional part)
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d -- r8d = bits of 8388608.0f (2^23)
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the constant in xmm0
+ .byte 196,98,125,88,224 // vpbroadcastd %xmm0,%ymm12 -- ymm12 = 2^23 in every lane
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d -- r8d = bits of ~121.27406f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the constant in xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0 -- ymm0 = ~121.27406f in every lane
+ .byte 196,65,124,88,219 // vaddps %ymm11,%ymm0,%ymm11 -- ymm11 = ymm11 + 121.27406
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d -- r8d = bits of ~1.490129f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the constant in xmm0
+ .byte 196,98,125,88,232 // vpbroadcastd %xmm0,%ymm13 -- ymm13 = ~1.490129f in every lane
+ .byte 196,66,45,172,235 // vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.490129*f
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d -- r8d = bits of ~27.728024f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the constant in xmm0
+ .byte 196,98,125,88,216 // vpbroadcastd %xmm0,%ymm11 -- ymm11 = ~27.728024f in every lane
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d -- r8d = bits of ~4.842526f
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0 -- stage the constant in xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0 -- ymm0 = ~4.842526f in every lane
+ .byte 196,193,124,92,194 // vsubps %ymm10,%ymm0,%ymm0 -- ymm0 = 4.842526 - f
+ .byte 197,164,94,192 // vdivps %ymm0,%ymm11,%ymm0 -- ymm0 = 27.728024 / (4.842526 - f)
+ .byte 197,148,88,192 // vaddps %ymm0,%ymm13,%ymm0 -- ymm0 = ymm13 + ymm0
+ .byte 197,156,89,192 // vmulps %ymm0,%ymm12,%ymm0 -- ymm0 = ymm0 * 2^23
+ .byte 197,253,91,192 // vcvtps2dq %ymm0,%ymm0 -- ymm0 = int32(ymm0); the vaddps below consumes these bits as floats, i.e. ~(x*A + B)^G
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10 -- ymm10 = broadcast [rax+0x14] (E)
+ .byte 196,193,124,88,194 // vaddps %ymm10,%ymm0,%ymm0 -- ymm0 = power segment + E
+ .byte 196,195,125,74,193,128 // vblendvps %ymm8,%ymm9,%ymm0,%ymm0 -- per lane: mask (x <= D) set -> ymm9 (x*C + F), else keep ymm0
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8 -- ymm8 = 0.0f in every lane
+ .byte 196,65,124,95,192 // vmaxps %ymm8,%ymm0,%ymm8 -- ymm8 = max(ymm0, 0): lower clamp
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax -- eax = bits of 1.0f (rax is free: every coefficient has been loaded)
+ .byte 197,249,110,192 // vmovd %eax,%xmm0 -- stage the constant in xmm0
+ .byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0 -- ymm0 = 1.0f in every lane
+ .byte 197,188,93,192 // vminps %ymm0,%ymm8,%ymm0 -- ymm0 = min(ymm8, 1.0f): final result
+ .byte 72,173 // lods %ds:(%rsi),%rax -- rax = next qword at (%rsi), rsi += 8
+ .byte 255,224 // jmpq *%rax -- tail-jump to the address just loaded
+
+HIDDEN _sk_parametric_g_hsw // Stage: piecewise parametric transfer function applied to the 8 float lanes of ymm1 (called x below).
+.globl _sk_parametric_g_hsw // Coefficients are 7 consecutive floats at rax+0x00..0x18. NOTE(review): presumably SkJumper_ParametricTransferFunction {G,A,B,C,D,E,F} -- confirm against the caller.
+FUNCTION(_sk_parametric_g_hsw) // Result per lane: (x <= D) ? C*x + F : (A*x + B)^G + E, with the power computed via approximate log2/exp2, then clamped to [0,1].
+_sk_parametric_g_hsw: // Writes rax, r8d, ymm1 and ymm8-ymm13; rsi is advanced by the two lods instructions.
+ .byte 72,173 // lods %ds:(%rsi),%rax -- rax = qword at (%rsi), rsi += 8 (DF assumed clear); rax is the base for the coefficient loads below
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8 -- ymm8 = broadcast float [rax+0x10] (D)
+ .byte 196,65,116,194,192,2 // vcmpleps %ymm8,%ymm1,%ymm8 -- ymm8 = lane mask (x <= D); predicate imm 2 = LE
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9 -- ymm9 = broadcast [rax+0x0c] (C)
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10 -- ymm10 = broadcast [rax+0x18] (F)
+ .byte 196,66,117,168,202 // vfmadd213ps %ymm10,%ymm1,%ymm9 -- ymm9 = x*C + F (linear segment, kept for the final blend)
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10 -- ymm10 = broadcast [rax+0x04] (A)
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11 -- ymm11 = broadcast [rax+0x08] (B)
+ .byte 196,66,117,168,211 // vfmadd213ps %ymm11,%ymm1,%ymm10 -- ymm10 = x*A + B (base of the power segment)
+ .byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11 -- ymm11 = float(int32 bit pattern of ymm10): start of the approximate log2
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d -- r8d = bits of 2^-23
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- ymm1 becomes scratch from here on; x is not read again
+ .byte 196,98,125,88,225 // vpbroadcastd %xmm1,%ymm12 -- ymm12 = 2^-23 in every lane
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d -- r8d = bits of 127.0f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the constant in xmm1
+ .byte 196,98,125,88,233 // vpbroadcastd %xmm1,%ymm13 -- ymm13 = 127.0f in every lane
+ .byte 196,66,37,186,236 // vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = ymm11*2^-23 - 127 (exponent estimate)
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d -- r8d = 0x007fffff, mask of the low 23 bits (float mantissa field)
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the mask in xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1 -- ymm1 = mantissa mask in every lane
+ .byte 196,65,117,219,210 // vpand %ymm10,%ymm1,%ymm10 -- ymm10 = low 23 bits of (x*A + B)
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d -- r8d = bits of 0.5f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the constant in xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1 -- ymm1 = 0.5f bit pattern in every lane
+ .byte 197,45,235,209 // vpor %ymm1,%ymm10,%ymm10 -- ymm10 = m: the mantissa bits with the bit pattern of 0.5f OR'd in
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d -- r8d = bits of ~2.774485f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the constant in xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1 -- ymm1 = ~2.774485f in every lane
+ .byte 197,20,88,217 // vaddps %ymm1,%ymm13,%ymm11 -- ymm11 = ymm13 + 2.774485
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d -- r8d = bits of ~1.498030f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the constant in xmm1
+ .byte 196,98,125,88,225 // vpbroadcastd %xmm1,%ymm12 -- ymm12 = ~1.498030f in every lane
+ .byte 196,66,45,172,227 // vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.498030*m
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d -- r8d = bits of ~1.725880f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the constant in xmm1
+ .byte 196,98,125,88,217 // vpbroadcastd %xmm1,%ymm11 -- ymm11 = ~1.725880f in every lane
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d -- r8d = bits of ~0.352089f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the constant in xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1 -- ymm1 = ~0.352089f in every lane
+ .byte 197,172,88,201 // vaddps %ymm1,%ymm10,%ymm1 -- ymm1 = m + 0.352089
+ .byte 197,164,94,201 // vdivps %ymm1,%ymm11,%ymm1 -- ymm1 = 1.725880 / (m + 0.352089)
+ .byte 197,156,92,201 // vsubps %ymm1,%ymm12,%ymm1 -- ymm1 = ymm12 - ymm1 = approximate log2(x*A + B)
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10 -- ymm10 = broadcast [rax+0x00] (G)
+ .byte 197,44,89,217 // vmulps %ymm1,%ymm10,%ymm11 -- ymm11 = G * log2 estimate (exponent fed to the approximate exp2)
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10 -- ymm10 = floor(ymm11); rounding mode 1 = toward -infinity
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = ymm11 - floor(ymm11) (fractional part)
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d -- r8d = bits of 8388608.0f (2^23)
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the constant in xmm1
+ .byte 196,98,125,88,225 // vpbroadcastd %xmm1,%ymm12 -- ymm12 = 2^23 in every lane
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d -- r8d = bits of ~121.27406f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the constant in xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1 -- ymm1 = ~121.27406f in every lane
+ .byte 196,65,116,88,219 // vaddps %ymm11,%ymm1,%ymm11 -- ymm11 = ymm11 + 121.27406
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d -- r8d = bits of ~1.490129f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the constant in xmm1
+ .byte 196,98,125,88,233 // vpbroadcastd %xmm1,%ymm13 -- ymm13 = ~1.490129f in every lane
+ .byte 196,66,45,172,235 // vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.490129*f
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d -- r8d = bits of ~27.728024f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the constant in xmm1
+ .byte 196,98,125,88,217 // vpbroadcastd %xmm1,%ymm11 -- ymm11 = ~27.728024f in every lane
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d -- r8d = bits of ~4.842526f
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1 -- stage the constant in xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1 -- ymm1 = ~4.842526f in every lane
+ .byte 196,193,116,92,202 // vsubps %ymm10,%ymm1,%ymm1 -- ymm1 = 4.842526 - f
+ .byte 197,164,94,201 // vdivps %ymm1,%ymm11,%ymm1 -- ymm1 = 27.728024 / (4.842526 - f)
+ .byte 197,148,88,201 // vaddps %ymm1,%ymm13,%ymm1 -- ymm1 = ymm13 + ymm1
+ .byte 197,156,89,201 // vmulps %ymm1,%ymm12,%ymm1 -- ymm1 = ymm1 * 2^23
+ .byte 197,253,91,201 // vcvtps2dq %ymm1,%ymm1 -- ymm1 = int32(ymm1); the vaddps below consumes these bits as floats, i.e. ~(x*A + B)^G
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10 -- ymm10 = broadcast [rax+0x14] (E)
+ .byte 196,193,116,88,202 // vaddps %ymm10,%ymm1,%ymm1 -- ymm1 = power segment + E
+ .byte 196,195,117,74,201,128 // vblendvps %ymm8,%ymm9,%ymm1,%ymm1 -- per lane: mask (x <= D) set -> ymm9 (x*C + F), else keep ymm1
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8 -- ymm8 = 0.0f in every lane
+ .byte 196,65,116,95,192 // vmaxps %ymm8,%ymm1,%ymm8 -- ymm8 = max(ymm1, 0): lower clamp
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax -- eax = bits of 1.0f (rax is free: every coefficient has been loaded)
+ .byte 197,249,110,200 // vmovd %eax,%xmm1 -- stage the constant in xmm1
+ .byte 196,226,125,88,201 // vpbroadcastd %xmm1,%ymm1 -- ymm1 = 1.0f in every lane
+ .byte 197,188,93,201 // vminps %ymm1,%ymm8,%ymm1 -- ymm1 = min(ymm8, 1.0f): final result
+ .byte 72,173 // lods %ds:(%rsi),%rax -- rax = next qword at (%rsi), rsi += 8
+ .byte 255,224 // jmpq *%rax -- tail-jump to the address just loaded
+
+HIDDEN _sk_parametric_b_hsw // Stage: piecewise parametric transfer function applied to the 8 float lanes of ymm2 (called x below).
+.globl _sk_parametric_b_hsw // Coefficients are 7 consecutive floats at rax+0x00..0x18. NOTE(review): presumably SkJumper_ParametricTransferFunction {G,A,B,C,D,E,F} -- confirm against the caller.
+FUNCTION(_sk_parametric_b_hsw) // Result per lane: (x <= D) ? C*x + F : (A*x + B)^G + E, with the power computed via approximate log2/exp2, then clamped to [0,1].
+_sk_parametric_b_hsw: // Writes rax, r8d, ymm2 and ymm8-ymm13; rsi is advanced by the two lods instructions.
+ .byte 72,173 // lods %ds:(%rsi),%rax -- rax = qword at (%rsi), rsi += 8 (DF assumed clear); rax is the base for the coefficient loads below
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8 -- ymm8 = broadcast float [rax+0x10] (D)
+ .byte 196,65,108,194,192,2 // vcmpleps %ymm8,%ymm2,%ymm8 -- ymm8 = lane mask (x <= D); predicate imm 2 = LE
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9 -- ymm9 = broadcast [rax+0x0c] (C)
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10 -- ymm10 = broadcast [rax+0x18] (F)
+ .byte 196,66,109,168,202 // vfmadd213ps %ymm10,%ymm2,%ymm9 -- ymm9 = x*C + F (linear segment, kept for the final blend)
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10 -- ymm10 = broadcast [rax+0x04] (A)
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11 -- ymm11 = broadcast [rax+0x08] (B)
+ .byte 196,66,109,168,211 // vfmadd213ps %ymm11,%ymm2,%ymm10 -- ymm10 = x*A + B (base of the power segment)
+ .byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11 -- ymm11 = float(int32 bit pattern of ymm10): start of the approximate log2
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d -- r8d = bits of 2^-23
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- ymm2 becomes scratch from here on; x is not read again
+ .byte 196,98,125,88,226 // vpbroadcastd %xmm2,%ymm12 -- ymm12 = 2^-23 in every lane
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d -- r8d = bits of 127.0f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the constant in xmm2
+ .byte 196,98,125,88,234 // vpbroadcastd %xmm2,%ymm13 -- ymm13 = 127.0f in every lane
+ .byte 196,66,37,186,236 // vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = ymm11*2^-23 - 127 (exponent estimate)
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d -- r8d = 0x007fffff, mask of the low 23 bits (float mantissa field)
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the mask in xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2 -- ymm2 = mantissa mask in every lane
+ .byte 196,65,109,219,210 // vpand %ymm10,%ymm2,%ymm10 -- ymm10 = low 23 bits of (x*A + B)
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d -- r8d = bits of 0.5f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the constant in xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2 -- ymm2 = 0.5f bit pattern in every lane
+ .byte 197,45,235,210 // vpor %ymm2,%ymm10,%ymm10 -- ymm10 = m: the mantissa bits with the bit pattern of 0.5f OR'd in
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d -- r8d = bits of ~2.774485f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the constant in xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2 -- ymm2 = ~2.774485f in every lane
+ .byte 197,20,88,218 // vaddps %ymm2,%ymm13,%ymm11 -- ymm11 = ymm13 + 2.774485
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d -- r8d = bits of ~1.498030f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the constant in xmm2
+ .byte 196,98,125,88,226 // vpbroadcastd %xmm2,%ymm12 -- ymm12 = ~1.498030f in every lane
+ .byte 196,66,45,172,227 // vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.498030*m
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d -- r8d = bits of ~1.725880f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the constant in xmm2
+ .byte 196,98,125,88,218 // vpbroadcastd %xmm2,%ymm11 -- ymm11 = ~1.725880f in every lane
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d -- r8d = bits of ~0.352089f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the constant in xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2 -- ymm2 = ~0.352089f in every lane
+ .byte 197,172,88,210 // vaddps %ymm2,%ymm10,%ymm2 -- ymm2 = m + 0.352089
+ .byte 197,164,94,210 // vdivps %ymm2,%ymm11,%ymm2 -- ymm2 = 1.725880 / (m + 0.352089)
+ .byte 197,156,92,210 // vsubps %ymm2,%ymm12,%ymm2 -- ymm2 = ymm12 - ymm2 = approximate log2(x*A + B)
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10 -- ymm10 = broadcast [rax+0x00] (G)
+ .byte 197,44,89,218 // vmulps %ymm2,%ymm10,%ymm11 -- ymm11 = G * log2 estimate (exponent fed to the approximate exp2)
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10 -- ymm10 = floor(ymm11); rounding mode 1 = toward -infinity
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = ymm11 - floor(ymm11) (fractional part)
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d -- r8d = bits of 8388608.0f (2^23)
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the constant in xmm2
+ .byte 196,98,125,88,226 // vpbroadcastd %xmm2,%ymm12 -- ymm12 = 2^23 in every lane
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d -- r8d = bits of ~121.27406f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the constant in xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2 -- ymm2 = ~121.27406f in every lane
+ .byte 196,65,108,88,219 // vaddps %ymm11,%ymm2,%ymm11 -- ymm11 = ymm11 + 121.27406
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d -- r8d = bits of ~1.490129f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the constant in xmm2
+ .byte 196,98,125,88,234 // vpbroadcastd %xmm2,%ymm13 -- ymm13 = ~1.490129f in every lane
+ .byte 196,66,45,172,235 // vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.490129*f
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d -- r8d = bits of ~27.728024f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the constant in xmm2
+ .byte 196,98,125,88,218 // vpbroadcastd %xmm2,%ymm11 -- ymm11 = ~27.728024f in every lane
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d -- r8d = bits of ~4.842526f
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2 -- stage the constant in xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2 -- ymm2 = ~4.842526f in every lane
+ .byte 196,193,108,92,210 // vsubps %ymm10,%ymm2,%ymm2 -- ymm2 = 4.842526 - f
+ .byte 197,164,94,210 // vdivps %ymm2,%ymm11,%ymm2 -- ymm2 = 27.728024 / (4.842526 - f)
+ .byte 197,148,88,210 // vaddps %ymm2,%ymm13,%ymm2 -- ymm2 = ymm13 + ymm2
+ .byte 197,156,89,210 // vmulps %ymm2,%ymm12,%ymm2 -- ymm2 = ymm2 * 2^23
+ .byte 197,253,91,210 // vcvtps2dq %ymm2,%ymm2 -- ymm2 = int32(ymm2); the vaddps below consumes these bits as floats, i.e. ~(x*A + B)^G
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10 -- ymm10 = broadcast [rax+0x14] (E)
+ .byte 196,193,108,88,210 // vaddps %ymm10,%ymm2,%ymm2 -- ymm2 = power segment + E
+ .byte 196,195,109,74,209,128 // vblendvps %ymm8,%ymm9,%ymm2,%ymm2 -- per lane: mask (x <= D) set -> ymm9 (x*C + F), else keep ymm2
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8 -- ymm8 = 0.0f in every lane
+ .byte 196,65,108,95,192 // vmaxps %ymm8,%ymm2,%ymm8 -- ymm8 = max(ymm2, 0): lower clamp
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax -- eax = bits of 1.0f (rax is free: every coefficient has been loaded)
+ .byte 197,249,110,208 // vmovd %eax,%xmm2 -- stage the constant in xmm2
+ .byte 196,226,125,88,210 // vpbroadcastd %xmm2,%ymm2 -- ymm2 = 1.0f in every lane
+ .byte 197,188,93,210 // vminps %ymm2,%ymm8,%ymm2 -- ymm2 = min(ymm8, 1.0f): final result
+ .byte 72,173 // lods %ds:(%rsi),%rax -- rax = next qword at (%rsi), rsi += 8
+ .byte 255,224 // jmpq *%rax -- tail-jump to the address just loaded
+
+HIDDEN _sk_parametric_a_hsw // Stage: piecewise parametric transfer function applied to the 8 float lanes of ymm3 (called x below).
+.globl _sk_parametric_a_hsw // Coefficients are 7 consecutive floats at rax+0x00..0x18. NOTE(review): presumably SkJumper_ParametricTransferFunction {G,A,B,C,D,E,F} -- confirm against the caller.
+FUNCTION(_sk_parametric_a_hsw) // Result per lane: (x <= D) ? C*x + F : (A*x + B)^G + E, with the power computed via approximate log2/exp2, then clamped to [0,1].
+_sk_parametric_a_hsw: // Writes rax, r8d, ymm3 and ymm8-ymm13; rsi is advanced by the two lods instructions.
+ .byte 72,173 // lods %ds:(%rsi),%rax -- rax = qword at (%rsi), rsi += 8 (DF assumed clear); rax is the base for the coefficient loads below
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8 -- ymm8 = broadcast float [rax+0x10] (D)
+ .byte 196,65,100,194,192,2 // vcmpleps %ymm8,%ymm3,%ymm8 -- ymm8 = lane mask (x <= D); predicate imm 2 = LE
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9 -- ymm9 = broadcast [rax+0x0c] (C)
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10 -- ymm10 = broadcast [rax+0x18] (F)
+ .byte 196,66,101,168,202 // vfmadd213ps %ymm10,%ymm3,%ymm9 -- ymm9 = x*C + F (linear segment, kept for the final blend)
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10 -- ymm10 = broadcast [rax+0x04] (A)
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11 -- ymm11 = broadcast [rax+0x08] (B)
+ .byte 196,66,101,168,211 // vfmadd213ps %ymm11,%ymm3,%ymm10 -- ymm10 = x*A + B (base of the power segment)
+ .byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11 -- ymm11 = float(int32 bit pattern of ymm10): start of the approximate log2
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d -- r8d = bits of 2^-23
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- ymm3 becomes scratch from here on; x is not read again
+ .byte 196,98,125,88,227 // vpbroadcastd %xmm3,%ymm12 -- ymm12 = 2^-23 in every lane
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d -- r8d = bits of 127.0f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the constant in xmm3
+ .byte 196,98,125,88,235 // vpbroadcastd %xmm3,%ymm13 -- ymm13 = 127.0f in every lane
+ .byte 196,66,37,186,236 // vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = ymm11*2^-23 - 127 (exponent estimate)
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d -- r8d = 0x007fffff, mask of the low 23 bits (float mantissa field)
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the mask in xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3 -- ymm3 = mantissa mask in every lane
+ .byte 196,65,101,219,210 // vpand %ymm10,%ymm3,%ymm10 -- ymm10 = low 23 bits of (x*A + B)
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d -- r8d = bits of 0.5f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the constant in xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3 -- ymm3 = 0.5f bit pattern in every lane
+ .byte 197,45,235,211 // vpor %ymm3,%ymm10,%ymm10 -- ymm10 = m: the mantissa bits with the bit pattern of 0.5f OR'd in
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d -- r8d = bits of ~2.774485f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the constant in xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3 -- ymm3 = ~2.774485f in every lane
+ .byte 197,20,88,219 // vaddps %ymm3,%ymm13,%ymm11 -- ymm11 = ymm13 + 2.774485
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d -- r8d = bits of ~1.498030f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the constant in xmm3
+ .byte 196,98,125,88,227 // vpbroadcastd %xmm3,%ymm12 -- ymm12 = ~1.498030f in every lane
+ .byte 196,66,45,172,227 // vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.498030*m
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d -- r8d = bits of ~1.725880f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the constant in xmm3
+ .byte 196,98,125,88,219 // vpbroadcastd %xmm3,%ymm11 -- ymm11 = ~1.725880f in every lane
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d -- r8d = bits of ~0.352089f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the constant in xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3 -- ymm3 = ~0.352089f in every lane
+ .byte 197,172,88,219 // vaddps %ymm3,%ymm10,%ymm3 -- ymm3 = m + 0.352089
+ .byte 197,164,94,219 // vdivps %ymm3,%ymm11,%ymm3 -- ymm3 = 1.725880 / (m + 0.352089)
+ .byte 197,156,92,219 // vsubps %ymm3,%ymm12,%ymm3 -- ymm3 = ymm12 - ymm3 = approximate log2(x*A + B)
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10 -- ymm10 = broadcast [rax+0x00] (G)
+ .byte 197,44,89,219 // vmulps %ymm3,%ymm10,%ymm11 -- ymm11 = G * log2 estimate (exponent fed to the approximate exp2)
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10 -- ymm10 = floor(ymm11); rounding mode 1 = toward -infinity
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = ymm11 - floor(ymm11) (fractional part)
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d -- r8d = bits of 8388608.0f (2^23)
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the constant in xmm3
+ .byte 196,98,125,88,227 // vpbroadcastd %xmm3,%ymm12 -- ymm12 = 2^23 in every lane
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d -- r8d = bits of ~121.27406f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the constant in xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3 -- ymm3 = ~121.27406f in every lane
+ .byte 196,65,100,88,219 // vaddps %ymm11,%ymm3,%ymm11 -- ymm11 = ymm11 + 121.27406
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d -- r8d = bits of ~1.490129f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the constant in xmm3
+ .byte 196,98,125,88,235 // vpbroadcastd %xmm3,%ymm13 -- ymm13 = ~1.490129f in every lane
+ .byte 196,66,45,172,235 // vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.490129*f
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d -- r8d = bits of ~27.728024f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the constant in xmm3
+ .byte 196,98,125,88,219 // vpbroadcastd %xmm3,%ymm11 -- ymm11 = ~27.728024f in every lane
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d -- r8d = bits of ~4.842526f
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3 -- stage the constant in xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3 -- ymm3 = ~4.842526f in every lane
+ .byte 196,193,100,92,218 // vsubps %ymm10,%ymm3,%ymm3 -- ymm3 = 4.842526 - f
+ .byte 197,164,94,219 // vdivps %ymm3,%ymm11,%ymm3 -- ymm3 = 27.728024 / (4.842526 - f)
+ .byte 197,148,88,219 // vaddps %ymm3,%ymm13,%ymm3 -- ymm3 = ymm13 + ymm3
+ .byte 197,156,89,219 // vmulps %ymm3,%ymm12,%ymm3 -- ymm3 = ymm3 * 2^23
+ .byte 197,253,91,219 // vcvtps2dq %ymm3,%ymm3 -- ymm3 = int32(ymm3); the vaddps below consumes these bits as floats, i.e. ~(x*A + B)^G
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10 -- ymm10 = broadcast [rax+0x14] (E)
+ .byte 196,193,100,88,218 // vaddps %ymm10,%ymm3,%ymm3 -- ymm3 = power segment + E
+ .byte 196,195,101,74,217,128 // vblendvps %ymm8,%ymm9,%ymm3,%ymm3 -- per lane: mask (x <= D) set -> ymm9 (x*C + F), else keep ymm3
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8 -- ymm8 = 0.0f in every lane
+ .byte 196,65,100,95,192 // vmaxps %ymm8,%ymm3,%ymm8 -- ymm8 = max(ymm3, 0): lower clamp
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax -- eax = bits of 1.0f (rax is free: every coefficient has been loaded)
+ .byte 197,249,110,216 // vmovd %eax,%xmm3 -- stage the constant in xmm3
+ .byte 196,226,125,88,219 // vpbroadcastd %xmm3,%ymm3 -- ymm3 = 1.0f in every lane
+ .byte 197,188,93,219 // vminps %ymm3,%ymm8,%ymm3 -- ymm3 = min(ymm8, 1.0f): final result
+ .byte 72,173 // lods %ds:(%rsi),%rax -- rax = next qword at (%rsi), rsi += 8
+ .byte 255,224 // jmpq *%rax -- tail-jump to the address just loaded
+
HIDDEN _sk_load_a8_hsw
.globl _sk_load_a8_hsw
FUNCTION(_sk_load_a8_hsw)
@@ -8896,7 +9924,7 @@ _sk_load_a8_hsw:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,50 // jne 1d78 <_sk_load_a8_hsw+0x42>
+ .byte 117,50 // jne 23a8 <_sk_load_a8_hsw+0x42>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
@@ -8919,9 +9947,9 @@ _sk_load_a8_hsw:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 1d80 <_sk_load_a8_hsw+0x4a>
+ .byte 117,234 // jne 23b0 <_sk_load_a8_hsw+0x4a>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,173 // jmp 1d4a <_sk_load_a8_hsw+0x14>
+ .byte 235,173 // jmp 237a <_sk_load_a8_hsw+0x14>
HIDDEN _sk_gather_a8_hsw
.globl _sk_gather_a8_hsw
@@ -8996,7 +10024,7 @@ _sk_store_a8_hsw:
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 196,65,57,103,192 // vpackuswb %xmm8,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 1eb5 <_sk_store_a8_hsw+0x3b>
+ .byte 117,10 // jne 24e5 <_sk_store_a8_hsw+0x3b>
.byte 196,65,123,17,4,57 // vmovsd %xmm8,(%r9,%rdi,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -9004,10 +10032,10 @@ _sk_store_a8_hsw:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 1eb1 <_sk_store_a8_hsw+0x37>
+ .byte 119,236 // ja 24e1 <_sk_store_a8_hsw+0x37>
.byte 196,66,121,48,192 // vpmovzxbw %xmm8,%xmm8
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,67,0,0,0 // lea 0x43(%rip),%r8 # 1f18 <_sk_store_a8_hsw+0x9e>
+ .byte 76,141,5,67,0,0,0 // lea 0x43(%rip),%r8 # 2548 <_sk_store_a8_hsw+0x9e>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -9018,7 +10046,7 @@ _sk_store_a8_hsw:
.byte 196,67,121,20,68,57,2,4 // vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
.byte 196,67,121,20,68,57,1,2 // vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
.byte 196,67,121,20,4,57,0 // vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- .byte 235,154 // jmp 1eb1 <_sk_store_a8_hsw+0x37>
+ .byte 235,154 // jmp 24e1 <_sk_store_a8_hsw+0x37>
.byte 144 // nop
.byte 246,255 // idiv %bh
.byte 255 // (bad)
@@ -9052,7 +10080,7 @@ _sk_load_g8_hsw:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,60 // jne 1f80 <_sk_load_g8_hsw+0x4c>
+ .byte 117,60 // jne 25b0 <_sk_load_g8_hsw+0x4c>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
@@ -9077,9 +10105,9 @@ _sk_load_g8_hsw:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 1f88 <_sk_load_g8_hsw+0x54>
+ .byte 117,234 // jne 25b8 <_sk_load_g8_hsw+0x54>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,163 // jmp 1f48 <_sk_load_g8_hsw+0x14>
+ .byte 235,163 // jmp 2578 <_sk_load_g8_hsw+0x14>
HIDDEN _sk_gather_g8_hsw
.globl _sk_gather_g8_hsw
@@ -9148,9 +10176,9 @@ _sk_gather_i8_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 209b <_sk_gather_i8_hsw+0xf>
+ .byte 116,5 // je 26cb <_sk_gather_i8_hsw+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 209d <_sk_gather_i8_hsw+0x11>
+ .byte 235,2 // jmp 26cd <_sk_gather_i8_hsw+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 65,87 // push %r15
.byte 65,86 // push %r14
@@ -9223,7 +10251,7 @@ _sk_load_565_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,149,0,0,0 // jne 224f <_sk_load_565_hsw+0xa3>
+ .byte 15,133,149,0,0,0 // jne 287f <_sk_load_565_hsw+0xa3>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 196,226,125,51,208 // vpmovzxwd %xmm0,%ymm2
.byte 184,0,248,0,0 // mov $0xf800,%eax
@@ -9263,9 +10291,9 @@ _sk_load_565_hsw:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,89,255,255,255 // ja 21c0 <_sk_load_565_hsw+0x14>
+ .byte 15,135,89,255,255,255 // ja 27f0 <_sk_load_565_hsw+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 22bc <_sk_load_565_hsw+0x110>
+ .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 28ec <_sk_load_565_hsw+0x110>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -9277,12 +10305,12 @@ _sk_load_565_hsw:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,5,255,255,255 // jmpq 21c0 <_sk_load_565_hsw+0x14>
+ .byte 233,5,255,255,255 // jmpq 27f0 <_sk_load_565_hsw+0x14>
.byte 144 // nop
.byte 243,255 // repz (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 235,255 // jmp 22c1 <_sk_load_565_hsw+0x115>
+ .byte 235,255 // jmp 28f1 <_sk_load_565_hsw+0x115>
.byte 255 // (bad)
.byte 255,227 // jmpq *%rbx
.byte 255 // (bad)
@@ -9409,7 +10437,7 @@ _sk_store_565_hsw:
.byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 2487 <_sk_store_565_hsw+0x6c>
+ .byte 117,10 // jne 2ab7 <_sk_store_565_hsw+0x6c>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -9417,9 +10445,9 @@ _sk_store_565_hsw:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 2483 <_sk_store_565_hsw+0x68>
+ .byte 119,236 // ja 2ab3 <_sk_store_565_hsw+0x68>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 24e4 <_sk_store_565_hsw+0xc9>
+ .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 2b14 <_sk_store_565_hsw+0xc9>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -9430,7 +10458,7 @@ _sk_store_565_hsw:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 2483 <_sk_store_565_hsw+0x68>
+ .byte 235,159 // jmp 2ab3 <_sk_store_565_hsw+0x68>
.byte 247,255 // idiv %edi
.byte 255 // (bad)
.byte 255 // (bad)
@@ -9461,7 +10489,7 @@ _sk_load_4444_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,179,0,0,0 // jne 25c1 <_sk_load_4444_hsw+0xc1>
+ .byte 15,133,179,0,0,0 // jne 2bf1 <_sk_load_4444_hsw+0xc1>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 196,98,125,51,200 // vpmovzxwd %xmm0,%ymm9
.byte 184,0,240,0,0 // mov $0xf000,%eax
@@ -9507,9 +10535,9 @@ _sk_load_4444_hsw:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,59,255,255,255 // ja 2514 <_sk_load_4444_hsw+0x14>
+ .byte 15,135,59,255,255,255 // ja 2b44 <_sk_load_4444_hsw+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,76,0,0,0 // lea 0x4c(%rip),%r9 # 2630 <_sk_load_4444_hsw+0x130>
+ .byte 76,141,13,76,0,0,0 // lea 0x4c(%rip),%r9 # 2c60 <_sk_load_4444_hsw+0x130>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -9521,13 +10549,13 @@ _sk_load_4444_hsw:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,231,254,255,255 // jmpq 2514 <_sk_load_4444_hsw+0x14>
+ .byte 233,231,254,255,255 // jmpq 2b44 <_sk_load_4444_hsw+0x14>
.byte 15,31,0 // nopl (%rax)
.byte 241 // icebp
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,225 // jmpq ffffffffe2002638 <_sk_callback_hsw+0xffffffffe1ffeace>
+ .byte 233,255,255,255,225 // jmpq ffffffffe2002c68 <_sk_callback_hsw+0xffffffffe1ffeace>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
@@ -9659,7 +10687,7 @@ _sk_store_4444_hsw:
.byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 281f <_sk_store_4444_hsw+0x72>
+ .byte 117,10 // jne 2e4f <_sk_store_4444_hsw+0x72>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -9667,9 +10695,9 @@ _sk_store_4444_hsw:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 281b <_sk_store_4444_hsw+0x6e>
+ .byte 119,236 // ja 2e4b <_sk_store_4444_hsw+0x6e>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 287c <_sk_store_4444_hsw+0xcf>
+ .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 2eac <_sk_store_4444_hsw+0xcf>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -9680,7 +10708,7 @@ _sk_store_4444_hsw:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 281b <_sk_store_4444_hsw+0x6e>
+ .byte 235,159 // jmp 2e4b <_sk_store_4444_hsw+0x6e>
.byte 247,255 // idiv %edi
.byte 255 // (bad)
.byte 255 // (bad)
@@ -9713,7 +10741,7 @@ _sk_load_8888_hsw:
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
.byte 76,3,8 // add (%rax),%r9
.byte 77,133,192 // test %r8,%r8
- .byte 117,104 // jne 2915 <_sk_load_8888_hsw+0x7d>
+ .byte 117,104 // jne 2f45 <_sk_load_8888_hsw+0x7d>
.byte 196,193,126,111,25 // vmovdqu (%r9),%ymm3
.byte 184,255,0,0,0 // mov $0xff,%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
@@ -9746,7 +10774,7 @@ _sk_load_8888_hsw:
.byte 196,225,249,110,192 // vmovq %rax,%xmm0
.byte 196,226,125,33,192 // vpmovsxbd %xmm0,%ymm0
.byte 196,194,125,140,25 // vpmaskmovd (%r9),%ymm0,%ymm3
- .byte 233,116,255,255,255 // jmpq 28b2 <_sk_load_8888_hsw+0x1a>
+ .byte 233,116,255,255,255 // jmpq 2ee2 <_sk_load_8888_hsw+0x1a>
HIDDEN _sk_gather_8888_hsw
.globl _sk_gather_8888_hsw
@@ -9810,7 +10838,7 @@ _sk_store_8888_hsw:
.byte 196,65,45,235,192 // vpor %ymm8,%ymm10,%ymm8
.byte 196,65,53,235,192 // vpor %ymm8,%ymm9,%ymm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,12 // jne 2a38 <_sk_store_8888_hsw+0x74>
+ .byte 117,12 // jne 3068 <_sk_store_8888_hsw+0x74>
.byte 196,65,126,127,1 // vmovdqu %ymm8,(%r9)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,137,193 // mov %r8,%rcx
@@ -9823,7 +10851,7 @@ _sk_store_8888_hsw:
.byte 196,97,249,110,200 // vmovq %rax,%xmm9
.byte 196,66,125,33,201 // vpmovsxbd %xmm9,%ymm9
.byte 196,66,53,142,1 // vpmaskmovd %ymm8,%ymm9,(%r9)
- .byte 235,211 // jmp 2a31 <_sk_store_8888_hsw+0x6d>
+ .byte 235,211 // jmp 3061 <_sk_store_8888_hsw+0x6d>
HIDDEN _sk_load_f16_hsw
.globl _sk_load_f16_hsw
@@ -9832,7 +10860,7 @@ _sk_load_f16_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,139,0 // mov (%rax),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,97 // jne 2ac9 <_sk_load_f16_hsw+0x6b>
+ .byte 117,97 // jne 30f9 <_sk_load_f16_hsw+0x6b>
.byte 197,121,16,4,248 // vmovupd (%rax,%rdi,8),%xmm8
.byte 197,249,16,84,248,16 // vmovupd 0x10(%rax,%rdi,8),%xmm2
.byte 197,249,16,92,248,32 // vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -9858,29 +10886,29 @@ _sk_load_f16_hsw:
.byte 197,123,16,4,248 // vmovsd (%rax,%rdi,8),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,79 // je 2b28 <_sk_load_f16_hsw+0xca>
+ .byte 116,79 // je 3158 <_sk_load_f16_hsw+0xca>
.byte 197,57,22,68,248,8 // vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,67 // jb 2b28 <_sk_load_f16_hsw+0xca>
+ .byte 114,67 // jb 3158 <_sk_load_f16_hsw+0xca>
.byte 197,251,16,84,248,16 // vmovsd 0x10(%rax,%rdi,8),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,68 // je 2b35 <_sk_load_f16_hsw+0xd7>
+ .byte 116,68 // je 3165 <_sk_load_f16_hsw+0xd7>
.byte 197,233,22,84,248,24 // vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,56 // jb 2b35 <_sk_load_f16_hsw+0xd7>
+ .byte 114,56 // jb 3165 <_sk_load_f16_hsw+0xd7>
.byte 197,251,16,92,248,32 // vmovsd 0x20(%rax,%rdi,8),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,114,255,255,255 // je 2a7f <_sk_load_f16_hsw+0x21>
+ .byte 15,132,114,255,255,255 // je 30af <_sk_load_f16_hsw+0x21>
.byte 197,225,22,92,248,40 // vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,98,255,255,255 // jb 2a7f <_sk_load_f16_hsw+0x21>
+ .byte 15,130,98,255,255,255 // jb 30af <_sk_load_f16_hsw+0x21>
.byte 197,122,126,76,248,48 // vmovq 0x30(%rax,%rdi,8),%xmm9
- .byte 233,87,255,255,255 // jmpq 2a7f <_sk_load_f16_hsw+0x21>
+ .byte 233,87,255,255,255 // jmpq 30af <_sk_load_f16_hsw+0x21>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,74,255,255,255 // jmpq 2a7f <_sk_load_f16_hsw+0x21>
+ .byte 233,74,255,255,255 // jmpq 30af <_sk_load_f16_hsw+0x21>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,65,255,255,255 // jmpq 2a7f <_sk_load_f16_hsw+0x21>
+ .byte 233,65,255,255,255 // jmpq 30af <_sk_load_f16_hsw+0x21>
HIDDEN _sk_gather_f16_hsw
.globl _sk_gather_f16_hsw
@@ -9938,7 +10966,7 @@ _sk_store_f16_hsw:
.byte 196,65,57,98,205 // vpunpckldq %xmm13,%xmm8,%xmm9
.byte 196,65,57,106,197 // vpunpckhdq %xmm13,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,27 // jne 2c2d <_sk_store_f16_hsw+0x65>
+ .byte 117,27 // jne 325d <_sk_store_f16_hsw+0x65>
.byte 197,120,17,28,248 // vmovups %xmm11,(%rax,%rdi,8)
.byte 197,120,17,84,248,16 // vmovups %xmm10,0x10(%rax,%rdi,8)
.byte 197,120,17,76,248,32 // vmovups %xmm9,0x20(%rax,%rdi,8)
@@ -9947,22 +10975,22 @@ _sk_store_f16_hsw:
.byte 255,224 // jmpq *%rax
.byte 197,121,214,28,248 // vmovq %xmm11,(%rax,%rdi,8)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,241 // je 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 116,241 // je 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,23,92,248,8 // vmovhpd %xmm11,0x8(%rax,%rdi,8)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,229 // jb 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 114,229 // jb 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,214,84,248,16 // vmovq %xmm10,0x10(%rax,%rdi,8)
- .byte 116,221 // je 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 116,221 // je 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,23,84,248,24 // vmovhpd %xmm10,0x18(%rax,%rdi,8)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,209 // jb 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 114,209 // jb 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,214,76,248,32 // vmovq %xmm9,0x20(%rax,%rdi,8)
- .byte 116,201 // je 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 116,201 // je 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,23,76,248,40 // vmovhpd %xmm9,0x28(%rax,%rdi,8)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,189 // jb 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 114,189 // jb 3259 <_sk_store_f16_hsw+0x61>
.byte 197,121,214,68,248,48 // vmovq %xmm8,0x30(%rax,%rdi,8)
- .byte 235,181 // jmp 2c29 <_sk_store_f16_hsw+0x61>
+ .byte 235,181 // jmp 3259 <_sk_store_f16_hsw+0x61>
HIDDEN _sk_load_u16_be_hsw
.globl _sk_load_u16_be_hsw
@@ -9972,7 +11000,7 @@ _sk_load_u16_be_hsw:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,189,0,0,0,0 // lea 0x0(,%rdi,4),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,205,0,0,0 // jne 2d57 <_sk_load_u16_be_hsw+0xe3>
+ .byte 15,133,205,0,0,0 // jne 3387 <_sk_load_u16_be_hsw+0xe3>
.byte 196,65,121,16,4,64 // vmovupd (%r8,%rax,2),%xmm8
.byte 196,193,121,16,84,64,16 // vmovupd 0x10(%r8,%rax,2),%xmm2
.byte 196,193,121,16,92,64,32 // vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -10021,29 +11049,29 @@ _sk_load_u16_be_hsw:
.byte 196,65,123,16,4,64 // vmovsd (%r8,%rax,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,85 // je 2dbd <_sk_load_u16_be_hsw+0x149>
+ .byte 116,85 // je 33ed <_sk_load_u16_be_hsw+0x149>
.byte 196,65,57,22,68,64,8 // vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,72 // jb 2dbd <_sk_load_u16_be_hsw+0x149>
+ .byte 114,72 // jb 33ed <_sk_load_u16_be_hsw+0x149>
.byte 196,193,123,16,84,64,16 // vmovsd 0x10(%r8,%rax,2),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,72 // je 2dca <_sk_load_u16_be_hsw+0x156>
+ .byte 116,72 // je 33fa <_sk_load_u16_be_hsw+0x156>
.byte 196,193,105,22,84,64,24 // vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,59 // jb 2dca <_sk_load_u16_be_hsw+0x156>
+ .byte 114,59 // jb 33fa <_sk_load_u16_be_hsw+0x156>
.byte 196,193,123,16,92,64,32 // vmovsd 0x20(%r8,%rax,2),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,5,255,255,255 // je 2ca5 <_sk_load_u16_be_hsw+0x31>
+ .byte 15,132,5,255,255,255 // je 32d5 <_sk_load_u16_be_hsw+0x31>
.byte 196,193,97,22,92,64,40 // vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,244,254,255,255 // jb 2ca5 <_sk_load_u16_be_hsw+0x31>
+ .byte 15,130,244,254,255,255 // jb 32d5 <_sk_load_u16_be_hsw+0x31>
.byte 196,65,122,126,76,64,48 // vmovq 0x30(%r8,%rax,2),%xmm9
- .byte 233,232,254,255,255 // jmpq 2ca5 <_sk_load_u16_be_hsw+0x31>
+ .byte 233,232,254,255,255 // jmpq 32d5 <_sk_load_u16_be_hsw+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,219,254,255,255 // jmpq 2ca5 <_sk_load_u16_be_hsw+0x31>
+ .byte 233,219,254,255,255 // jmpq 32d5 <_sk_load_u16_be_hsw+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,210,254,255,255 // jmpq 2ca5 <_sk_load_u16_be_hsw+0x31>
+ .byte 233,210,254,255,255 // jmpq 32d5 <_sk_load_u16_be_hsw+0x31>
HIDDEN _sk_load_rgb_u16_be_hsw
.globl _sk_load_rgb_u16_be_hsw
@@ -10053,7 +11081,7 @@ _sk_load_rgb_u16_be_hsw:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,127 // lea (%rdi,%rdi,2),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,211,0,0,0 // jne 2eb8 <_sk_load_rgb_u16_be_hsw+0xe5>
+ .byte 15,133,211,0,0,0 // jne 34e8 <_sk_load_rgb_u16_be_hsw+0xe5>
.byte 196,193,122,111,4,64 // vmovdqu (%r8,%rax,2),%xmm0
.byte 196,193,122,111,84,64,12 // vmovdqu 0xc(%r8,%rax,2),%xmm2
.byte 196,193,122,111,76,64,24 // vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -10103,36 +11131,36 @@ _sk_load_rgb_u16_be_hsw:
.byte 196,193,121,110,4,64 // vmovd (%r8,%rax,2),%xmm0
.byte 196,193,121,196,68,64,4,2 // vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 117,5 // jne 2ed1 <_sk_load_rgb_u16_be_hsw+0xfe>
- .byte 233,72,255,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 117,5 // jne 3501 <_sk_load_rgb_u16_be_hsw+0xfe>
+ .byte 233,72,255,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
.byte 196,193,121,110,76,64,6 // vmovd 0x6(%r8,%rax,2),%xmm1
.byte 196,65,113,196,68,64,10,2 // vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,26 // jb 2f00 <_sk_load_rgb_u16_be_hsw+0x12d>
+ .byte 114,26 // jb 3530 <_sk_load_rgb_u16_be_hsw+0x12d>
.byte 196,193,121,110,76,64,12 // vmovd 0xc(%r8,%rax,2),%xmm1
.byte 196,193,113,196,84,64,16,2 // vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 117,10 // jne 2f05 <_sk_load_rgb_u16_be_hsw+0x132>
- .byte 233,25,255,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
- .byte 233,20,255,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 117,10 // jne 3535 <_sk_load_rgb_u16_be_hsw+0x132>
+ .byte 233,25,255,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,20,255,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
.byte 196,193,121,110,76,64,18 // vmovd 0x12(%r8,%rax,2),%xmm1
.byte 196,65,113,196,76,64,22,2 // vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,26 // jb 2f34 <_sk_load_rgb_u16_be_hsw+0x161>
+ .byte 114,26 // jb 3564 <_sk_load_rgb_u16_be_hsw+0x161>
.byte 196,193,121,110,76,64,24 // vmovd 0x18(%r8,%rax,2),%xmm1
.byte 196,193,113,196,76,64,28,2 // vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 117,10 // jne 2f39 <_sk_load_rgb_u16_be_hsw+0x166>
- .byte 233,229,254,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
- .byte 233,224,254,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 117,10 // jne 3569 <_sk_load_rgb_u16_be_hsw+0x166>
+ .byte 233,229,254,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,224,254,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
.byte 196,193,121,110,92,64,30 // vmovd 0x1e(%r8,%rax,2),%xmm3
.byte 196,65,97,196,92,64,34,2 // vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,20 // jb 2f62 <_sk_load_rgb_u16_be_hsw+0x18f>
+ .byte 114,20 // jb 3592 <_sk_load_rgb_u16_be_hsw+0x18f>
.byte 196,193,121,110,92,64,36 // vmovd 0x24(%r8,%rax,2),%xmm3
.byte 196,193,97,196,92,64,40,2 // vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- .byte 233,183,254,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
- .byte 233,178,254,255,255 // jmpq 2e19 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,183,254,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
+ .byte 233,178,254,255,255 // jmpq 3449 <_sk_load_rgb_u16_be_hsw+0x46>
HIDDEN _sk_store_u16_be_hsw
.globl _sk_store_u16_be_hsw
@@ -10181,7 +11209,7 @@ _sk_store_u16_be_hsw:
.byte 196,65,17,98,200 // vpunpckldq %xmm8,%xmm13,%xmm9
.byte 196,65,17,106,192 // vpunpckhdq %xmm8,%xmm13,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,31 // jne 3062 <_sk_store_u16_be_hsw+0xfb>
+ .byte 117,31 // jne 3692 <_sk_store_u16_be_hsw+0xfb>
.byte 196,1,120,17,28,72 // vmovups %xmm11,(%r8,%r9,2)
.byte 196,1,120,17,84,72,16 // vmovups %xmm10,0x10(%r8,%r9,2)
.byte 196,1,120,17,76,72,32 // vmovups %xmm9,0x20(%r8,%r9,2)
@@ -10190,22 +11218,22 @@ _sk_store_u16_be_hsw:
.byte 255,224 // jmpq *%rax
.byte 196,1,121,214,28,72 // vmovq %xmm11,(%r8,%r9,2)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 116,240 // je 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,23,92,72,8 // vmovhpd %xmm11,0x8(%r8,%r9,2)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 114,227 // jb 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,214,84,72,16 // vmovq %xmm10,0x10(%r8,%r9,2)
- .byte 116,218 // je 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 116,218 // je 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,23,84,72,24 // vmovhpd %xmm10,0x18(%r8,%r9,2)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 114,205 // jb 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,214,76,72,32 // vmovq %xmm9,0x20(%r8,%r9,2)
- .byte 116,196 // je 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 116,196 // je 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,23,76,72,40 // vmovhpd %xmm9,0x28(%r8,%r9,2)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,183 // jb 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 114,183 // jb 368e <_sk_store_u16_be_hsw+0xf7>
.byte 196,1,121,214,68,72,48 // vmovq %xmm8,0x30(%r8,%r9,2)
- .byte 235,174 // jmp 305e <_sk_store_u16_be_hsw+0xf7>
+ .byte 235,174 // jmp 368e <_sk_store_u16_be_hsw+0xf7>
HIDDEN _sk_load_f32_hsw
.globl _sk_load_f32_hsw
@@ -10213,10 +11241,10 @@ FUNCTION(_sk_load_f32_hsw)
_sk_load_f32_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 119,110 // ja 3126 <_sk_load_f32_hsw+0x76>
+ .byte 119,110 // ja 3756 <_sk_load_f32_hsw+0x76>
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
- .byte 76,141,21,134,0,0,0 // lea 0x86(%rip),%r10 # 3150 <_sk_load_f32_hsw+0xa0>
+ .byte 76,141,21,134,0,0,0 // lea 0x86(%rip),%r10 # 3780 <_sk_load_f32_hsw+0xa0>
.byte 73,99,4,138 // movslq (%r10,%rcx,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -10275,7 +11303,7 @@ _sk_store_f32_hsw:
.byte 196,65,37,20,196 // vunpcklpd %ymm12,%ymm11,%ymm8
.byte 196,65,37,21,220 // vunpckhpd %ymm12,%ymm11,%ymm11
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,55 // jne 31dd <_sk_store_f32_hsw+0x6d>
+ .byte 117,55 // jne 380d <_sk_store_f32_hsw+0x6d>
.byte 196,67,45,24,225,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
.byte 196,67,61,24,235,1 // vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
.byte 196,67,45,6,201,49 // vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -10288,22 +11316,22 @@ _sk_store_f32_hsw:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,17,20,128 // vmovupd %xmm10,(%r8,%rax,4)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 116,240 // je 3809 <_sk_store_f32_hsw+0x69>
.byte 196,65,121,17,76,128,16 // vmovupd %xmm9,0x10(%r8,%rax,4)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 114,227 // jb 3809 <_sk_store_f32_hsw+0x69>
.byte 196,65,121,17,68,128,32 // vmovupd %xmm8,0x20(%r8,%rax,4)
- .byte 116,218 // je 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 116,218 // je 3809 <_sk_store_f32_hsw+0x69>
.byte 196,65,121,17,92,128,48 // vmovupd %xmm11,0x30(%r8,%rax,4)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 114,205 // jb 3809 <_sk_store_f32_hsw+0x69>
.byte 196,67,125,25,84,128,64,1 // vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- .byte 116,195 // je 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 116,195 // je 3809 <_sk_store_f32_hsw+0x69>
.byte 196,67,125,25,76,128,80,1 // vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,181 // jb 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 114,181 // jb 3809 <_sk_store_f32_hsw+0x69>
.byte 196,67,125,25,68,128,96,1 // vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- .byte 235,171 // jmp 31d9 <_sk_store_f32_hsw+0x69>
+ .byte 235,171 // jmp 3809 <_sk_store_f32_hsw+0x69>
HIDDEN _sk_clamp_x_hsw
.globl _sk_clamp_x_hsw
@@ -10568,7 +11596,7 @@ _sk_linear_gradient_hsw:
.byte 196,98,125,24,72,28 // vbroadcastss 0x1c(%rax),%ymm9
.byte 76,139,0 // mov (%rax),%r8
.byte 77,133,192 // test %r8,%r8
- .byte 15,132,143,0,0,0 // je 3669 <_sk_linear_gradient_hsw+0xb5>
+ .byte 15,132,143,0,0,0 // je 3c99 <_sk_linear_gradient_hsw+0xb5>
.byte 72,139,64,8 // mov 0x8(%rax),%rax
.byte 72,131,192,32 // add $0x20,%rax
.byte 196,65,28,87,228 // vxorps %ymm12,%ymm12,%ymm12
@@ -10595,8 +11623,8 @@ _sk_linear_gradient_hsw:
.byte 196,67,13,74,201,208 // vblendvps %ymm13,%ymm9,%ymm14,%ymm9
.byte 72,131,192,36 // add $0x24,%rax
.byte 73,255,200 // dec %r8
- .byte 117,140 // jne 35f3 <_sk_linear_gradient_hsw+0x3f>
- .byte 235,17 // jmp 367a <_sk_linear_gradient_hsw+0xc6>
+ .byte 117,140 // jne 3c23 <_sk_linear_gradient_hsw+0x3f>
+ .byte 235,17 // jmp 3caa <_sk_linear_gradient_hsw+0xc6>
.byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
.byte 197,236,87,210 // vxorps %ymm2,%ymm2,%ymm2
.byte 197,228,87,219 // vxorps %ymm3,%ymm3,%ymm3
@@ -13611,6 +14639,418 @@ _sk_table_a_avx:
.byte 65,95 // pop %r15
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_parametric_r_avx
+.globl _sk_parametric_r_avx
+FUNCTION(_sk_parametric_r_avx)
+_sk_parametric_r_avx:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8
+ .byte 196,65,124,194,192,2 // vcmpleps %ymm8,%ymm0,%ymm8
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10
+ .byte 197,52,89,200 // vmulps %ymm0,%ymm9,%ymm9
+ .byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11
+ .byte 197,172,89,192 // vmulps %ymm0,%ymm10,%ymm0
+ .byte 196,65,124,88,219 // vaddps %ymm11,%ymm0,%ymm11
+ .byte 196,65,124,91,211 // vcvtdq2ps %ymm11,%ymm10
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,44,89,208 // vmulps %ymm0,%ymm10,%ymm10
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,44,92,208 // vsubps %ymm0,%ymm10,%ymm10
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 197,249,112,192,0 // vpshufd $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 196,65,124,84,219 // vandps %ymm11,%ymm0,%ymm11
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 197,249,112,192,0 // vpshufd $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,36,86,216 // vorps %ymm0,%ymm11,%ymm11
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,44,88,208 // vaddps %ymm0,%ymm10,%ymm10
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,164,89,192 // vmulps %ymm0,%ymm11,%ymm0
+ .byte 197,44,92,208 // vsubps %ymm0,%ymm10,%ymm10
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,99,125,24,224,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm12
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,164,88,192 // vaddps %ymm0,%ymm11,%ymm0
+ .byte 197,156,94,192 // vdivps %ymm0,%ymm12,%ymm0
+ .byte 197,172,92,192 // vsubps %ymm0,%ymm10,%ymm0
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
+ .byte 197,44,89,216 // vmulps %ymm0,%ymm10,%ymm11
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,99,125,24,224,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 196,65,124,88,219 // vaddps %ymm11,%ymm0,%ymm11
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 196,193,124,89,194 // vmulps %ymm10,%ymm0,%ymm0
+ .byte 197,36,92,216 // vsubps %ymm0,%ymm11,%ymm11
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,99,125,24,232,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm13
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d
+ .byte 196,193,121,110,192 // vmovd %r8d,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 196,193,124,92,194 // vsubps %ymm10,%ymm0,%ymm0
+ .byte 197,148,94,192 // vdivps %ymm0,%ymm13,%ymm0
+ .byte 197,164,88,192 // vaddps %ymm0,%ymm11,%ymm0
+ .byte 197,156,89,192 // vmulps %ymm0,%ymm12,%ymm0
+ .byte 197,253,91,192 // vcvtps2dq %ymm0,%ymm0
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
+ .byte 196,193,124,88,194 // vaddps %ymm10,%ymm0,%ymm0
+ .byte 196,195,125,74,193,128 // vblendvps %ymm8,%ymm9,%ymm0,%ymm0
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
+ .byte 196,65,124,95,192 // vmaxps %ymm8,%ymm0,%ymm8
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 197,249,110,192 // vmovd %eax,%xmm0
+ .byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,188,93,192 // vminps %ymm0,%ymm8,%ymm0
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_g_avx
+.globl _sk_parametric_g_avx
+FUNCTION(_sk_parametric_g_avx)
+_sk_parametric_g_avx:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8
+ .byte 196,65,116,194,192,2 // vcmpleps %ymm8,%ymm1,%ymm8
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10
+ .byte 197,52,89,201 // vmulps %ymm1,%ymm9,%ymm9
+ .byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11
+ .byte 197,172,89,201 // vmulps %ymm1,%ymm10,%ymm1
+ .byte 196,65,116,88,219 // vaddps %ymm11,%ymm1,%ymm11
+ .byte 196,65,124,91,211 // vcvtdq2ps %ymm11,%ymm10
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,44,89,209 // vmulps %ymm1,%ymm10,%ymm10
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,44,92,209 // vsubps %ymm1,%ymm10,%ymm10
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 197,249,112,201,0 // vpshufd $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 196,65,116,84,219 // vandps %ymm11,%ymm1,%ymm11
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 197,249,112,201,0 // vpshufd $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,36,86,217 // vorps %ymm1,%ymm11,%ymm11
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,44,88,209 // vaddps %ymm1,%ymm10,%ymm10
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,164,89,201 // vmulps %ymm1,%ymm11,%ymm1
+ .byte 197,44,92,209 // vsubps %ymm1,%ymm10,%ymm10
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,99,117,24,225,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm12
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,164,88,201 // vaddps %ymm1,%ymm11,%ymm1
+ .byte 197,156,94,201 // vdivps %ymm1,%ymm12,%ymm1
+ .byte 197,172,92,201 // vsubps %ymm1,%ymm10,%ymm1
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
+ .byte 197,44,89,217 // vmulps %ymm1,%ymm10,%ymm11
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,99,117,24,225,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 196,65,116,88,219 // vaddps %ymm11,%ymm1,%ymm11
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 196,193,116,89,202 // vmulps %ymm10,%ymm1,%ymm1
+ .byte 197,36,92,217 // vsubps %ymm1,%ymm11,%ymm11
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,99,117,24,233,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm13
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d
+ .byte 196,193,121,110,200 // vmovd %r8d,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 196,193,116,92,202 // vsubps %ymm10,%ymm1,%ymm1
+ .byte 197,148,94,201 // vdivps %ymm1,%ymm13,%ymm1
+ .byte 197,164,88,201 // vaddps %ymm1,%ymm11,%ymm1
+ .byte 197,156,89,201 // vmulps %ymm1,%ymm12,%ymm1
+ .byte 197,253,91,201 // vcvtps2dq %ymm1,%ymm1
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
+ .byte 196,193,116,88,202 // vaddps %ymm10,%ymm1,%ymm1
+ .byte 196,195,117,74,201,128 // vblendvps %ymm8,%ymm9,%ymm1,%ymm1
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
+ .byte 196,65,116,95,192 // vmaxps %ymm8,%ymm1,%ymm8
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 197,249,110,200 // vmovd %eax,%xmm1
+ .byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
+ .byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ .byte 197,188,93,201 // vminps %ymm1,%ymm8,%ymm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_b_avx
+.globl _sk_parametric_b_avx
+FUNCTION(_sk_parametric_b_avx)
+_sk_parametric_b_avx:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8
+ .byte 196,65,108,194,192,2 // vcmpleps %ymm8,%ymm2,%ymm8
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10
+ .byte 197,52,89,202 // vmulps %ymm2,%ymm9,%ymm9
+ .byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11
+ .byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
+ .byte 196,65,108,88,219 // vaddps %ymm11,%ymm2,%ymm11
+ .byte 196,65,124,91,211 // vcvtdq2ps %ymm11,%ymm10
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,44,89,210 // vmulps %ymm2,%ymm10,%ymm10
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,44,92,210 // vsubps %ymm2,%ymm10,%ymm10
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 197,249,112,210,0 // vpshufd $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 196,65,108,84,219 // vandps %ymm11,%ymm2,%ymm11
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 197,249,112,210,0 // vpshufd $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,36,86,218 // vorps %ymm2,%ymm11,%ymm11
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,44,88,210 // vaddps %ymm2,%ymm10,%ymm10
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,164,89,210 // vmulps %ymm2,%ymm11,%ymm2
+ .byte 197,44,92,210 // vsubps %ymm2,%ymm10,%ymm10
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,226,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,164,88,210 // vaddps %ymm2,%ymm11,%ymm2
+ .byte 197,156,94,210 // vdivps %ymm2,%ymm12,%ymm2
+ .byte 197,172,92,210 // vsubps %ymm2,%ymm10,%ymm2
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
+ .byte 197,44,89,218 // vmulps %ymm2,%ymm10,%ymm11
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,226,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 196,65,108,88,219 // vaddps %ymm11,%ymm2,%ymm11
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 196,193,108,89,210 // vmulps %ymm10,%ymm2,%ymm2
+ .byte 197,36,92,218 // vsubps %ymm2,%ymm11,%ymm11
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,99,109,24,234,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm13
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d
+ .byte 196,193,121,110,208 // vmovd %r8d,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 196,193,108,92,210 // vsubps %ymm10,%ymm2,%ymm2
+ .byte 197,148,94,210 // vdivps %ymm2,%ymm13,%ymm2
+ .byte 197,164,88,210 // vaddps %ymm2,%ymm11,%ymm2
+ .byte 197,156,89,210 // vmulps %ymm2,%ymm12,%ymm2
+ .byte 197,253,91,210 // vcvtps2dq %ymm2,%ymm2
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
+ .byte 196,193,108,88,210 // vaddps %ymm10,%ymm2,%ymm2
+ .byte 196,195,109,74,209,128 // vblendvps %ymm8,%ymm9,%ymm2,%ymm2
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
+ .byte 196,65,108,95,192 // vmaxps %ymm8,%ymm2,%ymm8
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 197,249,110,208 // vmovd %eax,%xmm2
+ .byte 196,227,121,4,210,0 // vpermilps $0x0,%xmm2,%xmm2
+ .byte 196,227,109,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ .byte 197,188,93,210 // vminps %ymm2,%ymm8,%ymm2
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_a_avx
+.globl _sk_parametric_a_avx
+FUNCTION(_sk_parametric_a_avx)
+_sk_parametric_a_avx:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 196,98,125,24,64,16 // vbroadcastss 0x10(%rax),%ymm8
+ .byte 196,65,100,194,192,2 // vcmpleps %ymm8,%ymm3,%ymm8
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9
+ .byte 196,98,125,24,80,24 // vbroadcastss 0x18(%rax),%ymm10
+ .byte 197,52,89,203 // vmulps %ymm3,%ymm9,%ymm9
+ .byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
+ .byte 196,98,125,24,80,4 // vbroadcastss 0x4(%rax),%ymm10
+ .byte 196,98,125,24,88,8 // vbroadcastss 0x8(%rax),%ymm11
+ .byte 197,172,89,219 // vmulps %ymm3,%ymm10,%ymm3
+ .byte 196,65,100,88,219 // vaddps %ymm11,%ymm3,%ymm11
+ .byte 196,65,124,91,211 // vcvtdq2ps %ymm11,%ymm10
+ .byte 65,184,0,0,0,52 // mov $0x34000000,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,44,89,211 // vmulps %ymm3,%ymm10,%ymm10
+ .byte 65,184,0,0,254,66 // mov $0x42fe0000,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,44,92,211 // vsubps %ymm3,%ymm10,%ymm10
+ .byte 65,184,255,255,127,0 // mov $0x7fffff,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 197,249,112,219,0 // vpshufd $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 196,65,100,84,219 // vandps %ymm11,%ymm3,%ymm11
+ .byte 65,184,0,0,0,63 // mov $0x3f000000,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 197,249,112,219,0 // vpshufd $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,36,86,219 // vorps %ymm3,%ymm11,%ymm11
+ .byte 65,184,42,145,49,64 // mov $0x4031912a,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,44,88,211 // vaddps %ymm3,%ymm10,%ymm10
+ .byte 65,184,117,191,191,63 // mov $0x3fbfbf75,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,164,89,219 // vmulps %ymm3,%ymm11,%ymm3
+ .byte 197,44,92,211 // vsubps %ymm3,%ymm10,%ymm10
+ .byte 65,184,163,233,220,63 // mov $0x3fdce9a3,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,99,101,24,227,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm12
+ .byte 65,184,249,68,180,62 // mov $0x3eb444f9,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,164,88,219 // vaddps %ymm3,%ymm11,%ymm3
+ .byte 197,156,94,219 // vdivps %ymm3,%ymm12,%ymm3
+ .byte 197,172,92,219 // vsubps %ymm3,%ymm10,%ymm3
+ .byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
+ .byte 197,44,89,219 // vmulps %ymm3,%ymm10,%ymm11
+ .byte 196,67,125,8,211,1 // vroundps $0x1,%ymm11,%ymm10
+ .byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,99,101,24,227,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm12
+ .byte 65,184,81,140,242,66 // mov $0x42f28c51,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 196,65,100,88,219 // vaddps %ymm11,%ymm3,%ymm11
+ .byte 65,184,141,188,190,63 // mov $0x3fbebc8d,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 196,193,100,89,218 // vmulps %ymm10,%ymm3,%ymm3
+ .byte 197,36,92,219 // vsubps %ymm3,%ymm11,%ymm11
+ .byte 65,184,254,210,221,65 // mov $0x41ddd2fe,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,99,101,24,235,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm13
+ .byte 65,184,248,245,154,64 // mov $0x409af5f8,%r8d
+ .byte 196,193,121,110,216 // vmovd %r8d,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 196,193,100,92,218 // vsubps %ymm10,%ymm3,%ymm3
+ .byte 197,148,94,219 // vdivps %ymm3,%ymm13,%ymm3
+ .byte 197,164,88,219 // vaddps %ymm3,%ymm11,%ymm3
+ .byte 197,156,89,219 // vmulps %ymm3,%ymm12,%ymm3
+ .byte 197,253,91,219 // vcvtps2dq %ymm3,%ymm3
+ .byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
+ .byte 196,193,100,88,218 // vaddps %ymm10,%ymm3,%ymm3
+ .byte 196,195,101,74,217,128 // vblendvps %ymm8,%ymm9,%ymm3,%ymm3
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
+ .byte 196,65,100,95,192 // vmaxps %ymm8,%ymm3,%ymm8
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 197,249,110,216 // vmovd %eax,%xmm3
+ .byte 196,227,121,4,219,0 // vpermilps $0x0,%xmm3,%xmm3
+ .byte 196,227,101,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ .byte 197,188,93,219 // vminps %ymm3,%ymm8,%ymm3
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
HIDDEN _sk_load_a8_avx
.globl _sk_load_a8_avx
FUNCTION(_sk_load_a8_avx)
@@ -13620,7 +15060,7 @@ _sk_load_a8_avx:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,74 // jne 2912 <_sk_load_a8_avx+0x5a>
+ .byte 117,74 // jne 310a <_sk_load_a8_avx+0x5a>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,121,49,200 // vpmovzxbd %xmm0,%xmm1
.byte 196,227,121,4,192,229 // vpermilps $0xe5,%xmm0,%xmm0
@@ -13647,9 +15087,9 @@ _sk_load_a8_avx:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 291a <_sk_load_a8_avx+0x62>
+ .byte 117,234 // jne 3112 <_sk_load_a8_avx+0x62>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,149 // jmp 28cc <_sk_load_a8_avx+0x14>
+ .byte 235,149 // jmp 30c4 <_sk_load_a8_avx+0x14>
HIDDEN _sk_gather_a8_avx
.globl _sk_gather_a8_avx
@@ -13730,7 +15170,7 @@ _sk_store_a8_avx:
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 196,65,57,103,192 // vpackuswb %xmm8,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 2a73 <_sk_store_a8_avx+0x42>
+ .byte 117,10 // jne 326b <_sk_store_a8_avx+0x42>
.byte 196,65,123,17,4,57 // vmovsd %xmm8,(%r9,%rdi,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -13738,10 +15178,10 @@ _sk_store_a8_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 2a6f <_sk_store_a8_avx+0x3e>
+ .byte 119,236 // ja 3267 <_sk_store_a8_avx+0x3e>
.byte 196,66,121,48,192 // vpmovzxbw %xmm8,%xmm8
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,69,0,0,0 // lea 0x45(%rip),%r8 # 2ad8 <_sk_store_a8_avx+0xa7>
+ .byte 76,141,5,69,0,0,0 // lea 0x45(%rip),%r8 # 32d0 <_sk_store_a8_avx+0xa7>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -13752,7 +15192,7 @@ _sk_store_a8_avx:
.byte 196,67,121,20,68,57,2,4 // vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
.byte 196,67,121,20,68,57,1,2 // vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
.byte 196,67,121,20,4,57,0 // vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- .byte 235,154 // jmp 2a6f <_sk_store_a8_avx+0x3e>
+ .byte 235,154 // jmp 3267 <_sk_store_a8_avx+0x3e>
.byte 15,31,0 // nopl (%rax)
.byte 244 // hlt
.byte 255 // (bad)
@@ -13787,7 +15227,7 @@ _sk_load_g8_avx:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,91 // jne 2b5f <_sk_load_g8_avx+0x6b>
+ .byte 117,91 // jne 3357 <_sk_load_g8_avx+0x6b>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,121,49,200 // vpmovzxbd %xmm0,%xmm1
.byte 196,227,121,4,192,229 // vpermilps $0xe5,%xmm0,%xmm0
@@ -13817,9 +15257,9 @@ _sk_load_g8_avx:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 2b67 <_sk_load_g8_avx+0x73>
+ .byte 117,234 // jne 335f <_sk_load_g8_avx+0x73>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,132 // jmp 2b08 <_sk_load_g8_avx+0x14>
+ .byte 235,132 // jmp 3300 <_sk_load_g8_avx+0x14>
HIDDEN _sk_gather_g8_avx
.globl _sk_gather_g8_avx
@@ -13894,9 +15334,9 @@ _sk_gather_i8_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 2c9e <_sk_gather_i8_avx+0xf>
+ .byte 116,5 // je 3496 <_sk_gather_i8_avx+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 2ca0 <_sk_gather_i8_avx+0x11>
+ .byte 235,2 // jmp 3498 <_sk_gather_i8_avx+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 65,87 // push %r15
.byte 65,86 // push %r14
@@ -14001,7 +15441,7 @@ _sk_load_565_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,209,0,0,0 // jne 2f3a <_sk_load_565_avx+0xdf>
+ .byte 15,133,209,0,0,0 // jne 3732 <_sk_load_565_avx+0xdf>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -14051,9 +15491,9 @@ _sk_load_565_avx:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,29,255,255,255 // ja 2e6f <_sk_load_565_avx+0x14>
+ .byte 15,135,29,255,255,255 // ja 3667 <_sk_load_565_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,75,0,0,0 // lea 0x4b(%rip),%r9 # 2fa8 <_sk_load_565_avx+0x14d>
+ .byte 76,141,13,75,0,0,0 // lea 0x4b(%rip),%r9 # 37a0 <_sk_load_565_avx+0x14d>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -14065,7 +15505,7 @@ _sk_load_565_avx:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,201,254,255,255 // jmpq 2e6f <_sk_load_565_avx+0x14>
+ .byte 233,201,254,255,255 // jmpq 3667 <_sk_load_565_avx+0x14>
.byte 102,144 // xchg %ax,%ax
.byte 242,255 // repnz (bad)
.byte 255 // (bad)
@@ -14222,7 +15662,7 @@ _sk_store_565_avx:
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 31f3 <_sk_store_565_avx+0x9e>
+ .byte 117,10 // jne 39eb <_sk_store_565_avx+0x9e>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -14230,9 +15670,9 @@ _sk_store_565_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 31ef <_sk_store_565_avx+0x9a>
+ .byte 119,236 // ja 39e7 <_sk_store_565_avx+0x9a>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 3250 <_sk_store_565_avx+0xfb>
+ .byte 76,141,5,66,0,0,0 // lea 0x42(%rip),%r8 # 3a48 <_sk_store_565_avx+0xfb>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -14243,7 +15683,7 @@ _sk_store_565_avx:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 31ef <_sk_store_565_avx+0x9a>
+ .byte 235,159 // jmp 39e7 <_sk_store_565_avx+0x9a>
.byte 247,255 // idiv %edi
.byte 255 // (bad)
.byte 255 // (bad)
@@ -14274,7 +15714,7 @@ _sk_load_4444_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,245,0,0,0 // jne 336f <_sk_load_4444_avx+0x103>
+ .byte 15,133,245,0,0,0 // jne 3b67 <_sk_load_4444_avx+0x103>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -14331,9 +15771,9 @@ _sk_load_4444_avx:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,249,254,255,255 // ja 3280 <_sk_load_4444_avx+0x14>
+ .byte 15,135,249,254,255,255 // ja 3a78 <_sk_load_4444_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 33dc <_sk_load_4444_avx+0x170>
+ .byte 76,141,13,74,0,0,0 // lea 0x4a(%rip),%r9 # 3bd4 <_sk_load_4444_avx+0x170>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -14345,12 +15785,12 @@ _sk_load_4444_avx:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,165,254,255,255 // jmpq 3280 <_sk_load_4444_avx+0x14>
+ .byte 233,165,254,255,255 // jmpq 3a78 <_sk_load_4444_avx+0x14>
.byte 144 // nop
.byte 243,255 // repz (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 235,255 // jmp 33e1 <_sk_load_4444_avx+0x175>
+ .byte 235,255 // jmp 3bd9 <_sk_load_4444_avx+0x175>
.byte 255 // (bad)
.byte 255,227 // jmpq *%rbx
.byte 255 // (bad)
@@ -14511,7 +15951,7 @@ _sk_store_4444_avx:
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 365c <_sk_store_4444_avx+0xaf>
+ .byte 117,10 // jne 3e54 <_sk_store_4444_avx+0xaf>
.byte 196,65,122,127,4,121 // vmovdqu %xmm8,(%r9,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -14519,9 +15959,9 @@ _sk_store_4444_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 3658 <_sk_store_4444_avx+0xab>
+ .byte 119,236 // ja 3e50 <_sk_store_4444_avx+0xab>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,69,0,0,0 // lea 0x45(%rip),%r8 # 36bc <_sk_store_4444_avx+0x10f>
+ .byte 76,141,5,69,0,0,0 // lea 0x45(%rip),%r8 # 3eb4 <_sk_store_4444_avx+0x10f>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -14532,7 +15972,7 @@ _sk_store_4444_avx:
.byte 196,67,121,21,68,121,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
.byte 196,67,121,21,68,121,2,1 // vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
.byte 196,67,121,21,4,121,0 // vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- .byte 235,159 // jmp 3658 <_sk_store_4444_avx+0xab>
+ .byte 235,159 // jmp 3e50 <_sk_store_4444_avx+0xab>
.byte 15,31,0 // nopl (%rax)
.byte 244 // hlt
.byte 255 // (bad)
@@ -14565,7 +16005,7 @@ _sk_load_8888_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,157,0,0,0 // jne 3783 <_sk_load_8888_avx+0xab>
+ .byte 15,133,157,0,0,0 // jne 3f7b <_sk_load_8888_avx+0xab>
.byte 196,65,124,16,12,186 // vmovups (%r10,%rdi,4),%ymm9
.byte 184,255,0,0,0 // mov $0xff,%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
@@ -14603,9 +16043,9 @@ _sk_load_8888_avx:
.byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,80,255,255,255 // ja 36ec <_sk_load_8888_avx+0x14>
+ .byte 15,135,80,255,255,255 // ja 3ee4 <_sk_load_8888_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,137,0,0,0 // lea 0x89(%rip),%r9 # 3830 <_sk_load_8888_avx+0x158>
+ .byte 76,141,13,137,0,0,0 // lea 0x89(%rip),%r9 # 4028 <_sk_load_8888_avx+0x158>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -14628,7 +16068,7 @@ _sk_load_8888_avx:
.byte 196,99,53,12,200,15 // vblendps $0xf,%ymm0,%ymm9,%ymm9
.byte 196,195,49,34,4,186,0 // vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
.byte 196,99,53,12,200,15 // vblendps $0xf,%ymm0,%ymm9,%ymm9
- .byte 233,188,254,255,255 // jmpq 36ec <_sk_load_8888_avx+0x14>
+ .byte 233,188,254,255,255 // jmpq 3ee4 <_sk_load_8888_avx+0x14>
.byte 238 // out %al,(%dx)
.byte 255 // (bad)
.byte 255 // (bad)
@@ -14758,7 +16198,7 @@ _sk_store_8888_avx:
.byte 196,65,45,86,192 // vorpd %ymm8,%ymm10,%ymm8
.byte 196,65,53,86,192 // vorpd %ymm8,%ymm9,%ymm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 3a31 <_sk_store_8888_avx+0xa4>
+ .byte 117,10 // jne 4229 <_sk_store_8888_avx+0xa4>
.byte 196,65,124,17,4,185 // vmovups %ymm8,(%r9,%rdi,4)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -14766,9 +16206,9 @@ _sk_store_8888_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 3a2d <_sk_store_8888_avx+0xa0>
+ .byte 119,236 // ja 4225 <_sk_store_8888_avx+0xa0>
.byte 65,15,182,192 // movzbl %r8b,%eax
- .byte 76,141,5,84,0,0,0 // lea 0x54(%rip),%r8 # 3aa0 <_sk_store_8888_avx+0x113>
+ .byte 76,141,5,84,0,0,0 // lea 0x54(%rip),%r8 # 4298 <_sk_store_8888_avx+0x113>
.byte 73,99,4,128 // movslq (%r8,%rax,4),%rax
.byte 76,1,192 // add %r8,%rax
.byte 255,224 // jmpq *%rax
@@ -14782,7 +16222,7 @@ _sk_store_8888_avx:
.byte 196,67,121,22,68,185,8,2 // vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4)
.byte 196,67,121,22,68,185,4,1 // vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4)
.byte 196,65,121,126,4,185 // vmovd %xmm8,(%r9,%rdi,4)
- .byte 235,143 // jmp 3a2d <_sk_store_8888_avx+0xa0>
+ .byte 235,143 // jmp 4225 <_sk_store_8888_avx+0xa0>
.byte 102,144 // xchg %ax,%ax
.byte 246,255 // idiv %bh
.byte 255 // (bad)
@@ -14814,7 +16254,7 @@ _sk_load_f16_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,139,0 // mov (%rax),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,17,1,0,0 // jne 3bdb <_sk_load_f16_avx+0x11f>
+ .byte 15,133,17,1,0,0 // jne 43d3 <_sk_load_f16_avx+0x11f>
.byte 197,121,16,4,248 // vmovupd (%rax,%rdi,8),%xmm8
.byte 197,249,16,84,248,16 // vmovupd 0x10(%rax,%rdi,8),%xmm2
.byte 197,249,16,92,248,32 // vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -14876,29 +16316,29 @@ _sk_load_f16_avx:
.byte 197,123,16,4,248 // vmovsd (%rax,%rdi,8),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,79 // je 3c3a <_sk_load_f16_avx+0x17e>
+ .byte 116,79 // je 4432 <_sk_load_f16_avx+0x17e>
.byte 197,57,22,68,248,8 // vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,67 // jb 3c3a <_sk_load_f16_avx+0x17e>
+ .byte 114,67 // jb 4432 <_sk_load_f16_avx+0x17e>
.byte 197,251,16,84,248,16 // vmovsd 0x10(%rax,%rdi,8),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,68 // je 3c47 <_sk_load_f16_avx+0x18b>
+ .byte 116,68 // je 443f <_sk_load_f16_avx+0x18b>
.byte 197,233,22,84,248,24 // vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,56 // jb 3c47 <_sk_load_f16_avx+0x18b>
+ .byte 114,56 // jb 443f <_sk_load_f16_avx+0x18b>
.byte 197,251,16,92,248,32 // vmovsd 0x20(%rax,%rdi,8),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,194,254,255,255 // je 3ae1 <_sk_load_f16_avx+0x25>
+ .byte 15,132,194,254,255,255 // je 42d9 <_sk_load_f16_avx+0x25>
.byte 197,225,22,92,248,40 // vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,178,254,255,255 // jb 3ae1 <_sk_load_f16_avx+0x25>
+ .byte 15,130,178,254,255,255 // jb 42d9 <_sk_load_f16_avx+0x25>
.byte 197,122,126,76,248,48 // vmovq 0x30(%rax,%rdi,8),%xmm9
- .byte 233,167,254,255,255 // jmpq 3ae1 <_sk_load_f16_avx+0x25>
+ .byte 233,167,254,255,255 // jmpq 42d9 <_sk_load_f16_avx+0x25>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,154,254,255,255 // jmpq 3ae1 <_sk_load_f16_avx+0x25>
+ .byte 233,154,254,255,255 // jmpq 42d9 <_sk_load_f16_avx+0x25>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,145,254,255,255 // jmpq 3ae1 <_sk_load_f16_avx+0x25>
+ .byte 233,145,254,255,255 // jmpq 42d9 <_sk_load_f16_avx+0x25>
HIDDEN _sk_gather_f16_avx
.globl _sk_gather_f16_avx
@@ -15042,7 +16482,7 @@ _sk_store_f16_avx:
.byte 196,65,17,98,200 // vpunpckldq %xmm8,%xmm13,%xmm9
.byte 196,65,17,106,192 // vpunpckhdq %xmm8,%xmm13,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,31 // jne 3ed0 <_sk_store_f16_avx+0xd2>
+ .byte 117,31 // jne 46c8 <_sk_store_f16_avx+0xd2>
.byte 196,65,120,17,28,248 // vmovups %xmm11,(%r8,%rdi,8)
.byte 196,65,120,17,84,248,16 // vmovups %xmm10,0x10(%r8,%rdi,8)
.byte 196,65,120,17,76,248,32 // vmovups %xmm9,0x20(%r8,%rdi,8)
@@ -15051,22 +16491,22 @@ _sk_store_f16_avx:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,214,28,248 // vmovq %xmm11,(%r8,%rdi,8)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 3ecc <_sk_store_f16_avx+0xce>
+ .byte 116,240 // je 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,23,92,248,8 // vmovhpd %xmm11,0x8(%r8,%rdi,8)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 3ecc <_sk_store_f16_avx+0xce>
+ .byte 114,227 // jb 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,214,84,248,16 // vmovq %xmm10,0x10(%r8,%rdi,8)
- .byte 116,218 // je 3ecc <_sk_store_f16_avx+0xce>
+ .byte 116,218 // je 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,23,84,248,24 // vmovhpd %xmm10,0x18(%r8,%rdi,8)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 3ecc <_sk_store_f16_avx+0xce>
+ .byte 114,205 // jb 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,214,76,248,32 // vmovq %xmm9,0x20(%r8,%rdi,8)
- .byte 116,196 // je 3ecc <_sk_store_f16_avx+0xce>
+ .byte 116,196 // je 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,23,76,248,40 // vmovhpd %xmm9,0x28(%r8,%rdi,8)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,183 // jb 3ecc <_sk_store_f16_avx+0xce>
+ .byte 114,183 // jb 46c4 <_sk_store_f16_avx+0xce>
.byte 196,65,121,214,68,248,48 // vmovq %xmm8,0x30(%r8,%rdi,8)
- .byte 235,174 // jmp 3ecc <_sk_store_f16_avx+0xce>
+ .byte 235,174 // jmp 46c4 <_sk_store_f16_avx+0xce>
HIDDEN _sk_load_u16_be_avx
.globl _sk_load_u16_be_avx
@@ -15076,7 +16516,7 @@ _sk_load_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,189,0,0,0,0 // lea 0x0(,%rdi,4),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,5,1,0,0 // jne 4039 <_sk_load_u16_be_avx+0x11b>
+ .byte 15,133,5,1,0,0 // jne 4831 <_sk_load_u16_be_avx+0x11b>
.byte 196,65,121,16,4,64 // vmovupd (%r8,%rax,2),%xmm8
.byte 196,193,121,16,84,64,16 // vmovupd 0x10(%r8,%rax,2),%xmm2
.byte 196,193,121,16,92,64,32 // vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -15135,29 +16575,29 @@ _sk_load_u16_be_avx:
.byte 196,65,123,16,4,64 // vmovsd (%r8,%rax,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,85 // je 409f <_sk_load_u16_be_avx+0x181>
+ .byte 116,85 // je 4897 <_sk_load_u16_be_avx+0x181>
.byte 196,65,57,22,68,64,8 // vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,72 // jb 409f <_sk_load_u16_be_avx+0x181>
+ .byte 114,72 // jb 4897 <_sk_load_u16_be_avx+0x181>
.byte 196,193,123,16,84,64,16 // vmovsd 0x10(%r8,%rax,2),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,72 // je 40ac <_sk_load_u16_be_avx+0x18e>
+ .byte 116,72 // je 48a4 <_sk_load_u16_be_avx+0x18e>
.byte 196,193,105,22,84,64,24 // vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,59 // jb 40ac <_sk_load_u16_be_avx+0x18e>
+ .byte 114,59 // jb 48a4 <_sk_load_u16_be_avx+0x18e>
.byte 196,193,123,16,92,64,32 // vmovsd 0x20(%r8,%rax,2),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,205,254,255,255 // je 3f4f <_sk_load_u16_be_avx+0x31>
+ .byte 15,132,205,254,255,255 // je 4747 <_sk_load_u16_be_avx+0x31>
.byte 196,193,97,22,92,64,40 // vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,188,254,255,255 // jb 3f4f <_sk_load_u16_be_avx+0x31>
+ .byte 15,130,188,254,255,255 // jb 4747 <_sk_load_u16_be_avx+0x31>
.byte 196,65,122,126,76,64,48 // vmovq 0x30(%r8,%rax,2),%xmm9
- .byte 233,176,254,255,255 // jmpq 3f4f <_sk_load_u16_be_avx+0x31>
+ .byte 233,176,254,255,255 // jmpq 4747 <_sk_load_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,163,254,255,255 // jmpq 3f4f <_sk_load_u16_be_avx+0x31>
+ .byte 233,163,254,255,255 // jmpq 4747 <_sk_load_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,154,254,255,255 // jmpq 3f4f <_sk_load_u16_be_avx+0x31>
+ .byte 233,154,254,255,255 // jmpq 4747 <_sk_load_u16_be_avx+0x31>
HIDDEN _sk_load_rgb_u16_be_avx
.globl _sk_load_rgb_u16_be_avx
@@ -15167,7 +16607,7 @@ _sk_load_rgb_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,127 // lea (%rdi,%rdi,2),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,8,1,0,0 // jne 41cf <_sk_load_rgb_u16_be_avx+0x11a>
+ .byte 15,133,8,1,0,0 // jne 49c7 <_sk_load_rgb_u16_be_avx+0x11a>
.byte 196,193,122,111,4,64 // vmovdqu (%r8,%rax,2),%xmm0
.byte 196,193,122,111,84,64,12 // vmovdqu 0xc(%r8,%rax,2),%xmm2
.byte 196,193,122,111,76,64,24 // vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -15226,36 +16666,36 @@ _sk_load_rgb_u16_be_avx:
.byte 196,193,121,110,4,64 // vmovd (%r8,%rax,2),%xmm0
.byte 196,193,121,196,68,64,4,2 // vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 117,5 // jne 41e8 <_sk_load_rgb_u16_be_avx+0x133>
- .byte 233,19,255,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,5 // jne 49e0 <_sk_load_rgb_u16_be_avx+0x133>
+ .byte 233,19,255,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,76,64,6 // vmovd 0x6(%r8,%rax,2),%xmm1
.byte 196,65,113,196,68,64,10,2 // vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,26 // jb 4217 <_sk_load_rgb_u16_be_avx+0x162>
+ .byte 114,26 // jb 4a0f <_sk_load_rgb_u16_be_avx+0x162>
.byte 196,193,121,110,76,64,12 // vmovd 0xc(%r8,%rax,2),%xmm1
.byte 196,193,113,196,84,64,16,2 // vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 117,10 // jne 421c <_sk_load_rgb_u16_be_avx+0x167>
- .byte 233,228,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,223,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 4a14 <_sk_load_rgb_u16_be_avx+0x167>
+ .byte 233,228,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,223,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,76,64,18 // vmovd 0x12(%r8,%rax,2),%xmm1
.byte 196,65,113,196,76,64,22,2 // vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,26 // jb 424b <_sk_load_rgb_u16_be_avx+0x196>
+ .byte 114,26 // jb 4a43 <_sk_load_rgb_u16_be_avx+0x196>
.byte 196,193,121,110,76,64,24 // vmovd 0x18(%r8,%rax,2),%xmm1
.byte 196,193,113,196,76,64,28,2 // vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 117,10 // jne 4250 <_sk_load_rgb_u16_be_avx+0x19b>
- .byte 233,176,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,171,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 4a48 <_sk_load_rgb_u16_be_avx+0x19b>
+ .byte 233,176,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,171,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,92,64,30 // vmovd 0x1e(%r8,%rax,2),%xmm3
.byte 196,65,97,196,92,64,34,2 // vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,20 // jb 4279 <_sk_load_rgb_u16_be_avx+0x1c4>
+ .byte 114,20 // jb 4a71 <_sk_load_rgb_u16_be_avx+0x1c4>
.byte 196,193,121,110,92,64,36 // vmovd 0x24(%r8,%rax,2),%xmm3
.byte 196,193,97,196,92,64,40,2 // vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- .byte 233,130,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,125,254,255,255 // jmpq 40fb <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,130,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,125,254,255,255 // jmpq 48f3 <_sk_load_rgb_u16_be_avx+0x46>
HIDDEN _sk_store_u16_be_avx
.globl _sk_store_u16_be_avx
@@ -15305,7 +16745,7 @@ _sk_store_u16_be_avx:
.byte 196,65,17,98,200 // vpunpckldq %xmm8,%xmm13,%xmm9
.byte 196,65,17,106,192 // vpunpckhdq %xmm8,%xmm13,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,31 // jne 4380 <_sk_store_u16_be_avx+0x102>
+ .byte 117,31 // jne 4b78 <_sk_store_u16_be_avx+0x102>
.byte 196,1,120,17,28,72 // vmovups %xmm11,(%r8,%r9,2)
.byte 196,1,120,17,84,72,16 // vmovups %xmm10,0x10(%r8,%r9,2)
.byte 196,1,120,17,76,72,32 // vmovups %xmm9,0x20(%r8,%r9,2)
@@ -15314,22 +16754,22 @@ _sk_store_u16_be_avx:
.byte 255,224 // jmpq *%rax
.byte 196,1,121,214,28,72 // vmovq %xmm11,(%r8,%r9,2)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 116,240 // je 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,23,92,72,8 // vmovhpd %xmm11,0x8(%r8,%r9,2)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 114,227 // jb 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,214,84,72,16 // vmovq %xmm10,0x10(%r8,%r9,2)
- .byte 116,218 // je 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 116,218 // je 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,23,84,72,24 // vmovhpd %xmm10,0x18(%r8,%r9,2)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 114,205 // jb 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,214,76,72,32 // vmovq %xmm9,0x20(%r8,%r9,2)
- .byte 116,196 // je 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 116,196 // je 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,23,76,72,40 // vmovhpd %xmm9,0x28(%r8,%r9,2)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,183 // jb 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 114,183 // jb 4b74 <_sk_store_u16_be_avx+0xfe>
.byte 196,1,121,214,68,72,48 // vmovq %xmm8,0x30(%r8,%r9,2)
- .byte 235,174 // jmp 437c <_sk_store_u16_be_avx+0xfe>
+ .byte 235,174 // jmp 4b74 <_sk_store_u16_be_avx+0xfe>
HIDDEN _sk_load_f32_avx
.globl _sk_load_f32_avx
@@ -15337,10 +16777,10 @@ FUNCTION(_sk_load_f32_avx)
_sk_load_f32_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 119,110 // ja 4444 <_sk_load_f32_avx+0x76>
+ .byte 119,110 // ja 4c3c <_sk_load_f32_avx+0x76>
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
- .byte 76,141,21,132,0,0,0 // lea 0x84(%rip),%r10 # 446c <_sk_load_f32_avx+0x9e>
+ .byte 76,141,21,132,0,0,0 // lea 0x84(%rip),%r10 # 4c64 <_sk_load_f32_avx+0x9e>
.byte 73,99,4,138 // movslq (%r10,%rcx,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -15399,7 +16839,7 @@ _sk_store_f32_avx:
.byte 196,65,37,20,196 // vunpcklpd %ymm12,%ymm11,%ymm8
.byte 196,65,37,21,220 // vunpckhpd %ymm12,%ymm11,%ymm11
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,55 // jne 44f9 <_sk_store_f32_avx+0x6d>
+ .byte 117,55 // jne 4cf1 <_sk_store_f32_avx+0x6d>
.byte 196,67,45,24,225,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
.byte 196,67,61,24,235,1 // vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
.byte 196,67,45,6,201,49 // vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -15412,22 +16852,22 @@ _sk_store_f32_avx:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,17,20,128 // vmovupd %xmm10,(%r8,%rax,4)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 44f5 <_sk_store_f32_avx+0x69>
+ .byte 116,240 // je 4ced <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,76,128,16 // vmovupd %xmm9,0x10(%r8,%rax,4)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 44f5 <_sk_store_f32_avx+0x69>
+ .byte 114,227 // jb 4ced <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,68,128,32 // vmovupd %xmm8,0x20(%r8,%rax,4)
- .byte 116,218 // je 44f5 <_sk_store_f32_avx+0x69>
+ .byte 116,218 // je 4ced <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,92,128,48 // vmovupd %xmm11,0x30(%r8,%rax,4)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 44f5 <_sk_store_f32_avx+0x69>
+ .byte 114,205 // jb 4ced <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,84,128,64,1 // vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- .byte 116,195 // je 44f5 <_sk_store_f32_avx+0x69>
+ .byte 116,195 // je 4ced <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,76,128,80,1 // vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,181 // jb 44f5 <_sk_store_f32_avx+0x69>
+ .byte 114,181 // jb 4ced <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,68,128,96,1 // vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- .byte 235,171 // jmp 44f5 <_sk_store_f32_avx+0x69>
+ .byte 235,171 // jmp 4ced <_sk_store_f32_avx+0x69>
HIDDEN _sk_clamp_x_avx
.globl _sk_clamp_x_avx
@@ -15755,7 +17195,7 @@ _sk_linear_gradient_avx:
.byte 196,226,125,24,88,28 // vbroadcastss 0x1c(%rax),%ymm3
.byte 76,139,0 // mov (%rax),%r8
.byte 77,133,192 // test %r8,%r8
- .byte 15,132,146,0,0,0 // je 4aad <_sk_linear_gradient_avx+0xb8>
+ .byte 15,132,146,0,0,0 // je 52a5 <_sk_linear_gradient_avx+0xb8>
.byte 72,139,64,8 // mov 0x8(%rax),%rax
.byte 72,131,192,32 // add $0x20,%rax
.byte 196,65,28,87,228 // vxorps %ymm12,%ymm12,%ymm12
@@ -15782,8 +17222,8 @@ _sk_linear_gradient_avx:
.byte 196,227,13,74,219,208 // vblendvps %ymm13,%ymm3,%ymm14,%ymm3
.byte 72,131,192,36 // add $0x24,%rax
.byte 73,255,200 // dec %r8
- .byte 117,140 // jne 4a37 <_sk_linear_gradient_avx+0x42>
- .byte 235,20 // jmp 4ac1 <_sk_linear_gradient_avx+0xcc>
+ .byte 117,140 // jne 522f <_sk_linear_gradient_avx+0x42>
+ .byte 235,20 // jmp 52b9 <_sk_linear_gradient_avx+0xcc>
.byte 196,65,36,87,219 // vxorps %ymm11,%ymm11,%ymm11
.byte 196,65,44,87,210 // vxorps %ymm10,%ymm10,%ymm10
.byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
@@ -18479,6 +19919,400 @@ _sk_table_a_sse41:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_parametric_r_sse41
+.globl _sk_parametric_r_sse41
+FUNCTION(_sk_parametric_r_sse41)
+_sk_parametric_r_sse41:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,64,16 // movss 0x10(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 243,68,15,16,72,12 // movss 0xc(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 68,15,89,200 // mulps %xmm0,%xmm9
+ .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 68,15,89,208 // mulps %xmm0,%xmm10
+ .byte 65,15,194,192,2 // cmpleps %xmm8,%xmm0
+ .byte 243,68,15,16,64,24 // movss 0x18(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 69,15,88,200 // addps %xmm8,%xmm9
+ .byte 243,68,15,16,0 // movss (%rax),%xmm8
+ .byte 243,68,15,16,88,8 // movss 0x8(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,211 // addps %xmm11,%xmm10
+ .byte 69,15,91,218 // cvtdq2ps %xmm10,%xmm11
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,89,227 // mulps %xmm11,%xmm12
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 102,69,15,112,219,0 // pshufd $0x0,%xmm11,%xmm11
+ .byte 102,69,15,219,218 // pand %xmm10,%xmm11
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 102,69,15,112,210,0 // pshufd $0x0,%xmm10,%xmm10
+ .byte 102,69,15,235,211 // por %xmm11,%xmm10
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,220 // addps %xmm12,%xmm11
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,89,226 // mulps %xmm10,%xmm12
+ .byte 69,15,92,220 // subps %xmm12,%xmm11
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,88,234 // addps %xmm10,%xmm13
+ .byte 69,15,94,229 // divps %xmm13,%xmm12
+ .byte 69,15,92,220 // subps %xmm12,%xmm11
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 69,15,89,195 // mulps %xmm11,%xmm8
+ .byte 102,69,15,58,8,216,1 // roundps $0x1,%xmm8,%xmm11
+ .byte 185,0,0,0,75 // mov $0x4b000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,224 // addps %xmm8,%xmm12
+ .byte 69,15,92,195 // subps %xmm11,%xmm8
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,89,216 // mulps %xmm8,%xmm11
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,92,232 // subps %xmm8,%xmm13
+ .byte 69,15,94,221 // divps %xmm13,%xmm11
+ .byte 69,15,88,220 // addps %xmm12,%xmm11
+ .byte 69,15,89,218 // mulps %xmm10,%xmm11
+ .byte 102,69,15,91,211 // cvtps2dq %xmm11,%xmm10
+ .byte 243,68,15,16,64,20 // movss 0x14(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 69,15,88,194 // addps %xmm10,%xmm8
+ .byte 102,69,15,56,20,193 // blendvps %xmm0,%xmm9,%xmm8
+ .byte 15,87,192 // xorps %xmm0,%xmm0
+ .byte 68,15,95,192 // maxps %xmm0,%xmm8
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 102,15,110,192 // movd %eax,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 68,15,93,192 // minps %xmm0,%xmm8
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 65,15,40,192 // movaps %xmm8,%xmm0
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_g_sse41
+.globl _sk_parametric_g_sse41
+FUNCTION(_sk_parametric_g_sse41)
+_sk_parametric_g_sse41:
+ .byte 68,15,40,192 // movaps %xmm0,%xmm8
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,80,16 // movss 0x10(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 243,68,15,16,72,12 // movss 0xc(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 68,15,89,201 // mulps %xmm1,%xmm9
+ .byte 243,68,15,16,88,4 // movss 0x4(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 68,15,89,217 // mulps %xmm1,%xmm11
+ .byte 15,40,193 // movaps %xmm1,%xmm0
+ .byte 65,15,194,194,2 // cmpleps %xmm10,%xmm0
+ .byte 243,15,16,72,24 // movss 0x18(%rax),%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 68,15,88,201 // addps %xmm1,%xmm9
+ .byte 243,68,15,16,32 // movss (%rax),%xmm12
+ .byte 243,15,16,72,8 // movss 0x8(%rax),%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 68,15,88,217 // addps %xmm1,%xmm11
+ .byte 69,15,91,211 // cvtdq2ps %xmm11,%xmm10
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,234 // mulps %xmm10,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 68,15,92,233 // subps %xmm1,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 102,68,15,112,209,0 // pshufd $0x0,%xmm1,%xmm10
+ .byte 102,69,15,219,211 // pand %xmm11,%xmm10
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 102,68,15,112,217,0 // pshufd $0x0,%xmm1,%xmm11
+ .byte 102,69,15,235,218 // por %xmm10,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,88,213 // addps %xmm13,%xmm10
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 65,15,89,203 // mulps %xmm11,%xmm1
+ .byte 68,15,92,209 // subps %xmm1,%xmm10
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 65,15,88,203 // addps %xmm11,%xmm1
+ .byte 68,15,94,233 // divps %xmm1,%xmm13
+ .byte 69,15,92,213 // subps %xmm13,%xmm10
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,89,226 // mulps %xmm10,%xmm12
+ .byte 102,69,15,58,8,220,1 // roundps $0x1,%xmm12,%xmm11
+ .byte 185,0,0,0,75 // mov $0x4b000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,88,236 // addps %xmm12,%xmm13
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 65,15,89,204 // mulps %xmm12,%xmm1
+ .byte 68,15,92,233 // subps %xmm1,%xmm13
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,15,110,201 // movd %ecx,%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 65,15,92,204 // subps %xmm12,%xmm1
+ .byte 68,15,94,217 // divps %xmm1,%xmm11
+ .byte 69,15,88,221 // addps %xmm13,%xmm11
+ .byte 69,15,89,218 // mulps %xmm10,%xmm11
+ .byte 102,69,15,91,211 // cvtps2dq %xmm11,%xmm10
+ .byte 243,15,16,72,20 // movss 0x14(%rax),%xmm1
+ .byte 15,198,201,0 // shufps $0x0,%xmm1,%xmm1
+ .byte 65,15,88,202 // addps %xmm10,%xmm1
+ .byte 102,65,15,56,20,201 // blendvps %xmm0,%xmm9,%xmm1
+ .byte 15,87,192 // xorps %xmm0,%xmm0
+ .byte 15,95,200 // maxps %xmm0,%xmm1
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 102,15,110,192 // movd %eax,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 15,93,200 // minps %xmm0,%xmm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 65,15,40,192 // movaps %xmm8,%xmm0
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_b_sse41
+.globl _sk_parametric_b_sse41
+FUNCTION(_sk_parametric_b_sse41)
+_sk_parametric_b_sse41:
+ .byte 68,15,40,192 // movaps %xmm0,%xmm8
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,80,16 // movss 0x10(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 243,68,15,16,72,12 // movss 0xc(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 68,15,89,202 // mulps %xmm2,%xmm9
+ .byte 243,68,15,16,88,4 // movss 0x4(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 68,15,89,218 // mulps %xmm2,%xmm11
+ .byte 15,40,194 // movaps %xmm2,%xmm0
+ .byte 65,15,194,194,2 // cmpleps %xmm10,%xmm0
+ .byte 243,15,16,80,24 // movss 0x18(%rax),%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 68,15,88,202 // addps %xmm2,%xmm9
+ .byte 243,68,15,16,32 // movss (%rax),%xmm12
+ .byte 243,15,16,80,8 // movss 0x8(%rax),%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 68,15,88,218 // addps %xmm2,%xmm11
+ .byte 69,15,91,211 // cvtdq2ps %xmm11,%xmm10
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,234 // mulps %xmm10,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 68,15,92,234 // subps %xmm2,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 102,68,15,112,210,0 // pshufd $0x0,%xmm2,%xmm10
+ .byte 102,69,15,219,211 // pand %xmm11,%xmm10
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 102,68,15,112,218,0 // pshufd $0x0,%xmm2,%xmm11
+ .byte 102,69,15,235,218 // por %xmm10,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,88,213 // addps %xmm13,%xmm10
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 65,15,89,211 // mulps %xmm11,%xmm2
+ .byte 68,15,92,210 // subps %xmm2,%xmm10
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 65,15,88,211 // addps %xmm11,%xmm2
+ .byte 68,15,94,234 // divps %xmm2,%xmm13
+ .byte 69,15,92,213 // subps %xmm13,%xmm10
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,89,226 // mulps %xmm10,%xmm12
+ .byte 102,69,15,58,8,220,1 // roundps $0x1,%xmm12,%xmm11
+ .byte 185,0,0,0,75 // mov $0x4b000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,88,236 // addps %xmm12,%xmm13
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 65,15,89,212 // mulps %xmm12,%xmm2
+ .byte 68,15,92,234 // subps %xmm2,%xmm13
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,15,110,209 // movd %ecx,%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 65,15,92,212 // subps %xmm12,%xmm2
+ .byte 68,15,94,218 // divps %xmm2,%xmm11
+ .byte 69,15,88,221 // addps %xmm13,%xmm11
+ .byte 69,15,89,218 // mulps %xmm10,%xmm11
+ .byte 102,69,15,91,211 // cvtps2dq %xmm11,%xmm10
+ .byte 243,15,16,80,20 // movss 0x14(%rax),%xmm2
+ .byte 15,198,210,0 // shufps $0x0,%xmm2,%xmm2
+ .byte 65,15,88,210 // addps %xmm10,%xmm2
+ .byte 102,65,15,56,20,209 // blendvps %xmm0,%xmm9,%xmm2
+ .byte 15,87,192 // xorps %xmm0,%xmm0
+ .byte 15,95,208 // maxps %xmm0,%xmm2
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 102,15,110,192 // movd %eax,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 15,93,208 // minps %xmm0,%xmm2
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 65,15,40,192 // movaps %xmm8,%xmm0
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_a_sse41
+.globl _sk_parametric_a_sse41
+FUNCTION(_sk_parametric_a_sse41)
+_sk_parametric_a_sse41:
+ .byte 68,15,40,192 // movaps %xmm0,%xmm8
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,80,16 // movss 0x10(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 243,68,15,16,72,12 // movss 0xc(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 68,15,89,203 // mulps %xmm3,%xmm9
+ .byte 243,68,15,16,88,4 // movss 0x4(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 68,15,89,219 // mulps %xmm3,%xmm11
+ .byte 15,40,195 // movaps %xmm3,%xmm0
+ .byte 65,15,194,194,2 // cmpleps %xmm10,%xmm0
+ .byte 243,15,16,88,24 // movss 0x18(%rax),%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 68,15,88,203 // addps %xmm3,%xmm9
+ .byte 243,68,15,16,32 // movss (%rax),%xmm12
+ .byte 243,15,16,88,8 // movss 0x8(%rax),%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 68,15,88,219 // addps %xmm3,%xmm11
+ .byte 69,15,91,211 // cvtdq2ps %xmm11,%xmm10
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,234 // mulps %xmm10,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 68,15,92,235 // subps %xmm3,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 102,68,15,112,211,0 // pshufd $0x0,%xmm3,%xmm10
+ .byte 102,69,15,219,211 // pand %xmm11,%xmm10
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 102,68,15,112,219,0 // pshufd $0x0,%xmm3,%xmm11
+ .byte 102,69,15,235,218 // por %xmm10,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,88,213 // addps %xmm13,%xmm10
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 65,15,89,219 // mulps %xmm11,%xmm3
+ .byte 68,15,92,211 // subps %xmm3,%xmm10
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 65,15,88,219 // addps %xmm11,%xmm3
+ .byte 68,15,94,235 // divps %xmm3,%xmm13
+ .byte 69,15,92,213 // subps %xmm13,%xmm10
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,89,226 // mulps %xmm10,%xmm12
+ .byte 102,69,15,58,8,220,1 // roundps $0x1,%xmm12,%xmm11
+ .byte 185,0,0,0,75 // mov $0x4b000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,88,236 // addps %xmm12,%xmm13
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 65,15,89,220 // mulps %xmm12,%xmm3
+ .byte 68,15,92,235 // subps %xmm3,%xmm13
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,15,110,217 // movd %ecx,%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 65,15,92,220 // subps %xmm12,%xmm3
+ .byte 68,15,94,219 // divps %xmm3,%xmm11
+ .byte 69,15,88,221 // addps %xmm13,%xmm11
+ .byte 69,15,89,218 // mulps %xmm10,%xmm11
+ .byte 102,69,15,91,211 // cvtps2dq %xmm11,%xmm10
+ .byte 243,15,16,88,20 // movss 0x14(%rax),%xmm3
+ .byte 15,198,219,0 // shufps $0x0,%xmm3,%xmm3
+ .byte 65,15,88,218 // addps %xmm10,%xmm3
+ .byte 102,65,15,56,20,217 // blendvps %xmm0,%xmm9,%xmm3
+ .byte 15,87,192 // xorps %xmm0,%xmm0
+ .byte 15,95,216 // maxps %xmm0,%xmm3
+ .byte 184,0,0,128,63 // mov $0x3f800000,%eax
+ .byte 102,15,110,192 // movd %eax,%xmm0
+ .byte 15,198,192,0 // shufps $0x0,%xmm0,%xmm0
+ .byte 15,93,216 // minps %xmm0,%xmm3
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 65,15,40,192 // movaps %xmm8,%xmm0
+ .byte 255,224 // jmpq *%rax
+
HIDDEN _sk_load_a8_sse41
.globl _sk_load_a8_sse41
FUNCTION(_sk_load_a8_sse41)
@@ -18615,9 +20449,9 @@ _sk_gather_i8_sse41:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 2054 <_sk_gather_i8_sse41+0xf>
+ .byte 116,5 // je 26ed <_sk_gather_i8_sse41+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 2056 <_sk_gather_i8_sse41+0x11>
+ .byte 235,2 // jmp 26ef <_sk_gather_i8_sse41+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 243,15,91,201 // cvttps2dq %xmm1,%xmm1
@@ -19769,7 +21603,7 @@ _sk_linear_gradient_sse41:
.byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
.byte 72,139,8 // mov (%rax),%rcx
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,132,254,0,0,0 // je 327c <_sk_linear_gradient_sse41+0x138>
+ .byte 15,132,254,0,0,0 // je 3915 <_sk_linear_gradient_sse41+0x138>
.byte 15,41,100,36,168 // movaps %xmm4,-0x58(%rsp)
.byte 15,41,108,36,184 // movaps %xmm5,-0x48(%rsp)
.byte 15,41,116,36,200 // movaps %xmm6,-0x38(%rsp)
@@ -19819,12 +21653,12 @@ _sk_linear_gradient_sse41:
.byte 15,40,196 // movaps %xmm4,%xmm0
.byte 72,131,192,36 // add $0x24,%rax
.byte 72,255,201 // dec %rcx
- .byte 15,133,65,255,255,255 // jne 31a7 <_sk_linear_gradient_sse41+0x63>
+ .byte 15,133,65,255,255,255 // jne 3840 <_sk_linear_gradient_sse41+0x63>
.byte 15,40,124,36,216 // movaps -0x28(%rsp),%xmm7
.byte 15,40,116,36,200 // movaps -0x38(%rsp),%xmm6
.byte 15,40,108,36,184 // movaps -0x48(%rsp),%xmm5
.byte 15,40,100,36,168 // movaps -0x58(%rsp),%xmm4
- .byte 235,13 // jmp 3289 <_sk_linear_gradient_sse41+0x145>
+ .byte 235,13 // jmp 3922 <_sk_linear_gradient_sse41+0x145>
.byte 15,87,201 // xorps %xmm1,%xmm1
.byte 15,87,210 // xorps %xmm2,%xmm2
.byte 15,87,219 // xorps %xmm3,%xmm3
@@ -22628,6 +24462,422 @@ _sk_table_a_sse2:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_parametric_r_sse2
+.globl _sk_parametric_r_sse2
+FUNCTION(_sk_parametric_r_sse2)
+_sk_parametric_r_sse2:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,72,16 // movss 0x10(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 243,68,15,16,64,12 // movss 0xc(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 68,15,89,192 // mulps %xmm0,%xmm8
+ .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 68,15,89,208 // mulps %xmm0,%xmm10
+ .byte 65,15,194,193,2 // cmpleps %xmm9,%xmm0
+ .byte 243,68,15,16,72,24 // movss 0x18(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,88,193 // addps %xmm9,%xmm8
+ .byte 243,68,15,16,8 // movss (%rax),%xmm9
+ .byte 243,68,15,16,88,8 // movss 0x8(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,211 // addps %xmm11,%xmm10
+ .byte 69,15,91,218 // cvtdq2ps %xmm10,%xmm11
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,92,235 // subps %xmm11,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 102,69,15,112,227,0 // pshufd $0x0,%xmm11,%xmm12
+ .byte 102,69,15,219,226 // pand %xmm10,%xmm12
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 102,69,15,112,218,0 // pshufd $0x0,%xmm10,%xmm11
+ .byte 102,69,15,235,220 // por %xmm12,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,229 // addps %xmm13,%xmm12
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,89,211 // mulps %xmm11,%xmm10
+ .byte 69,15,92,226 // subps %xmm10,%xmm12
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 185,0,0,128,63 // mov $0x3f800000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,249 // movd %ecx,%xmm15
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,88,243 // addps %xmm11,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,92,229 // subps %xmm13,%xmm12
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,89,204 // mulps %xmm12,%xmm9
+ .byte 243,69,15,91,217 // cvttps2dq %xmm9,%xmm11
+ .byte 69,15,91,219 // cvtdq2ps %xmm11,%xmm11
+ .byte 69,15,40,225 // movaps %xmm9,%xmm12
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 69,15,88,249 // addps %xmm9,%xmm15
+ .byte 69,15,40,233 // movaps %xmm9,%xmm13
+ .byte 69,15,194,235,1 // cmpltps %xmm11,%xmm13
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,84,234 // andps %xmm10,%xmm13
+ .byte 69,15,87,201 // xorps %xmm9,%xmm9
+ .byte 69,15,92,221 // subps %xmm13,%xmm11
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 102,69,15,110,216 // movd %r8d,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,236 // mulps %xmm12,%xmm13
+ .byte 69,15,92,253 // subps %xmm13,%xmm15
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,92,244 // subps %xmm12,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,88,239 // addps %xmm15,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 102,69,15,91,221 // cvtps2dq %xmm13,%xmm11
+ .byte 243,68,15,16,96,20 // movss 0x14(%rax),%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,227 // addps %xmm11,%xmm12
+ .byte 68,15,84,192 // andps %xmm0,%xmm8
+ .byte 65,15,85,196 // andnps %xmm12,%xmm0
+ .byte 65,15,86,192 // orps %xmm8,%xmm0
+ .byte 65,15,95,193 // maxps %xmm9,%xmm0
+ .byte 65,15,93,194 // minps %xmm10,%xmm0
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_g_sse2
+.globl _sk_parametric_g_sse2
+FUNCTION(_sk_parametric_g_sse2)
+_sk_parametric_g_sse2:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,72,16 // movss 0x10(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 243,68,15,16,64,12 // movss 0xc(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 68,15,89,193 // mulps %xmm1,%xmm8
+ .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 68,15,89,209 // mulps %xmm1,%xmm10
+ .byte 65,15,194,201,2 // cmpleps %xmm9,%xmm1
+ .byte 243,68,15,16,72,24 // movss 0x18(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,88,193 // addps %xmm9,%xmm8
+ .byte 243,68,15,16,8 // movss (%rax),%xmm9
+ .byte 243,68,15,16,88,8 // movss 0x8(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,211 // addps %xmm11,%xmm10
+ .byte 69,15,91,218 // cvtdq2ps %xmm10,%xmm11
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,92,235 // subps %xmm11,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 102,69,15,112,227,0 // pshufd $0x0,%xmm11,%xmm12
+ .byte 102,69,15,219,226 // pand %xmm10,%xmm12
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 102,69,15,112,218,0 // pshufd $0x0,%xmm10,%xmm11
+ .byte 102,69,15,235,220 // por %xmm12,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,229 // addps %xmm13,%xmm12
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,89,211 // mulps %xmm11,%xmm10
+ .byte 69,15,92,226 // subps %xmm10,%xmm12
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 185,0,0,128,63 // mov $0x3f800000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,249 // movd %ecx,%xmm15
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,88,243 // addps %xmm11,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,92,229 // subps %xmm13,%xmm12
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,89,204 // mulps %xmm12,%xmm9
+ .byte 243,69,15,91,217 // cvttps2dq %xmm9,%xmm11
+ .byte 69,15,91,219 // cvtdq2ps %xmm11,%xmm11
+ .byte 69,15,40,225 // movaps %xmm9,%xmm12
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 69,15,88,249 // addps %xmm9,%xmm15
+ .byte 69,15,40,233 // movaps %xmm9,%xmm13
+ .byte 69,15,194,235,1 // cmpltps %xmm11,%xmm13
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,84,234 // andps %xmm10,%xmm13
+ .byte 69,15,87,201 // xorps %xmm9,%xmm9
+ .byte 69,15,92,221 // subps %xmm13,%xmm11
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 102,69,15,110,216 // movd %r8d,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,236 // mulps %xmm12,%xmm13
+ .byte 69,15,92,253 // subps %xmm13,%xmm15
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,92,244 // subps %xmm12,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,88,239 // addps %xmm15,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 102,69,15,91,221 // cvtps2dq %xmm13,%xmm11
+ .byte 243,68,15,16,96,20 // movss 0x14(%rax),%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,227 // addps %xmm11,%xmm12
+ .byte 68,15,84,193 // andps %xmm1,%xmm8
+ .byte 65,15,85,204 // andnps %xmm12,%xmm1
+ .byte 65,15,86,200 // orps %xmm8,%xmm1
+ .byte 65,15,95,201 // maxps %xmm9,%xmm1
+ .byte 65,15,93,202 // minps %xmm10,%xmm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_b_sse2
+.globl _sk_parametric_b_sse2
+FUNCTION(_sk_parametric_b_sse2)
+_sk_parametric_b_sse2:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,72,16 // movss 0x10(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 243,68,15,16,64,12 // movss 0xc(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 68,15,89,194 // mulps %xmm2,%xmm8
+ .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 68,15,89,210 // mulps %xmm2,%xmm10
+ .byte 65,15,194,209,2 // cmpleps %xmm9,%xmm2
+ .byte 243,68,15,16,72,24 // movss 0x18(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,88,193 // addps %xmm9,%xmm8
+ .byte 243,68,15,16,8 // movss (%rax),%xmm9
+ .byte 243,68,15,16,88,8 // movss 0x8(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,211 // addps %xmm11,%xmm10
+ .byte 69,15,91,218 // cvtdq2ps %xmm10,%xmm11
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,92,235 // subps %xmm11,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 102,69,15,112,227,0 // pshufd $0x0,%xmm11,%xmm12
+ .byte 102,69,15,219,226 // pand %xmm10,%xmm12
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 102,69,15,112,218,0 // pshufd $0x0,%xmm10,%xmm11
+ .byte 102,69,15,235,220 // por %xmm12,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,229 // addps %xmm13,%xmm12
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,89,211 // mulps %xmm11,%xmm10
+ .byte 69,15,92,226 // subps %xmm10,%xmm12
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 185,0,0,128,63 // mov $0x3f800000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,249 // movd %ecx,%xmm15
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,88,243 // addps %xmm11,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,92,229 // subps %xmm13,%xmm12
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,89,204 // mulps %xmm12,%xmm9
+ .byte 243,69,15,91,217 // cvttps2dq %xmm9,%xmm11
+ .byte 69,15,91,219 // cvtdq2ps %xmm11,%xmm11
+ .byte 69,15,40,225 // movaps %xmm9,%xmm12
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 69,15,88,249 // addps %xmm9,%xmm15
+ .byte 69,15,40,233 // movaps %xmm9,%xmm13
+ .byte 69,15,194,235,1 // cmpltps %xmm11,%xmm13
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,84,234 // andps %xmm10,%xmm13
+ .byte 69,15,87,201 // xorps %xmm9,%xmm9
+ .byte 69,15,92,221 // subps %xmm13,%xmm11
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 102,69,15,110,216 // movd %r8d,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,236 // mulps %xmm12,%xmm13
+ .byte 69,15,92,253 // subps %xmm13,%xmm15
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,92,244 // subps %xmm12,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,88,239 // addps %xmm15,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 102,69,15,91,221 // cvtps2dq %xmm13,%xmm11
+ .byte 243,68,15,16,96,20 // movss 0x14(%rax),%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,227 // addps %xmm11,%xmm12
+ .byte 68,15,84,194 // andps %xmm2,%xmm8
+ .byte 65,15,85,212 // andnps %xmm12,%xmm2
+ .byte 65,15,86,208 // orps %xmm8,%xmm2
+ .byte 65,15,95,209 // maxps %xmm9,%xmm2
+ .byte 65,15,93,210 // minps %xmm10,%xmm2
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_parametric_a_sse2
+.globl _sk_parametric_a_sse2
+FUNCTION(_sk_parametric_a_sse2)
+_sk_parametric_a_sse2:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,68,15,16,72,16 // movss 0x10(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 243,68,15,16,64,12 // movss 0xc(%rax),%xmm8
+ .byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
+ .byte 68,15,89,195 // mulps %xmm3,%xmm8
+ .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 68,15,89,211 // mulps %xmm3,%xmm10
+ .byte 65,15,194,217,2 // cmpleps %xmm9,%xmm3
+ .byte 243,68,15,16,72,24 // movss 0x18(%rax),%xmm9
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,88,193 // addps %xmm9,%xmm8
+ .byte 243,68,15,16,8 // movss (%rax),%xmm9
+ .byte 243,68,15,16,88,8 // movss 0x8(%rax),%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,88,211 // addps %xmm11,%xmm10
+ .byte 69,15,91,218 // cvtdq2ps %xmm10,%xmm11
+ .byte 185,0,0,0,52 // mov $0x34000000,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 185,0,0,254,66 // mov $0x42fe0000,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 69,15,92,235 // subps %xmm11,%xmm13
+ .byte 185,255,255,127,0 // mov $0x7fffff,%ecx
+ .byte 102,68,15,110,217 // movd %ecx,%xmm11
+ .byte 102,69,15,112,227,0 // pshufd $0x0,%xmm11,%xmm12
+ .byte 102,69,15,219,226 // pand %xmm10,%xmm12
+ .byte 185,0,0,0,63 // mov $0x3f000000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 102,69,15,112,218,0 // pshufd $0x0,%xmm10,%xmm11
+ .byte 102,69,15,235,220 // por %xmm12,%xmm11
+ .byte 185,42,145,49,64 // mov $0x4031912a,%ecx
+ .byte 102,68,15,110,225 // movd %ecx,%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,229 // addps %xmm13,%xmm12
+ .byte 185,117,191,191,63 // mov $0x3fbfbf75,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,89,211 // mulps %xmm11,%xmm10
+ .byte 69,15,92,226 // subps %xmm10,%xmm12
+ .byte 185,163,233,220,63 // mov $0x3fdce9a3,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 185,249,68,180,62 // mov $0x3eb444f9,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 185,0,0,128,63 // mov $0x3f800000,%ecx
+ .byte 102,68,15,110,209 // movd %ecx,%xmm10
+ .byte 65,184,0,0,0,75 // mov $0x4b000000,%r8d
+ .byte 185,81,140,242,66 // mov $0x42f28c51,%ecx
+ .byte 102,68,15,110,249 // movd %ecx,%xmm15
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,88,243 // addps %xmm11,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,92,229 // subps %xmm13,%xmm12
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 69,15,89,204 // mulps %xmm12,%xmm9
+ .byte 243,69,15,91,217 // cvttps2dq %xmm9,%xmm11
+ .byte 69,15,91,219 // cvtdq2ps %xmm11,%xmm11
+ .byte 69,15,40,225 // movaps %xmm9,%xmm12
+ .byte 69,15,198,255,0 // shufps $0x0,%xmm15,%xmm15
+ .byte 69,15,88,249 // addps %xmm9,%xmm15
+ .byte 69,15,40,233 // movaps %xmm9,%xmm13
+ .byte 69,15,194,235,1 // cmpltps %xmm11,%xmm13
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
+ .byte 69,15,84,234 // andps %xmm10,%xmm13
+ .byte 69,15,87,201 // xorps %xmm9,%xmm9
+ .byte 69,15,92,221 // subps %xmm13,%xmm11
+ .byte 69,15,92,227 // subps %xmm11,%xmm12
+ .byte 102,69,15,110,216 // movd %r8d,%xmm11
+ .byte 69,15,198,219,0 // shufps $0x0,%xmm11,%xmm11
+ .byte 185,141,188,190,63 // mov $0x3fbebc8d,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 69,15,89,236 // mulps %xmm12,%xmm13
+ .byte 69,15,92,253 // subps %xmm13,%xmm15
+ .byte 185,254,210,221,65 // mov $0x41ddd2fe,%ecx
+ .byte 102,68,15,110,233 // movd %ecx,%xmm13
+ .byte 69,15,198,237,0 // shufps $0x0,%xmm13,%xmm13
+ .byte 185,248,245,154,64 // mov $0x409af5f8,%ecx
+ .byte 102,68,15,110,241 // movd %ecx,%xmm14
+ .byte 69,15,198,246,0 // shufps $0x0,%xmm14,%xmm14
+ .byte 69,15,92,244 // subps %xmm12,%xmm14
+ .byte 69,15,94,238 // divps %xmm14,%xmm13
+ .byte 69,15,88,239 // addps %xmm15,%xmm13
+ .byte 69,15,89,235 // mulps %xmm11,%xmm13
+ .byte 102,69,15,91,221 // cvtps2dq %xmm13,%xmm11
+ .byte 243,68,15,16,96,20 // movss 0x14(%rax),%xmm12
+ .byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
+ .byte 69,15,88,227 // addps %xmm11,%xmm12
+ .byte 68,15,84,195 // andps %xmm3,%xmm8
+ .byte 65,15,85,220 // andnps %xmm12,%xmm3
+ .byte 65,15,86,216 // orps %xmm8,%xmm3
+ .byte 65,15,95,217 // maxps %xmm9,%xmm3
+ .byte 65,15,93,218 // minps %xmm10,%xmm3
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
HIDDEN _sk_load_a8_sse2
.globl _sk_load_a8_sse2
FUNCTION(_sk_load_a8_sse2)
@@ -22796,9 +25046,9 @@ _sk_gather_i8_sse2:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 21e0 <_sk_gather_i8_sse2+0xf>
+ .byte 116,5 // je 2914 <_sk_gather_i8_sse2+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 21e2 <_sk_gather_i8_sse2+0x11>
+ .byte 235,2 // jmp 2916 <_sk_gather_i8_sse2+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 243,15,91,201 // cvttps2dq %xmm1,%xmm1
@@ -24053,7 +26303,7 @@ _sk_linear_gradient_sse2:
.byte 69,15,198,228,0 // shufps $0x0,%xmm12,%xmm12
.byte 72,139,8 // mov (%rax),%rcx
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,132,15,1,0,0 // je 35ca <_sk_linear_gradient_sse2+0x149>
+ .byte 15,132,15,1,0,0 // je 3cfe <_sk_linear_gradient_sse2+0x149>
.byte 72,139,64,8 // mov 0x8(%rax),%rax
.byte 72,131,192,32 // add $0x20,%rax
.byte 69,15,87,192 // xorps %xmm8,%xmm8
@@ -24114,8 +26364,8 @@ _sk_linear_gradient_sse2:
.byte 69,15,86,231 // orps %xmm15,%xmm12
.byte 72,131,192,36 // add $0x24,%rax
.byte 72,255,201 // dec %rcx
- .byte 15,133,8,255,255,255 // jne 34d0 <_sk_linear_gradient_sse2+0x4f>
- .byte 235,13 // jmp 35d7 <_sk_linear_gradient_sse2+0x156>
+ .byte 15,133,8,255,255,255 // jne 3c04 <_sk_linear_gradient_sse2+0x4f>
+ .byte 235,13 // jmp 3d0b <_sk_linear_gradient_sse2+0x156>
.byte 15,87,201 // xorps %xmm1,%xmm1
.byte 15,87,210 // xorps %xmm2,%xmm2
.byte 15,87,219 // xorps %xmm3,%xmm3
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index eb77b6d5a5..d8f82efef0 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -1357,7 +1357,7 @@ _sk_lerp_565_hsw LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,225 ; jmpq ffffffffe2001478 <_sk_callback_hsw+0xffffffffe1ffd85e>
+ DB 233,255,255,255,225 ; jmpq ffffffffe2001478 <_sk_callback_hsw+0xffffffffe1ffd22e>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -1934,6 +1934,334 @@ _sk_table_a_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_hsw
+_sk_parametric_r_hsw LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
+ DB 196,65,124,194,192,2 ; vcmpleps %ymm8,%ymm0,%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 196,66,125,168,202 ; vfmadd213ps %ymm10,%ymm0,%ymm9
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 196,66,125,168,211 ; vfmadd213ps %ymm11,%ymm0,%ymm10
+ DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,232 ; vpbroadcastd %xmm0,%ymm13
+ DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 196,65,125,219,210 ; vpand %ymm10,%ymm0,%ymm10
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 197,45,235,208 ; vpor %ymm0,%ymm10,%ymm10
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 197,20,88,216 ; vaddps %ymm0,%ymm13,%ymm11
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12
+ DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,216 ; vpbroadcastd %xmm0,%ymm11
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 197,172,88,192 ; vaddps %ymm0,%ymm10,%ymm0
+ DB 197,164,94,192 ; vdivps %ymm0,%ymm11,%ymm0
+ DB 197,156,92,192 ; vsubps %ymm0,%ymm12,%ymm0
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
+ DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,232 ; vpbroadcastd %xmm0,%ymm13
+ DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,98,125,88,216 ; vpbroadcastd %xmm0,%ymm11
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 196,193,124,92,194 ; vsubps %ymm10,%ymm0,%ymm0
+ DB 197,164,94,192 ; vdivps %ymm0,%ymm11,%ymm0
+ DB 197,148,88,192 ; vaddps %ymm0,%ymm13,%ymm0
+ DB 197,156,89,192 ; vmulps %ymm0,%ymm12,%ymm0
+ DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0
+ DB 196,195,125,74,193,128 ; vblendvps %ymm8,%ymm9,%ymm0,%ymm0
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
+ DB 197,188,93,192 ; vminps %ymm0,%ymm8,%ymm0
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_g_hsw
+_sk_parametric_g_hsw LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
+ DB 196,65,116,194,192,2 ; vcmpleps %ymm8,%ymm1,%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 196,66,117,168,202 ; vfmadd213ps %ymm10,%ymm1,%ymm9
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 196,66,117,168,211 ; vfmadd213ps %ymm11,%ymm1,%ymm10
+ DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,233 ; vpbroadcastd %xmm1,%ymm13
+ DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 196,65,117,219,210 ; vpand %ymm10,%ymm1,%ymm10
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 197,45,235,209 ; vpor %ymm1,%ymm10,%ymm10
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 197,20,88,217 ; vaddps %ymm1,%ymm13,%ymm11
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12
+ DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,217 ; vpbroadcastd %xmm1,%ymm11
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 197,172,88,201 ; vaddps %ymm1,%ymm10,%ymm1
+ DB 197,164,94,201 ; vdivps %ymm1,%ymm11,%ymm1
+ DB 197,156,92,201 ; vsubps %ymm1,%ymm12,%ymm1
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
+ DB 197,44,89,217 ; vmulps %ymm1,%ymm10,%ymm11
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,233 ; vpbroadcastd %xmm1,%ymm13
+ DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,98,125,88,217 ; vpbroadcastd %xmm1,%ymm11
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 196,193,116,92,202 ; vsubps %ymm10,%ymm1,%ymm1
+ DB 197,164,94,201 ; vdivps %ymm1,%ymm11,%ymm1
+ DB 197,148,88,201 ; vaddps %ymm1,%ymm13,%ymm1
+ DB 197,156,89,201 ; vmulps %ymm1,%ymm12,%ymm1
+ DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1
+ DB 196,195,117,74,201,128 ; vblendvps %ymm8,%ymm9,%ymm1,%ymm1
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,116,95,192 ; vmaxps %ymm8,%ymm1,%ymm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 197,249,110,200 ; vmovd %eax,%xmm1
+ DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
+ DB 197,188,93,201 ; vminps %ymm1,%ymm8,%ymm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_b_hsw
+_sk_parametric_b_hsw LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
+ DB 196,65,108,194,192,2 ; vcmpleps %ymm8,%ymm2,%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 196,66,109,168,202 ; vfmadd213ps %ymm10,%ymm2,%ymm9
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 196,66,109,168,211 ; vfmadd213ps %ymm11,%ymm2,%ymm10
+ DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,234 ; vpbroadcastd %xmm2,%ymm13
+ DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 196,65,109,219,210 ; vpand %ymm10,%ymm2,%ymm10
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 197,45,235,210 ; vpor %ymm2,%ymm10,%ymm10
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 197,20,88,218 ; vaddps %ymm2,%ymm13,%ymm11
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12
+ DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,218 ; vpbroadcastd %xmm2,%ymm11
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 197,172,88,210 ; vaddps %ymm2,%ymm10,%ymm2
+ DB 197,164,94,210 ; vdivps %ymm2,%ymm11,%ymm2
+ DB 197,156,92,210 ; vsubps %ymm2,%ymm12,%ymm2
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
+ DB 197,44,89,218 ; vmulps %ymm2,%ymm10,%ymm11
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,234 ; vpbroadcastd %xmm2,%ymm13
+ DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,98,125,88,218 ; vpbroadcastd %xmm2,%ymm11
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 196,193,108,92,210 ; vsubps %ymm10,%ymm2,%ymm2
+ DB 197,164,94,210 ; vdivps %ymm2,%ymm11,%ymm2
+ DB 197,148,88,210 ; vaddps %ymm2,%ymm13,%ymm2
+ DB 197,156,89,210 ; vmulps %ymm2,%ymm12,%ymm2
+ DB 197,253,91,210 ; vcvtps2dq %ymm2,%ymm2
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,108,88,210 ; vaddps %ymm10,%ymm2,%ymm2
+ DB 196,195,109,74,209,128 ; vblendvps %ymm8,%ymm9,%ymm2,%ymm2
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,108,95,192 ; vmaxps %ymm8,%ymm2,%ymm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
+ DB 197,188,93,210 ; vminps %ymm2,%ymm8,%ymm2
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_a_hsw
+_sk_parametric_a_hsw LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
+ DB 196,65,100,194,192,2 ; vcmpleps %ymm8,%ymm3,%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 196,66,101,168,202 ; vfmadd213ps %ymm10,%ymm3,%ymm9
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 196,66,101,168,211 ; vfmadd213ps %ymm11,%ymm3,%ymm10
+ DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,235 ; vpbroadcastd %xmm3,%ymm13
+ DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 196,65,101,219,210 ; vpand %ymm10,%ymm3,%ymm10
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 197,45,235,211 ; vpor %ymm3,%ymm10,%ymm10
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 197,20,88,219 ; vaddps %ymm3,%ymm13,%ymm11
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12
+ DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,219 ; vpbroadcastd %xmm3,%ymm11
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 197,172,88,219 ; vaddps %ymm3,%ymm10,%ymm3
+ DB 197,164,94,219 ; vdivps %ymm3,%ymm11,%ymm3
+ DB 197,156,92,219 ; vsubps %ymm3,%ymm12,%ymm3
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
+ DB 197,44,89,219 ; vmulps %ymm3,%ymm10,%ymm11
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,235 ; vpbroadcastd %xmm3,%ymm13
+ DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,98,125,88,219 ; vpbroadcastd %xmm3,%ymm11
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 196,193,100,92,218 ; vsubps %ymm10,%ymm3,%ymm3
+ DB 197,164,94,219 ; vdivps %ymm3,%ymm11,%ymm3
+ DB 197,148,88,219 ; vaddps %ymm3,%ymm13,%ymm3
+ DB 197,156,89,219 ; vmulps %ymm3,%ymm12,%ymm3
+ DB 197,253,91,219 ; vcvtps2dq %ymm3,%ymm3
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,100,88,218 ; vaddps %ymm10,%ymm3,%ymm3
+ DB 196,195,101,74,217,128 ; vblendvps %ymm8,%ymm9,%ymm3,%ymm3
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,100,95,192 ; vmaxps %ymm8,%ymm3,%ymm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 197,249,110,216 ; vmovd %eax,%xmm3
+ DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
+ DB 197,188,93,219 ; vminps %ymm3,%ymm8,%ymm3
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
PUBLIC _sk_load_a8_hsw
_sk_load_a8_hsw LABEL PROC
DB 73,137,200 ; mov %rcx,%r8
@@ -1941,7 +2269,7 @@ _sk_load_a8_hsw LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,50 ; jne 1e28 <_sk_load_a8_hsw+0x42>
+ DB 117,50 ; jne 2458 <_sk_load_a8_hsw+0x42>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
@@ -1964,9 +2292,9 @@ _sk_load_a8_hsw LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 1e30 <_sk_load_a8_hsw+0x4a>
+ DB 117,234 ; jne 2460 <_sk_load_a8_hsw+0x4a>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,173 ; jmp 1dfa <_sk_load_a8_hsw+0x14>
+ DB 235,173 ; jmp 242a <_sk_load_a8_hsw+0x14>
PUBLIC _sk_gather_a8_hsw
_sk_gather_a8_hsw LABEL PROC
@@ -2037,7 +2365,7 @@ _sk_store_a8_hsw LABEL PROC
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 1f65 <_sk_store_a8_hsw+0x3b>
+ DB 117,10 ; jne 2595 <_sk_store_a8_hsw+0x3b>
DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2045,10 +2373,10 @@ _sk_store_a8_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 1f61 <_sk_store_a8_hsw+0x37>
+ DB 119,236 ; ja 2591 <_sk_store_a8_hsw+0x37>
DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 1fc8 <_sk_store_a8_hsw+0x9e>
+ DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 25f8 <_sk_store_a8_hsw+0x9e>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -2059,7 +2387,7 @@ _sk_store_a8_hsw LABEL PROC
DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- DB 235,154 ; jmp 1f61 <_sk_store_a8_hsw+0x37>
+ DB 235,154 ; jmp 2591 <_sk_store_a8_hsw+0x37>
DB 144 ; nop
DB 246,255 ; idiv %bh
DB 255 ; (bad)
@@ -2091,7 +2419,7 @@ _sk_load_g8_hsw LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,60 ; jne 2030 <_sk_load_g8_hsw+0x4c>
+ DB 117,60 ; jne 2660 <_sk_load_g8_hsw+0x4c>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
@@ -2116,9 +2444,9 @@ _sk_load_g8_hsw LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 2038 <_sk_load_g8_hsw+0x54>
+ DB 117,234 ; jne 2668 <_sk_load_g8_hsw+0x54>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,163 ; jmp 1ff8 <_sk_load_g8_hsw+0x14>
+ DB 235,163 ; jmp 2628 <_sk_load_g8_hsw+0x14>
PUBLIC _sk_gather_g8_hsw
_sk_gather_g8_hsw LABEL PROC
@@ -2183,9 +2511,9 @@ _sk_gather_i8_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 214b <_sk_gather_i8_hsw+0xf>
+ DB 116,5 ; je 277b <_sk_gather_i8_hsw+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 214d <_sk_gather_i8_hsw+0x11>
+ DB 235,2 ; jmp 277d <_sk_gather_i8_hsw+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 65,87 ; push %r15
DB 65,86 ; push %r14
@@ -2256,7 +2584,7 @@ _sk_load_565_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,149,0,0,0 ; jne 22ff <_sk_load_565_hsw+0xa3>
+ DB 15,133,149,0,0,0 ; jne 292f <_sk_load_565_hsw+0xa3>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 196,226,125,51,208 ; vpmovzxwd %xmm0,%ymm2
DB 184,0,248,0,0 ; mov $0xf800,%eax
@@ -2296,9 +2624,9 @@ _sk_load_565_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,89,255,255,255 ; ja 2270 <_sk_load_565_hsw+0x14>
+ DB 15,135,89,255,255,255 ; ja 28a0 <_sk_load_565_hsw+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 236c <_sk_load_565_hsw+0x110>
+ DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 299c <_sk_load_565_hsw+0x110>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -2310,12 +2638,12 @@ _sk_load_565_hsw LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,5,255,255,255 ; jmpq 2270 <_sk_load_565_hsw+0x14>
+ DB 233,5,255,255,255 ; jmpq 28a0 <_sk_load_565_hsw+0x14>
DB 144 ; nop
DB 243,255 ; repz (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 235,255 ; jmp 2371 <_sk_load_565_hsw+0x115>
+ DB 235,255 ; jmp 29a1 <_sk_load_565_hsw+0x115>
DB 255 ; (bad)
DB 255,227 ; jmpq *%rbx
DB 255 ; (bad)
@@ -2438,7 +2766,7 @@ _sk_store_565_hsw LABEL PROC
DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 2537 <_sk_store_565_hsw+0x6c>
+ DB 117,10 ; jne 2b67 <_sk_store_565_hsw+0x6c>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2446,9 +2774,9 @@ _sk_store_565_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 2533 <_sk_store_565_hsw+0x68>
+ DB 119,236 ; ja 2b63 <_sk_store_565_hsw+0x68>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2594 <_sk_store_565_hsw+0xc9>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2bc4 <_sk_store_565_hsw+0xc9>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -2459,7 +2787,7 @@ _sk_store_565_hsw LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 2533 <_sk_store_565_hsw+0x68>
+ DB 235,159 ; jmp 2b63 <_sk_store_565_hsw+0x68>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2488,7 +2816,7 @@ _sk_load_4444_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,179,0,0,0 ; jne 2671 <_sk_load_4444_hsw+0xc1>
+ DB 15,133,179,0,0,0 ; jne 2ca1 <_sk_load_4444_hsw+0xc1>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 196,98,125,51,200 ; vpmovzxwd %xmm0,%ymm9
DB 184,0,240,0,0 ; mov $0xf000,%eax
@@ -2534,9 +2862,9 @@ _sk_load_4444_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,59,255,255,255 ; ja 25c4 <_sk_load_4444_hsw+0x14>
+ DB 15,135,59,255,255,255 ; ja 2bf4 <_sk_load_4444_hsw+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 26e0 <_sk_load_4444_hsw+0x130>
+ DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 2d10 <_sk_load_4444_hsw+0x130>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -2548,13 +2876,13 @@ _sk_load_4444_hsw LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,231,254,255,255 ; jmpq 25c4 <_sk_load_4444_hsw+0x14>
+ DB 233,231,254,255,255 ; jmpq 2bf4 <_sk_load_4444_hsw+0x14>
DB 15,31,0 ; nopl (%rax)
DB 241 ; icebp
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,225 ; jmpq ffffffffe20026e8 <_sk_callback_hsw+0xffffffffe1ffeace>
+ DB 233,255,255,255,225 ; jmpq ffffffffe2002d18 <_sk_callback_hsw+0xffffffffe1ffeace>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2682,7 +3010,7 @@ _sk_store_4444_hsw LABEL PROC
DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 28cf <_sk_store_4444_hsw+0x72>
+ DB 117,10 ; jne 2eff <_sk_store_4444_hsw+0x72>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2690,9 +3018,9 @@ _sk_store_4444_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 28cb <_sk_store_4444_hsw+0x6e>
+ DB 119,236 ; ja 2efb <_sk_store_4444_hsw+0x6e>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 292c <_sk_store_4444_hsw+0xcf>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2f5c <_sk_store_4444_hsw+0xcf>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -2703,7 +3031,7 @@ _sk_store_4444_hsw LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 28cb <_sk_store_4444_hsw+0x6e>
+ DB 235,159 ; jmp 2efb <_sk_store_4444_hsw+0x6e>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2734,7 +3062,7 @@ _sk_load_8888_hsw LABEL PROC
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
DB 76,3,8 ; add (%rax),%r9
DB 77,133,192 ; test %r8,%r8
- DB 117,104 ; jne 29c5 <_sk_load_8888_hsw+0x7d>
+ DB 117,104 ; jne 2ff5 <_sk_load_8888_hsw+0x7d>
DB 196,193,126,111,25 ; vmovdqu (%r9),%ymm3
DB 184,255,0,0,0 ; mov $0xff,%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
@@ -2767,7 +3095,7 @@ _sk_load_8888_hsw LABEL PROC
DB 196,225,249,110,192 ; vmovq %rax,%xmm0
DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0
DB 196,194,125,140,25 ; vpmaskmovd (%r9),%ymm0,%ymm3
- DB 233,116,255,255,255 ; jmpq 2962 <_sk_load_8888_hsw+0x1a>
+ DB 233,116,255,255,255 ; jmpq 2f92 <_sk_load_8888_hsw+0x1a>
PUBLIC _sk_gather_8888_hsw
_sk_gather_8888_hsw LABEL PROC
@@ -2827,7 +3155,7 @@ _sk_store_8888_hsw LABEL PROC
DB 196,65,45,235,192 ; vpor %ymm8,%ymm10,%ymm8
DB 196,65,53,235,192 ; vpor %ymm8,%ymm9,%ymm8
DB 77,133,192 ; test %r8,%r8
- DB 117,12 ; jne 2ae8 <_sk_store_8888_hsw+0x74>
+ DB 117,12 ; jne 3118 <_sk_store_8888_hsw+0x74>
DB 196,65,126,127,1 ; vmovdqu %ymm8,(%r9)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,137,193 ; mov %r8,%rcx
@@ -2840,14 +3168,14 @@ _sk_store_8888_hsw LABEL PROC
DB 196,97,249,110,200 ; vmovq %rax,%xmm9
DB 196,66,125,33,201 ; vpmovsxbd %xmm9,%ymm9
DB 196,66,53,142,1 ; vpmaskmovd %ymm8,%ymm9,(%r9)
- DB 235,211 ; jmp 2ae1 <_sk_store_8888_hsw+0x6d>
+ DB 235,211 ; jmp 3111 <_sk_store_8888_hsw+0x6d>
PUBLIC _sk_load_f16_hsw
_sk_load_f16_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,139,0 ; mov (%rax),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 117,97 ; jne 2b79 <_sk_load_f16_hsw+0x6b>
+ DB 117,97 ; jne 31a9 <_sk_load_f16_hsw+0x6b>
DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8
DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2
DB 197,249,16,92,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -2873,29 +3201,29 @@ _sk_load_f16_hsw LABEL PROC
DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,79 ; je 2bd8 <_sk_load_f16_hsw+0xca>
+ DB 116,79 ; je 3208 <_sk_load_f16_hsw+0xca>
DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,67 ; jb 2bd8 <_sk_load_f16_hsw+0xca>
+ DB 114,67 ; jb 3208 <_sk_load_f16_hsw+0xca>
DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,68 ; je 2be5 <_sk_load_f16_hsw+0xd7>
+ DB 116,68 ; je 3215 <_sk_load_f16_hsw+0xd7>
DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,56 ; jb 2be5 <_sk_load_f16_hsw+0xd7>
+ DB 114,56 ; jb 3215 <_sk_load_f16_hsw+0xd7>
DB 197,251,16,92,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,114,255,255,255 ; je 2b2f <_sk_load_f16_hsw+0x21>
+ DB 15,132,114,255,255,255 ; je 315f <_sk_load_f16_hsw+0x21>
DB 197,225,22,92,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,98,255,255,255 ; jb 2b2f <_sk_load_f16_hsw+0x21>
+ DB 15,130,98,255,255,255 ; jb 315f <_sk_load_f16_hsw+0x21>
DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9
- DB 233,87,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21>
+ DB 233,87,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,74,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21>
+ DB 233,74,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,65,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21>
+ DB 233,65,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21>
PUBLIC _sk_gather_f16_hsw
_sk_gather_f16_hsw LABEL PROC
@@ -2949,7 +3277,7 @@ _sk_store_f16_hsw LABEL PROC
DB 196,65,57,98,205 ; vpunpckldq %xmm13,%xmm8,%xmm9
DB 196,65,57,106,197 ; vpunpckhdq %xmm13,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,27 ; jne 2cdd <_sk_store_f16_hsw+0x65>
+ DB 117,27 ; jne 330d <_sk_store_f16_hsw+0x65>
DB 197,120,17,28,248 ; vmovups %xmm11,(%rax,%rdi,8)
DB 197,120,17,84,248,16 ; vmovups %xmm10,0x10(%rax,%rdi,8)
DB 197,120,17,76,248,32 ; vmovups %xmm9,0x20(%rax,%rdi,8)
@@ -2958,22 +3286,22 @@ _sk_store_f16_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 197,121,214,28,248 ; vmovq %xmm11,(%rax,%rdi,8)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,241 ; je 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 116,241 ; je 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%rax,%rdi,8)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,229 ; jb 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 114,229 ; jb 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,214,84,248,16 ; vmovq %xmm10,0x10(%rax,%rdi,8)
- DB 116,221 ; je 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 116,221 ; je 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%rax,%rdi,8)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,209 ; jb 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 114,209 ; jb 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,214,76,248,32 ; vmovq %xmm9,0x20(%rax,%rdi,8)
- DB 116,201 ; je 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 116,201 ; je 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%rax,%rdi,8)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,189 ; jb 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 114,189 ; jb 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,214,68,248,48 ; vmovq %xmm8,0x30(%rax,%rdi,8)
- DB 235,181 ; jmp 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 235,181 ; jmp 3309 <_sk_store_f16_hsw+0x61>
PUBLIC _sk_load_u16_be_hsw
_sk_load_u16_be_hsw LABEL PROC
@@ -2981,7 +3309,7 @@ _sk_load_u16_be_hsw LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,205,0,0,0 ; jne 2e07 <_sk_load_u16_be_hsw+0xe3>
+ DB 15,133,205,0,0,0 ; jne 3437 <_sk_load_u16_be_hsw+0xe3>
DB 196,65,121,16,4,64 ; vmovupd (%r8,%rax,2),%xmm8
DB 196,193,121,16,84,64,16 ; vmovupd 0x10(%r8,%rax,2),%xmm2
DB 196,193,121,16,92,64,32 ; vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -3030,29 +3358,29 @@ _sk_load_u16_be_hsw LABEL PROC
DB 196,65,123,16,4,64 ; vmovsd (%r8,%rax,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 2e6d <_sk_load_u16_be_hsw+0x149>
+ DB 116,85 ; je 349d <_sk_load_u16_be_hsw+0x149>
DB 196,65,57,22,68,64,8 ; vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 2e6d <_sk_load_u16_be_hsw+0x149>
+ DB 114,72 ; jb 349d <_sk_load_u16_be_hsw+0x149>
DB 196,193,123,16,84,64,16 ; vmovsd 0x10(%r8,%rax,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 2e7a <_sk_load_u16_be_hsw+0x156>
+ DB 116,72 ; je 34aa <_sk_load_u16_be_hsw+0x156>
DB 196,193,105,22,84,64,24 ; vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 2e7a <_sk_load_u16_be_hsw+0x156>
+ DB 114,59 ; jb 34aa <_sk_load_u16_be_hsw+0x156>
DB 196,193,123,16,92,64,32 ; vmovsd 0x20(%r8,%rax,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,5,255,255,255 ; je 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 15,132,5,255,255,255 ; je 3385 <_sk_load_u16_be_hsw+0x31>
DB 196,193,97,22,92,64,40 ; vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,244,254,255,255 ; jb 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 15,130,244,254,255,255 ; jb 3385 <_sk_load_u16_be_hsw+0x31>
DB 196,65,122,126,76,64,48 ; vmovq 0x30(%r8,%rax,2),%xmm9
- DB 233,232,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 233,232,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,219,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 233,219,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,210,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 233,210,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31>
PUBLIC _sk_load_rgb_u16_be_hsw
_sk_load_rgb_u16_be_hsw LABEL PROC
@@ -3060,7 +3388,7 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,127 ; lea (%rdi,%rdi,2),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,211,0,0,0 ; jne 2f68 <_sk_load_rgb_u16_be_hsw+0xe5>
+ DB 15,133,211,0,0,0 ; jne 3598 <_sk_load_rgb_u16_be_hsw+0xe5>
DB 196,193,122,111,4,64 ; vmovdqu (%r8,%rax,2),%xmm0
DB 196,193,122,111,84,64,12 ; vmovdqu 0xc(%r8,%rax,2),%xmm2
DB 196,193,122,111,76,64,24 ; vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -3110,36 +3438,36 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
DB 196,193,121,110,4,64 ; vmovd (%r8,%rax,2),%xmm0
DB 196,193,121,196,68,64,4,2 ; vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 2f81 <_sk_load_rgb_u16_be_hsw+0xfe>
- DB 233,72,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,5 ; jne 35b1 <_sk_load_rgb_u16_be_hsw+0xfe>
+ DB 233,72,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,76,64,6 ; vmovd 0x6(%r8,%rax,2),%xmm1
DB 196,65,113,196,68,64,10,2 ; vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 2fb0 <_sk_load_rgb_u16_be_hsw+0x12d>
+ DB 114,26 ; jb 35e0 <_sk_load_rgb_u16_be_hsw+0x12d>
DB 196,193,121,110,76,64,12 ; vmovd 0xc(%r8,%rax,2),%xmm1
DB 196,193,113,196,84,64,16,2 ; vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 2fb5 <_sk_load_rgb_u16_be_hsw+0x132>
- DB 233,25,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,20,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 35e5 <_sk_load_rgb_u16_be_hsw+0x132>
+ DB 233,25,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,20,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,76,64,18 ; vmovd 0x12(%r8,%rax,2),%xmm1
DB 196,65,113,196,76,64,22,2 ; vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 2fe4 <_sk_load_rgb_u16_be_hsw+0x161>
+ DB 114,26 ; jb 3614 <_sk_load_rgb_u16_be_hsw+0x161>
DB 196,193,121,110,76,64,24 ; vmovd 0x18(%r8,%rax,2),%xmm1
DB 196,193,113,196,76,64,28,2 ; vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 2fe9 <_sk_load_rgb_u16_be_hsw+0x166>
- DB 233,229,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,224,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 3619 <_sk_load_rgb_u16_be_hsw+0x166>
+ DB 233,229,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,224,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,92,64,30 ; vmovd 0x1e(%r8,%rax,2),%xmm3
DB 196,65,97,196,92,64,34,2 ; vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 3012 <_sk_load_rgb_u16_be_hsw+0x18f>
+ DB 114,20 ; jb 3642 <_sk_load_rgb_u16_be_hsw+0x18f>
DB 196,193,121,110,92,64,36 ; vmovd 0x24(%r8,%rax,2),%xmm3
DB 196,193,97,196,92,64,40,2 ; vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- DB 233,183,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,178,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,183,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,178,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
PUBLIC _sk_store_u16_be_hsw
_sk_store_u16_be_hsw LABEL PROC
@@ -3186,7 +3514,7 @@ _sk_store_u16_be_hsw LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 3112 <_sk_store_u16_be_hsw+0xfb>
+ DB 117,31 ; jne 3742 <_sk_store_u16_be_hsw+0xfb>
DB 196,1,120,17,28,72 ; vmovups %xmm11,(%r8,%r9,2)
DB 196,1,120,17,84,72,16 ; vmovups %xmm10,0x10(%r8,%r9,2)
DB 196,1,120,17,76,72,32 ; vmovups %xmm9,0x20(%r8,%r9,2)
@@ -3195,31 +3523,31 @@ _sk_store_u16_be_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,1,121,214,28,72 ; vmovq %xmm11,(%r8,%r9,2)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 116,240 ; je 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,92,72,8 ; vmovhpd %xmm11,0x8(%r8,%r9,2)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 114,227 ; jb 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,84,72,16 ; vmovq %xmm10,0x10(%r8,%r9,2)
- DB 116,218 ; je 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 116,218 ; je 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,84,72,24 ; vmovhpd %xmm10,0x18(%r8,%r9,2)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 114,205 ; jb 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,76,72,32 ; vmovq %xmm9,0x20(%r8,%r9,2)
- DB 116,196 ; je 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 116,196 ; je 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,76,72,40 ; vmovhpd %xmm9,0x28(%r8,%r9,2)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 114,183 ; jb 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,68,72,48 ; vmovq %xmm8,0x30(%r8,%r9,2)
- DB 235,174 ; jmp 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 235,174 ; jmp 373e <_sk_store_u16_be_hsw+0xf7>
PUBLIC _sk_load_f32_hsw
_sk_load_f32_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 119,110 ; ja 31d6 <_sk_load_f32_hsw+0x76>
+ DB 119,110 ; ja 3806 <_sk_load_f32_hsw+0x76>
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
- DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 3200 <_sk_load_f32_hsw+0xa0>
+ DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 3830 <_sk_load_f32_hsw+0xa0>
DB 73,99,4,138 ; movslq (%r10,%rcx,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -3276,7 +3604,7 @@ _sk_store_f32_hsw LABEL PROC
DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8
DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11
DB 72,133,201 ; test %rcx,%rcx
- DB 117,55 ; jne 328d <_sk_store_f32_hsw+0x6d>
+ DB 117,55 ; jne 38bd <_sk_store_f32_hsw+0x6d>
DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -3289,22 +3617,22 @@ _sk_store_f32_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 3289 <_sk_store_f32_hsw+0x69>
+ DB 116,240 ; je 38b9 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 3289 <_sk_store_f32_hsw+0x69>
+ DB 114,227 ; jb 38b9 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4)
- DB 116,218 ; je 3289 <_sk_store_f32_hsw+0x69>
+ DB 116,218 ; je 38b9 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 3289 <_sk_store_f32_hsw+0x69>
+ DB 114,205 ; jb 38b9 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- DB 116,195 ; je 3289 <_sk_store_f32_hsw+0x69>
+ DB 116,195 ; je 38b9 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,181 ; jb 3289 <_sk_store_f32_hsw+0x69>
+ DB 114,181 ; jb 38b9 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- DB 235,171 ; jmp 3289 <_sk_store_f32_hsw+0x69>
+ DB 235,171 ; jmp 38b9 <_sk_store_f32_hsw+0x69>
PUBLIC _sk_clamp_x_hsw
_sk_clamp_x_hsw LABEL PROC
@@ -3545,7 +3873,7 @@ _sk_linear_gradient_hsw LABEL PROC
DB 196,98,125,24,72,28 ; vbroadcastss 0x1c(%rax),%ymm9
DB 76,139,0 ; mov (%rax),%r8
DB 77,133,192 ; test %r8,%r8
- DB 15,132,143,0,0,0 ; je 3719 <_sk_linear_gradient_hsw+0xb5>
+ DB 15,132,143,0,0,0 ; je 3d49 <_sk_linear_gradient_hsw+0xb5>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 196,65,28,87,228 ; vxorps %ymm12,%ymm12,%ymm12
@@ -3572,8 +3900,8 @@ _sk_linear_gradient_hsw LABEL PROC
DB 196,67,13,74,201,208 ; vblendvps %ymm13,%ymm9,%ymm14,%ymm9
DB 72,131,192,36 ; add $0x24,%rax
DB 73,255,200 ; dec %r8
- DB 117,140 ; jne 36a3 <_sk_linear_gradient_hsw+0x3f>
- DB 235,17 ; jmp 372a <_sk_linear_gradient_hsw+0xc6>
+ DB 117,140 ; jne 3cd3 <_sk_linear_gradient_hsw+0x3f>
+ DB 235,17 ; jmp 3d5a <_sk_linear_gradient_hsw+0xc6>
DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
DB 197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2
DB 197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
@@ -6463,6 +6791,410 @@ _sk_table_a_avx LABEL PROC
DB 65,95 ; pop %r15
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_avx
+_sk_parametric_r_avx LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
+ DB 196,65,124,194,192,2 ; vcmpleps %ymm8,%ymm0,%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 197,52,89,200 ; vmulps %ymm0,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 197,172,89,192 ; vmulps %ymm0,%ymm10,%ymm0
+ DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,44,89,208 ; vmulps %ymm0,%ymm10,%ymm10
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,44,92,208 ; vsubps %ymm0,%ymm10,%ymm10
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 197,249,112,192,0 ; vpshufd $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,65,124,84,219 ; vandps %ymm11,%ymm0,%ymm11
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 197,249,112,192,0 ; vpshufd $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,36,86,216 ; vorps %ymm0,%ymm11,%ymm11
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,44,88,208 ; vaddps %ymm0,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,164,89,192 ; vmulps %ymm0,%ymm11,%ymm0
+ DB 197,44,92,208 ; vsubps %ymm0,%ymm10,%ymm10
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,99,125,24,224,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,164,88,192 ; vaddps %ymm0,%ymm11,%ymm0
+ DB 197,156,94,192 ; vdivps %ymm0,%ymm12,%ymm0
+ DB 197,172,92,192 ; vsubps %ymm0,%ymm10,%ymm0
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
+ DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,99,125,24,224,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,193,124,89,194 ; vmulps %ymm10,%ymm0,%ymm0
+ DB 197,36,92,216 ; vsubps %ymm0,%ymm11,%ymm11
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,99,125,24,232,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,193,124,92,194 ; vsubps %ymm10,%ymm0,%ymm0
+ DB 197,148,94,192 ; vdivps %ymm0,%ymm13,%ymm0
+ DB 197,164,88,192 ; vaddps %ymm0,%ymm11,%ymm0
+ DB 197,156,89,192 ; vmulps %ymm0,%ymm12,%ymm0
+ DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0
+ DB 196,195,125,74,193,128 ; vblendvps %ymm8,%ymm9,%ymm0,%ymm0
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,188,93,192 ; vminps %ymm0,%ymm8,%ymm0
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_g_avx
+_sk_parametric_g_avx LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
+ DB 196,65,116,194,192,2 ; vcmpleps %ymm8,%ymm1,%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 197,52,89,201 ; vmulps %ymm1,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 197,172,89,201 ; vmulps %ymm1,%ymm10,%ymm1
+ DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,44,89,209 ; vmulps %ymm1,%ymm10,%ymm10
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,44,92,209 ; vsubps %ymm1,%ymm10,%ymm10
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 197,249,112,201,0 ; vpshufd $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,65,116,84,219 ; vandps %ymm11,%ymm1,%ymm11
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 197,249,112,201,0 ; vpshufd $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,36,86,217 ; vorps %ymm1,%ymm11,%ymm11
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,44,88,209 ; vaddps %ymm1,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,164,89,201 ; vmulps %ymm1,%ymm11,%ymm1
+ DB 197,44,92,209 ; vsubps %ymm1,%ymm10,%ymm10
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,225,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,164,88,201 ; vaddps %ymm1,%ymm11,%ymm1
+ DB 197,156,94,201 ; vdivps %ymm1,%ymm12,%ymm1
+ DB 197,172,92,201 ; vsubps %ymm1,%ymm10,%ymm1
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
+ DB 197,44,89,217 ; vmulps %ymm1,%ymm10,%ymm11
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,225,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,193,116,89,202 ; vmulps %ymm10,%ymm1,%ymm1
+ DB 197,36,92,217 ; vsubps %ymm1,%ymm11,%ymm11
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,233,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,193,116,92,202 ; vsubps %ymm10,%ymm1,%ymm1
+ DB 197,148,94,201 ; vdivps %ymm1,%ymm13,%ymm1
+ DB 197,164,88,201 ; vaddps %ymm1,%ymm11,%ymm1
+ DB 197,156,89,201 ; vmulps %ymm1,%ymm12,%ymm1
+ DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1
+ DB 196,195,117,74,201,128 ; vblendvps %ymm8,%ymm9,%ymm1,%ymm1
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,116,95,192 ; vmaxps %ymm8,%ymm1,%ymm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 197,249,110,200 ; vmovd %eax,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,188,93,201 ; vminps %ymm1,%ymm8,%ymm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_b_avx
+_sk_parametric_b_avx LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
+ DB 196,65,108,194,192,2 ; vcmpleps %ymm8,%ymm2,%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 197,52,89,202 ; vmulps %ymm2,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
+ DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,44,89,210 ; vmulps %ymm2,%ymm10,%ymm10
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,44,92,210 ; vsubps %ymm2,%ymm10,%ymm10
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,65,108,84,219 ; vandps %ymm11,%ymm2,%ymm11
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,36,86,218 ; vorps %ymm2,%ymm11,%ymm11
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,44,88,210 ; vaddps %ymm2,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,164,89,210 ; vmulps %ymm2,%ymm11,%ymm2
+ DB 197,44,92,210 ; vsubps %ymm2,%ymm10,%ymm10
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,226,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,164,88,210 ; vaddps %ymm2,%ymm11,%ymm2
+ DB 197,156,94,210 ; vdivps %ymm2,%ymm12,%ymm2
+ DB 197,172,92,210 ; vsubps %ymm2,%ymm10,%ymm2
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
+ DB 197,44,89,218 ; vmulps %ymm2,%ymm10,%ymm11
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,226,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,193,108,89,210 ; vmulps %ymm10,%ymm2,%ymm2
+ DB 197,36,92,218 ; vsubps %ymm2,%ymm11,%ymm11
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,234,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,193,108,92,210 ; vsubps %ymm10,%ymm2,%ymm2
+ DB 197,148,94,210 ; vdivps %ymm2,%ymm13,%ymm2
+ DB 197,164,88,210 ; vaddps %ymm2,%ymm11,%ymm2
+ DB 197,156,89,210 ; vmulps %ymm2,%ymm12,%ymm2
+ DB 197,253,91,210 ; vcvtps2dq %ymm2,%ymm2
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,108,88,210 ; vaddps %ymm10,%ymm2,%ymm2
+ DB 196,195,109,74,209,128 ; vblendvps %ymm8,%ymm9,%ymm2,%ymm2
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 196,65,108,95,192 ; vmaxps %ymm8,%ymm2,%ymm8
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,188,93,210 ; vminps %ymm2,%ymm8,%ymm2
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_parametric_a_avx
+_sk_parametric_a_avx LABEL PROC ; AVX stage: rewrites ymm3 in place with a piecewise function of ymm3 (one formula where ymm3 <= a threshold, a power-style formula elsewhere), clamps the result to [0,1], then jumps to the next stage. Generated code: regenerate rather than hand-edit the bytes.
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = 8-byte value at (rsi), rsi advances; rax is the base of the floats read below (presumably SkJumper_ParametricTransferFunction {G,A,B,C,D,E,F} -- confirm against SkJumper_stages.cpp)
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 -- threshold: float at +0x10 (field D, if the struct assumption holds)
+ DB 196,65,100,194,192,2 ; vcmpleps %ymm8,%ymm3,%ymm8 -- ymm8 = per-lane mask of (ymm3 <= threshold); consumed by the vblendvps near the end
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9 -- float at +0xc (C?)
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10 -- float at +0x18 (F?)
+ DB 197,52,89,203 ; vmulps %ymm3,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9 -- ymm9 = [+0xc]*ymm3 + [+0x18]: the value used for lanes at or below the threshold
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10 -- float at +0x4 (A?)
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11 -- float at +0x8 (B?)
+ DB 197,172,89,219 ; vmulps %ymm3,%ymm10,%ymm3
+ DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11 -- ymm11 = [+0x4]*ymm3 + [+0x8]: the base that is raised to a power below
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10 -- converts ymm11's raw bits (read as int32) to float; NOTE(review): start of what appears to be a bit-trick approximation of log2(ymm11)
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d -- 0x34000000 is 2^-23 as a float
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 -- vmovd + lane-0 shuffle + vinsertf128 splats the 32-bit immediate to all 8 lanes; this idiom repeats for every constant below
+ DB 197,44,89,211 ; vmulps %ymm3,%ymm10,%ymm10
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d -- 0x42fe0000 is 127.0f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,44,92,211 ; vsubps %ymm3,%ymm10,%ymm10
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d -- low 23 bits: the float mantissa field
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 197,249,112,219,0 ; vpshufd $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,65,100,84,219 ; vandps %ymm11,%ymm3,%ymm11 -- keep only the mantissa bits of the base
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d -- 0x3f000000 is 0.5f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 197,249,112,219,0 ; vpshufd $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,36,86,219 ; vorps %ymm3,%ymm11,%ymm11 -- OR in 0.5f's exponent bits, giving the mantissa as a float with a fixed exponent
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,44,88,211 ; vaddps %ymm3,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,164,89,219 ; vmulps %ymm3,%ymm11,%ymm3
+ DB 197,44,92,211 ; vsubps %ymm3,%ymm10,%ymm10
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,99,101,24,227,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,164,88,219 ; vaddps %ymm3,%ymm11,%ymm3
+ DB 197,156,94,219 ; vdivps %ymm3,%ymm12,%ymm3
+ DB 197,172,92,219 ; vsubps %ymm3,%ymm10,%ymm3 -- ymm3 = end of the first approximation (presumably ~log2 of the base -- confirm)
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 -- float at +0x0 (G?), the exponent multiplier
+ DB 197,44,89,219 ; vmulps %ymm3,%ymm10,%ymm11 -- ymm11 = [+0x0] * ymm3
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 -- rounding mode 1 = round toward -infinity (floor)
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = ymm11 - floor(ymm11), the fractional part
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- 0x4b000000 is 2^23 as a float
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,99,101,24,227,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,193,100,89,218 ; vmulps %ymm10,%ymm3,%ymm3
+ DB 197,36,92,219 ; vsubps %ymm3,%ymm11,%ymm11
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,99,101,24,235,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,193,100,92,218 ; vsubps %ymm10,%ymm3,%ymm3
+ DB 197,148,94,219 ; vdivps %ymm3,%ymm13,%ymm3
+ DB 197,164,88,219 ; vaddps %ymm3,%ymm11,%ymm3
+ DB 197,156,89,219 ; vmulps %ymm3,%ymm12,%ymm3 -- scale by 2^23 (ymm12)
+ DB 197,253,91,219 ; vcvtps2dq %ymm3,%ymm3 -- convert to int32; the resulting bits are used as floats from here on (presumably an approximate 2^x -- confirm)
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10 -- float at +0x14 (E?)
+ DB 196,193,100,88,218 ; vaddps %ymm10,%ymm3,%ymm3 -- power-branch result = approximation + [+0x14]
+ DB 196,195,101,74,217,128 ; vblendvps %ymm8,%ymm9,%ymm3,%ymm3 -- per lane: ymm3 = mask (ymm8) set ? ymm9 : ymm3, i.e. lanes with input <= threshold take [+0xc]*x + [+0x18]
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 -- ymm8 = 0
+ DB 196,65,100,95,192 ; vmaxps %ymm8,%ymm3,%ymm8 -- ymm8 = max(ymm3, 0): lower clamp
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- 0x3f800000 is 1.0f; this overwrites rax, which is not read again before the reload below
+ DB 197,249,110,216 ; vmovd %eax,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,188,93,219 ; vminps %ymm3,%ymm8,%ymm3 -- ymm3 = min(ymm8, 1.0f): upper clamp, final result
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = next 8-byte entry at (rsi), rsi advances
+ DB 255,224 ; jmpq *%rax -- tail-jump to the address just loaded (the next stage)
+
PUBLIC _sk_load_a8_avx
_sk_load_a8_avx LABEL PROC
DB 73,137,200 ; mov %rcx,%r8
@@ -6470,7 +7202,7 @@ _sk_load_a8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,74 ; jne 29ba <_sk_load_a8_avx+0x5a>
+ DB 117,74 ; jne 31b2 <_sk_load_a8_avx+0x5a>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1
DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0
@@ -6497,9 +7229,9 @@ _sk_load_a8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 29c2 <_sk_load_a8_avx+0x62>
+ DB 117,234 ; jne 31ba <_sk_load_a8_avx+0x62>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,149 ; jmp 2974 <_sk_load_a8_avx+0x14>
+ DB 235,149 ; jmp 316c <_sk_load_a8_avx+0x14>
PUBLIC _sk_gather_a8_avx
_sk_gather_a8_avx LABEL PROC
@@ -6576,7 +7308,7 @@ _sk_store_a8_avx LABEL PROC
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 2b1b <_sk_store_a8_avx+0x42>
+ DB 117,10 ; jne 3313 <_sk_store_a8_avx+0x42>
DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -6584,10 +7316,10 @@ _sk_store_a8_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 2b17 <_sk_store_a8_avx+0x3e>
+ DB 119,236 ; ja 330f <_sk_store_a8_avx+0x3e>
DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 2b80 <_sk_store_a8_avx+0xa7>
+ DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3378 <_sk_store_a8_avx+0xa7>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -6598,7 +7330,7 @@ _sk_store_a8_avx LABEL PROC
DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- DB 235,154 ; jmp 2b17 <_sk_store_a8_avx+0x3e>
+ DB 235,154 ; jmp 330f <_sk_store_a8_avx+0x3e>
DB 15,31,0 ; nopl (%rax)
DB 244 ; hlt
DB 255 ; (bad)
@@ -6631,7 +7363,7 @@ _sk_load_g8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,91 ; jne 2c07 <_sk_load_g8_avx+0x6b>
+ DB 117,91 ; jne 33ff <_sk_load_g8_avx+0x6b>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1
DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0
@@ -6661,9 +7393,9 @@ _sk_load_g8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 2c0f <_sk_load_g8_avx+0x73>
+ DB 117,234 ; jne 3407 <_sk_load_g8_avx+0x73>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,132 ; jmp 2bb0 <_sk_load_g8_avx+0x14>
+ DB 235,132 ; jmp 33a8 <_sk_load_g8_avx+0x14>
PUBLIC _sk_gather_g8_avx
_sk_gather_g8_avx LABEL PROC
@@ -6734,9 +7466,9 @@ _sk_gather_i8_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 2d46 <_sk_gather_i8_avx+0xf>
+ DB 116,5 ; je 353e <_sk_gather_i8_avx+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 2d48 <_sk_gather_i8_avx+0x11>
+ DB 235,2 ; jmp 3540 <_sk_gather_i8_avx+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 65,87 ; push %r15
DB 65,86 ; push %r14
@@ -6839,7 +7571,7 @@ _sk_load_565_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,209,0,0,0 ; jne 2fe2 <_sk_load_565_avx+0xdf>
+ DB 15,133,209,0,0,0 ; jne 37da <_sk_load_565_avx+0xdf>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -6889,9 +7621,9 @@ _sk_load_565_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,29,255,255,255 ; ja 2f17 <_sk_load_565_avx+0x14>
+ DB 15,135,29,255,255,255 ; ja 370f <_sk_load_565_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3050 <_sk_load_565_avx+0x14d>
+ DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3848 <_sk_load_565_avx+0x14d>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -6903,7 +7635,7 @@ _sk_load_565_avx LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,201,254,255,255 ; jmpq 2f17 <_sk_load_565_avx+0x14>
+ DB 233,201,254,255,255 ; jmpq 370f <_sk_load_565_avx+0x14>
DB 102,144 ; xchg %ax,%ax
DB 242,255 ; repnz (bad)
DB 255 ; (bad)
@@ -7056,7 +7788,7 @@ _sk_store_565_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 329b <_sk_store_565_avx+0x9e>
+ DB 117,10 ; jne 3a93 <_sk_store_565_avx+0x9e>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7064,9 +7796,9 @@ _sk_store_565_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 3297 <_sk_store_565_avx+0x9a>
+ DB 119,236 ; ja 3a8f <_sk_store_565_avx+0x9a>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 32f8 <_sk_store_565_avx+0xfb>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 3af0 <_sk_store_565_avx+0xfb>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -7077,7 +7809,7 @@ _sk_store_565_avx LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 3297 <_sk_store_565_avx+0x9a>
+ DB 235,159 ; jmp 3a8f <_sk_store_565_avx+0x9a>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -7106,7 +7838,7 @@ _sk_load_4444_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,245,0,0,0 ; jne 3417 <_sk_load_4444_avx+0x103>
+ DB 15,133,245,0,0,0 ; jne 3c0f <_sk_load_4444_avx+0x103>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -7163,9 +7895,9 @@ _sk_load_4444_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,249,254,255,255 ; ja 3328 <_sk_load_4444_avx+0x14>
+ DB 15,135,249,254,255,255 ; ja 3b20 <_sk_load_4444_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 3484 <_sk_load_4444_avx+0x170>
+ DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 3c7c <_sk_load_4444_avx+0x170>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -7177,12 +7909,12 @@ _sk_load_4444_avx LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,165,254,255,255 ; jmpq 3328 <_sk_load_4444_avx+0x14>
+ DB 233,165,254,255,255 ; jmpq 3b20 <_sk_load_4444_avx+0x14>
DB 144 ; nop
DB 243,255 ; repz (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 235,255 ; jmp 3489 <_sk_load_4444_avx+0x175>
+ DB 235,255 ; jmp 3c81 <_sk_load_4444_avx+0x175>
DB 255 ; (bad)
DB 255,227 ; jmpq *%rbx
DB 255 ; (bad)
@@ -7339,7 +8071,7 @@ _sk_store_4444_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 3704 <_sk_store_4444_avx+0xaf>
+ DB 117,10 ; jne 3efc <_sk_store_4444_avx+0xaf>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7347,9 +8079,9 @@ _sk_store_4444_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 3700 <_sk_store_4444_avx+0xab>
+ DB 119,236 ; ja 3ef8 <_sk_store_4444_avx+0xab>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3764 <_sk_store_4444_avx+0x10f>
+ DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3f5c <_sk_store_4444_avx+0x10f>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -7360,7 +8092,7 @@ _sk_store_4444_avx LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 3700 <_sk_store_4444_avx+0xab>
+ DB 235,159 ; jmp 3ef8 <_sk_store_4444_avx+0xab>
DB 15,31,0 ; nopl (%rax)
DB 244 ; hlt
DB 255 ; (bad)
@@ -7391,7 +8123,7 @@ _sk_load_8888_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,157,0,0,0 ; jne 382b <_sk_load_8888_avx+0xab>
+ DB 15,133,157,0,0,0 ; jne 4023 <_sk_load_8888_avx+0xab>
DB 196,65,124,16,12,186 ; vmovups (%r10,%rdi,4),%ymm9
DB 184,255,0,0,0 ; mov $0xff,%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
@@ -7429,9 +8161,9 @@ _sk_load_8888_avx LABEL PROC
DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,80,255,255,255 ; ja 3794 <_sk_load_8888_avx+0x14>
+ DB 15,135,80,255,255,255 ; ja 3f8c <_sk_load_8888_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 38d8 <_sk_load_8888_avx+0x158>
+ DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 40d0 <_sk_load_8888_avx+0x158>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -7454,7 +8186,7 @@ _sk_load_8888_avx LABEL PROC
DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
DB 196,195,49,34,4,186,0 ; vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
- DB 233,188,254,255,255 ; jmpq 3794 <_sk_load_8888_avx+0x14>
+ DB 233,188,254,255,255 ; jmpq 3f8c <_sk_load_8888_avx+0x14>
DB 238 ; out %al,(%dx)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -7580,7 +8312,7 @@ _sk_store_8888_avx LABEL PROC
DB 196,65,45,86,192 ; vorpd %ymm8,%ymm10,%ymm8
DB 196,65,53,86,192 ; vorpd %ymm8,%ymm9,%ymm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 3ad9 <_sk_store_8888_avx+0xa4>
+ DB 117,10 ; jne 42d1 <_sk_store_8888_avx+0xa4>
DB 196,65,124,17,4,185 ; vmovups %ymm8,(%r9,%rdi,4)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7588,9 +8320,9 @@ _sk_store_8888_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 3ad5 <_sk_store_8888_avx+0xa0>
+ DB 119,236 ; ja 42cd <_sk_store_8888_avx+0xa0>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,84,0,0,0 ; lea 0x54(%rip),%r8 # 3b48 <_sk_store_8888_avx+0x113>
+ DB 76,141,5,84,0,0,0 ; lea 0x54(%rip),%r8 # 4340 <_sk_store_8888_avx+0x113>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -7604,7 +8336,7 @@ _sk_store_8888_avx LABEL PROC
DB 196,67,121,22,68,185,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4)
DB 196,67,121,22,68,185,4,1 ; vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4)
DB 196,65,121,126,4,185 ; vmovd %xmm8,(%r9,%rdi,4)
- DB 235,143 ; jmp 3ad5 <_sk_store_8888_avx+0xa0>
+ DB 235,143 ; jmp 42cd <_sk_store_8888_avx+0xa0>
DB 102,144 ; xchg %ax,%ax
DB 246,255 ; idiv %bh
DB 255 ; (bad)
@@ -7634,7 +8366,7 @@ _sk_load_f16_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,139,0 ; mov (%rax),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,17,1,0,0 ; jne 3c83 <_sk_load_f16_avx+0x11f>
+ DB 15,133,17,1,0,0 ; jne 447b <_sk_load_f16_avx+0x11f>
DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8
DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2
DB 197,249,16,92,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -7696,29 +8428,29 @@ _sk_load_f16_avx LABEL PROC
DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,79 ; je 3ce2 <_sk_load_f16_avx+0x17e>
+ DB 116,79 ; je 44da <_sk_load_f16_avx+0x17e>
DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,67 ; jb 3ce2 <_sk_load_f16_avx+0x17e>
+ DB 114,67 ; jb 44da <_sk_load_f16_avx+0x17e>
DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,68 ; je 3cef <_sk_load_f16_avx+0x18b>
+ DB 116,68 ; je 44e7 <_sk_load_f16_avx+0x18b>
DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,56 ; jb 3cef <_sk_load_f16_avx+0x18b>
+ DB 114,56 ; jb 44e7 <_sk_load_f16_avx+0x18b>
DB 197,251,16,92,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,194,254,255,255 ; je 3b89 <_sk_load_f16_avx+0x25>
+ DB 15,132,194,254,255,255 ; je 4381 <_sk_load_f16_avx+0x25>
DB 197,225,22,92,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,178,254,255,255 ; jb 3b89 <_sk_load_f16_avx+0x25>
+ DB 15,130,178,254,255,255 ; jb 4381 <_sk_load_f16_avx+0x25>
DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9
- DB 233,167,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25>
+ DB 233,167,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,154,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25>
+ DB 233,154,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,145,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25>
+ DB 233,145,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25>
PUBLIC _sk_gather_f16_avx
_sk_gather_f16_avx LABEL PROC
@@ -7858,7 +8590,7 @@ _sk_store_f16_avx LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 3f78 <_sk_store_f16_avx+0xd2>
+ DB 117,31 ; jne 4770 <_sk_store_f16_avx+0xd2>
DB 196,65,120,17,28,248 ; vmovups %xmm11,(%r8,%rdi,8)
DB 196,65,120,17,84,248,16 ; vmovups %xmm10,0x10(%r8,%rdi,8)
DB 196,65,120,17,76,248,32 ; vmovups %xmm9,0x20(%r8,%rdi,8)
@@ -7867,22 +8599,22 @@ _sk_store_f16_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,214,28,248 ; vmovq %xmm11,(%r8,%rdi,8)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 3f74 <_sk_store_f16_avx+0xce>
+ DB 116,240 ; je 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%r8,%rdi,8)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 3f74 <_sk_store_f16_avx+0xce>
+ DB 114,227 ; jb 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,214,84,248,16 ; vmovq %xmm10,0x10(%r8,%rdi,8)
- DB 116,218 ; je 3f74 <_sk_store_f16_avx+0xce>
+ DB 116,218 ; je 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%r8,%rdi,8)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 3f74 <_sk_store_f16_avx+0xce>
+ DB 114,205 ; jb 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,214,76,248,32 ; vmovq %xmm9,0x20(%r8,%rdi,8)
- DB 116,196 ; je 3f74 <_sk_store_f16_avx+0xce>
+ DB 116,196 ; je 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%r8,%rdi,8)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 3f74 <_sk_store_f16_avx+0xce>
+ DB 114,183 ; jb 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,214,68,248,48 ; vmovq %xmm8,0x30(%r8,%rdi,8)
- DB 235,174 ; jmp 3f74 <_sk_store_f16_avx+0xce>
+ DB 235,174 ; jmp 476c <_sk_store_f16_avx+0xce>
PUBLIC _sk_load_u16_be_avx
_sk_load_u16_be_avx LABEL PROC
@@ -7890,7 +8622,7 @@ _sk_load_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,5,1,0,0 ; jne 40e1 <_sk_load_u16_be_avx+0x11b>
+ DB 15,133,5,1,0,0 ; jne 48d9 <_sk_load_u16_be_avx+0x11b>
DB 196,65,121,16,4,64 ; vmovupd (%r8,%rax,2),%xmm8
DB 196,193,121,16,84,64,16 ; vmovupd 0x10(%r8,%rax,2),%xmm2
DB 196,193,121,16,92,64,32 ; vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -7949,29 +8681,29 @@ _sk_load_u16_be_avx LABEL PROC
DB 196,65,123,16,4,64 ; vmovsd (%r8,%rax,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 4147 <_sk_load_u16_be_avx+0x181>
+ DB 116,85 ; je 493f <_sk_load_u16_be_avx+0x181>
DB 196,65,57,22,68,64,8 ; vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 4147 <_sk_load_u16_be_avx+0x181>
+ DB 114,72 ; jb 493f <_sk_load_u16_be_avx+0x181>
DB 196,193,123,16,84,64,16 ; vmovsd 0x10(%r8,%rax,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 4154 <_sk_load_u16_be_avx+0x18e>
+ DB 116,72 ; je 494c <_sk_load_u16_be_avx+0x18e>
DB 196,193,105,22,84,64,24 ; vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 4154 <_sk_load_u16_be_avx+0x18e>
+ DB 114,59 ; jb 494c <_sk_load_u16_be_avx+0x18e>
DB 196,193,123,16,92,64,32 ; vmovsd 0x20(%r8,%rax,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,205,254,255,255 ; je 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 15,132,205,254,255,255 ; je 47ef <_sk_load_u16_be_avx+0x31>
DB 196,193,97,22,92,64,40 ; vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,188,254,255,255 ; jb 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 15,130,188,254,255,255 ; jb 47ef <_sk_load_u16_be_avx+0x31>
DB 196,65,122,126,76,64,48 ; vmovq 0x30(%r8,%rax,2),%xmm9
- DB 233,176,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 233,176,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,163,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 233,163,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,154,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 233,154,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31>
PUBLIC _sk_load_rgb_u16_be_avx
_sk_load_rgb_u16_be_avx LABEL PROC
@@ -7979,7 +8711,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,127 ; lea (%rdi,%rdi,2),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,8,1,0,0 ; jne 4277 <_sk_load_rgb_u16_be_avx+0x11a>
+ DB 15,133,8,1,0,0 ; jne 4a6f <_sk_load_rgb_u16_be_avx+0x11a>
DB 196,193,122,111,4,64 ; vmovdqu (%r8,%rax,2),%xmm0
DB 196,193,122,111,84,64,12 ; vmovdqu 0xc(%r8,%rax,2),%xmm2
DB 196,193,122,111,76,64,24 ; vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -8038,36 +8770,36 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 196,193,121,110,4,64 ; vmovd (%r8,%rax,2),%xmm0
DB 196,193,121,196,68,64,4,2 ; vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 4290 <_sk_load_rgb_u16_be_avx+0x133>
- DB 233,19,255,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,5 ; jne 4a88 <_sk_load_rgb_u16_be_avx+0x133>
+ DB 233,19,255,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,76,64,6 ; vmovd 0x6(%r8,%rax,2),%xmm1
DB 196,65,113,196,68,64,10,2 ; vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 42bf <_sk_load_rgb_u16_be_avx+0x162>
+ DB 114,26 ; jb 4ab7 <_sk_load_rgb_u16_be_avx+0x162>
DB 196,193,121,110,76,64,12 ; vmovd 0xc(%r8,%rax,2),%xmm1
DB 196,193,113,196,84,64,16,2 ; vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 42c4 <_sk_load_rgb_u16_be_avx+0x167>
- DB 233,228,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,223,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 4abc <_sk_load_rgb_u16_be_avx+0x167>
+ DB 233,228,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,223,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,76,64,18 ; vmovd 0x12(%r8,%rax,2),%xmm1
DB 196,65,113,196,76,64,22,2 ; vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 42f3 <_sk_load_rgb_u16_be_avx+0x196>
+ DB 114,26 ; jb 4aeb <_sk_load_rgb_u16_be_avx+0x196>
DB 196,193,121,110,76,64,24 ; vmovd 0x18(%r8,%rax,2),%xmm1
DB 196,193,113,196,76,64,28,2 ; vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 42f8 <_sk_load_rgb_u16_be_avx+0x19b>
- DB 233,176,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,171,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 4af0 <_sk_load_rgb_u16_be_avx+0x19b>
+ DB 233,176,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,171,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,92,64,30 ; vmovd 0x1e(%r8,%rax,2),%xmm3
DB 196,65,97,196,92,64,34,2 ; vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 4321 <_sk_load_rgb_u16_be_avx+0x1c4>
+ DB 114,20 ; jb 4b19 <_sk_load_rgb_u16_be_avx+0x1c4>
DB 196,193,121,110,92,64,36 ; vmovd 0x24(%r8,%rax,2),%xmm3
DB 196,193,97,196,92,64,40,2 ; vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- DB 233,130,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,125,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,130,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,125,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
PUBLIC _sk_store_u16_be_avx
_sk_store_u16_be_avx LABEL PROC
@@ -8115,7 +8847,7 @@ _sk_store_u16_be_avx LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 4428 <_sk_store_u16_be_avx+0x102>
+ DB 117,31 ; jne 4c20 <_sk_store_u16_be_avx+0x102>
DB 196,1,120,17,28,72 ; vmovups %xmm11,(%r8,%r9,2)
DB 196,1,120,17,84,72,16 ; vmovups %xmm10,0x10(%r8,%r9,2)
DB 196,1,120,17,76,72,32 ; vmovups %xmm9,0x20(%r8,%r9,2)
@@ -8124,31 +8856,31 @@ _sk_store_u16_be_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,1,121,214,28,72 ; vmovq %xmm11,(%r8,%r9,2)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 116,240 ; je 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,92,72,8 ; vmovhpd %xmm11,0x8(%r8,%r9,2)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 114,227 ; jb 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,84,72,16 ; vmovq %xmm10,0x10(%r8,%r9,2)
- DB 116,218 ; je 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 116,218 ; je 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,84,72,24 ; vmovhpd %xmm10,0x18(%r8,%r9,2)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 114,205 ; jb 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,76,72,32 ; vmovq %xmm9,0x20(%r8,%r9,2)
- DB 116,196 ; je 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 116,196 ; je 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,76,72,40 ; vmovhpd %xmm9,0x28(%r8,%r9,2)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 114,183 ; jb 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,68,72,48 ; vmovq %xmm8,0x30(%r8,%r9,2)
- DB 235,174 ; jmp 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 235,174 ; jmp 4c1c <_sk_store_u16_be_avx+0xfe>
PUBLIC _sk_load_f32_avx
_sk_load_f32_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 119,110 ; ja 44ec <_sk_load_f32_avx+0x76>
+ DB 119,110 ; ja 4ce4 <_sk_load_f32_avx+0x76>
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
- DB 76,141,21,132,0,0,0 ; lea 0x84(%rip),%r10 # 4514 <_sk_load_f32_avx+0x9e>
+ DB 76,141,21,132,0,0,0 ; lea 0x84(%rip),%r10 # 4d0c <_sk_load_f32_avx+0x9e>
DB 73,99,4,138 ; movslq (%r10,%rcx,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -8205,7 +8937,7 @@ _sk_store_f32_avx LABEL PROC
DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8
DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11
DB 72,133,201 ; test %rcx,%rcx
- DB 117,55 ; jne 45a1 <_sk_store_f32_avx+0x6d>
+ DB 117,55 ; jne 4d99 <_sk_store_f32_avx+0x6d>
DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -8218,22 +8950,22 @@ _sk_store_f32_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 459d <_sk_store_f32_avx+0x69>
+ DB 116,240 ; je 4d95 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 459d <_sk_store_f32_avx+0x69>
+ DB 114,227 ; jb 4d95 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4)
- DB 116,218 ; je 459d <_sk_store_f32_avx+0x69>
+ DB 116,218 ; je 4d95 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 459d <_sk_store_f32_avx+0x69>
+ DB 114,205 ; jb 4d95 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- DB 116,195 ; je 459d <_sk_store_f32_avx+0x69>
+ DB 116,195 ; je 4d95 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,181 ; jb 459d <_sk_store_f32_avx+0x69>
+ DB 114,181 ; jb 4d95 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- DB 235,171 ; jmp 459d <_sk_store_f32_avx+0x69>
+ DB 235,171 ; jmp 4d95 <_sk_store_f32_avx+0x69>
PUBLIC _sk_clamp_x_avx
_sk_clamp_x_avx LABEL PROC
@@ -8537,7 +9269,7 @@ _sk_linear_gradient_avx LABEL PROC
DB 196,226,125,24,88,28 ; vbroadcastss 0x1c(%rax),%ymm3
DB 76,139,0 ; mov (%rax),%r8
DB 77,133,192 ; test %r8,%r8
- DB 15,132,146,0,0,0 ; je 4b55 <_sk_linear_gradient_avx+0xb8>
+ DB 15,132,146,0,0,0 ; je 534d <_sk_linear_gradient_avx+0xb8>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 196,65,28,87,228 ; vxorps %ymm12,%ymm12,%ymm12
@@ -8564,8 +9296,8 @@ _sk_linear_gradient_avx LABEL PROC
DB 196,227,13,74,219,208 ; vblendvps %ymm13,%ymm3,%ymm14,%ymm3
DB 72,131,192,36 ; add $0x24,%rax
DB 73,255,200 ; dec %r8
- DB 117,140 ; jne 4adf <_sk_linear_gradient_avx+0x42>
- DB 235,20 ; jmp 4b69 <_sk_linear_gradient_avx+0xcc>
+ DB 117,140 ; jne 52d7 <_sk_linear_gradient_avx+0x42>
+ DB 235,20 ; jmp 5361 <_sk_linear_gradient_avx+0xcc>
DB 196,65,36,87,219 ; vxorps %ymm11,%ymm11,%ymm11
DB 196,65,44,87,210 ; vxorps %ymm10,%ymm10,%ymm10
DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
@@ -11142,6 +11874,392 @@ _sk_table_a_sse41 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_sse41 ; SSE4.1 stage: per lane of xmm0, picks C*v+F when v<=D, else an approximate (A*v+B)^G + E, then clamps to [0,1]
+_sk_parametric_r_sse41 LABEL PROC ; the letters G,A,B,C,D,E,F name the floats at ctx+0,+4,+8,+0xc,+0x10,+0x14,+0x18 -- presumably SkJumper_ParametricTransferFunction; confirm against the caller
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next 8-byte word at rsi (used below as the ctx pointer); rsi advances
+ DB 243,68,15,16,64,16 ; movss 0x10(%rax),%xmm8 ; D
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 ; broadcast lane 0 to all four lanes (same idiom throughout)
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9 ; C
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,200 ; mulps %xmm0,%xmm9 ; xmm9 = C*v
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10 ; A
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,208 ; mulps %xmm0,%xmm10 ; xmm10 = A*v
+ DB 65,15,194,192,2 ; cmpleps %xmm8,%xmm0 ; xmm0 = lane mask (v <= D); v itself is no longer needed
+ DB 243,68,15,16,64,24 ; movss 0x18(%rax),%xmm8 ; F
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 69,15,88,200 ; addps %xmm8,%xmm9 ; xmm9 = C*v + F (linear segment)
+ DB 243,68,15,16,0 ; movss (%rax),%xmm8 ; G (broadcast later, just before it is used)
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11 ; B
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10 ; xmm10 = x = A*v + B
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 ; treat the float bits of x as int32 and convert to float (start of log2 approximation)
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx ; float 2^-23
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,227 ; mulps %xmm11,%xmm12 ; xmm12 = bits(x) * 2^-23
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx ; float 127.0
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 ; remove the exponent bias
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx ; mantissa bit mask
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,219,0 ; pshufd $0x0,%xmm11,%xmm11
+ DB 102,69,15,219,218 ; pand %xmm10,%xmm11 ; mantissa bits of x
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx ; bit pattern of float 0.5
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,210,0 ; pshufd $0x0,%xmm10,%xmm10
+ DB 102,69,15,235,211 ; por %xmm11,%xmm10 ; xmm10 = m, the mantissa of x rescaled into [0.5,1)
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx ; float ~2.7745
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,220 ; addps %xmm12,%xmm11
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx ; float ~1.4980
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12
+ DB 69,15,92,220 ; subps %xmm12,%xmm11 ; subtract ~1.4980*m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx ; float ~1.7259
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx ; float ~0.3521
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,234 ; addps %xmm10,%xmm13
+ DB 69,15,94,229 ; divps %xmm13,%xmm12
+ DB 69,15,92,220 ; subps %xmm12,%xmm11 ; xmm11 ~= log2(x)
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 ; broadcast G
+ DB 69,15,89,195 ; mulps %xmm11,%xmm8 ; xmm8 = y = G * log2(x)
+ DB 102,69,15,58,8,216,1 ; roundps $0x1,%xmm8,%xmm11 ; xmm11 = floor(y) (rounding mode 1 = toward -inf)
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx ; float 2^23
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx ; float ~121.274
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,224 ; addps %xmm8,%xmm12
+ DB 69,15,92,195 ; subps %xmm11,%xmm8 ; xmm8 = f = y - floor(y)
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx ; float ~1.4901
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,89,216 ; mulps %xmm8,%xmm11
+ DB 69,15,92,227 ; subps %xmm11,%xmm12
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx ; float ~27.728
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx ; float ~4.8425
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,92,232 ; subps %xmm8,%xmm13
+ DB 69,15,94,221 ; divps %xmm13,%xmm11
+ DB 69,15,88,220 ; addps %xmm12,%xmm11
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11 ; scale by 2^23
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 ; to int32; the result is then used as float bits, i.e. ~2^y
+ DB 243,68,15,16,64,20 ; movss 0x14(%rax),%xmm8 ; E
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 69,15,88,194 ; addps %xmm10,%xmm8 ; xmm8 = ~x^G + E
+ DB 102,69,15,56,20,193 ; blendvps %xmm0,%xmm9,%xmm8 ; lanes where the mask in xmm0 is set take the linear segment
+ DB 15,87,192 ; xorps %xmm0,%xmm0 ; 0.0
+ DB 68,15,95,192 ; maxps %xmm0,%xmm8 ; clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax ; float 1.0 (ctx pointer in rax is dead from here)
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 68,15,93,192 ; minps %xmm0,%xmm8 ; clamp above at 1
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next word from the stream, used as the jump target
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0 ; result returned in xmm0
+ DB 255,224 ; jmpq *%rax ; tail-jump to the loaded address
+
+PUBLIC _sk_parametric_g_sse41 ; SSE4.1 stage: per lane of xmm1, picks C*v+F when v<=D, else an approximate (A*v+B)^G + E, then clamps to [0,1]
+_sk_parametric_g_sse41 LABEL PROC ; the letters G,A,B,C,D,E,F name the floats at ctx+0,+4,+8,+0xc,+0x10,+0x14,+0x18 -- presumably SkJumper_ParametricTransferFunction; confirm against the caller
+ DB 68,15,40,192 ; movaps %xmm0,%xmm8 ; save xmm0: it is needed below as the implicit blendvps mask register
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next 8-byte word at rsi (used below as the ctx pointer); rsi advances
+ DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10 ; D
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 ; broadcast lane 0 to all four lanes (same idiom throughout)
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9 ; C
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,201 ; mulps %xmm1,%xmm9 ; xmm9 = C*v
+ DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11 ; A
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 68,15,89,217 ; mulps %xmm1,%xmm11 ; xmm11 = A*v
+ DB 15,40,193 ; movaps %xmm1,%xmm0
+ DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0 ; xmm0 = lane mask (v <= D)
+ DB 243,15,16,72,24 ; movss 0x18(%rax),%xmm1 ; F (xmm1 is reused as scratch from here on)
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 68,15,88,201 ; addps %xmm1,%xmm9 ; xmm9 = C*v + F (linear segment)
+ DB 243,68,15,16,32 ; movss (%rax),%xmm12 ; G (broadcast later, just before it is used)
+ DB 243,15,16,72,8 ; movss 0x8(%rax),%xmm1 ; B
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 68,15,88,217 ; addps %xmm1,%xmm11 ; xmm11 = x = A*v + B
+ DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10 ; treat the float bits of x as int32 and convert to float (start of log2 approximation)
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx ; float 2^-23
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,234 ; mulps %xmm10,%xmm13 ; xmm13 = bits(x) * 2^-23
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx ; float 127.0
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 68,15,92,233 ; subps %xmm1,%xmm13 ; remove the exponent bias
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx ; mantissa bit mask
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 102,68,15,112,209,0 ; pshufd $0x0,%xmm1,%xmm10
+ DB 102,69,15,219,211 ; pand %xmm11,%xmm10 ; mantissa bits of x
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx ; bit pattern of float 0.5
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 102,68,15,112,217,0 ; pshufd $0x0,%xmm1,%xmm11
+ DB 102,69,15,235,218 ; por %xmm10,%xmm11 ; xmm11 = m, the mantissa of x rescaled into [0.5,1)
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx ; float ~2.7745
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,88,213 ; addps %xmm13,%xmm10
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx ; float ~1.4980
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,89,203 ; mulps %xmm11,%xmm1
+ DB 68,15,92,209 ; subps %xmm1,%xmm10 ; subtract ~1.4980*m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx ; float ~1.7259
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx ; float ~0.3521
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,88,203 ; addps %xmm11,%xmm1
+ DB 68,15,94,233 ; divps %xmm1,%xmm13
+ DB 69,15,92,213 ; subps %xmm13,%xmm10 ; xmm10 ~= log2(x)
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 ; broadcast G
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12 ; xmm12 = y = G * log2(x)
+ DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11 ; xmm11 = floor(y) (rounding mode 1 = toward -inf)
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx ; float 2^23
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx ; float ~121.274
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,236 ; addps %xmm12,%xmm13
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 ; xmm12 = f = y - floor(y)
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx ; float ~1.4901
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,89,204 ; mulps %xmm12,%xmm1
+ DB 68,15,92,233 ; subps %xmm1,%xmm13
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx ; float ~27.728
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx ; float ~4.8425
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,92,204 ; subps %xmm12,%xmm1
+ DB 68,15,94,217 ; divps %xmm1,%xmm11
+ DB 69,15,88,221 ; addps %xmm13,%xmm11
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11 ; scale by 2^23
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 ; to int32; the result is then used as float bits, i.e. ~2^y
+ DB 243,15,16,72,20 ; movss 0x14(%rax),%xmm1 ; E
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,88,202 ; addps %xmm10,%xmm1 ; xmm1 = ~x^G + E
+ DB 102,65,15,56,20,201 ; blendvps %xmm0,%xmm9,%xmm1 ; lanes where the mask in xmm0 is set take the linear segment
+ DB 15,87,192 ; xorps %xmm0,%xmm0 ; 0.0
+ DB 15,95,200 ; maxps %xmm0,%xmm1 ; clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax ; float 1.0 (ctx pointer in rax is dead from here)
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,93,200 ; minps %xmm0,%xmm1 ; clamp above at 1; result stays in xmm1
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next word from the stream, used as the jump target
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0 ; restore the xmm0 saved on entry
+ DB 255,224 ; jmpq *%rax ; tail-jump to the loaded address
+
+PUBLIC _sk_parametric_b_sse41 ; SSE4.1 stage: per lane of xmm2, picks C*v+F when v<=D, else an approximate (A*v+B)^G + E, then clamps to [0,1]
+_sk_parametric_b_sse41 LABEL PROC ; the letters G,A,B,C,D,E,F name the floats at ctx+0,+4,+8,+0xc,+0x10,+0x14,+0x18 -- presumably SkJumper_ParametricTransferFunction; confirm against the caller
+ DB 68,15,40,192 ; movaps %xmm0,%xmm8 ; save xmm0: it is needed below as the implicit blendvps mask register
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next 8-byte word at rsi (used below as the ctx pointer); rsi advances
+ DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10 ; D
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 ; broadcast lane 0 to all four lanes (same idiom throughout)
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9 ; C
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,202 ; mulps %xmm2,%xmm9 ; xmm9 = C*v
+ DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11 ; A
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 68,15,89,218 ; mulps %xmm2,%xmm11 ; xmm11 = A*v
+ DB 15,40,194 ; movaps %xmm2,%xmm0
+ DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0 ; xmm0 = lane mask (v <= D)
+ DB 243,15,16,80,24 ; movss 0x18(%rax),%xmm2 ; F (xmm2 is reused as scratch from here on)
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 68,15,88,202 ; addps %xmm2,%xmm9 ; xmm9 = C*v + F (linear segment)
+ DB 243,68,15,16,32 ; movss (%rax),%xmm12 ; G (broadcast later, just before it is used)
+ DB 243,15,16,80,8 ; movss 0x8(%rax),%xmm2 ; B
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 68,15,88,218 ; addps %xmm2,%xmm11 ; xmm11 = x = A*v + B
+ DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10 ; treat the float bits of x as int32 and convert to float (start of log2 approximation)
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx ; float 2^-23
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,234 ; mulps %xmm10,%xmm13 ; xmm13 = bits(x) * 2^-23
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx ; float 127.0
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 68,15,92,234 ; subps %xmm2,%xmm13 ; remove the exponent bias
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx ; mantissa bit mask
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 102,68,15,112,210,0 ; pshufd $0x0,%xmm2,%xmm10
+ DB 102,69,15,219,211 ; pand %xmm11,%xmm10 ; mantissa bits of x
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx ; bit pattern of float 0.5
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 102,68,15,112,218,0 ; pshufd $0x0,%xmm2,%xmm11
+ DB 102,69,15,235,218 ; por %xmm10,%xmm11 ; xmm11 = m, the mantissa of x rescaled into [0.5,1)
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx ; float ~2.7745
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,88,213 ; addps %xmm13,%xmm10
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx ; float ~1.4980
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,89,211 ; mulps %xmm11,%xmm2
+ DB 68,15,92,210 ; subps %xmm2,%xmm10 ; subtract ~1.4980*m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx ; float ~1.7259
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx ; float ~0.3521
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,88,211 ; addps %xmm11,%xmm2
+ DB 68,15,94,234 ; divps %xmm2,%xmm13
+ DB 69,15,92,213 ; subps %xmm13,%xmm10 ; xmm10 ~= log2(x)
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 ; broadcast G
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12 ; xmm12 = y = G * log2(x)
+ DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11 ; xmm11 = floor(y) (rounding mode 1 = toward -inf)
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx ; float 2^23
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx ; float ~121.274
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,236 ; addps %xmm12,%xmm13
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 ; xmm12 = f = y - floor(y)
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx ; float ~1.4901
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,89,212 ; mulps %xmm12,%xmm2
+ DB 68,15,92,234 ; subps %xmm2,%xmm13
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx ; float ~27.728
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx ; float ~4.8425
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,92,212 ; subps %xmm12,%xmm2
+ DB 68,15,94,218 ; divps %xmm2,%xmm11
+ DB 69,15,88,221 ; addps %xmm13,%xmm11
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11 ; scale by 2^23
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 ; to int32; the result is then used as float bits, i.e. ~2^y
+ DB 243,15,16,80,20 ; movss 0x14(%rax),%xmm2 ; E
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,88,210 ; addps %xmm10,%xmm2 ; xmm2 = ~x^G + E
+ DB 102,65,15,56,20,209 ; blendvps %xmm0,%xmm9,%xmm2 ; lanes where the mask in xmm0 is set take the linear segment
+ DB 15,87,192 ; xorps %xmm0,%xmm0 ; 0.0
+ DB 15,95,208 ; maxps %xmm0,%xmm2 ; clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax ; float 1.0 (ctx pointer in rax is dead from here)
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,93,208 ; minps %xmm0,%xmm2 ; clamp above at 1; result stays in xmm2
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next word from the stream, used as the jump target
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0 ; restore the xmm0 saved on entry
+ DB 255,224 ; jmpq *%rax ; tail-jump to the loaded address
+
+PUBLIC _sk_parametric_a_sse41 ; SSE4.1 stage: per lane of xmm3, picks C*v+F when v<=D, else an approximate (A*v+B)^G + E, then clamps to [0,1]
+_sk_parametric_a_sse41 LABEL PROC ; the letters G,A,B,C,D,E,F name the floats at ctx+0,+4,+8,+0xc,+0x10,+0x14,+0x18 -- presumably SkJumper_ParametricTransferFunction; confirm against the caller
+ DB 68,15,40,192 ; movaps %xmm0,%xmm8 ; save xmm0: it is needed below as the implicit blendvps mask register
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next 8-byte word at rsi (used below as the ctx pointer); rsi advances
+ DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10 ; D
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 ; broadcast lane 0 to all four lanes (same idiom throughout)
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9 ; C
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,203 ; mulps %xmm3,%xmm9 ; xmm9 = C*v
+ DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11 ; A
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 68,15,89,219 ; mulps %xmm3,%xmm11 ; xmm11 = A*v
+ DB 15,40,195 ; movaps %xmm3,%xmm0
+ DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0 ; xmm0 = lane mask (v <= D)
+ DB 243,15,16,88,24 ; movss 0x18(%rax),%xmm3 ; F (xmm3 is reused as scratch from here on)
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 68,15,88,203 ; addps %xmm3,%xmm9 ; xmm9 = C*v + F (linear segment)
+ DB 243,68,15,16,32 ; movss (%rax),%xmm12 ; G (broadcast later, just before it is used)
+ DB 243,15,16,88,8 ; movss 0x8(%rax),%xmm3 ; B
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 68,15,88,219 ; addps %xmm3,%xmm11 ; xmm11 = x = A*v + B
+ DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10 ; treat the float bits of x as int32 and convert to float (start of log2 approximation)
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx ; float 2^-23
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,234 ; mulps %xmm10,%xmm13 ; xmm13 = bits(x) * 2^-23
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx ; float 127.0
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 68,15,92,235 ; subps %xmm3,%xmm13 ; remove the exponent bias
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx ; mantissa bit mask
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 102,68,15,112,211,0 ; pshufd $0x0,%xmm3,%xmm10
+ DB 102,69,15,219,211 ; pand %xmm11,%xmm10 ; mantissa bits of x
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx ; bit pattern of float 0.5
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 102,68,15,112,219,0 ; pshufd $0x0,%xmm3,%xmm11
+ DB 102,69,15,235,218 ; por %xmm10,%xmm11 ; xmm11 = m, the mantissa of x rescaled into [0.5,1)
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx ; float ~2.7745
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,88,213 ; addps %xmm13,%xmm10
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx ; float ~1.4980
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,89,219 ; mulps %xmm11,%xmm3
+ DB 68,15,92,211 ; subps %xmm3,%xmm10 ; subtract ~1.4980*m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx ; float ~1.7259
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx ; float ~0.3521
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,88,219 ; addps %xmm11,%xmm3
+ DB 68,15,94,235 ; divps %xmm3,%xmm13
+ DB 69,15,92,213 ; subps %xmm13,%xmm10 ; xmm10 ~= log2(x)
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 ; broadcast G
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12 ; xmm12 = y = G * log2(x)
+ DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11 ; xmm11 = floor(y) (rounding mode 1 = toward -inf)
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx ; float 2^23
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx ; float ~121.274
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,236 ; addps %xmm12,%xmm13
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 ; xmm12 = f = y - floor(y)
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx ; float ~1.4901
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,89,220 ; mulps %xmm12,%xmm3
+ DB 68,15,92,235 ; subps %xmm3,%xmm13
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx ; float ~27.728
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx ; float ~4.8425
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,92,220 ; subps %xmm12,%xmm3
+ DB 68,15,94,219 ; divps %xmm3,%xmm11
+ DB 69,15,88,221 ; addps %xmm13,%xmm11
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11 ; scale by 2^23
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 ; to int32; the result is then used as float bits, i.e. ~2^y
+ DB 243,15,16,88,20 ; movss 0x14(%rax),%xmm3 ; E
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,88,218 ; addps %xmm10,%xmm3 ; xmm3 = ~x^G + E
+ DB 102,65,15,56,20,217 ; blendvps %xmm0,%xmm9,%xmm3 ; lanes where the mask in xmm0 is set take the linear segment
+ DB 15,87,192 ; xorps %xmm0,%xmm0 ; 0.0
+ DB 15,95,216 ; maxps %xmm0,%xmm3 ; clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax ; float 1.0 (ctx pointer in rax is dead from here)
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,93,216 ; minps %xmm0,%xmm3 ; clamp above at 1; result stays in xmm3
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next word from the stream, used as the jump target
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0 ; restore the xmm0 saved on entry
+ DB 255,224 ; jmpq *%rax ; tail-jump to the loaded address
+
PUBLIC _sk_load_a8_sse41
_sk_load_a8_sse41 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -11266,9 +12384,9 @@ _sk_gather_i8_sse41 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 210a <_sk_gather_i8_sse41+0xf>
+ DB 116,5 ; je 27a3 <_sk_gather_i8_sse41+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 210c <_sk_gather_i8_sse41+0x11>
+ DB 235,2 ; jmp 27a5 <_sk_gather_i8_sse41+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1
@@ -12362,7 +13480,7 @@ _sk_linear_gradient_sse41 LABEL PROC
DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
DB 72,139,8 ; mov (%rax),%rcx
DB 72,133,201 ; test %rcx,%rcx
- DB 15,132,4,1,0,0 ; je 3338 <_sk_linear_gradient_sse41+0x13e>
+ DB 15,132,4,1,0,0 ; je 39d1 <_sk_linear_gradient_sse41+0x13e>
DB 72,131,236,88 ; sub $0x58,%rsp
DB 15,41,36,36 ; movaps %xmm4,(%rsp)
DB 15,41,108,36,16 ; movaps %xmm5,0x10(%rsp)
@@ -12413,13 +13531,13 @@ _sk_linear_gradient_sse41 LABEL PROC
DB 15,40,196 ; movaps %xmm4,%xmm0
DB 72,131,192,36 ; add $0x24,%rax
DB 72,255,201 ; dec %rcx
- DB 15,133,65,255,255,255 ; jne 3260 <_sk_linear_gradient_sse41+0x66>
+ DB 15,133,65,255,255,255 ; jne 38f9 <_sk_linear_gradient_sse41+0x66>
DB 15,40,124,36,48 ; movaps 0x30(%rsp),%xmm7
DB 15,40,116,36,32 ; movaps 0x20(%rsp),%xmm6
DB 15,40,108,36,16 ; movaps 0x10(%rsp),%xmm5
DB 15,40,36,36 ; movaps (%rsp),%xmm4
DB 72,131,196,88 ; add $0x58,%rsp
- DB 235,13 ; jmp 3345 <_sk_linear_gradient_sse41+0x14b>
+ DB 235,13 ; jmp 39de <_sk_linear_gradient_sse41+0x14b>
DB 15,87,201 ; xorps %xmm1,%xmm1
DB 15,87,210 ; xorps %xmm2,%xmm2
DB 15,87,219 ; xorps %xmm3,%xmm3
@@ -15098,6 +16216,414 @@ _sk_table_a_sse2 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_sse2 ; SSE2 stage: per lane of xmm0, picks C*v+F when v<=D, else an approximate (A*v+B)^G + E, then clamps to [0,1]
+_sk_parametric_r_sse2 LABEL PROC ; the letters G,A,B,C,D,E,F name the floats at ctx+0,+4,+8,+0xc,+0x10,+0x14,+0x18 -- presumably SkJumper_ParametricTransferFunction; confirm against the caller
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next 8-byte word at rsi (used below as the ctx pointer); rsi advances
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 ; D
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 ; broadcast lane 0 to all four lanes (same idiom throughout)
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8 ; C
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,192 ; mulps %xmm0,%xmm8 ; xmm8 = C*v
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10 ; A
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,208 ; mulps %xmm0,%xmm10 ; xmm10 = A*v
+ DB 65,15,194,193,2 ; cmpleps %xmm9,%xmm0 ; xmm0 = lane mask (v <= D); v itself is no longer needed
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9 ; F
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8 ; xmm8 = C*v + F (linear segment)
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9 ; G (broadcast later, just before it is used)
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11 ; B
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10 ; xmm10 = x = A*v + B
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 ; treat the float bits of x as int32 and convert to float (start of log2 approximation)
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx ; float 2^-23
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 ; xmm13 = bits(x) * 2^-23
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx ; float 127.0
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13 ; remove the exponent bias
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx ; mantissa bit mask
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12 ; mantissa bits of x
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx ; bit pattern of float 0.5
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11 ; xmm11 = m, the mantissa of x rescaled into [0.5,1)
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx ; float ~2.7745
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx ; float ~1.4980
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12 ; subtract ~1.4980*m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx ; float ~1.7259
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx ; float ~0.3521
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx ; float 1.0 (used for the floor fix-up and the final clamp)
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d ; float 2^23, kept in r8d until it is moved to xmm11 below
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx ; float ~121.274
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12 ; xmm12 ~= log2(x)
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 ; broadcast G
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9 ; xmm9 = y = G * log2(x)
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 ; truncate y toward zero (floor is built by hand here; no roundps is used)
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 ; xmm11 = trunc(y) as float
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12 ; keep a copy of y
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15 ; xmm15 = y + ~121.274
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 ; mask of lanes where y < trunc(y)
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 ; broadcast 1.0
+ DB 69,15,84,234 ; andps %xmm10,%xmm13 ; 1.0 in those lanes, 0.0 elsewhere
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9 ; xmm9 = 0.0 for the final clamp
+ DB 69,15,92,221 ; subps %xmm13,%xmm11 ; xmm11 = floor(y)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 ; xmm12 = f = y - floor(y)
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11 ; 2^23
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx ; float ~1.4901
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx ; float ~27.728
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx ; float ~4.8425
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 ; scale by 2^23
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 ; to int32; the result is then used as float bits, i.e. ~2^y
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12 ; E
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12 ; xmm12 = ~x^G + E
+ DB 68,15,84,192 ; andps %xmm0,%xmm8 ; linear segment kept where mask is set
+ DB 65,15,85,196 ; andnps %xmm12,%xmm0 ; power segment kept where mask is clear
+ DB 65,15,86,192 ; orps %xmm8,%xmm0 ; merge the two (and/andn/or select; no blendvps is used)
+ DB 65,15,95,193 ; maxps %xmm9,%xmm0 ; clamp below at 0
+ DB 65,15,93,194 ; minps %xmm10,%xmm0 ; clamp above at 1; result in xmm0
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next word from the stream, used as the jump target
+ DB 255,224 ; jmpq *%rax ; tail-jump to the loaded address
+
+PUBLIC _sk_parametric_g_sse2 ; SSE2 stage: per lane of xmm1, picks C*v+F when v<=D, else an approximate (A*v+B)^G + E, then clamps to [0,1]
+_sk_parametric_g_sse2 LABEL PROC ; the letters G,A,B,C,D,E,F name the floats at ctx+0,+4,+8,+0xc,+0x10,+0x14,+0x18 -- presumably SkJumper_ParametricTransferFunction; confirm against the caller
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next 8-byte word at rsi (used below as the ctx pointer); rsi advances
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 ; D
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 ; broadcast lane 0 to all four lanes (same idiom throughout)
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8 ; C
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,193 ; mulps %xmm1,%xmm8 ; xmm8 = C*v
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10 ; A
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,209 ; mulps %xmm1,%xmm10 ; xmm10 = A*v
+ DB 65,15,194,201,2 ; cmpleps %xmm9,%xmm1 ; xmm1 = lane mask (v <= D); v itself is no longer needed
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9 ; F
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8 ; xmm8 = C*v + F (linear segment)
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9 ; G (broadcast later, just before it is used)
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11 ; B
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10 ; xmm10 = x = A*v + B
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 ; treat the float bits of x as int32 and convert to float (start of log2 approximation)
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx ; float 2^-23
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 ; xmm13 = bits(x) * 2^-23
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx ; float 127.0
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13 ; remove the exponent bias
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx ; mantissa bit mask
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12 ; mantissa bits of x
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx ; bit pattern of float 0.5
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11 ; xmm11 = m, the mantissa of x rescaled into [0.5,1)
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx ; float ~2.7745
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx ; float ~1.4980
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12 ; subtract ~1.4980*m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx ; float ~1.7259
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx ; float ~0.3521
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx ; float 1.0 (used for the floor fix-up and the final clamp)
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d ; float 2^23, kept in r8d until it is moved to xmm11 below
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx ; float ~121.274
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12 ; xmm12 ~= log2(x)
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 ; broadcast G
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9 ; xmm9 = y = G * log2(x)
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 ; truncate y toward zero (floor is built by hand here; no roundps is used)
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 ; xmm11 = trunc(y) as float
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12 ; keep a copy of y
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15 ; xmm15 = y + ~121.274
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 ; mask of lanes where y < trunc(y)
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 ; broadcast 1.0
+ DB 69,15,84,234 ; andps %xmm10,%xmm13 ; 1.0 in those lanes, 0.0 elsewhere
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9 ; xmm9 = 0.0 for the final clamp
+ DB 69,15,92,221 ; subps %xmm13,%xmm11 ; xmm11 = floor(y)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 ; xmm12 = f = y - floor(y)
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11 ; 2^23
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx ; float ~1.4901
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx ; float ~27.728
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx ; float ~4.8425
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 ; scale by 2^23
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 ; to int32; the result is then used as float bits, i.e. ~2^y
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12 ; E
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12 ; xmm12 = ~x^G + E
+ DB 68,15,84,193 ; andps %xmm1,%xmm8 ; linear segment kept where mask is set
+ DB 65,15,85,204 ; andnps %xmm12,%xmm1 ; power segment kept where mask is clear
+ DB 65,15,86,200 ; orps %xmm8,%xmm1 ; merge the two (and/andn/or select; no blendvps is used)
+ DB 65,15,95,201 ; maxps %xmm9,%xmm1 ; clamp below at 0
+ DB 65,15,93,202 ; minps %xmm10,%xmm1 ; clamp above at 1; result in xmm1
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next word from the stream, used as the jump target
+ DB 255,224 ; jmpq *%rax ; tail-jump to the loaded address
+
+PUBLIC _sk_parametric_b_sse2 ; SSE2 stage: per lane of xmm2, picks C*v+F when v<=D, else an approximate (A*v+B)^G + E, then clamps to [0,1]
+_sk_parametric_b_sse2 LABEL PROC ; the letters G,A,B,C,D,E,F name the floats at ctx+0,+4,+8,+0xc,+0x10,+0x14,+0x18 -- presumably SkJumper_ParametricTransferFunction; confirm against the caller
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next 8-byte word at rsi (used below as the ctx pointer); rsi advances
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 ; D
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 ; broadcast lane 0 to all four lanes (same idiom throughout)
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8 ; C
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,194 ; mulps %xmm2,%xmm8 ; xmm8 = C*v
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10 ; A
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,210 ; mulps %xmm2,%xmm10 ; xmm10 = A*v
+ DB 65,15,194,209,2 ; cmpleps %xmm9,%xmm2 ; xmm2 = lane mask (v <= D); v itself is no longer needed
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9 ; F
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8 ; xmm8 = C*v + F (linear segment)
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9 ; G (broadcast later, just before it is used)
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11 ; B
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10 ; xmm10 = x = A*v + B
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 ; treat the float bits of x as int32 and convert to float (start of log2 approximation)
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx ; float 2^-23
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 ; xmm13 = bits(x) * 2^-23
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx ; float 127.0
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13 ; remove the exponent bias
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx ; mantissa bit mask
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12 ; mantissa bits of x
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx ; bit pattern of float 0.5
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11 ; xmm11 = m, the mantissa of x rescaled into [0.5,1)
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx ; float ~2.7745
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx ; float ~1.4980
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12 ; subtract ~1.4980*m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx ; float ~1.7259
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx ; float ~0.3521
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx ; float 1.0 (used for the floor fix-up and the final clamp)
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d ; float 2^23, kept in r8d until it is moved to xmm11 below
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx ; float ~121.274
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12 ; xmm12 ~= log2(x)
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 ; broadcast G
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9 ; xmm9 = y = G * log2(x)
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 ; truncate y toward zero (floor is built by hand here; no roundps is used)
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 ; xmm11 = trunc(y) as float
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12 ; keep a copy of y
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15 ; xmm15 = y + ~121.274
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 ; mask of lanes where y < trunc(y)
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 ; broadcast 1.0
+ DB 69,15,84,234 ; andps %xmm10,%xmm13 ; 1.0 in those lanes, 0.0 elsewhere
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9 ; xmm9 = 0.0 for the final clamp
+ DB 69,15,92,221 ; subps %xmm13,%xmm11 ; xmm11 = floor(y)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 ; xmm12 = f = y - floor(y)
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11 ; 2^23
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx ; float ~1.4901
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx ; float ~27.728
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx ; float ~4.8425
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 ; scale by 2^23
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 ; to int32; the result is then used as float bits, i.e. ~2^y
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12 ; E
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12 ; xmm12 = ~x^G + E
+ DB 68,15,84,194 ; andps %xmm2,%xmm8 ; linear segment kept where mask is set
+ DB 65,15,85,212 ; andnps %xmm12,%xmm2 ; power segment kept where mask is clear
+ DB 65,15,86,208 ; orps %xmm8,%xmm2 ; merge the two (and/andn/or select; no blendvps is used)
+ DB 65,15,95,209 ; maxps %xmm9,%xmm2 ; clamp below at 0
+ DB 65,15,93,210 ; minps %xmm10,%xmm2 ; clamp above at 1; result in xmm2
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next word from the stream, used as the jump target
+ DB 255,224 ; jmpq *%rax ; tail-jump to the loaded address
+
+PUBLIC _sk_parametric_a_sse2 ; generated SSE2 code matching STAGE(parametric_a): rewrites %xmm3 with the clamped parametric curve
+_sk_parametric_a_sse2 LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax -- %rax = next pointer from the stream at %rsi; used below as the 7-float ctx (G,A,B,C,D,E,F)
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 -- D
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8 -- C
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,195 ; mulps %xmm3,%xmm8 -- C*v
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10 -- A
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,211 ; mulps %xmm3,%xmm10 -- A*v
+ DB 65,15,194,217,2 ; cmpleps %xmm9,%xmm3 -- %xmm3 becomes the lane mask (v <= D)
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9 -- F
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8 -- linear branch: C*v + F
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9 -- G (broadcast later, just before it is used)
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11 -- B
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10 -- x = A*v + B, the input to the log2 approximation
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 -- x's raw bits converted from integer to float
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx -- 2^-23
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx -- 127.0f
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13 -- e = bits * 2^-23 - 127
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx -- mask for the low 23 (mantissa) bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx -- bit pattern of 0.5f
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11 -- m = mantissa bits of x OR'd with 0x3f000000
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx -- 2.774485010f
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx -- 1.498030302f
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx -- 1.725879990f
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx -- 0.3520887068f
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx -- 1.0f (floor fix-up below and the final upper clamp)
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- 2^23 as a float, kept in %r8d until the pow2 scale step
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx -- 121.2740575f
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12 -- %xmm12 = approximate log2(x)
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9 -- p = G * log2(x), the input to the pow2 approximation
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 -- truncate p toward zero...
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15 -- p + 121.2740575
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 -- ...and where p < trunc(p)...
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,84,234 ; andps %xmm10,%xmm13
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9 -- %xmm9 = 0, reused as the lower clamp bound
+ DB 69,15,92,221 ; subps %xmm13,%xmm11 -- ...subtract 1.0 to get floor(p)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 -- f = p - floor(p)
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx -- 1.490129070f
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx -- 27.72802330f
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx -- 4.84252568f
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 -- scale by 2^23
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 -- convert to integer; those bits are then used as the float 2^p
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12 -- E
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12 -- curved branch: approx (A*v + B)^G + E
+ DB 68,15,84,195 ; andps %xmm3,%xmm8 -- keep the linear branch where v <= D
+ DB 65,15,85,220 ; andnps %xmm12,%xmm3 -- keep the curved branch elsewhere
+ DB 65,15,86,216 ; orps %xmm8,%xmm3 -- merge the two branches
+ DB 65,15,95,217 ; maxps %xmm9,%xmm3 -- clamp below at 0
+ DB 65,15,93,218 ; minps %xmm10,%xmm3 -- clamp above at 1.0
+ DB 72,173 ; lods %ds:(%rsi),%rax -- fetch the next pointer from the stream...
+ DB 255,224 ; jmpq *%rax -- ...and tail-jump to it
+
PUBLIC _sk_load_a8_sse2
_sk_load_a8_sse2 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -15254,9 +16780,9 @@ _sk_gather_i8_sse2 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 2285 <_sk_gather_i8_sse2+0xf>
+ DB 116,5 ; je 29b9 <_sk_gather_i8_sse2+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 2287 <_sk_gather_i8_sse2+0x11>
+ DB 235,2 ; jmp 29bb <_sk_gather_i8_sse2+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1
@@ -16453,7 +17979,7 @@ _sk_linear_gradient_sse2 LABEL PROC
DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
DB 72,139,8 ; mov (%rax),%rcx
DB 72,133,201 ; test %rcx,%rcx
- DB 15,132,15,1,0,0 ; je 366f <_sk_linear_gradient_sse2+0x149>
+ DB 15,132,15,1,0,0 ; je 3da3 <_sk_linear_gradient_sse2+0x149>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 69,15,87,192 ; xorps %xmm8,%xmm8
@@ -16514,8 +18040,8 @@ _sk_linear_gradient_sse2 LABEL PROC
DB 69,15,86,231 ; orps %xmm15,%xmm12
DB 72,131,192,36 ; add $0x24,%rax
DB 72,255,201 ; dec %rcx
- DB 15,133,8,255,255,255 ; jne 3575 <_sk_linear_gradient_sse2+0x4f>
- DB 235,13 ; jmp 367c <_sk_linear_gradient_sse2+0x156>
+ DB 15,133,8,255,255,255 ; jne 3ca9 <_sk_linear_gradient_sse2+0x4f>
+ DB 235,13 ; jmp 3db0 <_sk_linear_gradient_sse2+0x156>
DB 15,87,201 ; xorps %xmm1,%xmm1
DB 15,87,210 ; xorps %xmm2,%xmm2
DB 15,87,219 ; xorps %xmm3,%xmm3
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index b366cf5b12..a7f6d8036e 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -671,6 +671,40 @@ STAGE(table_g) { g = table(g, ctx); }
STAGE(table_b) { b = table(b, ctx); }
STAGE(table_a) { a = table(a, ctx); }
+// Fast approximate log2(x). See http://www.machinedlearnings.com/2011/06/fast-approximate-logarithm-exponential.html.
+SI F approx_log2(F x) {
+ // e is a fair approximation of log2(x) in its own right (x's raw bits as a number, scaled by 2^-23, less 127)...
+ F e = cast(bit_cast<U32>(x)) * C(1.0f / (1<<23)) - 127.0_f;
+
+ // ... but using the mantissa to refine its error is _much_ better.
+ F m = bit_cast<F>((bit_cast<U32>(x) & 0x007fffff_i) | 0x3f000000_i);  // x's low 23 bits under the bit pattern of 0.5f
+ return e
+ + 2.774485010_f
+ - 1.498030302_f * m
+ - 1.725879990_f / (0.3520887068_f + m);  // NOTE(review): constants presumably come from the article above; confirm before tuning
+}
+SI F approx_pow2(F x) {  // Fast approximate 2^x; approx_powf() feeds it the scaled output of approx_log2().
+ F f = fract(x);  // x - floor_(x); only the correction terms below use it
+ return bit_cast<F>(round(C(1.0f * (1<<23)),  // the rounded value's bits are reinterpreted as the float result; the SSE2 code in this patch does a multiply by 2^23 then cvtps2dq
+ x + 121.2740575_f
+ - 1.490129070_f * f
+ + 27.72802330_f / (4.84252568_f - f)));  // NOTE(review): constants presumably come from the same article as approx_log2; confirm before tuning
+}
+
+SI F approx_powf(F x, float g) {  // Approximates x^g as 2^(g * log2(x)) using the two helpers above.
+ return approx_pow2(approx_log2(x) * g);
+}
+
+SI F parametric(F v, const SkJumper_ParametricTransferFunction* ctx) {  // Applies the G,A,B,C,D,E,F transfer function in ctx to v.
+ F r = if_then_else(v <= ctx->D, mad(ctx->C, v, ctx->F)  // at or below D: linear segment C*v + F
+ , approx_powf(mad(ctx->A, v, ctx->B), ctx->G) + ctx->E);  // above D: approximately (A*v + B)^G + E
+ return min(max(r, 0), 1.0_f); // Clamp to [0,1], with argument order mattering to handle NaN.
+}
+STAGE(parametric_r) { r = parametric(r, ctx); }  // One stage per channel: each reassigns only its own channel via parametric().
+STAGE(parametric_g) { g = parametric(g, ctx); }  // Same curve evaluation, applied to g.
+STAGE(parametric_b) { b = parametric(b, ctx); }  // Same curve evaluation, applied to b.
+STAGE(parametric_a) { a = parametric(a, ctx); }  // Same curve evaluation, applied to a.
+
STAGE(load_a8) {
auto ptr = *(const uint8_t**)ctx + x;
@@ -954,7 +988,6 @@ STAGE(save_xy) {
// Whether bilinear or bicubic, all sample points are at the same fractional offset (fx,fy).
// They're either the 4 corners of a logical 1x1 pixel or the 16 corners of a 3x3 grid
// surrounding (x,y) at (0.5,0.5) off-center.
- auto fract = [](F v) { return v - floor_(v); };
F fx = fract(r + 0.5_f),
fy = fract(g + 0.5_f);
diff --git a/src/jumper/SkJumper_vectors.h b/src/jumper/SkJumper_vectors.h
index f47dd115ef..590fe9c077 100644
--- a/src/jumper/SkJumper_vectors.h
+++ b/src/jumper/SkJumper_vectors.h
@@ -626,4 +626,6 @@ SI U16 bswap(U16 x) {
#endif
}
+SI F fract(F v) { return v - floor_(v); }  // Fractional part of v; replaces the lambda this patch removes from save_xy and is also called by approx_pow2.
+
#endif//SkJumper_vectors_DEFINED
diff --git a/tests/ColorSpaceXformTest.cpp b/tests/ColorSpaceXformTest.cpp
index 81fdc37caf..83317d95e4 100644
--- a/tests/ColorSpaceXformTest.cpp
+++ b/tests/ColorSpaceXformTest.cpp
@@ -252,6 +252,9 @@ DEF_TEST(ColorSpaceXform_NonMatchingGamma, r) {
gammas->fType[0] = SkGammas::Type::kValue_Type;
gammas->fData[0].fValue = 1.2f;
+ // See ColorSpaceXform_TableGamma... we've decided to allow some tolerance
+ // for SkJumper's implementation of tables.
+ const int tolerance = 12;
gammas->fType[1] = SkGammas::Type::kTable_Type;
gammas->fData[1].fTable.fSize = tableSize;
gammas->fData[1].fTable.fOffset = 0;
@@ -260,7 +263,7 @@ DEF_TEST(ColorSpaceXform_NonMatchingGamma, r) {
gammas->fData[2].fParamOffset = sizeof(float) * tableSize;
test_identity_xform(r, gammas, true);
- test_identity_xform_A2B(r, kNonStandard_SkGammaNamed, gammas);
+ test_identity_xform_A2B(r, kNonStandard_SkGammaNamed, gammas, tolerance);
}
DEF_TEST(ColorSpaceXform_A2BCLUT, r) {