aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/jumper
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-10-20 15:50:12 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-10-20 20:10:35 +0000
commit: f04ff7696d34d810a94b7fd98aa0006955f57fc0 (patch)
tree: 867984694273a3b24ca2bd8d8a7ca8610ea93a30 /src/jumper
parent: 1e6b36eb574d859c51968ff0c5bc3c17773857a1 (diff)
translate+scale -> scale+translate
This is a no-op refactor. It's just always surprised me that the matrix_scale_translate stage expects [tx ty sx sy], when scales precede the translates in the names and in both normal row-major and column-major matrix layouts. This switches to [sx sy tx ty], scale then translate.

Change-Id: I2d88701121ae8013facd5a28bb0ff520211db5a6
Reviewed-on: https://skia-review.googlesource.com/62541
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/jumper')
-rw-r--r-- src/jumper/SkJumper_generated.S 92
-rw-r--r-- src/jumper/SkJumper_generated_win.S 64
-rw-r--r-- src/jumper/SkJumper_stages.cpp 4
3 files changed, 81 insertions, 79 deletions
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 42a5c717ab..c4292fbec9 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -6565,14 +6565,15 @@ FUNCTION(_sk_matrix_scale_translate_vfp4)
_sk_matrix_scale_translate_vfp4:
.long 0xe92d4800 // push {fp, lr}
.long 0xe8911008 // ldm r1, {r3, ip}
+ .long 0xe3a0200c // mov r2, #12
.long 0xe2811008 // add r1, r1, #8
- .long 0xe2832008 // add r2, r3, #8
.long 0xe1a0e003 // mov lr, r3
- .long 0xf4e22cbf // vld1.32 {d18[]-d19[]}, [r2 :32]
- .long 0xe283200c // add r2, r3, #12
- .long 0xf4ee0cbd // vld1.32 {d16[]-d17[]}, [lr :32]!
- .long 0xf2400c72 // vfma.f32 q8, q0, q9
+ .long 0xf4ee2cb2 // vld1.32 {d18[]-d19[]}, [lr :32], r2
+ .long 0xe2832004 // add r2, r3, #4
.long 0xf4e24cbf // vld1.32 {d20[]-d21[]}, [r2 :32]
+ .long 0xe2832008 // add r2, r3, #8
+ .long 0xf4e20cbf // vld1.32 {d16[]-d17[]}, [r2 :32]
+ .long 0xf2400c72 // vfma.f32 q8, q0, q9
.long 0xf4ee2cbf // vld1.32 {d18[]-d19[]}, [lr :32]
.long 0xf2422c74 // vfma.f32 q9, q1, q10
.long 0xf22001f0 // vorr q0, q8, q8
@@ -6915,6 +6916,7 @@ _sk_evenly_spaced_gradient_vfp4:
.long 0xe28dd004 // add sp, sp, #4
.long 0xe8bd4ff0 // pop {r4, r5, r6, r7, r8, r9, sl, fp, lr}
.long 0xe12fff12 // bx r2
+ .long 0xe320f000 // nop {0}
HIDDEN _sk_gauss_a_to_rgba_vfp4
.globl _sk_gauss_a_to_rgba_vfp4
@@ -6974,7 +6976,7 @@ _sk_gradient_vfp4:
.long 0xf2c00050 // vmov.i32 q8, #0
.long 0xe5923000 // ldr r3, [r2]
.long 0xe3530002 // cmp r3, #2
- .long 0x3a00000a // bcc 62ec <sk_gradient_vfp4+0x54>
+ .long 0x3a00000a // bcc 62f4 <sk_gradient_vfp4+0x54>
.long 0xe5927024 // ldr r7, [r2, #36]
.long 0xf2c04051 // vmov.i32 q10, #1
.long 0xf2c00050 // vmov.i32 q8, #0
@@ -6985,7 +6987,7 @@ _sk_gradient_vfp4:
.long 0xf3468ee8 // vcge.f32 q12, q11, q12
.long 0xf35481f2 // vbsl q12, q10, q9
.long 0xf26008e8 // vadd.i32 q8, q8, q12
- .long 0x1afffff9 // bne 62d4 <sk_gradient_vfp4+0x3c>
+ .long 0x1afffff9 // bne 62dc <sk_gradient_vfp4+0x3c>
.long 0xee314b90 // vmov.32 r4, d17[1]
.long 0xe5926010 // ldr r6, [r2, #16]
.long 0xee11cb90 // vmov.32 ip, d17[0]
@@ -8129,7 +8131,7 @@ _sk_clut_3D_vfp4:
.long 0xe0835105 // add r5, r3, r5, lsl #2
.long 0xedd55a00 // vldr s11, [r5]
.long 0xee325b90 // vmov.32 r5, d18[1]
- .long 0xea000004 // b 7300 <sk_clut_3D_vfp4+0x508>
+ .long 0xea000004 // b 7308 <sk_clut_3D_vfp4+0x508>
.long 0xe320f000 // nop {0}
.long 0x3f7ff972 // .word 0x3f7ff972
.long 0x3f7ff972 // .word 0x3f7ff972
@@ -8646,7 +8648,7 @@ _sk_clut_4D_vfp4:
.long 0xf2802051 // vmov.i32 q1, #1
.long 0xf22e29e0 // vmla.i32 q1, q15, q8
.long 0xedd20a00 // vldr s1, [r2]
- .long 0xea000004 // b 7b00 <sk_clut_4D_vfp4+0x528>
+ .long 0xea000004 // b 7b08 <sk_clut_4D_vfp4+0x528>
.long 0xe320f000 // nop {0}
.long 0x3f7ff972 // .word 0x3f7ff972
.long 0x3f7ff972 // .word 0x3f7ff972
@@ -14995,10 +14997,10 @@ HIDDEN _sk_matrix_scale_translate_skx
FUNCTION(_sk_matrix_scale_translate_skx)
_sk_matrix_scale_translate_skx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,64,8 // vbroadcastss 0x8(%rax),%ymm8
- .byte 98,242,61,56,168,0 // vfmadd213ps (%rax){1to8},%ymm8,%ymm0
- .byte 196,98,125,24,64,12 // vbroadcastss 0xc(%rax),%ymm8
- .byte 98,242,61,56,168,72,1 // vfmadd213ps 0x4(%rax){1to8},%ymm8,%ymm1
+ .byte 196,98,125,24,0 // vbroadcastss (%rax),%ymm8
+ .byte 98,242,61,56,168,64,2 // vfmadd213ps 0x8(%rax){1to8},%ymm8,%ymm0
+ .byte 196,98,125,24,64,4 // vbroadcastss 0x4(%rax),%ymm8
+ .byte 98,242,61,56,168,72,3 // vfmadd213ps 0xc(%rax){1to8},%ymm8,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -22180,11 +22182,11 @@ HIDDEN _sk_matrix_scale_translate_hsw
FUNCTION(_sk_matrix_scale_translate_hsw)
_sk_matrix_scale_translate_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,64,8 // vbroadcastss 0x8(%rax),%ymm8
- .byte 196,98,125,24,8 // vbroadcastss (%rax),%ymm9
+ .byte 196,98,125,24,0 // vbroadcastss (%rax),%ymm8
+ .byte 196,98,125,24,72,8 // vbroadcastss 0x8(%rax),%ymm9
.byte 196,194,61,168,193 // vfmadd213ps %ymm9,%ymm8,%ymm0
- .byte 196,98,125,24,64,12 // vbroadcastss 0xc(%rax),%ymm8
- .byte 196,98,125,24,72,4 // vbroadcastss 0x4(%rax),%ymm9
+ .byte 196,98,125,24,64,4 // vbroadcastss 0x4(%rax),%ymm8
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9
.byte 196,194,61,168,201 // vfmadd213ps %ymm9,%ymm8,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -30782,12 +30784,12 @@ HIDDEN _sk_matrix_scale_translate_avx
FUNCTION(_sk_matrix_scale_translate_avx)
_sk_matrix_scale_translate_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,64,8 // vbroadcastss 0x8(%rax),%ymm8
- .byte 196,98,125,24,8 // vbroadcastss (%rax),%ymm9
+ .byte 196,98,125,24,0 // vbroadcastss (%rax),%ymm8
+ .byte 196,98,125,24,72,8 // vbroadcastss 0x8(%rax),%ymm9
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 196,193,124,88,193 // vaddps %ymm9,%ymm0,%ymm0
- .byte 196,98,125,24,64,12 // vbroadcastss 0xc(%rax),%ymm8
- .byte 196,98,125,24,72,4 // vbroadcastss 0x4(%rax),%ymm9
+ .byte 196,98,125,24,64,4 // vbroadcastss 0x4(%rax),%ymm8
+ .byte 196,98,125,24,72,12 // vbroadcastss 0xc(%rax),%ymm9
.byte 197,188,89,201 // vmulps %ymm1,%ymm8,%ymm1
.byte 196,193,116,88,201 // vaddps %ymm9,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -39808,18 +39810,18 @@ HIDDEN _sk_matrix_scale_translate_sse41
FUNCTION(_sk_matrix_scale_translate_sse41)
_sk_matrix_scale_translate_sse41:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 243,68,15,16,64,8 // movss 0x8(%rax),%xmm8
+ .byte 243,68,15,16,0 // movss (%rax),%xmm8
+ .byte 243,68,15,16,72,4 // movss 0x4(%rax),%xmm9
.byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
- .byte 243,68,15,16,8 // movss (%rax),%xmm9
- .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
- .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 243,68,15,16,80,8 // movss 0x8(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
.byte 65,15,89,192 // mulps %xmm8,%xmm0
- .byte 65,15,88,193 // addps %xmm9,%xmm0
+ .byte 65,15,88,194 // addps %xmm10,%xmm0
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
.byte 243,68,15,16,64,12 // movss 0xc(%rax),%xmm8
.byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
- .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
- .byte 65,15,89,200 // mulps %xmm8,%xmm1
- .byte 65,15,88,202 // addps %xmm10,%xmm1
+ .byte 65,15,89,201 // mulps %xmm9,%xmm1
+ .byte 65,15,88,200 // addps %xmm8,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -48111,18 +48113,18 @@ HIDDEN _sk_matrix_scale_translate_sse2
FUNCTION(_sk_matrix_scale_translate_sse2)
_sk_matrix_scale_translate_sse2:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 243,68,15,16,64,8 // movss 0x8(%rax),%xmm8
+ .byte 243,68,15,16,0 // movss (%rax),%xmm8
+ .byte 243,68,15,16,72,4 // movss 0x4(%rax),%xmm9
.byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
- .byte 243,68,15,16,8 // movss (%rax),%xmm9
- .byte 243,68,15,16,80,4 // movss 0x4(%rax),%xmm10
- .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
+ .byte 243,68,15,16,80,8 // movss 0x8(%rax),%xmm10
+ .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
.byte 65,15,89,192 // mulps %xmm8,%xmm0
- .byte 65,15,88,193 // addps %xmm9,%xmm0
+ .byte 65,15,88,194 // addps %xmm10,%xmm0
+ .byte 69,15,198,201,0 // shufps $0x0,%xmm9,%xmm9
.byte 243,68,15,16,64,12 // movss 0xc(%rax),%xmm8
.byte 69,15,198,192,0 // shufps $0x0,%xmm8,%xmm8
- .byte 69,15,198,210,0 // shufps $0x0,%xmm10,%xmm10
- .byte 65,15,89,200 // mulps %xmm8,%xmm1
- .byte 65,15,88,202 // addps %xmm10,%xmm1
+ .byte 65,15,89,201 // mulps %xmm9,%xmm1
+ .byte 65,15,88,200 // addps %xmm8,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -69804,18 +69806,18 @@ _sk_matrix_scale_translate_sse2:
.byte 131,236,8 // sub $0x8,%esp
.byte 139,69,12 // mov 0xc(%ebp),%eax
.byte 139,8 // mov (%eax),%ecx
- .byte 243,15,16,97,8 // movss 0x8(%ecx),%xmm4
+ .byte 243,15,16,33 // movss (%ecx),%xmm4
+ .byte 243,15,16,105,4 // movss 0x4(%ecx),%xmm5
.byte 15,198,228,0 // shufps $0x0,%xmm4,%xmm4
- .byte 243,15,16,41 // movss (%ecx),%xmm5
- .byte 243,15,16,113,4 // movss 0x4(%ecx),%xmm6
- .byte 15,198,237,0 // shufps $0x0,%xmm5,%xmm5
+ .byte 243,15,16,113,8 // movss 0x8(%ecx),%xmm6
+ .byte 15,198,246,0 // shufps $0x0,%xmm6,%xmm6
.byte 15,89,196 // mulps %xmm4,%xmm0
- .byte 15,88,197 // addps %xmm5,%xmm0
+ .byte 15,88,198 // addps %xmm6,%xmm0
+ .byte 15,198,237,0 // shufps $0x0,%xmm5,%xmm5
.byte 243,15,16,97,12 // movss 0xc(%ecx),%xmm4
.byte 15,198,228,0 // shufps $0x0,%xmm4,%xmm4
- .byte 15,198,246,0 // shufps $0x0,%xmm6,%xmm6
- .byte 15,89,204 // mulps %xmm4,%xmm1
- .byte 15,88,206 // addps %xmm6,%xmm1
+ .byte 15,89,205 // mulps %xmm5,%xmm1
+ .byte 15,88,204 // addps %xmm4,%xmm1
.byte 141,72,8 // lea 0x8(%eax),%ecx
.byte 131,236,8 // sub $0x8,%esp
.byte 81 // push %ecx
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 6325b10066..f9edf539d2 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -5480,11 +5480,11 @@ _sk_matrix_translate_hsw LABEL PROC
PUBLIC _sk_matrix_scale_translate_hsw
_sk_matrix_scale_translate_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,64,8 ; vbroadcastss 0x8(%rax),%ymm8
- DB 196,98,125,24,8 ; vbroadcastss (%rax),%ymm9
+ DB 196,98,125,24,0 ; vbroadcastss (%rax),%ymm8
+ DB 196,98,125,24,72,8 ; vbroadcastss 0x8(%rax),%ymm9
DB 196,194,61,168,193 ; vfmadd213ps %ymm9,%ymm8,%ymm0
- DB 196,98,125,24,64,12 ; vbroadcastss 0xc(%rax),%ymm8
- DB 196,98,125,24,72,4 ; vbroadcastss 0x4(%rax),%ymm9
+ DB 196,98,125,24,64,4 ; vbroadcastss 0x4(%rax),%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
DB 196,194,61,168,201 ; vfmadd213ps %ymm9,%ymm8,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -13816,12 +13816,12 @@ _sk_matrix_translate_avx LABEL PROC
PUBLIC _sk_matrix_scale_translate_avx
_sk_matrix_scale_translate_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,64,8 ; vbroadcastss 0x8(%rax),%ymm8
- DB 196,98,125,24,8 ; vbroadcastss (%rax),%ymm9
+ DB 196,98,125,24,0 ; vbroadcastss (%rax),%ymm8
+ DB 196,98,125,24,72,8 ; vbroadcastss 0x8(%rax),%ymm9
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 196,193,124,88,193 ; vaddps %ymm9,%ymm0,%ymm0
- DB 196,98,125,24,64,12 ; vbroadcastss 0xc(%rax),%ymm8
- DB 196,98,125,24,72,4 ; vbroadcastss 0x4(%rax),%ymm9
+ DB 196,98,125,24,64,4 ; vbroadcastss 0x4(%rax),%ymm8
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
DB 196,193,116,88,201 ; vaddps %ymm9,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -22580,18 +22580,18 @@ _sk_matrix_translate_sse41 LABEL PROC
PUBLIC _sk_matrix_scale_translate_sse41
_sk_matrix_scale_translate_sse41 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 243,68,15,16,64,8 ; movss 0x8(%rax),%xmm8
+ DB 243,68,15,16,0 ; movss (%rax),%xmm8
+ DB 243,68,15,16,72,4 ; movss 0x4(%rax),%xmm9
DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
- DB 243,68,15,16,8 ; movss (%rax),%xmm9
- DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10
- DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 243,68,15,16,80,8 ; movss 0x8(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
DB 65,15,89,192 ; mulps %xmm8,%xmm0
- DB 65,15,88,193 ; addps %xmm9,%xmm0
+ DB 65,15,88,194 ; addps %xmm10,%xmm0
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8
DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
- DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
- DB 65,15,89,200 ; mulps %xmm8,%xmm1
- DB 65,15,88,202 ; addps %xmm10,%xmm1
+ DB 65,15,89,201 ; mulps %xmm9,%xmm1
+ DB 65,15,88,200 ; addps %xmm8,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -30613,18 +30613,18 @@ _sk_matrix_translate_sse2 LABEL PROC
PUBLIC _sk_matrix_scale_translate_sse2
_sk_matrix_scale_translate_sse2 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 243,68,15,16,64,8 ; movss 0x8(%rax),%xmm8
+ DB 243,68,15,16,0 ; movss (%rax),%xmm8
+ DB 243,68,15,16,72,4 ; movss 0x4(%rax),%xmm9
DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
- DB 243,68,15,16,8 ; movss (%rax),%xmm9
- DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10
- DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 243,68,15,16,80,8 ; movss 0x8(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
DB 65,15,89,192 ; mulps %xmm8,%xmm0
- DB 65,15,88,193 ; addps %xmm9,%xmm0
+ DB 65,15,88,194 ; addps %xmm10,%xmm0
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8
DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
- DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
- DB 65,15,89,200 ; mulps %xmm8,%xmm1
- DB 65,15,88,202 ; addps %xmm10,%xmm1
+ DB 65,15,89,201 ; mulps %xmm9,%xmm1
+ DB 65,15,88,200 ; addps %xmm8,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -51729,18 +51729,18 @@ _sk_matrix_scale_translate_sse2 LABEL PROC
DB 131,236,8 ; sub $0x8,%esp
DB 139,69,12 ; mov 0xc(%ebp),%eax
DB 139,8 ; mov (%eax),%ecx
- DB 243,15,16,97,8 ; movss 0x8(%ecx),%xmm4
+ DB 243,15,16,33 ; movss (%ecx),%xmm4
+ DB 243,15,16,105,4 ; movss 0x4(%ecx),%xmm5
DB 15,198,228,0 ; shufps $0x0,%xmm4,%xmm4
- DB 243,15,16,41 ; movss (%ecx),%xmm5
- DB 243,15,16,113,4 ; movss 0x4(%ecx),%xmm6
- DB 15,198,237,0 ; shufps $0x0,%xmm5,%xmm5
+ DB 243,15,16,113,8 ; movss 0x8(%ecx),%xmm6
+ DB 15,198,246,0 ; shufps $0x0,%xmm6,%xmm6
DB 15,89,196 ; mulps %xmm4,%xmm0
- DB 15,88,197 ; addps %xmm5,%xmm0
+ DB 15,88,198 ; addps %xmm6,%xmm0
+ DB 15,198,237,0 ; shufps $0x0,%xmm5,%xmm5
DB 243,15,16,97,12 ; movss 0xc(%ecx),%xmm4
DB 15,198,228,0 ; shufps $0x0,%xmm4,%xmm4
- DB 15,198,246,0 ; shufps $0x0,%xmm6,%xmm6
- DB 15,89,204 ; mulps %xmm4,%xmm1
- DB 15,88,206 ; addps %xmm6,%xmm1
+ DB 15,89,205 ; mulps %xmm5,%xmm1
+ DB 15,88,204 ; addps %xmm4,%xmm1
DB 141,72,8 ; lea 0x8(%eax),%ecx
DB 131,236,8 ; sub $0x8,%esp
DB 81 ; push %ecx
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index 690ba574ef..f2c701020f 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -1091,8 +1091,8 @@ STAGE(matrix_translate, const float* m) {
g += m[1];
}
STAGE(matrix_scale_translate, const float* m) {
- r = mad(r,m[2], m[0]);
- g = mad(g,m[3], m[1]);
+ r = mad(r,m[0], m[2]);
+ g = mad(g,m[1], m[3]);
}
STAGE(matrix_2x3, const float* m) {
auto R = mad(r,m[0], mad(g,m[2], m[4])),