author    Mike Klein <mtklein@chromium.org>               2017-06-06 12:34:32 -0400
committer Skia Commit-Bot <skia-commit-bot@chromium.org>  2017-06-06 17:06:16 +0000
commit    1dda8bbf467802113ab1802f792b5b191108add1 (patch)
tree      39e198fb32458443a22d14c6744195d572766be3 /src
parent    ce4b6c100f6e700b50933f75e3b4429357001028 (diff)
use smarter float -> skfixed15 logic everywhere
This is the same logic from constant_color, covering all the other
places where we convert from float to fixed, e.g. scale_1_float.
This isn't quite ideal yet. We trade mulss+cvttss2si for addss+movd,
which is great, but this leads to a silly sequence of code:
addss %xmm2, %xmm0
movd %xmm0, %r9d
movd %r9d, %xmm0
pshuflw $0x0, %xmm0, %xmm0
Those two movd instructions are pointless...
Again, all the diffs are due to switching from truncation to rounding.
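
For reference, here is a self-contained sketch of the trick (illustrative only, not Skia code; the to_fixed15 name and the test values are made up). For f in [0,1], f + 256.0f lands in [256, 257], which pins the float's exponent at 2^8, so its 23-bit mantissa holds round(f * 2^15), i.e. the SkFixed15 value, in the float's bottom two bytes:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Illustrative sketch, not the Skia implementation: convert f in [0,1]
    // to SkFixed15 by exploiting the float representation instead of
    // multiplying and truncating.
    static uint16_t to_fixed15(float f) {
        f += 256.0f;                          // the addss; rounds to nearest
        uint32_t bits;
        std::memcpy(&bits, &f, sizeof bits);  // the movd out of the XMM register
        return (uint16_t)bits;                // keep the bottom two bytes
    }

    int main() {
        const float tests[] = {0.0f, 0.25f, 1/3.0f, 1.0f};
        for (float f : tests) {
            std::printf("%f -> 0x%04x (mulss+cvttss2si gave 0x%04x)\n",
                        f, (unsigned)to_fixed15(f),
                        (unsigned)(uint16_t)(f * 0x8000));
        }
        return 0;
    }

For values where the product is exact (0, 0.25, 1.0) both paths agree; for 1/3 the old path truncates to 0x2aaa while the new one rounds to 0x2aab, the kind of off-by-one that accounts for the diffs mentioned above.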
Change-Id: Icf6f3b6eb370fe41cea0cebcfda0b8907e055f41
Reviewed-on: https://skia-review.googlesource.com/18846
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r--  src/jumper/SkJumper_generated.S      | 29
-rw-r--r--  src/jumper/SkJumper_generated_win.S  | 29
-rw-r--r--  src/jumper/SkJumper_stages_lowp.cpp  |  6
3 files changed, 33 insertions(+), 31 deletions(-)
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 00ca2e080f..e568ed9db9 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -36825,19 +36825,19 @@ _sk_set_rgb_ssse3_lowp:
   .byte  72,173                      // lods      %ds:(%rsi),%rax
   .byte  243,15,16,21,10,18,0,0      // movss     0x120a(%rip),%xmm2        # 1300 <_sk_xor__ssse3_lowp+0x203>
   .byte  243,15,16,0                 // movss     (%rax),%xmm0
-  .byte  243,15,89,194               // mulss     %xmm2,%xmm0
-  .byte  243,68,15,44,200            // cvttss2si %xmm0,%r9d
+  .byte  243,15,88,194               // addss     %xmm2,%xmm0
+  .byte  102,65,15,126,193           // movd      %xmm0,%r9d
   .byte  102,65,15,110,193           // movd      %r9d,%xmm0
   .byte  242,15,112,192,0            // pshuflw   $0x0,%xmm0,%xmm0
   .byte  102,15,112,192,80           // pshufd    $0x50,%xmm0,%xmm0
   .byte  243,15,16,72,4              // movss     0x4(%rax),%xmm1
-  .byte  243,15,89,202               // mulss     %xmm2,%xmm1
-  .byte  243,68,15,44,201            // cvttss2si %xmm1,%r9d
+  .byte  243,15,88,202               // addss     %xmm2,%xmm1
+  .byte  102,65,15,126,201           // movd      %xmm1,%r9d
   .byte  102,65,15,110,201           // movd      %r9d,%xmm1
   .byte  242,15,112,201,0            // pshuflw   $0x0,%xmm1,%xmm1
   .byte  102,15,112,201,80           // pshufd    $0x50,%xmm1,%xmm1
-  .byte  243,15,89,80,8              // mulss     0x8(%rax),%xmm2
-  .byte  243,15,44,194               // cvttss2si %xmm2,%eax
+  .byte  243,15,88,80,8              // addss     0x8(%rax),%xmm2
+  .byte  102,15,126,208              // movd      %xmm2,%eax
   .byte  102,15,110,208              // movd      %eax,%xmm2
   .byte  242,15,112,210,0            // pshuflw   $0x0,%xmm2,%xmm2
   .byte  102,15,112,210,80           // pshufd    $0x50,%xmm2,%xmm2
@@ -37374,8 +37374,8 @@ FUNCTION(_sk_scale_1_float_ssse3_lowp)
 _sk_scale_1_float_ssse3_lowp:
   .byte  72,173                      // lods      %ds:(%rsi),%rax
   .byte  243,68,15,16,0              // movss     (%rax),%xmm8
-  .byte  243,68,15,89,5,40,10,0,0    // mulss     0xa28(%rip),%xmm8        # 1304 <_sk_xor__ssse3_lowp+0x207>
-  .byte  243,65,15,44,192            // cvttss2si %xmm8,%eax
+  .byte  243,68,15,88,5,40,10,0,0    // addss     0xa28(%rip),%xmm8        # 1304 <_sk_xor__ssse3_lowp+0x207>
+  .byte  102,68,15,126,192           // movd      %xmm8,%eax
   .byte  102,68,15,110,192           // movd      %eax,%xmm8
   .byte  242,69,15,112,192,0         // pshuflw   $0x0,%xmm8,%xmm8
   .byte  102,69,15,112,192,80        // pshufd    $0x50,%xmm8,%xmm8
@@ -37463,8 +37463,8 @@ FUNCTION(_sk_lerp_1_float_ssse3_lowp)
 _sk_lerp_1_float_ssse3_lowp:
   .byte  72,173                      // lods      %ds:(%rsi),%rax
   .byte  243,68,15,16,0              // movss     (%rax),%xmm8
-  .byte  243,68,15,89,5,224,8,0,0    // mulss     0x8e0(%rip),%xmm8        # 1308 <_sk_xor__ssse3_lowp+0x20b>
-  .byte  243,65,15,44,192            // cvttss2si %xmm8,%eax
+  .byte  243,68,15,88,5,224,8,0,0    // addss     0x8e0(%rip),%xmm8        # 1308 <_sk_xor__ssse3_lowp+0x20b>
+  .byte  102,68,15,126,192           // movd      %xmm8,%eax
   .byte  102,68,15,110,192           // movd      %eax,%xmm8
   .byte  242,69,15,112,192,0         // pshuflw   $0x0,%xmm8,%xmm8
   .byte  102,69,15,112,192,80        // pshufd    $0x50,%xmm8,%xmm8
@@ -38025,9 +38025,8 @@
 BALIGN16
 BALIGN4
   .byte  0,0                         // add       %al,(%rax)
-  .byte  0,71,0                      // add       %al,0x0(%rdi)
-  .byte  0,0                         // add       %al,(%rax)
-  .byte  71,0,0                      // rex.RXB add %r8b,(%r8)
-  .byte  0                           // .byte 0x0
-  .byte  71                          // rex.RXB
+  .byte  128,67,0,0                  // addb      $0x0,0x0(%rbx)
+  .byte  128,67,0,0                  // addb      $0x0,0x0(%rbx)
+  .byte  128                         // .byte 0x80
+  .byte  67                          // rex.XB
 #endif
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 3d0ee6f78f..99b5d7a374 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -26244,19 +26244,19 @@ _sk_set_rgb_ssse3_lowp LABEL PROC
   DB  72,173                         ; lods      %ds:(%rsi),%rax
   DB  243,15,16,21,25,18,0,0         ; movss     0x1219(%rip),%xmm2        # 13b0 <_sk_xor__ssse3_lowp+0x203>
   DB  243,15,16,0                    ; movss     (%rax),%xmm0
-  DB  243,15,89,194                  ; mulss     %xmm2,%xmm0
-  DB  243,68,15,44,200               ; cvttss2si %xmm0,%r9d
+  DB  243,15,88,194                  ; addss     %xmm2,%xmm0
+  DB  102,65,15,126,193              ; movd      %xmm0,%r9d
   DB  102,65,15,110,193              ; movd      %r9d,%xmm0
   DB  242,15,112,192,0               ; pshuflw   $0x0,%xmm0,%xmm0
   DB  102,15,112,192,80              ; pshufd    $0x50,%xmm0,%xmm0
   DB  243,15,16,72,4                 ; movss     0x4(%rax),%xmm1
-  DB  243,15,89,202                  ; mulss     %xmm2,%xmm1
-  DB  243,68,15,44,201               ; cvttss2si %xmm1,%r9d
+  DB  243,15,88,202                  ; addss     %xmm2,%xmm1
+  DB  102,65,15,126,201              ; movd      %xmm1,%r9d
   DB  102,65,15,110,201              ; movd      %r9d,%xmm1
   DB  242,15,112,201,0               ; pshuflw   $0x0,%xmm1,%xmm1
   DB  102,15,112,201,80              ; pshufd    $0x50,%xmm1,%xmm1
-  DB  243,15,89,80,8                 ; mulss     0x8(%rax),%xmm2
-  DB  243,15,44,194                  ; cvttss2si %xmm2,%eax
+  DB  243,15,88,80,8                 ; addss     0x8(%rax),%xmm2
+  DB  102,15,126,208                 ; movd      %xmm2,%eax
   DB  102,15,110,208                 ; movd      %eax,%xmm2
   DB  242,15,112,210,0               ; pshuflw   $0x0,%xmm2,%xmm2
   DB  102,15,112,210,80              ; pshufd    $0x50,%xmm2,%xmm2
@@ -26783,8 +26783,8 @@ PUBLIC _sk_scale_1_float_ssse3_lowp
 _sk_scale_1_float_ssse3_lowp LABEL PROC
   DB  72,173                         ; lods      %ds:(%rsi),%rax
   DB  243,68,15,16,0                 ; movss     (%rax),%xmm8
-  DB  243,68,15,89,5,48,10,0,0       ; mulss     0xa30(%rip),%xmm8        # 13b4 <_sk_xor__ssse3_lowp+0x207>
-  DB  243,65,15,44,192               ; cvttss2si %xmm8,%eax
+  DB  243,68,15,88,5,48,10,0,0       ; addss     0xa30(%rip),%xmm8        # 13b4 <_sk_xor__ssse3_lowp+0x207>
+  DB  102,68,15,126,192              ; movd      %xmm8,%eax
   DB  102,68,15,110,192              ; movd      %eax,%xmm8
   DB  242,69,15,112,192,0            ; pshuflw   $0x0,%xmm8,%xmm8
   DB  102,69,15,112,192,80           ; pshufd    $0x50,%xmm8,%xmm8
@@ -26875,8 +26875,8 @@ PUBLIC _sk_lerp_1_float_ssse3_lowp
 _sk_lerp_1_float_ssse3_lowp LABEL PROC
   DB  72,173                         ; lods      %ds:(%rsi),%rax
   DB  243,68,15,16,0                 ; movss     (%rax),%xmm8
-  DB  243,68,15,89,5,228,8,0,0       ; mulss     0x8e4(%rip),%xmm8        # 13b8 <_sk_xor__ssse3_lowp+0x20b>
-  DB  243,65,15,44,192               ; cvttss2si %xmm8,%eax
+  DB  243,68,15,88,5,228,8,0,0       ; addss     0x8e4(%rip),%xmm8        # 13b8 <_sk_xor__ssse3_lowp+0x20b>
+  DB  102,68,15,126,192              ; movd      %xmm8,%eax
   DB  102,68,15,110,192              ; movd      %eax,%xmm8
   DB  242,69,15,112,192,0            ; pshuflw   $0x0,%xmm8,%xmm8
   DB  102,69,15,112,192,80           ; pshufd    $0x50,%xmm8,%xmm8
@@ -27401,10 +27401,9 @@
 ALIGN 16
 ALIGN 4
   DB  0,0                            ; add       %al,(%rax)
-  DB  0,71,0                         ; add       %al,0x0(%rdi)
-  DB  0,0                            ; add       %al,(%rax)
-  DB  71,0,0                         ; rex.RXB add %r8b,(%r8)
-  DB  0                              ; .byte 0x0
-  DB  71                             ; rex.RXB
+  DB  128,67,0,0                     ; addb      $0x0,0x0(%rbx)
+  DB  128,67,0,0                     ; addb      $0x0,0x0(%rbx)
+  DB  128                            ; .byte 0x80
+  DB  67                             ; rex.XB
 ENDIF
 END
diff --git a/src/jumper/SkJumper_stages_lowp.cpp b/src/jumper/SkJumper_stages_lowp.cpp
index 3cb39e5af0..73b6def4ac 100644
--- a/src/jumper/SkJumper_stages_lowp.cpp
+++ b/src/jumper/SkJumper_stages_lowp.cpp
@@ -28,7 +28,11 @@ struct F {
     U16 vec;
 
     F() = default;
-    F(float f) : vec((uint16_t)(f * 0x8000)) {}
+    F(float f) {
+        // After adding 256.0f, the SkFixed15 value is the bottom two bytes of the float.
+        f += 256.0f;
+        vec = unaligned_load<uint16_t>(&f);
+    }
     F(U16 v) : vec(v) {}
 
     operator U16() const { return vec; }
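
One note on the C++ change: unaligned_load is not defined in this diff. It is presumably SkJumper's usual memcpy-based type-pun, along these lines (a sketch, not the verbatim Skia helper):

    #include <cstring>

    // Roughly what unaligned_load is assumed to do: copy sizeof(T) bytes
    // out of *p so the compiler emits a plain load with no alignment or
    // strict-aliasing trouble.
    template <typename T, typename P>
    static T unaligned_load(const P* p) {
        T v;
        std::memcpy(&v, p, sizeof(v));
        return v;
    }

Bouncing the value through a uint16_t in memory like this is also what produces the movd round trip the commit message calls out: the low bits leave %xmm0 for a GPR only to be moved straight back for the pshuflw/pshufd broadcast.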