aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2017-06-06 12:34:32 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-06-06 17:06:16 +0000
commit1dda8bbf467802113ab1802f792b5b191108add1 (patch)
tree39e198fb32458443a22d14c6744195d572766be3 /src
parentce4b6c100f6e700b50933f75e3b4429357001028 (diff)
use smarter float -> skfixed15 logic everywhere
This is the same logic from constant_color, covering all the other places where we convert from float to fixed, e.g. scale_1_float.

This isn't quite ideal yet.  We replace mulss+cvttss2si for addss+movd, which is great, but this leads to a silly sequence of code:

    addss  %xmm2, %xmm0
    movd   %xmm0, %r9d
    movd   %r9d,  %xmm0
    pshuflw $0x0, %xmm0, %xmm0

Those two movd are pointless...

Again, all diffs due to switching from truncation to rounding.

Change-Id: Icf6f3b6eb370fe41cea0cebcfda0b8907e055f41
Reviewed-on: https://skia-review.googlesource.com/18846
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r--src/jumper/SkJumper_generated.S29
-rw-r--r--src/jumper/SkJumper_generated_win.S29
-rw-r--r--src/jumper/SkJumper_stages_lowp.cpp6
3 files changed, 33 insertions, 31 deletions
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 00ca2e080f..e568ed9db9 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -36825,19 +36825,19 @@ _sk_set_rgb_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,21,10,18,0,0 // movss 0x120a(%rip),%xmm2 # 1300 <_sk_xor__ssse3_lowp+0x203>
.byte 243,15,16,0 // movss (%rax),%xmm0
- .byte 243,15,89,194 // mulss %xmm2,%xmm0
- .byte 243,68,15,44,200 // cvttss2si %xmm0,%r9d
+ .byte 243,15,88,194 // addss %xmm2,%xmm0
+ .byte 102,65,15,126,193 // movd %xmm0,%r9d
.byte 102,65,15,110,193 // movd %r9d,%xmm0
.byte 242,15,112,192,0 // pshuflw $0x0,%xmm0,%xmm0
.byte 102,15,112,192,80 // pshufd $0x50,%xmm0,%xmm0
.byte 243,15,16,72,4 // movss 0x4(%rax),%xmm1
- .byte 243,15,89,202 // mulss %xmm2,%xmm1
- .byte 243,68,15,44,201 // cvttss2si %xmm1,%r9d
+ .byte 243,15,88,202 // addss %xmm2,%xmm1
+ .byte 102,65,15,126,201 // movd %xmm1,%r9d
.byte 102,65,15,110,201 // movd %r9d,%xmm1
.byte 242,15,112,201,0 // pshuflw $0x0,%xmm1,%xmm1
.byte 102,15,112,201,80 // pshufd $0x50,%xmm1,%xmm1
- .byte 243,15,89,80,8 // mulss 0x8(%rax),%xmm2
- .byte 243,15,44,194 // cvttss2si %xmm2,%eax
+ .byte 243,15,88,80,8 // addss 0x8(%rax),%xmm2
+ .byte 102,15,126,208 // movd %xmm2,%eax
.byte 102,15,110,208 // movd %eax,%xmm2
.byte 242,15,112,210,0 // pshuflw $0x0,%xmm2,%xmm2
.byte 102,15,112,210,80 // pshufd $0x50,%xmm2,%xmm2
@@ -37374,8 +37374,8 @@ FUNCTION(_sk_scale_1_float_ssse3_lowp)
_sk_scale_1_float_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,68,15,16,0 // movss (%rax),%xmm8
- .byte 243,68,15,89,5,40,10,0,0 // mulss 0xa28(%rip),%xmm8 # 1304 <_sk_xor__ssse3_lowp+0x207>
- .byte 243,65,15,44,192 // cvttss2si %xmm8,%eax
+ .byte 243,68,15,88,5,40,10,0,0 // addss 0xa28(%rip),%xmm8 # 1304 <_sk_xor__ssse3_lowp+0x207>
+ .byte 102,68,15,126,192 // movd %xmm8,%eax
.byte 102,68,15,110,192 // movd %eax,%xmm8
.byte 242,69,15,112,192,0 // pshuflw $0x0,%xmm8,%xmm8
.byte 102,69,15,112,192,80 // pshufd $0x50,%xmm8,%xmm8
@@ -37463,8 +37463,8 @@ FUNCTION(_sk_lerp_1_float_ssse3_lowp)
_sk_lerp_1_float_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,68,15,16,0 // movss (%rax),%xmm8
- .byte 243,68,15,89,5,224,8,0,0 // mulss 0x8e0(%rip),%xmm8 # 1308 <_sk_xor__ssse3_lowp+0x20b>
- .byte 243,65,15,44,192 // cvttss2si %xmm8,%eax
+ .byte 243,68,15,88,5,224,8,0,0 // addss 0x8e0(%rip),%xmm8 # 1308 <_sk_xor__ssse3_lowp+0x20b>
+ .byte 102,68,15,126,192 // movd %xmm8,%eax
.byte 102,68,15,110,192 // movd %eax,%xmm8
.byte 242,69,15,112,192,0 // pshuflw $0x0,%xmm8,%xmm8
.byte 102,69,15,112,192,80 // pshufd $0x50,%xmm8,%xmm8
@@ -38025,9 +38025,8 @@ BALIGN16
BALIGN4
.byte 0,0 // add %al,(%rax)
- .byte 0,71,0 // add %al,0x0(%rdi)
- .byte 0,0 // add %al,(%rax)
- .byte 71,0,0 // rex.RXB add %r8b,(%r8)
- .byte 0 // .byte 0x0
- .byte 71 // rex.RXB
+ .byte 128,67,0,0 // addb $0x0,0x0(%rbx)
+ .byte 128,67,0,0 // addb $0x0,0x0(%rbx)
+ .byte 128 // .byte 0x80
+ .byte 67 // rex.XB
#endif
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 3d0ee6f78f..99b5d7a374 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -26244,19 +26244,19 @@ _sk_set_rgb_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,21,25,18,0,0 ; movss 0x1219(%rip),%xmm2 # 13b0 <_sk_xor__ssse3_lowp+0x203>
DB 243,15,16,0 ; movss (%rax),%xmm0
- DB 243,15,89,194 ; mulss %xmm2,%xmm0
- DB 243,68,15,44,200 ; cvttss2si %xmm0,%r9d
+ DB 243,15,88,194 ; addss %xmm2,%xmm0
+ DB 102,65,15,126,193 ; movd %xmm0,%r9d
DB 102,65,15,110,193 ; movd %r9d,%xmm0
DB 242,15,112,192,0 ; pshuflw $0x0,%xmm0,%xmm0
DB 102,15,112,192,80 ; pshufd $0x50,%xmm0,%xmm0
DB 243,15,16,72,4 ; movss 0x4(%rax),%xmm1
- DB 243,15,89,202 ; mulss %xmm2,%xmm1
- DB 243,68,15,44,201 ; cvttss2si %xmm1,%r9d
+ DB 243,15,88,202 ; addss %xmm2,%xmm1
+ DB 102,65,15,126,201 ; movd %xmm1,%r9d
DB 102,65,15,110,201 ; movd %r9d,%xmm1
DB 242,15,112,201,0 ; pshuflw $0x0,%xmm1,%xmm1
DB 102,15,112,201,80 ; pshufd $0x50,%xmm1,%xmm1
- DB 243,15,89,80,8 ; mulss 0x8(%rax),%xmm2
- DB 243,15,44,194 ; cvttss2si %xmm2,%eax
+ DB 243,15,88,80,8 ; addss 0x8(%rax),%xmm2
+ DB 102,15,126,208 ; movd %xmm2,%eax
DB 102,15,110,208 ; movd %eax,%xmm2
DB 242,15,112,210,0 ; pshuflw $0x0,%xmm2,%xmm2
DB 102,15,112,210,80 ; pshufd $0x50,%xmm2,%xmm2
@@ -26783,8 +26783,8 @@ PUBLIC _sk_scale_1_float_ssse3_lowp
_sk_scale_1_float_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,68,15,16,0 ; movss (%rax),%xmm8
- DB 243,68,15,89,5,48,10,0,0 ; mulss 0xa30(%rip),%xmm8 # 13b4 <_sk_xor__ssse3_lowp+0x207>
- DB 243,65,15,44,192 ; cvttss2si %xmm8,%eax
+ DB 243,68,15,88,5,48,10,0,0 ; addss 0xa30(%rip),%xmm8 # 13b4 <_sk_xor__ssse3_lowp+0x207>
+ DB 102,68,15,126,192 ; movd %xmm8,%eax
DB 102,68,15,110,192 ; movd %eax,%xmm8
DB 242,69,15,112,192,0 ; pshuflw $0x0,%xmm8,%xmm8
DB 102,69,15,112,192,80 ; pshufd $0x50,%xmm8,%xmm8
@@ -26875,8 +26875,8 @@ PUBLIC _sk_lerp_1_float_ssse3_lowp
_sk_lerp_1_float_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,68,15,16,0 ; movss (%rax),%xmm8
- DB 243,68,15,89,5,228,8,0,0 ; mulss 0x8e4(%rip),%xmm8 # 13b8 <_sk_xor__ssse3_lowp+0x20b>
- DB 243,65,15,44,192 ; cvttss2si %xmm8,%eax
+ DB 243,68,15,88,5,228,8,0,0 ; addss 0x8e4(%rip),%xmm8 # 13b8 <_sk_xor__ssse3_lowp+0x20b>
+ DB 102,68,15,126,192 ; movd %xmm8,%eax
DB 102,68,15,110,192 ; movd %eax,%xmm8
DB 242,69,15,112,192,0 ; pshuflw $0x0,%xmm8,%xmm8
DB 102,69,15,112,192,80 ; pshufd $0x50,%xmm8,%xmm8
@@ -27401,10 +27401,9 @@ ALIGN 16
ALIGN 4
DB 0,0 ; add %al,(%rax)
- DB 0,71,0 ; add %al,0x0(%rdi)
- DB 0,0 ; add %al,(%rax)
- DB 71,0,0 ; rex.RXB add %r8b,(%r8)
- DB 0 ; .byte 0x0
- DB 71 ; rex.RXB
+ DB 128,67,0,0 ; addb $0x0,0x0(%rbx)
+ DB 128,67,0,0 ; addb $0x0,0x0(%rbx)
+ DB 128 ; .byte 0x80
+ DB 67 ; rex.XB
ENDIF
END
diff --git a/src/jumper/SkJumper_stages_lowp.cpp b/src/jumper/SkJumper_stages_lowp.cpp
index 3cb39e5af0..73b6def4ac 100644
--- a/src/jumper/SkJumper_stages_lowp.cpp
+++ b/src/jumper/SkJumper_stages_lowp.cpp
@@ -28,7 +28,11 @@ struct F {
U16 vec;
F() = default;
- F(float f) : vec((uint16_t)(f * 0x8000)) {}
+ F(float f) {
+ // After adding 256.0f, the SkFixed15 value is the bottom two bytes of the float.
+ f += 256.0f;
+ vec = unaligned_load<uint16_t>(&f);
+ }
F(U16 v) : vec(v) {}
operator U16() const { return vec; }