aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2016-12-02 14:22:57 -0500
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2016-12-02 19:54:39 +0000
commit: 3e05671ace65faf4b849275f2eefb534e336e92f (patch)
tree: 4eadc1310adc600e344ea3db64e47d6fe83d09d9 /src/opts
parent: c2881e9b404d757cb26a1c68ed9c8a51ddd36f6b (diff)
Tricky float -> byte conversion in store_8888.
In IEEE 754, for each byte BB, the float 0x470000BB equals 32768.0f + BB*(1/256.0f).

So to turn a [0,1] float into a byte, we can
  - multiply by (255/256.0f) to get into [0, 255/256.0f] range,
  - add 32768.0f to get into [32768.0f, 32768.0f + 255/256.0f] range,
  - look at the low byte.

The first two steps are, of course, a single FMA.

Using this trick here makes store_8888 measurably faster. Instead of an FMA then a float->int trunc, we do an FMA then a bitwise AND. Overall the math goes from
    4 FMA + 4 trunc + 3 shift
to
    4 FMA + 3 AND + 3 shift
(we can skip the shift for red and the AND for alpha). As you might guess, AND is cheaper than trunc, so this is a net win.

I should be able to follow up with the same trick in reverse in from_8888().

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD

Change-Id: I42c8f4a6ea0b6c22160517cf5f9c048f01c9a330
Reviewed-on: https://skia-review.googlesource.com/5540
Reviewed-by: Matt Sarett <msarett@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/opts')
-rw-r--r-- src/opts/SkRasterPipeline_opts.h 15
1 files changed, 11 insertions, 4 deletions
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 8164d6cac3..14f1255ec8 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -547,11 +547,18 @@ STAGE(load_8888_d) {
from_8888(load(tail, ptr), &dr, &dg, &db, &da);
}
STAGE(store_8888) {
+ auto byte = [](const SkNf& x, int ix) {
+ // Here's a neat trick: 0x47000000 == 32768.0f, and 0x470000ff == 32768.0f + (255/256.0f).
+ auto v = SkNf_fma(255/256.0f, x, 32768.0f);
+ switch (ix) {
+ case 0: return SkNi::Load(&v) & 0xff; // R
+ case 3: return SkNi::Load(&v) << 24; // A
+ }
+ return (SkNi::Load(&v) & 0xff) << (8*ix); // B or G
+ };
+
auto ptr = *(uint32_t**)ctx + x;
- store(tail, ( SkNf_round(255.0f, r) << 0
- | SkNf_round(255.0f, g) << 8
- | SkNf_round(255.0f, b) << 16
- | SkNf_round(255.0f, a) << 24 ), (int*)ptr);
+ store(tail, byte(r,0)|byte(g,1)|byte(b,2)|byte(a,3), (int*)ptr);
}
STAGE(load_tables) {