diff options
author | mtklein <mtklein@chromium.org> | 2015-09-14 12:43:20 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-09-14 12:43:20 -0700 |
commit | 82c93b45ed6ac0b628adb8375389c202d1f586f9 (patch) | |
tree | bdb517b2f2a05fe22dda1f84f5f5eafda87efa87 /src/opts/SkPx_none.h | |
parent | b5b603241aaa99e07dc4e12ca9f2661aa85e5f74 (diff) |
SkPx: new approach to fixed-point SIMD
SkPx is like Sk4px, except each platform implementation of SkPx can declare
a different sweet spot of N pixels, with extra loads and stores to handle the
ragged edge of 0<n<N pixels.
In this case, _sse's sweet spot remains 4 pixels. _neon jumps up to 8 so
we can now use NEON's transposing loads and stores, and _none is just 1.
This makes operations involving alpha considerably more efficient on NEON,
as alpha is its own distinct 8x8 bit plane that's easy to toss around.
This incorporates a few other improvements I've been wanting:
- no requirement that we're dealing with SkPMColor. SkColor works too.
- no anonymous namespace hack to differentiate implementations.
Codegen and perf look good on Clang/x86-64 and GCC/ARMv7.
The NEON code looks very similar to the old NEON code, as intended.
No .skp or GM diffs on my laptop. I don't expect any.
I intend this to replace Sk4px. Plan after landing:
- port SkXfermode_opts.h
- port Color32 in SkBlitRow_D32.cpp (and move to SkBlitRow_opts.h like other
SkOpts code)
- delete all Sk4px-related code
- clean up evolutionary dead ends in SkNx (Sk16b, Sk16h, Sk4i, Sk4d, etc.)
leaving Sk2f, Sk4f (and Sk2s, Sk4s).
- find a machine with AVX2 to work on, write SkPx_avx2.h handling 8 pixels
at a time.
In the end we'll have Sk4f for float pixels, SkPx for fixed-point pixels.
BUG=skia:4117
Review URL: https://codereview.chromium.org/1317233005
Diffstat (limited to 'src/opts/SkPx_none.h')
-rw-r--r-- | src/opts/SkPx_none.h | 106 |
1 file changed, 106 insertions, 0 deletions
diff --git a/src/opts/SkPx_none.h b/src/opts/SkPx_none.h new file mode 100644 index 0000000000..a4758c1004 --- /dev/null +++ b/src/opts/SkPx_none.h @@ -0,0 +1,106 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkPx_none_DEFINED +#define SkPx_none_DEFINED + +// Nothing fancy here. We're the backup _none case after all. +// Our declared sweet spot is simply a single pixel at a time. + +struct SkPx_none { + static const int N = 1; + uint8_t f8[4]; + + SkPx_none(uint32_t px) { memcpy(f8, &px, 4); } + SkPx_none(uint8_t x, uint8_t y, uint8_t z, uint8_t a) { + f8[0] = x; f8[1] = y; f8[2] = z; f8[3] = a; + } + + static SkPx_none Dup(uint32_t px) { return px; } + static SkPx_none LoadN(const uint32_t* px) { return *px; } + static SkPx_none Load(int n, const uint32_t* px) { + SkASSERT(false); // There are no 0<n<1. + return 0; + } + + void storeN(uint32_t* px) const { memcpy(px, f8, 4); } + void store(int n, uint32_t* px) const { + SkASSERT(false); // There are no 0<n<1. + } + + struct Alpha { + uint8_t fA; + Alpha(uint8_t a) : fA(a) {} + + static Alpha Dup(uint8_t a) { return a; } + static Alpha LoadN(const uint8_t* a) { return *a; } + static Alpha Load(int n, const uint8_t* a) { + SkASSERT(false); // There are no 0<n<1. 
+ return 0; + } + Alpha inv() const { return 255 - fA; } + }; + + struct Wide { + uint16_t f16[4]; + + Wide(uint16_t x, uint16_t y, uint16_t z, uint16_t a) { + f16[0] = x; f16[1] = y; f16[2] = z; f16[3] = a; + } + + Wide operator+(const Wide& o) const { + return Wide(f16[0]+o.f16[0], f16[1]+o.f16[1], f16[2]+o.f16[2], f16[3]+o.f16[3]); + } + Wide operator-(const Wide& o) const { + return Wide(f16[0]-o.f16[0], f16[1]-o.f16[1], f16[2]-o.f16[2], f16[3]-o.f16[3]); + } + Wide operator<<(int bits) const { + return Wide(f16[0]<<bits, f16[1]<<bits, f16[2]<<bits, f16[3]<<bits); + } + Wide operator>>(int bits) const { + return Wide(f16[0]>>bits, f16[1]>>bits, f16[2]>>bits, f16[3]>>bits); + } + + SkPx_none addNarrowHi(const SkPx_none& o) const { + Wide sum = (*this + o.widenLo()) >> 8; + return SkPx_none(sum.f16[0], sum.f16[1], sum.f16[2], sum.f16[3]); + } + }; + + Alpha alpha() const { return f8[3]; } + + Wide widenLo() const { return Wide(f8[0], f8[1], f8[2], f8[3]); } + Wide widenHi() const { return this->widenLo() << 8; } + Wide widenLoHi() const { return this->widenLo() + this->widenHi(); } + + SkPx_none operator+(const SkPx_none& o) const { + return SkPx_none(f8[0]+o.f8[0], f8[1]+o.f8[1], f8[2]+o.f8[2], f8[3]+o.f8[3]); + } + SkPx_none operator-(const SkPx_none& o) const { + return SkPx_none(f8[0]-o.f8[0], f8[1]-o.f8[1], f8[2]-o.f8[2], f8[3]-o.f8[3]); + } + SkPx_none saturatedAdd(const SkPx_none& o) const { + return SkPx_none(SkTMax(0, SkTMin(255, f8[0]+o.f8[0])), + SkTMax(0, SkTMin(255, f8[1]+o.f8[1])), + SkTMax(0, SkTMin(255, f8[2]+o.f8[2])), + SkTMax(0, SkTMin(255, f8[3]+o.f8[3]))); + } + + Wide operator*(const Alpha& a) const { + return Wide(f8[0]*a.fA, f8[1]*a.fA, f8[2]*a.fA, f8[3]*a.fA); + } + SkPx_none approxMulDiv255(const Alpha& a) const { + return (*this * a).addNarrowHi(*this); + } + + SkPx_none addAlpha(const Alpha& a) const { + return SkPx_none(f8[0], f8[1], f8[2], f8[3]+a.fA); + } +}; +typedef SkPx_none SkPx; + +#endif//SkPx_none_DEFINED |