aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkPx_none.h
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@chromium.org>2015-09-14 12:43:20 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2015-09-14 12:43:20 -0700
commit82c93b45ed6ac0b628adb8375389c202d1f586f9 (patch)
treebdb517b2f2a05fe22dda1f84f5f5eafda87efa87 /src/opts/SkPx_none.h
parentb5b603241aaa99e07dc4e12ca9f2661aa85e5f74 (diff)
SkPx: new approach to fixed-point SIMD
SkPx is like Sk4px, except each platform implementation of SkPx can declare a different sweet spot of N pixels, with extra loads and stores to handle the ragged edge of 0<n<N pixels.

In this case, _sse's sweet spot remains 4 pixels. _neon jumps up to 8 so we can now use NEON's transposing loads and stores, and _none is just 1. This makes operations involving alpha considerably more efficient on NEON, as alpha is its own distinct 8x8 bit plane that's easy to toss around.

This incorporates a few other improvements I've been wanting:
  - no requirement that we're dealing with SkPMColor. SkColor works too.
  - no anonymous namespace hack to differentiate implementations.

Codegen and perf look good on Clang/x86-64 and GCC/ARMv7. The NEON code looks very similar to the old NEON code, as intended.

No .skp or GM diffs on my laptop. Don't expect any.

I intend this to replace Sk4px. Plan after landing:
  - port SkXfermode_opts.h
  - port Color32 in SkBlitRow_D32.cpp (and move to SkBlitRow_opts.h like other SkOpts code)
  - delete all Sk4px-related code
  - clean up evolutionary dead ends in SkNx (Sk16b, Sk16h, Sk4i, Sk4d, etc.) leaving Sk2f, Sk4f (and Sk2s, Sk4s).
  - find a machine with AVX2 to work on, write SkPx_avx2.h handling 8 pixels at a time.

In the end we'll have Sk4f for float pixels, SkPx for fixed-point pixels.

BUG=skia:4117
Review URL: https://codereview.chromium.org/1317233005
Diffstat (limited to 'src/opts/SkPx_none.h')
-rw-r--r--src/opts/SkPx_none.h106
1 files changed, 106 insertions, 0 deletions
diff --git a/src/opts/SkPx_none.h b/src/opts/SkPx_none.h
new file mode 100644
index 0000000000..a4758c1004
--- /dev/null
+++ b/src/opts/SkPx_none.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkPx_none_DEFINED
+#define SkPx_none_DEFINED
+
+// Nothing fancy here. We're the backup _none case after all.
+// Our declared sweet spot is simply a single pixel at a time.
+
+struct SkPx_none {  // Fallback SkPx: one pixel held as four 8-bit lanes, handled with plain scalar code.
+ static const int N = 1;  // Pixels handled per SkPx_none (the declared sweet spot).
+ uint8_t f8[4];  // The pixel's four bytes, in the order they sit in memory.
+
+ SkPx_none(uint32_t px) { memcpy(f8, &px, 4); }  // Copies px's 4 bytes into f8 unchanged.
+ SkPx_none(uint8_t x, uint8_t y, uint8_t z, uint8_t a) {  // Builds from four explicit lanes; a lands in f8[3].
+ f8[0] = x; f8[1] = y; f8[2] = z; f8[3] = a;
+ }
+
+ static SkPx_none Dup(uint32_t px) { return px; }  // Wraps px through the uint32_t constructor.
+ static SkPx_none LoadN(const uint32_t* px) { return *px; }  // Reads exactly one pixel from px.
+ static SkPx_none Load(int n, const uint32_t* px) {  // Partial load of n pixels; never valid here.
+ SkASSERT(false); // With N == 1 there is no count n satisfying 0<n<1.
+ return 0;  // Placeholder value (an all-zero pixel) to satisfy the return type.
+ }
+
+ void storeN(uint32_t* px) const { memcpy(px, f8, 4); }  // Writes this pixel's 4 bytes to px.
+ void store(int n, uint32_t* px) const {  // Partial store of n pixels; never valid here.
+ SkASSERT(false); // With N == 1 there is no count n satisfying 0<n<1.
+ }
+
+ struct Alpha {  // A single 8-bit alpha value.
+ uint8_t fA;  // The stored alpha, 0..255.
+ Alpha(uint8_t a) : fA(a) {}  // Implicit, so a plain uint8_t converts to Alpha.
+
+ static Alpha Dup(uint8_t a) { return a; }  // Wraps a through the uint8_t constructor.
+ static Alpha LoadN(const uint8_t* a) { return *a; }  // Reads exactly one alpha byte from a.
+ static Alpha Load(int n, const uint8_t* a) {  // Partial load of n alphas; never valid here.
+ SkASSERT(false); // With N == 1 there is no count n satisfying 0<n<1.
+ return 0;  // Placeholder value (alpha 0) to satisfy the return type.
+ }
+ Alpha inv() const { return 255 - fA; }  // Complement with respect to 255.
+ };
+
+ struct Wide {  // Four 16-bit lanes; the result type of the widening and multiply operations below.
+ uint16_t f16[4];  // Lanes in the same order as SkPx_none::f8.
+
+ Wide(uint16_t x, uint16_t y, uint16_t z, uint16_t a) {  // Arguments wider than 16 bits are converted to uint16_t.
+ f16[0] = x; f16[1] = y; f16[2] = z; f16[3] = a;
+ }
+
+ Wide operator+(const Wide& o) const {  // Lane-wise add; each result is converted back to uint16_t.
+ return Wide(f16[0]+o.f16[0], f16[1]+o.f16[1], f16[2]+o.f16[2], f16[3]+o.f16[3]);
+ }
+ Wide operator-(const Wide& o) const {  // Lane-wise subtract; each result is converted back to uint16_t.
+ return Wide(f16[0]-o.f16[0], f16[1]-o.f16[1], f16[2]-o.f16[2], f16[3]-o.f16[3]);
+ }
+ Wide operator<<(int bits) const {  // Shifts every lane left by bits; each result is converted back to uint16_t.
+ return Wide(f16[0]<<bits, f16[1]<<bits, f16[2]<<bits, f16[3]<<bits);
+ }
+ Wide operator>>(int bits) const {  // Shifts every lane right by bits.
+ return Wide(f16[0]>>bits, f16[1]>>bits, f16[2]>>bits, f16[3]>>bits);
+ }
+
+ SkPx_none addNarrowHi(const SkPx_none& o) const {  // Per lane: (this + o) >> 8, then narrowed to 8 bits.
+ Wide sum = (*this + o.widenLo()) >> 8;  // o is widened to 16 bits before the add.
+ return SkPx_none(sum.f16[0], sum.f16[1], sum.f16[2], sum.f16[3]);  // Conversion to uint8_t happens at the constructor's parameters.
+ }
+ };
+
+ Alpha alpha() const { return f8[3]; }  // The fourth lane, f8[3], as an Alpha.
+
+ Wide widenLo() const { return Wide(f8[0], f8[1], f8[2], f8[3]); }  // Each 8-bit lane zero-extended into a 16-bit lane.
+ Wide widenHi() const { return this->widenLo() << 8; }  // Each 8-bit lane placed in the high byte of a 16-bit lane.
+ Wide widenLoHi() const { return this->widenLo() + this->widenHi(); }  // Each 8-bit lane copied into both bytes of a 16-bit lane (lane * 257).
+
+ SkPx_none operator+(const SkPx_none& o) const {  // Lane-wise add; results are converted back to uint8_t, so they wrap past 255.
+ return SkPx_none(f8[0]+o.f8[0], f8[1]+o.f8[1], f8[2]+o.f8[2], f8[3]+o.f8[3]);
+ }
+ SkPx_none operator-(const SkPx_none& o) const {  // Lane-wise subtract; results are converted back to uint8_t, so they wrap below 0.
+ return SkPx_none(f8[0]-o.f8[0], f8[1]-o.f8[1], f8[2]-o.f8[2], f8[3]-o.f8[3]);
+ }
+ SkPx_none saturatedAdd(const SkPx_none& o) const {  // Lane-wise add bounded to 0..255; SkTMin/SkTMax are presumably min/max helpers defined outside this file.
+ return SkPx_none(SkTMax(0, SkTMin(255, f8[0]+o.f8[0])),
+ SkTMax(0, SkTMin(255, f8[1]+o.f8[1])),
+ SkTMax(0, SkTMin(255, f8[2]+o.f8[2])),
+ SkTMax(0, SkTMin(255, f8[3]+o.f8[3])));
+ }
+
+ Wide operator*(const Alpha& a) const {  // Every lane, including f8[3], times a.fA; 255*255 fits in 16 bits.
+ return Wide(f8[0]*a.fA, f8[1]*a.fA, f8[2]*a.fA, f8[3]*a.fA);
+ }
+ SkPx_none approxMulDiv255(const Alpha& a) const {  // Per lane: (x*a + x) >> 8, an approximation of x*a/255.
+ return (*this * a).addNarrowHi(*this);
+ }
+
+ SkPx_none addAlpha(const Alpha& a) const {  // Adds a.fA to f8[3] only; the other lanes are copied unchanged.
+ return SkPx_none(f8[0], f8[1], f8[2], f8[3]+a.fA);  // The sum is converted back to uint8_t, so it wraps past 255.
+ }
+};
+typedef SkPx_none SkPx;  // Makes SkPx name this implementation.
+
+#endif//SkPx_none_DEFINED