aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core
diff options
context:
space:
mode:
authorGravatar msarett <msarett@google.com>2016-07-19 09:07:55 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2016-07-19 09:07:55 -0700
commit6bdbf4412bd1a6fe26be1042ccf080174b13021f (patch)
treeb25a250336c9de632cfc40a2e6204626620f099d /src/core
parent9061aa4217cabc75aca24f929f370c9a82208e73 (diff)
Improve naive SkColorXform to half floats
This should give us a good baseline to explore using SkRasterPipeline.

A particular colorxform to half float drops from 425us to 282us on my desktop.

Color Xform to Half Float (HP z620)
  Original                                    425us
  Trans16 (not 32)                            355us
  Vector Trans16                              378us
  Trans16 + Keep Halfs in Vector              335us
  Vector Trans16 + Keep Halfs in Vector       282us
  Final                                       282us

Color Xform to Half Float (Nexus 5X)
  Original                                    556us
  Final                                       472us

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2159993003
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review-Url: https://codereview.chromium.org/2159993003
Diffstat (limited to 'src/core')
-rw-r--r--src/core/SkHalf.h22
-rw-r--r--src/core/SkMipMap.cpp4
-rw-r--r--src/core/SkNx.h9
-rw-r--r--src/core/SkXfermodeF16.cpp26
4 files changed, 36 insertions, 25 deletions
diff --git a/src/core/SkHalf.h b/src/core/SkHalf.h
index 2f2ed66c6a..adf8d3a126 100644
--- a/src/core/SkHalf.h
+++ b/src/core/SkHalf.h
@@ -16,9 +16,10 @@
// only used for storage
typedef uint16_t SkHalf;
-#define SK_HalfMin 0x0400 // 2^-24 (minimum positive normal value)
-#define SK_HalfMax 0x7bff // 65504
-#define SK_HalfEpsilon 0x1400 // 2^-10
+static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-14 (minimum positive normal value)
+static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504
+static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10
+static constexpr uint16_t SK_Half1 = 0x3C00; // 1
// convert between half and single precision floating point
float SkHalfToFloat(SkHalf h);
@@ -26,8 +27,8 @@ SkHalf SkFloatToHalf(float f);
// Convert between half and single precision floating point,
// assuming inputs and outputs are both finite.
-static inline Sk4f SkHalfToFloat_finite(uint64_t);
-static inline uint64_t SkFloatToHalf_finite(const Sk4f&);
+static inline Sk4f SkHalfToFloat_finite(uint64_t);
+static inline Sk4h SkFloatToHalf_finite(const Sk4f&);
// ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ //
@@ -65,14 +66,12 @@ static inline Sk4f SkHalfToFloat_finite(uint64_t hs) {
#endif
}
-static inline uint64_t SkFloatToHalf_finite(const Sk4f& fs) {
- uint64_t r;
+static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) {
#if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
float32x4_t vec = fs.fVec;
asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec)
- "fmov %[r], %d[vec] \n" // vst1_f16(&r, ...)
- : [r] "=r" (r) // =r: write-only 64-bit general register
- , [vec] "+w" (vec)); // +w: read-write NEON register
+ : [vec] "+w" (vec)); // +w: read-write NEON register
+ return vreinterpret_u16_f32(vget_low_f32(vec));
#else
Sk4i bits = Sk4i::Load(&fs),
sign = bits & 0x80000000, // Save the sign bit for later...
@@ -91,9 +90,8 @@ static inline uint64_t SkFloatToHalf_finite(const Sk4f& fs) {
Sk4i denorm = Sk4i::Load(&plus_K) ^ K;
Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm);
- SkNx_cast<uint16_t>(merged).store(&r);
+ return SkNx_cast<uint16_t>(merged);
#endif
- return r;
}
#endif
diff --git a/src/core/SkMipMap.cpp b/src/core/SkMipMap.cpp
index 4811c9e073..cb9cc85a98 100644
--- a/src/core/SkMipMap.cpp
+++ b/src/core/SkMipMap.cpp
@@ -88,7 +88,9 @@ struct ColorTypeFilter_F16 {
return SkHalfToFloat_finite(x);
}
static uint64_t Compact(const Sk4f& x) {
- return SkFloatToHalf_finite(x);
+ uint64_t r;
+ SkFloatToHalf_finite(x).store(&r);
+ return r;
}
};
diff --git a/src/core/SkNx.h b/src/core/SkNx.h
index dec63f8d89..253fcf22fe 100644
--- a/src/core/SkNx.h
+++ b/src/core/SkNx.h
@@ -309,6 +309,15 @@ SI Sk4i Sk4f_round(const Sk4f& x) {
(int) lrintf (x[3]), };
}
+// Transpose 4 Sk4h (planar r, g, b, a) into 4 interleaved {r,g,b,a} pixels and store them consecutively at dst, one uint64_t per pixel (256 bits total).
+SI void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b, const Sk4h& a) {
+ uint64_t* dst64 = (uint64_t*) dst;
+ Sk4h(r[0], g[0], b[0], a[0]).store(dst64 + 0);
+ Sk4h(r[1], g[1], b[1], a[1]).store(dst64 + 1);
+ Sk4h(r[2], g[2], b[2], a[2]).store(dst64 + 2);
+ Sk4h(r[3], g[3], b[3], a[3]).store(dst64 + 3);
+}
+
#endif
SI void Sk4f_ToBytes(uint8_t p[16], const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) {
diff --git a/src/core/SkXfermodeF16.cpp b/src/core/SkXfermodeF16.cpp
index 63058f9dce..219e91188e 100644
--- a/src/core/SkXfermodeF16.cpp
+++ b/src/core/SkXfermodeF16.cpp
@@ -25,13 +25,13 @@ static void xfer_1(const SkXfermode* xfer, uint64_t dst[], const SkPM4f* src, in
Sk4f d4 = SkHalfToFloat_finite(dst[i]);
d4.store(d.fVec);
Sk4f r4 = Sk4f::Load(proc(*src, d).fVec);
- dst[i] = SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i]));
+ SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]);
}
} else {
for (int i = 0; i < count; ++i) {
SkHalfToFloat_finite(dst[i]).store(d.fVec);
Sk4f r4 = Sk4f::Load(proc(*src, d).fVec);
- dst[i] = SkFloatToHalf_finite(r4);
+ SkFloatToHalf_finite(r4).store(&dst[i]);
}
}
}
@@ -45,13 +45,13 @@ static void xfer_n(const SkXfermode* xfer, uint64_t dst[], const SkPM4f src[], i
Sk4f d4 = SkHalfToFloat_finite(dst[i]);
d4.store(d.fVec);
Sk4f r4 = Sk4f::Load(proc(src[i], d).fVec);
- dst[i] = SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i]));
+ SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]);
}
} else {
for (int i = 0; i < count; ++i) {
SkHalfToFloat_finite(dst[i]).store(d.fVec);
Sk4f r4 = Sk4f::Load(proc(src[i], d).fVec);
- dst[i] = SkFloatToHalf_finite(r4);
+ SkFloatToHalf_finite(r4).store(&dst[i]);
}
}
}
@@ -65,7 +65,7 @@ static void clear(const SkXfermode*, uint64_t dst[], const SkPM4f*, int count, c
for (int i = 0; i < count; ++i) {
if (aa[i]) {
const Sk4f d4 = SkHalfToFloat_finite(dst[i]);
- dst[i] = SkFloatToHalf_finite(d4 * Sk4f((255 - aa[i]) * 1.0f/255));
+ SkFloatToHalf_finite(d4 * Sk4f((255 - aa[i]) * 1.0f/255)).store(&dst[i]);
}
}
} else {
@@ -83,10 +83,12 @@ static void src_1(const SkXfermode*, uint64_t dst[], const SkPM4f* src, int coun
if (aa) {
for (int i = 0; i < count; ++i) {
const Sk4f d4 = SkHalfToFloat_finite(dst[i]);
- dst[i] = SkFloatToHalf_finite(lerp_by_coverage(s4, d4, aa[i]));
+ SkFloatToHalf_finite(lerp_by_coverage(s4, d4, aa[i])).store(&dst[i]);
}
} else {
- sk_memset64(dst, SkFloatToHalf_finite(s4), count);
+ uint64_t s4h;
+ SkFloatToHalf_finite(s4).store(&s4h);
+ sk_memset64(dst, s4h, count);
}
}
@@ -96,12 +98,12 @@ static void src_n(const SkXfermode*, uint64_t dst[], const SkPM4f src[], int cou
for (int i = 0; i < count; ++i) {
const Sk4f s4 = Sk4f::Load(src[i].fVec);
const Sk4f d4 = SkHalfToFloat_finite(dst[i]);
- dst[i] = SkFloatToHalf_finite(lerp_by_coverage(s4, d4, aa[i]));
+ SkFloatToHalf_finite(lerp_by_coverage(s4, d4, aa[i])).store(&dst[i]);
}
} else {
for (int i = 0; i < count; ++i) {
const Sk4f s4 = Sk4f::Load(src[i].fVec);
- dst[i] = SkFloatToHalf_finite(s4);
+ SkFloatToHalf_finite(s4).store(&dst[i]);
}
}
}
@@ -124,9 +126,9 @@ static void srcover_1(const SkXfermode*, uint64_t dst[], const SkPM4f* src, int
const Sk4f d4 = SkHalfToFloat_finite(dst[i]);
const Sk4f r4 = s4 + d4 * dst_scale;
if (aa) {
- dst[i] = SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i]));
+ SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]);
} else {
- dst[i] = SkFloatToHalf_finite(r4);
+ SkFloatToHalf_finite(r4).store(&dst[i]);
}
}
}
@@ -140,7 +142,7 @@ static void srcover_n(const SkXfermode*, uint64_t dst[], const SkPM4f src[], int
if (aa) {
r = lerp_by_coverage(r, d, aa[i]);
}
- dst[i] = SkFloatToHalf_finite(r);
+ SkFloatToHalf_finite(r).store(&dst[i]);
}
}