aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- bench/ColorCodecBench.cpp 57
-rw-r--r-- bench/ColorCodecBench.h 3
-rw-r--r-- src/core/SkHalf.h 22
-rw-r--r-- src/core/SkMipMap.cpp 4
-rw-r--r-- src/core/SkNx.h 9
-rw-r--r-- src/core/SkXfermodeF16.cpp 26
-rw-r--r-- src/effects/gradients/Sk4fGradientPriv.h 6
-rw-r--r-- src/opts/SkColorXform_opts.h 21
-rw-r--r-- src/opts/SkNx_neon.h 11
-rw-r--r-- src/opts/SkNx_sse.h 10
-rw-r--r-- tests/Float16Test.cpp 6
11 files changed, 110 insertions, 65 deletions
diff --git a/bench/ColorCodecBench.cpp b/bench/ColorCodecBench.cpp
index edad759860..55ec7d9d7b 100644
--- a/bench/ColorCodecBench.cpp
+++ b/bench/ColorCodecBench.cpp
@@ -16,6 +16,7 @@ DEFINE_bool(qcms, false, "Bench qcms color conversion");
#endif
DEFINE_bool(xform_only, false, "Only time the color xform, do not include the decode time");
DEFINE_bool(srgb, false, "Convert to srgb dst space");
+DEFINE_bool(half, false, "Convert to half floats");
ColorCodecBench::ColorCodecBench(const char* name, sk_sp<SkData> encoded)
: fEncoded(std::move(encoded))
@@ -43,7 +44,7 @@ void ColorCodecBench::decodeAndXform() {
#ifdef SK_DEBUG
const SkCodec::Result result =
#endif
- codec->startScanlineDecode(fInfo);
+ codec->startScanlineDecode(fSrcInfo);
SkASSERT(SkCodec::kSuccess == result);
sk_sp<SkColorSpace> srcSpace = sk_ref_sp(codec->getColorSpace());
@@ -54,15 +55,17 @@ void ColorCodecBench::decodeAndXform() {
SkASSERT(xform);
void* dst = fDst.get();
- for (int y = 0; y < fInfo.height(); y++) {
+ for (int y = 0; y < fSrcInfo.height(); y++) {
#ifdef SK_DEBUG
const int rows =
#endif
codec->getScanlines(fSrc.get(), 1, 0);
SkASSERT(1 == rows);
- xform->applyTo8888((SkPMColor*) dst, (uint32_t*) fSrc.get(), fInfo.width());
- dst = SkTAddOffset<void>(dst, fInfo.minRowBytes());
+ FLAGS_half ?
+ xform->applyToF16((uint64_t*) dst, (uint32_t*) fSrc.get(), fSrcInfo.width()) :
+ xform->applyTo8888((SkPMColor*) dst, (uint32_t*) fSrc.get(), fSrcInfo.width());
+ dst = SkTAddOffset<void>(dst, fDstInfo.minRowBytes());
}
}
@@ -72,7 +75,7 @@ void ColorCodecBench::decodeAndXformQCMS() {
#ifdef SK_DEBUG
const SkCodec::Result result =
#endif
- codec->startScanlineDecode(fInfo);
+ codec->startScanlineDecode(fSrcInfo);
SkASSERT(SkCodec::kSuccess == result);
SkAutoTCallVProc<qcms_profile, qcms_profile_release>
@@ -91,15 +94,15 @@ void ColorCodecBench::decodeAndXformQCMS() {
#endif
void* dst = fDst.get();
- for (int y = 0; y < fInfo.height(); y++) {
+ for (int y = 0; y < fSrcInfo.height(); y++) {
#ifdef SK_DEBUG
const int rows =
#endif
codec->getScanlines(fSrc.get(), 1, 0);
SkASSERT(1 == rows);
- qcms_transform_data_type(transform, fSrc.get(), dst, fInfo.width(), outType);
- dst = SkTAddOffset<void>(dst, fInfo.minRowBytes());
+ qcms_transform_data_type(transform, fSrc.get(), dst, fSrcInfo.width(), outType);
+ dst = SkTAddOffset<void>(dst, fDstInfo.minRowBytes());
}
}
#endif
@@ -114,11 +117,13 @@ void ColorCodecBench::xformOnly() {
void* dst = fDst.get();
void* src = fSrc.get();
- for (int y = 0; y < fInfo.height(); y++) {
+ for (int y = 0; y < fSrcInfo.height(); y++) {
// Transform in place
- xform->applyTo8888((SkPMColor*) dst, (uint32_t*) src, fInfo.width());
- dst = SkTAddOffset<void>(dst, fInfo.minRowBytes());
- src = SkTAddOffset<void>(src, fInfo.minRowBytes());
+ FLAGS_half ?
+ xform->applyToF16((uint64_t*) dst, (uint32_t*) src, fSrcInfo.width()) :
+ xform->applyTo8888((SkPMColor*) dst, (uint32_t*) src, fSrcInfo.width());
+ dst = SkTAddOffset<void>(dst, fDstInfo.minRowBytes());
+ src = SkTAddOffset<void>(src, fSrcInfo.minRowBytes());
}
}
@@ -141,26 +146,31 @@ void ColorCodecBench::xformOnlyQCMS() {
void* dst = fDst.get();
void* src = fSrc.get();
- for (int y = 0; y < fInfo.height(); y++) {
+ for (int y = 0; y < fSrcInfo.height(); y++) {
// Transform in place
- qcms_transform_data_type(transform, src, dst, fInfo.width(), outType);
- dst = SkTAddOffset<void>(dst, fInfo.minRowBytes());
- src = SkTAddOffset<void>(src, fInfo.minRowBytes());
+ qcms_transform_data_type(transform, src, dst, fSrcInfo.width(), outType);
+ dst = SkTAddOffset<void>(dst, fDstInfo.minRowBytes());
+ src = SkTAddOffset<void>(src, fSrcInfo.minRowBytes());
}
}
#endif
void ColorCodecBench::onDelayedSetup() {
SkAutoTDelete<SkCodec> codec(SkCodec::NewFromData(fEncoded.get()));
- fInfo = codec->getInfo().makeColorType(kRGBA_8888_SkColorType);
+ fSrcInfo = codec->getInfo().makeColorType(kRGBA_8888_SkColorType);
+
+ fDstInfo = fSrcInfo;
+ if (FLAGS_half) {
+ fDstInfo = fDstInfo.makeColorType(kRGBA_F16_SkColorType);
+ }
+ fDst.reset(fDstInfo.getSafeSize(fDstInfo.minRowBytes()));
- fDst.reset(fInfo.getSafeSize(fInfo.minRowBytes()));
if (FLAGS_xform_only) {
- fSrc.reset(fInfo.getSafeSize(fInfo.minRowBytes()));
- codec->getPixels(fInfo, fSrc.get(), fInfo.minRowBytes());
+ fSrc.reset(fSrcInfo.getSafeSize(fSrcInfo.minRowBytes()));
+ codec->getPixels(fSrcInfo, fSrc.get(), fSrcInfo.minRowBytes());
} else {
// Set-up a row buffer to decode into before transforming to dst.
- fSrc.reset(fInfo.minRowBytes());
+ fSrc.reset(fSrcInfo.minRowBytes());
}
fSrcData = codec->getICCData();
@@ -189,6 +199,11 @@ void ColorCodecBench::onDelayedSetup() {
}
void ColorCodecBench::onDraw(int n, SkCanvas*) {
+ if ((FLAGS_srgb || FLAGS_qcms) && FLAGS_half) {
+ SkDebugf("Error: Contradicting flags.\n");
+ return;
+ }
+
for (int i = 0; i < n; i++) {
#if defined(SK_TEST_QCMS)
if (FLAGS_qcms) {
diff --git a/bench/ColorCodecBench.h b/bench/ColorCodecBench.h
index 9da505fd47..45bf41ec2d 100644
--- a/bench/ColorCodecBench.h
+++ b/bench/ColorCodecBench.h
@@ -36,7 +36,8 @@ private:
SkString fName;
sk_sp<SkData> fEncoded;
- SkImageInfo fInfo;
+ SkImageInfo fSrcInfo;
+ SkImageInfo fDstInfo;
SkAutoMalloc fDst;
SkAutoMalloc fSrc;
sk_sp<SkColorSpace> fDstSpace;
diff --git a/src/core/SkHalf.h b/src/core/SkHalf.h
index 2f2ed66c6a..adf8d3a126 100644
--- a/src/core/SkHalf.h
+++ b/src/core/SkHalf.h
@@ -16,9 +16,10 @@
// only used for storage
typedef uint16_t SkHalf;
-#define SK_HalfMin 0x0400 // 2^-24 (minimum positive normal value)
-#define SK_HalfMax 0x7bff // 65504
-#define SK_HalfEpsilon 0x1400 // 2^-10
+static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-14 (minimum positive normal value)
+static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504 (largest finite value)
+static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10
+static constexpr uint16_t SK_Half1 = 0x3C00; // 1.0 (biased exponent 15, zero mantissa)
// convert between half and single precision floating point
float SkHalfToFloat(SkHalf h);
@@ -26,8 +27,8 @@ SkHalf SkFloatToHalf(float f);
// Convert between half and single precision floating point,
// assuming inputs and outputs are both finite.
-static inline Sk4f SkHalfToFloat_finite(uint64_t);
-static inline uint64_t SkFloatToHalf_finite(const Sk4f&);
+static inline Sk4f SkHalfToFloat_finite(uint64_t);
+static inline Sk4h SkFloatToHalf_finite(const Sk4f&);
// ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ //
@@ -65,14 +66,12 @@ static inline Sk4f SkHalfToFloat_finite(uint64_t hs) {
#endif
}
-static inline uint64_t SkFloatToHalf_finite(const Sk4f& fs) {
- uint64_t r;
+static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) {
#if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
float32x4_t vec = fs.fVec;
asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec)
- "fmov %[r], %d[vec] \n" // vst1_f16(&r, ...)
- : [r] "=r" (r) // =r: write-only 64-bit general register
- , [vec] "+w" (vec)); // +w: read-write NEON register
+ : [vec] "+w" (vec)); // +w: read-write NEON register
+ return vreinterpret_u16_f32(vget_low_f32(vec));
#else
Sk4i bits = Sk4i::Load(&fs),
sign = bits & 0x80000000, // Save the sign bit for later...
@@ -91,9 +90,8 @@ static inline uint64_t SkFloatToHalf_finite(const Sk4f& fs) {
Sk4i denorm = Sk4i::Load(&plus_K) ^ K;
Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm);
- SkNx_cast<uint16_t>(merged).store(&r);
+ return SkNx_cast<uint16_t>(merged);
#endif
- return r;
}
#endif
diff --git a/src/core/SkMipMap.cpp b/src/core/SkMipMap.cpp
index 4811c9e073..cb9cc85a98 100644
--- a/src/core/SkMipMap.cpp
+++ b/src/core/SkMipMap.cpp
@@ -88,7 +88,9 @@ struct ColorTypeFilter_F16 {
return SkHalfToFloat_finite(x);
}
static uint64_t Compact(const Sk4f& x) {
- return SkFloatToHalf_finite(x);
+ uint64_t r;
+ SkFloatToHalf_finite(x).store(&r);
+ return r;
}
};
diff --git a/src/core/SkNx.h b/src/core/SkNx.h
index dec63f8d89..253fcf22fe 100644
--- a/src/core/SkNx.h
+++ b/src/core/SkNx.h
@@ -309,6 +309,15 @@ SI Sk4i Sk4f_round(const Sk4f& x) {
(int) lrintf (x[3]), };
}
+// Transpose 4 planar Sk4h (r, g, b, a) into 4 interleaved RGBA pixels and store (256 bits total).
+SI void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b, const Sk4h& a) {
+uint64_t* dst64 = (uint64_t*) dst;  // one 64-bit slot per pixel (4 x 16-bit channels)
+Sk4h(r[0], g[0], b[0], a[0]).store(dst64 + 0);  // pixel 0
+Sk4h(r[1], g[1], b[1], a[1]).store(dst64 + 1);  // pixel 1
+Sk4h(r[2], g[2], b[2], a[2]).store(dst64 + 2);  // pixel 2
+Sk4h(r[3], g[3], b[3], a[3]).store(dst64 + 3);  // pixel 3
+}
+
#endif
SI void Sk4f_ToBytes(uint8_t p[16], const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) {
diff --git a/src/core/SkXfermodeF16.cpp b/src/core/SkXfermodeF16.cpp
index 63058f9dce..219e91188e 100644
--- a/src/core/SkXfermodeF16.cpp
+++ b/src/core/SkXfermodeF16.cpp
@@ -25,13 +25,13 @@ static void xfer_1(const SkXfermode* xfer, uint64_t dst[], const SkPM4f* src, in
Sk4f d4 = SkHalfToFloat_finite(dst[i]);
d4.store(d.fVec);
Sk4f r4 = Sk4f::Load(proc(*src, d).fVec);
- dst[i] = SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i]));
+ SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]);
}
} else {
for (int i = 0; i < count; ++i) {
SkHalfToFloat_finite(dst[i]).store(d.fVec);
Sk4f r4 = Sk4f::Load(proc(*src, d).fVec);
- dst[i] = SkFloatToHalf_finite(r4);
+ SkFloatToHalf_finite(r4).store(&dst[i]);
}
}
}
@@ -45,13 +45,13 @@ static void xfer_n(const SkXfermode* xfer, uint64_t dst[], const SkPM4f src[], i
Sk4f d4 = SkHalfToFloat_finite(dst[i]);
d4.store(d.fVec);
Sk4f r4 = Sk4f::Load(proc(src[i], d).fVec);
- dst[i] = SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i]));
+ SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]);
}
} else {
for (int i = 0; i < count; ++i) {
SkHalfToFloat_finite(dst[i]).store(d.fVec);
Sk4f r4 = Sk4f::Load(proc(src[i], d).fVec);
- dst[i] = SkFloatToHalf_finite(r4);
+ SkFloatToHalf_finite(r4).store(&dst[i]);
}
}
}
@@ -65,7 +65,7 @@ static void clear(const SkXfermode*, uint64_t dst[], const SkPM4f*, int count, c
for (int i = 0; i < count; ++i) {
if (aa[i]) {
const Sk4f d4 = SkHalfToFloat_finite(dst[i]);
- dst[i] = SkFloatToHalf_finite(d4 * Sk4f((255 - aa[i]) * 1.0f/255));
+ SkFloatToHalf_finite(d4 * Sk4f((255 - aa[i]) * 1.0f/255)).store(&dst[i]);
}
}
} else {
@@ -83,10 +83,12 @@ static void src_1(const SkXfermode*, uint64_t dst[], const SkPM4f* src, int coun
if (aa) {
for (int i = 0; i < count; ++i) {
const Sk4f d4 = SkHalfToFloat_finite(dst[i]);
- dst[i] = SkFloatToHalf_finite(lerp_by_coverage(s4, d4, aa[i]));
+ SkFloatToHalf_finite(lerp_by_coverage(s4, d4, aa[i])).store(&dst[i]);
}
} else {
- sk_memset64(dst, SkFloatToHalf_finite(s4), count);
+ uint64_t s4h;
+ SkFloatToHalf_finite(s4).store(&s4h);
+ sk_memset64(dst, s4h, count);
}
}
@@ -96,12 +98,12 @@ static void src_n(const SkXfermode*, uint64_t dst[], const SkPM4f src[], int cou
for (int i = 0; i < count; ++i) {
const Sk4f s4 = Sk4f::Load(src[i].fVec);
const Sk4f d4 = SkHalfToFloat_finite(dst[i]);
- dst[i] = SkFloatToHalf_finite(lerp_by_coverage(s4, d4, aa[i]));
+ SkFloatToHalf_finite(lerp_by_coverage(s4, d4, aa[i])).store(&dst[i]);
}
} else {
for (int i = 0; i < count; ++i) {
const Sk4f s4 = Sk4f::Load(src[i].fVec);
- dst[i] = SkFloatToHalf_finite(s4);
+ SkFloatToHalf_finite(s4).store(&dst[i]);
}
}
}
@@ -124,9 +126,9 @@ static void srcover_1(const SkXfermode*, uint64_t dst[], const SkPM4f* src, int
const Sk4f d4 = SkHalfToFloat_finite(dst[i]);
const Sk4f r4 = s4 + d4 * dst_scale;
if (aa) {
- dst[i] = SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i]));
+ SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]);
} else {
- dst[i] = SkFloatToHalf_finite(r4);
+ SkFloatToHalf_finite(r4).store(&dst[i]);
}
}
}
@@ -140,7 +142,7 @@ static void srcover_n(const SkXfermode*, uint64_t dst[], const SkPM4f src[], int
if (aa) {
r = lerp_by_coverage(r, d, aa[i]);
}
- dst[i] = SkFloatToHalf_finite(r);
+ SkFloatToHalf_finite(r).store(&dst[i]);
}
}
diff --git a/src/effects/gradients/Sk4fGradientPriv.h b/src/effects/gradients/Sk4fGradientPriv.h
index 68e95a63d9..9745119fd4 100644
--- a/src/effects/gradients/Sk4fGradientPriv.h
+++ b/src/effects/gradients/Sk4fGradientPriv.h
@@ -143,11 +143,13 @@ struct DstTraits<DstType::F16, premul> {
}
static void store(const Sk4f& c, Type* dst) {
- *dst = SkFloatToHalf_finite(PM::apply(c));
+ SkFloatToHalf_finite(PM::apply(c)).store(dst);
}
static void store(const Sk4f& c, Type* dst, int n) {
- sk_memset64(dst, SkFloatToHalf_finite(PM::apply(c)), n);
+ uint64_t color;
+ SkFloatToHalf_finite(PM::apply(c)).store(&color);
+ sk_memset64(dst, color, n);
}
static void store4x(const Sk4f& c0, const Sk4f& c1,
diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h
index e7a2b45946..3bb11f5599 100644
--- a/src/opts/SkColorXform_opts.h
+++ b/src/opts/SkColorXform_opts.h
@@ -126,16 +126,10 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len,
dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t));
} else {
- // FIXME (msarett):
- // Can we do better here? Should we store half floats as planar?
- // Should we write Intel/Arm specific code? Should we add a transpose
- // function to SkNx? Should we rewrite the algorithm to be interleaved?
- uint64_t* dst64 = (uint64_t*) dst;
- dst64[0] = SkFloatToHalf_finite(Sk4f(dstReds[0], dstGreens[0], dstBlues[0], 1.0f));
- dst64[1] = SkFloatToHalf_finite(Sk4f(dstReds[1], dstGreens[1], dstBlues[1], 1.0f));
- dst64[2] = SkFloatToHalf_finite(Sk4f(dstReds[2], dstGreens[2], dstBlues[2], 1.0f));
- dst64[3] = SkFloatToHalf_finite(Sk4f(dstReds[3], dstGreens[3], dstBlues[3], 1.0f));
-
+ Sk4h_store4(dst, SkFloatToHalf_finite(dstReds),
+ SkFloatToHalf_finite(dstGreens),
+ SkFloatToHalf_finite(dstBlues),
+ SK_Half1);
dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t));
}
};
@@ -185,10 +179,9 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len,
dst = SkTAddOffset<void>(dst, sizeof(uint32_t));
} else {
- uint64_t rgba = SkFloatToHalf_finite(dstPixel);
-
- // Set alpha to 1.0
- rgba |= 0x3C00000000000000;
+ uint64_t rgba;
+ SkFloatToHalf_finite(dstPixel).store(&rgba);
+ rgba |= static_cast<uint64_t>(SK_Half1) << 48;
*((uint64_t*) dst) = rgba;
dst = SkTAddOffset<void>(dst, sizeof(uint64_t));
}
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 2f73e0368d..53e95eb97f 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -483,4 +483,15 @@ static inline Sk4i Sk4f_round(const Sk4f& x) {
return vcvtq_s32_f32((x + 0.5f).fVec);
}
+static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b,
+const Sk4h& a) {  // NEON version: writes 4 interleaved rgba pixels (16 x uint16_t) to dst
+uint16x4x4_t rgba = {{  // four planar 4-lane vectors, one per channel
+r.fVec,
+g.fVec,
+b.fVec,
+a.fVec,
+}};
+vst4_u16((uint16_t*) dst, rgba);  // 4-way interleaving store: r0 g0 b0 a0 r1 g1 b1 a1 ...
+}
+
#endif//SkNx_neon_DEFINED
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index 553ff1de3e..c0e48287b4 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -403,4 +403,14 @@ static inline Sk4i Sk4f_round(const Sk4f& x) {
return _mm_cvtps_epi32(x.fVec);
}
+static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b,
+const Sk4h& a) {  // SSE version; below, x0..x3 are the low four 16-bit lanes of x.fVec
+__m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec);  // r0 g0 r1 g1 r2 g2 r3 g3
+__m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec);  // b0 a0 b1 a1 b2 a2 b3 a3
+__m128i lo = _mm_unpacklo_epi32(rg, ba);  // r0 g0 b0 a0 r1 g1 b1 a1
+__m128i hi = _mm_unpackhi_epi32(rg, ba);  // r2 g2 b2 a2 r3 g3 b3 a3
+_mm_storeu_si128(((__m128i*) dst) + 0, lo);  // pixels 0-1; unaligned store, dst needs no alignment
+_mm_storeu_si128(((__m128i*) dst) + 1, hi);  // pixels 2-3
+}
+
#endif//SkNx_sse_DEFINED
diff --git a/tests/Float16Test.cpp b/tests/Float16Test.cpp
index 2091652522..8ab56048e9 100644
--- a/tests/Float16Test.cpp
+++ b/tests/Float16Test.cpp
@@ -73,7 +73,9 @@ DEF_TEST(HalfToFloat_finite, r) {
u(f), f);
}
REPORTER_ASSERT(r, SkHalfToFloat_finite(h)[0] == f);
- REPORTER_ASSERT(r, SkFloatToHalf_finite(SkHalfToFloat_finite(h)) == h);
+ uint64_t result;
+ SkFloatToHalf_finite(SkHalfToFloat_finite(h)).store(&result);
+ REPORTER_ASSERT(r, result == h);
}
}
}
@@ -89,7 +91,7 @@ DEF_TEST(FloatToHalf_finite, r) {
float f;
memcpy(&f, &bits, 4);
if (isfinite(f) && isfinite(SkHalfToFloat(SkFloatToHalf(f)))) {
- uint16_t h1 = (uint16_t)SkFloatToHalf_finite(Sk4f(f,0,0,0)),
+ uint16_t h1 = SkFloatToHalf_finite(Sk4f(f,0,0,0))[0],
h2 = SkFloatToHalf(f);
bool ok = (h1 == h2 || h1 == h2-1);
REPORTER_ASSERT(r, ok);