diff options
author | mtklein <mtklein@chromium.org> | 2016-07-26 08:01:19 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-07-26 08:01:20 -0700 |
commit | f660b7cfcfbf3062f88e61f8320ea7051da72213 (patch) | |
tree | fca0db0effa07c70ea7f0b64f84b7f6e954c1944 /src/core/SkHalf.h | |
parent | 8c523e0f3ffa66eefd70f893e9f863b7d9ea3dc9 (diff) |
Add Sk4h_load4 for loading F16.
Should feel very similar to Sk4h_store4:
NEON uses its native instruction, SSE unpacks manually.
Since we'll have our F16s in 4 Sk4h by the time we're done here,
this also extracts an Sk4h->Sk4f routine from the old uint64_t->Sk4f one.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2184753002
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review-Url: https://codereview.chromium.org/2184753002
Diffstat (limited to 'src/core/SkHalf.h')
-rw-r--r-- | src/core/SkHalf.h | 19 |
1 files changed, 11 insertions, 8 deletions
```diff
diff --git a/src/core/SkHalf.h b/src/core/SkHalf.h
index adf8d3a126..bc9dd7940f 100644
--- a/src/core/SkHalf.h
+++ b/src/core/SkHalf.h
@@ -37,19 +37,18 @@ static inline Sk4h SkFloatToHalf_finite(const Sk4f&);
 
 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly.
 
-static inline Sk4f SkHalfToFloat_finite(uint64_t hs) {
+static inline Sk4f SkHalfToFloat_finite(const Sk4h& hs) {
 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
     float32x4_t fs;
-    asm ("fmov %d[fs], %[hs] \n" // vcreate_f16(hs)
-         "fcvtl %[fs].4s, %[fs].4h \n" // vcvt_f32_f16(...)
+    asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...)
         : [fs] "=w" (fs) // =w: write-only NEON register
-        : [hs] "r" (hs)); // r: read-only 64-bit general register
+        : [hs] "w" (hs.fVec)); // w: read-only NEON register
     return fs;
 #else
-    Sk4i bits = SkNx_cast<int>(Sk4h::Load(&hs)), // Expand to 32 bit.
-         sign = bits & 0x00008000, // Save the sign bit for later...
-         positive = bits ^ sign, // ...but strip it off for now.
-         is_denorm = positive < (1<<10); // Exponent == 0?
+    Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit.
+         sign = bits & 0x00008000, // Save the sign bit for later...
+         positive = bits ^ sign, // ...but strip it off for now.
+         is_denorm = positive < (1<<10); // Exponent == 0?
 
     // For normal half floats, extend the mantissa by 13 zero bits,
     // then adjust the exponent from 15 bias to 127 bias.
@@ -66,6 +65,10 @@ static inline Sk4f SkHalfToFloat_finite(uint64_t hs) {
 #endif
 }
 
+static inline Sk4f SkHalfToFloat_finite(uint64_t hs) {
+    return SkHalfToFloat_finite(Sk4h::Load(&hs));
+}
+
 static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) {
 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
     float32x4_t vec = fs.fVec;
```