diff options
author | Mike Klein <mtklein@chromium.org> | 2016-10-07 11:21:06 -0400 |
---|---|---|
committer | Mike Klein <mtklein@chromium.org> | 2016-10-07 16:28:16 +0000 |
commit | a71e151c6f0be68dc96ad2d169bbc31edca8f946 (patch) | |
tree | 56c67a12eb299f814bb3d1f197e21512a38e3d82 /src/core/SkHalf.h | |
parent | 49df8d17c56ee08ecf860289d501913d356f67dc (diff) |
SkRasterPipeline: 8x pipelines, attempt 2
Original review here: https://skia-review.googlesource.com/c/2990/
Changes since:
- simpler implementations of load_tail() / store_tail(): slower, but more obviously correct to all compilers
- fleshed out math ops on Sk8i and Sk8u to make unit tests happy on -Fast bot (where we always have AVX2)
- now storing stage functions as void(*)() to avoid undefined behavior and/or linker problems. This restores 32-bit Windows.
- all AVX2 Sk8x methods are marked always-inline, to avoid linking the "wrong" version on Debug builds.
CQ_INCLUDE_TRYBOTS=master.client.skia:Perf-Ubuntu-Clang-GCE-CPU-AVX2-x86_64-Debug-ASAN-Trybot,Perf-Ubuntu-Clang-GCE-CPU-AVX2-x86_64-Debug-GN,Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot,Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-Fast-Trybot;master.client.skia.compile:Build-Win-MSVC-x86_64-Debug-Trybot
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3064
Change-Id: Id0ba250037e271a9475fe2f0989d64f0aa909bae
Reviewed-on: https://skia-review.googlesource.com/3064
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/core/SkHalf.h')
-rw-r--r-- | src/core/SkHalf.h | 29 |
1 file changed, 29 insertions, 0 deletions
```diff
diff --git a/src/core/SkHalf.h b/src/core/SkHalf.h
index dd978a2347..e71cb8750a 100644
--- a/src/core/SkHalf.h
+++ b/src/core/SkHalf.h
@@ -11,6 +11,10 @@
 #include "SkNx.h"
 #include "SkTypes.h"
 
+#if !defined(_MSC_VER) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
+    #include <x86intrin.h>
+#endif
+
 // 16-bit floating point value
 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa
 // only used for storage
@@ -85,4 +89,29 @@ static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f& fs) {
 #endif
 }
 
+static inline Sk8f SkHalfToFloat_finite_ftz(const Sk8h& hs) {
+#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
+    return _mm256_cvtph_ps(hs.fVec);
+
+#else
+    uint64_t parts[2];
+    hs.store(parts);
+    return SkNx_join(SkHalfToFloat_finite_ftz(parts[0]),
+                     SkHalfToFloat_finite_ftz(parts[1]));
+
+#endif
+}
+
+static inline Sk8h SkFloatToHalf_finite_ftz(const Sk8f& fs) {
+#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
+    return _mm256_cvtps_ph(fs.fVec, _MM_FROUND_CUR_DIRECTION);
+
+#else
+    uint64_t parts[2];
+    SkFloatToHalf_finite_ftz(fs.fLo).store(parts+0);
+    SkFloatToHalf_finite_ftz(fs.fHi).store(parts+1);
+    return Sk8h::Load(parts);
+#endif
+}
+
 #endif
```