From e18fa440e74e9af0324de0a1de9b6ffb0fe3c3d3 Mon Sep 17 00:00:00 2001 From: mtklein Date: Thu, 9 Jun 2016 13:40:56 -0700 Subject: Move immintrin/arm_neon includes to where they are used. On my Mac (so, immintrin), this improves compile time, both wall and cpu, by about 16%. To test I ran this on an SSD with files hot in their caches: $ env CC=/usr/bin/clang CXX=/usr/bin/clang++ ./gyp_skia && \ ninja -C out/Release -t clean && \ time ninja -C out/Release Before: 159 wall / 3367 cpu 159 wall / 3368 cpu After: 137 wall / 2860 cpu 136 wall / 2863 cpu I also tried further refining immintrin down to emmintrin / tmmintrin / smmintrin etc. That made no significant difference, so I've kept immintrin for its simplicity. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2045633002 CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot TBR=reed@google.com No public API changes. Committed: https://skia.googlesource.com/skia/+/12dfaaa53c23f3d03050bde8f64136ac1f44164a Review-Url: https://codereview.chromium.org/2045633002 --- include/core/SkTypes.h | 6 ------ include/private/SkFloatingPoint.h | 6 ++++++ src/core/SkSharedMutex.h | 12 ++++++++++++ src/opts/SkBlurImageFilter_opts.h | 4 ++++ src/opts/SkNx_neon.h | 2 ++ src/opts/SkNx_sse.h | 1 + src/opts/SkSwizzler_opts.h | 6 ++++++ 7 files changed, 31 insertions(+), 6 deletions(-) diff --git a/include/core/SkTypes.h b/include/core/SkTypes.h index a47225db11..4e95c69f32 100644 --- a/include/core/SkTypes.h +++ b/include/core/SkTypes.h @@ -14,12 +14,6 @@ #include "SkPostConfig.h" #include #include - -#if defined(SK_ARM_HAS_NEON) - #include -#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 - #include -#endif // IWYU pragma: end_exports #include diff --git a/include/private/SkFloatingPoint.h b/include/private/SkFloatingPoint.h index 6ed6144d18..a7aa50cf9f 100644 --- a/include/private/SkFloatingPoint.h +++ b/include/private/SkFloatingPoint.h @@ -15,6 +15,12 @@ #include #include +#if 
SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 + #include +#elif defined(SK_ARM_HAS_NEON) + #include +#endif + // For _POSIX_VERSION #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) #include diff --git a/src/core/SkSharedMutex.h b/src/core/SkSharedMutex.h index 21c9f46d64..302940bbc4 100644 --- a/src/core/SkSharedMutex.h +++ b/src/core/SkSharedMutex.h @@ -14,6 +14,18 @@ #ifdef SK_DEBUG #include "SkMutex.h" + + // On GCC 4.8, targeting ARMv7 with NEON, using libc++, we need to typedef float float32_t, + // (or include which does that) before #including here. + // This makes no sense. I'm not very interested in understanding why... this is an old, + // bizarre platform configuration that we should just let die. + #include // Include something innocuous to define _LIBCPP_VERSION if it's libc++. + #if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8 \ + && defined(SK_CPU_ARM32) && defined(SK_ARM_HAS_NEON) \ + && defined(_LIBCPP_VERSION) + typedef float float32_t; + #endif + #include #endif // SK_DEBUG diff --git a/src/opts/SkBlurImageFilter_opts.h b/src/opts/SkBlurImageFilter_opts.h index 8d22391b5f..f62604ca08 100644 --- a/src/opts/SkBlurImageFilter_opts.h +++ b/src/opts/SkBlurImageFilter_opts.h @@ -11,6 +11,10 @@ #include "SkColorPriv.h" #include "SkTypes.h" +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 + #include +#endif + namespace SK_OPTS_NS { enum class BlurDirection { kX, kY }; diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index 66f8074e40..063b99f89a 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -8,6 +8,8 @@ #ifndef SkNx_neon_DEFINED #define SkNx_neon_DEFINED +#include + #define SKNX_IS_FAST // ARMv8 has vrndmq_f32 to floor 4 floats. 
Here we emulate it: diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index 1fc235d99b..0b22a5a3c6 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -9,6 +9,7 @@ #define SkNx_sse_DEFINED #include "SkCpu.h" +#include // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. // If you do, make sure this is in a static inline function... anywhere else risks violating ODR. diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h index 15eec3a355..a22e145020 100644 --- a/src/opts/SkSwizzler_opts.h +++ b/src/opts/SkSwizzler_opts.h @@ -10,6 +10,12 @@ #include "SkColorPriv.h" +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 + #include +#elif defined(SK_ARM_HAS_NEON) + #include +#endif + namespace SK_OPTS_NS { static void RGBA_to_rgbA_portable(uint32_t* dst, const void* vsrc, int count) { -- cgit v1.2.3