aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@chromium.org>2016-06-07 09:35:27 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2016-06-07 09:35:28 -0700
commit12dfaaa53c23f3d03050bde8f64136ac1f44164a (patch)
tree63cfa96123575974f0560f785b3bc63367e15e63
parentd62e28b19a23b913c549b7891ecf79e779577181 (diff)
Move immintrin/arm_neon includes to where they are used.
On my Mac (so, immintrin), this improves compile time, both wall and cpu, by about 16%. To test I ran this on an SSD with files hot in their caches: $ env CC=/usr/bin/clang CXX=/usr/bin/clang++ ./gyp_skia && \ ninja -C out/Release -t clean && \ time ninja -C out/Release Before: 159 wall / 3367 cpu 159 wall / 3368 cpu After: 137 wall / 2860 cpu 136 wall / 2863 cpu I also tried further refining immintrin down to emmintrin / tmmintrin / smmintrin etc. That made no significant difference, so I've kept immintrin for its simplicity. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2045633002 CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot TBR=reed@google.com No public API changes. Review-Url: https://codereview.chromium.org/2045633002
-rw-r--r--include/core/SkTypes.h6
-rw-r--r--include/private/SkFloatingPoint.h6
-rw-r--r--src/opts/SkBlurImageFilter_opts.h4
-rw-r--r--src/opts/SkNx_neon.h2
-rw-r--r--src/opts/SkNx_sse.h1
-rw-r--r--src/opts/SkSwizzler_opts.h6
6 files changed, 19 insertions, 6 deletions
diff --git a/include/core/SkTypes.h b/include/core/SkTypes.h
index a47225db11..4e95c69f32 100644
--- a/include/core/SkTypes.h
+++ b/include/core/SkTypes.h
@@ -14,12 +14,6 @@
#include "SkPostConfig.h"
#include <stddef.h>
#include <stdint.h>
-
-#if defined(SK_ARM_HAS_NEON)
- #include <arm_neon.h>
-#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
- #include <immintrin.h>
-#endif
// IWYU pragma: end_exports
#include <string.h>
diff --git a/include/private/SkFloatingPoint.h b/include/private/SkFloatingPoint.h
index 6ed6144d18..a7aa50cf9f 100644
--- a/include/private/SkFloatingPoint.h
+++ b/include/private/SkFloatingPoint.h
@@ -15,6 +15,12 @@
#include <math.h>
#include <float.h>
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
+ #include <xmmintrin.h>
+#elif defined(SK_ARM_HAS_NEON)
+ #include <arm_neon.h>
+#endif
+
// For _POSIX_VERSION
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
#include <unistd.h>
diff --git a/src/opts/SkBlurImageFilter_opts.h b/src/opts/SkBlurImageFilter_opts.h
index 8d22391b5f..f62604ca08 100644
--- a/src/opts/SkBlurImageFilter_opts.h
+++ b/src/opts/SkBlurImageFilter_opts.h
@@ -11,6 +11,10 @@
#include "SkColorPriv.h"
#include "SkTypes.h"
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
+ #include <immintrin.h>
+#endif
+
namespace SK_OPTS_NS {
enum class BlurDirection { kX, kY };
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 66f8074e40..063b99f89a 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -8,6 +8,8 @@
#ifndef SkNx_neon_DEFINED
#define SkNx_neon_DEFINED
+#include <arm_neon.h>
+
#define SKNX_IS_FAST
// ARMv8 has vrndmq_f32 to floor 4 floats. Here we emulate it:
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index 1fc235d99b..0b22a5a3c6 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -9,6 +9,7 @@
#define SkNx_sse_DEFINED
#include "SkCpu.h"
+#include <immintrin.h>
// This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent.
// If you do, make sure this is in a static inline function... anywhere else risks violating ODR.
diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h
index 15eec3a355..a22e145020 100644
--- a/src/opts/SkSwizzler_opts.h
+++ b/src/opts/SkSwizzler_opts.h
@@ -10,6 +10,12 @@
#include "SkColorPriv.h"
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+ #include <immintrin.h>
+#elif defined(SK_ARM_HAS_NEON)
+ #include <arm_neon.h>
+#endif
+
namespace SK_OPTS_NS {
static void RGBA_to_rgbA_portable(uint32_t* dst, const void* vsrc, int count) {