aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core
diff options
context:
space:
mode:
author mtklein <mtklein@google.com> 2016-04-15 08:40:22 -0700
committer Commit bot <commit-bot@chromium.org> 2016-04-15 08:40:23 -0700
commit 86498fbfcb93a9048bbe1c28cc0df40d8d0c96e9 (patch)
tree a579fdee9a3cf330050dbe7fd0a3a25d49783693 /src/core
parent 2c7f24093a394ccbe54a7db60ba79af14682e7fa (diff)
Revert of Move CPU feature detection to its own file. (patchset #7 id:120001 of https://codereview.chromium.org/1890483002/ )
Reason for revert:
many unexpected GM diffs across GPU+CPU configs on Windows (hopefully just text masks on GPU?). seems like we pick a different srcover variant in some places.

Original issue's description:
> Move CPU feature detection to its own file.
>
> - Moves CPU feature detection to its own file.
> - Cleans up some redundant feature detection scattered around core/ and opts/.
> - Can now detect a few new CPU features:
>   * F16C -> Intel f16<->f32 instructions, added between AVX and AVX2
>   * FMA -> Intel FMA instructions, added at the same time as AVX2
>   * VFP_FP16 -> ARM f16<->f32 instructions, quite common
>   * NEON_FMA -> ARM FMA instructions, also quite common
>   * SSE and SSE3... why not?
>
> This new internal API makes it very cheap to do fine-grained runtime CPU
> feature detection. Redundant calls to SkCpu::Supports() should be eliminated
> and it's hoistable out of loops. It compiles away entirely when we have the
> appropriate instructions available at compile time.
>
> This means we can call it to guard even a little snippet of 1 or 2 instructions
> right where needed and let inlining hoist the check (if any at all) up to
> somewhere that doesn't hurt performance. I've explained how I made this work
> in the private section of the new header.
>
> Once this lands and bakes a bit, I'll start following up with CLs to use it more
> and to add a bunch of those little 1-2 instruction snippets we've been wanting,
> e.g. cvtps2ph, cvtph2ps, ptest, pmulld, pmovzxbd, blendvps, pshufb, roundps
> (for floor) on x86, and vcvt.f32.f16, vcvt.f16.f32 on ARM.
>
> BUG=skia:
> GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1890483002
> CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
>
> Committed: https://skia.googlesource.com/skia/+/872ea29357439f05b1f6995dd300fc054733e607

TBR=fmalita@chromium.org,herb@google.com,reed@google.com,mtklein@chromium.org
# Skipping CQ checks because original CL landed less than 1 days ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=skia:

Review URL: https://codereview.chromium.org/1892643003
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/SkCpu.cpp 90
-rw-r--r-- src/core/SkCpu.h 123
-rw-r--r-- src/core/SkOpts.cpp 62
-rw-r--r-- src/core/SkUtilsArm.cpp 139
-rw-r--r-- src/core/SkUtilsArm.h 14
5 files changed, 198 insertions, 230 deletions
diff --git a/src/core/SkCpu.cpp b/src/core/SkCpu.cpp
deleted file mode 100644
index 4030fce2e1..0000000000
--- a/src/core/SkCpu.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright 2016 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "SkCpu.h"
-#include "SkOncePtr.h"
-
-#if defined(SK_CPU_X86)
- #if defined(SK_BUILD_FOR_WIN32)
- #include <intrin.h>
- static void cpuid (uint32_t abcd[4]) { __cpuid ((int*)abcd, 1); }
- static void cpuid7(uint32_t abcd[4]) { __cpuidex((int*)abcd, 7, 0); }
- static uint64_t xgetbv(uint32_t xcr) { return _xgetbv(xcr); }
- #else
- #include <cpuid.h>
- #if !defined(__cpuid_count) // Old Mac Clang doesn't have this defined.
- #define __cpuid_count(eax, ecx, a, b, c, d) \
- __asm__("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eax), "2"(ecx))
- #endif
- static void cpuid (uint32_t abcd[4]) { __get_cpuid(1, abcd+0, abcd+1, abcd+2, abcd+3); }
- static void cpuid7(uint32_t abcd[4]) {
- __cpuid_count(7, 0, abcd[0], abcd[1], abcd[2], abcd[3]);
- }
- static uint64_t xgetbv(uint32_t xcr) {
- uint32_t eax, edx;
- __asm__ __volatile__ ( "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
- return (uint64_t)(edx) << 32 | eax;
- }
- #endif
-
- static uint32_t read_cpu_features() {
- uint32_t features = 0;
- uint32_t abcd[4] = {0,0,0,0};
-
- // You might want to refer to http://www.sandpile.org/x86/cpuid.htm
-
- cpuid(abcd);
- if (abcd[3] & (1<<25)) { features |= SkCpu:: SSE1; }
- if (abcd[3] & (1<<26)) { features |= SkCpu:: SSE2; }
- if (abcd[2] & (1<< 0)) { features |= SkCpu:: SSE3; }
- if (abcd[2] & (1<< 9)) { features |= SkCpu::SSSE3; }
- if (abcd[2] & (1<<19)) { features |= SkCpu::SSE41; }
- if (abcd[2] & (1<<20)) { features |= SkCpu::SSE42; }
-
- if ((abcd[2] & (3<<26)) == (3<<26) && (xgetbv(0) & 6) == 6) { // XSAVE + OSXSAVE
- if (abcd[2] & (1<<28)) { features |= SkCpu:: AVX; }
- if (abcd[2] & (1<<29)) { features |= SkCpu::F16C; }
- if (abcd[2] & (1<<12)) { features |= SkCpu:: FMA; }
-
- cpuid7(abcd);
- if (abcd[1] & (1<<5)) { features |= SkCpu::AVX2; }
- }
- return features;
- }
-
-#elif defined(SK_CPU_ARM32) && \
- defined(SK_BUILD_FOR_ANDROID) && \
- !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
- #include <cpu-features.h>
-
- static uint32_t read_cpu_features() {
- uint32_t features = 0;
-
- uint64_t android_features = android_getCpuFeatures();
- if (android_features & ANDROID_CPU_ARM_FEATURE_NEON ) { features |= SkCpu::NEON ; }
- if (android_features & ANDROID_CPU_ARM_FEATURE_NEON_FMA) { features |= SkCpu::NEON_FMA; }
- if (android_features & ANDROID_CPU_ARM_FEATURE_VFP_FP16) { features |= SkCpu::VFP_FP16; }
- return features;
- }
-
-#else
- static uint32_t read_cpu_features() {
- return 0;
- }
-
-#endif
-
-#if defined(__GNUC__) || defined(__clang__)
- SK_DECLARE_STATIC_ONCE_PTR(uint32_t, gCachedCpuFeatures);
- uint32_t SkCpu::RuntimeCpuFeatures() {
- return *gCachedCpuFeatures.get([]{ return new uint32_t{read_cpu_features()}; });
- }
-
-#else
- const uint32_t SkCpu::gCachedCpuFeatures = read_cpu_features();
-
-#endif
diff --git a/src/core/SkCpu.h b/src/core/SkCpu.h
deleted file mode 100644
index 2a41d37b16..0000000000
--- a/src/core/SkCpu.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright 2016 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkCpu_DEFINED
-#define SkCpu_DEFINED
-
-#include "SkTypes.h"
-
-struct SkCpu {
- enum {
- SSE1 = 1 << 0,
- SSE2 = 1 << 1,
- SSE3 = 1 << 2,
- SSSE3 = 1 << 3,
- SSE41 = 1 << 4,
- SSE42 = 1 << 5,
- AVX = 1 << 6,
- F16C = 1 << 7,
- FMA = 1 << 8,
- AVX2 = 1 << 9,
- };
- enum {
- NEON = 1 << 0,
- NEON_FMA = 1 << 1,
- VFP_FP16 = 1 << 2,
- };
-
- static bool Supports(uint32_t);
-
-private:
- // Consider a loop like this that expands 16-bit floats out to 32-bit, does math, and repacks:
- // for (int i = 0; i < N; i++) {
- // if (SkCpu::Supports(SkCpu::F16C)) {
- // f32s = SkCpu::F16C_cvtph_ps(f16s);
- // } else {
- // f32s = some_slower_f16_to_f32_routine(f16s);
- // }
- //
- // ... do some math with f32s ...
- //
- // if (SkCpu::Supports(SkCpu::F16C)) {
- // f16s = SkCpu::F16C_cvtps_ph(f32s);
- // } else {
- // f16s = some_slower_f32_to_f16_routine(f32s);
- // }
- // }
- //
- // We would like SkCpu::Supports() to participate in common sub-expression elimination,
- // so that it's called exactly 1 time, rather than N or 2N times. This is especially
- // important when the if-else blocks you see above are really inline functions.
- //
- // The key to this is to make sure to implement RuntimeCpuFeatures() with the same
- // capacity for common sub-expression elimination.
- //
- // __attribute__((const)) works perfectly when available.
- //
- // When it's not (MSVC), we fall back to a static initializer.
- // (Static intializers would work fine everywhere, but Chrome really dislikes them.)
-
-#if defined(__GNUC__) || defined(__clang__) // i.e. GCC, Clang, or clang-cl
- __attribute__((const))
- static uint32_t RuntimeCpuFeatures();
-#else
- static const uint32_t gCachedCpuFeatures;
- static uint32_t RuntimeCpuFeatures() {
- return gCachedCpuFeatures;
- }
-#endif
-};
-
-inline bool SkCpu::Supports(uint32_t mask) {
- uint32_t features = RuntimeCpuFeatures();
-
- // If we mask in compile-time known lower limits, the compiler can completely
- // drop many calls to RuntimeCpuFeatures().
-#if SK_CPU_X86
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
- features |= SSE1;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
- features |= SSE2;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE3
- features |= SSE3;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
- features |= SSSE3;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
- features |= SSE41;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
- features |= SSE42;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX
- features |= AVX;
- #endif
- // F16C goes here if we add SK_CPU_SSE_LEVEL_F16C
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
- features |= AVX2;
- #endif
- // FMA doesn't fit neatly into this total ordering.
- // It's available on Haswell+ just like AVX2, but it's technically a different bit.
- // TODO: circle back on this if we find ourselves limited by lack of compile-time FMA
-
-#else
- #if defined(SK_ARM_HAS_NEON)
- features |= NEON;
- #endif
-
- #if defined(SK_CPU_ARM64)
- features |= NEON|NEON_FMA|VFP_FP16;
- #endif
-
-#endif
- return (features & mask) == mask;
-}
-
-#endif//SkCpu_DEFINED
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index d97a680846..570e329094 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -5,7 +5,6 @@
* found in the LICENSE file.
*/
-#include "SkCpu.h"
#include "SkHalf.h"
#include "SkOnce.h"
#include "SkOpts.h"
@@ -33,6 +32,35 @@ namespace SK_OPTS_NS {
}
}
+#if defined(SK_CPU_X86) && !defined(SK_BUILD_FOR_IOS) // Raw CPUID / XGETBV helpers used by init() for runtime x86 feature probing.
+ #if defined(SK_BUILD_FOR_WIN32) // Windows builds: use the <intrin.h> intrinsics.
+ #include <intrin.h>
+ static void cpuid (uint32_t abcd[4]) { __cpuid ((int*)abcd, 1); } // CPUID leaf 1 -> abcd[0..3].
+ static void cpuid7(uint32_t abcd[4]) { __cpuidex((int*)abcd, 7, 0); } // CPUID leaf 7, sub-leaf 0 -> abcd[0..3].
+ static uint64_t xgetbv(uint32_t xcr) { return _xgetbv(xcr); } // Returns _xgetbv(xcr) unchanged.
+ #else // Every other x86 build: <cpuid.h> plus inline asm.
+ #include <cpuid.h>
+ #if !defined(__cpuid_count) // Old Mac Clang doesn't have this defined.
+ #define __cpuid_count(eax, ecx, a, b, c, d) \
+ __asm__("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eax), "2"(ecx))
+ #endif // !defined(__cpuid_count)
+ static void cpuid (uint32_t abcd[4]) { __get_cpuid(1, abcd+0, abcd+1, abcd+2, abcd+3); } // CPUID leaf 1; __get_cpuid's return value is ignored.
+ static void cpuid7(uint32_t abcd[4]) { // CPUID leaf 7, sub-leaf 0 -> abcd[0..3].
+ __cpuid_count(7, 0, abcd[0], abcd[1], abcd[2], abcd[3]);
+ }
+ static uint64_t xgetbv(uint32_t xcr) { // Executes XGETBV with xcr passed in ecx.
+ uint32_t eax, edx;
+ __asm__ __volatile__ ( "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
+ return (uint64_t)(edx) << 32 | eax; // edx supplies the high 32 bits, eax the low 32 bits.
+ }
+ #endif // SK_BUILD_FOR_WIN32
+#elif !defined(SK_ARM_HAS_NEON) && \
+ defined(SK_CPU_ARM32) && \
+ defined(SK_BUILD_FOR_ANDROID) && \
+ !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
+ #include <cpu-features.h> // NOTE(review): presumably provides android_getCpuFeatures() used in init() -- confirm.
+#endif // CPU feature probing helpers
+
namespace SkOpts {
// Define default function pointer values here...
@@ -84,16 +112,28 @@ namespace SkOpts {
static void init() {
// TODO: Chrome's not linking _sse* opts on iOS simulator builds. Bug or feature?
#if defined(SK_CPU_X86) && !defined(SK_BUILD_FOR_IOS)
- if (SkCpu::Supports(SkCpu::SSSE3)) { Init_ssse3(); }
- if (SkCpu::Supports(SkCpu::SSE41)) { Init_sse41(); }
- if (SkCpu::Supports(SkCpu::SSE42)) { Init_sse42(); }
- if (SkCpu::Supports(SkCpu::AVX )) { Init_avx(); }
- if (SkCpu::Supports(SkCpu::AVX2 )) { Init_avx2(); }
-
- #elif defined(SK_CPU_ARM32) && \
- defined(SK_BUILD_FOR_ANDROID) && \
- !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
- if (SkCpu::Supports(SkCpu::NEON)) { Init_neon(); }
+ uint32_t abcd[] = {0,0,0,0};
+ cpuid(abcd);
+ if (abcd[2] & (1<< 9)) { Init_ssse3(); }
+ if (abcd[2] & (1<<19)) { Init_sse41(); }
+ if (abcd[2] & (1<<20)) { Init_sse42(); }
+
+ // AVX detection's kind of a pain. This is cribbed from Chromium.
+ if ( ( abcd[2] & (7<<26)) == (7<<26) && // Check bits 26-28 of ecx are all set,
+ (xgetbv(0) & 6 ) == 6 ){ // and check the OS supports XSAVE.
+ Init_avx();
+
+ // AVX2 additionally needs bit 5 set on ebx after calling cpuid(7).
+ uint32_t abcd7[] = {0,0,0,0};
+ cpuid7(abcd7);
+ if (abcd7[1] & (1<<5)) { Init_avx2(); }
+ }
+
+ #elif !defined(SK_ARM_HAS_NEON) && \
+ defined(SK_CPU_ARM32) && \
+ defined(SK_BUILD_FOR_ANDROID) && \
+ !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
+ if (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) { Init_neon(); }
#endif
}
diff --git a/src/core/SkUtilsArm.cpp b/src/core/SkUtilsArm.cpp
index c29938fdfc..bf98fed476 100644
--- a/src/core/SkUtilsArm.cpp
+++ b/src/core/SkUtilsArm.cpp
@@ -5,4 +5,141 @@
* found in the LICENSE file.
*/
-// This file no longer needs to exist, but it's still referenced by Chrome's GYP / GN builds.
+#include "SkUtilsArm.h"
+
+#if SK_ARM_NEON_IS_DYNAMIC
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#if defined(SK_BUILD_FOR_ANDROID)
+# include <cpu-features.h>
+#endif
+
+// Determines at runtime whether the target CPU supports the ARM NEON
+// instruction set. Linux-specific; any probe failure is reported as "no NEON".
+static bool sk_cpu_arm_check_neon(void) {
+    // If we fail any of the following, assume we don't have NEON instructions
+    // This allows us to return immediately in case of error.
+    bool result = false;
+
+// Use the Android NDK's cpu-features helper library to detect NEON at runtime.
+// See http://crbug.com/164154 to see why this is needed in Chromium for Android.
+#ifdef SK_BUILD_FOR_ANDROID
+
+    result = (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
+
+#else  // SK_BUILD_FOR_ANDROID
+
+    // There is no user-accessible CPUID instruction on ARM that we can use.
+    // Instead, we must parse /proc/cpuinfo and look for the 'neon' feature.
+    // For example, here's a typical output (Nexus S running ICS 4.0.3):
+    /*
+    Processor       : ARMv7 Processor rev 2 (v7l)
+    BogoMIPS        : 994.65
+    Features        : swp half thumb fastmult vfp edsp thumbee neon vfpv3
+    CPU implementer : 0x41
+    CPU architecture: 7
+    CPU variant     : 0x2
+    CPU part        : 0xc08
+    CPU revision    : 2
+
+    Hardware        : herring
+    Revision        : 000b
+    Serial          : 3833c77d6dc000ec
+    */
+    char buffer[4096];
+
+    do {
+        // open /proc/cpuinfo
+        int fd = TEMP_FAILURE_RETRY(open("/proc/cpuinfo", O_RDONLY));
+        if (fd < 0) {
+            SkDebugf("Could not open /proc/cpuinfo: %s\n", strerror(errno));
+            break;
+        }
+
+        // Read the file.  To simplify our search, we're going to place two
+        // sentinel '\n' characters: one at the start of the buffer, and one at
+        // the end. This means we reserve the first and last buffer bytes.
+        buffer[0] = '\n';
+        int size = TEMP_FAILURE_RETRY(read(fd, buffer+1, sizeof(buffer)-2));
+        const int read_errno = errno;  // close() below may overwrite errno.
+        close(fd);
+        if (size < 0) {  // should not happen
+            SkDebugf("Could not read /proc/cpuinfo: %s\n", strerror(read_errno));
+            break;
+        }
+
+        SkDebugf("START /proc/cpuinfo:\n%.*s\nEND /proc/cpuinfo\n",
+                 size, buffer+1);
+
+        // Compute buffer limit, and place final sentinel
+        char* buffer_end = buffer + 1 + size;
+        buffer_end[0] = '\n';
+
+        // Now, find a line that starts with "Features", i.e. look for
+        // '\nFeatures\t' in our buffer.
+        const char features[] = "\nFeatures\t";
+        const size_t features_len = sizeof(features)-1;
+
+        char* line = (char*) memmem(buffer, buffer_end - buffer,
+                                    features, features_len);
+        if (line == nullptr) {  // Weird, no Features line, bad kernel?
+            SkDebugf("Could not find a line starting with 'Features' "
+                     "in /proc/cpuinfo ?\n");
+            break;
+        }
+
+        line += features_len;  // Skip the "\nFeatures\t" prefix
+
+        // Find the end of the current line
+        char* line_end = (char*) memchr(line, '\n', buffer_end - line);
+        if (line_end == nullptr)
+            line_end = buffer_end;
+
+        // Now find 'neon' in the flags list. It must be a whole word (not
+        // 'noneon' or 'neonx'): preceded by a space and followed by a space
+        // or newline. Candidates that fail the check are skipped, not fatal.
+        const char neon[] = " neon";
+        const size_t neon_len = sizeof(neon)-1;
+        const char* flag = line;
+        while ((flag = (const char*) memmem(flag, line_end - flag,
+                                            neon, neon_len)) != nullptr &&
+               flag[neon_len] != ' ' && flag[neon_len] != '\n') {
+            flag += neon_len;  // Longer flag name; resume the search after it.
+        }
+        if (flag == nullptr)
+            break;
+
+        // Fine, we support Arm NEON !
+        result = true;
+
+    } while (0);
+
+#endif  // SK_BUILD_FOR_ANDROID
+
+    if (result) {
+        SkDEBUGF(("Device supports ARM NEON instructions!\n"));
+    } else {
+        SkDEBUGF(("Device does NOT support ARM NEON instructions!\n"));
+    }
+    return result;
+}
+
+static pthread_once_t sOnce = PTHREAD_ONCE_INIT;  // POSIX requires this initializer.
+static bool sHasArmNeon;  // Written only by sk_cpu_arm_probe_features().
+
+// Called through pthread_once(): runs the NEON probe and caches its answer.
+void sk_cpu_arm_probe_features(void) {
+    sHasArmNeon = sk_cpu_arm_check_neon();
+}
+// Returns whether NEON is supported; the probe itself runs at most once.
+bool sk_cpu_arm_has_neon(void) {
+    pthread_once(&sOnce, sk_cpu_arm_probe_features);
+    return sHasArmNeon;
+}
+
+#endif // SK_ARM_NEON_IS_DYNAMIC
diff --git a/src/core/SkUtilsArm.h b/src/core/SkUtilsArm.h
index dde933bafa..317677115c 100644
--- a/src/core/SkUtilsArm.h
+++ b/src/core/SkUtilsArm.h
@@ -8,7 +8,6 @@
#ifndef SkUtilsArm_DEFINED
#define SkUtilsArm_DEFINED
-#include "SkCpu.h"
#include "SkUtils.h"
// Define SK_ARM_NEON_MODE to one of the following values
@@ -38,13 +37,18 @@
// is ARMv7-A and supports Neon instructions. In DYNAMIC mode, this actually
// probes the CPU at runtime (and caches the result).
-static inline bool sk_cpu_arm_has_neon(void) {
#if SK_ARM_NEON_IS_NONE
+static inline bool sk_cpu_arm_has_neon(void) {
return false;
-#else
- return SkCpu::Supports(SkCpu::NEON);
-#endif
}
+#elif SK_ARM_NEON_IS_ALWAYS
+static inline bool sk_cpu_arm_has_neon(void) {
+ return true;
+}
+#else // SK_ARM_NEON_IS_DYNAMIC
+
+extern bool sk_cpu_arm_has_neon(void) SK_PURE_FUNC;
+#endif
// Use SK_ARM_NEON_WRAP(symbol) to map 'symbol' to a NEON-specific symbol
// when applicable. This will transform 'symbol' differently depending on