aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@google.com>2016-04-15 08:40:22 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2016-04-15 08:40:23 -0700
commit86498fbfcb93a9048bbe1c28cc0df40d8d0c96e9 (patch)
treea579fdee9a3cf330050dbe7fd0a3a25d49783693
parent2c7f24093a394ccbe54a7db60ba79af14682e7fa (diff)
Revert of Move CPU feature detection to its own file. (patchset #7 id:120001 of https://codereview.chromium.org/1890483002/ )
Reason for revert: many unexpected GM diffs across GPU+CPU configs on Windows (hopefully just text masks on GPU?). seems like we pick a different srcover variant in some places. Original issue's description: > Move CPU feature detection to its own file. > > - Moves CPU feature detection to its own file. > - Cleans up some redundant feature detection scattered around core/ and opts/. > - Can now detect a few new CPU features: > * F16C -> Intel f16<->f32 instructions, added between AVX and AVX2 > * FMA -> Intel FMA instructions, added at the same time as AVX2 > * VFP_FP16 -> ARM f16<->f32 instructions, quite common > * NEON_FMA -> ARM FMA instructions, also quite common > * SSE and SSE3... why not? > > This new internal API makes it very cheap to do fine-grained runtime CPU > feature detection. Redundant calls to SkCpu::Supports() should be eliminated > and it's hoistable out of loops. It compiles away entirely when we have the > appropriate instructions available at compile time. > > This means we can call it to guard even a little snippet of 1 or 2 instructions > right where needed and let inlining hoist the check (if any at all) up to > somewhere that doesn't hurt performance. I've explained how I made this work > in the private section of the new header. > > Once this lands and bakes a bit, I'll start following up with CLs to use it more > and to add a bunch of those little 1-2 instruction snippets we've been wanting, > e.g. cvtps2ph, cvtph2ps, ptest, pmulld, pmovzxbd, blendvps, pshufb, roundps > (for floor) on x86, and vcvt.f32.f16, vcvt.f16.f32 on ARM. 
> > BUG=skia: > GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1890483002 > CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot > > Committed: https://skia.googlesource.com/skia/+/872ea29357439f05b1f6995dd300fc054733e607 TBR=fmalita@chromium.org,herb@google.com,reed@google.com,mtklein@chromium.org # Skipping CQ checks because original CL landed less than 1 days ago. NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true BUG=skia: Review URL: https://codereview.chromium.org/1892643003
-rw-r--r--gyp/core.gyp7
-rw-r--r--gyp/core.gypi2
-rw-r--r--src/core/SkCpu.cpp90
-rw-r--r--src/core/SkCpu.h123
-rw-r--r--src/core/SkOpts.cpp62
-rw-r--r--src/core/SkUtilsArm.cpp139
-rw-r--r--src/core/SkUtilsArm.h14
-rw-r--r--src/opts/opts_check_x86.cpp110
8 files changed, 307 insertions, 240 deletions
diff --git a/gyp/core.gyp b/gyp/core.gyp
index b0cbbf9bb0..13b8217fb1 100644
--- a/gyp/core.gyp
+++ b/gyp/core.gyp
@@ -87,6 +87,13 @@
'android_deps.gyp:cpu_features',
],
}],
+ [ 'skia_arch_type == "arm"', {
+ # The code in SkUtilsArm.cpp can be used on an ARM-based Linux system, not only Android.
+ 'sources': [
+ '../src/core/SkUtilsArm.cpp',
+ '../src/core/SkUtilsArm.h',
+ ],
+ }],
['skia_gpu == 1', {
'include_dirs': [
'../include/gpu',
diff --git a/gyp/core.gypi b/gyp/core.gypi
index b08e25a0e6..efc9c0dbfa 100644
--- a/gyp/core.gypi
+++ b/gyp/core.gypi
@@ -85,8 +85,6 @@
'<(skia_src_path)/core/SkConvolver.cpp',
'<(skia_src_path)/core/SkConvolver.h',
'<(skia_src_path)/core/SkCoreBlitters.h',
- '<(skia_src_path)/core/SkCpu.cpp',
- '<(skia_src_path)/core/SkCpu.h',
'<(skia_src_path)/core/SkCubicClipper.cpp',
'<(skia_src_path)/core/SkCubicClipper.h',
'<(skia_src_path)/core/SkData.cpp',
diff --git a/src/core/SkCpu.cpp b/src/core/SkCpu.cpp
deleted file mode 100644
index 4030fce2e1..0000000000
--- a/src/core/SkCpu.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright 2016 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "SkCpu.h"
-#include "SkOncePtr.h"
-
-#if defined(SK_CPU_X86)
- #if defined(SK_BUILD_FOR_WIN32)
- #include <intrin.h>
- static void cpuid (uint32_t abcd[4]) { __cpuid ((int*)abcd, 1); }
- static void cpuid7(uint32_t abcd[4]) { __cpuidex((int*)abcd, 7, 0); }
- static uint64_t xgetbv(uint32_t xcr) { return _xgetbv(xcr); }
- #else
- #include <cpuid.h>
- #if !defined(__cpuid_count) // Old Mac Clang doesn't have this defined.
- #define __cpuid_count(eax, ecx, a, b, c, d) \
- __asm__("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eax), "2"(ecx))
- #endif
- static void cpuid (uint32_t abcd[4]) { __get_cpuid(1, abcd+0, abcd+1, abcd+2, abcd+3); }
- static void cpuid7(uint32_t abcd[4]) {
- __cpuid_count(7, 0, abcd[0], abcd[1], abcd[2], abcd[3]);
- }
- static uint64_t xgetbv(uint32_t xcr) {
- uint32_t eax, edx;
- __asm__ __volatile__ ( "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
- return (uint64_t)(edx) << 32 | eax;
- }
- #endif
-
- static uint32_t read_cpu_features() {
- uint32_t features = 0;
- uint32_t abcd[4] = {0,0,0,0};
-
- // You might want to refer to http://www.sandpile.org/x86/cpuid.htm
-
- cpuid(abcd);
- if (abcd[3] & (1<<25)) { features |= SkCpu:: SSE1; }
- if (abcd[3] & (1<<26)) { features |= SkCpu:: SSE2; }
- if (abcd[2] & (1<< 0)) { features |= SkCpu:: SSE3; }
- if (abcd[2] & (1<< 9)) { features |= SkCpu::SSSE3; }
- if (abcd[2] & (1<<19)) { features |= SkCpu::SSE41; }
- if (abcd[2] & (1<<20)) { features |= SkCpu::SSE42; }
-
- if ((abcd[2] & (3<<26)) == (3<<26) && (xgetbv(0) & 6) == 6) { // XSAVE + OSXSAVE
- if (abcd[2] & (1<<28)) { features |= SkCpu:: AVX; }
- if (abcd[2] & (1<<29)) { features |= SkCpu::F16C; }
- if (abcd[2] & (1<<12)) { features |= SkCpu:: FMA; }
-
- cpuid7(abcd);
- if (abcd[1] & (1<<5)) { features |= SkCpu::AVX2; }
- }
- return features;
- }
-
-#elif defined(SK_CPU_ARM32) && \
- defined(SK_BUILD_FOR_ANDROID) && \
- !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
- #include <cpu-features.h>
-
- static uint32_t read_cpu_features() {
- uint32_t features = 0;
-
- uint64_t android_features = android_getCpuFeatures();
- if (android_features & ANDROID_CPU_ARM_FEATURE_NEON ) { features |= SkCpu::NEON ; }
- if (android_features & ANDROID_CPU_ARM_FEATURE_NEON_FMA) { features |= SkCpu::NEON_FMA; }
- if (android_features & ANDROID_CPU_ARM_FEATURE_VFP_FP16) { features |= SkCpu::VFP_FP16; }
- return features;
- }
-
-#else
- static uint32_t read_cpu_features() {
- return 0;
- }
-
-#endif
-
-#if defined(__GNUC__) || defined(__clang__)
- SK_DECLARE_STATIC_ONCE_PTR(uint32_t, gCachedCpuFeatures);
- uint32_t SkCpu::RuntimeCpuFeatures() {
- return *gCachedCpuFeatures.get([]{ return new uint32_t{read_cpu_features()}; });
- }
-
-#else
- const uint32_t SkCpu::gCachedCpuFeatures = read_cpu_features();
-
-#endif
diff --git a/src/core/SkCpu.h b/src/core/SkCpu.h
deleted file mode 100644
index 2a41d37b16..0000000000
--- a/src/core/SkCpu.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright 2016 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkCpu_DEFINED
-#define SkCpu_DEFINED
-
-#include "SkTypes.h"
-
-struct SkCpu {
- enum {
- SSE1 = 1 << 0,
- SSE2 = 1 << 1,
- SSE3 = 1 << 2,
- SSSE3 = 1 << 3,
- SSE41 = 1 << 4,
- SSE42 = 1 << 5,
- AVX = 1 << 6,
- F16C = 1 << 7,
- FMA = 1 << 8,
- AVX2 = 1 << 9,
- };
- enum {
- NEON = 1 << 0,
- NEON_FMA = 1 << 1,
- VFP_FP16 = 1 << 2,
- };
-
- static bool Supports(uint32_t);
-
-private:
- // Consider a loop like this that expands 16-bit floats out to 32-bit, does math, and repacks:
- // for (int i = 0; i < N; i++) {
- // if (SkCpu::Supports(SkCpu::F16C)) {
- // f32s = SkCpu::F16C_cvtph_ps(f16s);
- // } else {
- // f32s = some_slower_f16_to_f32_routine(f16s);
- // }
- //
- // ... do some math with f32s ...
- //
- // if (SkCpu::Supports(SkCpu::F16C)) {
- // f16s = SkCpu::F16C_cvtps_ph(f32s);
- // } else {
- // f16s = some_slower_f32_to_f16_routine(f32s);
- // }
- // }
- //
- // We would like SkCpu::Supports() to participate in common sub-expression elimination,
- // so that it's called exactly 1 time, rather than N or 2N times. This is especially
- // important when the if-else blocks you see above are really inline functions.
- //
- // The key to this is to make sure to implement RuntimeCpuFeatures() with the same
- // capacity for common sub-expression elimination.
- //
- // __attribute__((const)) works perfectly when available.
- //
- // When it's not (MSVC), we fall back to a static initializer.
- // (Static initializers would work fine everywhere, but Chrome really dislikes them.)
-
-#if defined(__GNUC__) || defined(__clang__) // i.e. GCC, Clang, or clang-cl
- __attribute__((const))
- static uint32_t RuntimeCpuFeatures();
-#else
- static const uint32_t gCachedCpuFeatures;
- static uint32_t RuntimeCpuFeatures() {
- return gCachedCpuFeatures;
- }
-#endif
-};
-
-inline bool SkCpu::Supports(uint32_t mask) {
- uint32_t features = RuntimeCpuFeatures();
-
- // If we mask in compile-time known lower limits, the compiler can completely
- // drop many calls to RuntimeCpuFeatures().
-#if SK_CPU_X86
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
- features |= SSE1;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
- features |= SSE2;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE3
- features |= SSE3;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
- features |= SSSE3;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
- features |= SSE41;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
- features |= SSE42;
- #endif
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX
- features |= AVX;
- #endif
- // F16C goes here if we add SK_CPU_SSE_LEVEL_F16C
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
- features |= AVX2;
- #endif
- // FMA doesn't fit neatly into this total ordering.
- // It's available on Haswell+ just like AVX2, but it's technically a different bit.
- // TODO: circle back on this if we find ourselves limited by lack of compile-time FMA
-
-#else
- #if defined(SK_ARM_HAS_NEON)
- features |= NEON;
- #endif
-
- #if defined(SK_CPU_ARM64)
- features |= NEON|NEON_FMA|VFP_FP16;
- #endif
-
-#endif
- return (features & mask) == mask;
-}
-
-#endif//SkCpu_DEFINED
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index d97a680846..570e329094 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -5,7 +5,6 @@
* found in the LICENSE file.
*/
-#include "SkCpu.h"
#include "SkHalf.h"
#include "SkOnce.h"
#include "SkOpts.h"
@@ -33,6 +32,35 @@ namespace SK_OPTS_NS {
}
}
+#if defined(SK_CPU_X86) && !defined(SK_BUILD_FOR_IOS)
+ #if defined(SK_BUILD_FOR_WIN32)
+ #include <intrin.h>
+ static void cpuid (uint32_t abcd[4]) { __cpuid ((int*)abcd, 1); }
+ static void cpuid7(uint32_t abcd[4]) { __cpuidex((int*)abcd, 7, 0); }
+ static uint64_t xgetbv(uint32_t xcr) { return _xgetbv(xcr); }
+ #else
+ #include <cpuid.h>
+ #if !defined(__cpuid_count) // Old Mac Clang doesn't have this defined.
+ #define __cpuid_count(eax, ecx, a, b, c, d) \
+ __asm__("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eax), "2"(ecx))
+ #endif
+ static void cpuid (uint32_t abcd[4]) { __get_cpuid(1, abcd+0, abcd+1, abcd+2, abcd+3); }
+ static void cpuid7(uint32_t abcd[4]) {
+ __cpuid_count(7, 0, abcd[0], abcd[1], abcd[2], abcd[3]);
+ }
+ static uint64_t xgetbv(uint32_t xcr) {
+ uint32_t eax, edx;
+ __asm__ __volatile__ ( "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
+ return (uint64_t)(edx) << 32 | eax;
+ }
+ #endif
+#elif !defined(SK_ARM_HAS_NEON) && \
+ defined(SK_CPU_ARM32) && \
+ defined(SK_BUILD_FOR_ANDROID) && \
+ !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
+ #include <cpu-features.h>
+#endif
+
namespace SkOpts {
// Define default function pointer values here...
@@ -84,16 +112,28 @@ namespace SkOpts {
static void init() {
// TODO: Chrome's not linking _sse* opts on iOS simulator builds. Bug or feature?
#if defined(SK_CPU_X86) && !defined(SK_BUILD_FOR_IOS)
- if (SkCpu::Supports(SkCpu::SSSE3)) { Init_ssse3(); }
- if (SkCpu::Supports(SkCpu::SSE41)) { Init_sse41(); }
- if (SkCpu::Supports(SkCpu::SSE42)) { Init_sse42(); }
- if (SkCpu::Supports(SkCpu::AVX )) { Init_avx(); }
- if (SkCpu::Supports(SkCpu::AVX2 )) { Init_avx2(); }
-
- #elif defined(SK_CPU_ARM32) && \
- defined(SK_BUILD_FOR_ANDROID) && \
- !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
- if (SkCpu::Supports(SkCpu::NEON)) { Init_neon(); }
+ uint32_t abcd[] = {0,0,0,0};
+ cpuid(abcd);
+ if (abcd[2] & (1<< 9)) { Init_ssse3(); }
+ if (abcd[2] & (1<<19)) { Init_sse41(); }
+ if (abcd[2] & (1<<20)) { Init_sse42(); }
+
+ // AVX detection's kind of a pain. This is cribbed from Chromium.
+ if ( ( abcd[2] & (7<<26)) == (7<<26) && // Check bits 26-28 of ecx are all set,
+ (xgetbv(0) & 6 ) == 6 ){ // and check the OS supports XSAVE.
+ Init_avx();
+
+ // AVX2 additionally needs bit 5 set on ebx after calling cpuid(7).
+ uint32_t abcd7[] = {0,0,0,0};
+ cpuid7(abcd7);
+ if (abcd7[1] & (1<<5)) { Init_avx2(); }
+ }
+
+ #elif !defined(SK_ARM_HAS_NEON) && \
+ defined(SK_CPU_ARM32) && \
+ defined(SK_BUILD_FOR_ANDROID) && \
+ !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
+ if (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) { Init_neon(); }
#endif
}
diff --git a/src/core/SkUtilsArm.cpp b/src/core/SkUtilsArm.cpp
index c29938fdfc..bf98fed476 100644
--- a/src/core/SkUtilsArm.cpp
+++ b/src/core/SkUtilsArm.cpp
@@ -5,4 +5,141 @@
* found in the LICENSE file.
*/
-// This file no longer needs to exist, but it's still referenced by Chrome's GYP / GN builds.
+#include "SkUtilsArm.h"
+
+#if SK_ARM_NEON_IS_DYNAMIC
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#if defined(SK_BUILD_FOR_ANDROID)
+# include <cpu-features.h>
+#endif
+
+// A function used to determine at runtime if the target CPU supports
+// the ARM NEON instruction set. This implementation is Linux-specific.
+static bool sk_cpu_arm_check_neon(void) {
+ // If we fail any of the following, assume we don't have NEON instructions
+ // This allows us to return immediately in case of error.
+ bool result = false;
+
+// Use the Android NDK's cpu-features helper library to detect NEON at runtime.
+// See http://crbug.com/164154 to see why this is needed in Chromium for Android.
+#ifdef SK_BUILD_FOR_ANDROID
+
+ result = (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
+
+#else // SK_BUILD_FOR_ANDROID
+
+ // There is no user-accessible CPUID instruction on ARM that we can use.
+ // Instead, we must parse /proc/cpuinfo and look for the 'neon' feature.
+ // For example, here's a typical output (Nexus S running ICS 4.0.3):
+ /*
+ Processor : ARMv7 Processor rev 2 (v7l)
+ BogoMIPS : 994.65
+ Features : swp half thumb fastmult vfp edsp thumbee neon vfpv3
+ CPU implementer : 0x41
+ CPU architecture: 7
+ CPU variant : 0x2
+ CPU part : 0xc08
+ CPU revision : 2
+
+ Hardware : herring
+ Revision : 000b
+ Serial : 3833c77d6dc000ec
+ */
+ char buffer[4096];
+
+ do {
+ // open /proc/cpuinfo
+ int fd = TEMP_FAILURE_RETRY(open("/proc/cpuinfo", O_RDONLY));
+ if (fd < 0) {
+ SkDebugf("Could not open /proc/cpuinfo: %s\n", strerror(errno));
+ break;
+ }
+
+ // Read the file. To simplify our search, we're going to place two
+ // sentinel '\n' characters: one at the start of the buffer, and one at
+ // the end. This means we reserve the first and last buffer bytes.
+ buffer[0] = '\n';
+ int size = TEMP_FAILURE_RETRY(read(fd, buffer+1, sizeof(buffer)-2));
+ close(fd);
+
+ if (size < 0) { // should not happen
+ SkDebugf("Could not read /proc/cpuinfo: %s\n", strerror(errno));
+ break;
+ }
+
+ SkDebugf("START /proc/cpuinfo:\n%.*s\nEND /proc/cpuinfo\n",
+ size, buffer+1);
+
+ // Compute buffer limit, and place final sentinel
+ char* buffer_end = buffer + 1 + size;
+ buffer_end[0] = '\n';
+
+ // Now, find a line that starts with "Features" followed by a tab,
+ // i.e. look for '\nFeatures\t' in our buffer.
+ const char features[] = "\nFeatures\t";
+ const size_t features_len = sizeof(features)-1;
+
+ char* line = (char*) memmem(buffer, buffer_end - buffer,
+ features, features_len);
+ if (line == nullptr) { // Weird, no Features line, bad kernel?
+ SkDebugf("Could not find a line starting with 'Features'"
+ "in /proc/cpuinfo ?\n");
+ break;
+ }
+
+ line += features_len; // Skip the "\nFeatures\t" prefix
+
+ // Find the end of the current line
+ char* line_end = (char*) memchr(line, '\n', buffer_end - line);
+ if (line_end == nullptr)
+ line_end = buffer_end;
+
+ // Now find an instance of 'neon' in the flags list. We want to
+ // ensure it's only 'neon' and not something fancy like 'noneon'
+ // so check that it follows a space.
+ const char neon[] = " neon";
+ const size_t neon_len = sizeof(neon)-1;
+ const char* flag = (const char*) memmem(line, line_end - line,
+ neon, neon_len);
+ if (flag == nullptr)
+ break;
+
+ // Ensure it is followed by a space or a newline.
+ if (flag[neon_len] != ' ' && flag[neon_len] != '\n')
+ break;
+
+ // Fine, we support Arm NEON !
+ result = true;
+
+ } while (0);
+
+#endif // SK_BUILD_FOR_ANDROID
+
+ if (result) {
+ SkDEBUGF(("Device supports ARM NEON instructions!\n"));
+ } else {
+ SkDEBUGF(("Device does NOT support ARM NEON instructions!\n"));
+ }
+ return result;
+}
+
+static pthread_once_t sOnce;
+static bool sHasArmNeon;
+
+// called through pthread_once()
+void sk_cpu_arm_probe_features(void) {
+ sHasArmNeon = sk_cpu_arm_check_neon();
+}
+
+bool sk_cpu_arm_has_neon(void) {
+ pthread_once(&sOnce, sk_cpu_arm_probe_features);
+ return sHasArmNeon;
+}
+
+#endif // SK_ARM_NEON_IS_DYNAMIC
diff --git a/src/core/SkUtilsArm.h b/src/core/SkUtilsArm.h
index dde933bafa..317677115c 100644
--- a/src/core/SkUtilsArm.h
+++ b/src/core/SkUtilsArm.h
@@ -8,7 +8,6 @@
#ifndef SkUtilsArm_DEFINED
#define SkUtilsArm_DEFINED
-#include "SkCpu.h"
#include "SkUtils.h"
// Define SK_ARM_NEON_MODE to one of the following values
@@ -38,13 +37,18 @@
// is ARMv7-A and supports Neon instructions. In DYNAMIC mode, this actually
// probes the CPU at runtime (and caches the result).
-static inline bool sk_cpu_arm_has_neon(void) {
#if SK_ARM_NEON_IS_NONE
+static inline bool sk_cpu_arm_has_neon(void) {
return false;
-#else
- return SkCpu::Supports(SkCpu::NEON);
-#endif
}
+#elif SK_ARM_NEON_IS_ALWAYS
+static inline bool sk_cpu_arm_has_neon(void) {
+ return true;
+}
+#else // SK_ARM_NEON_IS_DYNAMIC
+
+extern bool sk_cpu_arm_has_neon(void) SK_PURE_FUNC;
+#endif
// Use SK_ARM_NEON_WRAP(symbol) to map 'symbol' to a NEON-specific symbol
// when applicable. This will transform 'symbol' differently depending on
diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp
index 2cb2e65bf1..e0e84f3786 100644
--- a/src/opts/opts_check_x86.cpp
+++ b/src/opts/opts_check_x86.cpp
@@ -12,7 +12,6 @@
#include "SkBlitMask.h"
#include "SkBlitRow.h"
#include "SkBlitRow_opts_SSE2.h"
-#include "SkCpu.h"
#include "SkOncePtr.h"
#include "SkRTConf.h"
@@ -29,16 +28,111 @@
*/
+#if defined(_MSC_VER) && defined(_WIN64)
+#include <intrin.h>
+#endif
+
/* This file must *not* be compiled with -msse or any other optional SIMD
extension, otherwise gcc may generate SIMD instructions even for scalar ops
(and thus give an invalid instruction on Pentium3 on the code below).
For example, only files named *_SSE2.cpp in this directory should be
compiled with -msse2 or higher. */
+
+/* Function to get the CPU SSE level at runtime, for different compilers. */
+#ifdef _MSC_VER
+static inline void getcpuid(int info_type, int info[4]) {
+#if defined(_WIN64)
+ __cpuid(info, info_type);
+#else
+ __asm {
+ mov eax, [info_type]
+ cpuid
+ mov edi, [info]
+ mov [edi], eax
+ mov [edi+4], ebx
+ mov [edi+8], ecx
+ mov [edi+12], edx
+ }
+#endif
+}
+#elif defined(__x86_64__)
+static inline void getcpuid(int info_type, int info[4]) {
+ asm volatile (
+ "cpuid \n\t"
+ : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
+ : "a"(info_type)
+ );
+}
+#else
+static inline void getcpuid(int info_type, int info[4]) {
+ // We save and restore ebx, so this code can be compatible with -fPIC
+ asm volatile (
+ "pushl %%ebx \n\t"
+ "cpuid \n\t"
+ "movl %%ebx, %1 \n\t"
+ "popl %%ebx \n\t"
+ : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
+ : "a"(info_type)
+ );
+}
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+
+/* Fetch the SIMD level directly from the CPU, at run-time.
+ * Only checks the levels needed by the optimizations in this file.
+ */
+static int* get_SIMD_level() {
+ int cpu_info[4] = { 0, 0, 0, 0 };
+ getcpuid(1, cpu_info);
+
+ int* level = new int;
+
+ if ((cpu_info[2] & (1<<20)) != 0) {
+ *level = SK_CPU_SSE_LEVEL_SSE42;
+ } else if ((cpu_info[2] & (1<<19)) != 0) {
+ *level = SK_CPU_SSE_LEVEL_SSE41;
+ } else if ((cpu_info[2] & (1<<9)) != 0) {
+ *level = SK_CPU_SSE_LEVEL_SSSE3;
+ } else if ((cpu_info[3] & (1<<26)) != 0) {
+ *level = SK_CPU_SSE_LEVEL_SSE2;
+ } else {
+ *level = 0;
+ }
+ return level;
+}
+
+SK_DECLARE_STATIC_ONCE_PTR(int, gSIMDLevel);
+
+/* Verify that the requested SIMD level is supported in the build.
+ * If not, check if the platform supports it.
+ */
+static inline bool supports_simd(int minLevel) {
+#if defined(SK_CPU_SSE_LEVEL)
+ if (minLevel <= SK_CPU_SSE_LEVEL) {
+ return true;
+ } else
+#endif
+ {
+#if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
+ /* For the Android framework we should always know at compile time if the device
+ * we are building for supports SSSE3. The one exception to this rule is on the
+ * emulator where we are compiled without the -mssse3 option (so we have no
+ * SSSE3 procs) but can be run on a host machine that supports SSSE3
+ * instructions. So for that particular case we disable our SSSE3 options.
+ */
+ return false;
+#else
+ return minLevel <= *gSIMDLevel.get(get_SIMD_level);
+#endif
+ }
+}
+
////////////////////////////////////////////////////////////////////////////////
void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) {
- if (SkCpu::Supports(SkCpu::SSE2)) {
+ if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
procs->fExtraHorizontalReads = 3;
procs->fConvolveVertically = &convolveVertically_SSE2;
procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
@@ -51,10 +145,10 @@ void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) {
void SkBitmapProcState::platformProcs() {
/* Every optimization in the function requires at least SSE2 */
- if (!SkCpu::Supports(SkCpu::SSE2)) {
+ if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
return;
}
- const bool ssse3 = SkCpu::Supports(SkCpu::SSSE3);
+ const bool ssse3 = supports_simd(SK_CPU_SSE_LEVEL_SSSE3);
/* Check fSampleProc32 */
if (fSampleProc32 == S32_opaque_D32_filter_DX) {
@@ -105,7 +199,7 @@ static const SkBlitRow::Proc16 platform_16_procs[] = {
};
SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) {
- if (SkCpu::Supports(SkCpu::SSE2)) {
+ if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
return platform_16_procs[flags];
} else {
return nullptr;
@@ -123,7 +217,7 @@ SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) {
* SSE2 version on Silvermont, and only marginally faster on a Core i7,
* mainly due to the MULLD timings.
*/
- if (SkCpu::Supports(SkCpu::SSE2)) {
+ if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
return platform_565_colorprocs_SSE2[flags];
} else {
return nullptr;
@@ -138,7 +232,7 @@ static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = {
};
SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
- if (SkCpu::Supports(SkCpu::SSE2)) {
+ if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
return platform_32_procs_SSE2[flags];
} else {
return nullptr;
@@ -148,7 +242,7 @@ SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
////////////////////////////////////////////////////////////////////////////////
SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
- if (SkCpu::Supports(SkCpu::SSE2)) {
+ if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
if (isOpaque) {
return SkBlitLCD16OpaqueRow_SSE2;
} else {