diff options
author | Pete Warden <petewarden@google.com> | 2016-12-14 08:10:42 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-12-14 08:25:51 -0800 |
commit | cf86fd10ee16a048b646a96760bea475cb989473 (patch) | |
tree | 0dffe22379ab31ec502acb211a364c186a550c11 | |
parent | 72db22494e491cdf7b18ea9736b3d2fb87b6b28c (diff) |
Automated rollback of change 141971738
Change: 142018466
-rw-r--r-- | tensorflow/core/platform/cpu_feature_guard.cc | 100 | ||||
-rw-r--r-- | tensorflow/core/platform/cpu_info.h | 58 | ||||
-rw-r--r-- | tensorflow/core/platform/port_test.cc | 10 | ||||
-rw-r--r-- | tensorflow/core/platform/posix/port.cc | 291 | ||||
-rw-r--r-- | tensorflow/core/platform/windows/port.cc | 5 |
5 files changed, 12 insertions, 452 deletions
diff --git a/tensorflow/core/platform/cpu_feature_guard.cc b/tensorflow/core/platform/cpu_feature_guard.cc deleted file mode 100644 index 097489b0fa..0000000000 --- a/tensorflow/core/platform/cpu_feature_guard.cc +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/platform/cpu_info.h" -#include "tensorflow/core/platform/logging.h" - -namespace tensorflow { -namespace port { -namespace { - -// Raises an error if the binary has been compiled for a CPU feature (like AVX) -// that isn't available on the current machine. It also warns of performance -// loss if there's a feature available that's not being used. -// Depending on the compiler and initialization order, a SIGILL exception may -// occur before this code is reached, but this at least offers a chance to give -// a more meaningful error message. -class CPUFeatureGuard { - public: - CPUFeatureGuard() { -#ifdef __SSE__ - CheckFeatureOrDie(CPUFeature::SSE, "SSE"); -#else - WarnIfFeatureUnused(CPUFeature::SSE, "SSE"); -#endif // __SSE__ -#ifdef __SSE2__ - CheckFeatureOrDie(CPUFeature::SSE2, "SSE2"); -#else - WarnIfFeatureUnused(CPUFeature::SSE2, "SSE2"); -#endif // __SSE2__ -#ifdef __SSE3__ - CheckFeatureOrDie(CPUFeature::SSE3, "SSE3"); -#else - WarnIfFeatureUnused(CPUFeature::SSE3, "SSE3"); -#endif // __SSE3__ -#ifdef __SSE4_1__ - CheckFeatureOrDie(CPUFeature::SSE4_1, "SSE4.1"); -#else - WarnIfFeatureUnused(CPUFeature::SSE4_1, "SSE4.1"); -#endif // __SSE4_1__ -#ifdef __SSE4_2__ - CheckFeatureOrDie(CPUFeature::SSE4_2, "SSE4.2"); -#else - WarnIfFeatureUnused(CPUFeature::SSE4_2, "SSE4.2"); -#endif // __SSE4_2__ -#ifdef __AVX__ - CheckFeatureOrDie(CPUFeature::AVX, "AVX"); -#else - WarnIfFeatureUnused(CPUFeature::AVX, "AVX"); -#endif // __AVX__ -#ifdef __AVX2__ - CheckFeatureOrDie(CPUFeature::AVX2, "AVX2"); -#else - WarnIfFeatureUnused(CPUFeature::AVX2, "AVX2"); -#endif // __AVX2__ -#ifdef __AVX512F__ - CheckFeatureOrDie(CPUFeature::AVX512F, "AVX512F"); -#else - WarnIfFeatureUnused(CPUFeature::AVX512F, "AVX512F"); -#endif // __AVX512F__ -#ifdef __FMA__ - CheckFeatureOrDie(CPUFeature::FMA, "FMA"); -#else - WarnIfFeatureUnused(CPUFeature::FMA, "FMA"); -#endif // __FMA__ - } - - void CheckFeatureOrDie(CPUFeature feature, const string& feature_name) { - if (!TestCPUFeature(feature)) { - LOG(FATAL) - << "The TensorFlow library was compiled to use " << feature_name - << " instructions, but these aren't available on your machine."; - } - } - - void WarnIfFeatureUnused(CPUFeature feature, const string& feature_name) { - if (TestCPUFeature(feature)) { - LOG(WARNING) << "The TensorFlow library wasn't compiled to use " - << feature_name - << " instructions, but these are available on your machine " - "and could speed up CPU computations."; - } - } -}; - -CPUFeatureGuard g_cpu_feature_guard_singleton; -} // namespace -} // namespace port -} // namespace tensorflow diff --git a/tensorflow/core/platform/cpu_info.h b/tensorflow/core/platform/cpu_info.h index 215bdae226..706dc4dcc5 100644 --- a/tensorflow/core/platform/cpu_info.h +++ b/tensorflow/core/platform/cpu_info.h @@ -28,64 +28,6 @@ static const bool kLittleEndian = true; // software can change it dynamically. int NumSchedulableCPUs(); -// Mostly ISA related features that we care about -enum CPUFeature { - // Do not change numeric assignments. - MMX = 0, - SSE = 1, - SSE2 = 2, - SSE3 = 3, - SSSE3 = 4, - SSE4_1 = 5, - SSE4_2 = 6, - CMOV = 7, - CMPXCHG8B = 8, - CMPXCHG16B = 9, - POPCNT = 10, - AES = 11, - AVX = 12, - RDRAND = 13, - AVX2 = 14, - FMA = 15, - F16C = 16, - PCLMULQDQ = 17, - RDSEED = 18, - ADX = 19, - SMAP = 20, - - // Prefetch Vector Data Into Caches with Intent to Write and T1 Hint - // http://www.felixcloutier.com/x86/PREFETCHWT1.html. - // You probably want PREFETCHW instead. - PREFETCHWT1 = 21, - - BMI1 = 22, - BMI2 = 23, - HYPERVISOR = 25, // 0 when on a real CPU, 1 on (well-behaved) hypervisor. - - // Prefetch Data into Caches in Anticipation of a Write (3D Now!). - // http://www.felixcloutier.com/x86/PREFETCHW.html - PREFETCHW = 26, - - // AVX-512: 512-bit vectors (plus masking, etc.) in Knights Landing, - // Skylake - // Xeon, etc.; each of these entries is a different subset of - // instructions, - // various combinations of which occur on various CPU types. - AVX512F = 27, // Foundation - AVX512CD = 28, // Conflict detection - AVX512ER = 29, // Exponential and reciprocal - AVX512PF = 30, // Prefetching - AVX512VL = 31, // Shorter vector lengths - AVX512BW = 32, // Byte and word - AVX512DQ = 33, // Dword and qword - AVX512VBMI = 34, // Bit manipulation - AVX512IFMA = 35, // Integer multiply-add - AVX512_4VNNIW = 36, // Integer neural network - AVX512_4FMAPS = 37, // Floating point neural network -}; - -bool TestCPUFeature(CPUFeature feature); - } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/port_test.cc b/tensorflow/core/platform/port_test.cc index 8d98eb25a2..402c718e4f 100644 --- a/tensorflow/core/platform/port_test.cc +++ b/tensorflow/core/platform/port_test.cc @@ -15,7 +15,6 @@ limitations under the License. #include <condition_variable> #include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/test.h" @@ -67,14 +66,5 @@ TEST(ConditionVariable, WaitForMilliseconds_Signalled) { EXPECT_LT(finish - start, 3); } -TEST(TestCPUFeature, TestFeature) { - // We don't know what the result should be on this platform, so just make - // sure it's callable. - const bool has_avx = TestCPUFeature(CPUFeature::AVX); - LOG(INFO) << "has_avx = " << has_avx; - const bool has_avx2 = TestCPUFeature(CPUFeature::AVX2); - LOG(INFO) << "has_avx2 = " << has_avx2; -} - } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 4498525505..a7c797ebf2 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -13,17 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// SIMD extension querying is only available on x86. -#if defined(__x86_64__) || defined(__amd64__) -#define PORT_IS_X86 -#endif - -#include "tensorflow/core/platform/cpu_info.h" -#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" -#if defined(PORT_IS_X86) -#include <mutex> // NOLINT -#endif #if defined(__linux__) && !defined(__ANDROID__) #include <sched.h> #endif @@ -38,259 +28,10 @@ limitations under the License. #include <thread> #endif -#ifdef PORT_IS_X86 -#define GETCPUID(a, b, c, d, a_inp, c_inp) \ - asm("mov %%rbx, %%rdi\n" \ - "cpuid\n" \ - "xchg %%rdi, %%rbx\n" \ - : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \ - : "a"(a_inp), "2"(c_inp)) -#endif - namespace tensorflow { namespace port { -namespace { - -#ifdef PORT_IS_X86 -class CPUIDInfo; -void InitCPUIDInfo(); - -CPUIDInfo *cpuid = nullptr; - -int GetXCR0EAX() { - int eax, edx; - asm("XGETBV" : "=a"(eax), "=d"(edx) : "c"(0)); - return eax; -} - -// Structure for basic CPUID info -struct CPUIDInfo { - CPUIDInfo() - : have_adx_(0), - have_aes_(0), - have_avx_(0), - have_avx2_(0), - have_avx512f_(0), - have_avx512cd_(0), - have_avx512er_(0), - have_avx512pf_(0), - have_avx512vl_(0), - have_avx512bw_(0), - have_avx512dq_(0), - have_avx512vbmi_(0), - have_avx512ifma_(0), - have_avx512_4vnniw_(0), - have_avx512_4fmaps_(0), - have_bmi1_(0), - have_bmi2_(0), - have_cmov_(0), - have_cmpxchg16b_(0), - have_cmpxchg8b_(0), - have_f16c_(0), - have_fma_(0), - have_mmx_(0), - have_pclmulqdq_(0), - have_popcnt_(0), - have_prefetchw_(0), - have_prefetchwt1_(0), - have_rdrand_(0), - have_rdseed_(0), - have_smap_(0), - have_sse_(0), - have_sse2_(0), - have_sse3_(0), - have_sse4_1_(0), - have_sse4_2_(0), - have_ssse3_(0), - have_hypervisor_(0) {} - - static void Initialize() { - // Initialize cpuid struct - CHECK(cpuid == NULL) << __func__ << " ran more than once"; - cpuid = new CPUIDInfo; - - uint32 eax, ebx, ecx, edx; - - // To get general information and extended features we send eax = 1 and - // ecx = 0 to cpuid. The response is returned in eax, ebx, ecx and edx. - // (See Intel 64 and IA-32 Architectures Software Developer's Manual - // Volume 2A: Instruction Set Reference, A-M CPUID). - GETCPUID(eax, ebx, ecx, edx, 1, 0); - - cpuid->have_aes_ = (ecx >> 25) & 0x1; - cpuid->have_cmov_ = (edx >> 15) & 0x1; - cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1; - cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1; - cpuid->have_mmx_ = (edx >> 23) & 0x1; - cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1; - cpuid->have_popcnt_ = (ecx >> 23) & 0x1; - cpuid->have_rdrand_ = (ecx >> 30) & 0x1; - cpuid->have_sse2_ = (edx >> 26) & 0x1; - cpuid->have_sse3_ = ecx & 0x1; - cpuid->have_sse4_1_ = (ecx >> 19) & 0x1; - cpuid->have_sse4_2_ = (ecx >> 20) & 0x1; - cpuid->have_sse_ = (edx >> 25) & 0x1; - cpuid->have_ssse3_ = (ecx >> 9) & 0x1; - cpuid->have_hypervisor_ = (ecx >> 31) & 1; - - const uint64 xcr0_xmm_mask = 0x2; - const uint64 xcr0_ymm_mask = 0x4; - const uint64 xcr0_maskreg_mask = 0x20; - const uint64 xcr0_zmm0_15_mask = 0x40; - const uint64 xcr0_zmm16_31_mask = 0x80; - - const uint64 xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask; - const uint64 xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask | - xcr0_zmm0_15_mask | xcr0_zmm16_31_mask; - - const bool have_avx = - // Does the OS support XGETBV instruction use by applications? - ((ecx >> 27) & 0x1) && - // Does the OS save/restore XMM and YMM state? - ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) && - // Is AVX supported in hardware? - ((ecx >> 28) & 0x1); - - const bool have_avx512 = - // Does the OS support XGETBV instruction use by applications? - ((ecx >> 27) & 0x1) && - // Does the OS save/restore ZMM state? - ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask); - cpuid->have_avx_ = have_avx; - cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1); - cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1); - - // Get standard level 7 structured extension features (issue CPUID with - // eax = 7 and ecx= 0), which is required to check for AVX2 support as - // well as other Haswell (and beyond) features. (See Intel 64 and IA-32 - // Architectures Software Developer's Manual Volume 2A: Instruction Set - // Reference, A-M CPUID). - GETCPUID(eax, ebx, ecx, edx, 7, 0); - - cpuid->have_adx_ = (ebx >> 19) & 0x1; - cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1); - cpuid->have_bmi1_ = (ebx >> 3) & 0x1; - cpuid->have_bmi2_ = (ebx >> 8) & 0x1; - cpuid->have_prefetchwt1_ = ecx & 0x1; - cpuid->have_rdseed_ = (ebx >> 18) & 0x1; - cpuid->have_smap_ = (ebx >> 20) & 0x1; - - cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1); - cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1); - cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1); - cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1); - cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1); - cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1); - cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1); - cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1); - cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1); - cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1); - cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1); - } - - static bool TestFeature(CPUFeature feature) { - InitCPUIDInfo(); - // clang-format off - switch (feature) { - case ADX: return cpuid->have_adx_; - case AES: return cpuid->have_aes_; - case AVX2: return cpuid->have_avx2_; - case AVX: return cpuid->have_avx_; - case AVX512F: return cpuid->have_avx512f_; - case AVX512CD: return cpuid->have_avx512cd_; - case AVX512PF: return cpuid->have_avx512pf_; - case AVX512ER: return cpuid->have_avx512er_; - case AVX512VL: return cpuid->have_avx512vl_; - case AVX512BW: return cpuid->have_avx512bw_; - case AVX512DQ: return cpuid->have_avx512dq_; - case AVX512VBMI: return cpuid->have_avx512vbmi_; - case AVX512IFMA: return cpuid->have_avx512ifma_; - case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_; - case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_; - case BMI1: return cpuid->have_bmi1_; - case BMI2: return cpuid->have_bmi2_; - case CMOV: return cpuid->have_cmov_; - case CMPXCHG16B: return cpuid->have_cmpxchg16b_; - case CMPXCHG8B: return cpuid->have_cmpxchg8b_; - case F16C: return cpuid->have_f16c_; - case FMA: return cpuid->have_fma_; - case MMX: return cpuid->have_mmx_; - case PCLMULQDQ: return cpuid->have_pclmulqdq_; - case POPCNT: return cpuid->have_popcnt_; - case PREFETCHW: return cpuid->have_prefetchw_; - case PREFETCHWT1: return cpuid->have_prefetchwt1_; - case RDRAND: return cpuid->have_rdrand_; - case RDSEED: return cpuid->have_rdseed_; - case SMAP: return cpuid->have_smap_; - case SSE2: return cpuid->have_sse2_; - case SSE3: return cpuid->have_sse3_; - case SSE4_1: return cpuid->have_sse4_1_; - case SSE4_2: return cpuid->have_sse4_2_; - case SSE: return cpuid->have_sse_; - case SSSE3: return cpuid->have_ssse3_; - case HYPERVISOR: return cpuid->have_hypervisor_; - default: - break; - } - // clang-format on - return false; - } - - private: - int highest_eax_; - int have_adx_ : 1; - int have_aes_ : 1; - int have_avx_ : 1; - int have_avx2_ : 1; - int have_avx512f_ : 1; - int have_avx512cd_ : 1; - int have_avx512er_ : 1; - int have_avx512pf_ : 1; - int have_avx512vl_ : 1; - int have_avx512bw_ : 1; - int have_avx512dq_ : 1; - int have_avx512vbmi_ : 1; - int have_avx512ifma_ : 1; - int have_avx512_4vnniw_ : 1; - int have_avx512_4fmaps_ : 1; - int have_bmi1_ : 1; - int have_bmi2_ : 1; - int have_cmov_ : 1; - int have_cmpxchg16b_ : 1; - int have_cmpxchg8b_ : 1; - int have_f16c_ : 1; - int have_fma_ : 1; - int have_mmx_ : 1; - int have_pclmulqdq_ : 1; - int have_popcnt_ : 1; - int have_prefetchw_ : 1; - int have_prefetchwt1_ : 1; - int have_rdrand_ : 1; - int have_rdseed_ : 1; - int have_smap_ : 1; - int have_sse_ : 1; - int have_sse2_ : 1; - int have_sse3_ : 1; - int have_sse4_1_ : 1; - int have_sse4_2_ : 1; - int have_ssse3_ : 1; - int have_hypervisor_ : 1; -}; - -std::once_flag cpuid_once_flag; - -void InitCPUIDInfo() { - // This ensures that CPUIDInfo::Initialize() is called exactly - // once regardless of how many threads concurrently call us - std::call_once(cpuid_once_flag, CPUIDInfo::Initialize); -} - -#endif // PORT_IS_X86 - -} // namespace - -void InitMain(const char *usage, int *argc, char ***argv) {} +void InitMain(const char* usage, int* argc, char*** argv) {} string Hostname() { char hostname[1024]; @@ -317,15 +58,15 @@ int NumSchedulableCPUs() { return kDefaultCores; } -void *aligned_malloc(size_t size, int minimum_alignment) { +void* aligned_malloc(size_t size, int minimum_alignment) { #if defined(__ANDROID__) return memalign(minimum_alignment, size); #else // !defined(__ANDROID__) - void *ptr = NULL; + void* ptr = NULL; // posix_memalign requires that the requested alignment be at least // sizeof(void*). In this case, fall back on malloc which should return // memory aligned to at least the size of a pointer. - const int required_alignment = sizeof(void *); + const int required_alignment = sizeof(void*); if (minimum_alignment < required_alignment) return malloc(size); if (posix_memalign(&ptr, minimum_alignment, size) != 0) return NULL; @@ -334,19 +75,19 @@ void *aligned_malloc(size_t size, int minimum_alignment) { #endif } -void aligned_free(void *aligned_memory) { free(aligned_memory); } +void aligned_free(void* aligned_memory) { free(aligned_memory); } void MallocExtension_ReleaseToSystem(std::size_t num_bytes) { // No-op. } -std::size_t MallocExtension_GetAllocatedSize(const void *p) { return 0; } +std::size_t MallocExtension_GetAllocatedSize(const void* p) { return 0; } -void AdjustFilenameForLogging(string *filename) { +void AdjustFilenameForLogging(string* filename) { // Nothing to do } -bool Snappy_Compress(const char *input, size_t length, string *output) { +bool Snappy_Compress(const char* input, size_t length, string* output) { #ifdef SNAPPY output->resize(snappy::MaxCompressedLength(length)); size_t outlen; @@ -358,8 +99,8 @@ bool Snappy_Compress(const char *input, size_t length, string *output) { #endif } -bool Snappy_GetUncompressedLength(const char *input, size_t length, - size_t *result) { +bool Snappy_GetUncompressedLength(const char* input, size_t length, + size_t* result) { #ifdef SNAPPY return snappy::GetUncompressedLength(input, length, result); #else @@ -367,7 +108,7 @@ bool Snappy_GetUncompressedLength(const char *input, size_t length, #endif } -bool Snappy_Uncompress(const char *input, size_t length, char *output) { +bool Snappy_Uncompress(const char* input, size_t length, char* output) { #ifdef SNAPPY return snappy::RawUncompress(input, length, output); #else @@ -375,15 +116,7 @@ bool Snappy_Uncompress(const char *input, size_t length, char *output) { #endif } -string Demangle(const char *mangled) { return mangled; } - -bool TestCPUFeature(CPUFeature feature) { -#ifdef PORT_IS_X86 - return CPUIDInfo::TestFeature(feature); -#else - return false; -#endif -} +string Demangle(const char* mangled) { return mangled; } } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/windows/port.cc b/tensorflow/core/platform/windows/port.cc index 7f403fec94..ee5be221cd 100644 --- a/tensorflow/core/platform/windows/port.cc +++ b/tensorflow/core/platform/windows/port.cc @@ -99,10 +99,5 @@ bool Snappy_Uncompress(const char* input, size_t length, char* output) { string Demangle(const char* mangled) { return mangled; } -bool TestCPUFeature(CPUFeature feature) { - // TODO(petewarden): Checking CPU features is not yet implemented for Windows. - return false; -} - } // namespace port } // namespace tensorflow |