/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ #include #include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/platform.h" // If we're on gcc 4.8 or older, there's a known bug that prevents the use of // intrinsics when the architecture is not defined in the flags. See // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57202 #if !defined(__SSE3__) && !defined(__clang__) && \ (defined(__GNUC__) && (__GNUC__ < 4) || \ ((__GNUC__ == 4) && (__GNUC_MINOR__ < 9))) #define GCC_WITHOUT_INTRINSICS #endif // Only try to use SSE3 instructions if we're on an x86 platform, and it's not // mobile, and we're not on a known bad gcc version. #if defined(PLATFORM_IS_X86) && !defined(IS_MOBILE_PLATFORM) && \ !defined(GCC_WITHOUT_INTRINSICS) #define DENORM_USE_INTRINSICS #endif #ifdef DENORM_USE_INTRINSICS #include #endif namespace tensorflow { namespace port { static void SetDenormalState(bool flush_zero_mode, bool denormals_zero_mode) { // For now, we flush denormals only on SSE 3. Other architectures such as ARM // can be added as needed. #ifdef DENORM_USE_INTRINSICS if (TestCPUFeature(SSE3)) { // Restore flags _MM_SET_FLUSH_ZERO_MODE(flush_zero_mode ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF); _MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF); } #endif } static std::pair GetDernormalState() { // For now, we flush denormals only on SSE 3. Other architectures such as ARM // can be added as needed. #ifdef DENORM_USE_INTRINSICS if (TestCPUFeature(SSE3)) { // Save existing flags bool flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON; bool denormals_zero_mode = _MM_GET_DENORMALS_ZERO_MODE() == _MM_DENORMALS_ZERO_ON; return {flush_zero_mode, denormals_zero_mode}; } #endif return {false, false}; } ScopedRestoreFlushDenormalState::ScopedRestoreFlushDenormalState() { std::tie(flush_zero_mode_, denormals_zero_mode_) = GetDernormalState(); } ScopedRestoreFlushDenormalState::~ScopedRestoreFlushDenormalState() { SetDenormalState(flush_zero_mode_, denormals_zero_mode_); } ScopedFlushDenormal::ScopedFlushDenormal() { SetDenormalState(/*flush_zero_mode=*/true, /*denormals_zero_mode=*/true); } ScopedDontFlushDenormal::ScopedDontFlushDenormal() { SetDenormalState(/*flush_zero_mode=*/false, /*denormals_zero_mode=*/false); } } // namespace port } // namespace tensorflow