diff options
Diffstat (limited to 'tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h')
-rw-r--r-- | tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h | 13 |
1 files changed, 11 insertions, 2 deletions
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
index 80ca4243a2..aeb1eda23f 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
@@ -24,6 +24,13 @@ limitations under the License.
 
 #include "tensorflow/core/platform/macros.h"
 
+// MSVC does not have __SSE4_1__ macro. Eigen enables EIGEN_VECTORIZE_SSE4_1
+// when __AVX__ is defined, we should do the same.
+#if defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__))
+#include <smmintrin.h>
+#define TF_XLA_HAS_SSE4_1
+#endif
+
 namespace xla {
 namespace cpu {
 namespace runtime {
@@ -31,7 +38,9 @@ namespace runtime {
 extern const char *const kExpV4F32SSESymbolName;
 extern const char *const kLogV4F32SSESymbolName;
 
-typedef float V4F32SSE __attribute__((__vector_size__(16)));
+#ifdef TF_XLA_HAS_SSE4_1
+typedef __m128 V4F32SSE;
+#endif
 
 } // namespace runtime
 } // namespace cpu
@@ -39,7 +48,7 @@ typedef float V4F32SSE __attribute__((__vector_size__(16)));
 
 extern "C" {
 
-#ifdef __SSE4_1__
+#ifdef TF_XLA_HAS_SSE4_1
 // The following functions are vectorized versions of a selection of libm
 // library functions.
 // References to these functions are created by the LLVM vectorizer.