aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h')
-rw-r--r--tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h13
1 files changed, 11 insertions, 2 deletions
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
index 80ca4243a2..aeb1eda23f 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
@@ -24,6 +24,13 @@ limitations under the License.
#include "tensorflow/core/platform/macros.h"
+// MSVC does not define the __SSE4_1__ macro. Eigen enables
+// EIGEN_VECTORIZE_SSE4_1 when __AVX__ is defined, so we do the same here.
+#if defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__))
+#include <smmintrin.h>
+#define TF_XLA_HAS_SSE4_1
+#endif
+
namespace xla {
namespace cpu {
namespace runtime {
@@ -31,7 +38,9 @@ namespace runtime {
extern const char *const kExpV4F32SSESymbolName;
extern const char *const kLogV4F32SSESymbolName;
-typedef float V4F32SSE __attribute__((__vector_size__(16)));
+#ifdef TF_XLA_HAS_SSE4_1
+typedef __m128 V4F32SSE;
+#endif
} // namespace runtime
} // namespace cpu
@@ -39,7 +48,7 @@ typedef float V4F32SSE __attribute__((__vector_size__(16)));
extern "C" {
-#ifdef __SSE4_1__
+#ifdef TF_XLA_HAS_SSE4_1
// The following functions are vectorized versions of a selection of libm
// library functions.
// References to these functions are created by the LLVM vectorizer.