about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src
diff options
context:
space:
mode:
author: Eugene Zhulenev <ezhulenev@google.com> 2018-12-03 15:54:10 -0800
committer: Eugene Zhulenev <ezhulenev@google.com> 2018-12-03 15:54:10 -0800
commit: fd0fbfa9b5301e5339c34846c76835cf347ef4cb (patch)
tree: 6ce8f6446db933ab4fbad6178345e2d81f0a6235 /Eigen/src
parent: 0ea7ae72130cac7334823ec442f0a8a6772c9ab8 (diff)
Do not disable alignment with EIGEN_GPUCC
Diffstat (limited to 'Eigen/src')
-rw-r--r-- Eigen/src/Core/util/ConfigureVectorization.h | 15
-rw-r--r-- Eigen/src/Core/util/Macros.h | 4
2 files changed, 7 insertions, 12 deletions
diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h
index 263604597..c482a0b14 100644
--- a/Eigen/src/Core/util/ConfigureVectorization.h
+++ b/Eigen/src/Core/util/ConfigureVectorization.h
@@ -10,13 +10,6 @@
#ifndef EIGEN_CONFIGURE_VECTORIZATION_H
#define EIGEN_CONFIGURE_VECTORIZATION_H
-// FIXME: not sure why this is needed, perhaps it is not needed anymore.
-#ifdef __NVCC__
- #ifndef EIGEN_DONT_VECTORIZE
- #define EIGEN_DONT_VECTORIZE
- #endif
-#endif
-
//------------------------------------------------------------------------------------------
// Static and dynamic alignment control
//
@@ -183,7 +176,13 @@
//----------------------------------------------------------------------
-
+// If we are compiling for GPU we should also disable vectorization because
+// all the packet functions are not marked as __device__ functions.
+#ifdef EIGEN_GPUCC
+#ifndef EIGEN_DONT_VECTORIZE
+ #define EIGEN_DONT_VECTORIZE
+ #endif
+#endif
// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 9d277e26f..c7dba1fc4 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -742,10 +742,6 @@
// All functions callable from CUDA/HIP code must be qualified with __device__
#ifdef EIGEN_GPUCC
- #ifndef EIGEN_DONT_VECTORIZE
- #define EIGEN_DONT_VECTORIZE
- #endif
-
#define EIGEN_DEVICE_FUNC __host__ __device__
#else
#define EIGEN_DEVICE_FUNC