aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
diff options
context:
space:
mode:
author: Mehdi Goli <mehdi.goli@codeplay.com> 2019-06-28 10:08:23 +0100
committer: Mehdi Goli <mehdi.goli@codeplay.com> 2019-06-28 10:08:23 +0100
commit: 7d08fa805a38f9ebb9e0e487c4e2d23d32a0fcde (patch)
tree: fbff4d80b6b373dcd53632de4c1fab5c393bdd64 /unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
parent: 16a56b2dddbfaf2d4b81d62be5e3139f12783ac8 (diff)
[SYCL] This PR adds the minimum modifications to the Eigen unsupported module required to run it on devices supporting SYCL.
* Abstracting the pointer type so that both SYCL memory and pointers can be captured.
* Converting the SYCL virtual pointer to SYCL device memory in the Eigen evaluator class.
* Binding the SYCL placeholder accessor to the command group handler by using the bind method in the Eigen evaluator node.
* Adding a SYCL macro for controlling loop unrolling.
* Modifying TensorDeviceSycl.h and the SYCL executor method to adopt the above changes.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h 16
1 file changed, 8 insertions, 8 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
index b6d445c50..e7fec5d3a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -37,7 +37,7 @@ namespace {
{
#ifdef EIGEN_GPU_COMPILE_PHASE
return __clz(val);
-#elif defined(__SYCL_DEVICE_ONLY__)
+#elif defined(SYCL_DEVICE_ONLY)
return cl::sycl::clz(val);
#elif EIGEN_COMP_MSVC
unsigned long index;
@@ -55,8 +55,8 @@ namespace {
{
#ifdef EIGEN_GPU_COMPILE_PHASE
return __clzll(val);
-#elif defined(__SYCL_DEVICE_ONLY__)
- return cl::sycl::clz(val);
+#elif defined(SYCL_DEVICE_ONLY)
+ return static_cast<int>(cl::sycl::clz(val));
#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64
unsigned long index;
_BitScanReverse64(&index, val);
@@ -92,7 +92,7 @@ namespace {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) {
#if defined(EIGEN_GPU_COMPILE_PHASE)
return __umulhi(a, b);
-#elif defined(__SYCL_DEVICE_ONLY__)
+#elif defined(SYCL_DEVICE_ONLY)
return cl::sycl::mul_hi(a, static_cast<uint32_t>(b));
#else
return (static_cast<uint64_t>(a) * b) >> 32;
@@ -103,7 +103,7 @@ namespace {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
#if defined(EIGEN_GPU_COMPILE_PHASE)
return __umul64hi(a, b);
-#elif defined(__SYCL_DEVICE_ONLY__)
+#elif defined(SYCL_DEVICE_ONLY)
return cl::sycl::mul_hi(a, static_cast<uint64_t>(b));
#elif defined(__SIZEOF_INT128__)
__uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b);
@@ -124,7 +124,7 @@ namespace {
template <typename T>
struct DividerHelper<64, T> {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) {
-#if defined(__SIZEOF_INT128__) && !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(__SYCL_DEVICE_ONLY__)
+#if defined(__SIZEOF_INT128__) && !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY)
return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
#else
const uint64_t shift = 1ULL << log_div;
@@ -205,8 +205,8 @@ class TensorIntDivisor<int32_t, true> {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const {
#ifdef EIGEN_GPU_COMPILE_PHASE
return (__umulhi(magic, n) >> shift);
-#elif defined(__SYCL_DEVICE_ONLY__)
- return (cl::sycl::mul_hi(static_cast<uint64_t>(magic), static_cast<uint64_t>(n)) >> shift);
+#elif defined(SYCL_DEVICE_ONLY)
+ return (cl::sycl::mul_hi(magic, static_cast<uint32_t>(n)) >> shift);
#else
uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n);
return (static_cast<uint32_t>(v >> 32) >> shift);