From b3e7c9132d41da75c0a6af783300cb11101010db Mon Sep 17 00:00:00 2001
From: Paul Tucker <tucker@google.com>
Date: Mon, 16 Jul 2018 17:26:05 -0700
Subject: Add optional Allocator argument to ThreadPoolDevice constructor. When
 supplied, this allocator will be used in place of internal::aligned_malloc. 
 This permits e.g. use of a NUMA-node specific allocator where the thread-pool
 is also restricted a single NUMA-node.

---
 .../Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h      | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 90fd99027..be397e1b6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -95,14 +95,20 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
 // Build a thread pool device on top the an existing pool of threads.
 struct ThreadPoolDevice {
   // The ownership of the thread pool remains with the caller.
-  ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { }
+  ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores)
+      : pool_(pool), num_threads_(num_cores), allocator_(nullptr) { }
 
   EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
-    return internal::aligned_malloc(num_bytes);
+    return allocator_ ? allocator_->allocate(num_bytes)
+        : internal::aligned_malloc(num_bytes);
   }
 
   EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
-    internal::aligned_free(buffer);
+    if (allocator_) {
+      allocator_->deallocate(buffer);
+    } else {
+      internal::aligned_free(buffer);
+    }
   }
 
   EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
@@ -267,9 +273,13 @@ struct ThreadPoolDevice {
   // Thread pool accessor.
   ThreadPoolInterface* getPool() const { return pool_; }
 
+  // Allocator accessor.
+  Allocator* getAllocator() const { return allocator_; }
+
  private:
   ThreadPoolInterface* pool_;
   int num_threads_;
+  Allocator* allocator_;
 };
 
 
-- 
cgit v1.2.3


From 4e9848fa8600be69dfb51405606eafa1dba8d0bf Mon Sep 17 00:00:00 2001
From: Paul Tucker <tucker@google.com>
Date: Mon, 16 Jul 2018 17:53:36 -0700
Subject: Actually add optional Allocator* arg to ThreadPoolDevice().

---
 unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index be397e1b6..c9534d400 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -95,8 +95,8 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
 // Build a thread pool device on top the an existing pool of threads.
 struct ThreadPoolDevice {
   // The ownership of the thread pool remains with the caller.
-  ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores)
-      : pool_(pool), num_threads_(num_cores), allocator_(nullptr) { }
+  ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr)
+      : pool_(pool), num_threads_(num_cores), allocator_(allocator) { }
 
   EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
     return allocator_ ? allocator_->allocate(num_bytes)
-- 
cgit v1.2.3


From d4afccde5a9553ddfb48b0f5fad0115cd8bf791a Mon Sep 17 00:00:00 2001
From: Paul Tucker <tucker@google.com>
Date: Thu, 19 Jul 2018 17:43:44 -0700
Subject: Add test coverage for ThreadPoolDevice optional allocator.

---
 .../CXX11/src/Tensor/TensorDeviceThreadPool.h      |  7 ++++
 unsupported/test/cxx11_tensor_thread_pool.cpp      | 45 ++++++++++++++++++++--
 2 files changed, 48 insertions(+), 4 deletions(-)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index c9534d400..f4123b71d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -91,6 +91,13 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
   }
 }
 
+// An abstract interface to a device specific memory allocator.
+class Allocator {
+ public:
+  virtual ~Allocator() {}
+  EIGEN_DEVICE_FUNC virtual void* allocate(size_t num_bytes) const = 0;
+  EIGEN_DEVICE_FUNC virtual void deallocate(void* buffer) const = 0;
+};
 
 // Build a thread pool device on top the an existing pool of threads.
 struct ThreadPoolDevice {
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
index 2ef665f30..200664740 100644
--- a/unsupported/test/cxx11_tensor_thread_pool.cpp
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -16,6 +16,25 @@
 
 using Eigen::Tensor;
 
+class TestAllocator : public Allocator {
+ public:
+  ~TestAllocator() override {}
+  EIGEN_DEVICE_FUNC void* allocate(size_t num_bytes) const override {
+    const_cast<TestAllocator*>(this)->alloc_count_++;
+    return internal::aligned_malloc(num_bytes);
+  }
+  EIGEN_DEVICE_FUNC void deallocate(void* buffer) const override {
+    const_cast<TestAllocator*>(this)->dealloc_count_++;
+    internal::aligned_free(buffer);
+  }
+
+  int alloc_count() const { return alloc_count_; }
+  int dealloc_count() const { return dealloc_count_; }
+
+ private:
+  int alloc_count_ = 0;
+  int dealloc_count_ = 0;
+};
 
 void test_multithread_elementwise()
 {
@@ -320,14 +339,14 @@ void test_multithread_random()
 }
 
 template<int DataLayout>
-void test_multithread_shuffle()
+void test_multithread_shuffle(Allocator* allocator)
 {
   Tensor<float, 4, DataLayout> tensor(17,5,7,11);
   tensor.setRandom();
 
   const int num_threads = internal::random<int>(2, 11);
   ThreadPool threads(num_threads);
-  Eigen::ThreadPoolDevice device(&threads, num_threads);
+  Eigen::ThreadPoolDevice device(&threads, num_threads, allocator);
 
   Tensor<float, 4, DataLayout> shuffle(7,5,11,17);
   array<ptrdiff_t, 4> shuffles = {{2,1,3,0}};
@@ -344,6 +363,21 @@ void test_multithread_shuffle()
   }
 }
 
+void test_threadpool_allocate(TestAllocator* allocator)
+{
+  const int num_threads = internal::random<int>(2, 11);
+  const int num_allocs = internal::random<int>(2, 11);
+  ThreadPool threads(num_threads);
+  Eigen::ThreadPoolDevice device(&threads, num_threads, allocator);
+
+  for (int a = 0; a < num_allocs; ++a) {
+    void* ptr = device.allocate(512);
+    device.deallocate(ptr);
+  }
+  VERIFY(allocator != nullptr);
+  VERIFY_IS_EQUAL(allocator->alloc_count(), num_allocs);
+  VERIFY_IS_EQUAL(allocator->dealloc_count(), num_allocs);
+}
 
 void test_cxx11_tensor_thread_pool()
 {
@@ -368,6 +402,9 @@ void test_cxx11_tensor_thread_pool()
 
   CALL_SUBTEST_6(test_memcpy());
   CALL_SUBTEST_6(test_multithread_random());
-  CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>());
-  CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>());
+
+  TestAllocator test_allocator;
+  CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>(nullptr));
+  CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>(&test_allocator));
+  CALL_SUBTEST_6(test_threadpool_allocate(&test_allocator));
 }
-- 
cgit v1.2.3


From 385f7b8d0ca926d00c71987ab308202511e5c753 Mon Sep 17 00:00:00 2001
From: Paul Tucker <tucker@google.com>
Date: Tue, 31 Jul 2018 13:52:18 -0700
Subject: Change getAllocator() to allocator() in ThreadPoolDevice.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index f4123b71d..f8188ffde 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -281,7 +281,7 @@ struct ThreadPoolDevice {
   ThreadPoolInterface* getPool() const { return pool_; }
 
   // Allocator accessor.
-  Allocator* getAllocator() const { return allocator_; }
+  Allocator* allocator() const { return allocator_; }
 
  private:
   ThreadPoolInterface* pool_;
-- 
cgit v1.2.3


From 40d6d020a098d9f828bad5e2a1accfc5ee1a1289 Mon Sep 17 00:00:00 2001
From: Mehdi Goli <mehdi.goli@codeplay.com>
Date: Thu, 2 Aug 2018 13:34:53 +0100
Subject: Fixing ambigous constructor error for Clang compiler.

---
 unsupported/test/cxx11_tensor_concatenation.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'unsupported')

diff --git a/unsupported/test/cxx11_tensor_concatenation.cpp b/unsupported/test/cxx11_tensor_concatenation.cpp
index 9189a609b..f53515b4e 100644
--- a/unsupported/test/cxx11_tensor_concatenation.cpp
+++ b/unsupported/test/cxx11_tensor_concatenation.cpp
@@ -50,7 +50,13 @@ static void test_static_dimension_failure()
       .reshape(Tensor<int, 3>::Dimensions(2, 3, 1))
       .concatenate(right, 0);
   Tensor<int, 2, DataLayout> alternative = left
-      .concatenate(right.reshape(Tensor<int, 2>::Dimensions{{{2, 3}}}), 0);
+   // Clang compiler break with {{{}}} with an ambigous error on copy constructor
+  // the variadic DSize constructor added for #ifndef EIGEN_EMULATE_CXX11_META_H.
+  // Solution:
+  // either the code should change to 
+  //  Tensor<int, 2>::Dimensions{{2, 3}}
+  // or Tensor<int, 2>::Dimensions{Tensor<int, 2>::Dimensions{{2, 3}}}
+      .concatenate(right.reshape(Tensor<int, 2>::Dimensions{{2, 3}}), 0);
 }
 
 template<int DataLayout>
-- 
cgit v1.2.3


From 516d2621b96a0c41ff999781b8bbcaa527a7c6ee Mon Sep 17 00:00:00 2001
From: Mehdi Goli <mehdi.goli@codeplay.com>
Date: Thu, 2 Aug 2018 14:30:48 +0100
Subject: fixing compilation error for cxx11_tensor_trace.cpp error on
 Microsoft Visual Studio.

---
 unsupported/test/cxx11_tensor_trace.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'unsupported')

diff --git a/unsupported/test/cxx11_tensor_trace.cpp b/unsupported/test/cxx11_tensor_trace.cpp
index 1579bc1eb..0cb23060e 100644
--- a/unsupported/test/cxx11_tensor_trace.cpp
+++ b/unsupported/test/cxx11_tensor_trace.cpp
@@ -37,7 +37,7 @@ static void test_all_dimensions_trace() {
   VERIFY_IS_EQUAL(result1(), sum);
 
   Tensor<float, 5, DataLayout> tensor2(7, 7, 7, 7, 7);
-  array<ptrdiff_t, 5> dims({{2, 1, 0, 3, 4}});
+  array<ptrdiff_t, 5> dims = { { 2, 1, 0, 3, 4 } };
   Tensor<float, 0, DataLayout> result2 = tensor2.trace(dims);
   VERIFY_IS_EQUAL(result2.rank(), 0);
   sum = 0.0f;
@@ -52,7 +52,7 @@ template <int DataLayout>
 static void test_simple_trace() {
   Tensor<float, 3, DataLayout> tensor1(3, 5, 3);
   tensor1.setRandom();
-  array<ptrdiff_t, 2> dims1({{0, 2}});
+  array<ptrdiff_t, 2> dims1 = { { 0, 2 } };
   Tensor<float, 1, DataLayout> result1 = tensor1.trace(dims1);
   VERIFY_IS_EQUAL(result1.rank(), 1);
   VERIFY_IS_EQUAL(result1.dimension(0), 5);
@@ -67,7 +67,7 @@ static void test_simple_trace() {
 
   Tensor<float, 4, DataLayout> tensor2(5, 5, 7, 7);
   tensor2.setRandom();
-  array<ptrdiff_t, 2> dims2({{2, 3}});
+  array<ptrdiff_t, 2> dims2 = { { 2, 3 } };
   Tensor<float, 2, DataLayout> result2 = tensor2.trace(dims2);
   VERIFY_IS_EQUAL(result2.rank(), 2);
   VERIFY_IS_EQUAL(result2.dimension(0), 5);
@@ -82,7 +82,7 @@ static void test_simple_trace() {
     }
   }
 
-  array<ptrdiff_t, 2> dims3({{1, 0}});
+  array<ptrdiff_t, 2> dims3 = { { 1, 0 } };
   Tensor<float, 2, DataLayout> result3 = tensor2.trace(dims3);
   VERIFY_IS_EQUAL(result3.rank(), 2);
   VERIFY_IS_EQUAL(result3.dimension(0), 7);
@@ -99,7 +99,7 @@ static void test_simple_trace() {
 
   Tensor<float, 5, DataLayout> tensor3(3, 7, 3, 7, 3);
   tensor3.setRandom();
-  array<ptrdiff_t, 3> dims4({{0, 2, 4}});
+  array<ptrdiff_t, 3> dims4 = { { 0, 2, 4 } };
   Tensor<float, 2, DataLayout> result4 = tensor3.trace(dims4);
   VERIFY_IS_EQUAL(result4.rank(), 2);
   VERIFY_IS_EQUAL(result4.dimension(0), 7);
@@ -116,7 +116,7 @@ static void test_simple_trace() {
 
   Tensor<float, 5, DataLayout> tensor4(3, 7, 4, 7, 5);
   tensor4.setRandom();
-  array<ptrdiff_t, 2> dims5({{1, 3}});
+  array<ptrdiff_t, 2> dims5 = { { 1, 3 } };
   Tensor<float, 3, DataLayout> result5 = tensor4.trace(dims5);
   VERIFY_IS_EQUAL(result5.rank(), 3);
   VERIFY_IS_EQUAL(result5.dimension(0), 3);
@@ -140,7 +140,7 @@ template<int DataLayout>
 static void test_trace_in_expr() {
   Tensor<float, 4, DataLayout> tensor(2, 3, 5, 3);
   tensor.setRandom();
-  array<ptrdiff_t, 2> dims({{1, 3}});
+  array<ptrdiff_t, 2> dims = { { 1, 3 } };
   Tensor<float, 2, DataLayout> result(2, 5);
   result = result.constant(1.0f) - tensor.trace(dims);
   VERIFY_IS_EQUAL(result.rank(), 2);
@@ -168,4 +168,4 @@ EIGEN_DECLARE_TEST(cxx11_tensor_trace) {
   CALL_SUBTEST(test_simple_trace<RowMajor>());
   CALL_SUBTEST(test_trace_in_expr<ColMajor>());
   CALL_SUBTEST(test_trace_in_expr<RowMajor>());
-}
+}
\ No newline at end of file
-- 
cgit v1.2.3


From 01358300d581f94c842bcdf35266b50f2795ded3 Mon Sep 17 00:00:00 2001
From: Mehdi Goli <mehdi.goli@codeplay.com>
Date: Fri, 3 Aug 2018 16:59:15 +0100
Subject: Creating separate SYCL required PR for uncontroversial files.

---
 Eigen/Core                                         |  6 ++++
 Eigen/src/Core/arch/GPU/Half.h                     |  7 ++++-
 unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h   |  4 +--
 unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h  |  4 +++
 .../src/SpecialFunctions/SpecialFunctionsImpl.h    | 36 +++++++++++++++++++---
 5 files changed, 50 insertions(+), 7 deletions(-)

(limited to 'unsupported')

diff --git a/Eigen/Core b/Eigen/Core
index 864bde551..080511fc9 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -200,6 +200,12 @@ using std::ptrdiff_t;
   #include "src/Core/arch/GPU/MathFunctions.h"
 #endif
 
+#if defined EIGEN_VECTORIZE_SYCL
+  #include "src/Core/arch/SYCL/InteropHeaders.h"
+  #include "src/Core/arch/SYCL/PacketMath.h"
+  #include "src/Core/arch/SYCL/MathFunctions.h"
+  #include "src/Core/arch/SYCL/TypeCasting.h"
+#endif
 #include "src/Core/arch/Default/Settings.h"
 
 #include "src/Core/functors/TernaryFunctors.h"
diff --git a/Eigen/src/Core/arch/GPU/Half.h b/Eigen/src/Core/arch/GPU/Half.h
index ab9d27591..aca56fa72 100644
--- a/Eigen/src/Core/arch/GPU/Half.h
+++ b/Eigen/src/Core/arch/GPU/Half.h
@@ -83,7 +83,11 @@ struct __half_raw {
  #if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
 // In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
  typedef __half __half_raw;
- #endif
+ #endif // defined(EIGEN_HAS_CUDA_FP16)
+
+#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+typedef cl::sycl::half __half_raw;
+
 #endif
 
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x);
@@ -200,6 +204,7 @@ struct half : public half_impl::half_base {
     x = other.x;
     return *this;
   }
+
 };
 
 } // end namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h
index c8b2fad1e..ea53bb04b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h
@@ -273,11 +273,11 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
 
   Dimensions m_dimensions;
   TensorEvaluator<ArgType, Device> m_impl;
+  // Initialize the size of the trace dimension
+  Index m_traceDim;
   const Device& m_device;
   array<bool, NumInputDims> m_reduced;
   array<Index, NumReducedDims> m_reducedDims;
-  // Initialize the size of the trace dimension
-  Index m_traceDim;
   array<Index, NumOutputDims> m_outputStrides;
   array<Index, NumReducedDims> m_reducedStrides;
   array<Index, NumOutputDims> m_preservedStrides;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
index 006b37921..0a394c88d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
@@ -59,6 +59,7 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
   template <typename T> struct MakePointer {
     typedef T* Type;
     typedef T& RefType;
+    typedef T ScalarType;
 
   };
   typedef typename MakePointer<Scalar>::Type PointerType;
@@ -80,6 +81,7 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
   template <typename T> struct MakePointer {
     typedef T* Type;
     typedef T& RefType;
+    typedef T ScalarType;
 
   };
   typedef typename MakePointer<Scalar>::Type PointerType;
@@ -105,6 +107,8 @@ struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> >
     typedef MakePointer_<T> MakePointerT;
     typedef typename MakePointerT::Type Type;
     typedef typename MakePointerT::RefType RefType;
+    typedef typename MakePointerT::ScalarType ScalarType;
+
 
   };
   typedef typename MakePointer<Scalar>::Type PointerType;
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index dbcc9d8ac..5784cbc86 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -193,6 +193,8 @@ struct lgamma_impl<float> {
 #if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) 
     int dummy;
     return ::lgammaf_r(x, &dummy);
+#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+    return cl::sycl::lgamma(x);
 #else
     return ::lgammaf(x);
 #endif
@@ -206,6 +208,8 @@ struct lgamma_impl<double> {
 #if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) 
     int dummy;
     return ::lgamma_r(x, &dummy);
+#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+    return cl::sycl::lgamma(x);
 #else
     return ::lgamma(x);
 #endif
@@ -423,13 +427,25 @@ struct erf_retval {
 template <>
 struct erf_impl<float> {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); }
+  static EIGEN_STRONG_INLINE float run(float x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+    return cl::sycl::erf(x);
+#else
+    return ::erff(x);
+#endif
+  }
 };
 
 template <>
 struct erf_impl<double> {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); }
+  static EIGEN_STRONG_INLINE double run(double x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+    return cl::sycl::erf(x);
+#else
+    return ::erf(x);
+#endif
+  }
 };
 #endif  // EIGEN_HAS_C99_MATH
 
@@ -456,13 +472,25 @@ struct erfc_retval {
 template <>
 struct erfc_impl<float> {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
+  static EIGEN_STRONG_INLINE float run(const float x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+    return cl::sycl::erfc(x);
+#else
+    return ::erfcf(x);
+#endif
+  }
 };
 
 template <>
 struct erfc_impl<double> {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
+  static EIGEN_STRONG_INLINE double run(const double x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+    return cl::sycl::erfc(x);
+#else
+    return ::erfc(x);
+#endif
+  }
 };
 #endif  // EIGEN_HAS_C99_MATH
 
-- 
cgit v1.2.3


From 3074b1ff9e61a14384accdbda7c8b3b520140360 Mon Sep 17 00:00:00 2001
From: Mehdi Goli <mehdi.goli@codeplay.com>
Date: Fri, 3 Aug 2018 17:13:44 +0100
Subject: Fixing the compilation error.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index 8ed1796df..ec1dc0fab 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -21,6 +21,7 @@ namespace Eigen {
 template<typename T> struct MakePointer {
   typedef T* Type;
   typedef T& RefType;
+  typedef T ScalarType;
 };
 
 namespace internal{
-- 
cgit v1.2.3


From bcb29f890ccdbf4922780ed5da0e23db65d7ae64 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Fri, 3 Aug 2018 10:18:53 -0700
Subject: Fix initialization order.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index e1649fb47..e604456e8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -274,8 +274,8 @@ struct TensorContractionEvaluatorBase
                           op.lhsExpression(), op.rhsExpression()), device),
     m_rightImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(),
                           op.rhsExpression(), op.lhsExpression()), device),
-        m_output_kernel(op.outputKernel()),
         m_device(device),
+        m_output_kernel(op.outputKernel()),
         m_result(NULL) {
     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) ==
          static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)),
-- 
cgit v1.2.3


From 09c81ac03335584674371369d0045510dc83aba4 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Sat, 4 Aug 2018 00:17:37 +0200
Subject: bug #1451: fix numeric_limits<AutoDiffScalar<Der>> with a reference
 as derivative type

---
 unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 279fe5cd3..13d959df4 100755
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -684,10 +684,15 @@ template<typename DerType> struct NumTraits<AutoDiffScalar<DerType> >
 }
 
 namespace std {
+
 template <typename T>
 class numeric_limits<Eigen::AutoDiffScalar<T> >
   : public numeric_limits<typename T::Scalar> {};
 
+template <typename T>
+class numeric_limits<Eigen::AutoDiffScalar<T&> >
+  : public numeric_limits<typename T::Scalar> {};
+
 }  // namespace std
 
 #endif // EIGEN_AUTODIFF_SCALAR_H
-- 
cgit v1.2.3


From 36e7e7dd8fc995c8a7cb8caa924663a56bc4cc3a Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Mon, 6 Aug 2018 13:16:32 -0700
Subject: Forward declare NoOpOutputKernel as struct rather than class to be
 consistent with implementation.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index ec1dc0fab..0dd524a30 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -98,7 +98,7 @@ template<typename XprType> class TensorForcedEvalOp;
 template<typename ExpressionType, typename DeviceType> class TensorDevice;
 template<typename Derived, typename Device> struct TensorEvaluator;
 
-class NoOpOutputKernel;
+struct NoOpOutputKernel;
 
 struct DefaultDevice;
 struct ThreadPoolDevice;
-- 
cgit v1.2.3


From d011d05fd6306de11fbc74c5b4fdaa929f2fe3ac Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Mon, 6 Aug 2018 13:40:51 -0700
Subject: Fixed compilation errors.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBase.h    | 24 +++++++++++-----------
 .../Eigen/CXX11/src/Tensor/TensorConvolution.h     |  4 ++--
 .../Eigen/CXX11/src/Tensor/TensorEvaluator.h       |  4 ++--
 .../Eigen/CXX11/src/Tensor/TensorForcedEval.h      |  4 ++--
 4 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 97f90f638..ab3731952 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -538,8 +538,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
     // Fourier transforms
     template <int FFTDataType, int FFTDirection, typename FFT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>
-    fft(const FFT& fft) const {
-      return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), fft);
+    fft(const FFT& dims) const {
+      return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), dims);
     }
 
     // Scan.
@@ -723,8 +723,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
 
     template <typename Broadcast> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const TensorBroadcastingOp<const Broadcast, const Derived>
-    broadcast(const Broadcast& broadcast) const {
-      return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), broadcast);
+    broadcast(const Broadcast& bcast) const {
+      return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), bcast);
     }
 
     template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -832,8 +832,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
     }
     template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const TensorShufflingOp<const Shuffle, const Derived>
-    shuffle(const Shuffle& shuffle) const {
-      return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
+    shuffle(const Shuffle& shfl) const {
+      return TensorShufflingOp<const Shuffle, const Derived>(derived(), shfl);
     }
     template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const TensorStridingOp<const Strides, const Derived>
@@ -1030,13 +1030,13 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
 
     template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const TensorShufflingOp<const Shuffle, const Derived>
-    shuffle(const Shuffle& shuffle) const {
-      return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
+    shuffle(const Shuffle& shfl) const {
+      return TensorShufflingOp<const Shuffle, const Derived>(derived(), shfl);
     }
     template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     TensorShufflingOp<const Shuffle, Derived>
-    shuffle(const Shuffle& shuffle) {
-      return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle);
+    shuffle(const Shuffle& shfl) {
+      return TensorShufflingOp<const Shuffle, Derived>(derived(), shfl);
     }
 
     template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1052,8 +1052,8 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
 
     // Select the device on which to evaluate the expression.
     template <typename DeviceType>
-    TensorDevice<Derived, DeviceType> device(const DeviceType& device) {
-      return TensorDevice<Derived, DeviceType>(device, derived());
+    TensorDevice<Derived, DeviceType> device(const DeviceType& dev) {
+      return TensorDevice<Derived, DeviceType>(dev, derived());
     }
 
  protected:
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 0d3ca966c..e3e650fa5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -527,8 +527,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
       Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
       typedef TensorEvalToOp<const KernelArgType> EvalTo;
       EvalTo evalToTmp(local, m_kernelArg);
-      const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value;
-      internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
+      const bool Vectorize = internal::IsVectorizable<Device, KernelArgType>::value;
+      internal::TensorExecutor<const EvalTo, Device, Vectorize>::run(evalToTmp, m_device);
 
       m_kernel = local;
       m_local_kernel = true;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 8f7a81575..028902fea 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -126,7 +126,7 @@ struct TensorEvaluator
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {}
+      std::vector<internal::TensorOpResourceRequirements>*) const {}
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
     assert(m_data != NULL);
@@ -255,7 +255,7 @@ struct TensorEvaluator<const Derived, Device>
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {}
+      std::vector<internal::TensorOpResourceRequirements>*) const {}
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
     assert(m_data != NULL);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index a456f308b..2778bf5ec 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -124,8 +124,8 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
     }
     typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
     EvalTo evalToTmp(m_buffer, m_op);
-    const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value;
-    internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device);
+    const bool Vectorize = internal::IsVectorizable<Device, const ArgType>::value;
+    internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, Vectorize>::run(evalToTmp, m_device);
     return true;
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-- 
cgit v1.2.3


From 10d286f55b30bfcd45be3e83c44edde5ac184270 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Mon, 6 Aug 2018 16:00:29 -0700
Subject: Silenced a couple of compilation warnings.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h     | 2 +-
 unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index 84cf6d216..dc8b5ae6d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -747,7 +747,7 @@ class TensorBlockMapper {
         // block dimension sizes based on "square" dimension size target.
         const size_t dim_size_target = static_cast<const size_t>(
             std::pow(static_cast<float>(min_target_size),
-                     1.0 / static_cast<float>(block_dim_sizes.rank())));
+                     1.0f / static_cast<float>(block_dim_sizes.rank())));
         for (size_t i = 0; i < block_dim_sizes.rank(); ++i) {
           // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it
           // a multiple of the packet size. Note that reducing
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 0fc49255d..e25dd9cf8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -61,8 +61,8 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
   typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind;
   typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shuffle)
-      : m_xpr(expr), m_shuffle(shuffle) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shfl)
+      : m_xpr(expr), m_shuffle(shfl) {}
 
     EIGEN_DEVICE_FUNC
     const Shuffle& shufflePermutation() const { return m_shuffle; }
-- 
cgit v1.2.3


From 693fb1d41e1b267ae149b8f368fc3008bd59aab9 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Tue, 7 Aug 2018 17:18:51 -0700
Subject: Fix init order.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index e3e650fa5..a07e32db0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -786,7 +786,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
   };
 
   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device)
-      : m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
+      : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
   {
     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
 
-- 
cgit v1.2.3


From 532a0be05c0bc0f9a199f97122c88967494dd4ba Mon Sep 17 00:00:00 2001
From: Mehdi Goli <mehdi.goli@codeplay.com>
Date: Wed, 8 Aug 2018 12:12:26 +0100
Subject: Fixing compiler warning in TensorBlock.h as it was creating a lot of
 noise at compilation.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'unsupported')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index dc8b5ae6d..45ddfdb39 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -89,7 +89,7 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements(
   // policy if block shapes/sizes conflict).
   *block_shape = resources[0].block_shape;
   *block_total_size = resources[0].block_total_size;
-  for (int i = 1; i < resources.size(); ++i) {
+  for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
     if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims &&
         *block_shape != TensorBlockShapeType::kSkewedInnerDims) {
       *block_shape = TensorBlockShapeType::kSkewedInnerDims;
-- 
cgit v1.2.3