Merged in benoitsteiner/opencl (pull request PR-318)

Improved support for OpenCL
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2017-06-13 05:01:10 +0000
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2017-06-13 05:01:10 +0000
commit: b8e805497e446e7159f231238b4a8fd22fe70749 (patch)
tree: bd7921a8823d5377fa16ba6be6795dbd63ae80be /unsupported
parent: 9fbdf020597cd198e3686ca786172aec6f009db6 (diff)
parent: 1e736b9ead34533952f29258af8deead38e68242 (diff)
4 files changed, 70 insertions, 47 deletions
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index 39916092b..5d71a9c25 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -19,7 +19,7 @@
 #undef isnan
 #undef isinf
 #undef isfinite
-#include <SYCL/sycl.hpp>
+#include <CL/sycl.hpp>
 #include <iostream>
 #include <map>
 #include <memory>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h
index 5b4c3c5bd..e6840bc87 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h
@@ -195,7 +195,7 @@ typename HostExpr::Index LocalThreadSizeM, typename HostExpr::Index LocalThreadS
     m_j_strides(m_j_strides_),  m_right_nocontract_strides(m_right_nocontract_strides_),
     left_tuple_of_accessors(left_tuple_of_accessors_), right_tuple_of_accessors(right_tuple_of_accessors_), dev(dev_){}
 
-    void operator()(cl::sycl::nd_item<1> itemID) {
+    void operator()(cl::sycl::nd_item<2> itemID) {
       typedef typename Eigen::TensorSycl::internal::ConvertToDeviceExpression<HostExpr>::Type DevExpr;
       typedef typename Eigen::TensorSycl::internal::ConvertToDeviceExpression<LHSHostExpr>::Type LHSDevExpr;
       typedef typename Eigen::TensorSycl::internal::ConvertToDeviceExpression<RHSHostExpr>::Type RHSDevExpr;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
index c5142b7c9..2b5749f55 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
@@ -14,7 +14,23 @@
 
 #if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H)
 #define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
+template<size_t Align> struct CheckAlignStatically {
+  static const bool Val= (((Align&(Align-1))==0) && (Align >= sizeof(void *)));
+};
+template <bool IsAligned, size_t Align>
+struct Conditional_Allocate {
+
+  EIGEN_ALWAYS_INLINE static void* conditional_allocate(std::size_t elements) {
+    return aligned_alloc(Align, elements);
+  }
+};
+template <size_t Align>
+struct Conditional_Allocate<false, Align> {
 
+  EIGEN_ALWAYS_INLINE static void* conditional_allocate(std::size_t elements){
+    return malloc(elements);
+  }
+};
 template <typename Scalar, size_t Align = EIGEN_MAX_ALIGN_BYTES, class Allocator = std::allocator<Scalar>>
 struct SyclAllocator {
   typedef Scalar value_type;
@@ -22,7 +38,9 @@ struct SyclAllocator {
   typedef typename std::allocator_traits<Allocator>::size_type size_type;
 
   SyclAllocator( ){};
-  Scalar* allocate(std::size_t elements) { return static_cast<Scalar*>(aligned_alloc(Align, elements)); }
+  Scalar* allocate(std::size_t elements) {
+    return static_cast<Scalar*>(Conditional_Allocate<CheckAlignStatically<Align>::Val, Align>::conditional_allocate(elements));
+  }
   void deallocate(Scalar * p, std::size_t size) { EIGEN_UNUSED_VARIABLE(size); free(p); }
 };
 
@@ -81,28 +99,26 @@ struct memsetCghFunctor{
   }
 };
 
-  //get_devices returns all the available opencl devices. Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU  and intel GPU)
+// Returns all the available OpenCL devices except those that ComputeCpp does not support (AMD OpenCL for CPU and Intel GPU); alternatively, use a device_selector.
 EIGEN_STRONG_INLINE auto get_sycl_supported_devices()->decltype(cl::sycl::device::get_devices()){
-  auto devices = cl::sycl::device::get_devices();
-  std::vector<cl::sycl::device>::iterator it =devices.begin();
-  while(it!=devices.end()) {
-    ///FIXME: Currently there is a bug in amd cpu OpenCL
-    auto name = (*it).template get_info<cl::sycl::info::device::name>();
-    std::transform(name.begin(), name.end(), name.begin(), ::tolower);
-    auto vendor = (*it).template get_info<cl::sycl::info::device::vendor>();
+std::vector<cl::sycl::device> supported_devices;
+auto plafrom_list =cl::sycl::platform::get_platforms();
+for(const auto& platform : plafrom_list){
+  auto device_list = platform.get_devices();
+  auto platform_name =platform.template get_info<cl::sycl::info::platform::name>();
+  std::transform(platform_name.begin(), platform_name.end(), platform_name.begin(), ::tolower);
+  for(const auto& device : device_list){
+    auto vendor = device.template get_info<cl::sycl::info::device::vendor>();
     std::transform(vendor.begin(), vendor.end(), vendor.begin(), ::tolower);
-
-    if((*it).is_cpu() && vendor.find("amd")!=std::string::npos && vendor.find("apu") == std::string::npos){ // remove amd cpu as it is not supported by computecpp allow APUs
-      it = devices.erase(it);
-      //FIXME: currently there is a bug in intel gpu driver regarding memory allignment issue.
-    }else if((*it).is_gpu() && name.find("intel")!=std::string::npos){
-      it = devices.erase(it);
-    }
-    else{
-      ++it;
+    bool unsuported_condition = (device.is_cpu() && platform_name.find("amd")!=std::string::npos && vendor.find("apu") == std::string::npos) ||
+    (device.is_gpu() && platform_name.find("intel")!=std::string::npos);
+    if(!unsuported_condition){
+      std::cout << "Platform name "<< platform_name << std::endl;
+        supported_devices.push_back(device);
     }
   }
-  return devices;
+}
+return supported_devices;
 }
 
 class QueueInterface {
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index cdf151f15..e639e7056 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -152,33 +152,40 @@ endif()
 
 if(EIGEN_TEST_CXX11)
   if(EIGEN_TEST_SYCL)
-    ei_add_test_sycl(cxx11_tensor_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_forced_eval_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_broadcast_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_shuffling_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_padding_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_contract_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_concatenation_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_reverse_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_convolution_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_striding_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_chipping_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_layout_swap_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_inflation_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_generator_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_patch_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_image_patch_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_volume_patcP_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_argmax_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_custom_op_sycl "-std=c++11")
+    if(EIGEN_SYCL_TRISYCL)
+      set(CMAKE_CXX_STANDARD 14)
+      set(STD_CXX_FLAG "-std=c++1z")
+    else(EIGEN_SYCL_TRISYCL)
+      # It should be safe to always run these tests as there is some fallback code for
+      # older compilers that don't support cxx11.
+      set(CMAKE_CXX_STANDARD 11)
+      set(STD_CXX_FLAG "-std=c++11")
+    endif(EIGEN_SYCL_TRISYCL)
+
+    ei_add_test_sycl(cxx11_tensor_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_forced_eval_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_broadcast_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_device_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_reduction_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_morphing_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_shuffling_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_padding_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_builtins_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_contract_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_concatenation_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_reverse_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_convolution_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_striding_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_chipping_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_layout_swap_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_inflation_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_generator_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_patch_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_image_patch_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_volume_patch_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_argmax_sycl ${STD_CXX_FLAG})
+    ei_add_test_sycl(cxx11_tensor_custom_op_sycl ${STD_CXX_FLAG})
   endif(EIGEN_TEST_SYCL)
-  # It should be safe to always run these tests as there is some fallback code for
-  # older compiler that don't support cxx11.
-  set(CMAKE_CXX_STANDARD 11)
 
   ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
   ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2017-06-13 05:01:10 +0000
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2017-06-13 05:01:10 +0000
commit	b8e805497e446e7159f231238b4a8fd22fe70749 (patch)
tree	bd7921a8823d5377fa16ba6be6795dbd63ae80be /unsupported
parent	9fbdf020597cd198e3686ca786172aec6f009db6 (diff)
parent	1e736b9ead34533952f29258af8deead38e68242 (diff)