aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/doc
diff options
context:
space:
mode:
authorGravatar Mehdi Goli <mehdi.goli@codeplay.com>2019-11-28 10:08:54 +0000
committerGravatar Mehdi Goli <mehdi.goli@codeplay.com>2019-11-28 10:08:54 +0000
commit00f32752f7d0b193c6788691c3cf0b76457a044d (patch)
tree792e46110f0751ea8802fa9d403d1472d5977ac3 /unsupported/doc
parentea51a9eace7e4f0ea839e61eb2df85ccfb94aee8 (diff)
[SYCL] Rebasing the SYCL support branch on top of the Eigen upstream master branch.
* Unifying all loadLocalTile from lhs and rhs to an extract_block function. * Adding get_tensor operation which was missing in TensorContractionMapper. * Adding the -D method missing from cmake for Disable_Skinny Contraction operation. * Wrapping all the indices in TensorScanSycl into Scan parameter struct. * Fixing typo in Device SYCL * Unifying load to private register for tall/skinny no shared * Unifying load to vector tile for tensor-vector/vector-tensor operation * Removing all the LHS/RHS class for extracting data from global * Removing Outputfunction from TensorContractionSkinnyNoshared. * Combining the local memory version of tall/skinny and normal tensor contraction into one kernel. * Combining the no-local memory version of tall/skinny and normal tensor contraction into one kernel. * Combining General Tensor-Vector and VectorTensor contraction into one kernel. * Making double buffering optional for Tensor contraction when the local memory version is used. * Modifying benchmark to accept custom Reduction Sizes * Disabling AVX optimization for SYCL backend on the host to allow SSE optimization to the host * Adding Test for SYCL * Modifying SYCL CMake
Diffstat (limited to 'unsupported/doc')
-rw-r--r--unsupported/doc/Overview.dox3
-rw-r--r--unsupported/doc/SYCL.dox9
-rw-r--r--unsupported/doc/examples/CMakeLists.txt4
-rw-r--r--unsupported/doc/examples/SYCL/CMakeLists.txt38
-rw-r--r--unsupported/doc/examples/SYCL/CwiseMul.cpp63
5 files changed, 117 insertions, 0 deletions
diff --git a/unsupported/doc/Overview.dox b/unsupported/doc/Overview.dox
index 45464a545..bae51dcf6 100644
--- a/unsupported/doc/Overview.dox
+++ b/unsupported/doc/Overview.dox
@@ -11,6 +11,8 @@ Click on the \e Modules tab at the top of this page to get a list of all unsuppo
Don't miss the <a href="../index.html">official Eigen documentation</a>.
+ \subpage SYCL_EIGEN "SYCL backend for Eigen"
+
*/
/*
@@ -26,3 +28,4 @@ subject to be included in %Eigen in the future.
/// \internal \brief Namespace containing low-level routines from the %Eigen library.
namespace internal {}
}
+
diff --git a/unsupported/doc/SYCL.dox b/unsupported/doc/SYCL.dox
new file mode 100644
index 000000000..2295adf21
--- /dev/null
+++ b/unsupported/doc/SYCL.dox
@@ -0,0 +1,9 @@
+/** \page SYCL_EIGEN Eigen SYCL Backend
+
+Useful information for Eigen SYCL Backend:
+
+- <a href="https://developer.codeplay.com/computecppce/latest/getting-started-with-eigen">Getting Started with Eigen</a>
+
+- <a href="https://developer.codeplay.com/computecppce/latest/options-for-building-eigen-sycl">Options for Building Eigen SYCL</a>
+
+*/
diff --git a/unsupported/doc/examples/CMakeLists.txt b/unsupported/doc/examples/CMakeLists.txt
index bee2b8ad4..7bb67736c 100644
--- a/unsupported/doc/examples/CMakeLists.txt
+++ b/unsupported/doc/examples/CMakeLists.txt
@@ -18,3 +18,7 @@ foreach(example_src ${examples_SRCS})
)
add_dependencies(unsupported_examples example_${example})
endforeach(example_src)
+
+if(EIGEN_TEST_SYCL)
+ add_subdirectory(SYCL)
+endif(EIGEN_TEST_SYCL)
diff --git a/unsupported/doc/examples/SYCL/CMakeLists.txt b/unsupported/doc/examples/SYCL/CMakeLists.txt
new file mode 100644
index 000000000..bef4f1925
--- /dev/null
+++ b/unsupported/doc/examples/SYCL/CMakeLists.txt
@@ -0,0 +1,38 @@
+FILE(GLOB examples_SRCS "*.cpp")
+
+set(EIGEN_SYCL ON)  # presumably read by ei_add_test_internal below (TODO confirm); reset to OFF at the end
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread")  # flags are a space-separated string, not a ;-list
+if(EIGEN_SYCL_TRISYCL)
+  set(CMAKE_CXX_STANDARD 14)
+  set(STD_CXX_FLAG "-std=c++1z")
+else(EIGEN_SYCL_TRISYCL)
+  if(MSVC)
+    # Set the host and device compilers C++ standard to C++14. On Windows setting this to C++11
+    # can cause issues with the ComputeCpp device compiler parsing Visual Studio Headers.
+    set(CMAKE_CXX_STANDARD 14)
+    list(APPEND COMPUTECPP_USER_FLAGS -DWIN32)
+  else()
+    set(CMAKE_CXX_STANDARD 11)
+    list(APPEND COMPUTECPP_USER_FLAGS -Wall)
+  endif()
+  # The following flags are not supported by Clang and can cause warnings
+  # if used with -Werror so they are removed here.
+  if(COMPUTECPP_USE_COMPILER_DRIVER)
+    set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE})
+    string(REPLACE "-Wlogical-op" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")  # quoted: input may be empty
+    string(REPLACE "-Wno-psabi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+    string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+  endif()
+  list(APPEND COMPUTECPP_USER_FLAGS
+       -DEIGEN_NO_ASSERTION_CHECKING=1
+       -no-serial-memop
+       -Xclang
+       -cl-mad-enable)
+endif(EIGEN_SYCL_TRISYCL)
+
+FOREACH(example_src ${examples_SRCS})
+  GET_FILENAME_COMPONENT(example ${example_src} NAME_WE)
+  ei_add_test_internal(${example} example_${example})
+  ADD_DEPENDENCIES(unsupported_examples example_${example})
+ENDFOREACH(example_src)
+set(EIGEN_SYCL OFF)
diff --git a/unsupported/doc/examples/SYCL/CwiseMul.cpp b/unsupported/doc/examples/SYCL/CwiseMul.cpp
new file mode 100644
index 000000000..31eb104c6
--- /dev/null
+++ b/unsupported/doc/examples/SYCL/CwiseMul.cpp
@@ -0,0 +1,63 @@
+#include <iostream>
+#define EIGEN_USE_SYCL
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+int main()
+{
+  using DataType = float;
+  using IndexType = int64_t;
+  constexpr auto DataLayout = Eigen::RowMajor;
+
+  auto devices = Eigen::get_sycl_supported_devices();
+  const auto device_selector = *devices.begin();  // use the first supported device in the list
+  Eigen::QueueInterface queueInterface(device_selector);
+  auto sycl_device = Eigen::SyclDevice(&queueInterface);
+
+  // define the 3x3x3 extent shared by every tensor in this example
+  IndexType sizeDim1 = 3;
+  IndexType sizeDim2 = 3;
+  IndexType sizeDim3 = 3;
+  array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+
+  // create the host tensors holding the data we want to manipulate
+  Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange);
+  Tensor<DataType, 3, DataLayout, IndexType> in2(tensorRange);
+  Tensor<DataType, 3, DataLayout, IndexType> out(tensorRange);
+
+  // set up some random data in the tensors to be multiplied
+  in1 = in1.random();
+  in2 = in2.random();
+
+  // allocate memory for the tensors through the SYCL device
+  DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType)));
+  DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.size()*sizeof(DataType)));
+  DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.size()*sizeof(DataType)));
+
+  // wrap the allocations above in TensorMaps so they can be used in tensor expressions
+  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, tensorRange);
+  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, tensorRange);
+  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange);
+
+  // copy the inputs to the device, do the c=a*b calculation, and copy the result back to the host
+  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(), in1.size()*sizeof(DataType));
+  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(), in2.size()*sizeof(DataType));
+  gpu_out.device(sycl_device) = gpu_in1 * gpu_in2;
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, out.size()*sizeof(DataType));
+  sycl_device.synchronize();
+
+  // print each device result next to the same product computed on the host
+  for (IndexType i = 0; i < sizeDim1; ++i) {
+    for (IndexType j = 0; j < sizeDim2; ++j) {
+      for (IndexType k = 0; k < sizeDim3; ++k) {
+        std::cout << "device_out" << "(" << i << ", " << j << ", " << k << ") : " << out(i,j,k)
+                  << " vs host_out" << "(" << i << ", " << j << ", " << k << ") : " << in1(i,j,k) * in2(i,j,k) << "\n";
+      }
+    }
+  }
+  std::cout << "c=a*b Done\n";  // std::cout instead of printf: <cstdio> is not included by this file
+} \ No newline at end of file