From 00f32752f7d0b193c6788691c3cf0b76457a044d Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Thu, 28 Nov 2019 10:08:54 +0000 Subject: [SYCL] Rebasing the SYCL support branch on top of the Eigen upstream master branch. * Unifying all loadLocalTile from lhs and rhs to an extract_block function. * Adding get_tensor operation which was missing in TensorContractionMapper. * Adding the -D method missing from cmake for Disable_Skinny Contraction operation. * Wrapping all the indices in TensorScanSycl into Scan parameter struct. * Fixing typo in Device SYCL * Unifying load to private register for tall/skinny no shared * Unifying load to vector tile for tensor-vector/vector-tensor operation * Removing all the LHS/RHS class for extracting data from global * Removing Outputfunction from TensorContractionSkinnyNoshared. * Combining the local memory version of tall/skinny and normal tensor contraction into one kernel. * Combining the no-local memory version of tall/skinny and normal tensor contraction into one kernel. * Combining General Tensor-Vector and VectorTensor contraction into one kernel. * Making double buffering optional for Tensor contraction when the local memory version is used. 
* Modifying benchmark to accept custom Reduction Sizes * Disabling AVX optimization for SYCL backend on the host to allow SSE optimization to the host * Adding Test for SYCL * Modifying SYCL CMake --- unsupported/doc/Overview.dox | 3 ++ unsupported/doc/SYCL.dox | 9 ++++ unsupported/doc/examples/CMakeLists.txt | 4 ++ unsupported/doc/examples/SYCL/CMakeLists.txt | 38 +++++++++++++++++ unsupported/doc/examples/SYCL/CwiseMul.cpp | 63 ++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+) create mode 100644 unsupported/doc/SYCL.dox create mode 100644 unsupported/doc/examples/SYCL/CMakeLists.txt create mode 100644 unsupported/doc/examples/SYCL/CwiseMul.cpp (limited to 'unsupported/doc') diff --git a/unsupported/doc/Overview.dox b/unsupported/doc/Overview.dox index 45464a545..bae51dcf6 100644 --- a/unsupported/doc/Overview.dox +++ b/unsupported/doc/Overview.dox @@ -11,6 +11,8 @@ Click on the \e Modules tab at the top of this page to get a list of all unsuppo Don't miss the official Eigen documentation. + \subpage SYCL_EIGEN "SYCL backend for Eigen" + */ /* @@ -26,3 +28,4 @@ subject to be included in %Eigen in the future. /// \internal \brief Namespace containing low-level routines from the %Eigen library. 
namespace internal {} } + diff --git a/unsupported/doc/SYCL.dox b/unsupported/doc/SYCL.dox new file mode 100644 index 000000000..2295adf21 --- /dev/null +++ b/unsupported/doc/SYCL.dox @@ -0,0 +1,9 @@ +/** \page SYCL_EIGEN Eigen SYCL Backend + +Useful information for Eigen SYCL Backend: + +- Getting Started with Eigen + +- Options for Building Eigen SYCL + +*/ diff --git a/unsupported/doc/examples/CMakeLists.txt b/unsupported/doc/examples/CMakeLists.txt index bee2b8ad4..7bb67736c 100644 --- a/unsupported/doc/examples/CMakeLists.txt +++ b/unsupported/doc/examples/CMakeLists.txt @@ -18,3 +18,7 @@ foreach(example_src ${examples_SRCS}) ) add_dependencies(unsupported_examples example_${example}) endforeach(example_src) + +if(EIGEN_TEST_SYCL) + add_subdirectory(SYCL) +endif(EIGEN_TEST_SYCL) diff --git a/unsupported/doc/examples/SYCL/CMakeLists.txt b/unsupported/doc/examples/SYCL/CMakeLists.txt new file mode 100644 index 000000000..bef4f1925 --- /dev/null +++ b/unsupported/doc/examples/SYCL/CMakeLists.txt @@ -0,0 +1,38 @@ +FILE(GLOB examples_SRCS "*.cpp") + +set(EIGEN_SYCL ON) +list(APPEND CMAKE_EXE_LINKER_FLAGS -pthread) +if(EIGEN_SYCL_TRISYCL) + set(CMAKE_CXX_STANDARD 14) + set(STD_CXX_FLAG "-std=c++1z") +else(EIGEN_SYCL_TRISYCL) + if(MSVC) + # Set the host and device compilers C++ standard to C++14. On Windows setting this to C++11 + # can cause issues with the ComputeCpp device compiler parsing Visual Studio Headers. + set(CMAKE_CXX_STANDARD 14) + list(APPEND COMPUTECPP_USER_FLAGS -DWIN32) + else() + set(CMAKE_CXX_STANDARD 11) + list(APPEND COMPUTECPP_USER_FLAGS -Wall) + endif() + # The following flags are not supported by Clang and can cause warnings + # if used with -Werror so they are removed here. 
+ if(COMPUTECPP_USE_COMPILER_DRIVER) + set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE}) + string(REPLACE "-Wlogical-op" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) + string(REPLACE "-Wno-psabi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) + string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) + endif() + list(APPEND COMPUTECPP_USER_FLAGS + -DEIGEN_NO_ASSERTION_CHECKING=1 + -no-serial-memop + -Xclang + -cl-mad-enable) +endif(EIGEN_SYCL_TRISYCL) + +FOREACH(example_src ${examples_SRCS}) + GET_FILENAME_COMPONENT(example ${example_src} NAME_WE) + ei_add_test_internal(${example} example_${example}) + ADD_DEPENDENCIES(unsupported_examples example_${example}) +ENDFOREACH(example_src) +set(EIGEN_SYCL OFF) diff --git a/unsupported/doc/examples/SYCL/CwiseMul.cpp b/unsupported/doc/examples/SYCL/CwiseMul.cpp new file mode 100644 index 000000000..31eb104c6 --- /dev/null +++ b/unsupported/doc/examples/SYCL/CwiseMul.cpp @@ -0,0 +1,63 @@ +#include +#define EIGEN_USE_SYCL +#include + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + +int main() +{ + using DataType = float; + using IndexType = int64_t; + constexpr auto DataLayout = Eigen::RowMajor; + + auto devices = Eigen::get_sycl_supported_devices(); + const auto device_selector = *devices.begin(); + Eigen::QueueInterface queueInterface(device_selector); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + + // create the tensors to be used in the operation + IndexType sizeDim1 = 3; + IndexType sizeDim2 = 3; + IndexType sizeDim3 = 3; + array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + + // initialize the tensors with the data we want manipulate to + Tensor in1(tensorRange); + Tensor in2(tensorRange); + Tensor out(tensorRange); + + // set up some random data in the tensors to be multiplied + in1 = in1.random(); + in2 = in2.random(); + + // allocate memory for the tensors + DataType * gpu_in1_data = static_cast(sycl_device.allocate(in1.size()*sizeof(DataType))); 
+ DataType * gpu_in2_data = static_cast(sycl_device.allocate(in2.size()*sizeof(DataType))); + DataType * gpu_out_data = static_cast(sycl_device.allocate(out.size()*sizeof(DataType))); + + // + TensorMap> gpu_in1(gpu_in1_data, tensorRange); + TensorMap> gpu_in2(gpu_in2_data, tensorRange); + TensorMap> gpu_out(gpu_out_data, tensorRange); + + // copy the memory to the device and do the c=a*b calculation + sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.size())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.size())*sizeof(DataType)); + gpu_out.device(sycl_device) = gpu_in1 * gpu_in2; + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType)); + sycl_device.synchronize(); + + // print out the results + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + std::cout << "device_out" << "(" << i << ", " << j << ", " << k << ") : " << out(i,j,k) + << " vs host_out" << "(" << i << ", " << j << ", " << k << ") : " << in1(i,j,k) * in2(i,j,k) << "\n"; + } + } + } + printf("c=a*b Done\n"); +} \ No newline at end of file -- cgit v1.2.3