From f4404777ff4b8f8ed70a479276c657cb6062465d Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Tue, 2 Aug 2016 17:08:57 +0000 Subject: Change project name to Eigen3, to be compatible with FindEigen3.cmake and Eigen3Config.cmake. This is related to pull-requests 214. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index abae3b23c..b4d28125a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -project(Eigen) +project(Eigen3) cmake_minimum_required(VERSION 2.8.5) -- cgit v1.2.3 From fe4b927e9c8e796a07c5864e58630e888979519e Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Fri, 5 Aug 2016 15:21:14 +0200 Subject: Add aliases Eigen_*_DIR to Eigen3_*_DIR This is to make configuring work again after project was renamed from Eigen to Eigen3 --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index b4d28125a..76ec09ea0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,11 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ") endif() +# Alias Eigen_*_DIR to Eigen3_*_DIR: + +set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR}) +set(Eigen_BINARY_DIR ${Eigen3_BINARY_DIR}) + # guard against bad build-type strings if (NOT CMAKE_BUILD_TYPE) -- cgit v1.2.3 From cb81975714a96ecb2faf33ca242feeee3543b1db Mon Sep 17 00:00:00 2001 From: Luke Iwanski Date: Mon, 19 Sep 2016 12:44:13 +0100 Subject: Partial OpenCL support via SYCL compatible with ComputeCpp CE. --- CMakeLists.txt | 7 + Eigen/Core | 35 +- bench/tensors/README | 8 +- bench/tensors/tensor_benchmarks_sycl.cc | 37 ++ cmake/EigenTesting.cmake | 135 ++++++ cmake/FindComputeCpp.cmake | 228 ++++++++++ unsupported/Eigen/CXX11/Tensor | 2 + unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h | 5 + unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 4 +- .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 7 +- .../Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 122 +++++ unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 56 ++- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 58 ++- .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 14 + .../Eigen/CXX11/src/Tensor/TensorFixedSize.h | 4 +- .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 49 +- .../CXX11/src/Tensor/TensorForwardDeclarations.h | 19 +- unsupported/Eigen/CXX11/src/Tensor/TensorMap.h | 20 +- unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h | 62 +++ .../Tensor/TensorSyclConvertToDeviceExpression.h | 238 ++++++++++ .../CXX11/src/Tensor/TensorSyclExprConstructor.h | 495 +++++++++++++++++++++ .../CXX11/src/Tensor/TensorSyclExtractAccessor.h | 466 +++++++++++++++++++ .../CXX11/src/Tensor/TensorSyclExtractFunctors.h | 313 +++++++++++++ .../Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h | 188 ++++++++ .../Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h | 151 +++++++ .../CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h | 293 ++++++++++++ unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h | 84 ++++ .../Eigen/CXX11/src/Tensor/TensorSyclTuple.h | 264 +++++++++++ unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h | 11 +- unsupported/test/CMakeLists.txt | 7 + unsupported/test/cxx11_tensor_sycl.cpp | 157 +++++++ unsupported/test/cxx11_tensor_sycl_broadcast.cpp | 76 ++++ unsupported/test/cxx11_tensor_sycl_device.cpp | 37 ++ 
unsupported/test/cxx11_tensor_sycl_forced_eval.cpp | 64 +++ 34 files changed, 3652 insertions(+), 64 deletions(-) create mode 100644 bench/tensors/tensor_benchmarks_sycl.cc create mode 100644 cmake/FindComputeCpp.cmake create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h create mode 100644 unsupported/test/cxx11_tensor_sycl.cpp create mode 100644 unsupported/test/cxx11_tensor_sycl_broadcast.cpp create mode 100644 unsupported/test/cxx11_tensor_sycl_device.cpp create mode 100644 unsupported/test/cxx11_tensor_sycl_forced_eval.cpp (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index 76ec09ea0..812997a29 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -429,6 +429,13 @@ else() add_subdirectory(lapack EXCLUDE_FROM_ALL) endif() +# add SYCL +option(EIGEN_TEST_SYCL "Add Sycl support." OFF) +if(EIGEN_TEST_SYCL) + set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}") + include(FindComputeCpp) +endif() + add_subdirectory(unsupported) add_subdirectory(demos EXCLUDE_FROM_ALL) diff --git a/Eigen/Core b/Eigen/Core index 946ed0677..3d2152acf 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -51,7 +51,40 @@ #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; #endif -#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) +#ifdef EIGEN_USE_SYCL +#undef min +#undef max +#undef isnan +#undef isinf +#undef isfinite +#include +#endif + +// We need these predefines to determine if asserts need to be disabled for the device compiler +#if defined(__SYCL_DEVICE_ONLY__) + // Do not try asserts on SYCL! + #ifndef EIGEN_NO_DEBUG + #define EIGEN_NO_DEBUG + #endif + + #ifdef EIGEN_INTERNAL_DEBUGGING + #undef EIGEN_INTERNAL_DEBUGGING + #endif + + // Do not try to vectorize on SYCL! + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif + + #ifdef EIGEN_EXCEPTIONS + #undef EIGEN_EXCEPTIONS + #endif + + #define EIGEN_DEVICE_FUNC + +#endif + +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) #define EIGEN_EXCEPTIONS #endif diff --git a/bench/tensors/README b/bench/tensors/README index 803cb8ef8..3a5fdbe17 100644 --- a/bench/tensors/README +++ b/bench/tensors/README @@ -11,5 +11,11 @@ nvcc tensor_benchmarks_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBU We also provide a version of the generic GPU tensor benchmarks that uses half floats (aka fp16) instead of regular floats. To compile these benchmarks, simply call the command line below. You'll need a recent GPU that supports compute capability 5.3 or higher to run them and nvcc 7.5 or higher to compile the code. 
nvcc tensor_benchmarks_fp16_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -use_fast_math -ftz=true -arch compute_53 -o benchmarks_fp16_gpu -last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call +last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call g++ contraction_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu + +To compile the benchmark for SYCL, using ComputeCpp you currently need 2 passes (only for translation units containing device code): +1. The device compilation pass that generates the device code (SYCL kernels and referenced device functions) and glue code needed by the host compiler to reference the device code from host code. +{ComputeCpp_ROOT}/bin/compute++ -I ../../ -I {ComputeCpp_ROOT}/include/ -std=c++11 -mllvm -inline-threshold=1000 -Wno-ignored-attributes -sycl -intelspirmetadata -emit-llvm -no-serial-memop -sycl-compress-name -DBUILD_PLATFORM_SPIR -DNDBUG -O3 -c tensor_benchmarks_sycl.cc +2. The host compilation pass that generates the final host binary. +clang++-3.7 -include tensor_benchmarks_sycl.sycl benchmark_main.cc tensor_benchmarks_sycl.cc -pthread -I ../../ -I {ComputeCpp_ROOT}/include/ -L {ComputeCpp_ROOT}/lib/ -lComputeCpp -lOpenCL -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -o tensor_benchmark_sycl diff --git a/bench/tensors/tensor_benchmarks_sycl.cc b/bench/tensors/tensor_benchmarks_sycl.cc new file mode 100644 index 000000000..7eca4d966 --- /dev/null +++ b/bench/tensors/tensor_benchmarks_sycl.cc @@ -0,0 +1,37 @@ +#define EIGEN_USE_SYCL + +#include +#include + +#include "tensor_benchmarks.h" + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; +// Simple functions +template +cl::sycl::queue sycl_queue() { + return cl::sycl::queue(device_selector(), [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); +} + +#define BM_FuncGPU(FUNC) \ + static void BM_##FUNC(int iters, int N) { \ + StopBenchmarkTiming(); \ + cl::sycl::queue q = sycl_queue(); \ + Eigen::SyclDevice device(q); \ + BenchmarkSuite suite(device, N); \ + suite.FUNC(iters); \ + } \ + BENCHMARK_RANGE(BM_##FUNC, 10, 5000); + +BM_FuncGPU(broadcasting); +BM_FuncGPU(coeffWiseOp); diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index 57866d865..602ab5271 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -109,6 +109,103 @@ macro(ei_add_test_internal testname testname_with_suffix) endmacro(ei_add_test_internal) +# SYCL +macro(ei_add_test_internal_sycl testname testname_with_suffix) + include_directories( SYSTEM ${COMPUTECPP_PACKAGE_ROOT_DIR}/include) + set(targetname ${testname_with_suffix}) + + if(EIGEN_ADD_TEST_FILENAME_EXTENSION) + set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) + else() + set(filename ${testname}.cpp) + endif() + + set( include_file ${CMAKE_CURRENT_BINARY_DIR}/inc_${filename}) + set( bc_file ${CMAKE_CURRENT_BINARY_DIR}/${filename}) + set( host_file ${CMAKE_CURRENT_SOURCE_DIR}/${filename}) + + ADD_CUSTOM_COMMAND( + OUTPUT ${include_file} + COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${host_file}\\\"" > ${include_file} + COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${bc_file}.sycl\\\"" >> ${include_file} 
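+    # The generated inc_${filename} wrapper therefore just includes the original
+    # test source followed by the compute++-generated ${filename}.sycl
+    # integration header.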
+ DEPENDS ${filename} + COMMENT "Building ComputeCpp integration header file ${include_file}" + ) + # Add a custom target for the generated integration header + add_custom_target(${testname}_integration_header_woho DEPENDS ${include_file}) + + add_executable(${targetname} ${include_file}) + add_dependencies(${targetname} ${testname}_integration_header_woho) + add_sycl_to_target(${targetname} ${filename} ${CMAKE_CURRENT_BINARY_DIR}) + + if (targetname MATCHES "^eigen2_") + add_dependencies(eigen2_buildtests ${targetname}) + else() + add_dependencies(buildtests ${targetname}) + endif() + + if(EIGEN_NO_ASSERTION_CHECKING) + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_NO_ASSERTION_CHECKING=1") + else(EIGEN_NO_ASSERTION_CHECKING) + if(EIGEN_DEBUG_ASSERTS) + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1") + endif(EIGEN_DEBUG_ASSERTS) + endif(EIGEN_NO_ASSERTION_CHECKING) + + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}") + + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_FUNC=${testname}") + + if(MSVC AND NOT EIGEN_SPLIT_LARGE_TESTS) + ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj") + endif() + + # let the user pass flags. + if(${ARGC} GREATER 2) + ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}") + endif(${ARGC} GREATER 2) + + if(EIGEN_TEST_CUSTOM_CXX_FLAGS) + ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}") + endif() + + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + target_link_libraries(${targetname} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) + endif() + if(EXTERNAL_LIBS) + target_link_libraries(${targetname} ${EXTERNAL_LIBS}) + endif() + if(EIGEN_TEST_CUSTOM_LINKER_FLAGS) + target_link_libraries(${targetname} ${EIGEN_TEST_CUSTOM_LINKER_FLAGS}) + endif() + + if(${ARGC} GREATER 3) + set(libs_to_link ${ARGV3}) + # it could be that some cmake module provides a bad library string " " (just spaces), + # and that severely breaks target_link_libraries ("can't link to -l-lstdc++" errors). + # so we check for strings containing only spaces. + string(STRIP "${libs_to_link}" libs_to_link_stripped) + string(LENGTH "${libs_to_link_stripped}" libs_to_link_stripped_length) + if(${libs_to_link_stripped_length} GREATER 0) + # notice: no double quotes around ${libs_to_link} here. It may be a list. 
+ target_link_libraries(${targetname} ${libs_to_link}) + endif() + endif() + + add_test(${testname_with_suffix} "${targetname}") + + # Specify target and test labels according to EIGEN_CURRENT_SUBPROJECT + get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT) + if ((current_subproject) AND (NOT (current_subproject STREQUAL ""))) + set_property(TARGET ${targetname} PROPERTY LABELS "Build${current_subproject}") + add_dependencies("Build${current_subproject}" ${targetname}) + set_property(TEST ${testname_with_suffix} PROPERTY LABELS "${current_subproject}") + endif() + + +endmacro(ei_add_test_internal_sycl) + + # Macro to add a test # # the unique mandatory parameter testname must correspond to a file @@ -185,6 +282,39 @@ macro(ei_add_test testname) endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes) endmacro(ei_add_test) +macro(ei_add_test_sycl testname) + get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST) + set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}${testname}\n") + set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") + + if(EIGEN_ADD_TEST_FILENAME_EXTENSION) + set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) + else() + set(filename ${testname}.cpp) + endif() + + file(READ "${filename}" test_source) + set(parts 0) + string(REGEX MATCHALL "CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+|EIGEN_SUFFIXES(;[0-9]+)+" + occurences "${test_source}") + string(REGEX REPLACE "CALL_SUBTEST_|EIGEN_TEST_PART_|EIGEN_SUFFIXES" "" suffixes "${occurences}") + list(REMOVE_DUPLICATES suffixes) + if(EIGEN_SPLIT_LARGE_TESTS AND suffixes) + add_custom_target(${testname}) + foreach(suffix ${suffixes}) + ei_add_test_internal_sycl(${testname} ${testname}_${suffix} + "${ARGV1} -DEIGEN_TEST_PART_${suffix}=1" "${ARGV2}") + add_dependencies(${testname} ${testname}_${suffix}) + endforeach(suffix) + else(EIGEN_SPLIT_LARGE_TESTS AND suffixes) + set(symbols_to_enable_all_parts "") + foreach(suffix ${suffixes}) + set(symbols_to_enable_all_parts + "${symbols_to_enable_all_parts} -DEIGEN_TEST_PART_${suffix}=1") + endforeach(suffix) + ei_add_test_internal_sycl(${testname} ${testname} "${ARGV1} ${symbols_to_enable_all_parts}" "${ARGV2}") + endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes) +endmacro(ei_add_test_sycl) # adds a failtest, i.e. a test that succeed if the program fails to compile # note that the test runner for these is CMake itself, when passed -DEIGEN_FAILTEST=ON @@ -330,6 +460,11 @@ macro(ei_testing_print_summary) message(STATUS "C++11: OFF") endif() + if(EIGEN_TEST_SYCL) + message(STATUS "SYCL: ON") + else() + message(STATUS "SYCL: OFF") + endif() if(EIGEN_TEST_CUDA) if(EIGEN_TEST_CUDA_CLANG) message(STATUS "CUDA: ON (using clang)") diff --git a/cmake/FindComputeCpp.cmake b/cmake/FindComputeCpp.cmake new file mode 100644 index 000000000..3aab5b833 --- /dev/null +++ b/cmake/FindComputeCpp.cmake @@ -0,0 +1,228 @@ +#.rst: +# FindComputeCpp +#--------------- + +######################### +# FindComputeCpp.cmake +######################### +# +# Tools for finding and building with ComputeCpp. +# + +# Require CMake version 3.2.2 or higher +cmake_minimum_required(VERSION 3.2.2) + +# Check that a supported host compiler can be found +if(CMAKE_COMPILER_IS_GNUCXX) + # Require at least gcc 4.8 + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) + message(FATAL_ERROR + "host compiler - Not found! 
(gcc version must be at least 4.8)") + # Require the GCC dual ABI to be disabled for 5.1 or higher + elseif (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.1) + set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True") + message(STATUS + "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION} (note pre 5.1 gcc ABI enabled)") + else() + message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}") + endif() +elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + # Require at least clang 3.6 + if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6) + message(FATAL_ERROR + "host compiler - Not found! (clang version must be at least 3.6)") + else() + set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True") + message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}") + endif() +else() + message(WARNING + "host compiler - Not found! (ComputeCpp supports GCC and Clang, see readme)") +endif() + +set(COMPUTECPP_64_BIT_DEFAULT ON) +option(COMPUTECPP_64_BIT_CODE "Compile device code in 64 bit mode" + ${COMPUTECPP_64_BIT_DEFAULT}) +mark_as_advanced(COMPUTECPP_64_BIT_CODE) + +# Find OpenCL package +find_package(OpenCL REQUIRED) + +# Find ComputeCpp package +if(EXISTS ${COMPUTECPP_PACKAGE_ROOT_DIR}) + message(STATUS "ComputeCpp package - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})") +else() + message(FATAL_ERROR "ComputeCpp package - Not found! (please set COMPUTECPP_PACKAGE_ROOT_DIR) (${COMPUTECPP_PACKAGE_ROOT_DIR})") +endif() +option(COMPUTECPP_PACKAGE_ROOT_DIR "Path to the ComputeCpp Package") + +# Obtain the path to compute++ +find_program(COMPUTECPP_DEVICE_COMPILER compute++ PATHS + ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin) +if (EXISTS ${COMPUTECPP_DEVICE_COMPILER}) + mark_as_advanced(COMPUTECPP_DEVICE_COMPILER) + message(STATUS "compute++ - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})") +else() + message(FATAL_ERROR "compute++ - Not found! (${COMPUTECPP_DEVICE_COMPILER}) (${COMPUTECPP_PACKAGE_ROOT_DIR})") +endif() + +# Obtain the path to computecpp_info +find_program(COMPUTECPP_INFO_TOOL computecpp_info PATHS + ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin) +if (EXISTS ${COMPUTECPP_INFO_TOOL}) + mark_as_advanced(${COMPUTECPP_INFO_TOOL}) + message(STATUS "computecpp_info - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})") +else() + message(FATAL_ERROR "computecpp_info - Not found! (${COMPUTECPP_INFO_TOOL}) (${COMPUTECPP_PACKAGE_ROOT_DIR})") +endif() + +# Obtain the path to the ComputeCpp runtime library +find_library(COMPUTECPP_RUNTIME_LIBRARY ComputeCpp PATHS ${COMPUTECPP_PACKAGE_ROOT_DIR} + HINTS ${COMPUTECPP_PACKAGE_ROOT_DIR}/lib PATH_SUFFIXES lib + DOC "ComputeCpp Runtime Library" NO_DEFAULT_PATH) + +if (EXISTS ${COMPUTECPP_RUNTIME_LIBRARY}) + mark_as_advanced(COMPUTECPP_RUNTIME_LIBRARY) + message(STATUS "libComputeCpp.so - Found") +else() + message(FATAL_ERROR "libComputeCpp.so - Not found! (${COMPUTECPP_PACKAGE_ROOT_DIR})") +endif() + +# Obtain the ComputeCpp include directory +set(COMPUTECPP_INCLUDE_DIRECTORY ${COMPUTECPP_PACKAGE_ROOT_DIR}/include/) +if (NOT EXISTS ${COMPUTECPP_INCLUDE_DIRECTORY}) + message(FATAL_ERROR "ComputeCpp includes - Not found! 
(${COMPUTECPP_PACKAGE_ROOT_DIR}/include/)") +else() + message(STATUS "ComputeCpp includes - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})") +endif() + +# Obtain the package version +execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-version" + OUTPUT_VARIABLE COMPUTECPP_PACKAGE_VERSION + RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") + message(FATAL_ERROR "Package version - Error obtaining version!") +else() + mark_as_advanced(COMPUTECPP_PACKAGE_VERSION) + message(STATUS "Package version - ${COMPUTECPP_PACKAGE_VERSION}") +endif() + +# Obtain the device compiler flags +execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-device-compiler-flags" + OUTPUT_VARIABLE COMPUTECPP_DEVICE_COMPILER_FLAGS + RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") + message(FATAL_ERROR "compute++ flags - Error obtaining compute++ flags!") +else() + mark_as_advanced(COMPUTECPP_COMPILER_FLAGS) + message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}") +endif() + +set(COMPUTECPP_DEVICE_COMPILER_FLAGS ${COMPUTECPP_DEVICE_COMPILER_FLAGS} -sycl-compress-name -no-serial-memop -DEIGEN_NO_ASSERTION_CHECKING=1) + +# Check if the platform is supported +execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-is-supported" + OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED + RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") + message(FATAL_ERROR "platform - Error checking platform support!") +else() + mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED) + if (COMPUTECPP_PLATFORM_IS_SUPPORTED) + message(STATUS "platform - your system can support ComputeCpp") + else() + message(STATUS "platform - your system CANNOT support ComputeCpp") + endif() +endif() + +#################### +# __build_sycl +#################### +# +# Adds a custom target for running compute++ and adding a dependency for the +# resulting integration header. +# +# targetName : Name of the target. +# sourceFile : Source file to be compiled. +# binaryDir : Intermediate output directory for the integration header. +# +function(__build_spir targetName sourceFile binaryDir) + + # Retrieve source file name. + get_filename_component(sourceFileName ${sourceFile} NAME) + + # Set the path to the Sycl file. 
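+  # (e.g. foo.cpp yields ${binaryDir}/foo.cpp.sycl; this is the integration
+  # header that the host compilation later pulls in, as done by the inc_ wrapper
+  # in ei_add_test_internal_sycl or the -include flag in bench/tensors/README)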
+ set(outputSyclFile ${binaryDir}/${sourceFileName}.sycl) + + # Add any user-defined include to the device compiler + get_property(includeDirectories DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY + INCLUDE_DIRECTORIES) + set(device_compiler_includes "") + foreach(directory ${includeDirectories}) + set(device_compiler_includes "-I${directory}" ${device_compiler_includes}) + endforeach() + if (CMAKE_INCLUDE_PATH) + foreach(directory ${CMAKE_INCLUDE_PATH}) + set(device_compiler_includes "-I${directory}" + ${device_compiler_includes}) + endforeach() + endif() + + # Convert argument list format + separate_arguments(COMPUTECPP_DEVICE_COMPILER_FLAGS) + + # Add custom command for running compute++ + add_custom_command( + OUTPUT ${outputSyclFile} + COMMAND ${COMPUTECPP_DEVICE_COMPILER} + ${COMPUTECPP_DEVICE_COMPILER_FLAGS} + -I${COMPUTECPP_INCLUDE_DIRECTORY} + ${COMPUTECPP_PLATFORM_SPECIFIC_ARGS} + ${device_compiler_includes} + -o ${outputSyclFile} + -c ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile} + DEPENDS ${sourceFile} + COMMENT "Building ComputeCpp integration header file ${outputSyclFile}") + + # Add a custom target for the generated integration header + add_custom_target(${targetName}_integration_header DEPENDS ${outputSyclFile}) + + # Add a dependency on the integration header + add_dependencies(${targetName} ${targetName}_integration_header) + + # Force inclusion of the integration header for the host compiler + #set(compileFlags -include ${include_file} "-Wall") + target_compile_options(${targetName} PUBLIC ${compileFlags}) + + # Set the host compiler C++ standard to C++11 + set_property(TARGET ${targetName} PROPERTY CXX_STANDARD 11) + + # Disable GCC dual ABI on GCC 5.1 and higher + if(COMPUTECPP_DISABLE_GCC_DUAL_ABI) + set_property(TARGET ${targetName} APPEND PROPERTY COMPILE_DEFINITIONS + "_GLIBCXX_USE_CXX11_ABI=0") + endif() + +endfunction() + +####################### +# add_sycl_to_target +####################### +# +# Adds a SYCL compilation custom command associated with an existing +# target and sets a dependency on that new command. +# +# targetName : Name of the target to add a SYCL to. +# sourceFile : Source file to be compiled for SYCL. +# binaryDir : Intermediate output directory for the integration header. 
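+#
+# Typical call (mirroring ei_add_test_internal_sycl in EigenTesting.cmake):
+#   add_sycl_to_target(${targetname} ${filename} ${CMAKE_CURRENT_BINARY_DIR})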
+# +function(add_sycl_to_target targetName sourceFile binaryDir) + + # Add custom target to run compute++ and generate the integration header + __build_spir(${targetName} ${sourceFile} ${binaryDir}) + + # Link with the ComputeCpp runtime library + target_link_libraries(${targetName} PUBLIC ${COMPUTECPP_RUNTIME_LIBRARY} + PUBLIC ${OpenCL_LIBRARIES}) + +endfunction(add_sycl_to_target) diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index f7b94cee1..da6a3f301 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -74,6 +74,8 @@ typedef unsigned __int64 uint64_t; #include "src/Tensor/TensorDeviceDefault.h" #include "src/Tensor/TensorDeviceThreadPool.h" #include "src/Tensor/TensorDeviceCuda.h" +#include "src/Tensor/TensorSycl.h" +#include "src/Tensor/TensorDeviceSycl.h" #include "src/Tensor/TensorIndexList.h" #include "src/Tensor/TensorDimensionList.h" #include "src/Tensor/TensorDimensions.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index cb615c75b..166be200c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -163,6 +163,11 @@ struct TensorEvaluator, Device> TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); } + /// required by sycl in order to extract the accessor + const TensorEvaluator& left_impl() const { return m_leftImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& right_impl() const { return m_rightImpl; } + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); } private: diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 19d2b50b5..e3880d2e0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -811,7 +811,7 @@ class TensorBase protected: template friend class Tensor; - template friend class TensorFixedSize; + template class MakePointer_> friend class TensorFixedSize; template friend class TensorBase; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast(this); } @@ -827,7 +827,7 @@ class TensorBase : public TensorBase { static const int NumDimensions = DerivedTraits::NumDimensions; template friend class Tensor; - template friend class TensorFixedSize; + template class MakePointer_> friend class TensorFixedSize; template friend class TensorBase; EIGEN_DEVICE_FUNC diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 5d67f69f3..4cfe300eb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -113,7 +113,7 @@ struct TensorEvaluator, Device> }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) + : m_broadcast(op.broadcast()),m_impl(op.expression(), device) { // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar // and store the result in a scalar. 
Instead one should reshape the scalar into a a N-D @@ -374,7 +374,12 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + const TensorEvaluator& impl() const { return m_impl; } + + Broadcast functor() const { return m_broadcast; } + protected: + const Broadcast m_broadcast; Dimensions m_dimensions; array m_outputStrides; array m_inputStrides; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h new file mode 100644 index 000000000..bfd36f5aa --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -0,0 +1,122 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Cummins Chris PhD student at The University of Edinburgh. +// Contact: + +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H + +namespace Eigen { +/// \struct BufferT is used to specialise add_sycl_buffer function for +// two types of buffer we have. When the MapAllocator is true, we create the +// sycl buffer with MapAllocator. +/// We have to const_cast the input pointer in order to work around the fact +/// that sycl does not accept map allocator for const pointer. +template +struct BufferT { + using Type = cl::sycl::buffer>; + static inline void add_sycl_buffer( + const T *ptr, size_t num_bytes, + std::map> &buffer_map) { + buffer_map.insert(std::pair>( + ptr, std::shared_ptr(std::make_shared( + Type(const_cast(ptr), cl::sycl::range<1>(num_bytes)))))); + } +}; + +/// specialisation of the \ref BufferT when the MapAllocator is false. In this +/// case we only create the device-only buffer. +template +struct BufferT { + using Type = cl::sycl::buffer; + static inline void add_sycl_buffer( + const T *ptr, size_t num_bytes, + std::map> &buffer_map) { + buffer_map.insert(std::pair>( + ptr, std::shared_ptr( + std::make_shared(Type(cl::sycl::range<1>(num_bytes)))))); + } +}; + +struct SyclDevice { + /// class members + /// sycl queue + cl::sycl::queue &m_queue; + /// std::map is the container used to make sure that we create only one buffer + /// per pointer. The lifespan of the buffer + /// now depends on the lifespan of SyclDevice. If a non-read-only pointer is + /// needed to be accessed on the host we should manually deallocate it. + mutable std::map> buffer_map; + + SyclDevice(cl::sycl::queue &q) : m_queue(q) {} + // destructor + ~SyclDevice() { deallocate_all(); } + + template + void deallocate(const T *p) const { + auto it = buffer_map.find(p); + if (it != buffer_map.end()) { + buffer_map.erase(it); + } + } + void deallocate_all() const { buffer_map.clear(); } + + /// creation of sycl accessor for a buffer. This function first tries to find + /// the buffer in the buffer_map. + /// If found it gets the accessor from it, if not, the function then adds an + /// entry by creating a sycl buffer + /// for that particular pointer. 
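+  /// The accessor obtained here is what TensorSyclExtractAccessor gathers into
+  /// a tuple for the leaves of the expression tree when the kernel command
+  /// group is set up. A usage sketch, assuming the access mode is supplied as a
+  /// template argument:
+  ///   auto acc = dev.get_sycl_accessor<cl::sycl::access::mode::read_write>(
+  ///       n * sizeof(T), cgh, ptr);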
+ template + inline cl::sycl::accessor + get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh, + const T *ptr) const { + auto it = buffer_map.find(ptr); + if (it == buffer_map.end()) { + BufferT::add_sycl_buffer(ptr, num_bytes, buffer_map); + } + return ( + ((typename BufferT::Type *)(buffer_map.at(ptr).get())) + ->template get_access(cgh)); + } + + /// allocating memory on the cpu + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + + // some runtime conditions that can be applied here + bool isDeviceSuitable() const { return true; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void *buffer) const { + internal::aligned_free(buffer); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, + size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice( + void *dst, const void *src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost( + void *dst, const void *src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void *buffer, int c, + size_t n) const { + ::memset(buffer, c, n); + } +}; +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index a08dfa7c3..3dab6da99 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -20,8 +20,8 @@ namespace Eigen { * */ namespace internal { -template -struct traits > +template class MakePointer_> +struct traits > { // Type promotion to handle the case where the types of the lhs and the rhs are different. 
typedef typename XprType::Scalar Scalar; @@ -36,16 +36,20 @@ struct traits > enum { Flags = 0 }; + template + struct MakePointer { + typedef typename MakePointer_::Type Type; + }; }; -template -struct eval, Eigen::Dense> +template class MakePointer_> +struct eval, Eigen::Dense> { typedef const TensorEvalToOp& type; }; -template -struct nested, 1, typename eval >::type> +template class MakePointer_> +struct nested, 1, typename eval >::type> { typedef TensorEvalToOp type; }; @@ -55,37 +59,38 @@ struct nested, 1, typename eval -template -class TensorEvalToOp : public TensorBase, ReadOnlyAccessors> +template class MakePointer_> +class TensorEvalToOp : public TensorBase, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits::Scalar Scalar; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename internal::remove_const::type CoeffReturnType; + typedef typename MakePointer_::Type PointerType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(CoeffReturnType* buffer, const XprType& expr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr) : m_xpr(expr), m_buffer(buffer) {} EIGEN_DEVICE_FUNC const typename internal::remove_all::type& expression() const { return m_xpr; } - EIGEN_DEVICE_FUNC CoeffReturnType* buffer() const { return m_buffer; } + EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; } protected: typename XprType::Nested m_xpr; - CoeffReturnType* m_buffer; + PointerType m_buffer; }; -template -struct TensorEvaluator, Device> +template class MakePointer_> +struct TensorEvaluator, Device> { - typedef TensorEvalToOp XprType; + typedef TensorEvalToOp XprType; typedef typename ArgType::Scalar Scalar; typedef typename TensorEvaluator::Dimensions Dimensions; typedef typename XprType::Index Index; @@ -102,15 +107,22 @@ struct TensorEvaluator, Device> }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_device(device), m_buffer(op.buffer()) + : m_impl(op.expression(), device), m_device(device), + m_buffer(op.buffer()), m_op(op), m_expression(op.expression()) { } + // Used for accessor extraction in SYCL Managed TensorMap: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const { + return m_op; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() { } + typedef typename internal::traits>::template MakePointer::Type DevicePointer; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) { EIGEN_UNUSED_VARIABLE(scalar); eigen_assert(scalar == NULL); return m_impl.evalSubExprsIfNeeded(m_buffer); @@ -145,12 +157,20 @@ struct TensorEvaluator, Device> TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_buffer; } + EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; } + ArgType expression() const { return m_expression; } + + /// required by sycl in order to extract the accessor + const TensorEvaluator& impl() const { return m_impl; } + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return 
m_device;} private: TensorEvaluator m_impl; const Device& m_device; - CoeffReturnType* m_buffer; + DevicePointer m_buffer; + const XprType& m_op; + const ArgType m_expression; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index c2a327bf0..b2b4bcf62 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -46,9 +46,11 @@ struct TensorEvaluator }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(const_cast(m.data())), m_dims(m.dimensions()), m_device(device) + : m_data(const_cast::template MakePointer::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m) { } + // Used for accessor extraction in SYCL Managed TensorMap: + const Derived& derived() const { return m_impl; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) { @@ -106,12 +108,16 @@ struct TensorEvaluator internal::unpacket_traits::size); } - EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } + + /// required by sycl in order to construct sycl buffer from raw pointer + const Device& device() const{return m_device;} protected: - Scalar* m_data; + typename internal::traits::template MakePointer::Type m_data; Dimensions m_dims; const Device& m_device; + const Derived& m_impl; }; namespace { @@ -159,8 +165,11 @@ struct TensorEvaluator RawAccess = true }; + // Used for accessor extraction in SYCL Managed TensorMap: + const Derived& derived() const { return m_impl; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(m.data()), m_dims(m.dimensions()), m_device(device) + : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } @@ -198,12 +207,16 @@ struct TensorEvaluator internal::unpacket_traits::size); } - EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } + + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} protected: - const Scalar* m_data; + typename internal::traits::template MakePointer::Type m_data; Dimensions m_dims; const Device& m_device; + const Derived& m_impl; }; @@ -260,6 +273,12 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& impl() const { return m_argImpl; } + /// required by sycl in order to extract the accessor + NullaryOp functor() const { return m_functor; } + + private: const NullaryOp m_functor; TensorEvaluator m_argImpl; @@ -323,6 +342,12 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator & impl() const { return m_argImpl; } + /// added for sycl in order to construct the buffer from sycl device + UnaryOp functor() const { return m_functor; } + + private: const UnaryOp m_functor; TensorEvaluator m_argImpl; @@ -396,6 +421,12 @@ struct 
TensorEvaluator& left_impl() const { return m_leftImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& right_impl() const { return m_rightImpl; } + /// required by sycl in order to extract the accessor + BinaryOp functor() const { return m_functor; } private: const BinaryOp m_functor; @@ -491,10 +522,17 @@ struct TensorEvaluator & arg1Impl() const { return m_arg1Impl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& arg2Impl() const { return m_arg2Impl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& arg3Impl() const { return m_arg3Impl; } + private: const TernaryOp m_functor; TensorEvaluator m_arg1Impl; - TensorEvaluator m_arg2Impl; + TensorEvaluator m_arg2Impl; TensorEvaluator m_arg3Impl; }; @@ -575,6 +613,12 @@ struct TensorEvaluator } EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator & cond_impl() const { return m_condImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& then_impl() const { return m_thenImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& else_impl() const { return m_elseImpl; } private: TensorEvaluator m_condImpl; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index a116bf17f..9b99af641 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -272,6 +272,20 @@ inline void TensorExecutor::run( #endif // __CUDACC__ #endif // EIGEN_USE_GPU +// SYCL Executor policy +#ifdef EIGEN_USE_SYCL + +template +class TensorExecutor { +public: + static inline void run(const Expression &expr, const SyclDevice &device) { + // call TensorSYCL module + TensorSycl::run(expr, device); + } +}; + +#endif + } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h index fcee5f60d..415e459b9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -23,8 +23,8 @@ namespace Eigen { * Eigen::TensorFixedSize> t; */ -template -class TensorFixedSize : public TensorBase > +template class MakePointer_> +class TensorFixedSize : public TensorBase > { public: typedef TensorFixedSize Self; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index c23ecdbc4..9cf4a07e5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -19,9 +19,15 @@ namespace Eigen { * * */ +/// template class MakePointer_ is added to convert the host pointer to the device pointer. +/// It is added due to the fact that for our device compiler T* is not allowed. +/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T. +/// This is done through our MakePointer_ class. By default the Type in the MakePointer_ is T* . +/// Therefore, by adding the default value, we managed to convert the type and it does not break any +/// existing code as its default value is T*. namespace internal { -template -struct traits > +template class MakePointer_> +struct traits > { // Type promotion to handle the case where the types of the lhs and the rhs are different. 
typedef typename XprType::Scalar Scalar; @@ -36,26 +42,30 @@ struct traits > enum { Flags = 0 }; + template + struct MakePointer { + typedef typename MakePointer_::Type Type; + }; }; -template -struct eval, Eigen::Dense> +template class MakePointer_> +struct eval, Eigen::Dense> { - typedef const TensorForcedEvalOp& type; + typedef const TensorForcedEvalOp& type; }; -template -struct nested, 1, typename eval >::type> +template class MakePointer_> +struct nested, 1, typename eval >::type> { - typedef TensorForcedEvalOp type; + typedef TensorForcedEvalOp type; }; } // end namespace internal -template -class TensorForcedEvalOp : public TensorBase, ReadOnlyAccessors> +template class MakePointer_> +class TensorForcedEvalOp : public TensorBase, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits::Scalar Scalar; @@ -77,10 +87,10 @@ class TensorForcedEvalOp : public TensorBase, ReadOn }; -template -struct TensorEvaluator, Device> +template class MakePointer_> +struct TensorEvaluator, Device> { - typedef TensorForcedEvalOp XprType; + typedef TensorForcedEvalOp XprType; typedef typename ArgType::Scalar Scalar; typedef typename TensorEvaluator::Dimensions Dimensions; typedef typename XprType::Index Index; @@ -96,6 +106,7 @@ struct TensorEvaluator, Device> }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + /// op_ is used for sycl : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL) { } @@ -110,10 +121,10 @@ struct TensorEvaluator, Device> new(m_buffer+i) CoeffReturnType(); } } - typedef TensorEvalToOp EvalTo; + typedef TensorEvalToOp< const typename internal::remove_const::type > EvalTo; EvalTo evalToTmp(m_buffer, m_op); const bool PacketAccess = internal::IsVectorizable::value; - internal::TensorExecutor::run(evalToTmp, m_device); + internal::TensorExecutor::type, PacketAccess>::run(evalToTmp, m_device); return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { @@ -136,13 +147,17 @@ struct TensorEvaluator, Device> return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC Scalar* data() const { return m_buffer; } + EIGEN_DEVICE_FUNC typename MakePointer::Type data() const { return m_buffer; } + /// required by sycl in order to extract the sycl accessor + const TensorEvaluator& impl() { return m_impl; } + /// used by sycl in order to build the sycl buffer + const Device& device() const{return m_device;} private: TensorEvaluator m_impl; const ArgType m_op; const Device& m_device; - CoeffReturnType* m_buffer; + typename MakePointer::Type m_buffer; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 490ddd8bd..83c690133 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -12,9 +12,19 @@ namespace Eigen { +// MakePointer class is used as a container of the adress space of the pointer +// on the host and on the device. From the host side it generates the T* pointer +// and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to +// T* m_data on the host. It is always called on the device. +// Specialisation of MakePointer class for creating the sycl buffer with +// map_allocator. 
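+// On the host, MakePointer<T>::Type is simply T*. The SYCL device path swaps in
+// MakeGlobalPointer (defined in TensorSycl.h), whose Type is
+// cl::sycl::global_ptr<T>::pointer_t, so the same evaluator code can carry
+// either a host pointer or a device global pointer.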
+template struct MakePointer{ + typedef T* Type; +}; + +template class MakePointer_ = MakePointer> class TensorMap; template class Tensor; -template class TensorFixedSize; -template class TensorMap; +template class MakePointer_ = MakePointer> class TensorFixedSize; template class TensorRef; template class TensorBase; @@ -52,8 +62,8 @@ template class TensorScanOp; template class TensorCustomUnaryOp; template class TensorCustomBinaryOp; -template class TensorEvalToOp; -template class TensorForcedEvalOp; +template class MakePointer_ = MakePointer> class TensorEvalToOp; +template class MakePointer_ = MakePointer> class TensorForcedEvalOp; template class TensorDevice; template struct TensorEvaluator; @@ -61,6 +71,7 @@ template struct TensorEvaluator; struct DefaultDevice; struct ThreadPoolDevice; struct GpuDevice; +struct SyclDevice; enum FFTResultType { RealPart = 0, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index 6fb4f4a31..298a49138 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -18,11 +18,16 @@ namespace Eigen { * \brief A tensor expression mapping an existing array of data. * */ - -template class TensorMap : public TensorBase > +/// template class MakePointer_ is added to convert the host pointer to the device pointer. +/// It is added due to the fact that for our device compiler T* is not allowed. +/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T. +/// This is done through our MakePointer_ class. By default the Type in the MakePointer_ is T* . +/// Therefore, by adding the default value, we managed to convert the type and it does not break any +/// existing code as its default value is T*. +template class MakePointer_> class TensorMap : public TensorBase > { public: - typedef TensorMap Self; + typedef TensorMap Self; typedef typename PlainObjectType::Base Base; typedef typename Eigen::internal::nested::type Nested; typedef typename internal::traits::StorageKind StorageKind; @@ -36,7 +41,7 @@ template class TensorMap : public Tensor Scalar *, const Scalar *>::type PointerType;*/ - typedef Scalar* PointerType; + typedef typename MakePointer_::Type PointerType; typedef PointerType PointerArgType; static const int Options = Options_; @@ -109,9 +114,9 @@ template class TensorMap : public Tensor EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar* data() { return m_data; } + EIGEN_STRONG_INLINE PointerType data() { return m_data; } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar* data() const { return m_data; } + EIGEN_STRONG_INLINE const PointerType data() const { return m_data; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const @@ -307,8 +312,9 @@ template class TensorMap : public Tensor } private: - Scalar* m_data; + typename MakePointer_::Type m_data; Dimensions m_dimensions; + size_t is_coverted= size_t(0); }; } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h new file mode 100644 index 000000000..277dd739c --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h @@ -0,0 +1,62 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. 
+// Luke Iwanski Codeplay Software Ltd. +// Contact: eigen@codeplay.com +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// General include header of SYCL target for Tensor Module +#ifndef TENSORSYCL_H +#define TENSORSYCL_H + +#ifdef EIGEN_USE_SYCL + +// trait class to extract different attribute contents +template +struct Trait; +// global pointer to set different attribute state for a class +template +struct MakeGlobalPointer { + typedef typename cl::sycl::global_ptr::pointer_t Type; +}; + +// tuple construction +#include "TensorSyclTuple.h" + +// This file contains the PlaceHolder that replaces the actual data +#include "TensorSyclPlaceHolder.h" + +#include "TensorSyclLeafCount.h" + +// The index PlaceHolder takes the actual expression and replaces the actual +// data on it with the place holder. It uses the same pre-order expression tree +// traverse as the leaf count in order to give the right access number to each +// node in the expression +#include "TensorSyclPlaceHolderExpr.h" + +// creation of an accessor tuple from a tuple of SYCL buffers +#include "TensorSyclExtractAccessor.h" + +// actual data extraction using accessors +//#include "GetDeviceData.h" + +// this is used to change the address space type in tensor map for GPU +#include "TensorSyclConvertToDeviceExpression.h" + +// this is used to extract the functors +#include "TensorSyclExtractFunctors.h" + +// this is used to create tensormap on the device +// this is used to construct the expression on the device +#include "TensorSyclExprConstructor.h" + +// kernel execution using fusion +#include "TensorSyclRun.h" + +#endif // end of EIGEN_USE_SYCL +#endif // TENSORSYCL_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h new file mode 100644 index 000000000..b3748131b --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h @@ -0,0 +1,238 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclConvertToDeviceExpression.h + * + * \brief: + * Conversion from host pointer to device pointer + * inside leaf nodes of the expression. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \struct ConvertToDeviceExpression +/// \brief This struct is used to convert the MakePointer in the host expression +/// to the MakeGlobalPointer for the device expression. For the leafNodes +/// containing the pointer. This is due to the fact that the address space of +/// the pointer T* is different on the host and the device. 
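+/// For example (sketch): a host-side leaf of type
+/// TensorMap<Tensor<float, 1>, Options, MakePointer> is rewritten to
+/// TensorMap<Tensor<float, 1>, Options, MakeGlobalPointer>, while non-leaf
+/// nodes such as TensorCwiseBinaryOp keep their functor and simply recurse into
+/// their converted children.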
+template +struct ConvertToDeviceExpression; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorMap +template class MakePointer_> +struct ConvertToDeviceExpression< + TensorMap, Options2_, + MakePointer_>> { + using Type = TensorMap, + Options2_, MakeGlobalPointer>; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorMap +template class MakePointer_> +struct ConvertToDeviceExpression< + const TensorMap, + Options2_, MakePointer_>> { + using Type = + const TensorMap, + Options2_, MakeGlobalPointer>; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorCwiseNullaryOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const TensorCwiseNullaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorCwiseNullaryOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorCwiseNullaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorBroadcastingOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const TensorBroadcastingOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorBroadcastingOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorBroadcastingOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorCwiseUnaryOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const TensorCwiseUnaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorCwiseUnaryOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorCwiseUnaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorCwiseBinaryOp +template +struct ConvertToDeviceExpression< + const TensorCwiseBinaryOp> { + using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = + const TensorCwiseBinaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorCwiseBinaryOp +template +struct ConvertToDeviceExpression> { + using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorCwiseBinaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorCwiseTernaryOp +template +struct ConvertToDeviceExpression< + const TensorCwiseTernaryOp> { + using Arg1PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Arg2PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Arg3PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Type = + const TensorCwiseTernaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct 
when the node +/// type is TensorCwiseTernaryOp +template +struct ConvertToDeviceExpression< + TensorCwiseTernaryOp> { + using Arg1PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Arg2PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Arg3PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Type = TensorCwiseTernaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorCwiseSelectOp +template +struct ConvertToDeviceExpression< + const TensorSelectOp> { + using IfPlaceHolderType = typename ConvertToDeviceExpression::Type; + using ThenPlaceHolderType = + typename ConvertToDeviceExpression::Type; + using ElsePlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Type = const TensorSelectOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorCwiseSelectOp +template +struct ConvertToDeviceExpression> { + using IfPlaceHolderType = typename ConvertToDeviceExpression::Type; + using ThenPlaceHolderType = + typename ConvertToDeviceExpression::Type; + using ElsePlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Type = TensorSelectOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const AssingOP +template +struct ConvertToDeviceExpression> { + using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const TensorAssignOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is AssingOP +template +struct ConvertToDeviceExpression> { + using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorAssignOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorForcedEvalOp +template +struct ConvertToDeviceExpression> { + using PlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const TensorForcedEvalOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorForcedEvalOp +template +struct ConvertToDeviceExpression> { + using PlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorForcedEvalOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorEvalToOp +template +struct ConvertToDeviceExpression> { + using PlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const TensorEvalToOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorEvalToOp +template +struct ConvertToDeviceExpression> { + using PlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorEvalToOp; +}; +} // namespace internal +} // namespace TensorSycl +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX1 diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h new file mode 100644 index 000000000..fe3994175 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h @@ -0,0 +1,495 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. 
+// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclExprConstructor.h + * + * \brief: + * This file re-create an expression on the SYCL device in order + * to use the original tensor evaluator. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXPR_CONSTRUCTOR_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXPR_CONSTRUCTOR_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// this class is used by EvalToOp in order to create an lhs expression which is +/// a pointer from an accessor on device-only buffer +template +struct EvalToLHSConstructor { + PtrType expr; + EvalToLHSConstructor(const utility::tuple::Tuple &t) + : expr((&(*(utility::tuple::get(t).get_pointer())))) {} +}; + +/// \struct ExprConstructor is used to reconstruct the expression on the device +/// and +/// recreate the expression with MakeGlobalPointer containing the device address +/// space for the TensorMap pointers used in eval function. +/// It receives the original expression type, the functor of the node, the tuple +/// of accessors, and the device expression type to re-instantiate the +/// expression tree for the device +template +struct ExprConstructor; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorMap +template class MakePointer_, size_t N, typename... Params> +struct ExprConstructor< + const TensorMap, + Options2_, MakeGlobalPointer>, + const Eigen::internal::PlaceHolder< + const TensorMap, + Options3_, MakePointer_>, + N>, + Params...> { + using Type = + const TensorMap, + Options2_, MakeGlobalPointer>; + + Type expr; + + template + ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t) + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), + fd.dimensions())) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorMap +template class MakePointer_, size_t N, typename... 
Params> +struct ExprConstructor< + TensorMap, Options2_, + MakeGlobalPointer>, + Eigen::internal::PlaceHolder< + TensorMap, Options3_, + MakePointer_>, + N>, + Params...> { + using Type = TensorMap, + Options2_, MakeGlobalPointer>; + + Type expr; + template + ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t) + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), + fd.dimensions())) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseNullaryOp +template +struct ExprConstructor, + TensorCwiseNullaryOp, Params...> { + using my_type = ExprConstructor; + my_type rhsExpr; + using Type = TensorCwiseNullaryOp; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorCwiseNullaryOp +template +struct ExprConstructor, + const TensorCwiseNullaryOp, Params...> { + using my_type = const ExprConstructor; + my_type rhsExpr; + using Type = const TensorCwiseNullaryOp; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorBroadcastingOp +template +struct ExprConstructor, + TensorBroadcastingOp, Params...> { + using my_type = ExprConstructor; + my_type rhsExpr; + using Type = TensorBroadcastingOp; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorBroadcastingOp +template +struct ExprConstructor, + const TensorBroadcastingOp, Params...> { + using my_type = const ExprConstructor; + my_type rhsExpr; + using Type = const TensorBroadcastingOp; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseUnaryOp +template +struct ExprConstructor, + TensorCwiseUnaryOp, Params...> { + using my_type = ExprConstructor; + using Type = TensorCwiseUnaryOp; + my_type rhsExpr; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorCwiseUnaryOp +template +struct ExprConstructor, + const TensorCwiseUnaryOp, Params...> { + using my_type = ExprConstructor; + using Type = const TensorCwiseUnaryOp; + my_type rhsExpr; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseBinaryOp +template +struct ExprConstructor, + TensorCwiseBinaryOp, Params...> { + using my_left_type = ExprConstructor; + using my_right_type = ExprConstructor; + using Type = TensorCwiseBinaryOp; + + my_left_type lhsExpr; + my_right_type rhsExpr; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : lhsExpr(funcD.lhsExpr, t), + 
rhsExpr(funcD.rhsExpr, t), + expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorCwiseBinaryOp +template +struct ExprConstructor, + const TensorCwiseBinaryOp, + Params...> { + using my_left_type = ExprConstructor; + using my_right_type = ExprConstructor; + using Type = const TensorCwiseBinaryOp; + + my_left_type lhsExpr; + my_right_type rhsExpr; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : lhsExpr(funcD.lhsExpr, t), + rhsExpr(funcD.rhsExpr, t), + expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorCwiseTernaryOp +template +struct ExprConstructor< + const TensorCwiseTernaryOp, + const TensorCwiseTernaryOp, Params...> { + using my_arg1_type = ExprConstructor; + using my_arg2_type = ExprConstructor; + using my_arg3_type = ExprConstructor; + using Type = const TensorCwiseTernaryOp; + + my_arg1_type arg1Expr; + my_arg2_type arg2Expr; + my_arg3_type arg3Expr; + Type expr; + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : arg1Expr(funcD.arg1Expr, t), + arg2Expr(funcD.arg2Expr, t), + arg3Expr(funcD.arg3Expr, t), + expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseTernaryOp +template +struct ExprConstructor< + TensorCwiseTernaryOp, + TensorCwiseTernaryOp, Params...> { + using my_arg1_type = ExprConstructor; + using my_arg2_type = ExprConstructor; + using my_arg3_type = ExprConstructor; + using Type = TensorCwiseTernaryOp; + + my_arg1_type arg1Expr; + my_arg2_type arg2Expr; + my_arg3_type arg3Expr; + Type expr; + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : arg1Expr(funcD.arg1Expr, t), + arg2Expr(funcD.arg2Expr, t), + arg3Expr(funcD.arg3Expr, t), + expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorCwiseSelectOp +template +struct ExprConstructor< + const TensorSelectOp, + const TensorSelectOp, Params...> { + using my_if_type = ExprConstructor; + using my_then_type = ExprConstructor; + using my_else_type = ExprConstructor; + using Type = const TensorSelectOp; + + my_if_type ifExpr; + my_then_type thenExpr; + my_else_type elseExpr; + Type expr; + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : ifExpr(funcD.ifExpr, t), + thenExpr(funcD.thenExpr, t), + elseExpr(funcD.elseExpr, t), + expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseSelectOp +template +struct ExprConstructor, + TensorSelectOp, Params...> { + using my_if_type = ExprConstructor; + using my_then_type = ExprConstructor; + using my_else_type = ExprConstructor; + using Type = + TensorSelectOp; + + my_if_type ifExpr; + my_then_type thenExpr; + my_else_type elseExpr; + Type expr; + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : ifExpr(funcD.ifExpr, t), + thenExpr(funcD.thenExpr, t), + elseExpr(funcD.elseExpr, t), + expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorAssignOp +template +struct ExprConstructor, + TensorAssignOp, 
Params...> { + using my_left_type = ExprConstructor; + using my_right_type = ExprConstructor; + using Type = + TensorAssignOp; + + my_left_type lhsExpr; + my_right_type rhsExpr; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : lhsExpr(funcD.lhsExpr, t), + rhsExpr(funcD.rhsExpr, t), + expr(lhsExpr.expr, rhsExpr.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorAssignOp +template +struct ExprConstructor, + const TensorAssignOp, Params...> { + using my_left_type = ExprConstructor; + using my_right_type = ExprConstructor; + using Type = const TensorAssignOp; + + my_left_type lhsExpr; + my_right_type rhsExpr; + Type expr; + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : lhsExpr(funcD.lhsExpr, t), + rhsExpr(funcD.rhsExpr, t), + expr(lhsExpr.expr, rhsExpr.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorEvalToOp +template +struct ExprConstructor, + const TensorEvalToOp, Params...> { + using my_expr_type = ExprConstructor; + using my_buffer_type = + typename TensorEvalToOp::PointerType; + using Type = + const TensorEvalToOp; + my_expr_type nestedExpression; + EvalToLHSConstructor buffer; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : nestedExpression(funcD.rhsExpr, t), + buffer(t), + expr(buffer.expr, nestedExpression.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorEvalToOp +template +struct ExprConstructor, + TensorEvalToOp, Params...> { + using my_expr_type = ExprConstructor; + using my_buffer_type = + typename TensorEvalToOp::PointerType; + using Type = TensorEvalToOp; + my_expr_type nestedExpression; + EvalToLHSConstructor buffer; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : nestedExpression(funcD.rhsExpr, t), + buffer(t), + expr(buffer.expr, nestedExpression.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorForcedEvalOp +template +struct ExprConstructor< + const TensorForcedEvalOp, + const Eigen::internal::PlaceHolder, N>, + Params...> { + using Type = const TensorMap< + Tensor::Scalar, + TensorForcedEvalOp::NumDimensions, 0, + typename TensorForcedEvalOp::Index>, + 0, MakeGlobalPointer>; + + Type expr; + + template + ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t) + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), + fd.dimensions())) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorForcedEvalOp +template +struct ExprConstructor< + const TensorForcedEvalOp, + const Eigen::internal::PlaceHolder, N>, + Params...> { + using Type = TensorMap< + Tensor::Scalar, 1, + 0, typename TensorForcedEvalOp::Index>, + 0, MakeGlobalPointer>; + + Type expr; + + template + ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t) + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), + fd.dimensions())) {} +}; + +/// template deduction for \ref ExprConstructor struct +template +auto createDeviceExpression(FuncD &funcD, + const utility::tuple::Tuple &t) + -> decltype(ExprConstructor(funcD, t)) { + return ExprConstructor(funcD, t); +} +} +} +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXPR_CONSTRUCTOR_HPP diff --git 
a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h new file mode 100644 index 000000000..cb0ac131d --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h @@ -0,0 +1,466 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclExtractAccessor.h + * + * \brief: + * ExtractAccessor takes Expression placeHolder expression and the tuple of sycl + * buffers as an input. Using pre-order tree traversal, ExtractAccessor + * recursively calls itself for its children in the expression tree. The + * leaf node in the PlaceHolder expression is nothing but a container preserving + * the order of the actual data in the tuple of sycl buffer. By invoking the + * extract accessor for the PlaceHolder, an accessor is created for the Nth + * buffer in the tuple of buffers. This accessor is then added as an Nth + * element in the tuple of accessors. In this case we preserve the order of data + * in the expression tree. + * + * This is the specialisation of extract accessor method for different operation + * type in the PlaceHolder expression. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_ACCESSOR_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_ACCESSOR_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \struct ExtractAccessor: Extract Accessor Class is used to extract the +/// accessor from a buffer. 
+/// Depending on the type of the leaf node we can get a read accessor or a +/// read_write accessor +template +struct ExtractAccessor; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorMap +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + using actual_type = typename Eigen::internal::remove_all< + typename Eigen::internal::traits::Scalar>::type; + static inline auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> + eval) + -> decltype(utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, + eval.derived().data())))) { + return utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.derived().data()))); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorMap +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + using actual_type = typename Eigen::internal::remove_all< + typename Eigen::internal::traits::Scalar>::type; + + static inline auto getTuple( + cl::sycl::handler& cgh, + TensorEvaluator, Dev> eval) + -> decltype(utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, + eval.derived().data())))) { + return utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.derived().data()))); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseNullaryOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseNullaryOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorBroadcastingOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorBroadcastingOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TenosorCwiseUnary +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return 
RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TenosorCwiseUnary +template +struct ExtractAccessor, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseBinaryOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple(cl::sycl::handler& cgh, + const TensorEvaluator< + const TensorCwiseBinaryOp, Dev> + eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.left_impl()), + ExtractAccessor>::getTuple( + cgh, eval.right_impl()))) { + auto LHSTuple = ExtractAccessor>::getTuple( + cgh, eval.left_impl()); + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.right_impl()); + return utility::tuple::append(LHSTuple, RHSTuple); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseBinaryOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> + eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.left_impl()), + ExtractAccessor>::getTuple( + cgh, eval.right_impl()))) { + auto LHSTuple = ExtractAccessor>::getTuple( + cgh, eval.left_impl()); + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.right_impl()); + return utility::tuple::append(LHSTuple, RHSTuple); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseTernaryOp +template +struct ExtractAccessor, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator< + const TensorCwiseTernaryOp, Dev> + eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.arg1Impl()), + utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.arg2Impl()), + ExtractAccessor>::getTuple( + cgh, eval.arg3Impl())))) { + auto Arg1Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg1Impl()); + auto Arg2Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg2Impl()); + auto Arg3Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg3Impl()); + return utility::tuple::append(Arg1Tuple, + utility::tuple::append(Arg2Tuple, Arg3Tuple)); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseTernaryOp +template +struct ExtractAccessor, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator< + TensorCwiseTernaryOp, Dev> + eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.arg1Impl()), + utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.arg2Impl()), + ExtractAccessor>::getTuple( + cgh, eval.arg3Impl())))) { + auto Arg1Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg1Impl()); + auto Arg2Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg2Impl()); + auto Arg3Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg3Impl()); + return utility::tuple::append(Arg1Tuple, + utility::tuple::append(Arg2Tuple, Arg3Tuple)); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseSelectOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, + Dev> + 
eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.cond_impl()), + utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.then_impl()), + ExtractAccessor>::getTuple( + cgh, eval.else_impl())))) { + auto IfTuple = ExtractAccessor>::getTuple( + cgh, eval.cond_impl()); + auto ThenTuple = ExtractAccessor>::getTuple( + cgh, eval.then_impl()); + auto ElseTuple = ExtractAccessor>::getTuple( + cgh, eval.else_impl()); + return utility::tuple::append(IfTuple, + utility::tuple::append(ThenTuple, ElseTuple)); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseSelectOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> + eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.cond_impl()), + utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.then_impl()), + ExtractAccessor>::getTuple( + cgh, eval.else_impl())))) { + auto IfTuple = ExtractAccessor>::getTuple( + cgh, eval.cond_impl()); + auto ThenTuple = ExtractAccessor>::getTuple( + cgh, eval.then_impl()); + auto ElseTuple = ExtractAccessor>::getTuple( + cgh, eval.else_impl()); + return utility::tuple::append(IfTuple, + utility::tuple::append(ThenTuple, ElseTuple)); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorAssignOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.left_impl()), + ExtractAccessor>::getTuple( + cgh, eval.right_impl()))) { + auto LHSTuple = ExtractAccessor>::getTuple( + cgh, eval.left_impl()); + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.right_impl()); + return utility::tuple::append(LHSTuple, RHSTuple); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorAssignOp +template +struct ExtractAccessor, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + eval.left_impl()), + ExtractAccessor>::getTuple( + eval.right_impl()))) { + auto LHSTuple = ExtractAccessor>::getTuple( + cgh, eval.left_impl()); + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.right_impl()); + return utility::tuple::append(LHSTuple, RHSTuple); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorForcedEvalOp +template +struct ExtractAccessor, Dev>> { + using actual_type = + typename Eigen::internal::remove_all, Dev>::CoeffReturnType>::type; + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.data())))) { + return utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.data()))); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorForcedEvalOp +template +struct ExtractAccessor, Dev>> + : ExtractAccessor, Dev>> {}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorEvalToOp +template +struct ExtractAccessor, Dev>> { + using actual_type = + typename 
Eigen::internal::remove_all, Dev>::CoeffReturnType>::type; + + static auto getTuple(cl::sycl::handler& cgh, + TensorEvaluator, Dev> eval) + -> decltype(utility::tuple::append( + utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.data()))), + ExtractAccessor>::getTuple(cgh, + eval.impl()))) { + auto LHSTuple = utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.data()))); + + auto RHSTuple = + ExtractAccessor>::getTuple(cgh, eval.impl()); + return utility::tuple::append(LHSTuple, RHSTuple); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorEvalToOp +template +struct ExtractAccessor, Dev>> + : ExtractAccessor, Dev>> {}; + +/// template deduction for \ref ExtractAccessor +template +auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr) + -> decltype(ExtractAccessor::getTuple(cgh, expr)) { + return ExtractAccessor::getTuple(cgh, expr); +} +} +} +} +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_ACCESSOR_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h new file mode 100644 index 000000000..f69c5afcb --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h @@ -0,0 +1,313 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclextractFunctors.h + * + * \brief: + * Used to extract all the functors allocated to each node of the expression +*tree. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_FUNCTORS_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_FUNCTORS_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \struct FunctorExtractor: This struct is used to extract the functors +/// constructed on +/// the host-side, to pack them and reuse them in reconstruction of the +/// expression on the device. +/// We have to do that as in Eigen the functors are not stateless so we cannot +/// re-instantiate them on the device. +/// We have to pass whatever instantiated to the device. 
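A reduced sketch of that idea, with ScaleBy, PackedFunctor and ToyEvaluator as purely illustrative names rather than the Eigen types: the functor is copied out of the host-side evaluator together with its state, so the copy can later be captured by value into the kernel.

#include <cassert>

// A stateful functor: its coefficient must travel with it to the device.
struct ScaleBy {
  float factor;
  float operator()(float x) const { return factor * x; }
};

// What an extractor hands to the kernel: a by-value copy of the functor.
template <typename Func>
struct PackedFunctor {
  Func func;
  template <typename Evaluator>
  explicit PackedFunctor(const Evaluator& eval) : func(eval.functor()) {}
};

// Stand-in for a host-side evaluator over a unary expression node.
struct ToyEvaluator {
  ScaleBy f{2.5f};
  const ScaleBy& functor() const { return f; }
};

int main() {
  ToyEvaluator eval;
  PackedFunctor<ScaleBy> packed(eval);  // extracted on the host
  assert(packed.func(2.0f) == 5.0f);    // the state is preserved in the copy
  return 0;
}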
+template +struct FunctorExtractor; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorMap: +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + using Dimensions = typename PlainObjectType::Dimensions; + const Dimensions m_dimensions; + const Dimensions& dimensions() const { return m_dimensions; } + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : m_dimensions(expr.dimensions()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorMap +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + using Dimensions = typename PlainObjectType::Dimensions; + const Dimensions m_dimensions; + const Dimensions& dimensions() const { return m_dimensions; } + FunctorExtractor( + const TensorEvaluator, Dev>& + expr) + : m_dimensions(expr.dimensions()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorForcedEvalOp +template +struct FunctorExtractor, Dev>> { + using Dimensions = typename Expr::Dimensions; + const Dimensions m_dimensions; + const Dimensions& dimensions() const { return m_dimensions; } + FunctorExtractor(const TensorEvaluator, Dev>& expr) + : m_dimensions(expr.dimensions()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorForcedEvalOp +template +struct FunctorExtractor, Dev>> { + using Dimensions = + typename TensorEvaluator, Dev>::Dimensions; + const Dimensions m_dimensions; + const Dimensions& dimensions() const { return m_dimensions; } + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : m_dimensions(expr.dimensions()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseNullaryOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseNullaryOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorBroadcastingOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorBroadcastingOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseUnaryOp +template +struct FunctorExtractor, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseUnaryOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : 
rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseBinaryOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> lhsExpr; + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& + expr) + : lhsExpr(expr.left_impl()), + rhsExpr(expr.right_impl()), + func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseBinaryOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> lhsExpr; + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor(const TensorEvaluator< + const TensorCwiseBinaryOp, Dev>& expr) + : lhsExpr(expr.left_impl()), + rhsExpr(expr.right_impl()), + func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseTernaryOp +template +struct FunctorExtractor, Dev>> { + FunctorExtractor> arg1Expr; + FunctorExtractor> arg2Expr; + FunctorExtractor> arg3Expr; + OP func; + FunctorExtractor(const TensorEvaluator< + const TensorCwiseTernaryOp, + Dev>& expr) + : arg1Expr(expr.arg1Impl()), + arg2Expr(expr.arg2Impl()), + arg3Expr(expr.arg3Impl()), + func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseTernaryOp +template +struct FunctorExtractor, Dev>> { + FunctorExtractor> arg1Expr; + FunctorExtractor> arg2Expr; + FunctorExtractor> arg3Expr; + OP func; + FunctorExtractor( + const TensorEvaluator< + TensorCwiseTernaryOp, Dev>& expr) + : arg1Expr(expr.arg1Impl()), + arg2Expr(expr.arg2Impl()), + arg3Expr(expr.arg3Impl()), + func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseSelectOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> ifExpr; + FunctorExtractor> thenExpr; + FunctorExtractor> elseExpr; + FunctorExtractor(const TensorEvaluator< + const TensorSelectOp, Dev>& expr) + : ifExpr(expr.cond_impl()), + thenExpr(expr.then_impl()), + elseExpr(expr.else_impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseSelectOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor ifExpr; + FunctorExtractor thenExpr; + FunctorExtractor elseExpr; + FunctorExtractor( + const TensorEvaluator, Dev>& + expr) + : ifExpr(expr.cond_impl()), + thenExpr(expr.then_impl()), + elseExpr(expr.else_impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorAssignOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> lhsExpr; + FunctorExtractor> rhsExpr; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorAssignOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> lhsExpr; + FunctorExtractor> rhsExpr; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorEvalToOp +template +struct FunctorExtractor, Dev>> { + FunctorExtractor> rhsExpr; + FunctorExtractor(const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()) 
{} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorEvalToOp +template +struct FunctorExtractor, Dev>> { + FunctorExtractor> rhsExpr; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()) {} +}; + +/// template deduction function for FunctorExtractor +template +auto extractFunctors(const Evaluator& evaluator) + -> FunctorExtractor { + return FunctorExtractor(evaluator); +} +} // namespace internal +} // namespace TensorSycl +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_FUNCTORS_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h new file mode 100644 index 000000000..77e0e15e1 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h @@ -0,0 +1,188 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclLeafCount.h + * + * \brief: + * The leaf count used the pre-order expression tree traverse in order to name + * count the number of leaf nodes in the expression + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_LEAF_COUNT_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_LEAF_COUNT_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \brief LeafCount used to counting terminal nodes. The total number of +/// leaf nodes is used by MakePlaceHolderExprHelper to find the order +/// of the leaf node in a expression tree at compile time. 
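For instance, on a toy expression tree (Leaf and Plus below are illustrative stand-ins, not the Eigen nodes) the count is just the sum over the children, evaluated entirely at compile time:

#include <cstddef>

template <typename T> struct Leaf {};
template <typename L, typename R> struct Plus {};

template <typename Expr> struct Count;

// A terminal node contributes exactly one leaf.
template <typename T>
struct Count<Leaf<T> > {
  static const std::size_t value = 1;
};

// An inner node contributes the sum of its children.
template <typename L, typename R>
struct Count<Plus<L, R> > {
  static const std::size_t value = Count<L>::value + Count<R>::value;
};

// (Leaf + Leaf) + Leaf has three terminal nodes, hence three accessors.
static_assert(Count<Plus<Plus<Leaf<float>, Leaf<float> >, Leaf<int> > >::value == 3,
              "three leaves lead to three entries in the accessor tuple");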
+template +struct LeafCount; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorMap +template class MakePointer_> +struct LeafCount> { + static const size_t Count = 1; +}; + +/// specialisation of the \ref LeafCount struct when the node type is TensorMap +template class MakePointer_> +struct LeafCount> { + static const size_t Count = 1; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorCwiseNullaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorCwiseNullaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorBroadcastingOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorCwiseNullaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +// TensorCwiseUnaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +// TensorCwiseUnaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorCwiseBinaryOp +template +struct LeafCount> { + static const size_t Count = + LeafCount::Count + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorCwiseBinaryOp +template +struct LeafCount> { + static const size_t Count = + LeafCount::Count + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorCwiseTernaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count + + LeafCount::Count + + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorCwiseTernaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count + + LeafCount::Count + + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorCwiseSelectOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count + + LeafCount::Count + + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorCwiseSelectOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count + + LeafCount::Count + + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorAssignOp +template +struct LeafCount> { + static const size_t Count = + LeafCount::Count + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorAssignOp +template +struct LeafCount> { + static const size_t Count = + LeafCount::Count + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorForcedEvalOp +template +struct LeafCount> { + static const size_t Count = 1; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorForcedEvalOp +template +struct LeafCount> { + static const size_t Count = 1; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorEvalToOp +template +struct LeafCount> { + static const size_t Count = 1 + 
LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorEvalToOp +template +struct LeafCount> { + static const size_t Count = 1 + LeafCount::Count; +}; +} +} +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_LEAF_COUNT_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h new file mode 100644 index 000000000..87995a25e --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h @@ -0,0 +1,151 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclPlaceHolder.h + * + * \brief: + * The PlaceHolder expression are nothing but a container preserving + * the order of actual data in the tuple of sycl buffer. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_HPP + +namespace Eigen { +namespace internal { +/// \struct PlaceHolder +/// \brief PlaceHolder is used to replace the \ref TensorMap in the expression +/// tree. +/// PlaceHolder contains the order of the leaf node in the expression tree. +template +struct PlaceHolder { + static constexpr size_t I = N; + using Type = Scalar; +}; + +template class MakePointer_, size_t N> +struct PlaceHolder, + N> { + static constexpr size_t I = N; + + using Type = const TensorMap; + + typedef typename Type::Self Self; + typedef typename Type::Base Base; + typedef typename Type::Nested Nested; + typedef typename Type::StorageKind StorageKind; + typedef typename Type::Index Index; + typedef typename Type::Scalar Scalar; + typedef typename Type::RealScalar RealScalar; + typedef typename Type::CoeffReturnType CoeffReturnType; +}; + +/// \brief specialisation of the PlaceHolder node for TensorForcedEvalOp. The +/// TensorForcedEvalOp act as a leaf node for its parent node. 
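What a placeholder needs to carry is small: the type of the leaf it stands in for and the index N of that leaf in the flattened expression, so the matching accessor can later be fetched from the tuple. A minimal sketch with hypothetical names (ToyPlaceHolder is not the Eigen class):

#include <cstddef>

template <typename LeafType, std::size_t N>
struct ToyPlaceHolder {
  static const std::size_t I = N;  // slot of this leaf in the accessor tuple
  using Type = LeafType;           // the leaf type being replaced
};

// The two inputs of a binary expression keep the order in which they were counted.
using FirstInput  = ToyPlaceHolder<const float*, 0>;
using SecondInput = ToyPlaceHolder<const float*, 1>;

static_assert(FirstInput::I == 0 && SecondInput::I == 1,
              "each leaf remembers its position in the tuple of accessors");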
+template +struct PlaceHolder, N> { + static constexpr size_t I = N; + + using Type = const TensorForcedEvalOp; + + typedef typename Type::Nested Nested; + typedef typename Type::StorageKind StorageKind; + typedef typename Type::Index Index; + + typedef typename Type::Scalar Scalar; + typedef typename Type::Packet Packet; + + typedef typename Type::RealScalar RealScalar; + typedef typename Type::CoeffReturnType CoeffReturnType; + typedef typename Type::PacketReturnType PacketReturnType; +}; + +template +struct PlaceHolder, N> { + static constexpr size_t I = N; + + using Type = TensorForcedEvalOp; + + typedef typename Type::Nested Nested; + typedef typename Type::StorageKind StorageKind; + typedef typename Type::Index Index; + + typedef typename Type::Scalar Scalar; + typedef typename Type::Packet Packet; + + typedef typename Type::RealScalar RealScalar; + typedef typename Type::CoeffReturnType CoeffReturnType; + typedef typename Type::PacketReturnType PacketReturnType; +}; + +/// \brief specialisation of the PlaceHolder node for const TensorMap +template class Makepointer_, size_t N> +struct PlaceHolder, N> { + static constexpr size_t I = N; + + using Type = TensorMap; + + typedef typename Type::Self Self; + typedef typename Type::Base Base; + typedef typename Type::Nested Nested; + typedef typename Type::StorageKind StorageKind; + typedef typename Type::Index Index; + typedef typename Type::Scalar Scalar; + typedef typename Type::Packet Packet; + typedef typename Type::RealScalar RealScalar; + typedef typename Type::CoeffReturnType CoeffReturnType; + typedef typename Base::PacketReturnType PacketReturnType; +}; + +/// specialisation of the traits struct for PlaceHolder +template class Makepointer_, size_t N> +struct traits< + PlaceHolder, N>> + : public traits { + typedef traits BaseTraits; + typedef typename BaseTraits::Scalar Scalar; + typedef typename BaseTraits::StorageKind StorageKind; + typedef typename BaseTraits::Index Index; + static const int NumDimensions = BaseTraits::NumDimensions; + static const int Layout = BaseTraits::Layout; + enum { + Options = Options_, + Flags = BaseTraits::Flags, + }; +}; + +template class Makepointer_, size_t N> +struct traits< + PlaceHolder, N>> + : public traits { + typedef traits BaseTraits; + typedef typename BaseTraits::Scalar Scalar; + typedef typename BaseTraits::StorageKind StorageKind; + typedef typename BaseTraits::Index Index; + static const int NumDimensions = BaseTraits::NumDimensions; + static const int Layout = BaseTraits::Layout; + enum { + Options = Options_, + Flags = BaseTraits::Flags, + }; +}; + +} // end namespoace internal +} // end namespoace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h new file mode 100644 index 000000000..dbd7a8544 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h @@ -0,0 +1,293 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +/***************************************************************** + * TensorSyclPlaceHolderExpr.h + * + * \brief: + * This is the specialisation of the placeholder expression based on the + * operation type + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_EXPR_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_EXPR_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \sttruct PlaceHolderExpression +/// \brief it is used to create the PlaceHolder expression. The PlaceHolder +/// expression is a copy of expression type in which the TensorMap of the has +/// been replaced with PlaceHolder. +template +struct PlaceHolderExpression; + +/// specialisation of the \ref PlaceHolderExpression when the node is TensorMap +template class MakePointer_, size_t N> +struct PlaceHolderExpression< + Eigen::TensorMap, + Options2_, MakePointer_>, + N> { + using Type = Eigen::internal::PlaceHolder< + Eigen::TensorMap, + Options2_, MakePointer_>, + N>; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorMap +template class MakePointer_, size_t N> +struct PlaceHolderExpression< + const Eigen::TensorMap, + Options2_, MakePointer_>, + N> { + using Type = const Eigen::internal::PlaceHolder< + const TensorMap, + Options2_, MakePointer_>, + N>; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseNullaryOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = TensorCwiseNullaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorCwiseNullaryOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = const TensorCwiseNullaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorBroadcastingOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = TensorBroadcastingOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorBroadcastingOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = const TensorBroadcastingOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseUnaryOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = TensorCwiseUnaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorCwiseUnaryOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = const TensorCwiseUnaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseBinaryOp +template +struct PlaceHolderExpression, N> { + static const size_t RHSLeafCount = LeafCount::Count; + + using LHSPlaceHolderType = + typename PlaceHolderExpression::Type; + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = TensorCwiseBinaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorCwiseBinaryOp +template +struct PlaceHolderExpression, + N> { + static const size_t RHSLeafCount = LeafCount::Count; + + using 
LHSPlaceHolderType = + typename PlaceHolderExpression::Type; + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = + const TensorCwiseBinaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorCwiseSelectOp +template +struct PlaceHolderExpression< + const TensorCwiseTernaryOp, N> { + static const size_t Arg3LeafCount = LeafCount::Count; + static const size_t Arg2LeafCount = LeafCount::Count; + + using Arg1PlaceHolderType = + typename PlaceHolderExpression::Type; + using Arg2PlaceHolderType = + typename PlaceHolderExpression::Type; + + using Arg3PlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = + const TensorCwiseTernaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseSelectOp +template +struct PlaceHolderExpression< + TensorCwiseTernaryOp, N> { + static const size_t Arg3LeafCount = LeafCount::Count; + static const size_t Arg2LeafCount = LeafCount::Count; + + using Arg1PlaceHolderType = + typename PlaceHolderExpression::Type; + using Arg2PlaceHolderType = + typename PlaceHolderExpression::Type; + + using Arg3PlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = TensorCwiseTernaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorCwiseSelectOp +template +struct PlaceHolderExpression, + N> { + static const size_t ElseLeafCount = LeafCount::Count; + static const size_t ThenLeafCount = LeafCount::Count; + + using IfPlaceHolderType = + typename PlaceHolderExpression::Type; + using ThenPlaceHolderType = + typename PlaceHolderExpression::Type; + + using ElsePlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = const TensorSelectOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseSelectOp +template +struct PlaceHolderExpression, N> { + static const size_t ElseLeafCount = LeafCount::Count; + static const size_t ThenLeafCount = LeafCount::Count; + + using IfPlaceHolderType = + typename PlaceHolderExpression::Type; + using ThenPlaceHolderType = + typename PlaceHolderExpression::Type; + + using ElsePlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = TensorSelectOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorAssignOp +template +struct PlaceHolderExpression, N> { + static const size_t RHSLeafCount = LeafCount::Count; + + using LHSPlaceHolderType = + typename PlaceHolderExpression::Type; + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = TensorAssignOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorAssignOp +template +struct PlaceHolderExpression, N> { + static const size_t RHSLeafCount = LeafCount::Count; + + using LHSPlaceHolderType = + typename PlaceHolderExpression::Type; + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = const TensorAssignOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorForcedEvalOp +template +struct PlaceHolderExpression, N> { + using Type = + const Eigen::internal::PlaceHolder, N>; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorForcedEvalOp +template +struct PlaceHolderExpression, N> { + using Type = Eigen::internal::PlaceHolder, N>; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorEvalToOp +template 
<typename RHSExpr, size_t N>
+struct PlaceHolderExpression<const TensorEvalToOp<RHSExpr>, N> {
+  static const size_t RHSLeafCount = LeafCount<RHSExpr>::Count;
+
+  using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+
+  using Type = const TensorEvalToOp<RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorEvalToOp
+template <typename RHSExpr, size_t N>
+struct PlaceHolderExpression<TensorEvalToOp<RHSExpr>, N> {
+  static const size_t RHSLeafCount = LeafCount<RHSExpr>::Count;
+
+  using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+
+  using Type = TensorEvalToOp<RHSPlaceHolderType>;
+};
+
+/// template deduction for \ref PlaceHolderExpression struct
+template <typename Expr>
+struct createPlaceHolderExpression {
+  static const size_t TotalLeaves = LeafCount<Expr>::Count;
+  using Type = typename PlaceHolderExpression<Expr, TotalLeaves - 1>::Type;
+};
+}  // namespace internal
+}  // namespace TensorSycl
+}  // namespace Eigen
+
+#endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_EXPR_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
new file mode 100644
index 000000000..3758d46a0
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
@@ -0,0 +1,84 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli    Codeplay Software Ltd.
+// Ralph Potter  Codeplay Software Ltd.
+// Luke Iwanski  Codeplay Software Ltd.
+// Cummins Chris PhD student at The University of Edinburgh.
+// Contact:
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclRun.h
+ *
+ * \brief:
+ * Schedule_kernel invokes a specialised version of the kernel struct. The
+ * specialisation is based on the data dimension in the SYCL buffer.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+/// The run function in tensor sycl converts the expression tree to a
+/// buffer-based expression tree;
+/// creates the expression tree for the device with accessors to buffers;
+/// constructs the kernel and submits it to the sycl queue.
+template <typename Expr, typename Dev>
+void run(Expr &expr, Dev &dev) {
+  Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev);
+  const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
+  if (needs_assign) {
+    using PlaceHolderExpr =
+        typename internal::createPlaceHolderExpression<Expr>::Type;
+    auto functors = internal::extractFunctors(evaluator);
+
+    dev.m_queue.submit([&](cl::sycl::handler &cgh) {
+
+      // create a tuple of accessors from Evaluator
+      auto tuple_of_accessors =
+          internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
+      const auto range =
+          utility::tuple::get<0>(tuple_of_accessors).get_range()[0];
+
+      size_t outTileSize = range;
+      if (range > 64) outTileSize = 64;
+      size_t yMode = range % outTileSize;
+      int yRange = static_cast<int>(range);
+      if (yMode != 0) yRange += (outTileSize - yMode);
+
+      // run the kernel
+      cgh.parallel_for<PlaceHolderExpr>(
+          cl::sycl::nd_range<1>(cl::sycl::range<1>(yRange),
+                                cl::sycl::range<1>(outTileSize)),
+          [=](cl::sycl::nd_item<1> itemID) {
+            using DevExpr =
+                typename internal::ConvertToDeviceExpression<Expr>::Type;
+
+            auto device_expr =
+                internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(
+                    functors, tuple_of_accessors);
+            auto device_evaluator =
+                Eigen::TensorEvaluator<decltype(device_expr.expr),
+                                       Eigen::DefaultDevice>(
+                    device_expr.expr, Eigen::DefaultDevice());
+
+            if (itemID.get_global_linear_id() < range) {
+              device_evaluator.evalScalar(
+                  static_cast<int>(itemID.get_global_linear_id()));
+            }
+          });
+    });
+    dev.m_queue.throw_asynchronous();
+  }
+  evaluator.cleanup();
+}
+}  // namespace TensorSycl
+}  // namespace Eigen
+
+#endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h
new file mode 100644
index 000000000..8b9fc52c4
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h
@@ -0,0 +1,264 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli    Codeplay Software Ltd.
+// Ralph Potter  Codeplay Software Ltd.
+// Luke Iwanski  Codeplay Software Ltd.
+// Contact:
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclTuple.h
+ *
+ * \brief:
+ * Minimal implementation of std::tuple that can be used inside a SYCL kernel.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_TUPLE_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_TUPLE_HPP
+namespace utility {
+namespace tuple {
+/// \struct EnableIf
+/// \brief The EnableIf struct is used to statically define a type based on
+/// the condition.
+template <bool, typename T = void>
+struct EnableIf {};
+/// \brief specialisation of the \ref EnableIf when the condition is true
+template <typename T>
+struct EnableIf<true, T> {
+  typedef T type;
+};
+
+/// \struct Tuple
+/// \brief is a fixed-size collection of heterogeneous values
+/// \tparam Ts... - the types of the elements that the tuple stores.
+/// Empty list is supported.
+template <class... Ts>
+struct Tuple {};
+
+/// \brief specialisation of the \ref Tuple class when the tuple has at least
+/// one element.
+/// \tparam T : the type of the first element in the tuple.
+/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty.
+template <class T, class... Ts>
+struct Tuple<T, Ts...> {
+  Tuple(T t, Ts... ts) : head(t), tail(ts...) {}
+
+  T head;
+  Tuple<Ts...> tail;
+};
+
+/// \struct ElemTypeHolder
+/// \brief ElemTypeHolder class is used to specify the types of the
+/// elements inside the tuple
+/// \tparam size_t the index of the element inside the tuple
+/// \tparam class the tuple class
+template <size_t, class>
+struct ElemTypeHolder;
+
+/// \brief specialisation of the \ref ElemTypeHolder class when the requested
+/// index is 0, i.e. the type of the first element in the tuple.
+template <class T, class... Ts>
+struct ElemTypeHolder<0, Tuple<T, Ts...>> {
+  typedef T type;
+};
+
+/// \brief specialisation of the \ref ElemTypeHolder class when the requested
+/// index is bigger than 0. It recursively calls itself to detect the type of
+/// the Kth element in the tuple.
+/// \tparam T : the type of the first element in the tuple.
+/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty.
+/// \tparam k is the Kth element in the tuple
+template <size_t k, class T, class... Ts>
+struct ElemTypeHolder<k, Tuple<T, Ts...>> {
+  typedef typename ElemTypeHolder<k - 1, Tuple<Ts...>>::type type;
+};
+
+/// get
+/// \brief Extracts the first element from the tuple.
+/// K=0 represents the first element of the tuple. The tuple cannot be empty.
+/// \tparam Ts... are the element types in the tuple.
+/// \param t is the tuple whose contents to extract
+/// \return typename ElemTypeHolder<0, Tuple<Ts...>>::type &
+template <size_t k, class... Ts>
+typename EnableIf<k == 0,
+                  typename ElemTypeHolder<0, Tuple<Ts...>>::type &>::type
+get(Tuple<Ts...> &t) {
+  return t.head;
+}
+/// get
+/// \brief Extracts the Kth element from the tuple.
+/// \tparam k is an integer value in [0,sizeof...(Types)).
+/// \tparam T is the type of the first element in the tuple; the lookup
+/// recurses into the tail.
+/// \tparam Ts... are the element types in the tuple.
+/// \param t is the tuple whose contents to extract
+/// \return typename ElemTypeHolder<k, Tuple<T, Ts...>>::type &
+template <size_t k, class T, class... Ts>
+typename EnableIf<k != 0,
+                  typename ElemTypeHolder<k, Tuple<T, Ts...>>::type &>::type
+get(Tuple<T, Ts...> &t) {
+  return get<k - 1>(t.tail);
+}
+
+/// get
+/// \brief Extracts the first element from the tuple when the tuple and all the
+/// elements inside are const.
+/// K=0 represents the first element of the tuple. The tuple cannot be empty.
+/// \tparam Ts... are the element types in the tuple.
+/// \param t is the const tuple whose contents to extract
+/// \return const typename ElemTypeHolder<0, Tuple<Ts...>>::type &
+template <size_t k, class... Ts>
+typename EnableIf<k == 0,
+                  const typename ElemTypeHolder<0, Tuple<Ts...>>::type &>::type
+get(const Tuple<Ts...> &t) {
+  return t.head;
+}
+
+/// get
+/// \brief Extracts the Kth element from the tuple when the tuple and all the
+/// elements inside are const.
+/// \tparam k is an integer value in [0,sizeof...(Types)).
+/// \tparam T is the type of the first element in the tuple; the lookup
+/// recurses into the tail.
+/// \tparam Ts... are the element types in the tuple.
+/// \param t is the const tuple whose contents to extract
+/// \return const typename ElemTypeHolder<k, Tuple<T, Ts...>>::type &
+template <size_t k, class T, class... Ts>
+typename EnableIf<
+    k != 0, const typename ElemTypeHolder<k, Tuple<T, Ts...>>::type &>::type
+get(const Tuple<T, Ts...> &t) {
+  return get<k - 1>(t.tail);
+}
+/// make_tuple
+/// \brief Creates a tuple object, deducing the target type from the types of
+/// arguments.
+/// \tparam Args the type of the arguments to construct the tuple from
+/// \param args zero or more arguments to construct the tuple from
+/// \return Tuple<Args...>
+template <typename... Args>
+Tuple<Args...> make_tuple(Args... args) {
+  return Tuple<Args...>(args...);
+}
+
+/// size
+/// \brief Provides access to the number of elements in a tuple as a
+/// compile-time constant expression.
+/// \tparam Args the type of the arguments to construct the tuple from +/// \return size_t +template +static constexpr size_t size(Tuple &) { + return sizeof...(Args); +} + +/// \struct Index_list +/// \brief Creates a list of index from the elements in the tuple +/// \tparam Is... a list of index from [0 to sizeof...(tuple elements)) +template +struct Index_list {}; + +/// \struct RangeBuilder +/// \brief Collects internal details for generating index ranges [MIN, MAX) +/// Declare primary template for index range builder +/// \tparam MIN is the starting index in the tuple +/// \tparam N represents sizeof..(elements)- sizeof...(Is) +/// \tparam Is... are the list of generated index so far +template +struct RangeBuilder; + +/// \brief base Step: Specialisation of the \ref RangeBuilder when the +/// MIN==MAX. In this case the Is... is [0 to sizeof...(tuple elements)) +/// \tparam MIN is the starting index of the tuple +/// \tparam Is is [0 to sizeof...(tuple elements)) +template +struct RangeBuilder { + typedef Index_list type; +}; + +/// Induction step: Specialisation of the RangeBuilder class when N!=MIN +/// in this case we are recursively subtracting the N by one and adding one +/// index to Is... list until MIN==N +/// \tparam MIN is the starting index in the tuple +/// \tparam N represents sizeof..(elements)- sizeof...(Is) +/// \tparam Is... are the list of generated index so far +template +struct RangeBuilder : public RangeBuilder {}; + +/// \brief IndexRange that returns a [MIN, MAX) index range +/// \tparam MIN is the starting index in the tuple +/// \tparam MAX is the size of the tuple +template +using Index_range = typename RangeBuilder::type; + +/// append_impl +/// \brief unpacking the elements of the input tuple t and creating a new tuple +/// by adding element a at the end of it. +/// \tparam Args... the type of the elements inside the tuple t +/// \tparam T the type of the new element going to be added at the end of tuple +/// \tparam I... is the list of index from [0 to sizeof...(t)) +/// \param t the tuple on which we want to append a. +/// \param a the new elements going to be added to the tuple +/// \return Tuple +template +Tuple append_impl(utility::tuple::Tuple t, T a, + utility::tuple::Index_list) { + return utility::tuple::make_tuple(get(t)..., a); +} + +/// append +/// \brief the deduction function for \ref append_impl that automatically +/// generate the \ref Index_range +/// \tparam Args... the type of the elements inside the tuple t +/// \tparam T the type of the new element going to be added at the end of tuple +/// \param t the tuple on which we want to append a. +/// \param a the new elements going to be added to the tuple +/// \return Tuple +template +Tuple append(Tuple t, T a) { + return utility::tuple::append_impl( + t, a, utility::tuple::Index_range<0, sizeof...(Args)>()); +} + +/// append_impl +/// \brief This is an specialised of \ref append_impl when we want to +/// concatenate +/// tuple t2 at the end of the tuple t1. Here we unpack both tuples, generate +/// the +/// Index_range for each of them and create an output tuple T that contains both +/// elements of t1 and t2. +/// \tparam Args1... the type of the elements inside the tuple t1 +/// \tparam Args2... the type of the elements inside the tuple t2 +/// \tparam I1... is the list of index from [0 to sizeof...(t1)) +/// \tparam I2... is the list of index from [0 to sizeof...(t2)) +/// \param t1 is the tuple on which we want to append t2. +/// \param t2 is the tuple that is going to be added on t1. 
+/// \return Tuple +template +Tuple append_impl(utility::tuple::Tuple t1, + utility::tuple::Tuple t2, + utility::tuple::Index_list, + utility::tuple::Index_list) { + return utility::tuple::make_tuple(utility::tuple::get(t1)..., + utility::tuple::get(t2)...); +} +/// append +/// \brief deduction function for \ref append_impl when we are appending tuple +/// t1 by tuple t2. In this case the \ref Index_range for both tuple are +/// automatically generated. +/// \tparam Args1... the type of the elements inside the tuple t1 +/// \tparam Args2... the type of the elements inside the tuple t2 +/// \param t1 is the tuple on which we want to append t2. +/// \param t2 is the tuple that is going to be added on t1. +/// \return Tuple +template +Tuple append(utility::tuple::Tuple t1, + utility::tuple::Tuple t2) { + return utility::tuple::append_impl( + t1, t2, utility::tuple::Index_range<0, sizeof...(Args1)>(), + utility::tuple::Index_range<0, sizeof...(Args2)>()); +} +} // tuple +} // utility +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_TUPLE_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index b7597b3a5..62c5caf6c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -56,11 +56,12 @@ struct traits > Options = Options_, Flags = compute_tensor_flags::ret | (is_const::value ? 0 : LvalueBit) }; + template using MakePointer = MakePointer; }; -template -struct traits > +template class MakePointer_> +struct traits > { typedef Scalar_ Scalar; typedef Dense StorageKind; @@ -71,11 +72,12 @@ struct traits > Options = Options_, Flags = compute_tensor_flags::ret | (is_const::value ? 0: LvalueBit) }; + template using MakePointer = MakePointer_; }; -template -struct traits > +template class MakePointer_> +struct traits > : public traits { typedef traits BaseTraits; @@ -88,6 +90,7 @@ struct traits > Options = Options_, Flags = BaseTraits::Flags }; + template using MakePointer = MakePointer_; }; template diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 6188b421a..de9b5243a 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -138,6 +138,13 @@ endif() endif() if(EIGEN_TEST_CXX11) + if(EIGEN_TEST_SYCL) + ei_add_test_sycl(cxx11_tensor_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_sycl_forced_eval "-std=c++11") + ei_add_test_sycl(cxx11_tensor_sycl_broadcast "-std=c++11") + ei_add_test_sycl(cxx11_tensor_sycl_device "-std=c++11") + endif(EIGEN_TEST_SYCL) + # It should be safe to always run these tests as there is some fallback code for # older compiler that don't support cxx11. set(CMAKE_CXX_STANDARD 11) diff --git a/unsupported/test/cxx11_tensor_sycl.cpp b/unsupported/test/cxx11_tensor_sycl.cpp new file mode 100644 index 000000000..1ec9b1883 --- /dev/null +++ b/unsupported/test/cxx11_tensor_sycl.cpp @@ -0,0 +1,157 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_SYCL + +#include "main.h" +#include + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + +// Types used in tests: +using TestTensor = Tensor; +using TestTensorMap = TensorMap>; + +void test_sycl_cpu() { + cl::sycl::gpu_selector s; + cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); + SyclDevice sycl_device(q); + + int sizeDim1 = 100; + int sizeDim2 = 100; + int sizeDim3 = 100; + array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + TestTensor in1(tensorRange); + TestTensor in2(tensorRange); + TestTensor in3(tensorRange); + TestTensor out(tensorRange); + in1 = in1.random(); + in2 = in2.random(); + in3 = in3.random(); + TestTensorMap gpu_in1(in1.data(), tensorRange); + TestTensorMap gpu_in2(in2.data(), tensorRange); + TestTensorMap gpu_in3(in3.data(), tensorRange); + TestTensorMap gpu_out(out.data(), tensorRange); + + /// a=1.2f + gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f); + sycl_device.deallocate(in1.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(in1(i,j,k), 1.2f); + } + } + } + printf("a=1.2f Test passed\n"); + + /// a=b*1.2f + gpu_out.device(sycl_device) = gpu_in1 * 1.2f; + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * 1.2f); + } + } + } + printf("a=b*1.2f Test Passed\n"); + + /// c=a*b + gpu_out.device(sycl_device) = gpu_in1 * gpu_in2; + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * + in2(i,j,k)); + } + } + } + printf("c=a*b Test Passed\n"); + + /// c=a+b + gpu_out.device(sycl_device) = gpu_in1 + gpu_in2; + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) + + in2(i,j,k)); + } + } + } + printf("c=a+b Test Passed\n"); + + /// c=a*a + gpu_out.device(sycl_device) = gpu_in1 * gpu_in1; + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * + in1(i,j,k)); + } + } + } + + printf("c= a*a Test Passed\n"); + + //a*3.14f + b*2.7f + gpu_out.device(sycl_device) = gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f); + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * 3.14f + + in2(i,j,k) * 2.7f); + } + } + } + printf("a*3.14f + b*2.7f Test Passed\n"); + + ///d= (a>0.5? 
b:c) + gpu_out.device(sycl_device) =(gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3); + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) > 0.5f) + ? in2(i, j, k) + : in3(i, j, k)); + } + } + } + printf("d= (a>0.5? b:c) Test Passed\n"); + +} +void test_cxx11_tensor_sycl() { + CALL_SUBTEST(test_sycl_cpu()); +} diff --git a/unsupported/test/cxx11_tensor_sycl_broadcast.cpp b/unsupported/test/cxx11_tensor_sycl_broadcast.cpp new file mode 100644 index 000000000..1babbc038 --- /dev/null +++ b/unsupported/test/cxx11_tensor_sycl_broadcast.cpp @@ -0,0 +1,76 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_sycl_broadcast +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_SYCL + +#include "main.h" +#include + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + +// Types used in tests: +using TestTensor = Tensor; +using TestTensorMap = TensorMap>; +static void test_sycl_broadcast(){ + + cl::sycl::gpu_selector s; + cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); + SyclDevice sycl_device(q); + // BROADCAST test: + array in_range = {{2, 3, 5, 7}}; + array broadcasts = {{2, 3, 1, 4}}; + array out_range; // = in_range * broadcasts + for (size_t i = 0; i < out_range.size(); ++i) + out_range[i] = in_range[i] * broadcasts[i]; + + Tensor input(in_range); + Tensor output(out_range); + + for (int i = 0; i < input.size(); ++i) + input(i) = static_cast(i); + + TensorMap gpu_in(input.data(), in_range); + TensorMap gpu_out(output.data(), out_range); + gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts); + sycl_device.deallocate(output.data()); + + for (size_t i = 0; i < in_range.size(); ++i) + VERIFY_IS_EQUAL(output.dimension(i), out_range[i]); + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 28; ++l) { + VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), output(i,j,k,l)); + } + } + } + } + printf("Broadcast Test Passed\n"); +} + +void test_cxx11_tensor_sycl_broadcast() { + CALL_SUBTEST(test_sycl_broadcast()); +} diff --git a/unsupported/test/cxx11_tensor_sycl_device.cpp b/unsupported/test/cxx11_tensor_sycl_device.cpp new file mode 100644 index 000000000..2c1c17972 --- /dev/null +++ b/unsupported/test/cxx11_tensor_sycl_device.cpp @@ -0,0 +1,37 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_sycl_device
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+
+void test_sycl_device() {
+  cl::sycl::gpu_selector s;
+  cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {
+    for (const auto& e : l) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const cl::sycl::exception& e) {
+        std::cout << e.what() << std::endl;
+      }
+    }
+  });
+  SyclDevice sycl_device(q);
+  printf("Hello from ComputeCpp: Device Exists\n");
+}
+void test_cxx11_tensor_sycl_device() {
+  CALL_SUBTEST(test_sycl_device());
+}
diff --git a/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp b/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp
new file mode 100644
index 000000000..ee934d4fa
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp
@@ -0,0 +1,64 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_sycl_forced_eval
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_sycl_gpu() {
+  cl::sycl::gpu_selector s;
+  cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {
+    for (const auto& e : l) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const cl::sycl::exception& e) {
+        std::cout << e.what() << std::endl;
+      }
+    }
+  });
+  SyclDevice sycl_device(q);
+
+  int sizeDim1 = 100;
+  int sizeDim2 = 200;
+  int sizeDim3 = 200;
+  Eigen::array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+  Eigen::Tensor<float, 3> in1(tensorRange);
+  Eigen::Tensor<float, 3> in2(tensorRange);
+  Eigen::Tensor<float, 3> out(tensorRange);
+
+  in1 = in1.random() + in1.constant(10.0f);
+  in2 = in2.random() + in2.constant(10.0f);
+
+  // creating TensorMap from tensor
+  Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in1(in1.data(), tensorRange);
+  Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in2(in2.data(), tensorRange);
+  Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_out(out.data(), tensorRange);
+
+  /// c=(a+b)*b
+  gpu_out.device(sycl_device) = (gpu_in1 + gpu_in2).eval() * gpu_in2;
+  sycl_device.deallocate(out.data());
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
+        VERIFY_IS_APPROX(out(i, j, k),
+                         (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k));
+      }
+    }
+  }
+  printf("(a+b)*b Test Passed\n");
+}
+
+void test_cxx11_tensor_sycl_forced_eval() { CALL_SUBTEST(test_sycl_gpu()); }
-- 
cgit v1.2.3
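
The kernel launch in the TensorSyclRun.h hunk above flattens the output into a 1-D nd_range: the work-group size is capped at 64, the global size is the evaluation range rounded up to the next multiple of that cap, and the surplus work-items are masked off inside the kernel. For example, a range of 100 elements launches 128 work-items in groups of 64, and the last 28 write nothing. The following standalone host-side sketch illustrates that arithmetic and the guard; it is not Eigen code, and compute_launch_range plus the loop standing in for parallel_for are hypothetical names used only for illustration.

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Hypothetical helper mirroring the round-up arithmetic in TensorSycl::run():
    // pick a work-group size of at most 64, then pad the global range up to the
    // next multiple of it (outTileSize / yMode / yRange in the patch).
    static void compute_launch_range(std::size_t range, std::size_t& global_size,
                                     std::size_t& local_size) {
      local_size = range > 64 ? 64 : range;  // assumes range > 0
      const std::size_t rem = range % local_size;
      global_size = (rem != 0) ? range + (local_size - rem) : range;
    }

    int main() {
      std::size_t global_size = 0, local_size = 0;
      compute_launch_range(100, global_size, local_size);
      assert(local_size == 64);
      assert(global_size == 128);  // 100 rounded up to two groups of 64

      // The kernel guards the 28 padding work-items, just as run() checks
      // itemID.get_global_linear_id() < range before calling evalScalar().
      std::vector<int> touched(100, 0);
      for (std::size_t id = 0; id < global_size; ++id) {  // stand-in for parallel_for
        if (id < touched.size()) touched[id] = 1;         // only real elements written
      }
      assert(touched.front() == 1 && touched.back() == 1);
      return 0;
    }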
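TensorSyclTuple.h exists because, per its own brief, the kernel needs a tuple it can rely on inside SYCL device code, so the patch re-implements the small slice it uses: a recursive Tuple, ElemTypeHolder/get for indexed access, and an Index_list/RangeBuilder-driven append for collecting accessors one at a time. The self-contained C++11 sketch below reproduces that index-sequence technique in simplified form; it reuses the patch's names for clarity but is an illustration of the approach (using std::enable_if in place of the patch's EnableIf), not the Eigen header itself.

    #include <cassert>
    #include <cstddef>
    #include <type_traits>

    // Minimal heterogeneous tuple in the spirit of utility::tuple::Tuple.
    template <class... Ts> struct Tuple {};
    template <class T, class... Ts> struct Tuple<T, Ts...> {
      Tuple(T t, Ts... ts) : head(t), tail(ts...) {}
      T head;
      Tuple<Ts...> tail;
    };

    // Recursive lookup of the K-th element type (ElemTypeHolder in the patch).
    template <std::size_t, class> struct ElemTypeHolder;
    template <class T, class... Ts> struct ElemTypeHolder<0, Tuple<T, Ts...>> {
      typedef T type;
    };
    template <std::size_t K, class T, class... Ts>
    struct ElemTypeHolder<K, Tuple<T, Ts...>> {
      typedef typename ElemTypeHolder<K - 1, Tuple<Ts...>>::type type;
    };

    // get<K>: peel heads off the tuple until K reaches 0.
    template <std::size_t K, class T, class... Ts>
    typename std::enable_if<K == 0, T&>::type get(Tuple<T, Ts...>& t) {
      return t.head;
    }
    template <std::size_t K, class T, class... Ts>
    typename std::enable_if<
        K != 0, typename ElemTypeHolder<K, Tuple<T, Ts...>>::type&>::type
    get(Tuple<T, Ts...>& t) {
      return get<K - 1>(t.tail);
    }

    template <class... Args> Tuple<Args...> make_tuple(Args... args) {
      return Tuple<Args...>(args...);
    }

    // Index_list / RangeBuilder: build the compile-time sequence 0..N-1, then
    // use it to unpack an existing tuple so append() can re-pack its elements
    // together with the new one.
    template <std::size_t... Is> struct Index_list {};
    template <std::size_t MIN, std::size_t N, std::size_t... Is>
    struct RangeBuilder : RangeBuilder<MIN, N - 1, N - 1, Is...> {};
    template <std::size_t MIN, std::size_t... Is>
    struct RangeBuilder<MIN, MIN, Is...> {
      typedef Index_list<Is...> type;
    };

    template <class... Args, class T, std::size_t... Is>
    Tuple<Args..., T> append_impl(Tuple<Args...> t, T a, Index_list<Is...>) {
      return make_tuple(get<Is>(t)..., a);
    }
    template <class... Args, class T>
    Tuple<Args..., T> append(Tuple<Args...> t, T a) {
      return append_impl(t, a, typename RangeBuilder<0, sizeof...(Args)>::type());
    }

    int main() {
      Tuple<int, double> t = make_tuple(1, 2.5);
      assert(get<0>(t) == 1);
      assert(get<1>(t) == 2.5);

      Tuple<int, double, char> u = append(t, 'x');  // indices 0,1 unpack t
      assert(get<2>(u) == 'x');
      return 0;
    }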