aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt7
-rw-r--r--Eigen/Core35
-rw-r--r--bench/tensors/README8
-rw-r--r--bench/tensors/tensor_benchmarks_sycl.cc37
-rw-r--r--cmake/EigenTesting.cmake135
-rw-r--r--cmake/FindComputeCpp.cmake228
-rw-r--r--unsupported/Eigen/CXX11/Tensor2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h5
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBase.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h7
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h122
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h56
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h58
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h14
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h49
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h19
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorMap.h20
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h62
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h238
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h495
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h466
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h313
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h188
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h151
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h293
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h84
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h264
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h11
-rw-r--r--unsupported/test/CMakeLists.txt7
-rw-r--r--unsupported/test/cxx11_tensor_sycl.cpp157
-rw-r--r--unsupported/test/cxx11_tensor_sycl_broadcast.cpp76
-rw-r--r--unsupported/test/cxx11_tensor_sycl_device.cpp37
-rw-r--r--unsupported/test/cxx11_tensor_sycl_forced_eval.cpp64
34 files changed, 3652 insertions, 64 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 76ec09ea0..812997a29 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -429,6 +429,13 @@ else()
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
+# add SYCL
+option(EIGEN_TEST_SYCL "Add Sycl support." OFF)
+if(EIGEN_TEST_SYCL)
+ set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}")
+ include(FindComputeCpp)
+endif()
+
add_subdirectory(unsupported)
add_subdirectory(demos EXCLUDE_FROM_ALL)
diff --git a/Eigen/Core b/Eigen/Core
index 946ed0677..3d2152acf 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -51,7 +51,40 @@
#define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
#endif
-#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS)
+#ifdef EIGEN_USE_SYCL
+#undef min
+#undef max
+#undef isnan
+#undef isinf
+#undef isfinite
+#include <SYCL/sycl.hpp>
+#endif
+
+// We need these predefines to determine if asserts need to be disabled for the device compiler
+#if defined(__SYCL_DEVICE_ONLY__)
+ // Do not try asserts on SYCL!
+ #ifndef EIGEN_NO_DEBUG
+ #define EIGEN_NO_DEBUG
+ #endif
+
+ #ifdef EIGEN_INTERNAL_DEBUGGING
+ #undef EIGEN_INTERNAL_DEBUGGING
+ #endif
+
+ // Do not try to vectorize on SYCL!
+ #ifndef EIGEN_DONT_VECTORIZE
+ #define EIGEN_DONT_VECTORIZE
+ #endif
+
+ #ifdef EIGEN_EXCEPTIONS
+ #undef EIGEN_EXCEPTIONS
+ #endif
+
+ #define EIGEN_DEVICE_FUNC
+
+#endif
+
+#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
#define EIGEN_EXCEPTIONS
#endif
diff --git a/bench/tensors/README b/bench/tensors/README
index 803cb8ef8..3a5fdbe17 100644
--- a/bench/tensors/README
+++ b/bench/tensors/README
@@ -11,5 +11,11 @@ nvcc tensor_benchmarks_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBU
We also provide a version of the generic GPU tensor benchmarks that uses half floats (aka fp16) instead of regular floats. To compile these benchmarks, simply call the command line below. You'll need a recent GPU that supports compute capability 5.3 or higher to run them and nvcc 7.5 or higher to compile the code.
nvcc tensor_benchmarks_fp16_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -use_fast_math -ftz=true -arch compute_53 -o benchmarks_fp16_gpu
-last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call
+last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call
g++ contraction_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu
+
+To compile the benchmark for SYCL, using ComputeCpp you currently need 2 passes (only for translation units containing device code):
+1. The device compilation pass that generates the device code (SYCL kernels and referenced device functions) and glue code needed by the host compiler to reference the device code from host code.
+{ComputeCpp_ROOT}/bin/compute++ -I ../../ -I {ComputeCpp_ROOT}/include/ -std=c++11 -mllvm -inline-threshold=1000 -Wno-ignored-attributes -sycl -intelspirmetadata -emit-llvm -no-serial-memop -sycl-compress-name -DBUILD_PLATFORM_SPIR -DNDEBUG -O3 -c tensor_benchmarks_sycl.cc
+2. The host compilation pass that generates the final host binary.
+clang++-3.7 -include tensor_benchmarks_sycl.sycl benchmark_main.cc tensor_benchmarks_sycl.cc -pthread -I ../../ -I {ComputeCpp_ROOT}/include/ -L {ComputeCpp_ROOT}/lib/ -lComputeCpp -lOpenCL -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -o tensor_benchmark_sycl
diff --git a/bench/tensors/tensor_benchmarks_sycl.cc b/bench/tensors/tensor_benchmarks_sycl.cc
new file mode 100644
index 000000000..7eca4d966
--- /dev/null
+++ b/bench/tensors/tensor_benchmarks_sycl.cc
@@ -0,0 +1,37 @@
+#define EIGEN_USE_SYCL
+
+#include <SYCL/sycl.hpp>
+#include <iostream>
+
+#include "tensor_benchmarks.h"
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+// Simple functions
+template <typename device_selector>
+cl::sycl::queue sycl_queue() {
+ return cl::sycl::queue(device_selector(), [=](cl::sycl::exception_list l) {
+ for (const auto& e : l) {
+ try {
+ std::rethrow_exception(e);
+ } catch (cl::sycl::exception e) {
+ std::cout << e.what() << std::endl;
+ }
+ }
+ });
+}
+
+#define BM_FuncGPU(FUNC) \
+ static void BM_##FUNC(int iters, int N) { \
+ StopBenchmarkTiming(); \
+ cl::sycl::queue q = sycl_queue<cl::sycl::gpu_selector>(); \
+ Eigen::SyclDevice device(q); \
+ BenchmarkSuite<Eigen::SyclDevice, float> suite(device, N); \
+ suite.FUNC(iters); \
+ } \
+ BENCHMARK_RANGE(BM_##FUNC, 10, 5000);
+
+BM_FuncGPU(broadcasting);
+BM_FuncGPU(coeffWiseOp);
diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index 57866d865..602ab5271 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -109,6 +109,103 @@ macro(ei_add_test_internal testname testname_with_suffix)
endmacro(ei_add_test_internal)
+# SYCL
+macro(ei_add_test_internal_sycl testname testname_with_suffix)
+ include_directories( SYSTEM ${COMPUTECPP_PACKAGE_ROOT_DIR}/include)
+ set(targetname ${testname_with_suffix})
+
+ if(EIGEN_ADD_TEST_FILENAME_EXTENSION)
+ set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION})
+ else()
+ set(filename ${testname}.cpp)
+ endif()
+
+ set( include_file ${CMAKE_CURRENT_BINARY_DIR}/inc_${filename})
+ set( bc_file ${CMAKE_CURRENT_BINARY_DIR}/${filename})
+ set( host_file ${CMAKE_CURRENT_SOURCE_DIR}/${filename})
+
+ ADD_CUSTOM_COMMAND(
+ OUTPUT ${include_file}
+ COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${host_file}\\\"" > ${include_file}
+ COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${bc_file}.sycl\\\"" >> ${include_file}
+ DEPENDS ${filename}
+ COMMENT "Building ComputeCpp integration header file ${include_file}"
+ )
+ # Add a custom target for the generated integration header
+ add_custom_target(${testname}_integration_header_woho DEPENDS ${include_file})
+
+ add_executable(${targetname} ${include_file})
+ add_dependencies(${targetname} ${testname}_integration_header_woho)
+ add_sycl_to_target(${targetname} ${filename} ${CMAKE_CURRENT_BINARY_DIR})
+
+ if (targetname MATCHES "^eigen2_")
+ add_dependencies(eigen2_buildtests ${targetname})
+ else()
+ add_dependencies(buildtests ${targetname})
+ endif()
+
+ if(EIGEN_NO_ASSERTION_CHECKING)
+ ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_NO_ASSERTION_CHECKING=1")
+ else(EIGEN_NO_ASSERTION_CHECKING)
+ if(EIGEN_DEBUG_ASSERTS)
+ ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1")
+ endif(EIGEN_DEBUG_ASSERTS)
+ endif(EIGEN_NO_ASSERTION_CHECKING)
+
+ ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}")
+
+ ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_FUNC=${testname}")
+
+ if(MSVC AND NOT EIGEN_SPLIT_LARGE_TESTS)
+ ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj")
+ endif()
+
+ # let the user pass flags.
+ if(${ARGC} GREATER 2)
+ ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}")
+ endif(${ARGC} GREATER 2)
+
+ if(EIGEN_TEST_CUSTOM_CXX_FLAGS)
+ ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}")
+ endif()
+
+ if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
+ target_link_libraries(${targetname} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO})
+ endif()
+ if(EXTERNAL_LIBS)
+ target_link_libraries(${targetname} ${EXTERNAL_LIBS})
+ endif()
+ if(EIGEN_TEST_CUSTOM_LINKER_FLAGS)
+ target_link_libraries(${targetname} ${EIGEN_TEST_CUSTOM_LINKER_FLAGS})
+ endif()
+
+ if(${ARGC} GREATER 3)
+ set(libs_to_link ${ARGV3})
+ # it could be that some cmake module provides a bad library string " " (just spaces),
+ # and that severely breaks target_link_libraries ("can't link to -l-lstdc++" errors).
+ # so we check for strings containing only spaces.
+ string(STRIP "${libs_to_link}" libs_to_link_stripped)
+ string(LENGTH "${libs_to_link_stripped}" libs_to_link_stripped_length)
+ if(${libs_to_link_stripped_length} GREATER 0)
+ # notice: no double quotes around ${libs_to_link} here. It may be a list.
+ target_link_libraries(${targetname} ${libs_to_link})
+ endif()
+ endif()
+
+ add_test(${testname_with_suffix} "${targetname}")
+
+ # Specify target and test labels according to EIGEN_CURRENT_SUBPROJECT
+ get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT)
+ if ((current_subproject) AND (NOT (current_subproject STREQUAL "")))
+ set_property(TARGET ${targetname} PROPERTY LABELS "Build${current_subproject}")
+ add_dependencies("Build${current_subproject}" ${targetname})
+ set_property(TEST ${testname_with_suffix} PROPERTY LABELS "${current_subproject}")
+ endif()
+
+
+endmacro(ei_add_test_internal_sycl)
+
+
# Macro to add a test
#
# the unique mandatory parameter testname must correspond to a file
@@ -185,6 +282,39 @@ macro(ei_add_test testname)
endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
endmacro(ei_add_test)
+macro(ei_add_test_sycl testname)
+ get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST)
+ set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}${testname}\n")
+ set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}")
+
+ if(EIGEN_ADD_TEST_FILENAME_EXTENSION)
+ set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION})
+ else()
+ set(filename ${testname}.cpp)
+ endif()
+
+ file(READ "${filename}" test_source)
+ set(parts 0)
+ string(REGEX MATCHALL "CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+|EIGEN_SUFFIXES(;[0-9]+)+"
+ occurences "${test_source}")
+ string(REGEX REPLACE "CALL_SUBTEST_|EIGEN_TEST_PART_|EIGEN_SUFFIXES" "" suffixes "${occurences}")
+ list(REMOVE_DUPLICATES suffixes)
+ if(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
+ add_custom_target(${testname})
+ foreach(suffix ${suffixes})
+ ei_add_test_internal_sycl(${testname} ${testname}_${suffix}
+ "${ARGV1} -DEIGEN_TEST_PART_${suffix}=1" "${ARGV2}")
+ add_dependencies(${testname} ${testname}_${suffix})
+ endforeach(suffix)
+ else(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
+ set(symbols_to_enable_all_parts "")
+ foreach(suffix ${suffixes})
+ set(symbols_to_enable_all_parts
+ "${symbols_to_enable_all_parts} -DEIGEN_TEST_PART_${suffix}=1")
+ endforeach(suffix)
+ ei_add_test_internal_sycl(${testname} ${testname} "${ARGV1} ${symbols_to_enable_all_parts}" "${ARGV2}")
+ endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
+endmacro(ei_add_test_sycl)
# adds a failtest, i.e. a test that succeed if the program fails to compile
# note that the test runner for these is CMake itself, when passed -DEIGEN_FAILTEST=ON
@@ -330,6 +460,11 @@ macro(ei_testing_print_summary)
message(STATUS "C++11: OFF")
endif()
+ if(EIGEN_TEST_SYCL)
+ message(STATUS "SYCL: ON")
+ else()
+ message(STATUS "SYCL: OFF")
+ endif()
if(EIGEN_TEST_CUDA)
if(EIGEN_TEST_CUDA_CLANG)
message(STATUS "CUDA: ON (using clang)")
diff --git a/cmake/FindComputeCpp.cmake b/cmake/FindComputeCpp.cmake
new file mode 100644
index 000000000..3aab5b833
--- /dev/null
+++ b/cmake/FindComputeCpp.cmake
@@ -0,0 +1,228 @@
+#.rst:
+# FindComputeCpp
+#---------------
+
+#########################
+# FindComputeCpp.cmake
+#########################
+#
+# Tools for finding and building with ComputeCpp.
+#
+
+# Require CMake version 3.2.2 or higher
+cmake_minimum_required(VERSION 3.2.2)
+
+# Check that a supported host compiler can be found
+if(CMAKE_COMPILER_IS_GNUCXX)
+ # Require at least gcc 4.8
+ if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
+ message(FATAL_ERROR
+ "host compiler - Not found! (gcc version must be at least 4.8)")
+ # Require the GCC dual ABI to be disabled for 5.1 or higher
+ elseif (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.1)
+ set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True")
+ message(STATUS
+ "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION} (note pre 5.1 gcc ABI enabled)")
+ else()
+ message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}")
+ endif()
+elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+ # Require at least clang 3.6
+ if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6)
+ message(FATAL_ERROR
+ "host compiler - Not found! (clang version must be at least 3.6)")
+ else()
+ set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True")
+ message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}")
+ endif()
+else()
+ message(WARNING
+ "host compiler - Not found! (ComputeCpp supports GCC and Clang, see readme)")
+endif()
+
+set(COMPUTECPP_64_BIT_DEFAULT ON)
+option(COMPUTECPP_64_BIT_CODE "Compile device code in 64 bit mode"
+ ${COMPUTECPP_64_BIT_DEFAULT})
+mark_as_advanced(COMPUTECPP_64_BIT_CODE)
+
+# Find OpenCL package
+find_package(OpenCL REQUIRED)
+
+# Find ComputeCpp package
+if(EXISTS ${COMPUTECPP_PACKAGE_ROOT_DIR})
+ message(STATUS "ComputeCpp package - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+else()
+ message(FATAL_ERROR "ComputeCpp package - Not found! (please set COMPUTECPP_PACKAGE_ROOT_DIR) (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+endif()
+option(COMPUTECPP_PACKAGE_ROOT_DIR "Path to the ComputeCpp Package")
+
+# Obtain the path to compute++
+find_program(COMPUTECPP_DEVICE_COMPILER compute++ PATHS
+ ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin)
+if (EXISTS ${COMPUTECPP_DEVICE_COMPILER})
+ mark_as_advanced(COMPUTECPP_DEVICE_COMPILER)
+ message(STATUS "compute++ - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+else()
+ message(FATAL_ERROR "compute++ - Not found! (${COMPUTECPP_DEVICE_COMPILER}) (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+endif()
+
+# Obtain the path to computecpp_info
+find_program(COMPUTECPP_INFO_TOOL computecpp_info PATHS
+ ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin)
+if (EXISTS ${COMPUTECPP_INFO_TOOL})
+ mark_as_advanced(${COMPUTECPP_INFO_TOOL})
+ message(STATUS "computecpp_info - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+else()
+ message(FATAL_ERROR "computecpp_info - Not found! (${COMPUTECPP_INFO_TOOL}) (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+endif()
+
+# Obtain the path to the ComputeCpp runtime library
+find_library(COMPUTECPP_RUNTIME_LIBRARY ComputeCpp PATHS ${COMPUTECPP_PACKAGE_ROOT_DIR}
+ HINTS ${COMPUTECPP_PACKAGE_ROOT_DIR}/lib PATH_SUFFIXES lib
+ DOC "ComputeCpp Runtime Library" NO_DEFAULT_PATH)
+
+if (EXISTS ${COMPUTECPP_RUNTIME_LIBRARY})
+ mark_as_advanced(COMPUTECPP_RUNTIME_LIBRARY)
+ message(STATUS "libComputeCpp.so - Found")
+else()
+ message(FATAL_ERROR "libComputeCpp.so - Not found! (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+endif()
+
+# Obtain the ComputeCpp include directory
+set(COMPUTECPP_INCLUDE_DIRECTORY ${COMPUTECPP_PACKAGE_ROOT_DIR}/include/)
+if (NOT EXISTS ${COMPUTECPP_INCLUDE_DIRECTORY})
+ message(FATAL_ERROR "ComputeCpp includes - Not found! (${COMPUTECPP_PACKAGE_ROOT_DIR}/include/)")
+else()
+ message(STATUS "ComputeCpp includes - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+endif()
+
+# Obtain the package version
+execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-version"
+ OUTPUT_VARIABLE COMPUTECPP_PACKAGE_VERSION
+ RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
+if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
+ message(FATAL_ERROR "Package version - Error obtaining version!")
+else()
+ mark_as_advanced(COMPUTECPP_PACKAGE_VERSION)
+ message(STATUS "Package version - ${COMPUTECPP_PACKAGE_VERSION}")
+endif()
+
+# Obtain the device compiler flags
+execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-device-compiler-flags"
+ OUTPUT_VARIABLE COMPUTECPP_DEVICE_COMPILER_FLAGS
+ RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
+if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
+ message(FATAL_ERROR "compute++ flags - Error obtaining compute++ flags!")
+else()
+ mark_as_advanced(COMPUTECPP_COMPILER_FLAGS)
+ message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}")
+endif()
+
+set(COMPUTECPP_DEVICE_COMPILER_FLAGS ${COMPUTECPP_DEVICE_COMPILER_FLAGS} -sycl-compress-name -no-serial-memop -DEIGEN_NO_ASSERTION_CHECKING=1)
+
+# Check if the platform is supported
+execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-is-supported"
+ OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED
+ RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
+if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
+ message(FATAL_ERROR "platform - Error checking platform support!")
+else()
+ mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED)
+ if (COMPUTECPP_PLATFORM_IS_SUPPORTED)
+ message(STATUS "platform - your system can support ComputeCpp")
+ else()
+ message(STATUS "platform - your system CANNOT support ComputeCpp")
+ endif()
+endif()
+
+####################
+# __build_sycl
+####################
+#
+# Adds a custom target for running compute++ and adding a dependency for the
+# resulting integration header.
+#
+# targetName : Name of the target.
+# sourceFile : Source file to be compiled.
+# binaryDir : Intermediate output directory for the integration header.
+#
+function(__build_spir targetName sourceFile binaryDir)
+
+ # Retrieve source file name.
+ get_filename_component(sourceFileName ${sourceFile} NAME)
+
+ # Set the path to the Sycl file.
+ set(outputSyclFile ${binaryDir}/${sourceFileName}.sycl)
+
+ # Add any user-defined include to the device compiler
+ get_property(includeDirectories DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY
+ INCLUDE_DIRECTORIES)
+ set(device_compiler_includes "")
+ foreach(directory ${includeDirectories})
+ set(device_compiler_includes "-I${directory}" ${device_compiler_includes})
+ endforeach()
+ if (CMAKE_INCLUDE_PATH)
+ foreach(directory ${CMAKE_INCLUDE_PATH})
+ set(device_compiler_includes "-I${directory}"
+ ${device_compiler_includes})
+ endforeach()
+ endif()
+
+ # Convert argument list format
+ separate_arguments(COMPUTECPP_DEVICE_COMPILER_FLAGS)
+
+ # Add custom command for running compute++
+ add_custom_command(
+ OUTPUT ${outputSyclFile}
+ COMMAND ${COMPUTECPP_DEVICE_COMPILER}
+ ${COMPUTECPP_DEVICE_COMPILER_FLAGS}
+ -I${COMPUTECPP_INCLUDE_DIRECTORY}
+ ${COMPUTECPP_PLATFORM_SPECIFIC_ARGS}
+ ${device_compiler_includes}
+ -o ${outputSyclFile}
+ -c ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}
+ DEPENDS ${sourceFile}
+ COMMENT "Building ComputeCpp integration header file ${outputSyclFile}")
+
+ # Add a custom target for the generated integration header
+ add_custom_target(${targetName}_integration_header DEPENDS ${outputSyclFile})
+
+ # Add a dependency on the integration header
+ add_dependencies(${targetName} ${targetName}_integration_header)
+
+ # Force inclusion of the integration header for the host compiler
+ #set(compileFlags -include ${include_file} "-Wall")
+ target_compile_options(${targetName} PUBLIC ${compileFlags})
+
+ # Set the host compiler C++ standard to C++11
+ set_property(TARGET ${targetName} PROPERTY CXX_STANDARD 11)
+
+ # Disable GCC dual ABI on GCC 5.1 and higher
+ if(COMPUTECPP_DISABLE_GCC_DUAL_ABI)
+ set_property(TARGET ${targetName} APPEND PROPERTY COMPILE_DEFINITIONS
+ "_GLIBCXX_USE_CXX11_ABI=0")
+ endif()
+
+endfunction()
+
+#######################
+# add_sycl_to_target
+#######################
+#
+# Adds a SYCL compilation custom command associated with an existing
+# target and sets a dependency on that new command.
+#
+# targetName : Name of the target to add a SYCL to.
+# sourceFile : Source file to be compiled for SYCL.
+# binaryDir : Intermediate output directory for the integration header.
+#
+function(add_sycl_to_target targetName sourceFile binaryDir)
+
+ # Add custom target to run compute++ and generate the integration header
+ __build_spir(${targetName} ${sourceFile} ${binaryDir})
+
+ # Link with the ComputeCpp runtime library
+ target_link_libraries(${targetName} PUBLIC ${COMPUTECPP_RUNTIME_LIBRARY}
+ PUBLIC ${OpenCL_LIBRARIES})
+
+endfunction(add_sycl_to_target)
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index f7b94cee1..da6a3f301 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -74,6 +74,8 @@ typedef unsigned __int64 uint64_t;
#include "src/Tensor/TensorDeviceDefault.h"
#include "src/Tensor/TensorDeviceThreadPool.h"
#include "src/Tensor/TensorDeviceCuda.h"
+#include "src/Tensor/TensorSycl.h"
+#include "src/Tensor/TensorDeviceSycl.h"
#include "src/Tensor/TensorIndexList.h"
#include "src/Tensor/TensorDimensionList.h"
#include "src/Tensor/TensorDimensions.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index cb615c75b..166be200c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -163,6 +163,11 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
}
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
+
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); }
private:
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 19d2b50b5..e3880d2e0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -811,7 +811,7 @@ class TensorBase<Derived, ReadOnlyAccessors>
protected:
template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor;
- template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
+ template <typename Scalar, typename Dimensions, int Option, typename IndexTypes, template <class> class MakePointer_> friend class TensorFixedSize;
template <typename OtherDerived, int AccessLevel> friend class TensorBase;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); }
@@ -827,7 +827,7 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
static const int NumDimensions = DerivedTraits::NumDimensions;
template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor;
- template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
+ template <typename Scalar, typename Dimensions, int Option, typename IndexTypes, template <class> class MakePointer_> friend class TensorFixedSize;
template <typename OtherDerived, int OtherAccessLevel> friend class TensorBase;
EIGEN_DEVICE_FUNC
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 5d67f69f3..4cfe300eb 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -113,7 +113,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device)
+ : m_broadcast(op.broadcast()),m_impl(op.expression(), device)
{
// The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
// and store the result in a scalar. Instead one should reshape the scalar into a a N-D
@@ -374,7 +374,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
+ const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+
+ Broadcast functor() const { return m_broadcast; }
+
protected:
+ const Broadcast m_broadcast;
Dimensions m_dimensions;
array<Index, NumDims> m_outputStrides;
array<Index, NumDims> m_inputStrides;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
new file mode 100644
index 000000000..bfd36f5aa
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
@@ -0,0 +1,122 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Cummins Chris PhD student at The University of Edinburgh.
+// Contact: <eigen@codeplay.com>
+
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H)
+#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
+
+namespace Eigen {
+/// \struct BufferT is used to specialise add_sycl_buffer function for
+// two types of buffer we have. When the MapAllocator is true, we create the
+// sycl buffer with MapAllocator.
+/// We have to const_cast the input pointer in order to work around the fact
+/// that sycl does not accept map allocator for const pointer.
+template <typename T, bool MapAllocator>
+struct BufferT {
+ using Type = cl::sycl::buffer<T, 1, cl::sycl::map_allocator<T>>;
+ static inline void add_sycl_buffer(
+ const T *ptr, size_t num_bytes,
+ std::map<const void *, std::shared_ptr<void>> &buffer_map) {
+ buffer_map.insert(std::pair<const void *, std::shared_ptr<void>>(
+ ptr, std::shared_ptr<void>(std::make_shared<Type>(
+ Type(const_cast<T *>(ptr), cl::sycl::range<1>(num_bytes))))));
+ }
+};
+
+/// specialisation of the \ref BufferT when the MapAllocator is false. In this
+/// case we only create the device-only buffer.
+template <typename T>
+struct BufferT<T, false> {
+ using Type = cl::sycl::buffer<T, 1>;
+ static inline void add_sycl_buffer(
+ const T *ptr, size_t num_bytes,
+ std::map<const void *, std::shared_ptr<void>> &buffer_map) {
+ buffer_map.insert(std::pair<const void *, std::shared_ptr<void>>(
+ ptr, std::shared_ptr<void>(
+ std::make_shared<Type>(Type(cl::sycl::range<1>(num_bytes))))));
+ }
+};
+
+struct SyclDevice {
+ /// class members
+ /// sycl queue
+ cl::sycl::queue &m_queue;
+ /// std::map is the container used to make sure that we create only one buffer
+ /// per pointer. The lifespan of the buffer
+ /// now depends on the lifespan of SyclDevice. If a non-read-only pointer is
+ /// needed to be accessed on the host we should manually deallocate it.
+ mutable std::map<const void *, std::shared_ptr<void>> buffer_map;
+
+ SyclDevice(cl::sycl::queue &q) : m_queue(q) {}
+ // destructor
+ ~SyclDevice() { deallocate_all(); }
+
+ template <typename T>
+ void deallocate(const T *p) const {
+ auto it = buffer_map.find(p);
+ if (it != buffer_map.end()) {
+ buffer_map.erase(it);
+ }
+ }
+ void deallocate_all() const { buffer_map.clear(); }
+
+ /// creation of sycl accessor for a buffer. This function first tries to find
+ /// the buffer in the buffer_map.
+ /// If found it gets the accessor from it, if not, the function then adds an
+ /// entry by creating a sycl buffer
+ /// for that particular pointer.
+ template <cl::sycl::access::mode AcMd, bool MapAllocator, typename T>
+ inline cl::sycl::accessor<T, 1, AcMd, cl::sycl::access::target::global_buffer>
+ get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh,
+ const T *ptr) const {
+ auto it = buffer_map.find(ptr);
+ if (it == buffer_map.end()) {
+ BufferT<T, MapAllocator>::add_sycl_buffer(ptr, num_bytes, buffer_map);
+ }
+ return (
+ ((typename BufferT<T, MapAllocator>::Type *)(buffer_map.at(ptr).get()))
+ ->template get_access<AcMd>(cgh));
+ }
+
+ /// allocating memory on the cpu
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const {
+ return internal::aligned_malloc(num_bytes);
+ }
+
+ // some runtime conditions that can be applied here
+ bool isDeviceSuitable() const { return true; }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void *buffer) const {
+ internal::aligned_free(buffer);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src,
+ size_t n) const {
+ ::memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(
+ void *dst, const void *src, size_t n) const {
+ memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(
+ void *dst, const void *src, size_t n) const {
+ memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void *buffer, int c,
+ size_t n) const {
+ ::memset(buffer, c, n);
+ }
+};
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index a08dfa7c3..3dab6da99 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -20,8 +20,8 @@ namespace Eigen {
*
*/
namespace internal {
-template<typename XprType>
-struct traits<TensorEvalToOp<XprType> >
+template<typename XprType, template <class> class MakePointer_>
+struct traits<TensorEvalToOp<XprType, MakePointer_> >
{
// Type promotion to handle the case where the types of the lhs and the rhs are different.
typedef typename XprType::Scalar Scalar;
@@ -36,16 +36,20 @@ struct traits<TensorEvalToOp<XprType> >
enum {
Flags = 0
};
+ template <class T>
+ struct MakePointer {
+ typedef typename MakePointer_<T>::Type Type;
+ };
};
-template<typename XprType>
-struct eval<TensorEvalToOp<XprType>, Eigen::Dense>
+template<typename XprType, template <class> class MakePointer_>
+struct eval<TensorEvalToOp<XprType, MakePointer_>, Eigen::Dense>
{
typedef const TensorEvalToOp<XprType>& type;
};
-template<typename XprType>
-struct nested<TensorEvalToOp<XprType>, 1, typename eval<TensorEvalToOp<XprType> >::type>
+template<typename XprType, template <class> class MakePointer_>
+struct nested<TensorEvalToOp<XprType, MakePointer_>, 1, typename eval<TensorEvalToOp<XprType, MakePointer_> >::type>
{
typedef TensorEvalToOp<XprType> type;
};
@@ -55,37 +59,38 @@ struct nested<TensorEvalToOp<XprType>, 1, typename eval<TensorEvalToOp<XprType>
-template<typename XprType>
-class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType>, ReadOnlyAccessors>
+template<typename XprType, template <class> class MakePointer_>
+class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType, MakePointer_>, ReadOnlyAccessors>
{
public:
typedef typename Eigen::internal::traits<TensorEvalToOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
+ typedef typename MakePointer_<CoeffReturnType>::Type PointerType;
typedef typename Eigen::internal::nested<TensorEvalToOp>::type Nested;
typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(CoeffReturnType* buffer, const XprType& expr)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr)
: m_xpr(expr), m_buffer(buffer) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
expression() const { return m_xpr; }
- EIGEN_DEVICE_FUNC CoeffReturnType* buffer() const { return m_buffer; }
+ EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; }
protected:
typename XprType::Nested m_xpr;
- CoeffReturnType* m_buffer;
+ PointerType m_buffer;
};
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
+template<typename ArgType, typename Device, template <class> class MakePointer_>
+struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
{
- typedef TensorEvalToOp<ArgType> XprType;
+ typedef TensorEvalToOp<ArgType, MakePointer_> XprType;
typedef typename ArgType::Scalar Scalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef typename XprType::Index Index;
@@ -102,15 +107,22 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_device(device), m_buffer(op.buffer())
+ : m_impl(op.expression(), device), m_device(device),
+ m_buffer(op.buffer()), m_op(op), m_expression(op.expression())
{ }
+ // Used for accessor extraction in SYCL Managed TensorMap:
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const {
+ return m_op;
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() {
}
+ typedef typename internal::traits<const TensorEvalToOp<ArgType, MakePointer_>>::template MakePointer<CoeffReturnType>::Type DevicePointer;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) {
EIGEN_UNUSED_VARIABLE(scalar);
eigen_assert(scalar == NULL);
return m_impl.evalSubExprsIfNeeded(m_buffer);
@@ -145,12 +157,20 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_buffer; }
+ EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; }
+ ArgType expression() const { return m_expression; }
+
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+ /// added for sycl in order to construct the buffer from the sycl device
+ const Device& device() const{return m_device;}
private:
TensorEvaluator<ArgType, Device> m_impl;
const Device& m_device;
- CoeffReturnType* m_buffer;
+ DevicePointer m_buffer;
+ const XprType m_op;
+ const ArgType m_expression;
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index c2a327bf0..b2b4bcf62 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -46,9 +46,11 @@ struct TensorEvaluator
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
- : m_data(const_cast<Scalar*>(m.data())), m_dims(m.dimensions()), m_device(device)
+ : m_data(const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m)
{ }
+ // Used for accessor extraction in SYCL Managed TensorMap:
+ const Derived& derived() const { return m_impl; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) {
@@ -106,12 +108,16 @@ struct TensorEvaluator
internal::unpacket_traits<PacketReturnType>::size);
}
- EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<Scalar>::Type data() const { return m_data; }
+
+ /// required by sycl in order to construct sycl buffer from raw pointer
+ const Device& device() const{return m_device;}
protected:
- Scalar* m_data;
+ typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data;
Dimensions m_dims;
const Device& m_device;
+ const Derived& m_impl;
};
namespace {
@@ -159,8 +165,11 @@ struct TensorEvaluator<const Derived, Device>
RawAccess = true
};
+ // Used for accessor extraction in SYCL Managed TensorMap:
+ const Derived& derived() const { return m_impl; }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
- : m_data(m.data()), m_dims(m.dimensions()), m_device(device)
+ : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m)
{ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
@@ -198,12 +207,16 @@ struct TensorEvaluator<const Derived, Device>
internal::unpacket_traits<PacketReturnType>::size);
}
- EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data() const { return m_data; }
+
+ /// added for sycl in order to construct the buffer from the sycl device
+ const Device& device() const{return m_device;}
protected:
- const Scalar* m_data;
+ typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data;
Dimensions m_dims;
const Device& m_device;
+ const Derived& m_impl;
};
@@ -260,6 +273,12 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; }
+ /// required by sycl in order to extract the accessor
+ NullaryOp functor() const { return m_functor; }
+
+
private:
const NullaryOp m_functor;
TensorEvaluator<ArgType, Device> m_argImpl;
@@ -323,6 +342,12 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ArgType, Device> & impl() const { return m_argImpl; }
+ /// added for sycl in order to construct the buffer from sycl device
+ UnaryOp functor() const { return m_functor; }
+
+
private:
const UnaryOp m_functor;
TensorEvaluator<ArgType, Device> m_argImpl;
@@ -396,6 +421,12 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
}
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
+ /// required by sycl in order to extract the accessor
+ BinaryOp functor() const { return m_functor; }
private:
const BinaryOp m_functor;
@@ -491,10 +522,17 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<Arg1Type, Device> & arg1Impl() const { return m_arg1Impl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; }
+
private:
const TernaryOp m_functor;
TensorEvaluator<Arg1Type, Device> m_arg1Impl;
- TensorEvaluator<Arg1Type, Device> m_arg2Impl;
+ TensorEvaluator<Arg2Type, Device> m_arg2Impl;
TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};
@@ -575,6 +613,12 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
}
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; }
private:
TensorEvaluator<IfArgType, Device> m_condImpl;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index a116bf17f..9b99af641 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -272,6 +272,20 @@ inline void TensorExecutor<Expression, GpuDevice, Vectorizable>::run(
#endif // __CUDACC__
#endif // EIGEN_USE_GPU
+// SYCL Executor policy
+#ifdef EIGEN_USE_SYCL
+
+template <typename Expression, bool Vectorizable>
+class TensorExecutor<Expression, SyclDevice, Vectorizable> {
+public:
+ static inline void run(const Expression &expr, const SyclDevice &device) {
+ // call TensorSYCL module
+ TensorSycl::run(expr, device);
+ }
+};
+
+#endif
+
} // end namespace internal
} // end namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
index fcee5f60d..415e459b9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
@@ -23,8 +23,8 @@ namespace Eigen {
* Eigen::TensorFixedSize<float, Size<3,5,7>> t;
*/
-template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType>
-class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> >
+template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType, template <class> class MakePointer_>
+class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType, MakePointer_> >
{
public:
typedef TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> Self;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index c23ecdbc4..9cf4a07e5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -19,9 +19,15 @@ namespace Eigen {
*
*
*/
+/// template <class> class MakePointer_ is added to convert the host pointer to the device pointer.
+/// It is added due to the fact that for our device compiler T* is not allowed.
+/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T.
+/// This is done through our MakePointer_ class. By default the Type in the MakePointer_<T> is T* .
+/// Therefore, by adding the default value, we managed to convert the type and it does not break any
+/// existing code as its default value is T*.
namespace internal {
-template<typename XprType>
-struct traits<TensorForcedEvalOp<XprType> >
+template<typename XprType, template <class> class MakePointer_>
+struct traits<TensorForcedEvalOp<XprType, MakePointer_> >
{
// Type promotion to handle the case where the types of the lhs and the rhs are different.
typedef typename XprType::Scalar Scalar;
@@ -36,26 +42,30 @@ struct traits<TensorForcedEvalOp<XprType> >
enum {
Flags = 0
};
+ template <class T>
+ struct MakePointer {
+ typedef typename MakePointer_<T>::Type Type;
+ };
};
-template<typename XprType>
-struct eval<TensorForcedEvalOp<XprType>, Eigen::Dense>
+template<typename XprType, template <class> class MakePointer_>
+struct eval<TensorForcedEvalOp<XprType, MakePointer_>, Eigen::Dense>
{
- typedef const TensorForcedEvalOp<XprType>& type;
+ typedef const TensorForcedEvalOp<XprType, MakePointer_>& type;
};
-template<typename XprType>
-struct nested<TensorForcedEvalOp<XprType>, 1, typename eval<TensorForcedEvalOp<XprType> >::type>
+template<typename XprType, template <class> class MakePointer_>
+struct nested<TensorForcedEvalOp<XprType, MakePointer_>, 1, typename eval<TensorForcedEvalOp<XprType, MakePointer_> >::type>
{
- typedef TensorForcedEvalOp<XprType> type;
+ typedef TensorForcedEvalOp<XprType, MakePointer_> type;
};
} // end namespace internal
-template<typename XprType>
-class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOnlyAccessors>
+template<typename XprType, template <class> class MakePointer_>
+class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType, MakePointer_>, ReadOnlyAccessors>
{
public:
typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar;
@@ -77,10 +87,10 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn
};
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
+template<typename ArgType, typename Device, template <class> class MakePointer_>
+struct TensorEvaluator<const TensorForcedEvalOp<ArgType, MakePointer_>, Device>
{
- typedef TensorForcedEvalOp<ArgType> XprType;
+ typedef TensorForcedEvalOp<ArgType, MakePointer_> XprType;
typedef typename ArgType::Scalar Scalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef typename XprType::Index Index;
@@ -96,6 +106,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
};
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
+ /// op_ is used for sycl
: m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL)
{ }
@@ -110,10 +121,10 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
new(m_buffer+i) CoeffReturnType();
}
}
- typedef TensorEvalToOp<const ArgType> EvalTo;
+ typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
EvalTo evalToTmp(m_buffer, m_op);
const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value;
- internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
+ internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device);
return true;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
@@ -136,13 +147,17 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC Scalar* data() const { return m_buffer; }
+ EIGEN_DEVICE_FUNC typename MakePointer<Scalar>::Type data() const { return m_buffer; }
+ /// required by sycl in order to extract the sycl accessor
+ const TensorEvaluator<ArgType, Device>& impl() { return m_impl; }
+ /// used by sycl in order to build the sycl buffer
+ const Device& device() const{return m_device;}
private:
TensorEvaluator<ArgType, Device> m_impl;
const ArgType m_op;
const Device& m_device;
- CoeffReturnType* m_buffer;
+ typename MakePointer<CoeffReturnType>::Type m_buffer;
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index 490ddd8bd..83c690133 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -12,9 +12,19 @@
namespace Eigen {
+// MakePointer class is used as a container of the address space of the pointer
+// on the host and on the device. From the host side it generates the T* pointer
+// and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to
+// T* m_data on the host. It is always called on the device.
+// Specialisation of MakePointer class for creating the sycl buffer with
+// map_allocator.
+template<class T> struct MakePointer{
+ typedef T* Type;
+};
+
+template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap;
template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor;
-template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize;
-template<typename PlainObjectType, int Options_ = Unaligned> class TensorMap;
+template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex, template <class> class MakePointer_ = MakePointer> class TensorFixedSize;
template<typename PlainObjectType> class TensorRef;
template<typename Derived, int AccessLevel> class TensorBase;
@@ -52,8 +62,8 @@ template<typename Op, typename XprType> class TensorScanOp;
template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;
-template<typename XprType> class TensorEvalToOp;
-template<typename XprType> class TensorForcedEvalOp;
+template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp;
+template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorForcedEvalOp;
template<typename ExpressionType, typename DeviceType> class TensorDevice;
template<typename Derived, typename Device> struct TensorEvaluator;
@@ -61,6 +71,7 @@ template<typename Derived, typename Device> struct TensorEvaluator;
struct DefaultDevice;
struct ThreadPoolDevice;
struct GpuDevice;
+struct SyclDevice;
enum FFTResultType {
RealPart = 0,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
index 6fb4f4a31..298a49138 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
@@ -18,11 +18,16 @@ namespace Eigen {
* \brief A tensor expression mapping an existing array of data.
*
*/
-
-template<typename PlainObjectType, int Options_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_> >
+/// template <class> class MakePointer_ is added to convert the host pointer to the device pointer.
+/// It is added due to the fact that for our device compiler T* is not allowed.
+/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T.
+/// This is done through our MakePointer_ class. By default the Type in the MakePointer_<T> is T* .
+/// Therefore, by adding the default value, we managed to convert the type and it does not break any
+/// existing code as its default value is T*.
+template<typename PlainObjectType, int Options_, template <class> class MakePointer_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_, MakePointer_> >
{
public:
- typedef TensorMap<PlainObjectType, Options_> Self;
+ typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self;
typedef typename PlainObjectType::Base Base;
typedef typename Eigen::internal::nested<Self>::type Nested;
typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
@@ -36,7 +41,7 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor
Scalar *,
const Scalar *>::type
PointerType;*/
- typedef Scalar* PointerType;
+ typedef typename MakePointer_<Scalar>::Type PointerType;
typedef PointerType PointerArgType;
static const int Options = Options_;
@@ -109,9 +114,9 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); }
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar* data() { return m_data; }
+ EIGEN_STRONG_INLINE PointerType data() { return m_data; }
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar* data() const { return m_data; }
+ EIGEN_STRONG_INLINE const PointerType data() const { return m_data; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
@@ -307,8 +312,9 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor
}
private:
- Scalar* m_data;
+ typename MakePointer_<Scalar>::Type m_data;
Dimensions m_dimensions;
+ size_t is_converted = size_t(0);
};
} // end namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h
new file mode 100644
index 000000000..277dd739c
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h
@@ -0,0 +1,62 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: eigen@codeplay.com
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// General include header of SYCL target for Tensor Module
+#ifndef TENSORSYCL_H
+#define TENSORSYCL_H
+
+#ifdef EIGEN_USE_SYCL
+
+// trait class to extract different attribute contents
+template <typename T>
+struct Trait;
+// global pointer to set different attribute state for a class
+template <class T>
+struct MakeGlobalPointer {
+ typedef typename cl::sycl::global_ptr<T>::pointer_t Type;
+};
+
+// tuple construction
+#include "TensorSyclTuple.h"
+
+// This file contains the PlaceHolder that replaces the actual data
+#include "TensorSyclPlaceHolder.h"
+
+#include "TensorSyclLeafCount.h"
+
+// The index PlaceHolder takes the actual expression and replaces the actual
+// data on it with the place holder. It uses the same pre-order expression tree
+// traverse as the leaf count in order to give the right access number to each
+// node in the expression
+#include "TensorSyclPlaceHolderExpr.h"
+
+// creation of an accessor tuple from a tuple of SYCL buffers
+#include "TensorSyclExtractAccessor.h"
+
+// actual data extraction using accessors
+//#include "GetDeviceData.h"
+
+// this is used to change the address space type in tensor map for GPU
+#include "TensorSyclConvertToDeviceExpression.h"
+
+// this is used to extract the functors
+#include "TensorSyclExtractFunctors.h"
+
+// this is used to create tensormap on the device
+// this is used to construct the expression on the device
+#include "TensorSyclExprConstructor.h"
+
+// kernel execution using fusion
+#include "TensorSyclRun.h"
+
+#endif // end of EIGEN_USE_SYCL
+#endif // TENSORSYCL_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
new file mode 100644
index 000000000..b3748131b
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
@@ -0,0 +1,238 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclConvertToDeviceExpression.h
+ *
+ * \brief:
+ * Conversion from host pointer to device pointer
+ * inside leaf nodes of the expression.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \struct ConvertToDeviceExpression
+/// \brief This struct is used to convert the MakePointer in the host expression
+/// to the MakeGlobalPointer for the device expression. For the leafNodes
+/// containing the pointer. This is due to the fact that the address space of
+/// the pointer T* is different on the host and the device.
+template <typename Expr>
+struct ConvertToDeviceExpression;
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorMap
+template <typename Scalar_, int Options_, int Options2_, int NumIndices_,
+ typename IndexType_, template <class> class MakePointer_>
+struct ConvertToDeviceExpression<
+ TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_,
+ MakePointer_>> {
+ using Type = TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>,
+ Options2_, MakeGlobalPointer>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const TensorMap
+template <typename Scalar_, int Options_, int Options2_, int NumIndices_,
+ typename IndexType_, template <class> class MakePointer_>
+struct ConvertToDeviceExpression<
+ const TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>,
+ Options2_, MakePointer_>> {
+ using Type =
+ const TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>,
+ Options2_, MakeGlobalPointer>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const TensorCwiseNullaryOp
+template <typename OP, typename RHSExpr>
+struct ConvertToDeviceExpression<const TensorCwiseNullaryOp<OP, RHSExpr>> {
+ using RHSPlaceHolderType = typename ConvertToDeviceExpression<RHSExpr>::Type;
+ using Type = const TensorCwiseNullaryOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorCwiseNullaryOp
+template <typename OP, typename RHSExpr>
+struct ConvertToDeviceExpression<TensorCwiseNullaryOp<OP, RHSExpr>> {
+ using RHSPlaceHolderType = typename ConvertToDeviceExpression<RHSExpr>::Type;
+ using Type = TensorCwiseNullaryOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const TensorBroadcastingOp
+template <typename OP, typename RHSExpr>
+struct ConvertToDeviceExpression<const TensorBroadcastingOp<OP, RHSExpr>> {
+ using RHSPlaceHolderType = typename ConvertToDeviceExpression<RHSExpr>::Type;
+ using Type = const TensorBroadcastingOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorBroadcastingOp
+template <typename OP, typename RHSExpr>
+struct ConvertToDeviceExpression<TensorBroadcastingOp<OP, RHSExpr>> {
+ using RHSPlaceHolderType = typename ConvertToDeviceExpression<RHSExpr>::Type;
+ using Type = TensorBroadcastingOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const TensorCwiseUnaryOp
+template <typename OP, typename RHSExpr>
+struct ConvertToDeviceExpression<const TensorCwiseUnaryOp<OP, RHSExpr>> {
+ using RHSPlaceHolderType = typename ConvertToDeviceExpression<RHSExpr>::Type;
+ using Type = const TensorCwiseUnaryOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorCwiseUnaryOp
+template <typename OP, typename RHSExpr>
+struct ConvertToDeviceExpression<TensorCwiseUnaryOp<OP, RHSExpr>> {
+ using RHSPlaceHolderType = typename ConvertToDeviceExpression<RHSExpr>::Type;
+ using Type = TensorCwiseUnaryOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const TensorCwiseBinaryOp
+template <typename OP, typename LHSExpr, typename RHSExpr>
+struct ConvertToDeviceExpression<
+ const TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>> {
+ using LHSPlaceHolderType = typename ConvertToDeviceExpression<LHSExpr>::Type;
+ using RHSPlaceHolderType = typename ConvertToDeviceExpression<RHSExpr>::Type;
+ using Type =
+ const TensorCwiseBinaryOp<OP, LHSPlaceHolderType, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorCwiseBinaryOp
+template <typename OP, typename LHSExpr, typename RHSExpr>
+struct ConvertToDeviceExpression<TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>> {
+ using LHSPlaceHolderType = typename ConvertToDeviceExpression<LHSExpr>::Type;
+ using RHSPlaceHolderType = typename ConvertToDeviceExpression<RHSExpr>::Type;
+ using Type = TensorCwiseBinaryOp<OP, LHSPlaceHolderType, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const TensorCwiseTernaryOp
+template <typename OP, typename Arg1Impl, typename Arg2Impl, typename Arg3Impl>
+struct ConvertToDeviceExpression<
+ const TensorCwiseTernaryOp<OP, Arg1Impl, Arg2Impl, Arg3Impl>> {
+ using Arg1PlaceHolderType =
+ typename ConvertToDeviceExpression<Arg1Impl>::Type;
+ using Arg2PlaceHolderType =
+ typename ConvertToDeviceExpression<Arg2Impl>::Type;
+ using Arg3PlaceHolderType =
+ typename ConvertToDeviceExpression<Arg3Impl>::Type;
+ using Type =
+ const TensorCwiseTernaryOp<OP, Arg1PlaceHolderType, Arg2PlaceHolderType,
+ Arg3PlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorCwiseTernaryOp
+template <typename OP, typename Arg1Impl, typename Arg2Impl, typename Arg3Impl>
+struct ConvertToDeviceExpression<
+ TensorCwiseTernaryOp<OP, Arg1Impl, Arg2Impl, Arg3Impl>> {
+ using Arg1PlaceHolderType =
+ typename ConvertToDeviceExpression<Arg1Impl>::Type;
+ using Arg2PlaceHolderType =
+ typename ConvertToDeviceExpression<Arg2Impl>::Type;
+ using Arg3PlaceHolderType =
+ typename ConvertToDeviceExpression<Arg3Impl>::Type;
+ using Type = TensorCwiseTernaryOp<OP, Arg1PlaceHolderType,
+ Arg2PlaceHolderType, Arg3PlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const TensorCwiseSelectOp
+template <typename IfExpr, typename ThenExpr, typename ElseExpr>
+struct ConvertToDeviceExpression<
+ const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>> {
+ using IfPlaceHolderType = typename ConvertToDeviceExpression<IfExpr>::Type;
+ using ThenPlaceHolderType =
+ typename ConvertToDeviceExpression<ThenExpr>::Type;
+ using ElsePlaceHolderType =
+ typename ConvertToDeviceExpression<ElseExpr>::Type;
+ using Type = const TensorSelectOp<IfPlaceHolderType, ThenPlaceHolderType,
+ ElsePlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorSelectOp
+template <typename IfExpr, typename ThenExpr, typename ElseExpr>
+struct ConvertToDeviceExpression<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>> {
+ using IfPlaceHolderType = typename ConvertToDeviceExpression<IfExpr>::Type;
+ using ThenPlaceHolderType =
+ typename ConvertToDeviceExpression<ThenExpr>::Type;
+ using ElsePlaceHolderType =
+ typename ConvertToDeviceExpression<ElseExpr>::Type;
+ using Type = TensorSelectOp<IfPlaceHolderType, ThenPlaceHolderType,
+ ElsePlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const TensorAssignOp
+template <typename LHSExpr, typename RHSExpr>
+struct ConvertToDeviceExpression<const TensorAssignOp<LHSExpr, RHSExpr>> {
+ using LHSPlaceHolderType = typename ConvertToDeviceExpression<LHSExpr>::Type;
+ using RHSPlaceHolderType = typename ConvertToDeviceExpression<RHSExpr>::Type;
+ using Type = const TensorAssignOp<LHSPlaceHolderType, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorAssignOp
+template <typename LHSExpr, typename RHSExpr>
+struct ConvertToDeviceExpression<TensorAssignOp<LHSExpr, RHSExpr>> {
+ using LHSPlaceHolderType = typename ConvertToDeviceExpression<LHSExpr>::Type;
+ using RHSPlaceHolderType = typename ConvertToDeviceExpression<RHSExpr>::Type;
+ using Type = TensorAssignOp<LHSPlaceHolderType, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const TensorForcedEvalOp
+template <typename Expr>
+struct ConvertToDeviceExpression<const TensorForcedEvalOp<Expr>> {
+ using PlaceHolderType = typename ConvertToDeviceExpression<Expr>::Type;
+ using Type = const TensorForcedEvalOp<PlaceHolderType, MakeGlobalPointer>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorForcedEvalOp
+template <typename Expr>
+struct ConvertToDeviceExpression<TensorForcedEvalOp<Expr>> {
+ using PlaceHolderType = typename ConvertToDeviceExpression<Expr>::Type;
+ using Type = TensorForcedEvalOp<PlaceHolderType, MakeGlobalPointer>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const TensorEvalToOp
+template <typename Expr>
+struct ConvertToDeviceExpression<const TensorEvalToOp<Expr>> {
+ using PlaceHolderType = typename ConvertToDeviceExpression<Expr>::Type;
+ using Type = const TensorEvalToOp<PlaceHolderType, MakeGlobalPointer>;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorEvalToOp
+template <typename Expr>
+struct ConvertToDeviceExpression<TensorEvalToOp<Expr>> {
+ using PlaceHolderType = typename ConvertToDeviceExpression<Expr>::Type;
+ using Type = TensorEvalToOp<PlaceHolderType, MakeGlobalPointer>;
+};
+} // namespace internal
+} // namespace TensorSycl
+} // namespace Eigen
+
+#endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
new file mode 100644
index 000000000..fe3994175
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
@@ -0,0 +1,495 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclExprConstructor.h
+ *
+ * \brief:
+ *  This file re-creates an expression on the SYCL device in order
+ * to use the original tensor evaluator.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXPR_CONSTRUCTOR_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXPR_CONSTRUCTOR_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// this class is used by EvalToOp in order to create an lhs expression which is
+/// a pointer from an accessor on device-only buffer
+template <typename PtrType, size_t N, typename... Params>
+struct EvalToLHSConstructor {
+ PtrType expr;
+ EvalToLHSConstructor(const utility::tuple::Tuple<Params...> &t)
+ : expr((&(*(utility::tuple::get<N>(t).get_pointer())))) {}
+};
+
+/// \struct ExprConstructor is used to reconstruct the expression on the device
+/// and
+/// recreate the expression with MakeGlobalPointer containing the device address
+/// space for the TensorMap pointers used in eval function.
+/// It receives the original expression type, the functor of the node, the tuple
+/// of accessors, and the device expression type to re-instantiate the
+/// expression tree for the device
+template <typename OrigExpr, typename IndexExpr, typename... Params>
+struct ExprConstructor;
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorMap
+template <typename Scalar_, int Options_, int Options2_, int Options3_,
+ int NumIndices_, typename IndexType_,
+ template <class> class MakePointer_, size_t N, typename... Params>
+struct ExprConstructor<
+ const TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>,
+ Options2_, MakeGlobalPointer>,
+ const Eigen::internal::PlaceHolder<
+ const TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>,
+ Options3_, MakePointer_>,
+ N>,
+ Params...> {
+ using Type =
+ const TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>,
+ Options2_, MakeGlobalPointer>;
+
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)
+ : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))),
+ fd.dimensions())) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorMap
+template <typename Scalar_, int Options_, int Options2_, int Options3_,
+ int NumIndices_, typename IndexType_,
+ template <class> class MakePointer_, size_t N, typename... Params>
+struct ExprConstructor<
+ TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_,
+ MakeGlobalPointer>,
+ Eigen::internal::PlaceHolder<
+ TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options3_,
+ MakePointer_>,
+ N>,
+ Params...> {
+ using Type = TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>,
+ Options2_, MakeGlobalPointer>;
+
+ Type expr;
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)
+ : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))),
+ fd.dimensions())) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorCwiseNullaryOp
+template <typename OP, typename OrigRHSExpr, typename RHSExpr,
+ typename... Params>
+struct ExprConstructor<TensorCwiseNullaryOp<OP, OrigRHSExpr>,
+ TensorCwiseNullaryOp<OP, RHSExpr>, Params...> {
+ using my_type = ExprConstructor<OrigRHSExpr, RHSExpr, Params...>;
+ my_type rhsExpr;
+ using Type = TensorCwiseNullaryOp<OP, typename my_type::Type>;
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorCwiseNullaryOp
+template <typename OP, typename OrigRHSExpr, typename RHSExpr,
+ typename... Params>
+struct ExprConstructor<const TensorCwiseNullaryOp<OP, OrigRHSExpr>,
+ const TensorCwiseNullaryOp<OP, RHSExpr>, Params...> {
+ using my_type = const ExprConstructor<OrigRHSExpr, RHSExpr, Params...>;
+ my_type rhsExpr;
+ using Type = const TensorCwiseNullaryOp<OP, typename my_type::Type>;
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorBroadcastingOp
+template <typename OP, typename OrigRHSExpr, typename RHSExpr,
+ typename... Params>
+struct ExprConstructor<TensorBroadcastingOp<OP, OrigRHSExpr>,
+ TensorBroadcastingOp<OP, RHSExpr>, Params...> {
+ using my_type = ExprConstructor<OrigRHSExpr, RHSExpr, Params...>;
+ my_type rhsExpr;
+ using Type = TensorBroadcastingOp<OP, typename my_type::Type>;
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorBroadcastingOp
+template <typename OP, typename OrigRHSExpr, typename RHSExpr,
+ typename... Params>
+struct ExprConstructor<const TensorBroadcastingOp<OP, OrigRHSExpr>,
+ const TensorBroadcastingOp<OP, RHSExpr>, Params...> {
+ using my_type = const ExprConstructor<OrigRHSExpr, RHSExpr, Params...>;
+ my_type rhsExpr;
+ using Type = const TensorBroadcastingOp<OP, typename my_type::Type>;
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorCwiseUnaryOp
+template <typename OP, typename OrigRHSExpr, typename RHSExpr,
+ typename... Params>
+struct ExprConstructor<TensorCwiseUnaryOp<OP, OrigRHSExpr>,
+ TensorCwiseUnaryOp<OP, RHSExpr>, Params...> {
+ using my_type = ExprConstructor<OrigRHSExpr, RHSExpr, Params...>;
+ using Type = TensorCwiseUnaryOp<OP, typename my_type::Type>;
+ my_type rhsExpr;
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD, utility::tuple::Tuple<Params...> &t)
+ : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorCwiseUnaryOp
+template <typename OP, typename OrigRHSExpr, typename RHSExpr,
+ typename... Params>
+struct ExprConstructor<const TensorCwiseUnaryOp<OP, OrigRHSExpr>,
+ const TensorCwiseUnaryOp<OP, RHSExpr>, Params...> {
+ using my_type = ExprConstructor<OrigRHSExpr, RHSExpr, Params...>;
+ using Type = const TensorCwiseUnaryOp<OP, typename my_type::Type>;
+ my_type rhsExpr;
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorCwiseBinaryOp
+template <typename OP, typename OrigLHSExpr, typename OrigRHSExpr,
+ typename LHSExpr, typename RHSExpr, typename... Params>
+struct ExprConstructor<TensorCwiseBinaryOp<OP, OrigLHSExpr, OrigRHSExpr>,
+ TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>, Params...> {
+ using my_left_type = ExprConstructor<OrigLHSExpr, LHSExpr, Params...>;
+ using my_right_type = ExprConstructor<OrigRHSExpr, RHSExpr, Params...>;
+ using Type = TensorCwiseBinaryOp<OP, typename my_left_type::Type,
+ typename my_right_type::Type>;
+
+ my_left_type lhsExpr;
+ my_right_type rhsExpr;
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : lhsExpr(funcD.lhsExpr, t),
+ rhsExpr(funcD.rhsExpr, t),
+ expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorCwiseBinaryOp
+template <typename OP, typename OrigLHSExpr, typename OrigRHSExpr,
+ typename LHSExpr, typename RHSExpr, typename... Params>
+struct ExprConstructor<const TensorCwiseBinaryOp<OP, OrigLHSExpr, OrigRHSExpr>,
+ const TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>,
+ Params...> {
+ using my_left_type = ExprConstructor<OrigLHSExpr, LHSExpr, Params...>;
+ using my_right_type = ExprConstructor<OrigRHSExpr, RHSExpr, Params...>;
+ using Type = const TensorCwiseBinaryOp<OP, typename my_left_type::Type,
+ typename my_right_type::Type>;
+
+ my_left_type lhsExpr;
+ my_right_type rhsExpr;
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : lhsExpr(funcD.lhsExpr, t),
+ rhsExpr(funcD.rhsExpr, t),
+ expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorCwiseTernaryOp
+template <typename OP, typename OrigArg1Expr, typename OrigArg2Expr,
+ typename OrigArg3Expr, typename Arg1Expr, typename Arg2Expr,
+ typename Arg3Expr, typename... Params>
+struct ExprConstructor<
+ const TensorCwiseTernaryOp<OP, OrigArg1Expr, OrigArg2Expr, OrigArg3Expr>,
+ const TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Params...> {
+ using my_arg1_type = ExprConstructor<OrigArg1Expr, Arg1Expr, Params...>;
+ using my_arg2_type = ExprConstructor<OrigArg2Expr, Arg2Expr, Params...>;
+ using my_arg3_type = ExprConstructor<OrigArg3Expr, Arg3Expr, Params...>;
+ using Type = const TensorCwiseTernaryOp<OP, typename my_arg1_type::Type,
+ typename my_arg2_type::Type,
+ typename my_arg3_type::Type>;
+
+ my_arg1_type arg1Expr;
+ my_arg2_type arg2Expr;
+ my_arg3_type arg3Expr;
+ Type expr;
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : arg1Expr(funcD.arg1Expr, t),
+ arg2Expr(funcD.arg2Expr, t),
+ arg3Expr(funcD.arg3Expr, t),
+ expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorCwiseTernaryOp
+template <typename OP, typename OrigArg1Expr, typename OrigArg2Expr,
+ typename OrigArg3Expr, typename Arg1Expr, typename Arg2Expr,
+ typename Arg3Expr, typename... Params>
+struct ExprConstructor<
+ TensorCwiseTernaryOp<OP, OrigArg1Expr, OrigArg2Expr, OrigArg3Expr>,
+ TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Params...> {
+ using my_arg1_type = ExprConstructor<OrigArg1Expr, Arg1Expr, Params...>;
+ using my_arg2_type = ExprConstructor<OrigArg2Expr, Arg2Expr, Params...>;
+ using my_arg3_type = ExprConstructor<OrigArg3Expr, Arg3Expr, Params...>;
+ using Type = TensorCwiseTernaryOp<OP, typename my_arg1_type::Type,
+ typename my_arg2_type::Type,
+ typename my_arg3_type::Type>;
+
+ my_arg1_type arg1Expr;
+ my_arg2_type arg2Expr;
+ my_arg3_type arg3Expr;
+ Type expr;
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : arg1Expr(funcD.arg1Expr, t),
+ arg2Expr(funcD.arg2Expr, t),
+ arg3Expr(funcD.arg3Expr, t),
+ expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorSelectOp
+template <typename OrigIfExpr, typename OrigThenExpr, typename OrigElseExpr,
+ typename IfExpr, typename ThenExpr, typename ElseExpr,
+ typename... Params>
+struct ExprConstructor<
+ const TensorSelectOp<OrigIfExpr, OrigThenExpr, OrigElseExpr>,
+ const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Params...> {
+ using my_if_type = ExprConstructor<OrigIfExpr, IfExpr, Params...>;
+ using my_then_type = ExprConstructor<OrigThenExpr, ThenExpr, Params...>;
+ using my_else_type = ExprConstructor<OrigElseExpr, ElseExpr, Params...>;
+ using Type = const TensorSelectOp<typename my_if_type::Type,
+ typename my_then_type::Type,
+ typename my_else_type::Type>;
+
+ my_if_type ifExpr;
+ my_then_type thenExpr;
+ my_else_type elseExpr;
+ Type expr;
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : ifExpr(funcD.ifExpr, t),
+ thenExpr(funcD.thenExpr, t),
+ elseExpr(funcD.elseExpr, t),
+ expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorSelectOp
+template <typename OrigIfExpr, typename OrigThenExpr, typename OrigElseExpr,
+ typename IfExpr, typename ThenExpr, typename ElseExpr,
+ typename... Params>
+struct ExprConstructor<TensorSelectOp<OrigIfExpr, OrigThenExpr, OrigElseExpr>,
+ TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Params...> {
+ using my_if_type = ExprConstructor<OrigIfExpr, IfExpr, Params...>;
+ using my_then_type = ExprConstructor<OrigThenExpr, ThenExpr, Params...>;
+ using my_else_type = ExprConstructor<OrigElseExpr, ElseExpr, Params...>;
+ using Type =
+ TensorSelectOp<typename my_if_type::Type, typename my_then_type::Type,
+ typename my_else_type::Type>;
+
+ my_if_type ifExpr;
+ my_then_type thenExpr;
+ my_else_type elseExpr;
+ Type expr;
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : ifExpr(funcD.ifExpr, t),
+ thenExpr(funcD.thenExpr, t),
+ elseExpr(funcD.elseExpr, t),
+ expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorAssignOp
+template <typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr,
+ typename RHSExpr, typename... Params>
+struct ExprConstructor<TensorAssignOp<OrigLHSExpr, OrigRHSExpr>,
+ TensorAssignOp<LHSExpr, RHSExpr>, Params...> {
+ using my_left_type = ExprConstructor<OrigLHSExpr, LHSExpr, Params...>;
+ using my_right_type = ExprConstructor<OrigRHSExpr, RHSExpr, Params...>;
+ using Type =
+ TensorAssignOp<typename my_left_type::Type, typename my_right_type::Type>;
+
+ my_left_type lhsExpr;
+ my_right_type rhsExpr;
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : lhsExpr(funcD.lhsExpr, t),
+ rhsExpr(funcD.rhsExpr, t),
+ expr(lhsExpr.expr, rhsExpr.expr) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorAssignOp
+template <typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr,
+ typename RHSExpr, typename... Params>
+struct ExprConstructor<const TensorAssignOp<OrigLHSExpr, OrigRHSExpr>,
+ const TensorAssignOp<LHSExpr, RHSExpr>, Params...> {
+ using my_left_type = ExprConstructor<OrigLHSExpr, LHSExpr, Params...>;
+ using my_right_type = ExprConstructor<OrigRHSExpr, RHSExpr, Params...>;
+ using Type = const TensorAssignOp<typename my_left_type::Type,
+ typename my_right_type::Type>;
+
+ my_left_type lhsExpr;
+ my_right_type rhsExpr;
+ Type expr;
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : lhsExpr(funcD.lhsExpr, t),
+ rhsExpr(funcD.rhsExpr, t),
+ expr(lhsExpr.expr, rhsExpr.expr) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorEvalToOp
+template <typename OrigExpr, typename Expr, typename... Params>
+struct ExprConstructor<const TensorEvalToOp<OrigExpr, MakeGlobalPointer>,
+ const TensorEvalToOp<Expr>, Params...> {
+ using my_expr_type = ExprConstructor<OrigExpr, Expr, Params...>;
+ using my_buffer_type =
+ typename TensorEvalToOp<OrigExpr, MakeGlobalPointer>::PointerType;
+ using Type =
+ const TensorEvalToOp<typename my_expr_type::Type, MakeGlobalPointer>;
+ my_expr_type nestedExpression;
+ EvalToLHSConstructor<my_buffer_type, 0, Params...> buffer;
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : nestedExpression(funcD.rhsExpr, t),
+ buffer(t),
+ expr(buffer.expr, nestedExpression.expr) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorEvalToOp
+template <typename OrigExpr, typename Expr, typename... Params>
+struct ExprConstructor<TensorEvalToOp<OrigExpr, MakeGlobalPointer>,
+ TensorEvalToOp<Expr>, Params...> {
+ using my_expr_type = ExprConstructor<OrigExpr, Expr, Params...>;
+ using my_buffer_type =
+ typename TensorEvalToOp<OrigExpr, MakeGlobalPointer>::PointerType;
+ using Type = TensorEvalToOp<typename my_expr_type::Type>;
+ my_expr_type nestedExpression;
+ EvalToLHSConstructor<my_buffer_type, 0, Params...> buffer;
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ : nestedExpression(funcD.rhsExpr, t),
+ buffer(t),
+ expr(buffer.expr, nestedExpression.expr) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorForcedEvalOp
+template <typename OrigExpr, typename DevExpr, size_t N, typename... Params>
+struct ExprConstructor<
+ const TensorForcedEvalOp<OrigExpr, MakeGlobalPointer>,
+ const Eigen::internal::PlaceHolder<const TensorForcedEvalOp<DevExpr>, N>,
+ Params...> {
+ using Type = const TensorMap<
+ Tensor<typename TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::Scalar,
+ TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::NumDimensions, 0,
+ typename TensorForcedEvalOp<DevExpr>::Index>,
+ 0, MakeGlobalPointer>;
+
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)
+ : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))),
+ fd.dimensions())) {}
+};
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorForcedEvalOp
+template <typename OrigExpr, typename DevExpr, size_t N, typename... Params>
+struct ExprConstructor<
+ const TensorForcedEvalOp<OrigExpr, MakeGlobalPointer>,
+ const Eigen::internal::PlaceHolder<TensorForcedEvalOp<DevExpr>, N>,
+ Params...> {
+ using Type = TensorMap<
+ Tensor<typename TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::Scalar, 1,
+ 0, typename TensorForcedEvalOp<DevExpr>::Index>,
+ 0, MakeGlobalPointer>;
+
+ Type expr;
+
+ template <typename FuncDetector>
+ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)
+ : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))),
+ fd.dimensions())) {}
+};
+
+/// template deduction for \ref ExprConstructor struct
+template <typename OrigExpr, typename IndexExpr, typename FuncD,
+ typename... Params>
+auto createDeviceExpression(FuncD &funcD,
+ const utility::tuple::Tuple<Params...> &t)
+ -> decltype(ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t)) {
+ return ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t);
+}
+}
+}
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXPR_CONSTRUCTOR_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
new file mode 100644
index 000000000..cb0ac131d
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
@@ -0,0 +1,466 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclExtractAccessor.h
+ *
+ * \brief:
+ * ExtractAccessor takes the PlaceHolder expression and the tuple of SYCL
+ * buffers as an input. Using pre-order tree traversal, ExtractAccessor
+ * recursively calls itself for its children in the expression tree. The
+ * leaf node in the PlaceHolder expression is nothing but a container preserving
+ * the order of the actual data in the tuple of sycl buffer. By invoking the
+ * extract accessor for the PlaceHolder<N>, an accessor is created for the Nth
+ * buffer in the tuple of buffers. This accessor is then added as an Nth
+ * element in the tuple of accessors. In this case we preserve the order of data
+ * in the expression tree.
+ *
+ * This is the specialisation of extract accessor method for different operation
+ * types in the PlaceHolder expression.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_ACCESSOR_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_ACCESSOR_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \struct ExtractAccessor: Extract Accessor Class is used to extract the
+/// accessor from a buffer.
+/// Depending on the type of the leaf node we can get a read accessor or a
+/// read_write accessor
+template <typename Evaluator>
+struct ExtractAccessor;
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorMap
+template <typename PlainObjectType, int Options_, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<const TensorMap<PlainObjectType, Options_>, Dev>> {
+ using actual_type = typename Eigen::internal::remove_all<
+ typename Eigen::internal::traits<PlainObjectType>::Scalar>::type;
+ static inline auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<const TensorMap<PlainObjectType, Options_>, Dev>
+ eval)
+ -> decltype(utility::tuple::make_tuple(
+ (eval.device()
+ .template get_sycl_accessor<cl::sycl::access::mode::read, true,
+ actual_type>(
+ eval.dimensions().TotalSize(), cgh,
+ eval.derived().data())))) {
+ return utility::tuple::make_tuple(
+ (eval.device()
+ .template get_sycl_accessor<cl::sycl::access::mode::read, true,
+ actual_type>(
+ eval.dimensions().TotalSize(), cgh, eval.derived().data())));
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorMap
+template <typename PlainObjectType, int Options_, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<TensorMap<PlainObjectType, Options_>, Dev>> {
+ using actual_type = typename Eigen::internal::remove_all<
+ typename Eigen::internal::traits<PlainObjectType>::Scalar>::type;
+
+ static inline auto getTuple(
+ cl::sycl::handler& cgh,
+ TensorEvaluator<TensorMap<PlainObjectType, Options_>, Dev> eval)
+ -> decltype(utility::tuple::make_tuple(
+ (eval.device()
+ .template get_sycl_accessor<cl::sycl::access::mode::read_write,
+ true, actual_type>(
+ eval.dimensions().TotalSize(), cgh,
+ eval.derived().data())))) {
+ return utility::tuple::make_tuple(
+ (eval.device()
+ .template get_sycl_accessor<cl::sycl::access::mode::read_write,
+ true, actual_type>(
+ eval.dimensions().TotalSize(), cgh, eval.derived().data())));
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorCwiseNullaryOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<const TensorCwiseNullaryOp<OP, RHSExpr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<const TensorCwiseNullaryOp<OP, RHSExpr>, Dev> eval)
+ -> decltype(ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl())) {
+ auto RHSTuple = ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl());
+ return RHSTuple;
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseNullaryOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<TensorCwiseNullaryOp<OP, RHSExpr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<TensorCwiseNullaryOp<OP, RHSExpr>, Dev> eval)
+ -> decltype(ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl())) {
+ auto RHSTuple = ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl());
+ return RHSTuple;
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorBroadcastingOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<const TensorBroadcastingOp<OP, RHSExpr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<const TensorBroadcastingOp<OP, RHSExpr>, Dev> eval)
+ -> decltype(ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl())) {
+ auto RHSTuple = ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl());
+ return RHSTuple;
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorBroadcastingOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<TensorBroadcastingOp<OP, RHSExpr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<TensorBroadcastingOp<OP, RHSExpr>, Dev> eval)
+ -> decltype(ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl())) {
+ auto RHSTuple = ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl());
+ return RHSTuple;
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorCwiseUnaryOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<const TensorCwiseUnaryOp<OP, RHSExpr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<const TensorCwiseUnaryOp<OP, RHSExpr>, Dev> eval)
+ -> decltype(ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl())) {
+ auto RHSTuple = ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl());
+ return RHSTuple;
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseUnaryOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorCwiseUnaryOp<OP, RHSExpr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<TensorCwiseUnaryOp<OP, RHSExpr>, Dev> eval)
+ -> decltype(ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl())) {
+ auto RHSTuple = ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.impl());
+ return RHSTuple;
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorCwiseBinaryOp
+template <typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<const TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>, Dev>> {
+ static auto getTuple(cl::sycl::handler& cgh,
+ const TensorEvaluator<
+ const TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>, Dev>
+ eval)
+ -> decltype(utility::tuple::append(
+ ExtractAccessor<TensorEvaluator<LHSExpr, Dev>>::getTuple(
+ cgh, eval.left_impl()),
+ ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.right_impl()))) {
+ auto LHSTuple = ExtractAccessor<TensorEvaluator<LHSExpr, Dev>>::getTuple(
+ cgh, eval.left_impl());
+ auto RHSTuple = ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.right_impl());
+ return utility::tuple::append(LHSTuple, RHSTuple);
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseBinaryOp
+template <typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>, Dev>
+ eval)
+ -> decltype(utility::tuple::append(
+ ExtractAccessor<TensorEvaluator<LHSExpr, Dev>>::getTuple(
+ cgh, eval.left_impl()),
+ ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.right_impl()))) {
+ auto LHSTuple = ExtractAccessor<TensorEvaluator<LHSExpr, Dev>>::getTuple(
+ cgh, eval.left_impl());
+ auto RHSTuple = ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.right_impl());
+ return utility::tuple::append(LHSTuple, RHSTuple);
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorCwiseTernaryOp
+template <typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,
+ typename Dev>
+struct ExtractAccessor<TensorEvaluator<
+ const TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<
+ const TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>
+ eval)
+ -> decltype(utility::tuple::append(
+ ExtractAccessor<TensorEvaluator<Arg1Expr, Dev>>::getTuple(
+ cgh, eval.arg1Impl()),
+ utility::tuple::append(
+ ExtractAccessor<TensorEvaluator<Arg2Expr, Dev>>::getTuple(
+ cgh, eval.arg2Impl()),
+ ExtractAccessor<TensorEvaluator<Arg3Expr, Dev>>::getTuple(
+ cgh, eval.arg3Impl())))) {
+ auto Arg1Tuple = ExtractAccessor<TensorEvaluator<Arg1Expr, Dev>>::getTuple(
+ cgh, eval.arg1Impl());
+ auto Arg2Tuple = ExtractAccessor<TensorEvaluator<Arg2Expr, Dev>>::getTuple(
+ cgh, eval.arg2Impl());
+ auto Arg3Tuple = ExtractAccessor<TensorEvaluator<Arg3Expr, Dev>>::getTuple(
+ cgh, eval.arg3Impl());
+ return utility::tuple::append(Arg1Tuple,
+ utility::tuple::append(Arg2Tuple, Arg3Tuple));
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseTernaryOp
+template <typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,
+ typename Dev>
+struct ExtractAccessor<TensorEvaluator<
+ TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<
+ TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>
+ eval)
+ -> decltype(utility::tuple::append(
+ ExtractAccessor<TensorEvaluator<Arg1Expr, Dev>>::getTuple(
+ cgh, eval.arg1Impl()),
+ utility::tuple::append(
+ ExtractAccessor<TensorEvaluator<Arg2Expr, Dev>>::getTuple(
+ cgh, eval.arg2Impl()),
+ ExtractAccessor<TensorEvaluator<Arg3Expr, Dev>>::getTuple(
+ cgh, eval.arg3Impl())))) {
+ auto Arg1Tuple = ExtractAccessor<TensorEvaluator<Arg1Expr, Dev>>::getTuple(
+ cgh, eval.arg1Impl());
+ auto Arg2Tuple = ExtractAccessor<TensorEvaluator<Arg2Expr, Dev>>::getTuple(
+ cgh, eval.arg2Impl());
+ auto Arg3Tuple = ExtractAccessor<TensorEvaluator<Arg3Expr, Dev>>::getTuple(
+ cgh, eval.arg3Impl());
+ return utility::tuple::append(Arg1Tuple,
+ utility::tuple::append(Arg2Tuple, Arg3Tuple));
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorSelectOp
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>,
+ Dev>
+ eval)
+ -> decltype(utility::tuple::append(
+ ExtractAccessor<TensorEvaluator<IfExpr, Dev>>::getTuple(
+ cgh, eval.cond_impl()),
+ utility::tuple::append(
+ ExtractAccessor<TensorEvaluator<ThenExpr, Dev>>::getTuple(
+ cgh, eval.then_impl()),
+ ExtractAccessor<TensorEvaluator<ElseExpr, Dev>>::getTuple(
+ cgh, eval.else_impl())))) {
+ auto IfTuple = ExtractAccessor<TensorEvaluator<IfExpr, Dev>>::getTuple(
+ cgh, eval.cond_impl());
+ auto ThenTuple = ExtractAccessor<TensorEvaluator<ThenExpr, Dev>>::getTuple(
+ cgh, eval.then_impl());
+ auto ElseTuple = ExtractAccessor<TensorEvaluator<ElseExpr, Dev>>::getTuple(
+ cgh, eval.else_impl());
+ return utility::tuple::append(IfTuple,
+ utility::tuple::append(ThenTuple, ElseTuple));
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseSelectOp
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>
+ eval)
+ -> decltype(utility::tuple::append(
+ ExtractAccessor<TensorEvaluator<IfExpr, Dev>>::getTuple(
+ cgh, eval.cond_impl()),
+ utility::tuple::append(
+ ExtractAccessor<TensorEvaluator<ThenExpr, Dev>>::getTuple(
+ cgh, eval.then_impl()),
+ ExtractAccessor<TensorEvaluator<ElseExpr, Dev>>::getTuple(
+ cgh, eval.else_impl())))) {
+ auto IfTuple = ExtractAccessor<TensorEvaluator<IfExpr, Dev>>::getTuple(
+ cgh, eval.cond_impl());
+ auto ThenTuple = ExtractAccessor<TensorEvaluator<ThenExpr, Dev>>::getTuple(
+ cgh, eval.then_impl());
+ auto ElseTuple = ExtractAccessor<TensorEvaluator<ElseExpr, Dev>>::getTuple(
+ cgh, eval.else_impl());
+ return utility::tuple::append(IfTuple,
+ utility::tuple::append(ThenTuple, ElseTuple));
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorAssignOp
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<
+ TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev>> {
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> eval)
+ -> decltype(utility::tuple::append(
+ ExtractAccessor<TensorEvaluator<LHSExpr, Dev>>::getTuple(
+ cgh, eval.left_impl()),
+ ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.right_impl()))) {
+ auto LHSTuple = ExtractAccessor<TensorEvaluator<LHSExpr, Dev>>::getTuple(
+ cgh, eval.left_impl());
+ auto RHSTuple = ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+ cgh, eval.right_impl());
+ return utility::tuple::append(LHSTuple, RHSTuple);
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorAssignOp; collects the accessors of both sides of the assignment.
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev>> {
+  static auto getTuple(
+      cl::sycl::handler& cgh,
+      const TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> eval)
+      -> decltype(utility::tuple::append(
+          ExtractAccessor<TensorEvaluator<LHSExpr, Dev>>::getTuple(
+              cgh, eval.left_impl()),
+          ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+              cgh, eval.right_impl()))) {
+    auto LHSTuple = ExtractAccessor<TensorEvaluator<LHSExpr, Dev>>::getTuple(
+        cgh, eval.left_impl());
+    auto RHSTuple = ExtractAccessor<TensorEvaluator<RHSExpr, Dev>>::getTuple(
+        cgh, eval.right_impl());
+    return utility::tuple::append(LHSTuple, RHSTuple);
+  }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorForcedEvalOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev>> {
+ using actual_type =
+ typename Eigen::internal::remove_all<typename TensorEvaluator<
+ const TensorForcedEvalOp<Expr>, Dev>::CoeffReturnType>::type;
+ static auto getTuple(
+ cl::sycl::handler& cgh,
+ const TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> eval)
+ -> decltype(utility::tuple::make_tuple(
+ (eval.device()
+ .template get_sycl_accessor<cl::sycl::access::mode::read, false,
+ actual_type>(
+ eval.dimensions().TotalSize(), cgh, eval.data())))) {
+ return utility::tuple::make_tuple(
+ (eval.device()
+ .template get_sycl_accessor<cl::sycl::access::mode::read, false,
+ actual_type>(
+ eval.dimensions().TotalSize(), cgh, eval.data())));
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorForcedEvalOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorForcedEvalOp<Expr>, Dev>>
+ : ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev>> {};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorEvalToOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev>> {
+ using actual_type =
+ typename Eigen::internal::remove_all<typename TensorEvaluator<
+ const TensorEvalToOp<Expr>, Dev>::CoeffReturnType>::type;
+
+ static auto getTuple(cl::sycl::handler& cgh,
+ TensorEvaluator<const TensorEvalToOp<Expr>, Dev> eval)
+ -> decltype(utility::tuple::append(
+ utility::tuple::make_tuple(
+ (eval.device()
+ .template get_sycl_accessor<cl::sycl::access::mode::write,
+ false, actual_type>(
+ eval.dimensions().TotalSize(), cgh, eval.data()))),
+ ExtractAccessor<TensorEvaluator<Expr, Dev>>::getTuple(cgh,
+ eval.impl()))) {
+ auto LHSTuple = utility::tuple::make_tuple(
+ (eval.device()
+ .template get_sycl_accessor<cl::sycl::access::mode::write, false,
+ actual_type>(
+ eval.dimensions().TotalSize(), cgh, eval.data())));
+
+ auto RHSTuple =
+ ExtractAccessor<TensorEvaluator<Expr, Dev>>::getTuple(cgh, eval.impl());
+ return utility::tuple::append(LHSTuple, RHSTuple);
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorEvalToOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorEvalToOp<Expr>, Dev>>
+ : ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev>> {};
+
+/// template deduction for \ref ExtractAccessor
+template <typename Evaluator>
+auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr)
+ -> decltype(ExtractAccessor<Evaluator>::getTuple(cgh, expr)) {
+ return ExtractAccessor<Evaluator>::getTuple(cgh, expr);
+}
+}
+}
+}
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_ACCESSOR_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
new file mode 100644
index 000000000..f69c5afcb
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
@@ -0,0 +1,313 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclExtractFunctors.h
+ *
+ * \brief:
+ * Used to extract all the functors allocated to each node of the expression
+*tree.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_FUNCTORS_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_FUNCTORS_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \struct FunctorExtractor: This struct is used to extract the functors
+/// constructed on
+/// the host-side, to pack them and reuse them in reconstruction of the
+/// expression on the device.
+/// We have to do that as in Eigen the functors are not stateless so we cannot
+/// re-instantiate them on the device.
+/// We have to pass whatever instantiated to the device.
+template <typename Evaluator>
+struct FunctorExtractor;
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorMap:
+template <typename PlainObjectType, int Options_, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<TensorMap<PlainObjectType, Options_>, Dev>> {
+ using Dimensions = typename PlainObjectType::Dimensions;
+ const Dimensions m_dimensions;
+ const Dimensions& dimensions() const { return m_dimensions; }
+ FunctorExtractor(
+ const TensorEvaluator<TensorMap<PlainObjectType, Options_>, Dev>& expr)
+ : m_dimensions(expr.dimensions()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorMap
+template <typename PlainObjectType, int Options_, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<const TensorMap<PlainObjectType, Options_>, Dev>> {
+ using Dimensions = typename PlainObjectType::Dimensions;
+ const Dimensions m_dimensions;
+ const Dimensions& dimensions() const { return m_dimensions; }
+ FunctorExtractor(
+ const TensorEvaluator<const TensorMap<PlainObjectType, Options_>, Dev>&
+ expr)
+ : m_dimensions(expr.dimensions()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorForcedEvalOp
+template <typename Expr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<TensorForcedEvalOp<Expr>, Dev>> {
+ using Dimensions = typename Expr::Dimensions;
+ const Dimensions m_dimensions;
+ const Dimensions& dimensions() const { return m_dimensions; }
+ FunctorExtractor(const TensorEvaluator<TensorForcedEvalOp<Expr>, Dev>& expr)
+ : m_dimensions(expr.dimensions()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorForcedEvalOp
+template <typename Expr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev>> {
+ using Dimensions =
+ typename TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev>::Dimensions;
+ const Dimensions m_dimensions;
+ const Dimensions& dimensions() const { return m_dimensions; }
+ FunctorExtractor(
+ const TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev>& expr)
+ : m_dimensions(expr.dimensions()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorCwiseNullaryOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<TensorCwiseNullaryOp<OP, RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ OP func;
+ FunctorExtractor(
+ TensorEvaluator<TensorCwiseNullaryOp<OP, RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()), func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseNullaryOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<const TensorCwiseNullaryOp<OP, RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ OP func;
+ FunctorExtractor(
+ const TensorEvaluator<const TensorCwiseNullaryOp<OP, RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()), func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorBroadcastingOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<TensorBroadcastingOp<OP, RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ OP func;
+ FunctorExtractor(
+ const TensorEvaluator<TensorBroadcastingOp<OP, RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()), func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorBroadcastingOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<const TensorBroadcastingOp<OP, RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ OP func;
+ FunctorExtractor(
+ const TensorEvaluator<const TensorBroadcastingOp<OP, RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()), func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorCwiseUnaryOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<TensorCwiseUnaryOp<OP, RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ OP func;
+ FunctorExtractor(
+ const TensorEvaluator<TensorCwiseUnaryOp<OP, RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()), func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseUnaryOp
+template <typename OP, typename RHSExpr, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<const TensorCwiseUnaryOp<OP, RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ OP func;
+ FunctorExtractor(
+ const TensorEvaluator<const TensorCwiseUnaryOp<OP, RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()), func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorCwiseBinaryOp
+template <typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<LHSExpr, Dev>> lhsExpr;
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ OP func;
+ FunctorExtractor(
+ const TensorEvaluator<TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>, Dev>&
+ expr)
+ : lhsExpr(expr.left_impl()),
+ rhsExpr(expr.right_impl()),
+ func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseBinaryOp
+template <typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<const TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<LHSExpr, Dev>> lhsExpr;
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ OP func;
+ FunctorExtractor(const TensorEvaluator<
+ const TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>, Dev>& expr)
+ : lhsExpr(expr.left_impl()),
+ rhsExpr(expr.right_impl()),
+ func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseTernaryOp
+template <typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,
+ typename Dev>
+struct FunctorExtractor<TensorEvaluator<
+ const TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<Arg1Expr, Dev>> arg1Expr;
+ FunctorExtractor<TensorEvaluator<Arg2Expr, Dev>> arg2Expr;
+ FunctorExtractor<TensorEvaluator<Arg3Expr, Dev>> arg3Expr;
+ OP func;
+ FunctorExtractor(const TensorEvaluator<
+ const TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>,
+ Dev>& expr)
+ : arg1Expr(expr.arg1Impl()),
+ arg2Expr(expr.arg2Impl()),
+ arg3Expr(expr.arg3Impl()),
+ func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorCwiseTernaryOp
+template <typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,
+ typename Dev>
+struct FunctorExtractor<TensorEvaluator<
+ TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<Arg1Expr, Dev>> arg1Expr;
+ FunctorExtractor<TensorEvaluator<Arg2Expr, Dev>> arg2Expr;
+ FunctorExtractor<TensorEvaluator<Arg3Expr, Dev>> arg3Expr;
+ OP func;
+ FunctorExtractor(
+ const TensorEvaluator<
+ TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& expr)
+ : arg1Expr(expr.arg1Impl()),
+ arg2Expr(expr.arg2Impl()),
+ arg3Expr(expr.arg3Impl()),
+ func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseSelectOp
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<IfExpr, Dev>> ifExpr;
+ FunctorExtractor<TensorEvaluator<ThenExpr, Dev>> thenExpr;
+ FunctorExtractor<TensorEvaluator<ElseExpr, Dev>> elseExpr;
+ FunctorExtractor(const TensorEvaluator<
+ const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& expr)
+ : ifExpr(expr.cond_impl()),
+ thenExpr(expr.then_impl()),
+ elseExpr(expr.else_impl()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorCwiseSelectOp; extracts the functors of the three sub-expressions.
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct FunctorExtractor<
+    TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>> {
+  FunctorExtractor<TensorEvaluator<IfExpr, Dev>> ifExpr;
+  FunctorExtractor<TensorEvaluator<ThenExpr, Dev>> thenExpr;
+  FunctorExtractor<TensorEvaluator<ElseExpr, Dev>> elseExpr;
+  FunctorExtractor(
+      const TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>&
+          expr)
+      : ifExpr(expr.cond_impl()),
+        thenExpr(expr.then_impl()),
+        elseExpr(expr.else_impl()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorAssignOp
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<LHSExpr, Dev>> lhsExpr;
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ FunctorExtractor(
+ const TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr)
+ : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorAssignOp
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<
+ TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<LHSExpr, Dev>> lhsExpr;
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ FunctorExtractor(
+ const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr)
+ : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorEvalToOp
+template <typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<TensorEvalToOp<RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ FunctorExtractor(const TensorEvaluator<TensorEvalToOp<RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorEvalToOp
+template <typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev>> {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev>> rhsExpr;
+ FunctorExtractor(
+ const TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()) {}
+};
+
+/// template deduction function for FunctorExtractor
+template <typename Evaluator>
+auto extractFunctors(const Evaluator& evaluator)
+ -> FunctorExtractor<Evaluator> {
+ return FunctorExtractor<Evaluator>(evaluator);
+}
+} // namespace internal
+} // namespace TensorSycl
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_FUNCTORS_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
new file mode 100644
index 000000000..77e0e15e1
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
@@ -0,0 +1,188 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclLeafCount.h
+ *
+ * \brief:
+ * The leaf count is used in a pre-order traversal of the expression tree to
+ * count the number of leaf nodes in the expression
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_LEAF_COUNT_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_LEAF_COUNT_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \brief LeafCount used to counting terminal nodes. The total number of
+/// leaf nodes is used by MakePlaceHolderExprHelper to find the order
+/// of the leaf node in a expression tree at compile time.
+template <typename Expr>
+struct LeafCount;
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorMap
+template <typename PlainObjectType, int Options_,
+ template <class> class MakePointer_>
+struct LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_>> {
+ static const size_t Count = 1;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is TensorMap
+template <typename PlainObjectType, int Options_,
+ template <class> class MakePointer_>
+struct LeafCount<TensorMap<PlainObjectType, Options_, MakePointer_>> {
+ static const size_t Count = 1;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorCwiseNullaryOp
+template <typename OP, typename RHSExpr>
+struct LeafCount<const TensorCwiseNullaryOp<OP, RHSExpr>> {
+ static const size_t Count = LeafCount<RHSExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorCwiseNullaryOp
+template <typename OP, typename RHSExpr>
+struct LeafCount<TensorCwiseNullaryOp<OP, RHSExpr>> {
+ static const size_t Count = LeafCount<RHSExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorBroadcastingOp
+template <typename OP, typename RHSExpr>
+struct LeafCount<const TensorBroadcastingOp<OP, RHSExpr>> {
+ static const size_t Count = LeafCount<RHSExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorBroadcastingOp
+template <typename OP, typename RHSExpr>
+struct LeafCount<TensorBroadcastingOp<OP, RHSExpr>> {
+ static const size_t Count = LeafCount<RHSExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is const TensorCwiseUnaryOp
+template <typename OP, typename RHSExpr>
+struct LeafCount<const TensorCwiseUnaryOp<OP, RHSExpr>> {
+ static const size_t Count = LeafCount<RHSExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is TensorCwiseUnaryOp
+template <typename OP, typename RHSExpr>
+struct LeafCount<TensorCwiseUnaryOp<OP, RHSExpr>> {
+ static const size_t Count = LeafCount<RHSExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorCwiseBinaryOp
+template <typename OP, typename LHSExpr, typename RHSExpr>
+struct LeafCount<const TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>> {
+ static const size_t Count =
+ LeafCount<LHSExpr>::Count + LeafCount<RHSExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorCwiseBinaryOp
+template <typename OP, typename LHSExpr, typename RHSExpr>
+struct LeafCount<TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>> {
+ static const size_t Count =
+ LeafCount<LHSExpr>::Count + LeafCount<RHSExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorCwiseTernaryOp
+template <typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr>
+struct LeafCount<TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>> {
+ static const size_t Count = LeafCount<Arg1Expr>::Count +
+ LeafCount<Arg2Expr>::Count +
+ LeafCount<Arg3Expr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorCwiseTernaryOp
+template <typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr>
+struct LeafCount<const TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>> {
+ static const size_t Count = LeafCount<Arg1Expr>::Count +
+ LeafCount<Arg2Expr>::Count +
+ LeafCount<Arg3Expr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorCwiseSelectOp
+template <typename IfExpr, typename ThenExpr, typename ElseExpr>
+struct LeafCount<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>> {
+ static const size_t Count = LeafCount<IfExpr>::Count +
+ LeafCount<ThenExpr>::Count +
+ LeafCount<ElseExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorCwiseSelectOp
+template <typename IfExpr, typename ThenExpr, typename ElseExpr>
+struct LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>> {
+ static const size_t Count = LeafCount<IfExpr>::Count +
+ LeafCount<ThenExpr>::Count +
+ LeafCount<ElseExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorAssignOp
+template <typename LHSExpr, typename RHSExpr>
+struct LeafCount<TensorAssignOp<LHSExpr, RHSExpr>> {
+ static const size_t Count =
+ LeafCount<LHSExpr>::Count + LeafCount<RHSExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorAssignOp
+template <typename LHSExpr, typename RHSExpr>
+struct LeafCount<const TensorAssignOp<LHSExpr, RHSExpr>> {
+ static const size_t Count =
+ LeafCount<LHSExpr>::Count + LeafCount<RHSExpr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorForcedEvalOp
+template <typename Expr>
+struct LeafCount<const TensorForcedEvalOp<Expr>> {
+ static const size_t Count = 1;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorForcedEvalOp
+template <typename Expr>
+struct LeafCount<TensorForcedEvalOp<Expr>> {
+ static const size_t Count = 1;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorEvalToOp
+template <typename Expr>
+struct LeafCount<const TensorEvalToOp<Expr>> {
+ static const size_t Count = 1 + LeafCount<Expr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorEvalToOp
+template <typename Expr>
+struct LeafCount<TensorEvalToOp<Expr>> {
+ static const size_t Count = 1 + LeafCount<Expr>::Count;
+};
+}
+}
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_LEAF_COUNT_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h
new file mode 100644
index 000000000..87995a25e
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h
@@ -0,0 +1,151 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclPlaceHolder.h
+ *
+ * \brief:
+ * The PlaceHolder expression are nothing but a container preserving
+ * the order of actual data in the tuple of sycl buffer.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_HPP
+
+namespace Eigen {
+namespace internal {
+/// \struct PlaceHolder
+/// \brief PlaceHolder is used to replace the \ref TensorMap in the expression
+/// tree.
+/// PlaceHolder contains the order of the leaf node in the expression tree.
+template <typename Scalar, size_t N>
+struct PlaceHolder {
+ static constexpr size_t I = N;
+ using Type = Scalar;
+};
+
+template <typename PlainObjectType, int Options_,
+ template <class> class MakePointer_, size_t N>
+struct PlaceHolder<const TensorMap<PlainObjectType, Options_, MakePointer_>,
+ N> {
+ static constexpr size_t I = N;
+
+ using Type = const TensorMap<PlainObjectType, Options_, MakePointer_>;
+
+ typedef typename Type::Self Self;
+ typedef typename Type::Base Base;
+ typedef typename Type::Nested Nested;
+ typedef typename Type::StorageKind StorageKind;
+ typedef typename Type::Index Index;
+ typedef typename Type::Scalar Scalar;
+ typedef typename Type::RealScalar RealScalar;
+ typedef typename Type::CoeffReturnType CoeffReturnType;
+};
+
+/// \brief specialisation of the PlaceHolder node for TensorForcedEvalOp. The
+/// TensorForcedEvalOp act as a leaf node for its parent node.
+template <typename Expression, size_t N>
+struct PlaceHolder<const TensorForcedEvalOp<Expression>, N> {
+ static constexpr size_t I = N;
+
+ using Type = const TensorForcedEvalOp<Expression>;
+
+ typedef typename Type::Nested Nested;
+ typedef typename Type::StorageKind StorageKind;
+ typedef typename Type::Index Index;
+
+ typedef typename Type::Scalar Scalar;
+ typedef typename Type::Packet Packet;
+
+ typedef typename Type::RealScalar RealScalar;
+ typedef typename Type::CoeffReturnType CoeffReturnType;
+ typedef typename Type::PacketReturnType PacketReturnType;
+};
+
+template <typename Expression, size_t N>
+struct PlaceHolder<TensorForcedEvalOp<Expression>, N> {
+ static constexpr size_t I = N;
+
+ using Type = TensorForcedEvalOp<Expression>;
+
+ typedef typename Type::Nested Nested;
+ typedef typename Type::StorageKind StorageKind;
+ typedef typename Type::Index Index;
+
+ typedef typename Type::Scalar Scalar;
+ typedef typename Type::Packet Packet;
+
+ typedef typename Type::RealScalar RealScalar;
+ typedef typename Type::CoeffReturnType CoeffReturnType;
+ typedef typename Type::PacketReturnType PacketReturnType;
+};
+
+/// \brief specialisation of the PlaceHolder node for TensorMap
+template <typename PlainObjectType, int Options_,
+ template <class> class Makepointer_, size_t N>
+struct PlaceHolder<TensorMap<PlainObjectType, Options_, Makepointer_>, N> {
+ static constexpr size_t I = N;
+
+ using Type = TensorMap<PlainObjectType, Options_, Makepointer_>;
+
+ typedef typename Type::Self Self;
+ typedef typename Type::Base Base;
+ typedef typename Type::Nested Nested;
+ typedef typename Type::StorageKind StorageKind;
+ typedef typename Type::Index Index;
+ typedef typename Type::Scalar Scalar;
+ typedef typename Type::Packet Packet;
+ typedef typename Type::RealScalar RealScalar;
+ typedef typename Type::CoeffReturnType CoeffReturnType;
+ typedef typename Base::PacketReturnType PacketReturnType;
+};
+
+/// specialisation of the traits struct for PlaceHolder
+template <typename PlainObjectType, int Options_,
+ template <class> class Makepointer_, size_t N>
+struct traits<
+ PlaceHolder<TensorMap<PlainObjectType, Options_, Makepointer_>, N>>
+ : public traits<PlainObjectType> {
+ typedef traits<PlainObjectType> BaseTraits;
+ typedef typename BaseTraits::Scalar Scalar;
+ typedef typename BaseTraits::StorageKind StorageKind;
+ typedef typename BaseTraits::Index Index;
+ static const int NumDimensions = BaseTraits::NumDimensions;
+ static const int Layout = BaseTraits::Layout;
+ enum {
+ Options = Options_,
+ Flags = BaseTraits::Flags,
+ };
+};
+
+template <typename PlainObjectType, int Options_,
+ template <class> class Makepointer_, size_t N>
+struct traits<
+ PlaceHolder<const TensorMap<PlainObjectType, Options_, Makepointer_>, N>>
+ : public traits<PlainObjectType> {
+ typedef traits<PlainObjectType> BaseTraits;
+ typedef typename BaseTraits::Scalar Scalar;
+ typedef typename BaseTraits::StorageKind StorageKind;
+ typedef typename BaseTraits::Index Index;
+ static const int NumDimensions = BaseTraits::NumDimensions;
+ static const int Layout = BaseTraits::Layout;
+ enum {
+ Options = Options_,
+ Flags = BaseTraits::Flags,
+ };
+};
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
new file mode 100644
index 000000000..dbd7a8544
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
@@ -0,0 +1,293 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclPlaceHolderExpr.h
+ *
+ * \brief:
+ * This is the specialisation of the placeholder expression based on the
+ * operation type
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_EXPR_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_EXPR_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \struct PlaceHolderExpression
+/// \brief it is used to create the PlaceHolder expression. The PlaceHolder
+/// expression is a copy of the expression type in which the TensorMap of the
+/// expression has been replaced with PlaceHolder.
+template <typename Expr, size_t N>
+struct PlaceHolderExpression;
+
+/// specialisation of the \ref PlaceHolderExpression when the node is TensorMap
+template <typename Scalar_, int Options_, int Options2_, int NumIndices_,
+ typename IndexType_, template <class> class MakePointer_, size_t N>
+struct PlaceHolderExpression<
+ Eigen::TensorMap<Eigen::Tensor<Scalar_, NumIndices_, Options_, IndexType_>,
+ Options2_, MakePointer_>,
+ N> {
+ using Type = Eigen::internal::PlaceHolder<
+ Eigen::TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>,
+ Options2_, MakePointer_>,
+ N>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is const
+/// TensorMap
+template <typename Scalar_, int Options_, int Options2_, int NumIndices_,
+ typename IndexType_, template <class> class MakePointer_, size_t N>
+struct PlaceHolderExpression<
+ const Eigen::TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>,
+ Options2_, MakePointer_>,
+ N> {
+ using Type = const Eigen::internal::PlaceHolder<
+ const TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>,
+ Options2_, MakePointer_>,
+ N>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorCwiseNullaryOp
+template <typename OP, typename RHSExpr, size_t N>
+struct PlaceHolderExpression<TensorCwiseNullaryOp<OP, RHSExpr>, N> {
+ using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+ using Type = TensorCwiseNullaryOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is const
+/// TensorCwiseNullaryOp
+template <typename OP, typename RHSExpr, size_t N>
+struct PlaceHolderExpression<const TensorCwiseNullaryOp<OP, RHSExpr>, N> {
+ using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+ using Type = const TensorCwiseNullaryOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorBroadcastingOp
+template <typename OP, typename RHSExpr, size_t N>
+struct PlaceHolderExpression<TensorBroadcastingOp<OP, RHSExpr>, N> {
+ using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+ using Type = TensorBroadcastingOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is const
+/// TensorBroadcastingOp
+template <typename OP, typename RHSExpr, size_t N>
+struct PlaceHolderExpression<const TensorBroadcastingOp<OP, RHSExpr>, N> {
+ using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+ using Type = const TensorBroadcastingOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorCwiseUnaryOp
+template <typename OP, typename RHSExpr, size_t N>
+struct PlaceHolderExpression<TensorCwiseUnaryOp<OP, RHSExpr>, N> {
+ using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+ using Type = TensorCwiseUnaryOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is const
+/// TensorCwiseUnaryOp
+template <typename OP, typename RHSExpr, size_t N>
+struct PlaceHolderExpression<const TensorCwiseUnaryOp<OP, RHSExpr>, N> {
+ using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+ using Type = const TensorCwiseUnaryOp<OP, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorCwiseBinaryOp
+template <typename OP, typename LHSExpr, typename RHSExpr, size_t N>
+struct PlaceHolderExpression<TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>, N> {
+ static const size_t RHSLeafCount = LeafCount<RHSExpr>::Count;
+
+ using LHSPlaceHolderType =
+ typename PlaceHolderExpression<LHSExpr, N - RHSLeafCount>::Type;
+ using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+
+ using Type = TensorCwiseBinaryOp<OP, LHSPlaceHolderType, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is const
+/// TensorCwiseBinaryOp
+template <typename OP, typename LHSExpr, typename RHSExpr, size_t N>
+struct PlaceHolderExpression<const TensorCwiseBinaryOp<OP, LHSExpr, RHSExpr>,
+ N> {
+ static const size_t RHSLeafCount = LeafCount<RHSExpr>::Count;
+
+ using LHSPlaceHolderType =
+ typename PlaceHolderExpression<LHSExpr, N - RHSLeafCount>::Type;
+ using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+
+ using Type =
+ const TensorCwiseBinaryOp<OP, LHSPlaceHolderType, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is const
+/// TensorCwiseTernaryOp
+template <typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,
+ size_t N>
+struct PlaceHolderExpression<
+ const TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>, N> {
+ static const size_t Arg3LeafCount = LeafCount<Arg3Expr>::Count;
+ static const size_t Arg2LeafCount = LeafCount<Arg2Expr>::Count;
+
+ using Arg1PlaceHolderType =
+ typename PlaceHolderExpression<Arg1Expr,
+ N - Arg3LeafCount - Arg2LeafCount>::Type;
+ using Arg2PlaceHolderType =
+ typename PlaceHolderExpression<Arg2Expr, N - Arg3LeafCount>::Type;
+
+ using Arg3PlaceHolderType = typename PlaceHolderExpression<Arg3Expr, N>::Type;
+
+ using Type =
+ const TensorCwiseTernaryOp<OP, Arg1PlaceHolderType, Arg2PlaceHolderType,
+ Arg3PlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorCwiseTernaryOp
+template <typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,
+ size_t N>
+struct PlaceHolderExpression<
+ TensorCwiseTernaryOp<OP, Arg1Expr, Arg2Expr, Arg3Expr>, N> {
+ static const size_t Arg3LeafCount = LeafCount<Arg3Expr>::Count;
+ static const size_t Arg2LeafCount = LeafCount<Arg2Expr>::Count;
+
+ using Arg1PlaceHolderType =
+ typename PlaceHolderExpression<Arg1Expr,
+ N - Arg3LeafCount - Arg2LeafCount>::Type;
+ using Arg2PlaceHolderType =
+ typename PlaceHolderExpression<Arg2Expr, N - Arg3LeafCount>::Type;
+
+ using Arg3PlaceHolderType = typename PlaceHolderExpression<Arg3Expr, N>::Type;
+
+ using Type = TensorCwiseTernaryOp<OP, Arg1PlaceHolderType,
+ Arg2PlaceHolderType, Arg3PlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is const
+/// TensorSelectOp
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, size_t N>
+struct PlaceHolderExpression<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>,
+ N> {
+ static const size_t ElseLeafCount = LeafCount<ElseExpr>::Count;
+ static const size_t ThenLeafCount = LeafCount<ThenExpr>::Count;
+
+ using IfPlaceHolderType =
+ typename PlaceHolderExpression<IfExpr,
+ N - ElseLeafCount - ThenLeafCount>::Type;
+ using ThenPlaceHolderType =
+ typename PlaceHolderExpression<ThenExpr, N - ElseLeafCount>::Type;
+
+ using ElsePlaceHolderType = typename PlaceHolderExpression<ElseExpr, N>::Type;
+
+ using Type = const TensorSelectOp<IfPlaceHolderType, ThenPlaceHolderType,
+ ElsePlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorSelectOp
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, size_t N>
+struct PlaceHolderExpression<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, N> {
+ static const size_t ElseLeafCount = LeafCount<ElseExpr>::Count;
+ static const size_t ThenLeafCount = LeafCount<ThenExpr>::Count;
+
+ using IfPlaceHolderType =
+ typename PlaceHolderExpression<IfExpr,
+ N - ElseLeafCount - ThenLeafCount>::Type;
+ using ThenPlaceHolderType =
+ typename PlaceHolderExpression<ThenExpr, N - ElseLeafCount>::Type;
+
+ using ElsePlaceHolderType = typename PlaceHolderExpression<ElseExpr, N>::Type;
+
+ using Type = TensorSelectOp<IfPlaceHolderType, ThenPlaceHolderType,
+ ElsePlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorAssignOp
+template <typename LHSExpr, typename RHSExpr, size_t N>
+struct PlaceHolderExpression<TensorAssignOp<LHSExpr, RHSExpr>, N> {
+ static const size_t RHSLeafCount = LeafCount<RHSExpr>::Count;
+
+ using LHSPlaceHolderType =
+ typename PlaceHolderExpression<LHSExpr, N - RHSLeafCount>::Type;
+ using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+
+ using Type = TensorAssignOp<LHSPlaceHolderType, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is const
+/// TensorAssignOp
+template <typename LHSExpr, typename RHSExpr, size_t N>
+struct PlaceHolderExpression<const TensorAssignOp<LHSExpr, RHSExpr>, N> {
+ static const size_t RHSLeafCount = LeafCount<RHSExpr>::Count;
+
+ using LHSPlaceHolderType =
+ typename PlaceHolderExpression<LHSExpr, N - RHSLeafCount>::Type;
+ using RHSPlaceHolderType = typename PlaceHolderExpression<RHSExpr, N>::Type;
+
+ using Type = const TensorAssignOp<LHSPlaceHolderType, RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is const
+/// TensorForcedEvalOp
+template <typename Expr, size_t N>
+struct PlaceHolderExpression<const TensorForcedEvalOp<Expr>, N> {
+ using Type =
+ const Eigen::internal::PlaceHolder<const TensorForcedEvalOp<Expr>, N>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorForcedEvalOp
+template <typename Expr, size_t N>
+struct PlaceHolderExpression<TensorForcedEvalOp<Expr>, N> {
+ using Type = Eigen::internal::PlaceHolder<TensorForcedEvalOp<Expr>, N>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is const
+/// TensorEvalToOp
+template <typename Expr, size_t N>
+struct PlaceHolderExpression<const TensorEvalToOp<Expr>, N> {
+ static const size_t RHSLeafCount = LeafCount<Expr>::Count;
+
+ using RHSPlaceHolderType = typename PlaceHolderExpression<Expr, N>::Type;
+
+ using Type = const TensorEvalToOp<RHSPlaceHolderType>;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorEvalToOp
+template <typename Expr, size_t N>
+struct PlaceHolderExpression<TensorEvalToOp<Expr>, N> {
+ static const size_t RHSLeafCount = LeafCount<Expr>::Count;
+
+ using RHSPlaceHolderType = typename PlaceHolderExpression<Expr, N>::Type;
+
+ using Type = TensorEvalToOp<RHSPlaceHolderType>;
+};
+
+/// template deduction for \ref PlaceHolderExpression struct
+template <typename Expr>
+struct createPlaceHolderExpression {
+ static const size_t TotalLeaves = LeafCount<Expr>::Count;
+ using Type = typename PlaceHolderExpression<Expr, TotalLeaves - 1>::Type;
+};
+}
+}
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_EXPR_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
new file mode 100644
index 000000000..3758d46a0
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
@@ -0,0 +1,84 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Cummins Chris PhD student at The University of Edinburgh.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclRun.h
+ *
+ * \brief:
+ * Schedule_kernel invokes a specialised version of the kernel struct. The
+ * specialisation is based on the data dimension in sycl buffer
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+/// The run function in TensorSycl converts the expression tree to a buffer
+/// based expression tree;
+/// creates the expression tree for the device with accessor to buffers;
+/// construct the kernel and submit it to the sycl queue.
+template <typename Expr, typename Dev>
+void run(Expr &expr, Dev &dev) {
+ Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev);
+ const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
+ if (needs_assign) {
+ using PlaceHolderExpr =
+ typename internal::createPlaceHolderExpression<Expr>::Type;
+ auto functors = internal::extractFunctors(evaluator);
+
+ dev.m_queue.submit([&](cl::sycl::handler &cgh) {
+
+ // create a tuple of accessors from Evaluator
+ auto tuple_of_accessors =
+ internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
+ const auto range =
+ utility::tuple::get<0>(tuple_of_accessors).get_range()[0];
+
+ size_t outTileSize = range;
+ if (range > 64) outTileSize = 64;
+ size_t yMode = range % outTileSize;
+ int yRange = static_cast<int>(range);
+ if (yMode != 0) yRange += (outTileSize - yMode);
+
+ // run the kernel
+ cgh.parallel_for<PlaceHolderExpr>(
+ cl::sycl::nd_range<1>(cl::sycl::range<1>(yRange),
+ cl::sycl::range<1>(outTileSize)),
+ [=](cl::sycl::nd_item<1> itemID) {
+ using DevExpr =
+ typename internal::ConvertToDeviceExpression<Expr>::Type;
+
+ auto device_expr =
+ internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(
+ functors, tuple_of_accessors);
+ auto device_evaluator =
+ Eigen::TensorEvaluator<decltype(device_expr.expr),
+ Eigen::DefaultDevice>(
+ device_expr.expr, Eigen::DefaultDevice());
+
+ if (itemID.get_global_linear_id() < range) {
+ device_evaluator.evalScalar(
+ static_cast<int>(itemID.get_global_linear_id()));
+ }
+ });
+ });
+ dev.m_queue.throw_asynchronous();
+ }
+ evaluator.cleanup();
+}
+} // namespace TensorSycl
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h
new file mode 100644
index 000000000..8b9fc52c4
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h
@@ -0,0 +1,264 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclTuple.h
+ *
+ * \brief:
+ * Minimal implementation of std::tuple that can be used inside a SYCL kernel.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_TUPLE_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_TUPLE_HPP
+namespace utility {
+namespace tuple {
+/// \struct EnableIf
+/// \brief The EnableIf struct is used to statically define type based on the
+/// condition.
+template <bool, typename T = void>
+struct EnableIf {};
+/// \brief specialisation of the \ref EnableIf when the condition is true
+template <typename T>
+struct EnableIf<true, T> {
+ typedef T type;
+};
+
+/// \struct Tuple
+/// \brief is a fixed-size collection of heterogeneous values
+/// \tparam Ts... - the types of the elements that the tuple stores.
+/// Empty list is supported.
+template <class... Ts>
+struct Tuple {};
+
+/// \brief specialisation of the \ref Tuple class when the tuple has at least
+/// one element.
+/// \tparam T : the type of the first element in the tuple.
+/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty.
+template <class T, class... Ts>
+struct Tuple<T, Ts...> {
+ Tuple(T t, Ts... ts) : head(t), tail(ts...) {}
+
+ T head;
+ Tuple<Ts...> tail;
+};
+
+/// \struct ElemTypeHolder
+/// \brief ElemTypeHolder class is used to specify the types of the
+/// elements inside the tuple
+/// \tparam size_t the index of the queried element inside the tuple
+/// \tparam class the tuple class
+template <size_t, class>
+struct ElemTypeHolder;
+
+/// \brief specialisation of the \ref ElemTypeHolder class when the queried
+/// element index k is 0 (the first element of the tuple)
+template <class T, class... Ts>
+struct ElemTypeHolder<0, Tuple<T, Ts...>> {
+ typedef T type;
+};
+
+/// \brief specialisation of the \ref ElemTypeHolder class when the queried
+/// element index k is bigger than 0. It recursively calls itself to
+/// detect the type of each element in the tuple
+/// \tparam T : the type of the first element in the tuple.
+/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty.
+/// \tparam K is the Kth element in the tuple
+template <size_t k, class T, class... Ts>
+struct ElemTypeHolder<k, Tuple<T, Ts...>> {
+ typedef typename ElemTypeHolder<k - 1, Tuple<Ts...>>::type type;
+};
+
+/// get
+/// \brief Extracts the first element from the tuple.
+/// K=0 represents the first element of the tuple. The tuple cannot be empty.
+/// \tparam Ts... are the elements type in the tuple.
+/// \param t is the tuple whose contents to extract
+/// \return typename ElemTypeHolder<0, Tuple<Ts...>>::type &>::type
+template <size_t k, class... Ts>
+typename EnableIf<k == 0,
+ typename ElemTypeHolder<0, Tuple<Ts...>>::type &>::type
+get(Tuple<Ts...> &t) {
+ return t.head;
+}
+/// get
+/// \brief Extracts the Kth element from the tuple.
+/// \tparam K is an integer value in [0,sizeof...(Types)).
+/// \tparam T is the (sizeof...(Types) -(K+1)) element in the tuple
+/// \tparam Ts... are the elements type in the tuple.
+/// \param t is the tuple whose contents to extract
+/// \return typename ElemTypeHolder<K, Tuple<Ts...>>::type &>::type
+template <size_t k, class T, class... Ts>
+typename EnableIf<k != 0,
+ typename ElemTypeHolder<k, Tuple<T, Ts...>>::type &>::type
+get(Tuple<T, Ts...> &t) {
+ return get<k - 1>(t.tail);
+}
+
+/// get
+/// \brief Extracts the first element from the tuple when the tuple and all the
+/// elements inside are const.
+/// K=0 represents the first element of the tuple. The tuple cannot be empty.
+/// \tparam Ts... are the elements type in the tuple.
+/// \param t is the const tuple whose contents to extract
+/// \return const typename ElemTypeHolder<0, Tuple<Ts...>>::type &>::type
+template <size_t k, class... Ts>
+typename EnableIf<k == 0,
+ const typename ElemTypeHolder<0, Tuple<Ts...>>::type &>::type
+get(const Tuple<Ts...> &t) {
+ return t.head;
+}
+
+/// get
+/// \brief Extracts the Kth element from the tuple when the tuple and all the
+/// elements inside are const.
+/// \tparam K is an integer value in [0,sizeof...(Types)).
+/// \tparam T is the (sizeof...(Types) -(K+1)) element in the tuple
+/// \tparam Ts... are the elements type in the tuple.
+/// \param t is the const tuple whose contents to extract
+/// \return const typename ElemTypeHolder<K, Tuple<Ts...>>::type &>::type
+template <size_t k, class T, class... Ts>
+typename EnableIf<
+ k != 0, const typename ElemTypeHolder<k, Tuple<T, Ts...>>::type &>::type
+get(const Tuple<T, Ts...> &t) {
+ return get<k - 1>(t.tail);
+}
+/// make_tuple
+/// \brief Creates a tuple object, deducing the target type from the types of
+/// arguments.
+/// \tparam Args the type of the arguments to construct the tuple from
+/// \param args zero or more arguments to construct the tuple from
+/// \return Tuple<Args...>
+template <typename... Args>
+Tuple<Args...> make_tuple(Args... args) {
+ return Tuple<Args...>(args...);
+}
+
+/// size
+/// \brief Provides access to the number of elements in a tuple as a
+/// compile-time constant expression.
+/// \tparam Args the type of the arguments to construct the tuple from
+/// \return size_t
+template <typename... Args>
+static constexpr size_t size(Tuple<Args...> &) {
+ return sizeof...(Args);
+}
+
+/// \struct Index_list
+/// \brief Creates a list of index from the elements in the tuple
+/// \tparam Is... a list of index from [0 to sizeof...(tuple elements))
+template <size_t... Is>
+struct Index_list {};
+
+/// \struct RangeBuilder
+/// \brief Collects internal details for generating index ranges [MIN, MAX)
+/// Declare primary template for index range builder
+/// \tparam MIN is the starting index in the tuple
+/// \tparam N represents sizeof...(elements) - sizeof...(Is)
+/// \tparam Is... are the list of generated index so far
+template <size_t MIN, size_t N, size_t... Is>
+struct RangeBuilder;
+
+/// \brief base Step: Specialisation of the \ref RangeBuilder when the
+/// MIN==N. In this case the Is... contains the indices [MIN, MAX)
+/// \tparam MIN is the starting index of the tuple
+/// \tparam Is is [0 to sizeof...(tuple elements))
+template <size_t MIN, size_t... Is>
+struct RangeBuilder<MIN, MIN, Is...> {
+ typedef Index_list<Is...> type;
+};
+
+/// Induction step: Specialisation of the RangeBuilder class when N!=MIN
+/// in this case we are recursively subtracting the N by one and adding one
+/// index to Is... list until MIN==N
+/// \tparam MIN is the starting index in the tuple
+/// \tparam N represents sizeof...(elements) - sizeof...(Is)
+/// \tparam Is... are the list of generated index so far
+template <size_t MIN, size_t N, size_t... Is>
+struct RangeBuilder : public RangeBuilder<MIN, N - 1, N - 1, Is...> {};
+
+/// \brief IndexRange that returns a [MIN, MAX) index range
+/// \tparam MIN is the starting index in the tuple
+/// \tparam MAX is the size of the tuple
+template <size_t MIN, size_t MAX>
+using Index_range = typename RangeBuilder<MIN, MAX>::type;
+
+/// append_impl
+/// \brief unpacking the elements of the input tuple t and creating a new tuple
+/// by adding element a at the end of it.
+/// \tparam Args... the type of the elements inside the tuple t
+/// \tparam T the type of the new element going to be added at the end of tuple
+/// \tparam I... is the list of index from [0 to sizeof...(t))
+/// \param t the tuple on which we want to append a.
+/// \param a the new elements going to be added to the tuple
+/// \return Tuple<Args..., T>
+template <typename... Args, typename T, size_t... I>
+Tuple<Args..., T> append_impl(utility::tuple::Tuple<Args...> t, T a,
+ utility::tuple::Index_list<I...>) {
+ return utility::tuple::make_tuple(get<I>(t)..., a);
+}
+
+/// append
+/// \brief the deduction function for \ref append_impl that automatically
+/// generates the \ref Index_range
+/// \tparam Args... the type of the elements inside the tuple t
+/// \tparam T the type of the new element going to be added at the end of tuple
+/// \param t the tuple on which we want to append a.
+/// \param a the new elements going to be added to the tuple
+/// \return Tuple<Args..., T>
+template <typename... Args, typename T>
+Tuple<Args..., T> append(Tuple<Args...> t, T a) {
+ return utility::tuple::append_impl(
+ t, a, utility::tuple::Index_range<0, sizeof...(Args)>());
+}
+
+/// append_impl
+/// \brief This is a specialisation of \ref append_impl used when we want to
+/// concatenate
+/// tuple t2 at the end of the tuple t1. Here we unpack both tuples, generate
+/// the
+/// Index_range for each of them and create an output tuple T that contains both
+/// elements of t1 and t2.
+/// \tparam Args1... the type of the elements inside the tuple t1
+/// \tparam Args2... the type of the elements inside the tuple t2
+/// \tparam I1... is the list of index from [0 to sizeof...(t1))
+/// \tparam I2... is the list of index from [0 to sizeof...(t2))
+/// \param t1 is the tuple on which we want to append t2.
+/// \param t2 is the tuple that is going to be added on t1.
+/// \return Tuple<Args1..., Args2...>
+template <typename... Args1, typename... Args2, size_t... I1, size_t... I2>
+Tuple<Args1..., Args2...> append_impl(utility::tuple::Tuple<Args1...> t1,
+ utility::tuple::Tuple<Args2...> t2,
+ utility::tuple::Index_list<I1...>,
+ utility::tuple::Index_list<I2...>) {
+ return utility::tuple::make_tuple(utility::tuple::get<I1>(t1)...,
+ utility::tuple::get<I2>(t2)...);
+}
+/// append
+/// \brief deduction function for \ref append_impl when we are appending tuple
+/// t2 to tuple t1. In this case the \ref Index_range for both tuples is
+/// automatically generated.
+/// \tparam Args1... the type of the elements inside the tuple t1
+/// \tparam Args2... the type of the elements inside the tuple t2
+/// \param t1 is the tuple on which we want to append t2.
+/// \param t2 is the tuple that is going to be added on t1.
+/// \return Tuple<Args1..., Args2...>
+template <typename... Args1, typename... Args2>
+Tuple<Args1..., Args2...> append(utility::tuple::Tuple<Args1...> t1,
+ utility::tuple::Tuple<Args2...> t2) {
+ return utility::tuple::append_impl(
+ t1, t2, utility::tuple::Index_range<0, sizeof...(Args1)>(),
+ utility::tuple::Index_range<0, sizeof...(Args2)>());
+}
+} // tuple
+} // utility
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_TUPLE_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
index b7597b3a5..62c5caf6c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
@@ -56,11 +56,12 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit)
};
+ template <class T> using MakePointer = MakePointer<T>;
};
-template<typename Scalar_, typename Dimensions, int Options_, typename IndexType_>
-struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
+template<typename Scalar_, typename Dimensions, int Options_, typename IndexType_, template <class> class MakePointer_>
+struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_, MakePointer_> >
{
typedef Scalar_ Scalar;
typedef Dense StorageKind;
@@ -71,11 +72,12 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit)
};
+ template <class T> using MakePointer = MakePointer_<T>;
};
-template<typename PlainObjectType, int Options_>
-struct traits<TensorMap<PlainObjectType, Options_> >
+template<typename PlainObjectType, int Options_ , template <class> class MakePointer_>
+struct traits<TensorMap<PlainObjectType, Options_ , MakePointer_> >
: public traits<PlainObjectType>
{
typedef traits<PlainObjectType> BaseTraits;
@@ -88,6 +90,7 @@ struct traits<TensorMap<PlainObjectType, Options_> >
Options = Options_,
Flags = BaseTraits::Flags
};
+ template <class T> using MakePointer = MakePointer_<T>;
};
template<typename PlainObjectType>
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 6188b421a..de9b5243a 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -138,6 +138,13 @@ endif()
endif()
if(EIGEN_TEST_CXX11)
+ if(EIGEN_TEST_SYCL)
+ ei_add_test_sycl(cxx11_tensor_sycl "-std=c++11")
+ ei_add_test_sycl(cxx11_tensor_sycl_forced_eval "-std=c++11")
+ ei_add_test_sycl(cxx11_tensor_sycl_broadcast "-std=c++11")
+ ei_add_test_sycl(cxx11_tensor_sycl_device "-std=c++11")
+ endif(EIGEN_TEST_SYCL)
+
# It should be safe to always run these tests as there is some fallback code for
# older compiler that don't support cxx11.
set(CMAKE_CXX_STANDARD 11)
diff --git a/unsupported/test/cxx11_tensor_sycl.cpp b/unsupported/test/cxx11_tensor_sycl.cpp
new file mode 100644
index 000000000..1ec9b1883
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_sycl.cpp
@@ -0,0 +1,157 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+// Types used in tests:
+using TestTensor = Tensor<float, 3>;
+using TestTensorMap = TensorMap<Tensor<float, 3>>;
+
+void test_sycl_cpu() {
+ cl::sycl::gpu_selector s;
+ cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {
+ for (const auto& e : l) {
+ try {
+ std::rethrow_exception(e);
+ } catch (cl::sycl::exception e) {
+ std::cout << e.what() << std::endl;
+ }
+ }
+ });
+ SyclDevice sycl_device(q);
+
+ int sizeDim1 = 100;
+ int sizeDim2 = 100;
+ int sizeDim3 = 100;
+ array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+ TestTensor in1(tensorRange);
+ TestTensor in2(tensorRange);
+ TestTensor in3(tensorRange);
+ TestTensor out(tensorRange);
+ in1 = in1.random();
+ in2 = in2.random();
+ in3 = in3.random();
+ TestTensorMap gpu_in1(in1.data(), tensorRange);
+ TestTensorMap gpu_in2(in2.data(), tensorRange);
+ TestTensorMap gpu_in3(in3.data(), tensorRange);
+ TestTensorMap gpu_out(out.data(), tensorRange);
+
+ /// a=1.2f
+ gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f);
+ sycl_device.deallocate(in1.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(in1(i,j,k), 1.2f);
+ }
+ }
+ }
+ printf("a=1.2f Test passed\n");
+
+ /// a=b*1.2f
+ gpu_out.device(sycl_device) = gpu_in1 * 1.2f;
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) * 1.2f);
+ }
+ }
+ }
+ printf("a=b*1.2f Test Passed\n");
+
+ /// c=a*b
+ gpu_out.device(sycl_device) = gpu_in1 * gpu_in2;
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) *
+ in2(i,j,k));
+ }
+ }
+ }
+ printf("c=a*b Test Passed\n");
+
+ /// c=a+b
+ gpu_out.device(sycl_device) = gpu_in1 + gpu_in2;
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) +
+ in2(i,j,k));
+ }
+ }
+ }
+ printf("c=a+b Test Passed\n");
+
+ /// c=a*a
+ gpu_out.device(sycl_device) = gpu_in1 * gpu_in1;
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) *
+ in1(i,j,k));
+ }
+ }
+ }
+
+ printf("c= a*a Test Passed\n");
+
+ //a*3.14f + b*2.7f
+ gpu_out.device(sycl_device) = gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f);
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) * 3.14f
+ + in2(i,j,k) * 2.7f);
+ }
+ }
+ }
+ printf("a*3.14f + b*2.7f Test Passed\n");
+
+ ///d= (a>0.5? b:c)
+ gpu_out.device(sycl_device) =(gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3);
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) > 0.5f)
+ ? in2(i, j, k)
+ : in3(i, j, k));
+ }
+ }
+ }
+ printf("d= (a>0.5? b:c) Test Passed\n");
+
+}
+void test_cxx11_tensor_sycl() {
+ CALL_SUBTEST(test_sycl_cpu());
+}
diff --git a/unsupported/test/cxx11_tensor_sycl_broadcast.cpp b/unsupported/test/cxx11_tensor_sycl_broadcast.cpp
new file mode 100644
index 000000000..1babbc038
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_sycl_broadcast.cpp
@@ -0,0 +1,76 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_sycl_broadcast
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+// Types used in tests:
+using TestTensor = Tensor<float, 3>;
+using TestTensorMap = TensorMap<Tensor<float, 3>>;
+static void test_sycl_broadcast(){  // checks Tensor::broadcast on a SYCL device against a CPU reference
+
+  cl::sycl::gpu_selector s;
+  cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {  // async-error handler for the queue
+    for (const auto& e : l) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const cl::sycl::exception& e) {  // catch by const ref: no copy, no slicing
+        std::cout << e.what() << std::endl;
+      }
+    }
+  });
+  SyclDevice sycl_device(q);
+  // BROADCAST test:
+  array<int, 4> in_range = {{2, 3, 5, 7}};
+  array<int, in_range.size()> broadcasts = {{2, 3, 1, 4}};
+  array<int, in_range.size()> out_range; // = in_range * broadcasts
+  for (size_t i = 0; i < out_range.size(); ++i)
+    out_range[i] = in_range[i] * broadcasts[i];
+
+  Tensor<float, in_range.size()> input(in_range);
+  Tensor<float, out_range.size()> output(out_range);
+
+  for (int i = 0; i < input.size(); ++i)
+    input(i) = static_cast<float>(i);  // linear ramp makes mismatches easy to spot
+
+  TensorMap<decltype(input)> gpu_in(input.data(), in_range);
+  TensorMap<decltype(output)> gpu_out(output.data(), out_range);
+  gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
+  sycl_device.deallocate(output.data());
+
+  for (size_t i = 0; i < in_range.size(); ++i)
+    VERIFY_IS_EQUAL(output.dimension(i), out_range[i]);
+
+  for (int i = 0; i < out_range[0]; ++i) {  // bounds/moduli from the computed ranges, not magic numbers
+    for (int j = 0; j < out_range[1]; ++j) {
+      for (int k = 0; k < out_range[2]; ++k) {
+        for (int l = 0; l < out_range[3]; ++l) {
+          // each output coordinate wraps (modulo) back onto the input tensor
+          VERIFY_IS_APPROX(input(i%in_range[0],j%in_range[1],k%in_range[2],l%in_range[3]), output(i,j,k,l));
+        }
+      }
+    }
+  }
+  printf("Broadcast Test Passed\n");
+}
+
+void test_cxx11_tensor_sycl_broadcast() {
+ CALL_SUBTEST(test_sycl_broadcast());
+}
diff --git a/unsupported/test/cxx11_tensor_sycl_device.cpp b/unsupported/test/cxx11_tensor_sycl_device.cpp
new file mode 100644
index 000000000..2c1c17972
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_sycl_device.cpp
@@ -0,0 +1,37 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_sycl_device
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+
+void test_sycl_device() {  // smoke test: constructing a SyclDevice from a GPU queue must succeed
+  cl::sycl::gpu_selector s;
+  cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {  // async-error handler for the queue
+    for (const auto& e : l) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const cl::sycl::exception& e) {  // catch by const ref: no copy, no slicing
+        std::cout << e.what() << std::endl;
+      }
+    }
+  });
+  Eigen::SyclDevice sycl_device(q);  // qualified: this file has no using-declaration for SyclDevice
+  printf("Hello from ComputeCpp: Device Exists\n");  // fixed typo "Helo"
+}
+void test_cxx11_tensor_sycl_device() {  // entry point; name must match the EIGEN_TEST_FUNC define for this test
+  CALL_SUBTEST(test_sycl_device());
+}
diff --git a/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp b/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp
new file mode 100644
index 000000000..ee934d4fa
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp
@@ -0,0 +1,64 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_sycl_forced_eval
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_sycl_gpu() {  // checks that .eval() (forced evaluation) works inside a SYCL expression
+  cl::sycl::gpu_selector s;
+  cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {  // async-error handler for the queue
+    for (const auto& e : l) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const cl::sycl::exception& e) {  // catch by const ref: no copy, no slicing
+        std::cout << e.what() << std::endl;
+      }
+    }
+  });
+  Eigen::SyclDevice sycl_device(q);  // qualified: only Eigen::Tensor is imported in this file
+
+  int sizeDim1 = 100;
+  int sizeDim2 = 200;
+  int sizeDim3 = 200;
+  Eigen::array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+  Eigen::Tensor<float, 3> in1(tensorRange);
+  Eigen::Tensor<float, 3> in2(tensorRange);
+  Eigen::Tensor<float, 3> out(tensorRange);
+
+  in1 = in1.random() + in1.constant(10.0f);  // shift off zero so approx comparison is well-behaved
+  in2 = in2.random() + in2.constant(10.0f);
+
+  // creating TensorMap from tensor
+  Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in1(in1.data(), tensorRange);
+  Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in2(in2.data(), tensorRange);
+  Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_out(out.data(), tensorRange);
+
+  /// c=(a+b)*b -- .eval() forces the sum into a temporary before the multiply
+  gpu_out.device(sycl_device) =(gpu_in1 + gpu_in2).eval() * gpu_in2;
+  sycl_device.deallocate(out.data());
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
+        VERIFY_IS_APPROX(out(i, j, k),
+                         (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k));
+      }
+    }
+  }
+  printf("(a+b)*b Test Passed\n");
+}
+
+void test_cxx11_tensor_sycl_forced_eval() { CALL_SUBTEST(test_sycl_gpu()); }  // entry point; name must match the EIGEN_TEST_FUNC define