From e46e7223817cfd982edec6d8e25c77e8e2493d78 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Mon, 16 Jan 2017 13:58:49 +0000 Subject: Adding Tensor ReverseOp; TensorStriding; TensorConversionOp; Modifying Tensor Contractsycl to be located in any place in the expression tree. --- unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h index 319417687..82ca71215 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h @@ -74,7 +74,7 @@ struct FullReducer { static void run(const Self& self, Op& reducer, const Eigen::SyclDevice& dev, CoeffReturnType* output) { typedef const typename Self::ChildType HostExpr; /// this is the child of reduction - typedef decltype(TensorSycl::internal::extractFunctors(self.impl())) FunctorExpr; + typedef Eigen::TensorSycl::internal::FunctorExtractor > FunctorExpr; FunctorExpr functors = TensorSycl::internal::extractFunctors(self.impl()); int red_factor =256; /// initial reduction. If the size is less than red_factor we only creates one thread. size_t inputSize =self.impl().dimensions().TotalSize(); @@ -108,9 +108,10 @@ struct FullReducer { // Dims dims= self.xprDims(); //Op functor = reducer; dev.sycl_queue().submit([&](cl::sycl::handler &cgh) { + // this is a work around for gcc bug + typedef decltype(TensorSycl::internal::createTupleOfAccessors(cgh, self.impl())) TupleType; // create a tuple of accessors from Evaluator - auto tuple_of_accessors = TensorSycl::internal::createTupleOfAccessors(cgh, self.impl()); - typedef decltype(tuple_of_accessors) TupleType; + TupleType tuple_of_accessors = TensorSycl::internal::createTupleOfAccessors(cgh, self.impl()); auto tmp_global_accessor = temp_global_buffer. template get_access(cgh); typedef decltype(tmp_global_accessor) OutAccessor; cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(outTileSize)), @@ -136,7 +137,7 @@ struct InnerReducer { static bool run(const Self& self, Op& reducer, const Eigen::SyclDevice& dev, CoeffReturnType* output, typename Self::Index , typename Self::Index num_coeffs_to_preserve) { typedef const typename Self::ChildType HostExpr; /// this is the child of reduction - typedef decltype(TensorSycl::internal::extractFunctors(self.impl())) FunctorExpr; + typedef Eigen::TensorSycl::internal::FunctorExtractor > FunctorExpr; FunctorExpr functors = TensorSycl::internal::extractFunctors(self.impl()); typename Self::Index range, GRange, tileSize; typedef typename Eigen::internal::remove_all::type Dims; @@ -147,9 +148,10 @@ struct InnerReducer { /// recursively apply reduction on it in order to reduce the whole. dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange); dev.sycl_queue().submit([&](cl::sycl::handler &cgh) { + // this is work around for gcc bug. + typedef decltype(TensorSycl::internal::createTupleOfAccessors(cgh, self.impl())) Tuple_of_Acc; // create a tuple of accessors from Evaluator - auto tuple_of_accessors = TensorSycl::internal::createTupleOfAccessors(cgh, self.impl()); - typedef typename Eigen::internal::remove_all::type Tuple_of_Acc; + Tuple_of_Acc tuple_of_accessors = TensorSycl::internal::createTupleOfAccessors(cgh, self.impl()); auto output_accessor = dev.template get_sycl_accessor(cgh, output); cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), -- cgit v1.2.3