From bab29936a1cf0a68ffe4ccb1fd9b4807a3ec87ae Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 1 Feb 2017 15:29:53 +0000 Subject: Reducing warnings in Sycl backend. --- .../Eigen/CXX11/src/Tensor/TensorContractionSycl.h | 115 ++++++++++----------- 1 file changed, 57 insertions(+), 58 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h index dc16f89e0..e87de0c57 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h @@ -22,7 +22,7 @@ #define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_SYCL_H namespace Eigen { -template struct LaunchSyclKernels; +template struct LaunchSyclKernels; template struct TensorEvaluator, const Eigen::SyclDevice> : public TensorContractionEvaluatorBase, const Eigen::SyclDevice> > { @@ -146,7 +146,7 @@ struct TensorEvaluatorm_device.memset(buffer, 0, m * n * sizeof(Scalar)); - LaunchSyclKernels::Run(*this, buffer, m, n, k, + LaunchSyclKernels::Run(*this, buffer, m, n, k, this->m_k_strides, this->m_left_contracting_strides, this->m_right_contracting_strides, this->m_i_strides, this->m_j_strides, this->m_left_nocontract_strides, this->m_right_nocontract_strides); } @@ -162,8 +162,8 @@ struct TensorEvaluator struct KernelConstructor{ +typename HostExpr::Index TileSizeDimM, typename HostExpr::Index TileSizeDimN,typename HostExpr::Index TileSizeDimK, typename HostExpr::Index WorkLoadPerThreadM,typename HostExpr::Index WorkLoadPerThreadN, +typename HostExpr::Index LocalThreadSizeM, typename HostExpr::Index LocalThreadSizeN, typename HostExpr::Index LoadPerThreadLhs, typename HostExpr::Index LoadPerThreadRhs, typename LHSTupleType, typename RHSTupleType, typename Device> struct KernelConstructor{ typedef typename Eigen::internal::traits::_LhsNested LHSHostExpr; typedef typename Eigen::internal::traits::_RhsNested RHSHostExpr; typedef typename Eigen::TensorSycl::internal::createPlaceHolderExpression::Type LHSPlaceHolderExpr; @@ -224,84 +224,83 @@ int LocalThreadSizeM, int LocalThreadSizeN, int LoadPerThreadLhs, int LoadPerThr auto out_ptr = ConvertToActualTypeSycl(OutScalar, out_res); // Matmul Kernel // Thread identifiers - const int mLocalThreadId = itemID.get_local(0); // Local ID row - const int nLocalThreadId = itemID.get_local(1); // Local ID col - const int mGroupId = itemID.get_group(0); // Work-group ID row - const int nGroupId = itemID.get_group(1); // Work-group ID localCol - const int linearLocalThreadId = nLocalThreadId*LocalThreadSizeM + mLocalThreadId; // linear local thread ID + const Index mLocalThreadId = itemID.get_local(0); // Local ID row + const Index nLocalThreadId = itemID.get_local(1); // Local ID col + const Index mGroupId = itemID.get_group(0); // Work-group ID row + const Index nGroupId = itemID.get_group(1); // Work-group ID localCol + const Index linearLocalThreadId = nLocalThreadId*LocalThreadSizeM + mLocalThreadId; // linear local thread ID // Allocate register space float privateLhs; float privateRhs[WorkLoadPerThreadN]; float privateRes[WorkLoadPerThreadM][WorkLoadPerThreadN]; // Initialise the privateResumulation registers - for (int wLPTM=0; wLPTM(0); } // Tile Rhs - for (int lPTR=0; lPTR(0); } // Loop over all tiles - const int numTiles = roundUpK/TileSizeDimK; - int firstHalf=0; + const Index numTiles = roundUpK/TileSizeDimK; + Index firstHalf=0; do { // Synchronise itemID.barrier(cl::sycl::access::fence_space::local_space); // Load the next tile of Lhs and Rhs into local memory - int nextHalf = firstHalf + 1; + Index nextHalf = firstHalf + 1; if (nextHalf < numTiles) { // Tile A - for (int lPTL=0; lPTL(0); } // Tile B - for (int lPTR=0; lPTR(0); } } // Loop over the values of a single tile - for (int k=0; k struct LaunchSyclKernels { - -static const int TileSizeDimM = 32; // Tile size for dimension M -static const int TileSizeDimN = 32; // Tile size for dimension N -static const int TileSizeDimK = 16; // Tile size for dimension K -static const int WorkLoadPerThreadM = 4; // Work load per thread in dimension M -static const int WorkLoadPerThreadN = 4; // work load per thread in dimension N -static const int LocalThreadSizeM = (TileSizeDimM/WorkLoadPerThreadM); // Local thread size for the first dimension (M here) -static const int LocalThreadSizeN = (TileSizeDimN/WorkLoadPerThreadN); // Local thread size for the second dimension (N here) -static const int LoadPerThreadLhs = ((TileSizeDimK*WorkLoadPerThreadM*WorkLoadPerThreadN)/(TileSizeDimN)); // workload per thread for Lhs expression -static const int LoadPerThreadRhs = ((TileSizeDimK*WorkLoadPerThreadM*WorkLoadPerThreadN)/(TileSizeDimM)); // workload per thread for Rhs expression +template struct LaunchSyclKernels { + +static const Index TileSizeDimM = 32ul; // Tile size for dimension M +static const Index TileSizeDimN = 32ul; // Tile size for dimension N +static const Index TileSizeDimK = 16ul; // Tile size for dimension K +static const Index WorkLoadPerThreadM = 4ul; // Work load per thread in dimension M +static const Index WorkLoadPerThreadN = 4ul; // work load per thread in dimension N +static const Index LocalThreadSizeM = (TileSizeDimM/WorkLoadPerThreadM); // Local thread size for the first dimension (M here) +static const Index LocalThreadSizeN = (TileSizeDimN/WorkLoadPerThreadN); // Local thread size for the second dimension (N here) +static const Index LoadPerThreadLhs = ((TileSizeDimK*WorkLoadPerThreadM*WorkLoadPerThreadN)/(TileSizeDimN)); // workload per thread for Lhs expression +static const Index LoadPerThreadRhs = ((TileSizeDimK*WorkLoadPerThreadM*WorkLoadPerThreadN)/(TileSizeDimM)); // workload per thread for Rhs expression // RoundUp function to make sure that the global threadId is divisable by local threadId -static int RoundUp(int x, int y) { +static Index RoundUp(Index x, Index y) { return ((((x) + (y) - 1) / (y))*(y)); } -template< typename Self, typename OutScalar, typename Index, typename ContractT, typename LeftNocontractT, typename RightNocontractT> +template< typename Self, typename OutScalar, typename ContractT, typename LeftNocontractT, typename RightNocontractT> static void Run(const Self& self, OutScalar* buffer, Index M, Index N, Index K, ContractT m_k_strides, ContractT m_left_contracting_strides, ContractT m_right_contracting_strides, LeftNocontractT m_i_strides, RightNocontractT m_j_strides, LeftNocontractT m_left_nocontract_strides, RightNocontractT m_right_nocontract_strides){ -- cgit v1.2.3