From 2ab603316af7c1bcf1d5e87d9ba50a2589b36e37 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 28 Aug 2015 08:14:15 -0700 Subject: Use numext::mini/numext::maxi instead of std::min/std::max in the tensor code --- .../Eigen/CXX11/src/Tensor/TensorConvolution.h | 48 +++++++++++----------- 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 23500e138..a82bfc0aa 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -857,29 +857,29 @@ struct TensorEvaluator)(inner_dim, numX); - const int maxP = (std::min)(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP); - block_size.x = (std::min)(maxThreadsPerBlock, maxX); - block_size.y = (std::min)(maxThreadsPerBlock / block_size.x, maxP); + maxX = numext::mini(inner_dim, numX); + const int maxP = numext::mini(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP); + block_size.x = numext::mini(maxThreadsPerBlock, maxX); + block_size.y = numext::mini(maxThreadsPerBlock / block_size.x, maxP); } else { // Read as much as possible alongside the inner most dimension, that is the plane const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar)); - const int maxP = (std::min)(inner_dim, numP); - maxX = (std::min)(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX); + const int maxP = numext::mini(inner_dim, numP); + maxX = numext::mini(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX); - block_size.x = (std::min)(warpSize, maxX); - block_size.y = (std::min)(maxThreadsPerBlock/block_size.x, maxP); + block_size.x = numext::mini(warpSize, maxX); + block_size.y = numext::mini(maxThreadsPerBlock/block_size.x, maxP); } const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar); assert(shared_mem <= maxSharedMem); const int num_x_blocks = ceil(numX, maxX); - const int blocksPerProcessor = (std::min)(maxBlocksPerProcessor, maxSharedMem / shared_mem); + const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); const int num_y_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks); - dim3 num_blocks(num_x_blocks, std::min(num_y_blocks, ceil(numP, block_size.y))); + dim3 num_blocks(num_x_blocks, numext::mini(num_y_blocks, ceil(numP, block_size.y))); //cout << "launching 1D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; @@ -920,24 +920,24 @@ struct TensorEvaluator(scaling_factor * kernel_size_x) - kernel_size_x + 1 + 32) / 32) * 32; - const int maxX = (std::min)(inner_dim, numX); - const int maxY = (std::min)(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY); - const int maxP = (std::min)(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP); + const int maxX = numext::mini(inner_dim, numX); + const int maxY = numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY); + const int maxP = numext::mini(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP); dim3 block_size; - block_size.x = (std::min)(1024, maxX); - block_size.y = (std::min)(1024/block_size.x, maxY); - block_size.z = (std::min)(1024/(block_size.x*block_size.y), maxP); + block_size.x = numext::mini(1024, maxX); + block_size.y = numext::mini(1024/block_size.x, maxY); + block_size.z = numext::mini(1024/(block_size.x*block_size.y), maxP); const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar); assert(shared_mem <= maxSharedMem); const int num_x_blocks = ceil(numX, maxX); const int num_y_blocks = ceil(numY, maxY); - const int blocksPerProcessor = (std::min)(maxBlocksPerProcessor, maxSharedMem / shared_mem); + const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); const int num_z_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks * num_y_blocks); - dim3 num_blocks(num_x_blocks, num_y_blocks, std::min(num_z_blocks, ceil(numP, block_size.z))); + dim3 num_blocks(num_x_blocks, num_y_blocks, numext::mini(num_z_blocks, ceil(numP, block_size.z))); //cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; @@ -999,14 +999,14 @@ struct TensorEvaluator)(128, (std::min)(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX)); - const int maxY = (std::min)(128, (std::min)(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY)); - const int maxZ = (std::min)(128, (std::min)(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ)); + const int maxX = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX)); + const int maxY = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY)); + const int maxZ = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ)); dim3 block_size; - block_size.x = (std::min)(32, maxX); - block_size.y = (std::min)(32, maxY); - block_size.z = (std::min)(1024/(block_size.x*block_size.y), maxZ); + block_size.x = numext::mini(32, maxX); + block_size.y = numext::mini(32, maxY); + block_size.z = numext::mini(1024/(block_size.x*block_size.y), maxZ); dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ)); const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar); -- cgit v1.2.3