diff options
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_gpu_executor.cc')
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 17 |
1 files changed, 9 insertions, 8 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index ce2f1ce3ae..ef84d01a94 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -493,10 +493,10 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, // Compute and return maximum blocks per core (occupancy) based on the // device description, some kernel characteristics and the number of threads per // block. If unable to compute occupancy, zero is returned. -int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { +int CUDAExecutor::CalculateOccupancy( + const DeviceDescription& device_description, uint64 registers_per_thread, + uint64 shared_memory_per_block, const ThreadDim& thread_dims, + CUfunction func) { int suggested_blocks = 0; int suggested_threads = 0; CUresult err = @@ -509,10 +509,11 @@ int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description // Compute and return the suggested thread count to acheive ideal occupancy. // If the provided thread dimensions match this number, zero is returned. int CUDAExecutor::CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { + const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, + CUfunction func) { int suggested_blocks = 0; int suggested_threads = 0; CUresult err = |