From 6a5090b086bc9d665eb9e65f05eb94cdb58baaa2 Mon Sep 17 00:00:00 2001
From: Matt Conley
Date: Thu, 6 Sep 2018 13:09:12 -0700
Subject: Fully fixed clang errors

---
 tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 12 ++++++------
 tensorflow/stream_executor/cuda/cuda_gpu_executor.h  | 10 +++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'tensorflow/stream_executor')

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index ef84d01a94..9d5bcc7f77 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -472,7 +472,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
   const DeviceDescription &device_description =
       kernel.parent()->GetDeviceDescription();
 
-  const CUDAKernel* cuda_kernel = AsCUDAKernel(&kernel);
+  const CUDAKernel *cuda_kernel = AsCUDAKernel(&kernel);
   CUfunction cufunc = cuda_kernel->AsCUDAFunctionValue();
 
   int blocks_per_sm = CalculateOccupancy(device_description, regs_per_thread,
@@ -494,8 +494,8 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
 // device description, some kernel characteristics and the number of threads per
 // block. If unable to compute occupancy, zero is returned.
 int CUDAExecutor::CalculateOccupancy(
-    const DeviceDescription& device_description, uint64 registers_per_thread,
-    uint64 shared_memory_per_block, const ThreadDim& thread_dims,
+    const DeviceDescription &device_description, uint64 registers_per_thread,
+    uint64 shared_memory_per_block, const ThreadDim &thread_dims,
     CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
@@ -508,11 +508,11 @@ int CUDAExecutor::CalculateOccupancy(
 
 // Compute and return the suggested thread count to acheive ideal occupancy.
 // If the provided thread dimensions match this number, zero is returned.
-int CUDAExecutor::CompareOccupancy(int* initial_blocks,
-                                   const DeviceDescription& device_description,
+int CUDAExecutor::CompareOccupancy(int *initial_blocks,
+                                   const DeviceDescription &device_description,
                                    uint64 registers_per_thread,
                                    uint64 shared_memory_per_block,
-                                   const ThreadDim& thread_dims,
+                                   const ThreadDim &thread_dims,
                                    CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 1481dcc19a..53b2a29ae7 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -70,16 +70,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
                       const BlockDim &block_dims, const KernelBase &k,
                       const KernelArgsArrayBase &args) override;
 
-  int CalculateOccupancy(const DeviceDescription& device_description,
+  int CalculateOccupancy(const DeviceDescription &device_description,
                          uint64 registers_per_thread,
                          uint64 shared_memory_per_block,
-                         const ThreadDim& thread_dims, CUfunction func);
+                         const ThreadDim &thread_dims, CUfunction func);
 
-  int CompareOccupancy(int* initial_blocks,
-                       const DeviceDescription& device_description,
+  int CompareOccupancy(int *initial_blocks,
+                       const DeviceDescription &device_description,
                        uint64 registers_per_thread,
                        uint64 shared_memory_per_block,
-                       const ThreadDim& thread_dims, CUfunction func);
+                       const ThreadDim &thread_dims, CUfunction func);
 
   void *Allocate(uint64 size) override;
 
--
cgit v1.2.3
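
The patch is purely a formatting cleanup: it rebinds the * and & qualifiers to the parameter names (const DeviceDescription &device_description, int *initial_blocks) so the occupancy declarations match the placement already used elsewhere in the file (for example, const KernelBase &kernel). Below is a minimal, self-contained sketch of that convention; DeviceInfo and SuggestedBlocks are hypothetical stand-ins and are not part of the TensorFlow sources or of this commit.

// Illustrative only: demonstrates the "Type *name" / "Type &name" qualifier
// placement the commit standardizes on, as opposed to "Type* name" /
// "Type& name". DeviceInfo and SuggestedBlocks are hypothetical stand-ins.
#include <cstdint>
#include <iostream>

struct DeviceInfo {
  std::uint64_t registers_per_thread;
  std::uint64_t shared_memory_per_block;
};

// Qualifiers bind to the parameter name, mirroring the patched declarations.
int SuggestedBlocks(const DeviceInfo &info, int *initial_blocks) {
  // Stand-in arithmetic; the real CalculateOccupancy() queries the CUDA driver.
  int blocks = static_cast<int>(info.shared_memory_per_block / 1024);
  if (initial_blocks != nullptr && *initial_blocks == blocks) {
    return 0;  // Like CompareOccupancy(), return zero when the counts match.
  }
  return blocks;
}

int main() {
  DeviceInfo info{32, 48 * 1024};
  int initial_blocks = 0;
  std::cout << SuggestedBlocks(info, &initial_blocks) << "\n";
  return 0;
}

Any C++11 compiler accepts this as written; the point is only the declaration style, not the occupancy arithmetic, which in the real code is delegated to the CUDA occupancy API.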