about summary refs log tree commit diff homepage
path: root/tensorflow/stream_executor
diff options
context:
space:
mode:
authorGravatar Matt Conley <mconley@nvidia.com>2018-09-06 13:09:12 -0700
committerGravatar Matt Conley <mconley@nvidia.com>2018-09-06 13:09:12 -0700
commit6a5090b086bc9d665eb9e65f05eb94cdb58baaa2 (patch)
tree37ff8db78c6248765c4b91fd5081dedea3ae2449 /tensorflow/stream_executor
parentd0574f6b25ab01052e093ab92612520a7e4ada8d (diff)
Fully fixed clang errors
Diffstat (limited to 'tensorflow/stream_executor')
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 12
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_gpu_executor.h  | 10
2 files changed, 11 insertions, 11 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index ef84d01a94..9d5bcc7f77 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -472,7 +472,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
const DeviceDescription &device_description =
kernel.parent()->GetDeviceDescription();
- const CUDAKernel* cuda_kernel = AsCUDAKernel(&kernel);
+ const CUDAKernel *cuda_kernel = AsCUDAKernel(&kernel);
CUfunction cufunc = cuda_kernel->AsCUDAFunctionValue();
int blocks_per_sm = CalculateOccupancy(device_description, regs_per_thread,
@@ -494,8 +494,8 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
// device description, some kernel characteristics and the number of threads per
// block. If unable to compute occupancy, zero is returned.
int CUDAExecutor::CalculateOccupancy(
- const DeviceDescription& device_description, uint64 registers_per_thread,
- uint64 shared_memory_per_block, const ThreadDim& thread_dims,
+ const DeviceDescription &device_description, uint64 registers_per_thread,
+ uint64 shared_memory_per_block, const ThreadDim &thread_dims,
CUfunction func) {
int suggested_blocks = 0;
int suggested_threads = 0;
@@ -508,11 +508,11 @@ int CUDAExecutor::CalculateOccupancy(
// Compute and return the suggested thread count to acheive ideal occupancy.
// If the provided thread dimensions match this number, zero is returned.
-int CUDAExecutor::CompareOccupancy(int* initial_blocks,
- const DeviceDescription& device_description,
+int CUDAExecutor::CompareOccupancy(int *initial_blocks,
+ const DeviceDescription &device_description,
uint64 registers_per_thread,
uint64 shared_memory_per_block,
- const ThreadDim& thread_dims,
+ const ThreadDim &thread_dims,
CUfunction func) {
int suggested_blocks = 0;
int suggested_threads = 0;
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 1481dcc19a..53b2a29ae7 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -70,16 +70,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
const BlockDim &block_dims, const KernelBase &k,
const KernelArgsArrayBase &args) override;
- int CalculateOccupancy(const DeviceDescription& device_description,
+ int CalculateOccupancy(const DeviceDescription &device_description,
uint64 registers_per_thread,
uint64 shared_memory_per_block,
- const ThreadDim& thread_dims, CUfunction func);
+ const ThreadDim &thread_dims, CUfunction func);
- int CompareOccupancy(int* initial_blocks,
- const DeviceDescription& device_description,
+ int CompareOccupancy(int *initial_blocks,
+ const DeviceDescription &device_description,
uint64 registers_per_thread,
uint64 shared_memory_per_block,
- const ThreadDim& thread_dims, CUfunction func);
+ const ThreadDim &thread_dims, CUfunction func);
void *Allocate(uint64 size) override;