From d0574f6b25ab01052e093ab92612520a7e4ada8d Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Thu, 6 Sep 2018 08:22:37 -0700
Subject: Fixed clang formatting

---
 tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 17 +++++++++--------
 tensorflow/stream_executor/cuda/cuda_gpu_executor.h  | 12 ++++++------
 2 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'tensorflow/stream_executor')

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index ce2f1ce3ae..ef84d01a94 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -493,10 +493,10 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
 // Compute and return maximum blocks per core (occupancy) based on the
 // device description, some kernel characteristics and the number of threads per
 // block.  If unable to compute occupancy, zero is returned.
-int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description,
-                       uint64 registers_per_thread,
-                       uint64 shared_memory_per_block,
-                       const ThreadDim& thread_dims, CUfunction func) {
+int CUDAExecutor::CalculateOccupancy(
+    const DeviceDescription& device_description, uint64 registers_per_thread,
+    uint64 shared_memory_per_block, const ThreadDim& thread_dims,
+    CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
   CUresult err =
@@ -509,10 +509,11 @@ int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description
 // Compute and return the suggested thread count to acheive ideal occupancy.
 // If the provided thread dimensions match this number, zero is returned.
 int CUDAExecutor::CompareOccupancy(int* initial_blocks,
-                     const DeviceDescription& device_description,
-                     uint64 registers_per_thread,
-                     uint64 shared_memory_per_block,
-                     const ThreadDim& thread_dims, CUfunction func) {
+                                   const DeviceDescription& device_description,
+                                   uint64 registers_per_thread,
+                                   uint64 shared_memory_per_block,
+                                   const ThreadDim& thread_dims,
+                                   CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
   CUresult err =
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index e8ebbc3220..1481dcc19a 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -71,16 +71,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
               const KernelArgsArrayBase &args) override;
 
   int CalculateOccupancy(const DeviceDescription& device_description,
+                         uint64 registers_per_thread,
+                         uint64 shared_memory_per_block,
+                         const ThreadDim& thread_dims, CUfunction func);
+
+  int CompareOccupancy(int* initial_blocks,
+                       const DeviceDescription& device_description,
                        uint64 registers_per_thread,
                        uint64 shared_memory_per_block,
                        const ThreadDim& thread_dims, CUfunction func);
 
-  int CompareOccupancy(int* initial_blocks,
-                     const DeviceDescription& device_description,
-                     uint64 registers_per_thread,
-                     uint64 shared_memory_per_block,
-                     const ThreadDim& thread_dims, CUfunction func);
-
   void *Allocate(uint64 size) override;
 
   void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes,
-- 
cgit v1.2.3