aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor
diff options
context:
space:
mode:
authorGravatar Matt Conley <mconley@nvidia.com>2018-09-06 08:22:37 -0700
committerGravatar Matt Conley <mconley@nvidia.com>2018-09-06 08:22:37 -0700
commitd0574f6b25ab01052e093ab92612520a7e4ada8d (patch)
tree89ba73228de937a3ff8ba0128833cc566c89ab4c /tensorflow/stream_executor
parent475b7715f16ad0f94fa9986a0eefc1b2cf2044bd (diff)
Fixed clang formatting
Diffstat (limited to 'tensorflow/stream_executor')
-rw-r--r--tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 17
-rw-r--r--tensorflow/stream_executor/cuda/cuda_gpu_executor.h | 12
2 files changed, 15 insertions, 14 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index ce2f1ce3ae..ef84d01a94 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -493,10 +493,10 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
// Compute and return maximum blocks per core (occupancy) based on the
// device description, some kernel characteristics and the number of threads per
// block. If unable to compute occupancy, zero is returned.
-int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description,
- uint64 registers_per_thread,
- uint64 shared_memory_per_block,
- const ThreadDim& thread_dims, CUfunction func) {
+int CUDAExecutor::CalculateOccupancy(
+ const DeviceDescription& device_description, uint64 registers_per_thread,
+ uint64 shared_memory_per_block, const ThreadDim& thread_dims,
+ CUfunction func) {
int suggested_blocks = 0;
int suggested_threads = 0;
CUresult err =
@@ -509,10 +509,11 @@ int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description
// Compute and return the suggested thread count to achieve ideal occupancy.
// If the provided thread dimensions match this number, zero is returned.
int CUDAExecutor::CompareOccupancy(int* initial_blocks,
- const DeviceDescription& device_description,
- uint64 registers_per_thread,
- uint64 shared_memory_per_block,
- const ThreadDim& thread_dims, CUfunction func) {
+ const DeviceDescription& device_description,
+ uint64 registers_per_thread,
+ uint64 shared_memory_per_block,
+ const ThreadDim& thread_dims,
+ CUfunction func) {
int suggested_blocks = 0;
int suggested_threads = 0;
CUresult err =
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index e8ebbc3220..1481dcc19a 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -71,16 +71,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
const KernelArgsArrayBase &args) override;
int CalculateOccupancy(const DeviceDescription& device_description,
+ uint64 registers_per_thread,
+ uint64 shared_memory_per_block,
+ const ThreadDim& thread_dims, CUfunction func);
+
+ int CompareOccupancy(int* initial_blocks,
+ const DeviceDescription& device_description,
uint64 registers_per_thread,
uint64 shared_memory_per_block,
const ThreadDim& thread_dims, CUfunction func);
- int CompareOccupancy(int* initial_blocks,
- const DeviceDescription& device_description,
- uint64 registers_per_thread,
- uint64 shared_memory_per_block,
- const ThreadDim& thread_dims, CUfunction func);
-
void *Allocate(uint64 size) override;
void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes,