about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_gpu_executor.h')
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_gpu_executor.h | 12
1 files changed, 6 insertions, 6 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index e8ebbc3220..1481dcc19a 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -71,16 +71,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
const KernelArgsArrayBase &args) override;
int CalculateOccupancy(const DeviceDescription& device_description,
+ uint64 registers_per_thread,
+ uint64 shared_memory_per_block,
+ const ThreadDim& thread_dims, CUfunction func);
+
+ int CompareOccupancy(int* initial_blocks,
+ const DeviceDescription& device_description,
uint64 registers_per_thread,
uint64 shared_memory_per_block,
const ThreadDim& thread_dims, CUfunction func);
- int CompareOccupancy(int* initial_blocks,
- const DeviceDescription& device_description,
- uint64 registers_per_thread,
- uint64 shared_memory_per_block,
- const ThreadDim& thread_dims, CUfunction func);
-
void *Allocate(uint64 size) override;
void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes,