aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_gpu_executor.h')
-rw-r--r--tensorflow/stream_executor/cuda/cuda_gpu_executor.h11
1 files changed, 11 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 8a954d5461..53b2a29ae7 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -70,6 +70,17 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
const BlockDim &block_dims, const KernelBase &k,
const KernelArgsArrayBase &args) override;
+ int CalculateOccupancy(const DeviceDescription &device_description,
+ uint64 registers_per_thread,
+ uint64 shared_memory_per_block,
+ const ThreadDim &thread_dims, CUfunction func);
+
+ int CompareOccupancy(int *initial_blocks,
+ const DeviceDescription &device_description,
+ uint64 registers_per_thread,
+ uint64 shared_memory_per_block,
+ const ThreadDim &thread_dims, CUfunction func);
+
void *Allocate(uint64 size) override;
void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes,