diff options
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_gpu_executor.h')
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_gpu_executor.h | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 8a954d5461..53b2a29ae7 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -70,6 +70,17 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
               const BlockDim &block_dims, const KernelBase &k,
               const KernelArgsArrayBase &args) override;
 
+  int CalculateOccupancy(const DeviceDescription &device_description,
+                         uint64 registers_per_thread,
+                         uint64 shared_memory_per_block,
+                         const ThreadDim &thread_dims, CUfunction func);
+
+  int CompareOccupancy(int *initial_blocks,
+                       const DeviceDescription &device_description,
+                       uint64 registers_per_thread,
+                       uint64 shared_memory_per_block,
+                       const ThreadDim &thread_dims, CUfunction func);
+
   void *Allocate(uint64 size) override;
 
   void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes,