diff options
Diffstat (limited to 'tensorflow/stream_executor/device_description.cc')
-rw-r--r-- | tensorflow/stream_executor/device_description.cc | 32 |
1 files changed, 0 insertions, 32 deletions
diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc index df52ce6cce..726c4adf74 100644 --- a/tensorflow/stream_executor/device_description.cc +++ b/tensorflow/stream_executor/device_description.cc @@ -157,36 +157,4 @@ static uint64 RoundDown(uint64 value, uint64 n) { return port::MathUtil::FloorOfRatio(value, n) * n; } -int CalculateOccupancy(const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { - int suggested_blocks = 0; - int suggested_threads = 0; - CUresult err = - cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, - func, NULL, shared_memory_per_block, 0); - CHECK_EQ(err, CUDA_SUCCESS); - return suggested_blocks; -} - -int CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { - int suggested_blocks = 0; - int suggested_threads = 0; - CUresult err = - cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, - func, NULL, shared_memory_per_block, 0); - CHECK_EQ(err, CUDA_SUCCESS); - if (suggested_blocks > *initial_blocks) { - *initial_blocks = suggested_blocks; - return suggested_threads; - } else { - return 0; - } -} - } // namespace stream_executor |