diff options
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h')
-rw-r--r-- | tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h b/tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h new file mode 100644 index 0000000000..a3298ab222 --- /dev/null +++ b/tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h @@ -0,0 +1,36 @@ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ALLOCATOR_RETRY_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ALLOCATOR_RETRY_H_ + +#include "tensorflow/core/platform/port.h" +#include "tensorflow/core/public/env.h" + +namespace tensorflow { + +// A retrying wrapper for a memory allocator. +class GPUAllocatorRetry { + public: + GPUAllocatorRetry(); + + // Call 'alloc_func' to obtain memory. On first call, + // 'verbose_failure' will be false. If return value is nullptr, + // then wait up to 'max_millis_to_wait' milliseconds, retrying each + // time a call to DeallocateRaw() is detected, until either a good + // pointer is returned or the deadline is exhausted. If the + // deadline is exhausted, try one more time with 'verbose_failure' + // set to true. The value returned is either the first good pointer + // obtained from 'alloc_func' or nullptr. + void* AllocateRaw(std::function<void*(size_t alignment, size_t num_bytes, + bool verbose_failure)> alloc_func, + int max_millis_to_wait, size_t alignment, size_t bytes); + + // Calls dealloc_func(ptr) and then notifies any threads blocked in + // AllocateRaw() that would like to retry. + void DeallocateRaw(std::function<void(void* ptr)> dealloc_func, void* ptr); + + private: + Env* env_; + mutex mu_; + condition_variable memory_returned_; +}; +} // namespace tensorflow +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ALLOCATOR_RETRY_H_ |