diff options
Diffstat (limited to 'tensorflow/core/common_runtime/pool_allocator.h')
-rw-r--r-- | tensorflow/core/common_runtime/pool_allocator.h | 182 |
1 files changed, 182 insertions, 0 deletions
diff --git a/tensorflow/core/common_runtime/pool_allocator.h b/tensorflow/core/common_runtime/pool_allocator.h new file mode 100644 index 0000000000..607734445b --- /dev/null +++ b/tensorflow/core/common_runtime/pool_allocator.h @@ -0,0 +1,182 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ + +// Simple LRU pool allocators for various flavors of CPU RAM that +// implement the VisitableAllocator interface. + +#include <atomic> +#include <map> +#include <memory> +#include <vector> +#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/lib/core/bits.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +// Interface of an object that rounds up integers. +class RoundUpInterface { + public: + virtual ~RoundUpInterface() {} + virtual size_t RoundUp(size_t num_bytes) = 0; +}; + +// Size-limited pool of memory buffers obtained from a SubAllocator +// instance. Pool eviction policy is LRU. +class PoolAllocator : public VisitableAllocator { + public: + // "pool_size_limit" is the maximum number of returned, re-usable + // memory buffers to keep in the pool. If pool_size_limit == 0, the + // pool is effectively a thin wrapper around the allocator. + // If "auto_resize" is true, then the pool_size_limit will gradually + // be raised so that deallocations happen very rarely, if at all. + // Transitory start-up objects may deallocate, but the long-term + // working-set should not. Auto-resizing can raise pool_size_limit + // but will never lower it. + // "allocator" is the object that performs the underlying memory + // malloc/free operations. This object takes ownership of allocator. + PoolAllocator(size_t pool_size_limit, bool auto_resize, + SubAllocator* allocator, RoundUpInterface* size_rounder, + string name); + ~PoolAllocator() override; + + string Name() override { return name_; } + + void* AllocateRaw(size_t alignment, size_t num_bytes) override; + + void DeallocateRaw(void* ptr) override; + + // REQUIRES: The following functions may only be called prior + // to the first Allocate*() call. Once allocation has begun, it is + // illegal to register another visitor. + + void AddAllocVisitor(Visitor visitor) override; + + void AddFreeVisitor(Visitor visitor) override; + + // Allocate an unused memory region of size "num_bytes". Fetch from + // the pool if available, otherwise call allocator_. + void* Get(size_t num_bytes); + + // Return a no-longer needed memory region to the pool. It is an error + // to deference "ptr" after this call. If the pool is full, the least + // recently used region will be deallocated. + void Put(void* ptr, size_t num_bytes); + + // Reset the pool to empty. + void Clear(); + + // The following accessors permit monitoring the effectiveness of + // the pool at avoiding repeated malloc/frees on the underlying + // allocator. Read locks are not taken on the theory that value + // consistency with other threads is not important. + + // Number of Get() requests satisfied from pool. + int64 get_from_pool_count() const NO_THREAD_SAFETY_ANALYSIS { + return get_from_pool_count_; + } + // Number of Put() requests. + int64 put_count() const NO_THREAD_SAFETY_ANALYSIS { return put_count_; } + // Number of Get() requests requiring a fresh allocation. + int64 allocated_count() const NO_THREAD_SAFETY_ANALYSIS { + return allocated_count_; + } + // Number of pool evictions. + int64 evicted_count() const NO_THREAD_SAFETY_ANALYSIS { + return evicted_count_; + } + // Current size limit. + size_t size_limit() const NO_THREAD_SAFETY_ANALYSIS { + return pool_size_limit_; + } + + void GetStats(AllocatorStats* stats) override { stats->Clear(); } + + private: + struct PtrRecord { + void* ptr; + size_t num_bytes; + PtrRecord* prev; + PtrRecord* next; + }; + + // Remove "pr" from the double-linked LRU list. + void RemoveFromList(PtrRecord* pr) EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Add "pr" to the head of the double-linked LRU list. + void AddToList(PtrRecord* pr) EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Delete the least recently used record. + void EvictOne() EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + const string name_; + const bool has_size_limit_; + const bool auto_resize_; + size_t pool_size_limit_; + std::unique_ptr<SubAllocator> allocator_; + std::unique_ptr<RoundUpInterface> size_rounder_; + mutex mutex_; + std::multimap<const size_t, PtrRecord*> pool_ GUARDED_BY(mutex_); + PtrRecord* lru_head_ GUARDED_BY(mutex_) = nullptr; + PtrRecord* lru_tail_ GUARDED_BY(mutex_) = nullptr; + int64 get_from_pool_count_ GUARDED_BY(mutex_) = 0; + int64 put_count_ GUARDED_BY(mutex_) = 0; + int64 allocated_count_ GUARDED_BY(mutex_) = 0; + int64 evicted_count_ GUARDED_BY(mutex_) = 0; + // Write access to these is guarded by mutex_, but not read + // access. They may only be modified prior to the first + // allocation. Later attempts to modify will fail. + std::vector<Visitor> alloc_visitors_; + std::vector<Visitor> free_visitors_; + std::atomic<bool> allocation_begun_; +}; + +// Do-nothing rounder. Passes through sizes unchanged. +class NoopRounder : public RoundUpInterface { + public: + size_t RoundUp(size_t num_bytes) override { return num_bytes; } +}; + +// Power of 2 rounder: rounds up to nearest power of 2 size. +class Pow2Rounder : public RoundUpInterface { + public: + size_t RoundUp(size_t num_bytes) override { + return 1uLL << Log2Ceiling64(num_bytes); + } +}; + +class BasicCPUAllocator : public SubAllocator { + public: + // Argument numa_node is currently ignored. + explicit BasicCPUAllocator(int numa_node) : numa_node_(numa_node) {} + + ~BasicCPUAllocator() override {} + + void* Alloc(size_t alignment, size_t num_bytes) override; + + void Free(void* ptr, size_t num_bytes) override; + + private: + int numa_node_; +}; + +} // namespace tensorflow +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ |