Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_region_allocator.h')
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_region_allocator.h | 146
1 file changed, 146 insertions(+), 0 deletions(-)
diff --git a/tensorflow/core/common_runtime/gpu/gpu_region_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_region_allocator.h
new file mode 100644
index 0000000000..1a250b6ede
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_region_allocator.h
@@ -0,0 +1,146 @@
+#ifndef TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_
+#define TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/stream_executor/stream_executor.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h"
+#include "tensorflow/core/common_runtime/gpu/visitable_allocator.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+
+namespace tensorflow {
+
+class GPURegionAllocator : public VisitableAllocator {
+ public:
+ // 'device_id' must be a valid device on the machine.
+ //
+  // 'total_bytes' is the maximum number of bytes this allocator
+  // should allocate.  This may be less than the total memory
+  // available on the device.
+ explicit GPURegionAllocator(int device_id, size_t total_bytes);
+ ~GPURegionAllocator() override;
+
+ string Name() override { return "gpu_region"; }
+ void* AllocateRaw(size_t alignment, size_t num_bytes) override;
+ void DeallocateRaw(void* ptr) override;
+ void AddAllocVisitor(Visitor visitor) override;
+ // Does nothing, because regions are never freed.
+ void AddFreeVisitor(Visitor visitor) override {}
+
+ bool TracksAllocationSizes() override;
+ size_t RequestedSize(void* ptr) override;
+ size_t AllocatedSize(void* ptr) override;
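+
+  // Illustrative usage (a sketch only; the constants are arbitrary and
+  // CHECK_GE is TensorFlow's standard check macro):
+  //
+  //   GPURegionAllocator a(/*device_id=*/0, /*total_bytes=*/1 << 30);
+  //   void* p = a.AllocateRaw(/*alignment=*/32, /*num_bytes=*/100);
+  //   CHECK_GE(a.AllocatedSize(p), a.RequestedSize(p));
+  //   a.DeallocateRaw(p);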
+
+ private:
+ // A Chunk is the header on a single piece of memory given back
+ // in response to an AllocateRaw() call.
+ struct Chunk {
+    char* ptr;               // Pointer to the granted GPU buffer.
+    size_t size;             // Full size of the GPU buffer.
+    size_t bytes_allocated;  // Bytes asked for by client.
+    bool in_use;             // Whether this chunk is currently granted.
+    Chunk* prev;             // Previous chunk in the pool's ring.
+    Chunk* next;             // Next chunk in the pool's ring.
+ Chunk()
+ : ptr(nullptr),
+ size(0),
+ bytes_allocated(0),
+ in_use(false),
+ prev(nullptr),
+ next(nullptr) {}
+ };
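+
+  // For example (illustrative values): after AllocateRaw(32, 100), the
+  // owning Chunk has bytes_allocated == 100 and size == ChunkSize(100),
+  // so size >= bytes_allocated always holds.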
+
+ // A Pool is a collection of same-sized Chunks.
+ struct Pool {
+    int num_chunks;             // Total chunks in this pool.
+    int num_free;               // Free chunks in this pool.
+    int64 cumulative_malloced;  // Number of chunks malloced so far.
+    int64 cumulative_freed;     // Number of chunks freed so far.
+
+    // Doubly-linked ring of chunks: all free chunks precede all
+    // granted chunks.
+ Chunk* first;
+ Chunk* last;
+ Pool()
+ : num_chunks(0),
+ num_free(0),
+ cumulative_malloced(0),
+ cumulative_freed(0),
+ first(nullptr),
+ last(nullptr) {}
+
+ string ToString() const {
+ return strings::StrCat("chunks: ", num_chunks, " free: ", num_free,
+ " cumulative malloc: ", cumulative_malloced,
+ " cumulative freed: ", cumulative_freed);
+ }
+ };
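+
+  // Sketch of the ring invariant above, for a Pool with num_chunks == 4
+  // and num_free == 2 (illustrative only; the maintenance logic lives in
+  // the .cc file):
+  //
+  //   first -> [free] <-> [free] <-> [in_use] <-> [in_use] <- last
+  //
+  // With this layout an allocation can test num_free and take the chunk
+  // at 'first', while a deallocation returns a chunk to the free prefix
+  // of the ring.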
+
+ // A Region is a single area of GPU memory that has been
+ // reserved by this class and carved up into Chunks.
+ struct Region {
+    char* ptr;   // Base GPU pointer for this region.
+    char* next;  // Frontier of the unused part of the region.
+    size_t size;
+    Region() : ptr(nullptr), next(nullptr), size(0) {}
+ };
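+
+  // Carving sketch (inferred from the fields above, not from the
+  // implementation): chunks are granted at 'next', which advances toward
+  // ptr + size:
+  //
+  //   ptr               next                ptr + size
+  //    |-- carved chunks --|------ unused ------|
+  //
+  // so a new chunk of size s fits iff next + s <= ptr + size.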
+
+  // Calculates the chunk size to use for an allocation of 'bytes'
+  // bytes.  The minimum chunk size is 16, for alignment.  Larger
+  // sizes are rounded up somewhat so that there are fewer
+  // size-specific pools.
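+  //
+  // For example (illustrative; the exact rounding is defined in the
+  // .cc file):
+  //   ChunkSize(1)   == 16    // the minimum, for alignment
+  //   ChunkSize(100) >= 100   // rounded up to a pool bucket size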
+ static size_t ChunkSize(size_t bytes);
+
+ void* AllocateRawInternal(size_t alignment, size_t num_bytes,
+ bool dump_log_on_failure);
+ void DeallocateRawInternal(void* ptr);
+
+ bool ExpandPool(Pool* p, size_t chunk_size, size_t requested_size,
+ bool dump_log_on_failure) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+ // Inspects region maps and crashes with debug information if there
+ // are any memory leaks as detected by the region allocator.
+ void CheckForMemoryLeaks() LOCKS_EXCLUDED(lock_);
+
+ void DumpMemoryLog() EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+ perftools::gputools::StreamExecutor* stream_exec_; // Not owned.
+
+ typedef std::unordered_map<size_t, Pool> PoolMap;
+ typedef std::unordered_map<void*, Chunk*> ChunkMap;
+
+ GPUAllocatorRetry retry_helper_;
+ mutable mutex lock_;
+ PoolMap pools_ GUARDED_BY(lock_);
+
+ // Owns regions.
+ std::vector<Region*> regions_ GUARDED_BY(lock_);
+
+ // Maps from GPU ptr to Chunk owning it.
+ //
+ // Owns chunks.
+ ChunkMap chunk_map_ GUARDED_BY(lock_);
+
+  // Visitors invoked once on each region, as soon as possible after
+  // the region is allocated.
+ std::vector<Visitor> region_visitors_ GUARDED_BY(lock_);
+
+ const int device_id_;
+
+  // Total amount of memory (in bytes) available to this allocator.
+ const size_t total_bytes_;
+
+ // Total amount of memory allocated to regions.
+ size_t allocated_memory_ = 0;
+
+  // Size (in bytes) to reserve for each new region.
+  size_t region_size_ = 0;
+
+ TF_DISALLOW_COPY_AND_ASSIGN(GPURegionAllocator);
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_COMMON_RUNTIME_GPU_GPU_REGION_ALLOCATOR_H_