Use an iterator to walk down the set of free chunks, so that deleting one from the set when we find one we want is cheaper.

Slight performance improvement (~0.3% on ptb_word_lm model on my desktop). Change: 112832451
author: A. Unique TensorFlower <nobody@tensorflow.org> 2016-01-22 16:55:27 -0800
committer: Vijay Vasudevan <vrv@google.com> 2016-01-22 17:05:17 -0800
commit: ff57d126486e35553ae9c456373e9ad3adcb8142 (patch)
tree: c00643f72620a3c4af0b5907958fb10f6e8ef87a /tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
parent: 7a68818ff52a8c8331c57aefd16afe6c1c1ed936 (diff)
1 files changed, 14 insertions, 12 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index 5551c8bdb7..6205abbc87 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -125,17 +125,6 @@ class GPUBFCAllocator : public VisitableAllocator {
       return dbg;
     }
   };
-
-  Chunk* AllocateNewChunk(size_t num_bytes);
-  void SplitChunk(Chunk* c, size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  void Merge(Chunk* c1, Chunk* c2) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  void FreeAndMaybeCoalesce(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  void InsertFreeChunkIntoBin(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  void RemoveFreeChunkFromBin(Chunk* c);
-  void DeleteChunk(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  void DumpMemoryLog(size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
   // A Bin is a collection of similar-sized free chunks.
   struct Bin {
     // All chunks in this bin have >= bin_size memory.
@@ -151,13 +140,26 @@ class GPUBFCAllocator : public VisitableAllocator {
       }
     };
 
+    typedef std::set<Chunk*, ChunkComparator> FreeChunkSet;
     // List of free chunks within the bin, sorted by chunk size.
     // Chunk * not owned.
-    std::set<Chunk*, ChunkComparator> free_chunks;
+    FreeChunkSet free_chunks;
 
     explicit Bin(size_t bs) : bin_size(bs) {}
   };
 
+  Chunk* AllocateNewChunk(size_t num_bytes);
+  void SplitChunk(Chunk* c, size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  void Merge(Chunk* c1, Chunk* c2) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  void FreeAndMaybeCoalesce(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  void InsertFreeChunkIntoBin(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  void RemoveFreeChunkIterFromBin(Bin::FreeChunkSet* free_chunks,
+                                  const Bin::FreeChunkSet::iterator& c);
+  void RemoveFreeChunkFromBin(Chunk* c);
+  void DeleteChunk(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  void DumpMemoryLog(size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
   GPUAllocatorRetry retry_helper_;
 
   // Structures immutable after construction
author	A. Unique TensorFlower <nobody@tensorflow.org>	2016-01-22 16:55:27 -0800
committer	Vijay Vasudevan <vrv@google.com>	2016-01-22 17:05:17 -0800
commit	ff57d126486e35553ae9c456373e9ad3adcb8142 (patch)
tree	c00643f72620a3c4af0b5907958fb10f6e8ef87a /tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
parent	7a68818ff52a8c8331c57aefd16afe6c1c1ed936 (diff)