aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: A. Unique TensorFlower <nobody@tensorflow.org> 2016-01-22 16:55:27 -0800
committer: Vijay Vasudevan <vrv@google.com> 2016-01-22 17:05:17 -0800
commit: ff57d126486e35553ae9c456373e9ad3adcb8142 (patch)
tree: c00643f72620a3c4af0b5907958fb10f6e8ef87a
parent: 7a68818ff52a8c8331c57aefd16afe6c1c1ed936 (diff)
Use an iterator to walk down the set of free chunks, so that removing the
chunk we want from the set, once we find it, is cheaper.
Slight performance improvement (~0.3% on the ptb_word_lm model on my desktop).
Change: 112832451
-rw-r--r-- tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc 15
-rw-r--r-- tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h 26
2 files changed, 27 insertions, 14 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index cd82361030..2ec149ee8e 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -159,12 +159,14 @@ void* GPUBFCAllocator::AllocateRawInternal(size_t unused_alignment,
// Start searching from the first bin for the smallest chunk that fits
// rounded_bytes.
Bin* b = it->second;
- for (GPUBFCAllocator::Chunk* chunk : b->free_chunks) {
+ for (auto citer = b->free_chunks.begin(); citer != b->free_chunks.end();
+ ++citer) {
+ GPUBFCAllocator::Chunk* chunk = (*citer);
DCHECK(!chunk->in_use());
if (chunk->size >= rounded_bytes) {
// We found an existing chunk that fits us that wasn't in use, so remove
// it from the free bin structure prior to using.
- RemoveFreeChunkFromBin(chunk);
+ RemoveFreeChunkIterFromBin(&b->free_chunks, citer);
// If we can break the size of the chunk into two reasonably
// large pieces, do so.
@@ -299,6 +301,15 @@ void GPUBFCAllocator::InsertFreeChunkIntoBin(GPUBFCAllocator::Chunk* c) {
new_bin->free_chunks.insert(c);
}
+void GPUBFCAllocator::RemoveFreeChunkIterFromBin(  // Removes the chunk at `citer` from `free_chunks` and clears its bin pointer.
+ GPUBFCAllocator::Bin::FreeChunkSet* free_chunks,  // The set that `citer` points into.
+ const GPUBFCAllocator::Bin::FreeChunkSet::iterator& citer) {  // Position of the chunk to remove; must be dereferenceable.
+ GPUBFCAllocator::Chunk* c = *citer;  // Copy the pointer first: `citer` is invalid after the erase below.
+ CHECK(!c->in_use() && c->bin);  // Only a free chunk that is currently assigned to a bin may be removed.
+ free_chunks->erase(citer);  // Erase by position, avoiding the by-key lookup that erase(c) performs.
+ c->bin = nullptr;  // The chunk no longer belongs to any bin.
+}
+
void GPUBFCAllocator::RemoveFreeChunkFromBin(GPUBFCAllocator::Chunk* c) {
CHECK(!c->in_use() && c->bin);
int count = c->bin->free_chunks.erase(c);
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index 5551c8bdb7..6205abbc87 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -125,17 +125,6 @@ class GPUBFCAllocator : public VisitableAllocator {
return dbg;
}
};
-
- Chunk* AllocateNewChunk(size_t num_bytes);
- void SplitChunk(Chunk* c, size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
- void Merge(Chunk* c1, Chunk* c2) EXCLUSIVE_LOCKS_REQUIRED(lock_);
- void FreeAndMaybeCoalesce(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
- void InsertFreeChunkIntoBin(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
- void RemoveFreeChunkFromBin(Chunk* c);
- void DeleteChunk(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
- void DumpMemoryLog(size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
// A Bin is a collection of similar-sized free chunks.
struct Bin {
// All chunks in this bin have >= bin_size memory.
@@ -151,13 +140,26 @@ class GPUBFCAllocator : public VisitableAllocator {
}
};
+ typedef std::set<Chunk*, ChunkComparator> FreeChunkSet;
// List of free chunks within the bin, sorted by chunk size.
// Chunk * not owned.
- std::set<Chunk*, ChunkComparator> free_chunks;
+ FreeChunkSet free_chunks;
explicit Bin(size_t bs) : bin_size(bs) {}
};
+ Chunk* AllocateNewChunk(size_t num_bytes);
+ void SplitChunk(Chunk* c, size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+ void Merge(Chunk* c1, Chunk* c2) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+ void FreeAndMaybeCoalesce(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+ void InsertFreeChunkIntoBin(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+ void RemoveFreeChunkIterFromBin(Bin::FreeChunkSet* free_chunks,
+ const Bin::FreeChunkSet::iterator& c);
+ void RemoveFreeChunkFromBin(Chunk* c);
+ void DeleteChunk(Chunk* c) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+ void DumpMemoryLog(size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
GPUAllocatorRetry retry_helper_;
// Structures immutable after construction