diff options
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc')
-rw-r--r-- | tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc | 120 |
1 file changed, 103 insertions, 17 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc b/tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc index 910093a069..57c1554678 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc @@ -17,10 +17,12 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" +#include <atomic> #include "tensorflow/stream_executor/multi_platform_manager.h" #include "tensorflow/stream_executor/stream_executor.h" #include <gtest/gtest.h> #include "tensorflow/core/common_runtime/gpu/gpu_init.h" +#include "tensorflow/core/framework/config.pb.h" namespace gpu = ::perftools::gputools; @@ -59,11 +61,32 @@ class TEST_EventMgrHelper { EventMgr* em_; }; +static std::atomic_int_fast64_t live_tensor_bytes(0); + +// A TensorBuffer that counts live memory usage for testing +class TestTensorBuffer : public TensorBuffer { + public: + TestTensorBuffer(size_t bytes) : bytes_(bytes) { + live_tensor_bytes += bytes_; + } + ~TestTensorBuffer() { live_tensor_bytes -= bytes_; } + + size_t size() const override { return bytes_; } + + // Not used in this test + void* data() const override { return nullptr; } + TensorBuffer* root_buffer() override { return nullptr; } + void FillAllocationDescription(AllocationDescription* arg) const override {} + + private: + size_t bytes_; +}; + namespace { TEST(EventMgr, Empty) { auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); - EventMgr em(stream_exec); + EventMgr em(stream_exec, GPUOptions()); TEST_EventMgrHelper th(&em); EXPECT_EQ(0, th.queue_size()); EXPECT_EQ(0, th.free_size()); @@ -74,7 +97,7 @@ TEST(EventMgr, Empty) { // the max simultaneously pending, we should not allocate any more. 
TEST(EventMgr, DelayedPolling) { auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); - EventMgr em(stream_exec); + EventMgr em(stream_exec, GPUOptions()); TEST_EventMgrHelper th(&em); EXPECT_EQ(0, th.queue_size()); EventMgr::TensorReferenceVector* v = nullptr; @@ -103,22 +126,87 @@ TEST(EventMgr, DelayedPolling) { } } -// Immediate polling should require only one event to be allocated. -TEST(EventMgr, ImmediatePolling) { +static void AddTensorReference(EventMgr::TensorReferenceVector* v, int64 size) { + TestTensorBuffer* buf = new TestTensorBuffer(size); + v->push_back(TensorReference(buf)); + buf->Unref(); +} + +TEST(EventMgr, FlushLargeTensorImmediately) { auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); - EventMgr em(stream_exec); + EventMgr em(stream_exec, GPUOptions()); TEST_EventMgrHelper th(&em); - EXPECT_EQ(0, th.queue_size()); - EXPECT_EQ(0, th.free_size()); - EventMgr::TensorReferenceVector* v = nullptr; + EXPECT_EQ(0, live_tensor_bytes); std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec)); CHECK(stream.get()); stream->Init(); for (int i = 0; i < 5; ++i) { - v = new EventMgr::TensorReferenceVector; + EventMgr::TensorReferenceVector v; + AddTensorReference(&v, 100 * 1048576); em.ThenDeleteTensors(stream.get(), v); - EXPECT_EQ(0, th.queue_size()); - EXPECT_EQ(1, th.free_size()); + th.PollEvents(false); // Ensure things get registered to be freed by Poll + EXPECT_EQ(0, live_tensor_bytes); + } +} + +TEST(EventMgr, ManySmallTensorsFlushedImmediately) { + auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); + EventMgr em(stream_exec, GPUOptions()); + TEST_EventMgrHelper th(&em); + EXPECT_EQ(0, live_tensor_bytes); + std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec)); + CHECK(stream.get()); + stream->Init(); + for (int i = 0; i < 5; ++i) { + EventMgr::TensorReferenceVector v; + for (int i = 0; i < 1000; i++) { + AddTensorReference(&v, 100 * 1024); + } + 
em.ThenDeleteTensors(stream.get(), v); + th.PollEvents(false); // Ensure things get registered to be freed by Poll + EXPECT_EQ(0, live_tensor_bytes); + } +} + +TEST(EventMgr, StreamSwitchingFlushesImmediately) { + auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); + EventMgr em(stream_exec, GPUOptions()); + TEST_EventMgrHelper th(&em); + EXPECT_EQ(0, live_tensor_bytes); + std::unique_ptr<gpu::Stream> stream1(new gpu::Stream(stream_exec)); + std::unique_ptr<gpu::Stream> stream2(new gpu::Stream(stream_exec)); + stream1->Init(); + stream2->Init(); + EventMgr::TensorReferenceVector v1; + AddTensorReference(&v1, 1024); + em.ThenDeleteTensors(stream1.get(), v1); + + EventMgr::TensorReferenceVector v2; + AddTensorReference(&v2, 1024); + int64 initial_live_bytes = live_tensor_bytes; + em.ThenDeleteTensors(stream2.get(), v2); + th.PollEvents(false); // Ensure things get registered to be freed by Poll + // Different stream should cause first tensor to get deleted + EXPECT_GT(initial_live_bytes, live_tensor_bytes); +} + +TEST(EventMgr, ManySmallTensorsSeperateCallsFlushed) { + auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); + EventMgr em(stream_exec, GPUOptions()); + TEST_EventMgrHelper th(&em); + EXPECT_EQ(0, live_tensor_bytes); + std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec)); + CHECK(stream.get()); + stream->Init(); + for (int i = 0; i < 5; ++i) { + for (int i = 0; i < 1000; i++) { + EventMgr::TensorReferenceVector v; + AddTensorReference(&v, 100 * 1024); + em.ThenDeleteTensors(stream.get(), v); + } + th.PollEvents(false); // Ensure things get registered to be freed by Poll + // Some of the tensors at least should be flushed + EXPECT_GT(1000 * 100 * 1024, live_tensor_bytes); } } @@ -126,16 +214,15 @@ TEST(EventMgr, ImmediatePolling) { // should clear the queue. 
TEST(EventMgr, LongDelayedPolling) { auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); - EventMgr em(stream_exec); + EventMgr em(stream_exec, GPUOptions()); TEST_EventMgrHelper th(&em); EXPECT_EQ(0, th.queue_size()); EXPECT_EQ(0, th.free_size()); - EventMgr::TensorReferenceVector* v = nullptr; std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec)); CHECK(stream.get()); stream->Init(); for (int i = 0; i < 5; ++i) { - v = new EventMgr::TensorReferenceVector; + EventMgr::TensorReferenceVector* v = new EventMgr::TensorReferenceVector; th.QueueTensors(stream.get(), v); EXPECT_EQ(1 + i, th.queue_size()); EXPECT_EQ(0, th.free_size()); @@ -149,16 +236,15 @@ TEST(EventMgr, LongDelayedPolling) { // down gracefully. TEST(EventMgr, NonEmptyShutdown) { auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); - EventMgr em(stream_exec); + EventMgr em(stream_exec, GPUOptions()); TEST_EventMgrHelper th(&em); EXPECT_EQ(0, th.queue_size()); EXPECT_EQ(0, th.free_size()); - EventMgr::TensorReferenceVector* v = nullptr; std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec)); CHECK(stream.get()); stream->Init(); for (int i = 0; i < 5; ++i) { - v = new EventMgr::TensorReferenceVector; + EventMgr::TensorReferenceVector* v = new EventMgr::TensorReferenceVector; th.QueueTensors(stream.get(), v); EXPECT_EQ(1 + i, th.queue_size()); EXPECT_EQ(0, th.free_size()); |