// tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc

#if GOOGLE_CUDA

#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"

#include <chrono>
#include <thread>

#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/stream_executor/multi_platform_manager.h"
#include "tensorflow/stream_executor/stream_executor.h"
#include <gtest/gtest.h>

namespace gpu = ::perftools::gputools;

namespace tensorflow {

// Test-only wrapper that lets the tests below inspect and drive an EventMgr.
// Each method acquires the manager's mutex (mu_) before touching its state.
// NOTE(review): this reaches into mu_, used_events_, free_events_,
// QueueTensors and PollEvents directly, so it presumably depends on EventMgr
// granting access to this class -- confirm in gpu_event_mgr.h.
class TEST_EventMgrHelper {
 public:
  // Does not take ownership of `em`; the pointer is only stored.
  explicit TEST_EventMgrHelper(EventMgr* em) : em_(em) {}

  // Returns the current number of entries in used_events_.
  int queue_size() {
    mutex_lock lock(em_->mu_);
    return static_cast<int>(em_->used_events_.size());
  }

  // Returns the current number of entries in free_events_.
  int free_size() {
    mutex_lock lock(em_->mu_);
    return static_cast<int>(em_->free_events_.size());
  }

  // Forwards to EventMgr::QueueTensors while holding the manager's mutex.
  void QueueTensors(perftools::gputools::Stream* stream,
                    std::vector<Tensor>* tensors) {
    mutex_lock lock(em_->mu_);
    em_->QueueTensors(stream, tensors);
  }

  // Forwards to EventMgr::PollEvents while holding the manager's mutex.
  void PollEvents(bool is_dedicated_poller) {
    mutex_lock lock(em_->mu_);
    em_->PollEvents(is_dedicated_poller);
  }

 private:
  EventMgr* em_;  // The manager under test.
};

namespace {

// A freshly constructed EventMgr should report neither used nor free events.
TEST(EventMgr, Empty) {
  auto executor = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr event_mgr(executor);
  TEST_EventMgrHelper helper(&event_mgr);

  EXPECT_EQ(0, helper.queue_size());
  EXPECT_EQ(0, helper.free_size());
}

// Delaying polling until after several enqueuings should grow the
// total number of allocated events.  Once there are enough events to
// cover the maximum number simultaneously pending, no further events
// should be allocated.
TEST(EventMgr, DelayedPolling) {
  const int kBatchSize = 5;     // Enqueues issued between two polls.
  const int kRepeatRounds = 2;  // Extra batches run after the first poll.

  auto executor = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr event_mgr(executor);
  TEST_EventMgrHelper helper(&event_mgr);
  EXPECT_EQ(0, helper.queue_size());

  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(executor));
  CHECK(stream.get());
  stream->Init();

  // First batch: nothing is on the free list yet, so each enqueue only
  // lengthens the used queue.
  for (int queued = 1; queued <= kBatchSize; ++queued) {
    // NOTE(review): the vector is handed to the manager and never deleted
    // here; presumably EventMgr takes ownership -- confirm.
    std::vector<Tensor>* tensors = new std::vector<Tensor>;
    helper.QueueTensors(stream.get(), tensors);
    EXPECT_EQ(queued, helper.queue_size());
    EXPECT_EQ(0, helper.free_size());
  }
  helper.PollEvents(/*is_dedicated_poller=*/false);
  EXPECT_EQ(0, helper.queue_size());
  EXPECT_EQ(kBatchSize, helper.free_size());

  // Later batches: every enqueue should move one event from the free list
  // to the used queue, so the total stays at kBatchSize.
  for (int round = 0; round < kRepeatRounds; ++round) {
    for (int queued = 1; queued <= kBatchSize; ++queued) {
      std::vector<Tensor>* tensors = new std::vector<Tensor>;
      helper.QueueTensors(stream.get(), tensors);
      EXPECT_EQ(queued, helper.queue_size());
      EXPECT_EQ(kBatchSize - queued, helper.free_size());
    }
    helper.PollEvents(/*is_dedicated_poller=*/false);
    EXPECT_EQ(0, helper.queue_size());
    EXPECT_EQ(kBatchSize, helper.free_size());
  }
}

// When every enqueue is followed immediately by a poll, a single event
// should be enough: it is expected back on the free list after each call.
TEST(EventMgr, ImmediatePolling) {
  const int kNumCalls = 5;

  auto executor = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr event_mgr(executor);
  TEST_EventMgrHelper helper(&event_mgr);
  EXPECT_EQ(0, helper.queue_size());
  EXPECT_EQ(0, helper.free_size());

  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(executor));
  CHECK(stream.get());
  stream->Init();

  for (int call = 0; call < kNumCalls; ++call) {
    // Uses the public ThenDeleteTensors entry point rather than the helper.
    // NOTE(review): presumably it queues the tensors and polls right away,
    // and takes ownership of the vector -- confirm in gpu_event_mgr.h.
    std::vector<Tensor>* tensors = new std::vector<Tensor>;
    event_mgr.ThenDeleteTensors(stream.get(), tensors);
    EXPECT_EQ(0, helper.queue_size());
    EXPECT_EQ(1, helper.free_size());
  }
}

// If we never poll explicitly and simply wait for a second, the backup
// polling loop should clear the queue on its own.
TEST(EventMgr, LongDelayedPolling) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec);
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, th.queue_size());
  EXPECT_EQ(0, th.free_size());
  std::vector<Tensor>* v = nullptr;
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
  CHECK(stream.get());
  stream->Init();
  // Enqueue five batches without polling; each one needs a new event.
  for (int i = 0; i < 5; ++i) {
    // NOTE(review): the vector is never deleted here; presumably the
    // EventMgr takes ownership -- confirm.
    v = new std::vector<Tensor>;
    th.QueueTensors(stream.get(), v);
    EXPECT_EQ(1 + i, th.queue_size());
    EXPECT_EQ(0, th.free_size());
  }
  // Wait for the background poller to drain the queue.  sleep_for is used
  // instead of POSIX sleep(): it is standard C++, does not depend on a
  // transitive <unistd.h> include, and is not cut short by a signal.
  std::this_thread::sleep_for(std::chrono::seconds(1));
  EXPECT_EQ(0, th.queue_size());
  EXPECT_EQ(5, th.free_size());
}

// Destroying the EventMgr while events are still pending should shut
// down gracefully.  The test passes if it finishes without crashing.
TEST(EventMgr, NonEmptyShutdown) {
  const int kNumPending = 5;

  auto executor = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr event_mgr(executor);
  TEST_EventMgrHelper helper(&event_mgr);
  EXPECT_EQ(0, helper.queue_size());
  EXPECT_EQ(0, helper.free_size());

  // Declared after the manager on purpose, so the destruction order at the
  // end of the test is unchanged (stream first, then the manager).
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(executor));
  CHECK(stream.get());
  stream->Init();

  // Enqueue without ever polling, so the queue is still populated when the
  // manager goes out of scope.
  for (int queued = 1; queued <= kNumPending; ++queued) {
    std::vector<Tensor>* tensors = new std::vector<Tensor>;
    helper.QueueTensors(stream.get(), tensors);
    EXPECT_EQ(queued, helper.queue_size());
    EXPECT_EQ(0, helper.free_size());
  }
}

}  // namespace
}  // namespace tensorflow

#endif  // GOOGLE_CUDA