1 files changed, 14 insertions, 13 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
index 1821289f4b..962848ad17 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
@@ -40,13 +40,13 @@ EventMgr::~EventMgr() {
     delete e;
   }
   while (!used_events_.empty()) {
-    delete used_events_[0].event;
-    delete used_events_[0].mem;
-    if (used_events_[0].bufrec.buf) {
-      used_events_[0].bufrec.alloc->DeallocateRaw(used_events_[0].bufrec.buf);
+    InUse* ue = &used_events_[0];
+    delete ue->event;
+    delete ue->mem;
+    if (ue->bufrec.buf) {
+      ue->bufrec.alloc->DeallocateRaw(ue->bufrec.buf);
     }
-    if (used_events_[0].func != nullptr)
-      threadpool_.Schedule(used_events_[0].func);
+    if (ue->func != nullptr) threadpool_.Schedule(ue->func);
     used_events_.pop_front();
   }
 }
@@ -60,10 +60,12 @@ EventMgr::~EventMgr() {
 void EventMgr::PollLoop() {
   while (!stop_polling_.HasBeenNotified()) {
     Env::Default()->SleepForMicroseconds(1 * 1000);
+    ToFreeVector to_free;
     {
       mutex_lock l(mu_);
-      PollEvents(true);
+      PollEvents(true, &to_free);
     }
+    FreeMemory(to_free);
   }
   polling_stopped_.Notify();
 }
@@ -103,7 +105,8 @@ void EventMgr::QueueInUse(gpu::Stream* stream, InUse iu) {
 // GPU memory use to spike needlessly.  An alternative strategy would
 // be to throttle new Op execution until the pending event queue
 // clears.
-void EventMgr::PollEvents(bool is_dedicated_poller) {
+void EventMgr::PollEvents(bool is_dedicated_poller,
+                          gtl::InlinedVector<InUse, 4>* to_free) {
   VLOG(2) << "PollEvents  free_events_ " << free_events_.size()
           << " used_events_ " << used_events_.size();
   // Sweep the remaining events in order.  If this is the dedicated
@@ -123,11 +126,9 @@ void EventMgr::PollEvents(bool is_dedicated_poller) {
         if (!is_dedicated_poller) return;  // quit processing queue
         break;
       case gpu::Event::Status::kComplete:
-        delete iu.mem;
-        if (iu.bufrec.buf) iu.bufrec.alloc->DeallocateRaw(iu.bufrec.buf);
-        // The function must be called in another thread, outside of
-        // the mutex held here.
-        if (iu.func != nullptr) threadpool_.Schedule(iu.func);
+        // Make a copy of the InUse record so we can free it after releasing
+        // the lock
+        to_free->push_back(iu);
         free_events_.push_back(iu.event);
         // Mark this InUse record as completed.
         iu.event = nullptr;