35 files changed, 1008 insertions, 623 deletions
diff --git a/src/core/lib/iomgr/closure.c b/src/core/lib/iomgr/closure.c
index 1ba0a5c141..2c84e82aca 100644
--- a/src/core/lib/iomgr/closure.c
+++ b/src/core/lib/iomgr/closure.c
@@ -35,6 +35,8 @@
 
 #include <grpc/support/alloc.h>
 
+#include "src/core/lib/profiling/timers.h"
+
 void grpc_closure_init(grpc_closure *closure, grpc_iomgr_cb_func cb,
                        void *cb_arg) {
   closure->cb = cb;
@@ -51,7 +53,7 @@ void grpc_closure_list_append(grpc_closure_list *closure_list,
     GRPC_ERROR_UNREF(error);
     return;
   }
-  closure->error = error;
+  closure->error_data.error = error;
   closure->next_data.next = NULL;
   if (closure_list->head == NULL) {
     closure_list->head = closure;
@@ -64,8 +66,8 @@ void grpc_closure_list_append(grpc_closure_list *closure_list,
 void grpc_closure_list_fail_all(grpc_closure_list *list,
                                 grpc_error *forced_failure) {
   for (grpc_closure *c = list->head; c != NULL; c = c->next_data.next) {
-    if (c->error == GRPC_ERROR_NONE) {
-      c->error = GRPC_ERROR_REF(forced_failure);
+    if (c->error_data.error == GRPC_ERROR_NONE) {
+      c->error_data.error = GRPC_ERROR_REF(forced_failure);
     }
   }
   GRPC_ERROR_UNREF(forced_failure);
@@ -110,3 +112,11 @@ grpc_closure *grpc_closure_create(grpc_iomgr_cb_func cb, void *cb_arg) {
   grpc_closure_init(&wc->wrapper, closure_wrapper, wc);
   return &wc->wrapper;
 }
+
+void grpc_closure_run(grpc_exec_ctx *exec_ctx, grpc_closure *c,
+                      grpc_error *error) {
+  GPR_TIMER_BEGIN("grpc_closure_run", 0);
+  c->cb(exec_ctx, c->cb_arg, error);  // invoke the callback inline, right now
+  GRPC_ERROR_UNREF(error);  // drop one ref on error after the callback returns
+  GPR_TIMER_END("grpc_closure_run", 0);
+}
diff --git a/src/core/lib/iomgr/closure.h b/src/core/lib/iomgr/closure.h
index c1a22b6021..2b4b271eaa 100644
--- a/src/core/lib/iomgr/closure.h
+++ b/src/core/lib/iomgr/closure.h
@@ -76,7 +76,10 @@ struct grpc_closure {
   void *cb_arg;
 
   /** Once queued, the result of the closure. Before then: scratch space */
-  grpc_error *error;
+  union {
+    grpc_error *error;
+    uintptr_t scratch;
+  } error_data;
 };
 
 /** Initializes \a closure with \a cb and \a cb_arg. */
@@ -106,4 +109,10 @@ void grpc_closure_list_move(grpc_closure_list *src, grpc_closure_list *dst);
 /** return whether \a list is empty. */
 bool grpc_closure_list_empty(grpc_closure_list list);
 
+/** Run a closure directly. Caller ensures that no locks are being held above.
+ *  Note that calling this at the end of a closure callback function itself is
+ *  by definition safe. */
+void grpc_closure_run(grpc_exec_ctx *exec_ctx, grpc_closure *closure,
+                      grpc_error *error);
+
 #endif /* GRPC_CORE_LIB_IOMGR_CLOSURE_H */
diff --git a/src/core/lib/iomgr/combiner.c b/src/core/lib/iomgr/combiner.c
index 831bdb4aff..48806abc38 100644
--- a/src/core/lib/iomgr/combiner.c
+++ b/src/core/lib/iomgr/combiner.c
@@ -51,24 +51,53 @@ int grpc_combiner_trace = 0;
   } while (0)
 
 struct grpc_combiner {
+  grpc_combiner *next_combiner_on_this_exec_ctx;
   grpc_workqueue *optional_workqueue;
   gpr_mpscq queue;
   // state is:
   // lower bit - zero if orphaned
   // other bits - number of items queued on the lock
   gpr_atm state;
-  bool take_async_break_before_final_list;
+  // number of elements in the list that are covered by a poller: if >0, we can
+  // offload safely
+  gpr_atm covered_by_poller;
+  bool time_to_execute_final_list;
+  bool final_list_covered_by_poller;
   grpc_closure_list final_list;
-  grpc_closure continue_finishing;
+  grpc_closure offload;
 };
 
+static void offload(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
+
+typedef struct {
+  grpc_error *error;
+  bool covered_by_poller;
+} error_data;
+
+static uintptr_t pack_error_data(error_data d) {  // flag is stored in bit 0
+  return ((uintptr_t)d.error) | (d.covered_by_poller ? 1 : 0);
+}
+
+static error_data unpack_error_data(uintptr_t p) {  // undoes pack_error_data
+  return (error_data){(grpc_error *)(p & ~(uintptr_t)1), p & 1};
+}
+
+static bool is_covered_by_poller(grpc_combiner *lock) {
+  return lock->final_list_covered_by_poller ||  // flag for the final list
+         gpr_atm_acq_load(&lock->covered_by_poller) > 0;  // queued closures
+}
+
 grpc_combiner *grpc_combiner_create(grpc_workqueue *optional_workqueue) {
   grpc_combiner *lock = gpr_malloc(sizeof(*lock));
+  lock->next_combiner_on_this_exec_ctx = NULL;
+  lock->time_to_execute_final_list = false;
   lock->optional_workqueue = optional_workqueue;
+  lock->final_list_covered_by_poller = false;
   gpr_atm_no_barrier_store(&lock->state, 1);
+  gpr_atm_no_barrier_store(&lock->covered_by_poller, 0);
   gpr_mpscq_init(&lock->queue);
-  lock->take_async_break_before_final_list = false;
   grpc_closure_list_init(&lock->final_list);
+  grpc_closure_init(&lock->offload, offload, lock);
   GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, "C:%p create", lock));
   return lock;
 }
@@ -90,170 +119,176 @@ void grpc_combiner_destroy(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) {
   }
 }
 
-static bool maybe_finish_one(grpc_exec_ctx *exec_ctx, grpc_combiner *lock);
-static void finish(grpc_exec_ctx *exec_ctx, grpc_combiner *lock);
+static void push_last_on_exec_ctx(grpc_exec_ctx *exec_ctx,
+                                  grpc_combiner *lock) {
+  lock->next_combiner_on_this_exec_ctx = NULL;  // lock becomes the list tail
+  if (exec_ctx->active_combiner == NULL) {  // empty list: head and tail
+    exec_ctx->active_combiner = exec_ctx->last_combiner = lock;
+  } else {  // otherwise link it after the current tail
+    exec_ctx->last_combiner->next_combiner_on_this_exec_ctx = lock;
+    exec_ctx->last_combiner = lock;
+  }
+}
 
-static void continue_finishing_mainline(grpc_exec_ctx *exec_ctx, void *arg,
-                                        grpc_error *error) {
-  GPR_TIMER_BEGIN("combiner.continue_executing_mainline", 0);
-  grpc_combiner *lock = arg;
-  GRPC_COMBINER_TRACE(
-      gpr_log(GPR_DEBUG, "C:%p continue_finishing_mainline", lock));
-  GPR_ASSERT(exec_ctx->active_combiner == NULL);
+static void push_first_on_exec_ctx(grpc_exec_ctx *exec_ctx,
+                                   grpc_combiner *lock) {
+  lock->next_combiner_on_this_exec_ctx = exec_ctx->active_combiner;
   exec_ctx->active_combiner = lock;
-  if (maybe_finish_one(exec_ctx, lock)) finish(exec_ctx, lock);
-  GPR_ASSERT(exec_ctx->active_combiner == lock);
-  exec_ctx->active_combiner = NULL;
-  GPR_TIMER_END("combiner.continue_executing_mainline", 0);
+  if (lock->next_combiner_on_this_exec_ctx == NULL) {
+    exec_ctx->last_combiner = lock;
+  }
 }
 
-static void execute_final(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) {
-  GPR_TIMER_BEGIN("combiner.execute_final", 0);
-  grpc_closure *c = lock->final_list.head;
-  GPR_ASSERT(c != NULL);
-  grpc_closure_list_init(&lock->final_list);
-  lock->take_async_break_before_final_list = false;
-  int loops = 0;
-  while (c != NULL) {
-    GRPC_COMBINER_TRACE(
-        gpr_log(GPR_DEBUG, "C:%p execute_final[%d] c=%p", lock, loops, c));
-    grpc_closure *next = c->next_data.next;
-    grpc_error *error = c->error;
-    c->cb(exec_ctx, c->cb_arg, error);
-    GRPC_ERROR_UNREF(error);
-    c = next;
-    loops++;
+void grpc_combiner_execute(grpc_exec_ctx *exec_ctx, grpc_combiner *lock,
+                           grpc_closure *cl, grpc_error *error,
+                           bool covered_by_poller) {
+  GPR_TIMER_BEGIN("combiner.execute", 0);
+  gpr_atm last = gpr_atm_full_fetch_add(&lock->state, 2);
+  GRPC_COMBINER_TRACE(gpr_log(
+      GPR_DEBUG, "C:%p grpc_combiner_execute c=%p cov=%d last=%" PRIdPTR, lock,
+      cl, covered_by_poller, last));
+  GPR_ASSERT(last & 1);  // ensure lock has not been destroyed
+  cl->error_data.scratch =
+      pack_error_data((error_data){error, covered_by_poller});
+  if (covered_by_poller) {
+    gpr_atm_no_barrier_fetch_add(&lock->covered_by_poller, 1);
+  }
+  gpr_mpscq_push(&lock->queue, &cl->next_data.atm_next);
+  if (last == 1) {
+    // the closure will be executed when the exec_ctx calls
+    // grpc_combiner_continue_exec_ctx
+    push_last_on_exec_ctx(exec_ctx, lock);
   }
-  GPR_TIMER_END("combiner.execute_final", 0);
+  GPR_TIMER_END("combiner.execute", 0);
 }
 
-static void continue_executing_final(grpc_exec_ctx *exec_ctx, void *arg,
-                                     grpc_error *error) {
-  GPR_TIMER_BEGIN("combiner.continue_executing_final", 0);
-  grpc_combiner *lock = arg;
-  GRPC_COMBINER_TRACE(
-      gpr_log(GPR_DEBUG, "C:%p continue_executing_final", lock));
-  GPR_ASSERT(exec_ctx->active_combiner == NULL);
-  exec_ctx->active_combiner = lock;
-  // quick peek to see if new things have turned up on the queue: if so, go back
-  // to executing them before the final list
-  if ((gpr_atm_acq_load(&lock->state) >> 1) > 1) {
-    if (maybe_finish_one(exec_ctx, lock)) finish(exec_ctx, lock);
-  } else {
-    execute_final(exec_ctx, lock);
-    finish(exec_ctx, lock);
+static void move_next(grpc_exec_ctx *exec_ctx) {
+  exec_ctx->active_combiner =
+      exec_ctx->active_combiner->next_combiner_on_this_exec_ctx;
+  if (exec_ctx->active_combiner == NULL) {
+    exec_ctx->last_combiner = NULL;
   }
-  GPR_ASSERT(exec_ctx->active_combiner == lock);
-  exec_ctx->active_combiner = NULL;
-  GPR_TIMER_END("combiner.continue_executing_final", 0);
 }
 
-static bool start_execute_final(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) {
-  GPR_TIMER_BEGIN("combiner.start_execute_final", 0);
-  GPR_ASSERT(exec_ctx->active_combiner == lock);
-  GRPC_COMBINER_TRACE(
-      gpr_log(GPR_DEBUG,
-              "C:%p start_execute_final take_async_break_before_final_list=%d",
-              lock, lock->take_async_break_before_final_list));
-  if (lock->take_async_break_before_final_list) {
-    grpc_closure_init(&lock->continue_finishing, continue_executing_final,
-                      lock);
-    grpc_exec_ctx_sched(exec_ctx, &lock->continue_finishing, GRPC_ERROR_NONE,
-                        GRPC_WORKQUEUE_REF(lock->optional_workqueue, "sched"));
-    GPR_TIMER_END("combiner.start_execute_final", 0);
+static void offload(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
+  grpc_combiner *lock = arg;  // the combiner bound in grpc_combiner_create
+  push_last_on_exec_ctx(exec_ctx, lock);  // append it to this exec_ctx's list
+}
+
+static void queue_offload(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) {
+  move_next(exec_ctx);  // take the head combiner off this exec_ctx's list
+  GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, "C:%p queue_offload --> %p", lock,
+                              lock->optional_workqueue));
+  grpc_workqueue_enqueue(exec_ctx, lock->optional_workqueue, &lock->offload,
+                         GRPC_ERROR_NONE);
+}
+
+bool grpc_combiner_continue_exec_ctx(grpc_exec_ctx *exec_ctx) {
+  GPR_TIMER_BEGIN("combiner.continue_exec_ctx", 0);
+  grpc_combiner *lock = exec_ctx->active_combiner;
+  if (lock == NULL) {  // no active combiner: the only path that returns false
+    GPR_TIMER_END("combiner.continue_exec_ctx", 0);
     return false;
-  } else {
-    execute_final(exec_ctx, lock);
-    GPR_TIMER_END("combiner.start_execute_final", 0);
-    return true;
   }
-}
 
-static bool maybe_finish_one(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) {
-  GPR_TIMER_BEGIN("combiner.maybe_finish_one", 0);
-  gpr_mpscq_node *n = gpr_mpscq_pop(&lock->queue);
   GRPC_COMBINER_TRACE(
-      gpr_log(GPR_DEBUG, "C:%p maybe_finish_one n=%p", lock, n));
-  GPR_ASSERT(exec_ctx->active_combiner == lock);
-  if (n == NULL) {
-    // Queue is in an transiently inconsistent state: a new item is being queued
-    // but is not visible to this thread yet.
-    // Use this as a cue that we should go off and do something else for a while
-    // (and come back later)
-    grpc_closure_init(&lock->continue_finishing, continue_finishing_mainline,
-                      lock);
-    grpc_exec_ctx_sched(exec_ctx, &lock->continue_finishing, GRPC_ERROR_NONE,
-                        GRPC_WORKQUEUE_REF(lock->optional_workqueue, "sched"));
-    GPR_TIMER_END("combiner.maybe_finish_one", 0);
-    return false;
+      gpr_log(GPR_DEBUG,
+              "C:%p grpc_combiner_continue_exec_ctx workqueue=%p "
+              "is_covered_by_poller=%d exec_ctx_ready_to_finish=%d "
+              "time_to_execute_final_list=%d",
+              lock, lock->optional_workqueue, is_covered_by_poller(lock),
+              grpc_exec_ctx_ready_to_finish(exec_ctx),
+              lock->time_to_execute_final_list));
+  // Each call does one of: offload, run one queued closure, run the final list
+  if (lock->optional_workqueue != NULL && is_covered_by_poller(lock) &&
+      grpc_exec_ctx_ready_to_finish(exec_ctx)) {
+    GPR_TIMER_MARK("offload_from_finished_exec_ctx", 0);
+    // this execution context wants to move on, and we have a workqueue (and
+    // so can help the execution context out): schedule remaining work to be
+    // picked up on the workqueue
+    queue_offload(exec_ctx, lock);
+    GPR_TIMER_END("combiner.continue_exec_ctx", 0);
+    return true;
   }
-  grpc_closure *cl = (grpc_closure *)n;
-  grpc_error *error = cl->error;
-  cl->cb(exec_ctx, cl->cb_arg, error);
-  GRPC_ERROR_UNREF(error);
-  GPR_TIMER_END("combiner.maybe_finish_one", 0);
-  return true;
-}
 
-static void finish(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) {
-  bool (*executor)(grpc_exec_ctx * exec_ctx, grpc_combiner * lock);
-  GPR_TIMER_BEGIN("combiner.finish", 0);
-  int loops = 0;
-  do {
-    executor = maybe_finish_one;
-    gpr_atm old_state = gpr_atm_full_fetch_add(&lock->state, -2);
-    GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG,
-                                "C:%p finish[%d] old_state=%" PRIdPTR, lock,
-                                loops, old_state));
-    switch (old_state) {
-      default:
-        // we have multiple queued work items: just continue executing them
-        break;
-      case 5:  // we're down to one queued item: if it's the final list we
-      case 4:  // should do that
-        if (!grpc_closure_list_empty(lock->final_list)) {
-          executor = start_execute_final;
-        }
-        break;
-      case 3:  // had one count, one unorphaned --> unlocked unorphaned
-        GPR_TIMER_END("combiner.finish", 0);
-        return;
-      case 2:  // and one count, one orphaned --> unlocked and orphaned
-        really_destroy(exec_ctx, lock);
-        GPR_TIMER_END("combiner.finish", 0);
-        return;
-      case 1:
-      case 0:
-        // these values are illegal - representing an already unlocked or
-        // deleted lock
-        GPR_UNREACHABLE_CODE(return );
+  if (!lock->time_to_execute_final_list ||
+      // peek to see if something new has shown up, and execute that with
+      // priority
+      (gpr_atm_acq_load(&lock->state) >> 1) > 1) {
+    gpr_mpscq_node *n = gpr_mpscq_pop(&lock->queue);
+    GRPC_COMBINER_TRACE(
+        gpr_log(GPR_DEBUG, "C:%p maybe_finish_one n=%p", lock, n));
+    if (n == NULL) {
+      // queue is in an inconsistent state: use this as a cue that we should
+      // go off and do something else for a while (and come back later)
+      GPR_TIMER_MARK("delay_busy", 0);
+      if (lock->optional_workqueue != NULL && is_covered_by_poller(lock)) {
+        queue_offload(exec_ctx, lock);
+      }
+      GPR_TIMER_END("combiner.continue_exec_ctx", 0);
+      return true;
     }
-    loops++;
-  } while (executor(exec_ctx, lock));
-  GPR_TIMER_END("combiner.finish", 0);
-}
+    GPR_TIMER_BEGIN("combiner.exec1", 0);
+    grpc_closure *cl = (grpc_closure *)n;
+    error_data err = unpack_error_data(cl->error_data.scratch);
+    cl->cb(exec_ctx, cl->cb_arg, err.error);
+    if (err.covered_by_poller) {  // undo the count taken when it was queued
+      gpr_atm_no_barrier_fetch_add(&lock->covered_by_poller, -1);
+    }
+    GRPC_ERROR_UNREF(err.error);
+    GPR_TIMER_END("combiner.exec1", 0);
+  } else {
+    grpc_closure *c = lock->final_list.head;
+    GPR_ASSERT(c != NULL);
+    grpc_closure_list_init(&lock->final_list);
+    lock->final_list_covered_by_poller = false;
+    int loops = 0;  // position in the final list, used only by the trace below
+    for (; c != NULL; loops++) {
+      GPR_TIMER_BEGIN("combiner.exec_1final", 0);
+      GRPC_COMBINER_TRACE(
+          gpr_log(GPR_DEBUG, "C:%p execute_final[%d] c=%p", lock, loops, c));
+      grpc_closure *next = c->next_data.next;
+      grpc_error *error = c->error_data.error;
+      c->cb(exec_ctx, c->cb_arg, error);
+      GRPC_ERROR_UNREF(error);
+      c = next;
+      GPR_TIMER_END("combiner.exec_1final", 0);
+    }
+  }
 
-void grpc_combiner_execute(grpc_exec_ctx *exec_ctx, grpc_combiner *lock,
-                           grpc_closure *cl, grpc_error *error) {
+  GPR_TIMER_MARK("unref", 0);
+  move_next(exec_ctx);  // unlink lock; it is pushed back below if work remains
+  lock->time_to_execute_final_list = false;
+  gpr_atm old_state = gpr_atm_full_fetch_add(&lock->state, -2);
   GRPC_COMBINER_TRACE(
-      gpr_log(GPR_DEBUG, "C:%p grpc_combiner_execute c=%p", lock, cl));
-  GPR_TIMER_BEGIN("combiner.execute", 0);
-  gpr_atm last = gpr_atm_full_fetch_add(&lock->state, 2);
-  GPR_ASSERT(last & 1);  // ensure lock has not been destroyed
-  if (last == 1) {
-    exec_ctx->active_combiner = lock;
-    GPR_TIMER_BEGIN("combiner.execute_first_cb", 0);
-    cl->cb(exec_ctx, cl->cb_arg, error);
-    GPR_TIMER_END("combiner.execute_first_cb", 0);
-    GRPC_ERROR_UNREF(error);
-    finish(exec_ctx, lock);
-    GPR_ASSERT(exec_ctx->active_combiner == lock);
-    exec_ctx->active_combiner = NULL;
-  } else {
-    cl->error = error;
-    gpr_mpscq_push(&lock->queue, &cl->next_data.atm_next);
+      gpr_log(GPR_DEBUG, "C:%p finish old_state=%" PRIdPTR, lock, old_state));
+  switch (old_state) {
+    default:
+      // we have multiple queued work items: just continue executing them
+      break;
+    case 5:  // we're down to one queued item: if it's the final list we
+    case 4:  // should do that
+      if (!grpc_closure_list_empty(lock->final_list)) {
+        lock->time_to_execute_final_list = true;
+      }
+      break;
+    case 3:  // had one count, one unorphaned --> unlocked unorphaned
+      GPR_TIMER_END("combiner.continue_exec_ctx", 0);
+      return true;
+    case 2:  // and one count, one orphaned --> unlocked and orphaned
+      really_destroy(exec_ctx, lock);
+      GPR_TIMER_END("combiner.continue_exec_ctx", 0);
+      return true;
+    case 1:
+    case 0:
+      // these values are illegal - representing an already unlocked or
+      // deleted lock
+      GPR_TIMER_END("combiner.continue_exec_ctx", 0);
+      GPR_UNREACHABLE_CODE(return true);
   }
-  GPR_TIMER_END("combiner.execute", 0);
+  push_first_on_exec_ctx(exec_ctx, lock);
+  GPR_TIMER_END("combiner.continue_exec_ctx", 0);
+  return true;
 }
 
 static void enqueue_finally(grpc_exec_ctx *exec_ctx, void *closure,
@@ -264,30 +299,26 @@ static void enqueue_finally(grpc_exec_ctx *exec_ctx, void *closure,
 
 void grpc_combiner_execute_finally(grpc_exec_ctx *exec_ctx, grpc_combiner *lock,
                                    grpc_closure *closure, grpc_error *error,
-                                   bool force_async_break) {
+                                   bool covered_by_poller) {
   GRPC_COMBINER_TRACE(gpr_log(
-      GPR_DEBUG,
-      "C:%p grpc_combiner_execute_finally c=%p force_async_break=%d; ac=%p",
-      lock, closure, force_async_break, exec_ctx->active_combiner));
+      GPR_DEBUG, "C:%p grpc_combiner_execute_finally c=%p; ac=%p; cov=%d", lock,
+      closure, exec_ctx->active_combiner, covered_by_poller));
   GPR_TIMER_BEGIN("combiner.execute_finally", 0);
   if (exec_ctx->active_combiner != lock) {
     GPR_TIMER_MARK("slowpath", 0);
     grpc_combiner_execute(exec_ctx, lock,
-                          grpc_closure_create(enqueue_finally, closure), error);
+                          grpc_closure_create(enqueue_finally, closure), error,
+                          false);
     GPR_TIMER_END("combiner.execute_finally", 0);
     return;
   }
 
-  if (force_async_break) {
-    lock->take_async_break_before_final_list = true;
-  }
   if (grpc_closure_list_empty(lock->final_list)) {
     gpr_atm_full_fetch_add(&lock->state, 2);
   }
+  if (covered_by_poller) {
+    lock->final_list_covered_by_poller = true;
+  }
   grpc_closure_list_append(&lock->final_list, closure, error);
   GPR_TIMER_END("combiner.execute_finally", 0);
 }
-
-void grpc_combiner_force_async_finally(grpc_combiner *lock) {
-  lock->take_async_break_before_final_list = true;
-}
diff --git a/src/core/lib/iomgr/combiner.h b/src/core/lib/iomgr/combiner.h
index 1409db24b9..d04eeed83a 100644
--- a/src/core/lib/iomgr/combiner.h
+++ b/src/core/lib/iomgr/combiner.h
@@ -52,19 +52,14 @@ grpc_combiner *grpc_combiner_create(grpc_workqueue *optional_workqueue);
 void grpc_combiner_destroy(grpc_exec_ctx *exec_ctx, grpc_combiner *lock);
 // Execute \a action within the lock.
 void grpc_combiner_execute(grpc_exec_ctx *exec_ctx, grpc_combiner *lock,
-                           grpc_closure *closure, grpc_error *error);
+                           grpc_closure *closure, grpc_error *error,
+                           bool covered_by_poller);
 // Execute \a action within the lock just prior to unlocking.
-// if \a hint_async_break is true, the combiner tries to hand execution to
-// another thread before finishing the primary queue of combined closures and
-// executing the finally list.
-// Deprecation warning: \a hint_async_break will be removed in a future version
-// Takes a very slow and round-about path if not called from a
-// grpc_combiner_execute closure.
 void grpc_combiner_execute_finally(grpc_exec_ctx *exec_ctx, grpc_combiner *lock,
                                    grpc_closure *closure, grpc_error *error,
-                                   bool hint_async_break);
-// Deprecated: force the finally list execution onto another thread
-void grpc_combiner_force_async_finally(grpc_combiner *lock);
+                                   bool covered_by_poller);
+
+bool grpc_combiner_continue_exec_ctx(grpc_exec_ctx *exec_ctx);
 
 extern int grpc_combiner_trace;
 
diff --git a/src/core/lib/iomgr/error.c b/src/core/lib/iomgr/error.c
index e366961936..45ef75e04d 100644
--- a/src/core/lib/iomgr/error.c
+++ b/src/core/lib/iomgr/error.c
@@ -265,7 +265,7 @@ static grpc_error *copy_error_and_unref(grpc_error *in) {
   } else {
     out = gpr_malloc(sizeof(*out));
 #ifdef GRPC_ERROR_REFCOUNT_DEBUG
-    gpr_log(GPR_DEBUG, "%p create copying", out);
+    gpr_log(GPR_DEBUG, "%p create copying %p", out, in);
 #endif
     out->ints = gpr_avl_ref(in->ints);
     out->strs = gpr_avl_ref(in->strs);
diff --git a/src/core/lib/iomgr/error.h b/src/core/lib/iomgr/error.h
index 6c769accdb..2ab3ef9f40 100644
--- a/src/core/lib/iomgr/error.h
+++ b/src/core/lib/iomgr/error.h
@@ -123,9 +123,13 @@ typedef enum {
   GRPC_ERROR_TIME_CREATED,
 } grpc_error_times;
 
+/// The following "special" errors can be propagated without allocating memory.
+/// They are always even so that other code (particularly combiner locks) can
+/// safely use the lower bit for themselves.
+
 #define GRPC_ERROR_NONE ((grpc_error *)NULL)
-#define GRPC_ERROR_OOM ((grpc_error *)1)
-#define GRPC_ERROR_CANCELLED ((grpc_error *)2)
+#define GRPC_ERROR_OOM ((grpc_error *)2)
+#define GRPC_ERROR_CANCELLED ((grpc_error *)4)
 
 const char *grpc_error_string(grpc_error *error);
 void grpc_error_free_string(const char *str);
diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c
index 740920d760..42c0ae2dcd 100644
--- a/src/core/lib/iomgr/ev_epoll_linux.c
+++ b/src/core/lib/iomgr/ev_epoll_linux.c
@@ -152,14 +152,13 @@ static void fd_global_shutdown(void);
  * Polling island Declarations
  */
 
-//#define GRPC_PI_REF_COUNT_DEBUG
-#ifdef GRPC_PI_REF_COUNT_DEBUG
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
 
 #define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
 #define PI_UNREF(exec_ctx, p, r) \
   pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)
 
-#else /* defined(GRPC_PI_REF_COUNT_DEBUG) */
+#else /* defined(GRPC_WORKQUEUE_REFCOUNT_DEBUG) */
 
 #define PI_ADD_REF(p, r) pi_add_ref((p))
 #define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))
@@ -185,8 +184,11 @@ typedef struct polling_island {
    * (except mu and ref_count) are invalid and must be ignored. */
   gpr_atm merged_to;
 
-  /* The workqueue associated with this polling island */
-  grpc_workqueue *workqueue;
+  gpr_atm poller_count;
+  gpr_mu workqueue_read_mu;
+  gpr_mpscq workqueue_items;
+  gpr_atm workqueue_item_count;
+  grpc_wakeup_fd workqueue_wakeup_fd;
 
   /* The fd of the underlying epoll set */
   int epoll_fd;
@@ -275,6 +277,8 @@ static bool append_error(grpc_error **composite, grpc_error *error,
    threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
 static grpc_wakeup_fd polling_island_wakeup_fd;
 
+static __thread polling_island *g_current_thread_polling_island;
+
 /* Forward declaration */
 static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi);
 
@@ -289,12 +293,12 @@ static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi);
 gpr_atm g_epoll_sync;
 #endif /* defined(GRPC_TSAN) */
 
-#ifdef GRPC_PI_REF_COUNT_DEBUG
 static void pi_add_ref(polling_island *pi);
 static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi);
 
-static void pi_add_ref_dbg(polling_island *pi, char *reason, char *file,
-                           int line) {
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+static void pi_add_ref_dbg(polling_island *pi, const char *reason,
+                           const char *file, int line) {
   long old_cnt = gpr_atm_acq_load(&pi->ref_count);
   pi_add_ref(pi);
   gpr_log(GPR_DEBUG, "Add ref pi: %p, old: %ld -> new:%ld (%s) - (%s, %d)",
@@ -302,12 +306,42 @@ static void pi_add_ref_dbg(polling_island *pi, char *reason, char *file,
 }
 
 static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi,
-                         char *reason, char *file, int line) {
+                         const char *reason, const char *file, int line) {
   long old_cnt = gpr_atm_acq_load(&pi->ref_count);
   pi_unref(exec_ctx, pi);
   gpr_log(GPR_DEBUG, "Unref pi: %p, old:%ld -> new:%ld (%s) - (%s, %d)",
           (void *)pi, old_cnt, (old_cnt - 1), reason, file, line);
 }
+
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue,
+                                     const char *file, int line,
+                                     const char *reason) {
+  if (workqueue != NULL) {  // NULL passes through without taking a ref
+    pi_add_ref_dbg((polling_island *)workqueue, reason, file, line);
+  }
+  return workqueue;
+}
+
+static void workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+                            const char *file, int line, const char *reason) {
+  if (workqueue != NULL) {  // unref of a NULL workqueue is a no-op
+    pi_unref_dbg(exec_ctx, (polling_island *)workqueue, reason, file, line);
+  }
+}
+#else
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue) {
+  if (workqueue != NULL) {  // NULL passes through without taking a ref
+    pi_add_ref((polling_island *)workqueue);
+  }
+  return workqueue;
+}
+
+static void workqueue_unref(grpc_exec_ctx *exec_ctx,
+                            grpc_workqueue *workqueue) {
+  if (workqueue != NULL) {  // unref of a NULL workqueue is a no-op
+    pi_unref(exec_ctx, (polling_island *)workqueue);
+  }
+}
 #endif
 
 static void pi_add_ref(polling_island *pi) {
@@ -315,10 +349,7 @@ static void pi_add_ref(polling_island *pi) {
 }
 
 static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
-  /* If ref count went to one, we're back to just the workqueue owning a ref.
-     Unref the workqueue to break the loop.
-
-     If ref count went to zero, delete the polling island.
+  /* If ref count went to zero, delete the polling island.
      Note that this deletion not be done under a lock. Once the ref count goes
      to zero, we are guaranteed that no one else holds a reference to the
      polling island (and that there is no racing pi_add_ref() call either).
@@ -326,20 +357,12 @@ static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
      Also, if we are deleting the polling island and the merged_to field is
      non-empty, we should remove a ref to the merged_to polling island
    */
-  switch (gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
-    case 2: /* last external ref: the only one now owned is by the workqueue */
-      GRPC_WORKQUEUE_UNREF(exec_ctx, pi->workqueue, "polling_island");
-      break;
-    case 1: {
-      polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
-      polling_island_delete(exec_ctx, pi);
-      if (next != NULL) {
-        PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
-      }
-      break;
+  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
+    polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
+    polling_island_delete(exec_ctx, pi);
+    if (next != NULL) {
+      PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
     }
-    case 0:
-      GPR_UNREACHABLE_CODE(return );
   }
 }
 
@@ -488,11 +511,20 @@ static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
   pi->fd_capacity = 0;
   pi->fds = NULL;
   pi->epoll_fd = -1;
-  pi->workqueue = NULL;
+
+  gpr_mu_init(&pi->workqueue_read_mu);
+  gpr_mpscq_init(&pi->workqueue_items);
+  gpr_atm_rel_store(&pi->workqueue_item_count, 0);
 
   gpr_atm_rel_store(&pi->ref_count, 0);
+  gpr_atm_rel_store(&pi->poller_count, 0);
   gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);
 
+  if (!append_error(error, grpc_wakeup_fd_init(&pi->workqueue_wakeup_fd),
+                    err_desc)) {
+    goto done;
+  }
+
   pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
 
   if (pi->epoll_fd < 0) {
@@ -501,26 +533,14 @@ static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
   }
 
   polling_island_add_wakeup_fd_locked(pi, &grpc_global_wakeup_fd, error);
+  polling_island_add_wakeup_fd_locked(pi, &pi->workqueue_wakeup_fd, error);
 
   if (initial_fd != NULL) {
     polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
   }
 
-  if (append_error(error, grpc_workqueue_create(exec_ctx, &pi->workqueue),
-                   err_desc) &&
-      *error == GRPC_ERROR_NONE) {
-    polling_island_add_fds_locked(pi, &pi->workqueue->wakeup_read_fd, 1, true,
-                                  error);
-    GPR_ASSERT(pi->workqueue->wakeup_read_fd->polling_island == NULL);
-    pi->workqueue->wakeup_read_fd->polling_island = pi;
-    PI_ADD_REF(pi, "fd");
-  }
-
 done:
   if (*error != GRPC_ERROR_NONE) {
-    if (pi->workqueue != NULL) {
-      GRPC_WORKQUEUE_UNREF(exec_ctx, pi->workqueue, "polling_island");
-    }
     polling_island_delete(exec_ctx, pi);
     pi = NULL;
   }
@@ -533,7 +553,11 @@ static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) {
   if (pi->epoll_fd >= 0) {
     close(pi->epoll_fd);
   }
+  GPR_ASSERT(gpr_atm_no_barrier_load(&pi->workqueue_item_count) == 0);
+  gpr_mu_destroy(&pi->workqueue_read_mu);
+  gpr_mpscq_destroy(&pi->workqueue_items);
   gpr_mu_destroy(&pi->mu);
+  grpc_wakeup_fd_destroy(&pi->workqueue_wakeup_fd);
   gpr_free(pi->fds);
   gpr_free(pi);
 }
@@ -678,6 +702,40 @@ static void polling_island_unlock_pair(polling_island *p, polling_island *q) {
   }
 }
 
+static void workqueue_maybe_wakeup(polling_island *pi) {
+  bool force_wakeup = false;  // never set to true in this function
+  bool is_current_poller = (g_current_thread_polling_island == pi);
+  gpr_atm min_current_pollers_for_wakeup = is_current_poller ? 1 : 0;
+  gpr_atm current_pollers = gpr_atm_no_barrier_load(&pi->poller_count);
+  if (force_wakeup || current_pollers > min_current_pollers_for_wakeup) {
+    GRPC_LOG_IF_ERROR("workqueue_wakeup_fd",
+                      grpc_wakeup_fd_wakeup(&pi->workqueue_wakeup_fd));
+  }  // else: poller_count is at or below the threshold; send nothing
+}
+
+static void workqueue_move_items_to_parent(polling_island *q) {
+  polling_island *p = (polling_island *)gpr_atm_no_barrier_load(&q->merged_to);
+  if (p == NULL) {  // q has not been merged: its items stay where they are
+    return;
+  }
+  gpr_mu_lock(&q->workqueue_read_mu);
+  int num_added = 0;
+  while (gpr_atm_no_barrier_load(&q->workqueue_item_count) > 0) {
+    gpr_mpscq_node *n = gpr_mpscq_pop(&q->workqueue_items);
+    if (n != NULL) {  // NULL pop with count > 0: loop and try the pop again
+      gpr_atm_no_barrier_fetch_add(&q->workqueue_item_count, -1);
+      gpr_atm_no_barrier_fetch_add(&p->workqueue_item_count, 1);
+      gpr_mpscq_push(&p->workqueue_items, n);
+      num_added++;
+    }
+  }
+  gpr_mu_unlock(&q->workqueue_read_mu);
+  if (num_added > 0) {
+    workqueue_maybe_wakeup(p);
+  }
+  workqueue_move_items_to_parent(p);  // p may itself have been merged onward
+}
+
 static polling_island *polling_island_merge(polling_island *p,
                                             polling_island *q,
                                             grpc_error **error) {
@@ -702,6 +760,8 @@ static polling_island *polling_island_merge(polling_island *p,
     /* Add the 'merged_to' link from p --> q */
     gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
     PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
+
+    workqueue_move_items_to_parent(p); /* p --> q: drain p's items into q */
   }
   /* else if p == q, nothing needs to be done */
 
@@ -712,6 +772,26 @@ static polling_island *polling_island_merge(polling_island *p,
   return q;
 }
 
+/* Queue closure (carrying error) on the polling island behind workqueue. */
+static void workqueue_enqueue(grpc_exec_ctx *exec_ctx,
+                              grpc_workqueue *workqueue, grpc_closure *closure,
+                              grpc_error *error) {
+  GPR_TIMER_BEGIN("workqueue.enqueue", 0);
+  /* hold a ref for the duration of the call: the events kicked off below
+   * could otherwise destroy the workqueue before this function returns */
+  GRPC_WORKQUEUE_REF(workqueue, "enqueue");
+  polling_island *island = (polling_island *)workqueue;
+  const bool was_empty =
+      gpr_atm_no_barrier_fetch_add(&island->workqueue_item_count, 1) == 0;
+  closure->error_data.error = error;
+  gpr_mpscq_push(&island->workqueue_items, &closure->next_data.atm_next);
+  /* a wakeup is only attempted on the 0 -> 1 item-count transition */
+  if (was_empty) workqueue_maybe_wakeup(island);
+  workqueue_move_items_to_parent(island);
+  GRPC_WORKQUEUE_UNREF(exec_ctx, workqueue, "enqueue");
+  GPR_TIMER_END("workqueue.enqueue", 0);
+}
+
 static grpc_error *polling_island_global_init() {
   grpc_error *error = GRPC_ERROR_NONE;
 
@@ -1042,11 +1122,8 @@ static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
 
 static grpc_workqueue *fd_get_workqueue(grpc_fd *fd) {
   gpr_mu_lock(&fd->mu);
-  grpc_workqueue *workqueue = NULL;
-  if (fd->polling_island != NULL) {
-    workqueue =
-        GRPC_WORKQUEUE_REF(fd->polling_island->workqueue, "get_workqueue");
-  }
+  grpc_workqueue *workqueue = GRPC_WORKQUEUE_REF(
+      (grpc_workqueue *)fd->polling_island, "fd_get_workqueue");
   gpr_mu_unlock(&fd->mu);
   return workqueue;
 }
@@ -1299,7 +1376,26 @@ static void pollset_reset(grpc_pollset *pollset) {
   GPR_ASSERT(pollset->polling_island == NULL);
 }
 
-#define GRPC_EPOLL_MAX_EVENTS 1000
+/* Try to run one closure from pi's workqueue; true iff one was executed. */
+static bool maybe_do_workqueue_work(grpc_exec_ctx *exec_ctx,
+                                    polling_island *pi) {
+  if (!gpr_mu_trylock(&pi->workqueue_read_mu)) return false;
+  gpr_mpscq_node *node = gpr_mpscq_pop(&pi->workqueue_items);
+  gpr_mu_unlock(&pi->workqueue_read_mu);
+  if (node != NULL) {
+    /* items remain beyond this one: signal the wakeup fd on their behalf */
+    const bool more = gpr_atm_full_fetch_add(&pi->workqueue_item_count, -1) > 1;
+    if (more) workqueue_maybe_wakeup(pi);
+    grpc_closure *closure = (grpc_closure *)node;
+    grpc_closure_run(exec_ctx, closure, closure->error_data.error);
+    return true;
+  }
+  const bool pending = gpr_atm_no_barrier_load(&pi->workqueue_item_count) > 0;
+  if (pending) workqueue_maybe_wakeup(pi);
+  return false;
+}
+
+#define GRPC_EPOLL_MAX_EVENTS 100
 /* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
 static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
                                     grpc_pollset *pollset,
@@ -1354,7 +1450,10 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
   PI_ADD_REF(pi, "ps_work");
   gpr_mu_unlock(&pollset->mu);
 
-  do {
+  if (!maybe_do_workqueue_work(exec_ctx, pi)) {
+    gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
+    g_current_thread_polling_island = pi;
+
     GRPC_SCHEDULING_START_BLOCKING_REGION;
     ep_rv = epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms,
                         sig_mask);
@@ -1386,6 +1485,11 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
         append_error(error,
                      grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd),
                      err_desc);
+      } else if (data_ptr == &pi->workqueue_wakeup_fd) {
+        append_error(error, /* consume the fd that fired, not the global one */
+                     grpc_wakeup_fd_consume_wakeup(&pi->workqueue_wakeup_fd),
+                     err_desc);
+        maybe_do_workqueue_work(exec_ctx, pi);
       } else if (data_ptr == &polling_island_wakeup_fd) {
         GRPC_POLLING_TRACE(
             "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
@@ -1408,7 +1512,10 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
         }
       }
     }
-  } while (ep_rv == GRPC_EPOLL_MAX_EVENTS);
+
+    g_current_thread_polling_island = NULL;
+    gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
+  }
 
   GPR_ASSERT(pi != NULL);
 
@@ -1868,6 +1975,10 @@ static const grpc_event_engine_vtable vtable = {
 
     .kick_poller = kick_poller,
 
+    .workqueue_ref = workqueue_ref,
+    .workqueue_unref = workqueue_unref,
+    .workqueue_enqueue = workqueue_enqueue,
+
     .shutdown_engine = shutdown_engine,
 };
 
diff --git a/src/core/lib/iomgr/ev_poll_and_epoll_posix.c b/src/core/lib/iomgr/ev_poll_and_epoll_posix.c
index c2107e5e39..1829440a6e 100644
--- a/src/core/lib/iomgr/ev_poll_and_epoll_posix.c
+++ b/src/core/lib/iomgr/ev_poll_and_epoll_posix.c
@@ -1989,6 +1989,32 @@ static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx,
 }
 
 /*******************************************************************************
+ * workqueue stubs: ref/unref are no-ops, enqueue uses grpc_exec_ctx_sched
+ */
+
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue,
+                                     const char *file, int line,
+                                     const char *reason) {
+  return workqueue;
+}
+static void workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+                            const char *file, int line, const char *reason) {}
+#else
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue) {
+  return workqueue;
+}
+static void workqueue_unref(grpc_exec_ctx *exec_ctx,
+                            grpc_workqueue *workqueue) {}
+#endif
+
+static void workqueue_enqueue(grpc_exec_ctx *exec_ctx,
+                              grpc_workqueue *workqueue, grpc_closure *closure,
+                              grpc_error *error) {
+  grpc_exec_ctx_sched(exec_ctx, closure, error, NULL);
+}
+
+/*******************************************************************************
  * event engine binding
  */
 
@@ -2029,6 +2055,10 @@ static const grpc_event_engine_vtable vtable = {
 
     .kick_poller = kick_poller,
 
+    .workqueue_ref = workqueue_ref,
+    .workqueue_unref = workqueue_unref,
+    .workqueue_enqueue = workqueue_enqueue,
+
     .shutdown_engine = shutdown_engine,
 };
 
diff --git a/src/core/lib/iomgr/ev_poll_posix.c b/src/core/lib/iomgr/ev_poll_posix.c
index 16a5e3083e..b84a56018f 100644
--- a/src/core/lib/iomgr/ev_poll_posix.c
+++ b/src/core/lib/iomgr/ev_poll_posix.c
@@ -1236,6 +1236,32 @@ static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx,
 }
 
 /*******************************************************************************
+ * workqueue stubs: ref/unref are no-ops, enqueue uses grpc_exec_ctx_sched
+ */
+
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue,
+                                     const char *file, int line,
+                                     const char *reason) {
+  return workqueue;
+}
+static void workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+                            const char *file, int line, const char *reason) {}
+#else
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue) {
+  return workqueue;
+}
+static void workqueue_unref(grpc_exec_ctx *exec_ctx,
+                            grpc_workqueue *workqueue) {}
+#endif
+
+static void workqueue_enqueue(grpc_exec_ctx *exec_ctx,
+                              grpc_workqueue *workqueue, grpc_closure *closure,
+                              grpc_error *error) {
+  grpc_exec_ctx_sched(exec_ctx, closure, error, NULL);
+}
+
+/*******************************************************************************
  * event engine binding
  */
 
@@ -1273,6 +1299,10 @@ static const grpc_event_engine_vtable vtable = {
 
     .kick_poller = kick_poller,
 
+    .workqueue_ref = workqueue_ref,
+    .workqueue_unref = workqueue_unref,
+    .workqueue_enqueue = workqueue_enqueue,
+
     .shutdown_engine = shutdown_engine,
 };
 
diff --git a/src/core/lib/iomgr/ev_posix.c b/src/core/lib/iomgr/ev_posix.c
index 6536672685..26618f8d55 100644
--- a/src/core/lib/iomgr/ev_posix.c
+++ b/src/core/lib/iomgr/ev_posix.c
@@ -258,4 +258,27 @@ void grpc_pollset_set_del_fd(grpc_exec_ctx *exec_ctx,
 
 grpc_error *grpc_kick_poller(void) { return g_event_engine->kick_poller(); }
 
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+grpc_workqueue *grpc_workqueue_ref(grpc_workqueue *workqueue, const char *file,
+                                   int line, const char *reason) {
+  return g_event_engine->workqueue_ref(workqueue, file, line, reason);
+}
+void grpc_workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+                          const char *file, int line, const char *reason) {
+  g_event_engine->workqueue_unref(exec_ctx, workqueue, file, line, reason);
+}
+#else
+grpc_workqueue *grpc_workqueue_ref(grpc_workqueue *workqueue) {
+  return g_event_engine->workqueue_ref(workqueue);
+}
+void grpc_workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue) {
+  g_event_engine->workqueue_unref(exec_ctx, workqueue);
+}
+#endif
+
+void grpc_workqueue_enqueue(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+                            grpc_closure *closure, grpc_error *error) {
+  g_event_engine->workqueue_enqueue(exec_ctx, workqueue, closure, error);
+}
+
 #endif  // GPR_POSIX_SOCKET
diff --git a/src/core/lib/iomgr/ev_posix.h b/src/core/lib/iomgr/ev_posix.h
index c2aa1756ea..2fdef06838 100644
--- a/src/core/lib/iomgr/ev_posix.h
+++ b/src/core/lib/iomgr/ev_posix.h
@@ -40,6 +40,7 @@
 #include "src/core/lib/iomgr/pollset.h"
 #include "src/core/lib/iomgr/pollset_set.h"
 #include "src/core/lib/iomgr/wakeup_fd_posix.h"
+#include "src/core/lib/iomgr/workqueue.h"
 
 typedef struct grpc_fd grpc_fd;
 
@@ -95,6 +96,18 @@ typedef struct grpc_event_engine_vtable {
   grpc_error *(*kick_poller)(void);
 
   void (*shutdown_engine)(void);
+
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+  grpc_workqueue *(*workqueue_ref)(grpc_workqueue *workqueue, const char *file,
+                                   int line, const char *reason);
+  void (*workqueue_unref)(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+                          const char *file, int line, const char *reason);
+#else
+  grpc_workqueue *(*workqueue_ref)(grpc_workqueue *workqueue);
+  void (*workqueue_unref)(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue);
+#endif
+  void (*workqueue_enqueue)(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+                            grpc_closure *closure, grpc_error *error);
 } grpc_event_engine_vtable;
 
 void grpc_event_engine_init(void);
diff --git a/src/core/lib/iomgr/exec_ctx.c b/src/core/lib/iomgr/exec_ctx.c
index ac7785ec13..a3c40e8092 100644
--- a/src/core/lib/iomgr/exec_ctx.c
+++ b/src/core/lib/iomgr/exec_ctx.c
@@ -37,6 +37,7 @@
 #include <grpc/support/sync.h>
 #include <grpc/support/thd.h>
 
+#include "src/core/lib/iomgr/combiner.h"
 #include "src/core/lib/iomgr/workqueue.h"
 #include "src/core/lib/profiling/timers.h"
 
@@ -60,18 +61,47 @@ bool grpc_always_ready_to_finish(grpc_exec_ctx *exec_ctx, void *arg_ignored) {
 bool grpc_exec_ctx_flush(grpc_exec_ctx *exec_ctx) {
   bool did_something = 0;
   GPR_TIMER_BEGIN("grpc_exec_ctx_flush", 0);
-  while (!grpc_closure_list_empty(exec_ctx->closure_list)) {
-    grpc_closure *c = exec_ctx->closure_list.head;
-    exec_ctx->closure_list.head = exec_ctx->closure_list.tail = NULL;
-    while (c != NULL) {
-      grpc_closure *next = c->next_data.next;
-      grpc_error *error = c->error;
-      did_something = true;
-      GPR_TIMER_BEGIN("grpc_exec_ctx_flush.cb", 0);
+  for (;;) {
+    if (!grpc_closure_list_empty(exec_ctx->closure_list)) {
+      grpc_closure *c = exec_ctx->closure_list.head;
+      exec_ctx->closure_list.head = exec_ctx->closure_list.tail = NULL;
+      while (c != NULL) {
+        grpc_closure *next = c->next_data.next;
+        did_something = true;
+        grpc_closure_run(exec_ctx, c, c->error_data.error);
+        c = next;
+      }
+      continue;
+    }
+    if (grpc_combiner_continue_exec_ctx(exec_ctx)) {
+      continue;
+    }
+    break;
+  }
+  GPR_ASSERT(exec_ctx->active_combiner == NULL);
+  if (exec_ctx->stealing_from_workqueue != NULL) {
+    if (grpc_exec_ctx_ready_to_finish(exec_ctx)) {
+      grpc_workqueue_enqueue(exec_ctx, exec_ctx->stealing_from_workqueue,
+                             exec_ctx->stolen_closure,
+                             exec_ctx->stolen_closure->error_data.error);
+      GRPC_WORKQUEUE_UNREF(exec_ctx, exec_ctx->stealing_from_workqueue,
+                           "exec_ctx_sched");
+      exec_ctx->stealing_from_workqueue = NULL;
+      exec_ctx->stolen_closure = NULL;
+    } else {
+      grpc_closure *c = exec_ctx->stolen_closure;
+      GRPC_WORKQUEUE_UNREF(exec_ctx, exec_ctx->stealing_from_workqueue,
+                           "exec_ctx_sched");
+      exec_ctx->stealing_from_workqueue = NULL;
+      exec_ctx->stolen_closure = NULL;
+      grpc_error *error = c->error_data.error;
+      GPR_TIMER_BEGIN("grpc_exec_ctx_flush.stolen_cb", 0);
       c->cb(exec_ctx, c->cb_arg, error);
       GRPC_ERROR_UNREF(error);
-      GPR_TIMER_END("grpc_exec_ctx_flush.cb", 0);
-      c = next;
+      GPR_TIMER_END("grpc_exec_ctx_flush.stolen_cb", 0);
+      grpc_exec_ctx_flush(exec_ctx);
+      GPR_TIMER_END("grpc_exec_ctx_flush", 0);
+      return true;
     }
   }
   GPR_TIMER_END("grpc_exec_ctx_flush", 0);
@@ -86,12 +116,25 @@ void grpc_exec_ctx_finish(grpc_exec_ctx *exec_ctx) {
 void grpc_exec_ctx_sched(grpc_exec_ctx *exec_ctx, grpc_closure *closure,
                          grpc_error *error,
                          grpc_workqueue *offload_target_or_null) {
+  GPR_TIMER_BEGIN("grpc_exec_ctx_sched", 0);
   if (offload_target_or_null == NULL) {
     grpc_closure_list_append(&exec_ctx->closure_list, closure, error);
-  } else {
+  } else if (exec_ctx->stealing_from_workqueue == NULL) {
+    exec_ctx->stealing_from_workqueue = offload_target_or_null;
+    closure->error_data.error = error;
+    exec_ctx->stolen_closure = closure;
+  } else if (exec_ctx->stealing_from_workqueue != offload_target_or_null) {
     grpc_workqueue_enqueue(exec_ctx, offload_target_or_null, closure, error);
     GRPC_WORKQUEUE_UNREF(exec_ctx, offload_target_or_null, "exec_ctx_sched");
+  } else { /* stealing_from_workqueue == offload_target_or_null */
+    grpc_workqueue_enqueue(exec_ctx, offload_target_or_null,
+                           exec_ctx->stolen_closure,
+                           exec_ctx->stolen_closure->error_data.error);
+    closure->error_data.error = error;
+    exec_ctx->stolen_closure = closure;
+    GRPC_WORKQUEUE_UNREF(exec_ctx, offload_target_or_null, "exec_ctx_sched");
   }
+  GPR_TIMER_END("grpc_exec_ctx_sched", 0);
 }
 
 void grpc_exec_ctx_enqueue_list(grpc_exec_ctx *exec_ctx,
diff --git a/src/core/lib/iomgr/exec_ctx.h b/src/core/lib/iomgr/exec_ctx.h
index 1895ee6245..91029f5fba 100644
--- a/src/core/lib/iomgr/exec_ctx.h
+++ b/src/core/lib/iomgr/exec_ctx.h
@@ -66,15 +66,23 @@ typedef struct grpc_combiner grpc_combiner;
  */
 struct grpc_exec_ctx {
   grpc_closure_list closure_list;
+  grpc_workqueue *stealing_from_workqueue;
+  grpc_closure *stolen_closure;
   /** currently active combiner: updated only via combiner.c */
   grpc_combiner *active_combiner;
+  grpc_combiner *last_combiner;
   bool cached_ready_to_finish;
   void *check_ready_to_finish_arg;
   bool (*check_ready_to_finish)(grpc_exec_ctx *exec_ctx, void *arg);
 };
 
+/* initializer for grpc_exec_ctx:
+   prefer to use GRPC_EXEC_CTX_INIT whenever possible */
 #define GRPC_EXEC_CTX_INIT_WITH_FINISH_CHECK(finish_check, finish_check_arg) \
-  { GRPC_CLOSURE_LIST_INIT, NULL, false, finish_check_arg, finish_check }
+  {                                                                          \
+    GRPC_CLOSURE_LIST_INIT, NULL, NULL, NULL, NULL, false, finish_check_arg, \
+        finish_check                                                         \
+  }
 #else
 struct grpc_exec_ctx {
   bool cached_ready_to_finish;
@@ -85,8 +93,10 @@ struct grpc_exec_ctx {
   { false, finish_check_arg, finish_check }
 #endif
 
+/* initialize an execution context at the top level of an API call into grpc
+   (this is safe to use elsewhere, though possibly not as efficient) */
 #define GRPC_EXEC_CTX_INIT \
-  GRPC_EXEC_CTX_INIT_WITH_FINISH_CHECK(grpc_never_ready_to_finish, NULL)
+  GRPC_EXEC_CTX_INIT_WITH_FINISH_CHECK(grpc_always_ready_to_finish, NULL)
 
 /** Flush any work that has been enqueued onto this grpc_exec_ctx.
  *  Caller must guarantee that no interfering locks are held.
diff --git a/src/core/lib/iomgr/iomgr.c b/src/core/lib/iomgr/iomgr.c
index d67d388b8c..4fd83e0b22 100644
--- a/src/core/lib/iomgr/iomgr.c
+++ b/src/core/lib/iomgr/iomgr.c
@@ -112,6 +112,14 @@ void grpc_iomgr_shutdown(void) {
       continue;
     }
     if (g_root_object.next != &g_root_object) {
+      if (grpc_iomgr_abort_on_leaks()) {
+        gpr_log(GPR_DEBUG, "Failed to free %" PRIuPTR
+                           " iomgr objects before shutdown deadline: "
+                           "memory leaks are likely",
+                count_objects());
+        dump_objects("LEAKED");
+        abort();
+      }
       gpr_timespec short_deadline = gpr_time_add(
           gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_millis(100, GPR_TIMESPAN));
       if (gpr_cv_wait(&g_rcv, &g_mu, short_deadline)) {
@@ -122,9 +130,6 @@ void grpc_iomgr_shutdown(void) {
                                "memory leaks are likely",
                     count_objects());
             dump_objects("LEAKED");
-            if (grpc_iomgr_abort_on_leaks()) {
-              abort();
-            }
           }
           break;
         }
diff --git a/src/core/lib/iomgr/tcp_posix.c b/src/core/lib/iomgr/tcp_posix.c
index 92767721d5..00fd77679a 100644
--- a/src/core/lib/iomgr/tcp_posix.c
+++ b/src/core/lib/iomgr/tcp_posix.c
@@ -177,7 +177,7 @@ static void call_read_cb(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp,
 
   tcp->read_cb = NULL;
   tcp->incoming_buffer = NULL;
-  grpc_exec_ctx_sched(exec_ctx, cb, error, NULL);
+  grpc_closure_run(exec_ctx, cb, error);
 }
 
 #define MAX_READ_IOVEC 4
@@ -209,11 +209,11 @@ static void tcp_continue_read(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) {
   msg.msg_controllen = 0;
   msg.msg_flags = 0;
 
-  GPR_TIMER_BEGIN("recvmsg", 1);
+  GPR_TIMER_BEGIN("recvmsg", 0);
   do {
     read_bytes = recvmsg(tcp->fd, &msg, 0);
   } while (read_bytes < 0 && errno == EINTR);
-  GPR_TIMER_END("recvmsg", 0);
+  GPR_TIMER_END("recvmsg", read_bytes >= 0);
 
   if (read_bytes < 0) {
     /* NB: After calling call_read_cb a parallel call of the read handler may
@@ -392,11 +392,8 @@ static void tcp_handle_write(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */,
       grpc_error_free_string(str);
     }
 
-    GPR_TIMER_BEGIN("tcp_handle_write.cb", 0);
-    cb->cb(exec_ctx, cb->cb_arg, error);
-    GPR_TIMER_END("tcp_handle_write.cb", 0);
+    grpc_closure_run(exec_ctx, cb, error);
     TCP_UNREF(exec_ctx, tcp, "write");
-    GRPC_ERROR_UNREF(error);
   }
 }
 
diff --git a/src/core/lib/iomgr/workqueue.h b/src/core/lib/iomgr/workqueue.h
index b2805dc66c..5b96d1d851 100644
--- a/src/core/lib/iomgr/workqueue.h
+++ b/src/core/lib/iomgr/workqueue.h
@@ -40,10 +40,6 @@
 #include "src/core/lib/iomgr/pollset.h"
 #include "src/core/lib/iomgr/pollset_set.h"
 
-#ifdef GPR_POSIX_SOCKET
-#include "src/core/lib/iomgr/workqueue_posix.h"
-#endif
-
 #ifdef GPR_WINDOWS
 #include "src/core/lib/iomgr/workqueue_windows.h"
 #endif
@@ -58,20 +54,20 @@
    string will be printed alongside the refcount. When it is not defined, the
    string will be discarded at compilation time. */
 
-//#define GRPC_WORKQUEUE_REFCOUNT_DEBUG
+/*#define GRPC_WORKQUEUE_REFCOUNT_DEBUG*/
 #ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
 #define GRPC_WORKQUEUE_REF(p, r) \
-  (grpc_workqueue_ref((p), __FILE__, __LINE__, (r)), (p))
+  grpc_workqueue_ref((p), __FILE__, __LINE__, (r))
 #define GRPC_WORKQUEUE_UNREF(exec_ctx, p, r) \
   grpc_workqueue_unref((exec_ctx), (p), __FILE__, __LINE__, (r))
-void grpc_workqueue_ref(grpc_workqueue *workqueue, const char *file, int line,
-                        const char *reason);
+grpc_workqueue *grpc_workqueue_ref(grpc_workqueue *workqueue, const char *file,
+                                   int line, const char *reason);
 void grpc_workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
                           const char *file, int line, const char *reason);
 #else
-#define GRPC_WORKQUEUE_REF(p, r) (grpc_workqueue_ref((p)), (p))
+#define GRPC_WORKQUEUE_REF(p, r) grpc_workqueue_ref((p))
 #define GRPC_WORKQUEUE_UNREF(cl, p, r) grpc_workqueue_unref((cl), (p))
-void grpc_workqueue_ref(grpc_workqueue *workqueue);
+grpc_workqueue *grpc_workqueue_ref(grpc_workqueue *workqueue);
 void grpc_workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue);
 #endif
 
diff --git a/src/core/lib/iomgr/workqueue_posix.c b/src/core/lib/iomgr/workqueue_posix.c
deleted file mode 100644
index ecfea68f56..0000000000
--- a/src/core/lib/iomgr/workqueue_posix.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- *
- * Copyright 2015, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <grpc/support/port_platform.h>
-
-#ifdef GPR_POSIX_SOCKET
-
-#include "src/core/lib/iomgr/workqueue.h"
-
-#include <stdio.h>
-
-#include <grpc/support/alloc.h>
-#include <grpc/support/log.h>
-#include <grpc/support/useful.h>
-
-#include "src/core/lib/iomgr/ev_posix.h"
-#include "src/core/lib/profiling/timers.h"
-
-static void on_readable(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
-
-grpc_error *grpc_workqueue_create(grpc_exec_ctx *exec_ctx,
-                                  grpc_workqueue **workqueue) {
-  char name[32];
-  *workqueue = gpr_malloc(sizeof(grpc_workqueue));
-  gpr_ref_init(&(*workqueue)->refs, 1);
-  gpr_atm_no_barrier_store(&(*workqueue)->state, 1);
-  grpc_error *err = grpc_wakeup_fd_init(&(*workqueue)->wakeup_fd);
-  if (err != GRPC_ERROR_NONE) {
-    gpr_free(*workqueue);
-    return err;
-  }
-  sprintf(name, "workqueue:%p", (void *)(*workqueue));
-  (*workqueue)->wakeup_read_fd = grpc_fd_create(
-      GRPC_WAKEUP_FD_GET_READ_FD(&(*workqueue)->wakeup_fd), name);
-  gpr_mpscq_init(&(*workqueue)->queue);
-  grpc_closure_init(&(*workqueue)->read_closure, on_readable, *workqueue);
-  grpc_fd_notify_on_read(exec_ctx, (*workqueue)->wakeup_read_fd,
-                         &(*workqueue)->read_closure);
-  return GRPC_ERROR_NONE;
-}
-
-static void workqueue_destroy(grpc_exec_ctx *exec_ctx,
-                              grpc_workqueue *workqueue) {
-  grpc_fd_shutdown(exec_ctx, workqueue->wakeup_read_fd);
-}
-
-static void workqueue_orphan(grpc_exec_ctx *exec_ctx,
-                             grpc_workqueue *workqueue) {
-  if (gpr_atm_full_fetch_add(&workqueue->state, -1) == 1) {
-    workqueue_destroy(exec_ctx, workqueue);
-  }
-}
-
-#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
-void grpc_workqueue_ref(grpc_workqueue *workqueue, const char *file, int line,
-                        const char *reason) {
-  if (workqueue == NULL) return;
-  gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, "WORKQUEUE:%p   ref %d -> %d %s",
-          workqueue, (int)workqueue->refs.count, (int)workqueue->refs.count + 1,
-          reason);
-  gpr_ref(&workqueue->refs);
-}
-#else
-void grpc_workqueue_ref(grpc_workqueue *workqueue) {
-  if (workqueue == NULL) return;
-  gpr_ref(&workqueue->refs);
-}
-#endif
-
-#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
-void grpc_workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
-                          const char *file, int line, const char *reason) {
-  if (workqueue == NULL) return;
-  gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, "WORKQUEUE:%p unref %d -> %d %s",
-          workqueue, (int)workqueue->refs.count, (int)workqueue->refs.count - 1,
-          reason);
-  if (gpr_unref(&workqueue->refs)) {
-    workqueue_orphan(exec_ctx, workqueue);
-  }
-}
-#else
-void grpc_workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue) {
-  if (workqueue == NULL) return;
-  if (gpr_unref(&workqueue->refs)) {
-    workqueue_orphan(exec_ctx, workqueue);
-  }
-}
-#endif
-
-static void drain(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue) {
-  abort();
-}
-
-static void wakeup(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue) {
-  GPR_TIMER_MARK("workqueue.wakeup", 0);
-  grpc_error *err = grpc_wakeup_fd_wakeup(&workqueue->wakeup_fd);
-  if (!GRPC_LOG_IF_ERROR("wakeupfd_wakeup", err)) {
-    drain(exec_ctx, workqueue);
-  }
-}
-
-static void on_readable(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
-  GPR_TIMER_BEGIN("workqueue.on_readable", 0);
-
-  grpc_workqueue *workqueue = arg;
-
-  if (error != GRPC_ERROR_NONE) {
-    /* HACK: let wakeup_fd code know that we stole the fd */
-    workqueue->wakeup_fd.read_fd = 0;
-    grpc_wakeup_fd_destroy(&workqueue->wakeup_fd);
-    grpc_fd_orphan(exec_ctx, workqueue->wakeup_read_fd, NULL, NULL, "destroy");
-    GPR_ASSERT(gpr_atm_no_barrier_load(&workqueue->state) == 0);
-    gpr_free(workqueue);
-  } else {
-    error = grpc_wakeup_fd_consume_wakeup(&workqueue->wakeup_fd);
-    gpr_mpscq_node *n = gpr_mpscq_pop(&workqueue->queue);
-    if (error == GRPC_ERROR_NONE) {
-      grpc_fd_notify_on_read(exec_ctx, workqueue->wakeup_read_fd,
-                             &workqueue->read_closure);
-    } else {
-      /* recurse to get error handling */
-      on_readable(exec_ctx, arg, error);
-    }
-    if (n == NULL) {
-      /* try again - queue in an inconsistant state */
-      wakeup(exec_ctx, workqueue);
-    } else {
-      switch (gpr_atm_full_fetch_add(&workqueue->state, -2)) {
-        case 3:  // had one count, one unorphaned --> done, unorphaned
-          break;
-        case 2:  // had one count, one orphaned --> done, orphaned
-          workqueue_destroy(exec_ctx, workqueue);
-          break;
-        case 1:
-        case 0:
-          // these values are illegal - representing an already done or
-          // deleted workqueue
-          GPR_UNREACHABLE_CODE(break);
-        default:
-          // schedule a wakeup since there's more to do
-          wakeup(exec_ctx, workqueue);
-      }
-      grpc_closure *cl = (grpc_closure *)n;
-      grpc_error *clerr = cl->error;
-      cl->cb(exec_ctx, cl->cb_arg, clerr);
-      GRPC_ERROR_UNREF(clerr);
-    }
-  }
-
-  GPR_TIMER_END("workqueue.on_readable", 0);
-}
-
-void grpc_workqueue_enqueue(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
-                            grpc_closure *closure, grpc_error *error) {
-  GPR_TIMER_BEGIN("workqueue.enqueue", 0);
-  gpr_atm last = gpr_atm_full_fetch_add(&workqueue->state, 2);
-  GPR_ASSERT(last & 1);
-  closure->error = error;
-  gpr_mpscq_push(&workqueue->queue, &closure->next_data.atm_next);
-  if (last == 1) {
-    wakeup(exec_ctx, workqueue);
-  }
-  GPR_TIMER_END("workqueue.enqueue", 0);
-}
-
-#endif /* GPR_POSIX_SOCKET */
diff --git a/src/core/lib/iomgr/workqueue_posix.h b/src/core/lib/iomgr/workqueue_posix.h
deleted file mode 100644
index 03ee21cef7..0000000000
--- a/src/core/lib/iomgr/workqueue_posix.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *
- * Copyright 2015, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef GRPC_CORE_LIB_IOMGR_WORKQUEUE_POSIX_H
-#define GRPC_CORE_LIB_IOMGR_WORKQUEUE_POSIX_H
-
-#include "src/core/lib/iomgr/wakeup_fd_posix.h"
-#include "src/core/lib/support/mpscq.h"
-
-struct grpc_fd;
-
-struct grpc_workqueue {
-  gpr_refcount refs;
-  gpr_mpscq queue;
-  // state is:
-  // lower bit - zero if orphaned
-  // other bits - number of items enqueued
-  gpr_atm state;
-
-  grpc_wakeup_fd wakeup_fd;
-  struct grpc_fd *wakeup_read_fd;
-
-  grpc_closure read_closure;
-};
-
-/** Create a work queue. Returns an error if creation fails. If creation
-    succeeds, sets *workqueue to point to it. */
-grpc_error *grpc_workqueue_create(grpc_exec_ctx *exec_ctx,
-                                  grpc_workqueue **workqueue);
-
-#endif /* GRPC_CORE_LIB_IOMGR_WORKQUEUE_POSIX_H */
diff --git a/src/core/lib/iomgr/workqueue_windows.c b/src/core/lib/iomgr/workqueue_windows.c
index ee81dc248e..5c93d3c59e 100644
--- a/src/core/lib/iomgr/workqueue_windows.c
+++ b/src/core/lib/iomgr/workqueue_windows.c
@@ -43,12 +43,16 @@
 // workqueues.
 
 #ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
-void grpc_workqueue_ref(grpc_workqueue *workqueue, const char *file, int line,
-                        const char *reason) {}
+grpc_workqueue *grpc_workqueue_ref(grpc_workqueue *workqueue, const char *file,
+                                   int line, const char *reason) {
+  return workqueue; /* stub: hands the pointer back, touches no refcount */
+}
 void grpc_workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
                           const char *file, int line, const char *reason) {}
 #else
-void grpc_workqueue_ref(grpc_workqueue *workqueue) {}
+grpc_workqueue *grpc_workqueue_ref(grpc_workqueue *workqueue) {
+  return workqueue; /* stub: hands the pointer back, touches no refcount */
+}
 void grpc_workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue) {}
 #endif
 
diff --git a/src/core/lib/profiling/basic_timers.c b/src/core/lib/profiling/basic_timers.c
index 51813d0461..bdf9af2339 100644
--- a/src/core/lib/profiling/basic_timers.c
+++ b/src/core/lib/profiling/basic_timers.c
@@ -83,6 +83,7 @@ static int g_shutdown;
 static gpr_thd_id g_writing_thread;
 static __thread int g_thread_id;
 static int g_next_thread_id;
+static int g_writing_enabled = 1;
 
 static int timer_log_push_back(gpr_timer_log_list *list, gpr_timer_log *log) {
   if (list->head == NULL) {
@@ -177,7 +178,7 @@ static void flush_logs(gpr_timer_log_list *list) {
   }
 }
 
-static void finish_writing() {
+static void finish_writing(void) {
   pthread_mutex_lock(&g_mu);
   g_shutdown = 1;
   pthread_cond_signal(&g_cv);
@@ -230,6 +231,10 @@ static void gpr_timers_log_add(const char *tagstr, marker_type type,
                                int important, const char *file, int line) {
   gpr_timer_entry *entry;
 
+  if (!g_writing_enabled) {
+    return;
+  }
+
   if (g_thread_log == NULL || g_thread_log->num_entries == MAX_COUNT) {
     rotate_log();
   }
@@ -261,6 +266,8 @@ void gpr_timer_end(const char *tagstr, int important, const char *file,
   gpr_timers_log_add(tagstr, END, important, file, line);
 }
 
+void gpr_timer_set_enabled(int enabled) { g_writing_enabled = enabled; }
+
 /* Basic profiler specific API functions. */
 void gpr_timers_global_init(void) {}
 
@@ -272,4 +279,6 @@ void gpr_timers_global_init(void) {}
 void gpr_timers_global_destroy(void) {}
 
 void gpr_timers_set_log_filename(const char *filename) {}
+
+void gpr_timer_set_enabled(int enabled) {}
 #endif /* GRPC_BASIC_PROFILER */
diff --git a/src/core/lib/profiling/timers.h b/src/core/lib/profiling/timers.h
index c8567e8137..ea0cbca977 100644
--- a/src/core/lib/profiling/timers.h
+++ b/src/core/lib/profiling/timers.h
@@ -34,6 +34,8 @@
 #ifndef GRPC_CORE_LIB_PROFILING_TIMERS_H
 #define GRPC_CORE_LIB_PROFILING_TIMERS_H
 
+#include <stdio.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -50,18 +52,23 @@ void gpr_timer_end(const char *tagstr, int important, const char *file,
 
 void gpr_timers_set_log_filename(const char *filename);
 
+void gpr_timer_set_enabled(int enabled);
+
 #if !(defined(GRPC_STAP_PROFILER) + defined(GRPC_BASIC_PROFILER))
 /* No profiling. No-op all the things. */
 #define GPR_TIMER_MARK(tag, important) \
   do {                                 \
+    /*printf("- %s\n", tag);*/         \
   } while (0)
 
 #define GPR_TIMER_BEGIN(tag, important) \
   do {                                  \
+    /*printf("%s {\n", tag);*/          \
   } while (0)
 
 #define GPR_TIMER_END(tag, important) \
   do {                                \
+    /*printf("} // %s\n", tag);*/     \
   } while (0)
 
 #else /* at least one profiler requested... */
diff --git a/src/core/lib/support/log.c b/src/core/lib/support/log.c
index 899f1218b6..af1651dae5 100644
--- a/src/core/lib/support/log.c
+++ b/src/core/lib/support/log.c
@@ -60,8 +60,9 @@ const char *gpr_log_severity_string(gpr_log_severity severity) {
 
 void gpr_log_message(const char *file, int line, gpr_log_severity severity,
                      const char *message) {
-  if ((gpr_atm)severity < gpr_atm_no_barrier_load(&g_min_severity_to_print))
+  if ((gpr_atm)severity < gpr_atm_no_barrier_load(&g_min_severity_to_print)) {
     return;
+  }
 
   gpr_log_func_args lfargs;
   memset(&lfargs, 0, sizeof(lfargs));
@@ -82,11 +83,11 @@ void gpr_log_verbosity_init() {
 
   gpr_atm min_severity_to_print = GPR_LOG_SEVERITY_ERROR;
   if (verbosity != NULL) {
-    if (strcmp(verbosity, "DEBUG") == 0) {
+    if (gpr_stricmp(verbosity, "DEBUG") == 0) {
       min_severity_to_print = (gpr_atm)GPR_LOG_SEVERITY_DEBUG;
-    } else if (strcmp(verbosity, "INFO") == 0) {
+    } else if (gpr_stricmp(verbosity, "INFO") == 0) {
       min_severity_to_print = (gpr_atm)GPR_LOG_SEVERITY_INFO;
-    } else if (strcmp(verbosity, "ERROR") == 0) {
+    } else if (gpr_stricmp(verbosity, "ERROR") == 0) {
       min_severity_to_print = (gpr_atm)GPR_LOG_SEVERITY_ERROR;
     }
     gpr_free(verbosity);
diff --git a/src/core/lib/support/string.c b/src/core/lib/support/string.c
index 30c1e67647..d17fb9da4b 100644
--- a/src/core/lib/support/string.c
+++ b/src/core/lib/support/string.c
@@ -304,3 +304,14 @@ void gpr_strvec_add(gpr_strvec *sv, char *str) {
 char *gpr_strvec_flatten(gpr_strvec *sv, size_t *final_length) {
   return gpr_strjoin((const char **)sv->strs, sv->count, final_length);
 }
+
+/* Case-insensitive strcmp(): <0, 0 or >0 as a sorts before/equal/after b. */
+int gpr_stricmp(const char *a, const char *b) {
+  int ca, cb;
+  do {
+    /* <ctype.h> takes unsigned char values; a negative plain char is UB. */
+    ca = tolower((unsigned char)*a++);
+    cb = tolower((unsigned char)*b++);
+  } while (ca == cb && ca != 0);
+  return ca - cb;
+}
diff --git a/src/core/lib/support/string.h b/src/core/lib/support/string.h
index 2b6bb3eec6..3aebc083ac 100644
--- a/src/core/lib/support/string.h
+++ b/src/core/lib/support/string.h
@@ -118,6 +118,8 @@ void gpr_strvec_add(gpr_strvec *strs, char *add);
    total_length as per gpr_strjoin */
 char *gpr_strvec_flatten(gpr_strvec *strs, size_t *total_length);
 
+int gpr_stricmp(const char *a, const char *b); /* case-insensitive strcmp() */
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/core/lib/surface/call.c b/src/core/lib/surface/call.c
index 119f5e82ab..97bfd587d2 100644
--- a/src/core/lib/surface/call.c
+++ b/src/core/lib/surface/call.c
@@ -230,33 +230,33 @@ static void destroy_call(grpc_exec_ctx *exec_ctx, void *call_stack,
 static void receiving_slice_ready(grpc_exec_ctx *exec_ctx, void *bctlp,
                                   grpc_error *error);
 
-grpc_call *grpc_call_create(
-    grpc_channel *channel, grpc_call *parent_call, uint32_t propagation_mask,
-    grpc_completion_queue *cq, grpc_pollset_set *pollset_set_alternative,
-    const void *server_transport_data, grpc_mdelem **add_initial_metadata,
-    size_t add_initial_metadata_count, gpr_timespec send_deadline) {
+grpc_error *grpc_call_create(const grpc_call_create_args *args,
+                             grpc_call **out_call) {
   size_t i, j;
-  grpc_channel_stack *channel_stack = grpc_channel_get_channel_stack(channel);
+  grpc_channel_stack *channel_stack =
+      grpc_channel_get_channel_stack(args->channel);
   grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
   grpc_call *call;
   GPR_TIMER_BEGIN("grpc_call_create", 0);
-  call = gpr_malloc(sizeof(grpc_call) + channel_stack->call_stack_size);
+  *out_call = call =
+      gpr_malloc(sizeof(grpc_call) + channel_stack->call_stack_size);
   memset(call, 0, sizeof(grpc_call));
   gpr_mu_init(&call->mu);
-  call->channel = channel;
-  call->cq = cq;
-  call->parent = parent_call;
+  call->channel = args->channel;
+  call->cq = args->cq;
+  call->parent = args->parent_call;
   /* Always support no compression */
   GPR_BITSET(&call->encodings_accepted_by_peer, GRPC_COMPRESS_NONE);
-  call->is_client = server_transport_data == NULL;
+  call->is_client = args->server_transport_data == NULL;
   if (call->is_client) {
-    GPR_ASSERT(add_initial_metadata_count < MAX_SEND_EXTRA_METADATA_COUNT);
-    for (i = 0; i < add_initial_metadata_count; i++) {
-      call->send_extra_metadata[i].md = add_initial_metadata[i];
+    GPR_ASSERT(args->add_initial_metadata_count <
+               MAX_SEND_EXTRA_METADATA_COUNT);
+    for (i = 0; i < args->add_initial_metadata_count; i++) {
+      call->send_extra_metadata[i].md = args->add_initial_metadata[i];
     }
-    call->send_extra_metadata_count = (int)add_initial_metadata_count;
+    call->send_extra_metadata_count = (int)args->add_initial_metadata_count;
   } else {
-    GPR_ASSERT(add_initial_metadata_count == 0);
+    GPR_ASSERT(args->add_initial_metadata_count == 0);
     call->send_extra_metadata_count = 0;
   }
   for (i = 0; i < 2; i++) {
@@ -265,78 +265,79 @@ grpc_call *grpc_call_create(
     }
   }
   call->send_deadline =
-      gpr_convert_clock_type(send_deadline, GPR_CLOCK_MONOTONIC);
-  GRPC_CHANNEL_INTERNAL_REF(channel, "call");
+      gpr_convert_clock_type(args->send_deadline, GPR_CLOCK_MONOTONIC);
+  GRPC_CHANNEL_INTERNAL_REF(args->channel, "call");
   /* initial refcount dropped by grpc_call_destroy */
   grpc_error *error = grpc_call_stack_init(
       &exec_ctx, channel_stack, 1, destroy_call, call, call->context,
-      server_transport_data, CALL_STACK_FROM_CALL(call));
+      args->server_transport_data, CALL_STACK_FROM_CALL(call));
   if (error != GRPC_ERROR_NONE) {
     intptr_t status;
-    if (!grpc_error_get_int(error, GRPC_ERROR_INT_GRPC_STATUS, &status))
+    if (!grpc_error_get_int(error, GRPC_ERROR_INT_GRPC_STATUS, &status)) {
       status = GRPC_STATUS_UNKNOWN;
+    }
     const char *error_str =
         grpc_error_get_str(error, GRPC_ERROR_STR_DESCRIPTION);
     close_with_status(&exec_ctx, call, (grpc_status_code)status,
                       error_str == NULL ? "unknown error" : error_str);
-    GRPC_ERROR_UNREF(error);
   }
-  if (cq != NULL) {
+  if (args->cq != NULL) {
     GPR_ASSERT(
-        pollset_set_alternative == NULL &&
+        args->pollset_set_alternative == NULL &&
         "Only one of 'cq' and 'pollset_set_alternative' should be non-NULL.");
-    GRPC_CQ_INTERNAL_REF(cq, "bind");
+    GRPC_CQ_INTERNAL_REF(args->cq, "bind");
     call->pollent =
-        grpc_polling_entity_create_from_pollset(grpc_cq_pollset(cq));
+        grpc_polling_entity_create_from_pollset(grpc_cq_pollset(args->cq));
   }
-  if (pollset_set_alternative != NULL) {
-    call->pollent =
-        grpc_polling_entity_create_from_pollset_set(pollset_set_alternative);
+  if (args->pollset_set_alternative != NULL) {
+    call->pollent = grpc_polling_entity_create_from_pollset_set(
+        args->pollset_set_alternative);
   }
   if (!grpc_polling_entity_is_empty(&call->pollent)) {
     grpc_call_stack_set_pollset_or_pollset_set(
         &exec_ctx, CALL_STACK_FROM_CALL(call), &call->pollent);
   }
-  if (parent_call != NULL) {
-    GRPC_CALL_INTERNAL_REF(parent_call, "child");
+  gpr_timespec send_deadline = args->send_deadline;
+  if (args->parent_call != NULL) {
+    GRPC_CALL_INTERNAL_REF(args->parent_call, "child");
     GPR_ASSERT(call->is_client);
-    GPR_ASSERT(!parent_call->is_client);
+    GPR_ASSERT(!args->parent_call->is_client);
 
-    gpr_mu_lock(&parent_call->mu);
+    gpr_mu_lock(&args->parent_call->mu);
 
-    if (propagation_mask & GRPC_PROPAGATE_DEADLINE) {
+    if (args->propagation_mask & GRPC_PROPAGATE_DEADLINE) {
       send_deadline = gpr_time_min(
           gpr_convert_clock_type(send_deadline,
-                                 parent_call->send_deadline.clock_type),
-          parent_call->send_deadline);
+                                 args->parent_call->send_deadline.clock_type),
+          args->parent_call->send_deadline);
     }
     /* for now GRPC_PROPAGATE_TRACING_CONTEXT *MUST* be passed with
      * GRPC_PROPAGATE_STATS_CONTEXT */
     /* TODO(ctiller): This should change to use the appropriate census start_op
      * call. */
-    if (propagation_mask & GRPC_PROPAGATE_CENSUS_TRACING_CONTEXT) {
-      GPR_ASSERT(propagation_mask & GRPC_PROPAGATE_CENSUS_STATS_CONTEXT);
-      grpc_call_context_set(call, GRPC_CONTEXT_TRACING,
-                            parent_call->context[GRPC_CONTEXT_TRACING].value,
-                            NULL);
+    if (args->propagation_mask & GRPC_PROPAGATE_CENSUS_TRACING_CONTEXT) {
+      GPR_ASSERT(args->propagation_mask & GRPC_PROPAGATE_CENSUS_STATS_CONTEXT);
+      grpc_call_context_set(
+          call, GRPC_CONTEXT_TRACING,
+          args->parent_call->context[GRPC_CONTEXT_TRACING].value, NULL);
     } else {
-      GPR_ASSERT(propagation_mask & GRPC_PROPAGATE_CENSUS_STATS_CONTEXT);
+      GPR_ASSERT(args->propagation_mask & GRPC_PROPAGATE_CENSUS_STATS_CONTEXT);
     }
-    if (propagation_mask & GRPC_PROPAGATE_CANCELLATION) {
+    if (args->propagation_mask & GRPC_PROPAGATE_CANCELLATION) {
       call->cancellation_is_inherited = 1;
     }
 
-    if (parent_call->first_child == NULL) {
-      parent_call->first_child = call;
+    if (args->parent_call->first_child == NULL) {
+      args->parent_call->first_child = call;
       call->sibling_next = call->sibling_prev = call;
     } else {
-      call->sibling_next = parent_call->first_child;
-      call->sibling_prev = parent_call->first_child->sibling_prev;
+      call->sibling_next = args->parent_call->first_child;
+      call->sibling_prev = args->parent_call->first_child->sibling_prev;
       call->sibling_next->sibling_prev = call->sibling_prev->sibling_next =
           call;
     }
 
-    gpr_mu_unlock(&parent_call->mu);
+    gpr_mu_unlock(&args->parent_call->mu);
   }
   if (gpr_time_cmp(send_deadline, gpr_inf_future(send_deadline.clock_type)) !=
       0) {
@@ -344,7 +345,7 @@ grpc_call *grpc_call_create(
   }
   grpc_exec_ctx_finish(&exec_ctx);
   GPR_TIMER_END("grpc_call_create", 0);
-  return call;
+  return error;
 }
 
 void grpc_call_set_completion_queue(grpc_exec_ctx *exec_ctx, grpc_call *call,
@@ -1089,9 +1090,14 @@ static void finish_batch_completion(grpc_exec_ctx *exec_ctx, void *user_data,
 static void post_batch_completion(grpc_exec_ctx *exec_ctx,
                                   batch_control *bctl) {
   grpc_call *call = bctl->call;
+  grpc_error *error = bctl->error;
+  if (bctl->recv_final_op) {
+    GRPC_ERROR_UNREF(error);
+    error = GRPC_ERROR_NONE;
+  }
   if (bctl->is_notify_tag_closure) {
     /* unrefs bctl->error */
-    grpc_exec_ctx_sched(exec_ctx, bctl->notify_tag, bctl->error, NULL);
+    grpc_closure_run(exec_ctx, bctl->notify_tag, error);
     gpr_mu_lock(&call->mu);
     bctl->call->used_batches =
         (uint8_t)(bctl->call->used_batches &
@@ -1100,7 +1106,7 @@ static void post_batch_completion(grpc_exec_ctx *exec_ctx,
     GRPC_CALL_INTERNAL_UNREF(exec_ctx, call, "completion");
   } else {
     /* unrefs bctl->error */
-    grpc_cq_end_op(exec_ctx, bctl->call->cq, bctl->notify_tag, bctl->error,
+    grpc_cq_end_op(exec_ctx, bctl->call->cq, bctl->notify_tag, error,
                    finish_batch_completion, bctl, &bctl->cq_completion);
   }
 }
@@ -1257,6 +1263,14 @@ static void validate_filtered_metadata(grpc_exec_ctx *exec_ctx,
   }
 }
 
+static void add_batch_error(batch_control *bctl, grpc_error *error) {
+  if (error == GRPC_ERROR_NONE) return; /* nothing to record on success */
+  if (bctl->error == GRPC_ERROR_NONE) { /* first failure: make the parent */
+    bctl->error = GRPC_ERROR_CREATE("Call batch operation failed");
+  }
+  bctl->error = grpc_error_add_child(bctl->error, error); /* attach child */
+}
+
 static void receiving_initial_metadata_ready(grpc_exec_ctx *exec_ctx,
                                              void *bctlp, grpc_error *error) {
   batch_control *bctl = bctlp;
@@ -1264,9 +1278,8 @@ static void receiving_initial_metadata_ready(grpc_exec_ctx *exec_ctx,
 
   gpr_mu_lock(&call->mu);
 
-  if (error != GRPC_ERROR_NONE) {
-    bctl->error = GRPC_ERROR_REF(error);
-  } else {
+  add_batch_error(bctl, GRPC_ERROR_REF(error));
+  if (error == GRPC_ERROR_NONE) {
     grpc_metadata_batch *md =
         &call->metadata_batch[1 /* is_receiving */][0 /* is_trailing */];
     grpc_metadata_batch_filter(md, recv_initial_filter, call);
@@ -1359,8 +1372,7 @@ static void finish_batch(grpc_exec_ctx *exec_ctx, void *bctlp,
     GRPC_ERROR_UNREF(error);
     error = GRPC_ERROR_NONE;
   }
-  GRPC_ERROR_UNREF(bctl->error);
-  bctl->error = GRPC_ERROR_REF(error);
+  add_batch_error(bctl, GRPC_ERROR_REF(error));
   gpr_mu_unlock(&call->mu);
   if (gpr_unref(&bctl->steps_to_complete)) {
     post_batch_completion(exec_ctx, bctl);
@@ -1396,6 +1408,7 @@ static grpc_call_error call_start_batch(grpc_exec_ctx *exec_ctx,
 
   grpc_transport_stream_op *stream_op = &bctl->op;
   memset(stream_op, 0, sizeof(*stream_op));
+  stream_op->covered_by_poller = true;
 
   if (nops == 0) {
     GRPC_CALL_INTERNAL_REF(call, "completion");
diff --git a/src/core/lib/surface/call.h b/src/core/lib/surface/call.h
index 3a78fe3aa3..18af41b7fb 100644
--- a/src/core/lib/surface/call.h
+++ b/src/core/lib/surface/call.h
@@ -49,15 +49,29 @@ typedef void (*grpc_ioreq_completion_func)(grpc_exec_ctx *exec_ctx,
                                            grpc_call *call, int success,
                                            void *user_data);
 
-grpc_call *grpc_call_create(grpc_channel *channel, grpc_call *parent_call,
-                            uint32_t propagation_mask,
-                            grpc_completion_queue *cq,
-                            /* if not NULL, it'll be used in lieu of \a cq */
-                            grpc_pollset_set *pollset_set_alternative,
-                            const void *server_transport_data,
-                            grpc_mdelem **add_initial_metadata,
-                            size_t add_initial_metadata_count,
-                            gpr_timespec send_deadline);
+typedef struct grpc_call_create_args {
+  grpc_channel *channel; /* channel whose stack the call is built on */
+
+  grpc_call *parent_call;    /* optional; must be a server-side call */
+  uint32_t propagation_mask; /* GRPC_PROPAGATE_* bits taken from parent */
+
+  grpc_completion_queue *cq;
+  /* alternative to cq; only one of the two may be non-NULL */
+  grpc_pollset_set *pollset_set_alternative;
+
+  const void *server_transport_data; /* NULL marks a client-side call */
+
+  grpc_mdelem **add_initial_metadata; /* client calls only */
+  size_t add_initial_metadata_count;  /* must be 0 for server calls */
+
+  gpr_timespec send_deadline;
+} grpc_call_create_args;
+
+/* Create a new call based on \a args.
+   Regardless of success or failure, always returns a valid new call into *call
+   */
+grpc_error *grpc_call_create(const grpc_call_create_args *args,
+                             grpc_call **call);
 
 void grpc_call_set_completion_queue(grpc_exec_ctx *exec_ctx, grpc_call *call,
                                     grpc_completion_queue *cq);
diff --git a/src/core/lib/surface/channel.c b/src/core/lib/surface/channel.c
index 52e78567bd..aa8c052b41 100644
--- a/src/core/lib/surface/channel.c
+++ b/src/core/lib/surface/channel.c
@@ -208,9 +208,21 @@ static grpc_call *grpc_channel_create_call_internal(
     send_metadata[num_metadata++] = GRPC_MDELEM_REF(channel->default_authority);
   }
 
-  return grpc_call_create(channel, parent_call, propagation_mask, cq,
-                          pollset_set_alternative, NULL, send_metadata,
-                          num_metadata, deadline);
+  grpc_call_create_args args;
+  memset(&args, 0, sizeof(args));
+  args.channel = channel;
+  args.parent_call = parent_call;
+  args.propagation_mask = propagation_mask;
+  args.cq = cq;
+  args.pollset_set_alternative = pollset_set_alternative;
+  args.server_transport_data = NULL;
+  args.add_initial_metadata = send_metadata;
+  args.add_initial_metadata_count = num_metadata;
+  args.send_deadline = deadline;
+
+  grpc_call *call;
+  GRPC_LOG_IF_ERROR("call_create", grpc_call_create(&args, &call));
+  return call;
 }
 
 grpc_call *grpc_channel_create_call(grpc_channel *channel,
diff --git a/src/core/lib/surface/completion_queue.c b/src/core/lib/surface/completion_queue.c
index 5978884db8..4e0feb56ac 100644
--- a/src/core/lib/surface/completion_queue.c
+++ b/src/core/lib/surface/completion_queue.c
@@ -39,6 +39,7 @@
 #include <grpc/support/alloc.h>
 #include <grpc/support/atm.h>
 #include <grpc/support/log.h>
+#include <grpc/support/string_util.h>
 #include <grpc/support/time.h>
 
 #include "src/core/lib/iomgr/pollset.h"
@@ -50,6 +51,9 @@
 #include "src/core/lib/surface/event_string.h"
 
 int grpc_trace_operation_failures;
+#ifndef NDEBUG
+int grpc_trace_pending_tags;
+#endif
 
 typedef struct {
   grpc_pollset_worker **worker;
@@ -67,6 +71,9 @@ struct grpc_completion_queue {
   gpr_refcount pending_events;
   /** Once owning_refs drops to zero, we will destroy the cq */
   gpr_refcount owning_refs;
+  /** counter of how many things have ever been queued on this completion queue
+      useful for avoiding locks to check the queue */
+  gpr_atm things_queued_ever;
   /** 0 initially, 1 once we've begun shutting down */
   int shutdown;
   int shutdown_called;
@@ -121,15 +128,6 @@ void grpc_cq_global_shutdown(void) {
   }
 }
 
-struct grpc_cq_alarm {
-  grpc_timer alarm;
-  grpc_cq_completion completion;
-  /** completion queue where events about this alarm will be posted */
-  grpc_completion_queue *cq;
-  /** user supplied tag */
-  void *tag;
-};
-
 grpc_completion_queue *grpc_completion_queue_create(void *reserved) {
   grpc_completion_queue *cc;
   GPR_ASSERT(!reserved);
@@ -166,6 +164,7 @@ grpc_completion_queue *grpc_completion_queue_create(void *reserved) {
   cc->is_server_cq = 0;
   cc->is_non_listening_server_cq = 0;
   cc->num_pluckers = 0;
+  gpr_atm_no_barrier_store(&cc->things_queued_ever, 0);
 #ifndef NDEBUG
   cc->outstanding_tag_count = 0;
 #endif
@@ -276,6 +275,7 @@ void grpc_cq_end_op(grpc_exec_ctx *exec_ctx, grpc_completion_queue *cc,
   GPR_ASSERT(found);
 #endif
   shutdown = gpr_unref(&cc->pending_events);
+  gpr_atm_no_barrier_fetch_add(&cc->things_queued_ever, 1);
   if (!shutdown) {
     cc->completed_tail->next =
         ((uintptr_t)storage) | (1u & (uintptr_t)cc->completed_tail->next);
@@ -313,13 +313,66 @@ void grpc_cq_end_op(grpc_exec_ctx *exec_ctx, grpc_completion_queue *cc,
   GRPC_ERROR_UNREF(error);
 }
 
+typedef struct {
+  gpr_atm last_seen_things_queued_ever;  /* things_queued_ever snapshot */
+  grpc_completion_queue *cq;             /* queue the check inspects */
+  gpr_timespec deadline;                 /* check fires once this passes */
+  grpc_cq_completion *stolen_completion; /* completion taken by the check */
+  void *tag;                             /* pluck only: tag to match */
+  bool first_loop;                       /* true during the first poll pass */
+} cq_is_finished_arg;
+
+static bool cq_is_next_finished(grpc_exec_ctx *exec_ctx, void *arg) {
+  cq_is_finished_arg *a = arg; /* finish check installed by cq next() */
+  grpc_completion_queue *cq = a->cq;
+  GPR_ASSERT(a->stolen_completion == NULL); /* earlier steal was consumed */
+  gpr_atm current_last_seen_things_queued_ever =
+      gpr_atm_no_barrier_load(&cq->things_queued_ever);
+  if (current_last_seen_things_queued_ever != a->last_seen_things_queued_ever) {
+    gpr_mu_lock(cq->mu); /* counter moved since last look: check the queue */
+    a->last_seen_things_queued_ever =
+        gpr_atm_no_barrier_load(&cq->things_queued_ever);
+    if (cq->completed_tail != &cq->completed_head) { /* queue not empty */
+      a->stolen_completion = (grpc_cq_completion *)cq->completed_head.next;
+      cq->completed_head.next = a->stolen_completion->next & ~(uintptr_t)1;
+      if (a->stolen_completion == cq->completed_tail) { /* took the last */
+        cq->completed_tail = &cq->completed_head;
+      }
+      gpr_mu_unlock(cq->mu);
+      return true; /* completion handed to the caller via a */
+    }
+    gpr_mu_unlock(cq->mu);
+  }
+  return !a->first_loop && /* times out only after the first pass */
+         gpr_time_cmp(a->deadline, gpr_now(a->deadline.clock_type)) < 0;
+}
+
+#ifndef NDEBUG
+static void dump_pending_tags(grpc_completion_queue *cc) {
+  if (!grpc_trace_pending_tags) return; /* "pending_tags" tracer is off */
+
+  gpr_strvec v;
+  gpr_strvec_init(&v);
+  gpr_strvec_add(&v, gpr_strdup("PENDING TAGS:"));
+  for (size_t i = 0; i < cc->outstanding_tag_count; i++) {
+    char *s;
+    gpr_asprintf(&s, " %p", cc->outstanding_tags[i]); /* one piece per tag */
+    gpr_strvec_add(&v, s);
+  }
+  char *out = gpr_strvec_flatten(&v, NULL); /* join pieces into one string */
+  gpr_strvec_destroy(&v);
+  gpr_log(GPR_DEBUG, "%s", out);
+  gpr_free(out);
+}
+#else
+static void dump_pending_tags(grpc_completion_queue *cc) {} /* NDEBUG: no-op */
+#endif
+
 grpc_event grpc_completion_queue_next(grpc_completion_queue *cc,
                                       gpr_timespec deadline, void *reserved) {
   grpc_event ret;
   grpc_pollset_worker *worker = NULL;
-  int first_loop = 1;
   gpr_timespec now;
-  grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
 
   GPR_TIMER_BEGIN("grpc_completion_queue_next", 0);
 
@@ -333,11 +386,33 @@ grpc_event grpc_completion_queue_next(grpc_completion_queue *cc,
           reserved));
   GPR_ASSERT(!reserved);
 
+  dump_pending_tags(cc);
+
   deadline = gpr_convert_clock_type(deadline, GPR_CLOCK_MONOTONIC);
 
   GRPC_CQ_INTERNAL_REF(cc, "next");
   gpr_mu_lock(cc->mu);
+  cq_is_finished_arg is_finished_arg = {
+      .last_seen_things_queued_ever =
+          gpr_atm_no_barrier_load(&cc->things_queued_ever),
+      .cq = cc,
+      .deadline = deadline,
+      .stolen_completion = NULL,
+      .tag = NULL,
+      .first_loop = true};
+  grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT_WITH_FINISH_CHECK(
+      cq_is_next_finished, &is_finished_arg);
   for (;;) {
+    if (is_finished_arg.stolen_completion != NULL) {
+      gpr_mu_unlock(cc->mu);
+      grpc_cq_completion *c = is_finished_arg.stolen_completion;
+      is_finished_arg.stolen_completion = NULL;
+      ret.type = GRPC_OP_COMPLETE;
+      ret.success = c->next & 1u;
+      ret.tag = c->tag;
+      c->done(&exec_ctx, c->done_arg, c);
+      break;
+    }
     if (cc->completed_tail != &cc->completed_head) {
       grpc_cq_completion *c = (grpc_cq_completion *)cc->completed_head.next;
       cc->completed_head.next = c->next & ~(uintptr_t)1;
@@ -358,13 +433,13 @@ grpc_event grpc_completion_queue_next(grpc_completion_queue *cc,
       break;
     }
     now = gpr_now(GPR_CLOCK_MONOTONIC);
-    if (!first_loop && gpr_time_cmp(now, deadline) >= 0) {
+    if (!is_finished_arg.first_loop && gpr_time_cmp(now, deadline) >= 0) {
       gpr_mu_unlock(cc->mu);
       memset(&ret, 0, sizeof(ret));
       ret.type = GRPC_QUEUE_TIMEOUT;
+      dump_pending_tags(cc);
       break;
     }
-    first_loop = 0;
     /* Check alarms - these are a global resource so we just ping
        each time through on every pollset.
        May update deadline to ensure timely wakeups.
@@ -387,13 +462,16 @@ grpc_event grpc_completion_queue_next(grpc_completion_queue *cc,
         GRPC_ERROR_UNREF(err);
         memset(&ret, 0, sizeof(ret));
         ret.type = GRPC_QUEUE_TIMEOUT;
+        dump_pending_tags(cc);
         break;
       }
     }
+    is_finished_arg.first_loop = false;
   }
   GRPC_SURFACE_TRACE_RETURNED_EVENT(cc, &ret);
   GRPC_CQ_INTERNAL_UNREF(cc, "next");
   grpc_exec_ctx_finish(&exec_ctx);
+  GPR_ASSERT(is_finished_arg.stolen_completion == NULL);
 
   GPR_TIMER_END("grpc_completion_queue_next", 0);
 
@@ -424,6 +502,37 @@ static void del_plucker(grpc_completion_queue *cc, void *tag,
   GPR_UNREACHABLE_CODE(return );
 }
 
+static bool cq_is_pluck_finished(grpc_exec_ctx *exec_ctx, void *arg) {
+  cq_is_finished_arg *a = arg; /* finish check installed by cq pluck() */
+  grpc_completion_queue *cq = a->cq;
+  GPR_ASSERT(a->stolen_completion == NULL); /* earlier steal was consumed */
+  gpr_atm current_last_seen_things_queued_ever =
+      gpr_atm_no_barrier_load(&cq->things_queued_ever);
+  if (current_last_seen_things_queued_ever != a->last_seen_things_queued_ever) {
+    gpr_mu_lock(cq->mu); /* counter moved since last look: scan the queue */
+    a->last_seen_things_queued_ever =
+        gpr_atm_no_barrier_load(&cq->things_queued_ever);
+    grpc_cq_completion *c;
+    grpc_cq_completion *prev = &cq->completed_head;
+    while ((c = (grpc_cq_completion *)(prev->next & ~(uintptr_t)1)) !=
+           &cq->completed_head) { /* low bit of next is a flag, not address */
+      if (c->tag == a->tag) { /* unlink c but keep prev's own flag bit */
+        prev->next = (prev->next & (uintptr_t)1) | (c->next & ~(uintptr_t)1);
+        if (c == cq->completed_tail) {
+          cq->completed_tail = prev;
+        }
+        gpr_mu_unlock(cq->mu);
+        a->stolen_completion = c;
+        return true; /* completion handed to the caller via a */
+      }
+      prev = c;
+    }
+    gpr_mu_unlock(cq->mu);
+  }
+  return !a->first_loop && /* times out only after the first pass */
+         gpr_time_cmp(a->deadline, gpr_now(a->deadline.clock_type)) < 0;
+}
+
 grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
                                        gpr_timespec deadline, void *reserved) {
   grpc_event ret;
@@ -431,8 +540,6 @@ grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
   grpc_cq_completion *prev;
   grpc_pollset_worker *worker = NULL;
   gpr_timespec now;
-  int first_loop = 1;
-  grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
 
   GPR_TIMER_BEGIN("grpc_completion_queue_pluck", 0);
 
@@ -448,11 +555,33 @@ grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
   }
   GPR_ASSERT(!reserved);
 
+  dump_pending_tags(cc);
+
   deadline = gpr_convert_clock_type(deadline, GPR_CLOCK_MONOTONIC);
 
   GRPC_CQ_INTERNAL_REF(cc, "pluck");
   gpr_mu_lock(cc->mu);
+  cq_is_finished_arg is_finished_arg = {
+      .last_seen_things_queued_ever =
+          gpr_atm_no_barrier_load(&cc->things_queued_ever),
+      .cq = cc,
+      .deadline = deadline,
+      .stolen_completion = NULL,
+      .tag = tag,
+      .first_loop = true};
+  grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT_WITH_FINISH_CHECK(
+      cq_is_pluck_finished, &is_finished_arg);
   for (;;) {
+    if (is_finished_arg.stolen_completion != NULL) {
+      gpr_mu_unlock(cc->mu);
+      c = is_finished_arg.stolen_completion;
+      is_finished_arg.stolen_completion = NULL;
+      ret.type = GRPC_OP_COMPLETE;
+      ret.success = c->next & 1u;
+      ret.tag = c->tag;
+      c->done(&exec_ctx, c->done_arg, c);
+      break;
+    }
     prev = &cc->completed_head;
     while ((c = (grpc_cq_completion *)(prev->next & ~(uintptr_t)1)) !=
            &cc->completed_head) {
@@ -485,17 +614,18 @@ grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
       memset(&ret, 0, sizeof(ret));
       /* TODO(ctiller): should we use a different result here */
       ret.type = GRPC_QUEUE_TIMEOUT;
+      dump_pending_tags(cc);
       break;
     }
     now = gpr_now(GPR_CLOCK_MONOTONIC);
-    if (!first_loop && gpr_time_cmp(now, deadline) >= 0) {
+    if (!is_finished_arg.first_loop && gpr_time_cmp(now, deadline) >= 0) {
       del_plucker(cc, tag, &worker);
       gpr_mu_unlock(cc->mu);
       memset(&ret, 0, sizeof(ret));
       ret.type = GRPC_QUEUE_TIMEOUT;
+      dump_pending_tags(cc);
       break;
     }
-    first_loop = 0;
     /* Check alarms - these are a global resource so we just ping
        each time through on every pollset.
        May update deadline to ensure timely wakeups.
@@ -518,15 +648,18 @@ grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
         GRPC_ERROR_UNREF(err);
         memset(&ret, 0, sizeof(ret));
         ret.type = GRPC_QUEUE_TIMEOUT;
+        dump_pending_tags(cc);
         break;
       }
     }
+    is_finished_arg.first_loop = false;
     del_plucker(cc, tag, &worker);
   }
 done:
   GRPC_SURFACE_TRACE_RETURNED_EVENT(cc, &ret);
   GRPC_CQ_INTERNAL_UNREF(cc, "pluck");
   grpc_exec_ctx_finish(&exec_ctx);
+  GPR_ASSERT(is_finished_arg.stolen_completion == NULL);
 
   GPR_TIMER_END("grpc_completion_queue_pluck", 0);
 
diff --git a/src/core/lib/surface/completion_queue.h b/src/core/lib/surface/completion_queue.h
index 3049284f68..e9d840df77 100644
--- a/src/core/lib/surface/completion_queue.h
+++ b/src/core/lib/surface/completion_queue.h
@@ -44,6 +44,9 @@
 extern int grpc_cq_pluck_trace;
 extern int grpc_cq_event_timeout_trace;
 extern int grpc_trace_operation_failures;
+#ifndef NDEBUG
+extern int grpc_trace_pending_tags;
+#endif
 
 typedef struct grpc_cq_completion {
   /** user supplied tag */
diff --git a/src/core/lib/surface/init.c b/src/core/lib/surface/init.c
index edda0c85fa..ac111253ef 100644
--- a/src/core/lib/surface/init.c
+++ b/src/core/lib/surface/init.c
@@ -173,6 +173,9 @@ void grpc_init(void) {
     // Default timeout trace to 1
     grpc_cq_event_timeout_trace = 1;
     grpc_register_tracer("op_failure", &grpc_trace_operation_failures);
+#ifndef NDEBUG
+    grpc_register_tracer("pending_tags", &grpc_trace_pending_tags);
+#endif
     grpc_security_pre_init();
     grpc_iomgr_init();
     grpc_executor_init();
diff --git a/src/core/lib/surface/server.c b/src/core/lib/surface/server.c
index 56fb80e92e..9a9fcddb6e 100644
--- a/src/core/lib/surface/server.c
+++ b/src/core/lib/surface/server.c
@@ -280,6 +280,7 @@ static void send_shutdown(grpc_exec_ctx *exec_ctx, grpc_channel *channel,
   grpc_channel_element *elem;
 
   op->send_goaway = send_goaway;
+  op->set_accept_stream = true;
   sc->slice = gpr_slice_from_copied_string("Server shutdown");
   op->goaway_message = &sc->slice;
   op->goaway_status = GRPC_STATUS_OK;
@@ -439,6 +440,13 @@ static void destroy_channel(grpc_exec_ctx *exec_ctx, channel_data *chand,
   chand->finish_destroy_channel_closure.cb = finish_destroy_channel;
   chand->finish_destroy_channel_closure.cb_arg = chand;
 
+  if (error != GRPC_ERROR_NONE) {
+    const char *msg = grpc_error_string(error);
+    gpr_log(GPR_INFO, "Disconnected client: %s", msg);
+    grpc_error_free_string(msg);
+  }
+  GRPC_ERROR_UNREF(error);
+
   grpc_transport_op *op =
       grpc_make_transport_op(&chand->finish_destroy_channel_closure);
   op->set_accept_stream = true;
@@ -446,13 +454,6 @@ static void destroy_channel(grpc_exec_ctx *exec_ctx, channel_data *chand,
                        grpc_channel_stack_element(
                            grpc_channel_get_channel_stack(chand->channel), 0),
                        op);
-
-  if (error != GRPC_ERROR_NONE) {
-    const char *msg = grpc_error_string(error);
-    gpr_log(GPR_INFO, "Disconnected client: %s", msg);
-    grpc_error_free_string(msg);
-  }
-  GRPC_ERROR_UNREF(error);
 }
 
 static void cpstr(char **dest, size_t *capacity, grpc_mdstr *value) {
@@ -773,8 +774,7 @@ static void server_on_recv_initial_metadata(grpc_exec_ctx *exec_ctx, void *ptr,
         GRPC_ERROR_CREATE_REFERENCING("Missing :authority or :path", &error, 1);
   }
 
-  grpc_exec_ctx_sched(exec_ctx, calld->on_done_recv_initial_metadata, error,
-                      NULL);
+  grpc_closure_run(exec_ctx, calld->on_done_recv_initial_metadata, error);
 }
 
 static void server_mutate_op(grpc_call_element *elem,
@@ -829,11 +829,20 @@ static void accept_stream(grpc_exec_ctx *exec_ctx, void *cd,
                           const void *transport_server_data) {
   channel_data *chand = cd;
   /* create a call */
-  grpc_call *call = grpc_call_create(chand->channel, NULL, 0, NULL, NULL,
-                                     transport_server_data, NULL, 0,
-                                     gpr_inf_future(GPR_CLOCK_MONOTONIC));
+  grpc_call_create_args args;
+  memset(&args, 0, sizeof(args));
+  args.channel = chand->channel;
+  args.server_transport_data = transport_server_data;
+  args.send_deadline = gpr_inf_future(GPR_CLOCK_MONOTONIC);
+  grpc_call *call;
+  grpc_error *error = grpc_call_create(&args, &call);
   grpc_call_element *elem =
       grpc_call_stack_element(grpc_call_get_call_stack(call), 0);
+  if (error != GRPC_ERROR_NONE) {
+    got_initial_metadata(exec_ctx, elem, error);
+    GRPC_ERROR_UNREF(error);
+    return;
+  }
   call_data *calld = elem->call_data;
   grpc_op op;
   memset(&op, 0, sizeof(op));
diff --git a/src/core/lib/transport/connectivity_state.c b/src/core/lib/transport/connectivity_state.c
index 68d05e3a85..fdb5307814 100644
--- a/src/core/lib/transport/connectivity_state.c
+++ b/src/core/lib/transport/connectivity_state.c
@@ -180,7 +180,8 @@ void grpc_connectivity_state_set(grpc_exec_ctx *exec_ctx,
     *w->current = tracker->current_state;
     tracker->watchers = w->next;
     if (grpc_connectivity_state_trace) {
-      gpr_log(GPR_DEBUG, "NOTIFY: %p", w->notify);
+      gpr_log(GPR_DEBUG, "NOTIFY: %p %s: %p", tracker, tracker->name,
+              w->notify);
     }
     grpc_exec_ctx_sched(exec_ctx, w->notify,
                         GRPC_ERROR_REF(tracker->current_error), NULL);
diff --git a/src/core/lib/transport/transport.c b/src/core/lib/transport/transport.c
index 08f9d7e8d9..b951218130 100644
--- a/src/core/lib/transport/transport.c
+++ b/src/core/lib/transport/transport.c
@@ -46,8 +46,9 @@
 #ifdef GRPC_STREAM_REFCOUNT_DEBUG
 void grpc_stream_ref(grpc_stream_refcount *refcount, const char *reason) {
   gpr_atm val = gpr_atm_no_barrier_load(&refcount->refs.count);
-  gpr_log(GPR_DEBUG, "%s %p:%p   REF %d->%d %s", refcount->object_type,
-          refcount, refcount->destroy.cb_arg, val, val + 1, reason);
+  gpr_log(GPR_DEBUG, "%s %p:%p   REF %" PRIdPTR "->%" PRIdPTR " %s",
+          refcount->object_type, refcount, refcount->destroy.cb_arg, val,
+          val + 1, reason);
 #else
 void grpc_stream_ref(grpc_stream_refcount *refcount) {
 #endif
@@ -58,8 +59,9 @@ void grpc_stream_ref(grpc_stream_refcount *refcount) {
 void grpc_stream_unref(grpc_exec_ctx *exec_ctx, grpc_stream_refcount *refcount,
                        const char *reason) {
   gpr_atm val = gpr_atm_no_barrier_load(&refcount->refs.count);
-  gpr_log(GPR_DEBUG, "%s %p:%p UNREF %d->%d %s", refcount->object_type,
-          refcount, refcount->destroy.cb_arg, val, val - 1, reason);
+  gpr_log(GPR_DEBUG, "%s %p:%p UNREF %" PRIdPTR "->%" PRIdPTR " %s",
+          refcount->object_type, refcount, refcount->destroy.cb_arg, val,
+          val - 1, reason);
 #else
 void grpc_stream_unref(grpc_exec_ctx *exec_ctx,
                        grpc_stream_refcount *refcount) {
@@ -274,3 +276,28 @@ grpc_transport_op *grpc_make_transport_op(grpc_closure *on_complete) {
   op->op.on_consumed = &op->outer_on_complete;
   return &op->op;
 }
+
+typedef struct {
+  grpc_closure outer_on_complete;
+  grpc_closure *inner_on_complete;
+  grpc_transport_stream_op op;
+} made_transport_stream_op;
+
+static void destroy_made_transport_stream_op(grpc_exec_ctx *exec_ctx, void *arg,
+                                             grpc_error *error) {
+  made_transport_stream_op *op = arg;
+  grpc_exec_ctx_sched(exec_ctx, op->inner_on_complete, GRPC_ERROR_REF(error),
+                      NULL);
+  gpr_free(op);
+}
+
+grpc_transport_stream_op *grpc_make_transport_stream_op(
+    grpc_closure *on_complete) {
+  made_transport_stream_op *op = gpr_malloc(sizeof(*op));
+  grpc_closure_init(&op->outer_on_complete, destroy_made_transport_stream_op,
+                    op);
+  op->inner_on_complete = on_complete;
+  memset(&op->op, 0, sizeof(op->op));
+  op->op.on_complete = &op->outer_on_complete;
+  return &op->op;
+}
diff --git a/src/core/lib/transport/transport.h b/src/core/lib/transport/transport.h
index 8dc393fd61..50253ebad1 100644
--- a/src/core/lib/transport/transport.h
+++ b/src/core/lib/transport/transport.h
@@ -113,6 +113,10 @@ typedef struct grpc_transport_stream_op {
       have been completed. */
   grpc_closure *on_complete;
 
+  /** Is the completion of this op covered by a poller (if false: the op should
+      complete independently of some pollset being polled) */
+  bool covered_by_poller;
+
   /** Send initial metadata to the peer, from the provided metadata batch.
       idempotent_request MUST be set if this is non-null */
   grpc_metadata_batch *send_initial_metadata;
@@ -252,6 +256,7 @@ void grpc_transport_stream_op_add_close(grpc_transport_stream_op *op,
                                         gpr_slice *optional_message);
 
 char *grpc_transport_stream_op_string(grpc_transport_stream_op *op);
+char *grpc_transport_op_string(grpc_transport_op *op);
 
 /* Send a batch of operations on a transport
 
@@ -293,6 +298,10 @@ char *grpc_transport_get_peer(grpc_exec_ctx *exec_ctx,
 /* Allocate a grpc_transport_op, and preconfigure the on_consumed closure to
    \a on_consumed and then delete the returned transport op */
 grpc_transport_op *grpc_make_transport_op(grpc_closure *on_consumed);
+/* Allocate a grpc_transport_stream_op whose on_complete closure is preset to
+   schedule \a on_consumed and then free the returned transport stream op */
+grpc_transport_stream_op *grpc_make_transport_stream_op(
+    grpc_closure *on_consumed);
 
 #ifdef __cplusplus
 }
diff --git a/src/core/lib/transport/transport_op_string.c b/src/core/lib/transport/transport_op_string.c
index 138591db2a..8a687d8cd3 100644
--- a/src/core/lib/transport/transport_op_string.c
+++ b/src/core/lib/transport/transport_op_string.c
@@ -41,6 +41,7 @@
 #include <grpc/support/string_util.h>
 #include <grpc/support/useful.h>
 #include "src/core/lib/support/string.h"
+#include "src/core/lib/transport/connectivity_state.h"
 
 /* These routines are here to facilitate debugging - they produce string
    representations of various transport data structures */
@@ -143,6 +144,82 @@ char *grpc_transport_stream_op_string(grpc_transport_stream_op *op) {
   return out;
 }
 
+char *grpc_transport_op_string(grpc_transport_op *op) {
+  char *tmp;
+  char *out;
+  int first = 1;
+
+  gpr_strvec b;
+  gpr_strvec_init(&b);
+
+  if (op->on_connectivity_state_change != NULL) {
+    if (!first) gpr_strvec_add(&b, gpr_strdup(" "));
+    first = 0;
+    if (op->connectivity_state != NULL) {
+      gpr_asprintf(&tmp, "ON_CONNECTIVITY_STATE_CHANGE:p=%p:from=%s",
+                   op->on_connectivity_state_change,
+                   grpc_connectivity_state_name(*op->connectivity_state));
+      gpr_strvec_add(&b, tmp);
+    } else {
+      gpr_asprintf(&tmp, "ON_CONNECTIVITY_STATE_CHANGE:p=%p:unsubscribe",
+                   op->on_connectivity_state_change);
+      gpr_strvec_add(&b, tmp);
+    }
+  }
+
+  if (op->disconnect_with_error != GRPC_ERROR_NONE) {
+    if (!first) gpr_strvec_add(&b, gpr_strdup(" "));
+    first = 0;
+    const char *err = grpc_error_string(op->disconnect_with_error);
+    gpr_asprintf(&tmp, "DISCONNECT:%s", err);
+    gpr_strvec_add(&b, tmp);
+    grpc_error_free_string(err);
+  }
+
+  if (op->send_goaway) {
+    if (!first) gpr_strvec_add(&b, gpr_strdup(" "));
+    first = 0;
+    char *msg = op->goaway_message == NULL
+                    ? "null"
+                    : gpr_dump_slice(*op->goaway_message,
+                                     GPR_DUMP_ASCII | GPR_DUMP_HEX);
+    gpr_asprintf(&tmp, "SEND_GOAWAY:status=%d:msg=%s", op->goaway_status, msg);
+    if (op->goaway_message != NULL) gpr_free(msg);
+    gpr_strvec_add(&b, tmp);
+  }
+
+  if (op->set_accept_stream) {
+    if (!first) gpr_strvec_add(&b, gpr_strdup(" "));
+    first = 0;
+    gpr_asprintf(&tmp, "SET_ACCEPT_STREAM:%p(%p,...)", op->set_accept_stream_fn,
+                 op->set_accept_stream_user_data);
+    gpr_strvec_add(&b, tmp);
+  }
+
+  if (op->bind_pollset != NULL) {
+    if (!first) gpr_strvec_add(&b, gpr_strdup(" "));
+    first = 0;
+    gpr_strvec_add(&b, gpr_strdup("BIND_POLLSET"));
+  }
+
+  if (op->bind_pollset_set != NULL) {
+    if (!first) gpr_strvec_add(&b, gpr_strdup(" "));
+    first = 0;
+    gpr_strvec_add(&b, gpr_strdup("BIND_POLLSET_SET"));
+  }
+
+  if (op->send_ping != NULL) {
+    if (!first) gpr_strvec_add(&b, gpr_strdup(" "));
+    first = 0;
+    gpr_strvec_add(&b, gpr_strdup("SEND_PING"));
+  }
+
+  out = gpr_strvec_flatten(&b, NULL);
+  gpr_strvec_destroy(&b);
+
+  return out;
+}
+
 void grpc_call_log_op(char *file, int line, gpr_log_severity severity,
                       grpc_call_element *elem, grpc_transport_stream_op *op) {
   char *str = grpc_transport_stream_op_string(op);