47 files changed, 3735 insertions, 2065 deletions
diff --git a/src/core/ext/filters/client_channel/README.md b/src/core/ext/filters/client_channel/README.md
index 7c209db12e..9676a4535b 100644
--- a/src/core/ext/filters/client_channel/README.md
+++ b/src/core/ext/filters/client_channel/README.md
@@ -46,20 +46,4 @@ construction arguments for concrete grpc_subchannel instances.
 Naming for GRPC
 ===============
 
-Names in GRPC are represented by a URI (as defined in
-[RFC 3986](https://tools.ietf.org/html/rfc3986)).
-
-The following schemes are currently supported:
-
-dns:///host:port - dns schemes are currently supported so long as authority is
-                   empty (authority based dns resolution is expected in a future
-                   release)
-
-unix:path        - the unix scheme is used to create and connect to unix domain
-                   sockets - the authority must be empty, and the path
-                   represents the absolute or relative path to the desired
-                   socket
-
-ipv4:host:port   - a pre-resolved ipv4 dotted decimal address/port combination
-
-ipv6:[host]:port - a pre-resolved ipv6 address/port combination
+See [/doc/naming.md](gRPC name resolution).
diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc
index 3813190794..388736b60a 100644
--- a/src/core/ext/filters/client_channel/client_channel.cc
+++ b/src/core/ext/filters/client_channel/client_channel.cc
@@ -126,9 +126,9 @@ typedef struct client_channel_channel_data {
   /* the following properties are guarded by a mutex since APIs require them
      to be instantaneously available */
   gpr_mu info_mu;
-  char* info_lb_policy_name;
+  grpc_core::UniquePtr<char> info_lb_policy_name;
   /** service config in JSON form */
-  char* info_service_config_json;
+  grpc_core::UniquePtr<char> info_service_config_json;
 } channel_data;
 
 typedef struct {
@@ -284,6 +284,78 @@ static void parse_retry_throttle_params(
   }
 }
 
+// Invoked from the resolver NextLocked() callback when the resolver
+// is shutting down.
+static void on_resolver_shutdown_locked(channel_data* chand,
+                                        grpc_error* error) {
+  if (grpc_client_channel_trace.enabled()) {
+    gpr_log(GPR_INFO, "chand=%p: shutting down", chand);
+  }
+  if (chand->lb_policy != nullptr) {
+    if (grpc_client_channel_trace.enabled()) {
+      gpr_log(GPR_INFO, "chand=%p: shutting down lb_policy=%p", chand,
+              chand->lb_policy.get());
+    }
+    grpc_pollset_set_del_pollset_set(chand->lb_policy->interested_parties(),
+                                     chand->interested_parties);
+    chand->lb_policy.reset();
+  }
+  if (chand->resolver != nullptr) {
+    // This should never happen; it can only be triggered by a resolver
+    // implementation spotaneously deciding to report shutdown without
+    // being orphaned.  This code is included just to be defensive.
+    if (grpc_client_channel_trace.enabled()) {
+      gpr_log(GPR_INFO, "chand=%p: spontaneous shutdown from resolver %p",
+              chand, chand->resolver.get());
+    }
+    chand->resolver.reset();
+    set_channel_connectivity_state_locked(
+        chand, GRPC_CHANNEL_SHUTDOWN,
+        GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
+            "Resolver spontaneous shutdown", &error, 1),
+        "resolver_spontaneous_shutdown");
+  }
+  grpc_closure_list_fail_all(&chand->waiting_for_resolver_result_closures,
+                             GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
+                                 "Channel disconnected", &error, 1));
+  GRPC_CLOSURE_LIST_SCHED(&chand->waiting_for_resolver_result_closures);
+  GRPC_CHANNEL_STACK_UNREF(chand->owning_stack, "resolver");
+  grpc_channel_args_destroy(chand->resolver_result);
+  chand->resolver_result = nullptr;
+  GRPC_ERROR_UNREF(error);
+}
+
+// Returns the LB policy name from the resolver result.
+static grpc_core::UniquePtr<char>
+get_lb_policy_name_from_resolver_result_locked(channel_data* chand) {
+  // Find LB policy name in channel args.
+  const grpc_arg* channel_arg =
+      grpc_channel_args_find(chand->resolver_result, GRPC_ARG_LB_POLICY_NAME);
+  const char* lb_policy_name = grpc_channel_arg_get_string(channel_arg);
+  // Special case: If at least one balancer address is present, we use
+  // the grpclb policy, regardless of what the resolver actually specified.
+  channel_arg =
+      grpc_channel_args_find(chand->resolver_result, GRPC_ARG_LB_ADDRESSES);
+  if (channel_arg != nullptr && channel_arg->type == GRPC_ARG_POINTER) {
+    grpc_lb_addresses* addresses =
+        static_cast<grpc_lb_addresses*>(channel_arg->value.pointer.p);
+    if (grpc_lb_addresses_contains_balancer_address(*addresses)) {
+      if (lb_policy_name != nullptr &&
+          gpr_stricmp(lb_policy_name, "grpclb") != 0) {
+        gpr_log(GPR_INFO,
+                "resolver requested LB policy %s but provided at least one "
+                "balancer address -- forcing use of grpclb LB policy",
+                lb_policy_name);
+      }
+      lb_policy_name = "grpclb";
+    }
+  }
+  // Use pick_first if nothing was specified and we didn't select grpclb
+  // above.
+  if (lb_policy_name == nullptr) lb_policy_name = "pick_first";
+  return grpc_core::UniquePtr<char>(gpr_strdup(lb_policy_name));
+}
+
 static void request_reresolution_locked(void* arg, grpc_error* error) {
   reresolution_request_args* args =
       static_cast<reresolution_request_args*>(arg);
@@ -304,234 +376,182 @@ static void request_reresolution_locked(void* arg, grpc_error* error) {
   chand->lb_policy->SetReresolutionClosureLocked(&args->closure);
 }
 
-// TODO(roth): The logic in this function is very hard to follow.  We
-// should refactor this so that it's easier to understand, perhaps as
-// part of changing the resolver API to more clearly differentiate
-// between transient failures and shutdown.
-static void on_resolver_result_changed_locked(void* arg, grpc_error* error) {
-  channel_data* chand = static_cast<channel_data*>(arg);
-  if (grpc_client_channel_trace.enabled()) {
-    gpr_log(GPR_INFO,
-            "chand=%p: got resolver result: resolver_result=%p error=%s", chand,
-            chand->resolver_result, grpc_error_string(error));
-  }
-  // Extract the following fields from the resolver result, if non-nullptr.
-  bool lb_policy_updated = false;
-  bool lb_policy_created = false;
-  char* lb_policy_name_dup = nullptr;
-  bool lb_policy_name_changed = false;
-  grpc_core::OrphanablePtr<grpc_core::LoadBalancingPolicy> new_lb_policy;
-  char* service_config_json = nullptr;
-  grpc_core::RefCountedPtr<ServerRetryThrottleData> retry_throttle_data;
-  grpc_core::RefCountedPtr<MethodParamsTable> method_params_table;
-  if (chand->resolver_result != nullptr) {
-    if (chand->resolver != nullptr) {
-      // Find LB policy name.
-      const grpc_arg* channel_arg = grpc_channel_args_find(
-          chand->resolver_result, GRPC_ARG_LB_POLICY_NAME);
-      const char* lb_policy_name = grpc_channel_arg_get_string(channel_arg);
-      // Special case: If at least one balancer address is present, we use
-      // the grpclb policy, regardless of what the resolver actually specified.
-      channel_arg =
-          grpc_channel_args_find(chand->resolver_result, GRPC_ARG_LB_ADDRESSES);
-      if (channel_arg != nullptr && channel_arg->type == GRPC_ARG_POINTER) {
-        grpc_lb_addresses* addresses =
-            static_cast<grpc_lb_addresses*>(channel_arg->value.pointer.p);
-        bool found_balancer_address = false;
-        for (size_t i = 0; i < addresses->num_addresses; ++i) {
-          if (addresses->addresses[i].is_balancer) {
-            found_balancer_address = true;
-            break;
-          }
-        }
-        if (found_balancer_address) {
-          if (lb_policy_name != nullptr &&
-              strcmp(lb_policy_name, "grpclb") != 0) {
-            gpr_log(GPR_INFO,
-                    "resolver requested LB policy %s but provided at least one "
-                    "balancer address -- forcing use of grpclb LB policy",
-                    lb_policy_name);
-          }
-          lb_policy_name = "grpclb";
-        }
-      }
-      // Use pick_first if nothing was specified and we didn't select grpclb
-      // above.
-      if (lb_policy_name == nullptr) lb_policy_name = "pick_first";
-      // Check to see if we're already using the right LB policy.
-      // Note: It's safe to use chand->info_lb_policy_name here without
-      // taking a lock on chand->info_mu, because this function is the
-      // only thing that modifies its value, and it can only be invoked
-      // once at any given time.
-      lb_policy_name_changed =
-          chand->info_lb_policy_name == nullptr ||
-          gpr_stricmp(chand->info_lb_policy_name, lb_policy_name) != 0;
-      if (chand->lb_policy != nullptr && !lb_policy_name_changed) {
-        // Continue using the same LB policy.  Update with new addresses.
-        lb_policy_updated = true;
-        chand->lb_policy->UpdateLocked(*chand->resolver_result);
-      } else {
-        // Instantiate new LB policy.
-        grpc_core::LoadBalancingPolicy::Args lb_policy_args;
-        lb_policy_args.combiner = chand->combiner;
-        lb_policy_args.client_channel_factory = chand->client_channel_factory;
-        lb_policy_args.args = chand->resolver_result;
-        new_lb_policy =
-            grpc_core::LoadBalancingPolicyRegistry::CreateLoadBalancingPolicy(
-                lb_policy_name, lb_policy_args);
-        if (GPR_UNLIKELY(new_lb_policy == nullptr)) {
-          gpr_log(GPR_ERROR, "could not create LB policy \"%s\"",
-                  lb_policy_name);
-        } else {
-          lb_policy_created = true;
-          reresolution_request_args* args =
-              static_cast<reresolution_request_args*>(
-                  gpr_zalloc(sizeof(*args)));
-          args->chand = chand;
-          args->lb_policy = new_lb_policy.get();
-          GRPC_CLOSURE_INIT(&args->closure, request_reresolution_locked, args,
-                            grpc_combiner_scheduler(chand->combiner));
-          GRPC_CHANNEL_STACK_REF(chand->owning_stack, "re-resolution");
-          new_lb_policy->SetReresolutionClosureLocked(&args->closure);
-        }
-      }
-      // Before we clean up, save a copy of lb_policy_name, since it might
-      // be pointing to data inside chand->resolver_result.
-      // The copy will be saved in chand->lb_policy_name below.
-      lb_policy_name_dup = gpr_strdup(lb_policy_name);
-      // Find service config.
-      channel_arg = grpc_channel_args_find(chand->resolver_result,
-                                           GRPC_ARG_SERVICE_CONFIG);
-      service_config_json =
-          gpr_strdup(grpc_channel_arg_get_string(channel_arg));
-      if (service_config_json != nullptr) {
-        grpc_core::UniquePtr<grpc_core::ServiceConfig> service_config =
-            grpc_core::ServiceConfig::Create(service_config_json);
-        if (service_config != nullptr) {
-          if (chand->enable_retries) {
-            channel_arg = grpc_channel_args_find(chand->resolver_result,
-                                                 GRPC_ARG_SERVER_URI);
-            const char* server_uri = grpc_channel_arg_get_string(channel_arg);
-            GPR_ASSERT(server_uri != nullptr);
-            grpc_uri* uri = grpc_uri_parse(server_uri, true);
-            GPR_ASSERT(uri->path[0] != '\0');
-            service_config_parsing_state parsing_state;
-            memset(&parsing_state, 0, sizeof(parsing_state));
-            parsing_state.server_name =
-                uri->path[0] == '/' ? uri->path + 1 : uri->path;
-            service_config->ParseGlobalParams(parse_retry_throttle_params,
-                                              &parsing_state);
-            grpc_uri_destroy(uri);
-            retry_throttle_data = std::move(parsing_state.retry_throttle_data);
-          }
-          method_params_table = service_config->CreateMethodConfigTable(
-              ClientChannelMethodParams::CreateFromJson);
-        }
-      }
+// Creates a new LB policy, replacing any previous one.
+// If the new policy is created successfully, sets *connectivity_state and
+// *connectivity_error to its initial connectivity state; otherwise,
+// leaves them unchanged.
+static void create_new_lb_policy_locked(
+    channel_data* chand, char* lb_policy_name,
+    grpc_connectivity_state* connectivity_state,
+    grpc_error** connectivity_error) {
+  grpc_core::LoadBalancingPolicy::Args lb_policy_args;
+  lb_policy_args.combiner = chand->combiner;
+  lb_policy_args.client_channel_factory = chand->client_channel_factory;
+  lb_policy_args.args = chand->resolver_result;
+  grpc_core::OrphanablePtr<grpc_core::LoadBalancingPolicy> new_lb_policy =
+      grpc_core::LoadBalancingPolicyRegistry::CreateLoadBalancingPolicy(
+          lb_policy_name, lb_policy_args);
+  if (GPR_UNLIKELY(new_lb_policy == nullptr)) {
+    gpr_log(GPR_ERROR, "could not create LB policy \"%s\"", lb_policy_name);
+  } else {
+    if (grpc_client_channel_trace.enabled()) {
+      gpr_log(GPR_INFO, "chand=%p: created new LB policy \"%s\" (%p)", chand,
+              lb_policy_name, new_lb_policy.get());
     }
-  }
-  if (grpc_client_channel_trace.enabled()) {
-    gpr_log(GPR_INFO,
-            "chand=%p: resolver result: lb_policy_name=\"%s\"%s, "
-            "service_config=\"%s\"",
-            chand, lb_policy_name_dup,
-            lb_policy_name_changed ? " (changed)" : "", service_config_json);
-  }
-  // Now swap out fields in chand.  Note that the new values may still
-  // be nullptr if (e.g.) the resolver failed to return results or the
-  // results did not contain the necessary data.
-  //
-  // First, swap out the data used by cc_get_channel_info().
-  gpr_mu_lock(&chand->info_mu);
-  if (lb_policy_name_dup != nullptr) {
-    gpr_free(chand->info_lb_policy_name);
-    chand->info_lb_policy_name = lb_policy_name_dup;
-  }
-  if (service_config_json != nullptr) {
-    gpr_free(chand->info_service_config_json);
-    chand->info_service_config_json = service_config_json;
-  }
-  gpr_mu_unlock(&chand->info_mu);
-  // Swap out the retry throttle data.
-  chand->retry_throttle_data = std::move(retry_throttle_data);
-  // Swap out the method params table.
-  chand->method_params_table = std::move(method_params_table);
-  // If we have a new LB policy or are shutting down (in which case
-  // new_lb_policy will be nullptr), swap out the LB policy, unreffing the
-  // old one and removing its fds from chand->interested_parties.
-  // Note that we do NOT do this if either (a) we updated the existing
-  // LB policy above or (b) we failed to create the new LB policy (in
-  // which case we want to continue using the most recent one we had).
-  if (new_lb_policy != nullptr || error != GRPC_ERROR_NONE ||
-      chand->resolver == nullptr) {
+    // Swap out the LB policy and update the fds in
+    // chand->interested_parties.
     if (chand->lb_policy != nullptr) {
       if (grpc_client_channel_trace.enabled()) {
-        gpr_log(GPR_INFO, "chand=%p: unreffing lb_policy=%p", chand,
+        gpr_log(GPR_INFO, "chand=%p: shutting down lb_policy=%p", chand,
                 chand->lb_policy.get());
       }
       grpc_pollset_set_del_pollset_set(chand->lb_policy->interested_parties(),
                                        chand->interested_parties);
       chand->lb_policy->HandOffPendingPicksLocked(new_lb_policy.get());
-      chand->lb_policy.reset();
     }
     chand->lb_policy = std::move(new_lb_policy);
+    grpc_pollset_set_add_pollset_set(chand->lb_policy->interested_parties(),
+                                     chand->interested_parties);
+    // Set up re-resolution callback.
+    reresolution_request_args* args =
+        static_cast<reresolution_request_args*>(gpr_zalloc(sizeof(*args)));
+    args->chand = chand;
+    args->lb_policy = chand->lb_policy.get();
+    GRPC_CLOSURE_INIT(&args->closure, request_reresolution_locked, args,
+                      grpc_combiner_scheduler(chand->combiner));
+    GRPC_CHANNEL_STACK_REF(chand->owning_stack, "re-resolution");
+    chand->lb_policy->SetReresolutionClosureLocked(&args->closure);
+    // Get the new LB policy's initial connectivity state and start a
+    // connectivity watch.
+    GRPC_ERROR_UNREF(*connectivity_error);
+    *connectivity_state =
+        chand->lb_policy->CheckConnectivityLocked(connectivity_error);
+    if (chand->exit_idle_when_lb_policy_arrives) {
+      chand->lb_policy->ExitIdleLocked();
+      chand->exit_idle_when_lb_policy_arrives = false;
+    }
+    watch_lb_policy_locked(chand, chand->lb_policy.get(), *connectivity_state);
+  }
+}
+
+// Returns the service config (as a JSON string) from the resolver result.
+// Also updates state in chand.
+static grpc_core::UniquePtr<char>
+get_service_config_from_resolver_result_locked(channel_data* chand) {
+  const grpc_arg* channel_arg =
+      grpc_channel_args_find(chand->resolver_result, GRPC_ARG_SERVICE_CONFIG);
+  const char* service_config_json = grpc_channel_arg_get_string(channel_arg);
+  if (service_config_json != nullptr) {
+    if (grpc_client_channel_trace.enabled()) {
+      gpr_log(GPR_INFO, "chand=%p: resolver returned service config: \"%s\"",
+              chand, service_config_json);
+    }
+    grpc_core::UniquePtr<grpc_core::ServiceConfig> service_config =
+        grpc_core::ServiceConfig::Create(service_config_json);
+    if (service_config != nullptr) {
+      if (chand->enable_retries) {
+        channel_arg =
+            grpc_channel_args_find(chand->resolver_result, GRPC_ARG_SERVER_URI);
+        const char* server_uri = grpc_channel_arg_get_string(channel_arg);
+        GPR_ASSERT(server_uri != nullptr);
+        grpc_uri* uri = grpc_uri_parse(server_uri, true);
+        GPR_ASSERT(uri->path[0] != '\0');
+        service_config_parsing_state parsing_state;
+        parsing_state.server_name =
+            uri->path[0] == '/' ? uri->path + 1 : uri->path;
+        service_config->ParseGlobalParams(parse_retry_throttle_params,
+                                          &parsing_state);
+        grpc_uri_destroy(uri);
+        chand->retry_throttle_data =
+            std::move(parsing_state.retry_throttle_data);
+      }
+      chand->method_params_table = service_config->CreateMethodConfigTable(
+          ClientChannelMethodParams::CreateFromJson);
+    }
   }
-  // Now that we've swapped out the relevant fields of chand, check for
-  // error or shutdown.
+  return grpc_core::UniquePtr<char>(gpr_strdup(service_config_json));
+}
+
+// Callback invoked when a resolver result is available.
+static void on_resolver_result_changed_locked(void* arg, grpc_error* error) {
+  channel_data* chand = static_cast<channel_data*>(arg);
+  if (grpc_client_channel_trace.enabled()) {
+    const char* disposition =
+        chand->resolver_result != nullptr
+            ? ""
+            : (error == GRPC_ERROR_NONE ? " (transient error)"
+                                        : " (resolver shutdown)");
+    gpr_log(GPR_INFO,
+            "chand=%p: got resolver result: resolver_result=%p error=%s%s",
+            chand, chand->resolver_result, grpc_error_string(error),
+            disposition);
+  }
+  // Handle shutdown.
   if (error != GRPC_ERROR_NONE || chand->resolver == nullptr) {
+    on_resolver_shutdown_locked(chand, GRPC_ERROR_REF(error));
+    return;
+  }
+  // Data used to set the channel's connectivity state.
+  bool set_connectivity_state = true;
+  grpc_connectivity_state connectivity_state = GRPC_CHANNEL_TRANSIENT_FAILURE;
+  grpc_error* connectivity_error =
+      GRPC_ERROR_CREATE_FROM_STATIC_STRING("No load balancing policy");
+  // chand->resolver_result will be null in the case of a transient
+  // resolution error.  In that case, we don't have any new result to
+  // process, which means that we keep using the previous result (if any).
+  if (chand->resolver_result == nullptr) {
     if (grpc_client_channel_trace.enabled()) {
-      gpr_log(GPR_INFO, "chand=%p: shutting down", chand);
-    }
-    if (chand->resolver != nullptr) {
-      if (grpc_client_channel_trace.enabled()) {
-        gpr_log(GPR_INFO, "chand=%p: shutting down resolver", chand);
-      }
-      chand->resolver.reset();
+      gpr_log(GPR_INFO, "chand=%p: resolver transient failure", chand);
     }
-    set_channel_connectivity_state_locked(
-        chand, GRPC_CHANNEL_SHUTDOWN,
-        GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
-            "Got resolver result after disconnection", &error, 1),
-        "resolver_gone");
-    grpc_closure_list_fail_all(&chand->waiting_for_resolver_result_closures,
-                               GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
-                                   "Channel disconnected", &error, 1));
-    GRPC_CLOSURE_LIST_SCHED(&chand->waiting_for_resolver_result_closures);
-    GRPC_CHANNEL_STACK_UNREF(chand->owning_stack, "resolver");
-    grpc_channel_args_destroy(chand->resolver_result);
-    chand->resolver_result = nullptr;
-  } else {  // Not shutting down.
-    grpc_connectivity_state state = GRPC_CHANNEL_TRANSIENT_FAILURE;
-    grpc_error* state_error =
-        GRPC_ERROR_CREATE_FROM_STATIC_STRING("No load balancing policy");
-    if (lb_policy_created) {
+  } else {
+    grpc_core::UniquePtr<char> lb_policy_name =
+        get_lb_policy_name_from_resolver_result_locked(chand);
+    // Check to see if we're already using the right LB policy.
+    // Note: It's safe to use chand->info_lb_policy_name here without
+    // taking a lock on chand->info_mu, because this function is the
+    // only thing that modifies its value, and it can only be invoked
+    // once at any given time.
+    bool lb_policy_name_changed = chand->info_lb_policy_name == nullptr ||
+                                  gpr_stricmp(chand->info_lb_policy_name.get(),
+                                              lb_policy_name.get()) != 0;
+    if (chand->lb_policy != nullptr && !lb_policy_name_changed) {
+      // Continue using the same LB policy.  Update with new addresses.
       if (grpc_client_channel_trace.enabled()) {
-        gpr_log(GPR_INFO, "chand=%p: initializing new LB policy", chand);
+        gpr_log(GPR_INFO, "chand=%p: updating existing LB policy \"%s\" (%p)",
+                chand, lb_policy_name.get(), chand->lb_policy.get());
       }
-      GRPC_ERROR_UNREF(state_error);
-      state = chand->lb_policy->CheckConnectivityLocked(&state_error);
-      grpc_pollset_set_add_pollset_set(chand->lb_policy->interested_parties(),
-                                       chand->interested_parties);
-      GRPC_CLOSURE_LIST_SCHED(&chand->waiting_for_resolver_result_closures);
-      if (chand->exit_idle_when_lb_policy_arrives) {
-        chand->lb_policy->ExitIdleLocked();
-        chand->exit_idle_when_lb_policy_arrives = false;
-      }
-      watch_lb_policy_locked(chand, chand->lb_policy.get(), state);
-    } else if (chand->resolver_result == nullptr) {
-      // Transient failure.
-      GRPC_CLOSURE_LIST_SCHED(&chand->waiting_for_resolver_result_closures);
-    }
-    if (!lb_policy_updated) {
-      set_channel_connectivity_state_locked(
-          chand, state, GRPC_ERROR_REF(state_error), "new_lb+resolver");
-    }
+      chand->lb_policy->UpdateLocked(*chand->resolver_result);
+      // No need to set the channel's connectivity state; the existing
+      // watch on the LB policy will take care of that.
+      set_connectivity_state = false;
+    } else {
+      // Instantiate new LB policy.
+      create_new_lb_policy_locked(chand, lb_policy_name.get(),
+                                  &connectivity_state, &connectivity_error);
+    }
+    // Find service config.
+    grpc_core::UniquePtr<char> service_config_json =
+        get_service_config_from_resolver_result_locked(chand);
+    // Swap out the data used by cc_get_channel_info().
+    gpr_mu_lock(&chand->info_mu);
+    chand->info_lb_policy_name = std::move(lb_policy_name);
+    chand->info_service_config_json = std::move(service_config_json);
+    gpr_mu_unlock(&chand->info_mu);
+    // Clean up.
     grpc_channel_args_destroy(chand->resolver_result);
     chand->resolver_result = nullptr;
-    chand->resolver->NextLocked(&chand->resolver_result,
-                                &chand->on_resolver_result_changed);
-    GRPC_ERROR_UNREF(state_error);
   }
+  // Set the channel's connectivity state if needed.
+  if (set_connectivity_state) {
+    set_channel_connectivity_state_locked(
+        chand, connectivity_state, connectivity_error, "resolver_result");
+  } else {
+    GRPC_ERROR_UNREF(connectivity_error);
+  }
+  // Invoke closures that were waiting for results and renew the watch.
+  GRPC_CLOSURE_LIST_SCHED(&chand->waiting_for_resolver_result_closures);
+  chand->resolver->NextLocked(&chand->resolver_result,
+                              &chand->on_resolver_result_changed);
 }
 
 static void start_transport_op_locked(void* arg, grpc_error* error_ignored) {
@@ -550,15 +570,32 @@ static void start_transport_op_locked(void* arg, grpc_error* error_ignored) {
 
   if (op->send_ping.on_initiate != nullptr || op->send_ping.on_ack != nullptr) {
     if (chand->lb_policy == nullptr) {
-      GRPC_CLOSURE_SCHED(
-          op->send_ping.on_initiate,
-          GRPC_ERROR_CREATE_FROM_STATIC_STRING("Ping with no load balancing"));
-      GRPC_CLOSURE_SCHED(
-          op->send_ping.on_ack,
-          GRPC_ERROR_CREATE_FROM_STATIC_STRING("Ping with no load balancing"));
+      grpc_error* error =
+          GRPC_ERROR_CREATE_FROM_STATIC_STRING("Ping with no load balancing");
+      GRPC_CLOSURE_SCHED(op->send_ping.on_initiate, GRPC_ERROR_REF(error));
+      GRPC_CLOSURE_SCHED(op->send_ping.on_ack, error);
     } else {
-      chand->lb_policy->PingOneLocked(op->send_ping.on_initiate,
-                                      op->send_ping.on_ack);
+      grpc_error* error = GRPC_ERROR_NONE;
+      grpc_core::LoadBalancingPolicy::PickState pick_state;
+      pick_state.initial_metadata = nullptr;
+      pick_state.initial_metadata_flags = 0;
+      pick_state.on_complete = nullptr;
+      memset(&pick_state.subchannel_call_context, 0,
+             sizeof(pick_state.subchannel_call_context));
+      pick_state.user_data = nullptr;
+      // Pick must return synchronously, because pick_state.on_complete is null.
+      GPR_ASSERT(chand->lb_policy->PickLocked(&pick_state, &error));
+      if (pick_state.connected_subchannel != nullptr) {
+        pick_state.connected_subchannel->Ping(op->send_ping.on_initiate,
+                                              op->send_ping.on_ack);
+      } else {
+        if (error == GRPC_ERROR_NONE) {
+          error = GRPC_ERROR_CREATE_FROM_STATIC_STRING(
+              "LB policy dropped call on ping");
+        }
+        GRPC_CLOSURE_SCHED(op->send_ping.on_initiate, GRPC_ERROR_REF(error));
+        GRPC_CLOSURE_SCHED(op->send_ping.on_ack, error);
+      }
       op->bind_pollset = nullptr;
     }
     op->send_ping.on_initiate = nullptr;
@@ -584,6 +621,17 @@ static void start_transport_op_locked(void* arg, grpc_error* error_ignored) {
     }
     GRPC_ERROR_UNREF(op->disconnect_with_error);
   }
+
+  if (op->reset_connect_backoff) {
+    if (chand->resolver != nullptr) {
+      chand->resolver->ResetBackoffLocked();
+      chand->resolver->RequestReresolutionLocked();
+    }
+    if (chand->lb_policy != nullptr) {
+      chand->lb_policy->ResetBackoffLocked();
+    }
+  }
+
   GRPC_CHANNEL_STACK_UNREF(chand->owning_stack, "start_transport_op");
 
   GRPC_CLOSURE_SCHED(op->on_consumed, GRPC_ERROR_NONE);
@@ -611,15 +659,11 @@ static void cc_get_channel_info(grpc_channel_element* elem,
   channel_data* chand = static_cast<channel_data*>(elem->channel_data);
   gpr_mu_lock(&chand->info_mu);
   if (info->lb_policy_name != nullptr) {
-    *info->lb_policy_name = chand->info_lb_policy_name == nullptr
-                                ? nullptr
-                                : gpr_strdup(chand->info_lb_policy_name);
+    *info->lb_policy_name = gpr_strdup(chand->info_lb_policy_name.get());
   }
   if (info->service_config_json != nullptr) {
     *info->service_config_json =
-        chand->info_service_config_json == nullptr
-            ? nullptr
-            : gpr_strdup(chand->info_service_config_json);
+        gpr_strdup(chand->info_service_config_json.get());
   }
   gpr_mu_unlock(&chand->info_mu);
 }
@@ -699,19 +743,15 @@ static grpc_error* cc_init_channel_elem(grpc_channel_element* elem,
   return GRPC_ERROR_NONE;
 }
 
-static void shutdown_resolver_locked(void* arg, grpc_error* error) {
-  grpc_core::Resolver* resolver = static_cast<grpc_core::Resolver*>(arg);
-  resolver->Orphan();
-}
-
 /* Destructor for channel_data */
 static void cc_destroy_channel_elem(grpc_channel_element* elem) {
   channel_data* chand = static_cast<channel_data*>(elem->channel_data);
   if (chand->resolver != nullptr) {
-    GRPC_CLOSURE_SCHED(
-        GRPC_CLOSURE_CREATE(shutdown_resolver_locked, chand->resolver.release(),
-                            grpc_combiner_scheduler(chand->combiner)),
-        GRPC_ERROR_NONE);
+    // The only way we can get here is if we never started resolving,
+    // because we take a ref to the channel stack when we start
+    // resolving and do not release it until the resolver callback is
+    // invoked after the resolver shuts down.
+    chand->resolver.reset();
   }
   if (chand->client_channel_factory != nullptr) {
     grpc_client_channel_factory_unref(chand->client_channel_factory);
@@ -721,8 +761,10 @@ static void cc_destroy_channel_elem(grpc_channel_element* elem) {
                                      chand->interested_parties);
     chand->lb_policy.reset();
   }
-  gpr_free(chand->info_lb_policy_name);
-  gpr_free(chand->info_service_config_json);
+  // TODO(roth): Once we convert the filter API to C++, there will no
+  // longer be any need to explicitly reset these smart pointer data members.
+  chand->info_lb_policy_name.reset();
+  chand->info_service_config_json.reset();
   chand->retry_throttle_data.reset();
   chand->method_params_table.reset();
   grpc_client_channel_stop_backup_polling(chand->interested_parties);
@@ -794,6 +836,15 @@ typedef struct {
   // The batch to use in the subchannel call.
   // Its payload field points to subchannel_call_retry_state.batch_payload.
   grpc_transport_stream_op_batch batch;
+  // For intercepting on_complete.
+  grpc_closure on_complete;
+} subchannel_batch_data;
+
+// Retry state associated with a subchannel call.
+// Stored in the parent_data of the subchannel call object.
+typedef struct {
+  // subchannel_batch_data.batch.payload points to this.
+  grpc_transport_stream_op_batch_payload batch_payload;
   // For send_initial_metadata.
   // Note that we need to make a copy of the initial metadata for each
   // subchannel call instead of just referring to the copy in call_data,
@@ -817,15 +868,7 @@ typedef struct {
   // For intercepting recv_trailing_metadata.
   grpc_metadata_batch recv_trailing_metadata;
   grpc_transport_stream_stats collect_stats;
-  // For intercepting on_complete.
-  grpc_closure on_complete;
-} subchannel_batch_data;
-
-// Retry state associated with a subchannel call.
-// Stored in the parent_data of the subchannel call object.
-typedef struct {
-  // subchannel_batch_data.batch.payload points to this.
-  grpc_transport_stream_op_batch_payload batch_payload;
+  grpc_closure recv_trailing_metadata_ready;
   // These fields indicate which ops have been started and completed on
   // this subchannel call.
   size_t started_send_message_count;
@@ -890,7 +933,13 @@ typedef struct client_channel_call_data {
   grpc_closure pick_closure;
   grpc_closure pick_cancel_closure;
 
+  // state needed to support channelz interception of recv trailing metadata.
+  grpc_closure recv_trailing_metadata_ready_channelz;
+  grpc_closure* original_recv_trailing_metadata;
+  grpc_metadata_batch* recv_trailing_metadata;
+
   grpc_polling_entity* pollent;
+  bool pollent_added_to_interested_parties;
 
   // Batches are added to this list when received from above.
   // They are removed when we are done handling the batch (i.e., when
@@ -911,6 +960,15 @@ typedef struct client_channel_call_data {
   grpc_core::ManualConstructor<grpc_core::BackOff> retry_backoff;
   grpc_timer retry_timer;
 
+  // The number of pending retriable subchannel batches containing send ops.
+  // We hold a ref to the call stack while this is non-zero, since replay
+  // batches may not complete until after all callbacks have been returned
+  // to the surface, and we need to make sure that the call is not destroyed
+  // until all of these batches have completed.
+  // Note that we actually only need to track replay batches, but it's
+  // easier to track all batches with send ops.
+  int num_pending_retriable_subchannel_send_batches;
+
   // Cached data for retrying send ops.
   // send_initial_metadata
   bool seen_send_initial_metadata;
@@ -940,8 +998,9 @@ static void retry_commit(grpc_call_element* elem,
 static void start_internal_recv_trailing_metadata(grpc_call_element* elem);
 static void on_complete(void* arg, grpc_error* error);
 static void start_retriable_subchannel_batches(void* arg, grpc_error* ignored);
-static void pick_after_resolver_result_start_locked(grpc_call_element* elem);
 static void start_pick_locked(void* arg, grpc_error* ignored);
+static void maybe_intercept_recv_trailing_metadata_for_channelz(
+    grpc_call_element* elem, grpc_transport_stream_op_batch* batch);
 
 //
 // send op data caching
@@ -1114,7 +1173,8 @@ static void pending_batches_add(grpc_call_element* elem,
     if (batch->send_trailing_metadata) {
       calld->pending_send_trailing_metadata = true;
     }
-    if (calld->bytes_buffered_for_retry > chand->per_rpc_retry_buffer_size) {
+    if (GPR_UNLIKELY(calld->bytes_buffered_for_retry >
+                     chand->per_rpc_retry_buffer_size)) {
       if (grpc_client_channel_trace.enabled()) {
         gpr_log(GPR_INFO,
                 "chand=%p calld=%p: exceeded retry buffer size, committing",
@@ -1182,35 +1242,24 @@ static void pending_batches_fail(grpc_call_element* elem, grpc_error* error,
             "chand=%p calld=%p: failing %" PRIuPTR " pending batches: %s",
             elem->channel_data, calld, num_batches, grpc_error_string(error));
   }
-  grpc_transport_stream_op_batch*
-      batches[GPR_ARRAY_SIZE(calld->pending_batches)];
-  size_t num_batches = 0;
+  grpc_core::CallCombinerClosureList closures;
   for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) {
     pending_batch* pending = &calld->pending_batches[i];
     grpc_transport_stream_op_batch* batch = pending->batch;
     if (batch != nullptr) {
-      batches[num_batches++] = batch;
+      batch->handler_private.extra_arg = calld;
+      GRPC_CLOSURE_INIT(&batch->handler_private.closure,
+                        fail_pending_batch_in_call_combiner, batch,
+                        grpc_schedule_on_exec_ctx);
+      closures.Add(&batch->handler_private.closure, GRPC_ERROR_REF(error),
+                   "pending_batches_fail");
       pending_batch_clear(calld, pending);
     }
   }
-  for (size_t i = yield_call_combiner ? 1 : 0; i < num_batches; ++i) {
-    grpc_transport_stream_op_batch* batch = batches[i];
-    batch->handler_private.extra_arg = calld;
-    GRPC_CLOSURE_INIT(&batch->handler_private.closure,
-                      fail_pending_batch_in_call_combiner, batch,
-                      grpc_schedule_on_exec_ctx);
-    GRPC_CALL_COMBINER_START(calld->call_combiner,
-                             &batch->handler_private.closure,
-                             GRPC_ERROR_REF(error), "pending_batches_fail");
-  }
   if (yield_call_combiner) {
-    if (num_batches > 0) {
-      // Note: This will release the call combiner.
-      grpc_transport_stream_op_batch_finish_with_failure(
-          batches[0], GRPC_ERROR_REF(error), calld->call_combiner);
-    } else {
-      GRPC_CALL_COMBINER_STOP(calld->call_combiner, "pending_batches_fail");
-    }
+    closures.RunClosures(calld->call_combiner);
+  } else {
+    closures.RunClosuresWithoutYielding(calld->call_combiner);
   }
   GRPC_ERROR_UNREF(error);
 }
@@ -1245,30 +1294,23 @@ static void pending_batches_resume(grpc_call_element* elem) {
             " pending batches on subchannel_call=%p",
             chand, calld, num_batches, calld->subchannel_call);
   }
-  grpc_transport_stream_op_batch*
-      batches[GPR_ARRAY_SIZE(calld->pending_batches)];
-  size_t num_batches = 0;
+  grpc_core::CallCombinerClosureList closures;
   for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) {
     pending_batch* pending = &calld->pending_batches[i];
     grpc_transport_stream_op_batch* batch = pending->batch;
     if (batch != nullptr) {
-      batches[num_batches++] = batch;
+      maybe_intercept_recv_trailing_metadata_for_channelz(elem, batch);
+      batch->handler_private.extra_arg = calld->subchannel_call;
+      GRPC_CLOSURE_INIT(&batch->handler_private.closure,
+                        resume_pending_batch_in_call_combiner, batch,
+                        grpc_schedule_on_exec_ctx);
+      closures.Add(&batch->handler_private.closure, GRPC_ERROR_NONE,
+                   "pending_batches_resume");
       pending_batch_clear(calld, pending);
     }
   }
-  for (size_t i = 1; i < num_batches; ++i) {
-    grpc_transport_stream_op_batch* batch = batches[i];
-    batch->handler_private.extra_arg = calld->subchannel_call;
-    GRPC_CLOSURE_INIT(&batch->handler_private.closure,
-                      resume_pending_batch_in_call_combiner, batch,
-                      grpc_schedule_on_exec_ctx);
-    GRPC_CALL_COMBINER_START(calld->call_combiner,
-                             &batch->handler_private.closure, GRPC_ERROR_NONE,
-                             "pending_batches_resume");
-  }
-  GPR_ASSERT(num_batches > 0);
   // Note: This will release the call combiner.
-  grpc_subchannel_call_process_op(calld->subchannel_call, batches[0]);
+  closures.RunClosures(calld->call_combiner);
 }
 
 static void maybe_clear_pending_batch(grpc_call_element* elem,
@@ -1283,7 +1325,10 @@ static void maybe_clear_pending_batch(grpc_call_element* elem,
        batch->payload->recv_initial_metadata.recv_initial_metadata_ready ==
            nullptr) &&
       (!batch->recv_message ||
-       batch->payload->recv_message.recv_message_ready == nullptr)) {
+       batch->payload->recv_message.recv_message_ready == nullptr) &&
+      (!batch->recv_trailing_metadata ||
+       batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready ==
+           nullptr)) {
     if (grpc_client_channel_trace.enabled()) {
       gpr_log(GPR_INFO, "chand=%p calld=%p: clearing pending batch", chand,
               calld);
@@ -1292,75 +1337,27 @@ static void maybe_clear_pending_batch(grpc_call_element* elem,
   }
 }
 
-// Returns true if all ops in the pending batch have been completed.
-static bool pending_batch_is_completed(
-    pending_batch* pending, call_data* calld,
-    subchannel_call_retry_state* retry_state) {
-  if (pending->batch == nullptr || pending->batch->on_complete == nullptr) {
-    return false;
-  }
-  if (pending->batch->send_initial_metadata &&
-      !retry_state->completed_send_initial_metadata) {
-    return false;
-  }
-  if (pending->batch->send_message &&
-      retry_state->completed_send_message_count <
-          calld->send_messages->size()) {
-    return false;
-  }
-  if (pending->batch->send_trailing_metadata &&
-      !retry_state->completed_send_trailing_metadata) {
-    return false;
-  }
-  if (pending->batch->recv_initial_metadata &&
-      !retry_state->completed_recv_initial_metadata) {
-    return false;
-  }
-  if (pending->batch->recv_message &&
-      retry_state->completed_recv_message_count <
-          retry_state->started_recv_message_count) {
-    return false;
-  }
-  if (pending->batch->recv_trailing_metadata &&
-      !retry_state->completed_recv_trailing_metadata) {
-    return false;
-  }
-  return true;
-}
-
-// Returns true if any op in the batch was not yet started.
-static bool pending_batch_is_unstarted(
-    pending_batch* pending, call_data* calld,
-    subchannel_call_retry_state* retry_state) {
-  if (pending->batch == nullptr || pending->batch->on_complete == nullptr) {
-    return false;
-  }
-  if (pending->batch->send_initial_metadata &&
-      !retry_state->started_send_initial_metadata) {
-    return true;
-  }
-  if (pending->batch->send_message &&
-      retry_state->started_send_message_count < calld->send_messages->size()) {
-    return true;
-  }
-  if (pending->batch->send_trailing_metadata &&
-      !retry_state->started_send_trailing_metadata) {
-    return true;
-  }
-  if (pending->batch->recv_initial_metadata &&
-      !retry_state->started_recv_initial_metadata) {
-    return true;
-  }
-  if (pending->batch->recv_message &&
-      retry_state->completed_recv_message_count ==
-          retry_state->started_recv_message_count) {
-    return true;
-  }
-  if (pending->batch->recv_trailing_metadata &&
-      !retry_state->started_recv_trailing_metadata) {
-    return true;
+// Returns a pointer to the first pending batch for which predicate(batch)
+// returns true, or null if not found.
+template <typename Predicate>
+static pending_batch* pending_batch_find(grpc_call_element* elem,
+                                         const char* log_message,
+                                         Predicate predicate) {
+  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) {
+    pending_batch* pending = &calld->pending_batches[i];
+    grpc_transport_stream_op_batch* batch = pending->batch;
+    if (batch != nullptr && predicate(batch)) {
+      if (grpc_client_channel_trace.enabled()) {
+        gpr_log(GPR_INFO,
+                "chand=%p calld=%p: %s pending batch at index %" PRIuPTR, chand,
+                calld, log_message, i);
+      }
+      return pending;
+    }
   }
-  return false;
+  return nullptr;
 }
 
 //
@@ -1421,7 +1418,7 @@ static void do_retry(grpc_call_element* elem,
   }
   if (grpc_client_channel_trace.enabled()) {
     gpr_log(GPR_INFO,
-            "chand=%p calld=%p: retrying failed call in %" PRIuPTR " ms", chand,
+            "chand=%p calld=%p: retrying failed call in %" PRId64 " ms", chand,
             calld, next_attempt_time - grpc_core::ExecCtx::Get()->Now());
   }
   // Schedule retry after computed delay.
@@ -1461,7 +1458,7 @@ static bool maybe_retry(grpc_call_element* elem,
     }
   }
   // Check status.
-  if (status == GRPC_STATUS_OK) {
+  if (GPR_LIKELY(status == GRPC_STATUS_OK)) {
     if (calld->retry_throttle_data != nullptr) {
       calld->retry_throttle_data->RecordSuccess();
     }
@@ -1547,8 +1544,13 @@ static bool maybe_retry(grpc_call_element* elem,
 // subchannel_batch_data
 //
 
+// Creates a subchannel_batch_data object on the call's arena with the
+// specified refcount.  If set_on_complete is true, the batch's
+// on_complete callback will be set to point to on_complete();
+// otherwise, the batch's on_complete callback will be null.
 static subchannel_batch_data* batch_data_create(grpc_call_element* elem,
-                                                int refcount) {
+                                                int refcount,
+                                                bool set_on_complete) {
   call_data* calld = static_cast<call_data*>(elem->call_data);
   subchannel_call_retry_state* retry_state =
       static_cast<subchannel_call_retry_state*>(
@@ -1561,26 +1563,32 @@ static subchannel_batch_data* batch_data_create(grpc_call_element* elem,
       GRPC_SUBCHANNEL_CALL_REF(calld->subchannel_call, "batch_data_create");
   batch_data->batch.payload = &retry_state->batch_payload;
   gpr_ref_init(&batch_data->refs, refcount);
-  GRPC_CLOSURE_INIT(&batch_data->on_complete, on_complete, batch_data,
-                    grpc_schedule_on_exec_ctx);
-  batch_data->batch.on_complete = &batch_data->on_complete;
+  if (set_on_complete) {
+    GRPC_CLOSURE_INIT(&batch_data->on_complete, on_complete, batch_data,
+                      grpc_schedule_on_exec_ctx);
+    batch_data->batch.on_complete = &batch_data->on_complete;
+  }
   GRPC_CALL_STACK_REF(calld->owning_call, "batch_data");
   return batch_data;
 }
 
 static void batch_data_unref(subchannel_batch_data* batch_data) {
   if (gpr_unref(&batch_data->refs)) {
-    if (batch_data->send_initial_metadata_storage != nullptr) {
-      grpc_metadata_batch_destroy(&batch_data->send_initial_metadata);
+    subchannel_call_retry_state* retry_state =
+        static_cast<subchannel_call_retry_state*>(
+            grpc_connected_subchannel_call_get_parent_data(
+                batch_data->subchannel_call));
+    if (batch_data->batch.send_initial_metadata) {
+      grpc_metadata_batch_destroy(&retry_state->send_initial_metadata);
     }
-    if (batch_data->send_trailing_metadata_storage != nullptr) {
-      grpc_metadata_batch_destroy(&batch_data->send_trailing_metadata);
+    if (batch_data->batch.send_trailing_metadata) {
+      grpc_metadata_batch_destroy(&retry_state->send_trailing_metadata);
     }
     if (batch_data->batch.recv_initial_metadata) {
-      grpc_metadata_batch_destroy(&batch_data->recv_initial_metadata);
+      grpc_metadata_batch_destroy(&retry_state->recv_initial_metadata);
     }
     if (batch_data->batch.recv_trailing_metadata) {
-      grpc_metadata_batch_destroy(&batch_data->recv_trailing_metadata);
+      grpc_metadata_batch_destroy(&retry_state->recv_trailing_metadata);
     }
     GRPC_SUBCHANNEL_CALL_UNREF(batch_data->subchannel_call, "batch_data_unref");
     call_data* calld = static_cast<call_data*>(batch_data->elem->call_data);
@@ -1596,30 +1604,22 @@ static void batch_data_unref(subchannel_batch_data* batch_data) {
 static void invoke_recv_initial_metadata_callback(void* arg,
                                                   grpc_error* error) {
   subchannel_batch_data* batch_data = static_cast<subchannel_batch_data*>(arg);
-  channel_data* chand =
-      static_cast<channel_data*>(batch_data->elem->channel_data);
-  call_data* calld = static_cast<call_data*>(batch_data->elem->call_data);
   // Find pending batch.
-  pending_batch* pending = nullptr;
-  for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) {
-    grpc_transport_stream_op_batch* batch = calld->pending_batches[i].batch;
-    if (batch != nullptr && batch->recv_initial_metadata &&
-        batch->payload->recv_initial_metadata.recv_initial_metadata_ready !=
-            nullptr) {
-      if (grpc_client_channel_trace.enabled()) {
-        gpr_log(GPR_INFO,
-                "chand=%p calld=%p: invoking recv_initial_metadata_ready for "
-                "pending batch at index %" PRIuPTR,
-                chand, calld, i);
-      }
-      pending = &calld->pending_batches[i];
-      break;
-    }
-  }
+  pending_batch* pending = pending_batch_find(
+      batch_data->elem, "invoking recv_initial_metadata_ready for",
+      [](grpc_transport_stream_op_batch* batch) {
+        return batch->recv_initial_metadata &&
+               batch->payload->recv_initial_metadata
+                       .recv_initial_metadata_ready != nullptr;
+      });
   GPR_ASSERT(pending != nullptr);
   // Return metadata.
+  subchannel_call_retry_state* retry_state =
+      static_cast<subchannel_call_retry_state*>(
+          grpc_connected_subchannel_call_get_parent_data(
+              batch_data->subchannel_call));
   grpc_metadata_batch_move(
-      &batch_data->recv_initial_metadata,
+      &retry_state->recv_initial_metadata,
       pending->batch->payload->recv_initial_metadata.recv_initial_metadata);
   // Update bookkeeping.
   // Note: Need to do this before invoking the callback, since invoking
@@ -1651,12 +1651,22 @@ static void recv_initial_metadata_ready(void* arg, grpc_error* error) {
       static_cast<subchannel_call_retry_state*>(
           grpc_connected_subchannel_call_get_parent_data(
               batch_data->subchannel_call));
+  retry_state->completed_recv_initial_metadata = true;
+  // If a retry was already dispatched, then we're not going to use the
+  // result of this recv_initial_metadata op, so do nothing.
+  if (retry_state->retry_dispatched) {
+    GRPC_CALL_COMBINER_STOP(
+        calld->call_combiner,
+        "recv_initial_metadata_ready after retry dispatched");
+    return;
+  }
   // If we got an error or a Trailers-Only response and have not yet gotten
-  // the recv_trailing_metadata on_complete callback, then defer
-  // propagating this callback back to the surface.  We can evaluate whether
-  // to retry when recv_trailing_metadata comes back.
-  if ((batch_data->trailing_metadata_available || error != GRPC_ERROR_NONE) &&
-      !retry_state->completed_recv_trailing_metadata) {
+  // the recv_trailing_metadata_ready callback, then defer propagating this
+  // callback back to the surface.  We can evaluate whether to retry when
+  // recv_trailing_metadata comes back.
+  if (GPR_UNLIKELY((retry_state->trailing_metadata_available ||
+                    error != GRPC_ERROR_NONE) &&
+                   !retry_state->completed_recv_trailing_metadata)) {
     if (grpc_client_channel_trace.enabled()) {
       gpr_log(GPR_INFO,
               "chand=%p calld=%p: deferring recv_initial_metadata_ready "
@@ -1678,9 +1688,9 @@ static void recv_initial_metadata_ready(void* arg, grpc_error* error) {
   }
   // Received valid initial metadata, so commit the call.
   retry_commit(elem, retry_state);
+  // Invoke the callback to return the result to the surface.
   // Manually invoking a callback function; it does not take ownership of error.
   invoke_recv_initial_metadata_callback(batch_data, error);
-  GRPC_ERROR_UNREF(error);
 }
 
 //
@@ -1690,29 +1700,21 @@ static void recv_initial_metadata_ready(void* arg, grpc_error* error) {
 // Invokes recv_message_ready for a subchannel batch.
 static void invoke_recv_message_callback(void* arg, grpc_error* error) {
   subchannel_batch_data* batch_data = static_cast<subchannel_batch_data*>(arg);
-  channel_data* chand =
-      static_cast<channel_data*>(batch_data->elem->channel_data);
-  call_data* calld = static_cast<call_data*>(batch_data->elem->call_data);
   // Find pending op.
-  pending_batch* pending = nullptr;
-  for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) {
-    grpc_transport_stream_op_batch* batch = calld->pending_batches[i].batch;
-    if (batch != nullptr && batch->recv_message &&
-        batch->payload->recv_message.recv_message_ready != nullptr) {
-      if (grpc_client_channel_trace.enabled()) {
-        gpr_log(GPR_INFO,
-                "chand=%p calld=%p: invoking recv_message_ready for "
-                "pending batch at index %" PRIuPTR,
-                chand, calld, i);
-      }
-      pending = &calld->pending_batches[i];
-      break;
-    }
-  }
+  pending_batch* pending = pending_batch_find(
+      batch_data->elem, "invoking recv_message_ready for",
+      [](grpc_transport_stream_op_batch* batch) {
+        return batch->recv_message &&
+               batch->payload->recv_message.recv_message_ready != nullptr;
+      });
   GPR_ASSERT(pending != nullptr);
   // Return payload.
+  subchannel_call_retry_state* retry_state =
+      static_cast<subchannel_call_retry_state*>(
+          grpc_connected_subchannel_call_get_parent_data(
+              batch_data->subchannel_call));
   *pending->batch->payload->recv_message.recv_message =
-      std::move(batch_data->recv_message);
+      std::move(retry_state->recv_message);
   // Update bookkeeping.
   // Note: Need to do this before invoking the callback, since invoking
   // the callback will result in yielding the call combiner.
@@ -1740,12 +1742,21 @@ static void recv_message_ready(void* arg, grpc_error* error) {
       static_cast<subchannel_call_retry_state*>(
           grpc_connected_subchannel_call_get_parent_data(
               batch_data->subchannel_call));
+  ++retry_state->completed_recv_message_count;
+  // If a retry was already dispatched, then we're not going to use the
+  // result of this recv_message op, so do nothing.
+  if (retry_state->retry_dispatched) {
+    GRPC_CALL_COMBINER_STOP(calld->call_combiner,
+                            "recv_message_ready after retry dispatched");
+    return;
+  }
   // If we got an error or the payload was nullptr and we have not yet gotten
-  // the recv_trailing_metadata on_complete callback, then defer
-  // propagating this callback back to the surface.  We can evaluate whether
-  // to retry when recv_trailing_metadata comes back.
-  if ((batch_data->recv_message == nullptr || error != GRPC_ERROR_NONE) &&
-      !retry_state->completed_recv_trailing_metadata) {
+  // the recv_trailing_metadata_ready callback, then defer propagating this
+  // callback back to the surface.  We can evaluate whether to retry when
+  // recv_trailing_metadata comes back.
+  if (GPR_UNLIKELY(
+          (retry_state->recv_message == nullptr || error != GRPC_ERROR_NONE) &&
+          !retry_state->completed_recv_trailing_metadata)) {
     if (grpc_client_channel_trace.enabled()) {
       gpr_log(GPR_INFO,
               "chand=%p calld=%p: deferring recv_message_ready (nullptr "
@@ -1765,131 +1776,282 @@ static void recv_message_ready(void* arg, grpc_error* error) {
   }
   // Received a valid message, so commit the call.
   retry_commit(elem, retry_state);
+  // Invoke the callback to return the result to the surface.
   // Manually invoking a callback function; it does not take ownership of error.
   invoke_recv_message_callback(batch_data, error);
-  GRPC_ERROR_UNREF(error);
 }
 
 //
-// list of closures to execute in call combiner
+// recv_trailing_metadata handling
 //
 
-// Represents a closure that needs to run in the call combiner as part of
-// starting or completing a batch.
-typedef struct {
-  grpc_closure* closure;
-  grpc_error* error;
-  const char* reason;
-  bool free_reason = false;
-} closure_to_execute;
-
-static void execute_closures_in_call_combiner(grpc_call_element* elem,
-                                              const char* caller,
-                                              closure_to_execute* closures,
-                                              size_t num_closures) {
-  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+// Sets *status and *server_pushback_md based on md_batch and error.
+// Only sets *server_pushback_md if server_pushback_md != nullptr.
+static void get_call_status(grpc_call_element* elem,
+                            grpc_metadata_batch* md_batch, grpc_error* error,
+                            grpc_status_code* status,
+                            grpc_mdelem** server_pushback_md) {
   call_data* calld = static_cast<call_data*>(elem->call_data);
-  // Note that the call combiner will be yielded for each closure that
-  // we schedule.  We're already running in the call combiner, so one of
-  // the closures can be scheduled directly, but the others will
-  // have to re-enter the call combiner.
-  if (num_closures > 0) {
-    if (grpc_client_channel_trace.enabled()) {
-      gpr_log(GPR_INFO, "chand=%p calld=%p: %s starting closure: %s", chand,
-              calld, caller, closures[0].reason);
-    }
-    GRPC_CLOSURE_SCHED(closures[0].closure, closures[0].error);
-    if (closures[0].free_reason) {
-      gpr_free(const_cast<char*>(closures[0].reason));
-    }
-    for (size_t i = 1; i < num_closures; ++i) {
-      if (grpc_client_channel_trace.enabled()) {
-        gpr_log(GPR_INFO,
-                "chand=%p calld=%p: %s starting closure in call combiner: %s",
-                chand, calld, caller, closures[i].reason);
-      }
-      GRPC_CALL_COMBINER_START(calld->call_combiner, closures[i].closure,
-                               closures[i].error, closures[i].reason);
-      if (closures[i].free_reason) {
-        gpr_free(const_cast<char*>(closures[i].reason));
-      }
-    }
+  if (error != GRPC_ERROR_NONE) {
+    grpc_error_get_status(error, calld->deadline, status, nullptr, nullptr,
+                          nullptr);
   } else {
-    if (grpc_client_channel_trace.enabled()) {
-      gpr_log(GPR_INFO, "chand=%p calld=%p: no closures to run for %s", chand,
-              calld, caller);
+    GPR_ASSERT(md_batch->idx.named.grpc_status != nullptr);
+    *status =
+        grpc_get_status_code_from_metadata(md_batch->idx.named.grpc_status->md);
+    if (server_pushback_md != nullptr &&
+        md_batch->idx.named.grpc_retry_pushback_ms != nullptr) {
+      *server_pushback_md = &md_batch->idx.named.grpc_retry_pushback_ms->md;
     }
-    GRPC_CALL_COMBINER_STOP(calld->call_combiner, "no closures to run");
   }
+  GRPC_ERROR_UNREF(error);
 }
 
-//
-// on_complete callback handling
-//
-
-// Updates retry_state to reflect the ops completed in batch_data.
-static void update_retry_state_for_completed_batch(
-    subchannel_batch_data* batch_data,
-    subchannel_call_retry_state* retry_state) {
-  if (batch_data->batch.send_initial_metadata) {
-    retry_state->completed_send_initial_metadata = true;
-  }
-  if (batch_data->batch.send_message) {
-    ++retry_state->completed_send_message_count;
-  }
-  if (batch_data->batch.send_trailing_metadata) {
-    retry_state->completed_send_trailing_metadata = true;
-  }
-  if (batch_data->batch.recv_initial_metadata) {
-    retry_state->completed_recv_initial_metadata = true;
-  }
-  if (batch_data->batch.recv_message) {
-    ++retry_state->completed_recv_message_count;
-  }
-  if (batch_data->batch.recv_trailing_metadata) {
-    retry_state->completed_recv_trailing_metadata = true;
+// Adds recv_trailing_metadata_ready closure to closures.
+static void add_closure_for_recv_trailing_metadata_ready(
+    grpc_call_element* elem, subchannel_batch_data* batch_data,
+    grpc_error* error, grpc_core::CallCombinerClosureList* closures) {
+  // Find pending batch.
+  pending_batch* pending = pending_batch_find(
+      elem, "invoking recv_trailing_metadata for",
+      [](grpc_transport_stream_op_batch* batch) {
+        return batch->recv_trailing_metadata &&
+               batch->payload->recv_trailing_metadata
+                       .recv_trailing_metadata_ready != nullptr;
+      });
+  // If we generated the recv_trailing_metadata op internally via
+  // start_internal_recv_trailing_metadata(), then there will be no
+  // pending batch.
+  if (pending == nullptr) {
+    GRPC_ERROR_UNREF(error);
+    return;
   }
+  // Return metadata.
+  subchannel_call_retry_state* retry_state =
+      static_cast<subchannel_call_retry_state*>(
+          grpc_connected_subchannel_call_get_parent_data(
+              batch_data->subchannel_call));
+  grpc_metadata_batch_move(
+      &retry_state->recv_trailing_metadata,
+      pending->batch->payload->recv_trailing_metadata.recv_trailing_metadata);
+  // Add closure.
+  closures->Add(pending->batch->payload->recv_trailing_metadata
+                    .recv_trailing_metadata_ready,
+                error, "recv_trailing_metadata_ready for pending batch");
+  // Update bookkeeping.
+  pending->batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready =
+      nullptr;
+  maybe_clear_pending_batch(elem, pending);
 }
 
 // Adds any necessary closures for deferred recv_initial_metadata and
-// recv_message callbacks to closures, updating *num_closures as needed.
+// recv_message callbacks to closures.
 static void add_closures_for_deferred_recv_callbacks(
     subchannel_batch_data* batch_data, subchannel_call_retry_state* retry_state,
-    closure_to_execute* closures, size_t* num_closures) {
+    grpc_core::CallCombinerClosureList* closures) {
   if (batch_data->batch.recv_trailing_metadata) {
     // Add closure for deferred recv_initial_metadata_ready.
-    if (retry_state->recv_initial_metadata_ready_deferred_batch != nullptr) {
-      closure_to_execute* closure = &closures[(*num_closures)++];
-      closure->closure = GRPC_CLOSURE_INIT(
-          &batch_data->recv_initial_metadata_ready,
-          invoke_recv_initial_metadata_callback,
-          retry_state->recv_initial_metadata_ready_deferred_batch,
-          grpc_schedule_on_exec_ctx);
-      closure->error = retry_state->recv_initial_metadata_error;
-      closure->reason = "resuming recv_initial_metadata_ready";
+    if (GPR_UNLIKELY(retry_state->recv_initial_metadata_ready_deferred_batch !=
+                     nullptr)) {
+      GRPC_CLOSURE_INIT(&retry_state->recv_initial_metadata_ready,
+                        invoke_recv_initial_metadata_callback,
+                        retry_state->recv_initial_metadata_ready_deferred_batch,
+                        grpc_schedule_on_exec_ctx);
+      closures->Add(&retry_state->recv_initial_metadata_ready,
+                    retry_state->recv_initial_metadata_error,
+                    "resuming recv_initial_metadata_ready");
       retry_state->recv_initial_metadata_ready_deferred_batch = nullptr;
     }
     // Add closure for deferred recv_message_ready.
-    if (retry_state->recv_message_ready_deferred_batch != nullptr) {
-      closure_to_execute* closure = &closures[(*num_closures)++];
-      closure->closure = GRPC_CLOSURE_INIT(
-          &batch_data->recv_message_ready, invoke_recv_message_callback,
-          retry_state->recv_message_ready_deferred_batch,
-          grpc_schedule_on_exec_ctx);
-      closure->error = retry_state->recv_message_error;
-      closure->reason = "resuming recv_message_ready";
+    if (GPR_UNLIKELY(retry_state->recv_message_ready_deferred_batch !=
+                     nullptr)) {
+      GRPC_CLOSURE_INIT(&retry_state->recv_message_ready,
+                        invoke_recv_message_callback,
+                        retry_state->recv_message_ready_deferred_batch,
+                        grpc_schedule_on_exec_ctx);
+      closures->Add(&retry_state->recv_message_ready,
+                    retry_state->recv_message_error,
+                    "resuming recv_message_ready");
       retry_state->recv_message_ready_deferred_batch = nullptr;
     }
   }
 }
 
+// Returns true if any op in the batch was not yet started.
+// Only looks at send ops, since recv ops are always started immediately.
+static bool pending_batch_is_unstarted(
+    pending_batch* pending, call_data* calld,
+    subchannel_call_retry_state* retry_state) {
+  if (pending->batch == nullptr || pending->batch->on_complete == nullptr) {
+    return false;
+  }
+  if (pending->batch->send_initial_metadata &&
+      !retry_state->started_send_initial_metadata) {
+    return true;
+  }
+  if (pending->batch->send_message &&
+      retry_state->started_send_message_count < calld->send_messages->size()) {
+    return true;
+  }
+  if (pending->batch->send_trailing_metadata &&
+      !retry_state->started_send_trailing_metadata) {
+    return true;
+  }
+  return false;
+}
+
+// For any pending batch containing an op that has not yet been started,
+// adds the pending batch's completion closures to closures.
+static void add_closures_to_fail_unstarted_pending_batches(
+    grpc_call_element* elem, subchannel_call_retry_state* retry_state,
+    grpc_error* error, grpc_core::CallCombinerClosureList* closures) {
+  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) {
+    pending_batch* pending = &calld->pending_batches[i];
+    if (pending_batch_is_unstarted(pending, calld, retry_state)) {
+      if (grpc_client_channel_trace.enabled()) {
+        gpr_log(GPR_INFO,
+                "chand=%p calld=%p: failing unstarted pending batch at index "
+                "%" PRIuPTR,
+                chand, calld, i);
+      }
+      closures->Add(pending->batch->on_complete, GRPC_ERROR_REF(error),
+                    "failing on_complete for pending batch");
+      pending->batch->on_complete = nullptr;
+      maybe_clear_pending_batch(elem, pending);
+    }
+  }
+  GRPC_ERROR_UNREF(error);
+}
+
+// Runs necessary closures upon completion of a call attempt.
+static void run_closures_for_completed_call(subchannel_batch_data* batch_data,
+                                            grpc_error* error) {
+  grpc_call_element* elem = batch_data->elem;
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  subchannel_call_retry_state* retry_state =
+      static_cast<subchannel_call_retry_state*>(
+          grpc_connected_subchannel_call_get_parent_data(
+              batch_data->subchannel_call));
+  // Construct list of closures to execute.
+  grpc_core::CallCombinerClosureList closures;
+  // First, add closure for recv_trailing_metadata_ready.
+  add_closure_for_recv_trailing_metadata_ready(
+      elem, batch_data, GRPC_ERROR_REF(error), &closures);
+  // If there are deferred recv_initial_metadata_ready or recv_message_ready
+  // callbacks, add them to closures.
+  add_closures_for_deferred_recv_callbacks(batch_data, retry_state, &closures);
+  // Add closures to fail any pending batches that have not yet been started.
+  add_closures_to_fail_unstarted_pending_batches(
+      elem, retry_state, GRPC_ERROR_REF(error), &closures);
+  // Don't need batch_data anymore.
+  batch_data_unref(batch_data);
+  // Schedule all of the closures identified above.
+  // Note: This will release the call combiner.
+  closures.RunClosures(calld->call_combiner);
+  GRPC_ERROR_UNREF(error);
+}
+
+// Intercepts recv_trailing_metadata_ready callback for retries.
+// Commits the call and returns the trailing metadata up the stack.
+static void recv_trailing_metadata_ready(void* arg, grpc_error* error) {
+  subchannel_batch_data* batch_data = static_cast<subchannel_batch_data*>(arg);
+  grpc_call_element* elem = batch_data->elem;
+  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  if (grpc_client_channel_trace.enabled()) {
+    gpr_log(GPR_INFO,
+            "chand=%p calld=%p: got recv_trailing_metadata_ready, error=%s",
+            chand, calld, grpc_error_string(error));
+  }
+  subchannel_call_retry_state* retry_state =
+      static_cast<subchannel_call_retry_state*>(
+          grpc_connected_subchannel_call_get_parent_data(
+              batch_data->subchannel_call));
+  retry_state->completed_recv_trailing_metadata = true;
+  // Get the call's status and check for server pushback metadata.
+  grpc_status_code status = GRPC_STATUS_OK;
+  grpc_mdelem* server_pushback_md = nullptr;
+  grpc_metadata_batch* md_batch =
+      batch_data->batch.payload->recv_trailing_metadata.recv_trailing_metadata;
+  get_call_status(elem, md_batch, GRPC_ERROR_REF(error), &status,
+                  &server_pushback_md);
+  grpc_core::channelz::SubchannelNode* channelz_subchannel =
+      calld->pick.connected_subchannel->channelz_subchannel();
+  if (channelz_subchannel != nullptr) {
+    if (status == GRPC_STATUS_OK) {
+      channelz_subchannel->RecordCallSucceeded();
+    } else {
+      channelz_subchannel->RecordCallFailed();
+    }
+  }
+  if (grpc_client_channel_trace.enabled()) {
+    gpr_log(GPR_INFO, "chand=%p calld=%p: call finished, status=%s", chand,
+            calld, grpc_status_code_to_string(status));
+  }
+  // Check if we should retry.
+  if (maybe_retry(elem, batch_data, status, server_pushback_md)) {
+    // Unref batch_data for deferred recv_initial_metadata_ready or
+    // recv_message_ready callbacks, if any.
+    if (retry_state->recv_initial_metadata_ready_deferred_batch != nullptr) {
+      batch_data_unref(batch_data);
+      GRPC_ERROR_UNREF(retry_state->recv_initial_metadata_error);
+    }
+    if (retry_state->recv_message_ready_deferred_batch != nullptr) {
+      batch_data_unref(batch_data);
+      GRPC_ERROR_UNREF(retry_state->recv_message_error);
+    }
+    batch_data_unref(batch_data);
+    return;
+  }
+  // Not retrying, so commit the call.
+  retry_commit(elem, retry_state);
+  // Run any necessary closures.
+  run_closures_for_completed_call(batch_data, GRPC_ERROR_REF(error));
+}
+
+//
+// on_complete callback handling
+//
+
+// Adds the on_complete closure for the pending batch completed in
+// batch_data to closures.
+static void add_closure_for_completed_pending_batch(
+    grpc_call_element* elem, subchannel_batch_data* batch_data,
+    subchannel_call_retry_state* retry_state, grpc_error* error,
+    grpc_core::CallCombinerClosureList* closures) {
+  pending_batch* pending = pending_batch_find(
+      elem, "completed", [batch_data](grpc_transport_stream_op_batch* batch) {
+        // Match the pending batch with the same set of send ops as the
+        // subchannel batch we've just completed.
+        return batch->on_complete != nullptr &&
+               batch_data->batch.send_initial_metadata ==
+                   batch->send_initial_metadata &&
+               batch_data->batch.send_message == batch->send_message &&
+               batch_data->batch.send_trailing_metadata ==
+                   batch->send_trailing_metadata;
+      });
+  // If batch_data is a replay batch, then there will be no pending
+  // batch to complete.
+  if (pending == nullptr) {
+    GRPC_ERROR_UNREF(error);
+    return;
+  }
+  // Add closure.
+  closures->Add(pending->batch->on_complete, error,
+                "on_complete for pending batch");
+  pending->batch->on_complete = nullptr;
+  maybe_clear_pending_batch(elem, pending);
+}
+
 // If there are any cached ops to replay or pending ops to start on the
 // subchannel call, adds a closure to closures to invoke
-// start_retriable_subchannel_batches(), updating *num_closures as needed.
+// start_retriable_subchannel_batches().
 static void add_closures_for_replay_or_pending_send_ops(
     grpc_call_element* elem, subchannel_batch_data* batch_data,
-    subchannel_call_retry_state* retry_state, closure_to_execute* closures,
-    size_t* num_closures) {
+    subchannel_call_retry_state* retry_state,
+    grpc_core::CallCombinerClosureList* closures) {
   channel_data* chand = static_cast<channel_data*>(elem->channel_data);
   call_data* calld = static_cast<call_data*>(elem->call_data);
   bool have_pending_send_message_ops =
@@ -1915,93 +2077,12 @@ static void add_closures_for_replay_or_pending_send_ops(
               "chand=%p calld=%p: starting next batch for pending send op(s)",
               chand, calld);
     }
-    closure_to_execute* closure = &closures[(*num_closures)++];
-    closure->closure = GRPC_CLOSURE_INIT(
-        &batch_data->batch.handler_private.closure,
-        start_retriable_subchannel_batches, elem, grpc_schedule_on_exec_ctx);
-    closure->error = GRPC_ERROR_NONE;
-    closure->reason = "starting next batch for send_* op(s)";
-  }
-}
-
-// For any pending batch completed in batch_data, adds the necessary
-// completion closures to closures, updating *num_closures as needed.
-static void add_closures_for_completed_pending_batches(
-    grpc_call_element* elem, subchannel_batch_data* batch_data,
-    subchannel_call_retry_state* retry_state, grpc_error* error,
-    closure_to_execute* closures, size_t* num_closures) {
-  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
-  call_data* calld = static_cast<call_data*>(elem->call_data);
-  for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) {
-    pending_batch* pending = &calld->pending_batches[i];
-    if (pending_batch_is_completed(pending, calld, retry_state)) {
-      if (grpc_client_channel_trace.enabled()) {
-        gpr_log(GPR_INFO,
-                "chand=%p calld=%p: pending batch completed at index %" PRIuPTR,
-                chand, calld, i);
-      }
-      // Copy the trailing metadata to return it to the surface.
-      if (batch_data->batch.recv_trailing_metadata) {
-        grpc_metadata_batch_move(&batch_data->recv_trailing_metadata,
-                                 pending->batch->payload->recv_trailing_metadata
-                                     .recv_trailing_metadata);
-      }
-      closure_to_execute* closure = &closures[(*num_closures)++];
-      closure->closure = pending->batch->on_complete;
-      closure->error = GRPC_ERROR_REF(error);
-      closure->reason = "on_complete for pending batch";
-      pending->batch->on_complete = nullptr;
-      maybe_clear_pending_batch(elem, pending);
-    }
-  }
-  GRPC_ERROR_UNREF(error);
-}
-
-// For any pending batch containing an op that has not yet been started,
-// adds the pending batch's completion closures to closures, updating
-// *num_closures as needed.
-static void add_closures_to_fail_unstarted_pending_batches(
-    grpc_call_element* elem, subchannel_call_retry_state* retry_state,
-    grpc_error* error, closure_to_execute* closures, size_t* num_closures) {
-  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
-  call_data* calld = static_cast<call_data*>(elem->call_data);
-  for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) {
-    pending_batch* pending = &calld->pending_batches[i];
-    if (pending_batch_is_unstarted(pending, calld, retry_state)) {
-      if (grpc_client_channel_trace.enabled()) {
-        gpr_log(GPR_INFO,
-                "chand=%p calld=%p: failing unstarted pending batch at index "
-                "%" PRIuPTR,
-                chand, calld, i);
-      }
-      if (pending->batch->recv_initial_metadata) {
-        closure_to_execute* closure = &closures[(*num_closures)++];
-        closure->closure = pending->batch->payload->recv_initial_metadata
-                               .recv_initial_metadata_ready;
-        closure->error = GRPC_ERROR_REF(error);
-        closure->reason =
-            "failing recv_initial_metadata_ready for pending batch";
-        pending->batch->payload->recv_initial_metadata
-            .recv_initial_metadata_ready = nullptr;
-      }
-      if (pending->batch->recv_message) {
-        *pending->batch->payload->recv_message.recv_message = nullptr;
-        closure_to_execute* closure = &closures[(*num_closures)++];
-        closure->closure =
-            pending->batch->payload->recv_message.recv_message_ready;
-        closure->error = GRPC_ERROR_REF(error);
-        closure->reason = "failing recv_message_ready for pending batch";
-        pending->batch->payload->recv_message.recv_message_ready = nullptr;
-      }
-      closure_to_execute* closure = &closures[(*num_closures)++];
-      closure->closure = pending->batch->on_complete;
-      closure->error = GRPC_ERROR_REF(error);
-      closure->reason = "failing on_complete for pending batch";
-      pending->batch->on_complete = nullptr;
-      maybe_clear_pending_batch(elem, pending);
-    }
+    GRPC_CLOSURE_INIT(&batch_data->batch.handler_private.closure,
+                      start_retriable_subchannel_batches, elem,
+                      grpc_schedule_on_exec_ctx);
+    closures->Add(&batch_data->batch.handler_private.closure, GRPC_ERROR_NONE,
+                  "starting next batch for send_* op(s)");
   }
-  GRPC_ERROR_UNREF(error);
 }
 
 // Callback used to intercept on_complete from subchannel calls.
@@ -2021,108 +2102,51 @@ static void on_complete(void* arg, grpc_error* error) {
       static_cast<subchannel_call_retry_state*>(
           grpc_connected_subchannel_call_get_parent_data(
               batch_data->subchannel_call));
-  // If we have previously completed recv_trailing_metadata, then the
-  // call is finished.
-  bool call_finished = retry_state->completed_recv_trailing_metadata;
-  // Record whether we were already committed before receiving this callback.
-  const bool previously_committed = calld->retry_committed;
   // Update bookkeeping in retry_state.
-  update_retry_state_for_completed_batch(batch_data, retry_state);
-  if (call_finished) {
-    if (grpc_client_channel_trace.enabled()) {
-      gpr_log(GPR_INFO, "chand=%p calld=%p: call already finished", chand,
-              calld);
-    }
-  } else {
-    // Check if this batch finished the call, and if so, get its status.
-    // The call is finished if either (a) this callback was invoked with
-    // an error or (b) we receive status.
-    grpc_status_code status = GRPC_STATUS_OK;
-    grpc_mdelem* server_pushback_md = nullptr;
-    if (error != GRPC_ERROR_NONE) {  // Case (a).
-      call_finished = true;
-      grpc_error_get_status(error, calld->deadline, &status, nullptr, nullptr,
-                            nullptr);
-    } else if (batch_data->batch.recv_trailing_metadata) {  // Case (b).
-      call_finished = true;
-      grpc_metadata_batch* md_batch =
-          batch_data->batch.payload->recv_trailing_metadata
-              .recv_trailing_metadata;
-      GPR_ASSERT(md_batch->idx.named.grpc_status != nullptr);
-      status = grpc_get_status_code_from_metadata(
-          md_batch->idx.named.grpc_status->md);
-      if (md_batch->idx.named.grpc_retry_pushback_ms != nullptr) {
-        server_pushback_md = &md_batch->idx.named.grpc_retry_pushback_ms->md;
-      }
-    }
-    // If the call just finished, check if we should retry.
-    if (call_finished) {
-      if (grpc_client_channel_trace.enabled()) {
-        gpr_log(GPR_INFO, "chand=%p calld=%p: call finished, status=%s", chand,
-                calld, grpc_status_code_to_string(status));
-      }
-      if (maybe_retry(elem, batch_data, status, server_pushback_md)) {
-        // Unref batch_data for deferred recv_initial_metadata_ready or
-        // recv_message_ready callbacks, if any.
-        if (batch_data->batch.recv_trailing_metadata &&
-            retry_state->recv_initial_metadata_ready_deferred_batch !=
-                nullptr) {
-          batch_data_unref(batch_data);
-          GRPC_ERROR_UNREF(retry_state->recv_initial_metadata_error);
-        }
-        if (batch_data->batch.recv_trailing_metadata &&
-            retry_state->recv_message_ready_deferred_batch != nullptr) {
-          batch_data_unref(batch_data);
-          GRPC_ERROR_UNREF(retry_state->recv_message_error);
-        }
-        batch_data_unref(batch_data);
-        return;
-      }
-      // Not retrying, so commit the call.
-      retry_commit(elem, retry_state);
-    }
+  if (batch_data->batch.send_initial_metadata) {
+    retry_state->completed_send_initial_metadata = true;
+  }
+  if (batch_data->batch.send_message) {
+    ++retry_state->completed_send_message_count;
   }
-  // If we were already committed before receiving this callback, free
-  // cached data for send ops that we've just completed.  (If the call has
-  // just now finished, the call to retry_commit() above will have freed all
-  // cached send ops, so we don't need to do it here.)
-  if (previously_committed) {
+  if (batch_data->batch.send_trailing_metadata) {
+    retry_state->completed_send_trailing_metadata = true;
+  }
+  // If the call is committed, free cached data for send ops that we've just
+  // completed.
+  if (calld->retry_committed) {
     free_cached_send_op_data_for_completed_batch(elem, batch_data, retry_state);
   }
-  // Call not being retried.
   // Construct list of closures to execute.
-  // Max number of closures is number of pending batches plus one for
-  // each of:
-  // - recv_initial_metadata_ready (either deferred or unstarted)
-  // - recv_message_ready (either deferred or unstarted)
-  // - starting a new batch for pending send ops
-  closure_to_execute closures[GPR_ARRAY_SIZE(calld->pending_batches) + 3];
-  size_t num_closures = 0;
-  // If there are deferred recv_initial_metadata_ready or recv_message_ready
-  // callbacks, add them to closures.
-  add_closures_for_deferred_recv_callbacks(batch_data, retry_state, closures,
-                                           &num_closures);
-  // Find pending batches whose ops are now complete and add their
-  // on_complete callbacks to closures.
-  add_closures_for_completed_pending_batches(elem, batch_data, retry_state,
-                                             GRPC_ERROR_REF(error), closures,
-                                             &num_closures);
-  // Add closures to handle any pending batches that have not yet been started.
-  // If the call is finished, we fail these batches; otherwise, we add a
-  // callback to start_retriable_subchannel_batches() to start them on
-  // the subchannel call.
-  if (call_finished) {
-    add_closures_to_fail_unstarted_pending_batches(
-        elem, retry_state, GRPC_ERROR_REF(error), closures, &num_closures);
-  } else {
-    add_closures_for_replay_or_pending_send_ops(elem, batch_data, retry_state,
-                                                closures, &num_closures);
-  }
+  grpc_core::CallCombinerClosureList closures;
+  // If a retry was already dispatched, that means we saw
+  // recv_trailing_metadata before this, so we do nothing here.
+  // Otherwise, invoke the callback to return the result to the surface.
+  if (!retry_state->retry_dispatched) {
+    // Add closure for the completed pending batch, if any.
+    add_closure_for_completed_pending_batch(elem, batch_data, retry_state,
+                                            GRPC_ERROR_REF(error), &closures);
+    // If needed, add a callback to start any replay or pending send ops on
+    // the subchannel call.
+    if (!retry_state->completed_recv_trailing_metadata) {
+      add_closures_for_replay_or_pending_send_ops(elem, batch_data, retry_state,
+                                                  &closures);
+    }
+  }
+  // Track number of pending subchannel send batches and determine if this
+  // was the last one.
+  --calld->num_pending_retriable_subchannel_send_batches;
+  const bool last_send_batch_complete =
+      calld->num_pending_retriable_subchannel_send_batches == 0;
   // Don't need batch_data anymore.
   batch_data_unref(batch_data);
   // Schedule all of the closures identified above.
-  execute_closures_in_call_combiner(elem, "on_complete", closures,
-                                    num_closures);
+  // Note: This yeilds the call combiner.
+  closures.RunClosures(calld->call_combiner);
+  // If this was the last subchannel send batch, unref the call stack.
+  if (last_send_batch_complete) {
+    GRPC_CALL_STACK_UNREF(calld->owning_call, "subchannel_send_batches");
+  }
 }
 
 //
@@ -2141,27 +2165,22 @@ static void start_batch_in_call_combiner(void* arg, grpc_error* ignored) {
 
 // Adds a closure to closures that will execute batch in the call combiner.
 static void add_closure_for_subchannel_batch(
-    call_data* calld, grpc_transport_stream_op_batch* batch,
-    closure_to_execute* closures, size_t* num_closures) {
+    grpc_call_element* elem, grpc_transport_stream_op_batch* batch,
+    grpc_core::CallCombinerClosureList* closures) {
+  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+  call_data* calld = static_cast<call_data*>(elem->call_data);
   batch->handler_private.extra_arg = calld->subchannel_call;
   GRPC_CLOSURE_INIT(&batch->handler_private.closure,
                     start_batch_in_call_combiner, batch,
                     grpc_schedule_on_exec_ctx);
-  closure_to_execute* closure = &closures[(*num_closures)++];
-  closure->closure = &batch->handler_private.closure;
-  closure->error = GRPC_ERROR_NONE;
-  // If the tracer is enabled, we log a more detailed message, which
-  // requires dynamic allocation.  This will be freed in
-  // start_retriable_subchannel_batches().
   if (grpc_client_channel_trace.enabled()) {
     char* batch_str = grpc_transport_stream_op_batch_string(batch);
-    gpr_asprintf(const_cast<char**>(&closure->reason),
-                 "starting batch in call combiner: %s", batch_str);
+    gpr_log(GPR_INFO, "chand=%p calld=%p: starting subchannel batch: %s", chand,
+            calld, batch_str);
     gpr_free(batch_str);
-    closure->free_reason = true;
-  } else {
-    closure->reason = "start_subchannel_batch";
   }
+  closures->Add(&batch->handler_private.closure, GRPC_ERROR_NONE,
+                "start_subchannel_batch");
 }
 
 // Adds retriable send_initial_metadata op to batch_data.
@@ -2177,28 +2196,28 @@ static void add_retriable_send_initial_metadata_op(
   //
   // If we've already completed one or more attempts, add the
   // grpc-retry-attempts header.
-  batch_data->send_initial_metadata_storage =
+  retry_state->send_initial_metadata_storage =
       static_cast<grpc_linked_mdelem*>(gpr_arena_alloc(
           calld->arena, sizeof(grpc_linked_mdelem) *
                             (calld->send_initial_metadata.list.count +
                              (calld->num_attempts_completed > 0))));
   grpc_metadata_batch_copy(&calld->send_initial_metadata,
-                           &batch_data->send_initial_metadata,
-                           batch_data->send_initial_metadata_storage);
-  if (batch_data->send_initial_metadata.idx.named.grpc_previous_rpc_attempts !=
-      nullptr) {
-    grpc_metadata_batch_remove(
-        &batch_data->send_initial_metadata,
-        batch_data->send_initial_metadata.idx.named.grpc_previous_rpc_attempts);
-  }
-  if (calld->num_attempts_completed > 0) {
+                           &retry_state->send_initial_metadata,
+                           retry_state->send_initial_metadata_storage);
+  if (GPR_UNLIKELY(retry_state->send_initial_metadata.idx.named
+                       .grpc_previous_rpc_attempts != nullptr)) {
+    grpc_metadata_batch_remove(&retry_state->send_initial_metadata,
+                               retry_state->send_initial_metadata.idx.named
+                                   .grpc_previous_rpc_attempts);
+  }
+  if (GPR_UNLIKELY(calld->num_attempts_completed > 0)) {
     grpc_mdelem retry_md = grpc_mdelem_from_slices(
         GRPC_MDSTR_GRPC_PREVIOUS_RPC_ATTEMPTS,
         *retry_count_strings[calld->num_attempts_completed - 1]);
     grpc_error* error = grpc_metadata_batch_add_tail(
-        &batch_data->send_initial_metadata,
-        &batch_data->send_initial_metadata_storage[calld->send_initial_metadata
-                                                       .list.count],
+        &retry_state->send_initial_metadata,
+        &retry_state->send_initial_metadata_storage[calld->send_initial_metadata
+                                                        .list.count],
         retry_md);
     if (GPR_UNLIKELY(error != GRPC_ERROR_NONE)) {
       gpr_log(GPR_ERROR, "error adding retry metadata: %s",
@@ -2209,7 +2228,7 @@ static void add_retriable_send_initial_metadata_op(
   retry_state->started_send_initial_metadata = true;
   batch_data->batch.send_initial_metadata = true;
   batch_data->batch.payload->send_initial_metadata.send_initial_metadata =
-      &batch_data->send_initial_metadata;
+      &retry_state->send_initial_metadata;
   batch_data->batch.payload->send_initial_metadata.send_initial_metadata_flags =
       calld->send_initial_metadata_flags;
   batch_data->batch.payload->send_initial_metadata.peer_string =
@@ -2230,10 +2249,10 @@ static void add_retriable_send_message_op(
   grpc_core::ByteStreamCache* cache =
       (*calld->send_messages)[retry_state->started_send_message_count];
   ++retry_state->started_send_message_count;
-  batch_data->send_message.Init(cache);
+  retry_state->send_message.Init(cache);
   batch_data->batch.send_message = true;
   batch_data->batch.payload->send_message.send_message.reset(
-      batch_data->send_message.get());
+      retry_state->send_message.get());
 }
 
 // Adds retriable send_trailing_metadata op to batch_data.
@@ -2243,17 +2262,17 @@ static void add_retriable_send_trailing_metadata_op(
   // We need to make a copy of the metadata batch for each attempt, since
   // the filters in the subchannel stack may modify this batch, and we don't
   // want those modifications to be passed forward to subsequent attempts.
-  batch_data->send_trailing_metadata_storage =
+  retry_state->send_trailing_metadata_storage =
       static_cast<grpc_linked_mdelem*>(gpr_arena_alloc(
           calld->arena, sizeof(grpc_linked_mdelem) *
                             calld->send_trailing_metadata.list.count));
   grpc_metadata_batch_copy(&calld->send_trailing_metadata,
-                           &batch_data->send_trailing_metadata,
-                           batch_data->send_trailing_metadata_storage);
+                           &retry_state->send_trailing_metadata,
+                           retry_state->send_trailing_metadata_storage);
   retry_state->started_send_trailing_metadata = true;
   batch_data->batch.send_trailing_metadata = true;
   batch_data->batch.payload->send_trailing_metadata.send_trailing_metadata =
-      &batch_data->send_trailing_metadata;
+      &retry_state->send_trailing_metadata;
 }
 
 // Adds retriable recv_initial_metadata op to batch_data.
@@ -2262,16 +2281,16 @@ static void add_retriable_recv_initial_metadata_op(
     subchannel_batch_data* batch_data) {
   retry_state->started_recv_initial_metadata = true;
   batch_data->batch.recv_initial_metadata = true;
-  grpc_metadata_batch_init(&batch_data->recv_initial_metadata);
+  grpc_metadata_batch_init(&retry_state->recv_initial_metadata);
   batch_data->batch.payload->recv_initial_metadata.recv_initial_metadata =
-      &batch_data->recv_initial_metadata;
+      &retry_state->recv_initial_metadata;
   batch_data->batch.payload->recv_initial_metadata.trailing_metadata_available =
-      &batch_data->trailing_metadata_available;
-  GRPC_CLOSURE_INIT(&batch_data->recv_initial_metadata_ready,
+      &retry_state->trailing_metadata_available;
+  GRPC_CLOSURE_INIT(&retry_state->recv_initial_metadata_ready,
                     recv_initial_metadata_ready, batch_data,
                     grpc_schedule_on_exec_ctx);
   batch_data->batch.payload->recv_initial_metadata.recv_initial_metadata_ready =
-      &batch_data->recv_initial_metadata_ready;
+      &retry_state->recv_initial_metadata_ready;
 }
 
 // Adds retriable recv_message op to batch_data.
@@ -2281,11 +2300,11 @@ static void add_retriable_recv_message_op(
   ++retry_state->started_recv_message_count;
   batch_data->batch.recv_message = true;
   batch_data->batch.payload->recv_message.recv_message =
-      &batch_data->recv_message;
-  GRPC_CLOSURE_INIT(&batch_data->recv_message_ready, recv_message_ready,
+      &retry_state->recv_message;
+  GRPC_CLOSURE_INIT(&retry_state->recv_message_ready, recv_message_ready,
                     batch_data, grpc_schedule_on_exec_ctx);
   batch_data->batch.payload->recv_message.recv_message_ready =
-      &batch_data->recv_message_ready;
+      &retry_state->recv_message_ready;
 }
 
 // Adds retriable recv_trailing_metadata op to batch_data.
@@ -2294,12 +2313,17 @@ static void add_retriable_recv_trailing_metadata_op(
     subchannel_batch_data* batch_data) {
   retry_state->started_recv_trailing_metadata = true;
   batch_data->batch.recv_trailing_metadata = true;
-  grpc_metadata_batch_init(&batch_data->recv_trailing_metadata);
+  grpc_metadata_batch_init(&retry_state->recv_trailing_metadata);
   batch_data->batch.payload->recv_trailing_metadata.recv_trailing_metadata =
-      &batch_data->recv_trailing_metadata;
-  batch_data->batch.collect_stats = true;
-  batch_data->batch.payload->collect_stats.collect_stats =
-      &batch_data->collect_stats;
+      &retry_state->recv_trailing_metadata;
+  batch_data->batch.payload->recv_trailing_metadata.collect_stats =
+      &retry_state->collect_stats;
+  GRPC_CLOSURE_INIT(&retry_state->recv_trailing_metadata_ready,
+                    recv_trailing_metadata_ready, batch_data,
+                    grpc_schedule_on_exec_ctx);
+  batch_data->batch.payload->recv_trailing_metadata
+      .recv_trailing_metadata_ready =
+      &retry_state->recv_trailing_metadata_ready;
 }
 
 // Helper function used to start a recv_trailing_metadata batch.  This
@@ -2320,9 +2344,11 @@ static void start_internal_recv_trailing_metadata(grpc_call_element* elem) {
           grpc_connected_subchannel_call_get_parent_data(
               calld->subchannel_call));
   // Create batch_data with 2 refs, since this batch will be unreffed twice:
-  // once when the subchannel batch returns, and again when we actually get
-  // a recv_trailing_metadata op from the surface.
-  subchannel_batch_data* batch_data = batch_data_create(elem, 2);
+  // once for the recv_trailing_metadata_ready callback when the subchannel
+  // batch returns, and again when we actually get a recv_trailing_metadata
+  // op from the surface.
+  subchannel_batch_data* batch_data =
+      batch_data_create(elem, 2, false /* set_on_complete */);
   add_retriable_recv_trailing_metadata_op(calld, retry_state, batch_data);
   retry_state->recv_trailing_metadata_internal_batch = batch_data;
   // Note: This will release the call combiner.
@@ -2347,7 +2373,7 @@ static subchannel_batch_data* maybe_create_subchannel_batch_for_replay(
               "send_initial_metadata op",
               chand, calld);
     }
-    replay_batch_data = batch_data_create(elem, 1);
+    replay_batch_data = batch_data_create(elem, 1, true /* set_on_complete */);
     add_retriable_send_initial_metadata_op(calld, retry_state,
                                            replay_batch_data);
   }
@@ -2364,7 +2390,8 @@ static subchannel_batch_data* maybe_create_subchannel_batch_for_replay(
               chand, calld);
     }
     if (replay_batch_data == nullptr) {
-      replay_batch_data = batch_data_create(elem, 1);
+      replay_batch_data =
+          batch_data_create(elem, 1, true /* set_on_complete */);
     }
     add_retriable_send_message_op(elem, retry_state, replay_batch_data);
   }
@@ -2383,7 +2410,8 @@ static subchannel_batch_data* maybe_create_subchannel_batch_for_replay(
               chand, calld);
     }
     if (replay_batch_data == nullptr) {
-      replay_batch_data = batch_data_create(elem, 1);
+      replay_batch_data =
+          batch_data_create(elem, 1, true /* set_on_complete */);
     }
     add_retriable_send_trailing_metadata_op(calld, retry_state,
                                             replay_batch_data);
@@ -2395,7 +2423,7 @@ static subchannel_batch_data* maybe_create_subchannel_batch_for_replay(
 // *num_batches as needed.
 static void add_subchannel_batches_for_pending_batches(
     grpc_call_element* elem, subchannel_call_retry_state* retry_state,
-    closure_to_execute* closures, size_t* num_closures) {
+    grpc_core::CallCombinerClosureList* closures) {
   call_data* calld = static_cast<call_data*>(elem->call_data);
   for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) {
     pending_batch* pending = &calld->pending_batches[i];
@@ -2441,22 +2469,19 @@ static void add_subchannel_batches_for_pending_batches(
       // If we previously completed a recv_trailing_metadata op
       // initiated by start_internal_recv_trailing_metadata(), use the
       // result of that instead of trying to re-start this op.
-      if (retry_state->recv_trailing_metadata_internal_batch != nullptr) {
+      if (GPR_UNLIKELY((retry_state->recv_trailing_metadata_internal_batch !=
+                        nullptr))) {
         // If the batch completed, then trigger the completion callback
         // directly, so that we return the previously returned results to
         // the application.  Otherwise, just unref the internally
         // started subchannel batch, since we'll propagate the
         // completion when it completes.
         if (retry_state->completed_recv_trailing_metadata) {
-          subchannel_batch_data* batch_data =
-              retry_state->recv_trailing_metadata_internal_batch;
-          closure_to_execute* closure = &closures[(*num_closures)++];
-          closure->closure = &batch_data->on_complete;
           // Batches containing recv_trailing_metadata always succeed.
-          closure->error = GRPC_ERROR_NONE;
-          closure->reason =
-              "re-executing on_complete for recv_trailing_metadata "
-              "to propagate internally triggered result";
+          closures->Add(
+              &retry_state->recv_trailing_metadata_ready, GRPC_ERROR_NONE,
+              "re-executing recv_trailing_metadata_ready to propagate "
+              "internally triggered result");
         } else {
           batch_data_unref(retry_state->recv_trailing_metadata_internal_batch);
         }
@@ -2468,14 +2493,19 @@ static void add_subchannel_batches_for_pending_batches(
     if (calld->method_params == nullptr ||
         calld->method_params->retry_policy() == nullptr ||
         calld->retry_committed) {
-      add_closure_for_subchannel_batch(calld, batch, closures, num_closures);
+      add_closure_for_subchannel_batch(elem, batch, closures);
       pending_batch_clear(calld, pending);
       continue;
     }
     // Create batch with the right number of callbacks.
-    const int num_callbacks =
-        1 + batch->recv_initial_metadata + batch->recv_message;
-    subchannel_batch_data* batch_data = batch_data_create(elem, num_callbacks);
+    const bool has_send_ops = batch->send_initial_metadata ||
+                              batch->send_message ||
+                              batch->send_trailing_metadata;
+    const int num_callbacks = has_send_ops + batch->recv_initial_metadata +
+                              batch->recv_message +
+                              batch->recv_trailing_metadata;
+    subchannel_batch_data* batch_data = batch_data_create(
+        elem, num_callbacks, has_send_ops /* set_on_complete */);
     // Cache send ops if needed.
     maybe_cache_send_ops_for_batch(calld, pending);
     // send_initial_metadata.
@@ -2502,11 +2532,18 @@ static void add_subchannel_batches_for_pending_batches(
     }
     // recv_trailing_metadata.
     if (batch->recv_trailing_metadata) {
-      GPR_ASSERT(batch->collect_stats);
       add_retriable_recv_trailing_metadata_op(calld, retry_state, batch_data);
     }
-    add_closure_for_subchannel_batch(calld, &batch_data->batch, closures,
-                                     num_closures);
+    add_closure_for_subchannel_batch(elem, &batch_data->batch, closures);
+    // Track number of pending subchannel send batches.
+    // If this is the first one, take a ref to the call stack.
+    if (batch->send_initial_metadata || batch->send_message ||
+        batch->send_trailing_metadata) {
+      if (calld->num_pending_retriable_subchannel_send_batches == 0) {
+        GRPC_CALL_STACK_REF(calld->owning_call, "subchannel_send_batches");
+      }
+      ++calld->num_pending_retriable_subchannel_send_batches;
+    }
   }
 }
 
@@ -2525,28 +2562,94 @@ static void start_retriable_subchannel_batches(void* arg, grpc_error* ignored) {
           grpc_connected_subchannel_call_get_parent_data(
               calld->subchannel_call));
   // Construct list of closures to execute, one for each pending batch.
-  // We can start up to 6 batches.
-  closure_to_execute closures[GPR_ARRAY_SIZE(calld->pending_batches)];
-  size_t num_closures = 0;
+  grpc_core::CallCombinerClosureList closures;
   // Replay previously-returned send_* ops if needed.
   subchannel_batch_data* replay_batch_data =
       maybe_create_subchannel_batch_for_replay(elem, retry_state);
   if (replay_batch_data != nullptr) {
-    add_closure_for_subchannel_batch(calld, &replay_batch_data->batch, closures,
-                                     &num_closures);
+    add_closure_for_subchannel_batch(elem, &replay_batch_data->batch,
+                                     &closures);
+    // Track number of pending subchannel send batches.
+    // If this is the first one, take a ref to the call stack.
+    if (calld->num_pending_retriable_subchannel_send_batches == 0) {
+      GRPC_CALL_STACK_REF(calld->owning_call, "subchannel_send_batches");
+    }
+    ++calld->num_pending_retriable_subchannel_send_batches;
   }
   // Now add pending batches.
-  add_subchannel_batches_for_pending_batches(elem, retry_state, closures,
-                                             &num_closures);
+  add_subchannel_batches_for_pending_batches(elem, retry_state, &closures);
   // Start batches on subchannel call.
   if (grpc_client_channel_trace.enabled()) {
     gpr_log(GPR_INFO,
             "chand=%p calld=%p: starting %" PRIuPTR
             " retriable batches on subchannel_call=%p",
-            chand, calld, num_closures, calld->subchannel_call);
+            chand, calld, closures.size(), calld->subchannel_call);
   }
-  execute_closures_in_call_combiner(elem, "start_retriable_subchannel_batches",
-                                    closures, num_closures);
+  // Note: This will yield the call combiner.
+  closures.RunClosures(calld->call_combiner);
+}
+
+//
+// Channelz
+//
+
+static void recv_trailing_metadata_ready_channelz(void* arg,
+                                                  grpc_error* error) {
+  grpc_call_element* elem = static_cast<grpc_call_element*>(arg);
+  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  if (grpc_client_channel_trace.enabled()) {
+    gpr_log(GPR_INFO,
+            "chand=%p calld=%p: got recv_trailing_metadata_ready_channelz, "
+            "error=%s",
+            chand, calld, grpc_error_string(error));
+  }
+  GPR_ASSERT(calld->recv_trailing_metadata != nullptr);
+  grpc_status_code status = GRPC_STATUS_OK;
+  grpc_metadata_batch* md_batch = calld->recv_trailing_metadata;
+  get_call_status(elem, md_batch, GRPC_ERROR_REF(error), &status, nullptr);
+  grpc_core::channelz::SubchannelNode* channelz_subchannel =
+      calld->pick.connected_subchannel->channelz_subchannel();
+  GPR_ASSERT(channelz_subchannel != nullptr);
+  if (status == GRPC_STATUS_OK) {
+    channelz_subchannel->RecordCallSucceeded();
+  } else {
+    channelz_subchannel->RecordCallFailed();
+  }
+  calld->recv_trailing_metadata = nullptr;
+  GRPC_CLOSURE_RUN(calld->original_recv_trailing_metadata, error);
+}
+
+// If channelz is enabled, intercept recv_trailing so that we may check the
+// status and associate it to a subchannel.
+// Returns true if callback was intercepted, false otherwise.
+static void maybe_intercept_recv_trailing_metadata_for_channelz(
+    grpc_call_element* elem, grpc_transport_stream_op_batch* batch) {
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  // only intercept payloads with recv trailing.
+  if (!batch->recv_trailing_metadata) {
+    return;
+  }
+  // only add interceptor is channelz is enabled.
+  if (calld->pick.connected_subchannel->channelz_subchannel() == nullptr) {
+    return;
+  }
+  if (grpc_client_channel_trace.enabled()) {
+    gpr_log(GPR_INFO,
+            "calld=%p batch=%p: intercepting recv trailing for channelz", calld,
+            batch);
+  }
+  GRPC_CLOSURE_INIT(&calld->recv_trailing_metadata_ready_channelz,
+                    recv_trailing_metadata_ready_channelz, elem,
+                    grpc_schedule_on_exec_ctx);
+  // save some state needed for the interception callback.
+  GPR_ASSERT(calld->recv_trailing_metadata == nullptr);
+  calld->recv_trailing_metadata =
+      batch->payload->recv_trailing_metadata.recv_trailing_metadata;
+  calld->original_recv_trailing_metadata =
+      batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready;
+  batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready =
+      &calld->recv_trailing_metadata_ready_channelz;
 }
 
 //
@@ -2574,10 +2677,15 @@ static void create_subchannel_call(grpc_call_element* elem, grpc_error* error) {
     gpr_log(GPR_INFO, "chand=%p calld=%p: create subchannel_call=%p: error=%s",
             chand, calld, calld->subchannel_call, grpc_error_string(new_error));
   }
-  if (new_error != GRPC_ERROR_NONE) {
+  if (GPR_UNLIKELY(new_error != GRPC_ERROR_NONE)) {
     new_error = grpc_error_add_child(new_error, error);
     pending_batches_fail(elem, new_error, true /* yield_call_combiner */);
   } else {
+    grpc_core::channelz::SubchannelNode* channelz_subchannel =
+        calld->pick.connected_subchannel->channelz_subchannel();
+    if (channelz_subchannel != nullptr) {
+      channelz_subchannel->RecordCallStarted();
+    }
     if (parent_data_size > 0) {
       subchannel_call_retry_state* retry_state =
           static_cast<subchannel_call_retry_state*>(
@@ -2595,7 +2703,7 @@ static void pick_done(void* arg, grpc_error* error) {
   grpc_call_element* elem = static_cast<grpc_call_element*>(arg);
   channel_data* chand = static_cast<channel_data*>(elem->channel_data);
   call_data* calld = static_cast<call_data*>(elem->call_data);
-  if (calld->pick.connected_subchannel == nullptr) {
+  if (GPR_UNLIKELY(calld->pick.connected_subchannel == nullptr)) {
     // Failed to create subchannel.
     // If there was no error, this is an LB policy drop, in which case
     // we return an error; otherwise, we may retry.
@@ -2624,59 +2732,134 @@ static void pick_done(void* arg, grpc_error* error) {
   }
 }
 
+static void maybe_add_call_to_channel_interested_parties_locked(
+    grpc_call_element* elem) {
+  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  if (!calld->pollent_added_to_interested_parties) {
+    calld->pollent_added_to_interested_parties = true;
+    grpc_polling_entity_add_to_pollset_set(calld->pollent,
+                                           chand->interested_parties);
+  }
+}
+
+static void maybe_del_call_from_channel_interested_parties_locked(
+    grpc_call_element* elem) {
+  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  if (calld->pollent_added_to_interested_parties) {
+    calld->pollent_added_to_interested_parties = false;
+    grpc_polling_entity_del_from_pollset_set(calld->pollent,
+                                             chand->interested_parties);
+  }
+}
+
 // Invoked when a pick is completed to leave the client_channel combiner
 // and continue processing in the call combiner.
+// If needed, removes the call's polling entity from chand->interested_parties.
 static void pick_done_locked(grpc_call_element* elem, grpc_error* error) {
   call_data* calld = static_cast<call_data*>(elem->call_data);
+  maybe_del_call_from_channel_interested_parties_locked(elem);
   GRPC_CLOSURE_INIT(&calld->pick_closure, pick_done, elem,
                     grpc_schedule_on_exec_ctx);
   GRPC_CLOSURE_SCHED(&calld->pick_closure, error);
 }
 
-// A wrapper around pick_done_locked() that is used in cases where
-// either (a) the pick was deferred pending a resolver result or (b) the
-// pick was done asynchronously.  Removes the call's polling entity from
-// chand->interested_parties before invoking pick_done_locked().
-static void async_pick_done_locked(grpc_call_element* elem, grpc_error* error) {
-  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
-  call_data* calld = static_cast<call_data*>(elem->call_data);
-  grpc_polling_entity_del_from_pollset_set(calld->pollent,
-                                           chand->interested_parties);
-  pick_done_locked(elem, error);
-}
+namespace grpc_core {
 
-// Note: This runs under the client_channel combiner, but will NOT be
-// holding the call combiner.
-static void pick_callback_cancel_locked(void* arg, grpc_error* error) {
-  grpc_call_element* elem = static_cast<grpc_call_element*>(arg);
-  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
-  call_data* calld = static_cast<call_data*>(elem->call_data);
-  // Note: chand->lb_policy may have changed since we started our pick,
-  // in which case we will be cancelling the pick on a policy other than
-  // the one we started it on.  However, this will just be a no-op.
-  if (error != GRPC_ERROR_NONE && chand->lb_policy != nullptr) {
+// Performs subchannel pick via LB policy.
+class LbPicker {
+ public:
+  // Starts a pick on chand->lb_policy.
+  static void StartLocked(grpc_call_element* elem) {
+    channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+    call_data* calld = static_cast<call_data*>(elem->call_data);
     if (grpc_client_channel_trace.enabled()) {
-      gpr_log(GPR_INFO, "chand=%p calld=%p: cancelling pick from LB policy %p",
+      gpr_log(GPR_INFO, "chand=%p calld=%p: starting pick on lb_policy=%p",
               chand, calld, chand->lb_policy.get());
     }
-    chand->lb_policy->CancelPickLocked(&calld->pick, GRPC_ERROR_REF(error));
+    // If this is a retry, use the send_initial_metadata payload that
+    // we've cached; otherwise, use the pending batch.  The
+    // send_initial_metadata batch will be the first pending batch in the
+    // list, as set by get_batch_index() above.
+    calld->pick.initial_metadata =
+        calld->seen_send_initial_metadata
+            ? &calld->send_initial_metadata
+            : calld->pending_batches[0]
+                  .batch->payload->send_initial_metadata.send_initial_metadata;
+    calld->pick.initial_metadata_flags =
+        calld->seen_send_initial_metadata
+            ? calld->send_initial_metadata_flags
+            : calld->pending_batches[0]
+                  .batch->payload->send_initial_metadata
+                  .send_initial_metadata_flags;
+    GRPC_CLOSURE_INIT(&calld->pick_closure, &LbPicker::DoneLocked, elem,
+                      grpc_combiner_scheduler(chand->combiner));
+    calld->pick.on_complete = &calld->pick_closure;
+    GRPC_CALL_STACK_REF(calld->owning_call, "pick_callback");
+    grpc_error* error = GRPC_ERROR_NONE;
+    const bool pick_done = chand->lb_policy->PickLocked(&calld->pick, &error);
+    if (GPR_LIKELY(pick_done)) {
+      // Pick completed synchronously.
+      if (grpc_client_channel_trace.enabled()) {
+        gpr_log(GPR_INFO, "chand=%p calld=%p: pick completed synchronously",
+                chand, calld);
+      }
+      pick_done_locked(elem, error);
+      GRPC_CALL_STACK_UNREF(calld->owning_call, "pick_callback");
+    } else {
+      // Pick will be returned asynchronously.
+      // Add the polling entity from call_data to the channel_data's
+      // interested_parties, so that the I/O of the LB policy can be done
+      // under it.  It will be removed in pick_done_locked().
+      maybe_add_call_to_channel_interested_parties_locked(elem);
+      // Request notification on call cancellation.
+      GRPC_CALL_STACK_REF(calld->owning_call, "pick_callback_cancel");
+      grpc_call_combiner_set_notify_on_cancel(
+          calld->call_combiner,
+          GRPC_CLOSURE_INIT(&calld->pick_cancel_closure,
+                            &LbPicker::CancelLocked, elem,
+                            grpc_combiner_scheduler(chand->combiner)));
+    }
   }
-  GRPC_CALL_STACK_UNREF(calld->owning_call, "pick_callback_cancel");
-}
 
-// Callback invoked by LoadBalancingPolicy::PickLocked() for async picks.
-// Unrefs the LB policy and invokes async_pick_done_locked().
-static void pick_callback_done_locked(void* arg, grpc_error* error) {
-  grpc_call_element* elem = static_cast<grpc_call_element*>(arg);
-  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
-  call_data* calld = static_cast<call_data*>(elem->call_data);
-  if (grpc_client_channel_trace.enabled()) {
-    gpr_log(GPR_INFO, "chand=%p calld=%p: pick completed asynchronously", chand,
-            calld);
+ private:
+  // Callback invoked by LoadBalancingPolicy::PickLocked() for async picks.
+  // Unrefs the LB policy and invokes pick_done_locked().
+  static void DoneLocked(void* arg, grpc_error* error) {
+    grpc_call_element* elem = static_cast<grpc_call_element*>(arg);
+    channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+    call_data* calld = static_cast<call_data*>(elem->call_data);
+    if (grpc_client_channel_trace.enabled()) {
+      gpr_log(GPR_INFO, "chand=%p calld=%p: pick completed asynchronously",
+              chand, calld);
+    }
+    pick_done_locked(elem, GRPC_ERROR_REF(error));
+    GRPC_CALL_STACK_UNREF(calld->owning_call, "pick_callback");
   }
-  async_pick_done_locked(elem, GRPC_ERROR_REF(error));
-  GRPC_CALL_STACK_UNREF(calld->owning_call, "pick_callback");
-}
+
+  // Note: This runs under the client_channel combiner, but will NOT be
+  // holding the call combiner.
+  static void CancelLocked(void* arg, grpc_error* error) {
+    grpc_call_element* elem = static_cast<grpc_call_element*>(arg);
+    channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+    call_data* calld = static_cast<call_data*>(elem->call_data);
+    // Note: chand->lb_policy may have changed since we started our pick,
+    // in which case we will be cancelling the pick on a policy other than
+    // the one we started it on.  However, this will just be a no-op.
+    if (GPR_UNLIKELY(error != GRPC_ERROR_NONE && chand->lb_policy != nullptr)) {
+      if (grpc_client_channel_trace.enabled()) {
+        gpr_log(GPR_INFO,
+                "chand=%p calld=%p: cancelling pick from LB policy %p", chand,
+                calld, chand->lb_policy.get());
+      }
+      chand->lb_policy->CancelPickLocked(&calld->pick, GRPC_ERROR_REF(error));
+    }
+    GRPC_CALL_STACK_UNREF(calld->owning_call, "pick_callback_cancel");
+  }
+};
+
+}  // namespace grpc_core
 
 // Applies service config to the call.  Must be invoked once we know
 // that the resolver has returned results to the channel.
@@ -2706,6 +2889,24 @@ static void apply_service_config_to_call_locked(grpc_call_element* elem) {
           grpc_deadline_state_reset(elem, calld->deadline);
         }
       }
+      // If the service config set wait_for_ready and the application
+      // did not explicitly set it, use the value from the service config.
+      uint32_t* send_initial_metadata_flags =
+          &calld->pending_batches[0]
+               .batch->payload->send_initial_metadata
+               .send_initial_metadata_flags;
+      if (GPR_UNLIKELY(
+              calld->method_params->wait_for_ready() !=
+                  ClientChannelMethodParams::WAIT_FOR_READY_UNSET &&
+              !(*send_initial_metadata_flags &
+                GRPC_INITIAL_METADATA_WAIT_FOR_READY_EXPLICITLY_SET))) {
+        if (calld->method_params->wait_for_ready() ==
+            ClientChannelMethodParams::WAIT_FOR_READY_TRUE) {
+          *send_initial_metadata_flags |= GRPC_INITIAL_METADATA_WAIT_FOR_READY;
+        } else {
+          *send_initial_metadata_flags &= ~GRPC_INITIAL_METADATA_WAIT_FOR_READY;
+        }
+      }
     }
   }
   // If no retry policy, disable retries.
@@ -2716,214 +2917,164 @@ static void apply_service_config_to_call_locked(grpc_call_element* elem) {
   }
 }
 
-// Starts a pick on chand->lb_policy.
-// Returns true if pick is completed synchronously.
-static bool pick_callback_start_locked(grpc_call_element* elem) {
-  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+// Invoked once resolver results are available.
+static void process_service_config_and_start_lb_pick_locked(
+    grpc_call_element* elem) {
   call_data* calld = static_cast<call_data*>(elem->call_data);
-  if (grpc_client_channel_trace.enabled()) {
-    gpr_log(GPR_INFO, "chand=%p calld=%p: starting pick on lb_policy=%p", chand,
-            calld, chand->lb_policy.get());
-  }
   // Only get service config data on the first attempt.
-  if (calld->num_attempts_completed == 0) {
+  if (GPR_LIKELY(calld->num_attempts_completed == 0)) {
     apply_service_config_to_call_locked(elem);
   }
-  // If the application explicitly set wait_for_ready, use that.
-  // Otherwise, if the service config specified a value for this
-  // method, use that.
-  //
-  // The send_initial_metadata batch will be the first one in the list,
-  // as set by get_batch_index() above.
-  calld->pick.initial_metadata =
-      calld->seen_send_initial_metadata
-          ? &calld->send_initial_metadata
-          : calld->pending_batches[0]
-                .batch->payload->send_initial_metadata.send_initial_metadata;
-  uint32_t send_initial_metadata_flags =
-      calld->seen_send_initial_metadata
-          ? calld->send_initial_metadata_flags
-          : calld->pending_batches[0]
-                .batch->payload->send_initial_metadata
-                .send_initial_metadata_flags;
-  const bool wait_for_ready_set_from_api =
-      send_initial_metadata_flags &
-      GRPC_INITIAL_METADATA_WAIT_FOR_READY_EXPLICITLY_SET;
-  const bool wait_for_ready_set_from_service_config =
-      calld->method_params != nullptr &&
-      calld->method_params->wait_for_ready() !=
-          ClientChannelMethodParams::WAIT_FOR_READY_UNSET;
-  if (!wait_for_ready_set_from_api && wait_for_ready_set_from_service_config) {
-    if (calld->method_params->wait_for_ready() ==
-        ClientChannelMethodParams::WAIT_FOR_READY_TRUE) {
-      send_initial_metadata_flags |= GRPC_INITIAL_METADATA_WAIT_FOR_READY;
-    } else {
-      send_initial_metadata_flags &= ~GRPC_INITIAL_METADATA_WAIT_FOR_READY;
-    }
-  }
-  calld->pick.initial_metadata_flags = send_initial_metadata_flags;
-  GRPC_CLOSURE_INIT(&calld->pick_closure, pick_callback_done_locked, elem,
-                    grpc_combiner_scheduler(chand->combiner));
-  calld->pick.on_complete = &calld->pick_closure;
-  GRPC_CALL_STACK_REF(calld->owning_call, "pick_callback");
-  const bool pick_done = chand->lb_policy->PickLocked(&calld->pick);
-  if (pick_done) {
-    // Pick completed synchronously.
-    if (grpc_client_channel_trace.enabled()) {
-      gpr_log(GPR_INFO, "chand=%p calld=%p: pick completed synchronously",
-              chand, calld);
-    }
-    GRPC_CALL_STACK_UNREF(calld->owning_call, "pick_callback");
-  } else {
-    GRPC_CALL_STACK_REF(calld->owning_call, "pick_callback_cancel");
-    grpc_call_combiner_set_notify_on_cancel(
-        calld->call_combiner,
-        GRPC_CLOSURE_INIT(&calld->pick_cancel_closure,
-                          pick_callback_cancel_locked, elem,
-                          grpc_combiner_scheduler(chand->combiner)));
-  }
-  return pick_done;
+  // Start LB pick.
+  grpc_core::LbPicker::StartLocked(elem);
 }
 
-typedef struct {
-  grpc_call_element* elem;
-  bool finished;
-  grpc_closure closure;
-  grpc_closure cancel_closure;
-} pick_after_resolver_result_args;
-
-// Note: This runs under the client_channel combiner, but will NOT be
-// holding the call combiner.
-static void pick_after_resolver_result_cancel_locked(void* arg,
-                                                     grpc_error* error) {
-  pick_after_resolver_result_args* args =
-      static_cast<pick_after_resolver_result_args*>(arg);
-  if (args->finished) {
-    gpr_free(args);
-    return;
-  }
-  // If we don't yet have a resolver result, then a closure for
-  // pick_after_resolver_result_done_locked() will have been added to
-  // chand->waiting_for_resolver_result_closures, and it may not be invoked
-  // until after this call has been destroyed.  We mark the operation as
-  // finished, so that when pick_after_resolver_result_done_locked()
-  // is called, it will be a no-op.  We also immediately invoke
-  // async_pick_done_locked() to propagate the error back to the caller.
-  args->finished = true;
-  grpc_call_element* elem = args->elem;
-  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
-  call_data* calld = static_cast<call_data*>(elem->call_data);
-  if (grpc_client_channel_trace.enabled()) {
-    gpr_log(GPR_INFO,
-            "chand=%p calld=%p: cancelling pick waiting for resolver result",
-            chand, calld);
-  }
-  // Note: Although we are not in the call combiner here, we are
-  // basically stealing the call combiner from the pending pick, so
-  // it's safe to call async_pick_done_locked() here -- we are
-  // essentially calling it here instead of calling it in
-  // pick_after_resolver_result_done_locked().
-  async_pick_done_locked(elem, GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
-                                   "Pick cancelled", &error, 1));
-}
-
-static void pick_after_resolver_result_done_locked(void* arg,
-                                                   grpc_error* error) {
-  pick_after_resolver_result_args* args =
-      static_cast<pick_after_resolver_result_args*>(arg);
-  if (args->finished) {
-    /* cancelled, do nothing */
-    if (grpc_client_channel_trace.enabled()) {
-      gpr_log(GPR_INFO, "call cancelled before resolver result");
-    }
-    gpr_free(args);
-    return;
-  }
-  args->finished = true;
-  grpc_call_element* elem = args->elem;
-  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
-  call_data* calld = static_cast<call_data*>(elem->call_data);
-  if (error != GRPC_ERROR_NONE) {
+namespace grpc_core {
+
+// Handles waiting for a resolver result.
+// Used only for the first call on an idle channel.
+class ResolverResultWaiter {
+ public:
+  explicit ResolverResultWaiter(grpc_call_element* elem) : elem_(elem) {
+    channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+    call_data* calld = static_cast<call_data*>(elem->call_data);
     if (grpc_client_channel_trace.enabled()) {
-      gpr_log(GPR_INFO, "chand=%p calld=%p: resolver failed to return data",
+      gpr_log(GPR_INFO,
+              "chand=%p calld=%p: deferring pick pending resolver result",
               chand, calld);
     }
-    async_pick_done_locked(elem, GRPC_ERROR_REF(error));
-  } else if (chand->resolver == nullptr) {
-    // Shutting down.
-    if (grpc_client_channel_trace.enabled()) {
-      gpr_log(GPR_INFO, "chand=%p calld=%p: resolver disconnected", chand,
-              calld);
+    // Add closure to be run when a resolver result is available.
+    GRPC_CLOSURE_INIT(&done_closure_, &ResolverResultWaiter::DoneLocked, this,
+                      grpc_combiner_scheduler(chand->combiner));
+    AddToWaitingList();
+    // Set cancellation closure, so that we abort if the call is cancelled.
+    GRPC_CLOSURE_INIT(&cancel_closure_, &ResolverResultWaiter::CancelLocked,
+                      this, grpc_combiner_scheduler(chand->combiner));
+    grpc_call_combiner_set_notify_on_cancel(calld->call_combiner,
+                                            &cancel_closure_);
+  }
+
+ private:
+  // Adds closure_ to chand->waiting_for_resolver_result_closures.
+  void AddToWaitingList() {
+    channel_data* chand = static_cast<channel_data*>(elem_->channel_data);
+    grpc_closure_list_append(&chand->waiting_for_resolver_result_closures,
+                             &done_closure_, GRPC_ERROR_NONE);
+  }
+
+  // Invoked when a resolver result is available.
+  static void DoneLocked(void* arg, grpc_error* error) {
+    ResolverResultWaiter* self = static_cast<ResolverResultWaiter*>(arg);
+    // If CancelLocked() has already run, delete ourselves without doing
+    // anything.  Note that the call stack may have already been destroyed,
+    // so it's not safe to access anything in elem_.
+    if (GPR_UNLIKELY(self->finished_)) {
+      if (grpc_client_channel_trace.enabled()) {
+        gpr_log(GPR_INFO, "call cancelled before resolver result");
+      }
+      Delete(self);
+      return;
     }
-    async_pick_done_locked(
-        elem, GRPC_ERROR_CREATE_FROM_STATIC_STRING("Disconnected"));
-  } else if (chand->lb_policy == nullptr) {
-    // Transient resolver failure.
-    // If call has wait_for_ready=true, try again; otherwise, fail.
-    uint32_t send_initial_metadata_flags =
-        calld->seen_send_initial_metadata
-            ? calld->send_initial_metadata_flags
-            : calld->pending_batches[0]
-                  .batch->payload->send_initial_metadata
-                  .send_initial_metadata_flags;
-    if (send_initial_metadata_flags & GRPC_INITIAL_METADATA_WAIT_FOR_READY) {
+    // Otherwise, process the resolver result.
+    grpc_call_element* elem = self->elem_;
+    channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+    call_data* calld = static_cast<call_data*>(elem->call_data);
+    if (GPR_UNLIKELY(error != GRPC_ERROR_NONE)) {
       if (grpc_client_channel_trace.enabled()) {
-        gpr_log(GPR_INFO,
-                "chand=%p calld=%p: resolver returned but no LB policy; "
-                "wait_for_ready=true; trying again",
+        gpr_log(GPR_INFO, "chand=%p calld=%p: resolver failed to return data",
                 chand, calld);
       }
-      pick_after_resolver_result_start_locked(elem);
+      pick_done_locked(elem, GRPC_ERROR_REF(error));
+    } else if (GPR_UNLIKELY(chand->resolver == nullptr)) {
+      // Shutting down.
+      if (grpc_client_channel_trace.enabled()) {
+        gpr_log(GPR_INFO, "chand=%p calld=%p: resolver disconnected", chand,
+                calld);
+      }
+      pick_done_locked(elem,
+                       GRPC_ERROR_CREATE_FROM_STATIC_STRING("Disconnected"));
+    } else if (GPR_UNLIKELY(chand->lb_policy == nullptr)) {
+      // Transient resolver failure.
+      // If call has wait_for_ready=true, try again; otherwise, fail.
+      uint32_t send_initial_metadata_flags =
+          calld->seen_send_initial_metadata
+              ? calld->send_initial_metadata_flags
+              : calld->pending_batches[0]
+                    .batch->payload->send_initial_metadata
+                    .send_initial_metadata_flags;
+      if (send_initial_metadata_flags & GRPC_INITIAL_METADATA_WAIT_FOR_READY) {
+        if (grpc_client_channel_trace.enabled()) {
+          gpr_log(GPR_INFO,
+                  "chand=%p calld=%p: resolver returned but no LB policy; "
+                  "wait_for_ready=true; trying again",
+                  chand, calld);
+        }
+        // Re-add ourselves to the waiting list.
+        self->AddToWaitingList();
+        // Return early so that we don't set finished_ to true below.
+        return;
+      } else {
+        if (grpc_client_channel_trace.enabled()) {
+          gpr_log(GPR_INFO,
+                  "chand=%p calld=%p: resolver returned but no LB policy; "
+                  "wait_for_ready=false; failing",
+                  chand, calld);
+        }
+        pick_done_locked(
+            elem,
+            grpc_error_set_int(
+                GRPC_ERROR_CREATE_FROM_STATIC_STRING("Name resolution failure"),
+                GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNAVAILABLE));
+      }
     } else {
       if (grpc_client_channel_trace.enabled()) {
-        gpr_log(GPR_INFO,
-                "chand=%p calld=%p: resolver returned but no LB policy; "
-                "wait_for_ready=false; failing",
+        gpr_log(GPR_INFO, "chand=%p calld=%p: resolver returned, doing LB pick",
                 chand, calld);
       }
-      async_pick_done_locked(
-          elem,
-          grpc_error_set_int(
-              GRPC_ERROR_CREATE_FROM_STATIC_STRING("Name resolution failure"),
-              GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNAVAILABLE));
+      process_service_config_and_start_lb_pick_locked(elem);
     }
-  } else {
-    if (grpc_client_channel_trace.enabled()) {
-      gpr_log(GPR_INFO, "chand=%p calld=%p: resolver returned, doing pick",
-              chand, calld);
+    self->finished_ = true;
+  }
+
+  // Invoked when the call is cancelled.
+  // Note: This runs under the client_channel combiner, but will NOT be
+  // holding the call combiner.
+  static void CancelLocked(void* arg, grpc_error* error) {
+    ResolverResultWaiter* self = static_cast<ResolverResultWaiter*>(arg);
+    // If DoneLocked() has already run, delete ourselves without doing anything.
+    if (GPR_LIKELY(self->finished_)) {
+      Delete(self);
+      return;
     }
-    if (pick_callback_start_locked(elem)) {
-      // Even if the LB policy returns a result synchronously, we have
-      // already added our polling entity to chand->interested_parties
-      // in order to wait for the resolver result, so we need to
-      // remove it here.  Therefore, we call async_pick_done_locked()
-      // instead of pick_done_locked().
-      async_pick_done_locked(elem, GRPC_ERROR_NONE);
+    // If we are being cancelled, immediately invoke pick_done_locked()
+    // to propagate the error back to the caller.
+    if (GPR_UNLIKELY(error != GRPC_ERROR_NONE)) {
+      grpc_call_element* elem = self->elem_;
+      channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+      call_data* calld = static_cast<call_data*>(elem->call_data);
+      if (grpc_client_channel_trace.enabled()) {
+        gpr_log(GPR_INFO,
+                "chand=%p calld=%p: cancelling call waiting for name "
+                "resolution",
+                chand, calld);
+      }
+      // Note: Although we are not in the call combiner here, we are
+      // basically stealing the call combiner from the pending pick, so
+      // it's safe to call pick_done_locked() here -- we are essentially
+      // calling it here instead of calling it in DoneLocked().
+      pick_done_locked(elem, GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
+                                 "Pick cancelled", &error, 1));
     }
+    self->finished_ = true;
   }
-}
 
-static void pick_after_resolver_result_start_locked(grpc_call_element* elem) {
-  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
-  call_data* calld = static_cast<call_data*>(elem->call_data);
-  if (grpc_client_channel_trace.enabled()) {
-    gpr_log(GPR_INFO,
-            "chand=%p calld=%p: deferring pick pending resolver result", chand,
-            calld);
-  }
-  pick_after_resolver_result_args* args =
-      static_cast<pick_after_resolver_result_args*>(gpr_zalloc(sizeof(*args)));
-  args->elem = elem;
-  GRPC_CLOSURE_INIT(&args->closure, pick_after_resolver_result_done_locked,
-                    args, grpc_combiner_scheduler(chand->combiner));
-  grpc_closure_list_append(&chand->waiting_for_resolver_result_closures,
-                           &args->closure, GRPC_ERROR_NONE);
-  grpc_call_combiner_set_notify_on_cancel(
-      calld->call_combiner,
-      GRPC_CLOSURE_INIT(&args->cancel_closure,
-                        pick_after_resolver_result_cancel_locked, args,
-                        grpc_combiner_scheduler(chand->combiner)));
-}
+  grpc_call_element* elem_;
+  grpc_closure done_closure_;
+  grpc_closure cancel_closure_;
+  bool finished_ = false;
+};
+
+}  // namespace grpc_core
 
 static void start_pick_locked(void* arg, grpc_error* ignored) {
   grpc_call_element* elem = static_cast<grpc_call_element*>(arg);
@@ -2931,32 +3082,25 @@ static void start_pick_locked(void* arg, grpc_error* ignored) {
   channel_data* chand = static_cast<channel_data*>(elem->channel_data);
   GPR_ASSERT(calld->pick.connected_subchannel == nullptr);
   GPR_ASSERT(calld->subchannel_call == nullptr);
-  if (chand->lb_policy != nullptr) {
-    // We already have an LB policy, so ask it for a pick.
-    if (pick_callback_start_locked(elem)) {
-      // Pick completed synchronously.
-      pick_done_locked(elem, GRPC_ERROR_NONE);
-      return;
-    }
+  if (GPR_LIKELY(chand->lb_policy != nullptr)) {
+    // We already have resolver results, so process the service config
+    // and start an LB pick.
+    process_service_config_and_start_lb_pick_locked(elem);
+  } else if (GPR_UNLIKELY(chand->resolver == nullptr)) {
+    pick_done_locked(elem,
+                     GRPC_ERROR_CREATE_FROM_STATIC_STRING("Disconnected"));
   } else {
     // We do not yet have an LB policy, so wait for a resolver result.
-    if (chand->resolver == nullptr) {
-      pick_done_locked(elem,
-                       GRPC_ERROR_CREATE_FROM_STATIC_STRING("Disconnected"));
-      return;
-    }
-    if (!chand->started_resolving) {
+    if (GPR_UNLIKELY(!chand->started_resolving)) {
       start_resolving_locked(chand);
     }
-    pick_after_resolver_result_start_locked(elem);
+    // Create a new waiter, which will delete itself when done.
+    grpc_core::New<grpc_core::ResolverResultWaiter>(elem);
+    // Add the polling entity from call_data to the channel_data's
+    // interested_parties, so that the I/O of the resolver can be done
+    // under it.  It will be removed in pick_done_locked().
+    maybe_add_call_to_channel_interested_parties_locked(elem);
   }
-  // We need to wait for either a resolver result or for an async result
-  // from the LB policy.  Add the polling entity from call_data to the
-  // channel_data's interested_parties, so that the I/O of the LB policy
-  // and resolver can be done under it.  The polling entity will be
-  // removed in async_pick_done_locked().
-  grpc_polling_entity_add_to_pollset_set(calld->pollent,
-                                         chand->interested_parties);
 }
 
 //
@@ -2968,11 +3112,11 @@ static void cc_start_transport_stream_op_batch(
   GPR_TIMER_SCOPE("cc_start_transport_stream_op_batch", 0);
   call_data* calld = static_cast<call_data*>(elem->call_data);
   channel_data* chand = static_cast<channel_data*>(elem->channel_data);
-  if (chand->deadline_checking_enabled) {
+  if (GPR_LIKELY(chand->deadline_checking_enabled)) {
     grpc_deadline_state_client_start_transport_stream_op_batch(elem, batch);
   }
   // If we've previously been cancelled, immediately fail any new batches.
-  if (calld->cancel_error != GRPC_ERROR_NONE) {
+  if (GPR_UNLIKELY(calld->cancel_error != GRPC_ERROR_NONE)) {
     if (grpc_client_channel_trace.enabled()) {
       gpr_log(GPR_INFO, "chand=%p calld=%p: failing batch with error: %s",
               chand, calld, grpc_error_string(calld->cancel_error));
@@ -2983,7 +3127,7 @@ static void cc_start_transport_stream_op_batch(
     return;
   }
   // Handle cancellation.
-  if (batch->cancel_stream) {
+  if (GPR_UNLIKELY(batch->cancel_stream)) {
     // Stash a copy of cancel_error in our call data, so that we can use
     // it for subsequent operations.  This ensures that if the call is
     // cancelled before any batches are passed down (e.g., if the deadline
@@ -3029,7 +3173,7 @@ static void cc_start_transport_stream_op_batch(
   // We do not yet have a subchannel call.
   // For batches containing a send_initial_metadata op, enter the channel
   // combiner to start a pick.
-  if (batch->send_initial_metadata) {
+  if (GPR_LIKELY(batch->send_initial_metadata)) {
     if (grpc_client_channel_trace.enabled()) {
       gpr_log(GPR_INFO, "chand=%p calld=%p: entering client_channel combiner",
               chand, calld);
@@ -3042,7 +3186,7 @@ static void cc_start_transport_stream_op_batch(
     // For all other batches, release the call combiner.
     if (grpc_client_channel_trace.enabled()) {
       gpr_log(GPR_INFO,
-              "chand=%p calld=%p: saved batch, yeilding call combiner", chand,
+              "chand=%p calld=%p: saved batch, yielding call combiner", chand,
               calld);
     }
     GRPC_CALL_COMBINER_STOP(calld->call_combiner,
@@ -3062,7 +3206,7 @@ static grpc_error* cc_init_call_elem(grpc_call_element* elem,
   calld->arena = args->arena;
   calld->owning_call = args->call_stack;
   calld->call_combiner = args->call_combiner;
-  if (chand->deadline_checking_enabled) {
+  if (GPR_LIKELY(chand->deadline_checking_enabled)) {
     grpc_deadline_state_init(elem, args->call_stack, args->call_combiner,
                              calld->deadline);
   }
@@ -3077,14 +3221,14 @@ static void cc_destroy_call_elem(grpc_call_element* elem,
                                  grpc_closure* then_schedule_closure) {
   call_data* calld = static_cast<call_data*>(elem->call_data);
   channel_data* chand = static_cast<channel_data*>(elem->channel_data);
-  if (chand->deadline_checking_enabled) {
+  if (GPR_LIKELY(chand->deadline_checking_enabled)) {
     grpc_deadline_state_destroy(elem);
   }
   grpc_slice_unref_internal(calld->path);
   calld->retry_throttle_data.reset();
   calld->method_params.reset();
   GRPC_ERROR_UNREF(calld->cancel_error);
-  if (calld->subchannel_call != nullptr) {
+  if (GPR_LIKELY(calld->subchannel_call != nullptr)) {
     grpc_subchannel_call_set_cleanup_closure(calld->subchannel_call,
                                              then_schedule_closure);
     then_schedule_closure = nullptr;
@@ -3094,7 +3238,7 @@ static void cc_destroy_call_elem(grpc_call_element* elem,
   for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) {
     GPR_ASSERT(calld->pending_batches[i].batch == nullptr);
   }
-  if (calld->pick.connected_subchannel != nullptr) {
+  if (GPR_LIKELY(calld->pick.connected_subchannel != nullptr)) {
     calld->pick.connected_subchannel.reset();
   }
   for (size_t i = 0; i < GRPC_CONTEXT_COUNT; ++i) {
@@ -3144,6 +3288,16 @@ static void try_to_connect_locked(void* arg, grpc_error* error_ignored) {
   GRPC_CHANNEL_STACK_UNREF(chand->owning_stack, "try_to_connect");
 }
 
+void grpc_client_channel_populate_child_refs(
+    grpc_channel_element* elem, grpc_core::ChildRefsList* child_subchannels,
+    grpc_core::ChildRefsList* child_channels) {
+  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+  if (chand->lb_policy != nullptr) {
+    chand->lb_policy->FillChildRefsForChannelz(child_subchannels,
+                                               child_channels);
+  }
+}
+
 grpc_connectivity_state grpc_client_channel_check_connectivity_state(
     grpc_channel_element* elem, int try_to_connect) {
   channel_data* chand = static_cast<channel_data*>(elem->channel_data);
@@ -3242,7 +3396,7 @@ static void on_external_watch_complete_locked(void* arg, grpc_error* error) {
                            "external_connectivity_watcher");
   external_connectivity_watcher_list_remove(w->chand, w);
   gpr_free(w);
-  GRPC_CLOSURE_RUN(follow_up, GRPC_ERROR_REF(error));
+  GRPC_CLOSURE_SCHED(follow_up, GRPC_ERROR_REF(error));
 }
 
 static void watch_connectivity_state_locked(void* arg,
diff --git a/src/core/ext/filters/client_channel/client_channel.h b/src/core/ext/filters/client_channel/client_channel.h
index a21e5623a7..0b44a17562 100644
--- a/src/core/ext/filters/client_channel/client_channel.h
+++ b/src/core/ext/filters/client_channel/client_channel.h
@@ -21,6 +21,7 @@
 
 #include <grpc/support/port_platform.h>
 
+#include "src/core/ext/filters/client_channel/client_channel_channelz.h"
 #include "src/core/ext/filters/client_channel/client_channel_factory.h"
 #include "src/core/ext/filters/client_channel/resolver.h"
 #include "src/core/lib/channel/channel_stack.h"
@@ -39,6 +40,10 @@ extern grpc_core::TraceFlag grpc_client_channel_trace;
 
 extern const grpc_channel_filter grpc_client_channel_filter;
 
+void grpc_client_channel_populate_child_refs(
+    grpc_channel_element* elem, grpc_core::ChildRefsList* child_subchannels,
+    grpc_core::ChildRefsList* child_channels);
+
 grpc_connectivity_state grpc_client_channel_check_connectivity_state(
     grpc_channel_element* elem, int try_to_connect);
 
diff --git a/src/core/ext/filters/client_channel/client_channel_channelz.cc b/src/core/ext/filters/client_channel/client_channel_channelz.cc
new file mode 100644
index 0000000000..7e8f59bcd3
--- /dev/null
+++ b/src/core/ext/filters/client_channel/client_channel_channelz.cc
@@ -0,0 +1,173 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/ext/filters/client_channel/client_channel.h"
+#include "src/core/ext/filters/client_channel/client_channel_channelz.h"
+#include "src/core/lib/channel/channelz_registry.h"
+#include "src/core/lib/gpr/useful.h"
+#include "src/core/lib/surface/channel.h"
+#include "src/core/lib/transport/connectivity_state.h"
+
+#include <grpc/support/string_util.h>
+
+namespace grpc_core {
+namespace channelz {
+namespace {
+
+void* client_channel_channelz_copy(void* p) { return p; }
+
+void client_channel_channelz_destroy(void* p) {}
+
+int client_channel_channelz_cmp(void* a, void* b) { return GPR_ICMP(a, b); }
+
+}  // namespace
+
+static const grpc_arg_pointer_vtable client_channel_channelz_vtable = {
+    client_channel_channelz_copy, client_channel_channelz_destroy,
+    client_channel_channelz_cmp};
+
+ClientChannelNode::ClientChannelNode(grpc_channel* channel,
+                                     size_t channel_tracer_max_nodes,
+                                     bool is_top_level_channel)
+    : ChannelNode(channel, channel_tracer_max_nodes, is_top_level_channel) {
+  client_channel_ =
+      grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel));
+  GPR_ASSERT(client_channel_->filter == &grpc_client_channel_filter);
+}
+
+void ClientChannelNode::PopulateConnectivityState(grpc_json* json) {
+  grpc_connectivity_state state;
+  if (ChannelIsDestroyed()) {
+    state = GRPC_CHANNEL_SHUTDOWN;
+  } else {
+    state =
+        grpc_client_channel_check_connectivity_state(client_channel_, false);
+  }
+  json = grpc_json_create_child(nullptr, json, "state", nullptr,
+                                GRPC_JSON_OBJECT, false);
+  grpc_json_create_child(nullptr, json, "state",
+                         grpc_connectivity_state_name(state), GRPC_JSON_STRING,
+                         false);
+}
+
+void ClientChannelNode::PopulateChildRefs(grpc_json* json) {
+  ChildRefsList child_subchannels;
+  ChildRefsList child_channels;
+  grpc_json* json_iterator = nullptr;
+  grpc_client_channel_populate_child_refs(client_channel_, &child_subchannels,
+                                          &child_channels);
+  if (!child_subchannels.empty()) {
+    grpc_json* array_parent = grpc_json_create_child(
+        nullptr, json, "subchannelRef", nullptr, GRPC_JSON_ARRAY, false);
+    for (size_t i = 0; i < child_subchannels.size(); ++i) {
+      json_iterator =
+          grpc_json_create_child(json_iterator, array_parent, nullptr, nullptr,
+                                 GRPC_JSON_OBJECT, false);
+      grpc_json_add_number_string_child(json_iterator, nullptr, "subchannelId",
+                                        child_subchannels[i]);
+    }
+  }
+  if (!child_channels.empty()) {
+    grpc_json* array_parent = grpc_json_create_child(
+        nullptr, json, "channelRef", nullptr, GRPC_JSON_ARRAY, false);
+    json_iterator = nullptr;
+    for (size_t i = 0; i < child_channels.size(); ++i) {
+      json_iterator =
+          grpc_json_create_child(json_iterator, array_parent, nullptr, nullptr,
+                                 GRPC_JSON_OBJECT, false);
+      grpc_json_add_number_string_child(json_iterator, nullptr, "channelId",
+                                        child_channels[i]);
+    }
+  }
+}
+
+grpc_arg ClientChannelNode::CreateChannelArg() {
+  return grpc_channel_arg_pointer_create(
+      const_cast<char*>(GRPC_ARG_CHANNELZ_CHANNEL_NODE_CREATION_FUNC),
+      reinterpret_cast<void*>(MakeClientChannelNode),
+      &client_channel_channelz_vtable);
+}
+
+RefCountedPtr<ChannelNode> ClientChannelNode::MakeClientChannelNode(
+    grpc_channel* channel, size_t channel_tracer_max_nodes,
+    bool is_top_level_channel) {
+  return MakeRefCounted<ClientChannelNode>(channel, channel_tracer_max_nodes,
+                                           is_top_level_channel);
+}
+
+SubchannelNode::SubchannelNode(grpc_subchannel* subchannel,
+                               size_t channel_tracer_max_nodes)
+    : BaseNode(EntityType::kSubchannel),
+      subchannel_(subchannel),
+      target_(
+          UniquePtr<char>(gpr_strdup(grpc_subchannel_get_target(subchannel_)))),
+      trace_(channel_tracer_max_nodes) {}
+
+SubchannelNode::~SubchannelNode() {}
+
+void SubchannelNode::PopulateConnectivityState(grpc_json* json) {
+  grpc_connectivity_state state;
+  if (subchannel_ == nullptr) {
+    state = GRPC_CHANNEL_SHUTDOWN;
+  } else {
+    state = grpc_subchannel_check_connectivity(subchannel_, nullptr);
+  }
+  json = grpc_json_create_child(nullptr, json, "state", nullptr,
+                                GRPC_JSON_OBJECT, false);
+  grpc_json_create_child(nullptr, json, "state",
+                         grpc_connectivity_state_name(state), GRPC_JSON_STRING,
+                         false);
+}
+
+grpc_json* SubchannelNode::RenderJson() {
+  grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT);
+  grpc_json* json = top_level_json;
+  grpc_json* json_iterator = nullptr;
+  json_iterator = grpc_json_create_child(json_iterator, json, "ref", nullptr,
+                                         GRPC_JSON_OBJECT, false);
+  json = json_iterator;
+  json_iterator = nullptr;
+  json_iterator = grpc_json_add_number_string_child(json, json_iterator,
+                                                    "subchannelId", uuid());
+  // reset json iterators to top level object
+  json = top_level_json;
+  json_iterator = nullptr;
+  // create and fill the data child.
+  grpc_json* data = grpc_json_create_child(json_iterator, json, "data", nullptr,
+                                           GRPC_JSON_OBJECT, false);
+  json = data;
+  json_iterator = nullptr;
+  PopulateConnectivityState(json);
+  GPR_ASSERT(target_.get() != nullptr);
+  grpc_json_create_child(nullptr, json, "target", target_.get(),
+                         GRPC_JSON_STRING, false);
+  // fill in the channel trace if applicable
+  grpc_json* trace_json = trace_.RenderJson();
+  if (trace_json != nullptr) {
+    trace_json->key = "trace";  // this object is named trace in channelz.proto
+    grpc_json_link_child(json, trace_json, nullptr);
+  }
+  // ask CallCountingHelper to populate trace and call count data.
+  call_counter_.PopulateCallCounts(json);
+  return top_level_json;
+}
+
+}  // namespace channelz
+}  // namespace grpc_core
diff --git a/src/core/ext/filters/client_channel/client_channel_channelz.h b/src/core/ext/filters/client_channel/client_channel_channelz.h
new file mode 100644
index 0000000000..8ce331e529
--- /dev/null
+++ b/src/core/ext/filters/client_channel/client_channel_channelz.h
@@ -0,0 +1,105 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_CLIENT_CHANNEL_CHANNELZ_H
+#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_CLIENT_CHANNEL_CHANNELZ_H
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/lib/channel/channel_args.h"
+#include "src/core/lib/channel/channel_stack.h"
+#include "src/core/lib/channel/channel_trace.h"
+#include "src/core/lib/channel/channelz.h"
+#include "src/core/lib/gprpp/inlined_vector.h"
+
+typedef struct grpc_subchannel grpc_subchannel;
+
+namespace grpc_core {
+
+// TODO(ncteisen), this only contains the uuids of the children for now,
+// since that is all that is strictly needed. In a future enhancement we will
+// add human readable names as in the channelz.proto
+typedef InlinedVector<intptr_t, 10> ChildRefsList;
+
+namespace channelz {
+
+// Subtype of ChannelNode that overrides and provides client_channel specific
+// functionality like querying for connectivity_state and subchannel data.
+class ClientChannelNode : public ChannelNode {
+ public:
+  static RefCountedPtr<ChannelNode> MakeClientChannelNode(
+      grpc_channel* channel, size_t channel_tracer_max_nodes,
+      bool is_top_level_channel);
+
+  ClientChannelNode(grpc_channel* channel, size_t channel_tracer_max_nodes,
+                    bool is_top_level_channel);
+  virtual ~ClientChannelNode() {}
+
+  // Overriding template methods from ChannelNode to render information that
+  // only ClientChannelNode knows about.
+  void PopulateConnectivityState(grpc_json* json) override;
+  void PopulateChildRefs(grpc_json* json) override;
+
+  // Helper to create a channel arg to ensure this type of ChannelNode is
+  // created.
+  static grpc_arg CreateChannelArg();
+
+ private:
+  grpc_channel_element* client_channel_;
+};
+
+// Handles channelz bookkeeping for sockets
+class SubchannelNode : public BaseNode {
+ public:
+  SubchannelNode(grpc_subchannel* subchannel, size_t channel_tracer_max_nodes);
+  ~SubchannelNode() override;
+
+  void MarkSubchannelDestroyed() {
+    GPR_ASSERT(subchannel_ != nullptr);
+    subchannel_ = nullptr;
+  }
+
+  grpc_json* RenderJson() override;
+
+  // proxy methods to composed classes.
+  void AddTraceEvent(ChannelTrace::Severity severity, grpc_slice data) {
+    trace_.AddTraceEvent(severity, data);
+  }
+  void AddTraceEventWithReference(ChannelTrace::Severity severity,
+                                  grpc_slice data,
+                                  RefCountedPtr<BaseNode> referenced_channel) {
+    trace_.AddTraceEventWithReference(severity, data,
+                                      std::move(referenced_channel));
+  }
+  void RecordCallStarted() { call_counter_.RecordCallStarted(); }
+  void RecordCallFailed() { call_counter_.RecordCallFailed(); }
+  void RecordCallSucceeded() { call_counter_.RecordCallSucceeded(); }
+
+ private:
+  grpc_subchannel* subchannel_;
+  UniquePtr<char> target_;
+  CallCountingHelper call_counter_;
+  ChannelTrace trace_;
+
+  void PopulateConnectivityState(grpc_json* json);
+};
+
+}  // namespace channelz
+}  // namespace grpc_core
+
+#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_CLIENT_CHANNEL_CHANNELZ_H */
diff --git a/src/core/ext/filters/client_channel/client_channel_plugin.cc b/src/core/ext/filters/client_channel/client_channel_plugin.cc
index 8385852d1b..e0784b7e5c 100644
--- a/src/core/ext/filters/client_channel/client_channel_plugin.cc
+++ b/src/core/ext/filters/client_channel/client_channel_plugin.cc
@@ -25,6 +25,7 @@
 #include <grpc/support/alloc.h>
 
 #include "src/core/ext/filters/client_channel/client_channel.h"
+#include "src/core/ext/filters/client_channel/client_channel_channelz.h"
 #include "src/core/ext/filters/client_channel/http_connect_handshaker.h"
 #include "src/core/ext/filters/client_channel/http_proxy.h"
 #include "src/core/ext/filters/client_channel/lb_policy_registry.h"
@@ -35,6 +36,14 @@
 #include "src/core/lib/surface/channel_init.h"
 
 static bool append_filter(grpc_channel_stack_builder* builder, void* arg) {
+  const grpc_channel_args* args =
+      grpc_channel_stack_builder_get_channel_arguments(builder);
+  grpc_arg args_to_add[] = {
+      grpc_core::channelz::ClientChannelNode::CreateChannelArg()};
+  grpc_channel_args* new_args = grpc_channel_args_copy_and_add(
+      args, args_to_add, GPR_ARRAY_SIZE(args_to_add));
+  grpc_channel_stack_builder_set_channel_arguments(builder, new_args);
+  grpc_channel_args_destroy(new_args);
   return grpc_channel_stack_builder_append_filter(
       builder, static_cast<const grpc_channel_filter*>(arg), nullptr, nullptr);
 }
diff --git a/src/core/ext/filters/client_channel/http_connect_handshaker.cc b/src/core/ext/filters/client_channel/http_connect_handshaker.cc
index 4e8b8b71db..7ce8da8c00 100644
--- a/src/core/ext/filters/client_channel/http_connect_handshaker.cc
+++ b/src/core/ext/filters/client_channel/http_connect_handshaker.cc
@@ -320,7 +320,7 @@ static void http_connect_handshaker_do_handshake(
   // Take a new ref to be held by the write callback.
   gpr_ref(&handshaker->refcount);
   grpc_endpoint_write(args->endpoint, &handshaker->write_buffer,
-                      &handshaker->request_done_closure);
+                      &handshaker->request_done_closure, nullptr);
   gpr_mu_unlock(&handshaker->mu);
 }
 
diff --git a/src/core/ext/filters/client_channel/http_proxy.cc b/src/core/ext/filters/client_channel/http_proxy.cc
index 29a6c0e367..26d3f479b7 100644
--- a/src/core/ext/filters/client_channel/http_proxy.cc
+++ b/src/core/ext/filters/client_channel/http_proxy.cc
@@ -37,17 +37,21 @@
 #include "src/core/lib/slice/b64.h"
 
 /**
- * Parses the 'http_proxy' env var and returns the proxy hostname to resolve or
- * nullptr on error. Also sets 'user_cred' to user credentials if present in the
- * 'http_proxy' env var, otherwise leaves it unchanged. It is caller's
- * responsibility to gpr_free user_cred.
+ * Parses the 'https_proxy' env var (fallback on 'http_proxy') and returns the
+ * proxy hostname to resolve or nullptr on error. Also sets 'user_cred' to user
+ * credentials if present in the 'http_proxy' env var, otherwise leaves it
+ * unchanged. It is caller's responsibility to gpr_free user_cred.
  */
 static char* get_http_proxy_server(char** user_cred) {
   GPR_ASSERT(user_cred != nullptr);
   char* proxy_name = nullptr;
-  char* uri_str = gpr_getenv("http_proxy");
   char** authority_strs = nullptr;
   size_t authority_nstrs;
+  /* Prefer using 'https_proxy'. Fallback on 'http_proxy' if it is not set. The
+   * fallback behavior can be removed if there's a demand for it.
+   */
+  char* uri_str = gpr_getenv("https_proxy");
+  if (uri_str == nullptr) uri_str = gpr_getenv("http_proxy");
   if (uri_str == nullptr) return nullptr;
   grpc_uri* uri = grpc_uri_parse(uri_str, false /* suppress_errors */);
   if (uri == nullptr || uri->authority == nullptr) {
@@ -83,11 +87,24 @@ done:
   return proxy_name;
 }
 
+/**
+ * Checks the value of GRPC_ARG_ENABLE_HTTP_PROXY to determine if http_proxy
+ * should be used.
+ */
+bool http_proxy_enabled(const grpc_channel_args* args) {
+  const grpc_arg* arg =
+      grpc_channel_args_find(args, GRPC_ARG_ENABLE_HTTP_PROXY);
+  return grpc_channel_arg_get_bool(arg, true);
+}
+
 static bool proxy_mapper_map_name(grpc_proxy_mapper* mapper,
                                   const char* server_uri,
                                   const grpc_channel_args* args,
                                   char** name_to_resolve,
                                   grpc_channel_args** new_args) {
+  if (!http_proxy_enabled(args)) {
+    return false;
+  }
   char* user_cred = nullptr;
   *name_to_resolve = get_http_proxy_server(&user_cred);
   if (*name_to_resolve == nullptr) return false;
diff --git a/src/core/ext/filters/client_channel/lb_policy.h b/src/core/ext/filters/client_channel/lb_policy.h
index 454e00a690..3c0a9c1118 100644
--- a/src/core/ext/filters/client_channel/lb_policy.h
+++ b/src/core/ext/filters/client_channel/lb_policy.h
@@ -21,6 +21,7 @@
 
 #include <grpc/support/port_platform.h>
 
+#include "src/core/ext/filters/client_channel/client_channel_channelz.h"
 #include "src/core/ext/filters/client_channel/client_channel_factory.h"
 #include "src/core/ext/filters/client_channel/subchannel.h"
 #include "src/core/lib/gprpp/abstract.h"
@@ -70,6 +71,7 @@ class LoadBalancingPolicy
     /// Storage for LB token in \a initial_metadata, or nullptr if not used.
     grpc_linked_mdelem lb_token_mdelem_storage;
     /// Closure to run when pick is complete, if not completed synchronously.
+    /// If null, pick will fail if a result is not available synchronously.
     grpc_closure* on_complete;
     /// Will be set to the selected subchannel, or nullptr on failure or when
     /// the LB policy decides to drop the call.
@@ -98,10 +100,15 @@ class LoadBalancingPolicy
   /// Finds an appropriate subchannel for a call, based on data in \a pick.
   /// \a pick must remain alive until the pick is complete.
   ///
-  /// If the pick succeeds and a result is known immediately, returns true.
-  /// Otherwise, \a pick->on_complete will be invoked once the pick is
-  /// complete with its error argument set to indicate success or failure.
-  virtual bool PickLocked(PickState* pick) GRPC_ABSTRACT;
+  /// If a result is known immediately, returns true, setting \a *error
+  /// upon failure.  Otherwise, \a pick->on_complete will be invoked once
+  /// the pick is complete with its error argument set to indicate success
+  /// or failure.
+  ///
+  /// If \a pick->on_complete is null and no result is known immediately,
+  /// a synchronous failure will be returned (i.e., \a *error will be
+  /// set and true will be returned).
+  virtual bool PickLocked(PickState* pick, grpc_error** error) GRPC_ABSTRACT;
 
   /// Cancels \a pick.
   /// The \a on_complete callback of the pending pick will be invoked with
@@ -132,17 +139,22 @@ class LoadBalancingPolicy
   virtual void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy)
       GRPC_ABSTRACT;
 
-  /// Performs a connected subchannel ping via \a ConnectedSubchannel::Ping()
-  /// against one of the connected subchannels managed by the policy.
-  /// Note: This is intended only for use in tests.
-  virtual void PingOneLocked(grpc_closure* on_initiate,
-                             grpc_closure* on_ack) GRPC_ABSTRACT;
-
   /// Tries to enter a READY connectivity state.
   /// TODO(roth): As part of restructuring how we handle IDLE state,
   /// consider whether this method is still needed.
   virtual void ExitIdleLocked() GRPC_ABSTRACT;
 
+  /// Resets connection backoff.
+  virtual void ResetBackoffLocked() GRPC_ABSTRACT;
+
+  /// Populates child_subchannels and child_channels with the uuids of this
+  /// LB policy's referenced children. This is not invoked from the
+  /// client_channel's combiner. The implementation is responsible for
+  /// providing its own synchronization.
+  virtual void FillChildRefsForChannelz(ChildRefsList* child_subchannels,
+                                        ChildRefsList* child_channels)
+      GRPC_ABSTRACT;
+
   void Orphan() override {
     // Invoke ShutdownAndUnrefLocked() inside of the combiner.
     GRPC_CLOSURE_SCHED(
@@ -162,9 +174,7 @@ class LoadBalancingPolicy
   GRPC_ABSTRACT_BASE_CLASS
 
  protected:
-  // So Delete() can access our protected dtor.
-  template <typename T>
-  friend void Delete(T*);
+  GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_DELETE
 
   explicit LoadBalancingPolicy(const Args& args);
   virtual ~LoadBalancingPolicy();
@@ -198,6 +208,12 @@ class LoadBalancingPolicy
   grpc_pollset_set* interested_parties_;
   /// Callback to force a re-resolution.
   grpc_closure* request_reresolution_;
+
+  // Dummy classes needed for alignment issues.
+  // See https://github.com/grpc/grpc/issues/16032 for context.
+  // TODO(ncteisen): remove this as soon as the issue is resolved.
+  ChildRefsList dummy_list_foo;
+  ChildRefsList dummy_list_bar;
 };
 
 }  // namespace grpc_core
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
index 18ef1f6ff5..cc259bcdbf 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
@@ -35,9 +35,10 @@ static grpc_error* init_channel_elem(grpc_channel_element* elem,
 static void destroy_channel_elem(grpc_channel_element* elem) {}
 
 namespace {
+
 struct call_data {
   // Stats object to update.
-  grpc_grpclb_client_stats* client_stats;
+  grpc_core::RefCountedPtr<grpc_core::GrpcLbClientStats> client_stats;
   // State for intercepting send_initial_metadata.
   grpc_closure on_complete_for_send;
   grpc_closure* original_on_complete_for_send;
@@ -47,6 +48,7 @@ struct call_data {
   grpc_closure* original_recv_initial_metadata_ready;
   bool recv_initial_metadata_succeeded;
 };
+
 }  // namespace
 
 static void on_complete_for_send(void* arg, grpc_error* error) {
@@ -72,11 +74,11 @@ static grpc_error* init_call_elem(grpc_call_element* elem,
   // Get stats object from context and take a ref.
   GPR_ASSERT(args->context != nullptr);
   if (args->context[GRPC_GRPCLB_CLIENT_STATS].value != nullptr) {
-    calld->client_stats =
-        grpc_grpclb_client_stats_ref(static_cast<grpc_grpclb_client_stats*>(
-            args->context[GRPC_GRPCLB_CLIENT_STATS].value));
+    calld->client_stats = static_cast<grpc_core::GrpcLbClientStats*>(
+                              args->context[GRPC_GRPCLB_CLIENT_STATS].value)
+                              ->Ref();
     // Record call started.
-    grpc_grpclb_client_stats_add_call_started(calld->client_stats);
+    calld->client_stats->AddCallStarted();
   }
   return GRPC_ERROR_NONE;
 }
@@ -88,12 +90,12 @@ static void destroy_call_elem(grpc_call_element* elem,
   if (calld->client_stats != nullptr) {
     // Record call finished, optionally setting client_failed_to_send and
     // received.
-    grpc_grpclb_client_stats_add_call_finished(
+    calld->client_stats->AddCallFinished(
         !calld->send_initial_metadata_succeeded /* client_failed_to_send */,
-        calld->recv_initial_metadata_succeeded /* known_received */,
-        calld->client_stats);
+        calld->recv_initial_metadata_succeeded /* known_received */);
     // All done, so unref the stats object.
-    grpc_grpclb_client_stats_unref(calld->client_stats);
+    // TODO(roth): Eliminate this once filter stack is converted to C++.
+    calld->client_stats.reset();
   }
 }
 
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
index 8bf739cba5..0061190016 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
@@ -92,6 +92,7 @@
 #include "src/core/lib/gpr/string.h"
 #include "src/core/lib/gprpp/manual_constructor.h"
 #include "src/core/lib/gprpp/memory.h"
+#include "src/core/lib/gprpp/mutex_lock.h"
 #include "src/core/lib/gprpp/orphanable.h"
 #include "src/core/lib/gprpp/ref_counted_ptr.h"
 #include "src/core/lib/iomgr/combiner.h"
@@ -123,7 +124,7 @@ class GrpcLb : public LoadBalancingPolicy {
   GrpcLb(const grpc_lb_addresses* addresses, const Args& args);
 
   void UpdateLocked(const grpc_channel_args& args) override;
-  bool PickLocked(PickState* pick) override;
+  bool PickLocked(PickState* pick, grpc_error** error) override;
   void CancelPickLocked(PickState* pick, grpc_error* error) override;
   void CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask,
                                  uint32_t initial_metadata_flags_eq,
@@ -133,8 +134,10 @@ class GrpcLb : public LoadBalancingPolicy {
   grpc_connectivity_state CheckConnectivityLocked(
       grpc_error** connectivity_error) override;
   void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override;
-  void PingOneLocked(grpc_closure* on_initiate, grpc_closure* on_ack) override;
   void ExitIdleLocked() override;
+  void ResetBackoffLocked() override;
+  void FillChildRefsForChannelz(ChildRefsList* child_subchannels,
+                                ChildRefsList* child_channels) override;
 
  private:
   /// Linked list of pending pick requests. It stores all information needed to
@@ -159,20 +162,12 @@ class GrpcLb : public LoadBalancingPolicy {
     // The LB token associated with the pick.  This is set via user_data in
     // the pick.
     grpc_mdelem lb_token;
-    // Stats for client-side load reporting. Note that this holds a
-    // reference, which must be either passed on via context or unreffed.
-    grpc_grpclb_client_stats* client_stats = nullptr;
+    // Stats for client-side load reporting.
+    RefCountedPtr<GrpcLbClientStats> client_stats;
     // Next pending pick.
     PendingPick* next = nullptr;
   };
 
-  /// A linked list of pending pings waiting for the RR policy to be created.
-  struct PendingPing {
-    grpc_closure* on_initiate;
-    grpc_closure* on_ack;
-    PendingPing* next = nullptr;
-  };
-
   /// Contains a call to the LB server and all the data related to the call.
   class BalancerCallState
       : public InternallyRefCountedWithTracing<BalancerCallState> {
@@ -186,7 +181,8 @@ class GrpcLb : public LoadBalancingPolicy {
 
     void StartQuery();
 
-    grpc_grpclb_client_stats* client_stats() const { return client_stats_; }
+    GrpcLbClientStats* client_stats() const { return client_stats_.get(); }
+
     bool seen_initial_response() const { return seen_initial_response_; }
 
    private:
@@ -237,7 +233,7 @@ class GrpcLb : public LoadBalancingPolicy {
 
     // The stats for client-side load reporting associated with this LB call.
     // Created after the first serverlist is received.
-    grpc_grpclb_client_stats* client_stats_ = nullptr;
+    RefCountedPtr<GrpcLbClientStats> client_stats_;
     grpc_millis client_stats_report_interval_ = 0;
     grpc_timer client_load_report_timer_;
     bool client_load_report_timer_callback_pending_ = false;
@@ -270,14 +266,12 @@ class GrpcLb : public LoadBalancingPolicy {
   void AddPendingPick(PendingPick* pp);
   static void OnPendingPickComplete(void* arg, grpc_error* error);
 
-  // Pending ping methods.
-  void AddPendingPing(grpc_closure* on_initiate, grpc_closure* on_ack);
-
   // Methods for dealing with the RR policy.
   void CreateOrUpdateRoundRobinPolicyLocked();
   grpc_channel_args* CreateRoundRobinPolicyArgsLocked();
   void CreateRoundRobinPolicyLocked(const Args& args);
-  bool PickFromRoundRobinPolicyLocked(bool force_async, PendingPick* pp);
+  bool PickFromRoundRobinPolicyLocked(bool force_async, PendingPick* pp,
+                                      grpc_error** error);
   void UpdateConnectivityStateFromRoundRobinPolicyLocked(
       grpc_error* rr_state_error);
   static void OnRoundRobinConnectivityChangedLocked(void* arg,
@@ -298,6 +292,9 @@ class GrpcLb : public LoadBalancingPolicy {
 
   // The channel for communicating with the LB server.
   grpc_channel* lb_channel_ = nullptr;
+  // Mutex to protect the channel to the LB server. This is used when
+  // processing a channelz request.
+  gpr_mu lb_channel_mu_;
   grpc_connectivity_state lb_channel_connectivity_;
   grpc_closure lb_channel_on_connectivity_changed_;
   // Are we already watching the LB channel's connectivity?
@@ -337,9 +334,8 @@ class GrpcLb : public LoadBalancingPolicy {
   grpc_timer lb_fallback_timer_;
   grpc_closure lb_on_fallback_;
 
-  // Pending picks and pings that are waiting on the RR policy's connectivity.
+  // Pending picks that are waiting on the RR policy's connectivity.
   PendingPick* pending_picks_ = nullptr;
-  PendingPing* pending_pings_ = nullptr;
 
   // The RR policy to use for the backends.
   OrphanablePtr<LoadBalancingPolicy> rr_policy_;
@@ -548,9 +544,6 @@ GrpcLb::BalancerCallState::~BalancerCallState() {
   grpc_byte_buffer_destroy(send_message_payload_);
   grpc_byte_buffer_destroy(recv_message_payload_);
   grpc_slice_unref_internal(lb_call_status_details_);
-  if (client_stats_ != nullptr) {
-    grpc_grpclb_client_stats_unref(client_stats_);
-  }
 }
 
 void GrpcLb::BalancerCallState::Orphan() {
@@ -673,22 +666,22 @@ void GrpcLb::BalancerCallState::MaybeSendClientLoadReportLocked(
 
 bool GrpcLb::BalancerCallState::LoadReportCountersAreZero(
     grpc_grpclb_request* request) {
-  grpc_grpclb_dropped_call_counts* drop_entries =
-      static_cast<grpc_grpclb_dropped_call_counts*>(
+  GrpcLbClientStats::DroppedCallCounts* drop_entries =
+      static_cast<GrpcLbClientStats::DroppedCallCounts*>(
           request->client_stats.calls_finished_with_drop.arg);
   return request->client_stats.num_calls_started == 0 &&
          request->client_stats.num_calls_finished == 0 &&
          request->client_stats.num_calls_finished_with_client_failed_to_send ==
              0 &&
          request->client_stats.num_calls_finished_known_received == 0 &&
-         (drop_entries == nullptr || drop_entries->num_entries == 0);
+         (drop_entries == nullptr || drop_entries->size() == 0);
 }
 
 void GrpcLb::BalancerCallState::SendClientLoadReportLocked() {
   // Construct message payload.
   GPR_ASSERT(send_message_payload_ == nullptr);
   grpc_grpclb_request* request =
-      grpc_grpclb_load_report_request_create_locked(client_stats_);
+      grpc_grpclb_load_report_request_create_locked(client_stats_.get());
   // Skip client load report if the counters were all zero in the last
   // report and they are still zero in this one.
   if (LoadReportCountersAreZero(request)) {
@@ -779,7 +772,7 @@ void GrpcLb::BalancerCallState::OnBalancerMessageReceivedLocked(
       if (grpc_lb_glb_trace.enabled()) {
         gpr_log(GPR_INFO,
                 "[grpclb %p] Received initial LB response message; "
-                "client load reporting interval = %" PRIdPTR " milliseconds",
+                "client load reporting interval = %" PRId64 " milliseconds",
                 grpclb_policy, lb_calld->client_stats_report_interval_);
       }
     } else if (grpc_lb_glb_trace.enabled()) {
@@ -814,7 +807,7 @@ void GrpcLb::BalancerCallState::OnBalancerMessageReceivedLocked(
       // serverlist returned from the current LB call.
       if (lb_calld->client_stats_report_interval_ > 0 &&
           lb_calld->client_stats_ == nullptr) {
-        lb_calld->client_stats_ = grpc_grpclb_client_stats_create();
+        lb_calld->client_stats_.reset(New<GrpcLbClientStats>());
         // TODO(roth): We currently track this ref manually.  Once the
         // ClosureRef API is ready, we should pass the RefCountedPtr<> along
         // with the callback.
@@ -859,10 +852,12 @@ void GrpcLb::BalancerCallState::OnBalancerMessageReceivedLocked(
     }
   } else {
     // No valid initial response or serverlist found.
+    char* response_slice_str =
+        grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX);
     gpr_log(GPR_ERROR,
             "[grpclb %p] Invalid LB response received: '%s'. Ignoring.",
-            grpclb_policy,
-            grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX));
+            grpclb_policy, response_slice_str);
+    gpr_free(response_slice_str);
   }
   grpc_slice_unref_internal(response_slice);
   if (!grpclb_policy->shutting_down_) {
@@ -1007,6 +1002,10 @@ grpc_channel_args* BuildBalancerChannelArgs(
       // A channel arg indicating the target is a grpclb load balancer.
       grpc_channel_arg_integer_create(
           const_cast<char*>(GRPC_ARG_ADDRESS_IS_GRPCLB_LOAD_BALANCER), 1),
+      // A channel arg indicating this is an internal channels, aka it is
+      // owned by components in Core, not by the user application.
+      grpc_channel_arg_integer_create(
+          const_cast<char*>(GRPC_ARG_CHANNELZ_CHANNEL_IS_INTERNAL_CHANNEL), 1),
   };
   // Construct channel args.
   grpc_channel_args* new_args = grpc_channel_args_copy_and_add_and_remove(
@@ -1036,6 +1035,7 @@ GrpcLb::GrpcLb(const grpc_lb_addresses* addresses,
               .set_max_backoff(GRPC_GRPCLB_RECONNECT_MAX_BACKOFF_SECONDS *
                                1000)) {
   // Initialization.
+  gpr_mu_init(&lb_channel_mu_);
   grpc_subchannel_index_ref();
   GRPC_CLOSURE_INIT(&lb_channel_on_connectivity_changed_,
                     &GrpcLb::OnBalancerChannelConnectivityChangedLocked, this,
@@ -1073,7 +1073,7 @@ GrpcLb::GrpcLb(const grpc_lb_addresses* addresses,
 
 GrpcLb::~GrpcLb() {
   GPR_ASSERT(pending_picks_ == nullptr);
-  GPR_ASSERT(pending_pings_ == nullptr);
+  gpr_mu_destroy(&lb_channel_mu_);
   gpr_free((void*)server_name_);
   grpc_channel_args_destroy(args_);
   grpc_connectivity_state_destroy(&state_tracker_);
@@ -1103,8 +1103,10 @@ void GrpcLb::ShutdownLocked() {
   // OnBalancerChannelConnectivityChangedLocked(), and we need to be
   // alive when that callback is invoked.
   if (lb_channel_ != nullptr) {
+    gpr_mu_lock(&lb_channel_mu_);
     grpc_channel_destroy(lb_channel_);
     lb_channel_ = nullptr;
+    gpr_mu_unlock(&lb_channel_mu_);
   }
   grpc_connectivity_state_set(&state_tracker_, GRPC_CHANNEL_SHUTDOWN,
                               GRPC_ERROR_REF(error), "grpclb_shutdown");
@@ -1116,14 +1118,6 @@ void GrpcLb::ShutdownLocked() {
     // Note: pp is deleted in this callback.
     GRPC_CLOSURE_SCHED(&pp->on_complete, GRPC_ERROR_REF(error));
   }
-  // Clear pending pings.
-  PendingPing* pping;
-  while ((pping = pending_pings_) != nullptr) {
-    pending_pings_ = pping->next;
-    GRPC_CLOSURE_SCHED(pping->on_initiate, GRPC_ERROR_REF(error));
-    GRPC_CLOSURE_SCHED(pping->on_ack, GRPC_ERROR_REF(error));
-    Delete(pping);
-  }
   GRPC_ERROR_UNREF(error);
 }
 
@@ -1137,9 +1131,10 @@ void GrpcLb::HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) {
     pending_picks_ = pp->next;
     pp->pick->on_complete = pp->original_on_complete;
     pp->pick->user_data = nullptr;
-    if (new_policy->PickLocked(pp->pick)) {
+    grpc_error* error = GRPC_ERROR_NONE;
+    if (new_policy->PickLocked(pp->pick, &error)) {
       // Synchronous return; schedule closure.
-      GRPC_CLOSURE_SCHED(pp->pick->on_complete, GRPC_ERROR_NONE);
+      GRPC_CLOSURE_SCHED(pp->pick->on_complete, error);
     }
     Delete(pp);
   }
@@ -1223,54 +1218,56 @@ void GrpcLb::ExitIdleLocked() {
   }
 }
 
-bool GrpcLb::PickLocked(PickState* pick) {
+void GrpcLb::ResetBackoffLocked() {
+  if (lb_channel_ != nullptr) {
+    grpc_channel_reset_connect_backoff(lb_channel_);
+  }
+  if (rr_policy_ != nullptr) {
+    rr_policy_->ResetBackoffLocked();
+  }
+}
+
+bool GrpcLb::PickLocked(PickState* pick, grpc_error** error) {
   PendingPick* pp = PendingPickCreate(pick);
   bool pick_done = false;
   if (rr_policy_ != nullptr) {
-    const grpc_connectivity_state rr_connectivity_state =
-        rr_policy_->CheckConnectivityLocked(nullptr);
-    // The RR policy may have transitioned to SHUTDOWN but the callback
-    // registered to capture this event (on_rr_connectivity_changed_) may not
-    // have been invoked yet. We need to make sure we aren't trying to pick
-    // from an RR policy instance that's in shutdown.
-    if (rr_connectivity_state == GRPC_CHANNEL_SHUTDOWN) {
+    if (grpc_lb_glb_trace.enabled()) {
+      gpr_log(GPR_INFO, "[grpclb %p] about to PICK from RR %p", this,
+              rr_policy_.get());
+    }
+    pick_done =
+        PickFromRoundRobinPolicyLocked(false /* force_async */, pp, error);
+  } else {  // rr_policy_ == NULL
+    if (pick->on_complete == nullptr) {
+      *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING(
+          "No pick result available but synchronous result required.");
+      pick_done = true;
+    } else {
       if (grpc_lb_glb_trace.enabled()) {
         gpr_log(GPR_INFO,
-                "[grpclb %p] NOT picking from from RR %p: RR conn state=%s",
-                this, rr_policy_.get(),
-                grpc_connectivity_state_name(rr_connectivity_state));
+                "[grpclb %p] No RR policy. Adding to grpclb's pending picks",
+                this);
       }
       AddPendingPick(pp);
-      pick_done = false;
-    } else {  // RR not in shutdown
-      if (grpc_lb_glb_trace.enabled()) {
-        gpr_log(GPR_INFO, "[grpclb %p] about to PICK from RR %p", this,
-                rr_policy_.get());
+      if (!started_picking_) {
+        StartPickingLocked();
       }
-      pick_done = PickFromRoundRobinPolicyLocked(false /* force_async */, pp);
-    }
-  } else {  // rr_policy_ == NULL
-    if (grpc_lb_glb_trace.enabled()) {
-      gpr_log(GPR_INFO,
-              "[grpclb %p] No RR policy. Adding to grpclb's pending picks",
-              this);
-    }
-    AddPendingPick(pp);
-    if (!started_picking_) {
-      StartPickingLocked();
+      pick_done = false;
     }
-    pick_done = false;
   }
   return pick_done;
 }
 
-void GrpcLb::PingOneLocked(grpc_closure* on_initiate, grpc_closure* on_ack) {
-  if (rr_policy_ != nullptr) {
-    rr_policy_->PingOneLocked(on_initiate, on_ack);
-  } else {
-    AddPendingPing(on_initiate, on_ack);
-    if (!started_picking_) {
-      StartPickingLocked();
+void GrpcLb::FillChildRefsForChannelz(ChildRefsList* child_subchannels,
+                                      ChildRefsList* child_channels) {
+  // delegate to the RoundRobin to fill the children subchannels.
+  rr_policy_->FillChildRefsForChannelz(child_subchannels, child_channels);
+  MutexLock lock(&lb_channel_mu_);
+  if (lb_channel_ != nullptr) {
+    grpc_core::channelz::ChannelNode* channel_node =
+        grpc_channel_get_channelz_node(lb_channel_);
+    if (channel_node != nullptr) {
+      child_channels->push_back(channel_node->uuid());
     }
   }
 }
@@ -1318,9 +1315,11 @@ void GrpcLb::ProcessChannelArgsLocked(const grpc_channel_args& args) {
   if (lb_channel_ == nullptr) {
     char* uri_str;
     gpr_asprintf(&uri_str, "fake:///%s", server_name_);
+    gpr_mu_lock(&lb_channel_mu_);
     lb_channel_ = grpc_client_channel_factory_create_channel(
         client_channel_factory(), uri_str,
         GRPC_CLIENT_CHANNEL_TYPE_LOAD_BALANCING, lb_channel_args);
+    gpr_mu_unlock(&lb_channel_mu_);
     GPR_ASSERT(lb_channel_ != nullptr);
     gpr_free(uri_str);
   }
@@ -1420,7 +1419,7 @@ void GrpcLb::StartBalancerCallRetryTimerLocked() {
     gpr_log(GPR_INFO, "[grpclb %p] Connection to LB server lost...", this);
     grpc_millis timeout = next_try - ExecCtx::Get()->Now();
     if (timeout > 0) {
-      gpr_log(GPR_INFO, "[grpclb %p] ... retry_timer_active in %" PRIuPTR "ms.",
+      gpr_log(GPR_INFO, "[grpclb %p] ... retry_timer_active in %" PRId64 "ms.",
               this, timeout);
     } else {
       gpr_log(GPR_INFO, "[grpclb %p] ... retry_timer_active immediately.",
@@ -1516,7 +1515,7 @@ grpc_error* AddLbTokenToInitialMetadata(
 
 // Destroy function used when embedding client stats in call context.
 void DestroyClientStats(void* arg) {
-  grpc_grpclb_client_stats_unref(static_cast<grpc_grpclb_client_stats*>(arg));
+  static_cast<GrpcLbClientStats*>(arg)->Unref();
 }
 
 void GrpcLb::PendingPickSetMetadataAndContext(PendingPick* pp) {
@@ -1537,14 +1536,12 @@ void GrpcLb::PendingPickSetMetadataAndContext(PendingPick* pp) {
     // Pass on client stats via context. Passes ownership of the reference.
     if (pp->client_stats != nullptr) {
       pp->pick->subchannel_call_context[GRPC_GRPCLB_CLIENT_STATS].value =
-          pp->client_stats;
+          pp->client_stats.release();
       pp->pick->subchannel_call_context[GRPC_GRPCLB_CLIENT_STATS].destroy =
           DestroyClientStats;
     }
   } else {
-    if (pp->client_stats != nullptr) {
-      grpc_grpclb_client_stats_unref(pp->client_stats);
-    }
+    pp->client_stats.reset();
   }
 }
 
@@ -1575,18 +1572,6 @@ void GrpcLb::AddPendingPick(PendingPick* pp) {
 }
 
 //
-// PendingPing
-//
-
-void GrpcLb::AddPendingPing(grpc_closure* on_initiate, grpc_closure* on_ack) {
-  PendingPing* pping = New<PendingPing>();
-  pping->on_initiate = on_initiate;
-  pping->on_ack = on_ack;
-  pping->next = pending_pings_;
-  pending_pings_ = pping;
-}
-
-//
 // code for interacting with the RR policy
 //
 
@@ -1595,7 +1580,8 @@ void GrpcLb::AddPendingPing(grpc_closure* on_initiate, grpc_closure* on_ack) {
 // cleanups this callback would otherwise be responsible for.
 // If \a force_async is true, then we will manually schedule the
 // completion callback even if the pick is available immediately.
-bool GrpcLb::PickFromRoundRobinPolicyLocked(bool force_async, PendingPick* pp) {
+bool GrpcLb::PickFromRoundRobinPolicyLocked(bool force_async, PendingPick* pp,
+                                            grpc_error** error) {
   // Check for drops if we are not using fallback backend addresses.
   if (serverlist_ != nullptr) {
     // Look at the index into the serverlist to see if we should drop this call.
@@ -1610,8 +1596,8 @@ bool GrpcLb::PickFromRoundRobinPolicyLocked(bool force_async, PendingPick* pp) {
       // subchannel call (and therefore no client_load_reporting filter)
       // for dropped calls.
       if (lb_calld_ != nullptr && lb_calld_->client_stats() != nullptr) {
-        grpc_grpclb_client_stats_add_call_dropped_locked(
-            server->load_balance_token, lb_calld_->client_stats());
+        lb_calld_->client_stats()->AddCallDroppedLocked(
+            server->load_balance_token);
       }
       if (force_async) {
         GRPC_CLOSURE_SCHED(pp->original_on_complete, GRPC_ERROR_NONE);
@@ -1624,16 +1610,17 @@ bool GrpcLb::PickFromRoundRobinPolicyLocked(bool force_async, PendingPick* pp) {
   }
   // Set client_stats and user_data.
   if (lb_calld_ != nullptr && lb_calld_->client_stats() != nullptr) {
-    pp->client_stats = grpc_grpclb_client_stats_ref(lb_calld_->client_stats());
+    pp->client_stats = lb_calld_->client_stats()->Ref();
   }
   GPR_ASSERT(pp->pick->user_data == nullptr);
   pp->pick->user_data = (void**)&pp->lb_token;
   // Pick via the RR policy.
-  bool pick_done = rr_policy_->PickLocked(pp->pick);
+  bool pick_done = rr_policy_->PickLocked(pp->pick, error);
   if (pick_done) {
     PendingPickSetMetadataAndContext(pp);
     if (force_async) {
-      GRPC_CLOSURE_SCHED(pp->original_on_complete, GRPC_ERROR_NONE);
+      GRPC_CLOSURE_SCHED(pp->original_on_complete, *error);
+      *error = GRPC_ERROR_NONE;
       pick_done = false;
     }
     Delete(pp);
@@ -1685,18 +1672,8 @@ void GrpcLb::CreateRoundRobinPolicyLocked(const Args& args) {
               "[grpclb %p] Pending pick about to (async) PICK from RR %p", this,
               rr_policy_.get());
     }
-    PickFromRoundRobinPolicyLocked(true /* force_async */, pp);
-  }
-  // Send pending pings to RR policy.
-  PendingPing* pping;
-  while ((pping = pending_pings_)) {
-    pending_pings_ = pping->next;
-    if (grpc_lb_glb_trace.enabled()) {
-      gpr_log(GPR_INFO, "[grpclb %p] Pending ping about to PING from RR %p",
-              this, rr_policy_.get());
-    }
-    rr_policy_->PingOneLocked(pping->on_initiate, pping->on_ack);
-    Delete(pping);
+    grpc_error* error = GRPC_ERROR_NONE;
+    PickFromRoundRobinPolicyLocked(true /* force_async */, pp, &error);
   }
 }
 
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc
index dfbaead7d5..087cd8f276 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc
@@ -22,131 +22,65 @@
 
 #include <string.h>
 
-#include <grpc/support/alloc.h>
 #include <grpc/support/atm.h>
 #include <grpc/support/string_util.h>
-#include <grpc/support/sync.h>
 
-#include "src/core/lib/channel/channel_args.h"
+namespace grpc_core {
 
-#define GRPC_ARG_GRPCLB_CLIENT_STATS "grpc.grpclb_client_stats"
-
-struct grpc_grpclb_client_stats {
-  gpr_refcount refs;
-  // This field must only be accessed via *_locked() methods.
-  grpc_grpclb_dropped_call_counts* drop_token_counts;
-  // These fields may be accessed from multiple threads at a time.
-  gpr_atm num_calls_started;
-  gpr_atm num_calls_finished;
-  gpr_atm num_calls_finished_with_client_failed_to_send;
-  gpr_atm num_calls_finished_known_received;
-};
-
-grpc_grpclb_client_stats* grpc_grpclb_client_stats_create() {
-  grpc_grpclb_client_stats* client_stats =
-      static_cast<grpc_grpclb_client_stats*>(gpr_zalloc(sizeof(*client_stats)));
-  gpr_ref_init(&client_stats->refs, 1);
-  return client_stats;
-}
-
-grpc_grpclb_client_stats* grpc_grpclb_client_stats_ref(
-    grpc_grpclb_client_stats* client_stats) {
-  gpr_ref(&client_stats->refs);
-  return client_stats;
-}
-
-void grpc_grpclb_client_stats_unref(grpc_grpclb_client_stats* client_stats) {
-  if (gpr_unref(&client_stats->refs)) {
-    grpc_grpclb_dropped_call_counts_destroy(client_stats->drop_token_counts);
-    gpr_free(client_stats);
-  }
-}
-
-void grpc_grpclb_client_stats_add_call_started(
-    grpc_grpclb_client_stats* client_stats) {
-  gpr_atm_full_fetch_add(&client_stats->num_calls_started, (gpr_atm)1);
+void GrpcLbClientStats::AddCallStarted() {
+  gpr_atm_full_fetch_add(&num_calls_started_, (gpr_atm)1);
 }
 
-void grpc_grpclb_client_stats_add_call_finished(
-    bool finished_with_client_failed_to_send, bool finished_known_received,
-    grpc_grpclb_client_stats* client_stats) {
-  gpr_atm_full_fetch_add(&client_stats->num_calls_finished, (gpr_atm)1);
+void GrpcLbClientStats::AddCallFinished(
+    bool finished_with_client_failed_to_send, bool finished_known_received) {
+  gpr_atm_full_fetch_add(&num_calls_finished_, (gpr_atm)1);
   if (finished_with_client_failed_to_send) {
-    gpr_atm_full_fetch_add(
-        &client_stats->num_calls_finished_with_client_failed_to_send,
-        (gpr_atm)1);
+    gpr_atm_full_fetch_add(&num_calls_finished_with_client_failed_to_send_,
+                           (gpr_atm)1);
   }
   if (finished_known_received) {
-    gpr_atm_full_fetch_add(&client_stats->num_calls_finished_known_received,
-                           (gpr_atm)1);
+    gpr_atm_full_fetch_add(&num_calls_finished_known_received_, (gpr_atm)1);
   }
 }
 
-void grpc_grpclb_client_stats_add_call_dropped_locked(
-    char* token, grpc_grpclb_client_stats* client_stats) {
+void GrpcLbClientStats::AddCallDroppedLocked(char* token) {
   // Increment num_calls_started and num_calls_finished.
-  gpr_atm_full_fetch_add(&client_stats->num_calls_started, (gpr_atm)1);
-  gpr_atm_full_fetch_add(&client_stats->num_calls_finished, (gpr_atm)1);
+  gpr_atm_full_fetch_add(&num_calls_started_, (gpr_atm)1);
+  gpr_atm_full_fetch_add(&num_calls_finished_, (gpr_atm)1);
   // Record the drop.
-  if (client_stats->drop_token_counts == nullptr) {
-    client_stats->drop_token_counts =
-        static_cast<grpc_grpclb_dropped_call_counts*>(
-            gpr_zalloc(sizeof(grpc_grpclb_dropped_call_counts)));
+  if (drop_token_counts_ == nullptr) {
+    drop_token_counts_.reset(New<DroppedCallCounts>());
   }
-  grpc_grpclb_dropped_call_counts* drop_token_counts =
-      client_stats->drop_token_counts;
-  for (size_t i = 0; i < drop_token_counts->num_entries; ++i) {
-    if (strcmp(drop_token_counts->token_counts[i].token, token) == 0) {
-      ++drop_token_counts->token_counts[i].count;
+  for (size_t i = 0; i < drop_token_counts_->size(); ++i) {
+    if (strcmp((*drop_token_counts_)[i].token.get(), token) == 0) {
+      ++(*drop_token_counts_)[i].count;
       return;
     }
   }
-  // Not found, so add a new entry.  We double the size of the array each time.
-  size_t new_num_entries = 2;
-  while (new_num_entries < drop_token_counts->num_entries + 1) {
-    new_num_entries *= 2;
-  }
-  drop_token_counts->token_counts = static_cast<grpc_grpclb_drop_token_count*>(
-      gpr_realloc(drop_token_counts->token_counts,
-                  new_num_entries * sizeof(grpc_grpclb_drop_token_count)));
-  grpc_grpclb_drop_token_count* new_entry =
-      &drop_token_counts->token_counts[drop_token_counts->num_entries++];
-  new_entry->token = gpr_strdup(token);
-  new_entry->count = 1;
+  // Not found, so add a new entry.
+  drop_token_counts_->emplace_back(UniquePtr<char>(gpr_strdup(token)), 1);
 }
 
-static void atomic_get_and_reset_counter(int64_t* value, gpr_atm* counter) {
-  *value = static_cast<int64_t>(gpr_atm_acq_load(counter));
-  gpr_atm_full_fetch_add(counter, (gpr_atm)(-*value));
+namespace {
+
+void AtomicGetAndResetCounter(int64_t* value, gpr_atm* counter) {
+  *value = static_cast<int64_t>(gpr_atm_full_xchg(counter, (gpr_atm)0));
 }
 
-void grpc_grpclb_client_stats_get_locked(
-    grpc_grpclb_client_stats* client_stats, int64_t* num_calls_started,
-    int64_t* num_calls_finished,
+}  // namespace
+
+void GrpcLbClientStats::GetLocked(
+    int64_t* num_calls_started, int64_t* num_calls_finished,
     int64_t* num_calls_finished_with_client_failed_to_send,
     int64_t* num_calls_finished_known_received,
-    grpc_grpclb_dropped_call_counts** drop_token_counts) {
-  atomic_get_and_reset_counter(num_calls_started,
-                               &client_stats->num_calls_started);
-  atomic_get_and_reset_counter(num_calls_finished,
-                               &client_stats->num_calls_finished);
-  atomic_get_and_reset_counter(
-      num_calls_finished_with_client_failed_to_send,
-      &client_stats->num_calls_finished_with_client_failed_to_send);
-  atomic_get_and_reset_counter(
-      num_calls_finished_known_received,
-      &client_stats->num_calls_finished_known_received);
-  *drop_token_counts = client_stats->drop_token_counts;
-  client_stats->drop_token_counts = nullptr;
+    UniquePtr<DroppedCallCounts>* drop_token_counts) {
+  AtomicGetAndResetCounter(num_calls_started, &num_calls_started_);
+  AtomicGetAndResetCounter(num_calls_finished, &num_calls_finished_);
+  AtomicGetAndResetCounter(num_calls_finished_with_client_failed_to_send,
+                           &num_calls_finished_with_client_failed_to_send_);
+  AtomicGetAndResetCounter(num_calls_finished_known_received,
+                           &num_calls_finished_known_received_);
+  *drop_token_counts = std::move(drop_token_counts_);
 }
 
-void grpc_grpclb_dropped_call_counts_destroy(
-    grpc_grpclb_dropped_call_counts* drop_entries) {
-  if (drop_entries != nullptr) {
-    for (size_t i = 0; i < drop_entries->num_entries; ++i) {
-      gpr_free(drop_entries->token_counts[i].token);
-    }
-    gpr_free(drop_entries->token_counts);
-    gpr_free(drop_entries);
-  }
-}
+}  // namespace grpc_core
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h
index c971e56883..18ab2c9452 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h
@@ -21,47 +21,52 @@
 
 #include <grpc/support/port_platform.h>
 
-#include <stdbool.h>
+#include <grpc/support/atm.h>
 
-#include <grpc/impl/codegen/grpc_types.h>
+#include "src/core/lib/gprpp/inlined_vector.h"
+#include "src/core/lib/gprpp/memory.h"
+#include "src/core/lib/gprpp/ref_counted.h"
 
-typedef struct grpc_grpclb_client_stats grpc_grpclb_client_stats;
+namespace grpc_core {
 
-typedef struct {
-  char* token;
-  int64_t count;
-} grpc_grpclb_drop_token_count;
+class GrpcLbClientStats : public RefCounted<GrpcLbClientStats> {
+ public:
+  struct DropTokenCount {
+    UniquePtr<char> token;
+    int64_t count;
 
-typedef struct {
-  grpc_grpclb_drop_token_count* token_counts;
-  size_t num_entries;
-} grpc_grpclb_dropped_call_counts;
+    DropTokenCount(UniquePtr<char> token, int64_t count)
+        : token(std::move(token)), count(count) {}
+  };
 
-grpc_grpclb_client_stats* grpc_grpclb_client_stats_create();
-grpc_grpclb_client_stats* grpc_grpclb_client_stats_ref(
-    grpc_grpclb_client_stats* client_stats);
-void grpc_grpclb_client_stats_unref(grpc_grpclb_client_stats* client_stats);
+  typedef InlinedVector<DropTokenCount, 10> DroppedCallCounts;
 
-void grpc_grpclb_client_stats_add_call_started(
-    grpc_grpclb_client_stats* client_stats);
-void grpc_grpclb_client_stats_add_call_finished(
-    bool finished_with_client_failed_to_send, bool finished_known_received,
-    grpc_grpclb_client_stats* client_stats);
+  GrpcLbClientStats() {}
 
-// This method is not thread-safe; caller must synchronize.
-void grpc_grpclb_client_stats_add_call_dropped_locked(
-    char* token, grpc_grpclb_client_stats* client_stats);
+  void AddCallStarted();
+  void AddCallFinished(bool finished_with_client_failed_to_send,
+                       bool finished_known_received);
 
-// This method is not thread-safe; caller must synchronize.
-void grpc_grpclb_client_stats_get_locked(
-    grpc_grpclb_client_stats* client_stats, int64_t* num_calls_started,
-    int64_t* num_calls_finished,
-    int64_t* num_calls_finished_with_client_failed_to_send,
-    int64_t* num_calls_finished_known_received,
-    grpc_grpclb_dropped_call_counts** drop_token_counts);
+  // This method is not thread-safe; caller must synchronize.
+  void AddCallDroppedLocked(char* token);
 
-void grpc_grpclb_dropped_call_counts_destroy(
-    grpc_grpclb_dropped_call_counts* drop_entries);
+  // This method is not thread-safe; caller must synchronize.
+  void GetLocked(int64_t* num_calls_started, int64_t* num_calls_finished,
+                 int64_t* num_calls_finished_with_client_failed_to_send,
+                 int64_t* num_calls_finished_known_received,
+                 UniquePtr<DroppedCallCounts>* drop_token_counts);
+
+ private:
+  // This field must only be accessed via *_locked() methods.
+  UniquePtr<DroppedCallCounts> drop_token_counts_;
+  // These fields may be accessed from multiple threads at a time.
+  gpr_atm num_calls_started_ = 0;
+  gpr_atm num_calls_finished_ = 0;
+  gpr_atm num_calls_finished_with_client_failed_to_send_ = 0;
+  gpr_atm num_calls_finished_known_received_ = 0;
+};
+
+}  // namespace grpc_core
 
 #endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_GRPCLB_GRPCLB_CLIENT_STATS_H \
         */
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc
index 0e80dd5521..f24281a5bf 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc
@@ -18,9 +18,9 @@
 
 #include <grpc/support/port_platform.h>
 
+#include "pb_decode.h"
+#include "pb_encode.h"
 #include "src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h"
-#include "third_party/nanopb/pb_decode.h"
-#include "third_party/nanopb/pb_encode.h"
 
 #include <grpc/support/alloc.h>
 
@@ -73,7 +73,7 @@ grpc_grpclb_request* grpc_grpclb_request_create(const char* lb_service_name) {
 }
 
 static void populate_timestamp(gpr_timespec timestamp,
-                               struct _grpc_lb_v1_Timestamp* timestamp_pb) {
+                               grpc_grpclb_timestamp* timestamp_pb) {
   timestamp_pb->has_seconds = true;
   timestamp_pb->seconds = timestamp.tv_sec;
   timestamp_pb->has_nanos = true;
@@ -89,16 +89,16 @@ static bool encode_string(pb_ostream_t* stream, const pb_field_t* field,
 
 static bool encode_drops(pb_ostream_t* stream, const pb_field_t* field,
                          void* const* arg) {
-  grpc_grpclb_dropped_call_counts* drop_entries =
-      static_cast<grpc_grpclb_dropped_call_counts*>(*arg);
+  grpc_core::GrpcLbClientStats::DroppedCallCounts* drop_entries =
+      static_cast<grpc_core::GrpcLbClientStats::DroppedCallCounts*>(*arg);
   if (drop_entries == nullptr) return true;
-  for (size_t i = 0; i < drop_entries->num_entries; ++i) {
+  for (size_t i = 0; i < drop_entries->size(); ++i) {
     if (!pb_encode_tag_for_field(stream, field)) return false;
     grpc_lb_v1_ClientStatsPerToken drop_message;
     drop_message.load_balance_token.funcs.encode = encode_string;
-    drop_message.load_balance_token.arg = drop_entries->token_counts[i].token;
+    drop_message.load_balance_token.arg = (*drop_entries)[i].token.get();
     drop_message.has_num_calls = true;
-    drop_message.num_calls = drop_entries->token_counts[i].count;
+    drop_message.num_calls = (*drop_entries)[i].count;
     if (!pb_encode_submessage(stream, grpc_lb_v1_ClientStatsPerToken_fields,
                               &drop_message)) {
       return false;
@@ -108,7 +108,7 @@ static bool encode_drops(pb_ostream_t* stream, const pb_field_t* field,
 }
 
 grpc_grpclb_request* grpc_grpclb_load_report_request_create_locked(
-    grpc_grpclb_client_stats* client_stats) {
+    grpc_core::GrpcLbClientStats* client_stats) {
   grpc_grpclb_request* req = static_cast<grpc_grpclb_request*>(
       gpr_zalloc(sizeof(grpc_grpclb_request)));
   req->has_client_stats = true;
@@ -120,13 +120,15 @@ grpc_grpclb_request* grpc_grpclb_load_report_request_create_locked(
   req->client_stats.has_num_calls_finished_with_client_failed_to_send = true;
   req->client_stats.has_num_calls_finished_known_received = true;
   req->client_stats.calls_finished_with_drop.funcs.encode = encode_drops;
-  grpc_grpclb_client_stats_get_locked(
-      client_stats, &req->client_stats.num_calls_started,
+  grpc_core::UniquePtr<grpc_core::GrpcLbClientStats::DroppedCallCounts>
+      drop_counts;
+  client_stats->GetLocked(
+      &req->client_stats.num_calls_started,
       &req->client_stats.num_calls_finished,
       &req->client_stats.num_calls_finished_with_client_failed_to_send,
-      &req->client_stats.num_calls_finished_known_received,
-      reinterpret_cast<grpc_grpclb_dropped_call_counts**>(
-          &req->client_stats.calls_finished_with_drop.arg));
+      &req->client_stats.num_calls_finished_known_received, &drop_counts);
+  // Will be deleted in grpc_grpclb_request_destroy().
+  req->client_stats.calls_finished_with_drop.arg = drop_counts.release();
   return req;
 }
 
@@ -149,10 +151,10 @@ grpc_slice grpc_grpclb_request_encode(const grpc_grpclb_request* request) {
 
 void grpc_grpclb_request_destroy(grpc_grpclb_request* request) {
   if (request->has_client_stats) {
-    grpc_grpclb_dropped_call_counts* drop_entries =
-        static_cast<grpc_grpclb_dropped_call_counts*>(
+    grpc_core::GrpcLbClientStats::DroppedCallCounts* drop_entries =
+        static_cast<grpc_core::GrpcLbClientStats::DroppedCallCounts*>(
             request->client_stats.calls_finished_with_drop.arg);
-    grpc_grpclb_dropped_call_counts_destroy(drop_entries);
+    grpc_core::Delete(drop_entries);
   }
   gpr_free(request);
 }
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h b/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h
index d4270f2536..9ca7b28d8e 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h
@@ -33,7 +33,9 @@ typedef grpc_lb_v1_Server_ip_address_t grpc_grpclb_ip_address;
 typedef grpc_lb_v1_LoadBalanceRequest grpc_grpclb_request;
 typedef grpc_lb_v1_InitialLoadBalanceResponse grpc_grpclb_initial_response;
 typedef grpc_lb_v1_Server grpc_grpclb_server;
-typedef grpc_lb_v1_Duration grpc_grpclb_duration;
+typedef google_protobuf_Duration grpc_grpclb_duration;
+typedef google_protobuf_Timestamp grpc_grpclb_timestamp;
+
 typedef struct {
   grpc_grpclb_server** servers;
   size_t num_servers;
@@ -42,7 +44,7 @@ typedef struct {
 /** Create a request for a gRPC LB service under \a lb_service_name */
 grpc_grpclb_request* grpc_grpclb_request_create(const char* lb_service_name);
 grpc_grpclb_request* grpc_grpclb_load_report_request_create_locked(
-    grpc_grpclb_client_stats* client_stats);
+    grpc_core::GrpcLbClientStats* client_stats);
 
 /** Protocol Buffers v3-encode \a request */
 grpc_slice grpc_grpclb_request_encode(const grpc_grpclb_request* request);
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.c b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.c
new file mode 100644
index 0000000000..131d9b7cae
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.c
@@ -0,0 +1,19 @@
+/* Automatically generated nanopb constant definitions */
+/* Generated by nanopb-0.3.7-dev */
+
+#include "src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.h"
+/* @@protoc_insertion_point(includes) */
+#if PB_PROTO_HEADER_VERSION != 30
+#error Regenerate this file with the current version of nanopb generator.
+#endif
+
+
+
+const pb_field_t google_protobuf_Duration_fields[3] = {
+    PB_FIELD(  1, INT64   , OPTIONAL, STATIC  , FIRST, google_protobuf_Duration, seconds, seconds, 0),
+    PB_FIELD(  2, INT32   , OPTIONAL, STATIC  , OTHER, google_protobuf_Duration, nanos, seconds, 0),
+    PB_LAST_FIELD
+};
+
+
+/* @@protoc_insertion_point(eof) */
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.h b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.h
new file mode 100644
index 0000000000..93070c65b8
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.h
@@ -0,0 +1,54 @@
+/* Automatically generated nanopb header */
+/* Generated by nanopb-0.3.7-dev */
+
+#ifndef PB_GOOGLE_PROTOBUF_DURATION_PB_H_INCLUDED
+#define PB_GOOGLE_PROTOBUF_DURATION_PB_H_INCLUDED
+#include "pb.h"
+/* @@protoc_insertion_point(includes) */
+#if PB_PROTO_HEADER_VERSION != 30
+#error Regenerate this file with the current version of nanopb generator.
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Struct definitions */
+typedef struct _google_protobuf_Duration {
+    bool has_seconds;
+    int64_t seconds;
+    bool has_nanos;
+    int32_t nanos;
+/* @@protoc_insertion_point(struct:google_protobuf_Duration) */
+} google_protobuf_Duration;
+
+/* Default values for struct fields */
+
+/* Initializer values for message structs */
+#define google_protobuf_Duration_init_default    {false, 0, false, 0}
+#define google_protobuf_Duration_init_zero       {false, 0, false, 0}
+
+/* Field tags (for use in manual encoding/decoding) */
+#define google_protobuf_Duration_seconds_tag     1
+#define google_protobuf_Duration_nanos_tag       2
+
+/* Struct field encoding specification for nanopb */
+extern const pb_field_t google_protobuf_Duration_fields[3];
+
+/* Maximum encoded size of messages (where known) */
+#define google_protobuf_Duration_size            22
+
+/* Message IDs (where set with "msgid" option) */
+#ifdef PB_MSGID
+
+#define DURATION_MESSAGES \
+
+
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+/* @@protoc_insertion_point(eof) */
+
+#endif
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.c b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.c
new file mode 100644
index 0000000000..b6f8ffc559
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.c
@@ -0,0 +1,19 @@
+/* Automatically generated nanopb constant definitions */
+/* Generated by nanopb-0.3.7-dev */
+
+#include "src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.h"
+/* @@protoc_insertion_point(includes) */
+#if PB_PROTO_HEADER_VERSION != 30
+#error Regenerate this file with the current version of nanopb generator.
+#endif
+
+
+
+const pb_field_t google_protobuf_Timestamp_fields[3] = {
+    PB_FIELD(  1, INT64   , OPTIONAL, STATIC  , FIRST, google_protobuf_Timestamp, seconds, seconds, 0),
+    PB_FIELD(  2, INT32   , OPTIONAL, STATIC  , OTHER, google_protobuf_Timestamp, nanos, seconds, 0),
+    PB_LAST_FIELD
+};
+
+
+/* @@protoc_insertion_point(eof) */
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.h b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.h
new file mode 100644
index 0000000000..7f48481524
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.h
@@ -0,0 +1,54 @@
+/* Automatically generated nanopb header */
+/* Generated by nanopb-0.3.7-dev */
+
+#ifndef PB_GOOGLE_PROTOBUF_TIMESTAMP_PB_H_INCLUDED
+#define PB_GOOGLE_PROTOBUF_TIMESTAMP_PB_H_INCLUDED
+#include "pb.h"
+/* @@protoc_insertion_point(includes) */
+#if PB_PROTO_HEADER_VERSION != 30
+#error Regenerate this file with the current version of nanopb generator.
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Struct definitions */
+typedef struct _google_protobuf_Timestamp {
+    bool has_seconds;
+    int64_t seconds;
+    bool has_nanos;
+    int32_t nanos;
+/* @@protoc_insertion_point(struct:google_protobuf_Timestamp) */
+} google_protobuf_Timestamp;
+
+/* Default values for struct fields */
+
+/* Initializer values for message structs */
+#define google_protobuf_Timestamp_init_default   {false, 0, false, 0}
+#define google_protobuf_Timestamp_init_zero      {false, 0, false, 0}
+
+/* Field tags (for use in manual encoding/decoding) */
+#define google_protobuf_Timestamp_seconds_tag    1
+#define google_protobuf_Timestamp_nanos_tag      2
+
+/* Struct field encoding specification for nanopb */
+extern const pb_field_t google_protobuf_Timestamp_fields[3];
+
+/* Maximum encoded size of messages (where known) */
+#define google_protobuf_Timestamp_size           22
+
+/* Message IDs (where set with "msgid" option) */
+#ifdef PB_MSGID
+
+#define TIMESTAMP_MESSAGES \
+
+
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+/* @@protoc_insertion_point(eof) */
+
+#endif
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c
index 4e6c5cc832..f6538e1349 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c
@@ -2,7 +2,6 @@
 /* Generated by nanopb-0.3.7-dev */
 
 #include "src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h"
-
 /* @@protoc_insertion_point(includes) */
 #if PB_PROTO_HEADER_VERSION != 30
 #error Regenerate this file with the current version of nanopb generator.
@@ -10,18 +9,6 @@
 
 
 
-const pb_field_t grpc_lb_v1_Duration_fields[3] = {
-    PB_FIELD(  1, INT64   , OPTIONAL, STATIC  , FIRST, grpc_lb_v1_Duration, seconds, seconds, 0),
-    PB_FIELD(  2, INT32   , OPTIONAL, STATIC  , OTHER, grpc_lb_v1_Duration, nanos, seconds, 0),
-    PB_LAST_FIELD
-};
-
-const pb_field_t grpc_lb_v1_Timestamp_fields[3] = {
-    PB_FIELD(  1, INT64   , OPTIONAL, STATIC  , FIRST, grpc_lb_v1_Timestamp, seconds, seconds, 0),
-    PB_FIELD(  2, INT32   , OPTIONAL, STATIC  , OTHER, grpc_lb_v1_Timestamp, nanos, seconds, 0),
-    PB_LAST_FIELD
-};
-
 const pb_field_t grpc_lb_v1_LoadBalanceRequest_fields[3] = {
     PB_FIELD(  1, MESSAGE , OPTIONAL, STATIC  , FIRST, grpc_lb_v1_LoadBalanceRequest, initial_request, initial_request, &grpc_lb_v1_InitialLoadBalanceRequest_fields),
     PB_FIELD(  2, MESSAGE , OPTIONAL, STATIC  , OTHER, grpc_lb_v1_LoadBalanceRequest, client_stats, initial_request, &grpc_lb_v1_ClientStats_fields),
@@ -40,7 +27,7 @@ const pb_field_t grpc_lb_v1_ClientStatsPerToken_fields[3] = {
 };
 
 const pb_field_t grpc_lb_v1_ClientStats_fields[7] = {
-    PB_FIELD(  1, MESSAGE , OPTIONAL, STATIC  , FIRST, grpc_lb_v1_ClientStats, timestamp, timestamp, &grpc_lb_v1_Timestamp_fields),
+    PB_FIELD(  1, MESSAGE , OPTIONAL, STATIC  , FIRST, grpc_lb_v1_ClientStats, timestamp, timestamp, &google_protobuf_Timestamp_fields),
     PB_FIELD(  2, INT64   , OPTIONAL, STATIC  , OTHER, grpc_lb_v1_ClientStats, num_calls_started, timestamp, 0),
     PB_FIELD(  3, INT64   , OPTIONAL, STATIC  , OTHER, grpc_lb_v1_ClientStats, num_calls_finished, num_calls_started, 0),
     PB_FIELD(  6, INT64   , OPTIONAL, STATIC  , OTHER, grpc_lb_v1_ClientStats, num_calls_finished_with_client_failed_to_send, num_calls_finished, 0),
@@ -57,7 +44,7 @@ const pb_field_t grpc_lb_v1_LoadBalanceResponse_fields[3] = {
 
 const pb_field_t grpc_lb_v1_InitialLoadBalanceResponse_fields[3] = {
     PB_FIELD(  1, STRING  , OPTIONAL, STATIC  , FIRST, grpc_lb_v1_InitialLoadBalanceResponse, load_balancer_delegate, load_balancer_delegate, 0),
-    PB_FIELD(  2, MESSAGE , OPTIONAL, STATIC  , OTHER, grpc_lb_v1_InitialLoadBalanceResponse, client_stats_report_interval, load_balancer_delegate, &grpc_lb_v1_Duration_fields),
+    PB_FIELD(  2, MESSAGE , OPTIONAL, STATIC  , OTHER, grpc_lb_v1_InitialLoadBalanceResponse, client_stats_report_interval, load_balancer_delegate, &google_protobuf_Duration_fields),
     PB_LAST_FIELD
 };
 
@@ -84,7 +71,7 @@ const pb_field_t grpc_lb_v1_Server_fields[5] = {
  * numbers or field sizes that are larger than what can fit in 8 or 16 bit
  * field descriptors.
  */
-PB_STATIC_ASSERT((pb_membersize(grpc_lb_v1_LoadBalanceRequest, initial_request) < 65536 && pb_membersize(grpc_lb_v1_LoadBalanceRequest, client_stats) < 65536 && pb_membersize(grpc_lb_v1_ClientStats, timestamp) < 65536 && pb_membersize(grpc_lb_v1_ClientStats, calls_finished_with_drop) < 65536 && pb_membersize(grpc_lb_v1_LoadBalanceResponse, initial_response) < 65536 && pb_membersize(grpc_lb_v1_LoadBalanceResponse, server_list) < 65536 && pb_membersize(grpc_lb_v1_InitialLoadBalanceResponse, client_stats_report_interval) < 65536 && pb_membersize(grpc_lb_v1_ServerList, servers) < 65536), YOU_MUST_DEFINE_PB_FIELD_32BIT_FOR_MESSAGES_grpc_lb_v1_Duration_grpc_lb_v1_Timestamp_grpc_lb_v1_LoadBalanceRequest_grpc_lb_v1_InitialLoadBalanceRequest_grpc_lb_v1_ClientStatsPerToken_grpc_lb_v1_ClientStats_grpc_lb_v1_LoadBalanceResponse_grpc_lb_v1_InitialLoadBalanceResponse_grpc_lb_v1_ServerList_grpc_lb_v1_Server)
+PB_STATIC_ASSERT((pb_membersize(grpc_lb_v1_LoadBalanceRequest, initial_request) < 65536 && pb_membersize(grpc_lb_v1_LoadBalanceRequest, client_stats) < 65536 && pb_membersize(grpc_lb_v1_ClientStats, timestamp) < 65536 && pb_membersize(grpc_lb_v1_ClientStats, calls_finished_with_drop) < 65536 && pb_membersize(grpc_lb_v1_LoadBalanceResponse, initial_response) < 65536 && pb_membersize(grpc_lb_v1_LoadBalanceResponse, server_list) < 65536 && pb_membersize(grpc_lb_v1_InitialLoadBalanceResponse, client_stats_report_interval) < 65536 && pb_membersize(grpc_lb_v1_ServerList, servers) < 65536), YOU_MUST_DEFINE_PB_FIELD_32BIT_FOR_MESSAGES_grpc_lb_v1_LoadBalanceRequest_grpc_lb_v1_InitialLoadBalanceRequest_grpc_lb_v1_ClientStatsPerToken_grpc_lb_v1_ClientStats_grpc_lb_v1_LoadBalanceResponse_grpc_lb_v1_InitialLoadBalanceResponse_grpc_lb_v1_ServerList_grpc_lb_v1_Server)
 #endif
 
 #if !defined(PB_FIELD_16BIT) && !defined(PB_FIELD_32BIT)
@@ -95,7 +82,7 @@ PB_STATIC_ASSERT((pb_membersize(grpc_lb_v1_LoadBalanceRequest, initial_request)
  * numbers or field sizes that are larger than what can fit in the default
  * 8 bit descriptors.
  */
-PB_STATIC_ASSERT((pb_membersize(grpc_lb_v1_LoadBalanceRequest, initial_request) < 256 && pb_membersize(grpc_lb_v1_LoadBalanceRequest, client_stats) < 256 && pb_membersize(grpc_lb_v1_ClientStats, timestamp) < 256 && pb_membersize(grpc_lb_v1_ClientStats, calls_finished_with_drop) < 256 && pb_membersize(grpc_lb_v1_LoadBalanceResponse, initial_response) < 256 && pb_membersize(grpc_lb_v1_LoadBalanceResponse, server_list) < 256 && pb_membersize(grpc_lb_v1_InitialLoadBalanceResponse, client_stats_report_interval) < 256 && pb_membersize(grpc_lb_v1_ServerList, servers) < 256), YOU_MUST_DEFINE_PB_FIELD_16BIT_FOR_MESSAGES_grpc_lb_v1_Duration_grpc_lb_v1_Timestamp_grpc_lb_v1_LoadBalanceRequest_grpc_lb_v1_InitialLoadBalanceRequest_grpc_lb_v1_ClientStatsPerToken_grpc_lb_v1_ClientStats_grpc_lb_v1_LoadBalanceResponse_grpc_lb_v1_InitialLoadBalanceResponse_grpc_lb_v1_ServerList_grpc_lb_v1_Server)
+PB_STATIC_ASSERT((pb_membersize(grpc_lb_v1_LoadBalanceRequest, initial_request) < 256 && pb_membersize(grpc_lb_v1_LoadBalanceRequest, client_stats) < 256 && pb_membersize(grpc_lb_v1_ClientStats, timestamp) < 256 && pb_membersize(grpc_lb_v1_ClientStats, calls_finished_with_drop) < 256 && pb_membersize(grpc_lb_v1_LoadBalanceResponse, initial_response) < 256 && pb_membersize(grpc_lb_v1_LoadBalanceResponse, server_list) < 256 && pb_membersize(grpc_lb_v1_InitialLoadBalanceResponse, client_stats_report_interval) < 256 && pb_membersize(grpc_lb_v1_ServerList, servers) < 256), YOU_MUST_DEFINE_PB_FIELD_16BIT_FOR_MESSAGES_grpc_lb_v1_LoadBalanceRequest_grpc_lb_v1_InitialLoadBalanceRequest_grpc_lb_v1_ClientStatsPerToken_grpc_lb_v1_ClientStats_grpc_lb_v1_LoadBalanceResponse_grpc_lb_v1_InitialLoadBalanceResponse_grpc_lb_v1_ServerList_grpc_lb_v1_Server)
 #endif
 
 
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h
index 066c076202..a4ff516d8e 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h
@@ -3,7 +3,9 @@
 
 #ifndef PB_GRPC_LB_V1_LOAD_BALANCER_PB_H_INCLUDED
 #define PB_GRPC_LB_V1_LOAD_BALANCER_PB_H_INCLUDED
-#include "third_party/nanopb/pb.h"
+#include "pb.h"
+#include "src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.h"
+#include "src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.h"
 /* @@protoc_insertion_point(includes) */
 #if PB_PROTO_HEADER_VERSION != 30
 #error Regenerate this file with the current version of nanopb generator.
@@ -19,6 +21,21 @@ typedef struct _grpc_lb_v1_ServerList {
 /* @@protoc_insertion_point(struct:grpc_lb_v1_ServerList) */
 } grpc_lb_v1_ServerList;
 
+typedef struct _grpc_lb_v1_ClientStats {
+    bool has_timestamp;
+    google_protobuf_Timestamp timestamp;
+    bool has_num_calls_started;
+    int64_t num_calls_started;
+    bool has_num_calls_finished;
+    int64_t num_calls_finished;
+    bool has_num_calls_finished_with_client_failed_to_send;
+    int64_t num_calls_finished_with_client_failed_to_send;
+    bool has_num_calls_finished_known_received;
+    int64_t num_calls_finished_known_received;
+    pb_callback_t calls_finished_with_drop;
+/* @@protoc_insertion_point(struct:grpc_lb_v1_ClientStats) */
+} grpc_lb_v1_ClientStats;
+
 typedef struct _grpc_lb_v1_ClientStatsPerToken {
     pb_callback_t load_balance_token;
     bool has_num_calls;
@@ -26,20 +43,20 @@ typedef struct _grpc_lb_v1_ClientStatsPerToken {
 /* @@protoc_insertion_point(struct:grpc_lb_v1_ClientStatsPerToken) */
 } grpc_lb_v1_ClientStatsPerToken;
 
-typedef struct _grpc_lb_v1_Duration {
-    bool has_seconds;
-    int64_t seconds;
-    bool has_nanos;
-    int32_t nanos;
-/* @@protoc_insertion_point(struct:grpc_lb_v1_Duration) */
-} grpc_lb_v1_Duration;
-
 typedef struct _grpc_lb_v1_InitialLoadBalanceRequest {
     bool has_name;
     char name[128];
 /* @@protoc_insertion_point(struct:grpc_lb_v1_InitialLoadBalanceRequest) */
 } grpc_lb_v1_InitialLoadBalanceRequest;
 
+typedef struct _grpc_lb_v1_InitialLoadBalanceResponse {
+    bool has_load_balancer_delegate;
+    char load_balancer_delegate[64];
+    bool has_client_stats_report_interval;
+    google_protobuf_Duration client_stats_report_interval;
+/* @@protoc_insertion_point(struct:grpc_lb_v1_InitialLoadBalanceResponse) */
+} grpc_lb_v1_InitialLoadBalanceResponse;
+
 typedef PB_BYTES_ARRAY_T(16) grpc_lb_v1_Server_ip_address_t;
 typedef struct _grpc_lb_v1_Server {
     bool has_ip_address;
@@ -53,37 +70,6 @@ typedef struct _grpc_lb_v1_Server {
 /* @@protoc_insertion_point(struct:grpc_lb_v1_Server) */
 } grpc_lb_v1_Server;
 
-typedef struct _grpc_lb_v1_Timestamp {
-    bool has_seconds;
-    int64_t seconds;
-    bool has_nanos;
-    int32_t nanos;
-/* @@protoc_insertion_point(struct:grpc_lb_v1_Timestamp) */
-} grpc_lb_v1_Timestamp;
-
-typedef struct _grpc_lb_v1_ClientStats {
-    bool has_timestamp;
-    grpc_lb_v1_Timestamp timestamp;
-    bool has_num_calls_started;
-    int64_t num_calls_started;
-    bool has_num_calls_finished;
-    int64_t num_calls_finished;
-    bool has_num_calls_finished_with_client_failed_to_send;
-    int64_t num_calls_finished_with_client_failed_to_send;
-    bool has_num_calls_finished_known_received;
-    int64_t num_calls_finished_known_received;
-    pb_callback_t calls_finished_with_drop;
-/* @@protoc_insertion_point(struct:grpc_lb_v1_ClientStats) */
-} grpc_lb_v1_ClientStats;
-
-typedef struct _grpc_lb_v1_InitialLoadBalanceResponse {
-    bool has_load_balancer_delegate;
-    char load_balancer_delegate[64];
-    bool has_client_stats_report_interval;
-    grpc_lb_v1_Duration client_stats_report_interval;
-/* @@protoc_insertion_point(struct:grpc_lb_v1_InitialLoadBalanceResponse) */
-} grpc_lb_v1_InitialLoadBalanceResponse;
-
 typedef struct _grpc_lb_v1_LoadBalanceRequest {
     bool has_initial_request;
     grpc_lb_v1_InitialLoadBalanceRequest initial_request;
@@ -103,56 +89,46 @@ typedef struct _grpc_lb_v1_LoadBalanceResponse {
 /* Default values for struct fields */
 
 /* Initializer values for message structs */
-#define grpc_lb_v1_Duration_init_default         {false, 0, false, 0}
-#define grpc_lb_v1_Timestamp_init_default        {false, 0, false, 0}
 #define grpc_lb_v1_LoadBalanceRequest_init_default {false, grpc_lb_v1_InitialLoadBalanceRequest_init_default, false, grpc_lb_v1_ClientStats_init_default}
 #define grpc_lb_v1_InitialLoadBalanceRequest_init_default {false, ""}
 #define grpc_lb_v1_ClientStatsPerToken_init_default {{{NULL}, NULL}, false, 0}
-#define grpc_lb_v1_ClientStats_init_default      {false, grpc_lb_v1_Timestamp_init_default, false, 0, false, 0, false, 0, false, 0, {{NULL}, NULL}}
+#define grpc_lb_v1_ClientStats_init_default      {false, google_protobuf_Timestamp_init_default, false, 0, false, 0, false, 0, false, 0, {{NULL}, NULL}}
 #define grpc_lb_v1_LoadBalanceResponse_init_default {false, grpc_lb_v1_InitialLoadBalanceResponse_init_default, false, grpc_lb_v1_ServerList_init_default}
-#define grpc_lb_v1_InitialLoadBalanceResponse_init_default {false, "", false, grpc_lb_v1_Duration_init_default}
+#define grpc_lb_v1_InitialLoadBalanceResponse_init_default {false, "", false, google_protobuf_Duration_init_default}
 #define grpc_lb_v1_ServerList_init_default       {{{NULL}, NULL}}
 #define grpc_lb_v1_Server_init_default           {false, {0, {0}}, false, 0, false, "", false, 0}
-#define grpc_lb_v1_Duration_init_zero            {false, 0, false, 0}
-#define grpc_lb_v1_Timestamp_init_zero           {false, 0, false, 0}
 #define grpc_lb_v1_LoadBalanceRequest_init_zero  {false, grpc_lb_v1_InitialLoadBalanceRequest_init_zero, false, grpc_lb_v1_ClientStats_init_zero}
 #define grpc_lb_v1_InitialLoadBalanceRequest_init_zero {false, ""}
 #define grpc_lb_v1_ClientStatsPerToken_init_zero {{{NULL}, NULL}, false, 0}
-#define grpc_lb_v1_ClientStats_init_zero         {false, grpc_lb_v1_Timestamp_init_zero, false, 0, false, 0, false, 0, false, 0, {{NULL}, NULL}}
+#define grpc_lb_v1_ClientStats_init_zero         {false, google_protobuf_Timestamp_init_zero, false, 0, false, 0, false, 0, false, 0, {{NULL}, NULL}}
 #define grpc_lb_v1_LoadBalanceResponse_init_zero {false, grpc_lb_v1_InitialLoadBalanceResponse_init_zero, false, grpc_lb_v1_ServerList_init_zero}
-#define grpc_lb_v1_InitialLoadBalanceResponse_init_zero {false, "", false, grpc_lb_v1_Duration_init_zero}
+#define grpc_lb_v1_InitialLoadBalanceResponse_init_zero {false, "", false, google_protobuf_Duration_init_zero}
 #define grpc_lb_v1_ServerList_init_zero          {{{NULL}, NULL}}
 #define grpc_lb_v1_Server_init_zero              {false, {0, {0}}, false, 0, false, "", false, 0}
 
 /* Field tags (for use in manual encoding/decoding) */
 #define grpc_lb_v1_ServerList_servers_tag        1
-#define grpc_lb_v1_ClientStatsPerToken_load_balance_token_tag 1
-#define grpc_lb_v1_ClientStatsPerToken_num_calls_tag 2
-#define grpc_lb_v1_Duration_seconds_tag          1
-#define grpc_lb_v1_Duration_nanos_tag            2
-#define grpc_lb_v1_InitialLoadBalanceRequest_name_tag 1
-#define grpc_lb_v1_Server_ip_address_tag         1
-#define grpc_lb_v1_Server_port_tag               2
-#define grpc_lb_v1_Server_load_balance_token_tag 3
-#define grpc_lb_v1_Server_drop_tag               4
-#define grpc_lb_v1_Timestamp_seconds_tag         1
-#define grpc_lb_v1_Timestamp_nanos_tag           2
 #define grpc_lb_v1_ClientStats_timestamp_tag     1
 #define grpc_lb_v1_ClientStats_num_calls_started_tag 2
 #define grpc_lb_v1_ClientStats_num_calls_finished_tag 3
 #define grpc_lb_v1_ClientStats_num_calls_finished_with_client_failed_to_send_tag 6
 #define grpc_lb_v1_ClientStats_num_calls_finished_known_received_tag 7
 #define grpc_lb_v1_ClientStats_calls_finished_with_drop_tag 8
+#define grpc_lb_v1_ClientStatsPerToken_load_balance_token_tag 1
+#define grpc_lb_v1_ClientStatsPerToken_num_calls_tag 2
+#define grpc_lb_v1_InitialLoadBalanceRequest_name_tag 1
 #define grpc_lb_v1_InitialLoadBalanceResponse_load_balancer_delegate_tag 1
 #define grpc_lb_v1_InitialLoadBalanceResponse_client_stats_report_interval_tag 2
+#define grpc_lb_v1_Server_ip_address_tag         1
+#define grpc_lb_v1_Server_port_tag               2
+#define grpc_lb_v1_Server_load_balance_token_tag 3
+#define grpc_lb_v1_Server_drop_tag               4
 #define grpc_lb_v1_LoadBalanceRequest_initial_request_tag 1
 #define grpc_lb_v1_LoadBalanceRequest_client_stats_tag 2
 #define grpc_lb_v1_LoadBalanceResponse_initial_response_tag 1
 #define grpc_lb_v1_LoadBalanceResponse_server_list_tag 2
 
 /* Struct field encoding specification for nanopb */
-extern const pb_field_t grpc_lb_v1_Duration_fields[3];
-extern const pb_field_t grpc_lb_v1_Timestamp_fields[3];
 extern const pb_field_t grpc_lb_v1_LoadBalanceRequest_fields[3];
 extern const pb_field_t grpc_lb_v1_InitialLoadBalanceRequest_fields[2];
 extern const pb_field_t grpc_lb_v1_ClientStatsPerToken_fields[3];
@@ -163,8 +139,6 @@ extern const pb_field_t grpc_lb_v1_ServerList_fields[2];
 extern const pb_field_t grpc_lb_v1_Server_fields[5];
 
 /* Maximum encoded size of messages (where known) */
-#define grpc_lb_v1_Duration_size                 22
-#define grpc_lb_v1_Timestamp_size                22
 #define grpc_lb_v1_LoadBalanceRequest_size       (140 + grpc_lb_v1_ClientStats_size)
 #define grpc_lb_v1_InitialLoadBalanceRequest_size 131
 /* grpc_lb_v1_ClientStatsPerToken_size depends on runtime parameters */
diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
index 76df976698..ed8cc60ea1 100644
--- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
@@ -27,6 +27,7 @@
 #include "src/core/ext/filters/client_channel/subchannel.h"
 #include "src/core/ext/filters/client_channel/subchannel_index.h"
 #include "src/core/lib/channel/channel_args.h"
+#include "src/core/lib/gprpp/mutex_lock.h"
 #include "src/core/lib/iomgr/combiner.h"
 #include "src/core/lib/iomgr/sockaddr_utils.h"
 #include "src/core/lib/transport/connectivity_state.h"
@@ -46,7 +47,7 @@ class PickFirst : public LoadBalancingPolicy {
   explicit PickFirst(const Args& args);
 
   void UpdateLocked(const grpc_channel_args& args) override;
-  bool PickLocked(PickState* pick) override;
+  bool PickLocked(PickState* pick, grpc_error** error) override;
   void CancelPickLocked(PickState* pick, grpc_error* error) override;
   void CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask,
                                  uint32_t initial_metadata_flags_eq,
@@ -56,8 +57,10 @@ class PickFirst : public LoadBalancingPolicy {
   grpc_connectivity_state CheckConnectivityLocked(
       grpc_error** connectivity_error) override;
   void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override;
-  void PingOneLocked(grpc_closure* on_initiate, grpc_closure* on_ack) override;
   void ExitIdleLocked() override;
+  void ResetBackoffLocked() override;
+  void FillChildRefsForChannelz(ChildRefsList* child_subchannels,
+                                ChildRefsList* ignored) override;
 
  private:
   ~PickFirst();
@@ -68,16 +71,22 @@ class PickFirst : public LoadBalancingPolicy {
       : public SubchannelData<PickFirstSubchannelList,
                               PickFirstSubchannelData> {
    public:
-    PickFirstSubchannelData(PickFirstSubchannelList* subchannel_list,
-                            const grpc_lb_user_data_vtable* user_data_vtable,
-                            const grpc_lb_address& address,
-                            grpc_subchannel* subchannel,
-                            grpc_combiner* combiner)
+    PickFirstSubchannelData(
+        SubchannelList<PickFirstSubchannelList, PickFirstSubchannelData>*
+            subchannel_list,
+        const grpc_lb_user_data_vtable* user_data_vtable,
+        const grpc_lb_address& address, grpc_subchannel* subchannel,
+        grpc_combiner* combiner)
         : SubchannelData(subchannel_list, user_data_vtable, address, subchannel,
                          combiner) {}
 
     void ProcessConnectivityChangeLocked(
         grpc_connectivity_state connectivity_state, grpc_error* error) override;
+
+    // Processes the connectivity change to READY for an unselected subchannel.
+    void ProcessUnselectedReadyLocked();
+
+    void CheckConnectivityStateAndStartWatchingLocked();
   };
 
   class PickFirstSubchannelList
@@ -103,10 +112,22 @@ class PickFirst : public LoadBalancingPolicy {
     }
   };
 
+  // Helper class to ensure that any function that modifies the child refs
+  // data structures will update the channelz snapshot data structures before
+  // returning.
+  class AutoChildRefsUpdater {
+   public:
+    explicit AutoChildRefsUpdater(PickFirst* pf) : pf_(pf) {}
+    ~AutoChildRefsUpdater() { pf_->UpdateChildRefsLocked(); }
+
+   private:
+    PickFirst* pf_;
+  };
+
   void ShutdownLocked() override;
 
   void StartPickingLocked();
-  void DestroyUnselectedSubchannelsLocked();
+  void UpdateChildRefsLocked();
 
   // All our subchannels.
   OrphanablePtr<PickFirstSubchannelList> subchannel_list_;
@@ -122,10 +143,17 @@ class PickFirst : public LoadBalancingPolicy {
   PickState* pending_picks_ = nullptr;
   // Our connectivity state tracker.
   grpc_connectivity_state_tracker state_tracker_;
+
+  /// Lock and data used to capture snapshots of this channels child
+  /// channels and subchannels. This data is consumed by channelz.
+  gpr_mu child_refs_mu_;
+  ChildRefsList child_subchannels_;
+  ChildRefsList child_channels_;
 };
 
 PickFirst::PickFirst(const Args& args) : LoadBalancingPolicy(args) {
   GPR_ASSERT(args.client_channel_factory != nullptr);
+  gpr_mu_init(&child_refs_mu_);
   grpc_connectivity_state_init(&state_tracker_, GRPC_CHANNEL_IDLE,
                                "pick_first");
   if (grpc_lb_pick_first_trace.enabled()) {
@@ -139,6 +167,7 @@ PickFirst::~PickFirst() {
   if (grpc_lb_pick_first_trace.enabled()) {
     gpr_log(GPR_INFO, "Destroying Pick First %p", this);
   }
+  gpr_mu_destroy(&child_refs_mu_);
   GPR_ASSERT(subchannel_list_ == nullptr);
   GPR_ASSERT(latest_pending_subchannel_list_ == nullptr);
   GPR_ASSERT(pending_picks_ == nullptr);
@@ -150,14 +179,16 @@ void PickFirst::HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) {
   PickState* pick;
   while ((pick = pending_picks_) != nullptr) {
     pending_picks_ = pick->next;
-    if (new_policy->PickLocked(pick)) {
+    grpc_error* error = GRPC_ERROR_NONE;
+    if (new_policy->PickLocked(pick, &error)) {
       // Synchronous return, schedule closure.
-      GRPC_CLOSURE_SCHED(pick->on_complete, GRPC_ERROR_NONE);
+      GRPC_CLOSURE_SCHED(pick->on_complete, error);
     }
   }
 }
 
 void PickFirst::ShutdownLocked() {
+  AutoChildRefsUpdater guard(this);
   grpc_error* error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel shutdown");
   if (grpc_lb_pick_first_trace.enabled()) {
     gpr_log(GPR_INFO, "Pick First %p Shutting down", this);
@@ -219,13 +250,9 @@ void PickFirst::CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask,
 
 void PickFirst::StartPickingLocked() {
   started_picking_ = true;
-  if (subchannel_list_ != nullptr) {
-    for (size_t i = 0; i < subchannel_list_->num_subchannels(); ++i) {
-      if (subchannel_list_->subchannel(i)->subchannel() != nullptr) {
-        subchannel_list_->subchannel(i)->StartConnectivityWatchLocked();
-        break;
-      }
-    }
+  if (subchannel_list_ != nullptr && subchannel_list_->num_subchannels() > 0) {
+    subchannel_list_->subchannel(0)
+        ->CheckConnectivityStateAndStartWatchingLocked();
   }
 }
 
@@ -235,28 +262,31 @@ void PickFirst::ExitIdleLocked() {
   }
 }
 
-bool PickFirst::PickLocked(PickState* pick) {
+void PickFirst::ResetBackoffLocked() {
+  subchannel_list_->ResetBackoffLocked();
+  if (latest_pending_subchannel_list_ != nullptr) {
+    latest_pending_subchannel_list_->ResetBackoffLocked();
+  }
+}
+
+bool PickFirst::PickLocked(PickState* pick, grpc_error** error) {
   // If we have a selected subchannel already, return synchronously.
   if (selected_ != nullptr) {
     pick->connected_subchannel = selected_->connected_subchannel()->Ref();
     return true;
   }
   // No subchannel selected yet, so handle asynchronously.
-  if (!started_picking_) {
-    StartPickingLocked();
+  if (pick->on_complete == nullptr) {
+    *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING(
+        "No pick result available but synchronous result required.");
+    return true;
   }
   pick->next = pending_picks_;
   pending_picks_ = pick;
-  return false;
-}
-
-void PickFirst::DestroyUnselectedSubchannelsLocked() {
-  for (size_t i = 0; i < subchannel_list_->num_subchannels(); ++i) {
-    PickFirstSubchannelData* sd = subchannel_list_->subchannel(i);
-    if (selected_ != sd) {
-      sd->UnrefSubchannelLocked("selected_different_subchannel");
-    }
+  if (!started_picking_) {
+    StartPickingLocked();
   }
+  return false;
 }
 
 grpc_connectivity_state PickFirst::CheckConnectivityLocked(grpc_error** error) {
@@ -269,18 +299,41 @@ void PickFirst::NotifyOnStateChangeLocked(grpc_connectivity_state* current,
                                                  notify);
 }
 
-void PickFirst::PingOneLocked(grpc_closure* on_initiate, grpc_closure* on_ack) {
-  if (selected_ != nullptr) {
-    selected_->connected_subchannel()->Ping(on_initiate, on_ack);
-  } else {
-    GRPC_CLOSURE_SCHED(on_initiate,
-                       GRPC_ERROR_CREATE_FROM_STATIC_STRING("Not connected"));
-    GRPC_CLOSURE_SCHED(on_ack,
-                       GRPC_ERROR_CREATE_FROM_STATIC_STRING("Not connected"));
+void PickFirst::FillChildRefsForChannelz(
+    ChildRefsList* child_subchannels_to_fill, ChildRefsList* ignored) {
+  MutexLock lock(&child_refs_mu_);
+  for (size_t i = 0; i < child_subchannels_.size(); ++i) {
+    // TODO(ncteisen): implement a de dup loop that is not O(n^2). Might
+    // have to implement lightweight set. For now, we don't care about
+    // performance when channelz requests are made.
+    bool found = false;
+    for (size_t j = 0; j < child_subchannels_to_fill->size(); ++j) {
+      if ((*child_subchannels_to_fill)[j] == child_subchannels_[i]) {
+        found = true;
+        break;
+      }
+    }
+    if (!found) {
+      child_subchannels_to_fill->push_back(child_subchannels_[i]);
+    }
   }
 }
 
+void PickFirst::UpdateChildRefsLocked() {
+  ChildRefsList cs;
+  if (subchannel_list_ != nullptr) {
+    subchannel_list_->PopulateChildRefsList(&cs);
+  }
+  if (latest_pending_subchannel_list_ != nullptr) {
+    latest_pending_subchannel_list_->PopulateChildRefsList(&cs);
+  }
+  // atomically update the data that channelz will actually be looking at.
+  MutexLock lock(&child_refs_mu_);
+  child_subchannels_ = std::move(cs);
+}
+
 void PickFirst::UpdateLocked(const grpc_channel_args& args) {
+  AutoChildRefsUpdater guard(this);
   const grpc_arg* arg = grpc_channel_args_find(&args, GRPC_ARG_LB_ADDRESSES);
   if (arg == nullptr || arg->type != GRPC_ARG_POINTER) {
     if (subchannel_list_ == nullptr) {
@@ -326,7 +379,8 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) {
     // If we've started picking, start trying to connect to the first
     // subchannel in the new list.
     if (started_picking_) {
-      subchannel_list_->subchannel(0)->StartConnectivityWatchLocked();
+      subchannel_list_->subchannel(0)
+          ->CheckConnectivityStateAndStartWatchingLocked();
     }
   } else {
     // We do have a selected subchannel.
@@ -351,7 +405,6 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) {
         if (sd->CheckConnectivityStateLocked(&error) == GRPC_CHANNEL_READY) {
           selected_ = sd;
           subchannel_list_ = std::move(subchannel_list);
-          DestroyUnselectedSubchannelsLocked();
           sd->StartConnectivityWatchLocked();
           // If there was a previously pending update (which may or may
           // not have contained the currently selected subchannel), drop
@@ -380,7 +433,7 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) {
     // subchannel in the new list.
     if (started_picking_) {
       latest_pending_subchannel_list_->subchannel(0)
-          ->StartConnectivityWatchLocked();
+          ->CheckConnectivityStateAndStartWatchingLocked();
     }
   }
 }
@@ -388,10 +441,12 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) {
 void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
     grpc_connectivity_state connectivity_state, grpc_error* error) {
   PickFirst* p = static_cast<PickFirst*>(subchannel_list()->policy());
+  AutoChildRefsUpdater guard(p);
   // The notification must be for a subchannel in either the current or
   // latest pending subchannel lists.
   GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() ||
              subchannel_list() == p->latest_pending_subchannel_list_.get());
+  GPR_ASSERT(connectivity_state != GRPC_CHANNEL_SHUTDOWN);
   // Handle updates for the currently selected subchannel.
   if (p->selected_ == this) {
     if (grpc_lb_pick_first_trace.enabled()) {
@@ -421,14 +476,12 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
                     "update"),
           "selected_not_ready+switch_to_update");
     } else {
-      // TODO(juanlishen): we re-resolve when the selected subchannel goes to
-      // TRANSIENT_FAILURE because we used to shut down in this case before
-      // re-resolution is introduced. But we need to investigate whether we
-      // really want to take any action instead of waiting for the selected
-      // subchannel reconnecting.
-      GPR_ASSERT(connectivity_state != GRPC_CHANNEL_SHUTDOWN);
       if (connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
-        // If the selected channel goes bad, request a re-resolution.
+        // If the selected subchannel goes bad, request a re-resolution. We also
+        // set the channel state to IDLE and reset started_picking_. The reason
+        // is that if the new state is TRANSIENT_FAILURE due to a GOAWAY
+        // reception we don't want to connect to the re-resolved backends until
+        // we leave the IDLE state.
         grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_IDLE,
                                     GRPC_ERROR_NONE,
                                     "selected_changed+reresolve");
@@ -436,7 +489,6 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
         p->TryReresolutionLocked(&grpc_lb_pick_first_trace, GRPC_ERROR_NONE);
         // In transient failure. Rely on re-resolution to recover.
         p->selected_ = nullptr;
-        UnrefSubchannelLocked("pf_selected_shutdown");
         StopConnectivityWatchLocked();
       } else {
         grpc_connectivity_state_set(&p->state_tracker_, connectivity_state,
@@ -459,41 +511,7 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
   //    select in place of the current one.
   switch (connectivity_state) {
     case GRPC_CHANNEL_READY: {
-      // Case 2.  Promote p->latest_pending_subchannel_list_ to
-      // p->subchannel_list_.
-      if (subchannel_list() == p->latest_pending_subchannel_list_.get()) {
-        if (grpc_lb_pick_first_trace.enabled()) {
-          gpr_log(GPR_INFO,
-                  "Pick First %p promoting pending subchannel list %p to "
-                  "replace %p",
-                  p, p->latest_pending_subchannel_list_.get(),
-                  p->subchannel_list_.get());
-        }
-        p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
-      }
-      // Cases 1 and 2.
-      grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_READY,
-                                  GRPC_ERROR_NONE, "connecting_ready");
-      p->selected_ = this;
-      if (grpc_lb_pick_first_trace.enabled()) {
-        gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p,
-                subchannel());
-      }
-      // Drop all other subchannels, since we are now connected.
-      p->DestroyUnselectedSubchannelsLocked();
-      // Update any calls that were waiting for a pick.
-      PickState* pick;
-      while ((pick = p->pending_picks_)) {
-        p->pending_picks_ = pick->next;
-        pick->connected_subchannel =
-            p->selected_->connected_subchannel()->Ref();
-        if (grpc_lb_pick_first_trace.enabled()) {
-          gpr_log(GPR_INFO,
-                  "Servicing pending pick with selected subchannel %p",
-                  p->selected_);
-        }
-        GRPC_CLOSURE_SCHED(pick->on_complete, GRPC_ERROR_NONE);
-      }
+      ProcessUnselectedReadyLocked();
       // Renew notification.
       RenewConnectivityWatchLocked();
       break;
@@ -501,19 +519,18 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
     case GRPC_CHANNEL_TRANSIENT_FAILURE: {
       StopConnectivityWatchLocked();
       PickFirstSubchannelData* sd = this;
-      do {
-        size_t next_index =
-            (sd->Index() + 1) % subchannel_list()->num_subchannels();
-        sd = subchannel_list()->subchannel(next_index);
-      } while (sd->subchannel() == nullptr);
+      size_t next_index =
+          (sd->Index() + 1) % subchannel_list()->num_subchannels();
+      sd = subchannel_list()->subchannel(next_index);
       // Case 1: Only set state to TRANSIENT_FAILURE if we've tried
       // all subchannels.
       if (sd->Index() == 0 && subchannel_list() == p->subchannel_list_.get()) {
+        p->TryReresolutionLocked(&grpc_lb_pick_first_trace, GRPC_ERROR_NONE);
         grpc_connectivity_state_set(
             &p->state_tracker_, GRPC_CHANNEL_TRANSIENT_FAILURE,
-            GRPC_ERROR_REF(error), "connecting_transient_failure");
+            GRPC_ERROR_REF(error), "exhausted_subchannels");
       }
-      sd->StartConnectivityWatchLocked();
+      sd->CheckConnectivityStateAndStartWatchingLocked();
       break;
     }
     case GRPC_CHANNEL_CONNECTING:
@@ -534,6 +551,65 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
   GRPC_ERROR_UNREF(error);
 }
 
+void PickFirst::PickFirstSubchannelData::ProcessUnselectedReadyLocked() {
+  PickFirst* p = static_cast<PickFirst*>(subchannel_list()->policy());
+  // If we get here, there are two possible cases:
+  // 1. We do not currently have a selected subchannel, and the update is
+  //    for a subchannel in p->subchannel_list_ that we're trying to
+  //    connect to.  The goal here is to find a subchannel that we can
+  //    select.
+  // 2. We do currently have a selected subchannel, and the update is
+  //    for a subchannel in p->latest_pending_subchannel_list_.  The
+  //    goal here is to find a subchannel from the update that we can
+  //    select in place of the current one.
+  GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() ||
+             subchannel_list() == p->latest_pending_subchannel_list_.get());
+  // Case 2.  Promote p->latest_pending_subchannel_list_ to p->subchannel_list_.
+  if (subchannel_list() == p->latest_pending_subchannel_list_.get()) {
+    if (grpc_lb_pick_first_trace.enabled()) {
+      gpr_log(GPR_INFO,
+              "Pick First %p promoting pending subchannel list %p to "
+              "replace %p",
+              p, p->latest_pending_subchannel_list_.get(),
+              p->subchannel_list_.get());
+    }
+    p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
+  }
+  // Cases 1 and 2.
+  grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_READY,
+                              GRPC_ERROR_NONE, "subchannel_ready");
+  p->selected_ = this;
+  if (grpc_lb_pick_first_trace.enabled()) {
+    gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p, subchannel());
+  }
+  // Update any calls that were waiting for a pick.
+  PickState* pick;
+  while ((pick = p->pending_picks_)) {
+    p->pending_picks_ = pick->next;
+    pick->connected_subchannel = p->selected_->connected_subchannel()->Ref();
+    if (grpc_lb_pick_first_trace.enabled()) {
+      gpr_log(GPR_INFO, "Servicing pending pick with selected subchannel %p",
+              p->selected_->subchannel());
+    }
+    GRPC_CLOSURE_SCHED(pick->on_complete, GRPC_ERROR_NONE);
+  }
+}
+
+void PickFirst::PickFirstSubchannelData::
+    CheckConnectivityStateAndStartWatchingLocked() {
+  PickFirst* p = static_cast<PickFirst*>(subchannel_list()->policy());
+  grpc_error* error = GRPC_ERROR_NONE;
+  if (p->selected_ != this &&
+      CheckConnectivityStateLocked(&error) == GRPC_CHANNEL_READY) {
+    // We must process the READY subchannel before we start watching it.
+    // Otherwise, we won't know it's READY because we will be waiting for its
+    // connectivity state to change from READY.
+    ProcessUnselectedReadyLocked();
+  }
+  GRPC_ERROR_UNREF(error);
+  StartConnectivityWatchLocked();
+}
+
 //
 // factory
 //
diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc
index b177385065..8dd5820bae 100644
--- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc
@@ -36,6 +36,7 @@
 #include "src/core/ext/filters/client_channel/subchannel_index.h"
 #include "src/core/lib/channel/channel_args.h"
 #include "src/core/lib/debug/trace.h"
+#include "src/core/lib/gprpp/mutex_lock.h"
 #include "src/core/lib/gprpp/ref_counted_ptr.h"
 #include "src/core/lib/iomgr/combiner.h"
 #include "src/core/lib/iomgr/sockaddr_utils.h"
@@ -57,7 +58,7 @@ class RoundRobin : public LoadBalancingPolicy {
   explicit RoundRobin(const Args& args);
 
   void UpdateLocked(const grpc_channel_args& args) override;
-  bool PickLocked(PickState* pick) override;
+  bool PickLocked(PickState* pick, grpc_error** error) override;
   void CancelPickLocked(PickState* pick, grpc_error* error) override;
   void CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask,
                                  uint32_t initial_metadata_flags_eq,
@@ -67,8 +68,10 @@ class RoundRobin : public LoadBalancingPolicy {
   grpc_connectivity_state CheckConnectivityLocked(
       grpc_error** connectivity_error) override;
   void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override;
-  void PingOneLocked(grpc_closure* on_initiate, grpc_closure* on_ack) override;
   void ExitIdleLocked() override;
+  void ResetBackoffLocked() override;
+  void FillChildRefsForChannelz(ChildRefsList* child_subchannels,
+                                ChildRefsList* ignored) override;
 
  private:
   ~RoundRobin();
@@ -86,11 +89,12 @@ class RoundRobin : public LoadBalancingPolicy {
       : public SubchannelData<RoundRobinSubchannelList,
                               RoundRobinSubchannelData> {
    public:
-    RoundRobinSubchannelData(RoundRobinSubchannelList* subchannel_list,
-                             const grpc_lb_user_data_vtable* user_data_vtable,
-                             const grpc_lb_address& address,
-                             grpc_subchannel* subchannel,
-                             grpc_combiner* combiner)
+    RoundRobinSubchannelData(
+        SubchannelList<RoundRobinSubchannelList, RoundRobinSubchannelData>*
+            subchannel_list,
+        const grpc_lb_user_data_vtable* user_data_vtable,
+        const grpc_lb_address& address, grpc_subchannel* subchannel,
+        grpc_combiner* combiner)
         : SubchannelData(subchannel_list, user_data_vtable, address, subchannel,
                          combiner),
           user_data_vtable_(user_data_vtable),
@@ -136,7 +140,8 @@ class RoundRobin : public LoadBalancingPolicy {
         grpc_client_channel_factory* client_channel_factory,
         const grpc_channel_args& args)
         : SubchannelList(policy, tracer, addresses, combiner,
-                         client_channel_factory, args) {
+                         client_channel_factory, args),
+          last_ready_index_(num_subchannels() - 1) {
       // Need to maintain a ref to the LB policy as long as we maintain
       // any references to subchannels, since the subchannels'
       // pollset_sets will include the LB policy's pollset_set.
@@ -177,7 +182,19 @@ class RoundRobin : public LoadBalancingPolicy {
     size_t num_connecting_ = 0;
     size_t num_transient_failure_ = 0;
     grpc_error* last_transient_failure_error_ = GRPC_ERROR_NONE;
-    size_t last_ready_index_ = -1;  // Index into list of last pick.
+    size_t last_ready_index_;  // Index into list of last pick.
+  };
+
+  // Helper class to ensure that any function that modifies the child refs
+  // data structures will update the channelz snapshot data structures before
+  // returning.
+  class AutoChildRefsUpdater {
+   public:
+    explicit AutoChildRefsUpdater(RoundRobin* rr) : rr_(rr) {}
+    ~AutoChildRefsUpdater() { rr_->UpdateChildRefsLocked(); }
+
+   private:
+    RoundRobin* rr_;
   };
 
   void ShutdownLocked() override;
@@ -185,6 +202,7 @@ class RoundRobin : public LoadBalancingPolicy {
   void StartPickingLocked();
   bool DoPickLocked(PickState* pick);
   void DrainPendingPicksLocked();
+  void UpdateChildRefsLocked();
 
   /** list of subchannels */
   OrphanablePtr<RoundRobinSubchannelList> subchannel_list_;
@@ -202,10 +220,16 @@ class RoundRobin : public LoadBalancingPolicy {
   PickState* pending_picks_ = nullptr;
   /** our connectivity state tracker */
   grpc_connectivity_state_tracker state_tracker_;
+  /// Lock and data used to capture snapshots of this channel's child
+  /// channels and subchannels. This data is consumed by channelz.
+  gpr_mu child_refs_mu_;
+  ChildRefsList child_subchannels_;
+  ChildRefsList child_channels_;
 };
 
 RoundRobin::RoundRobin(const Args& args) : LoadBalancingPolicy(args) {
   GPR_ASSERT(args.client_channel_factory != nullptr);
+  gpr_mu_init(&child_refs_mu_);
   grpc_connectivity_state_init(&state_tracker_, GRPC_CHANNEL_IDLE,
                                "round_robin");
   UpdateLocked(*args.args);
@@ -220,6 +244,7 @@ RoundRobin::~RoundRobin() {
   if (grpc_lb_round_robin_trace.enabled()) {
     gpr_log(GPR_INFO, "[RR %p] Destroying Round Robin policy", this);
   }
+  gpr_mu_destroy(&child_refs_mu_);
   GPR_ASSERT(subchannel_list_ == nullptr);
   GPR_ASSERT(latest_pending_subchannel_list_ == nullptr);
   GPR_ASSERT(pending_picks_ == nullptr);
@@ -231,14 +256,16 @@ void RoundRobin::HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) {
   PickState* pick;
   while ((pick = pending_picks_) != nullptr) {
     pending_picks_ = pick->next;
-    if (new_policy->PickLocked(pick)) {
+    grpc_error* error = GRPC_ERROR_NONE;
+    if (new_policy->PickLocked(pick, &error)) {
       // Synchronous return, schedule closure.
-      GRPC_CLOSURE_SCHED(pick->on_complete, GRPC_ERROR_NONE);
+      GRPC_CLOSURE_SCHED(pick->on_complete, error);
     }
   }
 }
 
 void RoundRobin::ShutdownLocked() {
+  AutoChildRefsUpdater guard(this);
   grpc_error* error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel shutdown");
   if (grpc_lb_round_robin_trace.enabled()) {
     gpr_log(GPR_INFO, "[RR %p] Shutting down", this);
@@ -310,6 +337,13 @@ void RoundRobin::ExitIdleLocked() {
   }
 }
 
+void RoundRobin::ResetBackoffLocked() {
+  subchannel_list_->ResetBackoffLocked();
+  if (latest_pending_subchannel_list_ != nullptr) {
+    latest_pending_subchannel_list_->ResetBackoffLocked();
+  }
+}
+
 bool RoundRobin::DoPickLocked(PickState* pick) {
   const size_t next_ready_index =
       subchannel_list_->GetNextReadySubchannelIndexLocked();
@@ -345,7 +379,7 @@ void RoundRobin::DrainPendingPicksLocked() {
   }
 }
 
-bool RoundRobin::PickLocked(PickState* pick) {
+bool RoundRobin::PickLocked(PickState* pick, grpc_error** error) {
   if (grpc_lb_round_robin_trace.enabled()) {
     gpr_log(GPR_INFO, "[RR %p] Trying to pick (shutdown: %d)", this, shutdown_);
   }
@@ -353,15 +387,53 @@ bool RoundRobin::PickLocked(PickState* pick) {
   if (subchannel_list_ != nullptr) {
     if (DoPickLocked(pick)) return true;
   }
+  if (pick->on_complete == nullptr) {
+    *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING(
+        "No pick result available but synchronous result required.");
+    return true;
+  }
   /* no pick currently available. Save for later in list of pending picks */
+  pick->next = pending_picks_;
+  pending_picks_ = pick;
   if (!started_picking_) {
     StartPickingLocked();
   }
-  pick->next = pending_picks_;
-  pending_picks_ = pick;
   return false;
 }
 
+void RoundRobin::FillChildRefsForChannelz(
+    ChildRefsList* child_subchannels_to_fill, ChildRefsList* ignored) {
+  MutexLock lock(&child_refs_mu_);
+  for (size_t i = 0; i < child_subchannels_.size(); ++i) {
+    // TODO(ncteisen): implement a de dup loop that is not O(n^2). Might
+    // have to implement lightweight set. For now, we don't care about
+    // performance when channelz requests are made.
+    bool found = false;
+    for (size_t j = 0; j < child_subchannels_to_fill->size(); ++j) {
+      if ((*child_subchannels_to_fill)[j] == child_subchannels_[i]) {
+        found = true;
+        break;
+      }
+    }
+    if (!found) {
+      child_subchannels_to_fill->push_back(child_subchannels_[i]);
+    }
+  }
+}
+
+void RoundRobin::UpdateChildRefsLocked() {
+  ChildRefsList cs;
+  if (subchannel_list_ != nullptr) {
+    subchannel_list_->PopulateChildRefsList(&cs);
+  }
+  if (latest_pending_subchannel_list_ != nullptr) {
+    latest_pending_subchannel_list_->PopulateChildRefsList(&cs);
+  }
+  // atomically update the data that channelz will actually be looking at.
+  MutexLock lock(&child_refs_mu_);
+  child_subchannels_ = std::move(cs);
+}
+
 void RoundRobin::RoundRobinSubchannelList::StartWatchingLocked() {
   if (num_subchannels() == 0) return;
   // Check current state of each subchannel synchronously, since any
@@ -452,6 +524,7 @@ void RoundRobin::RoundRobinSubchannelList::
 void RoundRobin::RoundRobinSubchannelList::
     UpdateRoundRobinStateFromSubchannelStateCountsLocked() {
   RoundRobin* p = static_cast<RoundRobin*>(policy());
+  AutoChildRefsUpdater guard(p);
   if (num_ready_ > 0) {
     if (p->subchannel_list_.get() != this) {
       // Promote this list to p->subchannel_list_.
@@ -590,24 +663,9 @@ void RoundRobin::NotifyOnStateChangeLocked(grpc_connectivity_state* current,
                                                  notify);
 }
 
-void RoundRobin::PingOneLocked(grpc_closure* on_initiate,
-                               grpc_closure* on_ack) {
-  const size_t next_ready_index =
-      subchannel_list_->GetNextReadySubchannelIndexLocked();
-  if (next_ready_index < subchannel_list_->num_subchannels()) {
-    RoundRobinSubchannelData* selected =
-        subchannel_list_->subchannel(next_ready_index);
-    selected->connected_subchannel()->Ping(on_initiate, on_ack);
-  } else {
-    GRPC_CLOSURE_SCHED(on_initiate, GRPC_ERROR_CREATE_FROM_STATIC_STRING(
-                                        "Round Robin not connected"));
-    GRPC_CLOSURE_SCHED(on_ack, GRPC_ERROR_CREATE_FROM_STATIC_STRING(
-                                   "Round Robin not connected"));
-  }
-}
-
 void RoundRobin::UpdateLocked(const grpc_channel_args& args) {
   const grpc_arg* arg = grpc_channel_args_find(&args, GRPC_ARG_LB_ADDRESSES);
+  AutoChildRefsUpdater guard(this);
   if (GPR_UNLIKELY(arg == nullptr || arg->type != GRPC_ARG_POINTER)) {
     gpr_log(GPR_ERROR, "[RR %p] update provided no addresses; ignoring", this);
     // If we don't have a current subchannel list, go into TRANSIENT_FAILURE.
diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h
index 7e2046bcdc..5e8682e056 100644
--- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h
+++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h
@@ -65,6 +65,10 @@ class MySubchannelList
 
 namespace grpc_core {
 
+// Forward declaration.
+template <typename SubchannelListType, typename SubchannelDataType>
+class SubchannelList;
+
 // Stores data for a particular subchannel in a subchannel list.
 // Callers must create a subclass that implements the
 // ProcessConnectivityChangeLocked() method.
@@ -72,7 +76,9 @@ template <typename SubchannelListType, typename SubchannelDataType>
 class SubchannelData {
  public:
   // Returns a pointer to the subchannel list containing this object.
-  SubchannelListType* subchannel_list() const { return subchannel_list_; }
+  SubchannelListType* subchannel_list() const {
+    return static_cast<SubchannelListType*>(subchannel_list_);
+  }
 
   // Returns the index into the subchannel list of this object.
   size_t Index() const {
@@ -102,10 +108,10 @@ class SubchannelData {
     return pending_connectivity_state_unsafe_;
   }
 
-  // Unrefs the subchannel.  May be used if an individual subchannel is
-  // no longer needed even though the subchannel list as a whole is not
-  // being unreffed.
-  virtual void UnrefSubchannelLocked(const char* reason);
+  // Resets the connection backoff.
+  // TODO(roth): This method should go away when we move the backoff
+  // code out of the subchannel and into the LB policies.
+  void ResetBackoffLocked();
 
   // Starts watching the connectivity state of the subchannel.
   // ProcessConnectivityChangeLocked() will be called when the
@@ -133,10 +139,11 @@ class SubchannelData {
   GRPC_ABSTRACT_BASE_CLASS
 
  protected:
-  SubchannelData(SubchannelListType* subchannel_list,
-                 const grpc_lb_user_data_vtable* user_data_vtable,
-                 const grpc_lb_address& address, grpc_subchannel* subchannel,
-                 grpc_combiner* combiner);
+  SubchannelData(
+      SubchannelList<SubchannelListType, SubchannelDataType>* subchannel_list,
+      const grpc_lb_user_data_vtable* user_data_vtable,
+      const grpc_lb_address& address, grpc_subchannel* subchannel,
+      grpc_combiner* combiner);
 
   virtual ~SubchannelData();
 
@@ -149,6 +156,10 @@ class SubchannelData {
       grpc_connectivity_state connectivity_state,
       grpc_error* error) GRPC_ABSTRACT;
 
+  // Unrefs the subchannel.  May be overridden by subclasses that need
+  // to perform extra cleanup when unreffing the subchannel.
+  virtual void UnrefSubchannelLocked(const char* reason);
+
  private:
   // Updates connected_subchannel_ based on pending_connectivity_state_unsafe_.
   // Returns true if the connectivity state should be reported.
@@ -157,7 +168,7 @@ class SubchannelData {
   static void OnConnectivityChangedLocked(void* arg, grpc_error* error);
 
   // Backpointer to owning subchannel list.  Not owned.
-  SubchannelListType* subchannel_list_;
+  SubchannelList<SubchannelListType, SubchannelDataType>* subchannel_list_;
 
   // The subchannel and connected subchannel.
   grpc_subchannel* subchannel_;
@@ -189,10 +200,28 @@ class SubchannelList
   // Returns true if the subchannel list is shutting down.
   bool shutting_down() const { return shutting_down_; }
 
+  // Populates refs_list with the uuids of this SubchannelLists's subchannels.
+  void PopulateChildRefsList(ChildRefsList* refs_list) {
+    for (size_t i = 0; i < subchannels_.size(); ++i) {
+      if (subchannels_[i].subchannel() != nullptr) {
+        grpc_core::channelz::SubchannelNode* subchannel_node =
+            grpc_subchannel_get_channelz_node(subchannels_[i].subchannel());
+        if (subchannel_node != nullptr) {
+          refs_list->push_back(subchannel_node->uuid());
+        }
+      }
+    }
+  }
+
   // Accessors.
   LoadBalancingPolicy* policy() const { return policy_; }
   TraceFlag* tracer() const { return tracer_; }
 
+  // Resets connection backoff of all subchannels.
+  // TODO(roth): We will probably need to rethink this as part of moving
+  // the backoff code out of subchannels and into LB policies.
+  void ResetBackoffLocked();
+
   // Note: Caller must ensure that this is invoked inside of the combiner.
   void Orphan() override {
     ShutdownLocked();
@@ -246,7 +275,7 @@ class SubchannelList
 
 template <typename SubchannelListType, typename SubchannelDataType>
 SubchannelData<SubchannelListType, SubchannelDataType>::SubchannelData(
-    SubchannelListType* subchannel_list,
+    SubchannelList<SubchannelListType, SubchannelDataType>* subchannel_list,
     const grpc_lb_user_data_vtable* user_data_vtable,
     const grpc_lb_address& address, grpc_subchannel* subchannel,
     grpc_combiner* combiner)
@@ -287,6 +316,14 @@ void SubchannelData<SubchannelListType, SubchannelDataType>::
 
 template <typename SubchannelListType, typename SubchannelDataType>
 void SubchannelData<SubchannelListType,
+                    SubchannelDataType>::ResetBackoffLocked() {
+  if (subchannel_ != nullptr) {
+    grpc_subchannel_reset_backoff(subchannel_);
+  }
+}
+
+template <typename SubchannelListType, typename SubchannelDataType>
+void SubchannelData<SubchannelListType,
                     SubchannelDataType>::StartConnectivityWatchLocked() {
   if (subchannel_list_->tracer()->enabled()) {
     gpr_log(GPR_INFO,
@@ -502,8 +539,7 @@ SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList(
               address_uri);
       gpr_free(address_uri);
     }
-    subchannels_.emplace_back(static_cast<SubchannelListType*>(this),
-                              addresses->user_data_vtable,
+    subchannels_.emplace_back(this, addresses->user_data_vtable,
                               addresses->addresses[i], subchannel, combiner);
   }
 }
@@ -531,6 +567,15 @@ void SubchannelList<SubchannelListType, SubchannelDataType>::ShutdownLocked() {
   }
 }
 
+template <typename SubchannelListType, typename SubchannelDataType>
+void SubchannelList<SubchannelListType,
+                    SubchannelDataType>::ResetBackoffLocked() {
+  for (size_t i = 0; i < subchannels_.size(); i++) {
+    SubchannelDataType* sd = &subchannels_[i];
+    sd->ResetBackoffLocked();
+  }
+}
+
 }  // namespace grpc_core
 
 #endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_SUBCHANNEL_LIST_H */
diff --git a/src/core/ext/filters/client_channel/lb_policy_factory.cc b/src/core/ext/filters/client_channel/lb_policy_factory.cc
index 7c8cba55b7..5c6363d295 100644
--- a/src/core/ext/filters/client_channel/lb_policy_factory.cc
+++ b/src/core/ext/filters/client_channel/lb_policy_factory.cc
@@ -153,3 +153,11 @@ grpc_lb_addresses* grpc_lb_addresses_find_channel_arg(
     return nullptr;
   return static_cast<grpc_lb_addresses*>(lb_addresses_arg->value.pointer.p);
 }
+
+bool grpc_lb_addresses_contains_balancer_address(
+    const grpc_lb_addresses& addresses) {
+  for (size_t i = 0; i < addresses.num_addresses; ++i) {
+    if (addresses.addresses[i].is_balancer) return true;
+  }
+  return false;
+}
diff --git a/src/core/ext/filters/client_channel/lb_policy_factory.h b/src/core/ext/filters/client_channel/lb_policy_factory.h
index 6440258158..62bdbf2689 100644
--- a/src/core/ext/filters/client_channel/lb_policy_factory.h
+++ b/src/core/ext/filters/client_channel/lb_policy_factory.h
@@ -70,16 +70,14 @@ grpc_lb_addresses* grpc_lb_addresses_create(
 grpc_lb_addresses* grpc_lb_addresses_copy(const grpc_lb_addresses* addresses);
 
 /** Sets the value of the address at index \a index of \a addresses.
- * \a address is a socket address of length \a address_len.
- * Takes ownership of \a balancer_name. */
+ * \a address is a socket address of length \a address_len. */
 void grpc_lb_addresses_set_address(grpc_lb_addresses* addresses, size_t index,
                                    const void* address, size_t address_len,
                                    bool is_balancer, const char* balancer_name,
                                    void* user_data);
 
 /** Sets the value of the address at index \a index of \a addresses from \a uri.
- * Returns true upon success, false otherwise. Takes ownership of \a
- * balancer_name. */
+ * Returns true upon success, false otherwise. */
 bool grpc_lb_addresses_set_address_from_uri(grpc_lb_addresses* addresses,
                                             size_t index, const grpc_uri* uri,
                                             bool is_balancer,
@@ -101,6 +99,10 @@ grpc_arg grpc_lb_addresses_create_channel_arg(
 grpc_lb_addresses* grpc_lb_addresses_find_channel_arg(
     const grpc_channel_args* channel_args);
 
+// Returns true if addresses contains at least one balancer address.
+bool grpc_lb_addresses_contains_balancer_address(
+    const grpc_lb_addresses& addresses);
+
 //
 // LB policy factory
 //
diff --git a/src/core/ext/filters/client_channel/parse_address.cc b/src/core/ext/filters/client_channel/parse_address.cc
index b3900114ad..707beb8876 100644
--- a/src/core/ext/filters/client_channel/parse_address.cc
+++ b/src/core/ext/filters/client_channel/parse_address.cc
@@ -125,27 +125,41 @@ bool grpc_parse_ipv6_hostport(const char* hostport, grpc_resolved_address* addr,
   char* host_end = static_cast<char*>(gpr_memrchr(host, '%', strlen(host)));
   if (host_end != nullptr) {
     GPR_ASSERT(host_end >= host);
-    char host_without_scope[GRPC_INET6_ADDRSTRLEN];
+    char host_without_scope[GRPC_INET6_ADDRSTRLEN + 1];
     size_t host_without_scope_len = static_cast<size_t>(host_end - host);
     uint32_t sin6_scope_id = 0;
+    if (host_without_scope_len > GRPC_INET6_ADDRSTRLEN) {
+      if (log_errors) {
+        gpr_log(
+            GPR_ERROR,
+            "invalid ipv6 address length %zu. Length cannot be greater than "
+            "GRPC_INET6_ADDRSTRLEN i.e %d)",
+            host_without_scope_len, GRPC_INET6_ADDRSTRLEN);
+      }
+      goto done;
+    }
     strncpy(host_without_scope, host, host_without_scope_len);
     host_without_scope[host_without_scope_len] = '\0';
     if (grpc_inet_pton(GRPC_AF_INET6, host_without_scope, &in6->sin6_addr) ==
         0) {
-      gpr_log(GPR_ERROR, "invalid ipv6 address: '%s'", host_without_scope);
+      if (log_errors) {
+        gpr_log(GPR_ERROR, "invalid ipv6 address: '%s'", host_without_scope);
+      }
       goto done;
     }
     if (gpr_parse_bytes_to_uint32(host_end + 1,
                                   strlen(host) - host_without_scope_len - 1,
                                   &sin6_scope_id) == 0) {
-      gpr_log(GPR_ERROR, "invalid ipv6 scope id: '%s'", host_end + 1);
+      if (log_errors) {
+        gpr_log(GPR_ERROR, "invalid ipv6 scope id: '%s'", host_end + 1);
+      }
       goto done;
     }
     // Handle "sin6_scope_id" being type "u_long". See grpc issue #10027.
     in6->sin6_scope_id = sin6_scope_id;
   } else {
     if (grpc_inet_pton(GRPC_AF_INET6, host, &in6->sin6_addr) == 0) {
-      gpr_log(GPR_ERROR, "invalid ipv6 address: '%s'", host);
+      if (log_errors) gpr_log(GPR_ERROR, "invalid ipv6 address: '%s'", host);
       goto done;
     }
   }
@@ -190,3 +204,12 @@ bool grpc_parse_uri(const grpc_uri* uri, grpc_resolved_address* resolved_addr) {
   gpr_log(GPR_ERROR, "Can't parse scheme '%s'", uri->scheme);
   return false;
 }
+
+uint16_t grpc_strhtons(const char* port) {
+  if (strcmp(port, "http") == 0) {
+    return htons(80);
+  } else if (strcmp(port, "https") == 0) {
+    return htons(443);
+  }
+  return htons(static_cast<unsigned short>(atoi(port)));
+}
diff --git a/src/core/ext/filters/client_channel/parse_address.h b/src/core/ext/filters/client_channel/parse_address.h
index 9a88b66edc..c2af0e6c49 100644
--- a/src/core/ext/filters/client_channel/parse_address.h
+++ b/src/core/ext/filters/client_channel/parse_address.h
@@ -47,4 +47,7 @@ bool grpc_parse_ipv4_hostport(const char* hostport, grpc_resolved_address* addr,
 bool grpc_parse_ipv6_hostport(const char* hostport, grpc_resolved_address* addr,
                               bool log_errors);
 
+/* Converts named or numeric port to a uint16 suitable for use in a sockaddr. */
+uint16_t grpc_strhtons(const char* port);
+
 #endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_PARSE_ADDRESS_H */
diff --git a/src/core/ext/filters/client_channel/resolver.h b/src/core/ext/filters/client_channel/resolver.h
index 02380314dd..e9acbb7c41 100644
--- a/src/core/ext/filters/client_channel/resolver.h
+++ b/src/core/ext/filters/client_channel/resolver.h
@@ -81,18 +81,15 @@ class Resolver : public InternallyRefCountedWithTracing<Resolver> {
   ///
   /// If this causes new data to become available, then the currently
   /// pending call to \a NextLocked() will return the new result.
-  ///
-  /// Note: Currently, all resolvers are required to return a new result
-  /// shortly after this method is called.  For pull-based mechanisms, if
-  /// the implementation decides to delay querying the name service, it
-  /// should immediately return a new copy of the previously returned
-  /// result (and it can then return the updated data later, when it
-  /// actually does query the name service).  For push-based mechanisms,
-  /// the implementation should immediately return a new copy of the
-  /// last-seen result.
-  /// TODO(roth): Remove this requirement once we fix pick_first to not
-  /// throw away unselected subchannels.
-  virtual void RequestReresolutionLocked() GRPC_ABSTRACT;
+  virtual void RequestReresolutionLocked() {}
+
+  /// Resets the re-resolution backoff, if any.
+  /// This needs to be implemented only by pull-based implementations;
+  /// for push-based implementations, it will be a no-op.
+  /// TODO(roth): Pull the backoff code out of resolver and into
+  /// client_channel, so that it can be shared across resolver
+  /// implementations.  At that point, this method can go away.
+  virtual void ResetBackoffLocked() {}
 
   void Orphan() override {
     // Invoke ShutdownAndUnrefLocked() inside of the combiner.
@@ -105,9 +102,7 @@ class Resolver : public InternallyRefCountedWithTracing<Resolver> {
   GRPC_ABSTRACT_BASE_CLASS
 
  protected:
-  // So Delete() can access our protected dtor.
-  template <typename T>
-  friend void Delete(T*);
+  GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_DELETE
 
   /// Does NOT take ownership of the reference to \a combiner.
   // TODO(roth): Once we have a C++-like interface for combiners, this
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc
index c3c62b60bf..dfa52867d8 100644
--- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc
@@ -23,7 +23,6 @@
 #include <limits.h>
 #include <stdio.h>
 #include <string.h>
-#include <unistd.h>
 
 #include <grpc/support/alloc.h>
 #include <grpc/support/string_util.h>
@@ -67,6 +66,8 @@ class AresDnsResolver : public Resolver {
 
   void RequestReresolutionLocked() override;
 
+  void ResetBackoffLocked() override;
+
   void ShutdownLocked() override;
 
  private:
@@ -142,8 +143,8 @@ AresDnsResolver::AresDnsResolver(const ResolverArgs& args)
   channel_args_ = grpc_channel_args_copy(args.args);
   const grpc_arg* arg = grpc_channel_args_find(
       channel_args_, GRPC_ARG_SERVICE_CONFIG_DISABLE_RESOLUTION);
-  request_service_config_ = !grpc_channel_arg_get_integer(
-      arg, (grpc_integer_options){false, false, true});
+  grpc_integer_options integer_options = {false, false, true};
+  request_service_config_ = !grpc_channel_arg_get_integer(arg, integer_options);
   arg = grpc_channel_args_find(channel_args_,
                                GRPC_ARG_DNS_MIN_TIME_BETWEEN_RESOLUTIONS_MS);
   min_time_between_resolutions_ =
@@ -188,6 +189,13 @@ void AresDnsResolver::RequestReresolutionLocked() {
   }
 }
 
+void AresDnsResolver::ResetBackoffLocked() {
+  if (have_next_resolution_timer_) {
+    grpc_timer_cancel(&next_resolution_timer_);
+  }
+  backoff_.Reset();
+}
+
 void AresDnsResolver::ShutdownLocked() {
   if (have_next_resolution_timer_) {
     grpc_timer_cancel(&next_resolution_timer_);
@@ -346,7 +354,7 @@ void AresDnsResolver::OnResolvedLocked(void* arg, grpc_error* error) {
     RefCountedPtr<Resolver> self = r->Ref(DEBUG_LOCATION, "retry-timer");
     self.release();
     if (timeout > 0) {
-      gpr_log(GPR_DEBUG, "retrying in %" PRIdPTR " milliseconds", timeout);
+      gpr_log(GPR_DEBUG, "retrying in %" PRId64 " milliseconds", timeout);
     } else {
       gpr_log(GPR_DEBUG, "retrying immediately");
     }
@@ -365,13 +373,7 @@ void AresDnsResolver::OnResolvedLocked(void* arg, grpc_error* error) {
 void AresDnsResolver::MaybeStartResolvingLocked() {
   // If there is an existing timer, the time it fires is the earliest time we
   // can start the next resolution.
-  if (have_next_resolution_timer_) {
-    // TODO(dgq): remove the following two lines once Pick First stops
-    // discarding subchannels after selecting.
-    ++resolved_version_;
-    MaybeFinishNextLocked();
-    return;
-  }
+  if (have_next_resolution_timer_) return;
   if (last_resolution_timestamp_ >= 0) {
     const grpc_millis earliest_next_resolution =
         last_resolution_timestamp_ + min_time_between_resolutions_;
@@ -381,8 +383,8 @@ void AresDnsResolver::MaybeStartResolvingLocked() {
       const grpc_millis last_resolution_ago =
           grpc_core::ExecCtx::Get()->Now() - last_resolution_timestamp_;
       gpr_log(GPR_DEBUG,
-              "In cooldown from last resolution (from %" PRIdPTR
-              " ms ago). Will resolve again in %" PRIdPTR " ms",
+              "In cooldown from last resolution (from %" PRId64
+              " ms ago). Will resolve again in %" PRId64 " ms",
               last_resolution_ago, ms_until_next_resolution);
       have_next_resolution_timer_ = true;
       // TODO(roth): We currently deal with this ref manually.  Once the
@@ -393,10 +395,6 @@ void AresDnsResolver::MaybeStartResolvingLocked() {
       self.release();
       grpc_timer_init(&next_resolution_timer_, ms_until_next_resolution,
                       &on_next_resolution_);
-      // TODO(dgq): remove the following two lines once Pick First stops
-      // discarding subchannels after selecting.
-      ++resolved_version_;
-      MaybeFinishNextLocked();
       return;
     }
   }
@@ -414,10 +412,10 @@ void AresDnsResolver::StartResolvingLocked() {
   resolving_ = true;
   lb_addresses_ = nullptr;
   service_config_json_ = nullptr;
-  pending_request_ = grpc_dns_lookup_ares(
+  pending_request_ = grpc_dns_lookup_ares_locked(
       dns_server_, name_to_resolve_, kDefaultPort, interested_parties_,
       &on_resolved_, &lb_addresses_, true /* check_grpclb */,
-      request_service_config_ ? &service_config_json_ : nullptr);
+      request_service_config_ ? &service_config_json_ : nullptr, combiner());
   last_resolution_timestamp_ = grpc_core::ExecCtx::Get()->Now();
 }
 
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.cc
new file mode 100644
index 0000000000..fdbd07ebf5
--- /dev/null
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.cc
@@ -0,0 +1,320 @@
+/*
+ *
+ * Copyright 2016 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#include <grpc/support/port_platform.h>
+
+#include "src/core/lib/iomgr/port.h"
+#if GRPC_ARES == 1 && !defined(GRPC_UV)
+
+#include <ares.h>
+#include <string.h>
+
+#include "src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.h"
+
+#include <grpc/support/alloc.h>
+#include <grpc/support/log.h>
+#include <grpc/support/string_util.h>
+#include <grpc/support/time.h>
+#include "src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h"
+#include "src/core/lib/gpr/string.h"
+#include "src/core/lib/iomgr/iomgr_internal.h"
+#include "src/core/lib/iomgr/sockaddr_utils.h"
+
+typedef struct fd_node {
+  /** the owner of this fd node */
+  grpc_ares_ev_driver* ev_driver;
+  /** a closure wrapping on_readable_locked, which should be
+     invoked when the grpc_fd in this node becomes readable. */
+  grpc_closure read_closure;
+  /** a closure wrapping on_writable_locked, which should be
+     invoked when the grpc_fd in this node becomes writable. */
+  grpc_closure write_closure;
+  /** next fd node in the list */
+  struct fd_node* next;
+
+  /** wrapped fd that's polled by grpc's poller for the current platform */
+  grpc_core::GrpcPolledFd* grpc_polled_fd;
+  /** if the readable closure has been registered */
+  bool readable_registered;
+  /** if the writable closure has been registered */
+  bool writable_registered;
+  /** if the fd has been shutdown yet from grpc iomgr perspective */
+  bool already_shutdown;
+} fd_node;
+
+struct grpc_ares_ev_driver {
+  /** the ares_channel owned by this event driver */
+  ares_channel channel;
+  /** pollset set for driving the IO events of the channel */
+  grpc_pollset_set* pollset_set;
+  /** refcount of the event driver */
+  gpr_refcount refs;
+
+  /** combiner to synchronize c-ares and I/O callbacks on */
+  grpc_combiner* combiner;
+  /** a list of grpc_fd that this event driver is currently using. */
+  fd_node* fds;
+  /** is this event driver currently working? */
+  bool working;
+  /** is this event driver being shut down */
+  bool shutting_down;
+  /** request object that's using this ev driver */
+  grpc_ares_request* request;
+  /** Owned by the ev_driver. Creates new GrpcPolledFd's */
+  grpc_core::UniquePtr<grpc_core::GrpcPolledFdFactory> polled_fd_factory;
+};
+
+static void grpc_ares_notify_on_event_locked(grpc_ares_ev_driver* ev_driver);
+
+static grpc_ares_ev_driver* grpc_ares_ev_driver_ref(
+    grpc_ares_ev_driver* ev_driver) {
+  gpr_log(GPR_DEBUG, "Ref ev_driver %" PRIuPTR, (uintptr_t)ev_driver);
+  gpr_ref(&ev_driver->refs);
+  return ev_driver;
+}
+
+static void grpc_ares_ev_driver_unref(grpc_ares_ev_driver* ev_driver) {
+  gpr_log(GPR_DEBUG, "Unref ev_driver %" PRIuPTR, (uintptr_t)ev_driver);
+  if (gpr_unref(&ev_driver->refs)) {
+    gpr_log(GPR_DEBUG, "destroy ev_driver %" PRIuPTR, (uintptr_t)ev_driver);
+    GPR_ASSERT(ev_driver->fds == nullptr);
+    GRPC_COMBINER_UNREF(ev_driver->combiner, "free ares event driver");
+    ares_destroy(ev_driver->channel);
+    grpc_ares_complete_request_locked(ev_driver->request);
+    grpc_core::Delete(ev_driver);
+  }
+}
+
+static void fd_node_destroy_locked(fd_node* fdn) {
+  gpr_log(GPR_DEBUG, "delete fd: %s", fdn->grpc_polled_fd->GetName());
+  GPR_ASSERT(!fdn->readable_registered);
+  GPR_ASSERT(!fdn->writable_registered);
+  GPR_ASSERT(fdn->already_shutdown);
+  grpc_core::Delete(fdn->grpc_polled_fd);
+  gpr_free(fdn);
+}
+
+static void fd_node_shutdown_locked(fd_node* fdn, const char* reason) {
+  if (!fdn->already_shutdown) {
+    fdn->already_shutdown = true;
+    fdn->grpc_polled_fd->ShutdownLocked(
+        GRPC_ERROR_CREATE_FROM_STATIC_STRING(reason));
+  }
+}
+
+grpc_error* grpc_ares_ev_driver_create_locked(grpc_ares_ev_driver** ev_driver,
+                                              grpc_pollset_set* pollset_set,
+                                              grpc_combiner* combiner,
+                                              grpc_ares_request* request) {
+  *ev_driver = grpc_core::New<grpc_ares_ev_driver>();
+  ares_options opts;
+  memset(&opts, 0, sizeof(opts));
+  opts.flags |= ARES_FLAG_STAYOPEN;
+  int status = ares_init_options(&(*ev_driver)->channel, &opts, ARES_OPT_FLAGS);
+  gpr_log(GPR_DEBUG, "grpc_ares_ev_driver_create_locked");
+  if (status != ARES_SUCCESS) {
+    char* err_msg;
+    gpr_asprintf(&err_msg, "Failed to init ares channel. C-ares error: %s",
+                 ares_strerror(status));
+    grpc_error* err = GRPC_ERROR_CREATE_FROM_COPIED_STRING(err_msg);
+    gpr_free(err_msg);
+    gpr_free(*ev_driver);
+    return err;
+  }
+  (*ev_driver)->combiner = GRPC_COMBINER_REF(combiner, "ares event driver");
+  gpr_ref_init(&(*ev_driver)->refs, 1);
+  (*ev_driver)->pollset_set = pollset_set;
+  (*ev_driver)->fds = nullptr;
+  (*ev_driver)->working = false;
+  (*ev_driver)->shutting_down = false;
+  (*ev_driver)->request = request;
+  (*ev_driver)->polled_fd_factory =
+      grpc_core::NewGrpcPolledFdFactory((*ev_driver)->combiner);
+  (*ev_driver)
+      ->polled_fd_factory->ConfigureAresChannelLocked((*ev_driver)->channel);
+  return GRPC_ERROR_NONE;
+}
+
+void grpc_ares_ev_driver_on_queries_complete_locked(
+    grpc_ares_ev_driver* ev_driver) {
+  // We mark the event driver as being shut down. If the event driver
+  // is working, grpc_ares_notify_on_event_locked will shut down the
+  // fds; if it's not working, there are no fds to shut down.
+  ev_driver->shutting_down = true;
+  grpc_ares_ev_driver_unref(ev_driver);
+}
+
+void grpc_ares_ev_driver_shutdown_locked(grpc_ares_ev_driver* ev_driver) {
+  ev_driver->shutting_down = true;
+  fd_node* fn = ev_driver->fds;
+  while (fn != nullptr) {
+    fd_node_shutdown_locked(fn, "grpc_ares_ev_driver_shutdown");
+    fn = fn->next;
+  }
+}
+
+// Search fd in the fd_node list head. This is an O(n) search, the max possible
+// value of n is ARES_GETSOCK_MAXNUM (16). n is typically 1 - 2 in our tests.
+static fd_node* pop_fd_node_locked(fd_node** head, ares_socket_t as) {
+  fd_node dummy_head;
+  dummy_head.next = *head;
+  fd_node* node = &dummy_head;
+  while (node->next != nullptr) {
+    if (node->next->grpc_polled_fd->GetWrappedAresSocketLocked() == as) {
+      fd_node* ret = node->next;
+      node->next = node->next->next;
+      *head = dummy_head.next;
+      return ret;
+    }
+    node = node->next;
+  }
+  return nullptr;
+}
+
+static void on_readable_locked(void* arg, grpc_error* error) {
+  fd_node* fdn = static_cast<fd_node*>(arg);
+  grpc_ares_ev_driver* ev_driver = fdn->ev_driver;
+  const ares_socket_t as = fdn->grpc_polled_fd->GetWrappedAresSocketLocked();
+  fdn->readable_registered = false;
+  gpr_log(GPR_DEBUG, "readable on %s", fdn->grpc_polled_fd->GetName());
+  if (error == GRPC_ERROR_NONE) {
+    do {
+      ares_process_fd(ev_driver->channel, as, ARES_SOCKET_BAD);
+    } while (fdn->grpc_polled_fd->IsFdStillReadableLocked());
+  } else {
+    // If error is not GRPC_ERROR_NONE, it means the fd has been shutdown or
+    // timed out. The pending lookups made on this ev_driver will be cancelled
+    // by the following ares_cancel() and the on_done callbacks will be invoked
+    // with a status of ARES_ECANCELLED. The remaining file descriptors in this
+    // ev_driver will be cleaned up in the follwing
+    // grpc_ares_notify_on_event_locked().
+    ares_cancel(ev_driver->channel);
+  }
+  grpc_ares_notify_on_event_locked(ev_driver);
+  grpc_ares_ev_driver_unref(ev_driver);
+}
+
+static void on_writable_locked(void* arg, grpc_error* error) {
+  fd_node* fdn = static_cast<fd_node*>(arg);
+  grpc_ares_ev_driver* ev_driver = fdn->ev_driver;
+  const ares_socket_t as = fdn->grpc_polled_fd->GetWrappedAresSocketLocked();
+  fdn->writable_registered = false;
+  gpr_log(GPR_DEBUG, "writable on %s", fdn->grpc_polled_fd->GetName());
+  if (error == GRPC_ERROR_NONE) {
+    ares_process_fd(ev_driver->channel, ARES_SOCKET_BAD, as);
+  } else {
+    // If error is not GRPC_ERROR_NONE, it means the fd has been shutdown or
+    // timed out. The pending lookups made on this ev_driver will be cancelled
+    // by the following ares_cancel() and the on_done callbacks will be invoked
+    // with a status of ARES_ECANCELLED. The remaining file descriptors in this
+    // ev_driver will be cleaned up in the follwing
+    // grpc_ares_notify_on_event_locked().
+    ares_cancel(ev_driver->channel);
+  }
+  grpc_ares_notify_on_event_locked(ev_driver);
+  grpc_ares_ev_driver_unref(ev_driver);
+}
+
+ares_channel* grpc_ares_ev_driver_get_channel_locked(
+    grpc_ares_ev_driver* ev_driver) {
+  return &ev_driver->channel;
+}
+
+// Get the file descriptors used by the ev_driver's ares channel, register
+// driver_closure with these filedescriptors.
+static void grpc_ares_notify_on_event_locked(grpc_ares_ev_driver* ev_driver) {
+  fd_node* new_list = nullptr;
+  if (!ev_driver->shutting_down) {
+    ares_socket_t socks[ARES_GETSOCK_MAXNUM];
+    int socks_bitmask =
+        ares_getsock(ev_driver->channel, socks, ARES_GETSOCK_MAXNUM);
+    for (size_t i = 0; i < ARES_GETSOCK_MAXNUM; i++) {
+      if (ARES_GETSOCK_READABLE(socks_bitmask, i) ||
+          ARES_GETSOCK_WRITABLE(socks_bitmask, i)) {
+        fd_node* fdn = pop_fd_node_locked(&ev_driver->fds, socks[i]);
+        // Create a new fd_node if sock[i] is not in the fd_node list.
+        if (fdn == nullptr) {
+          fdn = static_cast<fd_node*>(gpr_malloc(sizeof(fd_node)));
+          fdn->grpc_polled_fd =
+              ev_driver->polled_fd_factory->NewGrpcPolledFdLocked(
+                  socks[i], ev_driver->pollset_set, ev_driver->combiner);
+          gpr_log(GPR_DEBUG, "new fd: %s", fdn->grpc_polled_fd->GetName());
+          fdn->ev_driver = ev_driver;
+          fdn->readable_registered = false;
+          fdn->writable_registered = false;
+          fdn->already_shutdown = false;
+          GRPC_CLOSURE_INIT(&fdn->read_closure, on_readable_locked, fdn,
+                            grpc_combiner_scheduler(ev_driver->combiner));
+          GRPC_CLOSURE_INIT(&fdn->write_closure, on_writable_locked, fdn,
+                            grpc_combiner_scheduler(ev_driver->combiner));
+        }
+        fdn->next = new_list;
+        new_list = fdn;
+        // Register read_closure if the socket is readable and read_closure has
+        // not been registered with this socket.
+        if (ARES_GETSOCK_READABLE(socks_bitmask, i) &&
+            !fdn->readable_registered) {
+          grpc_ares_ev_driver_ref(ev_driver);
+          gpr_log(GPR_DEBUG, "notify read on: %s",
+                  fdn->grpc_polled_fd->GetName());
+          fdn->grpc_polled_fd->RegisterForOnReadableLocked(&fdn->read_closure);
+          fdn->readable_registered = true;
+        }
+        // Register write_closure if the socket is writable and write_closure
+        // has not been registered with this socket.
+        if (ARES_GETSOCK_WRITABLE(socks_bitmask, i) &&
+            !fdn->writable_registered) {
+          gpr_log(GPR_DEBUG, "notify write on: %s",
+                  fdn->grpc_polled_fd->GetName());
+          grpc_ares_ev_driver_ref(ev_driver);
+          fdn->grpc_polled_fd->RegisterForOnWriteableLocked(
+              &fdn->write_closure);
+          fdn->writable_registered = true;
+        }
+      }
+    }
+  }
+  // Any remaining fds in ev_driver->fds were not returned by ares_getsock() and
+  // are therefore no longer in use, so they can be shut down and removed from
+  // the list.
+  while (ev_driver->fds != nullptr) {
+    fd_node* cur = ev_driver->fds;
+    ev_driver->fds = ev_driver->fds->next;
+    fd_node_shutdown_locked(cur, "c-ares fd shutdown");
+    if (!cur->readable_registered && !cur->writable_registered) {
+      fd_node_destroy_locked(cur);
+    } else {
+      cur->next = new_list;
+      new_list = cur;
+    }
+  }
+  ev_driver->fds = new_list;
+  // If the ev driver has no working fd, all the tasks are done.
+  if (new_list == nullptr) {
+    ev_driver->working = false;
+    gpr_log(GPR_DEBUG, "ev driver stop working");
+  }
+}
+
+void grpc_ares_ev_driver_start_locked(grpc_ares_ev_driver* ev_driver) {
+  if (!ev_driver->working) {
+    ev_driver->working = true;
+    grpc_ares_notify_on_event_locked(ev_driver);
+  }
+}
+
+#endif /* GRPC_ARES == 1 && !defined(GRPC_UV) */
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.h b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.h
index 6239549534..671c537fe7 100644
--- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.h
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.h
@@ -22,6 +22,8 @@
 #include <grpc/support/port_platform.h>
 
 #include <ares.h>
+#include "src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h"
+#include "src/core/lib/gprpp/abstract.h"
 #include "src/core/lib/iomgr/pollset_set.h"
 
 typedef struct grpc_ares_ev_driver grpc_ares_ev_driver;
@@ -29,25 +31,76 @@ typedef struct grpc_ares_ev_driver grpc_ares_ev_driver;
 /* Start \a ev_driver. It will keep working until all IO on its ares_channel is
    done, or grpc_ares_ev_driver_destroy() is called. It may notify the callbacks
    bound to its ares_channel when necessary. */
-void grpc_ares_ev_driver_start(grpc_ares_ev_driver* ev_driver);
+void grpc_ares_ev_driver_start_locked(grpc_ares_ev_driver* ev_driver);
 
 /* Returns the ares_channel owned by \a ev_driver. To bind a c-ares query to
    \a ev_driver, use the ares_channel owned by \a ev_driver as the arg of the
    query. */
-ares_channel* grpc_ares_ev_driver_get_channel(grpc_ares_ev_driver* ev_driver);
+ares_channel* grpc_ares_ev_driver_get_channel_locked(
+    grpc_ares_ev_driver* ev_driver);
 
 /* Creates a new grpc_ares_ev_driver. Returns GRPC_ERROR_NONE if \a ev_driver is
    created successfully. */
-grpc_error* grpc_ares_ev_driver_create(grpc_ares_ev_driver** ev_driver,
-                                       grpc_pollset_set* pollset_set);
+grpc_error* grpc_ares_ev_driver_create_locked(grpc_ares_ev_driver** ev_driver,
+                                              grpc_pollset_set* pollset_set,
+                                              grpc_combiner* combiner,
+                                              grpc_ares_request* request);
 
-/* Destroys \a ev_driver asynchronously. Pending lookups made on \a ev_driver
-   will be cancelled and their on_done callbacks will be invoked with a status
-   of ARES_ECANCELLED. */
-void grpc_ares_ev_driver_destroy(grpc_ares_ev_driver* ev_driver);
+/* Called back when all DNS lookups have completed. */
+void grpc_ares_ev_driver_on_queries_complete_locked(
+    grpc_ares_ev_driver* ev_driver);
 
 /* Shutdown all the grpc_fds used by \a ev_driver */
-void grpc_ares_ev_driver_shutdown(grpc_ares_ev_driver* ev_driver);
+void grpc_ares_ev_driver_shutdown_locked(grpc_ares_ev_driver* ev_driver);
+
+namespace grpc_core {
+
+/* A wrapped fd that integrates with the grpc iomgr of the current platform.
+ * A GrpcPolledFd knows how to create grpc platform-specific iomgr endpoints
+ * from "ares_socket_t" sockets, and then sign up for readability/writeability
+ * with that poller, and do shutdown and destruction. */
+class GrpcPolledFd {
+ public:
+  virtual ~GrpcPolledFd() {}
+  /* Called when c-ares library is interested and there's no pending callback */
+  virtual void RegisterForOnReadableLocked(grpc_closure* read_closure)
+      GRPC_ABSTRACT;
+  /* Called when c-ares library is interested and there's no pending callback */
+  virtual void RegisterForOnWriteableLocked(grpc_closure* write_closure)
+      GRPC_ABSTRACT;
+  /* Indicates if there is data left even after just being read from */
+  virtual bool IsFdStillReadableLocked() GRPC_ABSTRACT;
+  /* Called once and only once. Must cause cancellation of any pending
+   * read/write callbacks. */
+  virtual void ShutdownLocked(grpc_error* error) GRPC_ABSTRACT;
+  /* Get the underlying ares_socket_t that this was created from */
+  virtual ares_socket_t GetWrappedAresSocketLocked() GRPC_ABSTRACT;
+  /* A unique name, for logging */
+  virtual const char* GetName() GRPC_ABSTRACT;
+
+  GRPC_ABSTRACT_BASE_CLASS
+};
+
+/* A GrpcPolledFdFactory is 1-to-1 with and owned by the
+ * ares event driver. It knows how to create GrpcPolledFd's
+ * for the current platform, and the ares driver uses it for all of
+ * its fd's. */
+class GrpcPolledFdFactory {
+ public:
+  virtual ~GrpcPolledFdFactory() {}
+  /* Creates a new wrapped fd for the current platform */
+  virtual GrpcPolledFd* NewGrpcPolledFdLocked(
+      ares_socket_t as, grpc_pollset_set* driver_pollset_set,
+      grpc_combiner* combiner) GRPC_ABSTRACT;
+  /* Optionally configures the ares channel after creation */
+  virtual void ConfigureAresChannelLocked(ares_channel channel) GRPC_ABSTRACT;
+
+  GRPC_ABSTRACT_BASE_CLASS
+};
+
+UniquePtr<GrpcPolledFdFactory> NewGrpcPolledFdFactory(grpc_combiner* combiner);
+
+}  // namespace grpc_core
 
 #endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RESOLVER_DNS_C_ARES_GRPC_ARES_EV_DRIVER_H \
         */
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc
index b604f2bf14..aa58e1aaf5 100644
--- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc
@@ -18,9 +18,10 @@
 #include <grpc/support/port_platform.h>
 
 #include "src/core/lib/iomgr/port.h"
-#if GRPC_ARES == 1 && defined(GRPC_POSIX_SOCKET)
+#if GRPC_ARES == 1 && defined(GRPC_POSIX_SOCKET_ARES_EV_DRIVER)
 
 #include <ares.h>
+#include <string.h>
 #include <sys/ioctl.h>
 
 #include "src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.h"
@@ -35,317 +36,71 @@
 #include "src/core/lib/iomgr/iomgr_internal.h"
 #include "src/core/lib/iomgr/sockaddr_utils.h"
 
-typedef struct fd_node {
-  /** the owner of this fd node */
-  grpc_ares_ev_driver* ev_driver;
-  /** a closure wrapping on_readable_cb, which should be invoked when the
-      grpc_fd in this node becomes readable. */
-  grpc_closure read_closure;
-  /** a closure wrapping on_writable_cb, which should be invoked when the
-      grpc_fd in this node becomes writable. */
-  grpc_closure write_closure;
-  /** next fd node in the list */
-  struct fd_node* next;
+namespace grpc_core {
 
-  /** mutex guarding the rest of the state */
-  gpr_mu mu;
-  /** the grpc_fd owned by this fd node */
-  grpc_fd* fd;
-  /** if the readable closure has been registered */
-  bool readable_registered;
-  /** if the writable closure has been registered */
-  bool writable_registered;
-  /** if the fd is being shut down */
-  bool shutting_down;
-} fd_node;
-
-struct grpc_ares_ev_driver {
-  /** the ares_channel owned by this event driver */
-  ares_channel channel;
-  /** pollset set for driving the IO events of the channel */
-  grpc_pollset_set* pollset_set;
-  /** refcount of the event driver */
-  gpr_refcount refs;
-
-  /** mutex guarding the rest of the state */
-  gpr_mu mu;
-  /** a list of grpc_fd that this event driver is currently using. */
-  fd_node* fds;
-  /** is this event driver currently working? */
-  bool working;
-  /** is this event driver being shut down */
-  bool shutting_down;
-};
-
-static void grpc_ares_notify_on_event_locked(grpc_ares_ev_driver* ev_driver);
-
-static grpc_ares_ev_driver* grpc_ares_ev_driver_ref(
-    grpc_ares_ev_driver* ev_driver) {
-  gpr_log(GPR_DEBUG, "Ref ev_driver %" PRIuPTR, (uintptr_t)ev_driver);
-  gpr_ref(&ev_driver->refs);
-  return ev_driver;
-}
-
-static void grpc_ares_ev_driver_unref(grpc_ares_ev_driver* ev_driver) {
-  gpr_log(GPR_DEBUG, "Unref ev_driver %" PRIuPTR, (uintptr_t)ev_driver);
-  if (gpr_unref(&ev_driver->refs)) {
-    gpr_log(GPR_DEBUG, "destroy ev_driver %" PRIuPTR, (uintptr_t)ev_driver);
-    GPR_ASSERT(ev_driver->fds == nullptr);
-    gpr_mu_destroy(&ev_driver->mu);
-    ares_destroy(ev_driver->channel);
-    gpr_free(ev_driver);
+class GrpcPolledFdPosix : public GrpcPolledFd {
+ public:
+  GrpcPolledFdPosix(ares_socket_t as, grpc_pollset_set* driver_pollset_set)
+      : as_(as) {
+    gpr_asprintf(&name_, "c-ares fd: %d", (int)as);
+    fd_ = grpc_fd_create((int)as, name_, false);
+    driver_pollset_set_ = driver_pollset_set;
+    grpc_pollset_set_add_fd(driver_pollset_set_, fd_);
   }
-}
 
-static void fd_node_destroy(fd_node* fdn) {
-  gpr_log(GPR_DEBUG, "delete fd: %d", grpc_fd_wrapped_fd(fdn->fd));
-  GPR_ASSERT(!fdn->readable_registered);
-  GPR_ASSERT(!fdn->writable_registered);
-  gpr_mu_destroy(&fdn->mu);
-  /* c-ares library has closed the fd inside grpc_fd. This fd may be picked up
-     immediately by another thread, and should not be closed by the following
-     grpc_fd_orphan. */
-  grpc_fd_orphan(fdn->fd, nullptr, nullptr, true /* already_closed */,
-                 "c-ares query finished");
-  gpr_free(fdn);
-}
-
-static void fd_node_shutdown(fd_node* fdn) {
-  gpr_mu_lock(&fdn->mu);
-  fdn->shutting_down = true;
-  if (!fdn->readable_registered && !fdn->writable_registered) {
-    gpr_mu_unlock(&fdn->mu);
-    fd_node_destroy(fdn);
-  } else {
-    grpc_fd_shutdown(
-        fdn->fd, GRPC_ERROR_CREATE_FROM_STATIC_STRING("c-ares fd shutdown"));
-    gpr_mu_unlock(&fdn->mu);
+  ~GrpcPolledFdPosix() {
+    gpr_free(name_);
+    grpc_pollset_set_del_fd(driver_pollset_set_, fd_);
+    /* c-ares library will close the fd inside grpc_fd. This fd may be picked up
+       immediately by another thread, and should not be closed by the following
+       grpc_fd_orphan. */
+    int dummy_release_fd;
+    grpc_fd_orphan(fd_, nullptr, &dummy_release_fd, "c-ares query finished");
   }
-}
 
-grpc_error* grpc_ares_ev_driver_create(grpc_ares_ev_driver** ev_driver,
-                                       grpc_pollset_set* pollset_set) {
-  *ev_driver = static_cast<grpc_ares_ev_driver*>(
-      gpr_malloc(sizeof(grpc_ares_ev_driver)));
-  int status = ares_init(&(*ev_driver)->channel);
-  gpr_log(GPR_DEBUG, "grpc_ares_ev_driver_create");
-  if (status != ARES_SUCCESS) {
-    char* err_msg;
-    gpr_asprintf(&err_msg, "Failed to init ares channel. C-ares error: %s",
-                 ares_strerror(status));
-    grpc_error* err = GRPC_ERROR_CREATE_FROM_COPIED_STRING(err_msg);
-    gpr_free(err_msg);
-    gpr_free(*ev_driver);
-    return err;
+  void RegisterForOnReadableLocked(grpc_closure* read_closure) override {
+    grpc_fd_notify_on_read(fd_, read_closure);
   }
-  gpr_mu_init(&(*ev_driver)->mu);
-  gpr_ref_init(&(*ev_driver)->refs, 1);
-  (*ev_driver)->pollset_set = pollset_set;
-  (*ev_driver)->fds = nullptr;
-  (*ev_driver)->working = false;
-  (*ev_driver)->shutting_down = false;
-  return GRPC_ERROR_NONE;
-}
 
-void grpc_ares_ev_driver_destroy(grpc_ares_ev_driver* ev_driver) {
-  // It's not safe to shut down remaining fds here directly, becauses
-  // ares_host_callback does not provide an exec_ctx. We mark the event driver
-  // as being shut down. If the event driver is working,
-  // grpc_ares_notify_on_event_locked will shut down the fds; if it's not
-  // working, there are no fds to shut down.
-  gpr_mu_lock(&ev_driver->mu);
-  ev_driver->shutting_down = true;
-  gpr_mu_unlock(&ev_driver->mu);
-  grpc_ares_ev_driver_unref(ev_driver);
-}
-
-void grpc_ares_ev_driver_shutdown(grpc_ares_ev_driver* ev_driver) {
-  gpr_mu_lock(&ev_driver->mu);
-  ev_driver->shutting_down = true;
-  fd_node* fn = ev_driver->fds;
-  while (fn != nullptr) {
-    grpc_fd_shutdown(fn->fd, GRPC_ERROR_CREATE_FROM_STATIC_STRING(
-                                 "grpc_ares_ev_driver_shutdown"));
-    fn = fn->next;
+  void RegisterForOnWriteableLocked(grpc_closure* write_closure) override {
+    grpc_fd_notify_on_write(fd_, write_closure);
   }
-  gpr_mu_unlock(&ev_driver->mu);
-}
 
-// Search fd in the fd_node list head. This is an O(n) search, the max possible
-// value of n is ARES_GETSOCK_MAXNUM (16). n is typically 1 - 2 in our tests.
-static fd_node* pop_fd_node(fd_node** head, int fd) {
-  fd_node dummy_head;
-  dummy_head.next = *head;
-  fd_node* node = &dummy_head;
-  while (node->next != nullptr) {
-    if (grpc_fd_wrapped_fd(node->next->fd) == fd) {
-      fd_node* ret = node->next;
-      node->next = node->next->next;
-      *head = dummy_head.next;
-      return ret;
-    }
-    node = node->next;
+  bool IsFdStillReadableLocked() override {
+    size_t bytes_available = 0;
+    return ioctl(grpc_fd_wrapped_fd(fd_), FIONREAD, &bytes_available) == 0 &&
+           bytes_available > 0;
   }
-  return nullptr;
-}
 
-/* Check if \a fd is still readable */
-static bool grpc_ares_is_fd_still_readable(grpc_ares_ev_driver* ev_driver,
-                                           int fd) {
-  size_t bytes_available = 0;
-  return ioctl(fd, FIONREAD, &bytes_available) == 0 && bytes_available > 0;
-}
-
-static void on_readable_cb(void* arg, grpc_error* error) {
-  fd_node* fdn = static_cast<fd_node*>(arg);
-  grpc_ares_ev_driver* ev_driver = fdn->ev_driver;
-  gpr_mu_lock(&fdn->mu);
-  const int fd = grpc_fd_wrapped_fd(fdn->fd);
-  fdn->readable_registered = false;
-  if (fdn->shutting_down && !fdn->writable_registered) {
-    gpr_mu_unlock(&fdn->mu);
-    fd_node_destroy(fdn);
-    grpc_ares_ev_driver_unref(ev_driver);
-    return;
+  void ShutdownLocked(grpc_error* error) override {
+    grpc_fd_shutdown(fd_, error);
   }
-  gpr_mu_unlock(&fdn->mu);
 
-  gpr_log(GPR_DEBUG, "readable on %d", fd);
-  if (error == GRPC_ERROR_NONE) {
-    do {
-      ares_process_fd(ev_driver->channel, fd, ARES_SOCKET_BAD);
-    } while (grpc_ares_is_fd_still_readable(ev_driver, fd));
-  } else {
-    // If error is not GRPC_ERROR_NONE, it means the fd has been shutdown or
-    // timed out. The pending lookups made on this ev_driver will be cancelled
-    // by the following ares_cancel() and the on_done callbacks will be invoked
-    // with a status of ARES_ECANCELLED. The remaining file descriptors in this
-    // ev_driver will be cleaned up in the follwing
-    // grpc_ares_notify_on_event_locked().
-    ares_cancel(ev_driver->channel);
-  }
-  gpr_mu_lock(&ev_driver->mu);
-  grpc_ares_notify_on_event_locked(ev_driver);
-  gpr_mu_unlock(&ev_driver->mu);
-  grpc_ares_ev_driver_unref(ev_driver);
-}
+  ares_socket_t GetWrappedAresSocketLocked() override { return as_; }
 
-static void on_writable_cb(void* arg, grpc_error* error) {
-  fd_node* fdn = static_cast<fd_node*>(arg);
-  grpc_ares_ev_driver* ev_driver = fdn->ev_driver;
-  gpr_mu_lock(&fdn->mu);
-  const int fd = grpc_fd_wrapped_fd(fdn->fd);
-  fdn->writable_registered = false;
-  if (fdn->shutting_down && !fdn->readable_registered) {
-    gpr_mu_unlock(&fdn->mu);
-    fd_node_destroy(fdn);
-    grpc_ares_ev_driver_unref(ev_driver);
-    return;
-  }
-  gpr_mu_unlock(&fdn->mu);
+  const char* GetName() override { return name_; }
+
+  char* name_;
+  ares_socket_t as_;
+  grpc_fd* fd_;
+  grpc_pollset_set* driver_pollset_set_;
+};
 
-  gpr_log(GPR_DEBUG, "writable on %d", fd);
-  if (error == GRPC_ERROR_NONE) {
-    ares_process_fd(ev_driver->channel, ARES_SOCKET_BAD, fd);
-  } else {
-    // If error is not GRPC_ERROR_NONE, it means the fd has been shutdown or
-    // timed out. The pending lookups made on this ev_driver will be cancelled
-    // by the following ares_cancel() and the on_done callbacks will be invoked
-    // with a status of ARES_ECANCELLED. The remaining file descriptors in this
-    // ev_driver will be cleaned up in the follwing
-    // grpc_ares_notify_on_event_locked().
-    ares_cancel(ev_driver->channel);
+class GrpcPolledFdFactoryPosix : public GrpcPolledFdFactory {
+ public:
+  GrpcPolledFd* NewGrpcPolledFdLocked(ares_socket_t as,
+                                      grpc_pollset_set* driver_pollset_set,
+                                      grpc_combiner* combiner) override {
+    return New<GrpcPolledFdPosix>(as, driver_pollset_set);
   }
-  gpr_mu_lock(&ev_driver->mu);
-  grpc_ares_notify_on_event_locked(ev_driver);
-  gpr_mu_unlock(&ev_driver->mu);
-  grpc_ares_ev_driver_unref(ev_driver);
-}
 
-ares_channel* grpc_ares_ev_driver_get_channel(grpc_ares_ev_driver* ev_driver) {
-  return &ev_driver->channel;
-}
+  void ConfigureAresChannelLocked(ares_channel channel) override {}
+};
 
-// Get the file descriptors used by the ev_driver's ares channel, register
-// driver_closure with these filedescriptors.
-static void grpc_ares_notify_on_event_locked(grpc_ares_ev_driver* ev_driver) {
-  fd_node* new_list = nullptr;
-  if (!ev_driver->shutting_down) {
-    ares_socket_t socks[ARES_GETSOCK_MAXNUM];
-    int socks_bitmask =
-        ares_getsock(ev_driver->channel, socks, ARES_GETSOCK_MAXNUM);
-    for (size_t i = 0; i < ARES_GETSOCK_MAXNUM; i++) {
-      if (ARES_GETSOCK_READABLE(socks_bitmask, i) ||
-          ARES_GETSOCK_WRITABLE(socks_bitmask, i)) {
-        fd_node* fdn = pop_fd_node(&ev_driver->fds, socks[i]);
-        // Create a new fd_node if sock[i] is not in the fd_node list.
-        if (fdn == nullptr) {
-          char* fd_name;
-          gpr_asprintf(&fd_name, "ares_ev_driver-%" PRIuPTR, i);
-          fdn = static_cast<fd_node*>(gpr_malloc(sizeof(fd_node)));
-          gpr_log(GPR_DEBUG, "new fd: %d", socks[i]);
-          fdn->fd = grpc_fd_create(socks[i], fd_name);
-          fdn->ev_driver = ev_driver;
-          fdn->readable_registered = false;
-          fdn->writable_registered = false;
-          fdn->shutting_down = false;
-          gpr_mu_init(&fdn->mu);
-          GRPC_CLOSURE_INIT(&fdn->read_closure, on_readable_cb, fdn,
-                            grpc_schedule_on_exec_ctx);
-          GRPC_CLOSURE_INIT(&fdn->write_closure, on_writable_cb, fdn,
-                            grpc_schedule_on_exec_ctx);
-          grpc_pollset_set_add_fd(ev_driver->pollset_set, fdn->fd);
-          gpr_free(fd_name);
-        }
-        fdn->next = new_list;
-        new_list = fdn;
-        gpr_mu_lock(&fdn->mu);
-        // Register read_closure if the socket is readable and read_closure has
-        // not been registered with this socket.
-        if (ARES_GETSOCK_READABLE(socks_bitmask, i) &&
-            !fdn->readable_registered) {
-          grpc_ares_ev_driver_ref(ev_driver);
-          gpr_log(GPR_DEBUG, "notify read on: %d", grpc_fd_wrapped_fd(fdn->fd));
-          grpc_fd_notify_on_read(fdn->fd, &fdn->read_closure);
-          fdn->readable_registered = true;
-        }
-        // Register write_closure if the socket is writable and write_closure
-        // has not been registered with this socket.
-        if (ARES_GETSOCK_WRITABLE(socks_bitmask, i) &&
-            !fdn->writable_registered) {
-          gpr_log(GPR_DEBUG, "notify write on: %d",
-                  grpc_fd_wrapped_fd(fdn->fd));
-          grpc_ares_ev_driver_ref(ev_driver);
-          grpc_fd_notify_on_write(fdn->fd, &fdn->write_closure);
-          fdn->writable_registered = true;
-        }
-        gpr_mu_unlock(&fdn->mu);
-      }
-    }
-  }
-  // Any remaining fds in ev_driver->fds were not returned by ares_getsock() and
-  // are therefore no longer in use, so they can be shut down and removed from
-  // the list.
-  while (ev_driver->fds != nullptr) {
-    fd_node* cur = ev_driver->fds;
-    ev_driver->fds = ev_driver->fds->next;
-    fd_node_shutdown(cur);
-  }
-  ev_driver->fds = new_list;
-  // If the ev driver has no working fd, all the tasks are done.
-  if (new_list == nullptr) {
-    ev_driver->working = false;
-    gpr_log(GPR_DEBUG, "ev driver stop working");
-  }
+UniquePtr<GrpcPolledFdFactory> NewGrpcPolledFdFactory(grpc_combiner* combiner) {
+  return UniquePtr<GrpcPolledFdFactory>(New<GrpcPolledFdFactoryPosix>());
 }
 
-void grpc_ares_ev_driver_start(grpc_ares_ev_driver* ev_driver) {
-  gpr_mu_lock(&ev_driver->mu);
-  if (!ev_driver->working) {
-    ev_driver->working = true;
-    grpc_ares_notify_on_event_locked(ev_driver);
-  }
-  gpr_mu_unlock(&ev_driver->mu);
-}
+}  // namespace grpc_core
 
-#endif /* GRPC_ARES == 1 && defined(GRPC_POSIX_SOCKET) */
+#endif /* GRPC_ARES == 1 && defined(GRPC_POSIX_SOCKET_ARES_EV_DRIVER) */
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_windows.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_windows.cc
new file mode 100644
index 0000000000..02121aa0ab
--- /dev/null
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_windows.cc
@@ -0,0 +1,537 @@
+/*
+ *
+ * Copyright 2016 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#include <grpc/support/port_platform.h>
+
+#include "src/core/lib/iomgr/port.h"
+#if GRPC_ARES == 1 && defined(GPR_WINDOWS)
+
+#include <ares.h>
+
+#include <grpc/support/alloc.h>
+#include <grpc/support/log.h>
+#include <grpc/support/log_windows.h>
+#include <grpc/support/string_util.h>
+#include <grpc/support/time.h>
+#include <string.h>
+#include "src/core/lib/gpr/string.h"
+#include "src/core/lib/gprpp/memory.h"
+#include "src/core/lib/iomgr/combiner.h"
+#include "src/core/lib/iomgr/socket_windows.h"
+#include "src/core/lib/iomgr/tcp_windows.h"
+#include "src/core/lib/slice/slice_internal.h"
+
+#include "src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.h"
+#include "src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h"
+
+/* TODO(apolcyn): remove this hack after fixing upstream.
+ * Our grpc/c-ares code on Windows uses the ares_set_socket_functions API,
+ * which uses "struct iovec" type, which on Windows is defined inside of
+ * a c-ares header that is not public.
+ * See https://github.com/c-ares/c-ares/issues/206. */
+struct iovec {
+  void* iov_base;
+  size_t iov_len;
+};
+
+namespace grpc_core {
+
+/* c-ares creates its own sockets and is meant to read them when readable and
+ * write them when writeable. To fit this socket usage model into the grpc
+ * windows poller (which gives notifications when attempted reads and writes are
+ * actually fulfilled rather than possible), this GrpcPolledFdWindows class
+ * takes advantage of the ares_set_socket_functions API and acts as a virtual
+ * socket. It holds its own read and write buffers which are written to and read
+ * from c-ares and are used with the grpc windows poller, and it, e.g.,
+ * manufactures virtual socket error codes when it e.g. needs to tell the c-ares
+ * library to wait for an async read. */
+class GrpcPolledFdWindows : public GrpcPolledFd {
+ public:
+  enum WriteState {
+    WRITE_IDLE,
+    WRITE_REQUESTED,
+    WRITE_PENDING,
+    WRITE_WAITING_FOR_VERIFICATION_UPON_RETRY,
+  };
+
+  GrpcPolledFdWindows(ares_socket_t as, grpc_combiner* combiner)
+      : read_buf_(grpc_empty_slice()),
+        write_buf_(grpc_empty_slice()),
+        write_state_(WRITE_IDLE),
+        gotten_into_driver_list_(false) {
+    gpr_asprintf(&name_, "c-ares socket: %" PRIdPTR, as);
+    winsocket_ = grpc_winsocket_create(as, name_);
+    combiner_ = GRPC_COMBINER_REF(combiner, name_);
+    GRPC_CLOSURE_INIT(&outer_read_closure_,
+                      &GrpcPolledFdWindows::OnIocpReadable, this,
+                      grpc_combiner_scheduler(combiner_));
+    GRPC_CLOSURE_INIT(&outer_write_closure_,
+                      &GrpcPolledFdWindows::OnIocpWriteable, this,
+                      grpc_combiner_scheduler(combiner_));
+  }
+
+  ~GrpcPolledFdWindows() {
+    GRPC_COMBINER_UNREF(combiner_, name_);
+    grpc_slice_unref_internal(read_buf_);
+    grpc_slice_unref_internal(write_buf_);
+    GPR_ASSERT(read_closure_ == nullptr);
+    GPR_ASSERT(write_closure_ == nullptr);
+    grpc_winsocket_destroy(winsocket_);
+    gpr_free(name_);
+  }
+
+  void ScheduleAndNullReadClosure(grpc_error* error) {
+    GRPC_CLOSURE_SCHED(read_closure_, error);
+    read_closure_ = nullptr;
+  }
+
+  void ScheduleAndNullWriteClosure(grpc_error* error) {
+    GRPC_CLOSURE_SCHED(write_closure_, error);
+    write_closure_ = nullptr;
+  }
+
+  void RegisterForOnReadableLocked(grpc_closure* read_closure) override {
+    GPR_ASSERT(read_closure_ == nullptr);
+    read_closure_ = read_closure;
+    GPR_ASSERT(GRPC_SLICE_LENGTH(read_buf_) == 0);
+    grpc_slice_unref_internal(read_buf_);
+    read_buf_ = GRPC_SLICE_MALLOC(4192);
+    WSABUF buffer;
+    buffer.buf = (char*)GRPC_SLICE_START_PTR(read_buf_);
+    buffer.len = GRPC_SLICE_LENGTH(read_buf_);
+    memset(&winsocket_->read_info.overlapped, 0, sizeof(OVERLAPPED));
+    recv_from_source_addr_len_ = sizeof(recv_from_source_addr_);
+    DWORD flags = 0;
+    if (WSARecvFrom(grpc_winsocket_wrapped_socket(winsocket_), &buffer, 1,
+                    nullptr, &flags, (sockaddr*)recv_from_source_addr_,
+                    &recv_from_source_addr_len_,
+                    &winsocket_->read_info.overlapped, nullptr)) {
+      char* msg = gpr_format_message(WSAGetLastError());
+      grpc_error* error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(msg);
+      GRPC_CARES_TRACE_LOG(
+          "RegisterForOnReadableLocked: WSARecvFrom error:|%s|. fd:|%s|", msg,
+          GetName());
+      gpr_free(msg);
+      if (WSAGetLastError() != WSA_IO_PENDING) {
+        ScheduleAndNullReadClosure(error);
+        return;
+      }
+    }
+    grpc_socket_notify_on_read(winsocket_, &outer_read_closure_);
+  }
+
+  void RegisterForOnWriteableLocked(grpc_closure* write_closure) override {
+    GRPC_CARES_TRACE_LOG(
+        "RegisterForOnWriteableLocked. fd:|%s|. Current write state: %d",
+        GetName(), write_state_);
+    GPR_ASSERT(write_closure_ == nullptr);
+    write_closure_ = write_closure;
+    switch (write_state_) {
+      case WRITE_IDLE:
+        ScheduleAndNullWriteClosure(GRPC_ERROR_NONE);
+        break;
+      case WRITE_REQUESTED:
+        write_state_ = WRITE_PENDING;
+        SendWriteBuf(nullptr, &winsocket_->write_info.overlapped);
+        grpc_socket_notify_on_write(winsocket_, &outer_write_closure_);
+        break;
+      case WRITE_PENDING:
+      case WRITE_WAITING_FOR_VERIFICATION_UPON_RETRY:
+        abort();
+    }
+  }
+
+  bool IsFdStillReadableLocked() override {
+    return GRPC_SLICE_LENGTH(read_buf_) > 0;
+  }
+
+  void ShutdownLocked(grpc_error* error) override {
+    grpc_winsocket_shutdown(winsocket_);
+  }
+
+  ares_socket_t GetWrappedAresSocketLocked() override {
+    return grpc_winsocket_wrapped_socket(winsocket_);
+  }
+
+  const char* GetName() override { return name_; }
+
+  ares_ssize_t RecvFrom(void* data, ares_socket_t data_len, int flags,
+                        struct sockaddr* from, ares_socklen_t* from_len) {
+    GRPC_CARES_TRACE_LOG(
+        "RecvFrom called on fd:|%s|. Current read buf length:|%d|", GetName(),
+        GRPC_SLICE_LENGTH(read_buf_));
+    if (GRPC_SLICE_LENGTH(read_buf_) == 0) {
+      WSASetLastError(WSAEWOULDBLOCK);
+      return -1;
+    }
+    ares_ssize_t bytes_read = 0;
+    for (size_t i = 0; i < GRPC_SLICE_LENGTH(read_buf_) && i < data_len; i++) {
+      ((char*)data)[i] = GRPC_SLICE_START_PTR(read_buf_)[i];
+      bytes_read++;
+    }
+    read_buf_ = grpc_slice_sub_no_ref(read_buf_, bytes_read,
+                                      GRPC_SLICE_LENGTH(read_buf_));
+    /* c-ares overloads this recv_from virtual socket function to receive
+     * data on both UDP and TCP sockets, and from is nullptr for TCP. */
+    if (from != nullptr) {
+      GPR_ASSERT(*from_len <= recv_from_source_addr_len_);
+      memcpy(from, &recv_from_source_addr_, recv_from_source_addr_len_);
+      *from_len = recv_from_source_addr_len_;
+    }
+    return bytes_read;
+  }
+
+  grpc_slice FlattenIovec(const struct iovec* iov, int iov_count) {
+    int total = 0;
+    for (int i = 0; i < iov_count; i++) {
+      total += iov[i].iov_len;
+    }
+    grpc_slice out = GRPC_SLICE_MALLOC(total);
+    size_t cur = 0;
+    for (int i = 0; i < iov_count; i++) {
+      for (int k = 0; k < iov[i].iov_len; k++) {
+        GRPC_SLICE_START_PTR(out)[cur++] = ((char*)iov[i].iov_base)[k];
+      }
+    }
+    return out;
+  }
+
+  int SendWriteBuf(LPDWORD bytes_sent_ptr, LPWSAOVERLAPPED overlapped) {
+    WSABUF buf;
+    buf.len = GRPC_SLICE_LENGTH(write_buf_);
+    buf.buf = (char*)GRPC_SLICE_START_PTR(write_buf_);
+    DWORD flags = 0;
+    int out = WSASend(grpc_winsocket_wrapped_socket(winsocket_), &buf, 1,
+                      bytes_sent_ptr, flags, overlapped, nullptr);
+    GRPC_CARES_TRACE_LOG(
+        "WSASend: name:%s. buf len:%d. bytes sent: %d. overlapped %p. return "
+        "val: %d",
+        GetName(), buf.len, *bytes_sent_ptr, overlapped, out);
+    return out;
+  }
+
+  ares_ssize_t TrySendWriteBufSyncNonBlocking() {
+    GPR_ASSERT(write_state_ == WRITE_IDLE);
+    ares_ssize_t total_sent;
+    DWORD bytes_sent = 0;
+    if (SendWriteBuf(&bytes_sent, nullptr) != 0) {
+      char* msg = gpr_format_message(WSAGetLastError());
+      GRPC_CARES_TRACE_LOG(
+          "TrySendWriteBufSyncNonBlocking: SendWriteBuf error:|%s|. fd:|%s|",
+          msg, GetName());
+      gpr_free(msg);
+      if (WSAGetLastError() == WSA_IO_PENDING) {
+        WSASetLastError(WSAEWOULDBLOCK);
+        write_state_ = WRITE_REQUESTED;
+      }
+    }
+    write_buf_ = grpc_slice_sub_no_ref(write_buf_, bytes_sent,
+                                       GRPC_SLICE_LENGTH(write_buf_));
+    return bytes_sent;
+  }
+
+  ares_ssize_t SendV(const struct iovec* iov, int iov_count) {
+    GRPC_CARES_TRACE_LOG("SendV called on fd:|%s|. Current write state: %d",
+                         GetName(), write_state_);
+    switch (write_state_) {
+      case WRITE_IDLE:
+        GPR_ASSERT(GRPC_SLICE_LENGTH(write_buf_) == 0);
+        grpc_slice_unref_internal(write_buf_);
+        write_buf_ = FlattenIovec(iov, iov_count);
+        return TrySendWriteBufSyncNonBlocking();
+      case WRITE_REQUESTED:
+      case WRITE_PENDING:
+        WSASetLastError(WSAEWOULDBLOCK);
+        return -1;
+      case WRITE_WAITING_FOR_VERIFICATION_UPON_RETRY:
+        grpc_slice currently_attempted = FlattenIovec(iov, iov_count);
+        GPR_ASSERT(GRPC_SLICE_LENGTH(currently_attempted) >=
+                   GRPC_SLICE_LENGTH(write_buf_));
+        ares_ssize_t total_sent = 0;
+        for (size_t i = 0; i < GRPC_SLICE_LENGTH(write_buf_); i++) {
+          GPR_ASSERT(GRPC_SLICE_START_PTR(currently_attempted)[i] ==
+                     GRPC_SLICE_START_PTR(write_buf_)[i]);
+          total_sent++;
+        }
+        grpc_slice_unref_internal(write_buf_);
+        write_buf_ =
+            grpc_slice_sub_no_ref(currently_attempted, total_sent,
+                                  GRPC_SLICE_LENGTH(currently_attempted));
+        write_state_ = WRITE_IDLE;
+        total_sent += TrySendWriteBufSyncNonBlocking();
+        return total_sent;
+    }
+    abort();
+  }
+
+  int Connect(const struct sockaddr* target, ares_socklen_t target_len) {
+    SOCKET s = grpc_winsocket_wrapped_socket(winsocket_);
+    GRPC_CARES_TRACE_LOG("Connect: fd:|%s|", GetName());
+    int out =
+        WSAConnect(s, target, target_len, nullptr, nullptr, nullptr, nullptr);
+    if (out != 0) {
+      char* msg = gpr_format_message(WSAGetLastError());
+      GRPC_CARES_TRACE_LOG("Connect error code:|%d|, msg:|%s|. fd:|%s|",
+                           WSAGetLastError(), msg, GetName());
+      gpr_free(msg);
+      // c-ares expects a posix-style connect API
+      out = -1;
+    }
+    return out;
+  }
+
+  static void OnIocpReadable(void* arg, grpc_error* error) {
+    GrpcPolledFdWindows* polled_fd = static_cast<GrpcPolledFdWindows*>(arg);
+    polled_fd->OnIocpReadableInner(error);
+  }
+
+  void OnIocpReadableInner(grpc_error* error) {
+    if (error == GRPC_ERROR_NONE) {
+      if (winsocket_->read_info.wsa_error != 0) {
+        /* WSAEMSGSIZE would be due to receiving more data
+         * than our read buffer's fixed capacity. Assume that
+         * the connection is TCP and read the leftovers
+         * in subsequent c-ares reads. */
+        if (winsocket_->read_info.wsa_error != WSAEMSGSIZE) {
+          GRPC_ERROR_UNREF(error);
+          char* msg = gpr_format_message(winsocket_->read_info.wsa_error);
+          GRPC_CARES_TRACE_LOG(
+              "OnIocpReadableInner. winsocket error:|%s|. fd:|%s|", msg,
+              GetName());
+          error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(msg);
+          gpr_free(msg);
+        }
+      }
+    }
+    if (error == GRPC_ERROR_NONE) {
+      read_buf_ = grpc_slice_sub_no_ref(read_buf_, 0,
+                                        winsocket_->read_info.bytes_transfered);
+    } else {
+      grpc_slice_unref_internal(read_buf_);
+      read_buf_ = grpc_empty_slice();
+    }
+    GRPC_CARES_TRACE_LOG(
+        "OnIocpReadable finishing. read buf length now:|%d|. :fd:|%s|",
+        GRPC_SLICE_LENGTH(read_buf_), GetName());
+    ScheduleAndNullReadClosure(error);
+  }
+
+  static void OnIocpWriteable(void* arg, grpc_error* error) {
+    GrpcPolledFdWindows* polled_fd = static_cast<GrpcPolledFdWindows*>(arg);
+    polled_fd->OnIocpWriteableInner(error);
+  }
+
+  void OnIocpWriteableInner(grpc_error* error) {
+    GRPC_CARES_TRACE_LOG("OnIocpWriteableInner. fd:|%s|", GetName());
+    if (error == GRPC_ERROR_NONE) {
+      if (winsocket_->write_info.wsa_error != 0) {
+        char* msg = gpr_format_message(winsocket_->write_info.wsa_error);
+        GRPC_CARES_TRACE_LOG(
+            "OnIocpWriteableInner. winsocket error:|%s|. fd:|%s|", msg,
+            GetName());
+        GRPC_ERROR_UNREF(error);
+        error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(msg);
+        gpr_free(msg);
+      }
+    }
+    GPR_ASSERT(write_state_ == WRITE_PENDING);
+    if (error == GRPC_ERROR_NONE) {
+      write_state_ = WRITE_WAITING_FOR_VERIFICATION_UPON_RETRY;
+      write_buf_ = grpc_slice_sub_no_ref(
+          write_buf_, 0, winsocket_->write_info.bytes_transfered);
+    } else {
+      grpc_slice_unref_internal(write_buf_);
+      write_buf_ = grpc_empty_slice();
+    }
+    ScheduleAndNullWriteClosure(error);
+  }
+
+  bool gotten_into_driver_list() const { return gotten_into_driver_list_; }
+  void set_gotten_into_driver_list() { gotten_into_driver_list_ = true; }
+
+  grpc_combiner* combiner_;
+  char recv_from_source_addr_[200];
+  ares_socklen_t recv_from_source_addr_len_;
+  grpc_slice read_buf_;
+  grpc_slice write_buf_;
+  grpc_closure* read_closure_ = nullptr;
+  grpc_closure* write_closure_ = nullptr;
+  grpc_closure outer_read_closure_;
+  grpc_closure outer_write_closure_;
+  grpc_winsocket* winsocket_;
+  WriteState write_state_;
+  char* name_ = nullptr;
+  bool gotten_into_driver_list_;
+};
+
+struct SockToPolledFdEntry {
+  SockToPolledFdEntry(SOCKET s, GrpcPolledFdWindows* fd)
+      : socket(s), polled_fd(fd) {}
+  SOCKET socket;
+  GrpcPolledFdWindows* polled_fd;
+  SockToPolledFdEntry* next = nullptr;
+};
+
+/* A SockToPolledFdMap can make ares_socket_t types (SOCKET's on windows)
+ * to GrpcPolledFdWindow's, and is used to find the appropriate
+ * GrpcPolledFdWindows to handle a virtual socket call when c-ares makes that
+ * socket call on the ares_socket_t type. Instances are owned by and one-to-one
+ * with a GrpcPolledFdWindows factory and event driver */
+class SockToPolledFdMap {
+ public:
+  SockToPolledFdMap(grpc_combiner* combiner) {
+    combiner_ = GRPC_COMBINER_REF(combiner, "sock to polled fd map");
+  }
+
+  ~SockToPolledFdMap() {
+    GPR_ASSERT(head_ == nullptr);
+    GRPC_COMBINER_UNREF(combiner_, "sock to polled fd map");
+  }
+
+  void AddNewSocket(SOCKET s, GrpcPolledFdWindows* polled_fd) {
+    SockToPolledFdEntry* new_node = New<SockToPolledFdEntry>(s, polled_fd);
+    new_node->next = head_;
+    head_ = new_node;
+  }
+
+  GrpcPolledFdWindows* LookupPolledFd(SOCKET s) {
+    for (SockToPolledFdEntry* node = head_; node != nullptr;
+         node = node->next) {
+      if (node->socket == s) {
+        GPR_ASSERT(node->polled_fd != nullptr);
+        return node->polled_fd;
+      }
+    }
+    abort();
+  }
+
+  void RemoveEntry(SOCKET s) {
+    GPR_ASSERT(head_ != nullptr);
+    SockToPolledFdEntry** prev = &head_;
+    for (SockToPolledFdEntry* node = head_; node != nullptr;
+         node = node->next) {
+      if (node->socket == s) {
+        *prev = node->next;
+        Delete(node);
+        return;
+      }
+      prev = &node->next;
+    }
+    abort();
+  }
+
+  /* These virtual socket functions are called from within the c-ares
+   * library. These methods generally dispatch those socket calls to the
+   * appropriate methods. The virtual "socket" and "close" methods are
+   * special and instead create/add and remove/destroy GrpcPolledFdWindows
+   * objects.
+   */
+  static ares_socket_t Socket(int af, int type, int protocol, void* user_data) {
+    SockToPolledFdMap* map = static_cast<SockToPolledFdMap*>(user_data);
+    SOCKET s = WSASocket(af, type, protocol, nullptr, 0, WSA_FLAG_OVERLAPPED);
+    if (s == INVALID_SOCKET) {
+      return s;
+    }
+    grpc_tcp_set_non_block(s);
+    GrpcPolledFdWindows* polled_fd =
+        New<GrpcPolledFdWindows>(s, map->combiner_);
+    map->AddNewSocket(s, polled_fd);
+    return s;
+  }
+
+  static int Connect(ares_socket_t as, const struct sockaddr* target,
+                     ares_socklen_t target_len, void* user_data) {
+    SockToPolledFdMap* map = static_cast<SockToPolledFdMap*>(user_data);
+    GrpcPolledFdWindows* polled_fd = map->LookupPolledFd(as);
+    return polled_fd->Connect(target, target_len);
+  }
+
+  static ares_ssize_t SendV(ares_socket_t as, const struct iovec* iov,
+                            int iovec_count, void* user_data) {
+    SockToPolledFdMap* map = static_cast<SockToPolledFdMap*>(user_data);
+    GrpcPolledFdWindows* polled_fd = map->LookupPolledFd(as);
+    return polled_fd->SendV(iov, iovec_count);
+  }
+
+  static ares_ssize_t RecvFrom(ares_socket_t as, void* data, size_t data_len,
+                               int flags, struct sockaddr* from,
+                               ares_socklen_t* from_len, void* user_data) {
+    SockToPolledFdMap* map = static_cast<SockToPolledFdMap*>(user_data);
+    GrpcPolledFdWindows* polled_fd = map->LookupPolledFd(as);
+    return polled_fd->RecvFrom(data, data_len, flags, from, from_len);
+  }
+
+  static int CloseSocket(SOCKET s, void* user_data) {
+    SockToPolledFdMap* map = static_cast<SockToPolledFdMap*>(user_data);
+    GrpcPolledFdWindows* polled_fd = map->LookupPolledFd(s);
+    map->RemoveEntry(s);
+    // If a gRPC polled fd has not made it in to the driver's list yet, then
+    // the driver has not and will never see this socket.
+    if (!polled_fd->gotten_into_driver_list()) {
+      polled_fd->ShutdownLocked(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
+          "Shut down c-ares fd before without it ever having made it into the "
+          "driver's list"));
+      return 0;
+    }
+    return 0;
+  }
+
+ private:
+  SockToPolledFdEntry* head_ = nullptr;
+  grpc_combiner* combiner_;
+};
+
+const struct ares_socket_functions custom_ares_sock_funcs = {
+    &SockToPolledFdMap::Socket /* socket */,
+    &SockToPolledFdMap::CloseSocket /* close */,
+    &SockToPolledFdMap::Connect /* connect */,
+    &SockToPolledFdMap::RecvFrom /* recvfrom */,
+    &SockToPolledFdMap::SendV /* sendv */,
+};
+
+class GrpcPolledFdFactoryWindows : public GrpcPolledFdFactory {
+ public:
+  GrpcPolledFdFactoryWindows(grpc_combiner* combiner)
+      : sock_to_polled_fd_map_(combiner) {}
+
+  GrpcPolledFd* NewGrpcPolledFdLocked(ares_socket_t as,
+                                      grpc_pollset_set* driver_pollset_set,
+                                      grpc_combiner* combiner) override {
+    GrpcPolledFdWindows* polled_fd = sock_to_polled_fd_map_.LookupPolledFd(as);
+    // Set a flag so that the virtual socket "close" method knows it
+    // doesn't need to call ShutdownLocked, since now the driver will.
+    polled_fd->set_gotten_into_driver_list();
+    return polled_fd;
+  }
+
+  void ConfigureAresChannelLocked(ares_channel channel) override {
+    ares_set_socket_functions(channel, &custom_ares_sock_funcs,
+                              &sock_to_polled_fd_map_);
+  }
+
+ private:
+  SockToPolledFdMap sock_to_polled_fd_map_;
+};
+
+UniquePtr<GrpcPolledFdFactory> NewGrpcPolledFdFactory(grpc_combiner* combiner) {
+  return UniquePtr<GrpcPolledFdFactory>(
+      New<GrpcPolledFdFactoryWindows>(combiner));
+}
+
+}  // namespace grpc_core
+
+#endif /* GRPC_ARES == 1 && defined(GPR_WINDOWS) */
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc
index 18d0a7b9f6..4c795c34c8 100644
--- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc
@@ -22,7 +22,6 @@
 
 #include "src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h"
 #include "src/core/lib/iomgr/sockaddr.h"
-#include "src/core/lib/iomgr/socket_utils_posix.h"
 
 #include <string.h>
 #include <sys/types.h>
@@ -50,6 +49,8 @@ static gpr_mu g_init_mu;
 grpc_core::TraceFlag grpc_trace_cares_address_sorting(false,
                                                       "cares_address_sorting");
 
+grpc_core::TraceFlag grpc_trace_cares_resolver(false, "cares_resolver");
+
 struct grpc_ares_request {
   /** indicates the DNS server to use, if specified */
   struct ares_addr_port_node dns_server_addr;
@@ -63,10 +64,8 @@ struct grpc_ares_request {
   /** the evernt driver used by this request */
   grpc_ares_ev_driver* ev_driver;
   /** number of ongoing queries */
-  gpr_refcount pending_queries;
+  size_t pending_queries;
 
-  /** mutex guarding the rest of the state */
-  gpr_mu mu;
   /** is there at least one successful query, set in on_done_cb */
   bool success;
   /** the errors explaining the request failure, set in on_done_cb */
@@ -74,7 +73,8 @@ struct grpc_ares_request {
 };
 
 typedef struct grpc_ares_hostbyname_request {
-  /** following members are set in create_hostbyname_request */
+  /** following members are set in create_hostbyname_request_locked
+   */
   /** the top-level request instance */
   grpc_ares_request* parent_request;
   /** host to resolve, parsed from the name to resolve */
@@ -87,19 +87,6 @@ typedef struct grpc_ares_hostbyname_request {
 
 static void do_basic_init(void) { gpr_mu_init(&g_init_mu); }
 
-static uint16_t strhtons(const char* port) {
-  if (strcmp(port, "http") == 0) {
-    return htons(80);
-  } else if (strcmp(port, "https") == 0) {
-    return htons(443);
-  }
-  return htons(static_cast<unsigned short>(atoi(port)));
-}
-
-static void grpc_ares_request_ref(grpc_ares_request* r) {
-  gpr_ref(&r->pending_queries);
-}
-
 static void log_address_sorting_list(grpc_lb_addresses* lb_addrs,
                                      const char* input_output_str) {
   for (size_t i = 0; i < lb_addrs->num_addresses; i++) {
@@ -143,28 +130,29 @@ void grpc_cares_wrapper_address_sorting_sort(grpc_lb_addresses* lb_addrs) {
   }
 }
 
-/* Allow tests to access grpc_ares_wrapper_address_sorting_sort */
-void grpc_cares_wrapper_test_only_address_sorting_sort(
-    grpc_lb_addresses* lb_addrs) {
-  grpc_cares_wrapper_address_sorting_sort(lb_addrs);
+static void grpc_ares_request_ref_locked(grpc_ares_request* r) {
+  r->pending_queries++;
 }
 
-static void grpc_ares_request_unref(grpc_ares_request* r) {
-  /* If there are no pending queries, invoke on_done callback and destroy the
+static void grpc_ares_request_unref_locked(grpc_ares_request* r) {
+  r->pending_queries--;
+  if (r->pending_queries == 0u) {
+    grpc_ares_ev_driver_on_queries_complete_locked(r->ev_driver);
+  }
+}
+
+void grpc_ares_complete_request_locked(grpc_ares_request* r) {
+  /* Invoke on_done callback and destroy the
      request */
-  if (gpr_unref(&r->pending_queries)) {
-    grpc_lb_addresses* lb_addrs = *(r->lb_addrs_out);
-    if (lb_addrs != nullptr) {
-      grpc_cares_wrapper_address_sorting_sort(lb_addrs);
-    }
-    GRPC_CLOSURE_SCHED(r->on_done, r->error);
-    gpr_mu_destroy(&r->mu);
-    grpc_ares_ev_driver_destroy(r->ev_driver);
-    gpr_free(r);
+  grpc_lb_addresses* lb_addrs = *(r->lb_addrs_out);
+  if (lb_addrs != nullptr) {
+    grpc_cares_wrapper_address_sorting_sort(lb_addrs);
   }
+  GRPC_CLOSURE_SCHED(r->on_done, r->error);
+  gpr_free(r);
 }
 
-static grpc_ares_hostbyname_request* create_hostbyname_request(
+static grpc_ares_hostbyname_request* create_hostbyname_request_locked(
     grpc_ares_request* parent_request, char* host, uint16_t port,
     bool is_balancer) {
   grpc_ares_hostbyname_request* hr = static_cast<grpc_ares_hostbyname_request*>(
@@ -173,22 +161,22 @@ static grpc_ares_hostbyname_request* create_hostbyname_request(
   hr->host = gpr_strdup(host);
   hr->port = port;
   hr->is_balancer = is_balancer;
-  grpc_ares_request_ref(parent_request);
+  grpc_ares_request_ref_locked(parent_request);
   return hr;
 }
 
-static void destroy_hostbyname_request(grpc_ares_hostbyname_request* hr) {
-  grpc_ares_request_unref(hr->parent_request);
+static void destroy_hostbyname_request_locked(
+    grpc_ares_hostbyname_request* hr) {
+  grpc_ares_request_unref_locked(hr->parent_request);
   gpr_free(hr->host);
   gpr_free(hr);
 }
 
-static void on_hostbyname_done_cb(void* arg, int status, int timeouts,
-                                  struct hostent* hostent) {
+static void on_hostbyname_done_locked(void* arg, int status, int timeouts,
+                                      struct hostent* hostent) {
   grpc_ares_hostbyname_request* hr =
       static_cast<grpc_ares_hostbyname_request*>(arg);
   grpc_ares_request* r = hr->parent_request;
-  gpr_mu_lock(&r->mu);
   if (status == ARES_SUCCESS) {
     GRPC_ERROR_UNREF(r->error);
     r->error = GRPC_ERROR_NONE;
@@ -213,7 +201,7 @@ static void on_hostbyname_done_cb(void* arg, int status, int timeouts,
           memset(&addr, 0, addr_len);
           memcpy(&addr.sin6_addr, hostent->h_addr_list[i - prev_naddr],
                  sizeof(struct in6_addr));
-          addr.sin6_family = static_cast<sa_family_t>(hostent->h_addrtype);
+          addr.sin6_family = static_cast<unsigned char>(hostent->h_addrtype);
           addr.sin6_port = hr->port;
           grpc_lb_addresses_set_address(
               *lb_addresses, i, &addr, addr_len,
@@ -234,7 +222,7 @@ static void on_hostbyname_done_cb(void* arg, int status, int timeouts,
           memset(&addr, 0, addr_len);
           memcpy(&addr.sin_addr, hostent->h_addr_list[i - prev_naddr],
                  sizeof(struct in_addr));
-          addr.sin_family = static_cast<sa_family_t>(hostent->h_addrtype);
+          addr.sin_family = static_cast<unsigned char>(hostent->h_addrtype);
           addr.sin_port = hr->port;
           grpc_lb_addresses_set_address(
               *lb_addresses, i, &addr, addr_len,
@@ -263,33 +251,33 @@ static void on_hostbyname_done_cb(void* arg, int status, int timeouts,
       r->error = grpc_error_add_child(error, r->error);
     }
   }
-  gpr_mu_unlock(&r->mu);
-  destroy_hostbyname_request(hr);
+  destroy_hostbyname_request_locked(hr);
 }
 
-static void on_srv_query_done_cb(void* arg, int status, int timeouts,
-                                 unsigned char* abuf, int alen) {
+static void on_srv_query_done_locked(void* arg, int status, int timeouts,
+                                     unsigned char* abuf, int alen) {
   grpc_ares_request* r = static_cast<grpc_ares_request*>(arg);
-  gpr_log(GPR_DEBUG, "on_query_srv_done_cb");
+  gpr_log(GPR_DEBUG, "on_query_srv_done_locked");
   if (status == ARES_SUCCESS) {
-    gpr_log(GPR_DEBUG, "on_query_srv_done_cb ARES_SUCCESS");
+    gpr_log(GPR_DEBUG, "on_query_srv_done_locked ARES_SUCCESS");
     struct ares_srv_reply* reply;
     const int parse_status = ares_parse_srv_reply(abuf, alen, &reply);
     if (parse_status == ARES_SUCCESS) {
-      ares_channel* channel = grpc_ares_ev_driver_get_channel(r->ev_driver);
+      ares_channel* channel =
+          grpc_ares_ev_driver_get_channel_locked(r->ev_driver);
       for (struct ares_srv_reply* srv_it = reply; srv_it != nullptr;
            srv_it = srv_it->next) {
-        if (grpc_ipv6_loopback_available()) {
-          grpc_ares_hostbyname_request* hr = create_hostbyname_request(
+        if (grpc_ares_query_ipv6()) {
+          grpc_ares_hostbyname_request* hr = create_hostbyname_request_locked(
               r, srv_it->host, htons(srv_it->port), true /* is_balancer */);
           ares_gethostbyname(*channel, hr->host, AF_INET6,
-                             on_hostbyname_done_cb, hr);
+                             on_hostbyname_done_locked, hr);
         }
-        grpc_ares_hostbyname_request* hr = create_hostbyname_request(
+        grpc_ares_hostbyname_request* hr = create_hostbyname_request_locked(
             r, srv_it->host, htons(srv_it->port), true /* is_balancer */);
-        ares_gethostbyname(*channel, hr->host, AF_INET, on_hostbyname_done_cb,
-                           hr);
-        grpc_ares_ev_driver_start(r->ev_driver);
+        ares_gethostbyname(*channel, hr->host, AF_INET,
+                           on_hostbyname_done_locked, hr);
+        grpc_ares_ev_driver_start_locked(r->ev_driver);
       }
     }
     if (reply != nullptr) {
@@ -307,21 +295,20 @@ static void on_srv_query_done_cb(void* arg, int status, int timeouts,
       r->error = grpc_error_add_child(error, r->error);
     }
   }
-  grpc_ares_request_unref(r);
+  grpc_ares_request_unref_locked(r);
 }
 
 static const char g_service_config_attribute_prefix[] = "grpc_config=";
 
-static void on_txt_done_cb(void* arg, int status, int timeouts,
-                           unsigned char* buf, int len) {
-  gpr_log(GPR_DEBUG, "on_txt_done_cb");
+static void on_txt_done_locked(void* arg, int status, int timeouts,
+                               unsigned char* buf, int len) {
+  gpr_log(GPR_DEBUG, "on_txt_done_locked");
   char* error_msg;
   grpc_ares_request* r = static_cast<grpc_ares_request*>(arg);
   const size_t prefix_len = sizeof(g_service_config_attribute_prefix) - 1;
   struct ares_txt_ext* result = nullptr;
   struct ares_txt_ext* reply = nullptr;
   grpc_error* error = GRPC_ERROR_NONE;
-  gpr_mu_lock(&r->mu);
   if (status != ARES_SUCCESS) goto fail;
   status = ares_parse_txt_reply_ext(buf, len, &reply);
   if (status != ARES_SUCCESS) goto fail;
@@ -366,14 +353,15 @@ fail:
     r->error = grpc_error_add_child(error, r->error);
   }
 done:
-  gpr_mu_unlock(&r->mu);
-  grpc_ares_request_unref(r);
+  grpc_ares_request_unref_locked(r);
 }
 
-static grpc_ares_request* grpc_dns_lookup_ares_impl(
+static grpc_ares_request*
+grpc_dns_lookup_ares_continue_after_check_localhost_and_ip_literals_locked(
     const char* dns_server, const char* name, const char* default_port,
     grpc_pollset_set* interested_parties, grpc_closure* on_done,
-    grpc_lb_addresses** addrs, bool check_grpclb, char** service_config_json) {
+    grpc_lb_addresses** addrs, bool check_grpclb, char** service_config_json,
+    grpc_combiner* combiner) {
   grpc_error* error = GRPC_ERROR_NONE;
   grpc_ares_hostbyname_request* hr = nullptr;
   grpc_ares_request* r = nullptr;
@@ -402,21 +390,18 @@ static grpc_ares_request* grpc_dns_lookup_ares_impl(
     }
     port = gpr_strdup(default_port);
   }
-
-  grpc_ares_ev_driver* ev_driver;
-  error = grpc_ares_ev_driver_create(&ev_driver, interested_parties);
-  if (error != GRPC_ERROR_NONE) goto error_cleanup;
-
   r = static_cast<grpc_ares_request*>(gpr_zalloc(sizeof(grpc_ares_request)));
-  gpr_mu_init(&r->mu);
-  r->ev_driver = ev_driver;
+  r->ev_driver = nullptr;
   r->on_done = on_done;
   r->lb_addrs_out = addrs;
   r->service_config_json_out = service_config_json;
   r->success = false;
   r->error = GRPC_ERROR_NONE;
-  channel = grpc_ares_ev_driver_get_channel(r->ev_driver);
-
+  r->pending_queries = 0;
+  error = grpc_ares_ev_driver_create_locked(&r->ev_driver, interested_parties,
+                                            combiner, r);
+  if (error != GRPC_ERROR_NONE) goto error_cleanup;
+  channel = grpc_ares_ev_driver_get_channel_locked(r->ev_driver);
   // If dns_server is specified, use it.
   if (dns_server != nullptr) {
     gpr_log(GPR_INFO, "Using DNS server %s", dns_server);
@@ -441,7 +426,6 @@ static grpc_ares_request* grpc_dns_lookup_ares_impl(
       error = grpc_error_set_str(
           GRPC_ERROR_CREATE_FROM_STATIC_STRING("cannot parse authority"),
           GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(name));
-      gpr_free(r);
       goto error_cleanup;
     }
     int status = ares_set_servers_ports(*channel, &r->dns_server_addr);
@@ -451,58 +435,135 @@ static grpc_ares_request* grpc_dns_lookup_ares_impl(
                    ares_strerror(status));
       error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(error_msg);
       gpr_free(error_msg);
-      gpr_free(r);
       goto error_cleanup;
     }
   }
-  gpr_ref_init(&r->pending_queries, 1);
-  if (grpc_ipv6_loopback_available()) {
-    hr = create_hostbyname_request(r, host, strhtons(port),
-                                   false /* is_balancer */);
-    ares_gethostbyname(*channel, hr->host, AF_INET6, on_hostbyname_done_cb, hr);
+  r->pending_queries = 1;
+  if (grpc_ares_query_ipv6()) {
+    hr = create_hostbyname_request_locked(r, host, grpc_strhtons(port),
+                                          false /* is_balancer */);
+    ares_gethostbyname(*channel, hr->host, AF_INET6, on_hostbyname_done_locked,
+                       hr);
   }
-  hr = create_hostbyname_request(r, host, strhtons(port),
-                                 false /* is_balancer */);
-  ares_gethostbyname(*channel, hr->host, AF_INET, on_hostbyname_done_cb, hr);
+  hr = create_hostbyname_request_locked(r, host, grpc_strhtons(port),
+                                        false /* is_balancer */);
+  ares_gethostbyname(*channel, hr->host, AF_INET, on_hostbyname_done_locked,
+                     hr);
   if (check_grpclb) {
     /* Query the SRV record */
-    grpc_ares_request_ref(r);
+    grpc_ares_request_ref_locked(r);
     char* service_name;
     gpr_asprintf(&service_name, "_grpclb._tcp.%s", host);
-    ares_query(*channel, service_name, ns_c_in, ns_t_srv, on_srv_query_done_cb,
-               r);
+    ares_query(*channel, service_name, ns_c_in, ns_t_srv,
+               on_srv_query_done_locked, r);
     gpr_free(service_name);
   }
   if (service_config_json != nullptr) {
-    grpc_ares_request_ref(r);
+    grpc_ares_request_ref_locked(r);
     char* config_name;
     gpr_asprintf(&config_name, "_grpc_config.%s", host);
-    ares_search(*channel, config_name, ns_c_in, ns_t_txt, on_txt_done_cb, r);
+    ares_search(*channel, config_name, ns_c_in, ns_t_txt, on_txt_done_locked,
+                r);
     gpr_free(config_name);
   }
-  /* TODO(zyc): Handle CNAME records here. */
-  grpc_ares_ev_driver_start(r->ev_driver);
-  grpc_ares_request_unref(r);
+  grpc_ares_ev_driver_start_locked(r->ev_driver);
+  grpc_ares_request_unref_locked(r);
   gpr_free(host);
   gpr_free(port);
   return r;
 
 error_cleanup:
   GRPC_CLOSURE_SCHED(on_done, error);
+  gpr_free(r);
   gpr_free(host);
   gpr_free(port);
   return nullptr;
 }
 
-grpc_ares_request* (*grpc_dns_lookup_ares)(
+static bool inner_resolve_as_ip_literal_locked(const char* name,
+                                               const char* default_port,
+                                               grpc_lb_addresses** addrs,
+                                               char** host, char** port,
+                                               char** hostport) {
+  gpr_split_host_port(name, host, port);
+  if (*host == nullptr) {
+    gpr_log(GPR_ERROR,
+            "Failed to parse %s to host:port while attempting to resolve as ip "
+            "literal.",
+            name);
+    return false;
+  }
+  if (*port == nullptr) {
+    if (default_port == nullptr) {
+      gpr_log(GPR_ERROR,
+              "No port or default port for %s while attempting to resolve as "
+              "ip literal.",
+              name);
+      return false;
+    }
+    *port = gpr_strdup(default_port);
+  }
+  grpc_resolved_address addr;
+  GPR_ASSERT(gpr_join_host_port(hostport, *host, atoi(*port)));
+  if (grpc_parse_ipv4_hostport(*hostport, &addr, false /* log errors */) ||
+      grpc_parse_ipv6_hostport(*hostport, &addr, false /* log errors */)) {
+    GPR_ASSERT(*addrs == nullptr);
+    *addrs = grpc_lb_addresses_create(1, nullptr);
+    grpc_lb_addresses_set_address(
+        *addrs, 0, addr.addr, addr.len, false /* is_balancer */,
+        nullptr /* balancer_name */, nullptr /* user_data */);
+    return true;
+  }
+  return false;
+}
+
+static bool resolve_as_ip_literal_locked(const char* name,
+                                         const char* default_port,
+                                         grpc_lb_addresses** addrs) {
+  char* host = nullptr;
+  char* port = nullptr;
+  char* hostport = nullptr;
+  bool out = inner_resolve_as_ip_literal_locked(name, default_port, addrs,
+                                                &host, &port, &hostport);
+  gpr_free(host);
+  gpr_free(port);
+  gpr_free(hostport);
+  return out;
+}
+
+static grpc_ares_request* grpc_dns_lookup_ares_locked_impl(
+    const char* dns_server, const char* name, const char* default_port,
+    grpc_pollset_set* interested_parties, grpc_closure* on_done,
+    grpc_lb_addresses** addrs, bool check_grpclb, char** service_config_json,
+    grpc_combiner* combiner) {
+  // Early out if the target is an ipv4 or ipv6 literal.
+  if (resolve_as_ip_literal_locked(name, default_port, addrs)) {
+    GRPC_CLOSURE_SCHED(on_done, GRPC_ERROR_NONE);
+    return nullptr;
+  }
+  // Early out if the target is localhost and we're on Windows.
+  if (grpc_ares_maybe_resolve_localhost_manually_locked(name, default_port,
+                                                        addrs)) {
+    GRPC_CLOSURE_SCHED(on_done, GRPC_ERROR_NONE);
+    return nullptr;
+  }
+  // Look up name using c-ares lib.
+  return grpc_dns_lookup_ares_continue_after_check_localhost_and_ip_literals_locked(
+      dns_server, name, default_port, interested_parties, on_done, addrs,
+      check_grpclb, service_config_json, combiner);
+}
+
+grpc_ares_request* (*grpc_dns_lookup_ares_locked)(
     const char* dns_server, const char* name, const char* default_port,
     grpc_pollset_set* interested_parties, grpc_closure* on_done,
-    grpc_lb_addresses** addrs, bool check_grpclb,
-    char** service_config_json) = grpc_dns_lookup_ares_impl;
+    grpc_lb_addresses** addrs, bool check_grpclb, char** service_config_json,
+    grpc_combiner* combiner) = grpc_dns_lookup_ares_locked_impl;
 
 void grpc_cancel_ares_request(grpc_ares_request* r) {
-  if (grpc_dns_lookup_ares == grpc_dns_lookup_ares_impl) {
-    grpc_ares_ev_driver_shutdown(r->ev_driver);
+  if (grpc_dns_lookup_ares_locked == grpc_dns_lookup_ares_locked_impl) {
+    if (r != nullptr) {
+      grpc_ares_ev_driver_shutdown_locked(r->ev_driver);
+    }
   }
 }
 
@@ -534,6 +595,8 @@ void grpc_ares_cleanup(void) {
  */
 
 typedef struct grpc_resolve_address_ares_request {
+  /* combiner that queries and related callbacks run under */
+  grpc_combiner* combiner;
   /** the pointer to receive the resolved addresses */
   grpc_resolved_addresses** addrs_out;
   /** currently resolving lb addresses */
@@ -541,8 +604,14 @@ typedef struct grpc_resolve_address_ares_request {
   /** closure to call when the resolve_address_ares request completes */
   grpc_closure* on_resolve_address_done;
   /** a closure wrapping on_dns_lookup_done_cb, which should be invoked when the
-      grpc_dns_lookup_ares operation is done. */
+      grpc_dns_lookup_ares_locked operation is done. */
   grpc_closure on_dns_lookup_done;
+  /* target name */
+  const char* name;
+  /* default port to use if none is specified */
+  const char* default_port;
+  /* pollset_set to be driven by */
+  grpc_pollset_set* interested_parties;
 } grpc_resolve_address_ares_request;
 
 static void on_dns_lookup_done_cb(void* arg, grpc_error* error) {
@@ -566,9 +635,20 @@ static void on_dns_lookup_done_cb(void* arg, grpc_error* error) {
   }
   GRPC_CLOSURE_SCHED(r->on_resolve_address_done, GRPC_ERROR_REF(error));
   if (r->lb_addrs != nullptr) grpc_lb_addresses_destroy(r->lb_addrs);
+  GRPC_COMBINER_UNREF(r->combiner, "on_dns_lookup_done_cb");
   gpr_free(r);
 }
 
+static void grpc_resolve_address_invoke_dns_lookup_ares_locked(
+    void* arg, grpc_error* unused_error) {
+  grpc_resolve_address_ares_request* r =
+      static_cast<grpc_resolve_address_ares_request*>(arg);
+  grpc_dns_lookup_ares_locked(
+      nullptr /* dns_server */, r->name, r->default_port, r->interested_parties,
+      &r->on_dns_lookup_done, &r->lb_addrs, false /* check_grpclb */,
+      nullptr /* service_config_json */, r->combiner);
+}
+
 static void grpc_resolve_address_ares_impl(const char* name,
                                            const char* default_port,
                                            grpc_pollset_set* interested_parties,
@@ -577,14 +657,18 @@ static void grpc_resolve_address_ares_impl(const char* name,
   grpc_resolve_address_ares_request* r =
       static_cast<grpc_resolve_address_ares_request*>(
           gpr_zalloc(sizeof(grpc_resolve_address_ares_request)));
+  r->combiner = grpc_combiner_create();
   r->addrs_out = addrs;
   r->on_resolve_address_done = on_done;
   GRPC_CLOSURE_INIT(&r->on_dns_lookup_done, on_dns_lookup_done_cb, r,
                     grpc_schedule_on_exec_ctx);
-  grpc_dns_lookup_ares(nullptr /* dns_server */, name, default_port,
-                       interested_parties, &r->on_dns_lookup_done, &r->lb_addrs,
-                       false /* check_grpclb */,
-                       nullptr /* service_config_json */);
+  r->name = name;
+  r->default_port = default_port;
+  r->interested_parties = interested_parties;
+  GRPC_CLOSURE_SCHED(
+      GRPC_CLOSURE_CREATE(grpc_resolve_address_invoke_dns_lookup_ares_locked, r,
+                          grpc_combiner_scheduler(r->combiner)),
+      GRPC_ERROR_NONE);
 }
 
 void (*grpc_resolve_address_ares)(
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h
index 2d84a038d6..1bc457d4cf 100644
--- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h
@@ -28,6 +28,13 @@
 
 extern grpc_core::TraceFlag grpc_trace_cares_address_sorting;
 
+extern grpc_core::TraceFlag grpc_trace_cares_resolver;
+
+#define GRPC_CARES_TRACE_LOG(format, ...)                         \
+  if (grpc_trace_cares_resolver.enabled()) {                      \
+    gpr_log(GPR_DEBUG, "(c-ares resolver) " format, __VA_ARGS__); \
+  }
+
 typedef struct grpc_ares_request grpc_ares_request;
 
 /* Asynchronously resolve \a name. Use \a default_port if a port isn't
@@ -48,11 +55,11 @@ extern void (*grpc_resolve_address_ares)(const char* name,
   function. \a on_done may be called directly in this function without being
   scheduled with \a exec_ctx, so it must not try to acquire locks that are
   being held by the caller. */
-extern grpc_ares_request* (*grpc_dns_lookup_ares)(
+extern grpc_ares_request* (*grpc_dns_lookup_ares_locked)(
     const char* dns_server, const char* name, const char* default_port,
     grpc_pollset_set* interested_parties, grpc_closure* on_done,
     grpc_lb_addresses** addresses, bool check_grpclb,
-    char** service_config_json);
+    char** service_config_json, grpc_combiner* combiner);
 
 /* Cancel the pending grpc_ares_request \a request */
 void grpc_cancel_ares_request(grpc_ares_request* request);
@@ -66,9 +73,23 @@ grpc_error* grpc_ares_init(void);
    it has been called the same number of times as grpc_ares_init(). */
 void grpc_ares_cleanup(void);
 
-/* Exposed only for testing */
-void grpc_cares_wrapper_test_only_address_sorting_sort(
-    grpc_lb_addresses* lb_addrs);
+/** Schedules the desired callback for request completion
+ * and destroys the grpc_ares_request */
+void grpc_ares_complete_request_locked(grpc_ares_request* request);
+
+/* Indicates whether or not AAAA queries should be attempted. */
+/* E.g., return false if ipv6 is known to not be available. */
+bool grpc_ares_query_ipv6();
+
+/* Maybe (depending on the current platform) checks if "name" matches
+ * "localhost" and if so fills in addrs with the correct sockaddr structures.
+ * Returns a bool indicating whether or not such an action was performed.
+ * See https://github.com/grpc/grpc/issues/15158. */
+bool grpc_ares_maybe_resolve_localhost_manually_locked(
+    const char* name, const char* default_port, grpc_lb_addresses** addrs);
+
+/* Sorts destinations in lb_addrs according to RFC 6724. */
+void grpc_cares_wrapper_address_sorting_sort(grpc_lb_addresses* lb_addrs);
 
 #endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RESOLVER_DNS_C_ARES_GRPC_ARES_WRAPPER_H \
         */
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.cc
index 5096e480bc..d6a76fc8b6 100644
--- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.cc
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.cc
@@ -26,18 +26,19 @@ struct grpc_ares_request {
   char val;
 };
 
-static grpc_ares_request* grpc_dns_lookup_ares_impl(
+static grpc_ares_request* grpc_dns_lookup_ares_locked_impl(
     const char* dns_server, const char* name, const char* default_port,
     grpc_pollset_set* interested_parties, grpc_closure* on_done,
-    grpc_lb_addresses** addrs, bool check_grpclb, char** service_config_json) {
+    grpc_lb_addresses** addrs, bool check_grpclb, char** service_config_json,
+    grpc_combiner* combiner) {
   return NULL;
 }
 
-grpc_ares_request* (*grpc_dns_lookup_ares)(
+grpc_ares_request* (*grpc_dns_lookup_ares_locked)(
     const char* dns_server, const char* name, const char* default_port,
     grpc_pollset_set* interested_parties, grpc_closure* on_done,
-    grpc_lb_addresses** addrs, bool check_grpclb,
-    char** service_config_json) = grpc_dns_lookup_ares_impl;
+    grpc_lb_addresses** addrs, bool check_grpclb, char** service_config_json,
+    grpc_combiner* combiner) = grpc_dns_lookup_ares_locked_impl;
 
 void grpc_cancel_ares_request(grpc_ares_request* r) {}
 
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_posix.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_posix.cc
new file mode 100644
index 0000000000..639eec2323
--- /dev/null
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_posix.cc
@@ -0,0 +1,34 @@
+/*
+ *
+ * Copyright 2016 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/lib/iomgr/port.h"
+#if GRPC_ARES == 1 && defined(GRPC_POSIX_SOCKET_ARES_EV_DRIVER)
+
+#include "src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h"
+#include "src/core/lib/iomgr/socket_utils_posix.h"
+
+bool grpc_ares_query_ipv6() { return grpc_ipv6_loopback_available(); }
+
+bool grpc_ares_maybe_resolve_localhost_manually_locked(
+    const char* name, const char* default_port, grpc_lb_addresses** addrs) {
+  return false;
+}
+
+#endif /* GRPC_ARES == 1 && defined(GRPC_POSIX_SOCKET_ARES_EV_DRIVER) */
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc
new file mode 100644
index 0000000000..7e34784691
--- /dev/null
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc
@@ -0,0 +1,99 @@
+/*
+ *
+ * Copyright 2016 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/lib/iomgr/port.h"
+#if GRPC_ARES == 1 && defined(GPR_WINDOWS)
+
+#include <grpc/support/string_util.h>
+
+#include "src/core/ext/filters/client_channel/lb_policy_factory.h"
+#include "src/core/ext/filters/client_channel/parse_address.h"
+#include "src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h"
+#include "src/core/lib/gpr/host_port.h"
+#include "src/core/lib/gpr/string.h"
+#include "src/core/lib/iomgr/socket_windows.h"
+
+bool grpc_ares_query_ipv6() { return grpc_ipv6_loopback_available(); }
+
+static bool inner_maybe_resolve_localhost_manually_locked(
+    const char* name, const char* default_port, grpc_lb_addresses** addrs,
+    char** host, char** port) {
+  gpr_split_host_port(name, host, port);
+  if (*host == nullptr) {
+    gpr_log(GPR_ERROR,
+            "Failed to parse %s into host:port during Windows localhost "
+            "resolution check.",
+            name);
+    return false;
+  }
+  if (*port == nullptr) {
+    if (default_port == nullptr) {
+      gpr_log(GPR_ERROR,
+              "No port or default port for %s during Windows localhost "
+              "resolution check.",
+              name);
+      return false;
+    }
+    *port = gpr_strdup(default_port);
+  }
+  if (gpr_stricmp(*host, "localhost") == 0) {
+    GPR_ASSERT(*addrs == nullptr);
+    *addrs = grpc_lb_addresses_create(2, nullptr);
+    uint16_t numeric_port = grpc_strhtons(*port);
+    // Append the ipv6 loopback address.
+    struct sockaddr_in6 ipv6_loopback_addr;
+    memset(&ipv6_loopback_addr, 0, sizeof(ipv6_loopback_addr));
+    ((char*)&ipv6_loopback_addr.sin6_addr)[15] = 1;
+    ipv6_loopback_addr.sin6_family = AF_INET6;
+    ipv6_loopback_addr.sin6_port = numeric_port;
+    grpc_lb_addresses_set_address(
+        *addrs, 0, &ipv6_loopback_addr, sizeof(ipv6_loopback_addr),
+        false /* is_balancer */, nullptr /* balancer_name */,
+        nullptr /* user_data */);
+    // Append the ipv4 loopback address.
+    struct sockaddr_in ipv4_loopback_addr;
+    memset(&ipv4_loopback_addr, 0, sizeof(ipv4_loopback_addr));
+    ((char*)&ipv4_loopback_addr.sin_addr)[0] = 0x7f;
+    ((char*)&ipv4_loopback_addr.sin_addr)[3] = 0x01;
+    ipv4_loopback_addr.sin_family = AF_INET;
+    ipv4_loopback_addr.sin_port = numeric_port;
+    grpc_lb_addresses_set_address(
+        *addrs, 1, &ipv4_loopback_addr, sizeof(ipv4_loopback_addr),
+        false /* is_balancer */, nullptr /* balancer_name */,
+        nullptr /* user_data */);
+    // Let the address sorter figure out which one should be tried first.
+    grpc_cares_wrapper_address_sorting_sort(*addrs);
+    return true;
+  }
+  return false;
+}
+
+bool grpc_ares_maybe_resolve_localhost_manually_locked(
+    const char* name, const char* default_port, grpc_lb_addresses** addrs) {
+  char* host = nullptr;
+  char* port = nullptr;
+  bool out = inner_maybe_resolve_localhost_manually_locked(name, default_port,
+                                                           addrs, &host, &port);
+  gpr_free(host);
+  gpr_free(port);
+  return out;
+}
+
+#endif /* GRPC_ARES == 1 && defined(GPR_WINDOWS) */
diff --git a/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc b/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc
index 920a492fdb..65ff1ec1a5 100644
--- a/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc
+++ b/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc
@@ -58,6 +58,8 @@ class NativeDnsResolver : public Resolver {
 
   void RequestReresolutionLocked() override;
 
+  void ResetBackoffLocked() override;
+
   void ShutdownLocked() override;
 
  private:
@@ -158,6 +160,13 @@ void NativeDnsResolver::RequestReresolutionLocked() {
   }
 }
 
+void NativeDnsResolver::ResetBackoffLocked() {
+  if (have_next_resolution_timer_) {
+    grpc_timer_cancel(&next_resolution_timer_);
+  }
+  backoff_.Reset();
+}
+
 void NativeDnsResolver::ShutdownLocked() {
   if (have_next_resolution_timer_) {
     grpc_timer_cancel(&next_resolution_timer_);
@@ -218,7 +227,7 @@ void NativeDnsResolver::OnResolvedLocked(void* arg, grpc_error* error) {
         r->Ref(DEBUG_LOCATION, "next_resolution_timer");
     self.release();
     if (timeout > 0) {
-      gpr_log(GPR_DEBUG, "retrying in %" PRIdPTR " milliseconds", timeout);
+      gpr_log(GPR_DEBUG, "retrying in %" PRId64 " milliseconds", timeout);
     } else {
       gpr_log(GPR_DEBUG, "retrying immediately");
     }
@@ -238,13 +247,7 @@ void NativeDnsResolver::OnResolvedLocked(void* arg, grpc_error* error) {
 void NativeDnsResolver::MaybeStartResolvingLocked() {
   // If there is an existing timer, the time it fires is the earliest time we
   // can start the next resolution.
-  if (have_next_resolution_timer_) {
-    // TODO(dgq): remove the following two lines once Pick First stops
-    // discarding subchannels after selecting.
-    ++resolved_version_;
-    MaybeFinishNextLocked();
-    return;
-  }
+  if (have_next_resolution_timer_) return;
   if (last_resolution_timestamp_ >= 0) {
     const grpc_millis earliest_next_resolution =
         last_resolution_timestamp_ + min_time_between_resolutions_;
@@ -254,8 +257,8 @@ void NativeDnsResolver::MaybeStartResolvingLocked() {
       const grpc_millis last_resolution_ago =
           grpc_core::ExecCtx::Get()->Now() - last_resolution_timestamp_;
       gpr_log(GPR_DEBUG,
-              "In cooldown from last resolution (from %" PRIdPTR
-              " ms ago). Will resolve again in %" PRIdPTR " ms",
+              "In cooldown from last resolution (from %" PRId64
+              " ms ago). Will resolve again in %" PRId64 " ms",
               last_resolution_ago, ms_until_next_resolution);
       have_next_resolution_timer_ = true;
       // TODO(roth): We currently deal with this ref manually.  Once the
@@ -266,10 +269,6 @@ void NativeDnsResolver::MaybeStartResolvingLocked() {
       self.release();
       grpc_timer_init(&next_resolution_timer_, ms_until_next_resolution,
                       &on_next_resolution_);
-      // TODO(dgq): remove the following two lines once Pick First stops
-      // discarding subchannels after selecting.
-      ++resolved_version_;
-      MaybeFinishNextLocked();
       return;
     }
   }
diff --git a/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc b/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc
index 99a33f2277..144ac24a56 100644
--- a/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc
+++ b/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc
@@ -73,11 +73,6 @@ class FakeResolver : public Resolver {
   // Results to use for the pretended re-resolution in
   // RequestReresolutionLocked().
   grpc_channel_args* reresolution_results_ = nullptr;
-  // TODO(juanlishen): This can go away once pick_first is changed to not throw
-  // away its subchannels, since that will eliminate its dependence on
-  // channel_saw_error_locked() causing an immediate resolver return.
-  // A copy of the most-recently used resolution results.
-  grpc_channel_args* last_used_results_ = nullptr;
   // pending next completion, or NULL
   grpc_closure* next_completion_ = nullptr;
   // target result address for next completion
@@ -96,7 +91,6 @@ FakeResolver::FakeResolver(const ResolverArgs& args) : Resolver(args.combiner) {
 FakeResolver::~FakeResolver() {
   grpc_channel_args_destroy(next_results_);
   grpc_channel_args_destroy(reresolution_results_);
-  grpc_channel_args_destroy(last_used_results_);
   grpc_channel_args_destroy(channel_args_);
 }
 
@@ -109,17 +103,11 @@ void FakeResolver::NextLocked(grpc_channel_args** target_result,
 }
 
 void FakeResolver::RequestReresolutionLocked() {
-  // A resolution must have been returned before an error is seen.
-  GPR_ASSERT(last_used_results_ != nullptr);
-  grpc_channel_args_destroy(next_results_);
   if (reresolution_results_ != nullptr) {
+    grpc_channel_args_destroy(next_results_);
     next_results_ = grpc_channel_args_copy(reresolution_results_);
-  } else {
-    // If reresolution_results is unavailable, re-resolve with the most-recently
-    // used results to avoid a no-op re-resolution.
-    next_results_ = grpc_channel_args_copy(last_used_results_);
+    MaybeFinishNextLocked();
   }
-  MaybeFinishNextLocked();
 }
 
 void FakeResolver::MaybeFinishNextLocked() {
@@ -161,8 +149,6 @@ void FakeResolverResponseGenerator::SetResponseLocked(void* arg,
   FakeResolver* resolver = closure_arg->generator->resolver_;
   grpc_channel_args_destroy(resolver->next_results_);
   resolver->next_results_ = closure_arg->response;
-  grpc_channel_args_destroy(resolver->last_used_results_);
-  resolver->last_used_results_ = grpc_channel_args_copy(closure_arg->response);
   resolver->MaybeFinishNextLocked();
   Delete(closure_arg);
 }
diff --git a/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h b/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h
index e5175f9b7b..708eaf1147 100644
--- a/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h
+++ b/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h
@@ -53,7 +53,8 @@ class FakeResolverResponseGenerator
   // The new re-resolution response replaces any previous re-resolution
   // response that may have been set by a previous call.
   // If the re-resolution response is set to NULL, then the fake
-  // resolver will return the last value set via \a SetResponse().
+  // resolver will not return anything when \a RequestReresolutionLocked()
+  // is called.
   void SetReresolutionResponse(grpc_channel_args* response);
 
   // Tells the resolver to return a transient failure (signalled by
diff --git a/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc b/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc
index f74ac5aebe..801734764b 100644
--- a/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc
+++ b/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc
@@ -50,8 +50,6 @@ class SockaddrResolver : public Resolver {
   void NextLocked(grpc_channel_args** result,
                   grpc_closure* on_complete) override;
 
-  void RequestReresolutionLocked() override;
-
   void ShutdownLocked() override;
 
  private:
@@ -90,11 +88,6 @@ void SockaddrResolver::NextLocked(grpc_channel_args** target_result,
   MaybeFinishNextLocked();
 }
 
-void SockaddrResolver::RequestReresolutionLocked() {
-  published_ = false;
-  MaybeFinishNextLocked();
-}
-
 void SockaddrResolver::ShutdownLocked() {
   if (next_completion_ != nullptr) {
     *target_result_ = nullptr;
diff --git a/src/core/ext/filters/client_channel/subchannel.cc b/src/core/ext/filters/client_channel/subchannel.cc
index 140441da10..57d0b3759f 100644
--- a/src/core/ext/filters/client_channel/subchannel.cc
+++ b/src/core/ext/filters/client_channel/subchannel.cc
@@ -38,6 +38,7 @@
 #include "src/core/lib/channel/channel_args.h"
 #include "src/core/lib/channel/connected_channel.h"
 #include "src/core/lib/debug/stats.h"
+#include "src/core/lib/gpr/alloc.h"
 #include "src/core/lib/gprpp/debug_location.h"
 #include "src/core/lib/gprpp/manual_constructor.h"
 #include "src/core/lib/gprpp/ref_counted_ptr.h"
@@ -131,8 +132,13 @@ struct grpc_subchannel {
   bool have_alarm;
   /** have we started the backoff loop */
   bool backoff_begun;
+  // reset_backoff() was called while alarm was pending
+  bool deferred_reset_backoff;
   /** our alarm */
   grpc_timer alarm;
+
+  grpc_core::RefCountedPtr<grpc_core::channelz::SubchannelNode>
+      channelz_subchannel;
 };
 
 struct grpc_subchannel_call {
@@ -140,9 +146,13 @@ struct grpc_subchannel_call {
   grpc_closure* schedule_closure_after_destroy;
 };
 
-#define SUBCHANNEL_CALL_TO_CALL_STACK(call) ((grpc_call_stack*)((call) + 1))
-#define CALLSTACK_TO_SUBCHANNEL_CALL(callstack) \
-  (((grpc_subchannel_call*)(callstack)) - 1)
+#define SUBCHANNEL_CALL_TO_CALL_STACK(call)                          \
+  (grpc_call_stack*)((char*)(call) + GPR_ROUND_UP_TO_ALIGNMENT_SIZE( \
+                                         sizeof(grpc_subchannel_call)))
+#define CALLSTACK_TO_SUBCHANNEL_CALL(callstack)           \
+  (grpc_subchannel_call*)(((char*)(call_stack)) -         \
+                          GPR_ROUND_UP_TO_ALIGNMENT_SIZE( \
+                              sizeof(grpc_subchannel_call)))
 
 static void on_subchannel_connected(void* subchannel, grpc_error* error);
 
@@ -173,6 +183,13 @@ static void connection_destroy(void* arg, grpc_error* error) {
 
 static void subchannel_destroy(void* arg, grpc_error* error) {
   grpc_subchannel* c = static_cast<grpc_subchannel*>(arg);
+  if (c->channelz_subchannel != nullptr) {
+    c->channelz_subchannel->AddTraceEvent(
+        grpc_core::channelz::ChannelTrace::Severity::Info,
+        grpc_slice_from_static_string("Subchannel destroyed"));
+    c->channelz_subchannel->MarkSubchannelDestroyed();
+    c->channelz_subchannel.reset();
+  }
   gpr_free((void*)c->filters);
   grpc_channel_args_destroy(c->args);
   grpc_connectivity_state_destroy(&c->state_tracker);
@@ -369,9 +386,31 @@ grpc_subchannel* grpc_subchannel_create(grpc_connector* connector,
   c->backoff.Init(backoff_options);
   gpr_mu_init(&c->mu);
 
+  const grpc_arg* arg =
+      grpc_channel_args_find(c->args, GRPC_ARG_ENABLE_CHANNELZ);
+  bool channelz_enabled = grpc_channel_arg_get_bool(arg, false);
+  arg = grpc_channel_args_find(c->args,
+                               GRPC_ARG_MAX_CHANNEL_TRACE_EVENTS_PER_NODE);
+  const grpc_integer_options options = {0, 0, INT_MAX};
+  size_t channel_tracer_max_nodes =
+      (size_t)grpc_channel_arg_get_integer(arg, options);
+  if (channelz_enabled) {
+    c->channelz_subchannel =
+        grpc_core::MakeRefCounted<grpc_core::channelz::SubchannelNode>(
+            c, channel_tracer_max_nodes);
+    c->channelz_subchannel->AddTraceEvent(
+        grpc_core::channelz::ChannelTrace::Severity::Info,
+        grpc_slice_from_static_string("Subchannel created"));
+  }
+
   return grpc_subchannel_index_register(key, c);
 }
 
+grpc_core::channelz::SubchannelNode* grpc_subchannel_get_channelz_node(
+    grpc_subchannel* subchannel) {
+  return subchannel->channelz_subchannel.get();
+}
+
 static void continue_connect_locked(grpc_subchannel* c) {
   grpc_connect_in_args args;
   args.interested_parties = c->pollset_set;
@@ -381,7 +420,7 @@ static void continue_connect_locked(grpc_subchannel* c) {
   args.deadline = std::max(c->next_attempt_deadline, min_deadline);
   args.channel_args = c->args;
   grpc_connectivity_state_set(&c->state_tracker, GRPC_CHANNEL_CONNECTING,
-                              GRPC_ERROR_NONE, "state_change");
+                              GRPC_ERROR_NONE, "connecting");
   grpc_connector_connect(c->connector, &args, &c->connecting_result,
                          &c->on_connected);
 }
@@ -408,7 +447,7 @@ static void on_external_state_watcher_done(void* arg, grpc_error* error) {
   gpr_mu_unlock(&w->subchannel->mu);
   GRPC_SUBCHANNEL_WEAK_UNREF(w->subchannel, "external_state_watcher");
   gpr_free(w);
-  GRPC_CLOSURE_RUN(follow_up, GRPC_ERROR_REF(error));
+  GRPC_CLOSURE_SCHED(follow_up, GRPC_ERROR_REF(error));
 }
 
 static void on_alarm(void* arg, grpc_error* error) {
@@ -418,6 +457,9 @@ static void on_alarm(void* arg, grpc_error* error) {
   if (c->disconnected) {
     error = GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING("Disconnected",
                                                              &error, 1);
+  } else if (c->deferred_reset_backoff) {
+    c->deferred_reset_backoff = false;
+    error = GRPC_ERROR_NONE;
   } else {
     GRPC_ERROR_REF(error);
   }
@@ -437,25 +479,20 @@ static void maybe_start_connecting_locked(grpc_subchannel* c) {
     /* Don't try to connect if we're already disconnected */
     return;
   }
-
   if (c->connecting) {
     /* Already connecting: don't restart */
     return;
   }
-
   if (c->connected_subchannel != nullptr) {
     /* Already connected: don't restart */
     return;
   }
-
   if (!grpc_connectivity_state_has_watchers(&c->state_tracker)) {
     /* Nobody is interested in connecting: so don't just yet */
     return;
   }
-
   c->connecting = true;
   GRPC_SUBCHANNEL_WEAK_REF(c, "connecting");
-
   if (!c->backoff_begun) {
     c->backoff_begun = true;
     continue_connect_locked(c);
@@ -467,7 +504,7 @@ static void maybe_start_connecting_locked(grpc_subchannel* c) {
     if (time_til_next <= 0) {
       gpr_log(GPR_INFO, "Subchannel %p: Retry immediately", c);
     } else {
-      gpr_log(GPR_INFO, "Subchannel %p: Retry in %" PRIdPTR " milliseconds", c,
+      gpr_log(GPR_INFO, "Subchannel %p: Retry in %" PRId64 " milliseconds", c,
               time_til_next);
     }
     GRPC_CLOSURE_INIT(&c->on_alarm, on_alarm, c, grpc_schedule_on_exec_ctx);
@@ -603,8 +640,8 @@ static bool publish_transport_locked(grpc_subchannel* c) {
   }
 
   /* publish */
-  c->connected_subchannel.reset(
-      grpc_core::New<grpc_core::ConnectedSubchannel>(stk));
+  c->connected_subchannel.reset(grpc_core::New<grpc_core::ConnectedSubchannel>(
+      stk, c->channelz_subchannel.get()));
   gpr_log(GPR_INFO, "New connected subchannel at %p for subchannel %p",
           c->connected_subchannel.get(), c);
 
@@ -653,6 +690,19 @@ static void on_subchannel_connected(void* arg, grpc_error* error) {
   grpc_channel_args_destroy(delete_channel_args);
 }
 
+void grpc_subchannel_reset_backoff(grpc_subchannel* subchannel) {
+  gpr_mu_lock(&subchannel->mu);
+  if (subchannel->have_alarm) {
+    subchannel->deferred_reset_backoff = true;
+    grpc_timer_cancel(&subchannel->alarm);
+  } else {
+    subchannel->backoff_begun = false;
+    subchannel->backoff->Reset();
+    maybe_start_connecting_locked(subchannel);
+  }
+  gpr_mu_unlock(&subchannel->mu);
+}
+
 /*
  * grpc_subchannel_call implementation
  */
@@ -735,6 +785,14 @@ void grpc_get_subchannel_address_arg(const grpc_channel_args* args,
   }
 }
 
+const char* grpc_subchannel_get_target(grpc_subchannel* subchannel) {
+  const grpc_arg* addr_arg =
+      grpc_channel_args_find(subchannel->args, GRPC_ARG_SUBCHANNEL_ADDRESS);
+  const char* addr_str = grpc_channel_arg_get_string(addr_arg);
+  GPR_ASSERT(addr_str != nullptr);  // Should have been set by LB policy.
+  return addr_str;
+}
+
 const char* grpc_get_subchannel_address_uri_arg(const grpc_channel_args* args) {
   const grpc_arg* addr_arg =
       grpc_channel_args_find(args, GRPC_ARG_SUBCHANNEL_ADDRESS);
@@ -751,9 +809,12 @@ grpc_arg grpc_create_subchannel_address_arg(const grpc_resolved_address* addr) {
 
 namespace grpc_core {
 
-ConnectedSubchannel::ConnectedSubchannel(grpc_channel_stack* channel_stack)
+ConnectedSubchannel::ConnectedSubchannel(
+    grpc_channel_stack* channel_stack,
+    channelz::SubchannelNode* channelz_subchannel)
     : RefCountedWithTracing<ConnectedSubchannel>(&grpc_trace_stream_refcount),
-      channel_stack_(channel_stack) {}
+      channel_stack_(channel_stack),
+      channelz_subchannel_(channelz_subchannel) {}
 
 ConnectedSubchannel::~ConnectedSubchannel() {
   GRPC_CHANNEL_STACK_UNREF(channel_stack_, "connected_subchannel_dtor");
@@ -783,9 +844,17 @@ void ConnectedSubchannel::Ping(grpc_closure* on_initiate,
 
 grpc_error* ConnectedSubchannel::CreateCall(const CallArgs& args,
                                             grpc_subchannel_call** call) {
-  *call = static_cast<grpc_subchannel_call*>(gpr_arena_alloc(
-      args.arena, sizeof(grpc_subchannel_call) +
-                      channel_stack_->call_stack_size + args.parent_data_size));
+  size_t allocation_size =
+      GPR_ROUND_UP_TO_ALIGNMENT_SIZE(sizeof(grpc_subchannel_call));
+  if (args.parent_data_size > 0) {
+    allocation_size +=
+        GPR_ROUND_UP_TO_ALIGNMENT_SIZE(channel_stack_->call_stack_size) +
+        args.parent_data_size;
+  } else {
+    allocation_size += channel_stack_->call_stack_size;
+  }
+  *call = static_cast<grpc_subchannel_call*>(
+      gpr_arena_alloc(args.arena, allocation_size));
   grpc_call_stack* callstk = SUBCHANNEL_CALL_TO_CALL_STACK(*call);
   RefCountedPtr<ConnectedSubchannel> connection =
       Ref(DEBUG_LOCATION, "subchannel_call");
diff --git a/src/core/ext/filters/client_channel/subchannel.h b/src/core/ext/filters/client_channel/subchannel.h
index e23aec12df..84febb5204 100644
--- a/src/core/ext/filters/client_channel/subchannel.h
+++ b/src/core/ext/filters/client_channel/subchannel.h
@@ -21,6 +21,7 @@
 
 #include <grpc/support/port_platform.h>
 
+#include "src/core/ext/filters/client_channel/client_channel_channelz.h"
 #include "src/core/ext/filters/client_channel/connector.h"
 #include "src/core/lib/channel/channel_stack.h"
 #include "src/core/lib/gpr/arena.h"
@@ -84,7 +85,8 @@ class ConnectedSubchannel : public RefCountedWithTracing<ConnectedSubchannel> {
     size_t parent_data_size;
   };
 
-  explicit ConnectedSubchannel(grpc_channel_stack* channel_stack);
+  explicit ConnectedSubchannel(grpc_channel_stack* channel_stack,
+                               channelz::SubchannelNode* channelz_subchannel);
   ~ConnectedSubchannel();
 
   grpc_channel_stack* channel_stack() { return channel_stack_; }
@@ -93,9 +95,15 @@ class ConnectedSubchannel : public RefCountedWithTracing<ConnectedSubchannel> {
                            grpc_closure* closure);
   void Ping(grpc_closure* on_initiate, grpc_closure* on_ack);
   grpc_error* CreateCall(const CallArgs& args, grpc_subchannel_call** call);
+  channelz::SubchannelNode* channelz_subchannel() {
+    return channelz_subchannel_;
+  }
 
  private:
   grpc_channel_stack* channel_stack_;
+  // backpointer to the channelz node in this connected subchannel's
+  // owning subchannel.
+  channelz::SubchannelNode* channelz_subchannel_;
 };
 
 }  // namespace grpc_core
@@ -115,6 +123,9 @@ grpc_subchannel_call* grpc_subchannel_call_ref(
 void grpc_subchannel_call_unref(
     grpc_subchannel_call* call GRPC_SUBCHANNEL_REF_EXTRA_ARGS);
 
+grpc_core::channelz::SubchannelNode* grpc_subchannel_get_channelz_node(
+    grpc_subchannel* subchannel);
+
 /** Returns a pointer to the parent data associated with \a subchannel_call.
     The data will be of the size specified in \a parent_data_size
     field of the args passed to \a grpc_connected_subchannel_create_call(). */
@@ -141,6 +152,13 @@ grpc_subchannel_get_connected_subchannel(grpc_subchannel* c);
 const grpc_subchannel_key* grpc_subchannel_get_key(
     const grpc_subchannel* subchannel);
 
+// Resets the connection backoff of the subchannel.
+// TODO(roth): Move connection backoff out of subchannels and up into LB
+// policy code (probably by adding a SubchannelGroup between
+// SubchannelList and SubchannelData), at which point this method can
+// go away.
+void grpc_subchannel_reset_backoff(grpc_subchannel* subchannel);
+
 /** continue processing a transport op */
 void grpc_subchannel_call_process_op(grpc_subchannel_call* subchannel_call,
                                      grpc_transport_stream_op_batch* op);
@@ -173,6 +191,8 @@ grpc_subchannel* grpc_subchannel_create(grpc_connector* connector,
 void grpc_get_subchannel_address_arg(const grpc_channel_args* args,
                                      grpc_resolved_address* addr);
 
+const char* grpc_subchannel_get_target(grpc_subchannel* subchannel);
+
 /// Returns the URI string for the address to connect to.
 const char* grpc_get_subchannel_address_uri_arg(const grpc_channel_args* args);
 
diff --git a/src/core/ext/filters/client_channel/subchannel_index.cc b/src/core/ext/filters/client_channel/subchannel_index.cc
index cb02b1a748..1c23a6c4be 100644
--- a/src/core/ext/filters/client_channel/subchannel_index.cc
+++ b/src/core/ext/filters/client_channel/subchannel_index.cc
@@ -73,7 +73,8 @@ static grpc_subchannel_key* subchannel_key_copy(grpc_subchannel_key* k) {
 
 int grpc_subchannel_key_compare(const grpc_subchannel_key* a,
                                 const grpc_subchannel_key* b) {
-  if (g_force_creation) return false;
+  // To pretend the keys are different, return a non-zero value.
+  if (GPR_UNLIKELY(g_force_creation)) return 1;
   int c = GPR_ICMP(a->args.filter_count, b->args.filter_count);
   if (c != 0) return c;
   if (a->args.filter_count > 0) {
diff --git a/src/core/ext/filters/client_channel/subchannel_index.h b/src/core/ext/filters/client_channel/subchannel_index.h
index a7dae9d47d..c135613d26 100644
--- a/src/core/ext/filters/client_channel/subchannel_index.h
+++ b/src/core/ext/filters/client_channel/subchannel_index.h
@@ -65,13 +65,10 @@ void grpc_subchannel_index_ref(void);
 void grpc_subchannel_index_unref(void);
 
 /** \em TEST ONLY.
- * If \a force_creation is true, all key comparisons will be false, resulting in
+ * If \a force_creation is true, all keys are regarded different, resulting in
  * new subchannels always being created. Otherwise, the keys will be compared as
  * usual.
  *
- * This function is *not* threadsafe on purpose: it should *only* be used in
- * test code.
- *
  * Tests using this function \em MUST run tests with and without \a
  * force_creation set. */
 void grpc_subchannel_index_test_only_set_force_creation(bool force_creation);