1 files changed, 344 insertions, 160 deletions
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c
index c3acc3d9db..a23a56c052 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c
@@ -115,6 +115,7 @@
 #include "src/core/ext/filters/client_channel/lb_policy_factory.h"
 #include "src/core/ext/filters/client_channel/lb_policy_registry.h"
 #include "src/core/ext/filters/client_channel/parse_address.h"
+#include "src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h"
 #include "src/core/lib/channel/channel_args.h"
 #include "src/core/lib/channel/channel_stack.h"
 #include "src/core/lib/iomgr/combiner.h"
@@ -315,6 +316,9 @@ typedef struct glb_lb_policy {
   /** for communicating with the LB server */
   grpc_channel *lb_channel;
 
+  /** response generator to inject address updates into \a lb_channel */
+  grpc_fake_resolver_response_generator *response_generator;
+
   /** the RR policy to use of the backend servers returned by the LB server */
   grpc_lb_policy *rr_policy;
 
@@ -323,6 +327,9 @@ typedef struct glb_lb_policy {
   /** our connectivity state tracker */
   grpc_connectivity_state_tracker state_tracker;
 
+  /** connectivity state of the LB channel */
+  grpc_connectivity_state lb_channel_connectivity;
+
   /** stores the deserialized response from the LB. May be NULL until one such
    * response has arrived. */
   grpc_grpclb_serverlist *serverlist;
@@ -340,10 +347,27 @@ typedef struct glb_lb_policy {
 
   bool shutting_down;
 
+  /** are we currently updating lb_call? */
+  bool updating_lb_call;
+
+  /** are we currently updating lb_channel? */
+  bool updating_lb_channel;
+
+  /** are we already watching the LB channel's connectivity? */
+  bool watching_lb_channel;
+
+  /** is \a lb_call_retry_timer active? */
+  bool retry_timer_active;
+
+  /** called upon changes to the LB channel's connectivity. */
+  grpc_closure lb_channel_on_connectivity_changed;
+
+  /** args from the latest update received while already updating, or NULL */
+  grpc_lb_policy_args *pending_update_args;
+
   /************************************************************/
   /*  client data associated with the LB server communication */
   /************************************************************/
-
   /* Finished sending initial request. */
   grpc_closure lb_on_sent_initial_request;
 
@@ -533,10 +557,9 @@ static grpc_lb_addresses *process_serverlist_locked(
   return lb_addresses;
 }
 
-/* returns true if the new RR policy should replace the current one, if any */
-static bool update_lb_connectivity_status_locked(
+static void update_lb_connectivity_status_locked(
     grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy,
-    grpc_connectivity_state new_rr_state, grpc_error *new_rr_state_error) {
+    grpc_connectivity_state rr_state, grpc_error *rr_state_error) {
   const grpc_connectivity_state curr_glb_state =
       grpc_connectivity_state_check(&glb_policy->state_tracker);
 
@@ -570,28 +593,26 @@ static bool update_lb_connectivity_status_locked(
    *  (*) This function mustn't be called during shutting down. */
   GPR_ASSERT(curr_glb_state != GRPC_CHANNEL_SHUTDOWN);
 
-  switch (new_rr_state) {
+  switch (rr_state) {
     case GRPC_CHANNEL_TRANSIENT_FAILURE:
     case GRPC_CHANNEL_SHUTDOWN:
-      GPR_ASSERT(new_rr_state_error != GRPC_ERROR_NONE);
-      return false; /* don't replace the RR policy */
+      GPR_ASSERT(rr_state_error != GRPC_ERROR_NONE);
+      break;
     case GRPC_CHANNEL_INIT:
     case GRPC_CHANNEL_IDLE:
     case GRPC_CHANNEL_CONNECTING:
     case GRPC_CHANNEL_READY:
-      GPR_ASSERT(new_rr_state_error == GRPC_ERROR_NONE);
+      GPR_ASSERT(rr_state_error == GRPC_ERROR_NONE);
   }
 
   if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-    gpr_log(GPR_INFO,
-            "Setting grpclb's state to %s from new RR policy %p state.",
-            grpc_connectivity_state_name(new_rr_state),
-            (void *)glb_policy->rr_policy);
+    gpr_log(
+        GPR_INFO, "Setting grpclb's state to %s from new RR policy %p state.",
+        grpc_connectivity_state_name(rr_state), (void *)glb_policy->rr_policy);
   }
-  grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker,
-                              new_rr_state, GRPC_ERROR_REF(new_rr_state_error),
+  grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker, rr_state,
+                              GRPC_ERROR_REF(rr_state_error),
                               "update_lb_connectivity_status_locked");
-  return true;
 }
 
 /* Perform a pick over \a glb_policy->rr_policy. Given that a pick can return
@@ -670,45 +691,38 @@ static bool pick_from_internal_rr_locked(
   return pick_done;
 }
 
-static grpc_lb_policy *create_rr_locked(
-    grpc_exec_ctx *exec_ctx, const grpc_grpclb_serverlist *serverlist,
-    glb_lb_policy *glb_policy) {
-  GPR_ASSERT(serverlist != NULL && serverlist->num_servers > 0);
-
-  grpc_lb_policy_args args;
-  memset(&args, 0, sizeof(args));
-  args.client_channel_factory = glb_policy->cc_factory;
-  args.combiner = glb_policy->base.combiner;
+static grpc_lb_policy_args *lb_policy_args_create(grpc_exec_ctx *exec_ctx,
+                                                  glb_lb_policy *glb_policy) {
+  grpc_lb_policy_args *args = gpr_zalloc(sizeof(*args));
+  args->client_channel_factory = glb_policy->cc_factory;
+  args->combiner = glb_policy->base.combiner;
   grpc_lb_addresses *addresses =
-      process_serverlist_locked(exec_ctx, serverlist);
-
+      process_serverlist_locked(exec_ctx, glb_policy->serverlist);
   // Replace the LB addresses in the channel args that we pass down to
   // the subchannel.
   static const char *keys_to_remove[] = {GRPC_ARG_LB_ADDRESSES};
   const grpc_arg arg = grpc_lb_addresses_create_channel_arg(addresses);
-  args.args = grpc_channel_args_copy_and_add_and_remove(
+  args->args = grpc_channel_args_copy_and_add_and_remove(
       glb_policy->args, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove), &arg,
       1);
-
-  grpc_lb_policy *rr = grpc_lb_policy_create(exec_ctx, "round_robin", &args);
-  GPR_ASSERT(rr != NULL);
   grpc_lb_addresses_destroy(exec_ctx, addresses);
-  grpc_channel_args_destroy(exec_ctx, args.args);
-  return rr;
+  return args;
+}
+
+static void lb_policy_args_destroy(grpc_exec_ctx *exec_ctx,
+                                   grpc_lb_policy_args *args) {
+  grpc_channel_args_destroy(exec_ctx, args->args);
+  gpr_free(args);
 }
 
 static void glb_rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx,
                                                void *arg, grpc_error *error);
-/* glb_policy->rr_policy may be NULL (initial handover) */
-static void rr_handover_locked(grpc_exec_ctx *exec_ctx,
-                               glb_lb_policy *glb_policy) {
-  GPR_ASSERT(glb_policy->serverlist != NULL &&
-             glb_policy->serverlist->num_servers > 0);
-
-  if (glb_policy->shutting_down) return;
+static void create_rr_locked(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy,
+                             grpc_lb_policy_args *args) {
+  GPR_ASSERT(glb_policy->rr_policy == NULL);
 
   grpc_lb_policy *new_rr_policy =
-      create_rr_locked(exec_ctx, glb_policy->serverlist, glb_policy);
+      grpc_lb_policy_create(exec_ctx, "round_robin", args);
   if (new_rr_policy == NULL) {
     gpr_log(GPR_ERROR,
             "Failure creating a RoundRobin policy for serverlist update with "
@@ -719,41 +733,16 @@ static void rr_handover_locked(grpc_exec_ctx *exec_ctx,
             (void *)glb_policy->rr_policy);
     return;
   }
-
-  grpc_error *new_rr_state_error = NULL;
-  const grpc_connectivity_state new_rr_state =
-      grpc_lb_policy_check_connectivity_locked(exec_ctx, new_rr_policy,
-                                               &new_rr_state_error);
-  /* Connectivity state is a function of the new RR policy just created */
-  const bool replace_old_rr = update_lb_connectivity_status_locked(
-      exec_ctx, glb_policy, new_rr_state, new_rr_state_error);
-
-  if (!replace_old_rr) {
-    /* dispose of the new RR policy that won't be used after all */
-    GRPC_LB_POLICY_UNREF(exec_ctx, new_rr_policy, "rr_handover_no_replace");
-    if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-      gpr_log(GPR_INFO,
-              "Keeping old RR policy (%p) despite new serverlist: new RR "
-              "policy was in %s connectivity state.",
-              (void *)glb_policy->rr_policy,
-              grpc_connectivity_state_name(new_rr_state));
-    }
-    return;
-  }
-
-  if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-    gpr_log(GPR_INFO, "Created RR policy (%p) to replace old RR (%p)",
-            (void *)new_rr_policy, (void *)glb_policy->rr_policy);
-  }
-
-  if (glb_policy->rr_policy != NULL) {
-    /* if we are phasing out an existing RR instance, unref it. */
-    GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "rr_handover");
-  }
-
-  /* Finally update the RR policy to the newly created one */
   glb_policy->rr_policy = new_rr_policy;
 
+  grpc_error *rr_state_error = NULL;
+  const grpc_connectivity_state rr_state =
+      grpc_lb_policy_check_connectivity_locked(exec_ctx, glb_policy->rr_policy,
+                                               &rr_state_error);
+  /* Connectivity state is a function of the RR policy updated/created */
+  update_lb_connectivity_status_locked(exec_ctx, glb_policy, rr_state,
+                                       rr_state_error);
+
   /* Add the gRPC LB's interested_parties pollset_set to that of the newly
    * created RR policy. This will make the RR policy progress upon activity on
    * gRPC LB, which in turn is tied to the application's call */
@@ -769,10 +758,10 @@ static void rr_handover_locked(grpc_exec_ctx *exec_ctx,
                     glb_rr_connectivity_changed_locked, rr_connectivity,
                     grpc_combiner_scheduler(glb_policy->base.combiner));
   rr_connectivity->glb_policy = glb_policy;
-  rr_connectivity->state = new_rr_state;
+  rr_connectivity->state = rr_state;
 
   /* Subscribe to changes to the connectivity of the new RR */
-  GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "rr_connectivity_cb");
+  GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "rr_connectivity_sched");
   grpc_lb_policy_notify_on_state_change_locked(exec_ctx, glb_policy->rr_policy,
                                                &rr_connectivity->state,
                                                &rr_connectivity->on_change);
@@ -809,6 +798,31 @@ static void rr_handover_locked(grpc_exec_ctx *exec_ctx,
   }
 }
 
+/* glb_policy->rr_policy may be NULL (initial handover) */
+static void rr_handover_locked(grpc_exec_ctx *exec_ctx,
+                               glb_lb_policy *glb_policy) {
+  GPR_ASSERT(glb_policy->serverlist != NULL &&
+             glb_policy->serverlist->num_servers > 0);
+
+  if (glb_policy->shutting_down) return;
+
+  grpc_lb_policy_args *args = lb_policy_args_create(exec_ctx, glb_policy);
+  if (glb_policy->rr_policy != NULL) {
+    if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
+      gpr_log(GPR_DEBUG, "Updating Round Robin policy (%p)",
+              (void *)glb_policy->rr_policy);
+    }
+    grpc_lb_policy_update_locked(exec_ctx, glb_policy->rr_policy, args);
+  } else {
+    create_rr_locked(exec_ctx, glb_policy, args);
+    if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
+      gpr_log(GPR_DEBUG, "Created new Round Robin policy (%p)",
+              (void *)glb_policy->rr_policy);
+    }
+  }
+  lb_policy_args_destroy(exec_ctx, args);
+}
+
 static void glb_rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx,
                                                void *arg, grpc_error *error) {
   rr_connectivity_data *rr_connectivity = arg;
@@ -854,18 +868,24 @@ static grpc_slice_hash_table_entry targets_info_entry_create(
   return entry;
 }
 
-/* Returns the target URI for the LB service whose addresses are in \a
- * addresses.  Using this URI, a bidirectional streaming channel will be created
- * for the reception of load balancing updates.
+static int balancer_name_cmp_fn(void *a, void *b) {
+  const char *a_str = a;
+  const char *b_str = b;
+  return strcmp(a_str, b_str);
+}
+
+/* Returns the channel args for the LB channel, used to create a bidirectional
+ * stream for the reception of load balancing updates.
  *
- * The output argument \a targets_info will be updated to contain a mapping of
- * "LB server address" to "balancer name", as reported by the naming system.
- * This mapping will be propagated via the channel arguments of the
- * aforementioned LB streaming channel, to be used by the security connector for
- * secure naming checks. The user is responsible for freeing \a targets_info. */
-static char *get_lb_uri_target_addresses(grpc_exec_ctx *exec_ctx,
-                                         const grpc_lb_addresses *addresses,
-                                         grpc_slice_hash_table **targets_info) {
+ * Inputs:
+ *   - \a addresses: corresponding to the balancers.
+ *   - \a response_generator: in order to propagate updates from the resolver
+ *   above the grpclb policy.
+ *   - \a args: other args inherited from the grpclb policy. */
+static grpc_channel_args *build_lb_channel_args(
+    grpc_exec_ctx *exec_ctx, const grpc_lb_addresses *addresses,
+    grpc_fake_resolver_response_generator *response_generator,
+    const grpc_channel_args *args) {
   size_t num_grpclb_addrs = 0;
   for (size_t i = 0; i < addresses->num_addresses; ++i) {
     if (addresses->addresses[i].is_balancer) ++num_grpclb_addrs;
@@ -874,53 +894,54 @@ static char *get_lb_uri_target_addresses(grpc_exec_ctx *exec_ctx,
    * It's the resolver's responsibility to make sure this policy is only
    * instantiated and used in that case. Otherwise, something has gone wrong. */
   GPR_ASSERT(num_grpclb_addrs > 0);
-
+  grpc_lb_addresses *lb_addresses =
+      grpc_lb_addresses_create(num_grpclb_addrs, NULL);
   grpc_slice_hash_table_entry *targets_info_entries =
-      gpr_malloc(sizeof(*targets_info_entries) * num_grpclb_addrs);
+      gpr_zalloc(sizeof(*targets_info_entries) * num_grpclb_addrs);
 
-  /* construct a target ipvX://ip1:port1,ip2:port2,... from the addresses in \a
-   * addresses */
-  /* TODO(dgq): support mixed ip version */
-  char **addr_strs = gpr_malloc(sizeof(char *) * num_grpclb_addrs);
-  size_t addr_index = 0;
-
-  for (size_t i = 0; i < addresses->num_addresses; i++) {
+  size_t lb_addresses_idx = 0;
+  for (size_t i = 0; i < addresses->num_addresses; ++i) {
+    if (!addresses->addresses[i].is_balancer) continue;
     if (addresses->addresses[i].user_data != NULL) {
       gpr_log(GPR_ERROR,
               "This LB policy doesn't support user data. It will be ignored");
     }
-    if (addresses->addresses[i].is_balancer) {
-      char *addr_str;
-      GPR_ASSERT(grpc_sockaddr_to_string(
-                     &addr_str, &addresses->addresses[i].address, true) > 0);
-      targets_info_entries[addr_index] = targets_info_entry_create(
-          addr_str, addresses->addresses[i].balancer_name);
-      addr_strs[addr_index++] = addr_str;
-    }
+    char *addr_str;
+    GPR_ASSERT(grpc_sockaddr_to_string(
+                   &addr_str, &addresses->addresses[i].address, true) > 0);
+    targets_info_entries[lb_addresses_idx] = targets_info_entry_create(
+        addr_str, addresses->addresses[i].balancer_name);
+    gpr_free(addr_str);
+
+    grpc_lb_addresses_set_address(
+        lb_addresses, lb_addresses_idx++, addresses->addresses[i].address.addr,
+        addresses->addresses[i].address.len, false /* is balancer */,
+        addresses->addresses[i].balancer_name, NULL /* user data */);
   }
-  GPR_ASSERT(addr_index == num_grpclb_addrs);
-
-  size_t uri_path_len;
-  char *uri_path = gpr_strjoin_sep((const char **)addr_strs, num_grpclb_addrs,
-                                   ",", &uri_path_len);
-  for (size_t i = 0; i < num_grpclb_addrs; i++) gpr_free(addr_strs[i]);
-  gpr_free(addr_strs);
-
-  char *target_uri_str = NULL;
-  /* TODO(dgq): Don't assume all addresses will share the scheme of the first
-   * one */
-  gpr_asprintf(&target_uri_str, "%s:%s",
-               grpc_sockaddr_get_uri_scheme(&addresses->addresses[0].address),
-               uri_path);
-  gpr_free(uri_path);
-
-  *targets_info = grpc_slice_hash_table_create(
-      num_grpclb_addrs, targets_info_entries, destroy_balancer_name);
+  GPR_ASSERT(num_grpclb_addrs == lb_addresses_idx);
+  grpc_slice_hash_table *targets_info =
+      grpc_slice_hash_table_create(num_grpclb_addrs, targets_info_entries,
+                                   destroy_balancer_name, balancer_name_cmp_fn);
   gpr_free(targets_info_entries);
 
-  return target_uri_str;
+  grpc_channel_args *lb_channel_args =
+      grpc_lb_policy_grpclb_build_lb_channel_args(exec_ctx, targets_info,
+                                                  response_generator, args);
+
+  grpc_arg lb_channel_addresses_arg =
+      grpc_lb_addresses_create_channel_arg(lb_addresses);
+
+  grpc_channel_args *result = grpc_channel_args_copy_and_add(
+      lb_channel_args, &lb_channel_addresses_arg, 1);
+  grpc_slice_hash_table_unref(exec_ctx, targets_info);
+  grpc_channel_args_destroy(exec_ctx, lb_channel_args);
+  grpc_lb_addresses_destroy(exec_ctx, lb_addresses);
+  return result;
 }
 
+static void glb_lb_channel_on_connectivity_changed_cb(grpc_exec_ctx *exec_ctx,
+                                                      void *arg,
+                                                      grpc_error *error);
 static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx,
                                   grpc_lb_policy_factory *factory,
                                   grpc_lb_policy_args *args) {
@@ -976,24 +997,31 @@ static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx,
   glb_policy->args = grpc_channel_args_copy_and_add_and_remove(
       args->args, args_to_remove, GPR_ARRAY_SIZE(args_to_remove), &new_arg, 1);
 
-  grpc_slice_hash_table *targets_info = NULL;
   /* Create a client channel over them to communicate with a LB service */
-  char *lb_service_target_addresses =
-      get_lb_uri_target_addresses(exec_ctx, addresses, &targets_info);
-  grpc_channel_args *lb_channel_args =
-      get_lb_channel_args(exec_ctx, targets_info, args->args);
+  glb_policy->response_generator =
+      grpc_fake_resolver_response_generator_create();
+  grpc_channel_args *lb_channel_args = build_lb_channel_args(
+      exec_ctx, addresses, glb_policy->response_generator, args->args);
+  char *uri_str;
+  gpr_asprintf(&uri_str, "fake:///%s", glb_policy->server_name);
   glb_policy->lb_channel = grpc_lb_policy_grpclb_create_lb_channel(
-      exec_ctx, lb_service_target_addresses, args->client_channel_factory,
-      lb_channel_args);
-  grpc_slice_hash_table_unref(exec_ctx, targets_info);
+      exec_ctx, uri_str, args->client_channel_factory, lb_channel_args);
+
+  /* Propagate initial resolution */
+  grpc_fake_resolver_response_generator_set_response(
+      exec_ctx, glb_policy->response_generator, lb_channel_args);
   grpc_channel_args_destroy(exec_ctx, lb_channel_args);
-  gpr_free(lb_service_target_addresses);
+  gpr_free(uri_str);
   if (glb_policy->lb_channel == NULL) {
     gpr_free((void *)glb_policy->server_name);
     grpc_channel_args_destroy(exec_ctx, glb_policy->args);
     gpr_free(glb_policy);
     return NULL;
   }
+
+  grpc_closure_init(&glb_policy->lb_channel_on_connectivity_changed,
+                    glb_lb_channel_on_connectivity_changed_cb, glb_policy,
+                    grpc_combiner_scheduler(args->combiner, false));
   grpc_lb_policy_init(&glb_policy->base, &glb_lb_policy_vtable, args->combiner);
   grpc_connectivity_state_init(&glb_policy->state_tracker, GRPC_CHANNEL_IDLE,
                                "grpclb");
@@ -1009,12 +1037,15 @@ static void glb_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
   if (glb_policy->client_stats != NULL) {
     grpc_grpclb_client_stats_unref(glb_policy->client_stats);
   }
-  grpc_channel_destroy(glb_policy->lb_channel);
-  glb_policy->lb_channel = NULL;
   grpc_connectivity_state_destroy(exec_ctx, &glb_policy->state_tracker);
   if (glb_policy->serverlist != NULL) {
     grpc_grpclb_destroy_serverlist(glb_policy->serverlist);
   }
+  grpc_fake_resolver_response_generator_unref(glb_policy->response_generator);
+  if (glb_policy->pending_update_args != NULL) {
+    grpc_channel_args_destroy(exec_ctx, glb_policy->pending_update_args->args);
+    gpr_free(glb_policy->pending_update_args);
+  }
   gpr_free(glb_policy);
 }
 
@@ -1022,16 +1053,6 @@ static void glb_shutdown_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
   glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
   glb_policy->shutting_down = true;
 
-  pending_pick *pp = glb_policy->pending_picks;
-  glb_policy->pending_picks = NULL;
-  pending_ping *pping = glb_policy->pending_pings;
-  glb_policy->pending_pings = NULL;
-  if (glb_policy->rr_policy) {
-    GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "glb_shutdown");
-  }
-  grpc_connectivity_state_set(
-      exec_ctx, &glb_policy->state_tracker, GRPC_CHANNEL_SHUTDOWN,
-      GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel Shutdown"), "glb_shutdown");
   /* We need a copy of the lb_call pointer because we can't cancell the call
    * while holding glb_policy->mu: lb_on_server_status_received, invoked due to
    * the cancel, needs to acquire that same lock */
@@ -1045,6 +1066,30 @@ static void glb_shutdown_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
     grpc_call_cancel(lb_call, NULL);
     /* lb_on_server_status_received will pick up the cancel and clean up */
   }
+  if (glb_policy->retry_timer_active) {
+    grpc_timer_cancel(exec_ctx, &glb_policy->lb_call_retry_timer);
+    glb_policy->retry_timer_active = false;
+  }
+
+  pending_pick *pp = glb_policy->pending_picks;
+  glb_policy->pending_picks = NULL;
+  pending_ping *pping = glb_policy->pending_pings;
+  glb_policy->pending_pings = NULL;
+  if (glb_policy->rr_policy) {
+    GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "glb_shutdown");
+  }
+  // We destroy the LB channel here because
+  // glb_lb_channel_on_connectivity_changed_cb needs a valid glb_policy
+  // instance.  Destroying the lb channel in glb_destroy would likely result in
+  // a callback invocation without a valid glb_policy arg.
+  if (glb_policy->lb_channel != NULL) {
+    grpc_channel_destroy(glb_policy->lb_channel);
+    glb_policy->lb_channel = NULL;
+  }
+  grpc_connectivity_state_set(
+      exec_ctx, &glb_policy->state_tracker, GRPC_CHANNEL_SHUTDOWN,
+      GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel Shutdown"), "glb_shutdown");
+
   while (pp != NULL) {
     pending_pick *next = pp->next;
     *pp->target = NULL;
@@ -1318,6 +1363,7 @@ static void lb_call_init_locked(grpc_exec_ctx *exec_ctx,
                                 glb_lb_policy *glb_policy) {
   GPR_ASSERT(glb_policy->server_name != NULL);
   GPR_ASSERT(glb_policy->server_name[0] != '\0');
+  GPR_ASSERT(glb_policy->lb_call == NULL);
   GPR_ASSERT(!glb_policy->shutting_down);
 
   /* Note the following LB call progresses every time there's activity in \a
@@ -1403,8 +1449,10 @@ static void query_for_backends_locked(grpc_exec_ctx *exec_ctx,
   lb_call_init_locked(exec_ctx, glb_policy);
 
   if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
-    gpr_log(GPR_INFO, "Query for backends (grpclb: %p, lb_call: %p)",
-            (void *)glb_policy, (void *)glb_policy->lb_call);
+    gpr_log(GPR_INFO,
+            "Query for backends (grpclb: %p, lb_channel: %p, lb_call: %p)",
+            (void *)glb_policy, (void *)glb_policy->lb_channel,
+            (void *)glb_policy->lb_call);
   }
   GPR_ASSERT(glb_policy->lb_call != NULL);
 
@@ -1608,8 +1656,8 @@ static void lb_on_response_received_locked(grpc_exec_ctx *exec_ctx, void *arg,
 static void lb_call_on_retry_timer_locked(grpc_exec_ctx *exec_ctx, void *arg,
                                           grpc_error *error) {
   glb_lb_policy *glb_policy = arg;
-
-  if (!glb_policy->shutting_down) {
+  glb_policy->retry_timer_active = false;
+  if (!glb_policy->shutting_down && error == GRPC_ERROR_NONE) {
     if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
       gpr_log(GPR_INFO, "Restaring call to LB server (grpclb %p)",
               (void *)glb_policy);
@@ -1617,31 +1665,32 @@ static void lb_call_on_retry_timer_locked(grpc_exec_ctx *exec_ctx, void *arg,
     GPR_ASSERT(glb_policy->lb_call == NULL);
     query_for_backends_locked(exec_ctx, glb_policy);
   }
-  GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base,
-                            "grpclb_on_retry_timer");
+  GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base, "grpclb_retry_timer");
 }
 
 static void lb_on_server_status_received_locked(grpc_exec_ctx *exec_ctx,
                                                 void *arg, grpc_error *error) {
   glb_lb_policy *glb_policy = arg;
-
   GPR_ASSERT(glb_policy->lb_call != NULL);
-
   if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
     char *status_details =
         grpc_slice_to_c_string(glb_policy->lb_call_status_details);
-    gpr_log(GPR_DEBUG,
+    gpr_log(GPR_INFO,
             "Status from LB server received. Status = %d, Details = '%s', "
-            "(call: %p)",
+            "(call: %p), error %p",
             glb_policy->lb_call_status, status_details,
-            (void *)glb_policy->lb_call);
+            (void *)glb_policy->lb_call, (void *)error);
     gpr_free(status_details);
   }
-
   /* We need to perform cleanups no matter what. */
   lb_call_destroy_locked(exec_ctx, glb_policy);
-
-  if (!glb_policy->shutting_down) {
+  if (glb_policy->started_picking && glb_policy->updating_lb_call) {
+    if (glb_policy->retry_timer_active) {
+      grpc_timer_cancel(exec_ctx, &glb_policy->lb_call_retry_timer);
+    }
+    if (!glb_policy->shutting_down) start_picking_locked(exec_ctx, glb_policy);
+    glb_policy->updating_lb_call = false;
+  } else if (!glb_policy->shutting_down) {
     /* if we aren't shutting down, restart the LB client call after some time */
     gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC);
     gpr_timespec next_try =
@@ -1651,16 +1700,18 @@ static void lb_on_server_status_received_locked(grpc_exec_ctx *exec_ctx,
               (void *)glb_policy);
       gpr_timespec timeout = gpr_time_sub(next_try, now);
       if (gpr_time_cmp(timeout, gpr_time_0(timeout.clock_type)) > 0) {
-        gpr_log(GPR_DEBUG, "... retrying in %" PRId64 ".%09d seconds.",
+        gpr_log(GPR_DEBUG,
+                "... retry_timer_active in %" PRId64 ".%09d seconds.",
                 timeout.tv_sec, timeout.tv_nsec);
       } else {
-        gpr_log(GPR_DEBUG, "... retrying immediately.");
+        gpr_log(GPR_DEBUG, "... retry_timer_active immediately.");
       }
     }
     GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "grpclb_retry_timer");
     grpc_closure_init(&glb_policy->lb_on_call_retry,
                       lb_call_on_retry_timer_locked, glb_policy,
                       grpc_combiner_scheduler(glb_policy->base.combiner));
+    glb_policy->retry_timer_active = true;
     grpc_timer_init(exec_ctx, &glb_policy->lb_call_retry_timer, next_try,
                     &glb_policy->lb_on_call_retry, now);
   }
@@ -1668,6 +1719,138 @@ static void lb_on_server_status_received_locked(grpc_exec_ctx *exec_ctx,
                             "lb_on_server_status_received");
 }
 
+static void glb_update_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *policy,
+                              const grpc_lb_policy_args *args) {
+  glb_lb_policy *glb_policy = (glb_lb_policy *)policy;
+
+  if (glb_policy->updating_lb_channel) {
+    if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
+      gpr_log(GPR_INFO,
+              "Update already in progress for grpclb %p. Deferring update.",
+              (void *)glb_policy);
+    }
+    if (glb_policy->pending_update_args != NULL) {
+      grpc_channel_args_destroy(exec_ctx,
+                                glb_policy->pending_update_args->args);
+      gpr_free(glb_policy->pending_update_args);
+    }
+    glb_policy->pending_update_args =
+        gpr_zalloc(sizeof(*glb_policy->pending_update_args));
+    glb_policy->pending_update_args->client_channel_factory =
+        args->client_channel_factory;
+    glb_policy->pending_update_args->args = grpc_channel_args_copy(args->args);
+    glb_policy->pending_update_args->combiner = args->combiner;
+    return;
+  }
+
+  glb_policy->updating_lb_channel = true;
+  // Propagate update to lb_channel (pick first).
+  const grpc_arg *arg =
+      grpc_channel_args_find(args->args, GRPC_ARG_LB_ADDRESSES);
+  if (arg == NULL || arg->type != GRPC_ARG_POINTER) {
+    if (glb_policy->lb_channel == NULL) {
+      // If we don't have a current channel to the LB, go into TRANSIENT
+      // FAILURE.
+      grpc_connectivity_state_set(
+          exec_ctx, &glb_policy->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE,
+          GRPC_ERROR_CREATE_FROM_STATIC_STRING("Missing update in args"),
+          "glb_update_missing");
+    } else {
+      // otherwise, keep using the current LB channel (ignore this update).
+      gpr_log(GPR_ERROR,
+              "No valid LB addresses channel arg for grpclb %p update, "
+              "ignoring.",
+              (void *)glb_policy);
+    }
+  }
+  const grpc_lb_addresses *addresses = arg->value.pointer.p;
+  GPR_ASSERT(glb_policy->lb_channel != NULL);
+  grpc_channel_args *lb_channel_args = build_lb_channel_args(
+      exec_ctx, addresses, glb_policy->response_generator, args->args);
+  /* Propagate updates to the LB channel through the fake resolver */
+  grpc_fake_resolver_response_generator_set_response(
+      exec_ctx, glb_policy->response_generator, lb_channel_args);
+  grpc_channel_args_destroy(exec_ctx, lb_channel_args);
+
+  if (!glb_policy->watching_lb_channel) {
+    // Watch the LB channel connectivity for connection.
+    glb_policy->lb_channel_connectivity = GRPC_CHANNEL_INIT;
+    grpc_channel_element *client_channel_elem = grpc_channel_stack_last_element(
+        grpc_channel_get_channel_stack(glb_policy->lb_channel));
+    GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
+    glb_policy->watching_lb_channel = true;
+    GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "watch_lb_channel_connectivity");
+    grpc_client_channel_watch_connectivity_state(
+        exec_ctx, client_channel_elem,
+        grpc_polling_entity_create_from_pollset_set(
+            glb_policy->base.interested_parties),
+        &glb_policy->lb_channel_connectivity,
+        &glb_policy->lb_channel_on_connectivity_changed, NULL);
+  }
+}
+
+// Invoked as part of the update process. It continues watching the LB channel
+// until it shuts down or becomes READY. It's invoked even if the LB channel
+// stayed READY throughout the update (for example if the update is identical).
+static void glb_lb_channel_on_connectivity_changed_cb(grpc_exec_ctx *exec_ctx,
+                                                      void *arg,
+                                                      grpc_error *error) {
+  glb_lb_policy *glb_policy = arg;
+  if (glb_policy->shutting_down) goto done;
+  // Re-initialize the lb_call. This should also take care of updating the
+  // embedded RR policy. Note that the current RR policy, if any, will stay in
+  // effect until an update from the new lb_call is received.
+  switch (glb_policy->lb_channel_connectivity) {
+    case GRPC_CHANNEL_INIT:
+    case GRPC_CHANNEL_CONNECTING:
+    case GRPC_CHANNEL_TRANSIENT_FAILURE: {
+      /* resub. */
+      grpc_channel_element *client_channel_elem =
+          grpc_channel_stack_last_element(
+              grpc_channel_get_channel_stack(glb_policy->lb_channel));
+      GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
+      grpc_client_channel_watch_connectivity_state(
+          exec_ctx, client_channel_elem,
+          grpc_polling_entity_create_from_pollset_set(
+              glb_policy->base.interested_parties),
+          &glb_policy->lb_channel_connectivity,
+          &glb_policy->lb_channel_on_connectivity_changed, NULL);
+      break;
+    }
+    case GRPC_CHANNEL_IDLE:
+      // lb channel inactive (probably shutdown prior to update). Restart lb
+      // call to kick the lb channel into gear.
+      GPR_ASSERT(glb_policy->lb_call == NULL);
+    /* fallthrough */
+    case GRPC_CHANNEL_READY:
+      if (glb_policy->lb_call != NULL) {
+        glb_policy->updating_lb_channel = false;
+        glb_policy->updating_lb_call = true;
+        grpc_call_cancel(glb_policy->lb_call, NULL);
+        // lb_on_server_status_received will pick up the cancel and reinit
+        // lb_call.
+        if (glb_policy->pending_update_args != NULL) {
+          const grpc_lb_policy_args *args = glb_policy->pending_update_args;
+          glb_policy->pending_update_args = NULL;
+          glb_update_locked(exec_ctx, &glb_policy->base, args);
+        }
+      } else if (glb_policy->started_picking && !glb_policy->shutting_down) {
+        if (glb_policy->retry_timer_active) {
+          grpc_timer_cancel(exec_ctx, &glb_policy->lb_call_retry_timer);
+          glb_policy->retry_timer_active = false;
+        }
+        start_picking_locked(exec_ctx, glb_policy);
+      }
+    /* fallthrough */
+    case GRPC_CHANNEL_SHUTDOWN:
+    done:
+      glb_policy->watching_lb_channel = false;
+      GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base,
+                                "watch_lb_channel_connectivity_cb_shutdown");
+      break;
+  }
+}
+
 /* Code wiring the policy with the rest of the core */
 static const grpc_lb_policy_vtable glb_lb_policy_vtable = {
     glb_destroy,
@@ -1678,7 +1861,8 @@ static const grpc_lb_policy_vtable glb_lb_policy_vtable = {
     glb_ping_one_locked,
     glb_exit_idle_locked,
     glb_check_connectivity_locked,
-    glb_notify_on_state_change_locked};
+    glb_notify_on_state_change_locked,
+    glb_update_locked};
 
 static void glb_factory_ref(grpc_lb_policy_factory *factory) {}