1 files changed, 518 insertions, 459 deletions
diff --git a/src/core/ext/lb_policy/grpclb/grpclb.c b/src/core/ext/lb_policy/grpclb/grpclb.c
index ae1f2a3b4c..41e177c495 100644
--- a/src/core/ext/lb_policy/grpclb/grpclb.c
+++ b/src/core/ext/lb_policy/grpclb/grpclb.c
@@ -43,34 +43,27 @@
  * policy to select from this list of LB server backends.
  *
  * The first time the policy gets a request for a pick, a ping, or to exit the
- * idle state, \a query_for_backends() is called. It creates an instance of \a
- * lb_client_data, an internal struct meant to contain the data associated with
- * the internal communication with the LB server. This instance is created via
- * \a lb_client_data_create(). There, the call over lb_channel to pick-first
- * from {a1..an} is created, the \a LoadBalancingRequest message is assembled
- * and all necessary callbacks for the progress of the internal call configured.
+ * idle state, \a query_for_backends_locked() is called. This function sets up
+ * and initiates the internal communication with the LB server. In particular,
+ * it's responsible for instantiating the internal *streaming* call to the LB
+ * server (whichever address from {a1..an} pick-first chose). This call is
+ * serviced by two callbacks, \a lb_on_server_status_received and \a
+ * lb_on_response_received. The former will be called when the call to the LB
+ * server completes. This can happen if the LB server closes the connection or
+ * if this policy itself cancels the call (for example because it's shutting
+ * down). If the internal call times out, the usual behavior of pick-first
+ * applies, continuing to pick from the list {a1..an}.
  *
- * Back in \a query_for_backends(), the internal *streaming* call to the LB
- * server (whichever address from {a1..an} pick-first chose) is kicked off.
- * It'll progress over the callbacks configured in \a lb_client_data_create()
- * (see the field docstrings of \a lb_client_data for more details).
- *
- * If the call fails with UNIMPLEMENTED, the original call will also fail.
- * There's a misconfiguration somewhere: at least one of {a1..an} isn't a LB
- * server, which contradicts the LB bit being set. If the internal call times
- * out, the usual behavior of pick-first applies, continuing to pick from the
- * list {a1..an}.
- *
- * Upon sucesss, a \a LoadBalancingResponse is expected in \a res_recv_cb. An
- * invalid one results in the termination of the streaming call. A new streaming
- * call should be created if possible, failing the original call otherwise.
- * For a valid \a LoadBalancingResponse, the server list of actual backends is
- * extracted. A Round Robin policy will be created from this list. There are two
- * possible scenarios:
+ * Upon sucesss, the incoming \a LoadBalancingResponse is processed by \a
+ * res_recv. An invalid one results in the termination of the streaming call. A
+ * new streaming call should be created if possible, failing the original call
+ * otherwise. For a valid \a LoadBalancingResponse, the server list of actual
+ * backends is extracted. A Round Robin policy will be created from this list.
+ * There are two possible scenarios:
  *
  * 1. This is the first server list received. There was no previous instance of
- *    the Round Robin policy. \a rr_handover() will instantiate the RR policy
- *    and perform all the pending operations over it.
+ *    the Round Robin policy. \a rr_handover_locked() will instantiate the RR
+ *    policy and perform all the pending operations over it.
  * 2. There's already a RR policy instance active. We need to introduce the new
  *    one build from the new serverlist, but taking care not to disrupt the
  *    operations in progress over the old RR instance. This is done by
@@ -78,16 +71,16 @@
  *    references are held on the old RR policy, it'll be destroyed and \a
  *    glb_rr_connectivity_changed notified with a \a GRPC_CHANNEL_SHUTDOWN
  *    state. At this point we can transition to a new RR instance safely, which
- *    is done once again via \a rr_handover().
+ *    is done once again via \a rr_handover_locked().
  *
  *
  * Once a RR policy instance is in place (and getting updated as described),
  * calls to for a pick, a ping or a cancellation will be serviced right away by
  * forwarding them to the RR instance. Any time there's no RR policy available
- * (ie, right after the creation of the gRPCLB policy, if an empty serverlist
- * is received, etc), pick/ping requests are added to a list of pending
- * picks/pings to be flushed and serviced as part of \a rr_handover() the moment
- * the RR policy instance becomes available.
+ * (ie, right after the creation of the gRPCLB policy, if an empty serverlist is
+ * received, etc), pick/ping requests are added to a list of pending picks/pings
+ * to be flushed and serviced as part of \a rr_handover_locked() the moment the
+ * RR policy instance becomes available.
  *
  * \see https://github.com/grpc/grpc/blob/master/doc/load-balancing.md for the
  * high level design and details. */
@@ -96,6 +89,12 @@
  * - Implement LB service forwarding (point 2c. in the doc's diagram).
  */
 
+/* With the addition of a libuv endpoint, sockaddr.h now includes uv.h when
+   using that endpoint. Because of various transitive includes in uv.h,
+   including windows.h on Windows, uv.h must be included before other system
+   headers. Therefore, sockaddr.h must always be included first */
+#include "src/core/lib/iomgr/sockaddr.h"
+
 #include <errno.h>
 
 #include <string.h>
@@ -107,19 +106,28 @@
 #include <grpc/support/string_util.h>
 #include <grpc/support/time.h>
 
-#include "src/core/ext/client_config/client_channel_factory.h"
-#include "src/core/ext/client_config/lb_policy_factory.h"
-#include "src/core/ext/client_config/lb_policy_registry.h"
-#include "src/core/ext/client_config/parse_address.h"
+#include "src/core/ext/client_channel/client_channel_factory.h"
+#include "src/core/ext/client_channel/lb_policy_factory.h"
+#include "src/core/ext/client_channel/lb_policy_registry.h"
+#include "src/core/ext/client_channel/parse_address.h"
 #include "src/core/ext/lb_policy/grpclb/grpclb.h"
 #include "src/core/ext/lb_policy/grpclb/load_balancer_api.h"
+#include "src/core/lib/channel/channel_args.h"
 #include "src/core/lib/iomgr/sockaddr.h"
 #include "src/core/lib/iomgr/sockaddr_utils.h"
+#include "src/core/lib/iomgr/timer.h"
+#include "src/core/lib/slice/slice_string_helpers.h"
+#include "src/core/lib/support/backoff.h"
 #include "src/core/lib/support/string.h"
 #include "src/core/lib/surface/call.h"
 #include "src/core/lib/surface/channel.h"
 #include "src/core/lib/transport/static_metadata.h"
 
+#define BACKOFF_MULTIPLIER 1.6
+#define BACKOFF_JITTER 0.2
+#define BACKOFF_MIN_SECONDS 10
+#define BACKOFF_MAX_SECONDS 60
+
 int grpc_lb_glb_trace = 0;
 
 /* add lb_token of selected subchannel (address) to the call's initial
@@ -134,6 +142,9 @@ static void initial_metadata_add_lb_token(
 }
 
 typedef struct wrapped_rr_closure_arg {
+  /* the closure instance using this struct as argument */
+  grpc_closure wrapper_closure;
+
   /* the original closure. Usually a on_complete/notify cb for pick() and ping()
    * calls against the internal RR instance, respectively. */
   grpc_closure *wrapped_closure;
@@ -155,9 +166,8 @@ typedef struct wrapped_rr_closure_arg {
   /* The RR instance related to the closure */
   grpc_lb_policy *rr_policy;
 
-  /* when not NULL, represents a pending_{pick,ping} node to be freed upon
-   * closure execution */
-  void *owning_pending_node; /* to be freed if not NULL */
+  /* heap memory to be freed upon closure execution. */
+  void *free_when_done;
 } wrapped_rr_closure_arg;
 
 /* The \a on_complete closure passed as part of the pick requires keeping a
@@ -166,13 +176,12 @@ typedef struct wrapped_rr_closure_arg {
 static void wrapped_rr_closure(grpc_exec_ctx *exec_ctx, void *arg,
                                grpc_error *error) {
   wrapped_rr_closure_arg *wc_arg = arg;
-  if (wc_arg->rr_policy != NULL) {
-    if (grpc_lb_glb_trace) {
-      gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")",
-              (intptr_t)wc_arg->rr_policy);
-    }
-    GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "wrapped_rr_closure");
 
+  GPR_ASSERT(wc_arg->wrapped_closure != NULL);
+  grpc_exec_ctx_sched(exec_ctx, wc_arg->wrapped_closure, GRPC_ERROR_REF(error),
+                      NULL);
+
+  if (wc_arg->rr_policy != NULL) {
     /* if target is NULL, no pick has been made by the RR policy (eg, all
      * addresses failed to connect). There won't be any user_data/token
      * available */
@@ -181,12 +190,14 @@ static void wrapped_rr_closure(grpc_exec_ctx *exec_ctx, void *arg,
                                     wc_arg->lb_token_mdelem_storage,
                                     GRPC_MDELEM_REF(wc_arg->lb_token));
     }
+    if (grpc_lb_glb_trace) {
+      gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")",
+              (intptr_t)wc_arg->rr_policy);
+    }
+    GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "wrapped_rr_closure");
   }
-  GPR_ASSERT(wc_arg->wrapped_closure != NULL);
-
-  grpc_exec_ctx_sched(exec_ctx, wc_arg->wrapped_closure, GRPC_ERROR_REF(error),
-                      NULL);
-  gpr_free(wc_arg->owning_pending_node);
+  GPR_ASSERT(wc_arg->free_when_done != NULL);
+  gpr_free(wc_arg->free_when_done);
 }
 
 /* Linked list of pending pick requests. It stores all information needed to
@@ -207,10 +218,6 @@ typedef struct pending_pick {
    * upon error. */
   grpc_connected_subchannel **target;
 
-  /* a closure wrapping the original on_complete one to be invoked once the
-   * pick() has completed (regardless of success) */
-  grpc_closure wrapped_on_complete;
-
   /* args for wrapped_on_complete */
   wrapped_rr_closure_arg wrapped_on_complete_arg;
 } pending_pick;
@@ -230,8 +237,9 @@ static void add_pending_pick(pending_pick **root,
   pp->wrapped_on_complete_arg.initial_metadata = pick_args->initial_metadata;
   pp->wrapped_on_complete_arg.lb_token_mdelem_storage =
       pick_args->lb_token_mdelem_storage;
-  grpc_closure_init(&pp->wrapped_on_complete, wrapped_rr_closure,
-                    &pp->wrapped_on_complete_arg);
+  pp->wrapped_on_complete_arg.free_when_done = pp;
+  grpc_closure_init(&pp->wrapped_on_complete_arg.wrapper_closure,
+                    wrapped_rr_closure, &pp->wrapped_on_complete_arg);
   *root = pp;
 }
 
@@ -239,10 +247,6 @@ static void add_pending_pick(pending_pick **root,
 typedef struct pending_ping {
   struct pending_ping *next;
 
-  /* a closure wrapping the original on_complete one to be invoked once the
-   * ping() has completed (regardless of success) */
-  grpc_closure wrapped_notify;
-
   /* args for wrapped_notify */
   wrapped_rr_closure_arg wrapped_notify_arg;
 } pending_ping;
@@ -251,10 +255,11 @@ static void add_pending_ping(pending_ping **root, grpc_closure *notify) {
   pending_ping *pping = gpr_malloc(sizeof(*pping));
   memset(pping, 0, sizeof(pending_ping));
   memset(&pping->wrapped_notify_arg, 0, sizeof(wrapped_rr_closure_arg));
-  pping->next = *root;
-  grpc_closure_init(&pping->wrapped_notify, wrapped_rr_closure,
-                    &pping->wrapped_notify_arg);
   pping->wrapped_notify_arg.wrapped_closure = notify;
+  pping->wrapped_notify_arg.free_when_done = pping;
+  pping->next = *root;
+  grpc_closure_init(&pping->wrapped_notify_arg.wrapper_closure,
+                    wrapped_rr_closure, &pping->wrapped_notify_arg);
   *root = pping;
 }
 
@@ -262,7 +267,6 @@ static void add_pending_ping(pending_ping **root, grpc_closure *notify) {
  * glb_lb_policy
  */
 typedef struct rr_connectivity_data rr_connectivity_data;
-struct lb_client_data;
 static const grpc_lb_policy_vtable glb_lb_policy_vtable;
 typedef struct glb_lb_policy {
   /** base policy: must be first */
@@ -274,6 +278,7 @@ typedef struct glb_lb_policy {
   /** who the client is trying to communicate with */
   const char *server_name;
   grpc_client_channel_factory *cc_factory;
+  grpc_channel_args *args;
 
   /** deadline for the LB's call */
   gpr_timespec deadline;
@@ -293,27 +298,47 @@ typedef struct glb_lb_policy {
    * response has arrived. */
   grpc_grpclb_serverlist *serverlist;
 
-  /** addresses from \a serverlist */
-  grpc_lb_addresses *addresses;
-
   /** list of picks that are waiting on RR's policy connectivity */
   pending_pick *pending_picks;
 
   /** list of pings that are waiting on RR's policy connectivity */
   pending_ping *pending_pings;
 
-  /** client data associated with the LB server communication */
-  struct lb_client_data *lb_client;
+  bool shutting_down;
+
+  /************************************************************/
+  /*  client data associated with the LB server communication */
+  /************************************************************/
+  /* Status from the LB server has been received. This signals the end of the LB
+   * call. */
+  grpc_closure lb_on_server_status_received;
+
+  /* A response from the LB server has been received. Process it */
+  grpc_closure lb_on_response_received;
+
+  grpc_call *lb_call; /* streaming call to the LB server, */
+
+  grpc_metadata_array lb_initial_metadata_recv; /* initial MD from LB server */
+  grpc_metadata_array
+      lb_trailing_metadata_recv; /* trailing MD from LB server */
+
+  /* what's being sent to the LB server. Note that its value may vary if the LB
+   * server indicates a redirect. */
+  grpc_byte_buffer *lb_request_payload;
+
+  /* response the LB server, if any. Processed in lb_on_response_received() */
+  grpc_byte_buffer *lb_response_payload;
 
-  /** for tracking of the RR connectivity */
-  rr_connectivity_data *rr_connectivity;
+  /* call status code and details, set in lb_on_server_status_received() */
+  grpc_status_code lb_call_status;
+  char *lb_call_status_details;
+  size_t lb_call_status_details_capacity;
 
-  /* a wrapped (see \a wrapped_rr_closure) on-complete closure for readily
-   * available RR picks */
-  grpc_closure wrapped_on_complete;
+  /** LB call retry backoff state */
+  gpr_backoff lb_call_backoff_state;
 
-  /* arguments for the wrapped_on_complete closure */
-  wrapped_rr_closure_arg wc_arg;
+  /** LB call retry timer */
+  grpc_timer lb_call_retry_timer;
 } glb_lb_policy;
 
 /* Keeps track and reacts to changes in connectivity of the RR instance */
@@ -329,8 +354,8 @@ static bool is_server_valid(const grpc_grpclb_server *server, size_t idx,
   if (server->port >> 16 != 0) {
     if (log) {
       gpr_log(GPR_ERROR,
-              "Invalid port '%d' at index %zu of serverlist. Ignoring.",
-              server->port, idx);
+              "Invalid port '%d' at index %lu of serverlist. Ignoring.",
+              server->port, (unsigned long)idx);
     }
     return false;
   }
@@ -338,15 +363,52 @@ static bool is_server_valid(const grpc_grpclb_server *server, size_t idx,
   if (ip->size != 4 && ip->size != 16) {
     if (log) {
       gpr_log(GPR_ERROR,
-              "Expected IP to be 4 or 16 bytes, got %d at index %zu of "
+              "Expected IP to be 4 or 16 bytes, got %d at index %lu of "
               "serverlist. Ignoring",
-              ip->size, idx);
+              ip->size, (unsigned long)idx);
     }
     return false;
   }
   return true;
 }
 
+/* vtable for LB tokens in grpc_lb_addresses. */
+static void *lb_token_copy(void *token) {
+  return token == NULL ? NULL : GRPC_MDELEM_REF(token);
+}
+static void lb_token_destroy(void *token) {
+  if (token != NULL) GRPC_MDELEM_UNREF(token);
+}
+static int lb_token_cmp(void *token1, void *token2) {
+  if (token1 > token2) return 1;
+  if (token1 < token2) return -1;
+  return 0;
+}
+static const grpc_lb_user_data_vtable lb_token_vtable = {
+    lb_token_copy, lb_token_destroy, lb_token_cmp};
+
+static void parse_server(const grpc_grpclb_server *server,
+                         grpc_resolved_address *addr) {
+  const uint16_t netorder_port = htons((uint16_t)server->port);
+  /* the addresses are given in binary format (a in(6)_addr struct) in
+   * server->ip_address.bytes. */
+  const grpc_grpclb_ip_address *ip = &server->ip_address;
+  memset(addr, 0, sizeof(*addr));
+  if (ip->size == 4) {
+    addr->len = sizeof(struct sockaddr_in);
+    struct sockaddr_in *addr4 = (struct sockaddr_in *)&addr->addr;
+    addr4->sin_family = AF_INET;
+    memcpy(&addr4->sin_addr, ip->bytes, ip->size);
+    addr4->sin_port = netorder_port;
+  } else if (ip->size == 16) {
+    addr->len = sizeof(struct sockaddr_in6);
+    struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&addr->addr;
+    addr6->sin6_family = AF_INET6;
+    memcpy(&addr6->sin6_addr, ip->bytes, ip->size);
+    addr6->sin6_port = netorder_port;
+  }
+}
+
 /* Returns addresses extracted from \a serverlist. */
 static grpc_lb_addresses *process_serverlist(
     const grpc_grpclb_serverlist *serverlist) {
@@ -358,7 +420,8 @@ static grpc_lb_addresses *process_serverlist(
   }
   if (num_valid == 0) return NULL;
 
-  grpc_lb_addresses *lb_addresses = grpc_lb_addresses_create(num_valid);
+  grpc_lb_addresses *lb_addresses =
+      grpc_lb_addresses_create(num_valid, &lb_token_vtable);
 
   /* second pass: actually populate the addresses and LB tokens (aka user data
    * to the outside world) to be read by the RR policy during its creation.
@@ -372,41 +435,26 @@ static grpc_lb_addresses *process_serverlist(
     if (!is_server_valid(serverlist->servers[sl_idx], sl_idx, false)) continue;
 
     /* address processing */
-    const uint16_t netorder_port = htons((uint16_t)server->port);
-    /* the addresses are given in binary format (a in(6)_addr struct) in
-     * server->ip_address.bytes. */
-    const grpc_grpclb_ip_address *ip = &server->ip_address;
     grpc_resolved_address addr;
-    memset(&addr, 0, sizeof(addr));
-    if (ip->size == 4) {
-      addr.len = sizeof(struct sockaddr_in);
-      struct sockaddr_in *addr4 = (struct sockaddr_in *)&addr.addr;
-      addr4->sin_family = AF_INET;
-      memcpy(&addr4->sin_addr, ip->bytes, ip->size);
-      addr4->sin_port = netorder_port;
-    } else if (ip->size == 16) {
-      addr.len = sizeof(struct sockaddr_in6);
-      struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&addr.addr;
-      addr6->sin6_family = AF_INET;
-      memcpy(&addr6->sin6_addr, ip->bytes, ip->size);
-      addr6->sin6_port = netorder_port;
-    }
+    parse_server(server, &addr);
 
     /* lb token processing */
     void *user_data;
     if (server->has_load_balance_token) {
-      const size_t lb_token_size =
-          GPR_ARRAY_SIZE(server->load_balance_token) - 1;
+      const size_t lb_token_max_length =
+          GPR_ARRAY_SIZE(server->load_balance_token);
+      const size_t lb_token_length =
+          strnlen(server->load_balance_token, lb_token_max_length);
       grpc_mdstr *lb_token_mdstr = grpc_mdstr_from_buffer(
-          (uint8_t *)server->load_balance_token, lb_token_size);
-      user_data = grpc_mdelem_from_metadata_strings(
-          GRPC_MDSTR_LOAD_REPORTING_INITIAL, lb_token_mdstr);
+          (uint8_t *)server->load_balance_token, lb_token_length);
+      user_data = grpc_mdelem_from_metadata_strings(GRPC_MDSTR_LB_TOKEN,
+                                                    lb_token_mdstr);
     } else {
       gpr_log(GPR_ERROR,
               "Missing LB token for backend address '%s'. The empty token will "
               "be used instead",
-              grpc_sockaddr_to_uri((struct sockaddr *)&addr.addr));
-      user_data = GRPC_MDELEM_LOAD_REPORTING_INITIAL_EMPTY;
+              grpc_sockaddr_to_uri(&addr));
+      user_data = GRPC_MDELEM_LB_TOKEN_EMPTY;
     }
 
     grpc_lb_addresses_set_address(lb_addresses, addr_idx, &addr.addr, addr.len,
@@ -415,57 +463,111 @@ static grpc_lb_addresses *process_serverlist(
     ++addr_idx;
   }
   GPR_ASSERT(addr_idx == num_valid);
-
   return lb_addresses;
 }
 
-/* A plugin for grpc_lb_addresses_destroy that unrefs the LB token metadata. */
-static void lb_token_destroy(void *token) {
-  if (token != NULL) GRPC_MDELEM_UNREF(token);
+/* perform a pick over \a rr_policy. Given that a pick can return immediately
+ * (ignoring its completion callback) we need to perform the cleanups this
+ * callback would be otherwise resposible for */
+static bool pick_from_internal_rr_locked(
+    grpc_exec_ctx *exec_ctx, grpc_lb_policy *rr_policy,
+    const grpc_lb_policy_pick_args *pick_args,
+    grpc_connected_subchannel **target, wrapped_rr_closure_arg *wc_arg) {
+  GPR_ASSERT(rr_policy != NULL);
+  const bool pick_done =
+      grpc_lb_policy_pick(exec_ctx, rr_policy, pick_args, target,
+                          (void **)&wc_arg->lb_token, &wc_arg->wrapper_closure);
+  if (pick_done) {
+    /* synchronous grpc_lb_policy_pick call. Unref the RR policy. */
+    if (grpc_lb_glb_trace) {
+      gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")",
+              (intptr_t)wc_arg->rr_policy);
+    }
+    GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "glb_pick_sync");
+
+    /* add the load reporting initial metadata */
+    initial_metadata_add_lb_token(pick_args->initial_metadata,
+                                  pick_args->lb_token_mdelem_storage,
+                                  GRPC_MDELEM_REF(wc_arg->lb_token));
+
+    gpr_free(wc_arg);
+  }
+  /* else, the pending pick will be registered and taken care of by the
+   * pending pick list inside the RR policy (glb_policy->rr_policy).
+   * Eventually, wrapped_on_complete will be called, which will -among other
+   * things- add the LB token to the call's initial metadata */
+  return pick_done;
 }
 
-static grpc_lb_policy *create_rr(grpc_exec_ctx *exec_ctx,
-                                 const grpc_grpclb_serverlist *serverlist,
-                                 glb_lb_policy *glb_policy) {
+static grpc_lb_policy *create_rr_locked(
+    grpc_exec_ctx *exec_ctx, const grpc_grpclb_serverlist *serverlist,
+    glb_lb_policy *glb_policy) {
   GPR_ASSERT(serverlist != NULL && serverlist->num_servers > 0);
 
   grpc_lb_policy_args args;
   memset(&args, 0, sizeof(args));
-  args.server_name = glb_policy->server_name;
   args.client_channel_factory = glb_policy->cc_factory;
-  args.addresses = process_serverlist(serverlist);
-
-  grpc_lb_policy *rr = grpc_lb_policy_create(exec_ctx, "round_robin", &args);
+  grpc_lb_addresses *addresses = process_serverlist(serverlist);
 
-  if (glb_policy->addresses != NULL) {
-    /* dispose of the previous version */
-    grpc_lb_addresses_destroy(glb_policy->addresses, lb_token_destroy);
-  }
-  glb_policy->addresses = args.addresses;
+  // Replace the LB addresses in the channel args that we pass down to
+  // the subchannel.
+  static const char *keys_to_remove[] = {GRPC_ARG_LB_ADDRESSES};
+  const grpc_arg arg = grpc_lb_addresses_create_channel_arg(addresses);
+  args.args = grpc_channel_args_copy_and_add_and_remove(
+      glb_policy->args, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove), &arg,
+      1);
 
+  grpc_lb_policy *rr = grpc_lb_policy_create(exec_ctx, "round_robin", &args);
+  GPR_ASSERT(rr != NULL);
+  grpc_lb_addresses_destroy(addresses);
+  grpc_channel_args_destroy(args.args);
   return rr;
 }
 
-static void rr_handover(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy,
-                        grpc_error *error) {
+static void glb_rr_connectivity_changed(grpc_exec_ctx *exec_ctx, void *arg,
+                                        grpc_error *error);
+/* glb_policy->rr_policy may be NULL (initial handover) */
+static void rr_handover_locked(grpc_exec_ctx *exec_ctx,
+                               glb_lb_policy *glb_policy, grpc_error *error) {
   GPR_ASSERT(glb_policy->serverlist != NULL &&
              glb_policy->serverlist->num_servers > 0);
-  glb_policy->rr_policy =
-      create_rr(exec_ctx, glb_policy->serverlist, glb_policy);
 
   if (grpc_lb_glb_trace) {
-    gpr_log(GPR_INFO, "Created RR policy (0x%" PRIxPTR ")",
-            (intptr_t)glb_policy->rr_policy);
+    gpr_log(GPR_INFO, "RR handover. Old RR: %p", (void *)glb_policy->rr_policy);
+  }
+  if (glb_policy->rr_policy != NULL) {
+    /* if we are phasing out an existing RR instance, unref it. */
+    GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "rr_handover");
   }
+
+  glb_policy->rr_policy =
+      create_rr_locked(exec_ctx, glb_policy->serverlist, glb_policy);
+  if (grpc_lb_glb_trace) {
+    gpr_log(GPR_INFO, "Created RR policy (%p)", (void *)glb_policy->rr_policy);
+  }
+
   GPR_ASSERT(glb_policy->rr_policy != NULL);
-  glb_policy->rr_connectivity->state = grpc_lb_policy_check_connectivity(
+  grpc_pollset_set_add_pollset_set(exec_ctx,
+                                   glb_policy->rr_policy->interested_parties,
+                                   glb_policy->base.interested_parties);
+
+  rr_connectivity_data *rr_connectivity =
+      gpr_malloc(sizeof(rr_connectivity_data));
+  memset(rr_connectivity, 0, sizeof(rr_connectivity_data));
+  grpc_closure_init(&rr_connectivity->on_change, glb_rr_connectivity_changed,
+                    rr_connectivity);
+  rr_connectivity->glb_policy = glb_policy;
+  rr_connectivity->state = grpc_lb_policy_check_connectivity(
       exec_ctx, glb_policy->rr_policy, &error);
-  grpc_lb_policy_notify_on_state_change(
-      exec_ctx, glb_policy->rr_policy, &glb_policy->rr_connectivity->state,
-      &glb_policy->rr_connectivity->on_change);
+
   grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker,
-                              glb_policy->rr_connectivity->state,
-                              GRPC_ERROR_REF(error), "rr_handover");
+                              rr_connectivity->state, GRPC_ERROR_REF(error),
+                              "rr_handover");
+  /* subscribe */
+  GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "rr_connectivity_cb");
+  grpc_lb_policy_notify_on_state_change(exec_ctx, glb_policy->rr_policy,
+                                        &rr_connectivity->state,
+                                        &rr_connectivity->on_change);
   grpc_lb_policy_exit_idle(exec_ctx, glb_policy->rr_policy);
 
   /* flush pending ops */
@@ -478,11 +580,9 @@ static void rr_handover(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy,
       gpr_log(GPR_INFO, "Pending pick about to PICK from 0x%" PRIxPTR "",
               (intptr_t)glb_policy->rr_policy);
     }
-    grpc_lb_policy_pick(exec_ctx, glb_policy->rr_policy, &pp->pick_args,
-                        pp->target,
-                        (void **)&pp->wrapped_on_complete_arg.lb_token,
-                        &pp->wrapped_on_complete);
-    pp->wrapped_on_complete_arg.owning_pending_node = pp;
+    pick_from_internal_rr_locked(exec_ctx, glb_policy->rr_policy,
+                                 &pp->pick_args, pp->target,
+                                 &pp->wrapped_on_complete_arg);
   }
 
   pending_ping *pping;
@@ -495,44 +595,45 @@ static void rr_handover(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy,
               (intptr_t)glb_policy->rr_policy);
     }
     grpc_lb_policy_ping_one(exec_ctx, glb_policy->rr_policy,
-                            &pping->wrapped_notify);
-    pping->wrapped_notify_arg.owning_pending_node = pping;
+                            &pping->wrapped_notify_arg.wrapper_closure);
   }
 }
 
 static void glb_rr_connectivity_changed(grpc_exec_ctx *exec_ctx, void *arg,
                                         grpc_error *error) {
+  /* If shutdown or error free the arg. Rely on the rest of the code to set the
+   * right grpclb status. */
   rr_connectivity_data *rr_conn_data = arg;
   glb_lb_policy *glb_policy = rr_conn_data->glb_policy;
 
-  if (rr_conn_data->state == GRPC_CHANNEL_SHUTDOWN) {
-    if (glb_policy->serverlist != NULL) {
-      /* a RR policy is shutting down but there's a serverlist available ->
-       * perform a handover */
-      rr_handover(exec_ctx, glb_policy, error);
-    } else {
-      /* shutting down and no new serverlist available. Bail out. */
-      gpr_free(rr_conn_data);
-    }
+  if (rr_conn_data->state != GRPC_CHANNEL_SHUTDOWN &&
+      !glb_policy->shutting_down) {
+    gpr_mu_lock(&glb_policy->mu);
+    /* RR not shutting down. Mimic the RR's policy state */
+    grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker,
+                                rr_conn_data->state, GRPC_ERROR_REF(error),
+                                "rr_connectivity_cb");
+    /* resubscribe. Reuse the "rr_connectivity_cb" weak ref. */
+    grpc_lb_policy_notify_on_state_change(exec_ctx, glb_policy->rr_policy,
+                                          &rr_conn_data->state,
+                                          &rr_conn_data->on_change);
+    gpr_mu_unlock(&glb_policy->mu);
   } else {
-    if (error == GRPC_ERROR_NONE) {
-      /* RR not shutting down. Mimic the RR's policy state */
-      grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker,
-                                  rr_conn_data->state, GRPC_ERROR_REF(error),
-                                  "glb_rr_connectivity_changed");
-      /* resubscribe */
-      grpc_lb_policy_notify_on_state_change(exec_ctx, glb_policy->rr_policy,
-                                            &rr_conn_data->state,
-                                            &rr_conn_data->on_change);
-    } else { /* error */
-      gpr_free(rr_conn_data);
-    }
+    GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base,
+                              "rr_connectivity_cb");
+    gpr_free(rr_conn_data);
   }
 }
 
 static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx,
                                   grpc_lb_policy_factory *factory,
                                   grpc_lb_policy_args *args) {
+  /* Get server name. */
+  const grpc_arg *arg =
+      grpc_channel_args_find(args->args, GRPC_ARG_SERVER_NAME);
+  const char *server_name =
+      arg != NULL && arg->type == GRPC_ARG_STRING ? arg->value.string : NULL;
+
   /* Count the number of gRPC-LB addresses. There must be at least one.
    * TODO(roth): For now, we ignore non-balancer addresses, but in the
    * future, we may change the behavior such that we fall back to using
@@ -540,23 +641,27 @@ static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx,
    * time, this should be changed to allow a list with no balancer addresses,
    * since the resolver might fail to return a balancer address even when
    * this is the right LB policy to use. */
+  arg = grpc_channel_args_find(args->args, GRPC_ARG_LB_ADDRESSES);
+  GPR_ASSERT(arg != NULL && arg->type == GRPC_ARG_POINTER);
+  grpc_lb_addresses *addresses = arg->value.pointer.p;
   size_t num_grpclb_addrs = 0;
-  for (size_t i = 0; i < args->addresses->num_addresses; ++i) {
-    if (args->addresses->addresses[i].is_balancer) ++num_grpclb_addrs;
+  for (size_t i = 0; i < addresses->num_addresses; ++i) {
+    if (addresses->addresses[i].is_balancer) ++num_grpclb_addrs;
   }
   if (num_grpclb_addrs == 0) return NULL;
 
   glb_lb_policy *glb_policy = gpr_malloc(sizeof(*glb_policy));
   memset(glb_policy, 0, sizeof(*glb_policy));
 
-  /* All input addresses in args->addresses come from a resolver that claims
+  /* All input addresses in addresses come from a resolver that claims
    * they are LB services. It's the resolver's responsibility to make sure
    * this
    * policy is only instantiated and used in that case.
    *
    * Create a client channel over them to communicate with a LB service */
-  glb_policy->server_name = gpr_strdup(args->server_name);
+  glb_policy->server_name = gpr_strdup(server_name);
   glb_policy->cc_factory = args->client_channel_factory;
+  glb_policy->args = grpc_channel_args_copy(args->args);
   GPR_ASSERT(glb_policy->cc_factory != NULL);
 
   /* construct a target from the addresses in args, given in the form
@@ -564,22 +669,19 @@ static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx,
    * TODO(dgq): support mixed ip version */
   char **addr_strs = gpr_malloc(sizeof(char *) * num_grpclb_addrs);
   size_t addr_index = 0;
-  for (size_t i = 0; i < args->addresses->num_addresses; i++) {
-    if (args->addresses->addresses[i].user_data != NULL) {
+  for (size_t i = 0; i < addresses->num_addresses; i++) {
+    if (addresses->addresses[i].user_data != NULL) {
       gpr_log(GPR_ERROR,
               "This LB policy doesn't support user data. It will be ignored");
     }
-    if (args->addresses->addresses[i].is_balancer) {
+    if (addresses->addresses[i].is_balancer) {
       if (addr_index == 0) {
-        addr_strs[addr_index++] = grpc_sockaddr_to_uri(
-            (const struct sockaddr *)&args->addresses->addresses[i]
-                .address.addr);
+        addr_strs[addr_index++] =
+            grpc_sockaddr_to_uri(&addresses->addresses[i].address);
       } else {
-        GPR_ASSERT(grpc_sockaddr_to_string(
-                       &addr_strs[addr_index++],
-                       (const struct sockaddr *)&args->addresses->addresses[i]
-                           .address.addr,
-                       true) > 0);
+        GPR_ASSERT(grpc_sockaddr_to_string(&addr_strs[addr_index++],
+                                           &addresses->addresses[i].address,
+                                           true) > 0);
       }
     }
   }
@@ -587,10 +689,29 @@ static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx,
   char *target_uri_str = gpr_strjoin_sep((const char **)addr_strs,
                                          num_grpclb_addrs, ",", &uri_path_len);
 
-  /* will pick using pick_first */
+  /* Create a channel to talk to the LBs.
+   *
+   * We strip out the channel arg for the LB policy name, since we want
+   * to use the default (pick_first) in this case.
+   *
+   * We also strip out the channel arg for the resolved addresses, since
+   * that will be generated by the name resolver used in the LB channel.
+   * Note that the LB channel will use the sockaddr resolver, so this
+   * won't actually generate a query to DNS (or some other name service).
+   * However, the addresses returned by the sockaddr resolver will have
+   * is_balancer=false, whereas our own addresses have is_balancer=true.
+   * We need the LB channel to return addresses with is_balancer=false
+   * so that it does not wind up recursively using the grpclb LB policy,
+   * as per the special case logic in client_channel.c.
+   */
+  static const char *keys_to_remove[] = {GRPC_ARG_LB_POLICY_NAME,
+                                         GRPC_ARG_LB_ADDRESSES};
+  grpc_channel_args *new_args = grpc_channel_args_copy_and_remove(
+      args->args, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove));
   glb_policy->lb_channel = grpc_client_channel_factory_create_channel(
       exec_ctx, glb_policy->cc_factory, target_uri_str,
-      GRPC_CLIENT_CHANNEL_TYPE_LOAD_BALANCING, NULL);
+      GRPC_CLIENT_CHANNEL_TYPE_LOAD_BALANCING, new_args);
+  grpc_channel_args_destroy(new_args);
 
   gpr_free(target_uri_str);
   for (size_t i = 0; i < num_grpclb_addrs; i++) {
@@ -603,18 +724,11 @@ static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx,
     return NULL;
   }
 
-  rr_connectivity_data *rr_connectivity =
-      gpr_malloc(sizeof(rr_connectivity_data));
-  memset(rr_connectivity, 0, sizeof(rr_connectivity_data));
-  grpc_closure_init(&rr_connectivity->on_change, glb_rr_connectivity_changed,
-                    rr_connectivity);
-  rr_connectivity->glb_policy = glb_policy;
-  glb_policy->rr_connectivity = rr_connectivity;
-
   grpc_lb_policy_init(&glb_policy->base, &glb_lb_policy_vtable);
   gpr_mu_init(&glb_policy->mu);
   grpc_connectivity_state_init(&glb_policy->state_tracker, GRPC_CHANNEL_IDLE,
                                "grpclb");
+
   return &glb_policy->base;
 }
 
@@ -623,6 +737,7 @@ static void glb_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
   GPR_ASSERT(glb_policy->pending_picks == NULL);
   GPR_ASSERT(glb_policy->pending_pings == NULL);
   gpr_free((void *)glb_policy->server_name);
+  grpc_channel_args_destroy(glb_policy->args);
   grpc_channel_destroy(glb_policy->lb_channel);
   glb_policy->lb_channel = NULL;
   grpc_connectivity_state_destroy(exec_ctx, &glb_policy->state_tracker);
@@ -630,50 +745,46 @@ static void glb_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
     grpc_grpclb_destroy_serverlist(glb_policy->serverlist);
   }
   gpr_mu_destroy(&glb_policy->mu);
-  grpc_lb_addresses_destroy(glb_policy->addresses, lb_token_destroy);
   gpr_free(glb_policy);
 }
 
-static void lb_client_data_destroy(struct lb_client_data *lb_client);
 static void glb_shutdown(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
   glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
   gpr_mu_lock(&glb_policy->mu);
+  glb_policy->shutting_down = true;
 
   pending_pick *pp = glb_policy->pending_picks;
   glb_policy->pending_picks = NULL;
   pending_ping *pping = glb_policy->pending_pings;
   glb_policy->pending_pings = NULL;
+  if (glb_policy->rr_policy) {
+    GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "glb_shutdown");
+  }
+  if (glb_policy->started_picking) {
+    if (glb_policy->lb_call != NULL) {
+      grpc_call_cancel(glb_policy->lb_call, NULL);
+      /* lb_on_server_status_received will pick up the cancel and clean up */
+    }
+  }
+  grpc_connectivity_state_set(
+      exec_ctx, &glb_policy->state_tracker, GRPC_CHANNEL_SHUTDOWN,
+      GRPC_ERROR_CREATE("Channel Shutdown"), "glb_shutdown");
   gpr_mu_unlock(&glb_policy->mu);
 
   while (pp != NULL) {
     pending_pick *next = pp->next;
     *pp->target = NULL;
-    grpc_exec_ctx_sched(exec_ctx, &pp->wrapped_on_complete, GRPC_ERROR_NONE,
-                        NULL);
+    grpc_exec_ctx_sched(exec_ctx, &pp->wrapped_on_complete_arg.wrapper_closure,
+                        GRPC_ERROR_NONE, NULL);
     pp = next;
   }
 
   while (pping != NULL) {
     pending_ping *next = pping->next;
-    grpc_exec_ctx_sched(exec_ctx, &pping->wrapped_notify, GRPC_ERROR_NONE,
-                        NULL);
+    grpc_exec_ctx_sched(exec_ctx, &pping->wrapped_notify_arg.wrapper_closure,
+                        GRPC_ERROR_NONE, NULL);
     pping = next;
   }
-
-  if (glb_policy->rr_policy) {
-    /* unsubscribe */
-    grpc_lb_policy_notify_on_state_change(
-        exec_ctx, glb_policy->rr_policy, NULL,
-        &glb_policy->rr_connectivity->on_change);
-    GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "glb_shutdown");
-  }
-
-  lb_client_data_destroy(glb_policy->lb_client);
-  glb_policy->lb_client = NULL;
-
-  grpc_connectivity_state_set(
-      exec_ctx, &glb_policy->state_tracker, GRPC_CHANNEL_SHUTDOWN,
-      GRPC_ERROR_CREATE("Channel Shutdown"), "glb_shutdown");
 }
 
 static void glb_cancel_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
@@ -686,11 +797,9 @@ static void glb_cancel_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
   while (pp != NULL) {
     pending_pick *next = pp->next;
     if (pp->target == target) {
-      grpc_polling_entity_del_from_pollset_set(
-          exec_ctx, pp->pick_args.pollent, glb_policy->base.interested_parties);
       *target = NULL;
       grpc_exec_ctx_sched(
-          exec_ctx, &pp->wrapped_on_complete,
+          exec_ctx, &pp->wrapped_on_complete_arg.wrapper_closure,
           GRPC_ERROR_CREATE_REFERENCING("Pick Cancelled", &error, 1), NULL);
     } else {
       pp->next = glb_policy->pending_picks;
@@ -702,27 +811,20 @@ static void glb_cancel_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
   GRPC_ERROR_UNREF(error);
 }
 
-static grpc_call *lb_client_data_get_call(struct lb_client_data *lb_client);
 static void glb_cancel_picks(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
                              uint32_t initial_metadata_flags_mask,
                              uint32_t initial_metadata_flags_eq,
                              grpc_error *error) {
   glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
   gpr_mu_lock(&glb_policy->mu);
-  if (glb_policy->lb_client != NULL) {
-    /* cancel the call to the load balancer service, if any */
-    grpc_call_cancel(lb_client_data_get_call(glb_policy->lb_client), NULL);
-  }
   pending_pick *pp = glb_policy->pending_picks;
   glb_policy->pending_picks = NULL;
   while (pp != NULL) {
     pending_pick *next = pp->next;
     if ((pp->pick_args.initial_metadata_flags & initial_metadata_flags_mask) ==
         initial_metadata_flags_eq) {
-      grpc_polling_entity_del_from_pollset_set(
-          exec_ctx, pp->pick_args.pollent, glb_policy->base.interested_parties);
       grpc_exec_ctx_sched(
-          exec_ctx, &pp->wrapped_on_complete,
+          exec_ctx, &pp->wrapped_on_complete_arg.wrapper_closure,
           GRPC_ERROR_CREATE_REFERENCING("Pick Cancelled", &error, 1), NULL);
     } else {
       pp->next = glb_policy->pending_picks;
@@ -734,18 +836,20 @@ static void glb_cancel_picks(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
   GRPC_ERROR_UNREF(error);
 }
 
-static void query_for_backends(grpc_exec_ctx *exec_ctx,
-                               glb_lb_policy *glb_policy);
-static void start_picking(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy) {
+static void query_for_backends_locked(grpc_exec_ctx *exec_ctx,
+                                      glb_lb_policy *glb_policy);
+static void start_picking_locked(grpc_exec_ctx *exec_ctx,
+                                 glb_lb_policy *glb_policy) {
   glb_policy->started_picking = true;
-  query_for_backends(exec_ctx, glb_policy);
+  gpr_backoff_reset(&glb_policy->lb_call_backoff_state);
+  query_for_backends_locked(exec_ctx, glb_policy);
 }
 
 static void glb_exit_idle(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
   glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
   gpr_mu_lock(&glb_policy->mu);
   if (!glb_policy->started_picking) {
-    start_picking(exec_ctx, glb_policy);
+    start_picking_locked(exec_ctx, glb_policy);
   }
   gpr_mu_unlock(&glb_policy->mu);
 }
@@ -771,48 +875,35 @@ static int glb_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
 
   if (glb_policy->rr_policy != NULL) {
     if (grpc_lb_glb_trace) {
-      gpr_log(GPR_INFO, "about to PICK from 0x%" PRIxPTR "",
-              (intptr_t)glb_policy->rr_policy);
+      gpr_log(GPR_INFO, "grpclb %p about to PICK from RR %p",
+              (void *)glb_policy, (void *)glb_policy->rr_policy);
     }
     GRPC_LB_POLICY_REF(glb_policy->rr_policy, "glb_pick");
-    memset(&glb_policy->wc_arg, 0, sizeof(wrapped_rr_closure_arg));
-    glb_policy->wc_arg.rr_policy = glb_policy->rr_policy;
-    glb_policy->wc_arg.target = target;
-    glb_policy->wc_arg.wrapped_closure = on_complete;
-    glb_policy->wc_arg.lb_token_mdelem_storage =
-        pick_args->lb_token_mdelem_storage;
-    glb_policy->wc_arg.initial_metadata = pick_args->initial_metadata;
-    glb_policy->wc_arg.owning_pending_node = NULL;
-    grpc_closure_init(&glb_policy->wrapped_on_complete, wrapped_rr_closure,
-                      &glb_policy->wc_arg);
-
-    pick_done =
-        grpc_lb_policy_pick(exec_ctx, glb_policy->rr_policy, pick_args, target,
-                            (void **)&glb_policy->wc_arg.lb_token,
-                            &glb_policy->wrapped_on_complete);
-    if (pick_done) {
-      /* synchronous grpc_lb_policy_pick call. Unref the RR policy. */
-      if (grpc_lb_glb_trace) {
-        gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")",
-                (intptr_t)glb_policy->wc_arg.rr_policy);
-      }
-      GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->wc_arg.rr_policy, "glb_pick");
 
-      /* add the load reporting initial metadata */
-      initial_metadata_add_lb_token(
-          pick_args->initial_metadata, pick_args->lb_token_mdelem_storage,
-          GRPC_MDELEM_REF(glb_policy->wc_arg.lb_token));
-    }
+    wrapped_rr_closure_arg *wc_arg = gpr_malloc(sizeof(wrapped_rr_closure_arg));
+    memset(wc_arg, 0, sizeof(wrapped_rr_closure_arg));
+
+    grpc_closure_init(&wc_arg->wrapper_closure, wrapped_rr_closure, wc_arg);
+    wc_arg->rr_policy = glb_policy->rr_policy;
+    wc_arg->target = target;
+    wc_arg->wrapped_closure = on_complete;
+    wc_arg->lb_token_mdelem_storage = pick_args->lb_token_mdelem_storage;
+    wc_arg->initial_metadata = pick_args->initial_metadata;
+    wc_arg->free_when_done = wc_arg;
+    pick_done = pick_from_internal_rr_locked(exec_ctx, glb_policy->rr_policy,
+                                             pick_args, target, wc_arg);
   } else {
-    /* else, the pending pick will be registered and taken care of by the
-     * pending pick list inside the RR policy (glb_policy->rr_policy) */
-    grpc_polling_entity_add_to_pollset_set(exec_ctx, pick_args->pollent,
-                                           glb_policy->base.interested_parties);
+    if (grpc_lb_glb_trace) {
+      gpr_log(GPR_DEBUG,
+              "No RR policy in grpclb instance %p. Adding to grpclb's pending "
+              "picks",
+              (void *)(glb_policy));
+    }
     add_pending_pick(&glb_policy->pending_picks, pick_args, target,
                      on_complete);
 
     if (!glb_policy->started_picking) {
-      start_picking(exec_ctx, glb_policy);
+      start_picking_locked(exec_ctx, glb_policy);
     }
     pick_done = false;
   }
@@ -841,7 +932,7 @@ static void glb_ping_one(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
   } else {
     add_pending_ping(&glb_policy->pending_pings, closure);
     if (!glb_policy->started_picking) {
-      start_picking(exec_ctx, glb_policy);
+      start_picking_locked(exec_ctx, glb_policy);
     }
   }
   gpr_mu_unlock(&glb_policy->mu);
@@ -859,248 +950,182 @@ static void glb_notify_on_state_change(grpc_exec_ctx *exec_ctx,
   gpr_mu_unlock(&glb_policy->mu);
 }
 
-/*
- * lb_client_data
- *
- * Used internally for the client call to the LB */
-typedef struct lb_client_data {
-  gpr_mu mu;
-
-  /* called once initial metadata's been sent */
-  grpc_closure md_sent;
-
-  /* called once the LoadBalanceRequest has been sent to the LB server. See
-   * src/proto/grpc/.../load_balancer.proto */
-  grpc_closure req_sent;
-
-  /* A response from the LB server has been received (or error). Process it */
-  grpc_closure res_rcvd;
-
-  /* After the client has sent a close to the LB server */
-  grpc_closure close_sent;
-
-  /* ... and the status from the LB server has been received */
-  grpc_closure srv_status_rcvd;
-
-  grpc_call *lb_call;    /* streaming call to the LB server, */
-  gpr_timespec deadline; /* for the streaming call to the LB server */
-
-  grpc_metadata_array initial_metadata_recv;  /* initial MD from LB server */
-  grpc_metadata_array trailing_metadata_recv; /* trailing MD from LB server */
-
-  /* what's being sent to the LB server. Note that its value may vary if the LB
-   * server indicates a redirect. */
-  grpc_byte_buffer *request_payload;
-
-  /* response from the LB server, if any. Processed in res_recv_cb() */
-  grpc_byte_buffer *response_payload;
-
-  /* the call's status and status detailset in srv_status_rcvd_cb() */
-  grpc_status_code status;
-  char *status_details;
-  size_t status_details_capacity;
-
-  /* pointer back to the enclosing policy */
-  glb_lb_policy *glb_policy;
-} lb_client_data;
-
-static void md_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
-static void req_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
-static void res_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
-static void close_sent_cb(grpc_exec_ctx *exec_ctx, void *arg,
-                          grpc_error *error);
-static void srv_status_rcvd_cb(grpc_exec_ctx *exec_ctx, void *arg,
-                               grpc_error *error);
-
-static lb_client_data *lb_client_data_create(glb_lb_policy *glb_policy) {
+static void lb_on_server_status_received(grpc_exec_ctx *exec_ctx, void *arg,
+                                         grpc_error *error);
+static void lb_on_response_received(grpc_exec_ctx *exec_ctx, void *arg,
+                                    grpc_error *error);
+static void lb_call_init(glb_lb_policy *glb_policy) {
   GPR_ASSERT(glb_policy->server_name != NULL);
   GPR_ASSERT(glb_policy->server_name[0] != '\0');
 
-  lb_client_data *lb_client = gpr_malloc(sizeof(lb_client_data));
-  memset(lb_client, 0, sizeof(lb_client_data));
-
-  gpr_mu_init(&lb_client->mu);
-  grpc_closure_init(&lb_client->md_sent, md_sent_cb, lb_client);
-
-  grpc_closure_init(&lb_client->req_sent, req_sent_cb, lb_client);
-  grpc_closure_init(&lb_client->res_rcvd, res_recv_cb, lb_client);
-  grpc_closure_init(&lb_client->close_sent, close_sent_cb, lb_client);
-  grpc_closure_init(&lb_client->srv_status_rcvd, srv_status_rcvd_cb, lb_client);
-
-  lb_client->deadline = glb_policy->deadline;
-
   /* Note the following LB call progresses every time there's activity in \a
    * glb_policy->base.interested_parties, which is comprised of the polling
-   * entities passed to glb_pick(). */
-  lb_client->lb_call = grpc_channel_create_pollset_set_call(
+   * entities from \a client_channel. */
+  glb_policy->lb_call = grpc_channel_create_pollset_set_call(
       glb_policy->lb_channel, NULL, GRPC_PROPAGATE_DEFAULTS,
       glb_policy->base.interested_parties,
       "/grpc.lb.v1.LoadBalancer/BalanceLoad", glb_policy->server_name,
-      lb_client->deadline, NULL);
+      glb_policy->deadline, NULL);
 
-  grpc_metadata_array_init(&lb_client->initial_metadata_recv);
-  grpc_metadata_array_init(&lb_client->trailing_metadata_recv);
+  grpc_metadata_array_init(&glb_policy->lb_initial_metadata_recv);
+  grpc_metadata_array_init(&glb_policy->lb_trailing_metadata_recv);
 
   grpc_grpclb_request *request =
       grpc_grpclb_request_create(glb_policy->server_name);
-  gpr_slice request_payload_slice = grpc_grpclb_request_encode(request);
-  lb_client->request_payload =
+  grpc_slice request_payload_slice = grpc_grpclb_request_encode(request);
+  glb_policy->lb_request_payload =
       grpc_raw_byte_buffer_create(&request_payload_slice, 1);
-  gpr_slice_unref(request_payload_slice);
+  grpc_slice_unref(request_payload_slice);
   grpc_grpclb_request_destroy(request);
 
-  lb_client->status_details = NULL;
-  lb_client->status_details_capacity = 0;
-  lb_client->glb_policy = glb_policy;
-  return lb_client;
+  glb_policy->lb_call_status_details = NULL;
+  glb_policy->lb_call_status_details_capacity = 0;
+
+  grpc_closure_init(&glb_policy->lb_on_server_status_received,
+                    lb_on_server_status_received, glb_policy);
+  grpc_closure_init(&glb_policy->lb_on_response_received,
+                    lb_on_response_received, glb_policy);
+
+  gpr_backoff_init(&glb_policy->lb_call_backoff_state, BACKOFF_MULTIPLIER,
+                   BACKOFF_JITTER, BACKOFF_MIN_SECONDS * 1000,
+                   BACKOFF_MAX_SECONDS * 1000);
 }
 
-static void lb_client_data_destroy(lb_client_data *lb_client) {
-  grpc_call_destroy(lb_client->lb_call);
-  grpc_metadata_array_destroy(&lb_client->initial_metadata_recv);
-  grpc_metadata_array_destroy(&lb_client->trailing_metadata_recv);
+static void lb_call_destroy_locked(glb_lb_policy *glb_policy) {
+  GPR_ASSERT(glb_policy->lb_call != NULL);
+  grpc_call_destroy(glb_policy->lb_call);
+  glb_policy->lb_call = NULL;
 
-  grpc_byte_buffer_destroy(lb_client->request_payload);
+  grpc_metadata_array_destroy(&glb_policy->lb_initial_metadata_recv);
+  grpc_metadata_array_destroy(&glb_policy->lb_trailing_metadata_recv);
 
-  gpr_free(lb_client->status_details);
-  gpr_mu_destroy(&lb_client->mu);
-  gpr_free(lb_client);
-}
-static grpc_call *lb_client_data_get_call(lb_client_data *lb_client) {
-  return lb_client->lb_call;
+  grpc_byte_buffer_destroy(glb_policy->lb_request_payload);
+  gpr_free(glb_policy->lb_call_status_details);
 }
 
 /*
  * Auxiliary functions and LB client callbacks.
  */
-static void query_for_backends(grpc_exec_ctx *exec_ctx,
-                               glb_lb_policy *glb_policy) {
+static void query_for_backends_locked(grpc_exec_ctx *exec_ctx,
+                                      glb_lb_policy *glb_policy) {
   GPR_ASSERT(glb_policy->lb_channel != NULL);
+  lb_call_init(glb_policy);
+
+  if (grpc_lb_glb_trace) {
+    gpr_log(GPR_INFO, "Query for backends (grpclb: %p, lb_call: %p)",
+            (void *)glb_policy, (void *)glb_policy->lb_call);
+  }
+  GPR_ASSERT(glb_policy->lb_call != NULL);
 
-  glb_policy->lb_client = lb_client_data_create(glb_policy);
   grpc_call_error call_error;
-  grpc_op ops[1];
+  grpc_op ops[4];
   memset(ops, 0, sizeof(ops));
+
   grpc_op *op = ops;
   op->op = GRPC_OP_SEND_INITIAL_METADATA;
   op->data.send_initial_metadata.count = 0;
   op->flags = 0;
   op->reserved = NULL;
   op++;
-  call_error = grpc_call_start_batch_and_execute(
-      exec_ctx, glb_policy->lb_client->lb_call, ops, (size_t)(op - ops),
-      &glb_policy->lb_client->md_sent);
-  GPR_ASSERT(GRPC_CALL_OK == call_error);
 
-  op = ops;
-  op->op = GRPC_OP_RECV_STATUS_ON_CLIENT;
-  op->data.recv_status_on_client.trailing_metadata =
-      &glb_policy->lb_client->trailing_metadata_recv;
-  op->data.recv_status_on_client.status = &glb_policy->lb_client->status;
-  op->data.recv_status_on_client.status_details =
-      &glb_policy->lb_client->status_details;
-  op->data.recv_status_on_client.status_details_capacity =
-      &glb_policy->lb_client->status_details_capacity;
+  op->op = GRPC_OP_RECV_INITIAL_METADATA;
+  op->data.recv_initial_metadata = &glb_policy->lb_initial_metadata_recv;
   op->flags = 0;
   op->reserved = NULL;
   op++;
-  call_error = grpc_call_start_batch_and_execute(
-      exec_ctx, glb_policy->lb_client->lb_call, ops, (size_t)(op - ops),
-      &glb_policy->lb_client->srv_status_rcvd);
-  GPR_ASSERT(GRPC_CALL_OK == call_error);
-}
-
-static void md_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
-  lb_client_data *lb_client = arg;
-  GPR_ASSERT(lb_client->lb_call);
-  grpc_op ops[1];
-  memset(ops, 0, sizeof(ops));
-  grpc_op *op = ops;
 
+  GPR_ASSERT(glb_policy->lb_request_payload != NULL);
   op->op = GRPC_OP_SEND_MESSAGE;
-  op->data.send_message = lb_client->request_payload;
+  op->data.send_message = glb_policy->lb_request_payload;
   op->flags = 0;
   op->reserved = NULL;
   op++;
-  grpc_call_error call_error = grpc_call_start_batch_and_execute(
-      exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
-      &lb_client->req_sent);
-  GPR_ASSERT(GRPC_CALL_OK == call_error);
-}
-
-static void req_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
-  lb_client_data *lb_client = arg;
-  GPR_ASSERT(lb_client->lb_call);
 
-  grpc_op ops[2];
-  memset(ops, 0, sizeof(ops));
-  grpc_op *op = ops;
-
-  op->op = GRPC_OP_RECV_INITIAL_METADATA;
-  op->data.recv_initial_metadata = &lb_client->initial_metadata_recv;
+  op->op = GRPC_OP_RECV_STATUS_ON_CLIENT;
+  op->data.recv_status_on_client.trailing_metadata =
+      &glb_policy->lb_trailing_metadata_recv;
+  op->data.recv_status_on_client.status = &glb_policy->lb_call_status;
+  op->data.recv_status_on_client.status_details =
+      &glb_policy->lb_call_status_details;
+  op->data.recv_status_on_client.status_details_capacity =
+      &glb_policy->lb_call_status_details_capacity;
   op->flags = 0;
   op->reserved = NULL;
   op++;
+  /* take a weak ref (won't prevent calling of \a glb_shutdown if the strong ref
+   * count goes to zero) to be unref'd in lb_on_server_status_received */
+  GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "lb_on_server_status_received");
+  call_error = grpc_call_start_batch_and_execute(
+      exec_ctx, glb_policy->lb_call, ops, (size_t)(op - ops),
+      &glb_policy->lb_on_server_status_received);
+  GPR_ASSERT(GRPC_CALL_OK == call_error);
 
+  op = ops;
   op->op = GRPC_OP_RECV_MESSAGE;
-  op->data.recv_message = &lb_client->response_payload;
+  op->data.recv_message = &glb_policy->lb_response_payload;
   op->flags = 0;
   op->reserved = NULL;
   op++;
-  grpc_call_error call_error = grpc_call_start_batch_and_execute(
-      exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
-      &lb_client->res_rcvd);
+  /* take another weak ref to be unref'd in lb_on_response_received */
+  GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "lb_on_response_received");
+  call_error = grpc_call_start_batch_and_execute(
+      exec_ctx, glb_policy->lb_call, ops, (size_t)(op - ops),
+      &glb_policy->lb_on_response_received);
   GPR_ASSERT(GRPC_CALL_OK == call_error);
 }
 
-static void res_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
-  lb_client_data *lb_client = arg;
+static void lb_on_response_received(grpc_exec_ctx *exec_ctx, void *arg,
+                                    grpc_error *error) {
+  glb_lb_policy *glb_policy = arg;
+
   grpc_op ops[2];
   memset(ops, 0, sizeof(ops));
   grpc_op *op = ops;
-  if (lb_client->response_payload != NULL) {
+  if (glb_policy->lb_response_payload != NULL) {
+    gpr_backoff_reset(&glb_policy->lb_call_backoff_state);
     /* Received data from the LB server. Look inside
-     * lb_client->response_payload, for a serverlist. */
+     * glb_policy->lb_response_payload, for a serverlist. */
     grpc_byte_buffer_reader bbr;
-    grpc_byte_buffer_reader_init(&bbr, lb_client->response_payload);
-    gpr_slice response_slice = grpc_byte_buffer_reader_readall(&bbr);
-    grpc_byte_buffer_destroy(lb_client->response_payload);
+    grpc_byte_buffer_reader_init(&bbr, glb_policy->lb_response_payload);
+    grpc_slice response_slice = grpc_byte_buffer_reader_readall(&bbr);
+    grpc_byte_buffer_destroy(glb_policy->lb_response_payload);
     grpc_grpclb_serverlist *serverlist =
         grpc_grpclb_response_parse_serverlist(response_slice);
     if (serverlist != NULL) {
-      gpr_slice_unref(response_slice);
+      GPR_ASSERT(glb_policy->lb_call != NULL);
+      grpc_slice_unref(response_slice);
       if (grpc_lb_glb_trace) {
-        gpr_log(GPR_INFO, "Serverlist with %zu servers received",
-                serverlist->num_servers);
+        gpr_log(GPR_INFO, "Serverlist with %lu servers received",
+                (unsigned long)serverlist->num_servers);
+        for (size_t i = 0; i < serverlist->num_servers; ++i) {
+          grpc_resolved_address addr;
+          parse_server(serverlist->servers[i], &addr);
+          char *ipport;
+          grpc_sockaddr_to_string(&ipport, &addr, false);
+          gpr_log(GPR_INFO, "Serverlist[%lu]: %s", (unsigned long)i, ipport);
+          gpr_free(ipport);
+        }
       }
 
       /* update serverlist */
       if (serverlist->num_servers > 0) {
-        if (grpc_grpclb_serverlist_equals(lb_client->glb_policy->serverlist,
-                                          serverlist)) {
+        gpr_mu_lock(&glb_policy->mu);
+        if (grpc_grpclb_serverlist_equals(glb_policy->serverlist, serverlist)) {
           if (grpc_lb_glb_trace) {
             gpr_log(GPR_INFO,
                     "Incoming server list identical to current, ignoring.");
           }
         } else { /* new serverlist */
-          if (lb_client->glb_policy->serverlist != NULL) {
+          if (glb_policy->serverlist != NULL) {
             /* dispose of the old serverlist */
-            grpc_grpclb_destroy_serverlist(lb_client->glb_policy->serverlist);
+            grpc_grpclb_destroy_serverlist(glb_policy->serverlist);
           }
           /* and update the copy in the glb_lb_policy instance */
-          lb_client->glb_policy->serverlist = serverlist;
-        }
-        if (lb_client->glb_policy->rr_policy == NULL) {
-          /* initial "handover", in this case from a null RR policy, meaning
-           * it'll just create the first RR policy instance */
-          rr_handover(exec_ctx, lb_client->glb_policy, error);
-        } else {
-          /* unref the RR policy, eventually leading to its substitution with a
-           * new one constructed from the received serverlist (see
-           * glb_rr_connectivity_changed) */
-          GRPC_LB_POLICY_UNREF(exec_ctx, lb_client->glb_policy->rr_policy,
-                               "serverlist_received");
+          glb_policy->serverlist = serverlist;
+
+          rr_handover_locked(exec_ctx, glb_policy, error);
         }
+        gpr_mu_unlock(&glb_policy->mu);
       } else {
         if (grpc_lb_glb_trace) {
           gpr_log(GPR_INFO,
@@ -1108,60 +1133,94 @@ static void res_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
                   "response with > 0 servers is received");
         }
       }
+    } else { /* serverlist == NULL */
+      gpr_log(GPR_ERROR, "Invalid LB response received: '%s'. Ignoring.",
+              grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX));
+      grpc_slice_unref(response_slice);
+    }
 
+    if (!glb_policy->shutting_down) {
       /* keep listening for serverlist updates */
       op->op = GRPC_OP_RECV_MESSAGE;
-      op->data.recv_message = &lb_client->response_payload;
+      op->data.recv_message = &glb_policy->lb_response_payload;
       op->flags = 0;
       op->reserved = NULL;
       op++;
+      /* reuse the "lb_on_response_received" weak ref taken in
+       * query_for_backends_locked() */
       const grpc_call_error call_error = grpc_call_start_batch_and_execute(
-          exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
-          &lb_client->res_rcvd); /* loop */
+          exec_ctx, glb_policy->lb_call, ops, (size_t)(op - ops),
+          &glb_policy->lb_on_response_received); /* loop */
       GPR_ASSERT(GRPC_CALL_OK == call_error);
-      return;
     }
-
-    GPR_ASSERT(serverlist == NULL);
-    gpr_log(GPR_ERROR, "Invalid LB response received: '%s'",
-            gpr_dump_slice(response_slice, GPR_DUMP_ASCII));
-    gpr_slice_unref(response_slice);
-
-    /* Disconnect from server returning invalid response. */
-    op->op = GRPC_OP_SEND_CLOSE_FROM_CLIENT;
-    op->flags = 0;
-    op->reserved = NULL;
-    op++;
-    grpc_call_error call_error = grpc_call_start_batch_and_execute(
-        exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
-        &lb_client->close_sent);
-    GPR_ASSERT(GRPC_CALL_OK == call_error);
+  } else { /* empty payload: call cancelled. */
+           /* dispose of the "lb_on_response_received" weak ref taken in
+            * query_for_backends_locked() and reused in every reception loop */
+    GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base,
+                              "lb_on_response_received_empty_payload");
   }
-  /* empty payload: call cancelled by server. Cleanups happening in
-   * srv_status_rcvd_cb */
 }
 
-static void close_sent_cb(grpc_exec_ctx *exec_ctx, void *arg,
-                          grpc_error *error) {
-  if (grpc_lb_glb_trace) {
-    gpr_log(GPR_INFO,
-            "Close from LB client sent. Waiting from server status now");
+static void lb_call_on_retry_timer(grpc_exec_ctx *exec_ctx, void *arg,
+                                   grpc_error *error) {
+  glb_lb_policy *glb_policy = arg;
+  gpr_mu_lock(&glb_policy->mu);
+
+  if (!glb_policy->shutting_down) {
+    if (grpc_lb_glb_trace) {
+      gpr_log(GPR_INFO, "Restaring call to LB server (grpclb %p)",
+              (void *)glb_policy);
+    }
+    GPR_ASSERT(glb_policy->lb_call == NULL);
+    query_for_backends_locked(exec_ctx, glb_policy);
   }
+  gpr_mu_unlock(&glb_policy->mu);
+
+  GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base,
+                            "grpclb_on_retry_timer");
 }
 
-static void srv_status_rcvd_cb(grpc_exec_ctx *exec_ctx, void *arg,
-                               grpc_error *error) {
-  lb_client_data *lb_client = arg;
+static void lb_on_server_status_received(grpc_exec_ctx *exec_ctx, void *arg,
+                                         grpc_error *error) {
+  glb_lb_policy *glb_policy = arg;
+  gpr_mu_lock(&glb_policy->mu);
+
+  GPR_ASSERT(glb_policy->lb_call != NULL);
+
   if (grpc_lb_glb_trace) {
-    gpr_log(GPR_INFO,
-            "status from lb server received. Status = %d, Details = '%s', "
-            "Capaticy "
-            "= %zu",
-            lb_client->status, lb_client->status_details,
-            lb_client->status_details_capacity);
+    gpr_log(GPR_DEBUG,
+            "Status from LB server received. Status = %d, Details = '%s', "
+            "(call: %p)",
+            glb_policy->lb_call_status, glb_policy->lb_call_status_details,
+            (void *)glb_policy->lb_call);
+  }
+
+  /* We need to performe cleanups no matter what. */
+  lb_call_destroy_locked(glb_policy);
+
+  if (!glb_policy->shutting_down) {
+    /* if we aren't shutting down, restart the LB client call after some time */
+    gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC);
+    gpr_timespec next_try =
+        gpr_backoff_step(&glb_policy->lb_call_backoff_state, now);
+    if (grpc_lb_glb_trace) {
+      gpr_log(GPR_DEBUG, "Connection to LB server lost (grpclb: %p)...",
+              (void *)glb_policy);
+      gpr_timespec timeout = gpr_time_sub(next_try, now);
+      if (gpr_time_cmp(timeout, gpr_time_0(timeout.clock_type)) > 0) {
+        gpr_log(GPR_DEBUG, "... retrying in %" PRId64 ".%09d seconds.",
+                timeout.tv_sec, timeout.tv_nsec);
+      } else {
+        gpr_log(GPR_DEBUG, "... retrying immediately.");
+      }
+    }
+    GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "grpclb_retry_timer");
+    grpc_timer_init(exec_ctx, &glb_policy->lb_call_retry_timer, next_try,
+                    lb_call_on_retry_timer, glb_policy, now);
   }
-  /* TODO(dgq): deal with stream termination properly (fire up another one?
-   * fail the original call?) */
+  gpr_mu_unlock(&glb_policy->mu);
+  GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base,
+                            "lb_on_server_status_received");
 }
 
 /* Code wiring the policy with the rest of the core */