Merge remote-tracking branch 'upstream/master' into config-isolation

author: Muxi Yan <mxyan@google.com> 2018-10-18 09:38:26 -0700
committer: Muxi Yan <mxyan@google.com> 2018-10-18 09:38:26 -0700
commit: 62693ec1bd0fe0133a3d382ef2e11ffa935e9904 (patch)
tree: f84b6c168f55ef2f4b8661ab64960fb11b0f7d76 /src/core
parent: be4ab30899f6ae91e07f02c33297ca462f778296 (diff)
parent: d2704ecca0b6cc43e8dfde6dd64875b4fb71d6cd (diff)
62 files changed, 3404 insertions, 233 deletions
diff --git a/src/core/ext/filters/client_channel/OWNERS b/src/core/ext/filters/client_channel/OWNERS
index c8760d947b..d38970e0fa 100644
--- a/src/core/ext/filters/client_channel/OWNERS
+++ b/src/core/ext/filters/client_channel/OWNERS
@@ -1,4 +1,4 @@
 set noparent
 @markdroth
-@dgquintas
+@apolcyn
 @AspirinSJL
diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc
index 388736b60a..23da11363b 100644
--- a/src/core/ext/filters/client_channel/client_channel.cc
+++ b/src/core/ext/filters/client_channel/client_channel.cc
@@ -129,6 +129,8 @@ typedef struct client_channel_channel_data {
   grpc_core::UniquePtr<char> info_lb_policy_name;
   /** service config in JSON form */
   grpc_core::UniquePtr<char> info_service_config_json;
+  /* backpointer to grpc_channel's channelz node */
+  grpc_core::channelz::ClientChannelNode* channelz_channel;
 } channel_data;
 
 typedef struct {
@@ -153,6 +155,23 @@ static void watch_lb_policy_locked(channel_data* chand,
                                    grpc_core::LoadBalancingPolicy* lb_policy,
                                    grpc_connectivity_state current_state);
 
+static const char* channel_connectivity_state_change_string(
+    grpc_connectivity_state state) {
+  switch (state) {
+    case GRPC_CHANNEL_IDLE:
+      return "Channel state change to IDLE";
+    case GRPC_CHANNEL_CONNECTING:
+      return "Channel state change to CONNECTING";
+    case GRPC_CHANNEL_READY:
+      return "Channel state change to READY";
+    case GRPC_CHANNEL_TRANSIENT_FAILURE:
+      return "Channel state change to TRANSIENT_FAILURE";
+    case GRPC_CHANNEL_SHUTDOWN:
+      return "Channel state change to SHUTDOWN";
+  }
+  GPR_UNREACHABLE_CODE(return "UNKNOWN");
+}
+
 static void set_channel_connectivity_state_locked(channel_data* chand,
                                                   grpc_connectivity_state state,
                                                   grpc_error* error,
@@ -177,6 +196,12 @@ static void set_channel_connectivity_state_locked(channel_data* chand,
     gpr_log(GPR_INFO, "chand=%p: setting connectivity state to %s", chand,
             grpc_connectivity_state_name(state));
   }
+  if (chand->channelz_channel != nullptr) {
+    chand->channelz_channel->AddTraceEvent(
+        grpc_core::channelz::ChannelTrace::Severity::Info,
+        grpc_slice_from_static_string(
+            channel_connectivity_state_change_string(state)));
+  }
   grpc_connectivity_state_set(&chand->state_tracker, state, error, reason);
 }
 
@@ -699,6 +724,7 @@ static grpc_error* cc_init_channel_elem(grpc_channel_element* elem,
   // Record enable_retries.
   arg = grpc_channel_args_find(args->channel_args, GRPC_ARG_ENABLE_RETRIES);
   chand->enable_retries = grpc_channel_arg_get_bool(arg, true);
+  chand->channelz_channel = nullptr;
   // Record client channel factory.
   arg = grpc_channel_args_find(args->channel_args,
                                GRPC_ARG_CLIENT_CHANNEL_FACTORY);
@@ -933,11 +959,6 @@ typedef struct client_channel_call_data {
   grpc_closure pick_closure;
   grpc_closure pick_cancel_closure;
 
-  // state needed to support channelz interception of recv trailing metadata.
-  grpc_closure recv_trailing_metadata_ready_channelz;
-  grpc_closure* original_recv_trailing_metadata;
-  grpc_metadata_batch* recv_trailing_metadata;
-
   grpc_polling_entity* pollent;
   bool pollent_added_to_interested_parties;
 
@@ -999,8 +1020,6 @@ static void start_internal_recv_trailing_metadata(grpc_call_element* elem);
 static void on_complete(void* arg, grpc_error* error);
 static void start_retriable_subchannel_batches(void* arg, grpc_error* ignored);
 static void start_pick_locked(void* arg, grpc_error* ignored);
-static void maybe_intercept_recv_trailing_metadata_for_channelz(
-    grpc_call_element* elem, grpc_transport_stream_op_batch* batch);
 
 //
 // send op data caching
@@ -1299,7 +1318,6 @@ static void pending_batches_resume(grpc_call_element* elem) {
     pending_batch* pending = &calld->pending_batches[i];
     grpc_transport_stream_op_batch* batch = pending->batch;
     if (batch != nullptr) {
-      maybe_intercept_recv_trailing_metadata_for_channelz(elem, batch);
       batch->handler_private.extra_arg = calld->subchannel_call;
       GRPC_CLOSURE_INIT(&batch->handler_private.closure,
                         resume_pending_batch_in_call_combiner, batch,
@@ -1977,15 +1995,6 @@ static void recv_trailing_metadata_ready(void* arg, grpc_error* error) {
       batch_data->batch.payload->recv_trailing_metadata.recv_trailing_metadata;
   get_call_status(elem, md_batch, GRPC_ERROR_REF(error), &status,
                   &server_pushback_md);
-  grpc_core::channelz::SubchannelNode* channelz_subchannel =
-      calld->pick.connected_subchannel->channelz_subchannel();
-  if (channelz_subchannel != nullptr) {
-    if (status == GRPC_STATUS_OK) {
-      channelz_subchannel->RecordCallSucceeded();
-    } else {
-      channelz_subchannel->RecordCallFailed();
-    }
-  }
   if (grpc_client_channel_trace.enabled()) {
     gpr_log(GPR_INFO, "chand=%p calld=%p: call finished, status=%s", chand,
             calld, grpc_status_code_to_string(status));
@@ -2211,9 +2220,9 @@ static void add_retriable_send_initial_metadata_op(
                                    .grpc_previous_rpc_attempts);
   }
   if (GPR_UNLIKELY(calld->num_attempts_completed > 0)) {
-    grpc_mdelem retry_md = grpc_mdelem_from_slices(
+    grpc_mdelem retry_md = grpc_mdelem_create(
         GRPC_MDSTR_GRPC_PREVIOUS_RPC_ATTEMPTS,
-        *retry_count_strings[calld->num_attempts_completed - 1]);
+        *retry_count_strings[calld->num_attempts_completed - 1], nullptr);
     grpc_error* error = grpc_metadata_batch_add_tail(
         &retry_state->send_initial_metadata,
         &retry_state->send_initial_metadata_storage[calld->send_initial_metadata
@@ -2590,69 +2599,6 @@ static void start_retriable_subchannel_batches(void* arg, grpc_error* ignored) {
 }
 
 //
-// Channelz
-//
-
-static void recv_trailing_metadata_ready_channelz(void* arg,
-                                                  grpc_error* error) {
-  grpc_call_element* elem = static_cast<grpc_call_element*>(arg);
-  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
-  call_data* calld = static_cast<call_data*>(elem->call_data);
-  if (grpc_client_channel_trace.enabled()) {
-    gpr_log(GPR_INFO,
-            "chand=%p calld=%p: got recv_trailing_metadata_ready_channelz, "
-            "error=%s",
-            chand, calld, grpc_error_string(error));
-  }
-  GPR_ASSERT(calld->recv_trailing_metadata != nullptr);
-  grpc_status_code status = GRPC_STATUS_OK;
-  grpc_metadata_batch* md_batch = calld->recv_trailing_metadata;
-  get_call_status(elem, md_batch, GRPC_ERROR_REF(error), &status, nullptr);
-  grpc_core::channelz::SubchannelNode* channelz_subchannel =
-      calld->pick.connected_subchannel->channelz_subchannel();
-  GPR_ASSERT(channelz_subchannel != nullptr);
-  if (status == GRPC_STATUS_OK) {
-    channelz_subchannel->RecordCallSucceeded();
-  } else {
-    channelz_subchannel->RecordCallFailed();
-  }
-  calld->recv_trailing_metadata = nullptr;
-  GRPC_CLOSURE_RUN(calld->original_recv_trailing_metadata, error);
-}
-
-// If channelz is enabled, intercept recv_trailing so that we may check the
-// status and associate it to a subchannel.
-// Returns true if callback was intercepted, false otherwise.
-static void maybe_intercept_recv_trailing_metadata_for_channelz(
-    grpc_call_element* elem, grpc_transport_stream_op_batch* batch) {
-  call_data* calld = static_cast<call_data*>(elem->call_data);
-  // only intercept payloads with recv trailing.
-  if (!batch->recv_trailing_metadata) {
-    return;
-  }
-  // only add interceptor is channelz is enabled.
-  if (calld->pick.connected_subchannel->channelz_subchannel() == nullptr) {
-    return;
-  }
-  if (grpc_client_channel_trace.enabled()) {
-    gpr_log(GPR_INFO,
-            "calld=%p batch=%p: intercepting recv trailing for channelz", calld,
-            batch);
-  }
-  GRPC_CLOSURE_INIT(&calld->recv_trailing_metadata_ready_channelz,
-                    recv_trailing_metadata_ready_channelz, elem,
-                    grpc_schedule_on_exec_ctx);
-  // save some state needed for the interception callback.
-  GPR_ASSERT(calld->recv_trailing_metadata == nullptr);
-  calld->recv_trailing_metadata =
-      batch->payload->recv_trailing_metadata.recv_trailing_metadata;
-  calld->original_recv_trailing_metadata =
-      batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready;
-  batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready =
-      &calld->recv_trailing_metadata_ready_channelz;
-}
-
-//
 // LB pick
 //
 
@@ -3288,9 +3234,16 @@ static void try_to_connect_locked(void* arg, grpc_error* error_ignored) {
   GRPC_CHANNEL_STACK_UNREF(chand->owning_stack, "try_to_connect");
 }
 
+void grpc_client_channel_set_channelz_node(
+    grpc_channel_element* elem, grpc_core::channelz::ClientChannelNode* node) {
+  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+  chand->channelz_channel = node;
+}
+
 void grpc_client_channel_populate_child_refs(
-    grpc_channel_element* elem, grpc_core::ChildRefsList* child_subchannels,
-    grpc_core::ChildRefsList* child_channels) {
+    grpc_channel_element* elem,
+    grpc_core::channelz::ChildRefsList* child_subchannels,
+    grpc_core::channelz::ChildRefsList* child_channels) {
   channel_data* chand = static_cast<channel_data*>(elem->channel_data);
   if (chand->lb_policy != nullptr) {
     chand->lb_policy->FillChildRefsForChannelz(child_subchannels,
diff --git a/src/core/ext/filters/client_channel/client_channel.h b/src/core/ext/filters/client_channel/client_channel.h
index 0b44a17562..4935fd24d8 100644
--- a/src/core/ext/filters/client_channel/client_channel.h
+++ b/src/core/ext/filters/client_channel/client_channel.h
@@ -40,9 +40,13 @@ extern grpc_core::TraceFlag grpc_client_channel_trace;
 
 extern const grpc_channel_filter grpc_client_channel_filter;
 
+void grpc_client_channel_set_channelz_node(
+    grpc_channel_element* elem, grpc_core::channelz::ClientChannelNode* node);
+
 void grpc_client_channel_populate_child_refs(
-    grpc_channel_element* elem, grpc_core::ChildRefsList* child_subchannels,
-    grpc_core::ChildRefsList* child_channels);
+    grpc_channel_element* elem,
+    grpc_core::channelz::ChildRefsList* child_subchannels,
+    grpc_core::channelz::ChildRefsList* child_channels);
 
 grpc_connectivity_state grpc_client_channel_check_connectivity_state(
     grpc_channel_element* elem, int try_to_connect);
diff --git a/src/core/ext/filters/client_channel/client_channel_channelz.cc b/src/core/ext/filters/client_channel/client_channel_channelz.cc
index b66c920b90..bad1ef668c 100644
--- a/src/core/ext/filters/client_channel/client_channel_channelz.cc
+++ b/src/core/ext/filters/client_channel/client_channel_channelz.cc
@@ -49,6 +49,7 @@ ClientChannelNode::ClientChannelNode(grpc_channel* channel,
     : ChannelNode(channel, channel_tracer_max_nodes, is_top_level_channel) {
   client_channel_ =
       grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel));
+  grpc_client_channel_set_channelz_node(client_channel_, this);
   GPR_ASSERT(client_channel_->filter == &grpc_client_channel_filter);
 }
 
diff --git a/src/core/ext/filters/client_channel/client_channel_channelz.h b/src/core/ext/filters/client_channel/client_channel_channelz.h
index 8ce331e529..8a5c3e7e5e 100644
--- a/src/core/ext/filters/client_channel/client_channel_channelz.h
+++ b/src/core/ext/filters/client_channel/client_channel_channelz.h
@@ -25,17 +25,10 @@
 #include "src/core/lib/channel/channel_stack.h"
 #include "src/core/lib/channel/channel_trace.h"
 #include "src/core/lib/channel/channelz.h"
-#include "src/core/lib/gprpp/inlined_vector.h"
 
 typedef struct grpc_subchannel grpc_subchannel;
 
 namespace grpc_core {
-
-// TODO(ncteisen), this only contains the uuids of the children for now,
-// since that is all that is strictly needed. In a future enhancement we will
-// add human readable names as in the channelz.proto
-typedef InlinedVector<intptr_t, 10> ChildRefsList;
-
 namespace channelz {
 
 // Subtype of ChannelNode that overrides and provides client_channel specific
diff --git a/src/core/ext/filters/client_channel/http_connect_handshaker.cc b/src/core/ext/filters/client_channel/http_connect_handshaker.cc
index 7ce8da8c00..bfabc68c66 100644
--- a/src/core/ext/filters/client_channel/http_connect_handshaker.cc
+++ b/src/core/ext/filters/client_channel/http_connect_handshaker.cc
@@ -351,6 +351,7 @@ static grpc_handshaker* grpc_http_connect_handshaker_create() {
 
 static void handshaker_factory_add_handshakers(
     grpc_handshaker_factory* factory, const grpc_channel_args* args,
+    grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_mgr) {
   grpc_handshake_manager_add(handshake_mgr,
                              grpc_http_connect_handshaker_create());
diff --git a/src/core/ext/filters/client_channel/lb_policy.h b/src/core/ext/filters/client_channel/lb_policy.h
index 3c0a9c1118..21f80b7b94 100644
--- a/src/core/ext/filters/client_channel/lb_policy.h
+++ b/src/core/ext/filters/client_channel/lb_policy.h
@@ -151,9 +151,9 @@ class LoadBalancingPolicy
   /// LB policy's referenced children. This is not invoked from the
   /// client_channel's combiner. The implementation is responsible for
   /// providing its own synchronization.
-  virtual void FillChildRefsForChannelz(ChildRefsList* child_subchannels,
-                                        ChildRefsList* child_channels)
-      GRPC_ABSTRACT;
+  virtual void FillChildRefsForChannelz(
+      channelz::ChildRefsList* child_subchannels,
+      channelz::ChildRefsList* child_channels) GRPC_ABSTRACT;
 
   void Orphan() override {
     // Invoke ShutdownAndUnrefLocked() inside of the combiner.
@@ -212,8 +212,8 @@ class LoadBalancingPolicy
   // Dummy classes needed for alignment issues.
   // See https://github.com/grpc/grpc/issues/16032 for context.
   // TODO(ncteisen): remove this as soon as the issue is resolved.
-  ChildRefsList dummy_list_foo;
-  ChildRefsList dummy_list_bar;
+  channelz::ChildRefsList dummy_list_foo;
+  channelz::ChildRefsList dummy_list_bar;
 };
 
 }  // namespace grpc_core
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
index 0061190016..5511df7a27 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
@@ -136,8 +136,9 @@ class GrpcLb : public LoadBalancingPolicy {
   void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override;
   void ExitIdleLocked() override;
   void ResetBackoffLocked() override;
-  void FillChildRefsForChannelz(ChildRefsList* child_subchannels,
-                                ChildRefsList* child_channels) override;
+  void FillChildRefsForChannelz(
+      channelz::ChildRefsList* child_subchannels,
+      channelz::ChildRefsList* child_channels) override;
 
  private:
   /// Linked list of pending pick requests. It stores all information needed to
@@ -1258,8 +1259,9 @@ bool GrpcLb::PickLocked(PickState* pick, grpc_error** error) {
   return pick_done;
 }
 
-void GrpcLb::FillChildRefsForChannelz(ChildRefsList* child_subchannels,
-                                      ChildRefsList* child_channels) {
+void GrpcLb::FillChildRefsForChannelz(
+    channelz::ChildRefsList* child_subchannels,
+    channelz::ChildRefsList* child_channels) {
   // delegate to the RoundRobin to fill the children subchannels.
   rr_policy_->FillChildRefsForChannelz(child_subchannels, child_channels);
   MutexLock lock(&lb_channel_mu_);
@@ -1489,7 +1491,7 @@ void GrpcLb::OnBalancerChannelConnectivityChangedLocked(void* arg,
         grpclb_policy->lb_call_backoff_.Reset();
         grpclb_policy->StartBalancerCallLocked();
       }
-      // Fall through.
+      // fallthrough
     case GRPC_CHANNEL_SHUTDOWN:
     done:
       grpclb_policy->watching_lb_channel_ = false;
diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
index ed8cc60ea1..f4dca146f7 100644
--- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
@@ -59,8 +59,8 @@ class PickFirst : public LoadBalancingPolicy {
   void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override;
   void ExitIdleLocked() override;
   void ResetBackoffLocked() override;
-  void FillChildRefsForChannelz(ChildRefsList* child_subchannels,
-                                ChildRefsList* ignored) override;
+  void FillChildRefsForChannelz(channelz::ChildRefsList* child_subchannels,
+                                channelz::ChildRefsList* ignored) override;
 
  private:
   ~PickFirst();
@@ -147,8 +147,8 @@ class PickFirst : public LoadBalancingPolicy {
   /// Lock and data used to capture snapshots of this channels child
   /// channels and subchannels. This data is consumed by channelz.
   gpr_mu child_refs_mu_;
-  ChildRefsList child_subchannels_;
-  ChildRefsList child_channels_;
+  channelz::ChildRefsList child_subchannels_;
+  channelz::ChildRefsList child_channels_;
 };
 
 PickFirst::PickFirst(const Args& args) : LoadBalancingPolicy(args) {
@@ -300,7 +300,8 @@ void PickFirst::NotifyOnStateChangeLocked(grpc_connectivity_state* current,
 }
 
 void PickFirst::FillChildRefsForChannelz(
-    ChildRefsList* child_subchannels_to_fill, ChildRefsList* ignored) {
+    channelz::ChildRefsList* child_subchannels_to_fill,
+    channelz::ChildRefsList* ignored) {
   MutexLock lock(&child_refs_mu_);
   for (size_t i = 0; i < child_subchannels_.size(); ++i) {
     // TODO(ncteisen): implement a de dup loop that is not O(n^2). Might
@@ -320,7 +321,7 @@ void PickFirst::FillChildRefsForChannelz(
 }
 
 void PickFirst::UpdateChildRefsLocked() {
-  ChildRefsList cs;
+  channelz::ChildRefsList cs;
   if (subchannel_list_ != nullptr) {
     subchannel_list_->PopulateChildRefsList(&cs);
   }
diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc
index 8dd5820bae..e9ed85cf66 100644
--- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc
@@ -70,8 +70,8 @@ class RoundRobin : public LoadBalancingPolicy {
   void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override;
   void ExitIdleLocked() override;
   void ResetBackoffLocked() override;
-  void FillChildRefsForChannelz(ChildRefsList* child_subchannels,
-                                ChildRefsList* ignored) override;
+  void FillChildRefsForChannelz(channelz::ChildRefsList* child_subchannels,
+                                channelz::ChildRefsList* ignored) override;
 
  private:
   ~RoundRobin();
@@ -223,8 +223,8 @@ class RoundRobin : public LoadBalancingPolicy {
   /// Lock and data used to capture snapshots of this channel's child
   /// channels and subchannels. This data is consumed by channelz.
   gpr_mu child_refs_mu_;
-  ChildRefsList child_subchannels_;
-  ChildRefsList child_channels_;
+  channelz::ChildRefsList child_subchannels_;
+  channelz::ChildRefsList child_channels_;
 };
 
 RoundRobin::RoundRobin(const Args& args) : LoadBalancingPolicy(args) {
@@ -402,7 +402,8 @@ bool RoundRobin::PickLocked(PickState* pick, grpc_error** error) {
 }
 
 void RoundRobin::FillChildRefsForChannelz(
-    ChildRefsList* child_subchannels_to_fill, ChildRefsList* ignored) {
+    channelz::ChildRefsList* child_subchannels_to_fill,
+    channelz::ChildRefsList* ignored) {
   MutexLock lock(&child_refs_mu_);
   for (size_t i = 0; i < child_subchannels_.size(); ++i) {
     // TODO(ncteisen): implement a de dup loop that is not O(n^2). Might
@@ -422,7 +423,7 @@ void RoundRobin::FillChildRefsForChannelz(
 }
 
 void RoundRobin::UpdateChildRefsLocked() {
-  ChildRefsList cs;
+  channelz::ChildRefsList cs;
   if (subchannel_list_ != nullptr) {
     subchannel_list_->PopulateChildRefsList(&cs);
   }
diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h
index 5e8682e056..e0e0e1e638 100644
--- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h
+++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h
@@ -201,7 +201,7 @@ class SubchannelList
   bool shutting_down() const { return shutting_down_; }
 
   // Populates refs_list with the uuids of this SubchannelLists's subchannels.
-  void PopulateChildRefsList(ChildRefsList* refs_list) {
+  void PopulateChildRefsList(channelz::ChildRefsList* refs_list) {
     for (size_t i = 0; i < subchannels_.size(); ++i) {
       if (subchannels_[i].subchannel() != nullptr) {
         grpc_core::channelz::SubchannelNode* subchannel_node =
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/client_load_reporting_filter.cc b/src/core/ext/filters/client_channel/lb_policy/xds/client_load_reporting_filter.cc
new file mode 100644
index 0000000000..d79453d008
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/client_load_reporting_filter.cc
@@ -0,0 +1,140 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/ext/filters/client_channel/lb_policy/xds/client_load_reporting_filter.h"
+
+#include <grpc/support/atm.h>
+#include <grpc/support/log.h>
+
+#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h"
+#include "src/core/lib/iomgr/error.h"
+#include "src/core/lib/profiling/timers.h"
+
+static grpc_error* init_channel_elem(grpc_channel_element* elem,
+                                     grpc_channel_element_args* args) {
+  return GRPC_ERROR_NONE;
+}
+
+static void destroy_channel_elem(grpc_channel_element* elem) {}
+
+namespace {
+
+struct call_data {
+  // Stats object to update.
+  grpc_core::RefCountedPtr<grpc_core::XdsLbClientStats> client_stats;
+  // State for intercepting send_initial_metadata.
+  grpc_closure on_complete_for_send;
+  grpc_closure* original_on_complete_for_send;
+  bool send_initial_metadata_succeeded;
+  // State for intercepting recv_initial_metadata.
+  grpc_closure recv_initial_metadata_ready;
+  grpc_closure* original_recv_initial_metadata_ready;
+  bool recv_initial_metadata_succeeded;
+};
+
+}  // namespace
+
+static void on_complete_for_send(void* arg, grpc_error* error) {
+  call_data* calld = static_cast<call_data*>(arg);
+  if (error == GRPC_ERROR_NONE) {
+    calld->send_initial_metadata_succeeded = true;
+  }
+  GRPC_CLOSURE_RUN(calld->original_on_complete_for_send, GRPC_ERROR_REF(error));
+}
+
+static void recv_initial_metadata_ready(void* arg, grpc_error* error) {
+  call_data* calld = static_cast<call_data*>(arg);
+  if (error == GRPC_ERROR_NONE) {
+    calld->recv_initial_metadata_succeeded = true;
+  }
+  GRPC_CLOSURE_RUN(calld->original_recv_initial_metadata_ready,
+                   GRPC_ERROR_REF(error));
+}
+
+static grpc_error* init_call_elem(grpc_call_element* elem,
+                                  const grpc_call_element_args* args) {
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  // Get stats object from context and take a ref.
+  GPR_ASSERT(args->context != nullptr);
+  if (args->context[GRPC_GRPCLB_CLIENT_STATS].value != nullptr) {
+    calld->client_stats = static_cast<grpc_core::XdsLbClientStats*>(
+                              args->context[GRPC_GRPCLB_CLIENT_STATS].value)
+                              ->Ref();
+    // Record call started.
+    calld->client_stats->AddCallStarted();
+  }
+  return GRPC_ERROR_NONE;
+}
+
+static void destroy_call_elem(grpc_call_element* elem,
+                              const grpc_call_final_info* final_info,
+                              grpc_closure* ignored) {
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  if (calld->client_stats != nullptr) {
+    // Record call finished, optionally setting client_failed_to_send and
+    // received.
+    calld->client_stats->AddCallFinished(
+        !calld->send_initial_metadata_succeeded /* client_failed_to_send */,
+        calld->recv_initial_metadata_succeeded /* known_received */);
+    // All done, so unref the stats object.
+    // TODO(roth): Eliminate this once filter stack is converted to C++.
+    calld->client_stats.reset();
+  }
+}
+
+static void start_transport_stream_op_batch(
+    grpc_call_element* elem, grpc_transport_stream_op_batch* batch) {
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  GPR_TIMER_SCOPE("clr_start_transport_stream_op_batch", 0);
+  if (calld->client_stats != nullptr) {
+    // Intercept send_initial_metadata.
+    if (batch->send_initial_metadata) {
+      calld->original_on_complete_for_send = batch->on_complete;
+      GRPC_CLOSURE_INIT(&calld->on_complete_for_send, on_complete_for_send,
+                        calld, grpc_schedule_on_exec_ctx);
+      batch->on_complete = &calld->on_complete_for_send;
+    }
+    // Intercept recv_initial_metadata.
+    if (batch->recv_initial_metadata) {
+      calld->original_recv_initial_metadata_ready =
+          batch->payload->recv_initial_metadata.recv_initial_metadata_ready;
+      GRPC_CLOSURE_INIT(&calld->recv_initial_metadata_ready,
+                        recv_initial_metadata_ready, calld,
+                        grpc_schedule_on_exec_ctx);
+      batch->payload->recv_initial_metadata.recv_initial_metadata_ready =
+          &calld->recv_initial_metadata_ready;
+    }
+  }
+  // Chain to next filter.
+  grpc_call_next_op(elem, batch);
+}
+
+const grpc_channel_filter xds_client_load_reporting_filter = {
+    start_transport_stream_op_batch,
+    grpc_channel_next_op,
+    sizeof(call_data),
+    init_call_elem,
+    grpc_call_stack_ignore_set_pollset_or_pollset_set,
+    destroy_call_elem,
+    0,  // sizeof(channel_data)
+    init_channel_elem,
+    destroy_channel_elem,
+    grpc_channel_next_get_info,
+    "client_load_reporting"};
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/client_load_reporting_filter.h b/src/core/ext/filters/client_channel/lb_policy/xds/client_load_reporting_filter.h
new file mode 100644
index 0000000000..31a799154c
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/client_load_reporting_filter.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_CLIENT_LOAD_REPORTING_FILTER_H
+#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_CLIENT_LOAD_REPORTING_FILTER_H
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/lib/channel/channel_stack.h"
+
+extern const grpc_channel_filter xds_client_load_reporting_filter;
+
+#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_CLIENT_LOAD_REPORTING_FILTER_H \
+        */
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/load_balancer_api.cc b/src/core/ext/filters/client_channel/lb_policy/xds/load_balancer_api.cc
new file mode 100644
index 0000000000..3bd2653635
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/load_balancer_api.cc
@@ -0,0 +1,307 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "pb_decode.h"
+#include "pb_encode.h"
+#include "src/core/ext/filters/client_channel/lb_policy/xds/load_balancer_api.h"
+
+#include <grpc/support/alloc.h>
+
+/* invoked once for every Server in ServerList */
+static bool count_serverlist(pb_istream_t* stream, const pb_field_t* field,
+                             void** arg) {
+  xds_grpclb_serverlist* sl = static_cast<xds_grpclb_serverlist*>(*arg);
+  xds_grpclb_server server;
+  if (GPR_UNLIKELY(!pb_decode(stream, grpc_lb_v1_Server_fields, &server))) {
+    gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(stream));
+    return false;
+  }
+  ++sl->num_servers;
+  return true;
+}
+
+typedef struct decode_serverlist_arg {
+  /* The decoding callback is invoked once per server in serverlist. Remember
+   * which index of the serverlist are we currently decoding */
+  size_t decoding_idx;
+  /* The decoded serverlist */
+  xds_grpclb_serverlist* serverlist;
+} decode_serverlist_arg;
+
+/* invoked once for every Server in ServerList */
+static bool decode_serverlist(pb_istream_t* stream, const pb_field_t* field,
+                              void** arg) {
+  decode_serverlist_arg* dec_arg = static_cast<decode_serverlist_arg*>(*arg);
+  GPR_ASSERT(dec_arg->serverlist->num_servers >= dec_arg->decoding_idx);
+  xds_grpclb_server* server =
+      static_cast<xds_grpclb_server*>(gpr_zalloc(sizeof(xds_grpclb_server)));
+  if (GPR_UNLIKELY(!pb_decode(stream, grpc_lb_v1_Server_fields, server))) {
+    gpr_free(server);
+    gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(stream));
+    return false;
+  }
+  dec_arg->serverlist->servers[dec_arg->decoding_idx++] = server;
+  return true;
+}
+
+xds_grpclb_request* xds_grpclb_request_create(const char* lb_service_name) {
+  xds_grpclb_request* req =
+      static_cast<xds_grpclb_request*>(gpr_malloc(sizeof(xds_grpclb_request)));
+  req->has_client_stats = false;
+  req->has_initial_request = true;
+  req->initial_request.has_name = true;
+  strncpy(req->initial_request.name, lb_service_name,
+          XDS_SERVICE_NAME_MAX_LENGTH);
+  return req;
+}
+
+static void populate_timestamp(gpr_timespec timestamp,
+                               xds_grpclb_timestamp* timestamp_pb) {
+  timestamp_pb->has_seconds = true;
+  timestamp_pb->seconds = timestamp.tv_sec;
+  timestamp_pb->has_nanos = true;
+  timestamp_pb->nanos = timestamp.tv_nsec;
+}
+
+static bool encode_string(pb_ostream_t* stream, const pb_field_t* field,
+                          void* const* arg) {
+  char* str = static_cast<char*>(*arg);
+  if (!pb_encode_tag_for_field(stream, field)) return false;
+  return pb_encode_string(stream, reinterpret_cast<uint8_t*>(str), strlen(str));
+}
+
+static bool encode_drops(pb_ostream_t* stream, const pb_field_t* field,
+                         void* const* arg) {
+  grpc_core::XdsLbClientStats::DroppedCallCounts* drop_entries =
+      static_cast<grpc_core::XdsLbClientStats::DroppedCallCounts*>(*arg);
+  if (drop_entries == nullptr) return true;
+  for (size_t i = 0; i < drop_entries->size(); ++i) {
+    if (!pb_encode_tag_for_field(stream, field)) return false;
+    grpc_lb_v1_ClientStatsPerToken drop_message;
+    drop_message.load_balance_token.funcs.encode = encode_string;
+    drop_message.load_balance_token.arg = (*drop_entries)[i].token.get();
+    drop_message.has_num_calls = true;
+    drop_message.num_calls = (*drop_entries)[i].count;
+    if (!pb_encode_submessage(stream, grpc_lb_v1_ClientStatsPerToken_fields,
+                              &drop_message)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+xds_grpclb_request* xds_grpclb_load_report_request_create_locked(
+    grpc_core::XdsLbClientStats* client_stats) {
+  xds_grpclb_request* req =
+      static_cast<xds_grpclb_request*>(gpr_zalloc(sizeof(xds_grpclb_request)));
+  req->has_client_stats = true;
+  req->client_stats.has_timestamp = true;
+  populate_timestamp(gpr_now(GPR_CLOCK_REALTIME), &req->client_stats.timestamp);
+  req->client_stats.has_num_calls_started = true;
+  req->client_stats.has_num_calls_finished = true;
+  req->client_stats.has_num_calls_finished_with_client_failed_to_send = true;
+  req->client_stats.has_num_calls_finished_with_client_failed_to_send = true;
+  req->client_stats.has_num_calls_finished_known_received = true;
+  req->client_stats.calls_finished_with_drop.funcs.encode = encode_drops;
+  grpc_core::UniquePtr<grpc_core::XdsLbClientStats::DroppedCallCounts>
+      drop_counts;
+  client_stats->GetLocked(
+      &req->client_stats.num_calls_started,
+      &req->client_stats.num_calls_finished,
+      &req->client_stats.num_calls_finished_with_client_failed_to_send,
+      &req->client_stats.num_calls_finished_known_received, &drop_counts);
+  // Will be deleted in xds_grpclb_request_destroy().
+  req->client_stats.calls_finished_with_drop.arg = drop_counts.release();
+  return req;
+}
+
+grpc_slice xds_grpclb_request_encode(const xds_grpclb_request* request) {
+  size_t encoded_length;
+  pb_ostream_t sizestream;
+  pb_ostream_t outputstream;
+  grpc_slice slice;
+  memset(&sizestream, 0, sizeof(pb_ostream_t));
+  pb_encode(&sizestream, grpc_lb_v1_LoadBalanceRequest_fields, request);
+  encoded_length = sizestream.bytes_written;
+
+  slice = GRPC_SLICE_MALLOC(encoded_length);
+  outputstream =
+      pb_ostream_from_buffer(GRPC_SLICE_START_PTR(slice), encoded_length);
+  GPR_ASSERT(pb_encode(&outputstream, grpc_lb_v1_LoadBalanceRequest_fields,
+                       request) != 0);
+  return slice;
+}
+
+void xds_grpclb_request_destroy(xds_grpclb_request* request) {
+  if (request->has_client_stats) {
+    grpc_core::XdsLbClientStats::DroppedCallCounts* drop_entries =
+        static_cast<grpc_core::XdsLbClientStats::DroppedCallCounts*>(
+            request->client_stats.calls_finished_with_drop.arg);
+    grpc_core::Delete(drop_entries);
+  }
+  gpr_free(request);
+}
+
+typedef grpc_lb_v1_LoadBalanceResponse xds_grpclb_response;
+xds_grpclb_initial_response* xds_grpclb_initial_response_parse(
+    grpc_slice encoded_xds_grpclb_response) {
+  pb_istream_t stream =
+      pb_istream_from_buffer(GRPC_SLICE_START_PTR(encoded_xds_grpclb_response),
+                             GRPC_SLICE_LENGTH(encoded_xds_grpclb_response));
+  xds_grpclb_response res;
+  memset(&res, 0, sizeof(xds_grpclb_response));
+  if (GPR_UNLIKELY(
+          !pb_decode(&stream, grpc_lb_v1_LoadBalanceResponse_fields, &res))) {
+    gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(&stream));
+    return nullptr;
+  }
+
+  if (!res.has_initial_response) return nullptr;
+
+  xds_grpclb_initial_response* initial_res =
+      static_cast<xds_grpclb_initial_response*>(
+          gpr_malloc(sizeof(xds_grpclb_initial_response)));
+  memcpy(initial_res, &res.initial_response,
+         sizeof(xds_grpclb_initial_response));
+
+  return initial_res;
+}
+
+xds_grpclb_serverlist* xds_grpclb_response_parse_serverlist(
+    grpc_slice encoded_xds_grpclb_response) {
+  pb_istream_t stream =
+      pb_istream_from_buffer(GRPC_SLICE_START_PTR(encoded_xds_grpclb_response),
+                             GRPC_SLICE_LENGTH(encoded_xds_grpclb_response));
+  pb_istream_t stream_at_start = stream;
+  xds_grpclb_serverlist* sl = static_cast<xds_grpclb_serverlist*>(
+      gpr_zalloc(sizeof(xds_grpclb_serverlist)));
+  xds_grpclb_response res;
+  memset(&res, 0, sizeof(xds_grpclb_response));
+  // First pass: count number of servers.
+  res.server_list.servers.funcs.decode = count_serverlist;
+  res.server_list.servers.arg = sl;
+  bool status = pb_decode(&stream, grpc_lb_v1_LoadBalanceResponse_fields, &res);
+  if (GPR_UNLIKELY(!status)) {
+    gpr_free(sl);
+    gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(&stream));
+    return nullptr;
+  }
+  // Second pass: populate servers.
+  if (sl->num_servers > 0) {
+    sl->servers = static_cast<xds_grpclb_server**>(
+        gpr_zalloc(sizeof(xds_grpclb_server*) * sl->num_servers));
+    decode_serverlist_arg decode_arg;
+    memset(&decode_arg, 0, sizeof(decode_arg));
+    decode_arg.serverlist = sl;
+    res.server_list.servers.funcs.decode = decode_serverlist;
+    res.server_list.servers.arg = &decode_arg;
+    status = pb_decode(&stream_at_start, grpc_lb_v1_LoadBalanceResponse_fields,
+                       &res);
+    if (GPR_UNLIKELY(!status)) {
+      xds_grpclb_destroy_serverlist(sl);
+      gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(&stream));
+      return nullptr;
+    }
+  }
+  return sl;
+}
+
+void xds_grpclb_destroy_serverlist(xds_grpclb_serverlist* serverlist) {
+  if (serverlist == nullptr) {
+    return;
+  }
+  for (size_t i = 0; i < serverlist->num_servers; i++) {
+    gpr_free(serverlist->servers[i]);
+  }
+  gpr_free(serverlist->servers);
+  gpr_free(serverlist);
+}
+
+xds_grpclb_serverlist* xds_grpclb_serverlist_copy(
+    const xds_grpclb_serverlist* sl) {
+  xds_grpclb_serverlist* copy = static_cast<xds_grpclb_serverlist*>(
+      gpr_zalloc(sizeof(xds_grpclb_serverlist)));
+  copy->num_servers = sl->num_servers;
+  copy->servers = static_cast<xds_grpclb_server**>(
+      gpr_malloc(sizeof(xds_grpclb_server*) * sl->num_servers));
+  for (size_t i = 0; i < sl->num_servers; i++) {
+    copy->servers[i] =
+        static_cast<xds_grpclb_server*>(gpr_malloc(sizeof(xds_grpclb_server)));
+    memcpy(copy->servers[i], sl->servers[i], sizeof(xds_grpclb_server));
+  }
+  return copy;
+}
+
+bool xds_grpclb_serverlist_equals(const xds_grpclb_serverlist* lhs,
+                                  const xds_grpclb_serverlist* rhs) {
+  if (lhs == nullptr || rhs == nullptr) {
+    return false;
+  }
+  if (lhs->num_servers != rhs->num_servers) {
+    return false;
+  }
+  for (size_t i = 0; i < lhs->num_servers; i++) {
+    if (!xds_grpclb_server_equals(lhs->servers[i], rhs->servers[i])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool xds_grpclb_server_equals(const xds_grpclb_server* lhs,
+                              const xds_grpclb_server* rhs) {
+  return memcmp(lhs, rhs, sizeof(xds_grpclb_server)) == 0;
+}
+
+int xds_grpclb_duration_compare(const xds_grpclb_duration* lhs,
+                                const xds_grpclb_duration* rhs) {
+  GPR_ASSERT(lhs && rhs);
+  if (lhs->has_seconds && rhs->has_seconds) {
+    if (lhs->seconds < rhs->seconds) return -1;
+    if (lhs->seconds > rhs->seconds) return 1;
+  } else if (lhs->has_seconds) {
+    return 1;
+  } else if (rhs->has_seconds) {
+    return -1;
+  }
+
+  GPR_ASSERT(lhs->seconds == rhs->seconds);
+  if (lhs->has_nanos && rhs->has_nanos) {
+    if (lhs->nanos < rhs->nanos) return -1;
+    if (lhs->nanos > rhs->nanos) return 1;
+  } else if (lhs->has_nanos) {
+    return 1;
+  } else if (rhs->has_nanos) {
+    return -1;
+  }
+
+  return 0;
+}
+
+grpc_millis xds_grpclb_duration_to_millis(xds_grpclb_duration* duration_pb) {
+  return static_cast<grpc_millis>(
+      (duration_pb->has_seconds ? duration_pb->seconds : 0) * GPR_MS_PER_SEC +
+      (duration_pb->has_nanos ? duration_pb->nanos : 0) / GPR_NS_PER_MS);
+}
+
+void xds_grpclb_initial_response_destroy(
+    xds_grpclb_initial_response* response) {
+  gpr_free(response);
+}
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/load_balancer_api.h b/src/core/ext/filters/client_channel/lb_policy/xds/load_balancer_api.h
new file mode 100644
index 0000000000..3f096af6d6
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/load_balancer_api.h
@@ -0,0 +1,89 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_LOAD_BALANCER_API_H
+#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_LOAD_BALANCER_API_H
+
+#include <grpc/support/port_platform.h>
+
+#include <grpc/slice_buffer.h>
+
+#include "src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h"
+#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h"
+#include "src/core/ext/filters/client_channel/lb_policy_factory.h"
+
+#define XDS_SERVICE_NAME_MAX_LENGTH 128
+
+typedef grpc_lb_v1_Server_ip_address_t xds_grpclb_ip_address;
+typedef grpc_lb_v1_LoadBalanceRequest xds_grpclb_request;
+typedef grpc_lb_v1_InitialLoadBalanceResponse xds_grpclb_initial_response;
+typedef grpc_lb_v1_Server xds_grpclb_server;
+typedef google_protobuf_Duration xds_grpclb_duration;
+typedef google_protobuf_Timestamp xds_grpclb_timestamp;
+
+typedef struct {
+  xds_grpclb_server** servers;
+  size_t num_servers;
+} xds_grpclb_serverlist;
+
+/** Create a request for a gRPC LB service under \a lb_service_name */
+xds_grpclb_request* xds_grpclb_request_create(const char* lb_service_name);
+xds_grpclb_request* xds_grpclb_load_report_request_create_locked(
+    grpc_core::XdsLbClientStats* client_stats);
+
+/** Protocol Buffers v3-encode \a request */
+grpc_slice xds_grpclb_request_encode(const xds_grpclb_request* request);
+
+/** Destroy \a request */
+void xds_grpclb_request_destroy(xds_grpclb_request* request);
+
+/** Parse (ie, decode) the bytes in \a encoded_xds_grpclb_response as a \a
+ * xds_grpclb_initial_response */
+xds_grpclb_initial_response* xds_grpclb_initial_response_parse(
+    grpc_slice encoded_xds_grpclb_response);
+
+/** Parse the list of servers from an encoded \a xds_grpclb_response */
+xds_grpclb_serverlist* xds_grpclb_response_parse_serverlist(
+    grpc_slice encoded_xds_grpclb_response);
+
+/** Return a copy of \a sl. The caller is responsible for calling \a
+ * xds_grpclb_destroy_serverlist on the returned copy. */
+xds_grpclb_serverlist* xds_grpclb_serverlist_copy(
+    const xds_grpclb_serverlist* sl);
+
+bool xds_grpclb_serverlist_equals(const xds_grpclb_serverlist* lhs,
+                                  const xds_grpclb_serverlist* rhs);
+
+bool xds_grpclb_server_equals(const xds_grpclb_server* lhs,
+                              const xds_grpclb_server* rhs);
+
+/** Destroy \a serverlist */
+void xds_grpclb_destroy_serverlist(xds_grpclb_serverlist* serverlist);
+
+/** Compare \a lhs against \a rhs and return 0 if \a lhs and \a rhs are equal,
+ * < 0 if \a lhs represents a duration shorter than \a rhs and > 0 otherwise */
+int xds_grpclb_duration_compare(const xds_grpclb_duration* lhs,
+                                const xds_grpclb_duration* rhs);
+
+grpc_millis xds_grpclb_duration_to_millis(xds_grpclb_duration* duration_pb);
+
+/** Destroy \a initial_response */
+void xds_grpclb_initial_response_destroy(xds_grpclb_initial_response* response);
+
+#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_LOAD_BALANCER_API_H \
+        */
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc
new file mode 100644
index 0000000000..16e910b2d9
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc
@@ -0,0 +1,1894 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+/// Implementation of the gRPC LB policy.
+///
+/// This policy takes as input a list of resolved addresses, which must
+/// include at least one balancer address.
+///
+/// An internal channel (\a lb_channel_) is created for the addresses
+/// from that are balancers.  This channel behaves just like a regular
+/// channel that uses pick_first to select from the list of balancer
+/// addresses.
+///
+/// The first time the policy gets a request for a pick, a ping, or to exit
+/// the idle state, \a StartPickingLocked() is called. This method is
+/// responsible for instantiating the internal *streaming* call to the LB
+/// server (whichever address pick_first chose).  The call will be complete
+/// when either the balancer sends status or when we cancel the call (e.g.,
+/// because we are shutting down).  In needed, we retry the call.  If we
+/// received at least one valid message from the server, a new call attempt
+/// will be made immediately; otherwise, we apply back-off delays between
+/// attempts.
+///
+/// We maintain an internal round_robin policy instance for distributing
+/// requests across backends.  Whenever we receive a new serverlist from
+/// the balancer, we update the round_robin policy with the new list of
+/// addresses.  If we cannot communicate with the balancer on startup,
+/// however, we may enter fallback mode, in which case we will populate
+/// the RR policy's addresses from the backend addresses returned by the
+/// resolver.
+///
+/// Once an RR policy instance is in place (and getting updated as described),
+/// calls for a pick, a ping, or a cancellation will be serviced right
+/// away by forwarding them to the RR instance.  Any time there's no RR
+/// policy available (i.e., right after the creation of the gRPCLB policy),
+/// pick and ping requests are added to a list of pending picks and pings
+/// to be flushed and serviced when the RR policy instance becomes available.
+///
+/// \see https://github.com/grpc/grpc/blob/master/doc/load-balancing.md for the
+/// high level design and details.
+
+// With the addition of a libuv endpoint, sockaddr.h now includes uv.h when
+// using that endpoint. Because of various transitive includes in uv.h,
+// including windows.h on Windows, uv.h must be included before other system
+// headers. Therefore, sockaddr.h must always be included first.
+#include <grpc/support/port_platform.h>
+
+#include "src/core/lib/iomgr/sockaddr.h"
+#include "src/core/lib/iomgr/socket_utils.h"
+
+#include <inttypes.h>
+#include <limits.h>
+#include <string.h>
+
+#include <grpc/byte_buffer_reader.h>
+#include <grpc/grpc.h>
+#include <grpc/support/alloc.h>
+#include <grpc/support/string_util.h>
+#include <grpc/support/time.h>
+
+#include "src/core/ext/filters/client_channel/client_channel.h"
+#include "src/core/ext/filters/client_channel/client_channel_factory.h"
+#include "src/core/ext/filters/client_channel/lb_policy/xds/client_load_reporting_filter.h"
+#include "src/core/ext/filters/client_channel/lb_policy/xds/load_balancer_api.h"
+#include "src/core/ext/filters/client_channel/lb_policy/xds/xds.h"
+#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h"
+#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h"
+#include "src/core/ext/filters/client_channel/lb_policy_factory.h"
+#include "src/core/ext/filters/client_channel/lb_policy_registry.h"
+#include "src/core/ext/filters/client_channel/parse_address.h"
+#include "src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h"
+#include "src/core/ext/filters/client_channel/subchannel_index.h"
+#include "src/core/lib/backoff/backoff.h"
+#include "src/core/lib/channel/channel_args.h"
+#include "src/core/lib/channel/channel_stack.h"
+#include "src/core/lib/gpr/host_port.h"
+#include "src/core/lib/gpr/string.h"
+#include "src/core/lib/gprpp/manual_constructor.h"
+#include "src/core/lib/gprpp/memory.h"
+#include "src/core/lib/gprpp/mutex_lock.h"
+#include "src/core/lib/gprpp/orphanable.h"
+#include "src/core/lib/gprpp/ref_counted_ptr.h"
+#include "src/core/lib/iomgr/combiner.h"
+#include "src/core/lib/iomgr/sockaddr.h"
+#include "src/core/lib/iomgr/sockaddr_utils.h"
+#include "src/core/lib/iomgr/timer.h"
+#include "src/core/lib/slice/slice_hash_table.h"
+#include "src/core/lib/slice/slice_internal.h"
+#include "src/core/lib/slice/slice_string_helpers.h"
+#include "src/core/lib/surface/call.h"
+#include "src/core/lib/surface/channel.h"
+#include "src/core/lib/surface/channel_init.h"
+#include "src/core/lib/transport/static_metadata.h"
+
+#define GRPC_XDS_INITIAL_CONNECT_BACKOFF_SECONDS 1
+#define GRPC_XDS_RECONNECT_BACKOFF_MULTIPLIER 1.6
+#define GRPC_XDS_RECONNECT_MAX_BACKOFF_SECONDS 120
+#define GRPC_XDS_RECONNECT_JITTER 0.2
+#define GRPC_XDS_DEFAULT_FALLBACK_TIMEOUT_MS 10000
+
+namespace grpc_core {
+
+TraceFlag grpc_lb_xds_trace(false, "xds");
+
+namespace {
+
+class XdsLb : public LoadBalancingPolicy {
+ public:
+  XdsLb(const grpc_lb_addresses* addresses, const Args& args);
+
+  void UpdateLocked(const grpc_channel_args& args) override;
+  bool PickLocked(PickState* pick, grpc_error** error) override;
+  void CancelPickLocked(PickState* pick, grpc_error* error) override;
+  void CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask,
+                                 uint32_t initial_metadata_flags_eq,
+                                 grpc_error* error) override;
+  void NotifyOnStateChangeLocked(grpc_connectivity_state* state,
+                                 grpc_closure* closure) override;
+  grpc_connectivity_state CheckConnectivityLocked(
+      grpc_error** connectivity_error) override;
+  void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override;
+  void ExitIdleLocked() override;
+  void ResetBackoffLocked() override;
+  void FillChildRefsForChannelz(
+      channelz::ChildRefsList* child_subchannels,
+      channelz::ChildRefsList* child_channels) override;
+
+ private:
+  /// Linked list of pending pick requests. It stores all information needed to
+  /// eventually call (Round Robin's) pick() on them. They mainly stay pending
+  /// waiting for the RR policy to be created.
+  ///
+  /// Note that when a pick is sent to the RR policy, we inject our own
+  /// on_complete callback, so that we can intercept the result before
+  /// invoking the original on_complete callback.  This allows us to set the
+  /// LB token metadata and add client_stats to the call context.
+  /// See \a pending_pick_complete() for details.
+  struct PendingPick {
+    // The xds lb instance that created the wrapping. This instance is not
+    // owned; reference counts are untouched. It's used only for logging
+    // purposes.
+    XdsLb* xdslb_policy;
+    // The original pick.
+    PickState* pick;
+    // Our on_complete closure and the original one.
+    grpc_closure on_complete;
+    grpc_closure* original_on_complete;
+    // The LB token associated with the pick.  This is set via user_data in
+    // the pick.
+    grpc_mdelem lb_token;
+    // Stats for client-side load reporting.
+    RefCountedPtr<XdsLbClientStats> client_stats;
+    // Next pending pick.
+    PendingPick* next = nullptr;
+  };
+
+  /// Contains a call to the LB server and all the data related to the call.
+  class BalancerCallState
+      : public InternallyRefCountedWithTracing<BalancerCallState> {
+   public:
+    explicit BalancerCallState(
+        RefCountedPtr<LoadBalancingPolicy> parent_xdslb_policy);
+
+    // It's the caller's responsibility to ensure that Orphan() is called from
+    // inside the combiner.
+    void Orphan() override;
+
+    void StartQuery();
+
+    XdsLbClientStats* client_stats() const { return client_stats_.get(); }
+
+    bool seen_initial_response() const { return seen_initial_response_; }
+
+   private:
+    // So Delete() can access our private dtor.
+    template <typename T>
+    friend void grpc_core::Delete(T*);
+
+    ~BalancerCallState();
+
+    XdsLb* xdslb_policy() const {
+      return static_cast<XdsLb*>(xdslb_policy_.get());
+    }
+
+    void ScheduleNextClientLoadReportLocked();
+    void SendClientLoadReportLocked();
+
+    static bool LoadReportCountersAreZero(xds_grpclb_request* request);
+
+    static void MaybeSendClientLoadReportLocked(void* arg, grpc_error* error);
+    static void ClientLoadReportDoneLocked(void* arg, grpc_error* error);
+    static void OnInitialRequestSentLocked(void* arg, grpc_error* error);
+    static void OnBalancerMessageReceivedLocked(void* arg, grpc_error* error);
+    static void OnBalancerStatusReceivedLocked(void* arg, grpc_error* error);
+
+    // The owning LB policy.
+    RefCountedPtr<LoadBalancingPolicy> xdslb_policy_;
+
+    // The streaming call to the LB server. Always non-NULL.
+    grpc_call* lb_call_ = nullptr;
+
+    // recv_initial_metadata
+    grpc_metadata_array lb_initial_metadata_recv_;
+
+    // send_message
+    grpc_byte_buffer* send_message_payload_ = nullptr;
+    grpc_closure lb_on_initial_request_sent_;
+
+    // recv_message
+    grpc_byte_buffer* recv_message_payload_ = nullptr;
+    grpc_closure lb_on_balancer_message_received_;
+    bool seen_initial_response_ = false;
+
+    // recv_trailing_metadata
+    grpc_closure lb_on_balancer_status_received_;
+    grpc_metadata_array lb_trailing_metadata_recv_;
+    grpc_status_code lb_call_status_;
+    grpc_slice lb_call_status_details_;
+
+    // The stats for client-side load reporting associated with this LB call.
+    // Created after the first serverlist is received.
+    RefCountedPtr<XdsLbClientStats> client_stats_;
+    grpc_millis client_stats_report_interval_ = 0;
+    grpc_timer client_load_report_timer_;
+    bool client_load_report_timer_callback_pending_ = false;
+    bool last_client_load_report_counters_were_zero_ = false;
+    bool client_load_report_is_due_ = false;
+    // The closure used for either the load report timer or the callback for
+    // completion of sending the load report.
+    grpc_closure client_load_report_closure_;
+  };
+
+  ~XdsLb();
+
+  void ShutdownLocked() override;
+
+  // Helper function used in ctor and UpdateLocked().
+  void ProcessChannelArgsLocked(const grpc_channel_args& args);
+
+  // Methods for dealing with the balancer channel and call.
+  void StartPickingLocked();
+  void StartBalancerCallLocked();
+  static void OnFallbackTimerLocked(void* arg, grpc_error* error);
+  void StartBalancerCallRetryTimerLocked();
+  static void OnBalancerCallRetryTimerLocked(void* arg, grpc_error* error);
+  static void OnBalancerChannelConnectivityChangedLocked(void* arg,
+                                                         grpc_error* error);
+
+  // Pending pick methods.
+  static void PendingPickSetMetadataAndContext(PendingPick* pp);
+  PendingPick* PendingPickCreate(PickState* pick);
+  void AddPendingPick(PendingPick* pp);
+  static void OnPendingPickComplete(void* arg, grpc_error* error);
+
+  // Methods for dealing with the RR policy.
+  void CreateOrUpdateRoundRobinPolicyLocked();
+  grpc_channel_args* CreateRoundRobinPolicyArgsLocked();
+  void CreateRoundRobinPolicyLocked(const Args& args);
+  bool PickFromRoundRobinPolicyLocked(bool force_async, PendingPick* pp,
+                                      grpc_error** error);
+  void UpdateConnectivityStateFromRoundRobinPolicyLocked(
+      grpc_error* rr_state_error);
+  static void OnRoundRobinConnectivityChangedLocked(void* arg,
+                                                    grpc_error* error);
+  static void OnRoundRobinRequestReresolutionLocked(void* arg,
+                                                    grpc_error* error);
+
+  // Who the client is trying to communicate with.
+  const char* server_name_ = nullptr;
+
+  // Current channel args from the resolver.
+  grpc_channel_args* args_ = nullptr;
+
+  // Internal state.
+  bool started_picking_ = false;
+  bool shutting_down_ = false;
+  grpc_connectivity_state_tracker state_tracker_;
+
+  // The channel for communicating with the LB server.
+  grpc_channel* lb_channel_ = nullptr;
+  // Mutex to protect the channel to the LB server. This is used when
+  // processing a channelz request.
+  gpr_mu lb_channel_mu_;
+  grpc_connectivity_state lb_channel_connectivity_;
+  grpc_closure lb_channel_on_connectivity_changed_;
+  // Are we already watching the LB channel's connectivity?
+  bool watching_lb_channel_ = false;
+  // Response generator to inject address updates into lb_channel_.
+  RefCountedPtr<FakeResolverResponseGenerator> response_generator_;
+
+  // The data associated with the current LB call. It holds a ref to this LB
+  // policy. It's initialized every time we query for backends. It's reset to
+  // NULL whenever the current LB call is no longer needed (e.g., the LB policy
+  // is shutting down, or the LB call has ended). A non-NULL lb_calld_ always
+  // contains a non-NULL lb_call_.
+  OrphanablePtr<BalancerCallState> lb_calld_;
+  // Timeout in milliseconds for the LB call. 0 means no deadline.
+  int lb_call_timeout_ms_ = 0;
+  // Balancer call retry state.
+  BackOff lb_call_backoff_;
+  bool retry_timer_callback_pending_ = false;
+  grpc_timer lb_call_retry_timer_;
+  grpc_closure lb_on_call_retry_;
+
+  // The deserialized response from the balancer. May be nullptr until one
+  // such response has arrived.
+  xds_grpclb_serverlist* serverlist_ = nullptr;
+  // Index into serverlist for next pick.
+  // If the server at this index is a drop, we return a drop.
+  // Otherwise, we delegate to the RR policy.
+  size_t serverlist_index_ = 0;
+
+  // Timeout in milliseconds for before using fallback backend addresses.
+  // 0 means not using fallback.
+  int lb_fallback_timeout_ms_ = 0;
+  // The backend addresses from the resolver.
+  grpc_lb_addresses* fallback_backend_addresses_ = nullptr;
+  // Fallback timer.
+  bool fallback_timer_callback_pending_ = false;
+  grpc_timer lb_fallback_timer_;
+  grpc_closure lb_on_fallback_;
+
+  // Pending picks that are waiting on the RR policy's connectivity.
+  PendingPick* pending_picks_ = nullptr;
+
+  // The RR policy to use for the backends.
+  OrphanablePtr<LoadBalancingPolicy> rr_policy_;
+  grpc_connectivity_state rr_connectivity_state_;
+  grpc_closure on_rr_connectivity_changed_;
+  grpc_closure on_rr_request_reresolution_;
+};
+
+//
+// serverlist parsing code
+//
+
+// vtable for LB tokens in grpc_lb_addresses
+void* lb_token_copy(void* token) {
+  return token == nullptr
+             ? nullptr
+             : (void*)GRPC_MDELEM_REF(grpc_mdelem{(uintptr_t)token}).payload;
+}
+void lb_token_destroy(void* token) {
+  if (token != nullptr) {
+    GRPC_MDELEM_UNREF(grpc_mdelem{(uintptr_t)token});
+  }
+}
+int lb_token_cmp(void* token1, void* token2) {
+  if (token1 > token2) return 1;
+  if (token1 < token2) return -1;
+  return 0;
+}
+const grpc_lb_user_data_vtable lb_token_vtable = {
+    lb_token_copy, lb_token_destroy, lb_token_cmp};
+
+// Returns the backend addresses extracted from the given addresses.
+grpc_lb_addresses* ExtractBackendAddresses(const grpc_lb_addresses* addresses) {
+  // First pass: count the number of backend addresses.
+  size_t num_backends = 0;
+  for (size_t i = 0; i < addresses->num_addresses; ++i) {
+    if (!addresses->addresses[i].is_balancer) {
+      ++num_backends;
+    }
+  }
+  // Second pass: actually populate the addresses and (empty) LB tokens.
+  grpc_lb_addresses* backend_addresses =
+      grpc_lb_addresses_create(num_backends, &lb_token_vtable);
+  size_t num_copied = 0;
+  for (size_t i = 0; i < addresses->num_addresses; ++i) {
+    if (addresses->addresses[i].is_balancer) continue;
+    const grpc_resolved_address* addr = &addresses->addresses[i].address;
+    grpc_lb_addresses_set_address(backend_addresses, num_copied, &addr->addr,
+                                  addr->len, false /* is_balancer */,
+                                  nullptr /* balancer_name */,
+                                  (void*)GRPC_MDELEM_LB_TOKEN_EMPTY.payload);
+    ++num_copied;
+  }
+  return backend_addresses;
+}
+
+bool IsServerValid(const xds_grpclb_server* server, size_t idx, bool log) {
+  if (server->drop) return false;
+  const xds_grpclb_ip_address* ip = &server->ip_address;
+  if (GPR_UNLIKELY(server->port >> 16 != 0)) {
+    if (log) {
+      gpr_log(GPR_ERROR,
+              "Invalid port '%d' at index %lu of serverlist. Ignoring.",
+              server->port, (unsigned long)idx);
+    }
+    return false;
+  }
+  if (GPR_UNLIKELY(ip->size != 4 && ip->size != 16)) {
+    if (log) {
+      gpr_log(GPR_ERROR,
+              "Expected IP to be 4 or 16 bytes, got %d at index %lu of "
+              "serverlist. Ignoring",
+              ip->size, (unsigned long)idx);
+    }
+    return false;
+  }
+  return true;
+}
+
+void ParseServer(const xds_grpclb_server* server, grpc_resolved_address* addr) {
+  memset(addr, 0, sizeof(*addr));
+  if (server->drop) return;
+  const uint16_t netorder_port = grpc_htons((uint16_t)server->port);
+  /* the addresses are given in binary format (a in(6)_addr struct) in
+   * server->ip_address.bytes. */
+  const xds_grpclb_ip_address* ip = &server->ip_address;
+  if (ip->size == 4) {
+    addr->len = static_cast<socklen_t>(sizeof(grpc_sockaddr_in));
+    grpc_sockaddr_in* addr4 = reinterpret_cast<grpc_sockaddr_in*>(&addr->addr);
+    addr4->sin_family = GRPC_AF_INET;
+    memcpy(&addr4->sin_addr, ip->bytes, ip->size);
+    addr4->sin_port = netorder_port;
+  } else if (ip->size == 16) {
+    addr->len = static_cast<socklen_t>(sizeof(grpc_sockaddr_in6));
+    grpc_sockaddr_in6* addr6 = (grpc_sockaddr_in6*)&addr->addr;
+    addr6->sin6_family = GRPC_AF_INET6;
+    memcpy(&addr6->sin6_addr, ip->bytes, ip->size);
+    addr6->sin6_port = netorder_port;
+  }
+}
+
+// Returns addresses extracted from \a serverlist.
+grpc_lb_addresses* ProcessServerlist(const xds_grpclb_serverlist* serverlist) {
+  size_t num_valid = 0;
+  /* first pass: count how many are valid in order to allocate the necessary
+   * memory in a single block */
+  for (size_t i = 0; i < serverlist->num_servers; ++i) {
+    if (IsServerValid(serverlist->servers[i], i, true)) ++num_valid;
+  }
+  grpc_lb_addresses* lb_addresses =
+      grpc_lb_addresses_create(num_valid, &lb_token_vtable);
+  /* second pass: actually populate the addresses and LB tokens (aka user data
+   * to the outside world) to be read by the RR policy during its creation.
+   * Given that the validity tests are very cheap, they are performed again
+   * instead of marking the valid ones during the first pass, as this would
+   * incurr in an allocation due to the arbitrary number of server */
+  size_t addr_idx = 0;
+  for (size_t sl_idx = 0; sl_idx < serverlist->num_servers; ++sl_idx) {
+    const xds_grpclb_server* server = serverlist->servers[sl_idx];
+    if (!IsServerValid(serverlist->servers[sl_idx], sl_idx, false)) continue;
+    GPR_ASSERT(addr_idx < num_valid);
+    /* address processing */
+    grpc_resolved_address addr;
+    ParseServer(server, &addr);
+    /* lb token processing */
+    void* user_data;
+    if (server->has_load_balance_token) {
+      const size_t lb_token_max_length =
+          GPR_ARRAY_SIZE(server->load_balance_token);
+      const size_t lb_token_length =
+          strnlen(server->load_balance_token, lb_token_max_length);
+      grpc_slice lb_token_mdstr = grpc_slice_from_copied_buffer(
+          server->load_balance_token, lb_token_length);
+      user_data =
+          (void*)grpc_mdelem_from_slices(GRPC_MDSTR_LB_TOKEN, lb_token_mdstr)
+              .payload;
+    } else {
+      char* uri = grpc_sockaddr_to_uri(&addr);
+      gpr_log(GPR_INFO,
+              "Missing LB token for backend address '%s'. The empty token will "
+              "be used instead",
+              uri);
+      gpr_free(uri);
+      user_data = (void*)GRPC_MDELEM_LB_TOKEN_EMPTY.payload;
+    }
+    grpc_lb_addresses_set_address(lb_addresses, addr_idx, &addr.addr, addr.len,
+                                  false /* is_balancer */,
+                                  nullptr /* balancer_name */, user_data);
+    ++addr_idx;
+  }
+  GPR_ASSERT(addr_idx == num_valid);
+  return lb_addresses;
+}
+
+//
+// XdsLb::BalancerCallState
+//
+
+XdsLb::BalancerCallState::BalancerCallState(
+    RefCountedPtr<LoadBalancingPolicy> parent_xdslb_policy)
+    : InternallyRefCountedWithTracing<BalancerCallState>(&grpc_lb_xds_trace),
+      xdslb_policy_(std::move(parent_xdslb_policy)) {
+  GPR_ASSERT(xdslb_policy_ != nullptr);
+  GPR_ASSERT(!xdslb_policy()->shutting_down_);
+  // Init the LB call. Note that the LB call will progress every time there's
+  // activity in xdslb_policy_->interested_parties(), which is comprised of
+  // the polling entities from client_channel.
+  GPR_ASSERT(xdslb_policy()->server_name_ != nullptr);
+  GPR_ASSERT(xdslb_policy()->server_name_[0] != '\0');
+  const grpc_millis deadline =
+      xdslb_policy()->lb_call_timeout_ms_ == 0
+          ? GRPC_MILLIS_INF_FUTURE
+          : ExecCtx::Get()->Now() + xdslb_policy()->lb_call_timeout_ms_;
+  lb_call_ = grpc_channel_create_pollset_set_call(
+      xdslb_policy()->lb_channel_, nullptr, GRPC_PROPAGATE_DEFAULTS,
+      xdslb_policy_->interested_parties(),
+      GRPC_MDSTR_SLASH_GRPC_DOT_LB_DOT_V1_DOT_LOADBALANCER_SLASH_BALANCELOAD,
+      nullptr, deadline, nullptr);
+  // Init the LB call request payload.
+  xds_grpclb_request* request =
+      xds_grpclb_request_create(xdslb_policy()->server_name_);
+  grpc_slice request_payload_slice = xds_grpclb_request_encode(request);
+  send_message_payload_ =
+      grpc_raw_byte_buffer_create(&request_payload_slice, 1);
+  grpc_slice_unref_internal(request_payload_slice);
+  xds_grpclb_request_destroy(request);
+  // Init other data associated with the LB call.
+  grpc_metadata_array_init(&lb_initial_metadata_recv_);
+  grpc_metadata_array_init(&lb_trailing_metadata_recv_);
+  GRPC_CLOSURE_INIT(&lb_on_initial_request_sent_, OnInitialRequestSentLocked,
+                    this, grpc_combiner_scheduler(xdslb_policy()->combiner()));
+  GRPC_CLOSURE_INIT(&lb_on_balancer_message_received_,
+                    OnBalancerMessageReceivedLocked, this,
+                    grpc_combiner_scheduler(xdslb_policy()->combiner()));
+  GRPC_CLOSURE_INIT(&lb_on_balancer_status_received_,
+                    OnBalancerStatusReceivedLocked, this,
+                    grpc_combiner_scheduler(xdslb_policy()->combiner()));
+}
+
+XdsLb::BalancerCallState::~BalancerCallState() {
+  GPR_ASSERT(lb_call_ != nullptr);
+  grpc_call_unref(lb_call_);
+  grpc_metadata_array_destroy(&lb_initial_metadata_recv_);
+  grpc_metadata_array_destroy(&lb_trailing_metadata_recv_);
+  grpc_byte_buffer_destroy(send_message_payload_);
+  grpc_byte_buffer_destroy(recv_message_payload_);
+  grpc_slice_unref_internal(lb_call_status_details_);
+}
+
+void XdsLb::BalancerCallState::Orphan() {
+  GPR_ASSERT(lb_call_ != nullptr);
+  // If we are here because xdslb_policy wants to cancel the call,
+  // lb_on_balancer_status_received_ will complete the cancellation and clean
+  // up. Otherwise, we are here because xdslb_policy has to orphan a failed
+  // call, then the following cancellation will be a no-op.
+  grpc_call_cancel(lb_call_, nullptr);
+  if (client_load_report_timer_callback_pending_) {
+    grpc_timer_cancel(&client_load_report_timer_);
+  }
+  // Note that the initial ref is hold by lb_on_balancer_status_received_
+  // instead of the caller of this function. So the corresponding unref happens
+  // in lb_on_balancer_status_received_ instead of here.
+}
+
+void XdsLb::BalancerCallState::StartQuery() {
+  GPR_ASSERT(lb_call_ != nullptr);
+  if (grpc_lb_xds_trace.enabled()) {
+    gpr_log(GPR_INFO, "[xdslb %p] Starting LB call (lb_calld: %p, lb_call: %p)",
+            xdslb_policy_.get(), this, lb_call_);
+  }
+  // Create the ops.
+  grpc_call_error call_error;
+  grpc_op ops[3];
+  memset(ops, 0, sizeof(ops));
+  // Op: send initial metadata.
+  grpc_op* op = ops;
+  op->op = GRPC_OP_SEND_INITIAL_METADATA;
+  op->data.send_initial_metadata.count = 0;
+  op->flags = 0;
+  op->reserved = nullptr;
+  op++;
+  // Op: send request message.
+  GPR_ASSERT(send_message_payload_ != nullptr);
+  op->op = GRPC_OP_SEND_MESSAGE;
+  op->data.send_message.send_message = send_message_payload_;
+  op->flags = 0;
+  op->reserved = nullptr;
+  op++;
+  // TODO(roth): We currently track this ref manually.  Once the
+  // ClosureRef API is ready, we should pass the RefCountedPtr<> along
+  // with the callback.
+  auto self = Ref(DEBUG_LOCATION, "on_initial_request_sent");
+  self.release();
+  call_error = grpc_call_start_batch_and_execute(
+      lb_call_, ops, (size_t)(op - ops), &lb_on_initial_request_sent_);
+  GPR_ASSERT(GRPC_CALL_OK == call_error);
+  // Op: recv initial metadata.
+  op = ops;
+  op->op = GRPC_OP_RECV_INITIAL_METADATA;
+  op->data.recv_initial_metadata.recv_initial_metadata =
+      &lb_initial_metadata_recv_;
+  op->flags = 0;
+  op->reserved = nullptr;
+  op++;
+  // Op: recv response.
+  op->op = GRPC_OP_RECV_MESSAGE;
+  op->data.recv_message.recv_message = &recv_message_payload_;
+  op->flags = 0;
+  op->reserved = nullptr;
+  op++;
+  // TODO(roth): We currently track this ref manually.  Once the
+  // ClosureRef API is ready, we should pass the RefCountedPtr<> along
+  // with the callback.
+  self = Ref(DEBUG_LOCATION, "on_message_received");
+  self.release();
+  call_error = grpc_call_start_batch_and_execute(
+      lb_call_, ops, (size_t)(op - ops), &lb_on_balancer_message_received_);
+  GPR_ASSERT(GRPC_CALL_OK == call_error);
+  // Op: recv server status.
+  op = ops;
+  op->op = GRPC_OP_RECV_STATUS_ON_CLIENT;
+  op->data.recv_status_on_client.trailing_metadata =
+      &lb_trailing_metadata_recv_;
+  op->data.recv_status_on_client.status = &lb_call_status_;
+  op->data.recv_status_on_client.status_details = &lb_call_status_details_;
+  op->flags = 0;
+  op->reserved = nullptr;
+  op++;
+  // This callback signals the end of the LB call, so it relies on the initial
+  // ref instead of a new ref. When it's invoked, it's the initial ref that is
+  // unreffed.
+  call_error = grpc_call_start_batch_and_execute(
+      lb_call_, ops, (size_t)(op - ops), &lb_on_balancer_status_received_);
+  GPR_ASSERT(GRPC_CALL_OK == call_error);
+}
+
+void XdsLb::BalancerCallState::ScheduleNextClientLoadReportLocked() {
+  const grpc_millis next_client_load_report_time =
+      ExecCtx::Get()->Now() + client_stats_report_interval_;
+  GRPC_CLOSURE_INIT(&client_load_report_closure_,
+                    MaybeSendClientLoadReportLocked, this,
+                    grpc_combiner_scheduler(xdslb_policy()->combiner()));
+  grpc_timer_init(&client_load_report_timer_, next_client_load_report_time,
+                  &client_load_report_closure_);
+  client_load_report_timer_callback_pending_ = true;
+}
+
+void XdsLb::BalancerCallState::MaybeSendClientLoadReportLocked(
+    void* arg, grpc_error* error) {
+  BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
+  XdsLb* xdslb_policy = lb_calld->xdslb_policy();
+  lb_calld->client_load_report_timer_callback_pending_ = false;
+  if (error != GRPC_ERROR_NONE || lb_calld != xdslb_policy->lb_calld_.get()) {
+    lb_calld->Unref(DEBUG_LOCATION, "client_load_report");
+    return;
+  }
+  // If we've already sent the initial request, then we can go ahead and send
+  // the load report. Otherwise, we need to wait until the initial request has
+  // been sent to send this (see OnInitialRequestSentLocked()).
+  if (lb_calld->send_message_payload_ == nullptr) {
+    lb_calld->SendClientLoadReportLocked();
+  } else {
+    lb_calld->client_load_report_is_due_ = true;
+  }
+}
+
+bool XdsLb::BalancerCallState::LoadReportCountersAreZero(
+    xds_grpclb_request* request) {
+  XdsLbClientStats::DroppedCallCounts* drop_entries =
+      static_cast<XdsLbClientStats::DroppedCallCounts*>(
+          request->client_stats.calls_finished_with_drop.arg);
+  return request->client_stats.num_calls_started == 0 &&
+         request->client_stats.num_calls_finished == 0 &&
+         request->client_stats.num_calls_finished_with_client_failed_to_send ==
+             0 &&
+         request->client_stats.num_calls_finished_known_received == 0 &&
+         (drop_entries == nullptr || drop_entries->empty());
+}
+
+void XdsLb::BalancerCallState::SendClientLoadReportLocked() {
+  // Construct message payload.
+  GPR_ASSERT(send_message_payload_ == nullptr);
+  xds_grpclb_request* request =
+      xds_grpclb_load_report_request_create_locked(client_stats_.get());
+  // Skip client load report if the counters were all zero in the last
+  // report and they are still zero in this one.
+  if (LoadReportCountersAreZero(request)) {
+    if (last_client_load_report_counters_were_zero_) {
+      xds_grpclb_request_destroy(request);
+      ScheduleNextClientLoadReportLocked();
+      return;
+    }
+    last_client_load_report_counters_were_zero_ = true;
+  } else {
+    last_client_load_report_counters_were_zero_ = false;
+  }
+  grpc_slice request_payload_slice = xds_grpclb_request_encode(request);
+  send_message_payload_ =
+      grpc_raw_byte_buffer_create(&request_payload_slice, 1);
+  grpc_slice_unref_internal(request_payload_slice);
+  xds_grpclb_request_destroy(request);
+  // Send the report.
+  grpc_op op;
+  memset(&op, 0, sizeof(op));
+  op.op = GRPC_OP_SEND_MESSAGE;
+  op.data.send_message.send_message = send_message_payload_;
+  GRPC_CLOSURE_INIT(&client_load_report_closure_, ClientLoadReportDoneLocked,
+                    this, grpc_combiner_scheduler(xdslb_policy()->combiner()));
+  grpc_call_error call_error = grpc_call_start_batch_and_execute(
+      lb_call_, &op, 1, &client_load_report_closure_);
+  if (GPR_UNLIKELY(call_error != GRPC_CALL_OK)) {
+    gpr_log(GPR_ERROR, "[xdslb %p] call_error=%d", xdslb_policy_.get(),
+            call_error);
+    GPR_ASSERT(GRPC_CALL_OK == call_error);
+  }
+}
+
+void XdsLb::BalancerCallState::ClientLoadReportDoneLocked(void* arg,
+                                                          grpc_error* error) {
+  BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
+  XdsLb* xdslb_policy = lb_calld->xdslb_policy();
+  grpc_byte_buffer_destroy(lb_calld->send_message_payload_);
+  lb_calld->send_message_payload_ = nullptr;
+  if (error != GRPC_ERROR_NONE || lb_calld != xdslb_policy->lb_calld_.get()) {
+    lb_calld->Unref(DEBUG_LOCATION, "client_load_report");
+    return;
+  }
+  lb_calld->ScheduleNextClientLoadReportLocked();
+}
+
+void XdsLb::BalancerCallState::OnInitialRequestSentLocked(void* arg,
+                                                          grpc_error* error) {
+  BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
+  grpc_byte_buffer_destroy(lb_calld->send_message_payload_);
+  lb_calld->send_message_payload_ = nullptr;
+  // If we attempted to send a client load report before the initial request was
+  // sent (and this lb_calld is still in use), send the load report now.
+  if (lb_calld->client_load_report_is_due_ &&
+      lb_calld == lb_calld->xdslb_policy()->lb_calld_.get()) {
+    lb_calld->SendClientLoadReportLocked();
+    lb_calld->client_load_report_is_due_ = false;
+  }
+  lb_calld->Unref(DEBUG_LOCATION, "on_initial_request_sent");
+}
+
+void XdsLb::BalancerCallState::OnBalancerMessageReceivedLocked(
+    void* arg, grpc_error* error) {
+  BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
+  XdsLb* xdslb_policy = lb_calld->xdslb_policy();
+  // Empty payload means the LB call was cancelled.
+  if (lb_calld != xdslb_policy->lb_calld_.get() ||
+      lb_calld->recv_message_payload_ == nullptr) {
+    lb_calld->Unref(DEBUG_LOCATION, "on_message_received");
+    return;
+  }
+  grpc_byte_buffer_reader bbr;
+  grpc_byte_buffer_reader_init(&bbr, lb_calld->recv_message_payload_);
+  grpc_slice response_slice = grpc_byte_buffer_reader_readall(&bbr);
+  grpc_byte_buffer_reader_destroy(&bbr);
+  grpc_byte_buffer_destroy(lb_calld->recv_message_payload_);
+  lb_calld->recv_message_payload_ = nullptr;
+  xds_grpclb_initial_response* initial_response;
+  xds_grpclb_serverlist* serverlist;
+  if (!lb_calld->seen_initial_response_ &&
+      (initial_response = xds_grpclb_initial_response_parse(response_slice)) !=
+          nullptr) {
+    // Have NOT seen initial response, look for initial response.
+    if (initial_response->has_client_stats_report_interval) {
+      lb_calld->client_stats_report_interval_ = GPR_MAX(
+          GPR_MS_PER_SEC, xds_grpclb_duration_to_millis(
+                              &initial_response->client_stats_report_interval));
+      if (grpc_lb_xds_trace.enabled()) {
+        gpr_log(GPR_INFO,
+                "[xdslb %p] Received initial LB response message; "
+                "client load reporting interval = %" PRId64 " milliseconds",
+                xdslb_policy, lb_calld->client_stats_report_interval_);
+      }
+    } else if (grpc_lb_xds_trace.enabled()) {
+      gpr_log(GPR_INFO,
+              "[xdslb %p] Received initial LB response message; client load "
+              "reporting NOT enabled",
+              xdslb_policy);
+    }
+    xds_grpclb_initial_response_destroy(initial_response);
+    lb_calld->seen_initial_response_ = true;
+  } else if ((serverlist = xds_grpclb_response_parse_serverlist(
+                  response_slice)) != nullptr) {
+    // Have seen initial response, look for serverlist.
+    GPR_ASSERT(lb_calld->lb_call_ != nullptr);
+    if (grpc_lb_xds_trace.enabled()) {
+      gpr_log(GPR_INFO,
+              "[xdslb %p] Serverlist with %" PRIuPTR " servers received",
+              xdslb_policy, serverlist->num_servers);
+      for (size_t i = 0; i < serverlist->num_servers; ++i) {
+        grpc_resolved_address addr;
+        ParseServer(serverlist->servers[i], &addr);
+        char* ipport;
+        grpc_sockaddr_to_string(&ipport, &addr, false);
+        gpr_log(GPR_INFO, "[xdslb %p] Serverlist[%" PRIuPTR "]: %s",
+                xdslb_policy, i, ipport);
+        gpr_free(ipport);
+      }
+    }
+    /* update serverlist */
+    if (serverlist->num_servers > 0) {
+      // Start sending client load report only after we start using the
+      // serverlist returned from the current LB call.
+      if (lb_calld->client_stats_report_interval_ > 0 &&
+          lb_calld->client_stats_ == nullptr) {
+        lb_calld->client_stats_.reset(New<XdsLbClientStats>());
+        // TODO(roth): We currently track this ref manually.  Once the
+        // ClosureRef API is ready, we should pass the RefCountedPtr<> along
+        // with the callback.
+        auto self = lb_calld->Ref(DEBUG_LOCATION, "client_load_report");
+        self.release();
+        lb_calld->ScheduleNextClientLoadReportLocked();
+      }
+      if (xds_grpclb_serverlist_equals(xdslb_policy->serverlist_, serverlist)) {
+        if (grpc_lb_xds_trace.enabled()) {
+          gpr_log(GPR_INFO,
+                  "[xdslb %p] Incoming server list identical to current, "
+                  "ignoring.",
+                  xdslb_policy);
+        }
+        xds_grpclb_destroy_serverlist(serverlist);
+      } else { /* new serverlist */
+        if (xdslb_policy->serverlist_ != nullptr) {
+          /* dispose of the old serverlist */
+          xds_grpclb_destroy_serverlist(xdslb_policy->serverlist_);
+        } else {
+          /* or dispose of the fallback */
+          grpc_lb_addresses_destroy(xdslb_policy->fallback_backend_addresses_);
+          xdslb_policy->fallback_backend_addresses_ = nullptr;
+          if (xdslb_policy->fallback_timer_callback_pending_) {
+            grpc_timer_cancel(&xdslb_policy->lb_fallback_timer_);
+          }
+        }
+        // and update the copy in the XdsLb instance. This
+        // serverlist instance will be destroyed either upon the next
+        // update or when the XdsLb instance is destroyed.
+        xdslb_policy->serverlist_ = serverlist;
+        xdslb_policy->serverlist_index_ = 0;
+        xdslb_policy->CreateOrUpdateRoundRobinPolicyLocked();
+      }
+    } else {
+      if (grpc_lb_xds_trace.enabled()) {
+        gpr_log(GPR_INFO, "[xdslb %p] Received empty server list, ignoring.",
+                xdslb_policy);
+      }
+      xds_grpclb_destroy_serverlist(serverlist);
+    }
+  } else {
+    // No valid initial response or serverlist found.
+    char* response_slice_str =
+        grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX);
+    gpr_log(GPR_ERROR,
+            "[xdslb %p] Invalid LB response received: '%s'. Ignoring.",
+            xdslb_policy, response_slice_str);
+    gpr_free(response_slice_str);
+  }
+  grpc_slice_unref_internal(response_slice);
+  if (!xdslb_policy->shutting_down_) {
+    // Keep listening for serverlist updates.
+    grpc_op op;
+    memset(&op, 0, sizeof(op));
+    op.op = GRPC_OP_RECV_MESSAGE;
+    op.data.recv_message.recv_message = &lb_calld->recv_message_payload_;
+    op.flags = 0;
+    op.reserved = nullptr;
+    // Reuse the "OnBalancerMessageReceivedLocked" ref taken in StartQuery().
+    const grpc_call_error call_error = grpc_call_start_batch_and_execute(
+        lb_calld->lb_call_, &op, 1,
+        &lb_calld->lb_on_balancer_message_received_);
+    GPR_ASSERT(GRPC_CALL_OK == call_error);
+  } else {
+    lb_calld->Unref(DEBUG_LOCATION, "on_message_received+grpclb_shutdown");
+  }
+}
+
+void XdsLb::BalancerCallState::OnBalancerStatusReceivedLocked(
+    void* arg, grpc_error* error) {
+  BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
+  XdsLb* xdslb_policy = lb_calld->xdslb_policy();
+  GPR_ASSERT(lb_calld->lb_call_ != nullptr);
+  if (grpc_lb_xds_trace.enabled()) {
+    char* status_details =
+        grpc_slice_to_c_string(lb_calld->lb_call_status_details_);
+    gpr_log(GPR_INFO,
+            "[xdslb %p] Status from LB server received. Status = %d, details "
+            "= '%s', (lb_calld: %p, lb_call: %p), error '%s'",
+            xdslb_policy, lb_calld->lb_call_status_, status_details, lb_calld,
+            lb_calld->lb_call_, grpc_error_string(error));
+    gpr_free(status_details);
+  }
+  xdslb_policy->TryReresolutionLocked(&grpc_lb_xds_trace, GRPC_ERROR_NONE);
+  // If this lb_calld is still in use, this call ended because of a failure so
+  // we want to retry connecting. Otherwise, we have deliberately ended this
+  // call and no further action is required.
+  if (lb_calld == xdslb_policy->lb_calld_.get()) {
+    xdslb_policy->lb_calld_.reset();
+    GPR_ASSERT(!xdslb_policy->shutting_down_);
+    if (lb_calld->seen_initial_response_) {
+      // If we lose connection to the LB server, reset the backoff and restart
+      // the LB call immediately.
+      xdslb_policy->lb_call_backoff_.Reset();
+      xdslb_policy->StartBalancerCallLocked();
+    } else {
+      // If this LB call fails establishing any connection to the LB server,
+      // retry later.
+      xdslb_policy->StartBalancerCallRetryTimerLocked();
+    }
+  }
+  lb_calld->Unref(DEBUG_LOCATION, "lb_call_ended");
+}
+
+//
+// helper code for creating balancer channel
+//
+
+grpc_lb_addresses* ExtractBalancerAddresses(
+    const grpc_lb_addresses* addresses) {
+  size_t num_grpclb_addrs = 0;
+  for (size_t i = 0; i < addresses->num_addresses; ++i) {
+    if (addresses->addresses[i].is_balancer) ++num_grpclb_addrs;
+  }
+  // There must be at least one balancer address, or else the
+  // client_channel would not have chosen this LB policy.
+  GPR_ASSERT(num_grpclb_addrs > 0);
+  grpc_lb_addresses* lb_addresses =
+      grpc_lb_addresses_create(num_grpclb_addrs, nullptr);
+  size_t lb_addresses_idx = 0;
+  for (size_t i = 0; i < addresses->num_addresses; ++i) {
+    if (!addresses->addresses[i].is_balancer) continue;
+    if (GPR_UNLIKELY(addresses->addresses[i].user_data != nullptr)) {
+      gpr_log(GPR_ERROR,
+              "This LB policy doesn't support user data. It will be ignored");
+    }
+    grpc_lb_addresses_set_address(
+        lb_addresses, lb_addresses_idx++, addresses->addresses[i].address.addr,
+        addresses->addresses[i].address.len, false /* is balancer */,
+        addresses->addresses[i].balancer_name, nullptr /* user data */);
+  }
+  GPR_ASSERT(num_grpclb_addrs == lb_addresses_idx);
+  return lb_addresses;
+}
+
+/* Returns the channel args for the LB channel, used to create a bidirectional
+ * stream for the reception of load balancing updates.
+ *
+ * Inputs:
+ *   - \a addresses: corresponding to the balancers.
+ *   - \a response_generator: in order to propagate updates from the resolver
+ *   above the grpclb policy.
+ *   - \a args: other args inherited from the grpclb policy. */
+grpc_channel_args* BuildBalancerChannelArgs(
+    const grpc_lb_addresses* addresses,
+    FakeResolverResponseGenerator* response_generator,
+    const grpc_channel_args* args) {
+  grpc_lb_addresses* lb_addresses = ExtractBalancerAddresses(addresses);
+  // Channel args to remove.
+  static const char* args_to_remove[] = {
+      // LB policy name, since we want to use the default (pick_first) in
+      // the LB channel.
+      GRPC_ARG_LB_POLICY_NAME,
+      // The channel arg for the server URI, since that will be different for
+      // the LB channel than for the parent channel.  The client channel
+      // factory will re-add this arg with the right value.
+      GRPC_ARG_SERVER_URI,
+      // The resolved addresses, which will be generated by the name resolver
+      // used in the LB channel.  Note that the LB channel will use the fake
+      // resolver, so this won't actually generate a query to DNS (or some
+      // other name service).  However, the addresses returned by the fake
+      // resolver will have is_balancer=false, whereas our own addresses have
+      // is_balancer=true.  We need the LB channel to return addresses with
+      // is_balancer=false so that it does not wind up recursively using the
+      // grpclb LB policy, as per the special case logic in client_channel.c.
+      GRPC_ARG_LB_ADDRESSES,
+      // The fake resolver response generator, because we are replacing it
+      // with the one from the grpclb policy, used to propagate updates to
+      // the LB channel.
+      GRPC_ARG_FAKE_RESOLVER_RESPONSE_GENERATOR,
+      // The LB channel should use the authority indicated by the target
+      // authority table (see \a grpc_lb_policy_xds_modify_lb_channel_args),
+      // as opposed to the authority from the parent channel.
+      GRPC_ARG_DEFAULT_AUTHORITY,
+      // Just as for \a GRPC_ARG_DEFAULT_AUTHORITY, the LB channel should be
+      // treated as a stand-alone channel and not inherit this argument from the
+      // args of the parent channel.
+      GRPC_SSL_TARGET_NAME_OVERRIDE_ARG,
+  };
+  // Channel args to add.
+  const grpc_arg args_to_add[] = {
+      // New LB addresses.
+      // Note that we pass these in both when creating the LB channel
+      // and via the fake resolver.  The latter is what actually gets used.
+      grpc_lb_addresses_create_channel_arg(lb_addresses),
+      // The fake resolver response generator, which we use to inject
+      // address updates into the LB channel.
+      grpc_core::FakeResolverResponseGenerator::MakeChannelArg(
+          response_generator),
+      // A channel arg indicating the target is a grpclb load balancer.
+      grpc_channel_arg_integer_create(
+          const_cast<char*>(GRPC_ARG_ADDRESS_IS_XDS_LOAD_BALANCER), 1),
+      // A channel arg indicating this is an internal channels, aka it is
+      // owned by components in Core, not by the user application.
+      grpc_channel_arg_integer_create(
+          const_cast<char*>(GRPC_ARG_CHANNELZ_CHANNEL_IS_INTERNAL_CHANNEL), 1),
+  };
+  // Construct channel args.
+  grpc_channel_args* new_args = grpc_channel_args_copy_and_add_and_remove(
+      args, args_to_remove, GPR_ARRAY_SIZE(args_to_remove), args_to_add,
+      GPR_ARRAY_SIZE(args_to_add));
+  // Make any necessary modifications for security.
+  new_args = grpc_lb_policy_xds_modify_lb_channel_args(new_args);
+  // Clean up.
+  grpc_lb_addresses_destroy(lb_addresses);
+  return new_args;
+}
+
+//
+// ctor and dtor
+//
+
+XdsLb::XdsLb(const grpc_lb_addresses* addresses,
+             const LoadBalancingPolicy::Args& args)
+    : LoadBalancingPolicy(args),
+      response_generator_(MakeRefCounted<FakeResolverResponseGenerator>()),
+      lb_call_backoff_(
+          BackOff::Options()
+              .set_initial_backoff(GRPC_XDS_INITIAL_CONNECT_BACKOFF_SECONDS *
+                                   1000)
+              .set_multiplier(GRPC_XDS_RECONNECT_BACKOFF_MULTIPLIER)
+              .set_jitter(GRPC_XDS_RECONNECT_JITTER)
+              .set_max_backoff(GRPC_XDS_RECONNECT_MAX_BACKOFF_SECONDS * 1000)) {
+  // Initialization.
+  gpr_mu_init(&lb_channel_mu_);
+  grpc_subchannel_index_ref();
+  GRPC_CLOSURE_INIT(&lb_channel_on_connectivity_changed_,
+                    &XdsLb::OnBalancerChannelConnectivityChangedLocked, this,
+                    grpc_combiner_scheduler(args.combiner));
+  GRPC_CLOSURE_INIT(&on_rr_connectivity_changed_,
+                    &XdsLb::OnRoundRobinConnectivityChangedLocked, this,
+                    grpc_combiner_scheduler(args.combiner));
+  GRPC_CLOSURE_INIT(&on_rr_request_reresolution_,
+                    &XdsLb::OnRoundRobinRequestReresolutionLocked, this,
+                    grpc_combiner_scheduler(args.combiner));
+  grpc_connectivity_state_init(&state_tracker_, GRPC_CHANNEL_IDLE, "xds");
+  // Record server name.
+  const grpc_arg* arg = grpc_channel_args_find(args.args, GRPC_ARG_SERVER_URI);
+  const char* server_uri = grpc_channel_arg_get_string(arg);
+  GPR_ASSERT(server_uri != nullptr);
+  grpc_uri* uri = grpc_uri_parse(server_uri, true);
+  GPR_ASSERT(uri->path[0] != '\0');
+  server_name_ = gpr_strdup(uri->path[0] == '/' ? uri->path + 1 : uri->path);
+  if (grpc_lb_xds_trace.enabled()) {
+    gpr_log(GPR_INFO,
+            "[xdslb %p] Will use '%s' as the server name for LB request.", this,
+            server_name_);
+  }
+  grpc_uri_destroy(uri);
+  // Record LB call timeout.
+  arg = grpc_channel_args_find(args.args, GRPC_ARG_GRPCLB_CALL_TIMEOUT_MS);
+  lb_call_timeout_ms_ = grpc_channel_arg_get_integer(arg, {0, 0, INT_MAX});
+  // Record fallback timeout.
+  arg = grpc_channel_args_find(args.args, GRPC_ARG_GRPCLB_FALLBACK_TIMEOUT_MS);
+  lb_fallback_timeout_ms_ = grpc_channel_arg_get_integer(
+      arg, {GRPC_XDS_DEFAULT_FALLBACK_TIMEOUT_MS, 0, INT_MAX});
+  // Process channel args.
+  ProcessChannelArgsLocked(*args.args);
+}
+
+XdsLb::~XdsLb() {
+  GPR_ASSERT(pending_picks_ == nullptr);
+  gpr_mu_destroy(&lb_channel_mu_);
+  gpr_free((void*)server_name_);
+  grpc_channel_args_destroy(args_);
+  grpc_connectivity_state_destroy(&state_tracker_);
+  if (serverlist_ != nullptr) {
+    xds_grpclb_destroy_serverlist(serverlist_);
+  }
+  if (fallback_backend_addresses_ != nullptr) {
+    grpc_lb_addresses_destroy(fallback_backend_addresses_);
+  }
+  grpc_subchannel_index_unref();
+}
+
+void XdsLb::ShutdownLocked() {
+  grpc_error* error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel shutdown");
+  shutting_down_ = true;
+  lb_calld_.reset();
+  if (retry_timer_callback_pending_) {
+    grpc_timer_cancel(&lb_call_retry_timer_);
+  }
+  if (fallback_timer_callback_pending_) {
+    grpc_timer_cancel(&lb_fallback_timer_);
+  }
+  rr_policy_.reset();
+  TryReresolutionLocked(&grpc_lb_xds_trace, GRPC_ERROR_CANCELLED);
+  // We destroy the LB channel here instead of in our destructor because
+  // destroying the channel triggers a last callback to
+  // OnBalancerChannelConnectivityChangedLocked(), and we need to be
+  // alive when that callback is invoked.
+  if (lb_channel_ != nullptr) {
+    gpr_mu_lock(&lb_channel_mu_);
+    grpc_channel_destroy(lb_channel_);
+    lb_channel_ = nullptr;
+    gpr_mu_unlock(&lb_channel_mu_);
+  }
+  grpc_connectivity_state_set(&state_tracker_, GRPC_CHANNEL_SHUTDOWN,
+                              GRPC_ERROR_REF(error), "grpclb_shutdown");
+  // Clear pending picks.
+  PendingPick* pp;
+  while ((pp = pending_picks_) != nullptr) {
+    pending_picks_ = pp->next;
+    pp->pick->connected_subchannel.reset();
+    // Note: pp is deleted in this callback.
+    GRPC_CLOSURE_SCHED(&pp->on_complete, GRPC_ERROR_REF(error));
+  }
+  GRPC_ERROR_UNREF(error);
+}
+
+//
+// public methods
+//
+
+void XdsLb::HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) {
+  PendingPick* pp;
+  while ((pp = pending_picks_) != nullptr) {
+    pending_picks_ = pp->next;
+    pp->pick->on_complete = pp->original_on_complete;
+    pp->pick->user_data = nullptr;
+    grpc_error* error = GRPC_ERROR_NONE;
+    if (new_policy->PickLocked(pp->pick, &error)) {
+      // Synchronous return; schedule closure.
+      GRPC_CLOSURE_SCHED(pp->pick->on_complete, error);
+    }
+    Delete(pp);
+  }
+}
+
+// Cancel a specific pending pick.
+//
+// A grpclb pick progresses as follows:
+// - If there's a Round Robin policy (rr_policy_) available, it'll be
+//   handed over to the RR policy (in CreateRoundRobinPolicyLocked()). From
+//   that point onwards, it'll be RR's responsibility. For cancellations, that
+//   implies the pick needs also be cancelled by the RR instance.
+// - Otherwise, without an RR instance, picks stay pending at this policy's
+//   level (grpclb), inside the pending_picks_ list. To cancel these,
+//   we invoke the completion closure and set the pick's connected
+//   subchannel to nullptr right here.
+void XdsLb::CancelPickLocked(PickState* pick, grpc_error* error) {
+  PendingPick* pp = pending_picks_;
+  pending_picks_ = nullptr;
+  while (pp != nullptr) {
+    PendingPick* next = pp->next;
+    if (pp->pick == pick) {
+      pick->connected_subchannel.reset();
+      // Note: pp is deleted in this callback.
+      GRPC_CLOSURE_SCHED(&pp->on_complete,
+                         GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
+                             "Pick Cancelled", &error, 1));
+    } else {
+      pp->next = pending_picks_;
+      pending_picks_ = pp;
+    }
+    pp = next;
+  }
+  if (rr_policy_ != nullptr) {
+    rr_policy_->CancelPickLocked(pick, GRPC_ERROR_REF(error));
+  }
+  GRPC_ERROR_UNREF(error);
+}
+
+// Cancel all pending picks.
+//
+// A grpclb pick progresses as follows:
+// - If there's a Round Robin policy (rr_policy_) available, it'll be
+//   handed over to the RR policy (in CreateRoundRobinPolicyLocked()). From
+//   that point onwards, it'll be RR's responsibility. For cancellations, that
+//   implies the pick needs also be cancelled by the RR instance.
+// - Otherwise, without an RR instance, picks stay pending at this policy's
+//   level (grpclb), inside the pending_picks_ list. To cancel these,
+//   we invoke the completion closure and set the pick's connected
+//   subchannel to nullptr right here.
+void XdsLb::CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask,
+                                      uint32_t initial_metadata_flags_eq,
+                                      grpc_error* error) {
+  PendingPick* pp = pending_picks_;
+  pending_picks_ = nullptr;
+  while (pp != nullptr) {
+    PendingPick* next = pp->next;
+    if ((pp->pick->initial_metadata_flags & initial_metadata_flags_mask) ==
+        initial_metadata_flags_eq) {
+      // Note: pp is deleted in this callback.
+      GRPC_CLOSURE_SCHED(&pp->on_complete,
+                         GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
+                             "Pick Cancelled", &error, 1));
+    } else {
+      pp->next = pending_picks_;
+      pending_picks_ = pp;
+    }
+    pp = next;
+  }
+  if (rr_policy_ != nullptr) {
+    rr_policy_->CancelMatchingPicksLocked(initial_metadata_flags_mask,
+                                          initial_metadata_flags_eq,
+                                          GRPC_ERROR_REF(error));
+  }
+  GRPC_ERROR_UNREF(error);
+}
+
+void XdsLb::ExitIdleLocked() {
+  if (!started_picking_) {
+    StartPickingLocked();
+  }
+}
+
+void XdsLb::ResetBackoffLocked() {
+  if (lb_channel_ != nullptr) {
+    grpc_channel_reset_connect_backoff(lb_channel_);
+  }
+  if (rr_policy_ != nullptr) {
+    rr_policy_->ResetBackoffLocked();
+  }
+}
+
+bool XdsLb::PickLocked(PickState* pick, grpc_error** error) {
+  PendingPick* pp = PendingPickCreate(pick);
+  bool pick_done = false;
+  if (rr_policy_ != nullptr) {
+    if (grpc_lb_xds_trace.enabled()) {
+      gpr_log(GPR_INFO, "[xdslb %p] about to PICK from RR %p", this,
+              rr_policy_.get());
+    }
+    pick_done =
+        PickFromRoundRobinPolicyLocked(false /* force_async */, pp, error);
+  } else {  // rr_policy_ == NULL
+    if (pick->on_complete == nullptr) {
+      *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING(
+          "No pick result available but synchronous result required.");
+      pick_done = true;
+    } else {
+      if (grpc_lb_xds_trace.enabled()) {
+        gpr_log(GPR_INFO,
+                "[xdslb %p] No RR policy. Adding to grpclb's pending picks",
+                this);
+      }
+      AddPendingPick(pp);
+      if (!started_picking_) {
+        StartPickingLocked();
+      }
+      pick_done = false;
+    }
+  }
+  return pick_done;
+}
+
+void XdsLb::FillChildRefsForChannelz(channelz::ChildRefsList* child_subchannels,
+                                     channelz::ChildRefsList* child_channels) {
+  // delegate to the RoundRobin to fill the children subchannels.
+  rr_policy_->FillChildRefsForChannelz(child_subchannels, child_channels);
+  MutexLock lock(&lb_channel_mu_);
+  if (lb_channel_ != nullptr) {
+    grpc_core::channelz::ChannelNode* channel_node =
+        grpc_channel_get_channelz_node(lb_channel_);
+    if (channel_node != nullptr) {
+      child_channels->push_back(channel_node->uuid());
+    }
+  }
+}
+
+grpc_connectivity_state XdsLb::CheckConnectivityLocked(
+    grpc_error** connectivity_error) {
+  return grpc_connectivity_state_get(&state_tracker_, connectivity_error);
+}
+
+void XdsLb::NotifyOnStateChangeLocked(grpc_connectivity_state* current,
+                                      grpc_closure* closure) {
+  grpc_connectivity_state_notify_on_state_change(&state_tracker_, current,
+                                                 closure);
+}
+
+void XdsLb::ProcessChannelArgsLocked(const grpc_channel_args& args) {
+  const grpc_arg* arg = grpc_channel_args_find(&args, GRPC_ARG_LB_ADDRESSES);
+  if (GPR_UNLIKELY(arg == nullptr || arg->type != GRPC_ARG_POINTER)) {
+    // Ignore this update.
+    gpr_log(GPR_ERROR,
+            "[xdslb %p] No valid LB addresses channel arg in update, ignoring.",
+            this);
+    return;
+  }
+  const grpc_lb_addresses* addresses =
+      static_cast<const grpc_lb_addresses*>(arg->value.pointer.p);
+  // Update fallback address list.
+  if (fallback_backend_addresses_ != nullptr) {
+    grpc_lb_addresses_destroy(fallback_backend_addresses_);
+  }
+  fallback_backend_addresses_ = ExtractBackendAddresses(addresses);
+  // Make sure that GRPC_ARG_LB_POLICY_NAME is set in channel args,
+  // since we use this to trigger the client_load_reporting filter.
+  static const char* args_to_remove[] = {GRPC_ARG_LB_POLICY_NAME};
+  grpc_arg new_arg = grpc_channel_arg_string_create(
+      (char*)GRPC_ARG_LB_POLICY_NAME, (char*)"xds");
+  grpc_channel_args_destroy(args_);
+  args_ = grpc_channel_args_copy_and_add_and_remove(
+      &args, args_to_remove, GPR_ARRAY_SIZE(args_to_remove), &new_arg, 1);
+  // Construct args for balancer channel.
+  grpc_channel_args* lb_channel_args =
+      BuildBalancerChannelArgs(addresses, response_generator_.get(), &args);
+  // Create balancer channel if needed.
+  if (lb_channel_ == nullptr) {
+    char* uri_str;
+    gpr_asprintf(&uri_str, "fake:///%s", server_name_);
+    gpr_mu_lock(&lb_channel_mu_);
+    lb_channel_ = grpc_client_channel_factory_create_channel(
+        client_channel_factory(), uri_str,
+        GRPC_CLIENT_CHANNEL_TYPE_LOAD_BALANCING, lb_channel_args);
+    gpr_mu_unlock(&lb_channel_mu_);
+    GPR_ASSERT(lb_channel_ != nullptr);
+    gpr_free(uri_str);
+  }
+  // Propagate updates to the LB channel (pick_first) through the fake
+  // resolver.
+  response_generator_->SetResponse(lb_channel_args);
+  grpc_channel_args_destroy(lb_channel_args);
+}
+
+void XdsLb::UpdateLocked(const grpc_channel_args& args) {
+  ProcessChannelArgsLocked(args);
+  // If fallback is configured and the RR policy already exists, update
+  // it with the new fallback addresses.
+  if (lb_fallback_timeout_ms_ > 0 && rr_policy_ != nullptr) {
+    CreateOrUpdateRoundRobinPolicyLocked();
+  }
+  // Start watching the LB channel connectivity for connection, if not
+  // already doing so.
+  if (!watching_lb_channel_) {
+    lb_channel_connectivity_ = grpc_channel_check_connectivity_state(
+        lb_channel_, true /* try to connect */);
+    grpc_channel_element* client_channel_elem = grpc_channel_stack_last_element(
+        grpc_channel_get_channel_stack(lb_channel_));
+    GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
+    watching_lb_channel_ = true;
+    // TODO(roth): We currently track this ref manually.  Once the
+    // ClosureRef API is ready, we should pass the RefCountedPtr<> along
+    // with the callback.
+    auto self = Ref(DEBUG_LOCATION, "watch_lb_channel_connectivity");
+    self.release();
+    grpc_client_channel_watch_connectivity_state(
+        client_channel_elem,
+        grpc_polling_entity_create_from_pollset_set(interested_parties()),
+        &lb_channel_connectivity_, &lb_channel_on_connectivity_changed_,
+        nullptr);
+  }
+}
+
+//
+// code for balancer channel and call
+//
+
+void XdsLb::StartPickingLocked() {
+  // Start a timer to fall back.
+  if (lb_fallback_timeout_ms_ > 0 && serverlist_ == nullptr &&
+      !fallback_timer_callback_pending_) {
+    grpc_millis deadline = ExecCtx::Get()->Now() + lb_fallback_timeout_ms_;
+    // TODO(roth): We currently track this ref manually.  Once the
+    // ClosureRef API is ready, we should pass the RefCountedPtr<> along
+    // with the callback.
+    auto self = Ref(DEBUG_LOCATION, "on_fallback_timer");
+    self.release();
+    GRPC_CLOSURE_INIT(&lb_on_fallback_, &XdsLb::OnFallbackTimerLocked, this,
+                      grpc_combiner_scheduler(combiner()));
+    fallback_timer_callback_pending_ = true;
+    grpc_timer_init(&lb_fallback_timer_, deadline, &lb_on_fallback_);
+  }
+  started_picking_ = true;
+  StartBalancerCallLocked();
+}
+
+void XdsLb::StartBalancerCallLocked() {
+  GPR_ASSERT(lb_channel_ != nullptr);
+  if (shutting_down_) return;
+  // Init the LB call data.
+  GPR_ASSERT(lb_calld_ == nullptr);
+  lb_calld_ = MakeOrphanable<BalancerCallState>(Ref());
+  if (grpc_lb_xds_trace.enabled()) {
+    gpr_log(GPR_INFO,
+            "[xdslb %p] Query for backends (lb_channel: %p, lb_calld: %p)",
+            this, lb_channel_, lb_calld_.get());
+  }
+  lb_calld_->StartQuery();
+}
+
+void XdsLb::OnFallbackTimerLocked(void* arg, grpc_error* error) {
+  XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
+  xdslb_policy->fallback_timer_callback_pending_ = false;
+  // If we receive a serverlist after the timer fires but before this callback
+  // actually runs, don't fall back.
+  if (xdslb_policy->serverlist_ == nullptr && !xdslb_policy->shutting_down_ &&
+      error == GRPC_ERROR_NONE) {
+    if (grpc_lb_xds_trace.enabled()) {
+      gpr_log(GPR_INFO, "[xdslb %p] Falling back to use backends from resolver",
+              xdslb_policy);
+    }
+    GPR_ASSERT(xdslb_policy->fallback_backend_addresses_ != nullptr);
+    xdslb_policy->CreateOrUpdateRoundRobinPolicyLocked();
+  }
+  xdslb_policy->Unref(DEBUG_LOCATION, "on_fallback_timer");
+}
+
+void XdsLb::StartBalancerCallRetryTimerLocked() {
+  grpc_millis next_try = lb_call_backoff_.NextAttemptTime();
+  if (grpc_lb_xds_trace.enabled()) {
+    gpr_log(GPR_INFO, "[xdslb %p] Connection to LB server lost...", this);
+    grpc_millis timeout = next_try - ExecCtx::Get()->Now();
+    if (timeout > 0) {
+      gpr_log(GPR_INFO, "[xdslb %p] ... retry_timer_active in %" PRId64 "ms.",
+              this, timeout);
+    } else {
+      gpr_log(GPR_INFO, "[xdslb %p] ... retry_timer_active immediately.", this);
+    }
+  }
+  // TODO(roth): We currently track this ref manually.  Once the
+  // ClosureRef API is ready, we should pass the RefCountedPtr<> along
+  // with the callback.
+  auto self = Ref(DEBUG_LOCATION, "on_balancer_call_retry_timer");
+  self.release();
+  GRPC_CLOSURE_INIT(&lb_on_call_retry_, &XdsLb::OnBalancerCallRetryTimerLocked,
+                    this, grpc_combiner_scheduler(combiner()));
+  retry_timer_callback_pending_ = true;
+  grpc_timer_init(&lb_call_retry_timer_, next_try, &lb_on_call_retry_);
+}
+
+void XdsLb::OnBalancerCallRetryTimerLocked(void* arg, grpc_error* error) {
+  XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
+  xdslb_policy->retry_timer_callback_pending_ = false;
+  if (!xdslb_policy->shutting_down_ && error == GRPC_ERROR_NONE &&
+      xdslb_policy->lb_calld_ == nullptr) {
+    if (grpc_lb_xds_trace.enabled()) {
+      gpr_log(GPR_INFO, "[xdslb %p] Restarting call to LB server",
+              xdslb_policy);
+    }
+    xdslb_policy->StartBalancerCallLocked();
+  }
+  xdslb_policy->Unref(DEBUG_LOCATION, "on_balancer_call_retry_timer");
+}
+
+// Invoked as part of the update process. It continues watching the LB channel
+// until it shuts down or becomes READY. It's invoked even if the LB channel
+// stayed READY throughout the update (for example if the update is identical).
+void XdsLb::OnBalancerChannelConnectivityChangedLocked(void* arg,
+                                                       grpc_error* error) {
+  XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
+  if (xdslb_policy->shutting_down_) goto done;
+  // Re-initialize the lb_call. This should also take care of updating the
+  // embedded RR policy. Note that the current RR policy, if any, will stay in
+  // effect until an update from the new lb_call is received.
+  switch (xdslb_policy->lb_channel_connectivity_) {
+    case GRPC_CHANNEL_CONNECTING:
+    case GRPC_CHANNEL_TRANSIENT_FAILURE: {
+      // Keep watching the LB channel.
+      grpc_channel_element* client_channel_elem =
+          grpc_channel_stack_last_element(
+              grpc_channel_get_channel_stack(xdslb_policy->lb_channel_));
+      GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
+      grpc_client_channel_watch_connectivity_state(
+          client_channel_elem,
+          grpc_polling_entity_create_from_pollset_set(
+              xdslb_policy->interested_parties()),
+          &xdslb_policy->lb_channel_connectivity_,
+          &xdslb_policy->lb_channel_on_connectivity_changed_, nullptr);
+      break;
+    }
+      // The LB channel may be IDLE because it's shut down before the update.
+      // Restart the LB call to kick the LB channel into gear.
+    case GRPC_CHANNEL_IDLE:
+    case GRPC_CHANNEL_READY:
+      xdslb_policy->lb_calld_.reset();
+      if (xdslb_policy->started_picking_) {
+        if (xdslb_policy->retry_timer_callback_pending_) {
+          grpc_timer_cancel(&xdslb_policy->lb_call_retry_timer_);
+        }
+        xdslb_policy->lb_call_backoff_.Reset();
+        xdslb_policy->StartBalancerCallLocked();
+      }
+      [[fallthrough]];
+      // Fall through.
+    case GRPC_CHANNEL_SHUTDOWN:
+    done:
+      xdslb_policy->watching_lb_channel_ = false;
+      xdslb_policy->Unref(DEBUG_LOCATION,
+                          "watch_lb_channel_connectivity_cb_shutdown");
+  }
+}
+
+//
+// PendingPick
+//
+
+// Adds lb_token of selected subchannel (address) to the call's initial
+// metadata.
+grpc_error* AddLbTokenToInitialMetadata(
+    grpc_mdelem lb_token, grpc_linked_mdelem* lb_token_mdelem_storage,
+    grpc_metadata_batch* initial_metadata) {
+  GPR_ASSERT(lb_token_mdelem_storage != nullptr);
+  GPR_ASSERT(!GRPC_MDISNULL(lb_token));
+  return grpc_metadata_batch_add_tail(initial_metadata, lb_token_mdelem_storage,
+                                      lb_token);
+}
+
+// Destroy function used when embedding client stats in call context.
+void DestroyClientStats(void* arg) {
+  static_cast<XdsLbClientStats*>(arg)->Unref();
+}
+
+void XdsLb::PendingPickSetMetadataAndContext(PendingPick* pp) {
+  /* if connected_subchannel is nullptr, no pick has been made by the RR
+   * policy (e.g., all addresses failed to connect). There won't be any
+   * user_data/token available */
+  if (pp->pick->connected_subchannel != nullptr) {
+    if (GPR_LIKELY(!GRPC_MDISNULL(pp->lb_token))) {
+      AddLbTokenToInitialMetadata(GRPC_MDELEM_REF(pp->lb_token),
+                                  &pp->pick->lb_token_mdelem_storage,
+                                  pp->pick->initial_metadata);
+    } else {
+      gpr_log(GPR_ERROR,
+              "[xdslb %p] No LB token for connected subchannel pick %p",
+              pp->xdslb_policy, pp->pick);
+      abort();
+    }
+    // Pass on client stats via context. Passes ownership of the reference.
+    if (pp->client_stats != nullptr) {
+      pp->pick->subchannel_call_context[GRPC_GRPCLB_CLIENT_STATS].value =
+          pp->client_stats.release();
+      pp->pick->subchannel_call_context[GRPC_GRPCLB_CLIENT_STATS].destroy =
+          DestroyClientStats;
+    }
+  } else {
+    pp->client_stats.reset();
+  }
+}
+
+/* The \a on_complete closure passed as part of the pick requires keeping a
+ * reference to its associated round robin instance. We wrap this closure in
+ * order to unref the round robin instance upon its invocation */
+void XdsLb::OnPendingPickComplete(void* arg, grpc_error* error) {
+  PendingPick* pp = static_cast<PendingPick*>(arg);
+  PendingPickSetMetadataAndContext(pp);
+  GRPC_CLOSURE_SCHED(pp->original_on_complete, GRPC_ERROR_REF(error));
+  Delete(pp);
+}
+
+XdsLb::PendingPick* XdsLb::PendingPickCreate(PickState* pick) {
+  PendingPick* pp = New<PendingPick>();
+  pp->xdslb_policy = this;
+  pp->pick = pick;
+  GRPC_CLOSURE_INIT(&pp->on_complete, &XdsLb::OnPendingPickComplete, pp,
+                    grpc_schedule_on_exec_ctx);
+  pp->original_on_complete = pick->on_complete;
+  pick->on_complete = &pp->on_complete;
+  return pp;
+}
+
+void XdsLb::AddPendingPick(PendingPick* pp) {
+  pp->next = pending_picks_;
+  pending_picks_ = pp;
+}
+
+//
+// code for interacting with the RR policy
+//
+
+// Performs a pick over \a rr_policy_. Given that a pick can return
+// immediately (ignoring its completion callback), we need to perform the
+// cleanups this callback would otherwise be responsible for.
+// If \a force_async is true, then we will manually schedule the
+// completion callback even if the pick is available immediately.
+bool XdsLb::PickFromRoundRobinPolicyLocked(bool force_async, PendingPick* pp,
+                                           grpc_error** error) {
+  // Check for drops if we are not using fallback backend addresses.
+  if (serverlist_ != nullptr) {
+    // Look at the index into the serverlist to see if we should drop this call.
+    xds_grpclb_server* server = serverlist_->servers[serverlist_index_++];
+    if (serverlist_index_ == serverlist_->num_servers) {
+      serverlist_index_ = 0;  // Wrap-around.
+    }
+    if (server->drop) {
+      // Update client load reporting stats to indicate the number of
+      // dropped calls.  Note that we have to do this here instead of in
+      // the client_load_reporting filter, because we do not create a
+      // subchannel call (and therefore no client_load_reporting filter)
+      // for dropped calls.
+      if (lb_calld_ != nullptr && lb_calld_->client_stats() != nullptr) {
+        lb_calld_->client_stats()->AddCallDroppedLocked(
+            server->load_balance_token);
+      }
+      if (force_async) {
+        GRPC_CLOSURE_SCHED(pp->original_on_complete, GRPC_ERROR_NONE);
+        Delete(pp);
+        return false;
+      }
+      Delete(pp);
+      return true;
+    }
+  }
+  // Set client_stats and user_data.
+  if (lb_calld_ != nullptr && lb_calld_->client_stats() != nullptr) {
+    pp->client_stats = lb_calld_->client_stats()->Ref();
+  }
+  GPR_ASSERT(pp->pick->user_data == nullptr);
+  pp->pick->user_data = (void**)&pp->lb_token;
+  // Pick via the RR policy.
+  bool pick_done = rr_policy_->PickLocked(pp->pick, error);
+  if (pick_done) {
+    PendingPickSetMetadataAndContext(pp);
+    if (force_async) {
+      GRPC_CLOSURE_SCHED(pp->original_on_complete, *error);
+      *error = GRPC_ERROR_NONE;
+      pick_done = false;
+    }
+    Delete(pp);
+  }
+  // else, the pending pick will be registered and taken care of by the
+  // pending pick list inside the RR policy.  Eventually,
+  // OnPendingPickComplete() will be called, which will (among other
+  // things) add the LB token to the call's initial metadata.
+  return pick_done;
+}
+
+void XdsLb::CreateRoundRobinPolicyLocked(const Args& args) {
+  GPR_ASSERT(rr_policy_ == nullptr);
+  rr_policy_ = LoadBalancingPolicyRegistry::CreateLoadBalancingPolicy(
+      "round_robin", args);
+  if (GPR_UNLIKELY(rr_policy_ == nullptr)) {
+    gpr_log(GPR_ERROR, "[xdslb %p] Failure creating a RoundRobin policy", this);
+    return;
+  }
+  // TODO(roth): We currently track this ref manually.  Once the new
+  // ClosureRef API is done, pass the RefCountedPtr<> along with the closure.
+  auto self = Ref(DEBUG_LOCATION, "on_rr_reresolution_requested");
+  self.release();
+  rr_policy_->SetReresolutionClosureLocked(&on_rr_request_reresolution_);
+  grpc_error* rr_state_error = nullptr;
+  rr_connectivity_state_ = rr_policy_->CheckConnectivityLocked(&rr_state_error);
+  // Connectivity state is a function of the RR policy updated/created.
+  UpdateConnectivityStateFromRoundRobinPolicyLocked(rr_state_error);
+  // Add the gRPC LB's interested_parties pollset_set to that of the newly
+  // created RR policy. This will make the RR policy progress upon activity on
+  // gRPC LB, which in turn is tied to the application's call.
+  grpc_pollset_set_add_pollset_set(rr_policy_->interested_parties(),
+                                   interested_parties());
+  // Subscribe to changes to the connectivity of the new RR.
+  // TODO(roth): We currently track this ref manually.  Once the new
+  // ClosureRef API is done, pass the RefCountedPtr<> along with the closure.
+  self = Ref(DEBUG_LOCATION, "on_rr_connectivity_changed");
+  self.release();
+  rr_policy_->NotifyOnStateChangeLocked(&rr_connectivity_state_,
+                                        &on_rr_connectivity_changed_);
+  rr_policy_->ExitIdleLocked();
+  // Send pending picks to RR policy.
+  PendingPick* pp;
+  while ((pp = pending_picks_)) {
+    pending_picks_ = pp->next;
+    if (grpc_lb_xds_trace.enabled()) {
+      gpr_log(GPR_INFO,
+              "[xdslb %p] Pending pick about to (async) PICK from RR %p", this,
+              rr_policy_.get());
+    }
+    grpc_error* error = GRPC_ERROR_NONE;
+    PickFromRoundRobinPolicyLocked(true /* force_async */, pp, &error);
+  }
+}
+
+grpc_channel_args* XdsLb::CreateRoundRobinPolicyArgsLocked() {
+  grpc_lb_addresses* addresses;
+  bool is_backend_from_grpclb_load_balancer = false;
+  if (serverlist_ != nullptr) {
+    GPR_ASSERT(serverlist_->num_servers > 0);
+    addresses = ProcessServerlist(serverlist_);
+    is_backend_from_grpclb_load_balancer = true;
+  } else {
+    // If CreateOrUpdateRoundRobinPolicyLocked() is invoked when we haven't
+    // received any serverlist from the balancer, we use the fallback backends
+    // returned by the resolver. Note that the fallback backend list may be
+    // empty, in which case the new round_robin policy will keep the requested
+    // picks pending.
+    GPR_ASSERT(fallback_backend_addresses_ != nullptr);
+    addresses = grpc_lb_addresses_copy(fallback_backend_addresses_);
+  }
+  GPR_ASSERT(addresses != nullptr);
+  // Replace the LB addresses in the channel args that we pass down to
+  // the subchannel.
+  static const char* keys_to_remove[] = {GRPC_ARG_LB_ADDRESSES};
+  const grpc_arg args_to_add[] = {
+      grpc_lb_addresses_create_channel_arg(addresses),
+      // A channel arg indicating if the target is a backend inferred from a
+      // grpclb load balancer.
+      grpc_channel_arg_integer_create(
+          const_cast<char*>(GRPC_ARG_ADDRESS_IS_BACKEND_FROM_XDS_LOAD_BALANCER),
+          is_backend_from_grpclb_load_balancer),
+  };
+  grpc_channel_args* args = grpc_channel_args_copy_and_add_and_remove(
+      args_, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove), args_to_add,
+      GPR_ARRAY_SIZE(args_to_add));
+  grpc_lb_addresses_destroy(addresses);
+  return args;
+}
+
+void XdsLb::CreateOrUpdateRoundRobinPolicyLocked() {
+  if (shutting_down_) return;
+  grpc_channel_args* args = CreateRoundRobinPolicyArgsLocked();
+  GPR_ASSERT(args != nullptr);
+  if (rr_policy_ != nullptr) {
+    if (grpc_lb_xds_trace.enabled()) {
+      gpr_log(GPR_INFO, "[xdslb %p] Updating RR policy %p", this,
+              rr_policy_.get());
+    }
+    rr_policy_->UpdateLocked(*args);
+  } else {
+    LoadBalancingPolicy::Args lb_policy_args;
+    lb_policy_args.combiner = combiner();
+    lb_policy_args.client_channel_factory = client_channel_factory();
+    lb_policy_args.args = args;
+    CreateRoundRobinPolicyLocked(lb_policy_args);
+    if (grpc_lb_xds_trace.enabled()) {
+      gpr_log(GPR_INFO, "[xdslb %p] Created new RR policy %p", this,
+              rr_policy_.get());
+    }
+  }
+  grpc_channel_args_destroy(args);
+}
+
+void XdsLb::OnRoundRobinRequestReresolutionLocked(void* arg,
+                                                  grpc_error* error) {
+  XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
+  if (xdslb_policy->shutting_down_ || error != GRPC_ERROR_NONE) {
+    xdslb_policy->Unref(DEBUG_LOCATION, "on_rr_reresolution_requested");
+    return;
+  }
+  if (grpc_lb_xds_trace.enabled()) {
+    gpr_log(
+        GPR_INFO,
+        "[xdslb %p] Re-resolution requested from the internal RR policy (%p).",
+        xdslb_policy, xdslb_policy->rr_policy_.get());
+  }
+  // If we are talking to a balancer, we expect to get updated addresses form
+  // the balancer, so we can ignore the re-resolution request from the RR
+  // policy. Otherwise, handle the re-resolution request using the
+  // grpclb policy's original re-resolution closure.
+  if (xdslb_policy->lb_calld_ == nullptr ||
+      !xdslb_policy->lb_calld_->seen_initial_response()) {
+    xdslb_policy->TryReresolutionLocked(&grpc_lb_xds_trace, GRPC_ERROR_NONE);
+  }
+  // Give back the wrapper closure to the RR policy.
+  xdslb_policy->rr_policy_->SetReresolutionClosureLocked(
+      &xdslb_policy->on_rr_request_reresolution_);
+}
+
+void XdsLb::UpdateConnectivityStateFromRoundRobinPolicyLocked(
+    grpc_error* rr_state_error) {
+  const grpc_connectivity_state curr_glb_state =
+      grpc_connectivity_state_check(&state_tracker_);
+  /* The new connectivity status is a function of the previous one and the new
+   * input coming from the status of the RR policy.
+   *
+   *  current state (grpclb's)
+   *  |
+   *  v  || I  |  C  |  R  |  TF  |  SD  |  <- new state (RR's)
+   *  ===++====+=====+=====+======+======+
+   *   I || I  |  C  |  R  | [I]  | [I]  |
+   *  ---++----+-----+-----+------+------+
+   *   C || I  |  C  |  R  | [C]  | [C]  |
+   *  ---++----+-----+-----+------+------+
+   *   R || I  |  C  |  R  | [R]  | [R]  |
+   *  ---++----+-----+-----+------+------+
+   *  TF || I  |  C  |  R  | [TF] | [TF] |
+   *  ---++----+-----+-----+------+------+
+   *  SD || NA |  NA |  NA |  NA  |  NA  | (*)
+   *  ---++----+-----+-----+------+------+
+   *
+   * A [STATE] indicates that the old RR policy is kept. In those cases, STATE
+   * is the current state of grpclb, which is left untouched.
+   *
+   *  In summary, if the new state is TRANSIENT_FAILURE or SHUTDOWN, stick to
+   *  the previous RR instance.
+   *
+   *  Note that the status is never updated to SHUTDOWN as a result of calling
+   *  this function. Only glb_shutdown() has the power to set that state.
+   *
+   *  (*) This function mustn't be called during shutting down. */
+  GPR_ASSERT(curr_glb_state != GRPC_CHANNEL_SHUTDOWN);
+  switch (rr_connectivity_state_) {
+    case GRPC_CHANNEL_TRANSIENT_FAILURE:
+    case GRPC_CHANNEL_SHUTDOWN:
+      GPR_ASSERT(rr_state_error != GRPC_ERROR_NONE);
+      break;
+    case GRPC_CHANNEL_IDLE:
+    case GRPC_CHANNEL_CONNECTING:
+    case GRPC_CHANNEL_READY:
+      GPR_ASSERT(rr_state_error == GRPC_ERROR_NONE);
+  }
+  if (grpc_lb_xds_trace.enabled()) {
+    gpr_log(
+        GPR_INFO,
+        "[xdslb %p] Setting grpclb's state to %s from new RR policy %p state.",
+        this, grpc_connectivity_state_name(rr_connectivity_state_),
+        rr_policy_.get());
+  }
+  grpc_connectivity_state_set(&state_tracker_, rr_connectivity_state_,
+                              rr_state_error,
+                              "update_lb_connectivity_status_locked");
+}
+
+void XdsLb::OnRoundRobinConnectivityChangedLocked(void* arg,
+                                                  grpc_error* error) {
+  XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
+  if (xdslb_policy->shutting_down_) {
+    xdslb_policy->Unref(DEBUG_LOCATION, "on_rr_connectivity_changed");
+    return;
+  }
+  xdslb_policy->UpdateConnectivityStateFromRoundRobinPolicyLocked(
+      GRPC_ERROR_REF(error));
+  // Resubscribe. Reuse the "on_rr_connectivity_changed" ref.
+  xdslb_policy->rr_policy_->NotifyOnStateChangeLocked(
+      &xdslb_policy->rr_connectivity_state_,
+      &xdslb_policy->on_rr_connectivity_changed_);
+}
+
+//
+// factory
+//
+
+class XdsFactory : public LoadBalancingPolicyFactory {
+ public:
+  OrphanablePtr<LoadBalancingPolicy> CreateLoadBalancingPolicy(
+      const LoadBalancingPolicy::Args& args) const override {
+    /* Count the number of gRPC-LB addresses. There must be at least one. */
+    const grpc_arg* arg =
+        grpc_channel_args_find(args.args, GRPC_ARG_LB_ADDRESSES);
+    if (arg == nullptr || arg->type != GRPC_ARG_POINTER) {
+      return nullptr;
+    }
+    grpc_lb_addresses* addresses =
+        static_cast<grpc_lb_addresses*>(arg->value.pointer.p);
+    size_t num_grpclb_addrs = 0;
+    for (size_t i = 0; i < addresses->num_addresses; ++i) {
+      if (addresses->addresses[i].is_balancer) ++num_grpclb_addrs;
+    }
+    if (num_grpclb_addrs == 0) return nullptr;
+    return OrphanablePtr<LoadBalancingPolicy>(New<XdsLb>(addresses, args));
+  }
+
+  const char* name() const override { return "xds"; }
+};
+
+}  // namespace
+
+}  // namespace grpc_core
+
+//
+// Plugin registration
+//
+
+namespace {
+
+// Only add client_load_reporting filter if the grpclb LB policy is used.
+bool maybe_add_client_load_reporting_filter(grpc_channel_stack_builder* builder,
+                                            void* arg) {
+  const grpc_channel_args* args =
+      grpc_channel_stack_builder_get_channel_arguments(builder);
+  const grpc_arg* channel_arg =
+      grpc_channel_args_find(args, GRPC_ARG_LB_POLICY_NAME);
+  if (channel_arg != nullptr && channel_arg->type == GRPC_ARG_STRING &&
+      strcmp(channel_arg->value.string, "grpclb") == 0) {
+    return grpc_channel_stack_builder_append_filter(
+        builder, (const grpc_channel_filter*)arg, nullptr, nullptr);
+  }
+  return true;
+}
+
+}  // namespace
+
+void grpc_lb_policy_xds_init() {
+  grpc_core::LoadBalancingPolicyRegistry::Builder::
+      RegisterLoadBalancingPolicyFactory(
+          grpc_core::UniquePtr<grpc_core::LoadBalancingPolicyFactory>(
+              grpc_core::New<grpc_core::XdsFactory>()));
+  grpc_channel_init_register_stage(GRPC_CLIENT_SUBCHANNEL,
+                                   GRPC_CHANNEL_INIT_BUILTIN_PRIORITY,
+                                   maybe_add_client_load_reporting_filter,
+                                   (void*)&xds_client_load_reporting_filter);
+}
+
+void grpc_lb_policy_xds_shutdown() {}
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds.h b/src/core/ext/filters/client_channel/lb_policy/xds/xds.h
new file mode 100644
index 0000000000..8b20680f2d
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_H
+#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_H
+
+#include <grpc/support/port_platform.h>
+
+/** Channel arg indicating if a target corresponding to the address is grpclb
+ * loadbalancer. The type of this arg is an integer and the value is treated as
+ * a bool. */
+#define GRPC_ARG_ADDRESS_IS_XDS_LOAD_BALANCER \
+  "grpc.address_is_xds_load_balancer"
+/** Channel arg indicating if a target corresponding to the address is a backend
+ * received from a balancer. The type of this arg is an integer and the value is
+ * treated as a bool. */
+#define GRPC_ARG_ADDRESS_IS_BACKEND_FROM_XDS_LOAD_BALANCER \
+  "grpc.address_is_backend_from_xds_load_balancer"
+
+#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_H \
+        */
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.cc
new file mode 100644
index 0000000000..0aa145a24e
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.cc
@@ -0,0 +1,26 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h"
+
+grpc_channel_args* grpc_lb_policy_xds_modify_lb_channel_args(
+    grpc_channel_args* args) {
+  return args;
+}
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h
new file mode 100644
index 0000000000..32c4acc8a3
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CHANNEL_H
+#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CHANNEL_H
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/ext/filters/client_channel/lb_policy_factory.h"
+
+/// Makes any necessary modifications to \a args for use in the xds
+/// balancer channel.
+///
+/// Takes ownership of \a args.
+///
+/// Caller takes ownership of the returned args.
+grpc_channel_args* grpc_lb_policy_xds_modify_lb_channel_args(
+    grpc_channel_args* args);
+
+#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CHANNEL_H \
+        */
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_secure.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_secure.cc
new file mode 100644
index 0000000000..5ab72efce4
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_secure.cc
@@ -0,0 +1,107 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h"
+
+#include <grpc/support/alloc.h>
+#include <grpc/support/string_util.h>
+#include <string.h>
+
+#include "src/core/ext/filters/client_channel/client_channel.h"
+#include "src/core/lib/channel/channel_args.h"
+#include "src/core/lib/gpr/string.h"
+#include "src/core/lib/iomgr/sockaddr_utils.h"
+#include "src/core/lib/security/credentials/credentials.h"
+#include "src/core/lib/security/transport/target_authority_table.h"
+#include "src/core/lib/slice/slice_internal.h"
+
+namespace grpc_core {
+namespace {
+
+int BalancerNameCmp(const grpc_core::UniquePtr<char>& a,
+                    const grpc_core::UniquePtr<char>& b) {
+  return strcmp(a.get(), b.get());
+}
+
+RefCountedPtr<TargetAuthorityTable> CreateTargetAuthorityTable(
+    grpc_lb_addresses* addresses) {
+  TargetAuthorityTable::Entry* target_authority_entries =
+      static_cast<TargetAuthorityTable::Entry*>(gpr_zalloc(
+          sizeof(*target_authority_entries) * addresses->num_addresses));
+  for (size_t i = 0; i < addresses->num_addresses; ++i) {
+    char* addr_str;
+    GPR_ASSERT(grpc_sockaddr_to_string(
+                   &addr_str, &addresses->addresses[i].address, true) > 0);
+    target_authority_entries[i].key = grpc_slice_from_copied_string(addr_str);
+    target_authority_entries[i].value.reset(
+        gpr_strdup(addresses->addresses[i].balancer_name));
+    gpr_free(addr_str);
+  }
+  RefCountedPtr<TargetAuthorityTable> target_authority_table =
+      TargetAuthorityTable::Create(addresses->num_addresses,
+                                   target_authority_entries, BalancerNameCmp);
+  gpr_free(target_authority_entries);
+  return target_authority_table;
+}
+
+}  // namespace
+}  // namespace grpc_core
+
+grpc_channel_args* grpc_lb_policy_xds_modify_lb_channel_args(
+    grpc_channel_args* args) {
+  const char* args_to_remove[1];
+  size_t num_args_to_remove = 0;
+  grpc_arg args_to_add[2];
+  size_t num_args_to_add = 0;
+  // Add arg for targets info table.
+  const grpc_arg* arg = grpc_channel_args_find(args, GRPC_ARG_LB_ADDRESSES);
+  GPR_ASSERT(arg != nullptr);
+  GPR_ASSERT(arg->type == GRPC_ARG_POINTER);
+  grpc_lb_addresses* addresses =
+      static_cast<grpc_lb_addresses*>(arg->value.pointer.p);
+  grpc_core::RefCountedPtr<grpc_core::TargetAuthorityTable>
+      target_authority_table = grpc_core::CreateTargetAuthorityTable(addresses);
+  args_to_add[num_args_to_add++] =
+      grpc_core::CreateTargetAuthorityTableChannelArg(
+          target_authority_table.get());
+  // Substitute the channel credentials with a version without call
+  // credentials: the load balancer is not necessarily trusted to handle
+  // bearer token credentials.
+  grpc_channel_credentials* channel_credentials =
+      grpc_channel_credentials_find_in_args(args);
+  grpc_channel_credentials* creds_sans_call_creds = nullptr;
+  if (channel_credentials != nullptr) {
+    creds_sans_call_creds =
+        grpc_channel_credentials_duplicate_without_call_credentials(
+            channel_credentials);
+    GPR_ASSERT(creds_sans_call_creds != nullptr);
+    args_to_remove[num_args_to_remove++] = GRPC_ARG_CHANNEL_CREDENTIALS;
+    args_to_add[num_args_to_add++] =
+        grpc_channel_credentials_to_arg(creds_sans_call_creds);
+  }
+  grpc_channel_args* result = grpc_channel_args_copy_and_add_and_remove(
+      args, args_to_remove, num_args_to_remove, args_to_add, num_args_to_add);
+  // Clean up.
+  grpc_channel_args_destroy(args);
+  if (creds_sans_call_creds != nullptr) {
+    grpc_channel_credentials_unref(creds_sans_call_creds);
+  }
+  return result;
+}
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.cc
new file mode 100644
index 0000000000..cdf5408be3
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.cc
@@ -0,0 +1,85 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h"
+
+#include <grpc/support/atm.h>
+#include <grpc/support/string_util.h>
+#include <string.h>
+
+namespace grpc_core {
+
+void XdsLbClientStats::AddCallStarted() {
+  gpr_atm_full_fetch_add(&num_calls_started_, (gpr_atm)1);
+}
+
+void XdsLbClientStats::AddCallFinished(bool finished_with_client_failed_to_send,
+                                       bool finished_known_received) {
+  gpr_atm_full_fetch_add(&num_calls_finished_, (gpr_atm)1);
+  if (finished_with_client_failed_to_send) {
+    gpr_atm_full_fetch_add(&num_calls_finished_with_client_failed_to_send_,
+                           (gpr_atm)1);
+  }
+  if (finished_known_received) {
+    gpr_atm_full_fetch_add(&num_calls_finished_known_received_, (gpr_atm)1);
+  }
+}
+
+void XdsLbClientStats::AddCallDroppedLocked(char* token) {
+  // Increment num_calls_started and num_calls_finished.
+  gpr_atm_full_fetch_add(&num_calls_started_, (gpr_atm)1);
+  gpr_atm_full_fetch_add(&num_calls_finished_, (gpr_atm)1);
+  // Record the drop.
+  if (drop_token_counts_ == nullptr) {
+    drop_token_counts_.reset(New<DroppedCallCounts>());
+  }
+  for (size_t i = 0; i < drop_token_counts_->size(); ++i) {
+    if (strcmp((*drop_token_counts_)[i].token.get(), token) == 0) {
+      ++(*drop_token_counts_)[i].count;
+      return;
+    }
+  }
+  // Not found, so add a new entry.
+  drop_token_counts_->emplace_back(UniquePtr<char>(gpr_strdup(token)), 1);
+}
+
+namespace {
+
+void AtomicGetAndResetCounter(int64_t* value, gpr_atm* counter) {
+  *value = static_cast<int64_t>(gpr_atm_full_xchg(counter, (gpr_atm)0));
+}
+
+}  // namespace
+
+void XdsLbClientStats::GetLocked(
+    int64_t* num_calls_started, int64_t* num_calls_finished,
+    int64_t* num_calls_finished_with_client_failed_to_send,
+    int64_t* num_calls_finished_known_received,
+    UniquePtr<DroppedCallCounts>* drop_token_counts) {
+  AtomicGetAndResetCounter(num_calls_started, &num_calls_started_);
+  AtomicGetAndResetCounter(num_calls_finished, &num_calls_finished_);
+  AtomicGetAndResetCounter(num_calls_finished_with_client_failed_to_send,
+                           &num_calls_finished_with_client_failed_to_send_);
+  AtomicGetAndResetCounter(num_calls_finished_known_received,
+                           &num_calls_finished_known_received_);
+  *drop_token_counts = std::move(drop_token_counts_);
+}
+
+}  // namespace grpc_core
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h b/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h
new file mode 100644
index 0000000000..fa0b9f4b63
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h
@@ -0,0 +1,72 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CLIENT_STATS_H
+#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CLIENT_STATS_H
+
+#include <grpc/support/port_platform.h>
+
+#include <grpc/support/atm.h>
+
+#include "src/core/lib/gprpp/inlined_vector.h"
+#include "src/core/lib/gprpp/memory.h"
+#include "src/core/lib/gprpp/ref_counted.h"
+
+namespace grpc_core {
+
+class XdsLbClientStats : public RefCounted<XdsLbClientStats> {
+ public:
+  struct DropTokenCount {
+    UniquePtr<char> token;
+    int64_t count;
+
+    DropTokenCount(UniquePtr<char> token, int64_t count)
+        : token(std::move(token)), count(count) {}
+  };
+
+  typedef InlinedVector<DropTokenCount, 10> DroppedCallCounts;
+
+  XdsLbClientStats() {}
+
+  void AddCallStarted();
+  void AddCallFinished(bool finished_with_client_failed_to_send,
+                       bool finished_known_received);
+
+  // This method is not thread-safe; caller must synchronize.
+  void AddCallDroppedLocked(char* token);
+
+  // This method is not thread-safe; caller must synchronize.
+  void GetLocked(int64_t* num_calls_started, int64_t* num_calls_finished,
+                 int64_t* num_calls_finished_with_client_failed_to_send,
+                 int64_t* num_calls_finished_known_received,
+                 UniquePtr<DroppedCallCounts>* drop_token_counts);
+
+ private:
+  // This field must only be accessed via *_locked() methods.
+  UniquePtr<DroppedCallCounts> drop_token_counts_;
+  // These fields may be accessed from multiple threads at a time.
+  gpr_atm num_calls_started_ = 0;
+  gpr_atm num_calls_finished_ = 0;
+  gpr_atm num_calls_finished_with_client_failed_to_send_ = 0;
+  gpr_atm num_calls_finished_known_received_ = 0;
+};
+
+}  // namespace grpc_core
+
+#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CLIENT_STATS_H \
+        */
diff --git a/src/core/ext/filters/client_channel/subchannel.cc b/src/core/ext/filters/client_channel/subchannel.cc
index 4a668b0fa7..a0a40eb2b3 100644
--- a/src/core/ext/filters/client_channel/subchannel.cc
+++ b/src/core/ext/filters/client_channel/subchannel.cc
@@ -49,6 +49,8 @@
 #include "src/core/lib/surface/channel.h"
 #include "src/core/lib/surface/channel_init.h"
 #include "src/core/lib/transport/connectivity_state.h"
+#include "src/core/lib/transport/error_utils.h"
+#include "src/core/lib/transport/status_metadata.h"
 
 #define INTERNAL_REF_BITS 16
 #define STRONG_REF_MASK (~(gpr_atm)((1 << INTERNAL_REF_BITS) - 1))
@@ -144,6 +146,11 @@ struct grpc_subchannel {
 struct grpc_subchannel_call {
   grpc_core::ConnectedSubchannel* connection;
   grpc_closure* schedule_closure_after_destroy;
+  // state needed to support channelz interception of recv trailing metadata.
+  grpc_closure recv_trailing_metadata_ready;
+  grpc_closure* original_recv_trailing_metadata;
+  grpc_metadata_batch* recv_trailing_metadata;
+  grpc_millis deadline;
 };
 
 #define SUBCHANNEL_CALL_TO_CALL_STACK(call)                          \
@@ -421,6 +428,35 @@ intptr_t grpc_subchannel_get_child_socket_uuid(grpc_subchannel* subchannel) {
   }
 }
 
+static const char* subchannel_connectivity_state_change_string(
+    grpc_connectivity_state state) {
+  switch (state) {
+    case GRPC_CHANNEL_IDLE:
+      return "Subchannel state change to IDLE";
+    case GRPC_CHANNEL_CONNECTING:
+      return "Subchannel state change to CONNECTING";
+    case GRPC_CHANNEL_READY:
+      return "Subchannel state change to READY";
+    case GRPC_CHANNEL_TRANSIENT_FAILURE:
+      return "Subchannel state change to TRANSIENT_FAILURE";
+    case GRPC_CHANNEL_SHUTDOWN:
+      return "Subchannel state change to SHUTDOWN";
+  }
+  GPR_UNREACHABLE_CODE(return "UNKNOWN");
+}
+
+static void set_subchannel_connectivity_state_locked(
+    grpc_subchannel* c, grpc_connectivity_state state, grpc_error* error,
+    const char* reason) {
+  if (c->channelz_subchannel != nullptr) {
+    c->channelz_subchannel->AddTraceEvent(
+        grpc_core::channelz::ChannelTrace::Severity::Info,
+        grpc_slice_from_static_string(
+            subchannel_connectivity_state_change_string(state)));
+  }
+  grpc_connectivity_state_set(&c->state_tracker, state, error, reason);
+}
+
 static void continue_connect_locked(grpc_subchannel* c) {
   grpc_connect_in_args args;
   args.interested_parties = c->pollset_set;
@@ -429,8 +465,8 @@ static void continue_connect_locked(grpc_subchannel* c) {
   c->next_attempt_deadline = c->backoff->NextAttemptTime();
   args.deadline = std::max(c->next_attempt_deadline, min_deadline);
   args.channel_args = c->args;
-  grpc_connectivity_state_set(&c->state_tracker, GRPC_CHANNEL_CONNECTING,
-                              GRPC_ERROR_NONE, "connecting");
+  set_subchannel_connectivity_state_locked(c, GRPC_CHANNEL_CONNECTING,
+                                           GRPC_ERROR_NONE, "connecting");
   grpc_connector_connect(c->connector, &args, &c->connecting_result,
                          &c->on_connected);
 }
@@ -580,9 +616,9 @@ static void on_connected_subchannel_connectivity_changed(void* p,
                       connected_subchannel_watcher->connectivity_state));
         }
         c->connected_subchannel.reset();
-        grpc_connectivity_state_set(&c->state_tracker,
-                                    GRPC_CHANNEL_TRANSIENT_FAILURE,
-                                    GRPC_ERROR_REF(error), "reflect_child");
+        set_subchannel_connectivity_state_locked(
+            c, GRPC_CHANNEL_TRANSIENT_FAILURE, GRPC_ERROR_REF(error),
+            "reflect_child");
         c->backoff_begun = false;
         c->backoff->Reset();
         maybe_start_connecting_locked(c);
@@ -593,8 +629,8 @@ static void on_connected_subchannel_connectivity_changed(void* p,
       break;
     }
     default: {
-      grpc_connectivity_state_set(
-          &c->state_tracker, connected_subchannel_watcher->connectivity_state,
+      set_subchannel_connectivity_state_locked(
+          c, connected_subchannel_watcher->connectivity_state,
           GRPC_ERROR_REF(error), "reflect_child");
       GRPC_SUBCHANNEL_WEAK_REF(c, "state_watcher");
       c->connected_subchannel->NotifyOnStateChange(
@@ -652,7 +688,7 @@ static bool publish_transport_locked(grpc_subchannel* c) {
 
   /* publish */
   c->connected_subchannel.reset(grpc_core::New<grpc_core::ConnectedSubchannel>(
-      stk, c->channelz_subchannel.get(), socket_uuid));
+      stk, c->channelz_subchannel, socket_uuid));
   gpr_log(GPR_INFO, "New connected subchannel at %p for subchannel %p",
           c->connected_subchannel.get(), c);
 
@@ -665,8 +701,8 @@ static bool publish_transport_locked(grpc_subchannel* c) {
       &connected_subchannel_watcher->closure);
 
   /* signal completion */
-  grpc_connectivity_state_set(&c->state_tracker, GRPC_CHANNEL_READY,
-                              GRPC_ERROR_NONE, "connected");
+  set_subchannel_connectivity_state_locked(c, GRPC_CHANNEL_READY,
+                                           GRPC_ERROR_NONE, "connected");
   return true;
 }
 
@@ -683,8 +719,8 @@ static void on_subchannel_connected(void* arg, grpc_error* error) {
   } else if (c->disconnected) {
     GRPC_SUBCHANNEL_WEAK_UNREF(c, "connecting");
   } else {
-    grpc_connectivity_state_set(
-        &c->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE,
+    set_subchannel_connectivity_state_locked(
+        c, GRPC_CHANNEL_TRANSIENT_FAILURE,
         grpc_error_set_int(GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
                                "Connect Failed", &error, 1),
                            GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNAVAILABLE),
@@ -745,9 +781,71 @@ void grpc_subchannel_call_unref(
   GRPC_CALL_STACK_UNREF(SUBCHANNEL_CALL_TO_CALL_STACK(c), REF_REASON);
 }
 
+// Sets *status based on md_batch and error.
+static void get_call_status(grpc_subchannel_call* call,
+                            grpc_metadata_batch* md_batch, grpc_error* error,
+                            grpc_status_code* status) {
+  if (error != GRPC_ERROR_NONE) {
+    grpc_error_get_status(error, call->deadline, status, nullptr, nullptr,
+                          nullptr);
+  } else {
+    if (md_batch->idx.named.grpc_status != nullptr) {
+      *status = grpc_get_status_code_from_metadata(
+          md_batch->idx.named.grpc_status->md);
+    } else {
+      *status = GRPC_STATUS_UNKNOWN;
+    }
+  }
+  GRPC_ERROR_UNREF(error);
+}
+
+static void recv_trailing_metadata_ready(void* arg, grpc_error* error) {
+  grpc_subchannel_call* call = static_cast<grpc_subchannel_call*>(arg);
+  GPR_ASSERT(call->recv_trailing_metadata != nullptr);
+  grpc_status_code status = GRPC_STATUS_OK;
+  grpc_metadata_batch* md_batch = call->recv_trailing_metadata;
+  get_call_status(call, md_batch, GRPC_ERROR_REF(error), &status);
+  grpc_core::channelz::SubchannelNode* channelz_subchannel =
+      call->connection->channelz_subchannel();
+  GPR_ASSERT(channelz_subchannel != nullptr);
+  if (status == GRPC_STATUS_OK) {
+    channelz_subchannel->RecordCallSucceeded();
+  } else {
+    channelz_subchannel->RecordCallFailed();
+  }
+  GRPC_CLOSURE_RUN(call->original_recv_trailing_metadata,
+                   GRPC_ERROR_REF(error));
+}
+
+// If channelz is enabled, intercept recv_trailing so that we may check the
+// status and associate it to a subchannel.
+static void maybe_intercept_recv_trailing_metadata(
+    grpc_subchannel_call* call, grpc_transport_stream_op_batch* batch) {
+  // only intercept payloads with recv trailing.
+  if (!batch->recv_trailing_metadata) {
+    return;
+  }
+  // only add interceptor is channelz is enabled.
+  if (call->connection->channelz_subchannel() == nullptr) {
+    return;
+  }
+  GRPC_CLOSURE_INIT(&call->recv_trailing_metadata_ready,
+                    recv_trailing_metadata_ready, call,
+                    grpc_schedule_on_exec_ctx);
+  // save some state needed for the interception callback.
+  GPR_ASSERT(call->recv_trailing_metadata == nullptr);
+  call->recv_trailing_metadata =
+      batch->payload->recv_trailing_metadata.recv_trailing_metadata;
+  call->original_recv_trailing_metadata =
+      batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready;
+  batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready =
+      &call->recv_trailing_metadata_ready;
+}
+
 void grpc_subchannel_call_process_op(grpc_subchannel_call* call,
                                      grpc_transport_stream_op_batch* batch) {
   GPR_TIMER_SCOPE("grpc_subchannel_call_process_op", 0);
+  maybe_intercept_recv_trailing_metadata(call, batch);
   grpc_call_stack* call_stack = SUBCHANNEL_CALL_TO_CALL_STACK(call);
   grpc_call_element* top_elem = grpc_call_stack_element(call_stack, 0);
   GRPC_CALL_LOG_OP(GPR_INFO, top_elem, batch);
@@ -822,10 +920,12 @@ namespace grpc_core {
 
 ConnectedSubchannel::ConnectedSubchannel(
     grpc_channel_stack* channel_stack,
-    channelz::SubchannelNode* channelz_subchannel, intptr_t socket_uuid)
+    grpc_core::RefCountedPtr<grpc_core::channelz::SubchannelNode>
+        channelz_subchannel,
+    intptr_t socket_uuid)
     : RefCountedWithTracing<ConnectedSubchannel>(&grpc_trace_stream_refcount),
       channel_stack_(channel_stack),
-      channelz_subchannel_(channelz_subchannel),
+      channelz_subchannel_(std::move(channelz_subchannel)),
       socket_uuid_(socket_uuid) {}
 
 ConnectedSubchannel::~ConnectedSubchannel() {
@@ -872,6 +972,7 @@ grpc_error* ConnectedSubchannel::CreateCall(const CallArgs& args,
       Ref(DEBUG_LOCATION, "subchannel_call");
   connection.release();  // Ref is passed to the grpc_subchannel_call object.
   (*call)->connection = this;
+  (*call)->deadline = args.deadline;
   const grpc_call_element_args call_args = {
       callstk,           /* call_stack */
       nullptr,           /* server_transport_data */
diff --git a/src/core/ext/filters/client_channel/subchannel.h b/src/core/ext/filters/client_channel/subchannel.h
index 699f93a8e7..c53b13e37e 100644
--- a/src/core/ext/filters/client_channel/subchannel.h
+++ b/src/core/ext/filters/client_channel/subchannel.h
@@ -85,9 +85,11 @@ class ConnectedSubchannel : public RefCountedWithTracing<ConnectedSubchannel> {
     size_t parent_data_size;
   };
 
-  explicit ConnectedSubchannel(grpc_channel_stack* channel_stack,
-                               channelz::SubchannelNode* channelz_subchannel,
-                               intptr_t socket_uuid);
+  explicit ConnectedSubchannel(
+      grpc_channel_stack* channel_stack,
+      grpc_core::RefCountedPtr<grpc_core::channelz::SubchannelNode>
+          channelz_subchannel,
+      intptr_t socket_uuid);
   ~ConnectedSubchannel();
 
   grpc_channel_stack* channel_stack() { return channel_stack_; }
@@ -97,15 +99,16 @@ class ConnectedSubchannel : public RefCountedWithTracing<ConnectedSubchannel> {
   void Ping(grpc_closure* on_initiate, grpc_closure* on_ack);
   grpc_error* CreateCall(const CallArgs& args, grpc_subchannel_call** call);
   channelz::SubchannelNode* channelz_subchannel() {
-    return channelz_subchannel_;
+    return channelz_subchannel_.get();
   }
   intptr_t socket_uuid() { return socket_uuid_; }
 
  private:
   grpc_channel_stack* channel_stack_;
-  // backpointer to the channelz node in this connected subchannel's
+  // ref counted pointer to the channelz node in this connected subchannel's
   // owning subchannel.
-  channelz::SubchannelNode* channelz_subchannel_;
+  grpc_core::RefCountedPtr<grpc_core::channelz::SubchannelNode>
+      channelz_subchannel_;
   // uuid of this subchannel's socket. 0 if this subchannel is not connected.
   const intptr_t socket_uuid_;
 };
diff --git a/src/core/ext/filters/http/client_authority_filter.cc b/src/core/ext/filters/http/client_authority_filter.cc
index 1ca20ebb26..6383f12594 100644
--- a/src/core/ext/filters/http/client_authority_filter.cc
+++ b/src/core/ext/filters/http/client_authority_filter.cc
@@ -59,9 +59,8 @@ void authority_start_transport_stream_op_batch(
       initial_metadata->idx.named.authority == nullptr) {
     grpc_error* error = grpc_metadata_batch_add_head(
         initial_metadata, &calld->authority_storage,
-        grpc_mdelem_from_slices(
-            GRPC_MDSTR_AUTHORITY,
-            grpc_slice_ref_internal(chand->default_authority)));
+        grpc_mdelem_create(GRPC_MDSTR_AUTHORITY, chand->default_authority,
+                           nullptr));
     if (error != GRPC_ERROR_NONE) {
       grpc_transport_stream_op_batch_finish_with_failure(batch, error,
                                                          calld->call_combiner);
diff --git a/src/core/ext/transport/chttp2/client/chttp2_connector.cc b/src/core/ext/transport/chttp2/client/chttp2_connector.cc
index 0ac84032fd..5229304fa4 100644
--- a/src/core/ext/transport/chttp2/client/chttp2_connector.cc
+++ b/src/core/ext/transport/chttp2/client/chttp2_connector.cc
@@ -160,7 +160,7 @@ static void on_handshake_done(void* arg, grpc_error* error) {
 static void start_handshake_locked(chttp2_connector* c) {
   c->handshake_mgr = grpc_handshake_manager_create();
   grpc_handshakers_add(HANDSHAKER_CLIENT, c->args.channel_args,
-                       c->handshake_mgr);
+                       c->args.interested_parties, c->handshake_mgr);
   grpc_endpoint_add_to_pollset_set(c->endpoint, c->args.interested_parties);
   grpc_handshake_manager_do_handshake(
       c->handshake_mgr, c->args.interested_parties, c->endpoint,
diff --git a/src/core/ext/transport/chttp2/server/chttp2_server.cc b/src/core/ext/transport/chttp2/server/chttp2_server.cc
index 3f8a26ae32..6ed88dfb5e 100644
--- a/src/core/ext/transport/chttp2/server/chttp2_server.cc
+++ b/src/core/ext/transport/chttp2/server/chttp2_server.cc
@@ -53,6 +53,8 @@ typedef struct {
   grpc_closure tcp_server_shutdown_complete;
   grpc_closure* server_destroy_listener_done;
   grpc_handshake_manager* pending_handshake_mgrs;
+  grpc_core::RefCountedPtr<grpc_core::channelz::ListenSocketNode>
+      channelz_listen_socket;
 } server_state;
 
 typedef struct {
@@ -67,6 +69,7 @@ typedef struct {
   grpc_timer timer;
   grpc_closure on_timeout;
   grpc_closure on_receive_settings;
+  grpc_pollset_set* interested_parties;
 } server_connection_state;
 
 static void server_connection_state_unref(
@@ -76,6 +79,9 @@ static void server_connection_state_unref(
       GRPC_CHTTP2_UNREF_TRANSPORT(connection_state->transport,
                                   "receive settings timeout");
     }
+    grpc_pollset_set_del_pollset(connection_state->interested_parties,
+                                 connection_state->accepting_pollset);
+    grpc_pollset_set_destroy(connection_state->interested_parties);
     gpr_free(connection_state);
   }
 }
@@ -133,7 +139,8 @@ static void on_handshake_done(void* arg, grpc_error* error) {
           grpc_create_chttp2_transport(args->args, args->endpoint, false);
       grpc_server_setup_transport(
           connection_state->svr_state->server, transport,
-          connection_state->accepting_pollset, args->args);
+          connection_state->accepting_pollset, args->args,
+          grpc_chttp2_transport_get_socket_uuid(transport));
       // Use notify_on_receive_settings callback to enforce the
       // handshake deadline.
       connection_state->transport =
@@ -189,7 +196,11 @@ static void on_accept(void* arg, grpc_endpoint* tcp,
   connection_state->accepting_pollset = accepting_pollset;
   connection_state->acceptor = acceptor;
   connection_state->handshake_mgr = handshake_mgr;
+  connection_state->interested_parties = grpc_pollset_set_create();
+  grpc_pollset_set_add_pollset(connection_state->interested_parties,
+                               connection_state->accepting_pollset);
   grpc_handshakers_add(HANDSHAKER_SERVER, state->args,
+                       connection_state->interested_parties,
                        connection_state->handshake_mgr);
   const grpc_arg* timeout_arg =
       grpc_channel_args_find(state->args, GRPC_ARG_SERVER_HANDSHAKE_TIMEOUT_MS);
@@ -223,6 +234,7 @@ static void tcp_server_shutdown_complete(void* arg, grpc_error* error) {
   GPR_ASSERT(state->shutdown);
   grpc_handshake_manager_pending_list_shutdown_all(
       state->pending_handshake_mgrs, GRPC_ERROR_REF(error));
+  state->channelz_listen_socket.reset();
   gpr_mu_unlock(&state->mu);
   // Flush queued work before destroying handshaker factory, since that
   // may do a synchronous unref.
@@ -262,6 +274,8 @@ grpc_error* grpc_chttp2_server_add_port(grpc_server* server, const char* addr,
   server_state* state = nullptr;
   grpc_error** errors = nullptr;
   size_t naddrs = 0;
+  const grpc_arg* arg = nullptr;
+  intptr_t socket_uuid = 0;
 
   *port_num = -1;
 
@@ -323,9 +337,16 @@ grpc_error* grpc_chttp2_server_add_port(grpc_server* server, const char* addr,
   }
   grpc_resolved_addresses_destroy(resolved);
 
+  arg = grpc_channel_args_find(args, GRPC_ARG_ENABLE_CHANNELZ);
+  if (grpc_channel_arg_get_bool(arg, false)) {
+    state->channelz_listen_socket =
+        grpc_core::MakeRefCounted<grpc_core::channelz::ListenSocketNode>();
+    socket_uuid = state->channelz_listen_socket->uuid();
+  }
+
   /* Register with the server only upon success */
   grpc_server_add_listener(server, state, server_start_listener,
-                           server_destroy_listener);
+                           server_destroy_listener, socket_uuid);
   goto done;
 
 /* Error path: cleanup and return */
diff --git a/src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.cc b/src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.cc
index e4bd91d07b..b9024a87e2 100644
--- a/src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.cc
+++ b/src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.cc
@@ -61,7 +61,7 @@ void grpc_server_add_insecure_channel_from_fd(grpc_server* server,
     grpc_endpoint_add_to_pollset(server_endpoint, pollsets[i]);
   }
 
-  grpc_server_setup_transport(server, transport, nullptr, server_args);
+  grpc_server_setup_transport(server, transport, nullptr, server_args, 0);
   grpc_chttp2_transport_start_reading(transport, nullptr, nullptr);
 }
 
diff --git a/src/core/ext/transport/chttp2/transport/chttp2_transport.cc b/src/core/ext/transport/chttp2/transport/chttp2_transport.cc
index 2d4b4da4c6..8a481bb7d5 100644
--- a/src/core/ext/transport/chttp2/transport/chttp2_transport.cc
+++ b/src/core/ext/transport/chttp2/transport/chttp2_transport.cc
@@ -2128,8 +2128,7 @@ void grpc_chttp2_fake_status(grpc_chttp2_transport* t, grpc_chttp2_stream* s,
           "add_status_message",
           grpc_chttp2_incoming_metadata_buffer_replace_or_add(
               &s->metadata_buffer[1],
-              grpc_mdelem_from_slices(GRPC_MDSTR_GRPC_MESSAGE,
-                                      grpc_slice_ref_internal(slice))));
+              grpc_mdelem_create(GRPC_MDSTR_GRPC_MESSAGE, slice, nullptr)));
     }
     s->published_metadata[1] = GRPC_METADATA_SYNTHESIZED_FROM_FAKE;
     grpc_chttp2_maybe_complete_recv_trailing_metadata(t, s);
diff --git a/src/core/ext/transport/inproc/inproc_transport.cc b/src/core/ext/transport/inproc/inproc_transport.cc
index b0ca7f8207..9dbd095843 100644
--- a/src/core/ext/transport/inproc/inproc_transport.cc
+++ b/src/core/ext/transport/inproc/inproc_transport.cc
@@ -1256,7 +1256,9 @@ grpc_channel* grpc_inproc_channel_create(grpc_server* server,
   inproc_transports_create(&server_transport, server_args, &client_transport,
                            client_args);
 
-  grpc_server_setup_transport(server, server_transport, nullptr, server_args);
+  // TODO(ncteisen): design and support channelz GetSocket for inproc.
+  grpc_server_setup_transport(server, server_transport, nullptr, server_args,
+                              0);
   grpc_channel* channel = grpc_channel_create(
       "inproc", client_args, GRPC_CLIENT_DIRECT_CHANNEL, client_transport);
 
diff --git a/src/core/lib/channel/channelz.cc b/src/core/lib/channel/channelz.cc
index 573292fba1..33577d890a 100644
--- a/src/core/lib/channel/channelz.cc
+++ b/src/core/lib/channel/channelz.cc
@@ -37,13 +37,16 @@
 #include "src/core/lib/iomgr/exec_ctx.h"
 #include "src/core/lib/slice/slice_internal.h"
 #include "src/core/lib/surface/channel.h"
+#include "src/core/lib/surface/server.h"
 #include "src/core/lib/transport/error_utils.h"
 
 namespace grpc_core {
 namespace channelz {
 
-BaseNode::BaseNode(EntityType type)
-    : type_(type), uuid_(ChannelzRegistry::Register(this)) {}
+BaseNode::BaseNode(EntityType type) : type_(type), uuid_(-1) {
+  // The registry will set uuid_ under its lock.
+  ChannelzRegistry::Register(this);
+}
 
 BaseNode::~BaseNode() { ChannelzRegistry::Unregister(uuid_); }
 
@@ -190,11 +193,45 @@ RefCountedPtr<ChannelNode> ChannelNode::MakeChannelNode(
       channel, channel_tracer_max_nodes, is_top_level_channel);
 }
 
-ServerNode::ServerNode(size_t channel_tracer_max_nodes)
-    : BaseNode(EntityType::kServer), trace_(channel_tracer_max_nodes) {}
+ServerNode::ServerNode(grpc_server* server, size_t channel_tracer_max_nodes)
+    : BaseNode(EntityType::kServer),
+      server_(server),
+      trace_(channel_tracer_max_nodes) {}
 
 ServerNode::~ServerNode() {}
 
+char* ServerNode::RenderServerSockets(intptr_t start_socket_id) {
+  grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT);
+  grpc_json* json = top_level_json;
+  grpc_json* json_iterator = nullptr;
+  ChildRefsList socket_refs;
+  // uuids index into entities one-off (idx 0 is really uuid 1, since 0 is
+  // reserved). However, we want to support requests coming in with
+  // start_server_id=0, which signifies "give me everything."
+  size_t start_idx = start_socket_id == 0 ? 0 : start_socket_id - 1;
+  grpc_server_populate_server_sockets(server_, &socket_refs, start_idx);
+  if (!socket_refs.empty()) {
+    // create list of socket refs
+    grpc_json* array_parent = grpc_json_create_child(
+        nullptr, json, "socketRef", nullptr, GRPC_JSON_ARRAY, false);
+    for (size_t i = 0; i < socket_refs.size(); ++i) {
+      json_iterator =
+          grpc_json_create_child(json_iterator, array_parent, nullptr, nullptr,
+                                 GRPC_JSON_OBJECT, false);
+      grpc_json_add_number_string_child(json_iterator, nullptr, "socketId",
+                                        socket_refs[i]);
+    }
+  }
+  // For now we do not have any pagination rules. In the future we could
+  // pick a constant for max_channels_sent for a GetServers request.
+  // Tracking: https://github.com/grpc/grpc/issues/16019.
+  json_iterator = grpc_json_create_child(nullptr, json, "end", nullptr,
+                                         GRPC_JSON_TRUE, false);
+  char* json_str = grpc_json_dump_to_string(top_level_json, 0);
+  grpc_json_destroy(top_level_json);
+  return json_str;
+}
+
 grpc_json* ServerNode::RenderJson() {
   // We need to track these three json objects to build our object
   grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT);
@@ -223,6 +260,20 @@ grpc_json* ServerNode::RenderJson() {
   }
   // ask CallCountingHelper to populate trace and call count data.
   call_counter_.PopulateCallCounts(json);
+  json = top_level_json;
+  ChildRefsList listen_sockets;
+  grpc_server_populate_listen_sockets(server_, &listen_sockets);
+  if (!listen_sockets.empty()) {
+    grpc_json* array_parent = grpc_json_create_child(
+        nullptr, json, "listenSocket", nullptr, GRPC_JSON_ARRAY, false);
+    for (size_t i = 0; i < listen_sockets.size(); ++i) {
+      json_iterator =
+          grpc_json_create_child(json_iterator, array_parent, nullptr, nullptr,
+                                 GRPC_JSON_OBJECT, false);
+      grpc_json_add_number_string_child(json_iterator, nullptr, "socketId",
+                                        listen_sockets[i]);
+    }
+  }
   return top_level_json;
 }
 
@@ -323,5 +374,22 @@ grpc_json* SocketNode::RenderJson() {
   return top_level_json;
 }
 
+ListenSocketNode::ListenSocketNode() : BaseNode(EntityType::kSocket) {}
+
+grpc_json* ListenSocketNode::RenderJson() {
+  // We need to track these three json objects to build our object
+  grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT);
+  grpc_json* json = top_level_json;
+  grpc_json* json_iterator = nullptr;
+  // create and fill the ref child
+  json_iterator = grpc_json_create_child(json_iterator, json, "ref", nullptr,
+                                         GRPC_JSON_OBJECT, false);
+  json = json_iterator;
+  json_iterator = nullptr;
+  json_iterator = grpc_json_add_number_string_child(json, json_iterator,
+                                                    "socketId", uuid());
+  return top_level_json;
+}
+
 }  // namespace channelz
 }  // namespace grpc_core
diff --git a/src/core/lib/channel/channelz.h b/src/core/lib/channel/channelz.h
index 3f728cda6d..88551befc8 100644
--- a/src/core/lib/channel/channelz.h
+++ b/src/core/lib/channel/channelz.h
@@ -24,6 +24,7 @@
 #include <grpc/grpc.h>
 
 #include "src/core/lib/channel/channel_trace.h"
+#include "src/core/lib/gprpp/inlined_vector.h"
 #include "src/core/lib/gprpp/manual_constructor.h"
 #include "src/core/lib/gprpp/ref_counted.h"
 #include "src/core/lib/gprpp/ref_counted_ptr.h"
@@ -41,17 +42,23 @@
 
 /** This is the default value for whether or not to enable channelz. If
  * GRPC_ARG_ENABLE_CHANNELZ is set, it will override this default value. */
-#define GRPC_ENABLE_CHANNELZ_DEFAULT false
+#define GRPC_ENABLE_CHANNELZ_DEFAULT true
 
 /** This is the default value for the maximum amount of memory used by trace
  * events per channel trace node. If
  * GRPC_ARG_MAX_CHANNEL_TRACE_EVENT_MEMORY_PER_NODE is set, it will override
  * this default value. */
-#define GRPC_MAX_CHANNEL_TRACE_EVENT_MEMORY_PER_NODE_DEFAULT 0
+#define GRPC_MAX_CHANNEL_TRACE_EVENT_MEMORY_PER_NODE_DEFAULT 1024 * 4
 
 namespace grpc_core {
+
 namespace channelz {
 
+// TODO(ncteisen), this only contains the uuids of the children for now,
+// since that is all that is strictly needed. In a future enhancement we will
+// add human readable names as in the channelz.proto
+typedef InlinedVector<intptr_t, 10> ChildRefsList;
+
 namespace testing {
 class CallCountingHelperPeer;
 class ChannelNodePeer;
@@ -85,8 +92,10 @@ class BaseNode : public RefCounted<BaseNode> {
   intptr_t uuid() const { return uuid_; }
 
  private:
+  // to allow the ChannelzRegistry to set uuid_ under its lock.
+  friend class ChannelzRegistry;
   const EntityType type_;
-  const intptr_t uuid_;
+  intptr_t uuid_;
 };
 
 // This class is a helper class for channelz entities that deal with Channels,
@@ -193,11 +202,13 @@ class ChannelNode : public BaseNode {
 // Handles channelz bookkeeping for servers
 class ServerNode : public BaseNode {
  public:
-  explicit ServerNode(size_t channel_tracer_max_nodes);
+  ServerNode(grpc_server* server, size_t channel_tracer_max_nodes);
   ~ServerNode() override;
 
   grpc_json* RenderJson() override;
 
+  char* RenderServerSockets(intptr_t start_socket_id);
+
   // proxy methods to composed classes.
   void AddTraceEvent(ChannelTrace::Severity severity, grpc_slice data) {
     trace_.AddTraceEvent(severity, data);
@@ -213,6 +224,7 @@ class ServerNode : public BaseNode {
   void RecordCallSucceeded() { call_counter_.RecordCallSucceeded(); }
 
  private:
+  grpc_server* server_;
   CallCountingHelper call_counter_;
   ChannelTrace trace_;
 };
@@ -253,6 +265,15 @@ class SocketNode : public BaseNode {
   UniquePtr<char> peer_string_;
 };
 
+// Handles channelz bookkeeping for listen sockets
+class ListenSocketNode : public BaseNode {
+ public:
+  ListenSocketNode();
+  ~ListenSocketNode() override {}
+
+  grpc_json* RenderJson() override;
+};
+
 // Creation functions
 
 typedef RefCountedPtr<ChannelNode> (*ChannelNodeCreationFunc)(grpc_channel*,
diff --git a/src/core/lib/channel/channelz_registry.cc b/src/core/lib/channel/channelz_registry.cc
index 841f1c6104..e90a9e1f7f 100644
--- a/src/core/lib/channel/channelz_registry.cc
+++ b/src/core/lib/channel/channelz_registry.cc
@@ -38,6 +38,8 @@ namespace {
 // singleton instance of the registry.
 ChannelzRegistry* g_channelz_registry = nullptr;
 
+const int kPaginationLimit = 100;
+
 }  // anonymous namespace
 
 void ChannelzRegistry::Init() { g_channelz_registry = New<ChannelzRegistry>(); }
@@ -53,29 +55,78 @@ ChannelzRegistry::ChannelzRegistry() { gpr_mu_init(&mu_); }
 
 ChannelzRegistry::~ChannelzRegistry() { gpr_mu_destroy(&mu_); }
 
-intptr_t ChannelzRegistry::InternalRegister(BaseNode* node) {
+void ChannelzRegistry::InternalRegister(BaseNode* node) {
   MutexLock lock(&mu_);
   entities_.push_back(node);
-  intptr_t uuid = entities_.size();
-  return uuid;
+  node->uuid_ = ++uuid_generator_;
+}
+
+void ChannelzRegistry::MaybePerformCompactionLocked() {
+  constexpr double kEmptinessTheshold = 1 / 3;
+  double emptiness_ratio =
+      double(num_empty_slots_) / double(entities_.capacity());
+  if (emptiness_ratio > kEmptinessTheshold) {
+    int front = 0;
+    for (size_t i = 0; i < entities_.size(); ++i) {
+      if (entities_[i] != nullptr) {
+        entities_[front++] = entities_[i];
+      }
+    }
+    for (int i = 0; i < num_empty_slots_; ++i) {
+      entities_.pop_back();
+    }
+    num_empty_slots_ = 0;
+  }
+}
+
+int ChannelzRegistry::FindByUuidLocked(intptr_t target_uuid) {
+  size_t left = 0;
+  size_t right = entities_.size() - 1;
+  while (left <= right) {
+    size_t true_middle = left + (right - left) / 2;
+    size_t first_non_null = true_middle;
+    while (first_non_null < right && entities_[first_non_null] == nullptr) {
+      first_non_null++;
+    }
+    if (entities_[first_non_null] == nullptr) {
+      right = true_middle - 1;
+      continue;
+    }
+    intptr_t uuid = entities_[first_non_null]->uuid();
+    if (uuid == target_uuid) {
+      return int(first_non_null);
+    }
+    if (uuid < target_uuid) {
+      left = first_non_null + 1;
+    } else {
+      right = true_middle - 1;
+    }
+  }
+  return -1;
 }
 
 void ChannelzRegistry::InternalUnregister(intptr_t uuid) {
   GPR_ASSERT(uuid >= 1);
   MutexLock lock(&mu_);
-  GPR_ASSERT(static_cast<size_t>(uuid) <= entities_.size());
-  entities_[uuid - 1] = nullptr;
+  GPR_ASSERT(uuid <= uuid_generator_);
+  int idx = FindByUuidLocked(uuid);
+  GPR_ASSERT(idx >= 0);
+  entities_[idx] = nullptr;
+  num_empty_slots_++;
+  MaybePerformCompactionLocked();
 }
 
 BaseNode* ChannelzRegistry::InternalGet(intptr_t uuid) {
   MutexLock lock(&mu_);
-  if (uuid < 1 || uuid > static_cast<intptr_t>(entities_.size())) {
+  if (uuid < 1 || uuid > uuid_generator_) {
     return nullptr;
   }
-  return entities_[uuid - 1];
+  int idx = FindByUuidLocked(uuid);
+  return idx < 0 ? nullptr : entities_[idx];
 }
 
 char* ChannelzRegistry::InternalGetTopChannels(intptr_t start_channel_id) {
+  MutexLock lock(&mu_);
   grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT);
   grpc_json* json = top_level_json;
   grpc_json* json_iterator = nullptr;
@@ -85,10 +136,18 @@ char* ChannelzRegistry::InternalGetTopChannels(intptr_t start_channel_id) {
   // start_channel_id=0, which signifies "give me everything." Hence this
   // funky looking line below.
   size_t start_idx = start_channel_id == 0 ? 0 : start_channel_id - 1;
+  bool reached_pagination_limit = false;
   for (size_t i = start_idx; i < entities_.size(); ++i) {
     if (entities_[i] != nullptr &&
         entities_[i]->type() ==
             grpc_core::channelz::BaseNode::EntityType::kTopLevelChannel) {
+      // check if we are over pagination limit to determine if we need to set
+      // the "end" element. If we don't go through this block, we know that
+      // when the loop terminates, we have <= to kPaginationLimit.
+      if (top_level_channels.size() == kPaginationLimit) {
+        reached_pagination_limit = true;
+        break;
+      }
       top_level_channels.push_back(entities_[i]);
     }
   }
@@ -102,17 +161,17 @@ char* ChannelzRegistry::InternalGetTopChannels(intptr_t start_channel_id) {
           grpc_json_link_child(array_parent, channel_json, json_iterator);
     }
   }
-  // For now we do not have any pagination rules. In the future we could
-  // pick a constant for max_channels_sent for a GetTopChannels request.
-  // Tracking: https://github.com/grpc/grpc/issues/16019.
-  json_iterator = grpc_json_create_child(nullptr, json, "end", nullptr,
-                                         GRPC_JSON_TRUE, false);
+  if (!reached_pagination_limit) {
+    grpc_json_create_child(nullptr, json, "end", nullptr, GRPC_JSON_TRUE,
+                           false);
+  }
   char* json_str = grpc_json_dump_to_string(top_level_json, 0);
   grpc_json_destroy(top_level_json);
   return json_str;
 }
 
 char* ChannelzRegistry::InternalGetServers(intptr_t start_server_id) {
+  MutexLock lock(&mu_);
   grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT);
   grpc_json* json = top_level_json;
   grpc_json* json_iterator = nullptr;
@@ -121,10 +180,18 @@ char* ChannelzRegistry::InternalGetServers(intptr_t start_server_id) {
   // reserved). However, we want to support requests coming in with
   // start_server_id=0, which signifies "give me everything."
   size_t start_idx = start_server_id == 0 ? 0 : start_server_id - 1;
+  bool reached_pagination_limit = false;
   for (size_t i = start_idx; i < entities_.size(); ++i) {
     if (entities_[i] != nullptr &&
         entities_[i]->type() ==
             grpc_core::channelz::BaseNode::EntityType::kServer) {
+      // check if we are over pagination limit to determine if we need to set
+      // the "end" element. If we don't go through this block, we know that
+      // when the loop terminates, we have <= to kPaginationLimit.
+      if (servers.size() == kPaginationLimit) {
+        reached_pagination_limit = true;
+        break;
+      }
       servers.push_back(entities_[i]);
     }
   }
@@ -138,11 +205,10 @@ char* ChannelzRegistry::InternalGetServers(intptr_t start_server_id) {
           grpc_json_link_child(array_parent, server_json, json_iterator);
     }
   }
-  // For now we do not have any pagination rules. In the future we could
-  // pick a constant for max_channels_sent for a GetServers request.
-  // Tracking: https://github.com/grpc/grpc/issues/16019.
-  json_iterator = grpc_json_create_child(nullptr, json, "end", nullptr,
-                                         GRPC_JSON_TRUE, false);
+  if (!reached_pagination_limit) {
+    grpc_json_create_child(nullptr, json, "end", nullptr, GRPC_JSON_TRUE,
+                           false);
+  }
   char* json_str = grpc_json_dump_to_string(top_level_json, 0);
   grpc_json_destroy(top_level_json);
   return json_str;
@@ -160,6 +226,21 @@ char* grpc_channelz_get_servers(intptr_t start_server_id) {
   return grpc_core::channelz::ChannelzRegistry::GetServers(start_server_id);
 }
 
+char* grpc_channelz_get_server_sockets(intptr_t server_id,
+                                       intptr_t start_socket_id) {
+  grpc_core::channelz::BaseNode* base_node =
+      grpc_core::channelz::ChannelzRegistry::Get(server_id);
+  if (base_node == nullptr ||
+      base_node->type() != grpc_core::channelz::BaseNode::EntityType::kServer) {
+    return nullptr;
+  }
+  // This cast is ok since we have just checked to make sure base_node is
+  // actually a server node
+  grpc_core::channelz::ServerNode* server_node =
+      static_cast<grpc_core::channelz::ServerNode*>(base_node);
+  return server_node->RenderServerSockets(start_socket_id);
+}
+
 char* grpc_channelz_get_channel(intptr_t channel_id) {
   grpc_core::channelz::BaseNode* channel_node =
       grpc_core::channelz::ChannelzRegistry::Get(channel_id);
diff --git a/src/core/lib/channel/channelz_registry.h b/src/core/lib/channel/channelz_registry.h
index d0d660600d..ea6ab6c8e5 100644
--- a/src/core/lib/channel/channelz_registry.h
+++ b/src/core/lib/channel/channelz_registry.h
@@ -30,6 +30,10 @@
 namespace grpc_core {
 namespace channelz {
 
+namespace testing {
+class ChannelzRegistryPeer;
+}
+
 // singleton registry object to track all objects that are needed to support
 // channelz bookkeeping. All objects share globally distributed uuids.
 class ChannelzRegistry {
@@ -40,7 +44,7 @@ class ChannelzRegistry {
   // To be called in grpc_shutdown();
   static void Shutdown();
 
-  static intptr_t Register(BaseNode* node) {
+  static void Register(BaseNode* node) {
     return Default()->InternalRegister(node);
   }
   static void Unregister(intptr_t uuid) { Default()->InternalUnregister(uuid); }
@@ -61,6 +65,7 @@ class ChannelzRegistry {
  private:
   GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_NEW
   GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_DELETE
+  friend class testing::ChannelzRegistryPeer;
 
   ChannelzRegistry();
   ~ChannelzRegistry();
@@ -69,7 +74,7 @@ class ChannelzRegistry {
   static ChannelzRegistry* Default();
 
   // globally registers an Entry. Returns its unique uuid
-  intptr_t InternalRegister(BaseNode* node);
+  void InternalRegister(BaseNode* node);
 
   // globally unregisters the object that is associated to uuid. Also does
   // sanity check that an object doesn't try to unregister the wrong type.
@@ -82,9 +87,18 @@ class ChannelzRegistry {
   char* InternalGetTopChannels(intptr_t start_channel_id);
   char* InternalGetServers(intptr_t start_server_id);
 
-  // protects entities_ and uuid_
+  // If entities_ has over a certain threshold of empty slots, it will
+  // compact the vector and move all used slots to the front.
+  void MaybePerformCompactionLocked();
+
+  // Performs binary search on entities_ to find the index with that uuid.
+  int FindByUuidLocked(intptr_t uuid);
+
+  // protects members
   gpr_mu mu_;
   InlinedVector<BaseNode*, 20> entities_;
+  intptr_t uuid_generator_ = 0;
+  int num_empty_slots_ = 0;
 };
 
 }  // namespace channelz
diff --git a/src/core/lib/channel/handshaker_factory.cc b/src/core/lib/channel/handshaker_factory.cc
index 4fd43635b6..8ade8fe4e2 100644
--- a/src/core/lib/channel/handshaker_factory.cc
+++ b/src/core/lib/channel/handshaker_factory.cc
@@ -24,11 +24,12 @@
 
 void grpc_handshaker_factory_add_handshakers(
     grpc_handshaker_factory* handshaker_factory, const grpc_channel_args* args,
+    grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_mgr) {
   if (handshaker_factory != nullptr) {
     GPR_ASSERT(handshaker_factory->vtable != nullptr);
-    handshaker_factory->vtable->add_handshakers(handshaker_factory, args,
-                                                handshake_mgr);
+    handshaker_factory->vtable->add_handshakers(
+        handshaker_factory, args, interested_parties, handshake_mgr);
   }
 }
 
diff --git a/src/core/lib/channel/handshaker_factory.h b/src/core/lib/channel/handshaker_factory.h
index 3e45fcf20e..e17a678179 100644
--- a/src/core/lib/channel/handshaker_factory.h
+++ b/src/core/lib/channel/handshaker_factory.h
@@ -32,6 +32,7 @@ typedef struct grpc_handshaker_factory grpc_handshaker_factory;
 typedef struct {
   void (*add_handshakers)(grpc_handshaker_factory* handshaker_factory,
                           const grpc_channel_args* args,
+                          grpc_pollset_set* interested_parties,
                           grpc_handshake_manager* handshake_mgr);
   void (*destroy)(grpc_handshaker_factory* handshaker_factory);
 } grpc_handshaker_factory_vtable;
@@ -42,6 +43,7 @@ struct grpc_handshaker_factory {
 
 void grpc_handshaker_factory_add_handshakers(
     grpc_handshaker_factory* handshaker_factory, const grpc_channel_args* args,
+    grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_mgr);
 
 void grpc_handshaker_factory_destroy(
diff --git a/src/core/lib/channel/handshaker_registry.cc b/src/core/lib/channel/handshaker_registry.cc
index eec3e1b352..fbafc43e79 100644
--- a/src/core/lib/channel/handshaker_registry.cc
+++ b/src/core/lib/channel/handshaker_registry.cc
@@ -51,9 +51,11 @@ static void grpc_handshaker_factory_list_register(
 
 static void grpc_handshaker_factory_list_add_handshakers(
     grpc_handshaker_factory_list* list, const grpc_channel_args* args,
+    grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_mgr) {
   for (size_t i = 0; i < list->num_factories; ++i) {
-    grpc_handshaker_factory_add_handshakers(list->list[i], args, handshake_mgr);
+    grpc_handshaker_factory_add_handshakers(list->list[i], args,
+                                            interested_parties, handshake_mgr);
   }
 }
 
@@ -91,7 +93,9 @@ void grpc_handshaker_factory_register(bool at_start,
 
 void grpc_handshakers_add(grpc_handshaker_type handshaker_type,
                           const grpc_channel_args* args,
+                          grpc_pollset_set* interested_parties,
                           grpc_handshake_manager* handshake_mgr) {
   grpc_handshaker_factory_list_add_handshakers(
-      &g_handshaker_factory_lists[handshaker_type], args, handshake_mgr);
+      &g_handshaker_factory_lists[handshaker_type], args, interested_parties,
+      handshake_mgr);
 }
diff --git a/src/core/lib/channel/handshaker_registry.h b/src/core/lib/channel/handshaker_registry.h
index 82ad9c5b9a..3dd4316de6 100644
--- a/src/core/lib/channel/handshaker_registry.h
+++ b/src/core/lib/channel/handshaker_registry.h
@@ -43,6 +43,7 @@ void grpc_handshaker_factory_register(bool at_start,
 
 void grpc_handshakers_add(grpc_handshaker_type handshaker_type,
                           const grpc_channel_args* args,
+                          grpc_pollset_set* interested_parties,
                           grpc_handshake_manager* handshake_mgr);
 
 #endif /* GRPC_CORE_LIB_CHANNEL_HANDSHAKER_REGISTRY_H */
diff --git a/src/core/lib/gprpp/inlined_vector.h b/src/core/lib/gprpp/inlined_vector.h
index 76e2f0a785..65c2b9634f 100644
--- a/src/core/lib/gprpp/inlined_vector.h
+++ b/src/core/lib/gprpp/inlined_vector.h
@@ -123,6 +123,14 @@ class InlinedVector {
 
   void push_back(T&& value) { emplace_back(std::move(value)); }
 
+  void pop_back() {
+    assert(!empty());
+    size_t s = size();
+    T& value = data()[s - 1];
+    value.~T();
+    size_--;
+  }
+
   void copy_from(const InlinedVector& v) {
     // if v is allocated, copy over the buffer.
     if (v.dynamic_ != nullptr) {
diff --git a/src/core/lib/http/httpcli_security_connector.cc b/src/core/lib/http/httpcli_security_connector.cc
index 50078c37a1..5a5e0b72d5 100644
--- a/src/core/lib/http/httpcli_security_connector.cc
+++ b/src/core/lib/http/httpcli_security_connector.cc
@@ -29,6 +29,7 @@
 #include "src/core/lib/channel/channel_args.h"
 #include "src/core/lib/channel/handshaker_registry.h"
 #include "src/core/lib/gpr/string.h"
+#include "src/core/lib/iomgr/pollset.h"
 #include "src/core/lib/security/transport/security_handshaker.h"
 #include "src/core/lib/slice/slice_internal.h"
 #include "src/core/tsi/ssl_transport_security.h"
@@ -51,6 +52,7 @@ static void httpcli_ssl_destroy(grpc_security_connector* sc) {
 }
 
 static void httpcli_ssl_add_handshakers(grpc_channel_security_connector* sc,
+                                        grpc_pollset_set* interested_parties,
                                         grpc_handshake_manager* handshake_mgr) {
   grpc_httpcli_ssl_channel_security_connector* c =
       reinterpret_cast<grpc_httpcli_ssl_channel_security_connector*>(sc);
@@ -189,7 +191,8 @@ static void ssl_handshake(void* arg, grpc_endpoint* tcp, const char* host,
   grpc_arg channel_arg = grpc_security_connector_to_arg(&sc->base);
   grpc_channel_args args = {1, &channel_arg};
   c->handshake_mgr = grpc_handshake_manager_create();
-  grpc_handshakers_add(HANDSHAKER_CLIENT, &args, c->handshake_mgr);
+  grpc_handshakers_add(HANDSHAKER_CLIENT, &args,
+                       nullptr /* interested_parties */, c->handshake_mgr);
   grpc_handshake_manager_do_handshake(
       c->handshake_mgr, nullptr /* interested_parties */, tcp,
       nullptr /* channel_args */, deadline, nullptr /* acceptor */,
diff --git a/src/core/lib/iomgr/ev_posix.cc b/src/core/lib/iomgr/ev_posix.cc
index 764fb08142..8a7dc7b004 100644
--- a/src/core/lib/iomgr/ev_posix.cc
+++ b/src/core/lib/iomgr/ev_posix.cc
@@ -395,6 +395,4 @@ void grpc_pollset_set_del_fd(grpc_pollset_set* pollset_set, grpc_fd* fd) {
   g_event_engine->pollset_set_del_fd(pollset_set, fd);
 }
 
-void grpc_use_signal(int signum) {}
-
 #endif  // GRPC_POSIX_SOCKET_EV
diff --git a/src/core/lib/iomgr/socket_utils_common_posix.cc b/src/core/lib/iomgr/socket_utils_common_posix.cc
index 50674b0845..bdfc1d70c3 100644
--- a/src/core/lib/iomgr/socket_utils_common_posix.cc
+++ b/src/core/lib/iomgr/socket_utils_common_posix.cc
@@ -307,7 +307,10 @@ grpc_error* grpc_set_socket_tcp_user_timeout(
     }
   }
 #else
-  gpr_log(GPR_INFO, "TCP_USER_TIMEOUT not supported for this platform");
+  extern grpc_core::TraceFlag grpc_tcp_trace;
+  if (grpc_tcp_trace.enabled()) {
+    gpr_log(GPR_INFO, "TCP_USER_TIMEOUT not supported for this platform");
+  }
 #endif /* GRPC_HAVE_TCP_USER_TIMEOUT */
   return GRPC_ERROR_NONE;
 }
diff --git a/src/core/lib/iomgr/tcp_posix.cc b/src/core/lib/iomgr/tcp_posix.cc
index e40bf81c90..aa2704ce26 100644
--- a/src/core/lib/iomgr/tcp_posix.cc
+++ b/src/core/lib/iomgr/tcp_posix.cc
@@ -468,7 +468,9 @@ static void tcp_do_read(grpc_tcp* tcp) {
     GRPC_STATS_INC_TCP_READ_SIZE(read_bytes);
     add_to_estimate(tcp, static_cast<size_t>(read_bytes));
     GPR_ASSERT((size_t)read_bytes <= tcp->incoming_buffer->length);
-    if (static_cast<size_t>(read_bytes) < tcp->incoming_buffer->length) {
+    if (static_cast<size_t>(read_bytes) == tcp->incoming_buffer->length) {
+      finish_estimate(tcp);
+    } else if (static_cast<size_t>(read_bytes) < tcp->incoming_buffer->length) {
       grpc_slice_buffer_trim_end(
           tcp->incoming_buffer,
           tcp->incoming_buffer->length - static_cast<size_t>(read_bytes),
@@ -498,7 +500,7 @@ static void tcp_read_allocation_done(void* tcpp, grpc_error* error) {
 
 static void tcp_continue_read(grpc_tcp* tcp) {
   size_t target_read_size = get_target_read_size(tcp);
-  if (tcp->incoming_buffer->length < target_read_size &&
+  if (tcp->incoming_buffer->length < target_read_size / 2 &&
       tcp->incoming_buffer->count < MAX_READ_IOVEC) {
     if (grpc_tcp_trace.enabled()) {
       gpr_log(GPR_INFO, "TCP:%p alloc_slices", tcp);
diff --git a/src/core/lib/iomgr/timer_manager.cc b/src/core/lib/iomgr/timer_manager.cc
index 54f19a9259..ceba79f678 100644
--- a/src/core/lib/iomgr/timer_manager.cc
+++ b/src/core/lib/iomgr/timer_manager.cc
@@ -245,7 +245,7 @@ static void timer_main_loop() {
           gpr_log(GPR_INFO, "timers not checked: expect another thread to");
         }
         next = GRPC_MILLIS_INF_FUTURE;
-      /* fall through */
+      // fallthrough
       case GRPC_TIMERS_CHECKED_AND_EMPTY:
         if (!wait_until(next)) {
           return;
diff --git a/src/core/lib/iomgr/wakeup_fd_eventfd.cc b/src/core/lib/iomgr/wakeup_fd_eventfd.cc
index dcf7dab71f..d68c9ada1f 100644
--- a/src/core/lib/iomgr/wakeup_fd_eventfd.cc
+++ b/src/core/lib/iomgr/wakeup_fd_eventfd.cc
@@ -32,12 +32,11 @@
 #include "src/core/lib/profiling/timers.h"
 
 static grpc_error* eventfd_create(grpc_wakeup_fd* fd_info) {
-  int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-  if (efd < 0) {
+  fd_info->read_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+  fd_info->write_fd = -1;
+  if (fd_info->read_fd < 0) {
     return GRPC_OS_ERROR(errno, "eventfd");
   }
-  fd_info->read_fd = efd;
-  fd_info->write_fd = -1;
   return GRPC_ERROR_NONE;
 }
 
diff --git a/src/core/lib/security/credentials/plugin/plugin_credentials.cc b/src/core/lib/security/credentials/plugin/plugin_credentials.cc
index 73946ce039..4015124298 100644
--- a/src/core/lib/security/credentials/plugin/plugin_credentials.cc
+++ b/src/core/lib/security/credentials/plugin/plugin_credentials.cc
@@ -102,8 +102,7 @@ static grpc_error* process_plugin_result(
     } else {
       for (size_t i = 0; i < num_md; ++i) {
         grpc_mdelem mdelem =
-            grpc_mdelem_from_slices(grpc_slice_ref_internal(md[i].key),
-                                    grpc_slice_ref_internal(md[i].value));
+            grpc_mdelem_create(md[i].key, md[i].value, nullptr);
         grpc_credentials_mdelem_array_add(r->md_array, mdelem);
         GRPC_MDELEM_UNREF(mdelem);
       }
diff --git a/src/core/lib/security/security_connector/alts_security_connector.cc b/src/core/lib/security/security_connector/alts_security_connector.cc
index 35a787871a..d38c0ff044 100644
--- a/src/core/lib/security/security_connector/alts_security_connector.cc
+++ b/src/core/lib/security/security_connector/alts_security_connector.cc
@@ -64,29 +64,29 @@ static void alts_server_destroy(grpc_security_connector* sc) {
 }
 
 static void alts_channel_add_handshakers(
-    grpc_channel_security_connector* sc,
+    grpc_channel_security_connector* sc, grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_manager) {
   tsi_handshaker* handshaker = nullptr;
   auto c = reinterpret_cast<grpc_alts_channel_security_connector*>(sc);
   grpc_alts_credentials* creds =
       reinterpret_cast<grpc_alts_credentials*>(c->base.channel_creds);
-  GPR_ASSERT(alts_tsi_handshaker_create(creds->options, c->target_name,
-                                        creds->handshaker_service_url, true,
-                                        &handshaker) == TSI_OK);
+  GPR_ASSERT(alts_tsi_handshaker_create(
+                 creds->options, c->target_name, creds->handshaker_service_url,
+                 true, interested_parties, &handshaker) == TSI_OK);
   grpc_handshake_manager_add(handshake_manager, grpc_security_handshaker_create(
                                                     handshaker, &sc->base));
 }
 
 static void alts_server_add_handshakers(
-    grpc_server_security_connector* sc,
+    grpc_server_security_connector* sc, grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_manager) {
   tsi_handshaker* handshaker = nullptr;
   auto c = reinterpret_cast<grpc_alts_server_security_connector*>(sc);
   grpc_alts_server_credentials* creds =
       reinterpret_cast<grpc_alts_server_credentials*>(c->base.server_creds);
-  GPR_ASSERT(alts_tsi_handshaker_create(creds->options, nullptr,
-                                        creds->handshaker_service_url, false,
-                                        &handshaker) == TSI_OK);
+  GPR_ASSERT(alts_tsi_handshaker_create(
+                 creds->options, nullptr, creds->handshaker_service_url, false,
+                 interested_parties, &handshaker) == TSI_OK);
   grpc_handshake_manager_add(handshake_manager, grpc_security_handshaker_create(
                                                     handshaker, &sc->base));
 }
diff --git a/src/core/lib/security/security_connector/local_security_connector.cc b/src/core/lib/security/security_connector/local_security_connector.cc
index c436a7906b..911013ae58 100644
--- a/src/core/lib/security/security_connector/local_security_connector.cc
+++ b/src/core/lib/security/security_connector/local_security_connector.cc
@@ -30,6 +30,7 @@
 
 #include "src/core/ext/filters/client_channel/client_channel.h"
 #include "src/core/lib/channel/channel_args.h"
+#include "src/core/lib/iomgr/pollset.h"
 #include "src/core/lib/security/credentials/local/local_credentials.h"
 #include "src/core/lib/security/transport/security_handshaker.h"
 #include "src/core/tsi/local_transport_security.h"
@@ -68,7 +69,7 @@ static void local_server_destroy(grpc_security_connector* sc) {
 }
 
 static void local_channel_add_handshakers(
-    grpc_channel_security_connector* sc,
+    grpc_channel_security_connector* sc, grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_manager) {
   tsi_handshaker* handshaker = nullptr;
   GPR_ASSERT(local_tsi_handshaker_create(true /* is_client */, &handshaker) ==
@@ -78,7 +79,7 @@ static void local_channel_add_handshakers(
 }
 
 static void local_server_add_handshakers(
-    grpc_server_security_connector* sc,
+    grpc_server_security_connector* sc, grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_manager) {
   tsi_handshaker* handshaker = nullptr;
   GPR_ASSERT(local_tsi_handshaker_create(false /* is_client */, &handshaker) ==
diff --git a/src/core/lib/security/security_connector/security_connector.cc b/src/core/lib/security/security_connector/security_connector.cc
index 6246613e7b..7028ae8d16 100644
--- a/src/core/lib/security/security_connector/security_connector.cc
+++ b/src/core/lib/security/security_connector/security_connector.cc
@@ -120,17 +120,19 @@ const tsi_peer_property* tsi_peer_get_property_by_name(const tsi_peer* peer,
 
 void grpc_channel_security_connector_add_handshakers(
     grpc_channel_security_connector* connector,
+    grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_mgr) {
   if (connector != nullptr) {
-    connector->add_handshakers(connector, handshake_mgr);
+    connector->add_handshakers(connector, interested_parties, handshake_mgr);
   }
 }
 
 void grpc_server_security_connector_add_handshakers(
     grpc_server_security_connector* connector,
+    grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_mgr) {
   if (connector != nullptr) {
-    connector->add_handshakers(connector, handshake_mgr);
+    connector->add_handshakers(connector, interested_parties, handshake_mgr);
   }
 }
 
@@ -519,7 +521,7 @@ static void fake_channel_cancel_check_call_host(
 }
 
 static void fake_channel_add_handshakers(
-    grpc_channel_security_connector* sc,
+    grpc_channel_security_connector* sc, grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_mgr) {
   grpc_handshake_manager_add(
       handshake_mgr,
@@ -528,6 +530,7 @@ static void fake_channel_add_handshakers(
 }
 
 static void fake_server_add_handshakers(grpc_server_security_connector* sc,
+                                        grpc_pollset_set* interested_parties,
                                         grpc_handshake_manager* handshake_mgr) {
   grpc_handshake_manager_add(
       handshake_mgr,
@@ -669,6 +672,7 @@ static void ssl_server_destroy(grpc_security_connector* sc) {
 }
 
 static void ssl_channel_add_handshakers(grpc_channel_security_connector* sc,
+                                        grpc_pollset_set* interested_parties,
                                         grpc_handshake_manager* handshake_mgr) {
   grpc_ssl_channel_security_connector* c =
       reinterpret_cast<grpc_ssl_channel_security_connector*>(sc);
@@ -779,6 +783,7 @@ static bool try_fetch_ssl_server_credentials(
 }
 
 static void ssl_server_add_handshakers(grpc_server_security_connector* sc,
+                                       grpc_pollset_set* interested_parties,
                                        grpc_handshake_manager* handshake_mgr) {
   grpc_ssl_server_security_connector* c =
       reinterpret_cast<grpc_ssl_server_security_connector*>(sc);
diff --git a/src/core/lib/security/security_connector/security_connector.h b/src/core/lib/security/security_connector/security_connector.h
index 67a506b576..d8df3cd72f 100644
--- a/src/core/lib/security/security_connector/security_connector.h
+++ b/src/core/lib/security/security_connector/security_connector.h
@@ -27,6 +27,7 @@
 
 #include "src/core/lib/channel/handshaker.h"
 #include "src/core/lib/iomgr/endpoint.h"
+#include "src/core/lib/iomgr/pollset.h"
 #include "src/core/lib/iomgr/tcp_server.h"
 #include "src/core/tsi/ssl_transport_security.h"
 #include "src/core/tsi/transport_security_interface.h"
@@ -125,6 +126,7 @@ struct grpc_channel_security_connector {
                                  grpc_closure* on_call_host_checked,
                                  grpc_error* error);
   void (*add_handshakers)(grpc_channel_security_connector* sc,
+                          grpc_pollset_set* interested_parties,
                           grpc_handshake_manager* handshake_mgr);
 };
 
@@ -151,6 +153,7 @@ void grpc_channel_security_connector_cancel_check_call_host(
 /* Registers handshakers with \a handshake_mgr. */
 void grpc_channel_security_connector_add_handshakers(
     grpc_channel_security_connector* connector,
+    grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_mgr);
 
 /* --- server_security_connector object. ---
@@ -164,6 +167,7 @@ struct grpc_server_security_connector {
   grpc_security_connector base;
   grpc_server_credentials* server_creds;
   void (*add_handshakers)(grpc_server_security_connector* sc,
+                          grpc_pollset_set* interested_parties,
                           grpc_handshake_manager* handshake_mgr);
 };
 
@@ -172,7 +176,8 @@ int grpc_server_security_connector_cmp(grpc_server_security_connector* sc1,
                                        grpc_server_security_connector* sc2);
 
 void grpc_server_security_connector_add_handshakers(
-    grpc_server_security_connector* sc, grpc_handshake_manager* handshake_mgr);
+    grpc_server_security_connector* sc, grpc_pollset_set* interested_parties,
+    grpc_handshake_manager* handshake_mgr);
 
 /* --- Creation security connectors. --- */
 
diff --git a/src/core/lib/security/transport/security_handshaker.cc b/src/core/lib/security/transport/security_handshaker.cc
index d76d582638..4d6b133809 100644
--- a/src/core/lib/security/transport/security_handshaker.cc
+++ b/src/core/lib/security/transport/security_handshaker.cc
@@ -475,22 +475,24 @@ static grpc_handshaker* fail_handshaker_create() {
 
 static void client_handshaker_factory_add_handshakers(
     grpc_handshaker_factory* handshaker_factory, const grpc_channel_args* args,
+    grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_mgr) {
   grpc_channel_security_connector* security_connector =
       reinterpret_cast<grpc_channel_security_connector*>(
           grpc_security_connector_find_in_args(args));
-  grpc_channel_security_connector_add_handshakers(security_connector,
-                                                  handshake_mgr);
+  grpc_channel_security_connector_add_handshakers(
+      security_connector, interested_parties, handshake_mgr);
 }
 
 static void server_handshaker_factory_add_handshakers(
     grpc_handshaker_factory* hf, const grpc_channel_args* args,
+    grpc_pollset_set* interested_parties,
     grpc_handshake_manager* handshake_mgr) {
   grpc_server_security_connector* security_connector =
       reinterpret_cast<grpc_server_security_connector*>(
           grpc_security_connector_find_in_args(args));
-  grpc_server_security_connector_add_handshakers(security_connector,
-                                                 handshake_mgr);
+  grpc_server_security_connector_add_handshakers(
+      security_connector, interested_parties, handshake_mgr);
 }
 
 static void handshaker_factory_destroy(
diff --git a/src/core/lib/surface/channel.cc b/src/core/lib/surface/channel.cc
index 4294434504..d7095c24d4 100644
--- a/src/core/lib/surface/channel.cc
+++ b/src/core/lib/surface/channel.cc
@@ -336,9 +336,8 @@ grpc_call* grpc_channel_create_call(grpc_channel* channel,
   grpc_core::ExecCtx exec_ctx;
   grpc_call* call = grpc_channel_create_call_internal(
       channel, parent_call, propagation_mask, cq, nullptr,
-      grpc_mdelem_from_slices(GRPC_MDSTR_PATH, grpc_slice_ref_internal(method)),
-      host != nullptr ? grpc_mdelem_from_slices(GRPC_MDSTR_AUTHORITY,
-                                                grpc_slice_ref_internal(*host))
+      grpc_mdelem_create(GRPC_MDSTR_PATH, method, nullptr),
+      host != nullptr ? grpc_mdelem_create(GRPC_MDSTR_AUTHORITY, *host, nullptr)
                       : GRPC_MDNULL,
       grpc_timespec_to_millis_round_up(deadline));
 
@@ -347,14 +346,13 @@ grpc_call* grpc_channel_create_call(grpc_channel* channel,
 
 grpc_call* grpc_channel_create_pollset_set_call(
     grpc_channel* channel, grpc_call* parent_call, uint32_t propagation_mask,
-    grpc_pollset_set* pollset_set, grpc_slice method, const grpc_slice* host,
-    grpc_millis deadline, void* reserved) {
+    grpc_pollset_set* pollset_set, const grpc_slice& method,
+    const grpc_slice* host, grpc_millis deadline, void* reserved) {
   GPR_ASSERT(!reserved);
   return grpc_channel_create_call_internal(
       channel, parent_call, propagation_mask, nullptr, pollset_set,
-      grpc_mdelem_from_slices(GRPC_MDSTR_PATH, grpc_slice_ref_internal(method)),
-      host != nullptr ? grpc_mdelem_from_slices(GRPC_MDSTR_AUTHORITY,
-                                                grpc_slice_ref_internal(*host))
+      grpc_mdelem_create(GRPC_MDSTR_PATH, method, nullptr),
+      host != nullptr ? grpc_mdelem_create(GRPC_MDSTR_AUTHORITY, *host, nullptr)
                       : GRPC_MDNULL,
       deadline);
 }
diff --git a/src/core/lib/surface/channel.h b/src/core/lib/surface/channel.h
index e5ff2c3596..4ac76b8a29 100644
--- a/src/core/lib/surface/channel.h
+++ b/src/core/lib/surface/channel.h
@@ -45,8 +45,8 @@ grpc_channel* grpc_channel_create_with_builder(
     value of \a propagation_mask (see propagation_bits.h for possible values) */
 grpc_call* grpc_channel_create_pollset_set_call(
     grpc_channel* channel, grpc_call* parent_call, uint32_t propagation_mask,
-    grpc_pollset_set* pollset_set, grpc_slice method, const grpc_slice* host,
-    grpc_millis deadline, void* reserved);
+    grpc_pollset_set* pollset_set, const grpc_slice& method,
+    const grpc_slice* host, grpc_millis deadline, void* reserved);
 
 /** Get a (borrowed) pointer to this channels underlying channel stack */
 grpc_channel_stack* grpc_channel_get_channel_stack(grpc_channel* channel);
diff --git a/src/core/lib/surface/completion_queue.cc b/src/core/lib/surface/completion_queue.cc
index 5dc9991f70..b81ae73b4d 100644
--- a/src/core/lib/surface/completion_queue.cc
+++ b/src/core/lib/surface/completion_queue.cc
@@ -79,6 +79,7 @@ typedef struct non_polling_worker {
 
 typedef struct {
   gpr_mu mu;
+  bool kicked_without_poller;
   non_polling_worker* root;
   grpc_closure* shutdown;
 } non_polling_poller;
@@ -103,6 +104,10 @@ static grpc_error* non_polling_poller_work(grpc_pollset* pollset,
                                            grpc_millis deadline) {
   non_polling_poller* npp = reinterpret_cast<non_polling_poller*>(pollset);
   if (npp->shutdown) return GRPC_ERROR_NONE;
+  if (npp->kicked_without_poller) {
+    npp->kicked_without_poller = false;
+    return GRPC_ERROR_NONE;
+  }
   non_polling_worker w;
   gpr_cv_init(&w.cv);
   if (worker != nullptr) *worker = reinterpret_cast<grpc_pollset_worker*>(&w);
@@ -148,6 +153,8 @@ static grpc_error* non_polling_poller_kick(
       w->kicked = true;
       gpr_cv_signal(&w->cv);
     }
+  } else {
+    p->kicked_without_poller = true;
   }
   return GRPC_ERROR_NONE;
 }
diff --git a/src/core/lib/surface/server.cc b/src/core/lib/surface/server.cc
index 449aef768e..35ab2c3bce 100644
--- a/src/core/lib/surface/server.cc
+++ b/src/core/lib/surface/server.cc
@@ -54,6 +54,7 @@ struct listener {
                 size_t pollset_count);
   void (*destroy)(grpc_server* server, void* arg, grpc_closure* closure);
   struct listener* next;
+  intptr_t socket_uuid;
   grpc_closure destroy_done;
 };
 
@@ -104,6 +105,7 @@ struct channel_data {
   uint32_t registered_method_max_probes;
   grpc_closure finish_destroy_channel_closure;
   grpc_closure channel_connectivity_changed;
+  intptr_t socket_uuid;
 };
 
 typedef struct shutdown_tag {
@@ -1016,7 +1018,7 @@ grpc_server* grpc_server_create(const grpc_channel_args* args, void* reserved) {
         {GRPC_MAX_CHANNEL_TRACE_EVENT_MEMORY_PER_NODE_DEFAULT, 0, INT_MAX});
     server->channelz_server =
         grpc_core::MakeRefCounted<grpc_core::channelz::ServerNode>(
-            channel_tracer_max_memory);
+            server, channel_tracer_max_memory);
     server->channelz_server->AddTraceEvent(
         grpc_core::channelz::ChannelTrace::Severity::Info,
         grpc_slice_from_static_string("Server created"));
@@ -1119,7 +1121,8 @@ void grpc_server_get_pollsets(grpc_server* server, grpc_pollset*** pollsets,
 
 void grpc_server_setup_transport(grpc_server* s, grpc_transport* transport,
                                  grpc_pollset* accepting_pollset,
-                                 const grpc_channel_args* args) {
+                                 const grpc_channel_args* args,
+                                 intptr_t socket_uuid) {
   size_t num_registered_methods;
   size_t alloc;
   registered_method* rm;
@@ -1139,6 +1142,7 @@ void grpc_server_setup_transport(grpc_server* s, grpc_transport* transport,
   chand->server = s;
   server_ref(s);
   chand->channel = channel;
+  chand->socket_uuid = socket_uuid;
 
   size_t cq_idx;
   for (cq_idx = 0; cq_idx < s->cq_count; cq_idx++) {
@@ -1213,6 +1217,29 @@ void grpc_server_setup_transport(grpc_server* s, grpc_transport* transport,
   grpc_transport_perform_op(transport, op);
 }
 
+void grpc_server_populate_server_sockets(
+    grpc_server* s, grpc_core::channelz::ChildRefsList* server_sockets,
+    intptr_t start_idx) {
+  gpr_mu_lock(&s->mu_global);
+  channel_data* c = nullptr;
+  for (c = s->root_channel_data.next; c != &s->root_channel_data; c = c->next) {
+    intptr_t socket_uuid = c->socket_uuid;
+    if (socket_uuid >= start_idx) {
+      server_sockets->push_back(socket_uuid);
+    }
+  }
+  gpr_mu_unlock(&s->mu_global);
+}
+
+void grpc_server_populate_listen_sockets(
+    grpc_server* server, grpc_core::channelz::ChildRefsList* listen_sockets) {
+  gpr_mu_lock(&server->mu_global);
+  for (listener* l = server->listeners; l != nullptr; l = l->next) {
+    listen_sockets->push_back(l->socket_uuid);
+  }
+  gpr_mu_unlock(&server->mu_global);
+}
+
 void done_published_shutdown(void* done_arg, grpc_cq_completion* storage) {
   (void)done_arg;
   gpr_free(storage);
@@ -1346,11 +1373,13 @@ void grpc_server_add_listener(grpc_server* server, void* arg,
                                             grpc_pollset** pollsets,
                                             size_t pollset_count),
                               void (*destroy)(grpc_server* server, void* arg,
-                                              grpc_closure* on_done)) {
+                                              grpc_closure* on_done),
+                              intptr_t socket_uuid) {
   listener* l = static_cast<listener*>(gpr_malloc(sizeof(listener)));
   l->arg = arg;
   l->start = start;
   l->destroy = destroy;
+  l->socket_uuid = socket_uuid;
   l->next = server->listeners;
   server->listeners = l;
 }
diff --git a/src/core/lib/surface/server.h b/src/core/lib/surface/server.h
index 0196743ff9..33c205417e 100644
--- a/src/core/lib/surface/server.h
+++ b/src/core/lib/surface/server.h
@@ -39,13 +39,24 @@ void grpc_server_add_listener(grpc_server* server, void* listener,
                                             grpc_pollset** pollsets,
                                             size_t npollsets),
                               void (*destroy)(grpc_server* server, void* arg,
-                                              grpc_closure* on_done));
+                                              grpc_closure* on_done),
+                              intptr_t socket_uuid);
 
 /* Setup a transport - creates a channel stack, binds the transport to the
    server */
 void grpc_server_setup_transport(grpc_server* server, grpc_transport* transport,
                                  grpc_pollset* accepting_pollset,
-                                 const grpc_channel_args* args);
+                                 const grpc_channel_args* args,
+                                 intptr_t socket_uuid);
+
+/* fills in the uuids of all sockets used for connections on this server */
+void grpc_server_populate_server_sockets(
+    grpc_server* server, grpc_core::channelz::ChildRefsList* server_sockets,
+    intptr_t start_idx);
+
+/* fills in the uuids of all listen sockets on this server */
+void grpc_server_populate_listen_sockets(
+    grpc_server* server, grpc_core::channelz::ChildRefsList* listen_sockets);
 
 grpc_core::channelz::ServerNode* grpc_server_get_channelz_node(
     grpc_server* server);
diff --git a/src/core/lib/surface/version.cc b/src/core/lib/surface/version.cc
index a44f9acdc3..66890ce65a 100644
--- a/src/core/lib/surface/version.cc
+++ b/src/core/lib/surface/version.cc
@@ -23,6 +23,6 @@
 
 #include <grpc/grpc.h>
 
-const char* grpc_version_string(void) { return "6.0.0-dev"; }
+const char* grpc_version_string(void) { return "7.0.0-dev"; }
 
-const char* grpc_g_stands_for(void) { return "gao"; }
+const char* grpc_g_stands_for(void) { return "gizmo"; }
diff --git a/src/core/lib/transport/metadata.cc b/src/core/lib/transport/metadata.cc
index d164502280..60af22393e 100644
--- a/src/core/lib/transport/metadata.cc
+++ b/src/core/lib/transport/metadata.cc
@@ -237,7 +237,7 @@ static void rehash_mdtab(mdtab_shard* shard) {
 }
 
 grpc_mdelem grpc_mdelem_create(
-    grpc_slice key, grpc_slice value,
+    const grpc_slice& key, const grpc_slice& value,
     grpc_mdelem_data* compatible_external_backing_store) {
   if (!grpc_slice_is_interned(key) || !grpc_slice_is_interned(value)) {
     if (compatible_external_backing_store != nullptr) {
@@ -324,7 +324,8 @@ grpc_mdelem grpc_mdelem_create(
   return GRPC_MAKE_MDELEM(md, GRPC_MDELEM_STORAGE_INTERNED);
 }
 
-grpc_mdelem grpc_mdelem_from_slices(grpc_slice key, grpc_slice value) {
+grpc_mdelem grpc_mdelem_from_slices(const grpc_slice& key,
+                                    const grpc_slice& value) {
   grpc_mdelem out = grpc_mdelem_create(key, value, nullptr);
   grpc_slice_unref_internal(key);
   grpc_slice_unref_internal(value);
diff --git a/src/core/lib/transport/metadata.h b/src/core/lib/transport/metadata.h
index 338082276c..989c7544c1 100644
--- a/src/core/lib/transport/metadata.h
+++ b/src/core/lib/transport/metadata.h
@@ -109,7 +109,8 @@ struct grpc_mdelem {
                               (uintptr_t)GRPC_MDELEM_STORAGE_INTERNED_BIT))
 
 /* Unrefs the slices. */
-grpc_mdelem grpc_mdelem_from_slices(grpc_slice key, grpc_slice value);
+grpc_mdelem grpc_mdelem_from_slices(const grpc_slice& key,
+                                    const grpc_slice& value);
 
 /* Cheaply convert a grpc_metadata to a grpc_mdelem; may use the grpc_metadata
    object as backing storage (so lifetimes should align) */
@@ -120,7 +121,7 @@ grpc_mdelem grpc_mdelem_from_grpc_metadata(grpc_metadata* metadata);
    compatible_external_backing_store if it is non-NULL (in which case it's the
    users responsibility to ensure that it outlives usage) */
 grpc_mdelem grpc_mdelem_create(
-    grpc_slice key, grpc_slice value,
+    const grpc_slice& key, const grpc_slice& value,
     grpc_mdelem_data* compatible_external_backing_store);
 
 bool grpc_mdelem_eq(grpc_mdelem a, grpc_mdelem b);
diff --git a/src/core/lib/transport/metadata_batch.cc b/src/core/lib/transport/metadata_batch.cc
index 761aad19c4..928ed73cda 100644
--- a/src/core/lib/transport/metadata_batch.cc
+++ b/src/core/lib/transport/metadata_batch.cc
@@ -139,6 +139,7 @@ static void link_head(grpc_mdelem_list* list, grpc_linked_mdelem* storage) {
   GPR_ASSERT(!GRPC_MDISNULL(storage->md));
   storage->prev = nullptr;
   storage->next = list->head;
+  storage->reserved = nullptr;
   if (list->head != nullptr) {
     list->head->prev = storage;
   } else {
diff --git a/src/core/tsi/alts/handshaker/alts_tsi_handshaker.cc b/src/core/tsi/alts/handshaker/alts_tsi_handshaker.cc
index 34608a3de1..dfdd659b87 100644
--- a/src/core/tsi/alts/handshaker/alts_tsi_handshaker.cc
+++ b/src/core/tsi/alts/handshaker/alts_tsi_handshaker.cc
@@ -347,7 +347,8 @@ static void init_shared_resources(const char* handshaker_service_url) {
 
 tsi_result alts_tsi_handshaker_create(
     const grpc_alts_credentials_options* options, const char* target_name,
-    const char* handshaker_service_url, bool is_client, tsi_handshaker** self) {
+    const char* handshaker_service_url, bool is_client,
+    grpc_pollset_set* interested_parties, tsi_handshaker** self) {
   if (handshaker_service_url == nullptr || self == nullptr ||
       options == nullptr || (is_client && target_name == nullptr)) {
     gpr_log(GPR_ERROR, "Invalid arguments to alts_tsi_handshaker_create()");
diff --git a/src/core/tsi/alts/handshaker/alts_tsi_handshaker.h b/src/core/tsi/alts/handshaker/alts_tsi_handshaker.h
index 227b30ce53..48ce69b1da 100644
--- a/src/core/tsi/alts/handshaker/alts_tsi_handshaker.h
+++ b/src/core/tsi/alts/handshaker/alts_tsi_handshaker.h
@@ -23,6 +23,7 @@
 
 #include <grpc/grpc.h>
 
+#include "src/core/lib/iomgr/pollset_set.h"
 #include "src/core/lib/security/credentials/alts/grpc_alts_credentials_options.h"
 #include "src/core/tsi/alts_transport_security.h"
 #include "src/core/tsi/transport_security.h"
@@ -51,6 +52,7 @@ typedef struct alts_tsi_handshaker alts_tsi_handshaker;
  *   "host:port".
  * - is_client: boolean value indicating if the handshaker is used at the client
  *   (is_client = true) or server (is_client = false) side.
+ * - interested_parties: set of pollsets interested in this connection.
  * - self: address of ALTS TSI handshaker instance to be returned from the
  *   method.
  *
@@ -58,7 +60,8 @@ typedef struct alts_tsi_handshaker alts_tsi_handshaker;
  */
 tsi_result alts_tsi_handshaker_create(
     const grpc_alts_credentials_options* options, const char* target_name,
-    const char* handshaker_service_url, bool is_client, tsi_handshaker** self);
+    const char* handshaker_service_url, bool is_client,
+    grpc_pollset_set* interested_parties, tsi_handshaker** self);
 
 /**
  * This method handles handshaker response returned from ALTS handshaker
author	Muxi Yan <mxyan@google.com>	2018-10-18 09:38:26 -0700
committer	Muxi Yan <mxyan@google.com>	2018-10-18 09:38:26 -0700
commit	62693ec1bd0fe0133a3d382ef2e11ffa935e9904 (patch)
tree	f84b6c168f55ef2f4b8661ab64960fb11b0f7d76 /src/core
parent	be4ab30899f6ae91e07f02c33297ca462f778296 (diff)
parent	d2704ecca0b6cc43e8dfde6dd64875b4fb71d6cd (diff)