aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core/ext/lb_policy/grpclb/grpclb.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/ext/lb_policy/grpclb/grpclb.c')
-rw-r--r--src/core/ext/lb_policy/grpclb/grpclb.c1039
1 files changed, 1039 insertions, 0 deletions
diff --git a/src/core/ext/lb_policy/grpclb/grpclb.c b/src/core/ext/lb_policy/grpclb/grpclb.c
new file mode 100644
index 0000000000..dec25efe61
--- /dev/null
+++ b/src/core/ext/lb_policy/grpclb/grpclb.c
@@ -0,0 +1,1039 @@
+/*
+ *
+ * Copyright 2016, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/** Implementation of the gRPC LB policy.
+ *
+ * This policy takes as input a set of resolved addresses {a1..an} for which the
+ * LB set was set (it's the resolver's responsibility to ensure this). That is
+ * to say, {a1..an} represent a collection of LB servers.
+ *
+ * An internal channel (\a glb_lb_policy.lb_channel) is created over {a1..an}.
+ * This channel behaves just like a regular channel. In particular, the
+ * constructed URI over the addresses a1..an will use the default pick first
+ * policy to select from this list of LB server backends.
+ *
+ * The first time the policy gets a request for a pick, a ping, or to exit the
+ * idle state, \a query_for_backends() is called. It creates an instance of \a
+ * lb_client_data, an internal struct meant to contain the data associated with
+ * the internal communication with the LB server. This instance is created via
+ * \a lb_client_data_create(). There, the call over lb_channel to pick-first
+ * from {a1..an} is created, the \a LoadBalancingRequest message is assembled
+ * and all necessary callbacks for the progress of the internal call configured.
+ *
+ * Back in \a query_for_backends(), the internal *streaming* call to the LB
+ * server (whichever address from {a1..an} pick-first chose) is kicked off.
+ * It'll progress over the callbacks configured in \a lb_client_data_create()
+ * (see the field docstrings of \a lb_client_data for more details).
+ *
+ * If the call fails with UNIMPLEMENTED, the original call will also fail.
+ * There's a misconfiguration somewhere: at least one of {a1..an} isn't a LB
+ * server, which contradicts the LB bit being set. If the internal call times
+ * out, the usual behavior of pick-first applies, continuing to pick from the
+ * list {a1..an}.
+ *
+ * Upon sucesss, a \a LoadBalancingResponse is expected in \a res_recv_cb. An
+ * invalid one results in the termination of the streaming call. A new streaming
+ * call should be created if possible, failing the original call otherwise.
+ * For a valid \a LoadBalancingResponse, the server list of actual backends is
+ * extracted. A Round Robin policy will be created from this list. There are two
+ * possible scenarios:
+ *
+ * 1. This is the first server list received. There was no previous instance of
+ * the Round Robin policy. \a rr_handover() will instantiate the RR policy
+ * and perform all the pending operations over it.
+ * 2. There's already a RR policy instance active. We need to introduce the new
+ * one build from the new serverlist, but taking care not to disrupt the
+ * operations in progress over the old RR instance. This is done by
+ * decreasing the reference count on the old policy. The moment no more
+ * references are held on the old RR policy, it'll be destroyed and \a
+ * rr_connectivity_changed notified with a \a GRPC_CHANNEL_SHUTDOWN state.
+ * At this point we can transition to a new RR instance safely, which is done
+ * once again via \a rr_handover().
+ *
+ *
+ * Once a RR policy instance is in place (and getting updated as described),
+ * calls to for a pick, a ping or a cancellation will be serviced right away by
+ * forwarding them to the RR instance. Any time there's no RR policy available
+ * (ie, right after the creation of the gRPCLB policy, if an empty serverlist
+ * is received, etc), pick/ping requests are added to a list of pending
+ * picks/pings to be flushed and serviced as part of \a rr_handover() the moment
+ * the RR policy instance becomes available.
+ *
+ * \see https://github.com/grpc/grpc/blob/master/doc/load-balancing.md for the
+ * high level design and details. */
+
+/* TODO(dgq):
+ * - Implement LB service forwarding (point 2c. in the doc's diagram).
+ */
+
+#include <string.h>
+
+#include <grpc/byte_buffer_reader.h>
+#include <grpc/grpc.h>
+#include <grpc/support/alloc.h>
+#include <grpc/support/host_port.h>
+#include <grpc/support/string_util.h>
+
+#include "src/core/ext/client_config/client_channel_factory.h"
+#include "src/core/ext/client_config/lb_policy_registry.h"
+#include "src/core/ext/client_config/parse_address.h"
+#include "src/core/ext/lb_policy/grpclb/grpclb.h"
+#include "src/core/ext/lb_policy/grpclb/load_balancer_api.h"
+#include "src/core/lib/iomgr/sockaddr_utils.h"
+#include "src/core/lib/support/string.h"
+#include "src/core/lib/surface/call.h"
+#include "src/core/lib/surface/channel.h"
+
+int grpc_lb_glb_trace = 0;
+
+typedef struct wrapped_rr_closure_arg {
+ /* the original closure. Usually a on_complete/notify cb for pick() and ping()
+ * calls against the internal RR instance, respectively. */
+ grpc_closure *wrapped_closure;
+
+ /* The RR instance related to the closure */
+ grpc_lb_policy *rr_policy;
+
+ /* when not NULL, represents a pending_{pick,ping} node to be freed upon
+ * closure execution */
+ void *owning_pending_node; /* to be freed if not NULL */
+} wrapped_rr_closure_arg;
+
+/* The \a on_complete closure passed as part of the pick requires keeping a
+ * reference to its associated round robin instance. We wrap this closure in
+ * order to unref the round robin instance upon its invocation */
+static void wrapped_rr_closure(grpc_exec_ctx *exec_ctx, void *arg,
+ grpc_error *error) {
+ wrapped_rr_closure_arg *wc_arg = arg;
+ if (wc_arg->rr_policy != NULL) {
+ if (grpc_lb_glb_trace) {
+ gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")",
+ (intptr_t)wc_arg->rr_policy);
+ }
+ GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "wrapped_rr_closure");
+ }
+ GPR_ASSERT(wc_arg->wrapped_closure != NULL);
+ grpc_exec_ctx_sched(exec_ctx, wc_arg->wrapped_closure, error, NULL);
+ gpr_free(wc_arg->owning_pending_node);
+}
+
+/* Linked list of pending pick requests. It stores all information needed to
+ * eventually call (Round Robin's) pick() on them. They mainly stay pending
+ * waiting for the RR policy to be created/updated.
+ *
+ * One particularity is the wrapping of the user-provided \a on_complete closure
+ * (in \a wrapped_on_complete and \a wrapped_on_complete_arg). This is needed in
+ * order to correctly unref the RR policy instance upon completion of the pick.
+ * See \a wrapped_rr_closure for details. */
+typedef struct pending_pick {
+ struct pending_pick *next;
+
+ /* polling entity for the pick()'s async notification */
+ grpc_polling_entity *pollent;
+
+ /* the initial metadata for the pick. See grpc_lb_policy_pick() */
+ grpc_metadata_batch *initial_metadata;
+
+ /* bitmask passed to pick() and used for selective cancelling. See
+ * grpc_lb_policy_cancel_picks() */
+ uint32_t initial_metadata_flags;
+
+ /* output argument where to store the pick()ed connected subchannel, or NULL
+ * upon error. */
+ grpc_connected_subchannel **target;
+
+ /* a closure wrapping the original on_complete one to be invoked once the
+ * pick() has completed (regardless of success) */
+ grpc_closure wrapped_on_complete;
+
+ /* args for wrapped_on_complete */
+ wrapped_rr_closure_arg wrapped_on_complete_arg;
+} pending_pick;
+
+static void add_pending_pick(pending_pick **root, grpc_polling_entity *pollent,
+ grpc_metadata_batch *initial_metadata,
+ uint32_t initial_metadata_flags,
+ grpc_connected_subchannel **target,
+ grpc_closure *on_complete) {
+ pending_pick *pp = gpr_malloc(sizeof(*pp));
+ memset(pp, 0, sizeof(pending_pick));
+ memset(&pp->wrapped_on_complete_arg, 0, sizeof(wrapped_rr_closure_arg));
+ pp->next = *root;
+ pp->pollent = pollent;
+ pp->target = target;
+ pp->initial_metadata = initial_metadata;
+ pp->initial_metadata_flags = initial_metadata_flags;
+ pp->wrapped_on_complete_arg.wrapped_closure = on_complete;
+ grpc_closure_init(&pp->wrapped_on_complete, wrapped_rr_closure,
+ &pp->wrapped_on_complete_arg);
+ *root = pp;
+}
+
+/* Same as the \a pending_pick struct but for ping operations */
+typedef struct pending_ping {
+ struct pending_ping *next;
+
+ /* a closure wrapping the original on_complete one to be invoked once the
+ * ping() has completed (regardless of success) */
+ grpc_closure wrapped_notify;
+
+ /* args for wrapped_notify */
+ wrapped_rr_closure_arg wrapped_notify_arg;
+} pending_ping;
+
+static void add_pending_ping(pending_ping **root, grpc_closure *notify) {
+ pending_ping *pping = gpr_malloc(sizeof(*pping));
+ memset(pping, 0, sizeof(pending_ping));
+ memset(&pping->wrapped_notify_arg, 0, sizeof(wrapped_rr_closure_arg));
+ pping->next = *root;
+ grpc_closure_init(&pping->wrapped_notify, wrapped_rr_closure,
+ &pping->wrapped_notify_arg);
+ pping->wrapped_notify_arg.wrapped_closure = notify;
+ *root = pping;
+}
+
+/*
+ * glb_lb_policy
+ */
+typedef struct rr_connectivity_data rr_connectivity_data;
+struct lb_client_data;
+static const grpc_lb_policy_vtable glb_lb_policy_vtable;
+typedef struct glb_lb_policy {
+ /** base policy: must be first */
+ grpc_lb_policy base;
+
+ /** mutex protecting remaining members */
+ gpr_mu mu;
+
+ grpc_client_channel_factory *cc_factory;
+
+ /** for communicating with the LB server */
+ grpc_channel *lb_channel;
+
+ /** the RR policy to use of the backend servers returned by the LB server */
+ grpc_lb_policy *rr_policy;
+
+ bool started_picking;
+
+ /** our connectivity state tracker */
+ grpc_connectivity_state_tracker state_tracker;
+
+ /** stores the deserialized response from the LB. May be NULL until one such
+ * response has arrived. */
+ grpc_grpclb_serverlist *serverlist;
+
+ /** list of picks that are waiting on RR's policy connectivity */
+ pending_pick *pending_picks;
+
+ /** list of pings that are waiting on RR's policy connectivity */
+ pending_ping *pending_pings;
+
+ /** client data associated with the LB server communication */
+ struct lb_client_data *lb_client;
+
+ /** for tracking of the RR connectivity */
+ rr_connectivity_data *rr_connectivity;
+
+ /* a wrapped (see \a wrapped_rr_closure) on-complete closure for readily
+ * available RR picks */
+ grpc_closure wrapped_on_complete;
+
+ /* arguments for the wrapped_on_complete closure */
+ wrapped_rr_closure_arg wc_arg;
+} glb_lb_policy;
+
+/* Keeps track and reacts to changes in connectivity of the RR instance */
+struct rr_connectivity_data {
+ grpc_closure on_change;
+ grpc_connectivity_state state;
+ glb_lb_policy *glb_policy;
+};
+
+static grpc_lb_policy *create_rr(grpc_exec_ctx *exec_ctx,
+ const grpc_grpclb_serverlist *serverlist,
+ glb_lb_policy *glb_policy) {
+ /* TODO(dgq): support mixed ip version */
+ GPR_ASSERT(serverlist != NULL && serverlist->num_servers > 0);
+ char **host_ports = gpr_malloc(sizeof(char *) * serverlist->num_servers);
+ for (size_t i = 0; i < serverlist->num_servers; ++i) {
+ gpr_join_host_port(&host_ports[i], serverlist->servers[i]->ip_address,
+ serverlist->servers[i]->port);
+ }
+
+ size_t uri_path_len;
+ char *concat_ipports = gpr_strjoin_sep(
+ (const char **)host_ports, serverlist->num_servers, ",", &uri_path_len);
+
+ grpc_lb_policy_args args;
+ args.client_channel_factory = glb_policy->cc_factory;
+ args.addresses = gpr_malloc(sizeof(grpc_resolved_addresses));
+ args.addresses->naddrs = serverlist->num_servers;
+ args.addresses->addrs =
+ gpr_malloc(sizeof(grpc_resolved_address) * args.addresses->naddrs);
+ size_t out_addrs_idx = 0;
+ for (size_t i = 0; i < serverlist->num_servers; ++i) {
+ grpc_uri uri;
+ struct sockaddr_storage sa;
+ size_t sa_len;
+ uri.path = host_ports[i];
+ if (parse_ipv4(&uri, &sa, &sa_len)) { /* TODO(dgq): add support for ipv6 */
+ memcpy(args.addresses->addrs[out_addrs_idx].addr, &sa, sa_len);
+ args.addresses->addrs[out_addrs_idx].len = sa_len;
+ ++out_addrs_idx;
+ } else {
+ gpr_log(GPR_ERROR, "Invalid LB service address '%s', ignoring.",
+ host_ports[i]);
+ }
+ }
+
+ grpc_lb_policy *rr = grpc_lb_policy_create(exec_ctx, "round_robin", &args);
+
+ gpr_free(concat_ipports);
+ for (size_t i = 0; i < serverlist->num_servers; i++) {
+ gpr_free(host_ports[i]);
+ }
+ gpr_free(host_ports);
+ gpr_free(args.addresses->addrs);
+ gpr_free(args.addresses);
+ return rr;
+}
+
+static void rr_handover(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy,
+ grpc_error *error) {
+ GRPC_ERROR_REF(error);
+ glb_policy->rr_policy =
+ create_rr(exec_ctx, glb_policy->serverlist, glb_policy);
+
+ if (grpc_lb_glb_trace) {
+ gpr_log(GPR_INFO, "Created RR policy (0x%" PRIxPTR ")",
+ (intptr_t)glb_policy->rr_policy);
+ }
+ GPR_ASSERT(glb_policy->rr_policy != NULL);
+ glb_policy->rr_connectivity->state = grpc_lb_policy_check_connectivity(
+ exec_ctx, glb_policy->rr_policy, &error);
+ grpc_lb_policy_notify_on_state_change(
+ exec_ctx, glb_policy->rr_policy, &glb_policy->rr_connectivity->state,
+ &glb_policy->rr_connectivity->on_change);
+ grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker,
+ glb_policy->rr_connectivity->state, error,
+ "rr_handover");
+ grpc_lb_policy_exit_idle(exec_ctx, glb_policy->rr_policy);
+
+ /* flush pending ops */
+ pending_pick *pp;
+ while ((pp = glb_policy->pending_picks)) {
+ glb_policy->pending_picks = pp->next;
+ GRPC_LB_POLICY_REF(glb_policy->rr_policy, "rr_handover_pending_pick");
+ pp->wrapped_on_complete_arg.rr_policy = glb_policy->rr_policy;
+ if (grpc_lb_glb_trace) {
+ gpr_log(GPR_INFO, "Pending pick about to PICK from 0x%" PRIxPTR "",
+ (intptr_t)glb_policy->rr_policy);
+ }
+ grpc_lb_policy_pick(exec_ctx, glb_policy->rr_policy, pp->pollent,
+ pp->initial_metadata, pp->initial_metadata_flags,
+ pp->target, &pp->wrapped_on_complete);
+ pp->wrapped_on_complete_arg.owning_pending_node = pp;
+ }
+
+ pending_ping *pping;
+ while ((pping = glb_policy->pending_pings)) {
+ glb_policy->pending_pings = pping->next;
+ GRPC_LB_POLICY_REF(glb_policy->rr_policy, "rr_handover_pending_ping");
+ pping->wrapped_notify_arg.rr_policy = glb_policy->rr_policy;
+ if (grpc_lb_glb_trace) {
+ gpr_log(GPR_INFO, "Pending ping about to PING from 0x%" PRIxPTR "",
+ (intptr_t)glb_policy->rr_policy);
+ }
+ grpc_lb_policy_ping_one(exec_ctx, glb_policy->rr_policy,
+ &pping->wrapped_notify);
+ pping->wrapped_notify_arg.owning_pending_node = pping;
+ }
+ GRPC_ERROR_UNREF(error);
+}
+
+static void rr_connectivity_changed(grpc_exec_ctx *exec_ctx, void *arg,
+ grpc_error *error) {
+ rr_connectivity_data *rr_conn_data = arg;
+ glb_lb_policy *glb_policy = rr_conn_data->glb_policy;
+ if (rr_conn_data->state == GRPC_CHANNEL_SHUTDOWN) {
+ if (glb_policy->serverlist != NULL) {
+ /* a RR policy is shutting down but there's a serverlist available ->
+ * perform a handover */
+ rr_handover(exec_ctx, glb_policy, error);
+ } else {
+ /* shutting down and no new serverlist available. Bail out. */
+ gpr_free(rr_conn_data);
+ }
+ } else {
+ if (error == GRPC_ERROR_NONE) {
+ /* RR not shutting down. Mimic the RR's policy state */
+ grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker,
+ rr_conn_data->state, error,
+ "rr_connectivity_changed");
+ /* resubscribe */
+ grpc_lb_policy_notify_on_state_change(exec_ctx, glb_policy->rr_policy,
+ &rr_conn_data->state,
+ &rr_conn_data->on_change);
+ } else { /* error */
+ gpr_free(rr_conn_data);
+ }
+ }
+ GRPC_ERROR_UNREF(error);
+}
+
+static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx,
+ grpc_lb_policy_factory *factory,
+ grpc_lb_policy_args *args) {
+ glb_lb_policy *glb_policy = gpr_malloc(sizeof(*glb_policy));
+ memset(glb_policy, 0, sizeof(*glb_policy));
+
+ /* All input addresses in args->addresses come from a resolver that claims
+ * they are LB services. It's the resolver's responsibility to make sure this
+ * policy is only instantiated and used in that case.
+ *
+ * Create a client channel over them to communicate with a LB service */
+ glb_policy->cc_factory = args->client_channel_factory;
+ GPR_ASSERT(glb_policy->cc_factory != NULL);
+ if (args->addresses->naddrs == 0) {
+ return NULL;
+ }
+
+ /* construct a target from the args->addresses, in the form
+ * ipvX://ip1:port1,ip2:port2,...
+ * TODO(dgq): support mixed ip version */
+ char **addr_strs = gpr_malloc(sizeof(char *) * args->addresses->naddrs);
+ addr_strs[0] =
+ grpc_sockaddr_to_uri((const struct sockaddr *)&args->addresses->addrs[0]);
+ for (size_t i = 1; i < args->addresses->naddrs; i++) {
+ GPR_ASSERT(grpc_sockaddr_to_string(
+ &addr_strs[i],
+ (const struct sockaddr *)&args->addresses->addrs[i],
+ true) == 0);
+ }
+ size_t uri_path_len;
+ char *target_uri_str = gpr_strjoin_sep(
+ (const char **)addr_strs, args->addresses->naddrs, ",", &uri_path_len);
+
+ /* will pick using pick_first */
+ glb_policy->lb_channel = grpc_client_channel_factory_create_channel(
+ exec_ctx, glb_policy->cc_factory, target_uri_str,
+ GRPC_CLIENT_CHANNEL_TYPE_LOAD_BALANCING, NULL);
+
+ gpr_free(target_uri_str);
+ for (size_t i = 0; i < args->addresses->naddrs; i++) {
+ gpr_free(addr_strs[i]);
+ }
+ gpr_free(addr_strs);
+
+ if (glb_policy->lb_channel == NULL) {
+ gpr_free(glb_policy);
+ return NULL;
+ }
+
+ rr_connectivity_data *rr_connectivity =
+ gpr_malloc(sizeof(rr_connectivity_data));
+ memset(rr_connectivity, 0, sizeof(rr_connectivity_data));
+ grpc_closure_init(&rr_connectivity->on_change, rr_connectivity_changed,
+ rr_connectivity);
+ rr_connectivity->glb_policy = glb_policy;
+ glb_policy->rr_connectivity = rr_connectivity;
+
+ grpc_lb_policy_init(&glb_policy->base, &glb_lb_policy_vtable);
+ gpr_mu_init(&glb_policy->mu);
+ grpc_connectivity_state_init(&glb_policy->state_tracker, GRPC_CHANNEL_IDLE,
+ "grpclb");
+ return &glb_policy->base;
+}
+
+static void glb_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
+ glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
+ GPR_ASSERT(glb_policy->pending_picks == NULL);
+ GPR_ASSERT(glb_policy->pending_pings == NULL);
+ grpc_channel_destroy(glb_policy->lb_channel);
+ glb_policy->lb_channel = NULL;
+ grpc_connectivity_state_destroy(exec_ctx, &glb_policy->state_tracker);
+ if (glb_policy->serverlist != NULL) {
+ grpc_grpclb_destroy_serverlist(glb_policy->serverlist);
+ }
+ gpr_mu_destroy(&glb_policy->mu);
+ gpr_free(glb_policy);
+}
+
+static void lb_client_data_destroy(struct lb_client_data *lb_client);
+static void glb_shutdown(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
+ glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
+ gpr_mu_lock(&glb_policy->mu);
+
+ pending_pick *pp = glb_policy->pending_picks;
+ glb_policy->pending_picks = NULL;
+ pending_ping *pping = glb_policy->pending_pings;
+ glb_policy->pending_pings = NULL;
+ gpr_mu_unlock(&glb_policy->mu);
+
+ while (pp != NULL) {
+ pending_pick *next = pp->next;
+ *pp->target = NULL;
+ grpc_exec_ctx_sched(exec_ctx, &pp->wrapped_on_complete, GRPC_ERROR_NONE,
+ NULL);
+ gpr_free(pp);
+ pp = next;
+ }
+
+ while (pping != NULL) {
+ pending_ping *next = pping->next;
+ grpc_exec_ctx_sched(exec_ctx, &pping->wrapped_notify, GRPC_ERROR_NONE,
+ NULL);
+ pping = next;
+ }
+
+ if (glb_policy->rr_policy) {
+ /* unsubscribe */
+ grpc_lb_policy_notify_on_state_change(
+ exec_ctx, glb_policy->rr_policy, NULL,
+ &glb_policy->rr_connectivity->on_change);
+ GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "glb_shutdown");
+ }
+
+ lb_client_data_destroy(glb_policy->lb_client);
+ glb_policy->lb_client = NULL;
+
+ grpc_connectivity_state_set(
+ exec_ctx, &glb_policy->state_tracker, GRPC_CHANNEL_SHUTDOWN,
+ GRPC_ERROR_CREATE("Channel Shutdown"), "glb_shutdown");
+}
+
+static void glb_cancel_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
+ grpc_connected_subchannel **target) {
+ glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
+ gpr_mu_lock(&glb_policy->mu);
+ pending_pick *pp = glb_policy->pending_picks;
+ glb_policy->pending_picks = NULL;
+ while (pp != NULL) {
+ pending_pick *next = pp->next;
+ if (pp->target == target) {
+ grpc_polling_entity_del_from_pollset_set(
+ exec_ctx, pp->pollent, glb_policy->base.interested_parties);
+ *target = NULL;
+ grpc_exec_ctx_sched(exec_ctx, &pp->wrapped_on_complete,
+ GRPC_ERROR_CANCELLED, NULL);
+ gpr_free(pp);
+ } else {
+ pp->next = glb_policy->pending_picks;
+ glb_policy->pending_picks = pp;
+ }
+ pp = next;
+ }
+ gpr_mu_unlock(&glb_policy->mu);
+}
+
+static grpc_call *lb_client_data_get_call(struct lb_client_data *lb_client);
+static void glb_cancel_picks(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
+ uint32_t initial_metadata_flags_mask,
+ uint32_t initial_metadata_flags_eq) {
+ glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
+ gpr_mu_lock(&glb_policy->mu);
+ if (glb_policy->lb_client != NULL) {
+ /* cancel the call to the load balancer service, if any */
+ grpc_call_cancel(lb_client_data_get_call(glb_policy->lb_client), NULL);
+ }
+ pending_pick *pp = glb_policy->pending_picks;
+ glb_policy->pending_picks = NULL;
+ while (pp != NULL) {
+ pending_pick *next = pp->next;
+ if ((pp->initial_metadata_flags & initial_metadata_flags_mask) ==
+ initial_metadata_flags_eq) {
+ grpc_polling_entity_del_from_pollset_set(
+ exec_ctx, pp->pollent, glb_policy->base.interested_parties);
+ grpc_exec_ctx_sched(exec_ctx, &pp->wrapped_on_complete,
+ GRPC_ERROR_CANCELLED, NULL);
+ gpr_free(pp);
+ } else {
+ pp->next = glb_policy->pending_picks;
+ glb_policy->pending_picks = pp;
+ }
+ pp = next;
+ }
+ gpr_mu_unlock(&glb_policy->mu);
+}
+
+static void query_for_backends(grpc_exec_ctx *exec_ctx,
+ glb_lb_policy *glb_policy);
+static void start_picking(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy) {
+ glb_policy->started_picking = true;
+ query_for_backends(exec_ctx, glb_policy);
+}
+
+static void glb_exit_idle(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
+ glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
+ gpr_mu_lock(&glb_policy->mu);
+ if (!glb_policy->started_picking) {
+ start_picking(exec_ctx, glb_policy);
+ }
+ gpr_mu_unlock(&glb_policy->mu);
+}
+
+static int glb_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
+ grpc_polling_entity *pollent,
+ grpc_metadata_batch *initial_metadata,
+ uint32_t initial_metadata_flags,
+ grpc_connected_subchannel **target,
+ grpc_closure *on_complete) {
+ glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
+ gpr_mu_lock(&glb_policy->mu);
+ int r;
+
+ if (glb_policy->rr_policy != NULL) {
+ if (grpc_lb_glb_trace) {
+ gpr_log(GPR_INFO, "about to PICK from 0x%" PRIxPTR "",
+ (intptr_t)glb_policy->rr_policy);
+ }
+ GRPC_LB_POLICY_REF(glb_policy->rr_policy, "glb_pick");
+ memset(&glb_policy->wc_arg, 0, sizeof(wrapped_rr_closure_arg));
+ glb_policy->wc_arg.rr_policy = glb_policy->rr_policy;
+ glb_policy->wc_arg.wrapped_closure = on_complete;
+ grpc_closure_init(&glb_policy->wrapped_on_complete, wrapped_rr_closure,
+ &glb_policy->wc_arg);
+ r = grpc_lb_policy_pick(exec_ctx, glb_policy->rr_policy, pollent,
+ initial_metadata, initial_metadata_flags, target,
+ &glb_policy->wrapped_on_complete);
+ if (r != 0) {
+ /* the call to grpc_lb_policy_pick has been sychronous. Unreffing the RR
+ * policy and notify the original callback */
+ glb_policy->wc_arg.wrapped_closure = NULL;
+ if (grpc_lb_glb_trace) {
+ gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")",
+ (intptr_t)glb_policy->wc_arg.rr_policy);
+ }
+ GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->wc_arg.rr_policy, "glb_pick");
+ grpc_exec_ctx_sched(exec_ctx, glb_policy->wc_arg.wrapped_closure,
+ GRPC_ERROR_NONE, NULL);
+ }
+ } else {
+ grpc_polling_entity_add_to_pollset_set(exec_ctx, pollent,
+ glb_policy->base.interested_parties);
+ add_pending_pick(&glb_policy->pending_picks, pollent, initial_metadata,
+ initial_metadata_flags, target, on_complete);
+
+ if (!glb_policy->started_picking) {
+ start_picking(exec_ctx, glb_policy);
+ }
+ r = 0;
+ }
+ gpr_mu_unlock(&glb_policy->mu);
+ return r;
+}
+
+static grpc_connectivity_state glb_check_connectivity(
+ grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
+ grpc_error **connectivity_error) {
+ glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
+ grpc_connectivity_state st;
+ gpr_mu_lock(&glb_policy->mu);
+ st = grpc_connectivity_state_check(&glb_policy->state_tracker,
+ connectivity_error);
+ gpr_mu_unlock(&glb_policy->mu);
+ return st;
+}
+
+static void glb_ping_one(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
+ grpc_closure *closure) {
+ glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
+ gpr_mu_lock(&glb_policy->mu);
+ if (glb_policy->rr_policy) {
+ grpc_lb_policy_ping_one(exec_ctx, glb_policy->rr_policy, closure);
+ } else {
+ add_pending_ping(&glb_policy->pending_pings, closure);
+ if (!glb_policy->started_picking) {
+ start_picking(exec_ctx, glb_policy);
+ }
+ }
+ gpr_mu_unlock(&glb_policy->mu);
+}
+
+static void glb_notify_on_state_change(grpc_exec_ctx *exec_ctx,
+ grpc_lb_policy *pol,
+ grpc_connectivity_state *current,
+ grpc_closure *notify) {
+ glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
+ gpr_mu_lock(&glb_policy->mu);
+ grpc_connectivity_state_notify_on_state_change(
+ exec_ctx, &glb_policy->state_tracker, current, notify);
+
+ gpr_mu_unlock(&glb_policy->mu);
+}
+
+/*
+ * lb_client_data
+ *
+ * Used internally for the client call to the LB */
+typedef struct lb_client_data {
+ gpr_mu mu;
+
+ /* called once initial metadata's been sent */
+ grpc_closure md_sent;
+
+ /* called once initial metadata's been received */
+ grpc_closure md_rcvd;
+
+ /* called once the LoadBalanceRequest has been sent to the LB server. See
+ * src/proto/grpc/.../load_balancer.proto */
+ grpc_closure req_sent;
+
+ /* A response from the LB server has been received (or error). Process it */
+ grpc_closure res_rcvd;
+
+ /* After the client has sent a close to the LB server */
+ grpc_closure close_sent;
+
+ /* ... and the status from the LB server has been received */
+ grpc_closure srv_status_rcvd;
+
+ grpc_call *lb_call; /* streaming call to the LB server, */
+ gpr_timespec deadline; /* for the streaming call to the LB server */
+
+ grpc_metadata_array initial_metadata_recv; /* initial MD from LB server */
+ grpc_metadata_array trailing_metadata_recv; /* trailing MD from LB server */
+
+ /* what's being sent to the LB server. Note that its value may vary if the LB
+ * server indicates a redirect. */
+ grpc_byte_buffer *request_payload;
+
+ /* response from the LB server, if any. Processed in res_recv_cb() */
+ grpc_byte_buffer *response_payload;
+
+ /* the call's status and status detailset in srv_status_rcvd_cb() */
+ grpc_status_code status;
+ char *status_details;
+ size_t status_details_capacity;
+
+ /* pointer back to the enclosing policy */
+ glb_lb_policy *glb_policy;
+} lb_client_data;
+
+static void md_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
+static void md_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
+static void req_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
+static void res_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error);
+static void close_sent_cb(grpc_exec_ctx *exec_ctx, void *arg,
+ grpc_error *error);
+static void srv_status_rcvd_cb(grpc_exec_ctx *exec_ctx, void *arg,
+ grpc_error *error);
+
+static lb_client_data *lb_client_data_create(glb_lb_policy *glb_policy) {
+ lb_client_data *lb_client = gpr_malloc(sizeof(lb_client_data));
+ memset(lb_client, 0, sizeof(lb_client_data));
+
+ gpr_mu_init(&lb_client->mu);
+ grpc_closure_init(&lb_client->md_sent, md_sent_cb, lb_client);
+
+ grpc_closure_init(&lb_client->md_rcvd, md_recv_cb, lb_client);
+ grpc_closure_init(&lb_client->req_sent, req_sent_cb, lb_client);
+ grpc_closure_init(&lb_client->res_rcvd, res_recv_cb, lb_client);
+ grpc_closure_init(&lb_client->close_sent, close_sent_cb, lb_client);
+ grpc_closure_init(&lb_client->srv_status_rcvd, srv_status_rcvd_cb, lb_client);
+
+ /* TODO(dgq): get the deadline from the client config instead of fabricating
+ * one here. */
+ lb_client->deadline = gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC),
+ gpr_time_from_seconds(3, GPR_TIMESPAN));
+
+ lb_client->lb_call = grpc_channel_create_pollset_set_call(
+ glb_policy->lb_channel, NULL, GRPC_PROPAGATE_DEFAULTS,
+ glb_policy->base.interested_parties, "/BalanceLoad",
+ NULL, /* FIXME(dgq): which "host" value to use? */
+ lb_client->deadline, NULL);
+
+ grpc_metadata_array_init(&lb_client->initial_metadata_recv);
+ grpc_metadata_array_init(&lb_client->trailing_metadata_recv);
+
+ grpc_grpclb_request *request = grpc_grpclb_request_create(
+ "load.balanced.service.name"); /* FIXME(dgq): get the name of the load
+ balanced service from the resolver */
+ gpr_slice request_payload_slice = grpc_grpclb_request_encode(request);
+ lb_client->request_payload =
+ grpc_raw_byte_buffer_create(&request_payload_slice, 1);
+ gpr_slice_unref(request_payload_slice);
+ grpc_grpclb_request_destroy(request);
+
+ lb_client->status_details = NULL;
+ lb_client->status_details_capacity = 0;
+ lb_client->glb_policy = glb_policy;
+ return lb_client;
+}
+
+static void lb_client_data_destroy(lb_client_data *lb_client) {
+ grpc_call_destroy(lb_client->lb_call);
+ grpc_metadata_array_destroy(&lb_client->initial_metadata_recv);
+ grpc_metadata_array_destroy(&lb_client->trailing_metadata_recv);
+
+ grpc_byte_buffer_destroy(lb_client->request_payload);
+
+ gpr_free(lb_client->status_details);
+ gpr_mu_destroy(&lb_client->mu);
+ gpr_free(lb_client);
+}
+static grpc_call *lb_client_data_get_call(lb_client_data *lb_client) {
+ return lb_client->lb_call;
+}
+
+/*
+ * Auxiliary functions and LB client callbacks.
+ */
+static void query_for_backends(grpc_exec_ctx *exec_ctx,
+ glb_lb_policy *glb_policy) {
+ GPR_ASSERT(glb_policy->lb_channel != NULL);
+
+ glb_policy->lb_client = lb_client_data_create(glb_policy);
+ grpc_call_error call_error;
+ grpc_op ops[1];
+ memset(ops, 0, sizeof(ops));
+ grpc_op *op = ops;
+ op->op = GRPC_OP_SEND_INITIAL_METADATA;
+ op->data.send_initial_metadata.count = 0;
+ op->flags = 0;
+ op->reserved = NULL;
+ op++;
+ call_error = grpc_call_start_batch_and_execute(
+ exec_ctx, glb_policy->lb_client->lb_call, ops, (size_t)(op - ops),
+ &glb_policy->lb_client->md_sent);
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
+
+ op = ops;
+ op->op = GRPC_OP_RECV_STATUS_ON_CLIENT;
+ op->data.recv_status_on_client.trailing_metadata =
+ &glb_policy->lb_client->trailing_metadata_recv;
+ op->data.recv_status_on_client.status = &glb_policy->lb_client->status;
+ op->data.recv_status_on_client.status_details =
+ &glb_policy->lb_client->status_details;
+ op->data.recv_status_on_client.status_details_capacity =
+ &glb_policy->lb_client->status_details_capacity;
+ op->flags = 0;
+ op->reserved = NULL;
+ op++;
+ call_error = grpc_call_start_batch_and_execute(
+ exec_ctx, glb_policy->lb_client->lb_call, ops, (size_t)(op - ops),
+ &glb_policy->lb_client->srv_status_rcvd);
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
+}
+
+static void md_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
+ lb_client_data *lb_client = arg;
+ GPR_ASSERT(lb_client->lb_call);
+ grpc_op ops[1];
+ memset(ops, 0, sizeof(ops));
+ grpc_op *op = ops;
+ op->op = GRPC_OP_RECV_INITIAL_METADATA;
+ op->data.recv_initial_metadata = &lb_client->initial_metadata_recv;
+ op->flags = 0;
+ op->reserved = NULL;
+ op++;
+ grpc_call_error call_error = grpc_call_start_batch_and_execute(
+ exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
+ &lb_client->md_rcvd);
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
+}
+
+static void md_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
+ lb_client_data *lb_client = arg;
+ GPR_ASSERT(lb_client->lb_call);
+ grpc_op ops[1];
+ memset(ops, 0, sizeof(ops));
+ grpc_op *op = ops;
+
+ op->op = GRPC_OP_SEND_MESSAGE;
+ op->data.send_message = lb_client->request_payload;
+ op->flags = 0;
+ op->reserved = NULL;
+ op++;
+ grpc_call_error call_error = grpc_call_start_batch_and_execute(
+ exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
+ &lb_client->req_sent);
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
+}
+
+static void req_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
+ lb_client_data *lb_client = arg;
+
+ grpc_op ops[1];
+ memset(ops, 0, sizeof(ops));
+ grpc_op *op = ops;
+
+ op->op = GRPC_OP_RECV_MESSAGE;
+ op->data.recv_message = &lb_client->response_payload;
+ op->flags = 0;
+ op->reserved = NULL;
+ op++;
+ grpc_call_error call_error = grpc_call_start_batch_and_execute(
+ exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
+ &lb_client->res_rcvd);
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
+}
+
+static void res_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
+ lb_client_data *lb_client = arg;
+ grpc_op ops[2];
+ memset(ops, 0, sizeof(ops));
+ grpc_op *op = ops;
+ if (lb_client->response_payload != NULL) {
+ /* Received data from the LB server. Look inside
+ * lb_client->response_payload, for
+ * a serverlist. */
+ grpc_byte_buffer_reader bbr;
+ grpc_byte_buffer_reader_init(&bbr, lb_client->response_payload);
+ gpr_slice response_slice = grpc_byte_buffer_reader_readall(&bbr);
+ grpc_byte_buffer_destroy(lb_client->response_payload);
+ grpc_grpclb_serverlist *serverlist =
+ grpc_grpclb_response_parse_serverlist(response_slice);
+ if (serverlist != NULL) {
+ gpr_slice_unref(response_slice);
+ if (grpc_lb_glb_trace) {
+ gpr_log(GPR_INFO, "Serverlist with %zu servers received",
+ serverlist->num_servers);
+ }
+
+ /* update serverlist */
+ if (serverlist->num_servers > 0) {
+ if (grpc_grpclb_serverlist_equals(lb_client->glb_policy->serverlist,
+ serverlist)) {
+ if (grpc_lb_glb_trace) {
+ gpr_log(GPR_INFO,
+ "Incoming server list identical to current, ignoring.");
+ }
+ } else { /* new serverlist */
+ if (lb_client->glb_policy->serverlist != NULL) {
+ /* dispose of the old serverlist */
+ grpc_grpclb_destroy_serverlist(lb_client->glb_policy->serverlist);
+ }
+ /* and update the copy in the glb_lb_policy instance */
+ lb_client->glb_policy->serverlist = serverlist;
+ }
+ if (lb_client->glb_policy->rr_policy == NULL) {
+ /* initial "handover", in this case from a null RR policy, meaning
+ * it'll just create the first RR policy instance */
+ rr_handover(exec_ctx, lb_client->glb_policy, error);
+ } else {
+ /* unref the RR policy, eventually leading to its substitution with a
+ * new one constructed from the received serverlist (see
+ * rr_connectivity_changed) */
+ GRPC_LB_POLICY_UNREF(exec_ctx, lb_client->glb_policy->rr_policy,
+ "serverlist_received");
+ }
+ } else {
+ if (grpc_lb_glb_trace) {
+ gpr_log(GPR_INFO,
+ "Received empty server list. Picks will stay pending until a "
+ "response with > 0 servers is received");
+ }
+ }
+
+ /* keep listening for serverlist updates */
+ op->op = GRPC_OP_RECV_MESSAGE;
+ op->data.recv_message = &lb_client->response_payload;
+ op->flags = 0;
+ op->reserved = NULL;
+ op++;
+ const grpc_call_error call_error = grpc_call_start_batch_and_execute(
+ exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
+ &lb_client->res_rcvd); /* loop */
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
+ return;
+ }
+
+ GPR_ASSERT(serverlist == NULL);
+ gpr_log(GPR_ERROR, "Invalid LB response received: '%s'",
+ gpr_dump_slice(response_slice, GPR_DUMP_ASCII));
+ gpr_slice_unref(response_slice);
+
+ /* Disconnect from server returning invalid response. */
+ op->op = GRPC_OP_SEND_CLOSE_FROM_CLIENT;
+ op->flags = 0;
+ op->reserved = NULL;
+ op++;
+ grpc_call_error call_error = grpc_call_start_batch_and_execute(
+ exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops),
+ &lb_client->close_sent);
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
+ }
+ /* empty payload: call cancelled by server. Cleanups happening in
+ * srv_status_rcvd_cb */
+}
+
+static void close_sent_cb(grpc_exec_ctx *exec_ctx, void *arg,
+ grpc_error *error) {
+ if (grpc_lb_glb_trace) {
+ gpr_log(GPR_INFO,
+ "Close from LB client sent. Waiting from server status now");
+ }
+}
+
+static void srv_status_rcvd_cb(grpc_exec_ctx *exec_ctx, void *arg,
+ grpc_error *error) {
+ lb_client_data *lb_client = arg;
+ if (grpc_lb_glb_trace) {
+ gpr_log(GPR_INFO,
+ "status from lb server received. Status = %d, Details = '%s', "
+ "Capaticy "
+ "= %zu",
+ lb_client->status, lb_client->status_details,
+ lb_client->status_details_capacity);
+ }
+ /* TODO(dgq): deal with stream termination properly (fire up another one? fail
+ * the original call?) */
+}
+
+/* Code wiring the policy with the rest of the core */
+static const grpc_lb_policy_vtable glb_lb_policy_vtable = {
+ glb_destroy, glb_shutdown, glb_pick,
+ glb_cancel_pick, glb_cancel_picks, glb_ping_one,
+ glb_exit_idle, glb_check_connectivity, glb_notify_on_state_change};
+
+static void glb_factory_ref(grpc_lb_policy_factory *factory) {}
+
+static void glb_factory_unref(grpc_lb_policy_factory *factory) {}
+
+static const grpc_lb_policy_factory_vtable glb_factory_vtable = {
+ glb_factory_ref, glb_factory_unref, glb_create, "grpclb"};
+
+static grpc_lb_policy_factory glb_lb_policy_factory = {&glb_factory_vtable};
+
+grpc_lb_policy_factory *grpc_glb_lb_factory_create() {
+ return &glb_lb_policy_factory;
+}
+
+/* Plugin registration */
+void grpc_lb_policy_grpclb_init() {
+ grpc_register_lb_policy(grpc_glb_lb_factory_create());
+ grpc_register_tracer("glb", &grpc_lb_glb_trace);
+}
+
+void grpc_lb_policy_grpclb_shutdown() {}