diff options
Diffstat (limited to 'src/core/ext/lb_policy/grpclb/grpclb.c')
-rw-r--r-- | src/core/ext/lb_policy/grpclb/grpclb.c | 1039 |
1 files changed, 1039 insertions, 0 deletions
diff --git a/src/core/ext/lb_policy/grpclb/grpclb.c b/src/core/ext/lb_policy/grpclb/grpclb.c new file mode 100644 index 0000000000..dec25efe61 --- /dev/null +++ b/src/core/ext/lb_policy/grpclb/grpclb.c @@ -0,0 +1,1039 @@ +/* + * + * Copyright 2016, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/** Implementation of the gRPC LB policy. + * + * This policy takes as input a set of resolved addresses {a1..an} for which the + * LB set was set (it's the resolver's responsibility to ensure this). That is + * to say, {a1..an} represent a collection of LB servers. + * + * An internal channel (\a glb_lb_policy.lb_channel) is created over {a1..an}. + * This channel behaves just like a regular channel. In particular, the + * constructed URI over the addresses a1..an will use the default pick first + * policy to select from this list of LB server backends. + * + * The first time the policy gets a request for a pick, a ping, or to exit the + * idle state, \a query_for_backends() is called. It creates an instance of \a + * lb_client_data, an internal struct meant to contain the data associated with + * the internal communication with the LB server. This instance is created via + * \a lb_client_data_create(). There, the call over lb_channel to pick-first + * from {a1..an} is created, the \a LoadBalancingRequest message is assembled + * and all necessary callbacks for the progress of the internal call configured. + * + * Back in \a query_for_backends(), the internal *streaming* call to the LB + * server (whichever address from {a1..an} pick-first chose) is kicked off. + * It'll progress over the callbacks configured in \a lb_client_data_create() + * (see the field docstrings of \a lb_client_data for more details). + * + * If the call fails with UNIMPLEMENTED, the original call will also fail. + * There's a misconfiguration somewhere: at least one of {a1..an} isn't a LB + * server, which contradicts the LB bit being set. If the internal call times + * out, the usual behavior of pick-first applies, continuing to pick from the + * list {a1..an}. + * + * Upon sucesss, a \a LoadBalancingResponse is expected in \a res_recv_cb. An + * invalid one results in the termination of the streaming call. A new streaming + * call should be created if possible, failing the original call otherwise. + * For a valid \a LoadBalancingResponse, the server list of actual backends is + * extracted. A Round Robin policy will be created from this list. There are two + * possible scenarios: + * + * 1. This is the first server list received. There was no previous instance of + * the Round Robin policy. \a rr_handover() will instantiate the RR policy + * and perform all the pending operations over it. + * 2. There's already a RR policy instance active. We need to introduce the new + * one build from the new serverlist, but taking care not to disrupt the + * operations in progress over the old RR instance. This is done by + * decreasing the reference count on the old policy. The moment no more + * references are held on the old RR policy, it'll be destroyed and \a + * rr_connectivity_changed notified with a \a GRPC_CHANNEL_SHUTDOWN state. + * At this point we can transition to a new RR instance safely, which is done + * once again via \a rr_handover(). + * + * + * Once a RR policy instance is in place (and getting updated as described), + * calls to for a pick, a ping or a cancellation will be serviced right away by + * forwarding them to the RR instance. Any time there's no RR policy available + * (ie, right after the creation of the gRPCLB policy, if an empty serverlist + * is received, etc), pick/ping requests are added to a list of pending + * picks/pings to be flushed and serviced as part of \a rr_handover() the moment + * the RR policy instance becomes available. + * + * \see https://github.com/grpc/grpc/blob/master/doc/load-balancing.md for the + * high level design and details. */ + +/* TODO(dgq): + * - Implement LB service forwarding (point 2c. in the doc's diagram). + */ + +#include <string.h> + +#include <grpc/byte_buffer_reader.h> +#include <grpc/grpc.h> +#include <grpc/support/alloc.h> +#include <grpc/support/host_port.h> +#include <grpc/support/string_util.h> + +#include "src/core/ext/client_config/client_channel_factory.h" +#include "src/core/ext/client_config/lb_policy_registry.h" +#include "src/core/ext/client_config/parse_address.h" +#include "src/core/ext/lb_policy/grpclb/grpclb.h" +#include "src/core/ext/lb_policy/grpclb/load_balancer_api.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/support/string.h" +#include "src/core/lib/surface/call.h" +#include "src/core/lib/surface/channel.h" + +int grpc_lb_glb_trace = 0; + +typedef struct wrapped_rr_closure_arg { + /* the original closure. Usually a on_complete/notify cb for pick() and ping() + * calls against the internal RR instance, respectively. */ + grpc_closure *wrapped_closure; + + /* The RR instance related to the closure */ + grpc_lb_policy *rr_policy; + + /* when not NULL, represents a pending_{pick,ping} node to be freed upon + * closure execution */ + void *owning_pending_node; /* to be freed if not NULL */ +} wrapped_rr_closure_arg; + +/* The \a on_complete closure passed as part of the pick requires keeping a + * reference to its associated round robin instance. We wrap this closure in + * order to unref the round robin instance upon its invocation */ +static void wrapped_rr_closure(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error) { + wrapped_rr_closure_arg *wc_arg = arg; + if (wc_arg->rr_policy != NULL) { + if (grpc_lb_glb_trace) { + gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")", + (intptr_t)wc_arg->rr_policy); + } + GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "wrapped_rr_closure"); + } + GPR_ASSERT(wc_arg->wrapped_closure != NULL); + grpc_exec_ctx_sched(exec_ctx, wc_arg->wrapped_closure, error, NULL); + gpr_free(wc_arg->owning_pending_node); +} + +/* Linked list of pending pick requests. It stores all information needed to + * eventually call (Round Robin's) pick() on them. They mainly stay pending + * waiting for the RR policy to be created/updated. + * + * One particularity is the wrapping of the user-provided \a on_complete closure + * (in \a wrapped_on_complete and \a wrapped_on_complete_arg). This is needed in + * order to correctly unref the RR policy instance upon completion of the pick. + * See \a wrapped_rr_closure for details. */ +typedef struct pending_pick { + struct pending_pick *next; + + /* polling entity for the pick()'s async notification */ + grpc_polling_entity *pollent; + + /* the initial metadata for the pick. See grpc_lb_policy_pick() */ + grpc_metadata_batch *initial_metadata; + + /* bitmask passed to pick() and used for selective cancelling. See + * grpc_lb_policy_cancel_picks() */ + uint32_t initial_metadata_flags; + + /* output argument where to store the pick()ed connected subchannel, or NULL + * upon error. */ + grpc_connected_subchannel **target; + + /* a closure wrapping the original on_complete one to be invoked once the + * pick() has completed (regardless of success) */ + grpc_closure wrapped_on_complete; + + /* args for wrapped_on_complete */ + wrapped_rr_closure_arg wrapped_on_complete_arg; +} pending_pick; + +static void add_pending_pick(pending_pick **root, grpc_polling_entity *pollent, + grpc_metadata_batch *initial_metadata, + uint32_t initial_metadata_flags, + grpc_connected_subchannel **target, + grpc_closure *on_complete) { + pending_pick *pp = gpr_malloc(sizeof(*pp)); + memset(pp, 0, sizeof(pending_pick)); + memset(&pp->wrapped_on_complete_arg, 0, sizeof(wrapped_rr_closure_arg)); + pp->next = *root; + pp->pollent = pollent; + pp->target = target; + pp->initial_metadata = initial_metadata; + pp->initial_metadata_flags = initial_metadata_flags; + pp->wrapped_on_complete_arg.wrapped_closure = on_complete; + grpc_closure_init(&pp->wrapped_on_complete, wrapped_rr_closure, + &pp->wrapped_on_complete_arg); + *root = pp; +} + +/* Same as the \a pending_pick struct but for ping operations */ +typedef struct pending_ping { + struct pending_ping *next; + + /* a closure wrapping the original on_complete one to be invoked once the + * ping() has completed (regardless of success) */ + grpc_closure wrapped_notify; + + /* args for wrapped_notify */ + wrapped_rr_closure_arg wrapped_notify_arg; +} pending_ping; + +static void add_pending_ping(pending_ping **root, grpc_closure *notify) { + pending_ping *pping = gpr_malloc(sizeof(*pping)); + memset(pping, 0, sizeof(pending_ping)); + memset(&pping->wrapped_notify_arg, 0, sizeof(wrapped_rr_closure_arg)); + pping->next = *root; + grpc_closure_init(&pping->wrapped_notify, wrapped_rr_closure, + &pping->wrapped_notify_arg); + pping->wrapped_notify_arg.wrapped_closure = notify; + *root = pping; +} + +/* + * glb_lb_policy + */ +typedef struct rr_connectivity_data rr_connectivity_data; +struct lb_client_data; +static const grpc_lb_policy_vtable glb_lb_policy_vtable; +typedef struct glb_lb_policy { + /** base policy: must be first */ + grpc_lb_policy base; + + /** mutex protecting remaining members */ + gpr_mu mu; + + grpc_client_channel_factory *cc_factory; + + /** for communicating with the LB server */ + grpc_channel *lb_channel; + + /** the RR policy to use of the backend servers returned by the LB server */ + grpc_lb_policy *rr_policy; + + bool started_picking; + + /** our connectivity state tracker */ + grpc_connectivity_state_tracker state_tracker; + + /** stores the deserialized response from the LB. May be NULL until one such + * response has arrived. */ + grpc_grpclb_serverlist *serverlist; + + /** list of picks that are waiting on RR's policy connectivity */ + pending_pick *pending_picks; + + /** list of pings that are waiting on RR's policy connectivity */ + pending_ping *pending_pings; + + /** client data associated with the LB server communication */ + struct lb_client_data *lb_client; + + /** for tracking of the RR connectivity */ + rr_connectivity_data *rr_connectivity; + + /* a wrapped (see \a wrapped_rr_closure) on-complete closure for readily + * available RR picks */ + grpc_closure wrapped_on_complete; + + /* arguments for the wrapped_on_complete closure */ + wrapped_rr_closure_arg wc_arg; +} glb_lb_policy; + +/* Keeps track and reacts to changes in connectivity of the RR instance */ +struct rr_connectivity_data { + grpc_closure on_change; + grpc_connectivity_state state; + glb_lb_policy *glb_policy; +}; + +static grpc_lb_policy *create_rr(grpc_exec_ctx *exec_ctx, + const grpc_grpclb_serverlist *serverlist, + glb_lb_policy *glb_policy) { + /* TODO(dgq): support mixed ip version */ + GPR_ASSERT(serverlist != NULL && serverlist->num_servers > 0); + char **host_ports = gpr_malloc(sizeof(char *) * serverlist->num_servers); + for (size_t i = 0; i < serverlist->num_servers; ++i) { + gpr_join_host_port(&host_ports[i], serverlist->servers[i]->ip_address, + serverlist->servers[i]->port); + } + + size_t uri_path_len; + char *concat_ipports = gpr_strjoin_sep( + (const char **)host_ports, serverlist->num_servers, ",", &uri_path_len); + + grpc_lb_policy_args args; + args.client_channel_factory = glb_policy->cc_factory; + args.addresses = gpr_malloc(sizeof(grpc_resolved_addresses)); + args.addresses->naddrs = serverlist->num_servers; + args.addresses->addrs = + gpr_malloc(sizeof(grpc_resolved_address) * args.addresses->naddrs); + size_t out_addrs_idx = 0; + for (size_t i = 0; i < serverlist->num_servers; ++i) { + grpc_uri uri; + struct sockaddr_storage sa; + size_t sa_len; + uri.path = host_ports[i]; + if (parse_ipv4(&uri, &sa, &sa_len)) { /* TODO(dgq): add support for ipv6 */ + memcpy(args.addresses->addrs[out_addrs_idx].addr, &sa, sa_len); + args.addresses->addrs[out_addrs_idx].len = sa_len; + ++out_addrs_idx; + } else { + gpr_log(GPR_ERROR, "Invalid LB service address '%s', ignoring.", + host_ports[i]); + } + } + + grpc_lb_policy *rr = grpc_lb_policy_create(exec_ctx, "round_robin", &args); + + gpr_free(concat_ipports); + for (size_t i = 0; i < serverlist->num_servers; i++) { + gpr_free(host_ports[i]); + } + gpr_free(host_ports); + gpr_free(args.addresses->addrs); + gpr_free(args.addresses); + return rr; +} + +static void rr_handover(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy, + grpc_error *error) { + GRPC_ERROR_REF(error); + glb_policy->rr_policy = + create_rr(exec_ctx, glb_policy->serverlist, glb_policy); + + if (grpc_lb_glb_trace) { + gpr_log(GPR_INFO, "Created RR policy (0x%" PRIxPTR ")", + (intptr_t)glb_policy->rr_policy); + } + GPR_ASSERT(glb_policy->rr_policy != NULL); + glb_policy->rr_connectivity->state = grpc_lb_policy_check_connectivity( + exec_ctx, glb_policy->rr_policy, &error); + grpc_lb_policy_notify_on_state_change( + exec_ctx, glb_policy->rr_policy, &glb_policy->rr_connectivity->state, + &glb_policy->rr_connectivity->on_change); + grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker, + glb_policy->rr_connectivity->state, error, + "rr_handover"); + grpc_lb_policy_exit_idle(exec_ctx, glb_policy->rr_policy); + + /* flush pending ops */ + pending_pick *pp; + while ((pp = glb_policy->pending_picks)) { + glb_policy->pending_picks = pp->next; + GRPC_LB_POLICY_REF(glb_policy->rr_policy, "rr_handover_pending_pick"); + pp->wrapped_on_complete_arg.rr_policy = glb_policy->rr_policy; + if (grpc_lb_glb_trace) { + gpr_log(GPR_INFO, "Pending pick about to PICK from 0x%" PRIxPTR "", + (intptr_t)glb_policy->rr_policy); + } + grpc_lb_policy_pick(exec_ctx, glb_policy->rr_policy, pp->pollent, + pp->initial_metadata, pp->initial_metadata_flags, + pp->target, &pp->wrapped_on_complete); + pp->wrapped_on_complete_arg.owning_pending_node = pp; + } + + pending_ping *pping; + while ((pping = glb_policy->pending_pings)) { + glb_policy->pending_pings = pping->next; + GRPC_LB_POLICY_REF(glb_policy->rr_policy, "rr_handover_pending_ping"); + pping->wrapped_notify_arg.rr_policy = glb_policy->rr_policy; + if (grpc_lb_glb_trace) { + gpr_log(GPR_INFO, "Pending ping about to PING from 0x%" PRIxPTR "", + (intptr_t)glb_policy->rr_policy); + } + grpc_lb_policy_ping_one(exec_ctx, glb_policy->rr_policy, + &pping->wrapped_notify); + pping->wrapped_notify_arg.owning_pending_node = pping; + } + GRPC_ERROR_UNREF(error); +} + +static void rr_connectivity_changed(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error) { + rr_connectivity_data *rr_conn_data = arg; + glb_lb_policy *glb_policy = rr_conn_data->glb_policy; + if (rr_conn_data->state == GRPC_CHANNEL_SHUTDOWN) { + if (glb_policy->serverlist != NULL) { + /* a RR policy is shutting down but there's a serverlist available -> + * perform a handover */ + rr_handover(exec_ctx, glb_policy, error); + } else { + /* shutting down and no new serverlist available. Bail out. */ + gpr_free(rr_conn_data); + } + } else { + if (error == GRPC_ERROR_NONE) { + /* RR not shutting down. Mimic the RR's policy state */ + grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker, + rr_conn_data->state, error, + "rr_connectivity_changed"); + /* resubscribe */ + grpc_lb_policy_notify_on_state_change(exec_ctx, glb_policy->rr_policy, + &rr_conn_data->state, + &rr_conn_data->on_change); + } else { /* error */ + gpr_free(rr_conn_data); + } + } + GRPC_ERROR_UNREF(error); +} + +static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx, + grpc_lb_policy_factory *factory, + grpc_lb_policy_args *args) { + glb_lb_policy *glb_policy = gpr_malloc(sizeof(*glb_policy)); + memset(glb_policy, 0, sizeof(*glb_policy)); + + /* All input addresses in args->addresses come from a resolver that claims + * they are LB services. It's the resolver's responsibility to make sure this + * policy is only instantiated and used in that case. + * + * Create a client channel over them to communicate with a LB service */ + glb_policy->cc_factory = args->client_channel_factory; + GPR_ASSERT(glb_policy->cc_factory != NULL); + if (args->addresses->naddrs == 0) { + return NULL; + } + + /* construct a target from the args->addresses, in the form + * ipvX://ip1:port1,ip2:port2,... + * TODO(dgq): support mixed ip version */ + char **addr_strs = gpr_malloc(sizeof(char *) * args->addresses->naddrs); + addr_strs[0] = + grpc_sockaddr_to_uri((const struct sockaddr *)&args->addresses->addrs[0]); + for (size_t i = 1; i < args->addresses->naddrs; i++) { + GPR_ASSERT(grpc_sockaddr_to_string( + &addr_strs[i], + (const struct sockaddr *)&args->addresses->addrs[i], + true) == 0); + } + size_t uri_path_len; + char *target_uri_str = gpr_strjoin_sep( + (const char **)addr_strs, args->addresses->naddrs, ",", &uri_path_len); + + /* will pick using pick_first */ + glb_policy->lb_channel = grpc_client_channel_factory_create_channel( + exec_ctx, glb_policy->cc_factory, target_uri_str, + GRPC_CLIENT_CHANNEL_TYPE_LOAD_BALANCING, NULL); + + gpr_free(target_uri_str); + for (size_t i = 0; i < args->addresses->naddrs; i++) { + gpr_free(addr_strs[i]); + } + gpr_free(addr_strs); + + if (glb_policy->lb_channel == NULL) { + gpr_free(glb_policy); + return NULL; + } + + rr_connectivity_data *rr_connectivity = + gpr_malloc(sizeof(rr_connectivity_data)); + memset(rr_connectivity, 0, sizeof(rr_connectivity_data)); + grpc_closure_init(&rr_connectivity->on_change, rr_connectivity_changed, + rr_connectivity); + rr_connectivity->glb_policy = glb_policy; + glb_policy->rr_connectivity = rr_connectivity; + + grpc_lb_policy_init(&glb_policy->base, &glb_lb_policy_vtable); + gpr_mu_init(&glb_policy->mu); + grpc_connectivity_state_init(&glb_policy->state_tracker, GRPC_CHANNEL_IDLE, + "grpclb"); + return &glb_policy->base; +} + +static void glb_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) { + glb_lb_policy *glb_policy = (glb_lb_policy *)pol; + GPR_ASSERT(glb_policy->pending_picks == NULL); + GPR_ASSERT(glb_policy->pending_pings == NULL); + grpc_channel_destroy(glb_policy->lb_channel); + glb_policy->lb_channel = NULL; + grpc_connectivity_state_destroy(exec_ctx, &glb_policy->state_tracker); + if (glb_policy->serverlist != NULL) { + grpc_grpclb_destroy_serverlist(glb_policy->serverlist); + } + gpr_mu_destroy(&glb_policy->mu); + gpr_free(glb_policy); +} + +static void lb_client_data_destroy(struct lb_client_data *lb_client); +static void glb_shutdown(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) { + glb_lb_policy *glb_policy = (glb_lb_policy *)pol; + gpr_mu_lock(&glb_policy->mu); + + pending_pick *pp = glb_policy->pending_picks; + glb_policy->pending_picks = NULL; + pending_ping *pping = glb_policy->pending_pings; + glb_policy->pending_pings = NULL; + gpr_mu_unlock(&glb_policy->mu); + + while (pp != NULL) { + pending_pick *next = pp->next; + *pp->target = NULL; + grpc_exec_ctx_sched(exec_ctx, &pp->wrapped_on_complete, GRPC_ERROR_NONE, + NULL); + gpr_free(pp); + pp = next; + } + + while (pping != NULL) { + pending_ping *next = pping->next; + grpc_exec_ctx_sched(exec_ctx, &pping->wrapped_notify, GRPC_ERROR_NONE, + NULL); + pping = next; + } + + if (glb_policy->rr_policy) { + /* unsubscribe */ + grpc_lb_policy_notify_on_state_change( + exec_ctx, glb_policy->rr_policy, NULL, + &glb_policy->rr_connectivity->on_change); + GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "glb_shutdown"); + } + + lb_client_data_destroy(glb_policy->lb_client); + glb_policy->lb_client = NULL; + + grpc_connectivity_state_set( + exec_ctx, &glb_policy->state_tracker, GRPC_CHANNEL_SHUTDOWN, + GRPC_ERROR_CREATE("Channel Shutdown"), "glb_shutdown"); +} + +static void glb_cancel_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol, + grpc_connected_subchannel **target) { + glb_lb_policy *glb_policy = (glb_lb_policy *)pol; + gpr_mu_lock(&glb_policy->mu); + pending_pick *pp = glb_policy->pending_picks; + glb_policy->pending_picks = NULL; + while (pp != NULL) { + pending_pick *next = pp->next; + if (pp->target == target) { + grpc_polling_entity_del_from_pollset_set( + exec_ctx, pp->pollent, glb_policy->base.interested_parties); + *target = NULL; + grpc_exec_ctx_sched(exec_ctx, &pp->wrapped_on_complete, + GRPC_ERROR_CANCELLED, NULL); + gpr_free(pp); + } else { + pp->next = glb_policy->pending_picks; + glb_policy->pending_picks = pp; + } + pp = next; + } + gpr_mu_unlock(&glb_policy->mu); +} + +static grpc_call *lb_client_data_get_call(struct lb_client_data *lb_client); +static void glb_cancel_picks(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol, + uint32_t initial_metadata_flags_mask, + uint32_t initial_metadata_flags_eq) { + glb_lb_policy *glb_policy = (glb_lb_policy *)pol; + gpr_mu_lock(&glb_policy->mu); + if (glb_policy->lb_client != NULL) { + /* cancel the call to the load balancer service, if any */ + grpc_call_cancel(lb_client_data_get_call(glb_policy->lb_client), NULL); + } + pending_pick *pp = glb_policy->pending_picks; + glb_policy->pending_picks = NULL; + while (pp != NULL) { + pending_pick *next = pp->next; + if ((pp->initial_metadata_flags & initial_metadata_flags_mask) == + initial_metadata_flags_eq) { + grpc_polling_entity_del_from_pollset_set( + exec_ctx, pp->pollent, glb_policy->base.interested_parties); + grpc_exec_ctx_sched(exec_ctx, &pp->wrapped_on_complete, + GRPC_ERROR_CANCELLED, NULL); + gpr_free(pp); + } else { + pp->next = glb_policy->pending_picks; + glb_policy->pending_picks = pp; + } + pp = next; + } + gpr_mu_unlock(&glb_policy->mu); +} + +static void query_for_backends(grpc_exec_ctx *exec_ctx, + glb_lb_policy *glb_policy); +static void start_picking(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy) { + glb_policy->started_picking = true; + query_for_backends(exec_ctx, glb_policy); +} + +static void glb_exit_idle(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) { + glb_lb_policy *glb_policy = (glb_lb_policy *)pol; + gpr_mu_lock(&glb_policy->mu); + if (!glb_policy->started_picking) { + start_picking(exec_ctx, glb_policy); + } + gpr_mu_unlock(&glb_policy->mu); +} + +static int glb_pick(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol, + grpc_polling_entity *pollent, + grpc_metadata_batch *initial_metadata, + uint32_t initial_metadata_flags, + grpc_connected_subchannel **target, + grpc_closure *on_complete) { + glb_lb_policy *glb_policy = (glb_lb_policy *)pol; + gpr_mu_lock(&glb_policy->mu); + int r; + + if (glb_policy->rr_policy != NULL) { + if (grpc_lb_glb_trace) { + gpr_log(GPR_INFO, "about to PICK from 0x%" PRIxPTR "", + (intptr_t)glb_policy->rr_policy); + } + GRPC_LB_POLICY_REF(glb_policy->rr_policy, "glb_pick"); + memset(&glb_policy->wc_arg, 0, sizeof(wrapped_rr_closure_arg)); + glb_policy->wc_arg.rr_policy = glb_policy->rr_policy; + glb_policy->wc_arg.wrapped_closure = on_complete; + grpc_closure_init(&glb_policy->wrapped_on_complete, wrapped_rr_closure, + &glb_policy->wc_arg); + r = grpc_lb_policy_pick(exec_ctx, glb_policy->rr_policy, pollent, + initial_metadata, initial_metadata_flags, target, + &glb_policy->wrapped_on_complete); + if (r != 0) { + /* the call to grpc_lb_policy_pick has been sychronous. Unreffing the RR + * policy and notify the original callback */ + glb_policy->wc_arg.wrapped_closure = NULL; + if (grpc_lb_glb_trace) { + gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")", + (intptr_t)glb_policy->wc_arg.rr_policy); + } + GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->wc_arg.rr_policy, "glb_pick"); + grpc_exec_ctx_sched(exec_ctx, glb_policy->wc_arg.wrapped_closure, + GRPC_ERROR_NONE, NULL); + } + } else { + grpc_polling_entity_add_to_pollset_set(exec_ctx, pollent, + glb_policy->base.interested_parties); + add_pending_pick(&glb_policy->pending_picks, pollent, initial_metadata, + initial_metadata_flags, target, on_complete); + + if (!glb_policy->started_picking) { + start_picking(exec_ctx, glb_policy); + } + r = 0; + } + gpr_mu_unlock(&glb_policy->mu); + return r; +} + +static grpc_connectivity_state glb_check_connectivity( + grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol, + grpc_error **connectivity_error) { + glb_lb_policy *glb_policy = (glb_lb_policy *)pol; + grpc_connectivity_state st; + gpr_mu_lock(&glb_policy->mu); + st = grpc_connectivity_state_check(&glb_policy->state_tracker, + connectivity_error); + gpr_mu_unlock(&glb_policy->mu); + return st; +} + +static void glb_ping_one(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol, + grpc_closure *closure) { + glb_lb_policy *glb_policy = (glb_lb_policy *)pol; + gpr_mu_lock(&glb_policy->mu); + if (glb_policy->rr_policy) { + grpc_lb_policy_ping_one(exec_ctx, glb_policy->rr_policy, closure); + } else { + add_pending_ping(&glb_policy->pending_pings, closure); + if (!glb_policy->started_picking) { + start_picking(exec_ctx, glb_policy); + } + } + gpr_mu_unlock(&glb_policy->mu); +} + +static void glb_notify_on_state_change(grpc_exec_ctx *exec_ctx, + grpc_lb_policy *pol, + grpc_connectivity_state *current, + grpc_closure *notify) { + glb_lb_policy *glb_policy = (glb_lb_policy *)pol; + gpr_mu_lock(&glb_policy->mu); + grpc_connectivity_state_notify_on_state_change( + exec_ctx, &glb_policy->state_tracker, current, notify); + + gpr_mu_unlock(&glb_policy->mu); +} + +/* + * lb_client_data + * + * Used internally for the client call to the LB */ +typedef struct lb_client_data { + gpr_mu mu; + + /* called once initial metadata's been sent */ + grpc_closure md_sent; + + /* called once initial metadata's been received */ + grpc_closure md_rcvd; + + /* called once the LoadBalanceRequest has been sent to the LB server. See + * src/proto/grpc/.../load_balancer.proto */ + grpc_closure req_sent; + + /* A response from the LB server has been received (or error). Process it */ + grpc_closure res_rcvd; + + /* After the client has sent a close to the LB server */ + grpc_closure close_sent; + + /* ... and the status from the LB server has been received */ + grpc_closure srv_status_rcvd; + + grpc_call *lb_call; /* streaming call to the LB server, */ + gpr_timespec deadline; /* for the streaming call to the LB server */ + + grpc_metadata_array initial_metadata_recv; /* initial MD from LB server */ + grpc_metadata_array trailing_metadata_recv; /* trailing MD from LB server */ + + /* what's being sent to the LB server. Note that its value may vary if the LB + * server indicates a redirect. */ + grpc_byte_buffer *request_payload; + + /* response from the LB server, if any. Processed in res_recv_cb() */ + grpc_byte_buffer *response_payload; + + /* the call's status and status detailset in srv_status_rcvd_cb() */ + grpc_status_code status; + char *status_details; + size_t status_details_capacity; + + /* pointer back to the enclosing policy */ + glb_lb_policy *glb_policy; +} lb_client_data; + +static void md_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error); +static void md_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error); +static void req_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error); +static void res_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error); +static void close_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error); +static void srv_status_rcvd_cb(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error); + +static lb_client_data *lb_client_data_create(glb_lb_policy *glb_policy) { + lb_client_data *lb_client = gpr_malloc(sizeof(lb_client_data)); + memset(lb_client, 0, sizeof(lb_client_data)); + + gpr_mu_init(&lb_client->mu); + grpc_closure_init(&lb_client->md_sent, md_sent_cb, lb_client); + + grpc_closure_init(&lb_client->md_rcvd, md_recv_cb, lb_client); + grpc_closure_init(&lb_client->req_sent, req_sent_cb, lb_client); + grpc_closure_init(&lb_client->res_rcvd, res_recv_cb, lb_client); + grpc_closure_init(&lb_client->close_sent, close_sent_cb, lb_client); + grpc_closure_init(&lb_client->srv_status_rcvd, srv_status_rcvd_cb, lb_client); + + /* TODO(dgq): get the deadline from the client config instead of fabricating + * one here. */ + lb_client->deadline = gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC), + gpr_time_from_seconds(3, GPR_TIMESPAN)); + + lb_client->lb_call = grpc_channel_create_pollset_set_call( + glb_policy->lb_channel, NULL, GRPC_PROPAGATE_DEFAULTS, + glb_policy->base.interested_parties, "/BalanceLoad", + NULL, /* FIXME(dgq): which "host" value to use? */ + lb_client->deadline, NULL); + + grpc_metadata_array_init(&lb_client->initial_metadata_recv); + grpc_metadata_array_init(&lb_client->trailing_metadata_recv); + + grpc_grpclb_request *request = grpc_grpclb_request_create( + "load.balanced.service.name"); /* FIXME(dgq): get the name of the load + balanced service from the resolver */ + gpr_slice request_payload_slice = grpc_grpclb_request_encode(request); + lb_client->request_payload = + grpc_raw_byte_buffer_create(&request_payload_slice, 1); + gpr_slice_unref(request_payload_slice); + grpc_grpclb_request_destroy(request); + + lb_client->status_details = NULL; + lb_client->status_details_capacity = 0; + lb_client->glb_policy = glb_policy; + return lb_client; +} + +static void lb_client_data_destroy(lb_client_data *lb_client) { + grpc_call_destroy(lb_client->lb_call); + grpc_metadata_array_destroy(&lb_client->initial_metadata_recv); + grpc_metadata_array_destroy(&lb_client->trailing_metadata_recv); + + grpc_byte_buffer_destroy(lb_client->request_payload); + + gpr_free(lb_client->status_details); + gpr_mu_destroy(&lb_client->mu); + gpr_free(lb_client); +} +static grpc_call *lb_client_data_get_call(lb_client_data *lb_client) { + return lb_client->lb_call; +} + +/* + * Auxiliary functions and LB client callbacks. + */ +static void query_for_backends(grpc_exec_ctx *exec_ctx, + glb_lb_policy *glb_policy) { + GPR_ASSERT(glb_policy->lb_channel != NULL); + + glb_policy->lb_client = lb_client_data_create(glb_policy); + grpc_call_error call_error; + grpc_op ops[1]; + memset(ops, 0, sizeof(ops)); + grpc_op *op = ops; + op->op = GRPC_OP_SEND_INITIAL_METADATA; + op->data.send_initial_metadata.count = 0; + op->flags = 0; + op->reserved = NULL; + op++; + call_error = grpc_call_start_batch_and_execute( + exec_ctx, glb_policy->lb_client->lb_call, ops, (size_t)(op - ops), + &glb_policy->lb_client->md_sent); + GPR_ASSERT(GRPC_CALL_OK == call_error); + + op = ops; + op->op = GRPC_OP_RECV_STATUS_ON_CLIENT; + op->data.recv_status_on_client.trailing_metadata = + &glb_policy->lb_client->trailing_metadata_recv; + op->data.recv_status_on_client.status = &glb_policy->lb_client->status; + op->data.recv_status_on_client.status_details = + &glb_policy->lb_client->status_details; + op->data.recv_status_on_client.status_details_capacity = + &glb_policy->lb_client->status_details_capacity; + op->flags = 0; + op->reserved = NULL; + op++; + call_error = grpc_call_start_batch_and_execute( + exec_ctx, glb_policy->lb_client->lb_call, ops, (size_t)(op - ops), + &glb_policy->lb_client->srv_status_rcvd); + GPR_ASSERT(GRPC_CALL_OK == call_error); +} + +static void md_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { + lb_client_data *lb_client = arg; + GPR_ASSERT(lb_client->lb_call); + grpc_op ops[1]; + memset(ops, 0, sizeof(ops)); + grpc_op *op = ops; + op->op = GRPC_OP_RECV_INITIAL_METADATA; + op->data.recv_initial_metadata = &lb_client->initial_metadata_recv; + op->flags = 0; + op->reserved = NULL; + op++; + grpc_call_error call_error = grpc_call_start_batch_and_execute( + exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops), + &lb_client->md_rcvd); + GPR_ASSERT(GRPC_CALL_OK == call_error); +} + +static void md_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { + lb_client_data *lb_client = arg; + GPR_ASSERT(lb_client->lb_call); + grpc_op ops[1]; + memset(ops, 0, sizeof(ops)); + grpc_op *op = ops; + + op->op = GRPC_OP_SEND_MESSAGE; + op->data.send_message = lb_client->request_payload; + op->flags = 0; + op->reserved = NULL; + op++; + grpc_call_error call_error = grpc_call_start_batch_and_execute( + exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops), + &lb_client->req_sent); + GPR_ASSERT(GRPC_CALL_OK == call_error); +} + +static void req_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { + lb_client_data *lb_client = arg; + + grpc_op ops[1]; + memset(ops, 0, sizeof(ops)); + grpc_op *op = ops; + + op->op = GRPC_OP_RECV_MESSAGE; + op->data.recv_message = &lb_client->response_payload; + op->flags = 0; + op->reserved = NULL; + op++; + grpc_call_error call_error = grpc_call_start_batch_and_execute( + exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops), + &lb_client->res_rcvd); + GPR_ASSERT(GRPC_CALL_OK == call_error); +} + +static void res_recv_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { + lb_client_data *lb_client = arg; + grpc_op ops[2]; + memset(ops, 0, sizeof(ops)); + grpc_op *op = ops; + if (lb_client->response_payload != NULL) { + /* Received data from the LB server. Look inside + * lb_client->response_payload, for + * a serverlist. */ + grpc_byte_buffer_reader bbr; + grpc_byte_buffer_reader_init(&bbr, lb_client->response_payload); + gpr_slice response_slice = grpc_byte_buffer_reader_readall(&bbr); + grpc_byte_buffer_destroy(lb_client->response_payload); + grpc_grpclb_serverlist *serverlist = + grpc_grpclb_response_parse_serverlist(response_slice); + if (serverlist != NULL) { + gpr_slice_unref(response_slice); + if (grpc_lb_glb_trace) { + gpr_log(GPR_INFO, "Serverlist with %zu servers received", + serverlist->num_servers); + } + + /* update serverlist */ + if (serverlist->num_servers > 0) { + if (grpc_grpclb_serverlist_equals(lb_client->glb_policy->serverlist, + serverlist)) { + if (grpc_lb_glb_trace) { + gpr_log(GPR_INFO, + "Incoming server list identical to current, ignoring."); + } + } else { /* new serverlist */ + if (lb_client->glb_policy->serverlist != NULL) { + /* dispose of the old serverlist */ + grpc_grpclb_destroy_serverlist(lb_client->glb_policy->serverlist); + } + /* and update the copy in the glb_lb_policy instance */ + lb_client->glb_policy->serverlist = serverlist; + } + if (lb_client->glb_policy->rr_policy == NULL) { + /* initial "handover", in this case from a null RR policy, meaning + * it'll just create the first RR policy instance */ + rr_handover(exec_ctx, lb_client->glb_policy, error); + } else { + /* unref the RR policy, eventually leading to its substitution with a + * new one constructed from the received serverlist (see + * rr_connectivity_changed) */ + GRPC_LB_POLICY_UNREF(exec_ctx, lb_client->glb_policy->rr_policy, + "serverlist_received"); + } + } else { + if (grpc_lb_glb_trace) { + gpr_log(GPR_INFO, + "Received empty server list. Picks will stay pending until a " + "response with > 0 servers is received"); + } + } + + /* keep listening for serverlist updates */ + op->op = GRPC_OP_RECV_MESSAGE; + op->data.recv_message = &lb_client->response_payload; + op->flags = 0; + op->reserved = NULL; + op++; + const grpc_call_error call_error = grpc_call_start_batch_and_execute( + exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops), + &lb_client->res_rcvd); /* loop */ + GPR_ASSERT(GRPC_CALL_OK == call_error); + return; + } + + GPR_ASSERT(serverlist == NULL); + gpr_log(GPR_ERROR, "Invalid LB response received: '%s'", + gpr_dump_slice(response_slice, GPR_DUMP_ASCII)); + gpr_slice_unref(response_slice); + + /* Disconnect from server returning invalid response. */ + op->op = GRPC_OP_SEND_CLOSE_FROM_CLIENT; + op->flags = 0; + op->reserved = NULL; + op++; + grpc_call_error call_error = grpc_call_start_batch_and_execute( + exec_ctx, lb_client->lb_call, ops, (size_t)(op - ops), + &lb_client->close_sent); + GPR_ASSERT(GRPC_CALL_OK == call_error); + } + /* empty payload: call cancelled by server. Cleanups happening in + * srv_status_rcvd_cb */ +} + +static void close_sent_cb(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error) { + if (grpc_lb_glb_trace) { + gpr_log(GPR_INFO, + "Close from LB client sent. Waiting from server status now"); + } +} + +static void srv_status_rcvd_cb(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error) { + lb_client_data *lb_client = arg; + if (grpc_lb_glb_trace) { + gpr_log(GPR_INFO, + "status from lb server received. Status = %d, Details = '%s', " + "Capaticy " + "= %zu", + lb_client->status, lb_client->status_details, + lb_client->status_details_capacity); + } + /* TODO(dgq): deal with stream termination properly (fire up another one? fail + * the original call?) */ +} + +/* Code wiring the policy with the rest of the core */ +static const grpc_lb_policy_vtable glb_lb_policy_vtable = { + glb_destroy, glb_shutdown, glb_pick, + glb_cancel_pick, glb_cancel_picks, glb_ping_one, + glb_exit_idle, glb_check_connectivity, glb_notify_on_state_change}; + +static void glb_factory_ref(grpc_lb_policy_factory *factory) {} + +static void glb_factory_unref(grpc_lb_policy_factory *factory) {} + +static const grpc_lb_policy_factory_vtable glb_factory_vtable = { + glb_factory_ref, glb_factory_unref, glb_create, "grpclb"}; + +static grpc_lb_policy_factory glb_lb_policy_factory = {&glb_factory_vtable}; + +grpc_lb_policy_factory *grpc_glb_lb_factory_create() { + return &glb_lb_policy_factory; +} + +/* Plugin registration */ +void grpc_lb_policy_grpclb_init() { + grpc_register_lb_policy(grpc_glb_lb_factory_create()); + grpc_register_tracer("glb", &grpc_lb_glb_trace); +} + +void grpc_lb_policy_grpclb_shutdown() {} |