diff options
Diffstat (limited to 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c')
-rw-r--r-- | src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c | 504 |
1 files changed, 344 insertions, 160 deletions
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c index c3acc3d9db..a23a56c052 100644 --- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c @@ -115,6 +115,7 @@ #include "src/core/ext/filters/client_channel/lb_policy_factory.h" #include "src/core/ext/filters/client_channel/lb_policy_registry.h" #include "src/core/ext/filters/client_channel/parse_address.h" +#include "src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/channel/channel_stack.h" #include "src/core/lib/iomgr/combiner.h" @@ -315,6 +316,9 @@ typedef struct glb_lb_policy { /** for communicating with the LB server */ grpc_channel *lb_channel; + /** response generator to inject address updates into \a lb_channel */ + grpc_fake_resolver_response_generator *response_generator; + /** the RR policy to use of the backend servers returned by the LB server */ grpc_lb_policy *rr_policy; @@ -323,6 +327,9 @@ typedef struct glb_lb_policy { /** our connectivity state tracker */ grpc_connectivity_state_tracker state_tracker; + /** connectivity state of the LB channel */ + grpc_connectivity_state lb_channel_connectivity; + /** stores the deserialized response from the LB. May be NULL until one such * response has arrived. */ grpc_grpclb_serverlist *serverlist; @@ -340,10 +347,27 @@ typedef struct glb_lb_policy { bool shutting_down; + /** are we currently updating lb_call? */ + bool updating_lb_call; + + /** are we currently updating lb_channel? */ + bool updating_lb_channel; + + /** are we already watching the LB channel's connectivity? */ + bool watching_lb_channel; + + /** is \a lb_call_retry_timer active? */ + bool retry_timer_active; + + /** called upon changes to the LB channel's connectivity. */ + grpc_closure lb_channel_on_connectivity_changed; + + /** args from the latest update received while already updating, or NULL */ + grpc_lb_policy_args *pending_update_args; + /************************************************************/ /* client data associated with the LB server communication */ /************************************************************/ - /* Finished sending initial request. */ grpc_closure lb_on_sent_initial_request; @@ -533,10 +557,9 @@ static grpc_lb_addresses *process_serverlist_locked( return lb_addresses; } -/* returns true if the new RR policy should replace the current one, if any */ -static bool update_lb_connectivity_status_locked( +static void update_lb_connectivity_status_locked( grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy, - grpc_connectivity_state new_rr_state, grpc_error *new_rr_state_error) { + grpc_connectivity_state rr_state, grpc_error *rr_state_error) { const grpc_connectivity_state curr_glb_state = grpc_connectivity_state_check(&glb_policy->state_tracker); @@ -570,28 +593,26 @@ static bool update_lb_connectivity_status_locked( * (*) This function mustn't be called during shutting down. */ GPR_ASSERT(curr_glb_state != GRPC_CHANNEL_SHUTDOWN); - switch (new_rr_state) { + switch (rr_state) { case GRPC_CHANNEL_TRANSIENT_FAILURE: case GRPC_CHANNEL_SHUTDOWN: - GPR_ASSERT(new_rr_state_error != GRPC_ERROR_NONE); - return false; /* don't replace the RR policy */ + GPR_ASSERT(rr_state_error != GRPC_ERROR_NONE); + break; case GRPC_CHANNEL_INIT: case GRPC_CHANNEL_IDLE: case GRPC_CHANNEL_CONNECTING: case GRPC_CHANNEL_READY: - GPR_ASSERT(new_rr_state_error == GRPC_ERROR_NONE); + GPR_ASSERT(rr_state_error == GRPC_ERROR_NONE); } if (GRPC_TRACER_ON(grpc_lb_glb_trace)) { - gpr_log(GPR_INFO, - "Setting grpclb's state to %s from new RR policy %p state.", - grpc_connectivity_state_name(new_rr_state), - (void *)glb_policy->rr_policy); + gpr_log( + GPR_INFO, "Setting grpclb's state to %s from new RR policy %p state.", + grpc_connectivity_state_name(rr_state), (void *)glb_policy->rr_policy); } - grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker, - new_rr_state, GRPC_ERROR_REF(new_rr_state_error), + grpc_connectivity_state_set(exec_ctx, &glb_policy->state_tracker, rr_state, + GRPC_ERROR_REF(rr_state_error), "update_lb_connectivity_status_locked"); - return true; } /* Perform a pick over \a glb_policy->rr_policy. Given that a pick can return @@ -670,45 +691,38 @@ static bool pick_from_internal_rr_locked( return pick_done; } -static grpc_lb_policy *create_rr_locked( - grpc_exec_ctx *exec_ctx, const grpc_grpclb_serverlist *serverlist, - glb_lb_policy *glb_policy) { - GPR_ASSERT(serverlist != NULL && serverlist->num_servers > 0); - - grpc_lb_policy_args args; - memset(&args, 0, sizeof(args)); - args.client_channel_factory = glb_policy->cc_factory; - args.combiner = glb_policy->base.combiner; +static grpc_lb_policy_args *lb_policy_args_create(grpc_exec_ctx *exec_ctx, + glb_lb_policy *glb_policy) { + grpc_lb_policy_args *args = gpr_zalloc(sizeof(*args)); + args->client_channel_factory = glb_policy->cc_factory; + args->combiner = glb_policy->base.combiner; grpc_lb_addresses *addresses = - process_serverlist_locked(exec_ctx, serverlist); - + process_serverlist_locked(exec_ctx, glb_policy->serverlist); // Replace the LB addresses in the channel args that we pass down to // the subchannel. static const char *keys_to_remove[] = {GRPC_ARG_LB_ADDRESSES}; const grpc_arg arg = grpc_lb_addresses_create_channel_arg(addresses); - args.args = grpc_channel_args_copy_and_add_and_remove( + args->args = grpc_channel_args_copy_and_add_and_remove( glb_policy->args, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove), &arg, 1); - - grpc_lb_policy *rr = grpc_lb_policy_create(exec_ctx, "round_robin", &args); - GPR_ASSERT(rr != NULL); grpc_lb_addresses_destroy(exec_ctx, addresses); - grpc_channel_args_destroy(exec_ctx, args.args); - return rr; + return args; +} + +static void lb_policy_args_destroy(grpc_exec_ctx *exec_ctx, + grpc_lb_policy_args *args) { + grpc_channel_args_destroy(exec_ctx, args->args); + gpr_free(args); } static void glb_rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error); -/* glb_policy->rr_policy may be NULL (initial handover) */ -static void rr_handover_locked(grpc_exec_ctx *exec_ctx, - glb_lb_policy *glb_policy) { - GPR_ASSERT(glb_policy->serverlist != NULL && - glb_policy->serverlist->num_servers > 0); - - if (glb_policy->shutting_down) return; +static void create_rr_locked(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy, + grpc_lb_policy_args *args) { + GPR_ASSERT(glb_policy->rr_policy == NULL); grpc_lb_policy *new_rr_policy = - create_rr_locked(exec_ctx, glb_policy->serverlist, glb_policy); + grpc_lb_policy_create(exec_ctx, "round_robin", args); if (new_rr_policy == NULL) { gpr_log(GPR_ERROR, "Failure creating a RoundRobin policy for serverlist update with " @@ -719,41 +733,16 @@ static void rr_handover_locked(grpc_exec_ctx *exec_ctx, (void *)glb_policy->rr_policy); return; } - - grpc_error *new_rr_state_error = NULL; - const grpc_connectivity_state new_rr_state = - grpc_lb_policy_check_connectivity_locked(exec_ctx, new_rr_policy, - &new_rr_state_error); - /* Connectivity state is a function of the new RR policy just created */ - const bool replace_old_rr = update_lb_connectivity_status_locked( - exec_ctx, glb_policy, new_rr_state, new_rr_state_error); - - if (!replace_old_rr) { - /* dispose of the new RR policy that won't be used after all */ - GRPC_LB_POLICY_UNREF(exec_ctx, new_rr_policy, "rr_handover_no_replace"); - if (GRPC_TRACER_ON(grpc_lb_glb_trace)) { - gpr_log(GPR_INFO, - "Keeping old RR policy (%p) despite new serverlist: new RR " - "policy was in %s connectivity state.", - (void *)glb_policy->rr_policy, - grpc_connectivity_state_name(new_rr_state)); - } - return; - } - - if (GRPC_TRACER_ON(grpc_lb_glb_trace)) { - gpr_log(GPR_INFO, "Created RR policy (%p) to replace old RR (%p)", - (void *)new_rr_policy, (void *)glb_policy->rr_policy); - } - - if (glb_policy->rr_policy != NULL) { - /* if we are phasing out an existing RR instance, unref it. */ - GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "rr_handover"); - } - - /* Finally update the RR policy to the newly created one */ glb_policy->rr_policy = new_rr_policy; + grpc_error *rr_state_error = NULL; + const grpc_connectivity_state rr_state = + grpc_lb_policy_check_connectivity_locked(exec_ctx, glb_policy->rr_policy, + &rr_state_error); + /* Connectivity state is a function of the RR policy updated/created */ + update_lb_connectivity_status_locked(exec_ctx, glb_policy, rr_state, + rr_state_error); + /* Add the gRPC LB's interested_parties pollset_set to that of the newly * created RR policy. This will make the RR policy progress upon activity on * gRPC LB, which in turn is tied to the application's call */ @@ -769,10 +758,10 @@ static void rr_handover_locked(grpc_exec_ctx *exec_ctx, glb_rr_connectivity_changed_locked, rr_connectivity, grpc_combiner_scheduler(glb_policy->base.combiner)); rr_connectivity->glb_policy = glb_policy; - rr_connectivity->state = new_rr_state; + rr_connectivity->state = rr_state; /* Subscribe to changes to the connectivity of the new RR */ - GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "rr_connectivity_cb"); + GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "rr_connectivity_sched"); grpc_lb_policy_notify_on_state_change_locked(exec_ctx, glb_policy->rr_policy, &rr_connectivity->state, &rr_connectivity->on_change); @@ -809,6 +798,31 @@ static void rr_handover_locked(grpc_exec_ctx *exec_ctx, } } +/* glb_policy->rr_policy may be NULL (initial handover) */ +static void rr_handover_locked(grpc_exec_ctx *exec_ctx, + glb_lb_policy *glb_policy) { + GPR_ASSERT(glb_policy->serverlist != NULL && + glb_policy->serverlist->num_servers > 0); + + if (glb_policy->shutting_down) return; + + grpc_lb_policy_args *args = lb_policy_args_create(exec_ctx, glb_policy); + if (glb_policy->rr_policy != NULL) { + if (GRPC_TRACER_ON(grpc_lb_glb_trace)) { + gpr_log(GPR_DEBUG, "Updating Round Robin policy (%p)", + (void *)glb_policy->rr_policy); + } + grpc_lb_policy_update_locked(exec_ctx, glb_policy->rr_policy, args); + } else { + create_rr_locked(exec_ctx, glb_policy, args); + if (GRPC_TRACER_ON(grpc_lb_glb_trace)) { + gpr_log(GPR_DEBUG, "Created new Round Robin policy (%p)", + (void *)glb_policy->rr_policy); + } + } + lb_policy_args_destroy(exec_ctx, args); +} + static void glb_rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { rr_connectivity_data *rr_connectivity = arg; @@ -854,18 +868,24 @@ static grpc_slice_hash_table_entry targets_info_entry_create( return entry; } -/* Returns the target URI for the LB service whose addresses are in \a - * addresses. Using this URI, a bidirectional streaming channel will be created - * for the reception of load balancing updates. +static int balancer_name_cmp_fn(void *a, void *b) { + const char *a_str = a; + const char *b_str = b; + return strcmp(a_str, b_str); +} + +/* Returns the channel args for the LB channel, used to create a bidirectional + * stream for the reception of load balancing updates. * - * The output argument \a targets_info will be updated to contain a mapping of - * "LB server address" to "balancer name", as reported by the naming system. - * This mapping will be propagated via the channel arguments of the - * aforementioned LB streaming channel, to be used by the security connector for - * secure naming checks. The user is responsible for freeing \a targets_info. */ -static char *get_lb_uri_target_addresses(grpc_exec_ctx *exec_ctx, - const grpc_lb_addresses *addresses, - grpc_slice_hash_table **targets_info) { + * Inputs: + * - \a addresses: corresponding to the balancers. + * - \a response_generator: in order to propagate updates from the resolver + * above the grpclb policy. + * - \a args: other args inherited from the grpclb policy. */ +static grpc_channel_args *build_lb_channel_args( + grpc_exec_ctx *exec_ctx, const grpc_lb_addresses *addresses, + grpc_fake_resolver_response_generator *response_generator, + const grpc_channel_args *args) { size_t num_grpclb_addrs = 0; for (size_t i = 0; i < addresses->num_addresses; ++i) { if (addresses->addresses[i].is_balancer) ++num_grpclb_addrs; @@ -874,53 +894,54 @@ static char *get_lb_uri_target_addresses(grpc_exec_ctx *exec_ctx, * It's the resolver's responsibility to make sure this policy is only * instantiated and used in that case. Otherwise, something has gone wrong. */ GPR_ASSERT(num_grpclb_addrs > 0); - + grpc_lb_addresses *lb_addresses = + grpc_lb_addresses_create(num_grpclb_addrs, NULL); grpc_slice_hash_table_entry *targets_info_entries = - gpr_malloc(sizeof(*targets_info_entries) * num_grpclb_addrs); + gpr_zalloc(sizeof(*targets_info_entries) * num_grpclb_addrs); - /* construct a target ipvX://ip1:port1,ip2:port2,... from the addresses in \a - * addresses */ - /* TODO(dgq): support mixed ip version */ - char **addr_strs = gpr_malloc(sizeof(char *) * num_grpclb_addrs); - size_t addr_index = 0; - - for (size_t i = 0; i < addresses->num_addresses; i++) { + size_t lb_addresses_idx = 0; + for (size_t i = 0; i < addresses->num_addresses; ++i) { + if (!addresses->addresses[i].is_balancer) continue; if (addresses->addresses[i].user_data != NULL) { gpr_log(GPR_ERROR, "This LB policy doesn't support user data. It will be ignored"); } - if (addresses->addresses[i].is_balancer) { - char *addr_str; - GPR_ASSERT(grpc_sockaddr_to_string( - &addr_str, &addresses->addresses[i].address, true) > 0); - targets_info_entries[addr_index] = targets_info_entry_create( - addr_str, addresses->addresses[i].balancer_name); - addr_strs[addr_index++] = addr_str; - } + char *addr_str; + GPR_ASSERT(grpc_sockaddr_to_string( + &addr_str, &addresses->addresses[i].address, true) > 0); + targets_info_entries[lb_addresses_idx] = targets_info_entry_create( + addr_str, addresses->addresses[i].balancer_name); + gpr_free(addr_str); + + grpc_lb_addresses_set_address( + lb_addresses, lb_addresses_idx++, addresses->addresses[i].address.addr, + addresses->addresses[i].address.len, false /* is balancer */, + addresses->addresses[i].balancer_name, NULL /* user data */); } - GPR_ASSERT(addr_index == num_grpclb_addrs); - - size_t uri_path_len; - char *uri_path = gpr_strjoin_sep((const char **)addr_strs, num_grpclb_addrs, - ",", &uri_path_len); - for (size_t i = 0; i < num_grpclb_addrs; i++) gpr_free(addr_strs[i]); - gpr_free(addr_strs); - - char *target_uri_str = NULL; - /* TODO(dgq): Don't assume all addresses will share the scheme of the first - * one */ - gpr_asprintf(&target_uri_str, "%s:%s", - grpc_sockaddr_get_uri_scheme(&addresses->addresses[0].address), - uri_path); - gpr_free(uri_path); - - *targets_info = grpc_slice_hash_table_create( - num_grpclb_addrs, targets_info_entries, destroy_balancer_name); + GPR_ASSERT(num_grpclb_addrs == lb_addresses_idx); + grpc_slice_hash_table *targets_info = + grpc_slice_hash_table_create(num_grpclb_addrs, targets_info_entries, + destroy_balancer_name, balancer_name_cmp_fn); gpr_free(targets_info_entries); - return target_uri_str; + grpc_channel_args *lb_channel_args = + grpc_lb_policy_grpclb_build_lb_channel_args(exec_ctx, targets_info, + response_generator, args); + + grpc_arg lb_channel_addresses_arg = + grpc_lb_addresses_create_channel_arg(lb_addresses); + + grpc_channel_args *result = grpc_channel_args_copy_and_add( + lb_channel_args, &lb_channel_addresses_arg, 1); + grpc_slice_hash_table_unref(exec_ctx, targets_info); + grpc_channel_args_destroy(exec_ctx, lb_channel_args); + grpc_lb_addresses_destroy(exec_ctx, lb_addresses); + return result; } +static void glb_lb_channel_on_connectivity_changed_cb(grpc_exec_ctx *exec_ctx, + void *arg, + grpc_error *error); static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx, grpc_lb_policy_factory *factory, grpc_lb_policy_args *args) { @@ -976,24 +997,31 @@ static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx, glb_policy->args = grpc_channel_args_copy_and_add_and_remove( args->args, args_to_remove, GPR_ARRAY_SIZE(args_to_remove), &new_arg, 1); - grpc_slice_hash_table *targets_info = NULL; /* Create a client channel over them to communicate with a LB service */ - char *lb_service_target_addresses = - get_lb_uri_target_addresses(exec_ctx, addresses, &targets_info); - grpc_channel_args *lb_channel_args = - get_lb_channel_args(exec_ctx, targets_info, args->args); + glb_policy->response_generator = + grpc_fake_resolver_response_generator_create(); + grpc_channel_args *lb_channel_args = build_lb_channel_args( + exec_ctx, addresses, glb_policy->response_generator, args->args); + char *uri_str; + gpr_asprintf(&uri_str, "fake:///%s", glb_policy->server_name); glb_policy->lb_channel = grpc_lb_policy_grpclb_create_lb_channel( - exec_ctx, lb_service_target_addresses, args->client_channel_factory, - lb_channel_args); - grpc_slice_hash_table_unref(exec_ctx, targets_info); + exec_ctx, uri_str, args->client_channel_factory, lb_channel_args); + + /* Propagate initial resolution */ + grpc_fake_resolver_response_generator_set_response( + exec_ctx, glb_policy->response_generator, lb_channel_args); grpc_channel_args_destroy(exec_ctx, lb_channel_args); - gpr_free(lb_service_target_addresses); + gpr_free(uri_str); if (glb_policy->lb_channel == NULL) { gpr_free((void *)glb_policy->server_name); grpc_channel_args_destroy(exec_ctx, glb_policy->args); gpr_free(glb_policy); return NULL; } + + grpc_closure_init(&glb_policy->lb_channel_on_connectivity_changed, + glb_lb_channel_on_connectivity_changed_cb, glb_policy, + grpc_combiner_scheduler(args->combiner, false)); grpc_lb_policy_init(&glb_policy->base, &glb_lb_policy_vtable, args->combiner); grpc_connectivity_state_init(&glb_policy->state_tracker, GRPC_CHANNEL_IDLE, "grpclb"); @@ -1009,12 +1037,15 @@ static void glb_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) { if (glb_policy->client_stats != NULL) { grpc_grpclb_client_stats_unref(glb_policy->client_stats); } - grpc_channel_destroy(glb_policy->lb_channel); - glb_policy->lb_channel = NULL; grpc_connectivity_state_destroy(exec_ctx, &glb_policy->state_tracker); if (glb_policy->serverlist != NULL) { grpc_grpclb_destroy_serverlist(glb_policy->serverlist); } + grpc_fake_resolver_response_generator_unref(glb_policy->response_generator); + if (glb_policy->pending_update_args != NULL) { + grpc_channel_args_destroy(exec_ctx, glb_policy->pending_update_args->args); + gpr_free(glb_policy->pending_update_args); + } gpr_free(glb_policy); } @@ -1022,16 +1053,6 @@ static void glb_shutdown_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) { glb_lb_policy *glb_policy = (glb_lb_policy *)pol; glb_policy->shutting_down = true; - pending_pick *pp = glb_policy->pending_picks; - glb_policy->pending_picks = NULL; - pending_ping *pping = glb_policy->pending_pings; - glb_policy->pending_pings = NULL; - if (glb_policy->rr_policy) { - GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "glb_shutdown"); - } - grpc_connectivity_state_set( - exec_ctx, &glb_policy->state_tracker, GRPC_CHANNEL_SHUTDOWN, - GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel Shutdown"), "glb_shutdown"); /* We need a copy of the lb_call pointer because we can't cancell the call * while holding glb_policy->mu: lb_on_server_status_received, invoked due to * the cancel, needs to acquire that same lock */ @@ -1045,6 +1066,30 @@ static void glb_shutdown_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) { grpc_call_cancel(lb_call, NULL); /* lb_on_server_status_received will pick up the cancel and clean up */ } + if (glb_policy->retry_timer_active) { + grpc_timer_cancel(exec_ctx, &glb_policy->lb_call_retry_timer); + glb_policy->retry_timer_active = false; + } + + pending_pick *pp = glb_policy->pending_picks; + glb_policy->pending_picks = NULL; + pending_ping *pping = glb_policy->pending_pings; + glb_policy->pending_pings = NULL; + if (glb_policy->rr_policy) { + GRPC_LB_POLICY_UNREF(exec_ctx, glb_policy->rr_policy, "glb_shutdown"); + } + // We destroy the LB channel here because + // glb_lb_channel_on_connectivity_changed_cb needs a valid glb_policy + // instance. Destroying the lb channel in glb_destroy would likely result in + // a callback invocation without a valid glb_policy arg. + if (glb_policy->lb_channel != NULL) { + grpc_channel_destroy(glb_policy->lb_channel); + glb_policy->lb_channel = NULL; + } + grpc_connectivity_state_set( + exec_ctx, &glb_policy->state_tracker, GRPC_CHANNEL_SHUTDOWN, + GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel Shutdown"), "glb_shutdown"); + while (pp != NULL) { pending_pick *next = pp->next; *pp->target = NULL; @@ -1318,6 +1363,7 @@ static void lb_call_init_locked(grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy) { GPR_ASSERT(glb_policy->server_name != NULL); GPR_ASSERT(glb_policy->server_name[0] != '\0'); + GPR_ASSERT(glb_policy->lb_call == NULL); GPR_ASSERT(!glb_policy->shutting_down); /* Note the following LB call progresses every time there's activity in \a @@ -1403,8 +1449,10 @@ static void query_for_backends_locked(grpc_exec_ctx *exec_ctx, lb_call_init_locked(exec_ctx, glb_policy); if (GRPC_TRACER_ON(grpc_lb_glb_trace)) { - gpr_log(GPR_INFO, "Query for backends (grpclb: %p, lb_call: %p)", - (void *)glb_policy, (void *)glb_policy->lb_call); + gpr_log(GPR_INFO, + "Query for backends (grpclb: %p, lb_channel: %p, lb_call: %p)", + (void *)glb_policy, (void *)glb_policy->lb_channel, + (void *)glb_policy->lb_call); } GPR_ASSERT(glb_policy->lb_call != NULL); @@ -1608,8 +1656,8 @@ static void lb_on_response_received_locked(grpc_exec_ctx *exec_ctx, void *arg, static void lb_call_on_retry_timer_locked(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { glb_lb_policy *glb_policy = arg; - - if (!glb_policy->shutting_down) { + glb_policy->retry_timer_active = false; + if (!glb_policy->shutting_down && error == GRPC_ERROR_NONE) { if (GRPC_TRACER_ON(grpc_lb_glb_trace)) { gpr_log(GPR_INFO, "Restaring call to LB server (grpclb %p)", (void *)glb_policy); @@ -1617,31 +1665,32 @@ static void lb_call_on_retry_timer_locked(grpc_exec_ctx *exec_ctx, void *arg, GPR_ASSERT(glb_policy->lb_call == NULL); query_for_backends_locked(exec_ctx, glb_policy); } - GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base, - "grpclb_on_retry_timer"); + GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base, "grpclb_retry_timer"); } static void lb_on_server_status_received_locked(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { glb_lb_policy *glb_policy = arg; - GPR_ASSERT(glb_policy->lb_call != NULL); - if (GRPC_TRACER_ON(grpc_lb_glb_trace)) { char *status_details = grpc_slice_to_c_string(glb_policy->lb_call_status_details); - gpr_log(GPR_DEBUG, + gpr_log(GPR_INFO, "Status from LB server received. Status = %d, Details = '%s', " - "(call: %p)", + "(call: %p), error %p", glb_policy->lb_call_status, status_details, - (void *)glb_policy->lb_call); + (void *)glb_policy->lb_call, (void *)error); gpr_free(status_details); } - /* We need to perform cleanups no matter what. */ lb_call_destroy_locked(exec_ctx, glb_policy); - - if (!glb_policy->shutting_down) { + if (glb_policy->started_picking && glb_policy->updating_lb_call) { + if (glb_policy->retry_timer_active) { + grpc_timer_cancel(exec_ctx, &glb_policy->lb_call_retry_timer); + } + if (!glb_policy->shutting_down) start_picking_locked(exec_ctx, glb_policy); + glb_policy->updating_lb_call = false; + } else if (!glb_policy->shutting_down) { /* if we aren't shutting down, restart the LB client call after some time */ gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC); gpr_timespec next_try = @@ -1651,16 +1700,18 @@ static void lb_on_server_status_received_locked(grpc_exec_ctx *exec_ctx, (void *)glb_policy); gpr_timespec timeout = gpr_time_sub(next_try, now); if (gpr_time_cmp(timeout, gpr_time_0(timeout.clock_type)) > 0) { - gpr_log(GPR_DEBUG, "... retrying in %" PRId64 ".%09d seconds.", + gpr_log(GPR_DEBUG, + "... retry_timer_active in %" PRId64 ".%09d seconds.", timeout.tv_sec, timeout.tv_nsec); } else { - gpr_log(GPR_DEBUG, "... retrying immediately."); + gpr_log(GPR_DEBUG, "... retry_timer_active immediately."); } } GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "grpclb_retry_timer"); grpc_closure_init(&glb_policy->lb_on_call_retry, lb_call_on_retry_timer_locked, glb_policy, grpc_combiner_scheduler(glb_policy->base.combiner)); + glb_policy->retry_timer_active = true; grpc_timer_init(exec_ctx, &glb_policy->lb_call_retry_timer, next_try, &glb_policy->lb_on_call_retry, now); } @@ -1668,6 +1719,138 @@ static void lb_on_server_status_received_locked(grpc_exec_ctx *exec_ctx, "lb_on_server_status_received"); } +static void glb_update_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *policy, + const grpc_lb_policy_args *args) { + glb_lb_policy *glb_policy = (glb_lb_policy *)policy; + + if (glb_policy->updating_lb_channel) { + if (GRPC_TRACER_ON(grpc_lb_glb_trace)) { + gpr_log(GPR_INFO, + "Update already in progress for grpclb %p. Deferring update.", + (void *)glb_policy); + } + if (glb_policy->pending_update_args != NULL) { + grpc_channel_args_destroy(exec_ctx, + glb_policy->pending_update_args->args); + gpr_free(glb_policy->pending_update_args); + } + glb_policy->pending_update_args = + gpr_zalloc(sizeof(*glb_policy->pending_update_args)); + glb_policy->pending_update_args->client_channel_factory = + args->client_channel_factory; + glb_policy->pending_update_args->args = grpc_channel_args_copy(args->args); + glb_policy->pending_update_args->combiner = args->combiner; + return; + } + + glb_policy->updating_lb_channel = true; + // Propagate update to lb_channel (pick first). + const grpc_arg *arg = + grpc_channel_args_find(args->args, GRPC_ARG_LB_ADDRESSES); + if (arg == NULL || arg->type != GRPC_ARG_POINTER) { + if (glb_policy->lb_channel == NULL) { + // If we don't have a current channel to the LB, go into TRANSIENT + // FAILURE. + grpc_connectivity_state_set( + exec_ctx, &glb_policy->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE, + GRPC_ERROR_CREATE_FROM_STATIC_STRING("Missing update in args"), + "glb_update_missing"); + } else { + // otherwise, keep using the current LB channel (ignore this update). + gpr_log(GPR_ERROR, + "No valid LB addresses channel arg for grpclb %p update, " + "ignoring.", + (void *)glb_policy); + } + } + const grpc_lb_addresses *addresses = arg->value.pointer.p; + GPR_ASSERT(glb_policy->lb_channel != NULL); + grpc_channel_args *lb_channel_args = build_lb_channel_args( + exec_ctx, addresses, glb_policy->response_generator, args->args); + /* Propagate updates to the LB channel through the fake resolver */ + grpc_fake_resolver_response_generator_set_response( + exec_ctx, glb_policy->response_generator, lb_channel_args); + grpc_channel_args_destroy(exec_ctx, lb_channel_args); + + if (!glb_policy->watching_lb_channel) { + // Watch the LB channel connectivity for connection. + glb_policy->lb_channel_connectivity = GRPC_CHANNEL_INIT; + grpc_channel_element *client_channel_elem = grpc_channel_stack_last_element( + grpc_channel_get_channel_stack(glb_policy->lb_channel)); + GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter); + glb_policy->watching_lb_channel = true; + GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "watch_lb_channel_connectivity"); + grpc_client_channel_watch_connectivity_state( + exec_ctx, client_channel_elem, + grpc_polling_entity_create_from_pollset_set( + glb_policy->base.interested_parties), + &glb_policy->lb_channel_connectivity, + &glb_policy->lb_channel_on_connectivity_changed, NULL); + } +} + +// Invoked as part of the update process. It continues watching the LB channel +// until it shuts down or becomes READY. It's invoked even if the LB channel +// stayed READY throughout the update (for example if the update is identical). +static void glb_lb_channel_on_connectivity_changed_cb(grpc_exec_ctx *exec_ctx, + void *arg, + grpc_error *error) { + glb_lb_policy *glb_policy = arg; + if (glb_policy->shutting_down) goto done; + // Re-initialize the lb_call. This should also take care of updating the + // embedded RR policy. Note that the current RR policy, if any, will stay in + // effect until an update from the new lb_call is received. + switch (glb_policy->lb_channel_connectivity) { + case GRPC_CHANNEL_INIT: + case GRPC_CHANNEL_CONNECTING: + case GRPC_CHANNEL_TRANSIENT_FAILURE: { + /* resub. */ + grpc_channel_element *client_channel_elem = + grpc_channel_stack_last_element( + grpc_channel_get_channel_stack(glb_policy->lb_channel)); + GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter); + grpc_client_channel_watch_connectivity_state( + exec_ctx, client_channel_elem, + grpc_polling_entity_create_from_pollset_set( + glb_policy->base.interested_parties), + &glb_policy->lb_channel_connectivity, + &glb_policy->lb_channel_on_connectivity_changed, NULL); + break; + } + case GRPC_CHANNEL_IDLE: + // lb channel inactive (probably shutdown prior to update). Restart lb + // call to kick the lb channel into gear. + GPR_ASSERT(glb_policy->lb_call == NULL); + /* fallthrough */ + case GRPC_CHANNEL_READY: + if (glb_policy->lb_call != NULL) { + glb_policy->updating_lb_channel = false; + glb_policy->updating_lb_call = true; + grpc_call_cancel(glb_policy->lb_call, NULL); + // lb_on_server_status_received will pick up the cancel and reinit + // lb_call. + if (glb_policy->pending_update_args != NULL) { + const grpc_lb_policy_args *args = glb_policy->pending_update_args; + glb_policy->pending_update_args = NULL; + glb_update_locked(exec_ctx, &glb_policy->base, args); + } + } else if (glb_policy->started_picking && !glb_policy->shutting_down) { + if (glb_policy->retry_timer_active) { + grpc_timer_cancel(exec_ctx, &glb_policy->lb_call_retry_timer); + glb_policy->retry_timer_active = false; + } + start_picking_locked(exec_ctx, glb_policy); + } + /* fallthrough */ + case GRPC_CHANNEL_SHUTDOWN: + done: + glb_policy->watching_lb_channel = false; + GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base, + "watch_lb_channel_connectivity_cb_shutdown"); + break; + } +} + /* Code wiring the policy with the rest of the core */ static const grpc_lb_policy_vtable glb_lb_policy_vtable = { glb_destroy, @@ -1678,7 +1861,8 @@ static const grpc_lb_policy_vtable glb_lb_policy_vtable = { glb_ping_one_locked, glb_exit_idle_locked, glb_check_connectivity_locked, - glb_notify_on_state_change_locked}; + glb_notify_on_state_change_locked, + glb_update_locked}; static void glb_factory_ref(grpc_lb_policy_factory *factory) {} |