diff options
Diffstat (limited to 'src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c')
-rw-r--r-- | src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c | 217 |
1 files changed, 140 insertions, 77 deletions
diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c index 8e9d6b0f47..bc40165cfb 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c @@ -37,7 +37,8 @@ #include "src/core/lib/transport/connectivity_state.h" #include "src/core/lib/transport/static_metadata.h" -grpc_tracer_flag grpc_lb_round_robin_trace = GRPC_TRACER_INITIALIZER(false); +grpc_tracer_flag grpc_lb_round_robin_trace = + GRPC_TRACER_INITIALIZER(false, "round_robin"); /** List of entities waiting for a pick. * @@ -141,6 +142,21 @@ struct rr_subchannel_list { bool shutting_down; }; +static rr_subchannel_list *rr_subchannel_list_create(round_robin_lb_policy *p, + size_t num_subchannels) { + rr_subchannel_list *subchannel_list = gpr_zalloc(sizeof(*subchannel_list)); + subchannel_list->policy = p; + subchannel_list->subchannels = + gpr_zalloc(sizeof(subchannel_data) * num_subchannels); + subchannel_list->num_subchannels = num_subchannels; + gpr_ref_init(&subchannel_list->refcount, 1); + if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { + gpr_log(GPR_INFO, "[RR %p] Created subchannel list %p for %lu subchannels", + (void *)p, (void *)subchannel_list, (unsigned long)num_subchannels); + } + return subchannel_list; +} + static void rr_subchannel_list_destroy(grpc_exec_ctx *exec_ctx, rr_subchannel_list *subchannel_list) { GPR_ASSERT(subchannel_list->shutting_down); @@ -158,6 +174,7 @@ static void rr_subchannel_list_destroy(grpc_exec_ctx *exec_ctx, if (sd->user_data != NULL) { GPR_ASSERT(sd->user_data_vtable != NULL); sd->user_data_vtable->destroy(exec_ctx, sd->user_data); + sd->user_data = NULL; } } gpr_free(subchannel_list->subchannels); @@ -169,9 +186,9 @@ static void rr_subchannel_list_ref(rr_subchannel_list *subchannel_list, gpr_ref_non_zero(&subchannel_list->refcount); if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { const gpr_atm count = gpr_atm_acq_load(&subchannel_list->refcount.count); - gpr_log(GPR_INFO, "[RR %p] subchannel_list %p REF %lu->%lu", + gpr_log(GPR_INFO, "[RR %p] subchannel_list %p REF %lu->%lu (%s)", (void *)subchannel_list->policy, (void *)subchannel_list, - (unsigned long)(count - 1), (unsigned long)count); + (unsigned long)(count - 1), (unsigned long)count, reason); } } @@ -181,9 +198,9 @@ static void rr_subchannel_list_unref(grpc_exec_ctx *exec_ctx, const bool done = gpr_unref(&subchannel_list->refcount); if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { const gpr_atm count = gpr_atm_acq_load(&subchannel_list->refcount.count); - gpr_log(GPR_INFO, "[RR %p] subchannel_list %p UNREF %lu->%lu", + gpr_log(GPR_INFO, "[RR %p] subchannel_list %p UNREF %lu->%lu (%s)", (void *)subchannel_list->policy, (void *)subchannel_list, - (unsigned long)(count + 1), (unsigned long)count); + (unsigned long)(count + 1), (unsigned long)count, reason); } if (done) { rr_subchannel_list_destroy(exec_ctx, subchannel_list); @@ -192,14 +209,27 @@ static void rr_subchannel_list_unref(grpc_exec_ctx *exec_ctx, /** Mark \a subchannel_list as discarded. Unsubscribes all its subchannels. The * watcher's callback will ultimately unref \a subchannel_list. */ -static void rr_subchannel_list_shutdown(grpc_exec_ctx *exec_ctx, - rr_subchannel_list *subchannel_list, - const char *reason) { +static void rr_subchannel_list_shutdown_and_unref( + grpc_exec_ctx *exec_ctx, rr_subchannel_list *subchannel_list, + const char *reason) { + GPR_ASSERT(!subchannel_list->shutting_down); + if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { + gpr_log(GPR_DEBUG, "[RR %p] Shutting down subchannel_list %p (%s)", + (void *)subchannel_list->policy, (void *)subchannel_list, reason); + } GPR_ASSERT(!subchannel_list->shutting_down); subchannel_list->shutting_down = true; for (size_t i = 0; i < subchannel_list->num_subchannels; i++) { subchannel_data *sd = &subchannel_list->subchannels[i]; if (sd->subchannel != NULL) { // if subchannel isn't shutdown, unsubscribe. + if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { + gpr_log( + GPR_DEBUG, + "[RR %p] Unsubscribing from subchannel %p as part of shutting down " + "subchannel_list %p", + (void *)subchannel_list->policy, (void *)sd->subchannel, + (void *)subchannel_list); + } grpc_subchannel_notify_on_state_change(exec_ctx, sd->subchannel, NULL, NULL, &sd->connectivity_changed_closure); @@ -228,13 +258,14 @@ static size_t get_next_ready_subchannel_index_locked( const size_t index = (i + p->last_ready_subchannel_index + 1) % p->subchannel_list->num_subchannels; if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { - gpr_log(GPR_DEBUG, - "[RR %p] checking subchannel %p, subchannel_list %p, index %lu: " - "state=%d", - (void *)p, - (void *)p->subchannel_list->subchannels[index].subchannel, - (void *)p->subchannel_list, (unsigned long)index, - p->subchannel_list->subchannels[index].curr_connectivity_state); + gpr_log( + GPR_DEBUG, + "[RR %p] checking subchannel %p, subchannel_list %p, index %lu: " + "state=%s", + (void *)p, (void *)p->subchannel_list->subchannels[index].subchannel, + (void *)p->subchannel_list, (unsigned long)index, + grpc_connectivity_state_name( + p->subchannel_list->subchannels[index].curr_connectivity_state)); } if (p->subchannel_list->subchannels[index].curr_connectivity_state == GRPC_CHANNEL_READY) { @@ -274,7 +305,8 @@ static void update_last_ready_subchannel_index_locked(round_robin_lb_policy *p, static void rr_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) { round_robin_lb_policy *p = (round_robin_lb_policy *)pol; if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { - gpr_log(GPR_DEBUG, "Destroying Round Robin policy at %p", (void *)pol); + gpr_log(GPR_DEBUG, "[RR %p] Destroying Round Robin policy at %p", + (void *)pol, (void *)pol); } grpc_connectivity_state_destroy(exec_ctx, &p->state_tracker); gpr_free(p); @@ -283,7 +315,8 @@ static void rr_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) { static void rr_shutdown_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) { round_robin_lb_policy *p = (round_robin_lb_policy *)pol; if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { - gpr_log(GPR_DEBUG, "Shutting down Round Robin policy at %p", (void *)pol); + gpr_log(GPR_DEBUG, "[RR %p] Shutting down Round Robin policy at %p", + (void *)pol, (void *)pol); } p->shutdown = true; pending_pick *pp; @@ -298,9 +331,18 @@ static void rr_shutdown_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) { grpc_connectivity_state_set( exec_ctx, &p->state_tracker, GRPC_CHANNEL_SHUTDOWN, GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel Shutdown"), "rr_shutdown"); - rr_subchannel_list_shutdown(exec_ctx, p->subchannel_list, - "sl_shutdown_rr_shutdown"); + const bool latest_is_current = + p->subchannel_list == p->latest_pending_subchannel_list; + rr_subchannel_list_shutdown_and_unref(exec_ctx, p->subchannel_list, + "sl_shutdown_rr_shutdown"); p->subchannel_list = NULL; + if (!latest_is_current && p->latest_pending_subchannel_list != NULL && + !p->latest_pending_subchannel_list->shutting_down) { + rr_subchannel_list_shutdown_and_unref(exec_ctx, + p->latest_pending_subchannel_list, + "sl_shutdown_pending_rr_shutdown"); + p->latest_pending_subchannel_list = NULL; + } } static void rr_cancel_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol, @@ -356,8 +398,8 @@ static void start_picking_locked(grpc_exec_ctx *exec_ctx, p->started_picking = true; for (size_t i = 0; i < p->subchannel_list->num_subchannels; i++) { subchannel_data *sd = &p->subchannel_list->subchannels[i]; - GRPC_LB_POLICY_WEAK_REF(&p->base, "rr_connectivity"); - rr_subchannel_list_ref(sd->subchannel_list, "start_picking"); + GRPC_LB_POLICY_WEAK_REF(&p->base, "start_picking_locked"); + rr_subchannel_list_ref(sd->subchannel_list, "started_picking"); grpc_subchannel_notify_on_state_change( exec_ctx, sd->subchannel, p->base.interested_parties, &sd->pending_connectivity_state_unsafe, @@ -379,7 +421,7 @@ static int rr_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol, grpc_closure *on_complete) { round_robin_lb_policy *p = (round_robin_lb_policy *)pol; if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "Round Robin %p trying to pick", (void *)pol); + gpr_log(GPR_INFO, "[RR %p] Trying to pick", (void *)pol); } if (p->subchannel_list != NULL) { const size_t next_ready_index = get_next_ready_subchannel_index_locked(p); @@ -395,8 +437,8 @@ static int rr_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol, if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { gpr_log( GPR_DEBUG, - "[RR %p] PICKED TARGET <-- SUBCHANNEL %p (CONNECTED %p) (SL %p, " - "INDEX %lu)", + "[RR %p] Picked target <-- Subchannel %p (connected %p) (sl %p, " + "index %lu)", (void *)p, (void *)sd->subchannel, (void *)*target, (void *)sd->subchannel_list, (unsigned long)next_ready_index); } @@ -511,38 +553,53 @@ static void rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { subchannel_data *sd = arg; round_robin_lb_policy *p = sd->subchannel_list->policy; + if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { + gpr_log( + GPR_DEBUG, + "[RR %p] connectivity changed for subchannel %p, subchannel_list %p: " + "prev_state=%s new_state=%s p->shutdown=%d " + "sd->subchannel_list->shutting_down=%d error=%s", + (void *)p, (void *)sd->subchannel, (void *)sd->subchannel_list, + grpc_connectivity_state_name(sd->prev_connectivity_state), + grpc_connectivity_state_name(sd->pending_connectivity_state_unsafe), + p->shutdown, sd->subchannel_list->shutting_down, + grpc_error_string(error)); + } // If the policy is shutting down, unref and return. if (p->shutdown) { - rr_subchannel_list_unref(exec_ctx, sd->subchannel_list, "pol_shutdown"); + rr_subchannel_list_unref(exec_ctx, sd->subchannel_list, + "pol_shutdown+started_picking"); GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &p->base, "pol_shutdown"); return; } - if (sd->subchannel_list->shutting_down) { + if (sd->subchannel_list->shutting_down && error == GRPC_ERROR_CANCELLED) { // the subchannel list associated with sd has been discarded. This callback - // corresponds to the unsubscription. - GPR_ASSERT(error == GRPC_ERROR_CANCELLED); - rr_subchannel_list_unref(exec_ctx, sd->subchannel_list, "sl_shutdown"); - GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &p->base, "sl_shutdown"); + // corresponds to the unsubscription. The unrefs correspond to the picking + // ref (start_picking_locked or update_started_picking). + rr_subchannel_list_unref(exec_ctx, sd->subchannel_list, + "sl_shutdown+started_picking"); + GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &p->base, "sl_shutdown+picking"); return; } // Dispose of outdated subchannel lists. if (sd->subchannel_list != p->subchannel_list && sd->subchannel_list != p->latest_pending_subchannel_list) { - // sd belongs to an outdated subchannel_list: get rid of it. - rr_subchannel_list_shutdown(exec_ctx, sd->subchannel_list, "sl_oudated"); + char *reason = NULL; + if (sd->subchannel_list->shutting_down) { + reason = "sl_outdated_straggler"; + rr_subchannel_list_unref(exec_ctx, sd->subchannel_list, reason); + } else { + reason = "sl_outdated"; + rr_subchannel_list_shutdown_and_unref(exec_ctx, sd->subchannel_list, + reason); + } + GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &p->base, reason); return; } // Now that we're inside the combiner, copy the pending connectivity // state (which was set by the connectivity state watcher) to // curr_connectivity_state, which is what we use inside of the combiner. sd->curr_connectivity_state = sd->pending_connectivity_state_unsafe; - if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { - gpr_log(GPR_DEBUG, - "[RR %p] connectivity changed for subchannel %p: " - "prev_state=%d new_state=%d", - (void *)p, (void *)sd->subchannel, sd->prev_connectivity_state, - sd->curr_connectivity_state); - } // Update state counters and determine new overall state. update_state_counters_locked(sd); sd->prev_connectivity_state = sd->curr_connectivity_state; @@ -556,9 +613,10 @@ static void rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx, void *arg, if (sd->user_data != NULL) { GPR_ASSERT(sd->user_data_vtable != NULL); sd->user_data_vtable->destroy(exec_ctx, sd->user_data); + sd->user_data = NULL; } if (new_policy_connectivity_state == GRPC_CHANNEL_SHUTDOWN) { - /* the policy is shutting down. Flush all the pending picks... */ + // the policy is shutting down. Flush all the pending picks... pending_pick *pp; while ((pp = p->pending_picks)) { p->pending_picks = pp->next; @@ -567,8 +625,9 @@ static void rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx, void *arg, gpr_free(pp); } } - /* unref the "rr_connectivity" weak ref from start_picking */ - rr_subchannel_list_unref(exec_ctx, sd->subchannel_list, "sd_shutdown"); + rr_subchannel_list_unref(exec_ctx, sd->subchannel_list, + "sd_shutdown+started_picking"); + // unref the "rr_connectivity_update" weak ref from start_picking. GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &p->base, "rr_connectivity_sd_shutdown"); } else { // sd not in SHUTDOWN @@ -593,10 +652,10 @@ static void rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx, void *arg, } if (p->subchannel_list != NULL) { // dispose of the current subchannel_list - rr_subchannel_list_shutdown(exec_ctx, p->subchannel_list, - "sl_shutdown_rr_update_connectivity"); + rr_subchannel_list_shutdown_and_unref(exec_ctx, p->subchannel_list, + "sl_phase_out_shutdown"); } - p->subchannel_list = sd->subchannel_list; + p->subchannel_list = p->latest_pending_subchannel_list; p->latest_pending_subchannel_list = NULL; } /* at this point we know there's at least one suitable subchannel. Go @@ -607,8 +666,8 @@ static void rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx, void *arg, subchannel_data *selected = &p->subchannel_list->subchannels[next_ready_index]; if (p->pending_picks != NULL) { - /* if the selected subchannel is going to be used for the pending - * picks, update the last picked pointer */ + // if the selected subchannel is going to be used for the pending + // picks, update the last picked pointer update_last_ready_subchannel_index_locked(p, next_ready_index); } pending_pick *pp; @@ -622,16 +681,17 @@ static void rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx, void *arg, } if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { gpr_log(GPR_DEBUG, - "[RR CONN CHANGED] TARGET <-- SUBCHANNEL %p (INDEX %lu)", - (void *)selected->subchannel, - (unsigned long)next_ready_index); + "[RR %p] Fulfilling pending pick. Target <-- subchannel %p " + "(subchannel_list %p, index %lu)", + (void *)p, (void *)selected->subchannel, + (void *)p->subchannel_list, (unsigned long)next_ready_index); } GRPC_CLOSURE_SCHED(exec_ctx, pp->on_complete, GRPC_ERROR_NONE); gpr_free(pp); } } - /* renew notification: reuses the "rr_connectivity" weak ref on the policy - * as well as the sd->subchannel_list ref. */ + /* renew notification: reuses the "rr_connectivity_update" weak ref on the + * policy as well as the sd->subchannel_list ref. */ grpc_subchannel_notify_on_state_change( exec_ctx, sd->subchannel, p->base.interested_parties, &sd->pending_connectivity_state_unsafe, @@ -689,8 +749,7 @@ static void rr_update_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *policy, } else { // otherwise, keep using the current subchannel list (ignore this update). gpr_log(GPR_ERROR, - "No valid LB addresses channel arg for Round Robin %p update, " - "ignoring.", + "[RR %p] No valid LB addresses channel arg for update, ignoring.", (void *)p); } return; @@ -700,30 +759,32 @@ static void rr_update_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *policy, for (size_t i = 0; i < addresses->num_addresses; i++) { if (!addresses->addresses[i].is_balancer) ++num_addrs; } + rr_subchannel_list *subchannel_list = rr_subchannel_list_create(p, num_addrs); if (num_addrs == 0) { grpc_connectivity_state_set( exec_ctx, &p->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE, GRPC_ERROR_CREATE_FROM_STATIC_STRING("Empty update"), "rr_update_empty"); if (p->subchannel_list != NULL) { - rr_subchannel_list_shutdown(exec_ctx, p->subchannel_list, - "sl_shutdown_rr_update"); - p->subchannel_list = NULL; + rr_subchannel_list_shutdown_and_unref(exec_ctx, p->subchannel_list, + "sl_shutdown_empty_update"); } + p->subchannel_list = subchannel_list; // empty list return; } size_t subchannel_index = 0; - rr_subchannel_list *subchannel_list = gpr_zalloc(sizeof(*subchannel_list)); - subchannel_list->policy = p; - subchannel_list->subchannels = - gpr_zalloc(sizeof(subchannel_data) * num_addrs); - subchannel_list->num_subchannels = num_addrs; - gpr_ref_init(&subchannel_list->refcount, 1); - p->latest_pending_subchannel_list = subchannel_list; - if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { - gpr_log(GPR_DEBUG, "Created subchannel list %p for %lu subchannels", - (void *)subchannel_list, (unsigned long)num_addrs); + if (p->latest_pending_subchannel_list != NULL && p->started_picking) { + if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { + gpr_log(GPR_DEBUG, + "[RR %p] Shutting down latest pending subchannel list %p, about " + "to be replaced by newer latest %p", + (void *)p, (void *)p->latest_pending_subchannel_list, + (void *)subchannel_list); + } + rr_subchannel_list_shutdown_and_unref( + exec_ctx, p->latest_pending_subchannel_list, "sl_outdated_dont_smash"); } + p->latest_pending_subchannel_list = subchannel_list; grpc_subchannel_args sc_args; /* We need to remove the LB addresses in order to be able to compare the * subchannel keys of subchannels from a different batch of addresses. */ @@ -747,11 +808,12 @@ static void rr_update_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *policy, if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { char *address_uri = grpc_sockaddr_to_uri(&addresses->addresses[i].address); - gpr_log(GPR_DEBUG, - "index %lu: Created subchannel %p for address uri %s into " - "subchannel_list %p", - (unsigned long)subchannel_index, (void *)subchannel, address_uri, - (void *)subchannel_list); + gpr_log( + GPR_DEBUG, + "[RR %p] index %lu: Created subchannel %p for address uri %s into " + "subchannel_list %p", + (void *)p, (unsigned long)subchannel_index, (void *)subchannel, + address_uri, (void *)subchannel_list); gpr_free(address_uri); } grpc_channel_args_destroy(exec_ctx, new_args); @@ -790,10 +852,11 @@ static void rr_update_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *policy, // The policy isn't picking yet. Save the update for later, disposing of // previous version if any. if (p->subchannel_list != NULL) { - rr_subchannel_list_shutdown(exec_ctx, p->subchannel_list, - "rr_update_before_started_picking"); + rr_subchannel_list_shutdown_and_unref(exec_ctx, p->subchannel_list, + "rr_update_before_started_picking"); } p->subchannel_list = subchannel_list; + p->latest_pending_subchannel_list = NULL; } } @@ -818,12 +881,12 @@ static grpc_lb_policy *round_robin_create(grpc_exec_ctx *exec_ctx, grpc_lb_policy_args *args) { GPR_ASSERT(args->client_channel_factory != NULL); round_robin_lb_policy *p = gpr_zalloc(sizeof(*p)); - rr_update_locked(exec_ctx, &p->base, args); grpc_lb_policy_init(&p->base, &round_robin_lb_policy_vtable, args->combiner); grpc_connectivity_state_init(&p->state_tracker, GRPC_CHANNEL_IDLE, "round_robin"); + rr_update_locked(exec_ctx, &p->base, args); if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) { - gpr_log(GPR_DEBUG, "Created Round Robin %p with %lu subchannels", (void *)p, + gpr_log(GPR_DEBUG, "[RR %p] Created with %lu subchannels", (void *)p, (unsigned long)p->subchannel_list->num_subchannels); } return &p->base; @@ -844,7 +907,7 @@ static grpc_lb_policy_factory *round_robin_lb_factory_create() { void grpc_lb_policy_round_robin_init() { grpc_register_lb_policy(round_robin_lb_factory_create()); - grpc_register_tracer("round_robin", &grpc_lb_round_robin_trace); + grpc_register_tracer(&grpc_lb_round_robin_trace); } void grpc_lb_policy_round_robin_shutdown() {} |