From 592cf34f91a38a3d16c5b079dd28d89c8c382446 Mon Sep 17 00:00:00 2001 From: Juanli Shen Date: Mon, 4 Dec 2017 20:52:01 -0800 Subject: Add re-resolution into LB policies --- .../lb_policy/pick_first/pick_first.cc | 98 +++++++++++++++------- 1 file changed, 66 insertions(+), 32 deletions(-) (limited to 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc') diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 6cfc37e9d1..228a77d9db 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -70,8 +70,9 @@ static void pf_destroy(grpc_exec_ctx* exec_ctx, grpc_lb_policy* pol) { } } -static void shutdown_locked(grpc_exec_ctx* exec_ctx, pick_first_lb_policy* p, - grpc_error* error) { +static void pf_shutdown_locked(grpc_exec_ctx* exec_ctx, grpc_lb_policy* pol) { + pick_first_lb_policy* p = (pick_first_lb_policy*)pol; + grpc_error* error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel shutdown"); if (grpc_lb_pick_first_trace.enabled()) { gpr_log(GPR_DEBUG, "Pick First %p Shutting down", p); } @@ -96,14 +97,11 @@ static void shutdown_locked(grpc_exec_ctx* exec_ctx, pick_first_lb_policy* p, exec_ctx, p->latest_pending_subchannel_list, "pf_shutdown"); p->latest_pending_subchannel_list = nullptr; } + grpc_lb_policy_try_reresolve(exec_ctx, &p->base, &grpc_lb_pick_first_trace, + GRPC_ERROR_CANCELLED); GRPC_ERROR_UNREF(error); } -static void pf_shutdown_locked(grpc_exec_ctx* exec_ctx, grpc_lb_policy* pol) { - shutdown_locked(exec_ctx, (pick_first_lb_policy*)pol, - GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel shutdown")); -} - static void pf_cancel_pick_locked(grpc_exec_ctx* exec_ctx, grpc_lb_policy* pol, grpc_connected_subchannel** target, grpc_error* error) { @@ -157,10 +155,15 @@ static void start_picking_locked(grpc_exec_ctx* exec_ctx, if (p->subchannel_list != nullptr && p->subchannel_list->num_subchannels > 0) { p->subchannel_list->checking_subchannel = 0; - grpc_lb_subchannel_list_ref_for_connectivity_watch( - p->subchannel_list, "connectivity_watch+start_picking"); - grpc_lb_subchannel_data_start_connectivity_watch( - exec_ctx, &p->subchannel_list->subchannels[0]); + for (size_t i = 0; i < p->subchannel_list->num_subchannels; ++i) { + if (p->subchannel_list->subchannels[i].subchannel != nullptr) { + grpc_lb_subchannel_list_ref_for_connectivity_watch( + p->subchannel_list, "connectivity_watch+start_picking"); + grpc_lb_subchannel_data_start_connectivity_watch( + exec_ctx, &p->subchannel_list->subchannels[i]); + break; + } + } } } @@ -404,6 +407,9 @@ static void pf_connectivity_changed_locked(grpc_exec_ctx* exec_ctx, void* arg, if (sd->curr_connectivity_state != GRPC_CHANNEL_READY && p->latest_pending_subchannel_list != nullptr) { p->selected = nullptr; + grpc_lb_subchannel_data_stop_connectivity_watch(exec_ctx, sd); + grpc_lb_subchannel_list_unref_for_connectivity_watch( + exec_ctx, sd->subchannel_list, "selected_not_ready+switch_to_update"); grpc_lb_subchannel_list_shutdown_and_unref( exec_ctx, p->subchannel_list, "selected_not_ready+switch_to_update"); p->subchannel_list = p->latest_pending_subchannel_list; @@ -412,21 +418,35 @@ static void pf_connectivity_changed_locked(grpc_exec_ctx* exec_ctx, void* arg, exec_ctx, &p->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE, GRPC_ERROR_REF(error), "selected_not_ready+switch_to_update"); } else { - if (sd->curr_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - /* if the selected channel goes bad, we're done */ - sd->curr_connectivity_state = GRPC_CHANNEL_SHUTDOWN; + // TODO(juanlishen): we re-resolve when the selected subchannel goes to + // TRANSIENT_FAILURE because we used to shut down in this case before + // re-resolution is introduced. But we need to investigate whether we + // really want to take any action instead of waiting for the selected + // subchannel reconnecting. + if (sd->curr_connectivity_state == GRPC_CHANNEL_SHUTDOWN || + sd->curr_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { + // If the selected channel goes bad, request a re-resolution. + grpc_connectivity_state_set(exec_ctx, &p->state_tracker, + GRPC_CHANNEL_IDLE, GRPC_ERROR_NONE, + "selected_changed+reresolve"); + p->started_picking = false; + grpc_lb_policy_try_reresolve( + exec_ctx, &p->base, &grpc_lb_pick_first_trace, GRPC_ERROR_NONE); + } else { + grpc_connectivity_state_set(exec_ctx, &p->state_tracker, + sd->curr_connectivity_state, + GRPC_ERROR_REF(error), "selected_changed"); } - grpc_connectivity_state_set(exec_ctx, &p->state_tracker, - sd->curr_connectivity_state, - GRPC_ERROR_REF(error), "selected_changed"); if (sd->curr_connectivity_state != GRPC_CHANNEL_SHUTDOWN) { // Renew notification. grpc_lb_subchannel_data_start_connectivity_watch(exec_ctx, sd); } else { + p->selected = nullptr; grpc_lb_subchannel_data_stop_connectivity_watch(exec_ctx, sd); grpc_lb_subchannel_list_unref_for_connectivity_watch( exec_ctx, sd->subchannel_list, "pf_selected_shutdown"); - shutdown_locked(exec_ctx, p, GRPC_ERROR_REF(error)); + grpc_lb_subchannel_data_unref_subchannel(exec_ctx, sd, + "pf_selected_shutdown"); } } return; @@ -531,24 +551,37 @@ static void pf_connectivity_changed_locked(grpc_exec_ctx* exec_ctx, void* arg, } while (sd->subchannel == nullptr && sd != original_sd); if (sd == original_sd) { grpc_lb_subchannel_list_unref_for_connectivity_watch( - exec_ctx, sd->subchannel_list, "pf_candidate_shutdown"); - shutdown_locked(exec_ctx, p, - GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "Pick first exhausted channels", &error, 1)); - break; - } - if (sd->subchannel_list == p->subchannel_list) { - grpc_connectivity_state_set(exec_ctx, &p->state_tracker, - GRPC_CHANNEL_TRANSIENT_FAILURE, - GRPC_ERROR_REF(error), "subchannel_failed"); + exec_ctx, sd->subchannel_list, "pf_exhausted_subchannels"); + if (sd->subchannel_list == p->subchannel_list) { + grpc_connectivity_state_set(exec_ctx, &p->state_tracker, + GRPC_CHANNEL_IDLE, GRPC_ERROR_NONE, + "exhausted_subchannels+reresolve"); + p->started_picking = false; + grpc_lb_policy_try_reresolve( + exec_ctx, &p->base, &grpc_lb_pick_first_trace, GRPC_ERROR_NONE); + } + } else { + if (sd->subchannel_list == p->subchannel_list) { + grpc_connectivity_state_set( + exec_ctx, &p->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE, + GRPC_ERROR_REF(error), "subchannel_failed"); + } + // Reuses the connectivity refs from the previous watch. + grpc_lb_subchannel_data_start_connectivity_watch(exec_ctx, sd); } - // Reuses the connectivity refs from the previous watch. - grpc_lb_subchannel_data_start_connectivity_watch(exec_ctx, sd); - break; } } } +static void pf_set_reresolve_closure_locked( + grpc_exec_ctx* exec_ctx, grpc_lb_policy* policy, + grpc_closure* request_reresolution) { + pick_first_lb_policy* p = (pick_first_lb_policy*)policy; + GPR_ASSERT(!p->shutdown); + GPR_ASSERT(policy->request_reresolution == nullptr); + policy->request_reresolution = request_reresolution; +} + static const grpc_lb_policy_vtable pick_first_lb_policy_vtable = { pf_destroy, pf_shutdown_locked, @@ -559,7 +592,8 @@ static const grpc_lb_policy_vtable pick_first_lb_policy_vtable = { pf_exit_idle_locked, pf_check_connectivity_locked, pf_notify_on_state_change_locked, - pf_update_locked}; + pf_update_locked, + pf_set_reresolve_closure_locked}; static void pick_first_factory_ref(grpc_lb_policy_factory* factory) {} -- cgit v1.2.3