diff options
author | Juanli Shen <juanlishen@google.com> | 2018-07-24 20:54:48 -0700 |
---|---|---|
committer | Juanli Shen <juanlishen@google.com> | 2018-07-24 20:54:48 -0700 |
commit | b0e41f6c7da8015935fdda3ce38a728f01be4802 (patch) | |
tree | cf634f05d4a9852542ff451e02d1df050c61a00f /src/core/ext | |
parent | b929339276bdc60c3ff9fee3b3b1032d5c4539fc (diff) |
Fix re-resolution in pick_first
Diffstat (limited to 'src/core/ext')
-rw-r--r-- | src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc | 16 | ||||
-rw-r--r-- | src/core/ext/filters/client_channel/subchannel.cc | 14 |
2 files changed, 15 insertions, 15 deletions
diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 023281db97..d217dc0e63 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -451,6 +451,7 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( // latest pending subchannel lists. GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() || subchannel_list() == p->latest_pending_subchannel_list_.get()); + GPR_ASSERT(connectivity_state != GRPC_CHANNEL_SHUTDOWN); // Handle updates for the currently selected subchannel. if (p->selected_ == this) { if (grpc_lb_pick_first_trace.enabled()) { @@ -480,14 +481,12 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( "update"), "selected_not_ready+switch_to_update"); } else { - // TODO(juanlishen): we re-resolve when the selected subchannel goes to - // TRANSIENT_FAILURE because we used to shut down in this case before - // re-resolution is introduced. But we need to investigate whether we - // really want to take any action instead of waiting for the selected - // subchannel reconnecting. - GPR_ASSERT(connectivity_state != GRPC_CHANNEL_SHUTDOWN); if (connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - // If the selected channel goes bad, request a re-resolution. + // If the selected subchannel goes bad, request a re-resolution. We also + // set the channel state to IDLE and reset started_picking_. The reason + // is that if the new state is TRANSIENT_FAILURE due to a GOAWAY + // reception we don't want to connect to the re-resolved backends until + // we leave the IDLE state. grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_IDLE, GRPC_ERROR_NONE, "selected_changed+reresolve"); @@ -568,9 +567,10 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( // Case 1: Only set state to TRANSIENT_FAILURE if we've tried // all subchannels. if (sd->Index() == 0 && subchannel_list() == p->subchannel_list_.get()) { + p->TryReresolutionLocked(&grpc_lb_pick_first_trace, GRPC_ERROR_NONE); grpc_connectivity_state_set( &p->state_tracker_, GRPC_CHANNEL_TRANSIENT_FAILURE, - GRPC_ERROR_REF(error), "connecting_transient_failure"); + GRPC_ERROR_REF(error), "exhausted_subchannels"); } sd->StartConnectivityWatchLocked(); break; diff --git a/src/core/ext/filters/client_channel/subchannel.cc b/src/core/ext/filters/client_channel/subchannel.cc index 93df2aff70..71ef8c518b 100644 --- a/src/core/ext/filters/client_channel/subchannel.cc +++ b/src/core/ext/filters/client_channel/subchannel.cc @@ -402,8 +402,6 @@ static void continue_connect_locked(grpc_subchannel* c) { c->next_attempt_deadline = c->backoff->NextAttemptTime(); args.deadline = std::max(c->next_attempt_deadline, min_deadline); args.channel_args = c->args; - grpc_connectivity_state_set(&c->state_tracker, GRPC_CHANNEL_CONNECTING, - GRPC_ERROR_NONE, "state_change"); grpc_connector_connect(c->connector, &args, &c->connecting_result, &c->on_connected); } @@ -459,27 +457,24 @@ static void maybe_start_connecting_locked(grpc_subchannel* c) { /* Don't try to connect if we're already disconnected */ return; } - if (c->connecting) { /* Already connecting: don't restart */ return; } - if (c->connected_subchannel != nullptr) { /* Already connected: don't restart */ return; } - if (!grpc_connectivity_state_has_watchers(&c->state_tracker)) { /* Nobody is interested in connecting: so don't just yet */ return; } - c->connecting = true; GRPC_SUBCHANNEL_WEAK_REF(c, "connecting"); - if (!c->backoff_begun) { c->backoff_begun = true; + grpc_connectivity_state_set(&c->state_tracker, GRPC_CHANNEL_CONNECTING, + GRPC_ERROR_NONE, "connecting"); continue_connect_locked(c); } else { GPR_ASSERT(!c->have_alarm); @@ -494,6 +489,11 @@ static void maybe_start_connecting_locked(grpc_subchannel* c) { } GRPC_CLOSURE_INIT(&c->on_alarm, on_alarm, c, grpc_schedule_on_exec_ctx); grpc_timer_init(&c->alarm, c->next_attempt_deadline, &c->on_alarm); + // During backoff, we prefer the connectivity state of CONNECTING instead of + // TRANSIENT_FAILURE in order to prevent triggering re-resolution + // continuously in pick_first. + grpc_connectivity_state_set(&c->state_tracker, GRPC_CHANNEL_CONNECTING, + GRPC_ERROR_NONE, "backoff"); } } |