author     Juanli Shen <aspirinsjl@gmail.com>  2018-08-17 10:02:28 -0700
committer  GitHub <noreply@github.com>         2018-08-17 10:02:28 -0700
commit     b8972046b44eb1ccfa5be05df75ac967ab4c4738 (patch)
tree       1c4a856d906af8742a6f243425008322efb9f811
parent     3bb0cd77697c2d80cc3f14397a6bf2a550499759 (diff)
parent     d19fd1c689b1d60cf329331da7fab5d1ca6063cc (diff)
Merge pull request #16306 from AspirinSJL/pf_check_state_before_watch
PF: Check connectivity state before watching
-rw-r--r--  src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc  112
-rw-r--r--  test/cpp/end2end/client_lb_end2end_test.cc                                55
2 files changed, 128 insertions, 39 deletions
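
Editorial note on the change: pick_first's per-subchannel connectivity watch only reports transitions away from the last observed state, so a subchannel that is already READY when the watch is armed (for example, a shared subchannel that another channel has already connected) never triggers a notification. The new CheckConnectivityStateAndStartWatchingLocked() therefore reads the current state first and, if the subchannel is READY but not yet selected, processes it immediately before starting the watch. The following standalone sketch illustrates that pattern; the types and names (FakeSubchannel, WatchFrom, CheckStateAndStartWatching) are hypothetical simplifications, not the gRPC internal API.

// check_before_watch_sketch.cc
// A minimal, self-contained sketch (NOT the real gRPC internals) of the
// check-before-watch pattern: a connectivity watch only fires when the state
// moves away from the state it was armed with, so an already-READY subchannel
// must be handled before the watch is started.
#include <cstdio>
#include <functional>
#include <utility>

enum class State { kIdle, kConnecting, kReady, kTransientFailure };

// Hypothetical stand-in for a subchannel with change-only notifications.
class FakeSubchannel {
 public:
  explicit FakeSubchannel(State s) : state_(s) {}
  State state() const { return state_; }
  // The callback fires only when the state changes away from `from`.
  void WatchFrom(State from, std::function<void(State)> cb) {
    watched_from_ = from;
    cb_ = std::move(cb);
  }
  void SetState(State s) {
    state_ = s;
    if (cb_ && s != watched_from_) cb_(s);
  }

 private:
  State state_;
  State watched_from_ = State::kIdle;
  std::function<void(State)> cb_;
};

void CheckStateAndStartWatching(FakeSubchannel* sc,
                                const std::function<void()>& on_ready) {
  State state = sc->state();
  if (state == State::kReady) {
    // Already READY: handle it now; the watch below only reports
    // transitions away from `state`, so it would never tell us.
    on_ready();
  }
  sc->WatchFrom(state, [on_ready](State s) {
    if (s == State::kReady) on_ready();
  });
}

int main() {
  // A subchannel that is already connected, e.g. shared with another channel.
  FakeSubchannel sc(State::kReady);
  CheckStateAndStartWatching(&sc, [] { std::puts("subchannel selected"); });
  return 0;
}

Without such an up-front check, an already-READY subchannel produces no notification and pending picks stall, which is the scenario the new regression test below reproduces.
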
diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
index bc51903ef5..ab33d93398 100644
--- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
+++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
@@ -80,6 +80,11 @@ class PickFirst : public LoadBalancingPolicy {
void ProcessConnectivityChangeLocked(
grpc_connectivity_state connectivity_state, grpc_error* error) override;
+
+ // Processes the connectivity change to READY for an unselected subchannel.
+ void ProcessUnselectedReadyLocked();
+
+ void CheckConnectivityStateAndStartWatchingLocked();
};
class PickFirstSubchannelList
@@ -247,7 +252,8 @@ void PickFirst::StartPickingLocked() {
if (subchannel_list_ != nullptr) {
for (size_t i = 0; i < subchannel_list_->num_subchannels(); ++i) {
if (subchannel_list_->subchannel(i)->subchannel() != nullptr) {
- subchannel_list_->subchannel(i)->StartConnectivityWatchLocked();
+ subchannel_list_->subchannel(i)
+ ->CheckConnectivityStateAndStartWatchingLocked();
break;
}
}
@@ -386,7 +392,8 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) {
// If we've started picking, start trying to connect to the first
// subchannel in the new list.
if (started_picking_) {
- subchannel_list_->subchannel(0)->StartConnectivityWatchLocked();
+ subchannel_list_->subchannel(0)
+ ->CheckConnectivityStateAndStartWatchingLocked();
}
} else {
// We do have a selected subchannel.
@@ -440,7 +447,7 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) {
// subchannel in the new list.
if (started_picking_) {
latest_pending_subchannel_list_->subchannel(0)
- ->StartConnectivityWatchLocked();
+ ->CheckConnectivityStateAndStartWatchingLocked();
}
}
}
@@ -519,41 +526,7 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
// select in place of the current one.
switch (connectivity_state) {
case GRPC_CHANNEL_READY: {
- // Case 2. Promote p->latest_pending_subchannel_list_ to
- // p->subchannel_list_.
- if (subchannel_list() == p->latest_pending_subchannel_list_.get()) {
- if (grpc_lb_pick_first_trace.enabled()) {
- gpr_log(GPR_INFO,
- "Pick First %p promoting pending subchannel list %p to "
- "replace %p",
- p, p->latest_pending_subchannel_list_.get(),
- p->subchannel_list_.get());
- }
- p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
- }
- // Cases 1 and 2.
- grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_READY,
- GRPC_ERROR_NONE, "connecting_ready");
- p->selected_ = this;
- if (grpc_lb_pick_first_trace.enabled()) {
- gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p,
- subchannel());
- }
- // Drop all other subchannels, since we are now connected.
- p->DestroyUnselectedSubchannelsLocked();
- // Update any calls that were waiting for a pick.
- PickState* pick;
- while ((pick = p->pending_picks_)) {
- p->pending_picks_ = pick->next;
- pick->connected_subchannel =
- p->selected_->connected_subchannel()->Ref();
- if (grpc_lb_pick_first_trace.enabled()) {
- gpr_log(GPR_INFO,
- "Servicing pending pick with selected subchannel %p",
- p->selected_->subchannel());
- }
- GRPC_CLOSURE_SCHED(pick->on_complete, GRPC_ERROR_NONE);
- }
+ ProcessUnselectedReadyLocked();
// Renew notification.
RenewConnectivityWatchLocked();
break;
@@ -574,7 +547,7 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
&p->state_tracker_, GRPC_CHANNEL_TRANSIENT_FAILURE,
GRPC_ERROR_REF(error), "exhausted_subchannels");
}
- sd->StartConnectivityWatchLocked();
+ sd->CheckConnectivityStateAndStartWatchingLocked();
break;
}
case GRPC_CHANNEL_CONNECTING:
@@ -595,6 +568,67 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
GRPC_ERROR_UNREF(error);
}
+void PickFirst::PickFirstSubchannelData::ProcessUnselectedReadyLocked() {
+ PickFirst* p = static_cast<PickFirst*>(subchannel_list()->policy());
+ // If we get here, there are two possible cases:
+ // 1. We do not currently have a selected subchannel, and the update is
+ // for a subchannel in p->subchannel_list_ that we're trying to
+ // connect to. The goal here is to find a subchannel that we can
+ // select.
+ // 2. We do currently have a selected subchannel, and the update is
+ // for a subchannel in p->latest_pending_subchannel_list_. The
+ // goal here is to find a subchannel from the update that we can
+ // select in place of the current one.
+ GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() ||
+ subchannel_list() == p->latest_pending_subchannel_list_.get());
+ // Case 2. Promote p->latest_pending_subchannel_list_ to p->subchannel_list_.
+ if (subchannel_list() == p->latest_pending_subchannel_list_.get()) {
+ if (grpc_lb_pick_first_trace.enabled()) {
+ gpr_log(GPR_INFO,
+ "Pick First %p promoting pending subchannel list %p to "
+ "replace %p",
+ p, p->latest_pending_subchannel_list_.get(),
+ p->subchannel_list_.get());
+ }
+ p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
+ }
+ // Cases 1 and 2.
+ grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_READY,
+ GRPC_ERROR_NONE, "subchannel_ready");
+ p->selected_ = this;
+ if (grpc_lb_pick_first_trace.enabled()) {
+ gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p, subchannel());
+ }
+ // Drop all other subchannels, since we are now connected.
+ p->DestroyUnselectedSubchannelsLocked();
+ // Update any calls that were waiting for a pick.
+ PickState* pick;
+ while ((pick = p->pending_picks_)) {
+ p->pending_picks_ = pick->next;
+ pick->connected_subchannel = p->selected_->connected_subchannel()->Ref();
+ if (grpc_lb_pick_first_trace.enabled()) {
+ gpr_log(GPR_INFO, "Servicing pending pick with selected subchannel %p",
+ p->selected_->subchannel());
+ }
+ GRPC_CLOSURE_SCHED(pick->on_complete, GRPC_ERROR_NONE);
+ }
+}
+
+void PickFirst::PickFirstSubchannelData::
+ CheckConnectivityStateAndStartWatchingLocked() {
+ PickFirst* p = static_cast<PickFirst*>(subchannel_list()->policy());
+ grpc_error* error = GRPC_ERROR_NONE;
+ if (p->selected_ != this &&
+ CheckConnectivityStateLocked(&error) == GRPC_CHANNEL_READY) {
+ // We must process the READY subchannel before we start watching it.
+ // Otherwise, we won't know it's READY because we will be waiting for its
+ // connectivity state to change from READY.
+ ProcessUnselectedReadyLocked();
+ }
+ GRPC_ERROR_UNREF(error);
+ StartConnectivityWatchLocked();
+}
+
//
// factory
//
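
The test change below adds a WaitForChannelNotReady() helper and a regression test in which two channels end up holding the same subchannel, one that is already connected by the first channel and later killed. As context for the helper, here is a standalone sketch of the same polling pattern using only the public grpc::Channel API; the target address is hypothetical.

// wait_not_ready_sketch.cc
// Standalone sketch of the polling pattern used by the test's
// WaitForChannelNotReady() helper: observe the channel state without
// triggering a connection attempt, and block until it leaves READY or the
// deadline expires.
#include <chrono>
#include <memory>

#include <grpcpp/grpcpp.h>

bool WaitUntilNotReady(grpc::Channel* channel, int timeout_seconds) {
  const auto deadline =
      std::chrono::system_clock::now() + std::chrono::seconds(timeout_seconds);
  grpc_connectivity_state state;
  // GetState(false) only observes; it does not try to connect.
  while ((state = channel->GetState(/*try_to_connect=*/false)) ==
         GRPC_CHANNEL_READY) {
    // WaitForStateChange() returns false if the deadline passes first.
    if (!channel->WaitForStateChange(state, deadline)) return false;
  }
  return true;
}

int main() {
  // Hypothetical target; any address works for illustrating the pattern.
  std::shared_ptr<grpc::Channel> channel = grpc::CreateChannel(
      "localhost:50051", grpc::InsecureChannelCredentials());
  return WaitUntilNotReady(channel.get(), /*timeout_seconds=*/5) ? 0 : 1;
}

The test relies on this wait to make sure the disconnection notification has propagated before the second channel receives the re-resolved address list.
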
diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc
index 26c241b74a..68219c16dc 100644
--- a/test/cpp/end2end/client_lb_end2end_test.cc
+++ b/test/cpp/end2end/client_lb_end2end_test.cc
@@ -291,6 +291,17 @@ class ClientLbEnd2endTest : public ::testing::Test {
ResetCounters();
}
+ bool WaitForChannelNotReady(Channel* channel, int timeout_seconds = 5) {
+ const gpr_timespec deadline =
+ grpc_timeout_seconds_to_deadline(timeout_seconds);
+ grpc_connectivity_state state;
+ while ((state = channel->GetState(false /* try_to_connect */)) ==
+ GRPC_CHANNEL_READY) {
+ if (!channel->WaitForStateChange(state, deadline)) return false;
+ }
+ return true;
+ }
+
bool SeenAllServers() {
for (const auto& server : servers_) {
if (server->service_.request_count() == 0) return false;
@@ -590,6 +601,50 @@ TEST_F(ClientLbEnd2endTest, PickFirstReresolutionNoSelected) {
EXPECT_EQ("pick_first", channel->GetLoadBalancingPolicyName());
}
+TEST_F(ClientLbEnd2endTest, PickFirstCheckStateBeforeStartWatch) {
+ std::vector<int> ports = {grpc_pick_unused_port_or_die()};
+ StartServers(1, ports);
+ auto channel_1 = BuildChannel("pick_first");
+ auto stub_1 = BuildStub(channel_1);
+ SetNextResolution(ports);
+ gpr_log(GPR_INFO, "****** RESOLUTION SET FOR CHANNEL 1 *******");
+ WaitForServer(stub_1, 0, DEBUG_LOCATION);
+ gpr_log(GPR_INFO, "****** CHANNEL 1 CONNECTED *******");
+ servers_[0]->Shutdown();
+ // Channel 1 will receive a re-resolution containing the same server. It will
+ // create a new subchannel and hold a ref to it.
+ servers_.clear();
+ StartServers(1, ports);
+ gpr_log(GPR_INFO, "****** SERVER RESTARTED *******");
+ auto channel_2 = BuildChannel("pick_first");
+ auto stub_2 = BuildStub(channel_2);
+ // TODO(juanlishen): This resolution result will only be visible to channel 2
+ // since the response generator is only associated with channel 2 now. We
+ // should change the response generator to be able to deliver updates to
+ // multiple channels at once.
+ SetNextResolution(ports);
+ gpr_log(GPR_INFO, "****** RESOLUTION SET FOR CHANNEL 2 *******");
+ WaitForServer(stub_2, 0, DEBUG_LOCATION, true);
+ gpr_log(GPR_INFO, "****** CHANNEL 2 CONNECTED *******");
+ servers_[0]->Shutdown();
+ // Wait until the disconnection has triggered the connectivity notification.
+ // Otherwise, the subchannel may be picked for the next call but will fail soon.
+ EXPECT_TRUE(WaitForChannelNotReady(channel_2.get()));
+ // Channel 2 will also receive a re-resolution containing the same server.
+ // Both channels will ref the same subchannel that failed.
+ servers_.clear();
+ StartServers(1, ports);
+ gpr_log(GPR_INFO, "****** SERVER RESTARTED AGAIN *******");
+ gpr_log(GPR_INFO, "****** CHANNEL 2 STARTING A CALL *******");
+ // The first call after the server restart will succeed.
+ CheckRpcSendOk(stub_2, DEBUG_LOCATION);
+ gpr_log(GPR_INFO, "****** CHANNEL 2 FINISHED A CALL *******");
+ // Check the LB policy name for channel 1.
+ EXPECT_EQ("pick_first", channel_1->GetLoadBalancingPolicyName());
+ // Check the LB policy name for channel 2.
+ EXPECT_EQ("pick_first", channel_2->GetLoadBalancingPolicyName());
+}
+
TEST_F(ClientLbEnd2endTest, RoundRobin) {
// Start servers and send one RPC per server.
const int kNumServers = 3;