From 10142e465105303d6851c82a6b7683e4fb5af71c Mon Sep 17 00:00:00 2001 From: Juanli Shen Date: Fri, 16 Feb 2018 14:27:59 -0800 Subject: Fix test again --- test/cpp/end2end/grpclb_end2end_test.cc | 138 ++++++++++++++++---------------- 1 file changed, 70 insertions(+), 68 deletions(-) diff --git a/test/cpp/end2end/grpclb_end2end_test.cc b/test/cpp/end2end/grpclb_end2end_test.cc index cc65fe4fdb..96bd93682e 100644 --- a/test/cpp/end2end/grpclb_end2end_test.cc +++ b/test/cpp/end2end/grpclb_end2end_test.cc @@ -187,9 +187,13 @@ class BalancerServiceImpl : public BalancerService { Status BalanceLoad(ServerContext* context, Stream* stream) override { gpr_log(GPR_INFO, "LB[%p]: BalanceLoad", this); LoadBalanceRequest request; - stream->Read(&request); + std::vector responses_and_delays; + + if (!stream->Read(&request)) { + goto done; + } IncreaseRequestCount(); - gpr_log(GPR_INFO, "LB[%p]: recv msg '%s'", this, + gpr_log(GPR_INFO, "LB[%p]: received initial message '%s'", this, request.DebugString().c_str()); // TODO(juanlishen): Initial response should always be the first response. @@ -201,7 +205,6 @@ class BalancerServiceImpl : public BalancerService { stream->Write(initial_response); } - std::vector responses_and_delays; { std::unique_lock lock(mu_); responses_and_delays = responses_and_delays_; @@ -217,14 +220,13 @@ class BalancerServiceImpl : public BalancerService { std::unique_lock lock(mu_); if (shutdown_) goto done; serverlist_cond_.wait(lock, [this] { return serverlist_ready_; }); - serverlist_ready_ = false; } if (client_load_reporting_interval_seconds_ > 0) { request.Clear(); if (stream->Read(&request)) { - gpr_log(GPR_INFO, "LB[%p]: recv client load report msg: '%s'", this, - request.DebugString().c_str()); + gpr_log(GPR_INFO, "LB[%p]: received client load report message '%s'", + this, request.DebugString().c_str()); GPR_ASSERT(request.has_client_stats()); // We need to acquire the lock here in order to prevent the notify_one // below from firing before its corresponding wait is executed. @@ -297,7 +299,7 @@ class BalancerServiceImpl : public BalancerService { void NotifyDoneWithServerlists() { std::lock_guard lock(mu_); serverlist_ready_ = true; - serverlist_cond_.notify_one(); + serverlist_cond_.notify_all(); } private: @@ -1090,26 +1092,26 @@ TEST_F(UpdatesTest, UpdateBalancersDeadUpdate) { EXPECT_EQ(0U, balancer_servers_[2].service_->response_count()); } -TEST_F(UpdatesTest, ReresolveDeadBalancer) { +TEST_F(UpdatesTest, ReresolveDeadBackend) { + ResetStub(500); + // The first resolution contains the addresses of a balancer that never + // responds, and a fallback backend. std::vector addresses; addresses.emplace_back(AddressData{balancer_servers_[0].port_, true, ""}); + addresses.emplace_back(AddressData{backend_servers_[0].port_, false, ""}); SetNextResolution(addresses); + // The re-resolution result will contain the addresses of the same balancer + // and a new fallback backend. addresses.clear(); - addresses.emplace_back(AddressData{balancer_servers_[1].port_, true, ""}); + addresses.emplace_back(AddressData{balancer_servers_[0].port_, true, ""}); + addresses.emplace_back(AddressData{backend_servers_[1].port_, false, ""}); SetNextReresolutionResponse(addresses); - const std::vector first_backend{GetBackendPorts()[0]}; - const std::vector second_backend{GetBackendPorts()[1]}; - - ScheduleResponseForBalancer( - 0, BalancerServiceImpl::BuildResponseForBackends(first_backend, {}), 0); - ScheduleResponseForBalancer( - 1, BalancerServiceImpl::BuildResponseForBackends(second_backend, {}), 0); // Start servers and send 10 RPCs per server. gpr_log(GPR_INFO, "========= BEFORE FIRST BATCH =========="); CheckRpcSendOk(10); gpr_log(GPR_INFO, "========= DONE WITH FIRST BATCH =========="); - // All 10 requests should have gone to the first backend. + // All 10 requests should have gone to the fallback backend. EXPECT_EQ(10U, backend_servers_[0].service_->request_count()); // Kill backend 0. @@ -1117,42 +1119,10 @@ TEST_F(UpdatesTest, ReresolveDeadBalancer) { if (backends_[0]->Shutdown()) backend_servers_[0].Shutdown(); gpr_log(GPR_INFO, "********** KILLED BACKEND 0 *************"); - CheckRpcSendFailure(); - - balancers_[1]->NotifyDoneWithServerlists(); - balancers_[2]->NotifyDoneWithServerlists(); - EXPECT_EQ(0U, balancer_servers_[1].service_->request_count()); - EXPECT_EQ(0U, balancer_servers_[1].service_->response_count()); - EXPECT_EQ(0U, balancer_servers_[2].service_->request_count()); - EXPECT_EQ(0U, balancer_servers_[2].service_->response_count()); - - // Kill balancer 0. - gpr_log(GPR_INFO, "********** ABOUT TO KILL BALANCER 0 *************"); - balancers_[0]->NotifyDoneWithServerlists(); - if (balancers_[0]->Shutdown()) balancer_servers_[0].Shutdown(); - gpr_log(GPR_INFO, "********** KILLED BALANCER 0 *************"); - - balancers_[0]->NotifyDoneWithServerlists(); - balancers_[1]->NotifyDoneWithServerlists(); - balancers_[2]->NotifyDoneWithServerlists(); - // Balancer 0 got a single request. - EXPECT_EQ(1U, balancer_servers_[0].service_->request_count()); - // and sent a single response. - EXPECT_EQ(1U, balancer_servers_[0].service_->response_count()); - // Balancer 1 may have received a request if re-resolution is done quickly - // enough. - EXPECT_GE(balancer_servers_[1].service_->request_count(), 0U); - EXPECT_GE(balancer_servers_[1].service_->response_count(), 0U); - EXPECT_LE(balancer_servers_[1].service_->request_count(), 1U); - EXPECT_LE(balancer_servers_[1].service_->response_count(), 1U); - EXPECT_EQ(0U, balancer_servers_[2].service_->request_count()); - EXPECT_EQ(0U, balancer_servers_[2].service_->response_count()); - // Wait until re-resolution has finished, as signaled by the second backend // receiving a request. WaitForBackend(1); - // This is serviced by the new serverlist. gpr_log(GPR_INFO, "========= BEFORE SECOND BATCH =========="); CheckRpcSendOk(10); gpr_log(GPR_INFO, "========= DONE WITH SECOND BATCH =========="); @@ -1163,33 +1133,43 @@ TEST_F(UpdatesTest, ReresolveDeadBalancer) { balancers_[1]->NotifyDoneWithServerlists(); balancers_[2]->NotifyDoneWithServerlists(); EXPECT_EQ(1U, balancer_servers_[0].service_->request_count()); - EXPECT_EQ(1U, balancer_servers_[0].service_->response_count()); - EXPECT_EQ(1U, balancer_servers_[1].service_->request_count()); - EXPECT_EQ(1U, balancer_servers_[1].service_->response_count()); + EXPECT_EQ(0U, balancer_servers_[0].service_->response_count()); + EXPECT_EQ(0U, balancer_servers_[1].service_->request_count()); + EXPECT_EQ(0U, balancer_servers_[1].service_->response_count()); EXPECT_EQ(0U, balancer_servers_[2].service_->request_count()); EXPECT_EQ(0U, balancer_servers_[2].service_->response_count()); } -TEST_F(UpdatesTest, ReresolveDeadBackend) { - ResetStub(500); - // The first resolution contains the addresses of a balancer that never - // responds, and a fallback backend. +// TODO(juanlishen): Should be removed when the first response is always the +// initial response. Currently, if client load reporting is not enabled, the +// balancer doesn't send initial response. When the backend shuts down, an +// unexpected re-resolution will happen. This test configuration is a workaround +// for test ReresolveDeadBalancer. +class UpdatesWithClientLoadReportingTest : public GrpclbEnd2endTest { + public: + UpdatesWithClientLoadReportingTest() : GrpclbEnd2endTest(4, 3, 2) {} +}; + +TEST_F(UpdatesWithClientLoadReportingTest, ReresolveDeadBalancer) { std::vector addresses; addresses.emplace_back(AddressData{balancer_servers_[0].port_, true, ""}); - addresses.emplace_back(AddressData{backend_servers_[0].port_, false, ""}); SetNextResolution(addresses); - // The re-resolution result will contain the addresses of the same balancer - // and a new fallback backend. addresses.clear(); - addresses.emplace_back(AddressData{balancer_servers_[0].port_, true, ""}); - addresses.emplace_back(AddressData{backend_servers_[1].port_, false, ""}); + addresses.emplace_back(AddressData{balancer_servers_[1].port_, true, ""}); SetNextReresolutionResponse(addresses); + const std::vector first_backend{GetBackendPorts()[0]}; + const std::vector second_backend{GetBackendPorts()[1]}; + + ScheduleResponseForBalancer( + 0, BalancerServiceImpl::BuildResponseForBackends(first_backend, {}), 0); + ScheduleResponseForBalancer( + 1, BalancerServiceImpl::BuildResponseForBackends(second_backend, {}), 0); // Start servers and send 10 RPCs per server. gpr_log(GPR_INFO, "========= BEFORE FIRST BATCH =========="); CheckRpcSendOk(10); gpr_log(GPR_INFO, "========= DONE WITH FIRST BATCH =========="); - // All 10 requests should have gone to the fallback backend. + // All 10 requests should have gone to the first backend. EXPECT_EQ(10U, backend_servers_[0].service_->request_count()); // Kill backend 0. @@ -1197,23 +1177,45 @@ TEST_F(UpdatesTest, ReresolveDeadBackend) { if (backends_[0]->Shutdown()) backend_servers_[0].Shutdown(); gpr_log(GPR_INFO, "********** KILLED BACKEND 0 *************"); + CheckRpcSendFailure(); + + // Balancer 0 got a single request. + EXPECT_EQ(1U, balancer_servers_[0].service_->request_count()); + // and sent a single response. + EXPECT_EQ(1U, balancer_servers_[0].service_->response_count()); + EXPECT_EQ(0U, balancer_servers_[1].service_->request_count()); + EXPECT_EQ(0U, balancer_servers_[1].service_->response_count()); + EXPECT_EQ(0U, balancer_servers_[2].service_->request_count()); + EXPECT_EQ(0U, balancer_servers_[2].service_->response_count()); + + // Kill balancer 0. + gpr_log(GPR_INFO, "********** ABOUT TO KILL BALANCER 0 *************"); + if (balancers_[0]->Shutdown()) balancer_servers_[0].Shutdown(); + gpr_log(GPR_INFO, "********** KILLED BALANCER 0 *************"); + // Wait until re-resolution has finished, as signaled by the second backend // receiving a request. WaitForBackend(1); + // This is serviced by the new serverlist. gpr_log(GPR_INFO, "========= BEFORE SECOND BATCH =========="); CheckRpcSendOk(10); gpr_log(GPR_INFO, "========= DONE WITH SECOND BATCH =========="); // All 10 requests should have gone to the second backend. EXPECT_EQ(10U, backend_servers_[1].service_->request_count()); - balancers_[0]->NotifyDoneWithServerlists(); - balancers_[1]->NotifyDoneWithServerlists(); - balancers_[2]->NotifyDoneWithServerlists(); EXPECT_EQ(1U, balancer_servers_[0].service_->request_count()); - EXPECT_EQ(0U, balancer_servers_[0].service_->response_count()); - EXPECT_EQ(0U, balancer_servers_[1].service_->request_count()); - EXPECT_EQ(0U, balancer_servers_[1].service_->response_count()); + EXPECT_EQ(1U, balancer_servers_[0].service_->response_count()); + // After balancer 0 is killed, we restart an LB call immediately (because we + // disconnect to a previously connected balancer). Although we will cancel + // this call when the re-resolution update is done and another LB call restart + // is needed, this old call may still succeed reaching the LB server if + // re-resolution is slow. So balancer 1 may have received 2 requests and sent + // 2 responses. + EXPECT_GE(balancer_servers_[1].service_->request_count(), 1U); + EXPECT_GE(balancer_servers_[1].service_->response_count(), 1U); + EXPECT_LE(balancer_servers_[1].service_->request_count(), 2U); + EXPECT_LE(balancer_servers_[1].service_->response_count(), 2U); EXPECT_EQ(0U, balancer_servers_[2].service_->request_count()); EXPECT_EQ(0U, balancer_servers_[2].service_->response_count()); } -- cgit v1.2.3