From 5f806d77dc9ddf0b3bec066bf71ff6dc76a6e504 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 4 Dec 2018 09:01:35 -0800 Subject: Fix bug in subchannel backoff reset code. --- test/cpp/end2end/client_lb_end2end_test.cc | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'test/cpp/end2end') diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index b667460cf0..1a7b36cdc0 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -507,6 +507,46 @@ TEST_F(ClientLbEnd2endTest, PickFirstResetConnectionBackoff) { EXPECT_LT(waited_ms, kInitialBackOffMs); } +TEST_F(ClientLbEnd2endTest, + PickFirstResetConnectionBackoffNextAttemptStartsImmediately) { + ChannelArguments args; + constexpr int kInitialBackOffMs = 1000; + args.SetInt(GRPC_ARG_INITIAL_RECONNECT_BACKOFF_MS, kInitialBackOffMs); + const std::vector ports = {grpc_pick_unused_port_or_die()}; + auto channel = BuildChannel("pick_first", args); + auto stub = BuildStub(channel); + SetNextResolution(ports); + // Wait for connect, which should fail ~immediately, because the server + // is not up. + gpr_log(GPR_INFO, "=== INITIAL CONNECTION ATTEMPT"); + EXPECT_FALSE( + channel->WaitForConnected(grpc_timeout_milliseconds_to_deadline(10))); + // Reset connection backoff. + gpr_log(GPR_INFO, "=== RESETTING BACKOFF"); + experimental::ChannelResetConnectionBackoff(channel.get()); + // Trigger a second connection attempt. This should also fail + // ~immediately, but the retry should be scheduled for + // kInitialBackOffMs instead of applying the multiplier. + gpr_log(GPR_INFO, "=== TRIGGERING SECOND CONNECTION ATTEMPT"); + EXPECT_FALSE( + channel->WaitForConnected(grpc_timeout_milliseconds_to_deadline(10))); + // Bring up a server on the chosen port. + gpr_log(GPR_INFO, "=== STARTING BACKEND"); + StartServers(1, ports); + // Wait for connect. Should happen within kInitialBackOffMs. + gpr_log(GPR_INFO, "=== TRIGGERING THIRD CONNECTION ATTEMPT"); + const gpr_timespec t0 = gpr_now(GPR_CLOCK_MONOTONIC); + EXPECT_TRUE(channel->WaitForConnected( + grpc_timeout_milliseconds_to_deadline(kInitialBackOffMs))); + const gpr_timespec t1 = gpr_now(GPR_CLOCK_MONOTONIC); + const grpc_millis waited_ms = gpr_time_to_millis(gpr_time_sub(t1, t0)); + gpr_log(GPR_DEBUG, "Waited %" PRId64 " milliseconds", waited_ms); + // Give an extra 100ms for timing slack. + // (This is still far less than the 1.6x increase we would see if the + // backoff state was not reset properly.) + EXPECT_LT(waited_ms, kInitialBackOffMs + 100); +} + TEST_F(ClientLbEnd2endTest, PickFirstUpdates) { // Start servers and send one RPC per server. const int kNumServers = 3; -- cgit v1.2.3 From 3dd24ea9789fcc455842b9582102f8c261e0bb1c Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 6 Dec 2018 12:27:41 -0800 Subject: code review changes --- src/core/ext/filters/client_channel/subchannel.cc | 8 ++++---- test/cpp/end2end/client_lb_end2end_test.cc | 19 +++++++++++-------- 2 files changed, 15 insertions(+), 12 deletions(-) (limited to 'test/cpp/end2end') diff --git a/src/core/ext/filters/client_channel/subchannel.cc b/src/core/ext/filters/client_channel/subchannel.cc index 4d98b63b49..af55f7710e 100644 --- a/src/core/ext/filters/client_channel/subchannel.cc +++ b/src/core/ext/filters/client_channel/subchannel.cc @@ -153,7 +153,7 @@ struct grpc_subchannel { /** have we started the backoff loop */ bool backoff_begun; // reset_backoff() was called while alarm was pending - bool deferred_reset_backoff; + bool retry_immediately; /** our alarm */ grpc_timer alarm; @@ -709,8 +709,8 @@ static void on_alarm(void* arg, grpc_error* error) { if (c->disconnected) { error = GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING("Disconnected", &error, 1); - } else if (c->deferred_reset_backoff) { - c->deferred_reset_backoff = false; + } else if (c->retry_immediately) { + c->retry_immediately = false; error = GRPC_ERROR_NONE; } else { GRPC_ERROR_REF(error); @@ -889,7 +889,7 @@ void grpc_subchannel_reset_backoff(grpc_subchannel* subchannel) { gpr_mu_lock(&subchannel->mu); subchannel->backoff->Reset(); if (subchannel->have_alarm) { - subchannel->deferred_reset_backoff = true; + subchannel->retry_immediately = true; grpc_timer_cancel(&subchannel->alarm); } else { subchannel->backoff_begun = false; diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index 1a7b36cdc0..f60f110c5f 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -527,24 +527,27 @@ TEST_F(ClientLbEnd2endTest, // Trigger a second connection attempt. This should also fail // ~immediately, but the retry should be scheduled for // kInitialBackOffMs instead of applying the multiplier. - gpr_log(GPR_INFO, "=== TRIGGERING SECOND CONNECTION ATTEMPT"); + gpr_log(GPR_INFO, "=== POLLING FOR SECOND CONNECTION ATTEMPT"); + const gpr_timespec t0 = gpr_now(GPR_CLOCK_MONOTONIC); EXPECT_FALSE( channel->WaitForConnected(grpc_timeout_milliseconds_to_deadline(10))); // Bring up a server on the chosen port. gpr_log(GPR_INFO, "=== STARTING BACKEND"); StartServers(1, ports); // Wait for connect. Should happen within kInitialBackOffMs. - gpr_log(GPR_INFO, "=== TRIGGERING THIRD CONNECTION ATTEMPT"); - const gpr_timespec t0 = gpr_now(GPR_CLOCK_MONOTONIC); + // Give an extra 100ms to account for the time spent in the second and + // third connection attempts themselves (since what we really want to + // measure is the time between the two). As long as this is less than + // the 1.6x increase we would see if the backoff state was not reset + // properly, the test is still proving that the backoff was reset. + constexpr int kWaitMs = kInitialBackOffMs + 100; + gpr_log(GPR_INFO, "=== POLLING FOR THIRD CONNECTION ATTEMPT"); EXPECT_TRUE(channel->WaitForConnected( - grpc_timeout_milliseconds_to_deadline(kInitialBackOffMs))); + grpc_timeout_milliseconds_to_deadline(kWaitMs))); const gpr_timespec t1 = gpr_now(GPR_CLOCK_MONOTONIC); const grpc_millis waited_ms = gpr_time_to_millis(gpr_time_sub(t1, t0)); gpr_log(GPR_DEBUG, "Waited %" PRId64 " milliseconds", waited_ms); - // Give an extra 100ms for timing slack. - // (This is still far less than the 1.6x increase we would see if the - // backoff state was not reset properly.) - EXPECT_LT(waited_ms, kInitialBackOffMs + 100); + EXPECT_LT(waited_ms, kWaitMs); } TEST_F(ClientLbEnd2endTest, PickFirstUpdates) { -- cgit v1.2.3 From a6a21d1c64b610805c42ca2218d6d95313cb5329 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Fri, 7 Dec 2018 08:38:29 -0800 Subject: more code review changes --- test/cpp/end2end/client_lb_end2end_test.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'test/cpp/end2end') diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index f60f110c5f..40b657f105 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -522,13 +522,15 @@ TEST_F(ClientLbEnd2endTest, EXPECT_FALSE( channel->WaitForConnected(grpc_timeout_milliseconds_to_deadline(10))); // Reset connection backoff. + // Note that the time at which the third attempt will be started is + // actually computed at this point, so we record the start time here. gpr_log(GPR_INFO, "=== RESETTING BACKOFF"); + const gpr_timespec t0 = gpr_now(GPR_CLOCK_MONOTONIC); experimental::ChannelResetConnectionBackoff(channel.get()); // Trigger a second connection attempt. This should also fail // ~immediately, but the retry should be scheduled for // kInitialBackOffMs instead of applying the multiplier. gpr_log(GPR_INFO, "=== POLLING FOR SECOND CONNECTION ATTEMPT"); - const gpr_timespec t0 = gpr_now(GPR_CLOCK_MONOTONIC); EXPECT_FALSE( channel->WaitForConnected(grpc_timeout_milliseconds_to_deadline(10))); // Bring up a server on the chosen port. -- cgit v1.2.3