author    Mark D. Roth <roth@google.com>    2018-10-24 08:40:10 -0700
committer GitHub <noreply@github.com>       2018-10-24 08:40:10 -0700
commit    55906e4d231a886060b1917062be2b1d2cbd1497
tree      129089bd4ebc435505ba50143bbab56a486368d3
parent    17f100063b6dcb9026ca7d593835cb80a41c55d2
parent    b6059e2e86e5ad060d9323293220d351770d96b9
Merge pull request #16912 from markdroth/fail_rpcs_on_transient_failure
Fail wait_for_ready=false RPCs when channel is in TRANSIENT_FAILURE.
-rw-r--r-- | src/core/ext/filters/client_channel/client_channel.cc | 34
-rw-r--r-- | test/cpp/end2end/client_lb_end2end_test.cc | 18
-rw-r--r-- | test/cpp/end2end/grpclb_end2end_test.cc | 8
3 files changed, 49 insertions(+), 11 deletions(-)
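For context, the client-visible semantics this change enforces: an RPC issued with the default wait_for_ready=false now fails immediately with UNAVAILABLE while the channel is in TRANSIENT_FAILURE, whereas wait_for_ready=true queues the call until the channel recovers or the deadline expires. A minimal C++ sketch of the two modes (the target address and the use of the test suite's EchoTestService are illustrative assumptions, not part of this change):

```cpp
#include <chrono>
#include <grpcpp/grpcpp.h>
#include "src/proto/grpc/testing/echo.grpc.pb.h"

// Hedged sketch: illustrates the fail-fast vs. wait_for_ready semantics.
// "localhost:50051" is a placeholder target.
void IllustrateWaitForReady() {
  auto channel = grpc::CreateChannel("localhost:50051",
                                     grpc::InsecureChannelCredentials());
  auto stub = grpc::testing::EchoTestService::NewStub(channel);
  grpc::testing::EchoRequest request;
  request.set_message("hello");
  grpc::testing::EchoResponse response;

  // Default (wait_for_ready=false): if the channel is in TRANSIENT_FAILURE,
  // this call now fails immediately with StatusCode::UNAVAILABLE.
  grpc::ClientContext fail_fast_ctx;
  grpc::Status status = stub->Echo(&fail_fast_ctx, request, &response);

  // wait_for_ready=true: the call is queued until the channel becomes
  // READY (or the deadline expires) instead of failing fast.
  grpc::ClientContext wfr_ctx;
  wfr_ctx.set_wait_for_ready(true);
  wfr_ctx.set_deadline(std::chrono::system_clock::now() +
                       std::chrono::seconds(5));
  status = stub->Echo(&wfr_ctx, request, &response);
}
```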
diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc
index daf1b89b09..91894689c3 100644
--- a/src/core/ext/filters/client_channel/client_channel.cc
+++ b/src/core/ext/filters/client_channel/client_channel.cc
@@ -2951,6 +2951,27 @@ static void apply_service_config_to_call_locked(grpc_call_element* elem) {
   }
 }
 
+// If the channel is in TRANSIENT_FAILURE and the call is not
+// wait_for_ready=true, fails the call and returns true.
+static bool fail_call_if_in_transient_failure(grpc_call_element* elem) {
+  channel_data* chand = static_cast<channel_data*>(elem->channel_data);
+  call_data* calld = static_cast<call_data*>(elem->call_data);
+  grpc_transport_stream_op_batch* batch = calld->pending_batches[0].batch;
+  if (grpc_connectivity_state_check(&chand->state_tracker) ==
+          GRPC_CHANNEL_TRANSIENT_FAILURE &&
+      (batch->payload->send_initial_metadata.send_initial_metadata_flags &
+       GRPC_INITIAL_METADATA_WAIT_FOR_READY) == 0) {
+    pending_batches_fail(
+        elem,
+        grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
+                               "channel is in state TRANSIENT_FAILURE"),
+                           GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNAVAILABLE),
+        true /* yield_call_combiner */);
+    return true;
+  }
+  return false;
+}
+
 // Invoked once resolver results are available.
 static void process_service_config_and_start_lb_pick_locked(
     grpc_call_element* elem) {
@@ -2958,6 +2979,9 @@ static void process_service_config_and_start_lb_pick_locked(
   // Only get service config data on the first attempt.
   if (GPR_LIKELY(calld->num_attempts_completed == 0)) {
     apply_service_config_to_call_locked(elem);
+    // Check this after applying service config, since it may have
+    // affected the call's wait_for_ready value.
+    if (fail_call_if_in_transient_failure(elem)) return;
   }
   // Start LB pick.
   grpc_core::LbPicker::StartLocked(elem);
@@ -3127,6 +3151,16 @@ static void start_pick_locked(void* arg, grpc_error* ignored) {
   // We do not yet have an LB policy, so wait for a resolver result.
   if (GPR_UNLIKELY(!chand->started_resolving)) {
     start_resolving_locked(chand);
+  } else {
+    // Normally, we want to do this check in
+    // process_service_config_and_start_lb_pick_locked(), so that we
+    // can honor the wait_for_ready setting in the service config.
+    // However, if the channel is in TRANSIENT_FAILURE at this point, that
+    // means that the resolver has returned a failure, so we're not going
+    // to get a service config right away.  In that case, we fail the
+    // call now based on the wait_for_ready value passed in from the
+    // application.
+    if (fail_call_if_in_transient_failure(elem)) return;
   }
   // Create a new waiter, which will delete itself when done.
   grpc_core::New<grpc_core::ResolverResultWaiter>(elem);
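The new check runs after apply_service_config_to_call_locked() because a service config can itself set wait_for_ready per method. A hedged sketch of how such a config might be supplied via a channel argument (the JSON and target are illustrative only; GRPC_ARG_SERVICE_CONFIG is the standard "grpc.service_config" channel arg):

```cpp
#include <memory>
#include <grpcpp/grpcpp.h>

// Hedged sketch: a service config that flips waitForReady for one service,
// which is exactly the case the fail-fast check must run after.
std::shared_ptr<grpc::Channel> MakeChannelWithWaitForReadyConfig() {
  grpc::ChannelArguments args;
  args.SetString(
      GRPC_ARG_SERVICE_CONFIG,
      "{\n"
      "  \"methodConfig\": [ {\n"
      "    \"name\": [ { \"service\": \"grpc.testing.EchoTestService\" } ],\n"
      "    \"waitForReady\": true\n"
      "  } ]\n"
      "}");
  // "localhost:50051" is a placeholder target.
  return grpc::CreateCustomChannel("localhost:50051",
                                   grpc::InsecureChannelCredentials(), args);
}
```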
diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc
index a9d68ab058..2110da84ee 100644
--- a/test/cpp/end2end/client_lb_end2end_test.cc
+++ b/test/cpp/end2end/client_lb_end2end_test.cc
@@ -212,13 +212,14 @@ class ClientLbEnd2endTest : public ::testing::Test {
   bool SendRpc(
       const std::unique_ptr<grpc::testing::EchoTestService::Stub>& stub,
       EchoResponse* response = nullptr, int timeout_ms = 1000,
-      Status* result = nullptr) {
+      Status* result = nullptr, bool wait_for_ready = false) {
     const bool local_response = (response == nullptr);
     if (local_response) response = new EchoResponse;
     EchoRequest request;
     request.set_message(kRequestMessage_);
     ClientContext context;
     context.set_deadline(grpc_timeout_milliseconds_to_deadline(timeout_ms));
+    if (wait_for_ready) context.set_wait_for_ready(true);
     Status status = stub->Echo(&context, request, response);
     if (result != nullptr) *result = status;
     if (local_response) delete response;
@@ -227,10 +228,11 @@ class ClientLbEnd2endTest : public ::testing::Test {
 
   void CheckRpcSendOk(
       const std::unique_ptr<grpc::testing::EchoTestService::Stub>& stub,
-      const grpc_core::DebugLocation& location) {
+      const grpc_core::DebugLocation& location, bool wait_for_ready = false) {
     EchoResponse response;
     Status status;
-    const bool success = SendRpc(stub, &response, 2000, &status);
+    const bool success =
+        SendRpc(stub, &response, 2000, &status, wait_for_ready);
     ASSERT_TRUE(success) << "From " << location.file() << ":"
                         << location.line() << "\n"
                         << "Error: " << status.error_message() << " "
@@ -301,7 +303,7 @@ class ClientLbEnd2endTest : public ::testing::Test {
       if (ignore_failure) {
         SendRpc(stub);
       } else {
-        CheckRpcSendOk(stub, location);
+        CheckRpcSendOk(stub, location, true);
       }
     } while (servers_[server_idx]->service_.request_count() == 0);
     ResetCounters();
@@ -506,7 +508,7 @@ TEST_F(ClientLbEnd2endTest, PickFirstUpdates) {
   do {
     channel_state = channel->GetState(true /* try to connect */);
   } while (channel_state == GRPC_CHANNEL_READY);
-  GPR_ASSERT(channel_state != GRPC_CHANNEL_READY);
+  ASSERT_NE(channel_state, GRPC_CHANNEL_READY);
   servers_[0]->service_.ResetCounters();
 
   // Next update introduces servers_[1], making the channel recover.
@@ -830,7 +832,7 @@ TEST_F(ClientLbEnd2endTest, RoundRobinUpdates) {
   do {
     channel_state = channel->GetState(true /* try to connect */);
   } while (channel_state == GRPC_CHANNEL_READY);
-  GPR_ASSERT(channel_state != GRPC_CHANNEL_READY);
+  ASSERT_NE(channel_state, GRPC_CHANNEL_READY);
   servers_[0]->service_.ResetCounters();
 
   // Next update introduces servers_[1], making the channel recover.
@@ -839,7 +841,7 @@ TEST_F(ClientLbEnd2endTest, RoundRobinUpdates) {
   SetNextResolution(ports);
   WaitForServer(stub, 1, DEBUG_LOCATION);
   channel_state = channel->GetState(false /* try to connect */);
-  GPR_ASSERT(channel_state == GRPC_CHANNEL_READY);
+  ASSERT_EQ(channel_state, GRPC_CHANNEL_READY);
 
   // Check LB policy name for the channel.
   EXPECT_EQ("round_robin", channel->GetLoadBalancingPolicyName());
@@ -952,7 +954,7 @@ TEST_F(ClientLbEnd2endTest, RoundRobinReresolve) {
     if (SendRpc(stub)) break;
     now = gpr_now(GPR_CLOCK_MONOTONIC);
   }
-  GPR_ASSERT(gpr_time_cmp(deadline, now) > 0);
+  ASSERT_GT(gpr_time_cmp(deadline, now), 0);
 }
 
 TEST_F(ClientLbEnd2endTest, RoundRobinSingleReconnect) {
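Note that the WaitForServer path above now probes with wait_for_ready=true: during a resolution update the channel may pass through TRANSIENT_FAILURE, and a fail-fast probe would now return immediately instead of waiting for the new server. Conversely, here is a hedged sketch of what a default (wait_for_ready=false) caller can observe under this change; the error text comes from fail_call_if_in_transient_failure() above, and the stub is assumed to already exist:

```cpp
#include <iostream>
#include <grpcpp/grpcpp.h>
#include "src/proto/grpc/testing/echo.grpc.pb.h"

// Hedged sketch: probing a channel that is currently in TRANSIENT_FAILURE.
void ProbeFailFast(grpc::testing::EchoTestService::Stub* stub) {
  grpc::ClientContext ctx;  // default: wait_for_ready=false
  grpc::testing::EchoRequest request;
  grpc::testing::EchoResponse response;
  request.set_message("probe");
  grpc::Status status = stub->Echo(&ctx, request, &response);
  if (status.error_code() == grpc::StatusCode::UNAVAILABLE) {
    // With this change the message is "channel is in state TRANSIENT_FAILURE".
    std::cout << status.error_message() << std::endl;
  }
}
```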
EXPECT_EQ("round_robin", channel->GetLoadBalancingPolicyName()); @@ -952,7 +954,7 @@ TEST_F(ClientLbEnd2endTest, RoundRobinReresolve) { if (SendRpc(stub)) break; now = gpr_now(GPR_CLOCK_MONOTONIC); } - GPR_ASSERT(gpr_time_cmp(deadline, now) > 0); + ASSERT_GT(gpr_time_cmp(deadline, now), 0); } TEST_F(ClientLbEnd2endTest, RoundRobinSingleReconnect) { diff --git a/test/cpp/end2end/grpclb_end2end_test.cc b/test/cpp/end2end/grpclb_end2end_test.cc index b69b861fcf..6ce0696114 100644 --- a/test/cpp/end2end/grpclb_end2end_test.cc +++ b/test/cpp/end2end/grpclb_end2end_test.cc @@ -539,13 +539,15 @@ class GrpclbEnd2endTest : public ::testing::Test { balancers_.at(i)->add_response(response, delay_ms); } - Status SendRpc(EchoResponse* response = nullptr, int timeout_ms = 1000) { + Status SendRpc(EchoResponse* response = nullptr, int timeout_ms = 1000, + bool wait_for_ready = false) { const bool local_response = (response == nullptr); if (local_response) response = new EchoResponse; EchoRequest request; request.set_message(kRequestMessage_); ClientContext context; context.set_deadline(grpc_timeout_milliseconds_to_deadline(timeout_ms)); + if (wait_for_ready) context.set_wait_for_ready(true); Status status = stub_->Echo(&context, request, response); if (local_response) delete response; return status; @@ -1366,7 +1368,7 @@ TEST_F(SingleBalancerTest, DropAllFirst) { {}, {{"rate_limiting", num_of_drop_by_rate_limiting_addresses}, {"load_balancing", num_of_drop_by_load_balancing_addresses}}), 0); - const Status status = SendRpc(); + const Status status = SendRpc(nullptr, 1000, true); EXPECT_FALSE(status.ok()); EXPECT_EQ(status.error_message(), "Call dropped by load balancing policy"); } @@ -1391,7 +1393,7 @@ TEST_F(SingleBalancerTest, DropAll) { // fail. Status status; do { - status = SendRpc(); + status = SendRpc(nullptr, 1000, true); } while (status.ok()); EXPECT_FALSE(status.ok()); EXPECT_EQ(status.error_message(), "Call dropped by load balancing policy"); |