diff options
Diffstat (limited to 'src/core/ext/filters/client_channel/client_channel.cc')
-rw-r--r-- | src/core/ext/filters/client_channel/client_channel.cc | 461 |
1 files changed, 328 insertions, 133 deletions
diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index d2bf4f388d..8e9ee889e1 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -129,6 +129,10 @@ typedef struct client_channel_channel_data { grpc_core::UniquePtr<char> info_lb_policy_name; /** service config in JSON form */ grpc_core::UniquePtr<char> info_service_config_json; + /* backpointer to grpc_channel's channelz node */ + grpc_core::channelz::ClientChannelNode* channelz_channel; + /* caches if the last resolution event contained addresses */ + bool previous_resolution_contained_addresses; } channel_data; typedef struct { @@ -153,6 +157,23 @@ static void watch_lb_policy_locked(channel_data* chand, grpc_core::LoadBalancingPolicy* lb_policy, grpc_connectivity_state current_state); +static const char* channel_connectivity_state_change_string( + grpc_connectivity_state state) { + switch (state) { + case GRPC_CHANNEL_IDLE: + return "Channel state change to IDLE"; + case GRPC_CHANNEL_CONNECTING: + return "Channel state change to CONNECTING"; + case GRPC_CHANNEL_READY: + return "Channel state change to READY"; + case GRPC_CHANNEL_TRANSIENT_FAILURE: + return "Channel state change to TRANSIENT_FAILURE"; + case GRPC_CHANNEL_SHUTDOWN: + return "Channel state change to SHUTDOWN"; + } + GPR_UNREACHABLE_CODE(return "UNKNOWN"); +} + static void set_channel_connectivity_state_locked(channel_data* chand, grpc_connectivity_state state, grpc_error* error, @@ -177,6 +198,12 @@ static void set_channel_connectivity_state_locked(channel_data* chand, gpr_log(GPR_INFO, "chand=%p: setting connectivity state to %s", chand, grpc_connectivity_state_name(state)); } + if (chand->channelz_channel != nullptr) { + chand->channelz_channel->AddTraceEvent( + grpc_core::channelz::ChannelTrace::Severity::Info, + grpc_slice_from_static_string( + channel_connectivity_state_change_string(state))); + 
} grpc_connectivity_state_set(&chand->state_tracker, state, error, reason); } @@ -376,6 +403,8 @@ static void request_reresolution_locked(void* arg, grpc_error* error) { chand->lb_policy->SetReresolutionClosureLocked(&args->closure); } +using TraceStringVector = grpc_core::InlinedVector<char*, 3>; + // Creates a new LB policy, replacing any previous one. // If the new policy is created successfully, sets *connectivity_state and // *connectivity_error to its initial connectivity state; otherwise, @@ -383,7 +412,7 @@ static void request_reresolution_locked(void* arg, grpc_error* error) { static void create_new_lb_policy_locked( channel_data* chand, char* lb_policy_name, grpc_connectivity_state* connectivity_state, - grpc_error** connectivity_error) { + grpc_error** connectivity_error, TraceStringVector* trace_strings) { grpc_core::LoadBalancingPolicy::Args lb_policy_args; lb_policy_args.combiner = chand->combiner; lb_policy_args.client_channel_factory = chand->client_channel_factory; @@ -393,11 +422,21 @@ static void create_new_lb_policy_locked( lb_policy_name, lb_policy_args); if (GPR_UNLIKELY(new_lb_policy == nullptr)) { gpr_log(GPR_ERROR, "could not create LB policy \"%s\"", lb_policy_name); + if (chand->channelz_channel != nullptr) { + char* str; + gpr_asprintf(&str, "Could not create LB policy \'%s\'", lb_policy_name); + trace_strings->push_back(str); + } } else { if (grpc_client_channel_trace.enabled()) { gpr_log(GPR_INFO, "chand=%p: created new LB policy \"%s\" (%p)", chand, lb_policy_name, new_lb_policy.get()); } + if (chand->channelz_channel != nullptr) { + char* str; + gpr_asprintf(&str, "Created new LB policy \'%s\'", lb_policy_name); + trace_strings->push_back(str); + } // Swap out the LB policy and update the fds in // chand->interested_parties. 
if (chand->lb_policy != nullptr) { @@ -457,7 +496,6 @@ get_service_config_from_resolver_result_locked(channel_data* chand) { grpc_uri* uri = grpc_uri_parse(server_uri, true); GPR_ASSERT(uri->path[0] != '\0'); service_config_parsing_state parsing_state; - memset(&parsing_state, 0, sizeof(parsing_state)); parsing_state.server_name = uri->path[0] == '/' ? uri->path + 1 : uri->path; service_config->ParseGlobalParams(parse_retry_throttle_params, @@ -473,6 +511,51 @@ get_service_config_from_resolver_result_locked(channel_data* chand) { return grpc_core::UniquePtr<char>(gpr_strdup(service_config_json)); } +static void maybe_add_trace_message_for_address_changes_locked( + channel_data* chand, TraceStringVector* trace_strings) { + int resolution_contains_addresses = false; + const grpc_arg* channel_arg = + grpc_channel_args_find(chand->resolver_result, GRPC_ARG_LB_ADDRESSES); + if (channel_arg != nullptr && channel_arg->type == GRPC_ARG_POINTER) { + grpc_lb_addresses* addresses = + static_cast<grpc_lb_addresses*>(channel_arg->value.pointer.p); + if (addresses->num_addresses > 0) { + resolution_contains_addresses = true; + } + } + if (!resolution_contains_addresses && + chand->previous_resolution_contained_addresses) { + trace_strings->push_back(gpr_strdup("Address list became empty")); + } else if (resolution_contains_addresses && + !chand->previous_resolution_contained_addresses) { + trace_strings->push_back(gpr_strdup("Address list became non-empty")); + } + chand->previous_resolution_contained_addresses = + resolution_contains_addresses; +} + +static void concatenate_and_add_channel_trace_locked( + channel_data* chand, TraceStringVector* trace_strings) { + if (!trace_strings->empty()) { + gpr_strvec v; + gpr_strvec_init(&v); + gpr_strvec_add(&v, gpr_strdup("Resolution event: ")); + bool is_first = 1; + for (size_t i = 0; i < trace_strings->size(); ++i) { + if (!is_first) gpr_strvec_add(&v, gpr_strdup(", ")); + is_first = false; + gpr_strvec_add(&v, (*trace_strings)[i]); 
+ } + char* flat; + size_t flat_len = 0; + flat = gpr_strvec_flatten(&v, &flat_len); + chand->channelz_channel->AddTraceEvent( + grpc_core::channelz::ChannelTrace::Severity::Info, + grpc_slice_new(flat, flat_len, gpr_free)); + gpr_strvec_destroy(&v); + } +} + // Callback invoked when a resolver result is available. static void on_resolver_result_changed_locked(void* arg, grpc_error* error) { channel_data* chand = static_cast<channel_data*>(arg); @@ -494,6 +577,16 @@ static void on_resolver_result_changed_locked(void* arg, grpc_error* error) { } // Data used to set the channel's connectivity state. bool set_connectivity_state = true; + // We only want to trace the address resolution in the following cases: + // (a) Address resolution resulted in service config change. + // (b) Address resolution that causes number of backends to go from + // zero to non-zero. + // (c) Address resolution that causes number of backends to go from + // non-zero to zero. + // (d) Address resolution that causes a new LB policy to be created. + // + // We track a list of strings to eventually be concatenated and traced. + TraceStringVector trace_strings; grpc_connectivity_state connectivity_state = GRPC_CHANNEL_TRANSIENT_FAILURE; grpc_error* connectivity_error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("No load balancing policy"); @@ -528,11 +621,29 @@ static void on_resolver_result_changed_locked(void* arg, grpc_error* error) { } else { // Instantiate new LB policy. create_new_lb_policy_locked(chand, lb_policy_name.get(), - &connectivity_state, &connectivity_error); + &connectivity_state, &connectivity_error, + &trace_strings); } // Find service config. grpc_core::UniquePtr<char> service_config_json = get_service_config_from_resolver_result_locked(chand); + // Note: It's safe to use chand->info_service_config_json here without + // taking a lock on chand->info_mu, because this function is the + // only thing that modifies its value, and it can only be invoked + // once at any given time. 
+ if (chand->channelz_channel != nullptr) { + if (((service_config_json == nullptr) != + (chand->info_service_config_json == nullptr)) || + (service_config_json != nullptr && + strcmp(service_config_json.get(), + chand->info_service_config_json.get()) != 0)) { + // TODO(ncteisen): might be worth somehow including a snippet of the + // config in the trace, at the risk of bloating the trace logs. + trace_strings.push_back(gpr_strdup("Service config changed")); + } + maybe_add_trace_message_for_address_changes_locked(chand, &trace_strings); + concatenate_and_add_channel_trace_locked(chand, &trace_strings); + } // Swap out the data used by cc_get_channel_info(). gpr_mu_lock(&chand->info_mu); chand->info_lb_policy_name = std::move(lb_policy_name); @@ -700,6 +811,8 @@ static grpc_error* cc_init_channel_elem(grpc_channel_element* elem, // Record enable_retries. arg = grpc_channel_args_find(args->channel_args, GRPC_ARG_ENABLE_RETRIES); chand->enable_retries = grpc_channel_arg_get_bool(arg, true); + chand->channelz_channel = nullptr; + chand->previous_resolution_contained_addresses = false; // Record client channel factory. arg = grpc_channel_args_find(args->channel_args, GRPC_ARG_CLIENT_CHANNEL_FACTORY); @@ -825,12 +938,26 @@ static void cc_destroy_channel_elem(grpc_channel_element* elem) { // (census filter is on top of this one) // - add census stats for retries +namespace { +struct call_data; + // State used for starting a retryable batch on a subchannel call. // This provides its own grpc_transport_stream_op_batch and other data // structures needed to populate the ops in the batch. // We allocate one struct on the arena for each attempt at starting a // batch on a given subchannel call. -typedef struct { +struct subchannel_batch_data { + subchannel_batch_data(grpc_call_element* elem, call_data* calld, int refcount, + bool set_on_complete); + // All dtor code must be added in `destroy`. 
This is because we may + // call closures in `subchannel_batch_data` after they are unrefed by + // `batch_data_unref`, and msan would complain about accessing this class + // after calling dtor. As a result we cannot call the `dtor` in + // `batch_data_unref`. + // TODO(soheil): We should try to call the dtor in `batch_data_unref`. + ~subchannel_batch_data() { destroy(); } + void destroy(); + gpr_refcount refs; grpc_call_element* elem; grpc_subchannel_call* subchannel_call; // Holds a ref. @@ -839,11 +966,23 @@ typedef struct { grpc_transport_stream_op_batch batch; // For intercepting on_complete. grpc_closure on_complete; -} subchannel_batch_data; +}; // Retry state associated with a subchannel call. // Stored in the parent_data of the subchannel call object. -typedef struct { +struct subchannel_call_retry_state { + explicit subchannel_call_retry_state(grpc_call_context_element* context) + : batch_payload(context), + started_send_initial_metadata(false), + completed_send_initial_metadata(false), + started_send_trailing_metadata(false), + completed_send_trailing_metadata(false), + started_recv_initial_metadata(false), + completed_recv_initial_metadata(false), + started_recv_trailing_metadata(false), + completed_recv_trailing_metadata(false), + retry_dispatched(false) {} + // subchannel_batch_data.batch.payload points to this. grpc_transport_stream_op_batch_payload batch_payload; // For send_initial_metadata. @@ -862,7 +1001,7 @@ typedef struct { // For intercepting recv_initial_metadata. grpc_metadata_batch recv_initial_metadata; grpc_closure recv_initial_metadata_ready; - bool trailing_metadata_available; + bool trailing_metadata_available = false; // For intercepting recv_message. grpc_closure recv_message_ready; grpc_core::OrphanablePtr<grpc_core::ByteStream> recv_message; @@ -872,10 +1011,10 @@ typedef struct { grpc_closure recv_trailing_metadata_ready; // These fields indicate which ops have been started and completed on // this subchannel call. 
- size_t started_send_message_count; - size_t completed_send_message_count; - size_t started_recv_message_count; - size_t completed_recv_message_count; + size_t started_send_message_count = 0; + size_t completed_send_message_count = 0; + size_t started_recv_message_count = 0; + size_t completed_recv_message_count = 0; bool started_send_initial_metadata : 1; bool completed_send_initial_metadata : 1; bool started_send_trailing_metadata : 1; @@ -884,14 +1023,18 @@ typedef struct { bool completed_recv_initial_metadata : 1; bool started_recv_trailing_metadata : 1; bool completed_recv_trailing_metadata : 1; + subchannel_batch_data* recv_initial_metadata_ready_deferred_batch = nullptr; + grpc_error* recv_initial_metadata_error = GRPC_ERROR_NONE; + subchannel_batch_data* recv_message_ready_deferred_batch = nullptr; + grpc_error* recv_message_error = GRPC_ERROR_NONE; + subchannel_batch_data* recv_trailing_metadata_internal_batch = nullptr; // State for callback processing. + // NOTE: Do not move this next to the metadata bitfields above. That would + // save space but will also result in a data race because compiler will + // generate a 2 byte store which overwrites the meta-data fields upon + // setting this field. bool retry_dispatched : 1; - subchannel_batch_data* recv_initial_metadata_ready_deferred_batch; - grpc_error* recv_initial_metadata_error; - subchannel_batch_data* recv_message_ready_deferred_batch; - grpc_error* recv_message_error; - subchannel_batch_data* recv_trailing_metadata_internal_batch; -} subchannel_call_retry_state; +}; // Pending batches stored in call data. typedef struct { @@ -906,7 +1049,44 @@ typedef struct { Handles queueing of stream ops until a call object is ready, waiting for initial metadata before trying to create a call object, and handling cancellation gracefully. 
*/ -typedef struct client_channel_call_data { +struct call_data { + call_data(grpc_call_element* elem, const channel_data& chand, + const grpc_call_element_args& args) + : deadline_state(elem, args.call_stack, args.call_combiner, + GPR_LIKELY(chand.deadline_checking_enabled) + ? args.deadline + : GRPC_MILLIS_INF_FUTURE), + path(grpc_slice_ref_internal(args.path)), + call_start_time(args.start_time), + deadline(args.deadline), + arena(args.arena), + owning_call(args.call_stack), + call_combiner(args.call_combiner), + pending_send_initial_metadata(false), + pending_send_message(false), + pending_send_trailing_metadata(false), + enable_retries(chand.enable_retries), + retry_committed(false), + last_attempt_got_server_pushback(false) {} + + ~call_data() { + if (GPR_LIKELY(subchannel_call != nullptr)) { + GRPC_SUBCHANNEL_CALL_UNREF(subchannel_call, + "client_channel_destroy_call"); + } + grpc_slice_unref_internal(path); + GRPC_ERROR_UNREF(cancel_error); + for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches); ++i) { + GPR_ASSERT(pending_batches[i].batch == nullptr); + } + for (size_t i = 0; i < GRPC_CONTEXT_COUNT; ++i) { + if (pick.subchannel_call_context[i].value != nullptr) { + pick.subchannel_call_context[i].destroy( + pick.subchannel_call_context[i].value); + } + } + } + // State for handling deadlines. // The code in deadline_filter.c requires this to be the first field. // TODO(roth): This is slightly sub-optimal in that grpc_deadline_state @@ -925,24 +1105,24 @@ typedef struct client_channel_call_data { grpc_core::RefCountedPtr<ServerRetryThrottleData> retry_throttle_data; grpc_core::RefCountedPtr<ClientChannelMethodParams> method_params; - grpc_subchannel_call* subchannel_call; + grpc_subchannel_call* subchannel_call = nullptr; // Set when we get a cancel_stream op. 
- grpc_error* cancel_error; + grpc_error* cancel_error = GRPC_ERROR_NONE; grpc_core::LoadBalancingPolicy::PickState pick; grpc_closure pick_closure; grpc_closure pick_cancel_closure; - grpc_polling_entity* pollent; - bool pollent_added_to_interested_parties; + grpc_polling_entity* pollent = nullptr; + bool pollent_added_to_interested_parties = false; // Batches are added to this list when received from above. // They are removed when we are done handling the batch (i.e., when // either we have invoked all of the batch's callbacks or we have // passed the batch down to the subchannel call and are not // intercepting any of its callbacks). - pending_batch pending_batches[MAX_PENDING_BATCHES]; + pending_batch pending_batches[MAX_PENDING_BATCHES] = {}; bool pending_send_initial_metadata : 1; bool pending_send_message : 1; bool pending_send_trailing_metadata : 1; @@ -951,8 +1131,8 @@ typedef struct client_channel_call_data { bool enable_retries : 1; bool retry_committed : 1; bool last_attempt_got_server_pushback : 1; - int num_attempts_completed; - size_t bytes_buffered_for_retry; + int num_attempts_completed = 0; + size_t bytes_buffered_for_retry = 0; grpc_core::ManualConstructor<grpc_core::BackOff> retry_backoff; grpc_timer retry_timer; @@ -963,12 +1143,12 @@ typedef struct client_channel_call_data { // until all of these batches have completed. // Note that we actually only need to track replay batches, but it's // easier to track all batches with send ops. - int num_pending_retriable_subchannel_send_batches; + int num_pending_retriable_subchannel_send_batches = 0; // Cached data for retrying send ops. 
// send_initial_metadata - bool seen_send_initial_metadata; - grpc_linked_mdelem* send_initial_metadata_storage; + bool seen_send_initial_metadata = false; + grpc_linked_mdelem* send_initial_metadata_storage = nullptr; grpc_metadata_batch send_initial_metadata; uint32_t send_initial_metadata_flags; gpr_atm* peer_string; @@ -979,14 +1159,13 @@ typedef struct client_channel_call_data { // Note: We inline the cache for the first 3 send_message ops and use // dynamic allocation after that. This number was essentially picked // at random; it could be changed in the future to tune performance. - grpc_core::ManualConstructor< - grpc_core::InlinedVector<grpc_core::ByteStreamCache*, 3>> - send_messages; + grpc_core::InlinedVector<grpc_core::ByteStreamCache*, 3> send_messages; // send_trailing_metadata - bool seen_send_trailing_metadata; - grpc_linked_mdelem* send_trailing_metadata_storage; + bool seen_send_trailing_metadata = false; + grpc_linked_mdelem* send_trailing_metadata_storage = nullptr; grpc_metadata_batch send_trailing_metadata; -} call_data; +}; +} // namespace // Forward declarations. static void retry_commit(grpc_call_element* elem, @@ -1030,7 +1209,7 @@ static void maybe_cache_send_ops_for_batch(call_data* calld, gpr_arena_alloc(calld->arena, sizeof(grpc_core::ByteStreamCache))); new (cache) grpc_core::ByteStreamCache( std::move(batch->payload->send_message.send_message)); - calld->send_messages->push_back(cache); + calld->send_messages.push_back(cache); } // Save metadata batch for send_trailing_metadata ops. if (batch->send_trailing_metadata) { @@ -1067,7 +1246,7 @@ static void free_cached_send_message(channel_data* chand, call_data* calld, "chand=%p calld=%p: destroying calld->send_messages[%" PRIuPTR "]", chand, calld, idx); } - (*calld->send_messages)[idx]->Destroy(); + calld->send_messages[idx]->Destroy(); } // Frees cached send_trailing_metadata. 
@@ -1537,55 +1716,66 @@ static bool maybe_retry(grpc_call_element* elem, // subchannel_batch_data // -// Creates a subchannel_batch_data object on the call's arena with the -// specified refcount. If set_on_complete is true, the batch's -// on_complete callback will be set to point to on_complete(); -// otherwise, the batch's on_complete callback will be null. -static subchannel_batch_data* batch_data_create(grpc_call_element* elem, - int refcount, - bool set_on_complete) { - call_data* calld = static_cast<call_data*>(elem->call_data); +namespace { +subchannel_batch_data::subchannel_batch_data(grpc_call_element* elem, + call_data* calld, int refcount, + bool set_on_complete) + : elem(elem), + subchannel_call(GRPC_SUBCHANNEL_CALL_REF(calld->subchannel_call, + "batch_data_create")) { subchannel_call_retry_state* retry_state = static_cast<subchannel_call_retry_state*>( grpc_connected_subchannel_call_get_parent_data( calld->subchannel_call)); - subchannel_batch_data* batch_data = static_cast<subchannel_batch_data*>( - gpr_arena_alloc(calld->arena, sizeof(*batch_data))); - batch_data->elem = elem; - batch_data->subchannel_call = - GRPC_SUBCHANNEL_CALL_REF(calld->subchannel_call, "batch_data_create"); - batch_data->batch.payload = &retry_state->batch_payload; - gpr_ref_init(&batch_data->refs, refcount); + batch.payload = &retry_state->batch_payload; + gpr_ref_init(&refs, refcount); if (set_on_complete) { - GRPC_CLOSURE_INIT(&batch_data->on_complete, on_complete, batch_data, + GRPC_CLOSURE_INIT(&on_complete, ::on_complete, this, grpc_schedule_on_exec_ctx); - batch_data->batch.on_complete = &batch_data->on_complete; + batch.on_complete = &on_complete; } GRPC_CALL_STACK_REF(calld->owning_call, "batch_data"); +} + +void subchannel_batch_data::destroy() { + subchannel_call_retry_state* retry_state = + static_cast<subchannel_call_retry_state*>( + grpc_connected_subchannel_call_get_parent_data(subchannel_call)); + if (batch.send_initial_metadata) { + 
grpc_metadata_batch_destroy(&retry_state->send_initial_metadata); + } + if (batch.send_trailing_metadata) { + grpc_metadata_batch_destroy(&retry_state->send_trailing_metadata); + } + if (batch.recv_initial_metadata) { + grpc_metadata_batch_destroy(&retry_state->recv_initial_metadata); + } + if (batch.recv_trailing_metadata) { + grpc_metadata_batch_destroy(&retry_state->recv_trailing_metadata); + } + GRPC_SUBCHANNEL_CALL_UNREF(subchannel_call, "batch_data_unref"); + call_data* calld = static_cast<call_data*>(elem->call_data); + GRPC_CALL_STACK_UNREF(calld->owning_call, "batch_data"); +} +} // namespace + +// Creates a subchannel_batch_data object on the call's arena with the +// specified refcount. If set_on_complete is true, the batch's +// on_complete callback will be set to point to on_complete(); +// otherwise, the batch's on_complete callback will be null. +static subchannel_batch_data* batch_data_create(grpc_call_element* elem, + int refcount, + bool set_on_complete) { + call_data* calld = static_cast<call_data*>(elem->call_data); + subchannel_batch_data* batch_data = + new (gpr_arena_alloc(calld->arena, sizeof(*batch_data))) + subchannel_batch_data(elem, calld, refcount, set_on_complete); return batch_data; } static void batch_data_unref(subchannel_batch_data* batch_data) { if (gpr_unref(&batch_data->refs)) { - subchannel_call_retry_state* retry_state = - static_cast<subchannel_call_retry_state*>( - grpc_connected_subchannel_call_get_parent_data( - batch_data->subchannel_call)); - if (batch_data->batch.send_initial_metadata) { - grpc_metadata_batch_destroy(&retry_state->send_initial_metadata); - } - if (batch_data->batch.send_trailing_metadata) { - grpc_metadata_batch_destroy(&retry_state->send_trailing_metadata); - } - if (batch_data->batch.recv_initial_metadata) { - grpc_metadata_batch_destroy(&retry_state->recv_initial_metadata); - } - if (batch_data->batch.recv_trailing_metadata) { - grpc_metadata_batch_destroy(&retry_state->recv_trailing_metadata); - } - 
GRPC_SUBCHANNEL_CALL_UNREF(batch_data->subchannel_call, "batch_data_unref"); - call_data* calld = static_cast<call_data*>(batch_data->elem->call_data); - GRPC_CALL_STACK_UNREF(calld->owning_call, "batch_data"); + batch_data->destroy(); } } @@ -1778,23 +1968,22 @@ static void recv_message_ready(void* arg, grpc_error* error) { // recv_trailing_metadata handling // -// Sets *status and *server_pushback_md based on batch_data and error. -static void get_call_status(subchannel_batch_data* batch_data, - grpc_error* error, grpc_status_code* status, +// Sets *status and *server_pushback_md based on md_batch and error. +// Only sets *server_pushback_md if server_pushback_md != nullptr. +static void get_call_status(grpc_call_element* elem, + grpc_metadata_batch* md_batch, grpc_error* error, + grpc_status_code* status, grpc_mdelem** server_pushback_md) { - grpc_call_element* elem = batch_data->elem; call_data* calld = static_cast<call_data*>(elem->call_data); if (error != GRPC_ERROR_NONE) { grpc_error_get_status(error, calld->deadline, status, nullptr, nullptr, nullptr); } else { - grpc_metadata_batch* md_batch = - batch_data->batch.payload->recv_trailing_metadata - .recv_trailing_metadata; GPR_ASSERT(md_batch->idx.named.grpc_status != nullptr); *status = grpc_get_status_code_from_metadata(md_batch->idx.named.grpc_status->md); - if (md_batch->idx.named.grpc_retry_pushback_ms != nullptr) { + if (server_pushback_md != nullptr && + md_batch->idx.named.grpc_retry_pushback_ms != nullptr) { *server_pushback_md = &md_batch->idx.named.grpc_retry_pushback_ms->md; } } @@ -1884,7 +2073,7 @@ static bool pending_batch_is_unstarted( return true; } if (pending->batch->send_message && - retry_state->started_send_message_count < calld->send_messages->size()) { + retry_state->started_send_message_count < calld->send_messages.size()) { return true; } if (pending->batch->send_trailing_metadata && @@ -1967,7 +2156,9 @@ static void recv_trailing_metadata_ready(void* arg, grpc_error* error) { // 
Get the call's status and check for server pushback metadata. grpc_status_code status = GRPC_STATUS_OK; grpc_mdelem* server_pushback_md = nullptr; - get_call_status(batch_data, GRPC_ERROR_REF(error), &status, + grpc_metadata_batch* md_batch = + batch_data->batch.payload->recv_trailing_metadata.recv_trailing_metadata; + get_call_status(elem, md_batch, GRPC_ERROR_REF(error), &status, &server_pushback_md); if (grpc_client_channel_trace.enabled()) { gpr_log(GPR_INFO, "chand=%p calld=%p: call finished, status=%s", chand, @@ -2038,7 +2229,7 @@ static void add_closures_for_replay_or_pending_send_ops( channel_data* chand = static_cast<channel_data*>(elem->channel_data); call_data* calld = static_cast<call_data*>(elem->call_data); bool have_pending_send_message_ops = - retry_state->started_send_message_count < calld->send_messages->size(); + retry_state->started_send_message_count < calld->send_messages.size(); bool have_pending_send_trailing_metadata_op = calld->seen_send_trailing_metadata && !retry_state->started_send_trailing_metadata; @@ -2194,9 +2385,9 @@ static void add_retriable_send_initial_metadata_op( .grpc_previous_rpc_attempts); } if (GPR_UNLIKELY(calld->num_attempts_completed > 0)) { - grpc_mdelem retry_md = grpc_mdelem_from_slices( + grpc_mdelem retry_md = grpc_mdelem_create( GRPC_MDSTR_GRPC_PREVIOUS_RPC_ATTEMPTS, - *retry_count_strings[calld->num_attempts_completed - 1]); + *retry_count_strings[calld->num_attempts_completed - 1], nullptr); grpc_error* error = grpc_metadata_batch_add_tail( &retry_state->send_initial_metadata, &retry_state->send_initial_metadata_storage[calld->send_initial_metadata @@ -2230,7 +2421,7 @@ static void add_retriable_send_message_op( chand, calld, retry_state->started_send_message_count); } grpc_core::ByteStreamCache* cache = - (*calld->send_messages)[retry_state->started_send_message_count]; + calld->send_messages[retry_state->started_send_message_count]; ++retry_state->started_send_message_count; 
retry_state->send_message.Init(cache); batch_data->batch.send_message = true; @@ -2362,7 +2553,7 @@ static subchannel_batch_data* maybe_create_subchannel_batch_for_replay( } // send_message. // Note that we can only have one send_message op in flight at a time. - if (retry_state->started_send_message_count < calld->send_messages->size() && + if (retry_state->started_send_message_count < calld->send_messages.size() && retry_state->started_send_message_count == retry_state->completed_send_message_count && !calld->pending_send_message) { @@ -2383,7 +2574,7 @@ static subchannel_batch_data* maybe_create_subchannel_batch_for_replay( // to start, since we can't send down any more send_message ops after // send_trailing_metadata. if (calld->seen_send_trailing_metadata && - retry_state->started_send_message_count == calld->send_messages->size() && + retry_state->started_send_message_count == calld->send_messages.size() && !retry_state->started_send_trailing_metadata && !calld->pending_send_trailing_metadata) { if (grpc_client_channel_trace.enabled()) { @@ -2435,7 +2626,7 @@ static void add_subchannel_batches_for_pending_batches( // send_message ops after send_trailing_metadata. 
if (batch->send_trailing_metadata && (retry_state->started_send_message_count + batch->send_message < - calld->send_messages->size() || + calld->send_messages.size() || retry_state->started_send_trailing_metadata)) { continue; } @@ -2602,11 +2793,9 @@ static void create_subchannel_call(grpc_call_element* elem, grpc_error* error) { pending_batches_fail(elem, new_error, true /* yield_call_combiner */); } else { if (parent_data_size > 0) { - subchannel_call_retry_state* retry_state = - static_cast<subchannel_call_retry_state*>( - grpc_connected_subchannel_call_get_parent_data( - calld->subchannel_call)); - retry_state->batch_payload.context = calld->pick.subchannel_call_context; + new (grpc_connected_subchannel_call_get_parent_data( + calld->subchannel_call)) + subchannel_call_retry_state(calld->pick.subchannel_call_context); } pending_batches_resume(elem); } @@ -2832,6 +3021,27 @@ static void apply_service_config_to_call_locked(grpc_call_element* elem) { } } +// If the channel is in TRANSIENT_FAILURE and the call is not +// wait_for_ready=true, fails the call and returns true. +static bool fail_call_if_in_transient_failure(grpc_call_element* elem) { + channel_data* chand = static_cast<channel_data*>(elem->channel_data); + call_data* calld = static_cast<call_data*>(elem->call_data); + grpc_transport_stream_op_batch* batch = calld->pending_batches[0].batch; + if (grpc_connectivity_state_check(&chand->state_tracker) == + GRPC_CHANNEL_TRANSIENT_FAILURE && + (batch->payload->send_initial_metadata.send_initial_metadata_flags & + GRPC_INITIAL_METADATA_WAIT_FOR_READY) == 0) { + pending_batches_fail( + elem, + grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "channel is in state TRANSIENT_FAILURE"), + GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNAVAILABLE), + true /* yield_call_combiner */); + return true; + } + return false; +} + // Invoked once resolver results are available. 
static void process_service_config_and_start_lb_pick_locked( grpc_call_element* elem) { @@ -2839,6 +3049,9 @@ static void process_service_config_and_start_lb_pick_locked( // Only get service config data on the first attempt. if (GPR_LIKELY(calld->num_attempts_completed == 0)) { apply_service_config_to_call_locked(elem); + // Check this after applying service config, since it may have + // affected the call's wait_for_ready value. + if (fail_call_if_in_transient_failure(elem)) return; } // Start LB pick. grpc_core::LbPicker::StartLocked(elem); @@ -3008,6 +3221,16 @@ static void start_pick_locked(void* arg, grpc_error* ignored) { // We do not yet have an LB policy, so wait for a resolver result. if (GPR_UNLIKELY(!chand->started_resolving)) { start_resolving_locked(chand); + } else { + // Normally, we want to do this check in + // process_service_config_and_start_lb_pick_locked(), so that we + // can honor the wait_for_ready setting in the service config. + // However, if the channel is in TRANSIENT_FAILURE at this point, that + // means that the resolver has returned a failure, so we're not going + // to get a service config right away. In that case, we fail the + // call now based on the wait_for_ready value passed in from the + // application. + if (fail_call_if_in_transient_failure(elem)) return; } // Create a new waiter, which will delete itself when done. grpc_core::New<grpc_core::ResolverResultWaiter>(elem); @@ -3112,21 +3335,8 @@ static void cc_start_transport_stream_op_batch( /* Constructor for call_data */ static grpc_error* cc_init_call_elem(grpc_call_element* elem, const grpc_call_element_args* args) { - call_data* calld = static_cast<call_data*>(elem->call_data); channel_data* chand = static_cast<channel_data*>(elem->channel_data); - // Initialize data members. 
- calld->path = grpc_slice_ref_internal(args->path); - calld->call_start_time = args->start_time; - calld->deadline = args->deadline; - calld->arena = args->arena; - calld->owning_call = args->call_stack; - calld->call_combiner = args->call_combiner; - if (GPR_LIKELY(chand->deadline_checking_enabled)) { - grpc_deadline_state_init(elem, args->call_stack, args->call_combiner, - calld->deadline); - } - calld->enable_retries = chand->enable_retries; - calld->send_messages.Init(); + new (elem->call_data) call_data(elem, *chand, *args); return GRPC_ERROR_NONE; } @@ -3135,34 +3345,12 @@ static void cc_destroy_call_elem(grpc_call_element* elem, const grpc_call_final_info* final_info, grpc_closure* then_schedule_closure) { call_data* calld = static_cast<call_data*>(elem->call_data); - channel_data* chand = static_cast<channel_data*>(elem->channel_data); - if (GPR_LIKELY(chand->deadline_checking_enabled)) { - grpc_deadline_state_destroy(elem); - } - grpc_slice_unref_internal(calld->path); - calld->retry_throttle_data.reset(); - calld->method_params.reset(); - GRPC_ERROR_UNREF(calld->cancel_error); if (GPR_LIKELY(calld->subchannel_call != nullptr)) { grpc_subchannel_call_set_cleanup_closure(calld->subchannel_call, then_schedule_closure); then_schedule_closure = nullptr; - GRPC_SUBCHANNEL_CALL_UNREF(calld->subchannel_call, - "client_channel_destroy_call"); - } - for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) { - GPR_ASSERT(calld->pending_batches[i].batch == nullptr); - } - if (GPR_LIKELY(calld->pick.connected_subchannel != nullptr)) { - calld->pick.connected_subchannel.reset(); - } - for (size_t i = 0; i < GRPC_CONTEXT_COUNT; ++i) { - if (calld->pick.subchannel_call_context[i].value != nullptr) { - calld->pick.subchannel_call_context[i].destroy( - calld->pick.subchannel_call_context[i].value); - } } - calld->send_messages.Destroy(); + calld->~call_data(); GRPC_CLOSURE_SCHED(then_schedule_closure, GRPC_ERROR_NONE); } @@ -3203,9 +3391,16 @@ static void 
try_to_connect_locked(void* arg, grpc_error* error_ignored) { GRPC_CHANNEL_STACK_UNREF(chand->owning_stack, "try_to_connect"); } +void grpc_client_channel_set_channelz_node( + grpc_channel_element* elem, grpc_core::channelz::ClientChannelNode* node) { + channel_data* chand = static_cast<channel_data*>(elem->channel_data); + chand->channelz_channel = node; +} + void grpc_client_channel_populate_child_refs( - grpc_channel_element* elem, grpc_core::ChildRefsList* child_subchannels, - grpc_core::ChildRefsList* child_channels) { + grpc_channel_element* elem, + grpc_core::channelz::ChildRefsList* child_subchannels, + grpc_core::channelz::ChildRefsList* child_channels) { channel_data* chand = static_cast<channel_data*>(elem->channel_data); if (chand->lb_policy != nullptr) { chand->lb_policy->FillChildRefsForChannelz(child_subchannels, |