| author | Yash Tibrewal <yashkt@google.com> | 2018-11-16 10:58:12 -0800 |
|---|---|---|
| committer | Yash Tibrewal <yashkt@google.com> | 2018-11-16 11:11:04 -0800 |
| commit | fc332d2c9247832af90792a59ff6d391e84bc8ae (patch) | |
| tree | 4bd1db687960ca851f87d237a36f55190ac52f27 /src/core/ext | |
| parent | 0eb9a3e783237cd46c8ba6d3b33228f537cafbfc (diff) | |
| parent | 9cfacc48ee2e9f8db083d578c84881551734b1f0 (diff) | |
Merge master
Diffstat (limited to 'src/core/ext')
77 files changed, 6262 insertions, 2001 deletions
diff --git a/src/core/ext/filters/client_channel/OWNERS b/src/core/ext/filters/client_channel/OWNERS index c8760d947b..d38970e0fa 100644 --- a/src/core/ext/filters/client_channel/OWNERS +++ b/src/core/ext/filters/client_channel/OWNERS @@ -1,4 +1,4 @@ set noparent @markdroth -@dgquintas +@apolcyn @AspirinSJL diff --git a/src/core/ext/filters/client_channel/README.md b/src/core/ext/filters/client_channel/README.md index 7c209db12e..9676a4535b 100644 --- a/src/core/ext/filters/client_channel/README.md +++ b/src/core/ext/filters/client_channel/README.md @@ -46,20 +46,4 @@ construction arguments for concrete grpc_subchannel instances. Naming for GRPC =============== -Names in GRPC are represented by a URI (as defined in -[RFC 3986](https://tools.ietf.org/html/rfc3986)). - -The following schemes are currently supported: - -dns:///host:port - dns schemes are currently supported so long as authority is - empty (authority based dns resolution is expected in a future - release) - -unix:path - the unix scheme is used to create and connect to unix domain - sockets - the authority must be empty, and the path - represents the absolute or relative path to the desired - socket - -ipv4:host:port - a pre-resolved ipv4 dotted decimal address/port combination - -ipv6:[host]:port - a pre-resolved ipv6 address/port combination +See [/doc/naming.md](gRPC name resolution). diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index d2bf4f388d..8e9ee889e1 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -129,6 +129,10 @@ typedef struct client_channel_channel_data { grpc_core::UniquePtr<char> info_lb_policy_name; /** service config in JSON form */ grpc_core::UniquePtr<char> info_service_config_json; + /* backpointer to grpc_channel's channelz node */ + grpc_core::channelz::ClientChannelNode* channelz_channel; + /* caches if the last resolution event contained addresses */ + bool previous_resolution_contained_addresses; } channel_data; typedef struct { @@ -153,6 +157,23 @@ static void watch_lb_policy_locked(channel_data* chand, grpc_core::LoadBalancingPolicy* lb_policy, grpc_connectivity_state current_state); +static const char* channel_connectivity_state_change_string( + grpc_connectivity_state state) { + switch (state) { + case GRPC_CHANNEL_IDLE: + return "Channel state change to IDLE"; + case GRPC_CHANNEL_CONNECTING: + return "Channel state change to CONNECTING"; + case GRPC_CHANNEL_READY: + return "Channel state change to READY"; + case GRPC_CHANNEL_TRANSIENT_FAILURE: + return "Channel state change to TRANSIENT_FAILURE"; + case GRPC_CHANNEL_SHUTDOWN: + return "Channel state change to SHUTDOWN"; + } + GPR_UNREACHABLE_CODE(return "UNKNOWN"); +} + static void set_channel_connectivity_state_locked(channel_data* chand, grpc_connectivity_state state, grpc_error* error, @@ -177,6 +198,12 @@ static void set_channel_connectivity_state_locked(channel_data* chand, gpr_log(GPR_INFO, "chand=%p: setting connectivity state to %s", chand, grpc_connectivity_state_name(state)); } + if (chand->channelz_channel != nullptr) { + chand->channelz_channel->AddTraceEvent( + grpc_core::channelz::ChannelTrace::Severity::Info, + grpc_slice_from_static_string( + channel_connectivity_state_change_string(state))); + } grpc_connectivity_state_set(&chand->state_tracker, state, error, reason); } @@ -376,6 +403,8 @@ static void request_reresolution_locked(void* arg, grpc_error* error) { 
chand->lb_policy->SetReresolutionClosureLocked(&args->closure); } +using TraceStringVector = grpc_core::InlinedVector<char*, 3>; + // Creates a new LB policy, replacing any previous one. // If the new policy is created successfully, sets *connectivity_state and // *connectivity_error to its initial connectivity state; otherwise, @@ -383,7 +412,7 @@ static void request_reresolution_locked(void* arg, grpc_error* error) { static void create_new_lb_policy_locked( channel_data* chand, char* lb_policy_name, grpc_connectivity_state* connectivity_state, - grpc_error** connectivity_error) { + grpc_error** connectivity_error, TraceStringVector* trace_strings) { grpc_core::LoadBalancingPolicy::Args lb_policy_args; lb_policy_args.combiner = chand->combiner; lb_policy_args.client_channel_factory = chand->client_channel_factory; @@ -393,11 +422,21 @@ static void create_new_lb_policy_locked( lb_policy_name, lb_policy_args); if (GPR_UNLIKELY(new_lb_policy == nullptr)) { gpr_log(GPR_ERROR, "could not create LB policy \"%s\"", lb_policy_name); + if (chand->channelz_channel != nullptr) { + char* str; + gpr_asprintf(&str, "Could not create LB policy \'%s\'", lb_policy_name); + trace_strings->push_back(str); + } } else { if (grpc_client_channel_trace.enabled()) { gpr_log(GPR_INFO, "chand=%p: created new LB policy \"%s\" (%p)", chand, lb_policy_name, new_lb_policy.get()); } + if (chand->channelz_channel != nullptr) { + char* str; + gpr_asprintf(&str, "Created new LB policy \'%s\'", lb_policy_name); + trace_strings->push_back(str); + } // Swap out the LB policy and update the fds in // chand->interested_parties. if (chand->lb_policy != nullptr) { @@ -457,7 +496,6 @@ get_service_config_from_resolver_result_locked(channel_data* chand) { grpc_uri* uri = grpc_uri_parse(server_uri, true); GPR_ASSERT(uri->path[0] != '\0'); service_config_parsing_state parsing_state; - memset(&parsing_state, 0, sizeof(parsing_state)); parsing_state.server_name = uri->path[0] == '/' ? 
uri->path + 1 : uri->path; service_config->ParseGlobalParams(parse_retry_throttle_params, @@ -473,6 +511,51 @@ get_service_config_from_resolver_result_locked(channel_data* chand) { return grpc_core::UniquePtr<char>(gpr_strdup(service_config_json)); } +static void maybe_add_trace_message_for_address_changes_locked( + channel_data* chand, TraceStringVector* trace_strings) { + int resolution_contains_addresses = false; + const grpc_arg* channel_arg = + grpc_channel_args_find(chand->resolver_result, GRPC_ARG_LB_ADDRESSES); + if (channel_arg != nullptr && channel_arg->type == GRPC_ARG_POINTER) { + grpc_lb_addresses* addresses = + static_cast<grpc_lb_addresses*>(channel_arg->value.pointer.p); + if (addresses->num_addresses > 0) { + resolution_contains_addresses = true; + } + } + if (!resolution_contains_addresses && + chand->previous_resolution_contained_addresses) { + trace_strings->push_back(gpr_strdup("Address list became empty")); + } else if (resolution_contains_addresses && + !chand->previous_resolution_contained_addresses) { + trace_strings->push_back(gpr_strdup("Address list became non-empty")); + } + chand->previous_resolution_contained_addresses = + resolution_contains_addresses; +} + +static void concatenate_and_add_channel_trace_locked( + channel_data* chand, TraceStringVector* trace_strings) { + if (!trace_strings->empty()) { + gpr_strvec v; + gpr_strvec_init(&v); + gpr_strvec_add(&v, gpr_strdup("Resolution event: ")); + bool is_first = 1; + for (size_t i = 0; i < trace_strings->size(); ++i) { + if (!is_first) gpr_strvec_add(&v, gpr_strdup(", ")); + is_first = false; + gpr_strvec_add(&v, (*trace_strings)[i]); + } + char* flat; + size_t flat_len = 0; + flat = gpr_strvec_flatten(&v, &flat_len); + chand->channelz_channel->AddTraceEvent( + grpc_core::channelz::ChannelTrace::Severity::Info, + grpc_slice_new(flat, flat_len, gpr_free)); + gpr_strvec_destroy(&v); + } +} + // Callback invoked when a resolver result is available. static void on_resolver_result_changed_locked(void* arg, grpc_error* error) { channel_data* chand = static_cast<channel_data*>(arg); @@ -494,6 +577,16 @@ static void on_resolver_result_changed_locked(void* arg, grpc_error* error) { } // Data used to set the channel's connectivity state. bool set_connectivity_state = true; + // We only want to trace the address resolution in the follow cases: + // (a) Address resolution resulted in service config change. + // (b) Address resolution that causes number of backends to go from + // zero to non-zero. + // (c) Address resolution that causes number of backends to go from + // non-zero to zero. + // (d) Address resolution that causes a new LB policy to be created. + // + // we track a list of strings to eventually be concatenated and traced. + TraceStringVector trace_strings; grpc_connectivity_state connectivity_state = GRPC_CHANNEL_TRANSIENT_FAILURE; grpc_error* connectivity_error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("No load balancing policy"); @@ -528,11 +621,29 @@ static void on_resolver_result_changed_locked(void* arg, grpc_error* error) { } else { // Instantiate new LB policy. create_new_lb_policy_locked(chand, lb_policy_name.get(), - &connectivity_state, &connectivity_error); + &connectivity_state, &connectivity_error, + &trace_strings); } // Find service config. 
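The two helpers above accumulate per-resolution trace strings and then flatten them into a single channelz trace event. A minimal standalone sketch of that concatenation, with std::string and std::vector standing in for the internal gpr_strvec and InlinedVector:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Joins the per-resolution trace strings into one channelz message, as
// concatenate_and_add_channel_trace_locked() does with gpr_strvec.
std::string ConcatenateTraceStrings(
    const std::vector<std::string>& trace_strings) {
  std::string result = "Resolution event: ";
  bool is_first = true;
  for (const std::string& s : trace_strings) {
    if (!is_first) result += ", ";
    is_first = false;
    result += s;
  }
  return result;
}

int main() {
  // Two of the events the surrounding diff can record for one resolution.
  std::cout << ConcatenateTraceStrings({"Service config changed",
                                        "Address list became non-empty"})
            << "\n";  // Resolution event: Service config changed, ...
}
```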
grpc_core::UniquePtr<char> service_config_json = get_service_config_from_resolver_result_locked(chand); + // Note: It's safe to use chand->info_service_config_json here without + // taking a lock on chand->info_mu, because this function is the + // only thing that modifies its value, and it can only be invoked + // once at any given time. + if (chand->channelz_channel != nullptr) { + if (((service_config_json == nullptr) != + (chand->info_service_config_json == nullptr)) || + (service_config_json != nullptr && + strcmp(service_config_json.get(), + chand->info_service_config_json.get()) != 0)) { + // TODO(ncteisen): might be worth somehow including a snippet of the + // config in the trace, at the risk of bloating the trace logs. + trace_strings.push_back(gpr_strdup("Service config changed")); + } + maybe_add_trace_message_for_address_changes_locked(chand, &trace_strings); + concatenate_and_add_channel_trace_locked(chand, &trace_strings); + } // Swap out the data used by cc_get_channel_info(). gpr_mu_lock(&chand->info_mu); chand->info_lb_policy_name = std::move(lb_policy_name); @@ -700,6 +811,8 @@ static grpc_error* cc_init_channel_elem(grpc_channel_element* elem, // Record enable_retries. arg = grpc_channel_args_find(args->channel_args, GRPC_ARG_ENABLE_RETRIES); chand->enable_retries = grpc_channel_arg_get_bool(arg, true); + chand->channelz_channel = nullptr; + chand->previous_resolution_contained_addresses = false; // Record client channel factory. arg = grpc_channel_args_find(args->channel_args, GRPC_ARG_CLIENT_CHANNEL_FACTORY); @@ -825,12 +938,26 @@ static void cc_destroy_channel_elem(grpc_channel_element* elem) { // (census filter is on top of this one) // - add census stats for retries +namespace { +struct call_data; + // State used for starting a retryable batch on a subchannel call. // This provides its own grpc_transport_stream_op_batch and other data // structures needed to populate the ops in the batch. // We allocate one struct on the arena for each attempt at starting a // batch on a given subchannel call. -typedef struct { +struct subchannel_batch_data { + subchannel_batch_data(grpc_call_element* elem, call_data* calld, int refcount, + bool set_on_complete); + // All dtor code must be added in `destroy`. This is because we may + // call closures in `subchannel_batch_data` after they are unrefed by + // `batch_data_unref`, and msan would complain about accessing this class + // after calling dtor. As a result we cannot call the `dtor` in + // `batch_data_unref`. + // TODO(soheil): We should try to call the dtor in `batch_data_unref`. + ~subchannel_batch_data() { destroy(); } + void destroy(); + gpr_refcount refs; grpc_call_element* elem; grpc_subchannel_call* subchannel_call; // Holds a ref. @@ -839,11 +966,23 @@ typedef struct { grpc_transport_stream_op_batch batch; // For intercepting on_complete. grpc_closure on_complete; -} subchannel_batch_data; +}; // Retry state associated with a subchannel call. // Stored in the parent_data of the subchannel call object. 
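subchannel_batch_data (and, below, call_data) now lives in arena-owned memory: it is constructed with placement new and torn down by an explicit destroy()/destructor call rather than delete, because the arena reclaims the memory in bulk. A minimal sketch of the pattern, with a toy malloc-backed Arena standing in for gpr_arena (which, unlike this toy, never frees per object):

```cpp
#include <cstdlib>
#include <new>

// Toy stand-in for gpr_arena: hands out raw memory that the real arena
// would reclaim in one shot when the call ends.
struct Arena {
  void* Alloc(size_t size) { return std::malloc(size); }
};

struct BatchData {
  explicit BatchData(int refcount) : refs(refcount) {}
  ~BatchData() { /* unref subchannel call, metadata batches, ... */ }
  int refs;
};

int main() {
  Arena arena;
  void* mem = arena.Alloc(sizeof(BatchData));
  // Construct into arena memory: no heap allocation, no matching delete.
  BatchData* batch = new (mem) BatchData(/*refcount=*/2);
  // Tear down by running the destructor explicitly; the memory itself
  // belongs to the arena (we free it here only because the toy arena
  // malloc'd per object).
  batch->~BatchData();
  std::free(mem);
}
```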
-typedef struct { +struct subchannel_call_retry_state { + explicit subchannel_call_retry_state(grpc_call_context_element* context) + : batch_payload(context), + started_send_initial_metadata(false), + completed_send_initial_metadata(false), + started_send_trailing_metadata(false), + completed_send_trailing_metadata(false), + started_recv_initial_metadata(false), + completed_recv_initial_metadata(false), + started_recv_trailing_metadata(false), + completed_recv_trailing_metadata(false), + retry_dispatched(false) {} + // subchannel_batch_data.batch.payload points to this. grpc_transport_stream_op_batch_payload batch_payload; // For send_initial_metadata. @@ -862,7 +1001,7 @@ typedef struct { // For intercepting recv_initial_metadata. grpc_metadata_batch recv_initial_metadata; grpc_closure recv_initial_metadata_ready; - bool trailing_metadata_available; + bool trailing_metadata_available = false; // For intercepting recv_message. grpc_closure recv_message_ready; grpc_core::OrphanablePtr<grpc_core::ByteStream> recv_message; @@ -872,10 +1011,10 @@ typedef struct { grpc_closure recv_trailing_metadata_ready; // These fields indicate which ops have been started and completed on // this subchannel call. - size_t started_send_message_count; - size_t completed_send_message_count; - size_t started_recv_message_count; - size_t completed_recv_message_count; + size_t started_send_message_count = 0; + size_t completed_send_message_count = 0; + size_t started_recv_message_count = 0; + size_t completed_recv_message_count = 0; bool started_send_initial_metadata : 1; bool completed_send_initial_metadata : 1; bool started_send_trailing_metadata : 1; @@ -884,14 +1023,18 @@ typedef struct { bool completed_recv_initial_metadata : 1; bool started_recv_trailing_metadata : 1; bool completed_recv_trailing_metadata : 1; + subchannel_batch_data* recv_initial_metadata_ready_deferred_batch = nullptr; + grpc_error* recv_initial_metadata_error = GRPC_ERROR_NONE; + subchannel_batch_data* recv_message_ready_deferred_batch = nullptr; + grpc_error* recv_message_error = GRPC_ERROR_NONE; + subchannel_batch_data* recv_trailing_metadata_internal_batch = nullptr; // State for callback processing. + // NOTE: Do not move this next to the metadata bitfields above. That would + // save space but will also result in a data race because compiler will + // generate a 2 byte store which overwrites the meta-data fields upon + // setting this field. bool retry_dispatched : 1; - subchannel_batch_data* recv_initial_metadata_ready_deferred_batch; - grpc_error* recv_initial_metadata_error; - subchannel_batch_data* recv_message_ready_deferred_batch; - grpc_error* recv_message_error; - subchannel_batch_data* recv_trailing_metadata_internal_batch; -} subchannel_call_retry_state; +}; // Pending batches stored in call data. typedef struct { @@ -906,7 +1049,44 @@ typedef struct { Handles queueing of stream ops until a call object is ready, waiting for initial metadata before trying to create a call object, and handling cancellation gracefully. */ -typedef struct client_channel_call_data { +struct call_data { + call_data(grpc_call_element* elem, const channel_data& chand, + const grpc_call_element_args& args) + : deadline_state(elem, args.call_stack, args.call_combiner, + GPR_LIKELY(chand.deadline_checking_enabled) + ? 
args.deadline + : GRPC_MILLIS_INF_FUTURE), + path(grpc_slice_ref_internal(args.path)), + call_start_time(args.start_time), + deadline(args.deadline), + arena(args.arena), + owning_call(args.call_stack), + call_combiner(args.call_combiner), + pending_send_initial_metadata(false), + pending_send_message(false), + pending_send_trailing_metadata(false), + enable_retries(chand.enable_retries), + retry_committed(false), + last_attempt_got_server_pushback(false) {} + + ~call_data() { + if (GPR_LIKELY(subchannel_call != nullptr)) { + GRPC_SUBCHANNEL_CALL_UNREF(subchannel_call, + "client_channel_destroy_call"); + } + grpc_slice_unref_internal(path); + GRPC_ERROR_UNREF(cancel_error); + for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches); ++i) { + GPR_ASSERT(pending_batches[i].batch == nullptr); + } + for (size_t i = 0; i < GRPC_CONTEXT_COUNT; ++i) { + if (pick.subchannel_call_context[i].value != nullptr) { + pick.subchannel_call_context[i].destroy( + pick.subchannel_call_context[i].value); + } + } + } + // State for handling deadlines. // The code in deadline_filter.c requires this to be the first field. // TODO(roth): This is slightly sub-optimal in that grpc_deadline_state @@ -925,24 +1105,24 @@ typedef struct client_channel_call_data { grpc_core::RefCountedPtr<ServerRetryThrottleData> retry_throttle_data; grpc_core::RefCountedPtr<ClientChannelMethodParams> method_params; - grpc_subchannel_call* subchannel_call; + grpc_subchannel_call* subchannel_call = nullptr; // Set when we get a cancel_stream op. - grpc_error* cancel_error; + grpc_error* cancel_error = GRPC_ERROR_NONE; grpc_core::LoadBalancingPolicy::PickState pick; grpc_closure pick_closure; grpc_closure pick_cancel_closure; - grpc_polling_entity* pollent; - bool pollent_added_to_interested_parties; + grpc_polling_entity* pollent = nullptr; + bool pollent_added_to_interested_parties = false; // Batches are added to this list when received from above. // They are removed when we are done handling the batch (i.e., when // either we have invoked all of the batch's callbacks or we have // passed the batch down to the subchannel call and are not // intercepting any of its callbacks). - pending_batch pending_batches[MAX_PENDING_BATCHES]; + pending_batch pending_batches[MAX_PENDING_BATCHES] = {}; bool pending_send_initial_metadata : 1; bool pending_send_message : 1; bool pending_send_trailing_metadata : 1; @@ -951,8 +1131,8 @@ typedef struct client_channel_call_data { bool enable_retries : 1; bool retry_committed : 1; bool last_attempt_got_server_pushback : 1; - int num_attempts_completed; - size_t bytes_buffered_for_retry; + int num_attempts_completed = 0; + size_t bytes_buffered_for_retry = 0; grpc_core::ManualConstructor<grpc_core::BackOff> retry_backoff; grpc_timer retry_timer; @@ -963,12 +1143,12 @@ typedef struct client_channel_call_data { // until all of these batches have completed. // Note that we actually only need to track replay batches, but it's // easier to track all batches with send ops. - int num_pending_retriable_subchannel_send_batches; + int num_pending_retriable_subchannel_send_batches = 0; // Cached data for retrying send ops. 
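The NOTE in the hunk above is about C++ bitfield semantics: adjacent bitfields share one memory location, so storing to one is a read-modify-write that can clobber a concurrent store to its neighbor. A sketch of the two layouts, illustrative only:

```cpp
#include <cstdio>

// Racy layout: all flags share one memory location, so a store to
// retry_dispatched is a read-modify-write of the neighboring bits too.
struct PackedFlags {
  bool started_send_initial_metadata : 1;
  bool completed_send_initial_metadata : 1;
  bool retry_dispatched : 1;  // would clobber concurrent flag writes
};

// Layout the diff keeps: non-bitfield members separate retry_dispatched
// from the metadata bitfields, so its store touches a different byte.
struct SafeFlags {
  bool started_send_initial_metadata : 1;
  bool completed_send_initial_metadata : 1;
  void* recv_message_ready_deferred_batch;  // separator, as in the diff
  bool retry_dispatched : 1;
};

int main() {
  std::printf("packed=%zu bytes, safe=%zu bytes\n", sizeof(PackedFlags),
              sizeof(SafeFlags));  // the safe layout trades space for safety
}
```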
// send_initial_metadata - bool seen_send_initial_metadata; - grpc_linked_mdelem* send_initial_metadata_storage; + bool seen_send_initial_metadata = false; + grpc_linked_mdelem* send_initial_metadata_storage = nullptr; grpc_metadata_batch send_initial_metadata; uint32_t send_initial_metadata_flags; gpr_atm* peer_string; @@ -979,14 +1159,13 @@ typedef struct client_channel_call_data { // Note: We inline the cache for the first 3 send_message ops and use // dynamic allocation after that. This number was essentially picked // at random; it could be changed in the future to tune performance. - grpc_core::ManualConstructor< - grpc_core::InlinedVector<grpc_core::ByteStreamCache*, 3>> - send_messages; + grpc_core::InlinedVector<grpc_core::ByteStreamCache*, 3> send_messages; // send_trailing_metadata - bool seen_send_trailing_metadata; - grpc_linked_mdelem* send_trailing_metadata_storage; + bool seen_send_trailing_metadata = false; + grpc_linked_mdelem* send_trailing_metadata_storage = nullptr; grpc_metadata_batch send_trailing_metadata; -} call_data; +}; +} // namespace // Forward declarations. static void retry_commit(grpc_call_element* elem, @@ -1030,7 +1209,7 @@ static void maybe_cache_send_ops_for_batch(call_data* calld, gpr_arena_alloc(calld->arena, sizeof(grpc_core::ByteStreamCache))); new (cache) grpc_core::ByteStreamCache( std::move(batch->payload->send_message.send_message)); - calld->send_messages->push_back(cache); + calld->send_messages.push_back(cache); } // Save metadata batch for send_trailing_metadata ops. if (batch->send_trailing_metadata) { @@ -1067,7 +1246,7 @@ static void free_cached_send_message(channel_data* chand, call_data* calld, "chand=%p calld=%p: destroying calld->send_messages[%" PRIuPTR "]", chand, calld, idx); } - (*calld->send_messages)[idx]->Destroy(); + calld->send_messages[idx]->Destroy(); } // Frees cached send_trailing_metadata. @@ -1537,55 +1716,66 @@ static bool maybe_retry(grpc_call_element* elem, // subchannel_batch_data // -// Creates a subchannel_batch_data object on the call's arena with the -// specified refcount. If set_on_complete is true, the batch's -// on_complete callback will be set to point to on_complete(); -// otherwise, the batch's on_complete callback will be null. 
-static subchannel_batch_data* batch_data_create(grpc_call_element* elem, - int refcount, - bool set_on_complete) { - call_data* calld = static_cast<call_data*>(elem->call_data); +namespace { +subchannel_batch_data::subchannel_batch_data(grpc_call_element* elem, + call_data* calld, int refcount, + bool set_on_complete) + : elem(elem), + subchannel_call(GRPC_SUBCHANNEL_CALL_REF(calld->subchannel_call, + "batch_data_create")) { subchannel_call_retry_state* retry_state = static_cast<subchannel_call_retry_state*>( grpc_connected_subchannel_call_get_parent_data( calld->subchannel_call)); - subchannel_batch_data* batch_data = static_cast<subchannel_batch_data*>( - gpr_arena_alloc(calld->arena, sizeof(*batch_data))); - batch_data->elem = elem; - batch_data->subchannel_call = - GRPC_SUBCHANNEL_CALL_REF(calld->subchannel_call, "batch_data_create"); - batch_data->batch.payload = &retry_state->batch_payload; - gpr_ref_init(&batch_data->refs, refcount); + batch.payload = &retry_state->batch_payload; + gpr_ref_init(&refs, refcount); if (set_on_complete) { - GRPC_CLOSURE_INIT(&batch_data->on_complete, on_complete, batch_data, + GRPC_CLOSURE_INIT(&on_complete, ::on_complete, this, grpc_schedule_on_exec_ctx); - batch_data->batch.on_complete = &batch_data->on_complete; + batch.on_complete = &on_complete; } GRPC_CALL_STACK_REF(calld->owning_call, "batch_data"); +} + +void subchannel_batch_data::destroy() { + subchannel_call_retry_state* retry_state = + static_cast<subchannel_call_retry_state*>( + grpc_connected_subchannel_call_get_parent_data(subchannel_call)); + if (batch.send_initial_metadata) { + grpc_metadata_batch_destroy(&retry_state->send_initial_metadata); + } + if (batch.send_trailing_metadata) { + grpc_metadata_batch_destroy(&retry_state->send_trailing_metadata); + } + if (batch.recv_initial_metadata) { + grpc_metadata_batch_destroy(&retry_state->recv_initial_metadata); + } + if (batch.recv_trailing_metadata) { + grpc_metadata_batch_destroy(&retry_state->recv_trailing_metadata); + } + GRPC_SUBCHANNEL_CALL_UNREF(subchannel_call, "batch_data_unref"); + call_data* calld = static_cast<call_data*>(elem->call_data); + GRPC_CALL_STACK_UNREF(calld->owning_call, "batch_data"); +} +} // namespace + +// Creates a subchannel_batch_data object on the call's arena with the +// specified refcount. If set_on_complete is true, the batch's +// on_complete callback will be set to point to on_complete(); +// otherwise, the batch's on_complete callback will be null. 
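batch_data_create() (continued below) initializes the object with one ref per callback that may fire, and batch_data_unref() runs the teardown exactly once when the count reaches zero. A standalone model of that discipline, using std::atomic<int> in place of gpr_refcount:

```cpp
#include <atomic>

struct BatchData {
  explicit BatchData(int refcount) : refs(refcount) {}
  void Destroy() { /* unref subchannel call and call stack, free metadata */ }
  std::atomic<int> refs;
};

void BatchDataUnref(BatchData* b) {
  // Destroy exactly once, on the transition to zero.
  if (b->refs.fetch_sub(1, std::memory_order_acq_rel) == 1) {
    b->Destroy();  // teardown lives in destroy(); the arena keeps the memory
  }
}

int main() {
  BatchData batch(/*refcount=*/2);  // e.g. on_complete + recv_message_ready
  BatchDataUnref(&batch);           // first callback finishes
  BatchDataUnref(&batch);           // last ref: Destroy() runs here
}
```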
+static subchannel_batch_data* batch_data_create(grpc_call_element* elem, + int refcount, + bool set_on_complete) { + call_data* calld = static_cast<call_data*>(elem->call_data); + subchannel_batch_data* batch_data = + new (gpr_arena_alloc(calld->arena, sizeof(*batch_data))) + subchannel_batch_data(elem, calld, refcount, set_on_complete); return batch_data; } static void batch_data_unref(subchannel_batch_data* batch_data) { if (gpr_unref(&batch_data->refs)) { - subchannel_call_retry_state* retry_state = - static_cast<subchannel_call_retry_state*>( - grpc_connected_subchannel_call_get_parent_data( - batch_data->subchannel_call)); - if (batch_data->batch.send_initial_metadata) { - grpc_metadata_batch_destroy(&retry_state->send_initial_metadata); - } - if (batch_data->batch.send_trailing_metadata) { - grpc_metadata_batch_destroy(&retry_state->send_trailing_metadata); - } - if (batch_data->batch.recv_initial_metadata) { - grpc_metadata_batch_destroy(&retry_state->recv_initial_metadata); - } - if (batch_data->batch.recv_trailing_metadata) { - grpc_metadata_batch_destroy(&retry_state->recv_trailing_metadata); - } - GRPC_SUBCHANNEL_CALL_UNREF(batch_data->subchannel_call, "batch_data_unref"); - call_data* calld = static_cast<call_data*>(batch_data->elem->call_data); - GRPC_CALL_STACK_UNREF(calld->owning_call, "batch_data"); + batch_data->destroy(); } } @@ -1778,23 +1968,22 @@ static void recv_message_ready(void* arg, grpc_error* error) { // recv_trailing_metadata handling // -// Sets *status and *server_pushback_md based on batch_data and error. -static void get_call_status(subchannel_batch_data* batch_data, - grpc_error* error, grpc_status_code* status, +// Sets *status and *server_pushback_md based on md_batch and error. +// Only sets *server_pushback_md if server_pushback_md != nullptr. +static void get_call_status(grpc_call_element* elem, + grpc_metadata_batch* md_batch, grpc_error* error, + grpc_status_code* status, grpc_mdelem** server_pushback_md) { - grpc_call_element* elem = batch_data->elem; call_data* calld = static_cast<call_data*>(elem->call_data); if (error != GRPC_ERROR_NONE) { grpc_error_get_status(error, calld->deadline, status, nullptr, nullptr, nullptr); } else { - grpc_metadata_batch* md_batch = - batch_data->batch.payload->recv_trailing_metadata - .recv_trailing_metadata; GPR_ASSERT(md_batch->idx.named.grpc_status != nullptr); *status = grpc_get_status_code_from_metadata(md_batch->idx.named.grpc_status->md); - if (md_batch->idx.named.grpc_retry_pushback_ms != nullptr) { + if (server_pushback_md != nullptr && + md_batch->idx.named.grpc_retry_pushback_ms != nullptr) { *server_pushback_md = &md_batch->idx.named.grpc_retry_pushback_ms->md; } } @@ -1884,7 +2073,7 @@ static bool pending_batch_is_unstarted( return true; } if (pending->batch->send_message && - retry_state->started_send_message_count < calld->send_messages->size()) { + retry_state->started_send_message_count < calld->send_messages.size()) { return true; } if (pending->batch->send_trailing_metadata && @@ -1967,7 +2156,9 @@ static void recv_trailing_metadata_ready(void* arg, grpc_error* error) { // Get the call's status and check for server pushback metadata. 
grpc_status_code status = GRPC_STATUS_OK; grpc_mdelem* server_pushback_md = nullptr; - get_call_status(batch_data, GRPC_ERROR_REF(error), &status, + grpc_metadata_batch* md_batch = + batch_data->batch.payload->recv_trailing_metadata.recv_trailing_metadata; + get_call_status(elem, md_batch, GRPC_ERROR_REF(error), &status, &server_pushback_md); if (grpc_client_channel_trace.enabled()) { gpr_log(GPR_INFO, "chand=%p calld=%p: call finished, status=%s", chand, @@ -2038,7 +2229,7 @@ static void add_closures_for_replay_or_pending_send_ops( channel_data* chand = static_cast<channel_data*>(elem->channel_data); call_data* calld = static_cast<call_data*>(elem->call_data); bool have_pending_send_message_ops = - retry_state->started_send_message_count < calld->send_messages->size(); + retry_state->started_send_message_count < calld->send_messages.size(); bool have_pending_send_trailing_metadata_op = calld->seen_send_trailing_metadata && !retry_state->started_send_trailing_metadata; @@ -2194,9 +2385,9 @@ static void add_retriable_send_initial_metadata_op( .grpc_previous_rpc_attempts); } if (GPR_UNLIKELY(calld->num_attempts_completed > 0)) { - grpc_mdelem retry_md = grpc_mdelem_from_slices( + grpc_mdelem retry_md = grpc_mdelem_create( GRPC_MDSTR_GRPC_PREVIOUS_RPC_ATTEMPTS, - *retry_count_strings[calld->num_attempts_completed - 1]); + *retry_count_strings[calld->num_attempts_completed - 1], nullptr); grpc_error* error = grpc_metadata_batch_add_tail( &retry_state->send_initial_metadata, &retry_state->send_initial_metadata_storage[calld->send_initial_metadata @@ -2230,7 +2421,7 @@ static void add_retriable_send_message_op( chand, calld, retry_state->started_send_message_count); } grpc_core::ByteStreamCache* cache = - (*calld->send_messages)[retry_state->started_send_message_count]; + calld->send_messages[retry_state->started_send_message_count]; ++retry_state->started_send_message_count; retry_state->send_message.Init(cache); batch_data->batch.send_message = true; @@ -2362,7 +2553,7 @@ static subchannel_batch_data* maybe_create_subchannel_batch_for_replay( } // send_message. // Note that we can only have one send_message op in flight at a time. - if (retry_state->started_send_message_count < calld->send_messages->size() && + if (retry_state->started_send_message_count < calld->send_messages.size() && retry_state->started_send_message_count == retry_state->completed_send_message_count && !calld->pending_send_message) { @@ -2383,7 +2574,7 @@ static subchannel_batch_data* maybe_create_subchannel_batch_for_replay( // to start, since we can't send down any more send_message ops after // send_trailing_metadata. if (calld->seen_send_trailing_metadata && - retry_state->started_send_message_count == calld->send_messages->size() && + retry_state->started_send_message_count == calld->send_messages.size() && !retry_state->started_send_trailing_metadata && !calld->pending_send_trailing_metadata) { if (grpc_client_channel_trace.enabled()) { @@ -2435,7 +2626,7 @@ static void add_subchannel_batches_for_pending_batches( // send_message ops after send_trailing_metadata. 
if (batch->send_trailing_metadata && (retry_state->started_send_message_count + batch->send_message < - calld->send_messages->size() || + calld->send_messages.size() || retry_state->started_send_trailing_metadata)) { continue; } @@ -2602,11 +2793,9 @@ static void create_subchannel_call(grpc_call_element* elem, grpc_error* error) { pending_batches_fail(elem, new_error, true /* yield_call_combiner */); } else { if (parent_data_size > 0) { - subchannel_call_retry_state* retry_state = - static_cast<subchannel_call_retry_state*>( - grpc_connected_subchannel_call_get_parent_data( - calld->subchannel_call)); - retry_state->batch_payload.context = calld->pick.subchannel_call_context; + new (grpc_connected_subchannel_call_get_parent_data( + calld->subchannel_call)) + subchannel_call_retry_state(calld->pick.subchannel_call_context); } pending_batches_resume(elem); } @@ -2832,6 +3021,27 @@ static void apply_service_config_to_call_locked(grpc_call_element* elem) { } } +// If the channel is in TRANSIENT_FAILURE and the call is not +// wait_for_ready=true, fails the call and returns true. +static bool fail_call_if_in_transient_failure(grpc_call_element* elem) { + channel_data* chand = static_cast<channel_data*>(elem->channel_data); + call_data* calld = static_cast<call_data*>(elem->call_data); + grpc_transport_stream_op_batch* batch = calld->pending_batches[0].batch; + if (grpc_connectivity_state_check(&chand->state_tracker) == + GRPC_CHANNEL_TRANSIENT_FAILURE && + (batch->payload->send_initial_metadata.send_initial_metadata_flags & + GRPC_INITIAL_METADATA_WAIT_FOR_READY) == 0) { + pending_batches_fail( + elem, + grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "channel is in state TRANSIENT_FAILURE"), + GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNAVAILABLE), + true /* yield_call_combiner */); + return true; + } + return false; +} + // Invoked once resolver results are available. static void process_service_config_and_start_lb_pick_locked( grpc_call_element* elem) { @@ -2839,6 +3049,9 @@ static void process_service_config_and_start_lb_pick_locked( // Only get service config data on the first attempt. if (GPR_LIKELY(calld->num_attempts_completed == 0)) { apply_service_config_to_call_locked(elem); + // Check this after applying service config, since it may have + // affected the call's wait_for_ready value. + if (fail_call_if_in_transient_failure(elem)) return; } // Start LB pick. grpc_core::LbPicker::StartLocked(elem); @@ -3008,6 +3221,16 @@ static void start_pick_locked(void* arg, grpc_error* ignored) { // We do not yet have an LB policy, so wait for a resolver result. if (GPR_UNLIKELY(!chand->started_resolving)) { start_resolving_locked(chand); + } else { + // Normally, we want to do this check in + // process_service_config_and_start_lb_pick_locked(), so that we + // can honor the wait_for_ready setting in the service config. + // However, if the channel is in TRANSIENT_FAILURE at this point, that + // means that the resolver has returned a failure, so we're not going + // to get a service config right away. In that case, we fail the + // call now based on the wait_for_ready value passed in from the + // application. + if (fail_call_if_in_transient_failure(elem)) return; } // Create a new waiter, which will delete itself when done. 
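fail_call_if_in_transient_failure() above fails a call fast only when the channel is in TRANSIENT_FAILURE and the call did not opt into wait_for_ready. A minimal model of that gate; the flag value below is a stand-in for GRPC_INITIAL_METADATA_WAIT_FOR_READY from grpc_types.h:

```cpp
#include <cstdint>
#include <iostream>

// Stand-in for GRPC_INITIAL_METADATA_WAIT_FOR_READY.
constexpr uint32_t kWaitForReady = 0x20;

bool ShouldFailFast(bool channel_in_transient_failure,
                    uint32_t initial_metadata_flags) {
  // Fail fast only when the channel is broken AND the call did not ask
  // to wait for it to become READY.
  return channel_in_transient_failure &&
         (initial_metadata_flags & kWaitForReady) == 0;
}

int main() {
  std::cout << ShouldFailFast(true, 0) << "\n";              // 1: fail the call
  std::cout << ShouldFailFast(true, kWaitForReady) << "\n";  // 0: keep queued
  std::cout << ShouldFailFast(false, 0) << "\n";             // 0: channel OK
}
```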
grpc_core::New<grpc_core::ResolverResultWaiter>(elem); @@ -3112,21 +3335,8 @@ static void cc_start_transport_stream_op_batch( /* Constructor for call_data */ static grpc_error* cc_init_call_elem(grpc_call_element* elem, const grpc_call_element_args* args) { - call_data* calld = static_cast<call_data*>(elem->call_data); channel_data* chand = static_cast<channel_data*>(elem->channel_data); - // Initialize data members. - calld->path = grpc_slice_ref_internal(args->path); - calld->call_start_time = args->start_time; - calld->deadline = args->deadline; - calld->arena = args->arena; - calld->owning_call = args->call_stack; - calld->call_combiner = args->call_combiner; - if (GPR_LIKELY(chand->deadline_checking_enabled)) { - grpc_deadline_state_init(elem, args->call_stack, args->call_combiner, - calld->deadline); - } - calld->enable_retries = chand->enable_retries; - calld->send_messages.Init(); + new (elem->call_data) call_data(elem, *chand, *args); return GRPC_ERROR_NONE; } @@ -3135,34 +3345,12 @@ static void cc_destroy_call_elem(grpc_call_element* elem, const grpc_call_final_info* final_info, grpc_closure* then_schedule_closure) { call_data* calld = static_cast<call_data*>(elem->call_data); - channel_data* chand = static_cast<channel_data*>(elem->channel_data); - if (GPR_LIKELY(chand->deadline_checking_enabled)) { - grpc_deadline_state_destroy(elem); - } - grpc_slice_unref_internal(calld->path); - calld->retry_throttle_data.reset(); - calld->method_params.reset(); - GRPC_ERROR_UNREF(calld->cancel_error); if (GPR_LIKELY(calld->subchannel_call != nullptr)) { grpc_subchannel_call_set_cleanup_closure(calld->subchannel_call, then_schedule_closure); then_schedule_closure = nullptr; - GRPC_SUBCHANNEL_CALL_UNREF(calld->subchannel_call, - "client_channel_destroy_call"); - } - for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches); ++i) { - GPR_ASSERT(calld->pending_batches[i].batch == nullptr); - } - if (GPR_LIKELY(calld->pick.connected_subchannel != nullptr)) { - calld->pick.connected_subchannel.reset(); - } - for (size_t i = 0; i < GRPC_CONTEXT_COUNT; ++i) { - if (calld->pick.subchannel_call_context[i].value != nullptr) { - calld->pick.subchannel_call_context[i].destroy( - calld->pick.subchannel_call_context[i].value); - } } - calld->send_messages.Destroy(); + calld->~call_data(); GRPC_CLOSURE_SCHED(then_schedule_closure, GRPC_ERROR_NONE); } @@ -3203,9 +3391,16 @@ static void try_to_connect_locked(void* arg, grpc_error* error_ignored) { GRPC_CHANNEL_STACK_UNREF(chand->owning_stack, "try_to_connect"); } +void grpc_client_channel_set_channelz_node( + grpc_channel_element* elem, grpc_core::channelz::ClientChannelNode* node) { + channel_data* chand = static_cast<channel_data*>(elem->channel_data); + chand->channelz_channel = node; +} + void grpc_client_channel_populate_child_refs( - grpc_channel_element* elem, grpc_core::ChildRefsList* child_subchannels, - grpc_core::ChildRefsList* child_channels) { + grpc_channel_element* elem, + grpc_core::channelz::ChildRefsList* child_subchannels, + grpc_core::channelz::ChildRefsList* child_channels) { channel_data* chand = static_cast<channel_data*>(elem->channel_data); if (chand->lb_policy != nullptr) { chand->lb_policy->FillChildRefsForChannelz(child_subchannels, diff --git a/src/core/ext/filters/client_channel/client_channel.h b/src/core/ext/filters/client_channel/client_channel.h index 0b44a17562..4935fd24d8 100644 --- a/src/core/ext/filters/client_channel/client_channel.h +++ b/src/core/ext/filters/client_channel/client_channel.h @@ -40,9 +40,13 @@ 
extern grpc_core::TraceFlag grpc_client_channel_trace; extern const grpc_channel_filter grpc_client_channel_filter; +void grpc_client_channel_set_channelz_node( + grpc_channel_element* elem, grpc_core::channelz::ClientChannelNode* node); + void grpc_client_channel_populate_child_refs( - grpc_channel_element* elem, grpc_core::ChildRefsList* child_subchannels, - grpc_core::ChildRefsList* child_channels); + grpc_channel_element* elem, + grpc_core::channelz::ChildRefsList* child_subchannels, + grpc_core::channelz::ChildRefsList* child_channels); grpc_connectivity_state grpc_client_channel_check_connectivity_state( grpc_channel_element* elem, int try_to_connect); diff --git a/src/core/ext/filters/client_channel/client_channel_channelz.cc b/src/core/ext/filters/client_channel/client_channel_channelz.cc index 86c765df52..8e5426081c 100644 --- a/src/core/ext/filters/client_channel/client_channel_channelz.cc +++ b/src/core/ext/filters/client_channel/client_channel_channelz.cc @@ -20,10 +20,13 @@ #include "src/core/ext/filters/client_channel/client_channel.h" #include "src/core/ext/filters/client_channel/client_channel_channelz.h" +#include "src/core/lib/channel/channelz_registry.h" #include "src/core/lib/gpr/useful.h" #include "src/core/lib/surface/channel.h" #include "src/core/lib/transport/connectivity_state.h" +#include <grpc/support/string_util.h> + namespace grpc_core { namespace channelz { namespace { @@ -46,6 +49,7 @@ ClientChannelNode::ClientChannelNode(grpc_channel* channel, : ChannelNode(channel, channel_tracer_max_nodes, is_top_level_channel) { client_channel_ = grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel)); + grpc_client_channel_set_channelz_node(client_channel_, this); GPR_ASSERT(client_channel_->filter == &grpc_client_channel_filter); } @@ -109,5 +113,74 @@ RefCountedPtr<ChannelNode> ClientChannelNode::MakeClientChannelNode( is_top_level_channel); } +SubchannelNode::SubchannelNode(grpc_subchannel* subchannel, + size_t channel_tracer_max_nodes) + : BaseNode(EntityType::kSubchannel), + subchannel_(subchannel), + target_( + UniquePtr<char>(gpr_strdup(grpc_subchannel_get_target(subchannel_)))), + trace_(channel_tracer_max_nodes) {} + +SubchannelNode::~SubchannelNode() {} + +void SubchannelNode::PopulateConnectivityState(grpc_json* json) { + grpc_connectivity_state state; + if (subchannel_ == nullptr) { + state = GRPC_CHANNEL_SHUTDOWN; + } else { + state = grpc_subchannel_check_connectivity( + subchannel_, nullptr, true /* inhibit_health_checking */); + } + json = grpc_json_create_child(nullptr, json, "state", nullptr, + GRPC_JSON_OBJECT, false); + grpc_json_create_child(nullptr, json, "state", + grpc_connectivity_state_name(state), GRPC_JSON_STRING, + false); +} + +grpc_json* SubchannelNode::RenderJson() { + grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT); + grpc_json* json = top_level_json; + grpc_json* json_iterator = nullptr; + json_iterator = grpc_json_create_child(json_iterator, json, "ref", nullptr, + GRPC_JSON_OBJECT, false); + json = json_iterator; + json_iterator = nullptr; + json_iterator = grpc_json_add_number_string_child(json, json_iterator, + "subchannelId", uuid()); + // reset json iterators to top level object + json = top_level_json; + json_iterator = nullptr; + // create and fill the data child. 
+ grpc_json* data = grpc_json_create_child(json_iterator, json, "data", nullptr, + GRPC_JSON_OBJECT, false); + json = data; + json_iterator = nullptr; + PopulateConnectivityState(json); + GPR_ASSERT(target_.get() != nullptr); + grpc_json_create_child(nullptr, json, "target", target_.get(), + GRPC_JSON_STRING, false); + // fill in the channel trace if applicable + grpc_json* trace_json = trace_.RenderJson(); + if (trace_json != nullptr) { + trace_json->key = "trace"; // this object is named trace in channelz.proto + grpc_json_link_child(json, trace_json, nullptr); + } + // ask CallCountingHelper to populate trace and call count data. + call_counter_.PopulateCallCounts(json); + json = top_level_json; + // populate the child socket. + intptr_t socket_uuid = grpc_subchannel_get_child_socket_uuid(subchannel_); + if (socket_uuid != 0) { + grpc_json* array_parent = grpc_json_create_child( + nullptr, json, "socketRef", nullptr, GRPC_JSON_ARRAY, false); + json_iterator = grpc_json_create_child(json_iterator, array_parent, nullptr, + nullptr, GRPC_JSON_OBJECT, false); + grpc_json_add_number_string_child(json_iterator, nullptr, "socketId", + socket_uuid); + } + return top_level_json; +} + } // namespace channelz } // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/client_channel_channelz.h b/src/core/ext/filters/client_channel/client_channel_channelz.h index 6f27b5c8b7..8a5c3e7e5e 100644 --- a/src/core/ext/filters/client_channel/client_channel_channelz.h +++ b/src/core/ext/filters/client_channel/client_channel_channelz.h @@ -23,16 +23,12 @@ #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/channel/channel_stack.h" +#include "src/core/lib/channel/channel_trace.h" #include "src/core/lib/channel/channelz.h" -#include "src/core/lib/gprpp/inlined_vector.h" -namespace grpc_core { - -// TODO(ncteisen), this only contains the uuids of the children for now, -// since that is all that is strictly needed. In a future enhancement we will -// add human readable names as in the channelz.proto -typedef InlinedVector<intptr_t, 10> ChildRefsList; +typedef struct grpc_subchannel grpc_subchannel; +namespace grpc_core { namespace channelz { // Subtype of ChannelNode that overrides and provides client_channel specific @@ -43,28 +39,59 @@ class ClientChannelNode : public ChannelNode { grpc_channel* channel, size_t channel_tracer_max_nodes, bool is_top_level_channel); - // Override this functionality since client_channels have a notion of - // channel connectivity. - void PopulateConnectivityState(grpc_json* json) override; + ClientChannelNode(grpc_channel* channel, size_t channel_tracer_max_nodes, + bool is_top_level_channel); + virtual ~ClientChannelNode() {} - // Override this functionality since client_channels have subchannels + // Overriding template methods from ChannelNode to render information that + // only ClientChannelNode knows about. + void PopulateConnectivityState(grpc_json* json) override; void PopulateChildRefs(grpc_json* json) override; // Helper to create a channel arg to ensure this type of ChannelNode is // created. 
static grpc_arg CreateChannelArg(); - protected: - GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_DELETE - GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_NEW - ClientChannelNode(grpc_channel* channel, size_t channel_tracer_max_nodes, - bool is_top_level_channel); - virtual ~ClientChannelNode() {} - private: grpc_channel_element* client_channel_; }; +// Handles channelz bookkeeping for sockets +class SubchannelNode : public BaseNode { + public: + SubchannelNode(grpc_subchannel* subchannel, size_t channel_tracer_max_nodes); + ~SubchannelNode() override; + + void MarkSubchannelDestroyed() { + GPR_ASSERT(subchannel_ != nullptr); + subchannel_ = nullptr; + } + + grpc_json* RenderJson() override; + + // proxy methods to composed classes. + void AddTraceEvent(ChannelTrace::Severity severity, grpc_slice data) { + trace_.AddTraceEvent(severity, data); + } + void AddTraceEventWithReference(ChannelTrace::Severity severity, + grpc_slice data, + RefCountedPtr<BaseNode> referenced_channel) { + trace_.AddTraceEventWithReference(severity, data, + std::move(referenced_channel)); + } + void RecordCallStarted() { call_counter_.RecordCallStarted(); } + void RecordCallFailed() { call_counter_.RecordCallFailed(); } + void RecordCallSucceeded() { call_counter_.RecordCallSucceeded(); } + + private: + grpc_subchannel* subchannel_; + UniquePtr<char> target_; + CallCountingHelper call_counter_; + ChannelTrace trace_; + + void PopulateConnectivityState(grpc_json* json); +}; + } // namespace channelz } // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/connector.h b/src/core/ext/filters/client_channel/connector.h index 556594929c..ea34dcdab5 100644 --- a/src/core/ext/filters/client_channel/connector.h +++ b/src/core/ext/filters/client_channel/connector.h @@ -47,6 +47,9 @@ typedef struct { /** channel arguments (to be passed to the filters) */ grpc_channel_args* channel_args; + + /** socket uuid of the connected transport. 0 if not available */ + intptr_t socket_uuid; } grpc_connect_out_args; struct grpc_connector_vtable { diff --git a/src/core/ext/filters/client_channel/health/health.pb.c b/src/core/ext/filters/client_channel/health/health.pb.c new file mode 100644 index 0000000000..5499c549cc --- /dev/null +++ b/src/core/ext/filters/client_channel/health/health.pb.c @@ -0,0 +1,23 @@ +/* Automatically generated nanopb constant definitions */ +/* Generated by nanopb-0.3.7-dev */ + +#include "src/core/ext/filters/client_channel/health/health.pb.h" +/* @@protoc_insertion_point(includes) */ +#if PB_PROTO_HEADER_VERSION != 30 +#error Regenerate this file with the current version of nanopb generator. 
+#endif + + + +const pb_field_t grpc_health_v1_HealthCheckRequest_fields[2] = { + PB_FIELD( 1, STRING , OPTIONAL, STATIC , FIRST, grpc_health_v1_HealthCheckRequest, service, service, 0), + PB_LAST_FIELD +}; + +const pb_field_t grpc_health_v1_HealthCheckResponse_fields[2] = { + PB_FIELD( 1, UENUM , OPTIONAL, STATIC , FIRST, grpc_health_v1_HealthCheckResponse, status, status, 0), + PB_LAST_FIELD +}; + + +/* @@protoc_insertion_point(eof) */ diff --git a/src/core/ext/filters/client_channel/health/health.pb.h b/src/core/ext/filters/client_channel/health/health.pb.h new file mode 100644 index 0000000000..9d54ccd618 --- /dev/null +++ b/src/core/ext/filters/client_channel/health/health.pb.h @@ -0,0 +1,73 @@ +/* Automatically generated nanopb header */ +/* Generated by nanopb-0.3.7-dev */ + +#ifndef PB_GRPC_HEALTH_V1_HEALTH_PB_H_INCLUDED +#define PB_GRPC_HEALTH_V1_HEALTH_PB_H_INCLUDED +#include "pb.h" +/* @@protoc_insertion_point(includes) */ +#if PB_PROTO_HEADER_VERSION != 30 +#error Regenerate this file with the current version of nanopb generator. +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Enum definitions */ +typedef enum _grpc_health_v1_HealthCheckResponse_ServingStatus { + grpc_health_v1_HealthCheckResponse_ServingStatus_UNKNOWN = 0, + grpc_health_v1_HealthCheckResponse_ServingStatus_SERVING = 1, + grpc_health_v1_HealthCheckResponse_ServingStatus_NOT_SERVING = 2, + grpc_health_v1_HealthCheckResponse_ServingStatus_SERVICE_UNKNOWN = 3 +} grpc_health_v1_HealthCheckResponse_ServingStatus; +#define _grpc_health_v1_HealthCheckResponse_ServingStatus_MIN grpc_health_v1_HealthCheckResponse_ServingStatus_UNKNOWN +#define _grpc_health_v1_HealthCheckResponse_ServingStatus_MAX grpc_health_v1_HealthCheckResponse_ServingStatus_SERVICE_UNKNOWN +#define _grpc_health_v1_HealthCheckResponse_ServingStatus_ARRAYSIZE ((grpc_health_v1_HealthCheckResponse_ServingStatus)(grpc_health_v1_HealthCheckResponse_ServingStatus_SERVICE_UNKNOWN+1)) + +/* Struct definitions */ +typedef struct _grpc_health_v1_HealthCheckRequest { + bool has_service; + char service[200]; +/* @@protoc_insertion_point(struct:grpc_health_v1_HealthCheckRequest) */ +} grpc_health_v1_HealthCheckRequest; + +typedef struct _grpc_health_v1_HealthCheckResponse { + bool has_status; + grpc_health_v1_HealthCheckResponse_ServingStatus status; +/* @@protoc_insertion_point(struct:grpc_health_v1_HealthCheckResponse) */ +} grpc_health_v1_HealthCheckResponse; + +/* Default values for struct fields */ + +/* Initializer values for message structs */ +#define grpc_health_v1_HealthCheckRequest_init_default {false, ""} +#define grpc_health_v1_HealthCheckResponse_init_default {false, (grpc_health_v1_HealthCheckResponse_ServingStatus)0} +#define grpc_health_v1_HealthCheckRequest_init_zero {false, ""} +#define grpc_health_v1_HealthCheckResponse_init_zero {false, (grpc_health_v1_HealthCheckResponse_ServingStatus)0} + +/* Field tags (for use in manual encoding/decoding) */ +#define grpc_health_v1_HealthCheckRequest_service_tag 1 +#define grpc_health_v1_HealthCheckResponse_status_tag 1 + +/* Struct field encoding specification for nanopb */ +extern const pb_field_t grpc_health_v1_HealthCheckRequest_fields[2]; +extern const pb_field_t grpc_health_v1_HealthCheckResponse_fields[2]; + +/* Maximum encoded size of messages (where known) */ +#define grpc_health_v1_HealthCheckRequest_size 203 +#define grpc_health_v1_HealthCheckResponse_size 2 + +/* Message IDs (where set with "msgid" option) */ +#ifdef PB_MSGID + +#define HEALTH_MESSAGES \ + + +#endif + +#ifdef 
__cplusplus +} /* extern "C" */ +#endif +/* @@protoc_insertion_point(eof) */ + +#endif diff --git a/src/core/ext/filters/client_channel/health/health_check_client.cc b/src/core/ext/filters/client_channel/health/health_check_client.cc new file mode 100644 index 0000000000..587919596f --- /dev/null +++ b/src/core/ext/filters/client_channel/health/health_check_client.cc @@ -0,0 +1,652 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include <stdint.h> +#include <stdio.h> + +#include "src/core/ext/filters/client_channel/health/health_check_client.h" + +#include "pb_decode.h" +#include "pb_encode.h" +#include "src/core/ext/filters/client_channel/health/health.pb.h" +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/gprpp/mutex_lock.h" +#include "src/core/lib/slice/slice_internal.h" +#include "src/core/lib/transport/error_utils.h" +#include "src/core/lib/transport/status_metadata.h" + +#define HEALTH_CHECK_INITIAL_CONNECT_BACKOFF_SECONDS 1 +#define HEALTH_CHECK_RECONNECT_BACKOFF_MULTIPLIER 1.6 +#define HEALTH_CHECK_RECONNECT_MAX_BACKOFF_SECONDS 120 +#define HEALTH_CHECK_RECONNECT_JITTER 0.2 + +grpc_core::TraceFlag grpc_health_check_client_trace(false, + "health_check_client"); + +namespace grpc_core { + +// +// HealthCheckClient +// + +HealthCheckClient::HealthCheckClient( + const char* service_name, + RefCountedPtr<ConnectedSubchannel> connected_subchannel, + grpc_pollset_set* interested_parties, + grpc_core::RefCountedPtr<grpc_core::channelz::SubchannelNode> channelz_node) + : InternallyRefCountedWithTracing<HealthCheckClient>( + &grpc_health_check_client_trace), + service_name_(service_name), + connected_subchannel_(std::move(connected_subchannel)), + interested_parties_(interested_parties), + channelz_node_(std::move(channelz_node)), + retry_backoff_( + BackOff::Options() + .set_initial_backoff( + HEALTH_CHECK_INITIAL_CONNECT_BACKOFF_SECONDS * 1000) + .set_multiplier(HEALTH_CHECK_RECONNECT_BACKOFF_MULTIPLIER) + .set_jitter(HEALTH_CHECK_RECONNECT_JITTER) + .set_max_backoff(HEALTH_CHECK_RECONNECT_MAX_BACKOFF_SECONDS * + 1000)) { + if (grpc_health_check_client_trace.enabled()) { + gpr_log(GPR_INFO, "created HealthCheckClient %p", this); + } + GRPC_CLOSURE_INIT(&retry_timer_callback_, OnRetryTimer, this, + grpc_schedule_on_exec_ctx); + gpr_mu_init(&mu_); + StartCall(); +} + +HealthCheckClient::~HealthCheckClient() { + if (grpc_health_check_client_trace.enabled()) { + gpr_log(GPR_INFO, "destroying HealthCheckClient %p", this); + } + GRPC_ERROR_UNREF(error_); + gpr_mu_destroy(&mu_); +} + +void HealthCheckClient::NotifyOnHealthChange(grpc_connectivity_state* state, + grpc_closure* closure) { + MutexLock lock(&mu_); + GPR_ASSERT(notify_state_ == nullptr); + if (*state != state_) { + *state = state_; + GRPC_CLOSURE_SCHED(closure, GRPC_ERROR_REF(error_)); + return; + } + notify_state_ = state; + on_health_changed_ = closure; +} + +void 
HealthCheckClient::SetHealthStatus(grpc_connectivity_state state, + grpc_error* error) { + MutexLock lock(&mu_); + SetHealthStatusLocked(state, error); +} + +void HealthCheckClient::SetHealthStatusLocked(grpc_connectivity_state state, + grpc_error* error) { + if (grpc_health_check_client_trace.enabled()) { + gpr_log(GPR_INFO, "HealthCheckClient %p: setting state=%d error=%s", this, + state, grpc_error_string(error)); + } + if (notify_state_ != nullptr && *notify_state_ != state) { + *notify_state_ = state; + notify_state_ = nullptr; + GRPC_CLOSURE_SCHED(on_health_changed_, GRPC_ERROR_REF(error)); + on_health_changed_ = nullptr; + } + state_ = state; + GRPC_ERROR_UNREF(error_); + error_ = error; +} + +void HealthCheckClient::Orphan() { + if (grpc_health_check_client_trace.enabled()) { + gpr_log(GPR_INFO, "HealthCheckClient %p: shutting down", this); + } + { + MutexLock lock(&mu_); + if (on_health_changed_ != nullptr) { + *notify_state_ = GRPC_CHANNEL_SHUTDOWN; + notify_state_ = nullptr; + GRPC_CLOSURE_SCHED(on_health_changed_, GRPC_ERROR_NONE); + on_health_changed_ = nullptr; + } + shutting_down_ = true; + call_state_.reset(); + if (retry_timer_callback_pending_) { + grpc_timer_cancel(&retry_timer_); + } + } + Unref(DEBUG_LOCATION, "orphan"); +} + +void HealthCheckClient::StartCall() { + MutexLock lock(&mu_); + StartCallLocked(); +} + +void HealthCheckClient::StartCallLocked() { + if (shutting_down_) return; + GPR_ASSERT(call_state_ == nullptr); + SetHealthStatusLocked(GRPC_CHANNEL_CONNECTING, GRPC_ERROR_NONE); + call_state_ = MakeOrphanable<CallState>(Ref(), interested_parties_); + if (grpc_health_check_client_trace.enabled()) { + gpr_log(GPR_INFO, "HealthCheckClient %p: created CallState %p", this, + call_state_.get()); + } + call_state_->StartCall(); +} + +void HealthCheckClient::StartRetryTimer() { + MutexLock lock(&mu_); + SetHealthStatusLocked( + GRPC_CHANNEL_TRANSIENT_FAILURE, + GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "health check call failed; will retry after backoff")); + grpc_millis next_try = retry_backoff_.NextAttemptTime(); + if (grpc_health_check_client_trace.enabled()) { + gpr_log(GPR_INFO, "HealthCheckClient %p: health check call lost...", this); + grpc_millis timeout = next_try - ExecCtx::Get()->Now(); + if (timeout > 0) { + gpr_log(GPR_INFO, + "HealthCheckClient %p: ... will retry in %" PRId64 "ms.", this, + timeout); + } else { + gpr_log(GPR_INFO, "HealthCheckClient %p: ... retrying immediately.", + this); + } + } + // Ref for callback, tracked manually. 
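The retry timer above is driven by grpc_core::BackOff with the constants defined at the top of this file (1 second initial backoff, 1.6x multiplier, 0.2 jitter, 120 second cap). A standalone sketch that models the same schedule; the exact jitter formula is an assumption based on gRPC's documented backoff algorithm:

```cpp
#include <algorithm>
#include <cstdio>
#include <random>

int main() {
  double backoff_ms = 1000.0;            // initial: 1 second
  const double multiplier = 1.6;         // growth per failed attempt
  const double jitter = 0.2;             // +/-20% randomization (assumed form)
  const double max_ms = 120.0 * 1000.0;  // 120 second cap
  std::mt19937 rng(42);
  for (int attempt = 1; attempt <= 6; ++attempt) {
    std::uniform_real_distribution<double> jittered(
        backoff_ms * (1 - jitter), backoff_ms * (1 + jitter));
    std::printf("attempt %d: retry in ~%.0f ms\n", attempt, jittered(rng));
    backoff_ms = std::min(backoff_ms * multiplier, max_ms);
  }
}
```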
+ Ref(DEBUG_LOCATION, "health_retry_timer").release(); + retry_timer_callback_pending_ = true; + grpc_timer_init(&retry_timer_, next_try, &retry_timer_callback_); +} + +void HealthCheckClient::OnRetryTimer(void* arg, grpc_error* error) { + HealthCheckClient* self = static_cast<HealthCheckClient*>(arg); + { + MutexLock lock(&self->mu_); + self->retry_timer_callback_pending_ = false; + if (!self->shutting_down_ && error == GRPC_ERROR_NONE && + self->call_state_ == nullptr) { + if (grpc_health_check_client_trace.enabled()) { + gpr_log(GPR_INFO, "HealthCheckClient %p: restarting health check call", + self); + } + self->StartCallLocked(); + } + } + self->Unref(DEBUG_LOCATION, "health_retry_timer"); +} + +// +// protobuf helpers +// + +namespace { + +void EncodeRequest(const char* service_name, + ManualConstructor<SliceBufferByteStream>* send_message) { + grpc_health_v1_HealthCheckRequest request_struct; + request_struct.has_service = true; + snprintf(request_struct.service, sizeof(request_struct.service), "%s", + service_name); + pb_ostream_t ostream; + memset(&ostream, 0, sizeof(ostream)); + pb_encode(&ostream, grpc_health_v1_HealthCheckRequest_fields, + &request_struct); + grpc_slice request_slice = GRPC_SLICE_MALLOC(ostream.bytes_written); + ostream = pb_ostream_from_buffer(GRPC_SLICE_START_PTR(request_slice), + GRPC_SLICE_LENGTH(request_slice)); + GPR_ASSERT(pb_encode(&ostream, grpc_health_v1_HealthCheckRequest_fields, + &request_struct) != 0); + grpc_slice_buffer slice_buffer; + grpc_slice_buffer_init(&slice_buffer); + grpc_slice_buffer_add(&slice_buffer, request_slice); + send_message->Init(&slice_buffer, 0); + grpc_slice_buffer_destroy_internal(&slice_buffer); +} + +// Returns true if healthy. +// If there was an error parsing the response, sets *error and returns false. +bool DecodeResponse(grpc_slice_buffer* slice_buffer, grpc_error** error) { + // If message is empty, assume unhealthy. + if (slice_buffer->length == 0) { + *error = + GRPC_ERROR_CREATE_FROM_STATIC_STRING("health check response was empty"); + return false; + } + // Concatenate the slices to form a single string. + UniquePtr<uint8_t> recv_message_deleter; + uint8_t* recv_message; + if (slice_buffer->count == 1) { + recv_message = GRPC_SLICE_START_PTR(slice_buffer->slices[0]); + } else { + recv_message = static_cast<uint8_t*>(gpr_malloc(slice_buffer->length)); + recv_message_deleter.reset(recv_message); + size_t offset = 0; + for (size_t i = 0; i < slice_buffer->count; ++i) { + memcpy(recv_message + offset, + GRPC_SLICE_START_PTR(slice_buffer->slices[i]), + GRPC_SLICE_LENGTH(slice_buffer->slices[i])); + offset += GRPC_SLICE_LENGTH(slice_buffer->slices[i]); + } + } + // Deserialize message. + grpc_health_v1_HealthCheckResponse response_struct; + pb_istream_t istream = + pb_istream_from_buffer(recv_message, slice_buffer->length); + if (!pb_decode(&istream, grpc_health_v1_HealthCheckResponse_fields, + &response_struct)) { + // Can't parse message; assume unhealthy. + *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "cannot parse health check response"); + return false; + } + if (!response_struct.has_status) { + // Field not present; assume unhealthy. 
+ *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "status field not present in health check response"); + return false; + } + return response_struct.status == + grpc_health_v1_HealthCheckResponse_ServingStatus_SERVING; +} + +} // namespace + +// +// HealthCheckClient::CallState +// + +HealthCheckClient::CallState::CallState( + RefCountedPtr<HealthCheckClient> health_check_client, + grpc_pollset_set* interested_parties) + : InternallyRefCountedWithTracing<CallState>( + &grpc_health_check_client_trace), + health_check_client_(std::move(health_check_client)), + pollent_(grpc_polling_entity_create_from_pollset_set(interested_parties)), + arena_(gpr_arena_create(health_check_client_->connected_subchannel_ + ->GetInitialCallSizeEstimate(0))), + payload_(context_) { + grpc_call_combiner_init(&call_combiner_); + gpr_atm_rel_store(&seen_response_, static_cast<gpr_atm>(0)); +} + +HealthCheckClient::CallState::~CallState() { + if (grpc_health_check_client_trace.enabled()) { + gpr_log(GPR_INFO, "HealthCheckClient %p: destroying CallState %p", + health_check_client_.get(), this); + } + if (call_ != nullptr) GRPC_SUBCHANNEL_CALL_UNREF(call_, "call_ended"); + for (size_t i = 0; i < GRPC_CONTEXT_COUNT; i++) { + if (context_[i].destroy != nullptr) { + context_[i].destroy(context_[i].value); + } + } + // Unset the call combiner cancellation closure. This has the + // effect of scheduling the previously set cancellation closure, if + // any, so that it can release any internal references it may be + // holding to the call stack. Also flush the closures on exec_ctx so that + // filters that schedule cancel notification closures on exec_ctx do not + // need to take a ref of the call stack to guarantee closure liveness. + grpc_call_combiner_set_notify_on_cancel(&call_combiner_, nullptr); + grpc_core::ExecCtx::Get()->Flush(); + grpc_call_combiner_destroy(&call_combiner_); + gpr_arena_destroy(arena_); +} + +void HealthCheckClient::CallState::Orphan() { + grpc_call_combiner_cancel(&call_combiner_, GRPC_ERROR_CANCELLED); + Cancel(); +} + +void HealthCheckClient::CallState::StartCall() { + ConnectedSubchannel::CallArgs args = { + &pollent_, + GRPC_MDSTR_SLASH_GRPC_DOT_HEALTH_DOT_V1_DOT_HEALTH_SLASH_WATCH, + gpr_now(GPR_CLOCK_MONOTONIC), // start_time + GRPC_MILLIS_INF_FUTURE, // deadline + arena_, + context_, + &call_combiner_, + 0, // parent_data_size + }; + grpc_error* error = + health_check_client_->connected_subchannel_->CreateCall(args, &call_); + if (error != GRPC_ERROR_NONE) { + gpr_log(GPR_ERROR, + "HealthCheckClient %p CallState %p: error creating health " + "checking call on subchannel (%s); will retry", + health_check_client_.get(), this, grpc_error_string(error)); + GRPC_ERROR_UNREF(error); + // Schedule instead of running directly, since we must not be + // holding health_check_client_->mu_ when CallEnded() is called. + Ref(DEBUG_LOCATION, "call_end_closure").release(); + GRPC_CLOSURE_SCHED( + GRPC_CLOSURE_INIT(&batch_.handler_private.closure, CallEndedRetry, this, + grpc_schedule_on_exec_ctx), + GRPC_ERROR_NONE); + return; + } + // Initialize payload and batch. + memset(&batch_, 0, sizeof(batch_)); + payload_.context = context_; + batch_.payload = &payload_; + // on_complete callback takes ref, handled manually. + Ref(DEBUG_LOCATION, "on_complete").release(); + batch_.on_complete = GRPC_CLOSURE_INIT(&on_complete_, OnComplete, this, + grpc_schedule_on_exec_ctx); + // Add send_initial_metadata op. 
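DecodeResponse() above only pays for a copy when the wire message arrived fragmented: a single-slice buffer is parsed in place, while a multi-slice buffer is first flattened into one contiguous allocation. A distilled sketch of that copy-on-fragmentation pattern, with std::vector standing in for grpc_slice_buffer (illustrative names):

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

using Slice = std::vector<uint8_t>;

// Returns a pointer to contiguous message bytes. A single slice is used in
// place (zero copy); multiple slices are concatenated into 'scratch'.
const uint8_t* FlattenSlices(const std::vector<Slice>& slices, Slice* scratch) {
  if (slices.size() == 1) return slices[0].data();
  size_t total = 0;
  for (const Slice& s : slices) total += s.size();
  scratch->resize(total);
  size_t offset = 0;
  for (const Slice& s : slices) {
    if (s.empty()) continue;
    std::memcpy(scratch->data() + offset, s.data(), s.size());
    offset += s.size();
  }
  return scratch->data();
}
```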
+ grpc_metadata_batch_init(&send_initial_metadata_); + error = grpc_metadata_batch_add_head( + &send_initial_metadata_, &path_metadata_storage_, + grpc_mdelem_from_slices( + GRPC_MDSTR_PATH, + GRPC_MDSTR_SLASH_GRPC_DOT_HEALTH_DOT_V1_DOT_HEALTH_SLASH_WATCH)); + GPR_ASSERT(error == GRPC_ERROR_NONE); + payload_.send_initial_metadata.send_initial_metadata = + &send_initial_metadata_; + payload_.send_initial_metadata.send_initial_metadata_flags = 0; + payload_.send_initial_metadata.peer_string = nullptr; + batch_.send_initial_metadata = true; + // Add send_message op. + EncodeRequest(health_check_client_->service_name_, &send_message_); + payload_.send_message.send_message.reset(send_message_.get()); + batch_.send_message = true; + // Add send_trailing_metadata op. + grpc_metadata_batch_init(&send_trailing_metadata_); + payload_.send_trailing_metadata.send_trailing_metadata = + &send_trailing_metadata_; + batch_.send_trailing_metadata = true; + // Add recv_initial_metadata op. + grpc_metadata_batch_init(&recv_initial_metadata_); + payload_.recv_initial_metadata.recv_initial_metadata = + &recv_initial_metadata_; + payload_.recv_initial_metadata.recv_flags = nullptr; + payload_.recv_initial_metadata.trailing_metadata_available = nullptr; + payload_.recv_initial_metadata.peer_string = nullptr; + // recv_initial_metadata_ready callback takes ref, handled manually. + Ref(DEBUG_LOCATION, "recv_initial_metadata_ready").release(); + payload_.recv_initial_metadata.recv_initial_metadata_ready = + GRPC_CLOSURE_INIT(&recv_initial_metadata_ready_, RecvInitialMetadataReady, + this, grpc_schedule_on_exec_ctx); + batch_.recv_initial_metadata = true; + // Add recv_message op. + payload_.recv_message.recv_message = &recv_message_; + // recv_message callback takes ref, handled manually. + Ref(DEBUG_LOCATION, "recv_message_ready").release(); + payload_.recv_message.recv_message_ready = GRPC_CLOSURE_INIT( + &recv_message_ready_, RecvMessageReady, this, grpc_schedule_on_exec_ctx); + batch_.recv_message = true; + // Start batch. + StartBatch(&batch_); + // Initialize recv_trailing_metadata batch. + memset(&recv_trailing_metadata_batch_, 0, + sizeof(recv_trailing_metadata_batch_)); + recv_trailing_metadata_batch_.payload = &payload_; + // Add recv_trailing_metadata op. + grpc_metadata_batch_init(&recv_trailing_metadata_); + payload_.recv_trailing_metadata.recv_trailing_metadata = + &recv_trailing_metadata_; + payload_.recv_trailing_metadata.collect_stats = &collect_stats_; + // This callback signals the end of the call, so it relies on the + // initial ref instead of taking a new ref. When it's invoked, the + // initial ref is released. + payload_.recv_trailing_metadata.recv_trailing_metadata_ready = + GRPC_CLOSURE_INIT(&recv_trailing_metadata_ready_, + RecvTrailingMetadataReady, this, + grpc_schedule_on_exec_ctx); + recv_trailing_metadata_batch_.recv_trailing_metadata = true; + // Start recv_trailing_metadata batch. 
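Each "takes ref, handled manually" comment above marks one strong reference per callback handed to the transport, so the CallState cannot be freed while any op in the batch is still pending; recv_trailing_metadata_ready alone rides on the initial ref because it always fires last. A minimal sketch of that discipline with std::atomic (illustrative; not gRPC's InternallyRefCountedWithTracing):

```cpp
#include <atomic>
#include <cstdio>

class CallState {
 public:
  // Take one ref per callback that has been handed to the transport.
  void Ref(const char* reason) {
    refs_.fetch_add(1, std::memory_order_relaxed);
    std::printf("ref for %s\n", reason);
  }
  // Each callback releases its ref as its last action.
  void Unref(const char* reason) {
    std::printf("unref for %s\n", reason);
    if (refs_.fetch_sub(1, std::memory_order_acq_rel) == 1) delete this;
  }

  void StartBatch() {
    Ref("on_complete");
    Ref("recv_initial_metadata_ready");
    Ref("recv_message_ready");
    // recv_trailing_metadata_ready reuses the initial ref, as in the diff.
  }

 private:
  std::atomic<int> refs_{1};  // Initial ref owned by the creator.
};

int main() {
  CallState* call = new CallState();
  call->StartBatch();
  // The transport would invoke these as each op completes:
  call->Unref("on_complete");
  call->Unref("recv_initial_metadata_ready");
  call->Unref("recv_message_ready");
  call->Unref("call_ended");  // Releases the initial ref; deletes call.
}
```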
+ StartBatch(&recv_trailing_metadata_batch_); +} + +void HealthCheckClient::CallState::StartBatchInCallCombiner(void* arg, + grpc_error* error) { + grpc_transport_stream_op_batch* batch = + static_cast<grpc_transport_stream_op_batch*>(arg); + grpc_subchannel_call* call = + static_cast<grpc_subchannel_call*>(batch->handler_private.extra_arg); + grpc_subchannel_call_process_op(call, batch); +} + +void HealthCheckClient::CallState::StartBatch( + grpc_transport_stream_op_batch* batch) { + batch->handler_private.extra_arg = call_; + GRPC_CLOSURE_INIT(&batch->handler_private.closure, StartBatchInCallCombiner, + batch, grpc_schedule_on_exec_ctx); + GRPC_CALL_COMBINER_START(&call_combiner_, &batch->handler_private.closure, + GRPC_ERROR_NONE, "start_subchannel_batch"); +} + +void HealthCheckClient::CallState::OnCancelComplete(void* arg, + grpc_error* error) { + HealthCheckClient::CallState* self = + static_cast<HealthCheckClient::CallState*>(arg); + GRPC_CALL_COMBINER_STOP(&self->call_combiner_, "health_cancel"); + self->Unref(DEBUG_LOCATION, "cancel"); +} + +void HealthCheckClient::CallState::StartCancel(void* arg, grpc_error* error) { + HealthCheckClient::CallState* self = + static_cast<HealthCheckClient::CallState*>(arg); + auto* batch = grpc_make_transport_stream_op( + GRPC_CLOSURE_CREATE(OnCancelComplete, self, grpc_schedule_on_exec_ctx)); + batch->cancel_stream = true; + batch->payload->cancel_stream.cancel_error = GRPC_ERROR_CANCELLED; + grpc_subchannel_call_process_op(self->call_, batch); +} + +void HealthCheckClient::CallState::Cancel() { + if (call_ != nullptr) { + Ref(DEBUG_LOCATION, "cancel").release(); + GRPC_CALL_COMBINER_START( + &call_combiner_, + GRPC_CLOSURE_CREATE(StartCancel, this, grpc_schedule_on_exec_ctx), + GRPC_ERROR_NONE, "health_cancel"); + } +} + +void HealthCheckClient::CallState::OnComplete(void* arg, grpc_error* error) { + HealthCheckClient::CallState* self = + static_cast<HealthCheckClient::CallState*>(arg); + GRPC_CALL_COMBINER_STOP(&self->call_combiner_, "on_complete"); + grpc_metadata_batch_destroy(&self->send_initial_metadata_); + grpc_metadata_batch_destroy(&self->send_trailing_metadata_); + self->Unref(DEBUG_LOCATION, "on_complete"); +} + +void HealthCheckClient::CallState::RecvInitialMetadataReady(void* arg, + grpc_error* error) { + HealthCheckClient::CallState* self = + static_cast<HealthCheckClient::CallState*>(arg); + GRPC_CALL_COMBINER_STOP(&self->call_combiner_, "recv_initial_metadata_ready"); + grpc_metadata_batch_destroy(&self->recv_initial_metadata_); + self->Unref(DEBUG_LOCATION, "recv_initial_metadata_ready"); +} + +void HealthCheckClient::CallState::DoneReadingRecvMessage(grpc_error* error) { + recv_message_.reset(); + if (error != GRPC_ERROR_NONE) { + GRPC_ERROR_UNREF(error); + Cancel(); + grpc_slice_buffer_destroy_internal(&recv_message_buffer_); + Unref(DEBUG_LOCATION, "recv_message_ready"); + return; + } + const bool healthy = DecodeResponse(&recv_message_buffer_, &error); + const grpc_connectivity_state state = + healthy ? GRPC_CHANNEL_READY : GRPC_CHANNEL_TRANSIENT_FAILURE; + if (error == GRPC_ERROR_NONE && !healthy) { + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("backend unhealthy"); + } + health_check_client_->SetHealthStatus(state, error); + gpr_atm_rel_store(&seen_response_, static_cast<gpr_atm>(1)); + grpc_slice_buffer_destroy_internal(&recv_message_buffer_); + // Start another recv_message batch. + // This re-uses the ref we're holding. 
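StartBatch() above never calls into the transport directly: it wraps the batch in a closure and hands it to the call combiner, which guarantees that closures for one call execute strictly one at a time without a conventional lock (each callback later signals completion via GRPC_CALL_COMBINER_STOP). A toy combiner conveying just that scheduling contract (mutex-based and simplified; the real grpc_call_combiner is lock-free, and a closure there is not "done" merely by returning):

```cpp
#include <functional>
#include <mutex>
#include <queue>

// Executes submitted closures strictly one at a time, in submission order.
class Combiner {
 public:
  void Start(std::function<void()> closure) {
    {
      std::lock_guard<std::mutex> lock(mu_);
      queue_.push(std::move(closure));
      if (draining_) return;  // Someone is already draining; just enqueue.
      draining_ = true;
    }
    Drain();
  }

 private:
  void Drain() {
    for (;;) {
      std::function<void()> next;
      {
        std::lock_guard<std::mutex> lock(mu_);
        if (queue_.empty()) {
          draining_ = false;
          return;
        }
        next = std::move(queue_.front());
        queue_.pop();
      }
      next();  // Runs exclusively: no other closure for this call is active.
    }
  }

  std::mutex mu_;
  std::queue<std::function<void()>> queue_;
  bool draining_ = false;
};
```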
+ // Note: Can't just reuse batch_ here, since we don't know that all + // callbacks from the original batch have completed yet. + memset(&recv_message_batch_, 0, sizeof(recv_message_batch_)); + recv_message_batch_.payload = &payload_; + payload_.recv_message.recv_message = &recv_message_; + payload_.recv_message.recv_message_ready = GRPC_CLOSURE_INIT( + &recv_message_ready_, RecvMessageReady, this, grpc_schedule_on_exec_ctx); + recv_message_batch_.recv_message = true; + StartBatch(&recv_message_batch_); +} + +grpc_error* HealthCheckClient::CallState::PullSliceFromRecvMessage() { + grpc_slice slice; + grpc_error* error = recv_message_->Pull(&slice); + if (error == GRPC_ERROR_NONE) { + grpc_slice_buffer_add(&recv_message_buffer_, slice); + } + return error; +} + +void HealthCheckClient::CallState::ContinueReadingRecvMessage() { + while (recv_message_->Next(SIZE_MAX, &recv_message_ready_)) { + grpc_error* error = PullSliceFromRecvMessage(); + if (error != GRPC_ERROR_NONE) { + DoneReadingRecvMessage(error); + return; + } + if (recv_message_buffer_.length == recv_message_->length()) { + DoneReadingRecvMessage(GRPC_ERROR_NONE); + break; + } + } +} + +void HealthCheckClient::CallState::OnByteStreamNext(void* arg, + grpc_error* error) { + HealthCheckClient::CallState* self = + static_cast<HealthCheckClient::CallState*>(arg); + if (error != GRPC_ERROR_NONE) { + self->DoneReadingRecvMessage(GRPC_ERROR_REF(error)); + return; + } + error = self->PullSliceFromRecvMessage(); + if (error != GRPC_ERROR_NONE) { + self->DoneReadingRecvMessage(error); + return; + } + if (self->recv_message_buffer_.length == self->recv_message_->length()) { + self->DoneReadingRecvMessage(GRPC_ERROR_NONE); + } else { + self->ContinueReadingRecvMessage(); + } +} + +void HealthCheckClient::CallState::RecvMessageReady(void* arg, + grpc_error* error) { + HealthCheckClient::CallState* self = + static_cast<HealthCheckClient::CallState*>(arg); + GRPC_CALL_COMBINER_STOP(&self->call_combiner_, "recv_message_ready"); + if (self->recv_message_ == nullptr) { + self->Unref(DEBUG_LOCATION, "recv_message_ready"); + return; + } + grpc_slice_buffer_init(&self->recv_message_buffer_); + GRPC_CLOSURE_INIT(&self->recv_message_ready_, OnByteStreamNext, self, + grpc_schedule_on_exec_ctx); + self->ContinueReadingRecvMessage(); + // Ref will continue to be held until we finish draining the byte stream. +} + +void HealthCheckClient::CallState::RecvTrailingMetadataReady( + void* arg, grpc_error* error) { + HealthCheckClient::CallState* self = + static_cast<HealthCheckClient::CallState*>(arg); + GRPC_CALL_COMBINER_STOP(&self->call_combiner_, + "recv_trailing_metadata_ready"); + // Get call status. + grpc_status_code status = GRPC_STATUS_UNKNOWN; + if (error != GRPC_ERROR_NONE) { + grpc_error_get_status(error, GRPC_MILLIS_INF_FUTURE, &status, + nullptr /* slice */, nullptr /* http_error */, + nullptr /* error_string */); + } else if (self->recv_trailing_metadata_.idx.named.grpc_status != nullptr) { + status = grpc_get_status_code_from_metadata( + self->recv_trailing_metadata_.idx.named.grpc_status->md); + } + if (grpc_health_check_client_trace.enabled()) { + gpr_log(GPR_INFO, + "HealthCheckClient %p CallState %p: health watch failed with " + "status %d", + self->health_check_client_.get(), self, status); + } + // Clean up. + grpc_metadata_batch_destroy(&self->recv_trailing_metadata_); + // For status UNIMPLEMENTED, give up and assume always healthy. 
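ContinueReadingRecvMessage() and OnByteStreamNext() above form a pull loop over an asynchronous byte stream: Next() either reports that a chunk can be pulled right now, letting the loop continue, or parks the closure so the same loop resumes on a later wakeup. A condensed, synchronous-only sketch of that control flow (FakeByteStream and all names are stand-ins, not gRPC's ByteStream API):

```cpp
#include <cstddef>
#include <functional>
#include <string>
#include <vector>

// Stand-in for a pull-based byte stream: Next() returns true when a chunk
// can be pulled synchronously; otherwise it would invoke 'on_ready' later.
struct FakeByteStream {
  std::vector<std::string> chunks;
  size_t next_chunk = 0;
  size_t length = 0;  // Total message length, known up front.
  bool Next(const std::function<void()>& /*on_ready*/) {
    return next_chunk < chunks.size();
  }
  std::string Pull() { return chunks[next_chunk++]; }
};

// Mirrors ContinueReadingRecvMessage(): loop while data is synchronously
// available; when Next() returns false, on_ready re-enters this loop later.
void DrainMessage(FakeByteStream* stream, std::string* buffer,
                  const std::function<void()>& on_ready) {
  while (stream->Next(on_ready)) {
    buffer->append(stream->Pull());
    if (buffer->size() == stream->length) {
      return;  // Full message assembled; decode it here.
    }
  }
}
```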
+ bool retry = true; + if (status == GRPC_STATUS_UNIMPLEMENTED) { + static const char kErrorMessage[] = + "health checking Watch method returned UNIMPLEMENTED; " + "disabling health checks but assuming server is healthy"; + gpr_log(GPR_ERROR, kErrorMessage); + if (self->health_check_client_->channelz_node_ != nullptr) { + self->health_check_client_->channelz_node_->AddTraceEvent( + channelz::ChannelTrace::Error, + grpc_slice_from_static_string(kErrorMessage)); + } + self->health_check_client_->SetHealthStatus(GRPC_CHANNEL_READY, + GRPC_ERROR_NONE); + retry = false; + } + self->CallEnded(retry); +} + +void HealthCheckClient::CallState::CallEndedRetry(void* arg, + grpc_error* error) { + HealthCheckClient::CallState* self = + static_cast<HealthCheckClient::CallState*>(arg); + self->CallEnded(true /* retry */); + self->Unref(DEBUG_LOCATION, "call_end_closure"); +} + +void HealthCheckClient::CallState::CallEnded(bool retry) { + // If this CallState is still in use, this call ended because of a failure, + // so we need to stop using it and optionally create a new one. + // Otherwise, we have deliberately ended this call, and no further action + // is required. + if (this == health_check_client_->call_state_.get()) { + health_check_client_->call_state_.reset(); + if (retry) { + GPR_ASSERT(!health_check_client_->shutting_down_); + if (static_cast<bool>(gpr_atm_acq_load(&seen_response_))) { + // If the call fails after we've gotten a successful response, reset + // the backoff and restart the call immediately. + health_check_client_->retry_backoff_.Reset(); + health_check_client_->StartCall(); + } else { + // If the call failed without receiving any messages, retry later. + health_check_client_->StartRetryTimer(); + } + } + } + Unref(DEBUG_LOCATION, "call_ended"); +} + +} // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/health/health_check_client.h b/src/core/ext/filters/client_channel/health/health_check_client.h new file mode 100644 index 0000000000..f6babef7d6 --- /dev/null +++ b/src/core/ext/filters/client_channel/health/health_check_client.h @@ -0,0 +1,173 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
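CallEnded() above encodes the retry policy in two branches: a call that had produced at least one response resets the backoff and restarts immediately, while a call that died without a single message waits out an exponential backoff. A standalone sketch of that decision (the multiplier and bounds here are illustrative, not the values the health checker actually configures):

```cpp
#include <algorithm>
#include <cstdio>

class Backoff {
 public:
  // Returns the delay before the next attempt, then grows the interval.
  int NextAttemptMs() {
    int delay = current_ms_;
    current_ms_ = std::min(static_cast<int>(current_ms_ * 1.6), max_ms_);
    return delay;
  }
  void Reset() { current_ms_ = initial_ms_; }

 private:
  const int initial_ms_ = 1000;
  const int max_ms_ = 120000;
  int current_ms_ = initial_ms_;
};

// Mirrors the CallEnded(retry=true) branches above.
void OnHealthCallEnded(bool seen_response, Backoff* backoff) {
  if (seen_response) {
    backoff->Reset();  // The stream was working; reconnect right away.
    std::printf("restarting health check immediately\n");
  } else {
    // Never got a message: wait out the (growing) backoff first.
    std::printf("retrying in %d ms\n", backoff->NextAttemptMs());
  }
}
```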
+ * + */ + +#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_HEALTH_CHECK_CLIENT_H +#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_HEALTH_CHECK_CLIENT_H + +#include <grpc/support/port_platform.h> + +#include <grpc/grpc.h> +#include <grpc/support/atm.h> +#include <grpc/support/sync.h> + +#include "src/core/ext/filters/client_channel/client_channel_channelz.h" +#include "src/core/ext/filters/client_channel/subchannel.h" +#include "src/core/lib/backoff/backoff.h" +#include "src/core/lib/gpr/arena.h" +#include "src/core/lib/gprpp/orphanable.h" +#include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/iomgr/call_combiner.h" +#include "src/core/lib/iomgr/closure.h" +#include "src/core/lib/iomgr/polling_entity.h" +#include "src/core/lib/iomgr/timer.h" +#include "src/core/lib/transport/byte_stream.h" +#include "src/core/lib/transport/metadata_batch.h" +#include "src/core/lib/transport/transport.h" + +namespace grpc_core { + +class HealthCheckClient + : public InternallyRefCountedWithTracing<HealthCheckClient> { + public: + HealthCheckClient(const char* service_name, + RefCountedPtr<ConnectedSubchannel> connected_subchannel, + grpc_pollset_set* interested_parties, + RefCountedPtr<channelz::SubchannelNode> channelz_node); + + ~HealthCheckClient(); + + // When the health state changes from *state, sets *state to the new + // value and schedules closure. + // Only one closure can be outstanding at a time. + void NotifyOnHealthChange(grpc_connectivity_state* state, + grpc_closure* closure); + + void Orphan() override; + + private: + // Contains a call to the backend and all the data related to the call. + class CallState : public InternallyRefCountedWithTracing<CallState> { + public: + CallState(RefCountedPtr<HealthCheckClient> health_check_client, + grpc_pollset_set* interested_parties_); + ~CallState(); + + void Orphan() override; + + void StartCall(); + + private: + void Cancel(); + + void StartBatch(grpc_transport_stream_op_batch* batch); + static void StartBatchInCallCombiner(void* arg, grpc_error* error); + + static void CallEndedRetry(void* arg, grpc_error* error); + void CallEnded(bool retry); + + static void OnComplete(void* arg, grpc_error* error); + static void RecvInitialMetadataReady(void* arg, grpc_error* error); + static void RecvMessageReady(void* arg, grpc_error* error); + static void RecvTrailingMetadataReady(void* arg, grpc_error* error); + static void StartCancel(void* arg, grpc_error* error); + static void OnCancelComplete(void* arg, grpc_error* error); + + static void OnByteStreamNext(void* arg, grpc_error* error); + void ContinueReadingRecvMessage(); + grpc_error* PullSliceFromRecvMessage(); + void DoneReadingRecvMessage(grpc_error* error); + + RefCountedPtr<HealthCheckClient> health_check_client_; + grpc_polling_entity pollent_; + + gpr_arena* arena_; + grpc_call_combiner call_combiner_; + grpc_call_context_element context_[GRPC_CONTEXT_COUNT] = {}; + + // The streaming call to the backend. Always non-NULL. 
+ grpc_subchannel_call* call_; + + grpc_transport_stream_op_batch_payload payload_; + grpc_transport_stream_op_batch batch_; + grpc_transport_stream_op_batch recv_message_batch_; + grpc_transport_stream_op_batch recv_trailing_metadata_batch_; + + grpc_closure on_complete_; + + // send_initial_metadata + grpc_metadata_batch send_initial_metadata_; + grpc_linked_mdelem path_metadata_storage_; + + // send_message + ManualConstructor<SliceBufferByteStream> send_message_; + + // send_trailing_metadata + grpc_metadata_batch send_trailing_metadata_; + + // recv_initial_metadata + grpc_metadata_batch recv_initial_metadata_; + grpc_closure recv_initial_metadata_ready_; + + // recv_message + OrphanablePtr<ByteStream> recv_message_; + grpc_closure recv_message_ready_; + grpc_slice_buffer recv_message_buffer_; + gpr_atm seen_response_; + + // recv_trailing_metadata + grpc_metadata_batch recv_trailing_metadata_; + grpc_transport_stream_stats collect_stats_; + grpc_closure recv_trailing_metadata_ready_; + }; + + void StartCall(); + void StartCallLocked(); // Requires holding mu_. + + void StartRetryTimer(); + static void OnRetryTimer(void* arg, grpc_error* error); + + void SetHealthStatus(grpc_connectivity_state state, grpc_error* error); + void SetHealthStatusLocked(grpc_connectivity_state state, + grpc_error* error); // Requires holding mu_. + + const char* service_name_; // Do not own. + RefCountedPtr<ConnectedSubchannel> connected_subchannel_; + grpc_pollset_set* interested_parties_; // Do not own. + RefCountedPtr<channelz::SubchannelNode> channelz_node_; + + gpr_mu mu_; + grpc_connectivity_state state_ = GRPC_CHANNEL_CONNECTING; + grpc_error* error_ = GRPC_ERROR_NONE; + grpc_connectivity_state* notify_state_ = nullptr; + grpc_closure* on_health_changed_ = nullptr; + bool shutting_down_ = false; + + // The data associated with the current health check call. It holds a ref + // to this HealthCheckClient object. + OrphanablePtr<CallState> call_state_; + + // Call retry state. + BackOff retry_backoff_; + grpc_timer retry_timer_; + grpc_closure retry_timer_callback_; + bool retry_timer_callback_pending_ = false; +}; + +} // namespace grpc_core + +#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_HEALTH_CHECK_CLIENT_H */ diff --git a/src/core/ext/filters/client_channel/http_connect_handshaker.cc b/src/core/ext/filters/client_channel/http_connect_handshaker.cc index 7ce8da8c00..0716e46818 100644 --- a/src/core/ext/filters/client_channel/http_connect_handshaker.cc +++ b/src/core/ext/filters/client_channel/http_connect_handshaker.cc @@ -29,7 +29,6 @@ #include "src/core/ext/filters/client_channel/client_channel.h" #include "src/core/ext/filters/client_channel/resolver_registry.h" -#include "src/core/ext/filters/client_channel/uri_parser.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/channel/handshaker_registry.h" #include "src/core/lib/gpr/env.h" @@ -37,6 +36,7 @@ #include "src/core/lib/http/format_request.h" #include "src/core/lib/http/parser.h" #include "src/core/lib/slice/slice_internal.h" +#include "src/core/lib/uri/uri_parser.h" typedef struct http_connect_handshaker { // Base class. Must be first. 
@@ -351,6 +351,7 @@ static grpc_handshaker* grpc_http_connect_handshaker_create() { static void handshaker_factory_add_handshakers( grpc_handshaker_factory* factory, const grpc_channel_args* args, + grpc_pollset_set* interested_parties, grpc_handshake_manager* handshake_mgr) { grpc_handshake_manager_add(handshake_mgr, grpc_http_connect_handshaker_create()); diff --git a/src/core/ext/filters/client_channel/http_proxy.cc b/src/core/ext/filters/client_channel/http_proxy.cc index 26d3f479b7..8951a2920c 100644 --- a/src/core/ext/filters/client_channel/http_proxy.cc +++ b/src/core/ext/filters/client_channel/http_proxy.cc @@ -29,12 +29,12 @@ #include "src/core/ext/filters/client_channel/http_connect_handshaker.h" #include "src/core/ext/filters/client_channel/proxy_mapper_registry.h" -#include "src/core/ext/filters/client_channel/uri_parser.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/gpr/env.h" #include "src/core/lib/gpr/host_port.h" #include "src/core/lib/gpr/string.h" #include "src/core/lib/slice/b64.h" +#include "src/core/lib/uri/uri_parser.h" /** * Parses the 'https_proxy' env var (fallback on 'http_proxy') and returns the diff --git a/src/core/ext/filters/client_channel/lb_policy.h b/src/core/ext/filters/client_channel/lb_policy.h index 3c0a9c1118..b0040457a6 100644 --- a/src/core/ext/filters/client_channel/lb_policy.h +++ b/src/core/ext/filters/client_channel/lb_policy.h @@ -63,29 +63,29 @@ class LoadBalancingPolicy /// State used for an LB pick. struct PickState { /// Initial metadata associated with the picking call. - grpc_metadata_batch* initial_metadata; + grpc_metadata_batch* initial_metadata = nullptr; /// Bitmask used for selective cancelling. See /// \a CancelMatchingPicksLocked() and \a GRPC_INITIAL_METADATA_* in /// grpc_types.h. - uint32_t initial_metadata_flags; + uint32_t initial_metadata_flags = 0; /// Storage for LB token in \a initial_metadata, or nullptr if not used. grpc_linked_mdelem lb_token_mdelem_storage; /// Closure to run when pick is complete, if not completed synchronously. /// If null, pick will fail if a result is not available synchronously. - grpc_closure* on_complete; + grpc_closure* on_complete = nullptr; /// Will be set to the selected subchannel, or nullptr on failure or when /// the LB policy decides to drop the call. RefCountedPtr<ConnectedSubchannel> connected_subchannel; /// Will be populated with context to pass to the subchannel call, if /// needed. - grpc_call_context_element subchannel_call_context[GRPC_CONTEXT_COUNT]; + grpc_call_context_element subchannel_call_context[GRPC_CONTEXT_COUNT] = {}; /// Upon success, \a *user_data will be set to whatever opaque information /// may need to be propagated from the LB policy, or nullptr if not needed. // TODO(roth): As part of revamping our metadata APIs, try to find a // way to clean this up and C++-ify it. - void** user_data; + void** user_data = nullptr; /// Next pointer. For internal use by LB policy. - PickState* next; + PickState* next = nullptr; }; // Not copyable nor movable. @@ -151,9 +151,9 @@ class LoadBalancingPolicy /// LB policy's referenced children. This is not invoked from the /// client_channel's combiner. The implementation is responsible for /// providing its own synchronization. 
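The PickState hunk above swaps implicit zero-fill for default member initializers, so every PickState starts well-defined no matter where it is constructed, instead of relying on callers to memset it. The change in miniature (generic types, not the gRPC ones):

```cpp
#include <cassert>

struct PickBefore {  // C-style: fields are garbage unless the caller memsets.
  void* on_complete;
  unsigned flags;
};

struct PickAfter {   // NSDMIs: every instance starts well-defined.
  void* on_complete = nullptr;
  unsigned flags = 0;
};

int main() {
  PickAfter p;                       // No memset needed.
  assert(p.on_complete == nullptr);  // Guaranteed by the initializers.
  assert(p.flags == 0);
}
```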
- virtual void FillChildRefsForChannelz(ChildRefsList* child_subchannels, - ChildRefsList* child_channels) - GRPC_ABSTRACT; + virtual void FillChildRefsForChannelz( + channelz::ChildRefsList* child_subchannels, + channelz::ChildRefsList* child_channels) GRPC_ABSTRACT; void Orphan() override { // Invoke ShutdownAndUnrefLocked() inside of the combiner. @@ -212,8 +212,8 @@ class LoadBalancingPolicy // Dummy classes needed for alignment issues. // See https://github.com/grpc/grpc/issues/16032 for context. // TODO(ncteisen): remove this as soon as the issue is resolved. - ChildRefsList dummy_list_foo; - ChildRefsList dummy_list_bar; + channelz::ChildRefsList dummy_list_foo; + channelz::ChildRefsList dummy_list_bar; }; } // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc index cc259bcdbf..399bb452f4 100644 --- a/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc @@ -37,16 +37,27 @@ static void destroy_channel_elem(grpc_channel_element* elem) {} namespace { struct call_data { + call_data(const grpc_call_element_args& args) { + if (args.context[GRPC_GRPCLB_CLIENT_STATS].value != nullptr) { + // Get stats object from context and take a ref. + client_stats = static_cast<grpc_core::GrpcLbClientStats*>( + args.context[GRPC_GRPCLB_CLIENT_STATS].value) + ->Ref(); + // Record call started. + client_stats->AddCallStarted(); + } + } + // Stats object to update. grpc_core::RefCountedPtr<grpc_core::GrpcLbClientStats> client_stats; // State for intercepting send_initial_metadata. grpc_closure on_complete_for_send; grpc_closure* original_on_complete_for_send; - bool send_initial_metadata_succeeded; + bool send_initial_metadata_succeeded = false; // State for intercepting recv_initial_metadata. grpc_closure recv_initial_metadata_ready; grpc_closure* original_recv_initial_metadata_ready; - bool recv_initial_metadata_succeeded; + bool recv_initial_metadata_succeeded = false; }; } // namespace @@ -70,16 +81,8 @@ static void recv_initial_metadata_ready(void* arg, grpc_error* error) { static grpc_error* init_call_elem(grpc_call_element* elem, const grpc_call_element_args* args) { - call_data* calld = static_cast<call_data*>(elem->call_data); - // Get stats object from context and take a ref. GPR_ASSERT(args->context != nullptr); - if (args->context[GRPC_GRPCLB_CLIENT_STATS].value != nullptr) { - calld->client_stats = static_cast<grpc_core::GrpcLbClientStats*>( - args->context[GRPC_GRPCLB_CLIENT_STATS].value) - ->Ref(); - // Record call started. - calld->client_stats->AddCallStarted(); - } + new (elem->call_data) call_data(*args); return GRPC_ERROR_NONE; } @@ -97,6 +100,7 @@ static void destroy_call_elem(grpc_call_element* elem, // TODO(roth): Eliminate this once filter stack is converted to C++. 
calld->client_stats.reset(); } + calld->~call_data(); } static void start_transport_stream_op_batch( diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc index 25b0149393..dbb90b438c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc @@ -136,8 +136,9 @@ class GrpcLb : public LoadBalancingPolicy { void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override; void ExitIdleLocked() override; void ResetBackoffLocked() override; - void FillChildRefsForChannelz(ChildRefsList* child_subchannels, - ChildRefsList* child_channels) override; + void FillChildRefsForChannelz( + channelz::ChildRefsList* child_subchannels, + channelz::ChildRefsList* child_channels) override; private: /// Linked list of pending pick requests. It stores all information needed to @@ -852,10 +853,12 @@ void GrpcLb::BalancerCallState::OnBalancerMessageReceivedLocked( } } else { // No valid initial response or serverlist found. + char* response_slice_str = + grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX); gpr_log(GPR_ERROR, "[grpclb %p] Invalid LB response received: '%s'. Ignoring.", - grpclb_policy, - grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX)); + grpclb_policy, response_slice_str); + gpr_free(response_slice_str); } grpc_slice_unref_internal(response_slice); if (!grpclb_policy->shutting_down_) { @@ -1256,8 +1259,9 @@ bool GrpcLb::PickLocked(PickState* pick, grpc_error** error) { return pick_done; } -void GrpcLb::FillChildRefsForChannelz(ChildRefsList* child_subchannels, - ChildRefsList* child_channels) { +void GrpcLb::FillChildRefsForChannelz( + channelz::ChildRefsList* child_subchannels, + channelz::ChildRefsList* child_channels) { // delegate to the RoundRobin to fill the children subchannels. rr_policy_->FillChildRefsForChannelz(child_subchannels, child_channels); MutexLock lock(&lb_channel_mu_); @@ -1265,7 +1269,7 @@ void GrpcLb::FillChildRefsForChannelz(ChildRefsList* child_subchannels, grpc_core::channelz::ChannelNode* channel_node = grpc_channel_get_channelz_node(lb_channel_); if (channel_node != nullptr) { - child_channels->push_back(channel_node->channel_uuid()); + child_channels->push_back(channel_node->uuid()); } } } @@ -1329,11 +1333,8 @@ void GrpcLb::ProcessChannelArgsLocked(const grpc_channel_args& args) { void GrpcLb::UpdateLocked(const grpc_channel_args& args) { ProcessChannelArgsLocked(args); - // If fallback is configured and the RR policy already exists, update - // it with the new fallback addresses. - if (lb_fallback_timeout_ms_ > 0 && rr_policy_ != nullptr) { - CreateOrUpdateRoundRobinPolicyLocked(); - } + // Update the existing RR policy. + if (rr_policy_ != nullptr) CreateOrUpdateRoundRobinPolicyLocked(); // Start watching the LB channel connectivity for connection, if not // already doing so. if (!watching_lb_channel_) { @@ -1487,7 +1488,7 @@ void GrpcLb::OnBalancerChannelConnectivityChangedLocked(void* arg, grpclb_policy->lb_call_backoff_.Reset(); grpclb_policy->StartBalancerCallLocked(); } - // Fall through. + // fallthrough case GRPC_CHANNEL_SHUTDOWN: done: grpclb_policy->watching_lb_channel_ = false; @@ -1695,7 +1696,7 @@ grpc_channel_args* GrpcLb::CreateRoundRobinPolicyArgsLocked() { // Replace the LB addresses in the channel args that we pass down to // the subchannel. 
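The client_load_reporting_filter change above is the standard recipe for hosting a C++ object in framework-owned memory: init_call_elem placement-news a real constructor into the preallocated call_data slot, and destroy_call_elem invokes the destructor explicitly, since nothing ever calls delete on that storage. A self-contained sketch of the pattern:

```cpp
#include <new>
#include <string>

struct CallData {
  explicit CallData(int id) : call_id(id) {}
  int call_id;
  std::string peer;  // Non-trivial member, now constructed correctly.
};

int main() {
  // The framework owns raw, suitably sized and aligned storage...
  alignas(CallData) unsigned char storage[sizeof(CallData)];
  // ...init_call_elem constructs the object in place...
  CallData* calld = new (storage) CallData(42);
  calld->peer = "ipv4:127.0.0.1:443";
  // ...and destroy_call_elem must invoke the destructor by hand, since
  // nobody calls delete on framework-owned storage.
  calld->~CallData();
}
```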
static const char* keys_to_remove[] = {GRPC_ARG_LB_ADDRESSES}; - const grpc_arg args_to_add[] = { + grpc_arg args_to_add[3] = { grpc_lb_addresses_create_channel_arg(addresses), // A channel arg indicating if the target is a backend inferred from a // grpclb load balancer. @@ -1704,9 +1705,15 @@ grpc_channel_args* GrpcLb::CreateRoundRobinPolicyArgsLocked() { GRPC_ARG_ADDRESS_IS_BACKEND_FROM_GRPCLB_LOAD_BALANCER), is_backend_from_grpclb_load_balancer), }; + size_t num_args_to_add = 2; + if (is_backend_from_grpclb_load_balancer) { + args_to_add[2] = grpc_channel_arg_integer_create( + const_cast<char*>(GRPC_ARG_INHIBIT_HEALTH_CHECKING), 1); + ++num_args_to_add; + } grpc_channel_args* args = grpc_channel_args_copy_and_add_and_remove( args_, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove), args_to_add, - GPR_ARRAY_SIZE(args_to_add)); + num_args_to_add); grpc_lb_addresses_destroy(addresses); return args; } diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 9120abfa3c..eb494486b9 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -59,8 +59,8 @@ class PickFirst : public LoadBalancingPolicy { void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override; void ExitIdleLocked() override; void ResetBackoffLocked() override; - void FillChildRefsForChannelz(ChildRefsList* child_subchannels, - ChildRefsList* ignored) override; + void FillChildRefsForChannelz(channelz::ChildRefsList* child_subchannels, + channelz::ChildRefsList* ignored) override; private: ~PickFirst(); @@ -71,11 +71,12 @@ class PickFirst : public LoadBalancingPolicy { : public SubchannelData<PickFirstSubchannelList, PickFirstSubchannelData> { public: - PickFirstSubchannelData(PickFirstSubchannelList* subchannel_list, - const grpc_lb_user_data_vtable* user_data_vtable, - const grpc_lb_address& address, - grpc_subchannel* subchannel, - grpc_combiner* combiner) + PickFirstSubchannelData( + SubchannelList<PickFirstSubchannelList, PickFirstSubchannelData>* + subchannel_list, + const grpc_lb_user_data_vtable* user_data_vtable, + const grpc_lb_address& address, grpc_subchannel* subchannel, + grpc_combiner* combiner) : SubchannelData(subchannel_list, user_data_vtable, address, subchannel, combiner) {} @@ -126,7 +127,6 @@ class PickFirst : public LoadBalancingPolicy { void ShutdownLocked() override; void StartPickingLocked(); - void DestroyUnselectedSubchannelsLocked(); void UpdateChildRefsLocked(); // All our subchannels. @@ -147,8 +147,8 @@ class PickFirst : public LoadBalancingPolicy { /// Lock and data used to capture snapshots of this channel's child /// channels and subchannels. This data is consumed by channelz.
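CreateRoundRobinPolicyArgsLocked(), at the top of this hunk, sizes args_to_add for the worst case but passes a runtime count, appending GRPC_ARG_INHIBIT_HEALTH_CHECKING only for balancer-provided backends; keeping GPR_ARRAY_SIZE would now always report three entries, including a possibly uninitialized slot. The fixed-capacity-plus-count idiom in isolation (Arg and the key strings are stand-ins, not gRPC types):

```cpp
#include <cstddef>
#include <cstdio>

struct Arg { const char* key; int value; };

void BuildArgs(bool is_backend_from_lb) {
  // Worst-case capacity; only the first 'num_args' entries are meaningful.
  Arg args_to_add[3] = {
      {"lb_addresses", 1},
      {"address_is_backend_from_lb", is_backend_from_lb ? 1 : 0},
  };
  size_t num_args = 2;
  if (is_backend_from_lb) {
    // Health checking is redundant for balancer-vetted backends.
    args_to_add[2] = {"inhibit_health_checking", 1};
    ++num_args;
  }
  for (size_t i = 0; i < num_args; ++i) {
    std::printf("%s=%d\n", args_to_add[i].key, args_to_add[i].value);
  }
}
```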
gpr_mu child_refs_mu_; - ChildRefsList child_subchannels_; - ChildRefsList child_channels_; + channelz::ChildRefsList child_subchannels_; + channelz::ChildRefsList child_channels_; }; PickFirst::PickFirst(const Args& args) : LoadBalancingPolicy(args) { @@ -250,14 +250,9 @@ void PickFirst::CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask, void PickFirst::StartPickingLocked() { started_picking_ = true; - if (subchannel_list_ != nullptr) { - for (size_t i = 0; i < subchannel_list_->num_subchannels(); ++i) { - if (subchannel_list_->subchannel(i)->subchannel() != nullptr) { - subchannel_list_->subchannel(i) - ->CheckConnectivityStateAndStartWatchingLocked(); - break; - } - } + if (subchannel_list_ != nullptr && subchannel_list_->num_subchannels() > 0) { + subchannel_list_->subchannel(0) + ->CheckConnectivityStateAndStartWatchingLocked(); } } @@ -294,15 +289,6 @@ bool PickFirst::PickLocked(PickState* pick, grpc_error** error) { return false; } -void PickFirst::DestroyUnselectedSubchannelsLocked() { - for (size_t i = 0; i < subchannel_list_->num_subchannels(); ++i) { - PickFirstSubchannelData* sd = subchannel_list_->subchannel(i); - if (selected_ != sd) { - sd->UnrefSubchannelLocked("selected_different_subchannel"); - } - } -} - grpc_connectivity_state PickFirst::CheckConnectivityLocked(grpc_error** error) { return grpc_connectivity_state_get(&state_tracker_, error); } @@ -314,7 +300,8 @@ void PickFirst::NotifyOnStateChangeLocked(grpc_connectivity_state* current, } void PickFirst::FillChildRefsForChannelz( - ChildRefsList* child_subchannels_to_fill, ChildRefsList* ignored) { + channelz::ChildRefsList* child_subchannels_to_fill, + channelz::ChildRefsList* ignored) { MutexLock lock(&child_refs_mu_); for (size_t i = 0; i < child_subchannels_.size(); ++i) { // TODO(ncteisen): implement a de dup loop that is not O(n^2). Might @@ -334,7 +321,7 @@ void PickFirst::FillChildRefsForChannelz( } void PickFirst::UpdateChildRefsLocked() { - ChildRefsList cs; + channelz::ChildRefsList cs; if (subchannel_list_ != nullptr) { subchannel_list_->PopulateChildRefsList(&cs); } @@ -372,9 +359,14 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) { "Pick First %p received update with %" PRIuPTR " addresses", this, addresses->num_addresses); } + grpc_arg new_arg = grpc_channel_arg_integer_create( + const_cast<char*>(GRPC_ARG_INHIBIT_HEALTH_CHECKING), 1); + grpc_channel_args* new_args = + grpc_channel_args_copy_and_add(&args, &new_arg, 1); auto subchannel_list = MakeOrphanable<PickFirstSubchannelList>( this, &grpc_lb_pick_first_trace, addresses, combiner(), - client_channel_factory(), args); + client_channel_factory(), *new_args); + grpc_channel_args_destroy(new_args); if (subchannel_list->num_subchannels() == 0) { // Empty update or no valid subchannels. Unsubscribe from all current // subchannels and put the channel in TRANSIENT_FAILURE. @@ -419,7 +411,6 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) { if (sd->CheckConnectivityStateLocked(&error) == GRPC_CHANNEL_READY) { selected_ = sd; subchannel_list_ = std::move(subchannel_list); - DestroyUnselectedSubchannelsLocked(); sd->StartConnectivityWatchLocked(); // If there was a previously pending update (which may or may // not have contained the currently selected subchannel), drop @@ -504,7 +495,6 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( p->TryReresolutionLocked(&grpc_lb_pick_first_trace, GRPC_ERROR_NONE); // In transient failure. Rely on re-resolution to recover. 
p->selected_ = nullptr; - UnrefSubchannelLocked("pf_selected_shutdown"); StopConnectivityWatchLocked(); } else { grpc_connectivity_state_set(&p->state_tracker_, connectivity_state, @@ -535,11 +525,9 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( case GRPC_CHANNEL_TRANSIENT_FAILURE: { StopConnectivityWatchLocked(); PickFirstSubchannelData* sd = this; - do { - size_t next_index = - (sd->Index() + 1) % subchannel_list()->num_subchannels(); - sd = subchannel_list()->subchannel(next_index); - } while (sd->subchannel() == nullptr); + size_t next_index = + (sd->Index() + 1) % subchannel_list()->num_subchannels(); + sd = subchannel_list()->subchannel(next_index); // Case 1: Only set state to TRANSIENT_FAILURE if we've tried // all subchannels. if (sd->Index() == 0 && subchannel_list() == p->subchannel_list_.get()) { @@ -600,8 +588,6 @@ void PickFirst::PickFirstSubchannelData::ProcessUnselectedReadyLocked() { if (grpc_lb_pick_first_trace.enabled()) { gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p, subchannel()); } - // Drop all other subchannels, since we are now connected. - p->DestroyUnselectedSubchannelsLocked(); // Update any calls that were waiting for a pick. PickState* pick; while ((pick = p->pending_picks_)) { diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 4195c1e9d1..e9ed85cf66 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -70,8 +70,8 @@ class RoundRobin : public LoadBalancingPolicy { void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override; void ExitIdleLocked() override; void ResetBackoffLocked() override; - void FillChildRefsForChannelz(ChildRefsList* child_subchannels, - ChildRefsList* ignored) override; + void FillChildRefsForChannelz(channelz::ChildRefsList* child_subchannels, + channelz::ChildRefsList* ignored) override; private: ~RoundRobin(); @@ -89,11 +89,12 @@ class RoundRobin : public LoadBalancingPolicy { : public SubchannelData<RoundRobinSubchannelList, RoundRobinSubchannelData> { public: - RoundRobinSubchannelData(RoundRobinSubchannelList* subchannel_list, - const grpc_lb_user_data_vtable* user_data_vtable, - const grpc_lb_address& address, - grpc_subchannel* subchannel, - grpc_combiner* combiner) + RoundRobinSubchannelData( + SubchannelList<RoundRobinSubchannelList, RoundRobinSubchannelData>* + subchannel_list, + const grpc_lb_user_data_vtable* user_data_vtable, + const grpc_lb_address& address, grpc_subchannel* subchannel, + grpc_combiner* combiner) : SubchannelData(subchannel_list, user_data_vtable, address, subchannel, combiner), user_data_vtable_(user_data_vtable), @@ -222,8 +223,8 @@ class RoundRobin : public LoadBalancingPolicy { /// Lock and data used to capture snapshots of this channel's child /// channels and subchannels. This data is consumed by channelz. 
gpr_mu child_refs_mu_; - ChildRefsList child_subchannels_; - ChildRefsList child_channels_; + channelz::ChildRefsList child_subchannels_; + channelz::ChildRefsList child_channels_; }; RoundRobin::RoundRobin(const Args& args) : LoadBalancingPolicy(args) { @@ -401,7 +402,8 @@ bool RoundRobin::PickLocked(PickState* pick, grpc_error** error) { } void RoundRobin::FillChildRefsForChannelz( - ChildRefsList* child_subchannels_to_fill, ChildRefsList* ignored) { + channelz::ChildRefsList* child_subchannels_to_fill, + channelz::ChildRefsList* ignored) { MutexLock lock(&child_refs_mu_); for (size_t i = 0; i < child_subchannels_.size(); ++i) { // TODO(ncteisen): implement a de dup loop that is not O(n^2). Might @@ -421,7 +423,7 @@ void RoundRobin::FillChildRefsForChannelz( } void RoundRobin::UpdateChildRefsLocked() { - ChildRefsList cs; + channelz::ChildRefsList cs; if (subchannel_list_ != nullptr) { subchannel_list_->PopulateChildRefsList(&cs); } diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h index 0fa2f04e73..4ec9e935ed 100644 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h @@ -65,6 +65,10 @@ class MySubchannelList namespace grpc_core { +// Forward declaration. +template <typename SubchannelListType, typename SubchannelDataType> +class SubchannelList; + // Stores data for a particular subchannel in a subchannel list. // Callers must create a subclass that implements the // ProcessConnectivityChangeLocked() method. @@ -72,7 +76,9 @@ template <typename SubchannelListType, typename SubchannelDataType> class SubchannelData { public: // Returns a pointer to the subchannel list containing this object. - SubchannelListType* subchannel_list() const { return subchannel_list_; } + SubchannelListType* subchannel_list() const { + return static_cast<SubchannelListType*>(subchannel_list_); + } // Returns the index into the subchannel list of this object. size_t Index() const { @@ -96,17 +102,12 @@ class SubchannelData { // ProcessConnectivityChangeLocked()). grpc_connectivity_state CheckConnectivityStateLocked(grpc_error** error) { GPR_ASSERT(!connectivity_notification_pending_); - pending_connectivity_state_unsafe_ = - grpc_subchannel_check_connectivity(subchannel(), error); + pending_connectivity_state_unsafe_ = grpc_subchannel_check_connectivity( + subchannel(), error, subchannel_list_->inhibit_health_checking()); UpdateConnectedSubchannelLocked(); return pending_connectivity_state_unsafe_; } - // Unrefs the subchannel. May be used if an individual subchannel is - // no longer needed even though the subchannel list as a whole is not - // being unreffed. - virtual void UnrefSubchannelLocked(const char* reason); - // Resets the connection backoff. // TODO(roth): This method should go away when we move the backoff // code out of the subchannel and into the LB policies. 
@@ -138,10 +139,11 @@ class SubchannelData { GRPC_ABSTRACT_BASE_CLASS protected: - SubchannelData(SubchannelListType* subchannel_list, - const grpc_lb_user_data_vtable* user_data_vtable, - const grpc_lb_address& address, grpc_subchannel* subchannel, - grpc_combiner* combiner); + SubchannelData( + SubchannelList<SubchannelListType, SubchannelDataType>* subchannel_list, + const grpc_lb_user_data_vtable* user_data_vtable, + const grpc_lb_address& address, grpc_subchannel* subchannel, + grpc_combiner* combiner); virtual ~SubchannelData(); @@ -154,6 +156,10 @@ class SubchannelData { grpc_connectivity_state connectivity_state, grpc_error* error) GRPC_ABSTRACT; + // Unrefs the subchannel. May be overridden by subclasses that need + // to perform extra cleanup when unreffing the subchannel. + virtual void UnrefSubchannelLocked(const char* reason); + private: // Updates connected_subchannel_ based on pending_connectivity_state_unsafe_. // Returns true if the connectivity state should be reported. @@ -162,7 +168,7 @@ class SubchannelData { static void OnConnectivityChangedLocked(void* arg, grpc_error* error); // Backpointer to owning subchannel list. Not owned. - SubchannelListType* subchannel_list_; + SubchannelList<SubchannelListType, SubchannelDataType>* subchannel_list_; // The subchannel and connected subchannel. grpc_subchannel* subchannel_; @@ -195,13 +201,13 @@ class SubchannelList bool shutting_down() const { return shutting_down_; } // Populates refs_list with the uuids of this SubchannelList's subchannels. - void PopulateChildRefsList(ChildRefsList* refs_list) { + void PopulateChildRefsList(channelz::ChildRefsList* refs_list) { for (size_t i = 0; i < subchannels_.size(); ++i) { if (subchannels_[i].subchannel() != nullptr) { grpc_core::channelz::SubchannelNode* subchannel_node = grpc_subchannel_get_channelz_node(subchannels_[i].subchannel()); if (subchannel_node != nullptr) { - refs_list->push_back(subchannel_node->subchannel_uuid()); + refs_list->push_back(subchannel_node->uuid()); } } } @@ -210,6 +216,7 @@ class SubchannelList // Accessors. LoadBalancingPolicy* policy() const { return policy_; } TraceFlag* tracer() const { return tracer_; } + bool inhibit_health_checking() const { return inhibit_health_checking_; } // Resets connection backoff of all subchannels. // TODO(roth): We will probably need to rethink this as part of moving @@ -248,6 +255,8 @@ class SubchannelList TraceFlag* tracer_; + bool inhibit_health_checking_; + grpc_combiner* combiner_; // The list of subchannels.
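The SubchannelData changes above retype the stored backpointer as the common SubchannelList<...> base, while the subchannel_list() accessor downcasts to SubchannelListType, so shared base code can reach members like inhibit_health_checking() without knowing the concrete policy's list type. A distilled sketch of that CRTP-style arrangement (toy names, not the gRPC classes):

```cpp
#include <cassert>

template <typename ListType, typename DataType>
class List;  // Forward declaration, as in the diff.

template <typename ListType, typename DataType>
class Data {
 public:
  explicit Data(List<ListType, DataType>* list) : list_(list) {}
  // Shared base code can use list_ as-is; subclasses recover their own type.
  ListType* list() const { return static_cast<ListType*>(list_); }

 private:
  List<ListType, DataType>* list_;  // Backpointer held as the base type.
};

template <typename ListType, typename DataType>
class List {
 public:
  bool inhibit_health_checking() const { return inhibit_; }

 private:
  bool inhibit_ = false;
};

// A concrete policy instantiates the pair with itself as the parameter:
class MyList;
class MyData : public Data<MyList, MyData> {
 public:
  using Data<MyList, MyData>::Data;
};
class MyList : public List<MyList, MyData> {};

int main() {
  MyList list;
  MyData data(&list);
  assert(data.list() == &list);  // The downcast recovers the derived type.
}
```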
@@ -269,7 +278,7 @@ class SubchannelList template <typename SubchannelListType, typename SubchannelDataType> SubchannelData<SubchannelListType, SubchannelDataType>::SubchannelData( - SubchannelListType* subchannel_list, + SubchannelList<SubchannelListType, SubchannelDataType>* subchannel_list, const grpc_lb_user_data_vtable* user_data_vtable, const grpc_lb_address& address, grpc_subchannel* subchannel, grpc_combiner* combiner) @@ -334,7 +343,8 @@ void SubchannelData<SubchannelListType, subchannel_list()->Ref(DEBUG_LOCATION, "connectivity_watch").release(); grpc_subchannel_notify_on_state_change( subchannel_, subchannel_list_->policy()->interested_parties(), - &pending_connectivity_state_unsafe_, &connectivity_changed_closure_); + &pending_connectivity_state_unsafe_, &connectivity_changed_closure_, + subchannel_list_->inhibit_health_checking()); } template <typename SubchannelListType, typename SubchannelDataType> @@ -353,7 +363,8 @@ void SubchannelData<SubchannelListType, GPR_ASSERT(connectivity_notification_pending_); grpc_subchannel_notify_on_state_change( subchannel_, subchannel_list_->policy()->interested_parties(), - &pending_connectivity_state_unsafe_, &connectivity_changed_closure_); + &pending_connectivity_state_unsafe_, &connectivity_changed_closure_, + subchannel_list_->inhibit_health_checking()); } template <typename SubchannelListType, typename SubchannelDataType> @@ -384,8 +395,9 @@ void SubchannelData<SubchannelListType, SubchannelDataType>:: subchannel_, reason); } GPR_ASSERT(connectivity_notification_pending_); - grpc_subchannel_notify_on_state_change(subchannel_, nullptr, nullptr, - &connectivity_changed_closure_); + grpc_subchannel_notify_on_state_change( + subchannel_, nullptr, nullptr, &connectivity_changed_closure_, + subchannel_list_->inhibit_health_checking()); } template <typename SubchannelListType, typename SubchannelDataType> @@ -493,8 +505,13 @@ SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList( subchannels_.reserve(addresses->num_addresses); // We need to remove the LB addresses in order to be able to compare the // subchannel keys of subchannels from a different batch of addresses. + // We also remove the inhibit-health-checking arg, since we are + // handling that here. + inhibit_health_checking_ = grpc_channel_arg_get_bool( + grpc_channel_args_find(&args, GRPC_ARG_INHIBIT_HEALTH_CHECKING), false); static const char* keys_to_remove[] = {GRPC_ARG_SUBCHANNEL_ADDRESS, - GRPC_ARG_LB_ADDRESSES}; + GRPC_ARG_LB_ADDRESSES, + GRPC_ARG_INHIBIT_HEALTH_CHECKING}; // Create a subchannel for each address. grpc_subchannel_args sc_args; for (size_t i = 0; i < addresses->num_addresses; i++) { @@ -533,8 +550,7 @@ SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList( address_uri); gpr_free(address_uri); } - subchannels_.emplace_back(static_cast<SubchannelListType*>(this), - addresses->user_data_vtable, + subchannels_.emplace_back(this, addresses->user_data_vtable, addresses->addresses[i], subchannel, combiner); } } diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc new file mode 100644 index 0000000000..59d57295d4 --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc @@ -0,0 +1,1828 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/// Implementation of the gRPC LB policy. +/// +/// This policy takes as input a list of resolved addresses, which must +/// include at least one balancer address. +/// +/// An internal channel (\a lb_channel_) is created for the addresses +/// that are balancers. This channel behaves just like a regular +/// channel that uses pick_first to select from the list of balancer +/// addresses. +/// +/// The first time the xDS policy gets a request for a pick or to exit the idle +/// state, \a StartPickingLocked() is called. This method is responsible for +/// instantiating the internal *streaming* call to the LB server (whichever +/// address pick_first chose). The call will be complete when either the +/// balancer sends status or when we cancel the call (e.g., because we are +/// shutting down). If needed, we retry the call. If we received at least one +/// valid message from the server, a new call attempt will be made immediately; +/// otherwise, we apply back-off delays between attempts. +/// +/// We maintain an internal child policy (round_robin) instance for distributing +/// requests across backends. Whenever we receive a new serverlist from +/// the balancer, we update the child policy with the new list of +/// addresses. +/// +/// Once a child policy instance is in place (and getting updated as +/// described), calls for a pick or a cancellation will be serviced right away +/// by forwarding them to the child policy instance. Any time there's no child +/// policy available (i.e., right after the creation of the xDS policy), pick +/// requests are added to a list of pending picks to be flushed and serviced +/// when the child policy instance becomes available. +/// +/// \see https://github.com/grpc/grpc/blob/master/doc/load-balancing.md for the +/// high level design and details. + +// With the addition of a libuv endpoint, sockaddr.h now includes uv.h when +// using that endpoint. Because of various transitive includes in uv.h, +// including windows.h on Windows, uv.h must be included before other system +// headers. Therefore, sockaddr.h must always be included first.
+#include <grpc/support/port_platform.h> + +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/socket_utils.h" + +#include <inttypes.h> +#include <limits.h> +#include <string.h> + +#include <grpc/byte_buffer_reader.h> +#include <grpc/grpc.h> +#include <grpc/support/alloc.h> +#include <grpc/support/string_util.h> +#include <grpc/support/time.h> + +#include "src/core/ext/filters/client_channel/client_channel.h" +#include "src/core/ext/filters/client_channel/client_channel_factory.h" +#include "src/core/ext/filters/client_channel/lb_policy/xds/xds.h" +#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h" +#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h" +#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.h" +#include "src/core/ext/filters/client_channel/lb_policy_factory.h" +#include "src/core/ext/filters/client_channel/lb_policy_registry.h" +#include "src/core/ext/filters/client_channel/parse_address.h" +#include "src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h" +#include "src/core/ext/filters/client_channel/subchannel_index.h" +#include "src/core/lib/backoff/backoff.h" +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/channel/channel_stack.h" +#include "src/core/lib/gpr/host_port.h" +#include "src/core/lib/gpr/string.h" +#include "src/core/lib/gprpp/manual_constructor.h" +#include "src/core/lib/gprpp/memory.h" +#include "src/core/lib/gprpp/mutex_lock.h" +#include "src/core/lib/gprpp/orphanable.h" +#include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/iomgr/combiner.h" +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/iomgr/timer.h" +#include "src/core/lib/slice/slice_hash_table.h" +#include "src/core/lib/slice/slice_internal.h" +#include "src/core/lib/slice/slice_string_helpers.h" +#include "src/core/lib/surface/call.h" +#include "src/core/lib/surface/channel.h" +#include "src/core/lib/surface/channel_init.h" +#include "src/core/lib/transport/static_metadata.h" + +#define GRPC_XDS_INITIAL_CONNECT_BACKOFF_SECONDS 1 +#define GRPC_XDS_RECONNECT_BACKOFF_MULTIPLIER 1.6 +#define GRPC_XDS_RECONNECT_MAX_BACKOFF_SECONDS 120 +#define GRPC_XDS_RECONNECT_JITTER 0.2 +#define GRPC_XDS_DEFAULT_FALLBACK_TIMEOUT_MS 10000 + +namespace grpc_core { + +TraceFlag grpc_lb_xds_trace(false, "xds"); + +namespace { + +class XdsLb : public LoadBalancingPolicy { + public: + XdsLb(const grpc_lb_addresses* addresses, const Args& args); + + void UpdateLocked(const grpc_channel_args& args) override; + bool PickLocked(PickState* pick, grpc_error** error) override; + void CancelPickLocked(PickState* pick, grpc_error* error) override; + void CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask, + uint32_t initial_metadata_flags_eq, + grpc_error* error) override; + void NotifyOnStateChangeLocked(grpc_connectivity_state* state, + grpc_closure* closure) override; + grpc_connectivity_state CheckConnectivityLocked( + grpc_error** connectivity_error) override; + void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override; + void ExitIdleLocked() override; + void ResetBackoffLocked() override; + void FillChildRefsForChannelz( + channelz::ChildRefsList* child_subchannels, + channelz::ChildRefsList* child_channels) override; + + private: + /// Linked list of pending pick requests. It stores all information needed to + /// eventually call pick() on them. 
They mainly stay pending waiting for the + /// child policy to be created. + /// + /// Note that when a pick is sent to the child policy, we inject our own + /// on_complete callback, so that we can intercept the result before + /// invoking the original on_complete callback. This allows us to set the + /// LB token metadata and add client_stats to the call context. + /// See \a pending_pick_complete() for details. + struct PendingPick { + // The xds lb instance that created the wrapping. This instance is not + // owned; reference counts are untouched. It's used only for logging + // purposes. + XdsLb* xdslb_policy; + // The original pick. + PickState* pick; + // Our on_complete closure and the original one. + grpc_closure on_complete; + grpc_closure* original_on_complete; + // The LB token associated with the pick. This is set via user_data in + // the pick. + grpc_mdelem lb_token; + // Stats for client-side load reporting. + RefCountedPtr<XdsLbClientStats> client_stats; + // Next pending pick. + PendingPick* next = nullptr; + }; + + /// Contains a call to the LB server and all the data related to the call. + class BalancerCallState + : public InternallyRefCountedWithTracing<BalancerCallState> { + public: + explicit BalancerCallState( + RefCountedPtr<LoadBalancingPolicy> parent_xdslb_policy); + + // It's the caller's responsibility to ensure that Orphan() is called from + // inside the combiner. + void Orphan() override; + + void StartQuery(); + + XdsLbClientStats* client_stats() const { return client_stats_.get(); } + + bool seen_initial_response() const { return seen_initial_response_; } + + private: + // So Delete() can access our private dtor. + template <typename T> + friend void grpc_core::Delete(T*); + + ~BalancerCallState(); + + XdsLb* xdslb_policy() const { + return static_cast<XdsLb*>(xdslb_policy_.get()); + } + + void ScheduleNextClientLoadReportLocked(); + void SendClientLoadReportLocked(); + + static bool LoadReportCountersAreZero(xds_grpclb_request* request); + + static void MaybeSendClientLoadReportLocked(void* arg, grpc_error* error); + static void ClientLoadReportDoneLocked(void* arg, grpc_error* error); + static void OnInitialRequestSentLocked(void* arg, grpc_error* error); + static void OnBalancerMessageReceivedLocked(void* arg, grpc_error* error); + static void OnBalancerStatusReceivedLocked(void* arg, grpc_error* error); + + // The owning LB policy. + RefCountedPtr<LoadBalancingPolicy> xdslb_policy_; + + // The streaming call to the LB server. Always non-NULL. + grpc_call* lb_call_ = nullptr; + + // recv_initial_metadata + grpc_metadata_array lb_initial_metadata_recv_; + + // send_message + grpc_byte_buffer* send_message_payload_ = nullptr; + grpc_closure lb_on_initial_request_sent_; + + // recv_message + grpc_byte_buffer* recv_message_payload_ = nullptr; + grpc_closure lb_on_balancer_message_received_; + bool seen_initial_response_ = false; + + // recv_trailing_metadata + grpc_closure lb_on_balancer_status_received_; + grpc_metadata_array lb_trailing_metadata_recv_; + grpc_status_code lb_call_status_; + grpc_slice lb_call_status_details_; + + // The stats for client-side load reporting associated with this LB call. + // Created after the first serverlist is received. 
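The PendingPick struct above is chained through its next pointer into an intrusive singly linked list headed by pending_picks_: queuing needs no container allocation, and flushing once the child policy exists is pure pointer surgery. A compact sketch of that queue-and-flush flow (illustrative types; the prepend matches the shape of the declared AddPendingPick(), and real pick servicing is of course more involved):

```cpp
#include <cstdio>

struct PendingPick {
  int id;
  PendingPick* next = nullptr;  // Intrusive link, as in the diff.
};

struct Policy {
  PendingPick* pending_picks_ = nullptr;

  // O(1) prepend onto the head of the list.
  void Add(PendingPick* pp) {
    pp->next = pending_picks_;
    pending_picks_ = pp;
  }

  // Once the child policy exists, drain and service every queued pick.
  void Flush() {
    PendingPick* pp;
    while ((pp = pending_picks_) != nullptr) {
      pending_picks_ = pp->next;
      std::printf("servicing pick %d\n", pp->id);
      delete pp;
    }
  }
};

int main() {
  Policy p;
  p.Add(new PendingPick{1});
  p.Add(new PendingPick{2});
  p.Flush();  // Services 2 then 1 (LIFO, matching the prepend).
}
```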
+ RefCountedPtr<XdsLbClientStats> client_stats_; + grpc_millis client_stats_report_interval_ = 0; + grpc_timer client_load_report_timer_; + bool client_load_report_timer_callback_pending_ = false; + bool last_client_load_report_counters_were_zero_ = false; + bool client_load_report_is_due_ = false; + // The closure used for either the load report timer or the callback for + // completion of sending the load report. + grpc_closure client_load_report_closure_; + }; + + ~XdsLb(); + + void ShutdownLocked() override; + + // Helper function used in ctor and UpdateLocked(). + void ProcessChannelArgsLocked(const grpc_channel_args& args); + + // Methods for dealing with the balancer channel and call. + void StartPickingLocked(); + void StartBalancerCallLocked(); + static void OnFallbackTimerLocked(void* arg, grpc_error* error); + void StartBalancerCallRetryTimerLocked(); + static void OnBalancerCallRetryTimerLocked(void* arg, grpc_error* error); + static void OnBalancerChannelConnectivityChangedLocked(void* arg, + grpc_error* error); + + // Pending pick methods. + static void PendingPickSetMetadataAndContext(PendingPick* pp); + PendingPick* PendingPickCreate(PickState* pick); + void AddPendingPick(PendingPick* pp); + static void OnPendingPickComplete(void* arg, grpc_error* error); + + // Methods for dealing with the child policy. + void CreateOrUpdateChildPolicyLocked(); + grpc_channel_args* CreateChildPolicyArgsLocked(); + void CreateChildPolicyLocked(const Args& args); + bool PickFromChildPolicyLocked(bool force_async, PendingPick* pp, + grpc_error** error); + void UpdateConnectivityStateFromChildPolicyLocked( + grpc_error* child_state_error); + static void OnChildPolicyConnectivityChangedLocked(void* arg, + grpc_error* error); + static void OnChildPolicyRequestReresolutionLocked(void* arg, + grpc_error* error); + + // Who the client is trying to communicate with. + const char* server_name_ = nullptr; + + // Current channel args from the resolver. + grpc_channel_args* args_ = nullptr; + + // Internal state. + bool started_picking_ = false; + bool shutting_down_ = false; + grpc_connectivity_state_tracker state_tracker_; + + // The channel for communicating with the LB server. + grpc_channel* lb_channel_ = nullptr; + // Mutex to protect the channel to the LB server. This is used when + // processing a channelz request. + gpr_mu lb_channel_mu_; + grpc_connectivity_state lb_channel_connectivity_; + grpc_closure lb_channel_on_connectivity_changed_; + // Are we already watching the LB channel's connectivity? + bool watching_lb_channel_ = false; + // Response generator to inject address updates into lb_channel_. + RefCountedPtr<FakeResolverResponseGenerator> response_generator_; + + // The data associated with the current LB call. It holds a ref to this LB + // policy. It's initialized every time we query for backends. It's reset to + // NULL whenever the current LB call is no longer needed (e.g., the LB policy + // is shutting down, or the LB call has ended). A non-NULL lb_calld_ always + // contains a non-NULL lb_call_. + OrphanablePtr<BalancerCallState> lb_calld_; + // Timeout in milliseconds for the LB call. 0 means no deadline. + int lb_call_timeout_ms_ = 0; + // Balancer call retry state. + BackOff lb_call_backoff_; + bool retry_timer_callback_pending_ = false; + grpc_timer lb_call_retry_timer_; + grpc_closure lb_on_call_retry_; + + // The deserialized response from the balancer. May be nullptr until one + // such response has arrived. 
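+  // Owned by this policy; replaced (and the old list destroyed) whenever the
+  // balancer sends a different serverlist, and freed in the dtor.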
+  xds_grpclb_serverlist* serverlist_ = nullptr;
+
+  // Timeout in milliseconds before using fallback backend addresses.
+  // 0 means not using fallback.
+  int lb_fallback_timeout_ms_ = 0;
+  // The backend addresses from the resolver.
+  grpc_lb_addresses* fallback_backend_addresses_ = nullptr;
+  // Fallback timer.
+  bool fallback_timer_callback_pending_ = false;
+  grpc_timer lb_fallback_timer_;
+  grpc_closure lb_on_fallback_;
+
+  // Pending picks that are waiting on the xDS policy's connectivity.
+  PendingPick* pending_picks_ = nullptr;
+
+  // The policy to use for the backends.
+  OrphanablePtr<LoadBalancingPolicy> child_policy_;
+  grpc_connectivity_state child_connectivity_state_;
+  grpc_closure on_child_connectivity_changed_;
+  grpc_closure on_child_request_reresolution_;
+};
+
+//
+// serverlist parsing code
+//
+
+// vtable for LB tokens in grpc_lb_addresses
+void* lb_token_copy(void* token) {
+  return token == nullptr
+             ? nullptr
+             : (void*)GRPC_MDELEM_REF(grpc_mdelem{(uintptr_t)token}).payload;
+}
+void lb_token_destroy(void* token) {
+  if (token != nullptr) {
+    GRPC_MDELEM_UNREF(grpc_mdelem{(uintptr_t)token});
+  }
+}
+int lb_token_cmp(void* token1, void* token2) {
+  if (token1 > token2) return 1;
+  if (token1 < token2) return -1;
+  return 0;
+}
+const grpc_lb_user_data_vtable lb_token_vtable = {
+    lb_token_copy, lb_token_destroy, lb_token_cmp};
+
+// Returns the backend addresses extracted from the given addresses.
+grpc_lb_addresses* ExtractBackendAddresses(const grpc_lb_addresses* addresses) {
+  // First pass: count the number of backend addresses.
+  size_t num_backends = 0;
+  for (size_t i = 0; i < addresses->num_addresses; ++i) {
+    if (!addresses->addresses[i].is_balancer) {
+      ++num_backends;
+    }
+  }
+  // Second pass: actually populate the addresses and (empty) LB tokens.
+  grpc_lb_addresses* backend_addresses =
+      grpc_lb_addresses_create(num_backends, &lb_token_vtable);
+  size_t num_copied = 0;
+  for (size_t i = 0; i < addresses->num_addresses; ++i) {
+    if (addresses->addresses[i].is_balancer) continue;
+    const grpc_resolved_address* addr = &addresses->addresses[i].address;
+    grpc_lb_addresses_set_address(backend_addresses, num_copied, &addr->addr,
+                                  addr->len, false /* is_balancer */,
+                                  nullptr /* balancer_name */,
+                                  (void*)GRPC_MDELEM_LB_TOKEN_EMPTY.payload);
+    ++num_copied;
+  }
+  return backend_addresses;
+}
+
+bool IsServerValid(const xds_grpclb_server* server, size_t idx, bool log) {
+  if (server->drop) return false;
+  const xds_grpclb_ip_address* ip = &server->ip_address;
+  if (GPR_UNLIKELY(server->port >> 16 != 0)) {
+    if (log) {
+      gpr_log(GPR_ERROR,
+              "Invalid port '%d' at index %lu of serverlist. Ignoring.",
+              server->port, (unsigned long)idx);
+    }
+    return false;
+  }
+  if (GPR_UNLIKELY(ip->size != 4 && ip->size != 16)) {
+    if (log) {
+      gpr_log(GPR_ERROR,
+              "Expected IP to be 4 or 16 bytes, got %d at index %lu of "
+              "serverlist. Ignoring",
+              ip->size, (unsigned long)idx);
+    }
+    return false;
+  }
+  return true;
+}
+
+void ParseServer(const xds_grpclb_server* server, grpc_resolved_address* addr) {
+  memset(addr, 0, sizeof(*addr));
+  if (server->drop) return;
+  const uint16_t netorder_port = grpc_htons((uint16_t)server->port);
+  /* the addresses are given in binary format (an in(6)_addr struct) in
+   * server->ip_address.bytes.
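+   * ip->size distinguishes the family: 4 bytes hold an in_addr (IPv4) and
+   * 16 bytes an in6_addr (IPv6).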
+   */
+  const xds_grpclb_ip_address* ip = &server->ip_address;
+  if (ip->size == 4) {
+    addr->len = static_cast<socklen_t>(sizeof(grpc_sockaddr_in));
+    grpc_sockaddr_in* addr4 = reinterpret_cast<grpc_sockaddr_in*>(&addr->addr);
+    addr4->sin_family = GRPC_AF_INET;
+    memcpy(&addr4->sin_addr, ip->bytes, ip->size);
+    addr4->sin_port = netorder_port;
+  } else if (ip->size == 16) {
+    addr->len = static_cast<socklen_t>(sizeof(grpc_sockaddr_in6));
+    grpc_sockaddr_in6* addr6 =
+        reinterpret_cast<grpc_sockaddr_in6*>(&addr->addr);
+    addr6->sin6_family = GRPC_AF_INET6;
+    memcpy(&addr6->sin6_addr, ip->bytes, ip->size);
+    addr6->sin6_port = netorder_port;
+  }
+}
+
+// Returns addresses extracted from \a serverlist.
+grpc_lb_addresses* ProcessServerlist(const xds_grpclb_serverlist* serverlist) {
+  size_t num_valid = 0;
+  /* first pass: count how many are valid in order to allocate the necessary
+   * memory in a single block */
+  for (size_t i = 0; i < serverlist->num_servers; ++i) {
+    if (IsServerValid(serverlist->servers[i], i, true)) ++num_valid;
+  }
+  grpc_lb_addresses* lb_addresses =
+      grpc_lb_addresses_create(num_valid, &lb_token_vtable);
+  /* second pass: actually populate the addresses and LB tokens (aka user data
+   * to the outside world) to be read by the child policy during its creation.
+   * Given that the validity tests are very cheap, they are performed again
+   * instead of marking the valid ones during the first pass, as this would
+   * incur an allocation due to the arbitrary number of servers */
+  size_t addr_idx = 0;
+  for (size_t sl_idx = 0; sl_idx < serverlist->num_servers; ++sl_idx) {
+    const xds_grpclb_server* server = serverlist->servers[sl_idx];
+    if (!IsServerValid(serverlist->servers[sl_idx], sl_idx, false)) continue;
+    GPR_ASSERT(addr_idx < num_valid);
+    /* address processing */
+    grpc_resolved_address addr;
+    ParseServer(server, &addr);
+    /* lb token processing */
+    void* user_data;
+    if (server->has_load_balance_token) {
+      const size_t lb_token_max_length =
+          GPR_ARRAY_SIZE(server->load_balance_token);
+      const size_t lb_token_length =
+          strnlen(server->load_balance_token, lb_token_max_length);
+      grpc_slice lb_token_mdstr = grpc_slice_from_copied_buffer(
+          server->load_balance_token, lb_token_length);
+      user_data =
+          (void*)grpc_mdelem_from_slices(GRPC_MDSTR_LB_TOKEN, lb_token_mdstr)
+              .payload;
+    } else {
+      char* uri = grpc_sockaddr_to_uri(&addr);
+      gpr_log(GPR_INFO,
+              "Missing LB token for backend address '%s'. The empty token will "
+              "be used instead",
+              uri);
+      gpr_free(uri);
+      user_data = (void*)GRPC_MDELEM_LB_TOKEN_EMPTY.payload;
+    }
+    grpc_lb_addresses_set_address(lb_addresses, addr_idx, &addr.addr, addr.len,
+                                  false /* is_balancer */,
+                                  nullptr /* balancer_name */, user_data);
+    ++addr_idx;
+  }
+  GPR_ASSERT(addr_idx == num_valid);
+  return lb_addresses;
+}
+
+//
+// XdsLb::BalancerCallState
+//
+
+XdsLb::BalancerCallState::BalancerCallState(
+    RefCountedPtr<LoadBalancingPolicy> parent_xdslb_policy)
+    : InternallyRefCountedWithTracing<BalancerCallState>(&grpc_lb_xds_trace),
+      xdslb_policy_(std::move(parent_xdslb_policy)) {
+  GPR_ASSERT(xdslb_policy_ != nullptr);
+  GPR_ASSERT(!xdslb_policy()->shutting_down_);
+  // Init the LB call. Note that the LB call will progress every time there's
+  // activity in xdslb_policy_->interested_parties(), which is comprised of
+  // the polling entities from client_channel.
+  GPR_ASSERT(xdslb_policy()->server_name_ != nullptr);
+  GPR_ASSERT(xdslb_policy()->server_name_[0] != '\0');
+  const grpc_millis deadline =
+      xdslb_policy()->lb_call_timeout_ms_ == 0
+          ? GRPC_MILLIS_INF_FUTURE
+          : ExecCtx::Get()->Now() + xdslb_policy()->lb_call_timeout_ms_;
+  lb_call_ = grpc_channel_create_pollset_set_call(
+      xdslb_policy()->lb_channel_, nullptr, GRPC_PROPAGATE_DEFAULTS,
+      xdslb_policy_->interested_parties(),
+      GRPC_MDSTR_SLASH_GRPC_DOT_LB_DOT_V1_DOT_LOADBALANCER_SLASH_BALANCELOAD,
+      nullptr, deadline, nullptr);
+  // Init the LB call request payload.
+  xds_grpclb_request* request =
+      xds_grpclb_request_create(xdslb_policy()->server_name_);
+  grpc_slice request_payload_slice = xds_grpclb_request_encode(request);
+  send_message_payload_ =
+      grpc_raw_byte_buffer_create(&request_payload_slice, 1);
+  grpc_slice_unref_internal(request_payload_slice);
+  xds_grpclb_request_destroy(request);
+  // Init other data associated with the LB call.
+  grpc_metadata_array_init(&lb_initial_metadata_recv_);
+  grpc_metadata_array_init(&lb_trailing_metadata_recv_);
+  GRPC_CLOSURE_INIT(&lb_on_initial_request_sent_, OnInitialRequestSentLocked,
+                    this, grpc_combiner_scheduler(xdslb_policy()->combiner()));
+  GRPC_CLOSURE_INIT(&lb_on_balancer_message_received_,
+                    OnBalancerMessageReceivedLocked, this,
+                    grpc_combiner_scheduler(xdslb_policy()->combiner()));
+  GRPC_CLOSURE_INIT(&lb_on_balancer_status_received_,
+                    OnBalancerStatusReceivedLocked, this,
+                    grpc_combiner_scheduler(xdslb_policy()->combiner()));
+}
+
+XdsLb::BalancerCallState::~BalancerCallState() {
+  GPR_ASSERT(lb_call_ != nullptr);
+  grpc_call_unref(lb_call_);
+  grpc_metadata_array_destroy(&lb_initial_metadata_recv_);
+  grpc_metadata_array_destroy(&lb_trailing_metadata_recv_);
+  grpc_byte_buffer_destroy(send_message_payload_);
+  grpc_byte_buffer_destroy(recv_message_payload_);
+  grpc_slice_unref_internal(lb_call_status_details_);
+}
+
+void XdsLb::BalancerCallState::Orphan() {
+  GPR_ASSERT(lb_call_ != nullptr);
+  // If we are here because xdslb_policy wants to cancel the call,
+  // lb_on_balancer_status_received_ will complete the cancellation and clean
+  // up. Otherwise, we are here because xdslb_policy has to orphan a failed
+  // call, in which case the cancellation below is a no-op.
+  grpc_call_cancel(lb_call_, nullptr);
+  if (client_load_report_timer_callback_pending_) {
+    grpc_timer_cancel(&client_load_report_timer_);
+  }
+  // Note that the initial ref is held by lb_on_balancer_status_received_
+  // instead of the caller of this function. So the corresponding unref happens
+  // in lb_on_balancer_status_received_ instead of here.
+}
+
+void XdsLb::BalancerCallState::StartQuery() {
+  GPR_ASSERT(lb_call_ != nullptr);
+  if (grpc_lb_xds_trace.enabled()) {
+    gpr_log(GPR_INFO, "[xdslb %p] Starting LB call (lb_calld: %p, lb_call: %p)",
+            xdslb_policy_.get(), this, lb_call_);
+  }
+  // Create the ops.
+  grpc_call_error call_error;
+  grpc_op ops[3];
+  memset(ops, 0, sizeof(ops));
+  // Op: send initial metadata.
+  grpc_op* op = ops;
+  op->op = GRPC_OP_SEND_INITIAL_METADATA;
+  op->data.send_initial_metadata.count = 0;
+  op->flags = 0;
+  op->reserved = nullptr;
+  op++;
+  // Op: send request message.
+  GPR_ASSERT(send_message_payload_ != nullptr);
+  op->op = GRPC_OP_SEND_MESSAGE;
+  op->data.send_message.send_message = send_message_payload_;
+  op->flags = 0;
+  op->reserved = nullptr;
+  op++;
+  // TODO(roth): We currently track this ref manually. Once the
+  // ClosureRef API is ready, we should pass the RefCountedPtr<> along
+  // with the callback.
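+  // Each pending batch owns one strong ref to this BalancerCallState:
+  // Ref().release() transfers it here, and the matching Unref() happens in
+  // the batch's completion callback.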
+ auto self = Ref(DEBUG_LOCATION, "on_initial_request_sent"); + self.release(); + call_error = grpc_call_start_batch_and_execute( + lb_call_, ops, (size_t)(op - ops), &lb_on_initial_request_sent_); + GPR_ASSERT(GRPC_CALL_OK == call_error); + // Op: recv initial metadata. + op = ops; + op->op = GRPC_OP_RECV_INITIAL_METADATA; + op->data.recv_initial_metadata.recv_initial_metadata = + &lb_initial_metadata_recv_; + op->flags = 0; + op->reserved = nullptr; + op++; + // Op: recv response. + op->op = GRPC_OP_RECV_MESSAGE; + op->data.recv_message.recv_message = &recv_message_payload_; + op->flags = 0; + op->reserved = nullptr; + op++; + // TODO(roth): We currently track this ref manually. Once the + // ClosureRef API is ready, we should pass the RefCountedPtr<> along + // with the callback. + self = Ref(DEBUG_LOCATION, "on_message_received"); + self.release(); + call_error = grpc_call_start_batch_and_execute( + lb_call_, ops, (size_t)(op - ops), &lb_on_balancer_message_received_); + GPR_ASSERT(GRPC_CALL_OK == call_error); + // Op: recv server status. + op = ops; + op->op = GRPC_OP_RECV_STATUS_ON_CLIENT; + op->data.recv_status_on_client.trailing_metadata = + &lb_trailing_metadata_recv_; + op->data.recv_status_on_client.status = &lb_call_status_; + op->data.recv_status_on_client.status_details = &lb_call_status_details_; + op->flags = 0; + op->reserved = nullptr; + op++; + // This callback signals the end of the LB call, so it relies on the initial + // ref instead of a new ref. When it's invoked, it's the initial ref that is + // unreffed. + call_error = grpc_call_start_batch_and_execute( + lb_call_, ops, (size_t)(op - ops), &lb_on_balancer_status_received_); + GPR_ASSERT(GRPC_CALL_OK == call_error); +} + +void XdsLb::BalancerCallState::ScheduleNextClientLoadReportLocked() { + const grpc_millis next_client_load_report_time = + ExecCtx::Get()->Now() + client_stats_report_interval_; + GRPC_CLOSURE_INIT(&client_load_report_closure_, + MaybeSendClientLoadReportLocked, this, + grpc_combiner_scheduler(xdslb_policy()->combiner())); + grpc_timer_init(&client_load_report_timer_, next_client_load_report_time, + &client_load_report_closure_); + client_load_report_timer_callback_pending_ = true; +} + +void XdsLb::BalancerCallState::MaybeSendClientLoadReportLocked( + void* arg, grpc_error* error) { + BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg); + XdsLb* xdslb_policy = lb_calld->xdslb_policy(); + lb_calld->client_load_report_timer_callback_pending_ = false; + if (error != GRPC_ERROR_NONE || lb_calld != xdslb_policy->lb_calld_.get()) { + lb_calld->Unref(DEBUG_LOCATION, "client_load_report"); + return; + } + // If we've already sent the initial request, then we can go ahead and send + // the load report. Otherwise, we need to wait until the initial request has + // been sent to send this (see OnInitialRequestSentLocked()). 
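+  // send_message_payload_ is non-null only while a send is in flight;
+  // OnInitialRequestSentLocked() destroys it and clears the pointer, so a
+  // null payload here means the initial request has been sent.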
+ if (lb_calld->send_message_payload_ == nullptr) { + lb_calld->SendClientLoadReportLocked(); + } else { + lb_calld->client_load_report_is_due_ = true; + } +} + +bool XdsLb::BalancerCallState::LoadReportCountersAreZero( + xds_grpclb_request* request) { + XdsLbClientStats::DroppedCallCounts* drop_entries = + static_cast<XdsLbClientStats::DroppedCallCounts*>( + request->client_stats.calls_finished_with_drop.arg); + return request->client_stats.num_calls_started == 0 && + request->client_stats.num_calls_finished == 0 && + request->client_stats.num_calls_finished_with_client_failed_to_send == + 0 && + request->client_stats.num_calls_finished_known_received == 0 && + (drop_entries == nullptr || drop_entries->empty()); +} + +void XdsLb::BalancerCallState::SendClientLoadReportLocked() { + // Construct message payload. + GPR_ASSERT(send_message_payload_ == nullptr); + xds_grpclb_request* request = + xds_grpclb_load_report_request_create_locked(client_stats_.get()); + // Skip client load report if the counters were all zero in the last + // report and they are still zero in this one. + if (LoadReportCountersAreZero(request)) { + if (last_client_load_report_counters_were_zero_) { + xds_grpclb_request_destroy(request); + ScheduleNextClientLoadReportLocked(); + return; + } + last_client_load_report_counters_were_zero_ = true; + } else { + last_client_load_report_counters_were_zero_ = false; + } + grpc_slice request_payload_slice = xds_grpclb_request_encode(request); + send_message_payload_ = + grpc_raw_byte_buffer_create(&request_payload_slice, 1); + grpc_slice_unref_internal(request_payload_slice); + xds_grpclb_request_destroy(request); + // Send the report. + grpc_op op; + memset(&op, 0, sizeof(op)); + op.op = GRPC_OP_SEND_MESSAGE; + op.data.send_message.send_message = send_message_payload_; + GRPC_CLOSURE_INIT(&client_load_report_closure_, ClientLoadReportDoneLocked, + this, grpc_combiner_scheduler(xdslb_policy()->combiner())); + grpc_call_error call_error = grpc_call_start_batch_and_execute( + lb_call_, &op, 1, &client_load_report_closure_); + if (GPR_UNLIKELY(call_error != GRPC_CALL_OK)) { + gpr_log(GPR_ERROR, "[xdslb %p] call_error=%d", xdslb_policy_.get(), + call_error); + GPR_ASSERT(GRPC_CALL_OK == call_error); + } +} + +void XdsLb::BalancerCallState::ClientLoadReportDoneLocked(void* arg, + grpc_error* error) { + BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg); + XdsLb* xdslb_policy = lb_calld->xdslb_policy(); + grpc_byte_buffer_destroy(lb_calld->send_message_payload_); + lb_calld->send_message_payload_ = nullptr; + if (error != GRPC_ERROR_NONE || lb_calld != xdslb_policy->lb_calld_.get()) { + lb_calld->Unref(DEBUG_LOCATION, "client_load_report"); + return; + } + lb_calld->ScheduleNextClientLoadReportLocked(); +} + +void XdsLb::BalancerCallState::OnInitialRequestSentLocked(void* arg, + grpc_error* error) { + BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg); + grpc_byte_buffer_destroy(lb_calld->send_message_payload_); + lb_calld->send_message_payload_ = nullptr; + // If we attempted to send a client load report before the initial request was + // sent (and this lb_calld is still in use), send the load report now. 
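+  // client_load_report_is_due_ was set by MaybeSendClientLoadReportLocked()
+  // when its timer fired while the initial request was still being sent.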
+ if (lb_calld->client_load_report_is_due_ && + lb_calld == lb_calld->xdslb_policy()->lb_calld_.get()) { + lb_calld->SendClientLoadReportLocked(); + lb_calld->client_load_report_is_due_ = false; + } + lb_calld->Unref(DEBUG_LOCATION, "on_initial_request_sent"); +} + +void XdsLb::BalancerCallState::OnBalancerMessageReceivedLocked( + void* arg, grpc_error* error) { + BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg); + XdsLb* xdslb_policy = lb_calld->xdslb_policy(); + // Empty payload means the LB call was cancelled. + if (lb_calld != xdslb_policy->lb_calld_.get() || + lb_calld->recv_message_payload_ == nullptr) { + lb_calld->Unref(DEBUG_LOCATION, "on_message_received"); + return; + } + grpc_byte_buffer_reader bbr; + grpc_byte_buffer_reader_init(&bbr, lb_calld->recv_message_payload_); + grpc_slice response_slice = grpc_byte_buffer_reader_readall(&bbr); + grpc_byte_buffer_reader_destroy(&bbr); + grpc_byte_buffer_destroy(lb_calld->recv_message_payload_); + lb_calld->recv_message_payload_ = nullptr; + xds_grpclb_initial_response* initial_response; + xds_grpclb_serverlist* serverlist; + if (!lb_calld->seen_initial_response_ && + (initial_response = xds_grpclb_initial_response_parse(response_slice)) != + nullptr) { + // Have NOT seen initial response, look for initial response. + if (initial_response->has_client_stats_report_interval) { + lb_calld->client_stats_report_interval_ = GPR_MAX( + GPR_MS_PER_SEC, xds_grpclb_duration_to_millis( + &initial_response->client_stats_report_interval)); + if (grpc_lb_xds_trace.enabled()) { + gpr_log(GPR_INFO, + "[xdslb %p] Received initial LB response message; " + "client load reporting interval = %" PRId64 " milliseconds", + xdslb_policy, lb_calld->client_stats_report_interval_); + } + } else if (grpc_lb_xds_trace.enabled()) { + gpr_log(GPR_INFO, + "[xdslb %p] Received initial LB response message; client load " + "reporting NOT enabled", + xdslb_policy); + } + xds_grpclb_initial_response_destroy(initial_response); + lb_calld->seen_initial_response_ = true; + } else if ((serverlist = xds_grpclb_response_parse_serverlist( + response_slice)) != nullptr) { + // Have seen initial response, look for serverlist. + GPR_ASSERT(lb_calld->lb_call_ != nullptr); + if (grpc_lb_xds_trace.enabled()) { + gpr_log(GPR_INFO, + "[xdslb %p] Serverlist with %" PRIuPTR " servers received", + xdslb_policy, serverlist->num_servers); + for (size_t i = 0; i < serverlist->num_servers; ++i) { + grpc_resolved_address addr; + ParseServer(serverlist->servers[i], &addr); + char* ipport; + grpc_sockaddr_to_string(&ipport, &addr, false); + gpr_log(GPR_INFO, "[xdslb %p] Serverlist[%" PRIuPTR "]: %s", + xdslb_policy, i, ipport); + gpr_free(ipport); + } + } + /* update serverlist */ + if (serverlist->num_servers > 0) { + // Start sending client load report only after we start using the + // serverlist returned from the current LB call. + if (lb_calld->client_stats_report_interval_ > 0 && + lb_calld->client_stats_ == nullptr) { + lb_calld->client_stats_.reset(New<XdsLbClientStats>()); + // TODO(roth): We currently track this ref manually. Once the + // ClosureRef API is ready, we should pass the RefCountedPtr<> along + // with the callback. 
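+        // This ref belongs to the load reporting timer chain; it is released
+        // in MaybeSendClientLoadReportLocked() or ClientLoadReportDoneLocked()
+        // when the chain stops.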
+ auto self = lb_calld->Ref(DEBUG_LOCATION, "client_load_report"); + self.release(); + lb_calld->ScheduleNextClientLoadReportLocked(); + } + if (xds_grpclb_serverlist_equals(xdslb_policy->serverlist_, serverlist)) { + if (grpc_lb_xds_trace.enabled()) { + gpr_log(GPR_INFO, + "[xdslb %p] Incoming server list identical to current, " + "ignoring.", + xdslb_policy); + } + xds_grpclb_destroy_serverlist(serverlist); + } else { /* new serverlist */ + if (xdslb_policy->serverlist_ != nullptr) { + /* dispose of the old serverlist */ + xds_grpclb_destroy_serverlist(xdslb_policy->serverlist_); + } else { + /* or dispose of the fallback */ + grpc_lb_addresses_destroy(xdslb_policy->fallback_backend_addresses_); + xdslb_policy->fallback_backend_addresses_ = nullptr; + if (xdslb_policy->fallback_timer_callback_pending_) { + grpc_timer_cancel(&xdslb_policy->lb_fallback_timer_); + } + } + // and update the copy in the XdsLb instance. This + // serverlist instance will be destroyed either upon the next + // update or when the XdsLb instance is destroyed. + xdslb_policy->serverlist_ = serverlist; + xdslb_policy->CreateOrUpdateChildPolicyLocked(); + } + } else { + if (grpc_lb_xds_trace.enabled()) { + gpr_log(GPR_INFO, "[xdslb %p] Received empty server list, ignoring.", + xdslb_policy); + } + xds_grpclb_destroy_serverlist(serverlist); + } + } else { + // No valid initial response or serverlist found. + char* response_slice_str = + grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX); + gpr_log(GPR_ERROR, + "[xdslb %p] Invalid LB response received: '%s'. Ignoring.", + xdslb_policy, response_slice_str); + gpr_free(response_slice_str); + } + grpc_slice_unref_internal(response_slice); + if (!xdslb_policy->shutting_down_) { + // Keep listening for serverlist updates. + grpc_op op; + memset(&op, 0, sizeof(op)); + op.op = GRPC_OP_RECV_MESSAGE; + op.data.recv_message.recv_message = &lb_calld->recv_message_payload_; + op.flags = 0; + op.reserved = nullptr; + // Reuse the "OnBalancerMessageReceivedLocked" ref taken in StartQuery(). + const grpc_call_error call_error = grpc_call_start_batch_and_execute( + lb_calld->lb_call_, &op, 1, + &lb_calld->lb_on_balancer_message_received_); + GPR_ASSERT(GRPC_CALL_OK == call_error); + } else { + lb_calld->Unref(DEBUG_LOCATION, "on_message_received+xds_shutdown"); + } +} + +void XdsLb::BalancerCallState::OnBalancerStatusReceivedLocked( + void* arg, grpc_error* error) { + BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg); + XdsLb* xdslb_policy = lb_calld->xdslb_policy(); + GPR_ASSERT(lb_calld->lb_call_ != nullptr); + if (grpc_lb_xds_trace.enabled()) { + char* status_details = + grpc_slice_to_c_string(lb_calld->lb_call_status_details_); + gpr_log(GPR_INFO, + "[xdslb %p] Status from LB server received. Status = %d, details " + "= '%s', (lb_calld: %p, lb_call: %p), error '%s'", + xdslb_policy, lb_calld->lb_call_status_, status_details, lb_calld, + lb_calld->lb_call_, grpc_error_string(error)); + gpr_free(status_details); + } + xdslb_policy->TryReresolutionLocked(&grpc_lb_xds_trace, GRPC_ERROR_NONE); + // If this lb_calld is still in use, this call ended because of a failure so + // we want to retry connecting. Otherwise, we have deliberately ended this + // call and no further action is required. 
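+  // Whether we restart immediately or back off depends on whether the dead
+  // call ever saw an initial response: if it did, the balancer was reachable
+  // and the backoff is reset below.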
+ if (lb_calld == xdslb_policy->lb_calld_.get()) { + xdslb_policy->lb_calld_.reset(); + GPR_ASSERT(!xdslb_policy->shutting_down_); + if (lb_calld->seen_initial_response_) { + // If we lose connection to the LB server, reset the backoff and restart + // the LB call immediately. + xdslb_policy->lb_call_backoff_.Reset(); + xdslb_policy->StartBalancerCallLocked(); + } else { + // If this LB call fails establishing any connection to the LB server, + // retry later. + xdslb_policy->StartBalancerCallRetryTimerLocked(); + } + } + lb_calld->Unref(DEBUG_LOCATION, "lb_call_ended"); +} + +// +// helper code for creating balancer channel +// + +grpc_lb_addresses* ExtractBalancerAddresses( + const grpc_lb_addresses* addresses) { + size_t num_grpclb_addrs = 0; + for (size_t i = 0; i < addresses->num_addresses; ++i) { + if (addresses->addresses[i].is_balancer) ++num_grpclb_addrs; + } + // There must be at least one balancer address, or else the + // client_channel would not have chosen this LB policy. + GPR_ASSERT(num_grpclb_addrs > 0); + grpc_lb_addresses* lb_addresses = + grpc_lb_addresses_create(num_grpclb_addrs, nullptr); + size_t lb_addresses_idx = 0; + for (size_t i = 0; i < addresses->num_addresses; ++i) { + if (!addresses->addresses[i].is_balancer) continue; + if (GPR_UNLIKELY(addresses->addresses[i].user_data != nullptr)) { + gpr_log(GPR_ERROR, + "This LB policy doesn't support user data. It will be ignored"); + } + grpc_lb_addresses_set_address( + lb_addresses, lb_addresses_idx++, addresses->addresses[i].address.addr, + addresses->addresses[i].address.len, false /* is balancer */, + addresses->addresses[i].balancer_name, nullptr /* user data */); + } + GPR_ASSERT(num_grpclb_addrs == lb_addresses_idx); + return lb_addresses; +} + +/* Returns the channel args for the LB channel, used to create a bidirectional + * stream for the reception of load balancing updates. + * + * Inputs: + * - \a addresses: corresponding to the balancers. + * - \a response_generator: in order to propagate updates from the resolver + * above the grpclb policy. + * - \a args: other args inherited from the xds policy. */ +grpc_channel_args* BuildBalancerChannelArgs( + const grpc_lb_addresses* addresses, + FakeResolverResponseGenerator* response_generator, + const grpc_channel_args* args) { + grpc_lb_addresses* lb_addresses = ExtractBalancerAddresses(addresses); + // Channel args to remove. + static const char* args_to_remove[] = { + // LB policy name, since we want to use the default (pick_first) in + // the LB channel. + GRPC_ARG_LB_POLICY_NAME, + // The channel arg for the server URI, since that will be different for + // the LB channel than for the parent channel. The client channel + // factory will re-add this arg with the right value. + GRPC_ARG_SERVER_URI, + // The resolved addresses, which will be generated by the name resolver + // used in the LB channel. Note that the LB channel will use the fake + // resolver, so this won't actually generate a query to DNS (or some + // other name service). However, the addresses returned by the fake + // resolver will have is_balancer=false, whereas our own addresses have + // is_balancer=true. We need the LB channel to return addresses with + // is_balancer=false so that it does not wind up recursively using the + // xds LB policy, as per the special case logic in client_channel.c. + GRPC_ARG_LB_ADDRESSES, + // The fake resolver response generator, because we are replacing it + // with the one from the xds policy, used to propagate updates to + // the LB channel. 
+      GRPC_ARG_FAKE_RESOLVER_RESPONSE_GENERATOR,
+      // The LB channel should use the authority indicated by the target
+      // authority table (see \a grpc_lb_policy_xds_modify_lb_channel_args),
+      // as opposed to the authority from the parent channel.
+      GRPC_ARG_DEFAULT_AUTHORITY,
+      // Just as for \a GRPC_ARG_DEFAULT_AUTHORITY, the LB channel should be
+      // treated as a stand-alone channel and not inherit this argument from the
+      // args of the parent channel.
+      GRPC_SSL_TARGET_NAME_OVERRIDE_ARG,
+  };
+  // Channel args to add.
+  const grpc_arg args_to_add[] = {
+      // New LB addresses.
+      // Note that we pass these in both when creating the LB channel
+      // and via the fake resolver. The latter is what actually gets used.
+      grpc_lb_addresses_create_channel_arg(lb_addresses),
+      // The fake resolver response generator, which we use to inject
+      // address updates into the LB channel.
+      grpc_core::FakeResolverResponseGenerator::MakeChannelArg(
+          response_generator),
+      // A channel arg indicating the target is an xds load balancer.
+      grpc_channel_arg_integer_create(
+          const_cast<char*>(GRPC_ARG_ADDRESS_IS_XDS_LOAD_BALANCER), 1),
+      // A channel arg indicating this is an internal channel, i.e., it is
+      // owned by components in Core, not by the user application.
+      grpc_channel_arg_integer_create(
+          const_cast<char*>(GRPC_ARG_CHANNELZ_CHANNEL_IS_INTERNAL_CHANNEL), 1),
+  };
+  // Construct channel args.
+  grpc_channel_args* new_args = grpc_channel_args_copy_and_add_and_remove(
+      args, args_to_remove, GPR_ARRAY_SIZE(args_to_remove), args_to_add,
+      GPR_ARRAY_SIZE(args_to_add));
+  // Make any necessary modifications for security.
+  new_args = grpc_lb_policy_xds_modify_lb_channel_args(new_args);
+  // Clean up.
+  grpc_lb_addresses_destroy(lb_addresses);
+  return new_args;
+}
+
+//
+// ctor and dtor
+//
+
+XdsLb::XdsLb(const grpc_lb_addresses* addresses,
+             const LoadBalancingPolicy::Args& args)
+    : LoadBalancingPolicy(args),
+      response_generator_(MakeRefCounted<FakeResolverResponseGenerator>()),
+      lb_call_backoff_(
+          BackOff::Options()
+              .set_initial_backoff(GRPC_XDS_INITIAL_CONNECT_BACKOFF_SECONDS *
+                                   1000)
+              .set_multiplier(GRPC_XDS_RECONNECT_BACKOFF_MULTIPLIER)
+              .set_jitter(GRPC_XDS_RECONNECT_JITTER)
+              .set_max_backoff(GRPC_XDS_RECONNECT_MAX_BACKOFF_SECONDS * 1000)) {
+  // Initialization.
+  gpr_mu_init(&lb_channel_mu_);
+  grpc_subchannel_index_ref();
+  GRPC_CLOSURE_INIT(&lb_channel_on_connectivity_changed_,
+                    &XdsLb::OnBalancerChannelConnectivityChangedLocked, this,
+                    grpc_combiner_scheduler(args.combiner));
+  GRPC_CLOSURE_INIT(&on_child_connectivity_changed_,
+                    &XdsLb::OnChildPolicyConnectivityChangedLocked, this,
+                    grpc_combiner_scheduler(args.combiner));
+  GRPC_CLOSURE_INIT(&on_child_request_reresolution_,
+                    &XdsLb::OnChildPolicyRequestReresolutionLocked, this,
+                    grpc_combiner_scheduler(args.combiner));
+  grpc_connectivity_state_init(&state_tracker_, GRPC_CHANNEL_IDLE, "xds");
+  // Record server name.
+  const grpc_arg* arg = grpc_channel_args_find(args.args, GRPC_ARG_SERVER_URI);
+  const char* server_uri = grpc_channel_arg_get_string(arg);
+  GPR_ASSERT(server_uri != nullptr);
+  grpc_uri* uri = grpc_uri_parse(server_uri, true);
+  GPR_ASSERT(uri->path[0] != '\0');
+  server_name_ = gpr_strdup(uri->path[0] == '/' ? uri->path + 1 : uri->path);
+  if (grpc_lb_xds_trace.enabled()) {
+    gpr_log(GPR_INFO,
+            "[xdslb %p] Will use '%s' as the server name for LB request.", this,
+            server_name_);
+  }
+  grpc_uri_destroy(uri);
+  // Record LB call timeout.
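+  // Note: the timeout args reuse the grpclb arg names
+  // (GRPC_ARG_GRPCLB_CALL_TIMEOUT_MS and GRPC_ARG_GRPCLB_FALLBACK_TIMEOUT_MS);
+  // the xds policy shares these channel args with the grpclb policy.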
+ arg = grpc_channel_args_find(args.args, GRPC_ARG_GRPCLB_CALL_TIMEOUT_MS); + lb_call_timeout_ms_ = grpc_channel_arg_get_integer(arg, {0, 0, INT_MAX}); + // Record fallback timeout. + arg = grpc_channel_args_find(args.args, GRPC_ARG_GRPCLB_FALLBACK_TIMEOUT_MS); + lb_fallback_timeout_ms_ = grpc_channel_arg_get_integer( + arg, {GRPC_XDS_DEFAULT_FALLBACK_TIMEOUT_MS, 0, INT_MAX}); + // Process channel args. + ProcessChannelArgsLocked(*args.args); +} + +XdsLb::~XdsLb() { + GPR_ASSERT(pending_picks_ == nullptr); + gpr_mu_destroy(&lb_channel_mu_); + gpr_free((void*)server_name_); + grpc_channel_args_destroy(args_); + grpc_connectivity_state_destroy(&state_tracker_); + if (serverlist_ != nullptr) { + xds_grpclb_destroy_serverlist(serverlist_); + } + if (fallback_backend_addresses_ != nullptr) { + grpc_lb_addresses_destroy(fallback_backend_addresses_); + } + grpc_subchannel_index_unref(); +} + +void XdsLb::ShutdownLocked() { + grpc_error* error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel shutdown"); + shutting_down_ = true; + lb_calld_.reset(); + if (retry_timer_callback_pending_) { + grpc_timer_cancel(&lb_call_retry_timer_); + } + if (fallback_timer_callback_pending_) { + grpc_timer_cancel(&lb_fallback_timer_); + } + child_policy_.reset(); + TryReresolutionLocked(&grpc_lb_xds_trace, GRPC_ERROR_CANCELLED); + // We destroy the LB channel here instead of in our destructor because + // destroying the channel triggers a last callback to + // OnBalancerChannelConnectivityChangedLocked(), and we need to be + // alive when that callback is invoked. + if (lb_channel_ != nullptr) { + gpr_mu_lock(&lb_channel_mu_); + grpc_channel_destroy(lb_channel_); + lb_channel_ = nullptr; + gpr_mu_unlock(&lb_channel_mu_); + } + grpc_connectivity_state_set(&state_tracker_, GRPC_CHANNEL_SHUTDOWN, + GRPC_ERROR_REF(error), "xds_shutdown"); + // Clear pending picks. + PendingPick* pp; + while ((pp = pending_picks_) != nullptr) { + pending_picks_ = pp->next; + pp->pick->connected_subchannel.reset(); + // Note: pp is deleted in this callback. + GRPC_CLOSURE_SCHED(&pp->on_complete, GRPC_ERROR_REF(error)); + } + GRPC_ERROR_UNREF(error); +} + +// +// public methods +// + +void XdsLb::HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) { + PendingPick* pp; + while ((pp = pending_picks_) != nullptr) { + pending_picks_ = pp->next; + pp->pick->on_complete = pp->original_on_complete; + pp->pick->user_data = nullptr; + grpc_error* error = GRPC_ERROR_NONE; + if (new_policy->PickLocked(pp->pick, &error)) { + // Synchronous return; schedule closure. + GRPC_CLOSURE_SCHED(pp->pick->on_complete, error); + } + Delete(pp); + } +} + +// Cancel a specific pending pick. +// +// A pick progresses as follows: +// - If there's a child policy available, it'll be handed over to child policy +// (in CreateChildPolicyLocked()). From that point onwards, it'll be the +// child policy's responsibility. For cancellations, that implies the pick +// needs to be also cancelled by the child policy instance. +// - Otherwise, without a child policy instance, picks stay pending at this +// policy's level (xds), inside the pending_picks_ list. To cancel these, +// we invoke the completion closure and set the pick's connected +// subchannel to nullptr right here. +void XdsLb::CancelPickLocked(PickState* pick, grpc_error* error) { + PendingPick* pp = pending_picks_; + pending_picks_ = nullptr; + while (pp != nullptr) { + PendingPick* next = pp->next; + if (pp->pick == pick) { + pick->connected_subchannel.reset(); + // Note: pp is deleted in this callback. 
+ GRPC_CLOSURE_SCHED(&pp->on_complete, + GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "Pick Cancelled", &error, 1)); + } else { + pp->next = pending_picks_; + pending_picks_ = pp; + } + pp = next; + } + if (child_policy_ != nullptr) { + child_policy_->CancelPickLocked(pick, GRPC_ERROR_REF(error)); + } + GRPC_ERROR_UNREF(error); +} + +// Cancel all pending picks. +// +// A pick progresses as follows: +// - If there's a child policy available, it'll be handed over to child policy +// (in CreateChildPolicyLocked()). From that point onwards, it'll be the +// child policy's responsibility. For cancellations, that implies the pick +// needs to be also cancelled by the child policy instance. +// - Otherwise, without a child policy instance, picks stay pending at this +// policy's level (xds), inside the pending_picks_ list. To cancel these, +// we invoke the completion closure and set the pick's connected +// subchannel to nullptr right here. +void XdsLb::CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask, + uint32_t initial_metadata_flags_eq, + grpc_error* error) { + PendingPick* pp = pending_picks_; + pending_picks_ = nullptr; + while (pp != nullptr) { + PendingPick* next = pp->next; + if ((pp->pick->initial_metadata_flags & initial_metadata_flags_mask) == + initial_metadata_flags_eq) { + // Note: pp is deleted in this callback. + GRPC_CLOSURE_SCHED(&pp->on_complete, + GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "Pick Cancelled", &error, 1)); + } else { + pp->next = pending_picks_; + pending_picks_ = pp; + } + pp = next; + } + if (child_policy_ != nullptr) { + child_policy_->CancelMatchingPicksLocked(initial_metadata_flags_mask, + initial_metadata_flags_eq, + GRPC_ERROR_REF(error)); + } + GRPC_ERROR_UNREF(error); +} + +void XdsLb::ExitIdleLocked() { + if (!started_picking_) { + StartPickingLocked(); + } +} + +void XdsLb::ResetBackoffLocked() { + if (lb_channel_ != nullptr) { + grpc_channel_reset_connect_backoff(lb_channel_); + } + if (child_policy_ != nullptr) { + child_policy_->ResetBackoffLocked(); + } +} + +bool XdsLb::PickLocked(PickState* pick, grpc_error** error) { + PendingPick* pp = PendingPickCreate(pick); + bool pick_done = false; + if (child_policy_ != nullptr) { + if (grpc_lb_xds_trace.enabled()) { + gpr_log(GPR_INFO, "[xdslb %p] about to PICK from policy %p", this, + child_policy_.get()); + } + pick_done = PickFromChildPolicyLocked(false /* force_async */, pp, error); + } else { // child_policy_ == NULL + if (pick->on_complete == nullptr) { + *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "No pick result available but synchronous result required."); + pick_done = true; + } else { + if (grpc_lb_xds_trace.enabled()) { + gpr_log(GPR_INFO, + "[xdslb %p] No child policy. Adding to xds's pending picks", + this); + } + AddPendingPick(pp); + if (!started_picking_) { + StartPickingLocked(); + } + pick_done = false; + } + } + return pick_done; +} + +void XdsLb::FillChildRefsForChannelz(channelz::ChildRefsList* child_subchannels, + channelz::ChildRefsList* child_channels) { + // delegate to the child_policy_ to fill the children subchannels. 
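+  // The LB channel, in contrast, is inspected under lb_channel_mu_ below,
+  // since channelz may query it from outside the combiner.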
+  child_policy_->FillChildRefsForChannelz(child_subchannels, child_channels);
+  MutexLock lock(&lb_channel_mu_);
+  if (lb_channel_ != nullptr) {
+    grpc_core::channelz::ChannelNode* channel_node =
+        grpc_channel_get_channelz_node(lb_channel_);
+    if (channel_node != nullptr) {
+      child_channels->push_back(channel_node->uuid());
+    }
+  }
+}
+
+grpc_connectivity_state XdsLb::CheckConnectivityLocked(
+    grpc_error** connectivity_error) {
+  return grpc_connectivity_state_get(&state_tracker_, connectivity_error);
+}
+
+void XdsLb::NotifyOnStateChangeLocked(grpc_connectivity_state* current,
+                                      grpc_closure* closure) {
+  grpc_connectivity_state_notify_on_state_change(&state_tracker_, current,
+                                                 closure);
+}
+
+void XdsLb::ProcessChannelArgsLocked(const grpc_channel_args& args) {
+  const grpc_arg* arg = grpc_channel_args_find(&args, GRPC_ARG_LB_ADDRESSES);
+  if (GPR_UNLIKELY(arg == nullptr || arg->type != GRPC_ARG_POINTER)) {
+    // Ignore this update.
+    gpr_log(GPR_ERROR,
+            "[xdslb %p] No valid LB addresses channel arg in update, ignoring.",
+            this);
+    return;
+  }
+  const grpc_lb_addresses* addresses =
+      static_cast<const grpc_lb_addresses*>(arg->value.pointer.p);
+  // Update fallback address list.
+  if (fallback_backend_addresses_ != nullptr) {
+    grpc_lb_addresses_destroy(fallback_backend_addresses_);
+  }
+  fallback_backend_addresses_ = ExtractBackendAddresses(addresses);
+  // Make sure that GRPC_ARG_LB_POLICY_NAME is set in channel args,
+  // since we use this to trigger the client_load_reporting filter.
+  static const char* args_to_remove[] = {GRPC_ARG_LB_POLICY_NAME};
+  grpc_arg new_arg = grpc_channel_arg_string_create(
+      (char*)GRPC_ARG_LB_POLICY_NAME, (char*)"xds");
+  grpc_channel_args_destroy(args_);
+  args_ = grpc_channel_args_copy_and_add_and_remove(
+      &args, args_to_remove, GPR_ARRAY_SIZE(args_to_remove), &new_arg, 1);
+  // Construct args for balancer channel.
+  grpc_channel_args* lb_channel_args =
+      BuildBalancerChannelArgs(addresses, response_generator_.get(), &args);
+  // Create balancer channel if needed.
+  if (lb_channel_ == nullptr) {
+    char* uri_str;
+    gpr_asprintf(&uri_str, "fake:///%s", server_name_);
+    gpr_mu_lock(&lb_channel_mu_);
+    lb_channel_ = grpc_client_channel_factory_create_channel(
+        client_channel_factory(), uri_str,
+        GRPC_CLIENT_CHANNEL_TYPE_LOAD_BALANCING, lb_channel_args);
+    gpr_mu_unlock(&lb_channel_mu_);
+    GPR_ASSERT(lb_channel_ != nullptr);
+    gpr_free(uri_str);
+  }
+  // Propagate updates to the LB channel (pick_first) through the fake
+  // resolver.
+  response_generator_->SetResponse(lb_channel_args);
+  grpc_channel_args_destroy(lb_channel_args);
+}
+
+void XdsLb::UpdateLocked(const grpc_channel_args& args) {
+  ProcessChannelArgsLocked(args);
+  // Update the existing child policy.
+  // Note: We have disabled fallback mode in the code, so this child policy must
+  // have been created from a serverlist.
+  // TODO(vpowar): Handle the fallback_address changes when we add support for
+  // fallback in xDS.
+  if (child_policy_ != nullptr) CreateOrUpdateChildPolicyLocked();
+  // Start watching the LB channel's connectivity state, if we are not
+  // already doing so.
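+  // The watch lets OnBalancerChannelConnectivityChangedLocked() restart the
+  // LB call once the (possibly updated) LB channel becomes READY or IDLE.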
+ if (!watching_lb_channel_) { + lb_channel_connectivity_ = grpc_channel_check_connectivity_state( + lb_channel_, true /* try to connect */); + grpc_channel_element* client_channel_elem = grpc_channel_stack_last_element( + grpc_channel_get_channel_stack(lb_channel_)); + GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter); + watching_lb_channel_ = true; + // TODO(roth): We currently track this ref manually. Once the + // ClosureRef API is ready, we should pass the RefCountedPtr<> along + // with the callback. + auto self = Ref(DEBUG_LOCATION, "watch_lb_channel_connectivity"); + self.release(); + grpc_client_channel_watch_connectivity_state( + client_channel_elem, + grpc_polling_entity_create_from_pollset_set(interested_parties()), + &lb_channel_connectivity_, &lb_channel_on_connectivity_changed_, + nullptr); + } +} + +// +// code for balancer channel and call +// + +void XdsLb::StartPickingLocked() { + // Start a timer to fall back. + if (lb_fallback_timeout_ms_ > 0 && serverlist_ == nullptr && + !fallback_timer_callback_pending_) { + grpc_millis deadline = ExecCtx::Get()->Now() + lb_fallback_timeout_ms_; + // TODO(roth): We currently track this ref manually. Once the + // ClosureRef API is ready, we should pass the RefCountedPtr<> along + // with the callback. + auto self = Ref(DEBUG_LOCATION, "on_fallback_timer"); + self.release(); + GRPC_CLOSURE_INIT(&lb_on_fallback_, &XdsLb::OnFallbackTimerLocked, this, + grpc_combiner_scheduler(combiner())); + fallback_timer_callback_pending_ = true; + grpc_timer_init(&lb_fallback_timer_, deadline, &lb_on_fallback_); + } + started_picking_ = true; + StartBalancerCallLocked(); +} + +void XdsLb::StartBalancerCallLocked() { + GPR_ASSERT(lb_channel_ != nullptr); + if (shutting_down_) return; + // Init the LB call data. + GPR_ASSERT(lb_calld_ == nullptr); + lb_calld_ = MakeOrphanable<BalancerCallState>(Ref()); + if (grpc_lb_xds_trace.enabled()) { + gpr_log(GPR_INFO, + "[xdslb %p] Query for backends (lb_channel: %p, lb_calld: %p)", + this, lb_channel_, lb_calld_.get()); + } + lb_calld_->StartQuery(); +} + +void XdsLb::OnFallbackTimerLocked(void* arg, grpc_error* error) { + XdsLb* xdslb_policy = static_cast<XdsLb*>(arg); + xdslb_policy->fallback_timer_callback_pending_ = false; + // If we receive a serverlist after the timer fires but before this callback + // actually runs, don't fall back. + if (xdslb_policy->serverlist_ == nullptr && !xdslb_policy->shutting_down_ && + error == GRPC_ERROR_NONE) { + if (grpc_lb_xds_trace.enabled()) { + gpr_log(GPR_INFO, + "[xdslb %p] Fallback timer fired. Not using fallback backends", + xdslb_policy); + } + } + xdslb_policy->Unref(DEBUG_LOCATION, "on_fallback_timer"); +} + +void XdsLb::StartBalancerCallRetryTimerLocked() { + grpc_millis next_try = lb_call_backoff_.NextAttemptTime(); + if (grpc_lb_xds_trace.enabled()) { + gpr_log(GPR_INFO, "[xdslb %p] Connection to LB server lost...", this); + grpc_millis timeout = next_try - ExecCtx::Get()->Now(); + if (timeout > 0) { + gpr_log(GPR_INFO, "[xdslb %p] ... retry_timer_active in %" PRId64 "ms.", + this, timeout); + } else { + gpr_log(GPR_INFO, "[xdslb %p] ... retry_timer_active immediately.", this); + } + } + // TODO(roth): We currently track this ref manually. Once the + // ClosureRef API is ready, we should pass the RefCountedPtr<> along + // with the callback. 
+ auto self = Ref(DEBUG_LOCATION, "on_balancer_call_retry_timer"); + self.release(); + GRPC_CLOSURE_INIT(&lb_on_call_retry_, &XdsLb::OnBalancerCallRetryTimerLocked, + this, grpc_combiner_scheduler(combiner())); + retry_timer_callback_pending_ = true; + grpc_timer_init(&lb_call_retry_timer_, next_try, &lb_on_call_retry_); +} + +void XdsLb::OnBalancerCallRetryTimerLocked(void* arg, grpc_error* error) { + XdsLb* xdslb_policy = static_cast<XdsLb*>(arg); + xdslb_policy->retry_timer_callback_pending_ = false; + if (!xdslb_policy->shutting_down_ && error == GRPC_ERROR_NONE && + xdslb_policy->lb_calld_ == nullptr) { + if (grpc_lb_xds_trace.enabled()) { + gpr_log(GPR_INFO, "[xdslb %p] Restarting call to LB server", + xdslb_policy); + } + xdslb_policy->StartBalancerCallLocked(); + } + xdslb_policy->Unref(DEBUG_LOCATION, "on_balancer_call_retry_timer"); +} + +// Invoked as part of the update process. It continues watching the LB channel +// until it shuts down or becomes READY. It's invoked even if the LB channel +// stayed READY throughout the update (for example if the update is identical). +void XdsLb::OnBalancerChannelConnectivityChangedLocked(void* arg, + grpc_error* error) { + XdsLb* xdslb_policy = static_cast<XdsLb*>(arg); + if (xdslb_policy->shutting_down_) goto done; + // Re-initialize the lb_call. This should also take care of updating the + // child policy. Note that the current child policy, if any, will + // stay in effect until an update from the new lb_call is received. + switch (xdslb_policy->lb_channel_connectivity_) { + case GRPC_CHANNEL_CONNECTING: + case GRPC_CHANNEL_TRANSIENT_FAILURE: { + // Keep watching the LB channel. + grpc_channel_element* client_channel_elem = + grpc_channel_stack_last_element( + grpc_channel_get_channel_stack(xdslb_policy->lb_channel_)); + GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter); + grpc_client_channel_watch_connectivity_state( + client_channel_elem, + grpc_polling_entity_create_from_pollset_set( + xdslb_policy->interested_parties()), + &xdslb_policy->lb_channel_connectivity_, + &xdslb_policy->lb_channel_on_connectivity_changed_, nullptr); + break; + } + // The LB channel may be IDLE because it's shut down before the update. + // Restart the LB call to kick the LB channel into gear. + case GRPC_CHANNEL_IDLE: + case GRPC_CHANNEL_READY: + xdslb_policy->lb_calld_.reset(); + if (xdslb_policy->started_picking_) { + if (xdslb_policy->retry_timer_callback_pending_) { + grpc_timer_cancel(&xdslb_policy->lb_call_retry_timer_); + } + xdslb_policy->lb_call_backoff_.Reset(); + xdslb_policy->StartBalancerCallLocked(); + } + // Fall through. + case GRPC_CHANNEL_SHUTDOWN: + done: + xdslb_policy->watching_lb_channel_ = false; + xdslb_policy->Unref(DEBUG_LOCATION, + "watch_lb_channel_connectivity_cb_shutdown"); + } +} + +// +// PendingPick +// + +// Adds lb_token of selected subchannel (address) to the call's initial +// metadata. +grpc_error* AddLbTokenToInitialMetadata( + grpc_mdelem lb_token, grpc_linked_mdelem* lb_token_mdelem_storage, + grpc_metadata_batch* initial_metadata) { + GPR_ASSERT(lb_token_mdelem_storage != nullptr); + GPR_ASSERT(!GRPC_MDISNULL(lb_token)); + return grpc_metadata_batch_add_tail(initial_metadata, lb_token_mdelem_storage, + lb_token); +} + +// Destroy function used when embedding client stats in call context. 
+void DestroyClientStats(void* arg) {
+  static_cast<XdsLbClientStats*>(arg)->Unref();
+}
+
+void XdsLb::PendingPickSetMetadataAndContext(PendingPick* pp) {
+  /* if connected_subchannel is nullptr, no pick has been made by the
+   * child policy (e.g., all addresses failed to connect). There won't be any
+   * user_data/token available */
+  if (pp->pick->connected_subchannel != nullptr) {
+    if (GPR_LIKELY(!GRPC_MDISNULL(pp->lb_token))) {
+      AddLbTokenToInitialMetadata(GRPC_MDELEM_REF(pp->lb_token),
+                                  &pp->pick->lb_token_mdelem_storage,
+                                  pp->pick->initial_metadata);
+    } else {
+      gpr_log(GPR_ERROR,
+              "[xdslb %p] No LB token for connected subchannel pick %p",
+              pp->xdslb_policy, pp->pick);
+      abort();
+    }
+    // Pass on client stats via context. Passes ownership of the reference.
+    if (pp->client_stats != nullptr) {
+      pp->pick->subchannel_call_context[GRPC_GRPCLB_CLIENT_STATS].value =
+          pp->client_stats.release();
+      pp->pick->subchannel_call_context[GRPC_GRPCLB_CLIENT_STATS].destroy =
+          DestroyClientStats;
+    }
+  } else {
+    pp->client_stats.reset();
+  }
+}
+
+/* The \a on_complete closure passed as part of the pick is wrapped so that,
+ * upon its invocation, we can set the LB token metadata, pass client stats
+ * via the call context, and delete the PendingPick */
+void XdsLb::OnPendingPickComplete(void* arg, grpc_error* error) {
+  PendingPick* pp = static_cast<PendingPick*>(arg);
+  PendingPickSetMetadataAndContext(pp);
+  GRPC_CLOSURE_SCHED(pp->original_on_complete, GRPC_ERROR_REF(error));
+  Delete(pp);
+}
+
+XdsLb::PendingPick* XdsLb::PendingPickCreate(PickState* pick) {
+  PendingPick* pp = New<PendingPick>();
+  pp->xdslb_policy = this;
+  pp->pick = pick;
+  GRPC_CLOSURE_INIT(&pp->on_complete, &XdsLb::OnPendingPickComplete, pp,
+                    grpc_schedule_on_exec_ctx);
+  pp->original_on_complete = pick->on_complete;
+  pick->on_complete = &pp->on_complete;
+  return pp;
+}
+
+void XdsLb::AddPendingPick(PendingPick* pp) {
+  pp->next = pending_picks_;
+  pending_picks_ = pp;
+}
+
+//
+// code for interacting with the child policy
+//
+
+// Performs a pick over \a child_policy_. Given that a pick can return
+// immediately (ignoring its completion callback), we need to perform the
+// cleanups this callback would otherwise be responsible for.
+// If \a force_async is true, then we will manually schedule the
+// completion callback even if the pick is available immediately.
+bool XdsLb::PickFromChildPolicyLocked(bool force_async, PendingPick* pp,
+                                      grpc_error** error) {
+  // Set client_stats and user_data.
+  if (lb_calld_ != nullptr && lb_calld_->client_stats() != nullptr) {
+    pp->client_stats = lb_calld_->client_stats()->Ref();
+  }
+  GPR_ASSERT(pp->pick->user_data == nullptr);
+  pp->pick->user_data = (void**)&pp->lb_token;
+  // Pick via the child policy.
+  bool pick_done = child_policy_->PickLocked(pp->pick, error);
+  if (pick_done) {
+    PendingPickSetMetadataAndContext(pp);
+    if (force_async) {
+      GRPC_CLOSURE_SCHED(pp->original_on_complete, *error);
+      *error = GRPC_ERROR_NONE;
+      pick_done = false;
+    }
+    Delete(pp);
+  }
+  // else, the pending pick will be registered and taken care of by the
+  // pending pick list inside the child policy. Eventually,
+  // OnPendingPickComplete() will be called, which will (among other
+  // things) add the LB token to the call's initial metadata.
+  return pick_done;
+}
+
+void XdsLb::CreateChildPolicyLocked(const Args& args) {
+  GPR_ASSERT(child_policy_ == nullptr);
+  child_policy_ = LoadBalancingPolicyRegistry::CreateLoadBalancingPolicy(
+      "round_robin", args);
+  if (GPR_UNLIKELY(child_policy_ == nullptr)) {
+    gpr_log(GPR_ERROR, "[xdslb %p] Failure creating a child policy", this);
+    return;
+  }
+  // TODO(roth): We currently track this ref manually. Once the new
+  // ClosureRef API is done, pass the RefCountedPtr<> along with the closure.
+  auto self = Ref(DEBUG_LOCATION, "on_child_reresolution_requested");
+  self.release();
+  child_policy_->SetReresolutionClosureLocked(&on_child_request_reresolution_);
+  grpc_error* child_state_error = nullptr;
+  child_connectivity_state_ =
+      child_policy_->CheckConnectivityLocked(&child_state_error);
+  // Connectivity state is a function of the child policy updated/created.
+  UpdateConnectivityStateFromChildPolicyLocked(child_state_error);
+  // Add the xDS's interested_parties pollset_set to that of the newly created
+  // child policy. This will make the child policy progress upon activity on
+  // xDS LB, which in turn is tied to the application's call.
+  grpc_pollset_set_add_pollset_set(child_policy_->interested_parties(),
+                                   interested_parties());
+  // Subscribe to changes to the connectivity of the new child policy.
+  // TODO(roth): We currently track this ref manually. Once the new
+  // ClosureRef API is done, pass the RefCountedPtr<> along with the closure.
+  self = Ref(DEBUG_LOCATION, "on_child_connectivity_changed");
+  self.release();
+  child_policy_->NotifyOnStateChangeLocked(&child_connectivity_state_,
+                                           &on_child_connectivity_changed_);
+  child_policy_->ExitIdleLocked();
+  // Send pending picks to child policy.
+  PendingPick* pp;
+  while ((pp = pending_picks_)) {
+    pending_picks_ = pp->next;
+    if (grpc_lb_xds_trace.enabled()) {
+      gpr_log(
+          GPR_INFO,
+          "[xdslb %p] Pending pick about to (async) PICK from child policy %p",
+          this, child_policy_.get());
+    }
+    grpc_error* error = GRPC_ERROR_NONE;
+    PickFromChildPolicyLocked(true /* force_async */, pp, &error);
+  }
+}
+
+grpc_channel_args* XdsLb::CreateChildPolicyArgsLocked() {
+  grpc_lb_addresses* addresses;
+  bool is_backend_from_xds_load_balancer = false;
+  // This should never be invoked if we do not have serverlist_, as fallback
+  // mode is disabled for the xDS plugin.
+  GPR_ASSERT(serverlist_ != nullptr);
+  GPR_ASSERT(serverlist_->num_servers > 0);
+  addresses = ProcessServerlist(serverlist_);
+  is_backend_from_xds_load_balancer = true;
+  GPR_ASSERT(addresses != nullptr);
+  // Replace the LB addresses in the channel args that we pass down to
+  // the subchannel.
+  static const char* keys_to_remove[] = {GRPC_ARG_LB_ADDRESSES};
+  const grpc_arg args_to_add[] = {
+      grpc_lb_addresses_create_channel_arg(addresses),
+      // A channel arg indicating if the target is a backend inferred from an
+      // xds load balancer.
+      grpc_channel_arg_integer_create(
+          const_cast<char*>(GRPC_ARG_ADDRESS_IS_BACKEND_FROM_XDS_LOAD_BALANCER),
+          is_backend_from_xds_load_balancer),
+  };
+  grpc_channel_args* args = grpc_channel_args_copy_and_add_and_remove(
+      args_, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove), args_to_add,
+      GPR_ARRAY_SIZE(args_to_add));
+  grpc_lb_addresses_destroy(addresses);
+  return args;
+}
+
+void XdsLb::CreateOrUpdateChildPolicyLocked() {
+  if (shutting_down_) return;
+  grpc_channel_args* args = CreateChildPolicyArgsLocked();
+  GPR_ASSERT(args != nullptr);
+  if (child_policy_ != nullptr) {
+    if (grpc_lb_xds_trace.enabled()) {
+      gpr_log(GPR_INFO, "[xdslb %p] Updating the child policy %p", this,
+              child_policy_.get());
+    }
+    child_policy_->UpdateLocked(*args);
+  } else {
+    LoadBalancingPolicy::Args lb_policy_args;
+    lb_policy_args.combiner = combiner();
+    lb_policy_args.client_channel_factory = client_channel_factory();
+    lb_policy_args.args = args;
+    CreateChildPolicyLocked(lb_policy_args);
+    if (grpc_lb_xds_trace.enabled()) {
+      gpr_log(GPR_INFO, "[xdslb %p] Created a new child policy %p", this,
+              child_policy_.get());
+    }
+  }
+  grpc_channel_args_destroy(args);
+}
+
+void XdsLb::OnChildPolicyRequestReresolutionLocked(void* arg,
+                                                   grpc_error* error) {
+  XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
+  if (xdslb_policy->shutting_down_ || error != GRPC_ERROR_NONE) {
+    xdslb_policy->Unref(DEBUG_LOCATION, "on_child_reresolution_requested");
+    return;
+  }
+  if (grpc_lb_xds_trace.enabled()) {
+    gpr_log(GPR_INFO,
+            "[xdslb %p] Re-resolution requested from child policy "
+            "(%p).",
+            xdslb_policy, xdslb_policy->child_policy_.get());
+  }
+  // If we are talking to a balancer, we expect to get updated addresses from
+  // the balancer, so we can ignore the re-resolution request from the child
+  // policy.
+  // Otherwise, handle the re-resolution request using the xds policy's
+  // original re-resolution closure.
+  if (xdslb_policy->lb_calld_ == nullptr ||
+      !xdslb_policy->lb_calld_->seen_initial_response()) {
+    xdslb_policy->TryReresolutionLocked(&grpc_lb_xds_trace, GRPC_ERROR_NONE);
+  }
+  // Give back the wrapper closure to the child policy.
+  xdslb_policy->child_policy_->SetReresolutionClosureLocked(
+      &xdslb_policy->on_child_request_reresolution_);
+}
+
+void XdsLb::UpdateConnectivityStateFromChildPolicyLocked(
+    grpc_error* child_state_error) {
+  const grpc_connectivity_state curr_xds_state =
+      grpc_connectivity_state_check(&state_tracker_);
+  /* The new connectivity status is a function of the previous one and the new
+   * input coming from the status of the child policy.
+   *
+   *  current state (xds's)
+   *  |
+   *  v  || I  |  C  |  R  |  TF  |  SD  |  <- new state (child policy's)
+   *  ===++====+=====+=====+======+======+
+   *   I || I  |  C  |  R  | [I]  | [I]  |
+   *  ---++----+-----+-----+------+------+
+   *   C || I  |  C  |  R  | [C]  | [C]  |
+   *  ---++----+-----+-----+------+------+
+   *   R || I  |  C  |  R  | [R]  | [R]  |
+   *  ---++----+-----+-----+------+------+
+   *  TF || I  |  C  |  R  | [TF] | [TF] |
+   *  ---++----+-----+-----+------+------+
+   *  SD || NA |  NA |  NA |  NA  |  NA  | (*)
+   *  ---++----+-----+-----+------+------+
+   *
+   * A [STATE] indicates that the old child policy is kept. In those cases,
+   * STATE is the current state of xds, which is left untouched.
+   *
+   * In summary, if the new state is TRANSIENT_FAILURE or SHUTDOWN, stick to
+   * the previous child policy instance.
+   *
+   * Note that the status is never updated to SHUTDOWN as a result of calling
+   * this function. Only ShutdownLocked() has the power to set that state.
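+   *
+   * For example, if the current xds state is READY and the child policy
+   * reports TRANSIENT_FAILURE, the [R] cell applies: the old child policy is
+   * kept and xds stays READY.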
+   *
+   * (*) This function mustn't be called while shutting down. */
+  GPR_ASSERT(curr_glb_state != GRPC_CHANNEL_SHUTDOWN);
+  switch (child_connectivity_state_) {
+    case GRPC_CHANNEL_TRANSIENT_FAILURE:
+    case GRPC_CHANNEL_SHUTDOWN:
+      GPR_ASSERT(child_state_error != GRPC_ERROR_NONE);
+      break;
+    case GRPC_CHANNEL_IDLE:
+    case GRPC_CHANNEL_CONNECTING:
+    case GRPC_CHANNEL_READY:
+      GPR_ASSERT(child_state_error == GRPC_ERROR_NONE);
+  }
+  if (grpc_lb_xds_trace.enabled()) {
+    gpr_log(GPR_INFO,
+            "[xdslb %p] Setting xds's state to %s from child policy %p state.",
+            this, grpc_connectivity_state_name(child_connectivity_state_),
+            child_policy_.get());
+  }
+  grpc_connectivity_state_set(&state_tracker_, child_connectivity_state_,
+                              child_state_error,
+                              "update_lb_connectivity_status_locked");
+}
+
+void XdsLb::OnChildPolicyConnectivityChangedLocked(void* arg,
+                                                   grpc_error* error) {
+  XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
+  if (xdslb_policy->shutting_down_) {
+    xdslb_policy->Unref(DEBUG_LOCATION, "on_child_connectivity_changed");
+    return;
+  }
+  xdslb_policy->UpdateConnectivityStateFromChildPolicyLocked(
+      GRPC_ERROR_REF(error));
+  // Resubscribe. Reuse the "on_child_connectivity_changed" ref.
+  xdslb_policy->child_policy_->NotifyOnStateChangeLocked(
+      &xdslb_policy->child_connectivity_state_,
+      &xdslb_policy->on_child_connectivity_changed_);
+}
+
+//
+// factory
+//
+
+class XdsFactory : public LoadBalancingPolicyFactory {
+ public:
+  OrphanablePtr<LoadBalancingPolicy> CreateLoadBalancingPolicy(
+      const LoadBalancingPolicy::Args& args) const override {
+    /* Count the number of balancer addresses. There must be at least one. */
+    const grpc_arg* arg =
+        grpc_channel_args_find(args.args, GRPC_ARG_LB_ADDRESSES);
+    if (arg == nullptr || arg->type != GRPC_ARG_POINTER) {
+      return nullptr;
+    }
+    grpc_lb_addresses* addresses =
+        static_cast<grpc_lb_addresses*>(arg->value.pointer.p);
+    size_t num_grpclb_addrs = 0;
+    for (size_t i = 0; i < addresses->num_addresses; ++i) {
+      if (addresses->addresses[i].is_balancer) ++num_grpclb_addrs;
+    }
+    if (num_grpclb_addrs == 0) return nullptr;
+    return OrphanablePtr<LoadBalancingPolicy>(New<XdsLb>(addresses, args));
+  }
+
+  const char* name() const override { return "xds"; }
+};
+
+}  // namespace
+
+}  // namespace grpc_core
+
+//
+// Plugin registration
+//
+
+void grpc_lb_policy_xds_init() {
+  grpc_core::LoadBalancingPolicyRegistry::Builder::
+      RegisterLoadBalancingPolicyFactory(
+          grpc_core::UniquePtr<grpc_core::LoadBalancingPolicyFactory>(
+              grpc_core::New<grpc_core::XdsFactory>()));
+}
+
+void grpc_lb_policy_xds_shutdown() {}
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds.h b/src/core/ext/filters/client_channel/lb_policy/xds/xds.h
new file mode 100644
index 0000000000..8b20680f2d
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_H
+#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_H
+
+#include <grpc/support/port_platform.h>
+
+/** Channel arg indicating if a target corresponding to the address is an xds
+ * load balancer. The type of this arg is an integer and the value is treated
+ * as a bool. */
+#define GRPC_ARG_ADDRESS_IS_XDS_LOAD_BALANCER \
+  "grpc.address_is_xds_load_balancer"
+/** Channel arg indicating if a target corresponding to the address is a
+ * backend received from a balancer. The type of this arg is an integer and
+ * the value is treated as a bool. */
+#define GRPC_ARG_ADDRESS_IS_BACKEND_FROM_XDS_LOAD_BALANCER \
+  "grpc.address_is_backend_from_xds_load_balancer"
+
+#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_H \
+        */
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.cc
new file mode 100644
index 0000000000..0aa145a24e
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.cc
@@ -0,0 +1,26 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h"
+
+grpc_channel_args* grpc_lb_policy_xds_modify_lb_channel_args(
+    grpc_channel_args* args) {
+  return args;
+}
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h
new file mode 100644
index 0000000000..32c4acc8a3
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CHANNEL_H
+#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CHANNEL_H
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/ext/filters/client_channel/lb_policy_factory.h"
+
+/// Makes any necessary modifications to \a args for use in the xds
+/// balancer channel.
+///
+/// Takes ownership of \a args.
+///
+/// Caller takes ownership of the returned args.
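+///
+/// A minimal usage sketch (illustrative only; "base_args" stands in for
+/// whatever args the balancer-channel creation path already holds):
+///
+///   grpc_channel_args* lb_args = grpc_lb_policy_xds_modify_lb_channel_args(
+///       grpc_channel_args_copy(base_args));  // the hook consumes its input
+///   // ... create the balancer channel using lb_args ...
+///   grpc_channel_args_destroy(lb_args);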
+grpc_channel_args* grpc_lb_policy_xds_modify_lb_channel_args( + grpc_channel_args* args); + +#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CHANNEL_H \ + */ diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_secure.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_secure.cc new file mode 100644 index 0000000000..5ab72efce4 --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_secure.cc @@ -0,0 +1,107 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h" + +#include <grpc/support/alloc.h> +#include <grpc/support/string_util.h> +#include <string.h> + +#include "src/core/ext/filters/client_channel/client_channel.h" +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/gpr/string.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/security/credentials/credentials.h" +#include "src/core/lib/security/transport/target_authority_table.h" +#include "src/core/lib/slice/slice_internal.h" + +namespace grpc_core { +namespace { + +int BalancerNameCmp(const grpc_core::UniquePtr<char>& a, + const grpc_core::UniquePtr<char>& b) { + return strcmp(a.get(), b.get()); +} + +RefCountedPtr<TargetAuthorityTable> CreateTargetAuthorityTable( + grpc_lb_addresses* addresses) { + TargetAuthorityTable::Entry* target_authority_entries = + static_cast<TargetAuthorityTable::Entry*>(gpr_zalloc( + sizeof(*target_authority_entries) * addresses->num_addresses)); + for (size_t i = 0; i < addresses->num_addresses; ++i) { + char* addr_str; + GPR_ASSERT(grpc_sockaddr_to_string( + &addr_str, &addresses->addresses[i].address, true) > 0); + target_authority_entries[i].key = grpc_slice_from_copied_string(addr_str); + target_authority_entries[i].value.reset( + gpr_strdup(addresses->addresses[i].balancer_name)); + gpr_free(addr_str); + } + RefCountedPtr<TargetAuthorityTable> target_authority_table = + TargetAuthorityTable::Create(addresses->num_addresses, + target_authority_entries, BalancerNameCmp); + gpr_free(target_authority_entries); + return target_authority_table; +} + +} // namespace +} // namespace grpc_core + +grpc_channel_args* grpc_lb_policy_xds_modify_lb_channel_args( + grpc_channel_args* args) { + const char* args_to_remove[1]; + size_t num_args_to_remove = 0; + grpc_arg args_to_add[2]; + size_t num_args_to_add = 0; + // Add arg for targets info table. 
+ const grpc_arg* arg = grpc_channel_args_find(args, GRPC_ARG_LB_ADDRESSES); + GPR_ASSERT(arg != nullptr); + GPR_ASSERT(arg->type == GRPC_ARG_POINTER); + grpc_lb_addresses* addresses = + static_cast<grpc_lb_addresses*>(arg->value.pointer.p); + grpc_core::RefCountedPtr<grpc_core::TargetAuthorityTable> + target_authority_table = grpc_core::CreateTargetAuthorityTable(addresses); + args_to_add[num_args_to_add++] = + grpc_core::CreateTargetAuthorityTableChannelArg( + target_authority_table.get()); + // Substitute the channel credentials with a version without call + // credentials: the load balancer is not necessarily trusted to handle + // bearer token credentials. + grpc_channel_credentials* channel_credentials = + grpc_channel_credentials_find_in_args(args); + grpc_channel_credentials* creds_sans_call_creds = nullptr; + if (channel_credentials != nullptr) { + creds_sans_call_creds = + grpc_channel_credentials_duplicate_without_call_credentials( + channel_credentials); + GPR_ASSERT(creds_sans_call_creds != nullptr); + args_to_remove[num_args_to_remove++] = GRPC_ARG_CHANNEL_CREDENTIALS; + args_to_add[num_args_to_add++] = + grpc_channel_credentials_to_arg(creds_sans_call_creds); + } + grpc_channel_args* result = grpc_channel_args_copy_and_add_and_remove( + args, args_to_remove, num_args_to_remove, args_to_add, num_args_to_add); + // Clean up. + grpc_channel_args_destroy(args); + if (creds_sans_call_creds != nullptr) { + grpc_channel_credentials_unref(creds_sans_call_creds); + } + return result; +} diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.cc new file mode 100644 index 0000000000..cdf5408be3 --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.cc @@ -0,0 +1,85 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h" + +#include <grpc/support/atm.h> +#include <grpc/support/string_util.h> +#include <string.h> + +namespace grpc_core { + +void XdsLbClientStats::AddCallStarted() { + gpr_atm_full_fetch_add(&num_calls_started_, (gpr_atm)1); +} + +void XdsLbClientStats::AddCallFinished(bool finished_with_client_failed_to_send, + bool finished_known_received) { + gpr_atm_full_fetch_add(&num_calls_finished_, (gpr_atm)1); + if (finished_with_client_failed_to_send) { + gpr_atm_full_fetch_add(&num_calls_finished_with_client_failed_to_send_, + (gpr_atm)1); + } + if (finished_known_received) { + gpr_atm_full_fetch_add(&num_calls_finished_known_received_, (gpr_atm)1); + } +} + +void XdsLbClientStats::AddCallDroppedLocked(char* token) { + // Increment num_calls_started and num_calls_finished. + gpr_atm_full_fetch_add(&num_calls_started_, (gpr_atm)1); + gpr_atm_full_fetch_add(&num_calls_finished_, (gpr_atm)1); + // Record the drop. 
+ if (drop_token_counts_ == nullptr) { + drop_token_counts_.reset(New<DroppedCallCounts>()); + } + for (size_t i = 0; i < drop_token_counts_->size(); ++i) { + if (strcmp((*drop_token_counts_)[i].token.get(), token) == 0) { + ++(*drop_token_counts_)[i].count; + return; + } + } + // Not found, so add a new entry. + drop_token_counts_->emplace_back(UniquePtr<char>(gpr_strdup(token)), 1); +} + +namespace { + +void AtomicGetAndResetCounter(int64_t* value, gpr_atm* counter) { + *value = static_cast<int64_t>(gpr_atm_full_xchg(counter, (gpr_atm)0)); +} + +} // namespace + +void XdsLbClientStats::GetLocked( + int64_t* num_calls_started, int64_t* num_calls_finished, + int64_t* num_calls_finished_with_client_failed_to_send, + int64_t* num_calls_finished_known_received, + UniquePtr<DroppedCallCounts>* drop_token_counts) { + AtomicGetAndResetCounter(num_calls_started, &num_calls_started_); + AtomicGetAndResetCounter(num_calls_finished, &num_calls_finished_); + AtomicGetAndResetCounter(num_calls_finished_with_client_failed_to_send, + &num_calls_finished_with_client_failed_to_send_); + AtomicGetAndResetCounter(num_calls_finished_known_received, + &num_calls_finished_known_received_); + *drop_token_counts = std::move(drop_token_counts_); +} + +} // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h b/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h new file mode 100644 index 0000000000..fa0b9f4b63 --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h @@ -0,0 +1,72 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CLIENT_STATS_H +#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CLIENT_STATS_H + +#include <grpc/support/port_platform.h> + +#include <grpc/support/atm.h> + +#include "src/core/lib/gprpp/inlined_vector.h" +#include "src/core/lib/gprpp/memory.h" +#include "src/core/lib/gprpp/ref_counted.h" + +namespace grpc_core { + +class XdsLbClientStats : public RefCounted<XdsLbClientStats> { + public: + struct DropTokenCount { + UniquePtr<char> token; + int64_t count; + + DropTokenCount(UniquePtr<char> token, int64_t count) + : token(std::move(token)), count(count) {} + }; + + typedef InlinedVector<DropTokenCount, 10> DroppedCallCounts; + + XdsLbClientStats() {} + + void AddCallStarted(); + void AddCallFinished(bool finished_with_client_failed_to_send, + bool finished_known_received); + + // This method is not thread-safe; caller must synchronize. + void AddCallDroppedLocked(char* token); + + // This method is not thread-safe; caller must synchronize. + void GetLocked(int64_t* num_calls_started, int64_t* num_calls_finished, + int64_t* num_calls_finished_with_client_failed_to_send, + int64_t* num_calls_finished_known_received, + UniquePtr<DroppedCallCounts>* drop_token_counts); + + private: + // This field must only be accessed via *_locked() methods. 
+ UniquePtr<DroppedCallCounts> drop_token_counts_; + // These fields may be accessed from multiple threads at a time. + gpr_atm num_calls_started_ = 0; + gpr_atm num_calls_finished_ = 0; + gpr_atm num_calls_finished_with_client_failed_to_send_ = 0; + gpr_atm num_calls_finished_known_received_ = 0; +}; + +} // namespace grpc_core + +#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_CLIENT_STATS_H \ + */ diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.cc new file mode 100644 index 0000000000..79b7bdbe33 --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.cc @@ -0,0 +1,307 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include "pb_decode.h" +#include "pb_encode.h" +#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.h" + +#include <grpc/support/alloc.h> + +/* invoked once for every Server in ServerList */ +static bool count_serverlist(pb_istream_t* stream, const pb_field_t* field, + void** arg) { + xds_grpclb_serverlist* sl = static_cast<xds_grpclb_serverlist*>(*arg); + xds_grpclb_server server; + if (GPR_UNLIKELY(!pb_decode(stream, grpc_lb_v1_Server_fields, &server))) { + gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(stream)); + return false; + } + ++sl->num_servers; + return true; +} + +typedef struct decode_serverlist_arg { + /* The decoding callback is invoked once per server in serverlist. 
Remember
+ * which index of the serverlist we are currently decoding */
+  size_t decoding_idx;
+  /* The decoded serverlist */
+  xds_grpclb_serverlist* serverlist;
+} decode_serverlist_arg;
+
+/* invoked once for every Server in ServerList */
+static bool decode_serverlist(pb_istream_t* stream, const pb_field_t* field,
+                              void** arg) {
+  decode_serverlist_arg* dec_arg = static_cast<decode_serverlist_arg*>(*arg);
+  GPR_ASSERT(dec_arg->serverlist->num_servers >= dec_arg->decoding_idx);
+  xds_grpclb_server* server =
+      static_cast<xds_grpclb_server*>(gpr_zalloc(sizeof(xds_grpclb_server)));
+  if (GPR_UNLIKELY(!pb_decode(stream, grpc_lb_v1_Server_fields, server))) {
+    gpr_free(server);
+    gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(stream));
+    return false;
+  }
+  dec_arg->serverlist->servers[dec_arg->decoding_idx++] = server;
+  return true;
+}
+
+xds_grpclb_request* xds_grpclb_request_create(const char* lb_service_name) {
+  xds_grpclb_request* req =
+      static_cast<xds_grpclb_request*>(gpr_malloc(sizeof(xds_grpclb_request)));
+  req->has_client_stats = false;
+  req->has_initial_request = true;
+  req->initial_request.has_name = true;
+  strncpy(req->initial_request.name, lb_service_name,
+          XDS_SERVICE_NAME_MAX_LENGTH);
+  return req;
+}
+
+static void populate_timestamp(gpr_timespec timestamp,
+                               xds_grpclb_timestamp* timestamp_pb) {
+  timestamp_pb->has_seconds = true;
+  timestamp_pb->seconds = timestamp.tv_sec;
+  timestamp_pb->has_nanos = true;
+  timestamp_pb->nanos = timestamp.tv_nsec;
+}
+
+static bool encode_string(pb_ostream_t* stream, const pb_field_t* field,
+                          void* const* arg) {
+  char* str = static_cast<char*>(*arg);
+  if (!pb_encode_tag_for_field(stream, field)) return false;
+  return pb_encode_string(stream, reinterpret_cast<uint8_t*>(str), strlen(str));
+}
+
+static bool encode_drops(pb_ostream_t* stream, const pb_field_t* field,
+                         void* const* arg) {
+  grpc_core::XdsLbClientStats::DroppedCallCounts* drop_entries =
+      static_cast<grpc_core::XdsLbClientStats::DroppedCallCounts*>(*arg);
+  if (drop_entries == nullptr) return true;
+  for (size_t i = 0; i < drop_entries->size(); ++i) {
+    if (!pb_encode_tag_for_field(stream, field)) return false;
+    grpc_lb_v1_ClientStatsPerToken drop_message;
+    drop_message.load_balance_token.funcs.encode = encode_string;
+    drop_message.load_balance_token.arg = (*drop_entries)[i].token.get();
+    drop_message.has_num_calls = true;
+    drop_message.num_calls = (*drop_entries)[i].count;
+    if (!pb_encode_submessage(stream, grpc_lb_v1_ClientStatsPerToken_fields,
+                              &drop_message)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+xds_grpclb_request* xds_grpclb_load_report_request_create_locked(
+    grpc_core::XdsLbClientStats* client_stats) {
+  xds_grpclb_request* req =
+      static_cast<xds_grpclb_request*>(gpr_zalloc(sizeof(xds_grpclb_request)));
+  req->has_client_stats = true;
+  req->client_stats.has_timestamp = true;
+  populate_timestamp(gpr_now(GPR_CLOCK_REALTIME), &req->client_stats.timestamp);
+  req->client_stats.has_num_calls_started = true;
+  req->client_stats.has_num_calls_finished = true;
+  req->client_stats.has_num_calls_finished_with_client_failed_to_send = true;
+  req->client_stats.has_num_calls_finished_known_received = true;
+  req->client_stats.calls_finished_with_drop.funcs.encode = encode_drops;
+  grpc_core::UniquePtr<grpc_core::XdsLbClientStats::DroppedCallCounts>
+      drop_counts;
+  client_stats->GetLocked(
+      &req->client_stats.num_calls_started,
+
&req->client_stats.num_calls_finished, + &req->client_stats.num_calls_finished_with_client_failed_to_send, + &req->client_stats.num_calls_finished_known_received, &drop_counts); + // Will be deleted in xds_grpclb_request_destroy(). + req->client_stats.calls_finished_with_drop.arg = drop_counts.release(); + return req; +} + +grpc_slice xds_grpclb_request_encode(const xds_grpclb_request* request) { + size_t encoded_length; + pb_ostream_t sizestream; + pb_ostream_t outputstream; + grpc_slice slice; + memset(&sizestream, 0, sizeof(pb_ostream_t)); + pb_encode(&sizestream, grpc_lb_v1_LoadBalanceRequest_fields, request); + encoded_length = sizestream.bytes_written; + + slice = GRPC_SLICE_MALLOC(encoded_length); + outputstream = + pb_ostream_from_buffer(GRPC_SLICE_START_PTR(slice), encoded_length); + GPR_ASSERT(pb_encode(&outputstream, grpc_lb_v1_LoadBalanceRequest_fields, + request) != 0); + return slice; +} + +void xds_grpclb_request_destroy(xds_grpclb_request* request) { + if (request->has_client_stats) { + grpc_core::XdsLbClientStats::DroppedCallCounts* drop_entries = + static_cast<grpc_core::XdsLbClientStats::DroppedCallCounts*>( + request->client_stats.calls_finished_with_drop.arg); + grpc_core::Delete(drop_entries); + } + gpr_free(request); +} + +typedef grpc_lb_v1_LoadBalanceResponse xds_grpclb_response; +xds_grpclb_initial_response* xds_grpclb_initial_response_parse( + grpc_slice encoded_xds_grpclb_response) { + pb_istream_t stream = + pb_istream_from_buffer(GRPC_SLICE_START_PTR(encoded_xds_grpclb_response), + GRPC_SLICE_LENGTH(encoded_xds_grpclb_response)); + xds_grpclb_response res; + memset(&res, 0, sizeof(xds_grpclb_response)); + if (GPR_UNLIKELY( + !pb_decode(&stream, grpc_lb_v1_LoadBalanceResponse_fields, &res))) { + gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(&stream)); + return nullptr; + } + + if (!res.has_initial_response) return nullptr; + + xds_grpclb_initial_response* initial_res = + static_cast<xds_grpclb_initial_response*>( + gpr_malloc(sizeof(xds_grpclb_initial_response))); + memcpy(initial_res, &res.initial_response, + sizeof(xds_grpclb_initial_response)); + + return initial_res; +} + +xds_grpclb_serverlist* xds_grpclb_response_parse_serverlist( + grpc_slice encoded_xds_grpclb_response) { + pb_istream_t stream = + pb_istream_from_buffer(GRPC_SLICE_START_PTR(encoded_xds_grpclb_response), + GRPC_SLICE_LENGTH(encoded_xds_grpclb_response)); + pb_istream_t stream_at_start = stream; + xds_grpclb_serverlist* sl = static_cast<xds_grpclb_serverlist*>( + gpr_zalloc(sizeof(xds_grpclb_serverlist))); + xds_grpclb_response res; + memset(&res, 0, sizeof(xds_grpclb_response)); + // First pass: count number of servers. + res.server_list.servers.funcs.decode = count_serverlist; + res.server_list.servers.arg = sl; + bool status = pb_decode(&stream, grpc_lb_v1_LoadBalanceResponse_fields, &res); + if (GPR_UNLIKELY(!status)) { + gpr_free(sl); + gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(&stream)); + return nullptr; + } + // Second pass: populate servers. 
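+  // (Decoding restarts from the saved stream_at_start position; the first
+  // pass above only counted servers so that the array allocated below can be
+  // sized exactly before this second decode fills it in.)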
+ if (sl->num_servers > 0) { + sl->servers = static_cast<xds_grpclb_server**>( + gpr_zalloc(sizeof(xds_grpclb_server*) * sl->num_servers)); + decode_serverlist_arg decode_arg; + memset(&decode_arg, 0, sizeof(decode_arg)); + decode_arg.serverlist = sl; + res.server_list.servers.funcs.decode = decode_serverlist; + res.server_list.servers.arg = &decode_arg; + status = pb_decode(&stream_at_start, grpc_lb_v1_LoadBalanceResponse_fields, + &res); + if (GPR_UNLIKELY(!status)) { + xds_grpclb_destroy_serverlist(sl); + gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(&stream)); + return nullptr; + } + } + return sl; +} + +void xds_grpclb_destroy_serverlist(xds_grpclb_serverlist* serverlist) { + if (serverlist == nullptr) { + return; + } + for (size_t i = 0; i < serverlist->num_servers; i++) { + gpr_free(serverlist->servers[i]); + } + gpr_free(serverlist->servers); + gpr_free(serverlist); +} + +xds_grpclb_serverlist* xds_grpclb_serverlist_copy( + const xds_grpclb_serverlist* sl) { + xds_grpclb_serverlist* copy = static_cast<xds_grpclb_serverlist*>( + gpr_zalloc(sizeof(xds_grpclb_serverlist))); + copy->num_servers = sl->num_servers; + copy->servers = static_cast<xds_grpclb_server**>( + gpr_malloc(sizeof(xds_grpclb_server*) * sl->num_servers)); + for (size_t i = 0; i < sl->num_servers; i++) { + copy->servers[i] = + static_cast<xds_grpclb_server*>(gpr_malloc(sizeof(xds_grpclb_server))); + memcpy(copy->servers[i], sl->servers[i], sizeof(xds_grpclb_server)); + } + return copy; +} + +bool xds_grpclb_serverlist_equals(const xds_grpclb_serverlist* lhs, + const xds_grpclb_serverlist* rhs) { + if (lhs == nullptr || rhs == nullptr) { + return false; + } + if (lhs->num_servers != rhs->num_servers) { + return false; + } + for (size_t i = 0; i < lhs->num_servers; i++) { + if (!xds_grpclb_server_equals(lhs->servers[i], rhs->servers[i])) { + return false; + } + } + return true; +} + +bool xds_grpclb_server_equals(const xds_grpclb_server* lhs, + const xds_grpclb_server* rhs) { + return memcmp(lhs, rhs, sizeof(xds_grpclb_server)) == 0; +} + +int xds_grpclb_duration_compare(const xds_grpclb_duration* lhs, + const xds_grpclb_duration* rhs) { + GPR_ASSERT(lhs && rhs); + if (lhs->has_seconds && rhs->has_seconds) { + if (lhs->seconds < rhs->seconds) return -1; + if (lhs->seconds > rhs->seconds) return 1; + } else if (lhs->has_seconds) { + return 1; + } else if (rhs->has_seconds) { + return -1; + } + + GPR_ASSERT(lhs->seconds == rhs->seconds); + if (lhs->has_nanos && rhs->has_nanos) { + if (lhs->nanos < rhs->nanos) return -1; + if (lhs->nanos > rhs->nanos) return 1; + } else if (lhs->has_nanos) { + return 1; + } else if (rhs->has_nanos) { + return -1; + } + + return 0; +} + +grpc_millis xds_grpclb_duration_to_millis(xds_grpclb_duration* duration_pb) { + return static_cast<grpc_millis>( + (duration_pb->has_seconds ? duration_pb->seconds : 0) * GPR_MS_PER_SEC + + (duration_pb->has_nanos ? duration_pb->nanos : 0) / GPR_NS_PER_MS); +} + +void xds_grpclb_initial_response_destroy( + xds_grpclb_initial_response* response) { + gpr_free(response); +} diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.h b/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.h new file mode 100644 index 0000000000..9d08defa7e --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.h @@ -0,0 +1,89 @@ +/* + * + * Copyright 2018 gRPC authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_LOAD_BALANCER_API_H +#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_LOAD_BALANCER_API_H + +#include <grpc/support/port_platform.h> + +#include <grpc/slice_buffer.h> + +#include "src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h" +#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h" +#include "src/core/ext/filters/client_channel/lb_policy_factory.h" + +#define XDS_SERVICE_NAME_MAX_LENGTH 128 + +typedef grpc_lb_v1_Server_ip_address_t xds_grpclb_ip_address; +typedef grpc_lb_v1_LoadBalanceRequest xds_grpclb_request; +typedef grpc_lb_v1_InitialLoadBalanceResponse xds_grpclb_initial_response; +typedef grpc_lb_v1_Server xds_grpclb_server; +typedef google_protobuf_Duration xds_grpclb_duration; +typedef google_protobuf_Timestamp xds_grpclb_timestamp; + +typedef struct { + xds_grpclb_server** servers; + size_t num_servers; +} xds_grpclb_serverlist; + +/** Create a request for a gRPC LB service under \a lb_service_name */ +xds_grpclb_request* xds_grpclb_request_create(const char* lb_service_name); +xds_grpclb_request* xds_grpclb_load_report_request_create_locked( + grpc_core::XdsLbClientStats* client_stats); + +/** Protocol Buffers v3-encode \a request */ +grpc_slice xds_grpclb_request_encode(const xds_grpclb_request* request); + +/** Destroy \a request */ +void xds_grpclb_request_destroy(xds_grpclb_request* request); + +/** Parse (ie, decode) the bytes in \a encoded_xds_grpclb_response as a \a + * xds_grpclb_initial_response */ +xds_grpclb_initial_response* xds_grpclb_initial_response_parse( + grpc_slice encoded_xds_grpclb_response); + +/** Parse the list of servers from an encoded \a xds_grpclb_response */ +xds_grpclb_serverlist* xds_grpclb_response_parse_serverlist( + grpc_slice encoded_xds_grpclb_response); + +/** Return a copy of \a sl. The caller is responsible for calling \a + * xds_grpclb_destroy_serverlist on the returned copy. 
*/ +xds_grpclb_serverlist* xds_grpclb_serverlist_copy( + const xds_grpclb_serverlist* sl); + +bool xds_grpclb_serverlist_equals(const xds_grpclb_serverlist* lhs, + const xds_grpclb_serverlist* rhs); + +bool xds_grpclb_server_equals(const xds_grpclb_server* lhs, + const xds_grpclb_server* rhs); + +/** Destroy \a serverlist */ +void xds_grpclb_destroy_serverlist(xds_grpclb_serverlist* serverlist); + +/** Compare \a lhs against \a rhs and return 0 if \a lhs and \a rhs are equal, + * < 0 if \a lhs represents a duration shorter than \a rhs and > 0 otherwise */ +int xds_grpclb_duration_compare(const xds_grpclb_duration* lhs, + const xds_grpclb_duration* rhs); + +grpc_millis xds_grpclb_duration_to_millis(xds_grpclb_duration* duration_pb); + +/** Destroy \a initial_response */ +void xds_grpclb_initial_response_destroy(xds_grpclb_initial_response* response); + +#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_XDS_XDS_LOAD_BALANCER_API_H \ + */ diff --git a/src/core/ext/filters/client_channel/lb_policy_factory.h b/src/core/ext/filters/client_channel/lb_policy_factory.h index c07792d8a7..a59deadb26 100644 --- a/src/core/ext/filters/client_channel/lb_policy_factory.h +++ b/src/core/ext/filters/client_channel/lb_policy_factory.h @@ -25,7 +25,7 @@ #include "src/core/ext/filters/client_channel/client_channel_factory.h" #include "src/core/ext/filters/client_channel/lb_policy.h" -#include "src/core/ext/filters/client_channel/uri_parser.h" +#include "src/core/lib/uri/uri_parser.h" // // representation of an LB address @@ -70,16 +70,14 @@ grpc_lb_addresses* grpc_lb_addresses_create( grpc_lb_addresses* grpc_lb_addresses_copy(const grpc_lb_addresses* addresses); /** Sets the value of the address at index \a index of \a addresses. - * \a address is a socket address of length \a address_len. - * Takes ownership of \a balancer_name. */ + * \a address is a socket address of length \a address_len. */ void grpc_lb_addresses_set_address(grpc_lb_addresses* addresses, size_t index, const void* address, size_t address_len, bool is_balancer, const char* balancer_name, void* user_data); /** Sets the value of the address at index \a index of \a addresses from \a uri. - * Returns true upon success, false otherwise. Takes ownership of \a - * balancer_name. */ + * Returns true upon success, false otherwise. */ bool grpc_lb_addresses_set_address_from_uri(grpc_lb_addresses* addresses, size_t index, const grpc_uri* uri, bool is_balancer, diff --git a/src/core/ext/filters/client_channel/parse_address.cc b/src/core/ext/filters/client_channel/parse_address.cc index b3900114ad..707beb8876 100644 --- a/src/core/ext/filters/client_channel/parse_address.cc +++ b/src/core/ext/filters/client_channel/parse_address.cc @@ -125,27 +125,41 @@ bool grpc_parse_ipv6_hostport(const char* hostport, grpc_resolved_address* addr, char* host_end = static_cast<char*>(gpr_memrchr(host, '%', strlen(host))); if (host_end != nullptr) { GPR_ASSERT(host_end >= host); - char host_without_scope[GRPC_INET6_ADDRSTRLEN]; + char host_without_scope[GRPC_INET6_ADDRSTRLEN + 1]; size_t host_without_scope_len = static_cast<size_t>(host_end - host); uint32_t sin6_scope_id = 0; + if (host_without_scope_len > GRPC_INET6_ADDRSTRLEN) { + if (log_errors) { + gpr_log( + GPR_ERROR, + "invalid ipv6 address length %zu. 
Length cannot be greater than "
+            "GRPC_INET6_ADDRSTRLEN (i.e., %d)",
+            host_without_scope_len, GRPC_INET6_ADDRSTRLEN);
+      }
+      goto done;
+    }
     strncpy(host_without_scope, host, host_without_scope_len);
     host_without_scope[host_without_scope_len] = '\0';
     if (grpc_inet_pton(GRPC_AF_INET6, host_without_scope, &in6->sin6_addr) ==
         0) {
-      gpr_log(GPR_ERROR, "invalid ipv6 address: '%s'", host_without_scope);
+      if (log_errors) {
+        gpr_log(GPR_ERROR, "invalid ipv6 address: '%s'", host_without_scope);
+      }
       goto done;
     }
     if (gpr_parse_bytes_to_uint32(host_end + 1,
                                   strlen(host) - host_without_scope_len - 1,
                                   &sin6_scope_id) == 0) {
-      gpr_log(GPR_ERROR, "invalid ipv6 scope id: '%s'", host_end + 1);
+      if (log_errors) {
+        gpr_log(GPR_ERROR, "invalid ipv6 scope id: '%s'", host_end + 1);
+      }
       goto done;
     }
     // Handle "sin6_scope_id" being type "u_long". See grpc issue #10027.
     in6->sin6_scope_id = sin6_scope_id;
   } else {
     if (grpc_inet_pton(GRPC_AF_INET6, host, &in6->sin6_addr) == 0) {
-      gpr_log(GPR_ERROR, "invalid ipv6 address: '%s'", host);
+      if (log_errors) gpr_log(GPR_ERROR, "invalid ipv6 address: '%s'", host);
       goto done;
     }
   }
@@ -190,3 +204,12 @@ bool grpc_parse_uri(const grpc_uri* uri, grpc_resolved_address* resolved_addr) {
   gpr_log(GPR_ERROR, "Can't parse scheme '%s'", uri->scheme);
   return false;
 }
+
+uint16_t grpc_strhtons(const char* port) {
+  if (strcmp(port, "http") == 0) {
+    return htons(80);
+  } else if (strcmp(port, "https") == 0) {
+    return htons(443);
+  }
+  return htons(static_cast<unsigned short>(atoi(port)));
+}
diff --git a/src/core/ext/filters/client_channel/parse_address.h b/src/core/ext/filters/client_channel/parse_address.h
index 9a88b66edc..5c050a2333 100644
--- a/src/core/ext/filters/client_channel/parse_address.h
+++ b/src/core/ext/filters/client_channel/parse_address.h
@@ -23,8 +23,8 @@
 
 #include <stddef.h>
 
-#include "src/core/ext/filters/client_channel/uri_parser.h"
 #include "src/core/lib/iomgr/resolve_address.h"
+#include "src/core/lib/uri/uri_parser.h"
 
 /** Populate \a resolved_addr from \a uri, whose path is expected to contain a
  * unix socket path. Returns true upon success. */
@@ -47,4 +47,7 @@ bool grpc_parse_ipv4_hostport(const char* hostport, grpc_resolved_address* addr,
 bool grpc_parse_ipv6_hostport(const char* hostport, grpc_resolved_address* addr,
                               bool log_errors);
 
+/* Converts a named or numeric port to a uint16 suitable for use in a
+ * sockaddr. */
+uint16_t grpc_strhtons(const char* port);
+
 #endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_PARSE_ADDRESS_H */
diff --git a/src/core/ext/filters/client_channel/resolver.h b/src/core/ext/filters/client_channel/resolver.h
index 48f2e89095..e9acbb7c41 100644
--- a/src/core/ext/filters/client_channel/resolver.h
+++ b/src/core/ext/filters/client_channel/resolver.h
@@ -81,18 +81,7 @@ class Resolver : public InternallyRefCountedWithTracing<Resolver> {
   ///
   /// If this causes new data to become available, then the currently
   /// pending call to \a NextLocked() will return the new result.
-  ///
-  /// Note: Currently, all resolvers are required to return a new result
-  /// shortly after this method is called. For pull-based mechanisms, if
-  /// the implementation decides to delay querying the name service, it
-  /// should immediately return a new copy of the previously returned
-  /// result (and it can then return the updated data later, when it
-  /// actually does query the name service). For push-based mechanisms,
-  /// the implementation should immediately return a new copy of the
-  /// last-seen result.
- /// TODO(roth): Remove this requirement once we fix pick_first to not - /// throw away unselected subchannels. - virtual void RequestReresolutionLocked() GRPC_ABSTRACT; + virtual void RequestReresolutionLocked() {} /// Resets the re-resolution backoff, if any. /// This needs to be implemented only by pull-based implementations; diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc index f2bb5f3c71..9562a3f893 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc @@ -120,6 +120,8 @@ class AresDnsResolver : public Resolver { grpc_lb_addresses* lb_addresses_ = nullptr; /// currently resolving service config char* service_config_json_ = nullptr; + // has shutdown been initiated + bool shutdown_initiated_ = false; }; AresDnsResolver::AresDnsResolver(const ResolverArgs& args) @@ -197,11 +199,12 @@ void AresDnsResolver::ResetBackoffLocked() { } void AresDnsResolver::ShutdownLocked() { + shutdown_initiated_ = true; if (have_next_resolution_timer_) { grpc_timer_cancel(&next_resolution_timer_); } if (pending_request_ != nullptr) { - grpc_cancel_ares_request(pending_request_); + grpc_cancel_ares_request_locked(pending_request_); } if (next_completion_ != nullptr) { *target_result_ = nullptr; @@ -213,9 +216,13 @@ void AresDnsResolver::ShutdownLocked() { void AresDnsResolver::OnNextResolutionLocked(void* arg, grpc_error* error) { AresDnsResolver* r = static_cast<AresDnsResolver*>(arg); + GRPC_CARES_TRACE_LOG( + "%p re-resolution timer fired. error: %s. shutdown_initiated_: %d", r, + grpc_error_string(error), r->shutdown_initiated_); r->have_next_resolution_timer_ = false; - if (error == GRPC_ERROR_NONE) { + if (error == GRPC_ERROR_NONE && !r->shutdown_initiated_) { if (!r->resolving_) { + GRPC_CARES_TRACE_LOG("%p start resolving due to re-resolution timer", r); r->StartResolvingLocked(); } } @@ -298,6 +305,7 @@ void AresDnsResolver::OnResolvedLocked(void* arg, grpc_error* error) { grpc_channel_args* result = nullptr; GPR_ASSERT(r->resolving_); r->resolving_ = false; + gpr_free(r->pending_request_); r->pending_request_ = nullptr; if (r->lb_addresses_ != nullptr) { static const char* args_to_remove[2]; @@ -339,7 +347,7 @@ void AresDnsResolver::OnResolvedLocked(void* arg, grpc_error* error) { // Reset backoff state so that we start from the beginning when the // next request gets triggered. r->backoff_.Reset(); - } else { + } else if (!r->shutdown_initiated_) { const char* msg = grpc_error_string(error); gpr_log(GPR_DEBUG, "dns resolution failed: %s", msg); grpc_millis next_try = r->backoff_.NextAttemptTime(); @@ -373,13 +381,7 @@ void AresDnsResolver::OnResolvedLocked(void* arg, grpc_error* error) { void AresDnsResolver::MaybeStartResolvingLocked() { // If there is an existing timer, the time it fires is the earliest time we // can start the next resolution. - if (have_next_resolution_timer_) { - // TODO(dgq): remove the following two lines once Pick First stops - // discarding subchannels after selecting. 
- ++resolved_version_; - MaybeFinishNextLocked(); - return; - } + if (have_next_resolution_timer_) return; if (last_resolution_timestamp_ >= 0) { const grpc_millis earliest_next_resolution = last_resolution_timestamp_ + min_time_between_resolutions_; @@ -401,10 +403,6 @@ void AresDnsResolver::MaybeStartResolvingLocked() { self.release(); grpc_timer_init(&next_resolution_timer_, ms_until_next_resolution, &on_next_resolution_); - // TODO(dgq): remove the following two lines once Pick First stops - // discarding subchannels after selecting. - ++resolved_version_; - MaybeFinishNextLocked(); return; } } @@ -483,7 +481,9 @@ void grpc_resolver_dns_ares_init() { GRPC_LOG_IF_ERROR("ares_library_init() failed", error); return; } - default_resolver = grpc_resolve_address_impl; + if (default_resolver == nullptr) { + default_resolver = grpc_resolve_address_impl; + } grpc_set_resolver_impl(&ares_resolver); grpc_core::ResolverRegistry::Builder::RegisterResolverFactory( grpc_core::UniquePtr<grpc_core::ResolverFactory>( diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc index 485998f5e4..582e2203fc 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc @@ -87,15 +87,6 @@ typedef struct grpc_ares_hostbyname_request { static void do_basic_init(void) { gpr_mu_init(&g_init_mu); } -static uint16_t strhtons(const char* port) { - if (strcmp(port, "http") == 0) { - return htons(80); - } else if (strcmp(port, "https") == 0) { - return htons(443); - } - return htons(static_cast<unsigned short>(atoi(port))); -} - static void log_address_sorting_list(grpc_lb_addresses* lb_addrs, const char* input_output_str) { for (size_t i = 0; i < lb_addrs->num_addresses; i++) { @@ -139,12 +130,6 @@ void grpc_cares_wrapper_address_sorting_sort(grpc_lb_addresses* lb_addrs) { } } -/* Allow tests to access grpc_ares_wrapper_address_sorting_sort */ -void grpc_cares_wrapper_test_only_address_sorting_sort( - grpc_lb_addresses* lb_addrs) { - grpc_cares_wrapper_address_sorting_sort(lb_addrs); -} - static void grpc_ares_request_ref_locked(grpc_ares_request* r) { r->pending_queries++; } @@ -159,12 +144,12 @@ static void grpc_ares_request_unref_locked(grpc_ares_request* r) { void grpc_ares_complete_request_locked(grpc_ares_request* r) { /* Invoke on_done callback and destroy the request */ + r->ev_driver = nullptr; grpc_lb_addresses* lb_addrs = *(r->lb_addrs_out); if (lb_addrs != nullptr) { grpc_cares_wrapper_address_sorting_sort(lb_addrs); } GRPC_CLOSURE_SCHED(r->on_done, r->error); - gpr_free(r); } static grpc_ares_hostbyname_request* create_hostbyname_request_locked( @@ -371,14 +356,12 @@ done: grpc_ares_request_unref_locked(r); } -static grpc_ares_request* grpc_dns_lookup_ares_locked_impl( - const char* dns_server, const char* name, const char* default_port, - grpc_pollset_set* interested_parties, grpc_closure* on_done, - grpc_lb_addresses** addrs, bool check_grpclb, char** service_config_json, - grpc_combiner* combiner) { +void grpc_dns_lookup_ares_continue_after_check_localhost_and_ip_literals_locked( + grpc_ares_request* r, const char* dns_server, const char* name, + const char* default_port, grpc_pollset_set* interested_parties, + bool check_grpclb, grpc_combiner* combiner) { grpc_error* error = GRPC_ERROR_NONE; grpc_ares_hostbyname_request* hr = nullptr; - grpc_ares_request* r = nullptr; ares_channel* 
channel = nullptr; /* TODO(zyc): Enable tracing after #9603 is checked in */ /* if (grpc_dns_trace) { @@ -404,14 +387,6 @@ static grpc_ares_request* grpc_dns_lookup_ares_locked_impl( } port = gpr_strdup(default_port); } - r = static_cast<grpc_ares_request*>(gpr_zalloc(sizeof(grpc_ares_request))); - r->ev_driver = nullptr; - r->on_done = on_done; - r->lb_addrs_out = addrs; - r->service_config_json_out = service_config_json; - r->success = false; - r->error = GRPC_ERROR_NONE; - r->pending_queries = 0; error = grpc_ares_ev_driver_create_locked(&r->ev_driver, interested_parties, combiner, r); if (error != GRPC_ERROR_NONE) goto error_cleanup; @@ -454,12 +429,12 @@ static grpc_ares_request* grpc_dns_lookup_ares_locked_impl( } r->pending_queries = 1; if (grpc_ares_query_ipv6()) { - hr = create_hostbyname_request_locked(r, host, strhtons(port), + hr = create_hostbyname_request_locked(r, host, grpc_strhtons(port), false /* is_balancer */); ares_gethostbyname(*channel, hr->host, AF_INET6, on_hostbyname_done_locked, hr); } - hr = create_hostbyname_request_locked(r, host, strhtons(port), + hr = create_hostbyname_request_locked(r, host, grpc_strhtons(port), false /* is_balancer */); ares_gethostbyname(*channel, hr->host, AF_INET, on_hostbyname_done_locked, hr); @@ -472,7 +447,7 @@ static grpc_ares_request* grpc_dns_lookup_ares_locked_impl( on_srv_query_done_locked, r); gpr_free(service_name); } - if (service_config_json != nullptr) { + if (r->service_config_json_out != nullptr) { grpc_ares_request_ref_locked(r); char* config_name; gpr_asprintf(&config_name, "_grpc_config.%s", host); @@ -484,14 +459,95 @@ static grpc_ares_request* grpc_dns_lookup_ares_locked_impl( grpc_ares_request_unref_locked(r); gpr_free(host); gpr_free(port); - return r; + return; error_cleanup: - GRPC_CLOSURE_SCHED(on_done, error); - gpr_free(r); + GRPC_CLOSURE_SCHED(r->on_done, error); + gpr_free(host); + gpr_free(port); +} + +static bool inner_resolve_as_ip_literal_locked(const char* name, + const char* default_port, + grpc_lb_addresses** addrs, + char** host, char** port, + char** hostport) { + gpr_split_host_port(name, host, port); + if (*host == nullptr) { + gpr_log(GPR_ERROR, + "Failed to parse %s to host:port while attempting to resolve as ip " + "literal.", + name); + return false; + } + if (*port == nullptr) { + if (default_port == nullptr) { + gpr_log(GPR_ERROR, + "No port or default port for %s while attempting to resolve as " + "ip literal.", + name); + return false; + } + *port = gpr_strdup(default_port); + } + grpc_resolved_address addr; + GPR_ASSERT(gpr_join_host_port(hostport, *host, atoi(*port))); + if (grpc_parse_ipv4_hostport(*hostport, &addr, false /* log errors */) || + grpc_parse_ipv6_hostport(*hostport, &addr, false /* log errors */)) { + GPR_ASSERT(*addrs == nullptr); + *addrs = grpc_lb_addresses_create(1, nullptr); + grpc_lb_addresses_set_address( + *addrs, 0, addr.addr, addr.len, false /* is_balancer */, + nullptr /* balancer_name */, nullptr /* user_data */); + return true; + } + return false; +} + +static bool resolve_as_ip_literal_locked(const char* name, + const char* default_port, + grpc_lb_addresses** addrs) { + char* host = nullptr; + char* port = nullptr; + char* hostport = nullptr; + bool out = inner_resolve_as_ip_literal_locked(name, default_port, addrs, + &host, &port, &hostport); gpr_free(host); gpr_free(port); - return nullptr; + gpr_free(hostport); + return out; +} + +static grpc_ares_request* grpc_dns_lookup_ares_locked_impl( + const char* dns_server, const char* name, const char* 
default_port, + grpc_pollset_set* interested_parties, grpc_closure* on_done, + grpc_lb_addresses** addrs, bool check_grpclb, char** service_config_json, + grpc_combiner* combiner) { + grpc_ares_request* r = + static_cast<grpc_ares_request*>(gpr_zalloc(sizeof(grpc_ares_request))); + r->ev_driver = nullptr; + r->on_done = on_done; + r->lb_addrs_out = addrs; + r->service_config_json_out = service_config_json; + r->success = false; + r->error = GRPC_ERROR_NONE; + r->pending_queries = 0; + // Early out if the target is an ipv4 or ipv6 literal. + if (resolve_as_ip_literal_locked(name, default_port, addrs)) { + GRPC_CLOSURE_SCHED(on_done, GRPC_ERROR_NONE); + return r; + } + // Early out if the target is localhost and we're on Windows. + if (grpc_ares_maybe_resolve_localhost_manually_locked(name, default_port, + addrs)) { + GRPC_CLOSURE_SCHED(on_done, GRPC_ERROR_NONE); + return r; + } + // Look up name using c-ares lib. + grpc_dns_lookup_ares_continue_after_check_localhost_and_ip_literals_locked( + r, dns_server, name, default_port, interested_parties, check_grpclb, + combiner); + return r; } grpc_ares_request* (*grpc_dns_lookup_ares_locked)( @@ -500,12 +556,16 @@ grpc_ares_request* (*grpc_dns_lookup_ares_locked)( grpc_lb_addresses** addrs, bool check_grpclb, char** service_config_json, grpc_combiner* combiner) = grpc_dns_lookup_ares_locked_impl; -void grpc_cancel_ares_request(grpc_ares_request* r) { - if (grpc_dns_lookup_ares_locked == grpc_dns_lookup_ares_locked_impl) { +static void grpc_cancel_ares_request_locked_impl(grpc_ares_request* r) { + GPR_ASSERT(r != nullptr); + if (r->ev_driver != nullptr) { grpc_ares_ev_driver_shutdown_locked(r->ev_driver); } } +void (*grpc_cancel_ares_request_locked)(grpc_ares_request* r) = + grpc_cancel_ares_request_locked_impl; + grpc_error* grpc_ares_init(void) { gpr_once_init(&g_basic_init, do_basic_init); gpr_mu_lock(&g_init_mu); @@ -542,20 +602,23 @@ typedef struct grpc_resolve_address_ares_request { grpc_lb_addresses* lb_addrs; /** closure to call when the resolve_address_ares request completes */ grpc_closure* on_resolve_address_done; - /** a closure wrapping on_dns_lookup_done_cb, which should be invoked when the - grpc_dns_lookup_ares_locked operation is done. */ - grpc_closure on_dns_lookup_done; + /** a closure wrapping on_resolve_address_done, which should be invoked when + the grpc_dns_lookup_ares_locked operation is done. 
*/ + grpc_closure on_dns_lookup_done_locked; /* target name */ const char* name; /* default port to use if none is specified */ const char* default_port; /* pollset_set to be driven by */ grpc_pollset_set* interested_parties; + /* underlying ares_request that the query is performed on */ + grpc_ares_request* ares_request; } grpc_resolve_address_ares_request; -static void on_dns_lookup_done_cb(void* arg, grpc_error* error) { +static void on_dns_lookup_done_locked(void* arg, grpc_error* error) { grpc_resolve_address_ares_request* r = static_cast<grpc_resolve_address_ares_request*>(arg); + gpr_free(r->ares_request); grpc_resolved_addresses** resolved_addresses = r->addrs_out; if (r->lb_addrs == nullptr || r->lb_addrs->num_addresses == 0) { *resolved_addresses = nullptr; @@ -582,9 +645,9 @@ static void grpc_resolve_address_invoke_dns_lookup_ares_locked( void* arg, grpc_error* unused_error) { grpc_resolve_address_ares_request* r = static_cast<grpc_resolve_address_ares_request*>(arg); - grpc_dns_lookup_ares_locked( + r->ares_request = grpc_dns_lookup_ares_locked( nullptr /* dns_server */, r->name, r->default_port, r->interested_parties, - &r->on_dns_lookup_done, &r->lb_addrs, false /* check_grpclb */, + &r->on_dns_lookup_done_locked, &r->lb_addrs, false /* check_grpclb */, nullptr /* service_config_json */, r->combiner); } @@ -599,8 +662,8 @@ static void grpc_resolve_address_ares_impl(const char* name, r->combiner = grpc_combiner_create(); r->addrs_out = addrs; r->on_resolve_address_done = on_done; - GRPC_CLOSURE_INIT(&r->on_dns_lookup_done, on_dns_lookup_done_cb, r, - grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_INIT(&r->on_dns_lookup_done_locked, on_dns_lookup_done_locked, r, + grpc_combiner_scheduler(r->combiner)); r->name = name; r->default_port = default_port; r->interested_parties = interested_parties; diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h index ca5779e1d7..a1231cc4e0 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h @@ -54,7 +54,8 @@ extern void (*grpc_resolve_address_ares)(const char* name, port in \a name. grpc_ares_init() must be called at least once before this function. \a on_done may be called directly in this function without being scheduled with \a exec_ctx, so it must not try to acquire locks that are - being held by the caller. */ + being held by the caller. The returned grpc_ares_request object is owned + by the caller and it is safe to free after on_done is called back. */ extern grpc_ares_request* (*grpc_dns_lookup_ares_locked)( const char* dns_server, const char* name, const char* default_port, grpc_pollset_set* interested_parties, grpc_closure* on_done, @@ -62,7 +63,7 @@ extern grpc_ares_request* (*grpc_dns_lookup_ares_locked)( char** service_config_json, grpc_combiner* combiner); /* Cancel the pending grpc_ares_request \a request */ -void grpc_cancel_ares_request(grpc_ares_request* request); +extern void (*grpc_cancel_ares_request_locked)(grpc_ares_request* request); /* Initialize gRPC ares wrapper. Must be called at least once before grpc_resolve_address_ares(). */ @@ -81,9 +82,15 @@ void grpc_ares_complete_request_locked(grpc_ares_request* request); /* E.g., return false if ipv6 is known to not be available. 
*/ bool grpc_ares_query_ipv6(); -/* Exposed only for testing */ -void grpc_cares_wrapper_test_only_address_sorting_sort( - grpc_lb_addresses* lb_addrs); +/* Maybe (depending on the current platform) checks if "name" matches + * "localhost" and if so fills in addrs with the correct sockaddr structures. + * Returns a bool indicating whether or not such an action was performed. + * See https://github.com/grpc/grpc/issues/15158. */ +bool grpc_ares_maybe_resolve_localhost_manually_locked( + const char* name, const char* default_port, grpc_lb_addresses** addrs); + +/* Sorts destinations in lb_addrs according to RFC 6724. */ +void grpc_cares_wrapper_address_sorting_sort(grpc_lb_addresses* lb_addrs); #endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RESOLVER_DNS_C_ARES_GRPC_ARES_WRAPPER_H \ */ diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.cc index d6a76fc8b6..9f293c1ac0 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.cc @@ -40,7 +40,10 @@ grpc_ares_request* (*grpc_dns_lookup_ares_locked)( grpc_lb_addresses** addrs, bool check_grpclb, char** service_config_json, grpc_combiner* combiner) = grpc_dns_lookup_ares_locked_impl; -void grpc_cancel_ares_request(grpc_ares_request* r) {} +static void grpc_cancel_ares_request_locked_impl(grpc_ares_request* r) {} + +void (*grpc_cancel_ares_request_locked)(grpc_ares_request* r) = + grpc_cancel_ares_request_locked_impl; grpc_error* grpc_ares_init(void) { return GRPC_ERROR_NONE; } diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_posix.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_posix.cc index 23c0fec74f..639eec2323 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_posix.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_posix.cc @@ -26,4 +26,9 @@ bool grpc_ares_query_ipv6() { return grpc_ipv6_loopback_available(); } +bool grpc_ares_maybe_resolve_localhost_manually_locked( + const char* name, const char* default_port, grpc_lb_addresses** addrs) { + return false; +} + #endif /* GRPC_ARES == 1 && defined(GRPC_POSIX_SOCKET_ARES_EV_DRIVER) */ diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc index ee827e284e..7e34784691 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc @@ -21,9 +21,79 @@ #include "src/core/lib/iomgr/port.h" #if GRPC_ARES == 1 && defined(GPR_WINDOWS) +#include <grpc/support/string_util.h> + +#include "src/core/ext/filters/client_channel/lb_policy_factory.h" +#include "src/core/ext/filters/client_channel/parse_address.h" #include "src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h" +#include "src/core/lib/gpr/host_port.h" +#include "src/core/lib/gpr/string.h" #include "src/core/lib/iomgr/socket_windows.h" bool grpc_ares_query_ipv6() { return grpc_ipv6_loopback_available(); } +static bool inner_maybe_resolve_localhost_manually_locked( + const char* name, const char* default_port, grpc_lb_addresses** addrs, + char** host, char** port) { + 
gpr_split_host_port(name, host, port); + if (*host == nullptr) { + gpr_log(GPR_ERROR, + "Failed to parse %s into host:port during Windows localhost " + "resolution check.", + name); + return false; + } + if (*port == nullptr) { + if (default_port == nullptr) { + gpr_log(GPR_ERROR, + "No port or default port for %s during Windows localhost " + "resolution check.", + name); + return false; + } + *port = gpr_strdup(default_port); + } + if (gpr_stricmp(*host, "localhost") == 0) { + GPR_ASSERT(*addrs == nullptr); + *addrs = grpc_lb_addresses_create(2, nullptr); + uint16_t numeric_port = grpc_strhtons(*port); + // Append the ipv6 loopback address. + struct sockaddr_in6 ipv6_loopback_addr; + memset(&ipv6_loopback_addr, 0, sizeof(ipv6_loopback_addr)); + ((char*)&ipv6_loopback_addr.sin6_addr)[15] = 1; + ipv6_loopback_addr.sin6_family = AF_INET6; + ipv6_loopback_addr.sin6_port = numeric_port; + grpc_lb_addresses_set_address( + *addrs, 0, &ipv6_loopback_addr, sizeof(ipv6_loopback_addr), + false /* is_balancer */, nullptr /* balancer_name */, + nullptr /* user_data */); + // Append the ipv4 loopback address. + struct sockaddr_in ipv4_loopback_addr; + memset(&ipv4_loopback_addr, 0, sizeof(ipv4_loopback_addr)); + ((char*)&ipv4_loopback_addr.sin_addr)[0] = 0x7f; + ((char*)&ipv4_loopback_addr.sin_addr)[3] = 0x01; + ipv4_loopback_addr.sin_family = AF_INET; + ipv4_loopback_addr.sin_port = numeric_port; + grpc_lb_addresses_set_address( + *addrs, 1, &ipv4_loopback_addr, sizeof(ipv4_loopback_addr), + false /* is_balancer */, nullptr /* balancer_name */, + nullptr /* user_data */); + // Let the address sorter figure out which one should be tried first. + grpc_cares_wrapper_address_sorting_sort(*addrs); + return true; + } + return false; +} + +bool grpc_ares_maybe_resolve_localhost_manually_locked( + const char* name, const char* default_port, grpc_lb_addresses** addrs) { + char* host = nullptr; + char* port = nullptr; + bool out = inner_maybe_resolve_localhost_manually_locked(name, default_port, + addrs, &host, &port); + gpr_free(host); + gpr_free(port); + return out; +} + #endif /* GRPC_ARES == 1 && defined(GPR_WINDOWS) */ diff --git a/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc b/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc index 282caf215c..65ff1ec1a5 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc @@ -247,13 +247,7 @@ void NativeDnsResolver::OnResolvedLocked(void* arg, grpc_error* error) { void NativeDnsResolver::MaybeStartResolvingLocked() { // If there is an existing timer, the time it fires is the earliest time we // can start the next resolution. - if (have_next_resolution_timer_) { - // TODO(dgq): remove the following two lines once Pick First stops - // discarding subchannels after selecting. - ++resolved_version_; - MaybeFinishNextLocked(); - return; - } + if (have_next_resolution_timer_) return; if (last_resolution_timestamp_ >= 0) { const grpc_millis earliest_next_resolution = last_resolution_timestamp_ + min_time_between_resolutions_; @@ -275,10 +269,6 @@ void NativeDnsResolver::MaybeStartResolvingLocked() { self.release(); grpc_timer_init(&next_resolution_timer_, ms_until_next_resolution, &on_next_resolution_); - // TODO(dgq): remove the following two lines once Pick First stops - // discarding subchannels after selecting. 
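The inner_maybe_resolve_localhost_manually_locked() hunk above hand-builds both loopback addresses instead of asking c-ares. Below is a minimal standalone sketch of that construction, using POSIX socket headers for brevity (the diff targets Winsock, but the sockaddr layouts match) and illustrative type names rather than grpc_lb_addresses:

#include <arpa/inet.h>   // htons, htonl
#include <netinet/in.h>  // sockaddr_in, sockaddr_in6, in6addr_loopback
#include <sys/socket.h>  // sockaddr_storage, socklen_t
#include <cstdint>
#include <cstring>
#include <vector>

struct ResolvedAddr {
  sockaddr_storage storage;
  socklen_t len;
};

std::vector<ResolvedAddr> ResolveLocalhostManually(uint16_t port) {
  std::vector<ResolvedAddr> addrs(2);  // value-initialized (zeroed)
  // [::1]:port -- the diff sets byte 15 of sin6_addr to 1 by hand.
  sockaddr_in6 v6 = {};
  v6.sin6_family = AF_INET6;
  v6.sin6_addr = in6addr_loopback;
  v6.sin6_port = htons(port);
  std::memcpy(&addrs[0].storage, &v6, sizeof(v6));
  addrs[0].len = sizeof(v6);
  // 127.0.0.1:port -- the diff writes bytes 0x7f..0x01 directly.
  sockaddr_in v4 = {};
  v4.sin_family = AF_INET;
  v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  v4.sin_port = htons(port);
  std::memcpy(&addrs[1].storage, &v4, sizeof(v4));
  addrs[1].len = sizeof(v4);
  // The real code then hands both entries to the RFC 6724 sorter so the
  // preferred address family is tried first.
  return addrs;
}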
- ++resolved_version_; - MaybeFinishNextLocked(); return; } } diff --git a/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc b/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc index 99a33f2277..144ac24a56 100644 --- a/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc +++ b/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc @@ -73,11 +73,6 @@ class FakeResolver : public Resolver { // Results to use for the pretended re-resolution in // RequestReresolutionLocked(). grpc_channel_args* reresolution_results_ = nullptr; - // TODO(juanlishen): This can go away once pick_first is changed to not throw - // away its subchannels, since that will eliminate its dependence on - // channel_saw_error_locked() causing an immediate resolver return. - // A copy of the most-recently used resolution results. - grpc_channel_args* last_used_results_ = nullptr; // pending next completion, or NULL grpc_closure* next_completion_ = nullptr; // target result address for next completion @@ -96,7 +91,6 @@ FakeResolver::FakeResolver(const ResolverArgs& args) : Resolver(args.combiner) { FakeResolver::~FakeResolver() { grpc_channel_args_destroy(next_results_); grpc_channel_args_destroy(reresolution_results_); - grpc_channel_args_destroy(last_used_results_); grpc_channel_args_destroy(channel_args_); } @@ -109,17 +103,11 @@ void FakeResolver::NextLocked(grpc_channel_args** target_result, } void FakeResolver::RequestReresolutionLocked() { - // A resolution must have been returned before an error is seen. - GPR_ASSERT(last_used_results_ != nullptr); - grpc_channel_args_destroy(next_results_); if (reresolution_results_ != nullptr) { + grpc_channel_args_destroy(next_results_); next_results_ = grpc_channel_args_copy(reresolution_results_); - } else { - // If reresolution_results is unavailable, re-resolve with the most-recently - // used results to avoid a no-op re-resolution. - next_results_ = grpc_channel_args_copy(last_used_results_); + MaybeFinishNextLocked(); } - MaybeFinishNextLocked(); } void FakeResolver::MaybeFinishNextLocked() { @@ -161,8 +149,6 @@ void FakeResolverResponseGenerator::SetResponseLocked(void* arg, FakeResolver* resolver = closure_arg->generator->resolver_; grpc_channel_args_destroy(resolver->next_results_); resolver->next_results_ = closure_arg->response; - grpc_channel_args_destroy(resolver->last_used_results_); - resolver->last_used_results_ = grpc_channel_args_copy(closure_arg->response); resolver->MaybeFinishNextLocked(); Delete(closure_arg); } diff --git a/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h b/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h index e5175f9b7b..74a3062e7f 100644 --- a/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h +++ b/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h @@ -20,9 +20,9 @@ #include <grpc/support/port_platform.h> #include "src/core/ext/filters/client_channel/lb_policy_factory.h" -#include "src/core/ext/filters/client_channel/uri_parser.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/gprpp/ref_counted.h" +#include "src/core/lib/uri/uri_parser.h" #define GRPC_ARG_FAKE_RESOLVER_RESPONSE_GENERATOR \ "grpc.fake_resolver.response_generator" @@ -53,7 +53,8 @@ class FakeResolverResponseGenerator // The new re-resolution response replaces any previous re-resolution // response that may have been set by a previous call. 
// If the re-resolution response is set to NULL, then the fake - // resolver will return the last value set via \a SetResponse(). + // resolver will not return anything when \a RequestReresolutionLocked() + // is called. void SetReresolutionResponse(grpc_channel_args* response); // Tells the resolver to return a transient failure (signalled by diff --git a/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc b/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc index f74ac5aebe..801734764b 100644 --- a/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc +++ b/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc @@ -50,8 +50,6 @@ class SockaddrResolver : public Resolver { void NextLocked(grpc_channel_args** result, grpc_closure* on_complete) override; - void RequestReresolutionLocked() override; - void ShutdownLocked() override; private: @@ -90,11 +88,6 @@ void SockaddrResolver::NextLocked(grpc_channel_args** target_result, MaybeFinishNextLocked(); } -void SockaddrResolver::RequestReresolutionLocked() { - published_ = false; - MaybeFinishNextLocked(); -} - void SockaddrResolver::ShutdownLocked() { if (next_completion_ != nullptr) { *target_result_ = nullptr; diff --git a/src/core/ext/filters/client_channel/resolver_factory.h b/src/core/ext/filters/client_channel/resolver_factory.h index ee3cfeeb9b..d891ef62e1 100644 --- a/src/core/ext/filters/client_channel/resolver_factory.h +++ b/src/core/ext/filters/client_channel/resolver_factory.h @@ -24,11 +24,11 @@ #include <grpc/support/string_util.h> #include "src/core/ext/filters/client_channel/resolver.h" -#include "src/core/ext/filters/client_channel/uri_parser.h" #include "src/core/lib/gprpp/abstract.h" #include "src/core/lib/gprpp/memory.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/iomgr/pollset_set.h" +#include "src/core/lib/uri/uri_parser.h" namespace grpc_core { diff --git a/src/core/ext/filters/client_channel/subchannel.cc b/src/core/ext/filters/client_channel/subchannel.cc index 0e40f42e18..a56db0201b 100644 --- a/src/core/ext/filters/client_channel/subchannel.cc +++ b/src/core/ext/filters/client_channel/subchannel.cc @@ -30,10 +30,10 @@ #include <grpc/support/string_util.h> #include "src/core/ext/filters/client_channel/client_channel.h" +#include "src/core/ext/filters/client_channel/health/health_check_client.h" #include "src/core/ext/filters/client_channel/parse_address.h" #include "src/core/ext/filters/client_channel/proxy_mapper_registry.h" #include "src/core/ext/filters/client_channel/subchannel_index.h" -#include "src/core/ext/filters/client_channel/uri_parser.h" #include "src/core/lib/backoff/backoff.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/channel/connected_channel.h" @@ -41,6 +41,7 @@ #include "src/core/lib/gpr/alloc.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/manual_constructor.h" +#include "src/core/lib/gprpp/mutex_lock.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/iomgr/sockaddr_utils.h" #include "src/core/lib/iomgr/timer.h" @@ -49,6 +50,10 @@ #include "src/core/lib/surface/channel.h" #include "src/core/lib/surface/channel_init.h" #include "src/core/lib/transport/connectivity_state.h" +#include "src/core/lib/transport/error_utils.h" +#include "src/core/lib/transport/service_config.h" +#include "src/core/lib/transport/status_metadata.h" +#include "src/core/lib/uri/uri_parser.h" #define INTERNAL_REF_BITS 16 #define 
STRONG_REF_MASK (~(gpr_atm)((1 << INTERNAL_REF_BITS) - 1)) @@ -64,6 +69,10 @@ struct state_watcher { grpc_closure closure; grpc_subchannel* subchannel; grpc_connectivity_state connectivity_state; + grpc_connectivity_state last_connectivity_state; + grpc_core::OrphanablePtr<grpc_core::HealthCheckClient> health_check_client; + grpc_closure health_check_closure; + grpc_connectivity_state health_state; }; } // namespace @@ -76,6 +85,12 @@ typedef struct external_state_watcher { struct external_state_watcher* prev; } external_state_watcher; +namespace grpc_core { + +class ConnectedSubchannelStateWatcher; + +} // namespace grpc_core + struct grpc_subchannel { grpc_connector* connector; @@ -107,19 +122,24 @@ struct grpc_subchannel { being setup */ grpc_pollset_set* pollset_set; + grpc_core::UniquePtr<char> health_check_service_name; + /** mutex protecting remaining elements */ gpr_mu mu; - /** active connection, or null; of type grpc_core::ConnectedSubchannel - */ + /** active connection, or null */ grpc_core::RefCountedPtr<grpc_core::ConnectedSubchannel> connected_subchannel; + grpc_core::OrphanablePtr<grpc_core::ConnectedSubchannelStateWatcher> + connected_subchannel_watcher; /** have we seen a disconnection? */ bool disconnected; /** are we connecting */ bool connecting; + /** connectivity state tracking */ grpc_connectivity_state_tracker state_tracker; + grpc_connectivity_state_tracker state_and_health_tracker; external_state_watcher root_external_state_watcher; @@ -142,10 +162,184 @@ struct grpc_subchannel { }; struct grpc_subchannel_call { + grpc_subchannel_call(grpc_core::ConnectedSubchannel* connection, + const grpc_core::ConnectedSubchannel::CallArgs& args) + : connection(connection), deadline(args.deadline) {} + grpc_core::ConnectedSubchannel* connection; - grpc_closure* schedule_closure_after_destroy; + grpc_closure* schedule_closure_after_destroy = nullptr; + // state needed to support channelz interception of recv trailing metadata. + grpc_closure recv_trailing_metadata_ready; + grpc_closure* original_recv_trailing_metadata; + grpc_metadata_batch* recv_trailing_metadata = nullptr; + grpc_millis deadline; }; +static void maybe_start_connecting_locked(grpc_subchannel* c); + +static const char* subchannel_connectivity_state_change_string( + grpc_connectivity_state state) { + switch (state) { + case GRPC_CHANNEL_IDLE: + return "Subchannel state change to IDLE"; + case GRPC_CHANNEL_CONNECTING: + return "Subchannel state change to CONNECTING"; + case GRPC_CHANNEL_READY: + return "Subchannel state change to READY"; + case GRPC_CHANNEL_TRANSIENT_FAILURE: + return "Subchannel state change to TRANSIENT_FAILURE"; + case GRPC_CHANNEL_SHUTDOWN: + return "Subchannel state change to SHUTDOWN"; + } + GPR_UNREACHABLE_CODE(return "UNKNOWN"); +} + +static void set_subchannel_connectivity_state_locked( + grpc_subchannel* c, grpc_connectivity_state state, grpc_error* error, + const char* reason) { + if (c->channelz_subchannel != nullptr) { + c->channelz_subchannel->AddTraceEvent( + grpc_core::channelz::ChannelTrace::Severity::Info, + grpc_slice_from_static_string( + subchannel_connectivity_state_change_string(state))); + } + grpc_connectivity_state_set(&c->state_tracker, state, error, reason); +} + +namespace grpc_core { + +class ConnectedSubchannelStateWatcher + : public InternallyRefCounted<ConnectedSubchannelStateWatcher> { + public: + // Must be instantiated while holding c->mu. 
+ explicit ConnectedSubchannelStateWatcher(grpc_subchannel* c) + : subchannel_(c) { + // Steal subchannel ref for connecting. + GRPC_SUBCHANNEL_WEAK_REF(subchannel_, "state_watcher"); + GRPC_SUBCHANNEL_WEAK_UNREF(subchannel_, "connecting"); + // Start watching for connectivity state changes. + // Callback uses initial ref to this. + GRPC_CLOSURE_INIT(&on_connectivity_changed_, OnConnectivityChanged, this, + grpc_schedule_on_exec_ctx); + c->connected_subchannel->NotifyOnStateChange(c->pollset_set, + &pending_connectivity_state_, + &on_connectivity_changed_); + // Start health check if needed. + grpc_connectivity_state health_state = GRPC_CHANNEL_READY; + if (c->health_check_service_name != nullptr) { + health_check_client_ = grpc_core::MakeOrphanable<HealthCheckClient>( + c->health_check_service_name.get(), c->connected_subchannel, + c->pollset_set, c->channelz_subchannel); + GRPC_CLOSURE_INIT(&on_health_changed_, OnHealthChanged, this, + grpc_schedule_on_exec_ctx); + Ref().release(); // Ref for health callback tracked manually. + health_check_client_->NotifyOnHealthChange(&health_state_, + &on_health_changed_); + health_state = GRPC_CHANNEL_CONNECTING; + } + // Report initial state. + set_subchannel_connectivity_state_locked( + c, GRPC_CHANNEL_READY, GRPC_ERROR_NONE, "subchannel_connected"); + grpc_connectivity_state_set(&c->state_and_health_tracker, health_state, + GRPC_ERROR_NONE, "subchannel_connected"); + } + + ~ConnectedSubchannelStateWatcher() { + GRPC_SUBCHANNEL_WEAK_UNREF(subchannel_, "state_watcher"); + } + + void Orphan() override { health_check_client_.reset(); } + + private: + static void OnConnectivityChanged(void* arg, grpc_error* error) { + auto* self = static_cast<ConnectedSubchannelStateWatcher*>(arg); + grpc_subchannel* c = self->subchannel_; + { + MutexLock lock(&c->mu); + switch (self->pending_connectivity_state_) { + case GRPC_CHANNEL_TRANSIENT_FAILURE: + case GRPC_CHANNEL_SHUTDOWN: { + if (!c->disconnected && c->connected_subchannel != nullptr) { + if (grpc_trace_stream_refcount.enabled()) { + gpr_log(GPR_INFO, + "Connected subchannel %p of subchannel %p has gone into " + "%s. Attempting to reconnect.", + c->connected_subchannel.get(), c, + grpc_connectivity_state_name( + self->pending_connectivity_state_)); + } + c->connected_subchannel.reset(); + c->connected_subchannel_watcher.reset(); + self->last_connectivity_state_ = GRPC_CHANNEL_TRANSIENT_FAILURE; + set_subchannel_connectivity_state_locked( + c, GRPC_CHANNEL_TRANSIENT_FAILURE, GRPC_ERROR_REF(error), + "reflect_child"); + grpc_connectivity_state_set(&c->state_and_health_tracker, + GRPC_CHANNEL_TRANSIENT_FAILURE, + GRPC_ERROR_REF(error), "reflect_child"); + c->backoff_begun = false; + c->backoff->Reset(); + maybe_start_connecting_locked(c); + } else { + self->last_connectivity_state_ = GRPC_CHANNEL_SHUTDOWN; + } + self->health_check_client_.reset(); + break; + } + default: { + // In principle, this should never happen. We should not get + // a callback for READY, because that was the state we started + // this watch from. And a connected subchannel should never go + // from READY to CONNECTING or IDLE. 
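The watcher above keeps itself alive with one ref per pending notification, and (as the tail of OnConnectivityChanged shows just below) defers the final unref until c->mu is released, because dropping the last ref can destroy the subchannel that owns the mutex. A hand-rolled sketch of that discipline, with a toy refcount standing in for InternallyRefCounted:

#include <atomic>
#include <mutex>

class Watcher {
 public:
  Watcher() : refs_(1) {}  // initial ref owned by the first pending callback
  void Ref() { refs_.fetch_add(1, std::memory_order_relaxed); }  // e.g. one
  // extra ref per additional pending callback, like the health watch above.
  void Unref() {
    if (refs_.fetch_sub(1, std::memory_order_acq_rel) == 1) delete this;
  }

  // Invoked when the watched state changes; `rearm` means we keep watching,
  // which transfers our ref to the next pending callback.
  static void OnStateChanged(Watcher* self, bool rearm) {
    {
      std::lock_guard<std::mutex> lock(self->mu_);
      // ... inspect the new state, maybe kick off a reconnect ...
      if (rearm) {
        self = nullptr;  // so we don't unref below
      }
    }
    // Unref only after releasing the lock: the final unref may destroy the
    // object that contains the mutex.
    if (self != nullptr) self->Unref();
  }

 private:
  ~Watcher() = default;  // destroyed only via Unref()
  std::mutex mu_;
  std::atomic<int> refs_;
};

int main() {
  Watcher* w = new Watcher();         // one ref, held by the pending watch
  Watcher::OnStateChanged(w, true);   // ref carried over to the next event
  Watcher::OnStateChanged(w, false);  // final event: last ref dropped here
}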
+ self->last_connectivity_state_ = self->pending_connectivity_state_; + set_subchannel_connectivity_state_locked( + c, self->pending_connectivity_state_, GRPC_ERROR_REF(error), + "reflect_child"); + if (self->pending_connectivity_state_ != GRPC_CHANNEL_READY) { + grpc_connectivity_state_set(&c->state_and_health_tracker, + self->pending_connectivity_state_, + GRPC_ERROR_REF(error), "reflect_child"); + } + c->connected_subchannel->NotifyOnStateChange( + nullptr, &self->pending_connectivity_state_, + &self->on_connectivity_changed_); + self = nullptr; // So we don't unref below. + } + } + } + // Don't unref until we've released the lock, because this might + // cause the subchannel (which contains the lock) to be destroyed. + if (self != nullptr) self->Unref(); + } + + static void OnHealthChanged(void* arg, grpc_error* error) { + auto* self = static_cast<ConnectedSubchannelStateWatcher*>(arg); + if (self->health_state_ == GRPC_CHANNEL_SHUTDOWN) { + self->Unref(); + return; + } + grpc_subchannel* c = self->subchannel_; + MutexLock lock(&c->mu); + if (self->last_connectivity_state_ == GRPC_CHANNEL_READY) { + grpc_connectivity_state_set(&c->state_and_health_tracker, + self->health_state_, GRPC_ERROR_REF(error), + "health_changed"); + } + self->health_check_client_->NotifyOnHealthChange(&self->health_state_, + &self->on_health_changed_); + } + + grpc_subchannel* subchannel_; + grpc_closure on_connectivity_changed_; + grpc_connectivity_state pending_connectivity_state_ = GRPC_CHANNEL_READY; + grpc_connectivity_state last_connectivity_state_ = GRPC_CHANNEL_READY; + grpc_core::OrphanablePtr<grpc_core::HealthCheckClient> health_check_client_; + grpc_closure on_health_changed_; + grpc_connectivity_state health_state_ = GRPC_CHANNEL_CONNECTING; +}; + +} // namespace grpc_core + #define SUBCHANNEL_CALL_TO_CALL_STACK(call) \ (grpc_call_stack*)((char*)(call) + GPR_ROUND_UP_TO_ALIGNMENT_SIZE( \ sizeof(grpc_subchannel_call))) @@ -183,10 +377,18 @@ static void connection_destroy(void* arg, grpc_error* error) { static void subchannel_destroy(void* arg, grpc_error* error) { grpc_subchannel* c = static_cast<grpc_subchannel*>(arg); - c->channelz_subchannel.reset(); + if (c->channelz_subchannel != nullptr) { + c->channelz_subchannel->AddTraceEvent( + grpc_core::channelz::ChannelTrace::Severity::Info, + grpc_slice_from_static_string("Subchannel destroyed")); + c->channelz_subchannel->MarkSubchannelDestroyed(); + c->channelz_subchannel.reset(); + } gpr_free((void*)c->filters); + c->health_check_service_name.reset(); grpc_channel_args_destroy(c->args); grpc_connectivity_state_destroy(&c->state_tracker); + grpc_connectivity_state_destroy(&c->state_and_health_tracker); grpc_connector_unref(c->connector); grpc_pollset_set_destroy(c->pollset_set); grpc_subchannel_key_destroy(c->key); @@ -249,6 +451,7 @@ static void disconnect(grpc_subchannel* c) { grpc_connector_shutdown(c->connector, GRPC_ERROR_CREATE_FROM_STATIC_STRING( "Subchannel disconnected")); c->connected_subchannel.reset(); + c->connected_subchannel_watcher.reset(); gpr_mu_unlock(&c->mu); } @@ -324,6 +527,31 @@ static void parse_args_for_backoff_values( .set_max_backoff(max_backoff_ms); } +namespace grpc_core { +namespace { + +struct HealthCheckParams { + UniquePtr<char> service_name; + + static void Parse(const grpc_json* field, HealthCheckParams* params) { + if (strcmp(field->key, "healthCheckConfig") == 0) { + if (field->type != GRPC_JSON_OBJECT) return; + for (grpc_json* sub_field = field->child; sub_field != nullptr; + sub_field = sub_field->next) { + if 
(sub_field->key == nullptr) return; + if (strcmp(sub_field->key, "serviceName") == 0) { + if (params->service_name != nullptr) return; // Duplicate. + if (sub_field->type != GRPC_JSON_STRING) return; + params->service_name.reset(gpr_strdup(sub_field->value)); + } + } + } + } +}; + +} // namespace +} // namespace grpc_core + grpc_subchannel* grpc_subchannel_create(grpc_connector* connector, const grpc_subchannel_args* args) { grpc_subchannel_key* key = grpc_subchannel_key_create(args); @@ -374,18 +602,45 @@ grpc_subchannel* grpc_subchannel_create(grpc_connector* connector, grpc_schedule_on_exec_ctx); grpc_connectivity_state_init(&c->state_tracker, GRPC_CHANNEL_IDLE, "subchannel"); + grpc_connectivity_state_init(&c->state_and_health_tracker, GRPC_CHANNEL_IDLE, + "subchannel"); grpc_core::BackOff::Options backoff_options; parse_args_for_backoff_values(args->args, &backoff_options, &c->min_connect_timeout_ms); c->backoff.Init(backoff_options); gpr_mu_init(&c->mu); + // Check whether we should enable health checking. + const char* service_config_json = grpc_channel_arg_get_string( + grpc_channel_args_find(c->args, GRPC_ARG_SERVICE_CONFIG)); + if (service_config_json != nullptr) { + grpc_core::UniquePtr<grpc_core::ServiceConfig> service_config = + grpc_core::ServiceConfig::Create(service_config_json); + if (service_config != nullptr) { + grpc_core::HealthCheckParams params; + service_config->ParseGlobalParams(grpc_core::HealthCheckParams::Parse, + ¶ms); + c->health_check_service_name = std::move(params.service_name); + } + } + const grpc_arg* arg = grpc_channel_args_find(c->args, GRPC_ARG_ENABLE_CHANNELZ); - bool channelz_enabled = grpc_channel_arg_get_bool(arg, false); + bool channelz_enabled = + grpc_channel_arg_get_bool(arg, GRPC_ENABLE_CHANNELZ_DEFAULT); + arg = grpc_channel_args_find( + c->args, GRPC_ARG_MAX_CHANNEL_TRACE_EVENT_MEMORY_PER_NODE); + const grpc_integer_options options = { + GRPC_MAX_CHANNEL_TRACE_EVENT_MEMORY_PER_NODE_DEFAULT, 0, INT_MAX}; + size_t channel_tracer_max_memory = + (size_t)grpc_channel_arg_get_integer(arg, options); if (channelz_enabled) { c->channelz_subchannel = - grpc_core::MakeRefCounted<grpc_core::channelz::SubchannelNode>(); + grpc_core::MakeRefCounted<grpc_core::channelz::SubchannelNode>( + c, channel_tracer_max_memory); + c->channelz_subchannel->AddTraceEvent( + grpc_core::channelz::ChannelTrace::Severity::Info, + grpc_slice_from_static_string("Subchannel created")); } return grpc_subchannel_index_register(key, c); @@ -396,6 +651,14 @@ grpc_core::channelz::SubchannelNode* grpc_subchannel_get_channelz_node( return subchannel->channelz_subchannel.get(); } +intptr_t grpc_subchannel_get_child_socket_uuid(grpc_subchannel* subchannel) { + if (subchannel->connected_subchannel != nullptr) { + return subchannel->connected_subchannel->socket_uuid(); + } else { + return 0; + } +} + static void continue_connect_locked(grpc_subchannel* c) { grpc_connect_in_args args; args.interested_parties = c->pollset_set; @@ -404,17 +667,21 @@ static void continue_connect_locked(grpc_subchannel* c) { c->next_attempt_deadline = c->backoff->NextAttemptTime(); args.deadline = std::max(c->next_attempt_deadline, min_deadline); args.channel_args = c->args; - grpc_connectivity_state_set(&c->state_tracker, GRPC_CHANNEL_CONNECTING, - GRPC_ERROR_NONE, "connecting"); + set_subchannel_connectivity_state_locked(c, GRPC_CHANNEL_CONNECTING, + GRPC_ERROR_NONE, "connecting"); + grpc_connectivity_state_set(&c->state_and_health_tracker, + GRPC_CHANNEL_CONNECTING, GRPC_ERROR_NONE, + "connecting"); 
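continue_connect_locked() now updates two trackers in tandem: state_tracker reflects raw transport connectivity, while state_and_health_tracker also folds in the health-check result, and grpc_subchannel_check_connectivity below selects between them via inhibit_health_checks. A toy model of that split, with illustrative names rather than the real grpc_connectivity_state_tracker API:

#include <iostream>

enum class State { kIdle, kConnecting, kReady, kTransientFailure, kShutdown };

struct Subchannel {
  State state = State::kIdle;             // raw connectivity only
  State state_and_health = State::kIdle;  // connectivity + health checking

  void OnConnected(bool health_checking_enabled) {
    state = State::kReady;  // the transport is usable immediately
    // With health checking enabled, stay CONNECTING until the watch RPC
    // reports SERVING; without it, go READY right away.
    state_and_health =
        health_checking_enabled ? State::kConnecting : State::kReady;
  }

  State Check(bool inhibit_health_checks) const {
    return inhibit_health_checks ? state : state_and_health;
  }
};

int main() {
  Subchannel sc;
  sc.OnConnected(/*health_checking_enabled=*/true);
  std::cout << (sc.Check(/*inhibit_health_checks=*/true) == State::kReady)
            << "\n";  // 1: transport view is READY
  std::cout << (sc.Check(false) == State::kConnecting) << "\n";  // 1
}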
grpc_connector_connect(c->connector, &args, &c->connecting_result, &c->on_connected); } -grpc_connectivity_state grpc_subchannel_check_connectivity(grpc_subchannel* c, - grpc_error** error) { - grpc_connectivity_state state; +grpc_connectivity_state grpc_subchannel_check_connectivity( + grpc_subchannel* c, grpc_error** error, bool inhibit_health_checks) { gpr_mu_lock(&c->mu); - state = grpc_connectivity_state_get(&c->state_tracker, error); + grpc_connectivity_state_tracker* tracker = + inhibit_health_checks ? &c->state_tracker : &c->state_and_health_tracker; + grpc_connectivity_state state = grpc_connectivity_state_get(tracker, error); gpr_mu_unlock(&c->mu); return state; } @@ -472,7 +739,8 @@ static void maybe_start_connecting_locked(grpc_subchannel* c) { /* Already connected: don't restart */ return; } - if (!grpc_connectivity_state_has_watchers(&c->state_tracker)) { + if (!grpc_connectivity_state_has_watchers(&c->state_tracker) && + !grpc_connectivity_state_has_watchers(&c->state_and_health_tracker)) { /* Nobody is interested in connecting: so don't just yet */ return; } @@ -499,16 +767,18 @@ static void maybe_start_connecting_locked(grpc_subchannel* c) { void grpc_subchannel_notify_on_state_change( grpc_subchannel* c, grpc_pollset_set* interested_parties, - grpc_connectivity_state* state, grpc_closure* notify) { + grpc_connectivity_state* state, grpc_closure* notify, + bool inhibit_health_checks) { + grpc_connectivity_state_tracker* tracker = + inhibit_health_checks ? &c->state_tracker : &c->state_and_health_tracker; external_state_watcher* w; - if (state == nullptr) { gpr_mu_lock(&c->mu); for (w = c->root_external_state_watcher.next; w != &c->root_external_state_watcher; w = w->next) { if (w->notify == notify) { - grpc_connectivity_state_notify_on_state_change(&c->state_tracker, - nullptr, &w->closure); + grpc_connectivity_state_notify_on_state_change(tracker, nullptr, + &w->closure); } } gpr_mu_unlock(&c->mu); @@ -527,62 +797,12 @@ void grpc_subchannel_notify_on_state_change( w->next = &c->root_external_state_watcher; w->prev = w->next->prev; w->next->prev = w->prev->next = w; - grpc_connectivity_state_notify_on_state_change(&c->state_tracker, state, - &w->closure); + grpc_connectivity_state_notify_on_state_change(tracker, state, &w->closure); maybe_start_connecting_locked(c); gpr_mu_unlock(&c->mu); } } -static void on_connected_subchannel_connectivity_changed(void* p, - grpc_error* error) { - state_watcher* connected_subchannel_watcher = static_cast<state_watcher*>(p); - grpc_subchannel* c = connected_subchannel_watcher->subchannel; - gpr_mu* mu = &c->mu; - - gpr_mu_lock(mu); - - switch (connected_subchannel_watcher->connectivity_state) { - case GRPC_CHANNEL_TRANSIENT_FAILURE: - case GRPC_CHANNEL_SHUTDOWN: { - if (!c->disconnected && c->connected_subchannel != nullptr) { - if (grpc_trace_stream_refcount.enabled()) { - gpr_log(GPR_INFO, - "Connected subchannel %p of subchannel %p has gone into %s. 
" - "Attempting to reconnect.", - c->connected_subchannel.get(), c, - grpc_connectivity_state_name( - connected_subchannel_watcher->connectivity_state)); - } - c->connected_subchannel.reset(); - grpc_connectivity_state_set(&c->state_tracker, - GRPC_CHANNEL_TRANSIENT_FAILURE, - GRPC_ERROR_REF(error), "reflect_child"); - c->backoff_begun = false; - c->backoff->Reset(); - maybe_start_connecting_locked(c); - } else { - connected_subchannel_watcher->connectivity_state = - GRPC_CHANNEL_SHUTDOWN; - } - break; - } - default: { - grpc_connectivity_state_set( - &c->state_tracker, connected_subchannel_watcher->connectivity_state, - GRPC_ERROR_REF(error), "reflect_child"); - GRPC_SUBCHANNEL_WEAK_REF(c, "state_watcher"); - c->connected_subchannel->NotifyOnStateChange( - nullptr, &connected_subchannel_watcher->connectivity_state, - &connected_subchannel_watcher->closure); - connected_subchannel_watcher = nullptr; - } - } - gpr_mu_unlock(mu); - GRPC_SUBCHANNEL_WEAK_UNREF(c, "state_watcher"); - gpr_free(connected_subchannel_watcher); -} - static bool publish_transport_locked(grpc_subchannel* c) { /* construct channel stack */ grpc_channel_stack_builder* builder = grpc_channel_stack_builder_create(); @@ -606,41 +826,25 @@ static bool publish_transport_locked(grpc_subchannel* c) { GRPC_ERROR_UNREF(error); return false; } + intptr_t socket_uuid = c->connecting_result.socket_uuid; memset(&c->connecting_result, 0, sizeof(c->connecting_result)); - /* initialize state watcher */ - state_watcher* connected_subchannel_watcher = static_cast<state_watcher*>( - gpr_zalloc(sizeof(*connected_subchannel_watcher))); - connected_subchannel_watcher->subchannel = c; - connected_subchannel_watcher->connectivity_state = GRPC_CHANNEL_READY; - GRPC_CLOSURE_INIT(&connected_subchannel_watcher->closure, - on_connected_subchannel_connectivity_changed, - connected_subchannel_watcher, grpc_schedule_on_exec_ctx); - if (c->disconnected) { - gpr_free(connected_subchannel_watcher); grpc_channel_stack_destroy(stk); gpr_free(stk); return false; } /* publish */ - c->connected_subchannel.reset( - grpc_core::New<grpc_core::ConnectedSubchannel>(stk)); + c->connected_subchannel.reset(grpc_core::New<grpc_core::ConnectedSubchannel>( + stk, c->channelz_subchannel, socket_uuid)); gpr_log(GPR_INFO, "New connected subchannel at %p for subchannel %p", c->connected_subchannel.get(), c); - /* setup subchannel watching connected subchannel for changes; subchannel - ref for connecting is donated to the state watcher */ - GRPC_SUBCHANNEL_WEAK_REF(c, "state_watcher"); - GRPC_SUBCHANNEL_WEAK_UNREF(c, "connecting"); - c->connected_subchannel->NotifyOnStateChange( - c->pollset_set, &connected_subchannel_watcher->connectivity_state, - &connected_subchannel_watcher->closure); - - /* signal completion */ - grpc_connectivity_state_set(&c->state_tracker, GRPC_CHANNEL_READY, - GRPC_ERROR_NONE, "connected"); + // Instantiate state watcher. Will clean itself up. 
+ c->connected_subchannel_watcher = + grpc_core::MakeOrphanable<grpc_core::ConnectedSubchannelStateWatcher>(c); + return true; } @@ -657,8 +861,14 @@ static void on_subchannel_connected(void* arg, grpc_error* error) { } else if (c->disconnected) { GRPC_SUBCHANNEL_WEAK_UNREF(c, "connecting"); } else { + set_subchannel_connectivity_state_locked( + c, GRPC_CHANNEL_TRANSIENT_FAILURE, + grpc_error_set_int(GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "Connect Failed", &error, 1), + GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNAVAILABLE), + "connect_failed"); grpc_connectivity_state_set( - &c->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE, + &c->state_and_health_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE, grpc_error_set_int(GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( "Connect Failed", &error, 1), GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNAVAILABLE), @@ -699,6 +909,7 @@ static void subchannel_call_destroy(void* call, grpc_error* error) { grpc_call_stack_destroy(SUBCHANNEL_CALL_TO_CALL_STACK(c), nullptr, c->schedule_closure_after_destroy); connection->Unref(DEBUG_LOCATION, "subchannel_call"); + c->~grpc_subchannel_call(); } void grpc_subchannel_call_set_cleanup_closure(grpc_subchannel_call* call, @@ -719,9 +930,71 @@ void grpc_subchannel_call_unref( GRPC_CALL_STACK_UNREF(SUBCHANNEL_CALL_TO_CALL_STACK(c), REF_REASON); } +// Sets *status based on md_batch and error. +static void get_call_status(grpc_subchannel_call* call, + grpc_metadata_batch* md_batch, grpc_error* error, + grpc_status_code* status) { + if (error != GRPC_ERROR_NONE) { + grpc_error_get_status(error, call->deadline, status, nullptr, nullptr, + nullptr); + } else { + if (md_batch->idx.named.grpc_status != nullptr) { + *status = grpc_get_status_code_from_metadata( + md_batch->idx.named.grpc_status->md); + } else { + *status = GRPC_STATUS_UNKNOWN; + } + } + GRPC_ERROR_UNREF(error); +} + +static void recv_trailing_metadata_ready(void* arg, grpc_error* error) { + grpc_subchannel_call* call = static_cast<grpc_subchannel_call*>(arg); + GPR_ASSERT(call->recv_trailing_metadata != nullptr); + grpc_status_code status = GRPC_STATUS_OK; + grpc_metadata_batch* md_batch = call->recv_trailing_metadata; + get_call_status(call, md_batch, GRPC_ERROR_REF(error), &status); + grpc_core::channelz::SubchannelNode* channelz_subchannel = + call->connection->channelz_subchannel(); + GPR_ASSERT(channelz_subchannel != nullptr); + if (status == GRPC_STATUS_OK) { + channelz_subchannel->RecordCallSucceeded(); + } else { + channelz_subchannel->RecordCallFailed(); + } + GRPC_CLOSURE_RUN(call->original_recv_trailing_metadata, + GRPC_ERROR_REF(error)); +} + +// If channelz is enabled, intercept recv_trailing so that we may check the +// status and associate it to a subchannel. +static void maybe_intercept_recv_trailing_metadata( + grpc_subchannel_call* call, grpc_transport_stream_op_batch* batch) { + // only intercept payloads with recv trailing. + if (!batch->recv_trailing_metadata) { + return; + } + // only add interceptor is channelz is enabled. + if (call->connection->channelz_subchannel() == nullptr) { + return; + } + GRPC_CLOSURE_INIT(&call->recv_trailing_metadata_ready, + recv_trailing_metadata_ready, call, + grpc_schedule_on_exec_ctx); + // save some state needed for the interception callback. 
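subchannel_call_destroy() above now ends with an explicit c->~grpc_subchannel_call() because the call object is built with placement new into arena storage (see CreateCall further down), so no implicit delete ever runs. A self-contained sketch of that pattern; ArenaAlloc and the one-slot arena are stand-ins for gpr_arena_alloc:

#include <cstddef>
#include <new>

struct Call {
  explicit Call(int deadline_ms) : deadline_ms(deadline_ms) {}
  ~Call() { /* release per-call resources */ }
  int deadline_ms;
};

// One fixed slot is enough for the sketch; a real arena bump-allocates and
// frees everything in one shot, never running destructors itself.
alignas(Call) unsigned char g_arena[sizeof(Call)];
void* ArenaAlloc(std::size_t size) {
  return size <= sizeof(g_arena) ? g_arena : nullptr;
}

int main() {
  // Construct directly into arena storage: no heap, no implicit delete.
  Call* call = new (ArenaAlloc(sizeof(Call))) Call(/*deadline_ms=*/250);
  // ... issue the call ...
  // The arena reclaims memory in bulk, so the destructor must be invoked
  // by hand -- the counterpart of c->~grpc_subchannel_call() above.
  call->~Call();
}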
+ GPR_ASSERT(call->recv_trailing_metadata == nullptr); + call->recv_trailing_metadata = + batch->payload->recv_trailing_metadata.recv_trailing_metadata; + call->original_recv_trailing_metadata = + batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready; + batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready = + &call->recv_trailing_metadata_ready; +} + void grpc_subchannel_call_process_op(grpc_subchannel_call* call, grpc_transport_stream_op_batch* batch) { GPR_TIMER_SCOPE("grpc_subchannel_call_process_op", 0); + maybe_intercept_recv_trailing_metadata(call, batch); grpc_call_stack* call_stack = SUBCHANNEL_CALL_TO_CALL_STACK(call); grpc_call_element* top_elem = grpc_call_stack_element(call_stack, 0); GRPC_CALL_LOG_OP(GPR_INFO, top_elem, batch); @@ -770,6 +1043,14 @@ void grpc_get_subchannel_address_arg(const grpc_channel_args* args, } } +const char* grpc_subchannel_get_target(grpc_subchannel* subchannel) { + const grpc_arg* addr_arg = + grpc_channel_args_find(subchannel->args, GRPC_ARG_SUBCHANNEL_ADDRESS); + const char* addr_str = grpc_channel_arg_get_string(addr_arg); + GPR_ASSERT(addr_str != nullptr); // Should have been set by LB policy. + return addr_str; +} + const char* grpc_get_subchannel_address_uri_arg(const grpc_channel_args* args) { const grpc_arg* addr_arg = grpc_channel_args_find(args, GRPC_ARG_SUBCHANNEL_ADDRESS); @@ -786,9 +1067,15 @@ grpc_arg grpc_create_subchannel_address_arg(const grpc_resolved_address* addr) { namespace grpc_core { -ConnectedSubchannel::ConnectedSubchannel(grpc_channel_stack* channel_stack) +ConnectedSubchannel::ConnectedSubchannel( + grpc_channel_stack* channel_stack, + grpc_core::RefCountedPtr<grpc_core::channelz::SubchannelNode> + channelz_subchannel, + intptr_t socket_uuid) : RefCountedWithTracing<ConnectedSubchannel>(&grpc_trace_stream_refcount), - channel_stack_(channel_stack) {} + channel_stack_(channel_stack), + channelz_subchannel_(std::move(channelz_subchannel)), + socket_uuid_(socket_uuid) {} ConnectedSubchannel::~ConnectedSubchannel() { GRPC_CHANNEL_STACK_UNREF(channel_stack_, "connected_subchannel_dtor"); @@ -818,22 +1105,14 @@ void ConnectedSubchannel::Ping(grpc_closure* on_initiate, grpc_error* ConnectedSubchannel::CreateCall(const CallArgs& args, grpc_subchannel_call** call) { - size_t allocation_size = - GPR_ROUND_UP_TO_ALIGNMENT_SIZE(sizeof(grpc_subchannel_call)); - if (args.parent_data_size > 0) { - allocation_size += - GPR_ROUND_UP_TO_ALIGNMENT_SIZE(channel_stack_->call_stack_size) + - args.parent_data_size; - } else { - allocation_size += channel_stack_->call_stack_size; - } - *call = static_cast<grpc_subchannel_call*>( - gpr_arena_alloc(args.arena, allocation_size)); + const size_t allocation_size = + GetInitialCallSizeEstimate(args.parent_data_size); + *call = new (gpr_arena_alloc(args.arena, allocation_size)) + grpc_subchannel_call(this, args); grpc_call_stack* callstk = SUBCHANNEL_CALL_TO_CALL_STACK(*call); RefCountedPtr<ConnectedSubchannel> connection = Ref(DEBUG_LOCATION, "subchannel_call"); connection.release(); // Ref is passed to the grpc_subchannel_call object. 
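maybe_intercept_recv_trailing_metadata() above swaps the batch's ready-callback for its own, records the call outcome for channelz, and then chains to the original closure. The same shape in a standalone sketch, with std::function standing in for grpc_closure and illustrative names throughout:

#include <functional>
#include <iostream>

struct Batch {
  std::function<void(int status)> recv_trailing_ready;
};

struct Call {
  std::function<void(int)> original_recv_trailing_ready;
  int calls_succeeded = 0;
  int calls_failed = 0;

  void MaybeIntercept(Batch* batch) {
    // Save the transport's callback, then point the batch at ours.
    original_recv_trailing_ready = std::move(batch->recv_trailing_ready);
    batch->recv_trailing_ready = [this](int status) {
      // Record the outcome (the diff records to the channelz node)...
      if (status == 0) ++calls_succeeded; else ++calls_failed;
      // ...then chain to the original callback unchanged.
      original_recv_trailing_ready(status);
    };
  }
};

int main() {
  Call call;
  Batch batch;
  batch.recv_trailing_ready = [](int s) { std::cout << "status " << s << "\n"; };
  call.MaybeIntercept(&batch);
  batch.recv_trailing_ready(0);  // transport completes; both hooks run
  std::cout << call.calls_succeeded << " succeeded\n";
}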
- (*call)->connection = this; const grpc_call_element_args call_args = { callstk, /* call_stack */ nullptr, /* server_transport_data */ @@ -852,7 +1131,24 @@ grpc_error* ConnectedSubchannel::CreateCall(const CallArgs& args, return error; } grpc_call_stack_set_pollset_or_pollset_set(callstk, args.pollent); + if (channelz_subchannel_ != nullptr) { + channelz_subchannel_->RecordCallStarted(); + } return GRPC_ERROR_NONE; } +size_t ConnectedSubchannel::GetInitialCallSizeEstimate( + size_t parent_data_size) const { + size_t allocation_size = + GPR_ROUND_UP_TO_ALIGNMENT_SIZE(sizeof(grpc_subchannel_call)); + if (parent_data_size > 0) { + allocation_size += + GPR_ROUND_UP_TO_ALIGNMENT_SIZE(channel_stack_->call_stack_size) + + parent_data_size; + } else { + allocation_size += channel_stack_->call_stack_size; + } + return allocation_size; +} + } // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/subchannel.h b/src/core/ext/filters/client_channel/subchannel.h index a135035d62..ec3b4d86e4 100644 --- a/src/core/ext/filters/client_channel/subchannel.h +++ b/src/core/ext/filters/client_channel/subchannel.h @@ -85,7 +85,11 @@ class ConnectedSubchannel : public RefCountedWithTracing<ConnectedSubchannel> { size_t parent_data_size; }; - explicit ConnectedSubchannel(grpc_channel_stack* channel_stack); + explicit ConnectedSubchannel( + grpc_channel_stack* channel_stack, + grpc_core::RefCountedPtr<grpc_core::channelz::SubchannelNode> + channelz_subchannel, + intptr_t socket_uuid); ~ConnectedSubchannel(); grpc_channel_stack* channel_stack() { return channel_stack_; } @@ -94,9 +98,21 @@ class ConnectedSubchannel : public RefCountedWithTracing<ConnectedSubchannel> { grpc_closure* closure); void Ping(grpc_closure* on_initiate, grpc_closure* on_ack); grpc_error* CreateCall(const CallArgs& args, grpc_subchannel_call** call); + channelz::SubchannelNode* channelz_subchannel() { + return channelz_subchannel_.get(); + } + intptr_t socket_uuid() { return socket_uuid_; } + + size_t GetInitialCallSizeEstimate(size_t parent_data_size) const; private: grpc_channel_stack* channel_stack_; + // ref counted pointer to the channelz node in this connected subchannel's + // owning subchannel. + grpc_core::RefCountedPtr<grpc_core::channelz::SubchannelNode> + channelz_subchannel_; + // uuid of this subchannel's socket. 0 if this subchannel is not connected. + const intptr_t socket_uuid_; }; } // namespace grpc_core @@ -119,6 +135,8 @@ void grpc_subchannel_call_unref( grpc_core::channelz::SubchannelNode* grpc_subchannel_get_channelz_node( grpc_subchannel* subchannel); +intptr_t grpc_subchannel_get_child_socket_uuid(grpc_subchannel* subchannel); + /** Returns a pointer to the parent data associated with \a subchannel_call. The data will be of the size specified in \a parent_data_size field of the args passed to \a grpc_connected_subchannel_create_call(). */ @@ -127,13 +145,14 @@ void* grpc_connected_subchannel_call_get_parent_data( /** poll the current connectivity state of a channel */ grpc_connectivity_state grpc_subchannel_check_connectivity( - grpc_subchannel* channel, grpc_error** error); + grpc_subchannel* channel, grpc_error** error, bool inhibit_health_checking); /** Calls notify when the connectivity state of a channel becomes different from *state. Updates *state with the new state of the channel. 
*/ void grpc_subchannel_notify_on_state_change( grpc_subchannel* channel, grpc_pollset_set* interested_parties, - grpc_connectivity_state* state, grpc_closure* notify); + grpc_connectivity_state* state, grpc_closure* notify, + bool inhibit_health_checks); /** retrieve the grpc_core::ConnectedSubchannel - or nullptr if not connected * (which may happen before it initially connects or during transient failures) @@ -184,6 +203,8 @@ grpc_subchannel* grpc_subchannel_create(grpc_connector* connector, void grpc_get_subchannel_address_arg(const grpc_channel_args* args, grpc_resolved_address* addr); +const char* grpc_subchannel_get_target(grpc_subchannel* subchannel); + /// Returns the URI string for the address to connect to. const char* grpc_get_subchannel_address_uri_arg(const grpc_channel_args* args); diff --git a/src/core/ext/filters/client_channel/subchannel_index.cc b/src/core/ext/filters/client_channel/subchannel_index.cc index cb02b1a748..1c23a6c4be 100644 --- a/src/core/ext/filters/client_channel/subchannel_index.cc +++ b/src/core/ext/filters/client_channel/subchannel_index.cc @@ -73,7 +73,8 @@ static grpc_subchannel_key* subchannel_key_copy(grpc_subchannel_key* k) { int grpc_subchannel_key_compare(const grpc_subchannel_key* a, const grpc_subchannel_key* b) { - if (g_force_creation) return false; + // To pretend the keys are different, return a non-zero value. + if (GPR_UNLIKELY(g_force_creation)) return 1; int c = GPR_ICMP(a->args.filter_count, b->args.filter_count); if (c != 0) return c; if (a->args.filter_count > 0) { diff --git a/src/core/ext/filters/client_channel/subchannel_index.h b/src/core/ext/filters/client_channel/subchannel_index.h index a7dae9d47d..c135613d26 100644 --- a/src/core/ext/filters/client_channel/subchannel_index.h +++ b/src/core/ext/filters/client_channel/subchannel_index.h @@ -65,13 +65,10 @@ void grpc_subchannel_index_ref(void); void grpc_subchannel_index_unref(void); /** \em TEST ONLY. - * If \a force_creation is true, all key comparisons will be false, resulting in + * If \a force_creation is true, all keys are regarded different, resulting in * new subchannels always being created. Otherwise, the keys will be compared as * usual. * - * This function is *not* threadsafe on purpose: it should *only* be used in - * test code. - * * Tests using this function \em MUST run tests with and without \a * force_creation set. */ void grpc_subchannel_index_test_only_set_force_creation(bool force_creation); diff --git a/src/core/ext/filters/client_channel/uri_parser.cc b/src/core/ext/filters/client_channel/uri_parser.cc deleted file mode 100644 index 0572034a9c..0000000000 --- a/src/core/ext/filters/client_channel/uri_parser.cc +++ /dev/null @@ -1,314 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -#include <grpc/support/port_platform.h> - -#include "src/core/ext/filters/client_channel/uri_parser.h" - -#include <string.h> - -#include <grpc/slice_buffer.h> -#include <grpc/support/alloc.h> -#include <grpc/support/log.h> -#include <grpc/support/string_util.h> - -#include "src/core/lib/gpr/string.h" -#include "src/core/lib/slice/percent_encoding.h" -#include "src/core/lib/slice/slice_internal.h" -#include "src/core/lib/slice/slice_string_helpers.h" - -/** a size_t default value... maps to all 1's */ -#define NOT_SET (~(size_t)0) - -static grpc_uri* bad_uri(const char* uri_text, size_t pos, const char* section, - bool suppress_errors) { - char* line_prefix; - size_t pfx_len; - - if (!suppress_errors) { - gpr_asprintf(&line_prefix, "bad uri.%s: '", section); - pfx_len = strlen(line_prefix) + pos; - gpr_log(GPR_ERROR, "%s%s'", line_prefix, uri_text); - gpr_free(line_prefix); - - line_prefix = static_cast<char*>(gpr_malloc(pfx_len + 1)); - memset(line_prefix, ' ', pfx_len); - line_prefix[pfx_len] = 0; - gpr_log(GPR_ERROR, "%s^ here", line_prefix); - gpr_free(line_prefix); - } - - return nullptr; -} - -/** Returns a copy of percent decoded \a src[begin, end) */ -static char* decode_and_copy_component(const char* src, size_t begin, - size_t end) { - grpc_slice component = - (begin == NOT_SET || end == NOT_SET) - ? grpc_empty_slice() - : grpc_slice_from_copied_buffer(src + begin, end - begin); - grpc_slice decoded_component = - grpc_permissive_percent_decode_slice(component); - char* out = grpc_dump_slice(decoded_component, GPR_DUMP_ASCII); - grpc_slice_unref_internal(component); - grpc_slice_unref_internal(decoded_component); - return out; -} - -static bool valid_hex(char c) { - return ((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F')) || - ((c >= '0') && (c <= '9')); -} - -/** Returns how many chars to advance if \a uri_text[i] begins a valid \a pchar - * production. If \a uri_text[i] introduces an invalid \a pchar (such as percent - * sign not followed by two hex digits), NOT_SET is returned. */ -static size_t parse_pchar(const char* uri_text, size_t i) { - /* pchar = unreserved / pct-encoded / sub-delims / ":" / "@" - * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - * pct-encoded = "%" HEXDIG HEXDIG - * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" - / "*" / "+" / "," / ";" / "=" */ - char c = uri_text[i]; - switch (c) { - default: - if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) || - ((c >= '0') && (c <= '9'))) { - return 1; - } - break; - case ':': - case '@': - case '-': - case '.': - case '_': - case '~': - case '!': - case '$': - case '&': - case '\'': - case '(': - case ')': - case '*': - case '+': - case ',': - case ';': - case '=': - return 1; - case '%': /* pct-encoded */ - if (valid_hex(uri_text[i + 1]) && valid_hex(uri_text[i + 2])) { - return 2; - } - return NOT_SET; - } - return 0; -} - -/* *( pchar / "?" / "/" ) */ -static int parse_fragment_or_query(const char* uri_text, size_t* i) { - char c; - while ((c = uri_text[*i]) != 0) { - const size_t advance = parse_pchar(uri_text, *i); /* pchar */ - switch (advance) { - case 0: /* uri_text[i] isn't in pchar */ - /* maybe it's ? or / */ - if (uri_text[*i] == '?' 
|| uri_text[*i] == '/') { - (*i)++; - break; - } else { - return 1; - } - GPR_UNREACHABLE_CODE(return 0); - default: - (*i) += advance; - break; - case NOT_SET: /* uri_text[i] introduces an invalid URI */ - return 0; - } - } - /* *i is the first uri_text position past the \a query production, maybe \0 */ - return 1; -} - -static void parse_query_parts(grpc_uri* uri) { - static const char* QUERY_PARTS_SEPARATOR = "&"; - static const char* QUERY_PARTS_VALUE_SEPARATOR = "="; - GPR_ASSERT(uri->query != nullptr); - if (uri->query[0] == '\0') { - uri->query_parts = nullptr; - uri->query_parts_values = nullptr; - uri->num_query_parts = 0; - return; - } - - gpr_string_split(uri->query, QUERY_PARTS_SEPARATOR, &uri->query_parts, - &uri->num_query_parts); - uri->query_parts_values = - static_cast<char**>(gpr_malloc(uri->num_query_parts * sizeof(char**))); - for (size_t i = 0; i < uri->num_query_parts; i++) { - char** query_param_parts; - size_t num_query_param_parts; - char* full = uri->query_parts[i]; - gpr_string_split(full, QUERY_PARTS_VALUE_SEPARATOR, &query_param_parts, - &num_query_param_parts); - GPR_ASSERT(num_query_param_parts > 0); - uri->query_parts[i] = query_param_parts[0]; - if (num_query_param_parts > 1) { - /* TODO(dgq): only the first value after the separator is considered. - * Perhaps all chars after the first separator for the query part should - * be included, even if they include the separator. */ - uri->query_parts_values[i] = query_param_parts[1]; - } else { - uri->query_parts_values[i] = nullptr; - } - for (size_t j = 2; j < num_query_param_parts; j++) { - gpr_free(query_param_parts[j]); - } - gpr_free(query_param_parts); - gpr_free(full); - } -} - -grpc_uri* grpc_uri_parse(const char* uri_text, bool suppress_errors) { - grpc_uri* uri; - size_t scheme_begin = 0; - size_t scheme_end = NOT_SET; - size_t authority_begin = NOT_SET; - size_t authority_end = NOT_SET; - size_t path_begin = NOT_SET; - size_t path_end = NOT_SET; - size_t query_begin = NOT_SET; - size_t query_end = NOT_SET; - size_t fragment_begin = NOT_SET; - size_t fragment_end = NOT_SET; - size_t i; - - for (i = scheme_begin; uri_text[i] != 0; i++) { - if (uri_text[i] == ':') { - scheme_end = i; - break; - } - if (uri_text[i] >= 'a' && uri_text[i] <= 'z') continue; - if (uri_text[i] >= 'A' && uri_text[i] <= 'Z') continue; - if (i != scheme_begin) { - if (uri_text[i] >= '0' && uri_text[i] <= '9') continue; - if (uri_text[i] == '+') continue; - if (uri_text[i] == '-') continue; - if (uri_text[i] == '.') continue; - } - break; - } - if (scheme_end == NOT_SET) { - return bad_uri(uri_text, i, "scheme", suppress_errors); - } - - if (uri_text[scheme_end + 1] == '/' && uri_text[scheme_end + 2] == '/') { - authority_begin = scheme_end + 3; - for (i = authority_begin; uri_text[i] != 0 && authority_end == NOT_SET; - i++) { - if (uri_text[i] == '/' || uri_text[i] == '?' || uri_text[i] == '#') { - authority_end = i; - } - } - if (authority_end == NOT_SET && uri_text[i] == 0) { - authority_end = i; - } - if (authority_end == NOT_SET) { - return bad_uri(uri_text, i, "authority", suppress_errors); - } - /* TODO(ctiller): parse the authority correctly */ - path_begin = authority_end; - } else { - path_begin = scheme_end + 1; - } - - for (i = path_begin; uri_text[i] != 0; i++) { - if (uri_text[i] == '?' 
|| uri_text[i] == '#') { - path_end = i; - break; - } - } - if (path_end == NOT_SET && uri_text[i] == 0) { - path_end = i; - } - if (path_end == NOT_SET) { - return bad_uri(uri_text, i, "path", suppress_errors); - } - - if (uri_text[i] == '?') { - query_begin = ++i; - if (!parse_fragment_or_query(uri_text, &i)) { - return bad_uri(uri_text, i, "query", suppress_errors); - } else if (uri_text[i] != 0 && uri_text[i] != '#') { - /* We must be at the end or at the beginning of a fragment */ - return bad_uri(uri_text, i, "query", suppress_errors); - } - query_end = i; - } - if (uri_text[i] == '#') { - fragment_begin = ++i; - if (!parse_fragment_or_query(uri_text, &i)) { - return bad_uri(uri_text, i - fragment_end, "fragment", suppress_errors); - } else if (uri_text[i] != 0) { - /* We must be at the end */ - return bad_uri(uri_text, i, "fragment", suppress_errors); - } - fragment_end = i; - } - - uri = static_cast<grpc_uri*>(gpr_zalloc(sizeof(*uri))); - uri->scheme = decode_and_copy_component(uri_text, scheme_begin, scheme_end); - uri->authority = - decode_and_copy_component(uri_text, authority_begin, authority_end); - uri->path = decode_and_copy_component(uri_text, path_begin, path_end); - uri->query = decode_and_copy_component(uri_text, query_begin, query_end); - uri->fragment = - decode_and_copy_component(uri_text, fragment_begin, fragment_end); - parse_query_parts(uri); - - return uri; -} - -const char* grpc_uri_get_query_arg(const grpc_uri* uri, const char* key) { - GPR_ASSERT(key != nullptr); - if (key[0] == '\0') return nullptr; - - for (size_t i = 0; i < uri->num_query_parts; ++i) { - if (0 == strcmp(key, uri->query_parts[i])) { - return uri->query_parts_values[i]; - } - } - return nullptr; -} - -void grpc_uri_destroy(grpc_uri* uri) { - if (!uri) return; - gpr_free(uri->scheme); - gpr_free(uri->authority); - gpr_free(uri->path); - gpr_free(uri->query); - for (size_t i = 0; i < uri->num_query_parts; ++i) { - gpr_free(uri->query_parts[i]); - gpr_free(uri->query_parts_values[i]); - } - gpr_free(uri->query_parts); - gpr_free(uri->query_parts_values); - gpr_free(uri->fragment); - gpr_free(uri); -} diff --git a/src/core/ext/filters/client_channel/uri_parser.h b/src/core/ext/filters/client_channel/uri_parser.h deleted file mode 100644 index d749f23308..0000000000 --- a/src/core/ext/filters/client_channel/uri_parser.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_URI_PARSER_H -#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_URI_PARSER_H - -#include <grpc/support/port_platform.h> - -#include <stddef.h> - -typedef struct { - char* scheme; - char* authority; - char* path; - char* query; - /** Query substrings separated by '&' */ - char** query_parts; - /** Number of elements in \a query_parts and \a query_parts_values */ - size_t num_query_parts; - /** Split each query part by '='. NULL if not present. 
*/ - char** query_parts_values; - char* fragment; -} grpc_uri; - -/** parse a uri, return NULL on failure */ -grpc_uri* grpc_uri_parse(const char* uri_text, bool suppress_errors); - -/** return the part of a query string after the '=' in "?key=xxx&...", or NULL - * if key is not present */ -const char* grpc_uri_get_query_arg(const grpc_uri* uri, const char* key); - -/** destroy a uri */ -void grpc_uri_destroy(grpc_uri* uri); - -#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_URI_PARSER_H */ diff --git a/src/core/ext/filters/deadline/deadline_filter.cc b/src/core/ext/filters/deadline/deadline_filter.cc index d23ad67ad5..b4cb07f0f9 100644 --- a/src/core/ext/filters/deadline/deadline_filter.cc +++ b/src/core/ext/filters/deadline/deadline_filter.cc @@ -27,6 +27,7 @@ #include <grpc/support/time.h> #include "src/core/lib/channel/channel_stack_builder.h" +#include "src/core/lib/gprpp/memory.h" #include "src/core/lib/iomgr/timer.h" #include "src/core/lib/slice/slice_internal.h" #include "src/core/lib/surface/channel_init.h" @@ -152,7 +153,11 @@ static void inject_recv_trailing_metadata_ready( // Callback and associated state for starting the timer after call stack // initialization has been completed. struct start_timer_after_init_state { - bool in_call_combiner; + start_timer_after_init_state(grpc_call_element* elem, grpc_millis deadline) + : elem(elem), deadline(deadline) {} + ~start_timer_after_init_state() { start_timer_if_needed(elem, deadline); } + + bool in_call_combiner = false; grpc_call_element* elem; grpc_millis deadline; grpc_closure closure; @@ -171,20 +176,16 @@ static void start_timer_after_init(void* arg, grpc_error* error) { "scheduling deadline timer"); return; } - start_timer_if_needed(state->elem, state->deadline); - gpr_free(state); + grpc_core::Delete(state); GRPC_CALL_COMBINER_STOP(deadline_state->call_combiner, "done scheduling deadline timer"); } -void grpc_deadline_state_init(grpc_call_element* elem, - grpc_call_stack* call_stack, - grpc_call_combiner* call_combiner, - grpc_millis deadline) { - grpc_deadline_state* deadline_state = - static_cast<grpc_deadline_state*>(elem->call_data); - deadline_state->call_stack = call_stack; - deadline_state->call_combiner = call_combiner; +grpc_deadline_state::grpc_deadline_state(grpc_call_element* elem, + grpc_call_stack* call_stack, + grpc_call_combiner* call_combiner, + grpc_millis deadline) + : call_stack(call_stack), call_combiner(call_combiner) { // Deadline will always be infinite on servers, so the timer will only be // set on clients with a finite deadline. if (deadline != GRPC_MILLIS_INF_FUTURE) { @@ -196,21 +197,14 @@ void grpc_deadline_state_init(grpc_call_element* elem, // create a closure to start the timer, and we schedule that closure // to be run after call stack initialization is done. 
struct start_timer_after_init_state* state = - static_cast<struct start_timer_after_init_state*>( - gpr_zalloc(sizeof(*state))); - state->elem = elem; - state->deadline = deadline; + grpc_core::New<start_timer_after_init_state>(elem, deadline); GRPC_CLOSURE_INIT(&state->closure, start_timer_after_init, state, grpc_schedule_on_exec_ctx); GRPC_CLOSURE_SCHED(&state->closure, GRPC_ERROR_NONE); } } -void grpc_deadline_state_destroy(grpc_call_element* elem) { - grpc_deadline_state* deadline_state = - static_cast<grpc_deadline_state*>(elem->call_data); - cancel_timer_if_needed(deadline_state); -} +grpc_deadline_state::~grpc_deadline_state() { cancel_timer_if_needed(this); } void grpc_deadline_state_reset(grpc_call_element* elem, grpc_millis new_deadline) { @@ -269,8 +263,8 @@ typedef struct server_call_data { // Constructor for call_data. Used for both client and server filters. static grpc_error* init_call_elem(grpc_call_element* elem, const grpc_call_element_args* args) { - grpc_deadline_state_init(elem, args->call_stack, args->call_combiner, - args->deadline); + new (elem->call_data) grpc_deadline_state( + elem, args->call_stack, args->call_combiner, args->deadline); return GRPC_ERROR_NONE; } @@ -278,7 +272,9 @@ static grpc_error* init_call_elem(grpc_call_element* elem, static void destroy_call_elem(grpc_call_element* elem, const grpc_call_final_info* final_info, grpc_closure* ignored) { - grpc_deadline_state_destroy(elem); + grpc_deadline_state* deadline_state = + static_cast<grpc_deadline_state*>(elem->call_data); + deadline_state->~grpc_deadline_state(); } // Method for starting a call op for client filter. diff --git a/src/core/ext/filters/deadline/deadline_filter.h b/src/core/ext/filters/deadline/deadline_filter.h index 1d797f445a..e37032999c 100644 --- a/src/core/ext/filters/deadline/deadline_filter.h +++ b/src/core/ext/filters/deadline/deadline_filter.h @@ -22,19 +22,23 @@ #include "src/core/lib/channel/channel_stack.h" #include "src/core/lib/iomgr/timer.h" -typedef enum grpc_deadline_timer_state { +enum grpc_deadline_timer_state { GRPC_DEADLINE_STATE_INITIAL, GRPC_DEADLINE_STATE_PENDING, GRPC_DEADLINE_STATE_FINISHED -} grpc_deadline_timer_state; +}; // State used for filters that enforce call deadlines. // Must be the first field in the filter's call_data. -typedef struct grpc_deadline_state { +struct grpc_deadline_state { + grpc_deadline_state(grpc_call_element* elem, grpc_call_stack* call_stack, + grpc_call_combiner* call_combiner, grpc_millis deadline); + ~grpc_deadline_state(); + // We take a reference to the call stack for the timer callback. grpc_call_stack* call_stack; grpc_call_combiner* call_combiner; - grpc_deadline_timer_state timer_state; + grpc_deadline_timer_state timer_state = GRPC_DEADLINE_STATE_INITIAL; grpc_timer timer; grpc_closure timer_callback; // Closure to invoke when we receive trailing metadata. @@ -43,21 +47,13 @@ typedef struct grpc_deadline_state { // The original recv_trailing_metadata_ready closure, which we chain to // after our own closure is invoked. grpc_closure* original_recv_trailing_metadata_ready; -} grpc_deadline_state; +}; // // NOTE: All of these functions require that the first field in // elem->call_data is a grpc_deadline_state. 
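The start_timer_after_init_state conversion above moves the deferred start_timer_if_needed() call into the destructor, so it runs exactly once on whichever path frees the state, and default member initializers replace the old gpr_zalloc. A minimal sketch of that shape, with New/Delete as stand-ins for grpc_core::New/Delete and a printf in place of the real timer:

#include <cstdio>
#include <utility>

// Stand-ins for grpc_core::New/Delete from gprpp/memory.h.
template <typename T, typename... Args>
T* New(Args&&... args) { return new T(std::forward<Args>(args)...); }
template <typename T>
void Delete(T* p) { delete p; }

struct StartTimerAfterInitState {
  explicit StartTimerAfterInitState(int deadline_ms)
      : deadline_ms(deadline_ms) {}
  // The deferred work lives in the destructor, so every path that frees
  // the state starts the timer, exactly once.
  ~StartTimerAfterInitState() {
    std::printf("start timer: %d ms\n", deadline_ms);
  }
  bool in_call_combiner = false;  // default init replaces gpr_zalloc()
  int deadline_ms;
};

int main() {
  StartTimerAfterInitState* state = New<StartTimerAfterInitState>(250);
  // ... closure bounces through the call combiner ...
  Delete(state);  // prints "start timer: 250 ms"
}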
// -// assumes elem->call_data is zero'd -void grpc_deadline_state_init(grpc_call_element* elem, - grpc_call_stack* call_stack, - grpc_call_combiner* call_combiner, - grpc_millis deadline); - -void grpc_deadline_state_destroy(grpc_call_element* elem); - // Cancels the existing timer and starts a new one with new_deadline. // // Note: It is generally safe to call this with an earlier deadline diff --git a/src/core/ext/filters/http/client/http_client_filter.cc b/src/core/ext/filters/http/client/http_client_filter.cc index 1678051beb..bf9a01f659 100644 --- a/src/core/ext/filters/http/client/http_client_filter.cc +++ b/src/core/ext/filters/http/client/http_client_filter.cc @@ -37,10 +37,31 @@ #define EXPECTED_CONTENT_TYPE "application/grpc" #define EXPECTED_CONTENT_TYPE_LENGTH sizeof(EXPECTED_CONTENT_TYPE) - 1 /* default maximum size of payload eligible for GET request */ -static const size_t kMaxPayloadSizeForGet = 2048; +static constexpr size_t kMaxPayloadSizeForGet = 2048; + +static void recv_initial_metadata_ready(void* user_data, grpc_error* error); +static void recv_trailing_metadata_ready(void* user_data, grpc_error* error); +static void on_send_message_next_done(void* arg, grpc_error* error); +static void send_message_on_complete(void* arg, grpc_error* error); namespace { struct call_data { + call_data(grpc_call_element* elem, const grpc_call_element_args& args) + : call_combiner(args.call_combiner) { + GRPC_CLOSURE_INIT(&recv_initial_metadata_ready, + ::recv_initial_metadata_ready, elem, + grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_INIT(&recv_trailing_metadata_ready, + ::recv_trailing_metadata_ready, elem, + grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_INIT(&on_send_message_next_done, ::on_send_message_next_done, + elem, grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_INIT(&send_message_on_complete, ::send_message_on_complete, + elem, grpc_schedule_on_exec_ctx); + } + + ~call_data() { GRPC_ERROR_UNREF(recv_initial_metadata_error); } + grpc_call_combiner* call_combiner; // State for handling send_initial_metadata ops. grpc_linked_mdelem method; @@ -51,15 +72,18 @@ struct call_data { grpc_linked_mdelem user_agent; // State for handling recv_initial_metadata ops. grpc_metadata_batch* recv_initial_metadata; - grpc_closure* original_recv_initial_metadata_ready; + grpc_error* recv_initial_metadata_error = GRPC_ERROR_NONE; + grpc_closure* original_recv_initial_metadata_ready = nullptr; grpc_closure recv_initial_metadata_ready; // State for handling recv_trailing_metadata ops. grpc_metadata_batch* recv_trailing_metadata; grpc_closure* original_recv_trailing_metadata_ready; grpc_closure recv_trailing_metadata_ready; + grpc_error* recv_trailing_metadata_error = GRPC_ERROR_NONE; + bool seen_recv_trailing_metadata_ready = false; // State for handling send_message ops. grpc_transport_stream_op_batch* send_message_batch; - size_t send_message_bytes_read; + size_t send_message_bytes_read = 0; grpc_core::ManualConstructor<grpc_core::ByteStreamCache> send_message_cache; grpc_core::ManualConstructor<grpc_core::ByteStreamCache::CachingByteStream> send_message_caching_stream; @@ -78,7 +102,12 @@ struct channel_data { static grpc_error* client_filter_incoming_metadata(grpc_call_element* elem, grpc_metadata_batch* b) { if (b->idx.named.status != nullptr) { - if (grpc_mdelem_eq(b->idx.named.status->md, GRPC_MDELEM_STATUS_200)) { + /* If both gRPC status and HTTP status are provided in the response, we + * should prefer the gRPC status code, as mentioned in + * https://github.com/grpc/grpc/blob/master/doc/http-grpc-status-mapping.md.
+ */ + if (b->idx.named.grpc_status != nullptr || + grpc_mdelem_eq(b->idx.named.status->md, GRPC_MDELEM_STATUS_200)) { grpc_metadata_batch_remove(b, b->idx.named.status); } else { char* val = grpc_dump_slice(GRPC_MDVALUE(b->idx.named.status->md), @@ -147,21 +176,39 @@ static void recv_initial_metadata_ready(void* user_data, grpc_error* error) { call_data* calld = static_cast<call_data*>(elem->call_data); if (error == GRPC_ERROR_NONE) { error = client_filter_incoming_metadata(elem, calld->recv_initial_metadata); + calld->recv_initial_metadata_error = GRPC_ERROR_REF(error); } else { GRPC_ERROR_REF(error); } - GRPC_CLOSURE_RUN(calld->original_recv_initial_metadata_ready, error); + grpc_closure* closure = calld->original_recv_initial_metadata_ready; + calld->original_recv_initial_metadata_ready = nullptr; + if (calld->seen_recv_trailing_metadata_ready) { + GRPC_CALL_COMBINER_START( + calld->call_combiner, &calld->recv_trailing_metadata_ready, + calld->recv_trailing_metadata_error, "continue recv_trailing_metadata"); + } + GRPC_CLOSURE_RUN(closure, error); } static void recv_trailing_metadata_ready(void* user_data, grpc_error* error) { grpc_call_element* elem = static_cast<grpc_call_element*>(user_data); call_data* calld = static_cast<call_data*>(elem->call_data); + if (calld->original_recv_initial_metadata_ready != nullptr) { + calld->recv_trailing_metadata_error = GRPC_ERROR_REF(error); + calld->seen_recv_trailing_metadata_ready = true; + GRPC_CALL_COMBINER_STOP(calld->call_combiner, + "deferring recv_trailing_metadata_ready until " + "after recv_initial_metadata_ready"); + return; + } if (error == GRPC_ERROR_NONE) { error = client_filter_incoming_metadata(elem, calld->recv_trailing_metadata); } else { GRPC_ERROR_REF(error); } + error = grpc_error_add_child( + error, GRPC_ERROR_REF(calld->recv_initial_metadata_error)); GRPC_CLOSURE_RUN(calld->original_recv_trailing_metadata_ready, error); } @@ -416,25 +463,17 @@ done: /* Constructor for call_data */ static grpc_error* init_call_elem(grpc_call_element* elem, const grpc_call_element_args* args) { - call_data* calld = static_cast<call_data*>(elem->call_data); - calld->call_combiner = args->call_combiner; - GRPC_CLOSURE_INIT(&calld->recv_initial_metadata_ready, - recv_initial_metadata_ready, elem, - grpc_schedule_on_exec_ctx); - GRPC_CLOSURE_INIT(&calld->recv_trailing_metadata_ready, - recv_trailing_metadata_ready, elem, - grpc_schedule_on_exec_ctx); - GRPC_CLOSURE_INIT(&calld->send_message_on_complete, send_message_on_complete, - elem, grpc_schedule_on_exec_ctx); - GRPC_CLOSURE_INIT(&calld->on_send_message_next_done, - on_send_message_next_done, elem, grpc_schedule_on_exec_ctx); + new (elem->call_data) call_data(elem, *args); return GRPC_ERROR_NONE; } /* Destructor for call_data */ static void destroy_call_elem(grpc_call_element* elem, const grpc_call_final_info* final_info, - grpc_closure* ignored) {} + grpc_closure* ignored) { + call_data* calld = static_cast<call_data*>(elem->call_data); + calld->~call_data(); +} static grpc_mdelem scheme_from_args(const grpc_channel_args* args) { unsigned i; diff --git a/src/core/ext/filters/http/client_authority_filter.cc b/src/core/ext/filters/http/client_authority_filter.cc index 1ca20ebb26..6383f12594 100644 --- a/src/core/ext/filters/http/client_authority_filter.cc +++ b/src/core/ext/filters/http/client_authority_filter.cc @@ -59,9 +59,8 @@ void authority_start_transport_stream_op_batch( initial_metadata->idx.named.authority == nullptr) { grpc_error* error = grpc_metadata_batch_add_head( 
initial_metadata, &calld->authority_storage, - grpc_mdelem_from_slices( - GRPC_MDSTR_AUTHORITY, - grpc_slice_ref_internal(chand->default_authority))); + grpc_mdelem_create(GRPC_MDSTR_AUTHORITY, chand->default_authority, + nullptr)); if (error != GRPC_ERROR_NONE) { grpc_transport_stream_op_batch_finish_with_failure(batch, error, calld->call_combiner); diff --git a/src/core/ext/filters/http/message_compress/message_compress_filter.cc b/src/core/ext/filters/http/message_compress/message_compress_filter.cc index 933fe3c77b..9c8c8d9e18 100644 --- a/src/core/ext/filters/http/message_compress/message_compress_filter.cc +++ b/src/core/ext/filters/http/message_compress/message_compress_filter.cc @@ -39,6 +39,10 @@ #include "src/core/lib/surface/call.h" #include "src/core/lib/transport/static_metadata.h" +static void start_send_message_batch(void* arg, grpc_error* unused); +static void send_message_on_complete(void* arg, grpc_error* error); +static void on_send_message_next_done(void* arg, grpc_error* error); + namespace { enum initial_metadata_state { // Initial metadata not yet seen. @@ -50,6 +54,23 @@ enum initial_metadata_state { }; struct call_data { + call_data(grpc_call_element* elem, const grpc_call_element_args& args) + : call_combiner(args.call_combiner) { + GRPC_CLOSURE_INIT(&start_send_message_batch_in_call_combiner, + start_send_message_batch, elem, + grpc_schedule_on_exec_ctx); + grpc_slice_buffer_init(&slices); + GRPC_CLOSURE_INIT(&send_message_on_complete, ::send_message_on_complete, + elem, grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_INIT(&on_send_message_next_done, ::on_send_message_next_done, + elem, grpc_schedule_on_exec_ctx); + } + + ~call_data() { + grpc_slice_buffer_destroy_internal(&slices); + GRPC_ERROR_UNREF(cancel_error); + } + grpc_call_combiner* call_combiner; grpc_linked_mdelem compression_algorithm_storage; grpc_linked_mdelem stream_compression_algorithm_storage; @@ -57,11 +78,12 @@ struct call_data { grpc_linked_mdelem accept_stream_encoding_storage; /** Compression algorithm we'll try to use. It may be given by incoming * metadata, or by the channel's default compression settings. 
*/ - grpc_message_compression_algorithm message_compression_algorithm; - initial_metadata_state send_initial_metadata_state; - grpc_error* cancel_error; + grpc_message_compression_algorithm message_compression_algorithm = + GRPC_MESSAGE_COMPRESS_NONE; + initial_metadata_state send_initial_metadata_state = INITIAL_METADATA_UNSEEN; + grpc_error* cancel_error = GRPC_ERROR_NONE; grpc_closure start_send_message_batch_in_call_combiner; - grpc_transport_stream_op_batch* send_message_batch; + grpc_transport_stream_op_batch* send_message_batch = nullptr; grpc_slice_buffer slices; /**< Buffers up input slices to be compressed */ grpc_core::ManualConstructor<grpc_core::SliceBufferByteStream> replacement_stream; @@ -424,16 +446,7 @@ static void compress_start_transport_stream_op_batch( /* Constructor for call_data */ static grpc_error* init_call_elem(grpc_call_element* elem, const grpc_call_element_args* args) { - call_data* calld = static_cast<call_data*>(elem->call_data); - calld->call_combiner = args->call_combiner; - calld->cancel_error = GRPC_ERROR_NONE; - grpc_slice_buffer_init(&calld->slices); - GRPC_CLOSURE_INIT(&calld->start_send_message_batch_in_call_combiner, - start_send_message_batch, elem, grpc_schedule_on_exec_ctx); - GRPC_CLOSURE_INIT(&calld->on_send_message_next_done, - on_send_message_next_done, elem, grpc_schedule_on_exec_ctx); - GRPC_CLOSURE_INIT(&calld->send_message_on_complete, send_message_on_complete, - elem, grpc_schedule_on_exec_ctx); + new (elem->call_data) call_data(elem, *args); return GRPC_ERROR_NONE; } @@ -442,8 +455,7 @@ static void destroy_call_elem(grpc_call_element* elem, const grpc_call_final_info* final_info, grpc_closure* ignored) { call_data* calld = static_cast<call_data*>(elem->call_data); - grpc_slice_buffer_destroy_internal(&calld->slices); - GRPC_ERROR_UNREF(calld->cancel_error); + calld->~call_data(); } /* Constructor for channel_data */ diff --git a/src/core/ext/filters/http/server/http_server_filter.cc b/src/core/ext/filters/http/server/http_server_filter.cc index 3919447f26..ce1be8370c 100644 --- a/src/core/ext/filters/http/server/http_server_filter.cc +++ b/src/core/ext/filters/http/server/http_server_filter.cc @@ -23,6 +23,7 @@ #include <grpc/support/alloc.h> #include <grpc/support/log.h> #include <string.h> +#include "src/core/lib/channel/channel_args.h" #include "src/core/lib/gprpp/manual_constructor.h" #include "src/core/lib/profiling/timers.h" #include "src/core/lib/slice/b64.h" @@ -34,9 +35,32 @@ #define EXPECTED_CONTENT_TYPE "application/grpc" #define EXPECTED_CONTENT_TYPE_LENGTH sizeof(EXPECTED_CONTENT_TYPE) - 1 +static void hs_recv_initial_metadata_ready(void* user_data, grpc_error* err); +static void hs_recv_trailing_metadata_ready(void* user_data, grpc_error* err); +static void hs_recv_message_ready(void* user_data, grpc_error* err); + namespace { struct call_data { + call_data(grpc_call_element* elem, const grpc_call_element_args& args) + : call_combiner(args.call_combiner) { + GRPC_CLOSURE_INIT(&recv_initial_metadata_ready, + hs_recv_initial_metadata_ready, elem, + grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_INIT(&recv_message_ready, hs_recv_message_ready, elem, + grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_INIT(&recv_trailing_metadata_ready, + hs_recv_trailing_metadata_ready, elem, + grpc_schedule_on_exec_ctx); + } + + ~call_data() { + GRPC_ERROR_UNREF(recv_initial_metadata_ready_error); + if (have_read_stream) { + read_stream->Orphan(); + } + } + grpc_call_combiner* call_combiner; // Outgoing headers to add to send_initial_metadata. 
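The http_server_filter changes below repeat the pattern applied to the deadline, http_client, and message_compress filters earlier in this diff: call_data grows a real constructor and destructor, init_call_elem placement-news it into the element's pre-allocated call_data storage, and destroy_call_elem invokes the destructor explicitly because the channel stack, not the filter, owns the memory. A self-contained sketch of that idiom, with hypothetical names rather than the filter API:

    #include <cstdlib>  // malloc/free stand in for the call-element arena
    #include <new>      // placement new

    struct CallData {
      explicit CallData(int deadline_ms) : deadline_ms(deadline_ms) {}
      ~CallData() { /* unref errors, orphan streams, etc. */ }
      int deadline_ms;
    };

    int main() {
      // The framework pre-allocates sizeof(CallData) bytes per call element.
      void* storage = std::malloc(sizeof(CallData));
      // init_call_elem: construct in place instead of zeroing and assigning.
      CallData* calld = new (storage) CallData(/*deadline_ms=*/250);
      // destroy_call_elem: run the destructor without freeing the memory.
      calld->~CallData();
      std::free(storage);  // the framework releases the storage afterwards
      return 0;
    }

This is what lets the default member initializers above replace the old reliance on gpr_zalloc zero-filling the struct.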
@@ -46,20 +70,31 @@ struct call_data { // If we see the recv_message contents in the GET query string, we // store it here. grpc_core::ManualConstructor<grpc_core::SliceBufferByteStream> read_stream; - bool have_read_stream; + bool have_read_stream = false; // State for intercepting recv_initial_metadata. grpc_closure recv_initial_metadata_ready; + grpc_error* recv_initial_metadata_ready_error = GRPC_ERROR_NONE; grpc_closure* original_recv_initial_metadata_ready; - grpc_metadata_batch* recv_initial_metadata; + grpc_metadata_batch* recv_initial_metadata = nullptr; uint32_t* recv_initial_metadata_flags; - bool seen_recv_initial_metadata_ready; + bool seen_recv_initial_metadata_ready = false; // State for intercepting recv_message. grpc_closure* original_recv_message_ready; grpc_closure recv_message_ready; grpc_core::OrphanablePtr<grpc_core::ByteStream>* recv_message; - bool seen_recv_message_ready; + bool seen_recv_message_ready = false; + + // State for intercepting recv_trailing_metadata + grpc_closure recv_trailing_metadata_ready; + grpc_closure* original_recv_trailing_metadata_ready; + grpc_error* recv_trailing_metadata_ready_error; + bool seen_recv_trailing_metadata_ready = false; +}; + +struct channel_data { + bool surface_user_agent; }; } // namespace @@ -258,6 +293,11 @@ static grpc_error* hs_filter_incoming_metadata(grpc_call_element* elem, GRPC_ERROR_STR_KEY, grpc_slice_from_static_string(":authority"))); } + channel_data* chand = static_cast<channel_data*>(elem->channel_data); + if (!chand->surface_user_agent && b->idx.named.user_agent != nullptr) { + grpc_metadata_batch_remove(b, b->idx.named.user_agent); + } + return error; } @@ -267,6 +307,7 @@ static void hs_recv_initial_metadata_ready(void* user_data, grpc_error* err) { calld->seen_recv_initial_metadata_ready = true; if (err == GRPC_ERROR_NONE) { err = hs_filter_incoming_metadata(elem, calld->recv_initial_metadata); + calld->recv_initial_metadata_ready_error = GRPC_ERROR_REF(err); if (calld->seen_recv_message_ready) { // We've already seen the recv_message callback, but we previously // deferred it, so we need to return it here. 
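The hunks below add the same recv_trailing_metadata deferral protocol that http_client_filter gained earlier in this diff: if trailing metadata arrives before the initial-metadata callback has run, the filter stashes the error, sets seen_recv_trailing_metadata_ready, and yields the call combiner (GRPC_CALL_COMBINER_STOP); the initial-metadata callback later re-queues the deferred closure (GRPC_CALL_COMBINER_START). A rough sketch of the ordering guard, with the combiner reduced to a plain callback queue; none of these names are the real gRPC API:

    #include <cstdio>
    #include <functional>
    #include <queue>

    // Stand-in for the call combiner: callbacks run one at a time, in order.
    std::queue<std::function<void()>> combiner;

    bool seen_initial_metadata = false;
    bool trailing_deferred = false;

    void recv_trailing_metadata_ready() {
      if (!seen_initial_metadata) {
        trailing_deferred = true;  // GRPC_CALL_COMBINER_STOP: defer for now
        std::printf("deferring trailing metadata\n");
        return;
      }
      std::printf("delivering trailing metadata\n");
    }

    void recv_initial_metadata_ready() {
      seen_initial_metadata = true;
      if (trailing_deferred) {
        // GRPC_CALL_COMBINER_START: resume the deferred closure after us.
        combiner.push(recv_trailing_metadata_ready);
      }
      std::printf("delivering initial metadata\n");
    }

    int main() {
      // The transport delivers trailing metadata first; the filter reorders.
      combiner.push(recv_trailing_metadata_ready);
      combiner.push(recv_initial_metadata_ready);
      while (!combiner.empty()) {
        auto cb = combiner.front();
        combiner.pop();
        cb();
      }
      return 0;
    }

Running both closures through one queue models the combiner's guarantee that they never execute concurrently, which is what makes the two bool flags safe without extra locking.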
@@ -286,6 +327,13 @@ static void hs_recv_initial_metadata_ready(void* user_data, grpc_error* err) { } else { GRPC_ERROR_REF(err); } + if (calld->seen_recv_trailing_metadata_ready) { + GRPC_CALL_COMBINER_START(calld->call_combiner, + &calld->recv_trailing_metadata_ready, + calld->recv_trailing_metadata_ready_error, + "resuming hs_recv_trailing_metadata_ready from " + "hs_recv_initial_metadata_ready"); + } GRPC_CLOSURE_RUN(calld->original_recv_initial_metadata_ready, err); } @@ -313,6 +361,23 @@ static void hs_recv_message_ready(void* user_data, grpc_error* err) { } } +static void hs_recv_trailing_metadata_ready(void* user_data, grpc_error* err) { + grpc_call_element* elem = static_cast<grpc_call_element*>(user_data); + call_data* calld = static_cast<call_data*>(elem->call_data); + if (!calld->seen_recv_initial_metadata_ready) { + calld->recv_trailing_metadata_ready_error = GRPC_ERROR_REF(err); + calld->seen_recv_trailing_metadata_ready = true; + GRPC_CALL_COMBINER_STOP(calld->call_combiner, + "deferring hs_recv_trailing_metadata_ready until " + "after hs_recv_initial_metadata_ready"); + return; + } + err = grpc_error_add_child( + GRPC_ERROR_REF(err), + GRPC_ERROR_REF(calld->recv_initial_metadata_ready_error)); + GRPC_CLOSURE_RUN(calld->original_recv_trailing_metadata_ready, err); +} + static grpc_error* hs_mutate_op(grpc_call_element* elem, grpc_transport_stream_op_batch* op) { /* grab pointers to our data from the call element */ @@ -357,6 +422,13 @@ static grpc_error* hs_mutate_op(grpc_call_element* elem, op->payload->recv_message.recv_message_ready = &calld->recv_message_ready; } + if (op->recv_trailing_metadata) { + calld->original_recv_trailing_metadata_ready = + op->payload->recv_trailing_metadata.recv_trailing_metadata_ready; + op->payload->recv_trailing_metadata.recv_trailing_metadata_ready = + &calld->recv_trailing_metadata_ready; + } + if (op->send_trailing_metadata) { grpc_error* error = hs_filter_outgoing_metadata( elem, op->payload->send_trailing_metadata.send_trailing_metadata); @@ -382,13 +454,7 @@ static void hs_start_transport_stream_op_batch( /* Constructor for call_data */ static grpc_error* hs_init_call_elem(grpc_call_element* elem, const grpc_call_element_args* args) { - call_data* calld = static_cast<call_data*>(elem->call_data); - calld->call_combiner = args->call_combiner; - GRPC_CLOSURE_INIT(&calld->recv_initial_metadata_ready, - hs_recv_initial_metadata_ready, elem, - grpc_schedule_on_exec_ctx); - GRPC_CLOSURE_INIT(&calld->recv_message_ready, hs_recv_message_ready, elem, - grpc_schedule_on_exec_ctx); + new (elem->call_data) call_data(elem, *args); return GRPC_ERROR_NONE; } @@ -397,15 +463,18 @@ static void hs_destroy_call_elem(grpc_call_element* elem, const grpc_call_final_info* final_info, grpc_closure* ignored) { call_data* calld = static_cast<call_data*>(elem->call_data); - if (calld->have_read_stream) { - calld->read_stream->Orphan(); - } + calld->~call_data(); } /* Constructor for channel_data */ static grpc_error* hs_init_channel_elem(grpc_channel_element* elem, grpc_channel_element_args* args) { + channel_data* chand = static_cast<channel_data*>(elem->channel_data); GPR_ASSERT(!args->is_last); + chand->surface_user_agent = grpc_channel_arg_get_bool( + grpc_channel_args_find(args->channel_args, + const_cast<char*>(GRPC_ARG_SURFACE_USER_AGENT)), + true); return GRPC_ERROR_NONE; } @@ -419,7 +488,7 @@ const grpc_channel_filter grpc_http_server_filter = { hs_init_call_elem, grpc_call_stack_ignore_set_pollset_or_pollset_set, hs_destroy_call_elem, - 0, +
sizeof(channel_data), hs_init_channel_elem, hs_destroy_channel_elem, grpc_channel_next_get_info, diff --git a/src/core/ext/filters/load_reporting/server_load_reporting_filter.cc b/src/core/ext/filters/load_reporting/server_load_reporting_filter.cc index 8ac34c629f..6a7231ff7d 100644 --- a/src/core/ext/filters/load_reporting/server_load_reporting_filter.cc +++ b/src/core/ext/filters/load_reporting/server_load_reporting_filter.cc @@ -25,7 +25,6 @@ #include <grpc/support/string_util.h> #include "src/core/ext/filters/client_channel/parse_address.h" -#include "src/core/ext/filters/client_channel/uri_parser.h" #include "src/core/ext/filters/load_reporting/registered_opencensus_objects.h" #include "src/core/ext/filters/load_reporting/server_load_reporting_filter.h" #include "src/core/lib/channel/channel_args.h" @@ -36,6 +35,7 @@ #include "src/core/lib/security/context/security_context.h" #include "src/core/lib/slice/slice_internal.h" #include "src/core/lib/surface/call.h" +#include "src/core/lib/uri/uri_parser.h" namespace grpc { diff --git a/src/core/ext/filters/max_age/max_age_filter.cc b/src/core/ext/filters/max_age/max_age_filter.cc index 1fe8288bd0..431472609e 100644 --- a/src/core/ext/filters/max_age/max_age_filter.cc +++ b/src/core/ext/filters/max_age/max_age_filter.cc @@ -429,8 +429,7 @@ static grpc_error* init_channel_elem(grpc_channel_element* elem, ? GRPC_MILLIS_INF_FUTURE : DEFAULT_MAX_CONNECTION_IDLE_MS; chand->idle_state = MAX_IDLE_STATE_INIT; - gpr_atm_no_barrier_store(&chand->last_enter_idle_time_millis, - GRPC_MILLIS_INF_PAST); + gpr_atm_no_barrier_store(&chand->last_enter_idle_time_millis, GPR_ATM_MIN); for (size_t i = 0; i < args->channel_args->num_args; ++i) { if (0 == strcmp(args->channel_args->args[i].key, GRPC_ARG_MAX_CONNECTION_AGE_MS)) { diff --git a/src/core/ext/filters/message_size/message_size_filter.cc b/src/core/ext/filters/message_size/message_size_filter.cc index c7fc3f2e62..94d6942aa4 100644 --- a/src/core/ext/filters/message_size/message_size_filter.cc +++ b/src/core/ext/filters/message_size/message_size_filter.cc @@ -90,27 +90,70 @@ RefCountedPtr<MessageSizeLimits> MessageSizeLimits::CreateFromJson( } // namespace } // namespace grpc_core +static void recv_message_ready(void* user_data, grpc_error* error); +static void recv_trailing_metadata_ready(void* user_data, grpc_error* error); + namespace { +struct channel_data { + message_size_limits limits; + // Maps path names to refcounted_message_size_limits structs. + grpc_core::RefCountedPtr<grpc_core::SliceHashTable< + grpc_core::RefCountedPtr<grpc_core::MessageSizeLimits>>> + method_limit_table; +}; + struct call_data { + call_data(grpc_call_element* elem, const channel_data& chand, + const grpc_call_element_args& args) + : call_combiner(args.call_combiner), limits(chand.limits) { + GRPC_CLOSURE_INIT(&recv_message_ready, ::recv_message_ready, elem, + grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_INIT(&recv_trailing_metadata_ready, + ::recv_trailing_metadata_ready, elem, + grpc_schedule_on_exec_ctx); + // Get max sizes from channel data, then merge in per-method config values. + // Note: Per-method config is only available on the client, so we + // apply the max request size to the send limit and the max response + // size to the receive limit. 
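In the message_size_filter constructor body that follows, per-method limits merge into the channel defaults by taking the smaller value, with a negative limit meaning unlimited, so method config can tighten but never loosen a channel-wide bound. A standalone sketch of that merge rule (MergeLimit is a hypothetical helper, not part of the filter):

    #include <cassert>

    // A negative limit means "no limit", matching the filter's convention.
    static int MergeLimit(int channel_limit, int method_limit) {
      if (method_limit >= 0 &&
          (method_limit < channel_limit || channel_limit < 0)) {
        return method_limit;  // per-method config tightens the default
      }
      return channel_limit;
    }

    int main() {
      assert(MergeLimit(/*channel=*/4096, /*method=*/1024) == 1024);
      assert(MergeLimit(4096, 8192) == 4096);  // method may not loosen
      assert(MergeLimit(-1, 1024) == 1024);    // unlimited channel, bounded
      assert(MergeLimit(4096, -1) == 4096);    // unset method keeps default
      return 0;
    }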
+ if (chand.method_limit_table != nullptr) { + grpc_core::RefCountedPtr<grpc_core::MessageSizeLimits> limits = + grpc_core::ServiceConfig::MethodConfigTableLookup( + *chand.method_limit_table, args.path); + if (limits != nullptr) { + if (limits->limits().max_send_size >= 0 && + (limits->limits().max_send_size < this->limits.max_send_size || + this->limits.max_send_size < 0)) { + this->limits.max_send_size = limits->limits().max_send_size; + } + if (limits->limits().max_recv_size >= 0 && + (limits->limits().max_recv_size < this->limits.max_recv_size || + this->limits.max_recv_size < 0)) { + this->limits.max_recv_size = limits->limits().max_recv_size; + } + } + } + } + + ~call_data() { GRPC_ERROR_UNREF(error); } + grpc_call_combiner* call_combiner; message_size_limits limits; // Receive closures are chained: we inject this closure as the // recv_message_ready up-call on transport_stream_op, and remember to // call our next_recv_message_ready member after handling it. grpc_closure recv_message_ready; + grpc_closure recv_trailing_metadata_ready; + // The error caused by a message that is too large, or GRPC_ERROR_NONE + grpc_error* error = GRPC_ERROR_NONE; // Used by recv_message_ready. - grpc_core::OrphanablePtr<grpc_core::ByteStream>* recv_message; + grpc_core::OrphanablePtr<grpc_core::ByteStream>* recv_message = nullptr; // Original recv_message_ready callback, invoked after our own. - grpc_closure* next_recv_message_ready; -}; - -struct channel_data { - message_size_limits limits; - // Maps path names to refcounted_message_size_limits structs. - grpc_core::RefCountedPtr<grpc_core::SliceHashTable< - grpc_core::RefCountedPtr<grpc_core::MessageSizeLimits>>> - method_limit_table; + grpc_closure* next_recv_message_ready = nullptr; + // Original recv_trailing_metadata callback, invoked after our own. + grpc_closure* original_recv_trailing_metadata_ready; + bool seen_recv_trailing_metadata = false; + grpc_error* recv_trailing_metadata_error; }; } // namespace @@ -130,18 +173,52 @@ static void recv_message_ready(void* user_data, grpc_error* error) { grpc_error* new_error = grpc_error_set_int( GRPC_ERROR_CREATE_FROM_COPIED_STRING(message_string), GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_RESOURCE_EXHAUSTED); + GRPC_ERROR_UNREF(calld->error); if (error == GRPC_ERROR_NONE) { error = new_error; } else { error = grpc_error_add_child(error, new_error); - GRPC_ERROR_UNREF(new_error); } + calld->error = GRPC_ERROR_REF(error); gpr_free(message_string); } else { GRPC_ERROR_REF(error); } // Invoke the next callback. - GRPC_CLOSURE_RUN(calld->next_recv_message_ready, error); + grpc_closure* closure = calld->next_recv_message_ready; + calld->next_recv_message_ready = nullptr; + if (calld->seen_recv_trailing_metadata) { + /* We might potentially see another RECV_MESSAGE op. In that case, we do not + * want to run the recv_trailing_metadata_ready closure again. The newer + * RECV_MESSAGE op cannot cause any errors since the transport has already + * invoked the recv_trailing_metadata_ready closure and all further + * RECV_MESSAGE ops will get null payloads. 
*/ + calld->seen_recv_trailing_metadata = false; + GRPC_CALL_COMBINER_START(calld->call_combiner, + &calld->recv_trailing_metadata_ready, + calld->recv_trailing_metadata_error, + "continue recv_trailing_metadata_ready"); + } + GRPC_CLOSURE_RUN(closure, error); +} + +// Callback invoked on completion of recv_trailing_metadata +// Notifies the recv_trailing_metadata batch of any message size failures +static void recv_trailing_metadata_ready(void* user_data, grpc_error* error) { + grpc_call_element* elem = static_cast<grpc_call_element*>(user_data); + call_data* calld = static_cast<call_data*>(elem->call_data); + if (calld->next_recv_message_ready != nullptr) { + calld->seen_recv_trailing_metadata = true; + calld->recv_trailing_metadata_error = GRPC_ERROR_REF(error); + GRPC_CALL_COMBINER_STOP(calld->call_combiner, + "deferring recv_trailing_metadata_ready until " + "after recv_message_ready"); + return; + } + error = + grpc_error_add_child(GRPC_ERROR_REF(error), GRPC_ERROR_REF(calld->error)); + // Invoke the next callback. + GRPC_CLOSURE_RUN(calld->original_recv_trailing_metadata_ready, error); } // Start transport stream op. @@ -172,6 +249,13 @@ static void start_transport_stream_op_batch( calld->recv_message = op->payload->recv_message.recv_message; op->payload->recv_message.recv_message_ready = &calld->recv_message_ready; } + // Inject callback for receiving trailing metadata. + if (op->recv_trailing_metadata) { + calld->original_recv_trailing_metadata_ready = + op->payload->recv_trailing_metadata.recv_trailing_metadata_ready; + op->payload->recv_trailing_metadata.recv_trailing_metadata_ready = + &calld->recv_trailing_metadata_ready; + } // Chain to the next filter. grpc_call_next_op(elem, op); } @@ -180,40 +264,17 @@ static void start_transport_stream_op_batch( static grpc_error* init_call_elem(grpc_call_element* elem, const grpc_call_element_args* args) { channel_data* chand = static_cast<channel_data*>(elem->channel_data); - call_data* calld = static_cast<call_data*>(elem->call_data); - calld->call_combiner = args->call_combiner; - calld->next_recv_message_ready = nullptr; - GRPC_CLOSURE_INIT(&calld->recv_message_ready, recv_message_ready, elem, - grpc_schedule_on_exec_ctx); - // Get max sizes from channel data, then merge in per-method config values. - // Note: Per-method config is only available on the client, so we - // apply the max request size to the send limit and the max response - // size to the receive limit. - calld->limits = chand->limits; - if (chand->method_limit_table != nullptr) { - grpc_core::RefCountedPtr<grpc_core::MessageSizeLimits> limits = - grpc_core::ServiceConfig::MethodConfigTableLookup( - *chand->method_limit_table, args->path); - if (limits != nullptr) { - if (limits->limits().max_send_size >= 0 && - (limits->limits().max_send_size < calld->limits.max_send_size || - calld->limits.max_send_size < 0)) { - calld->limits.max_send_size = limits->limits().max_send_size; - } - if (limits->limits().max_recv_size >= 0 && - (limits->limits().max_recv_size < calld->limits.max_recv_size || - calld->limits.max_recv_size < 0)) { - calld->limits.max_recv_size = limits->limits().max_recv_size; - } - } - } + new (elem->call_data) call_data(elem, *chand, *args); return GRPC_ERROR_NONE; } // Destructor for call_data. 
static void destroy_call_elem(grpc_call_element* elem, const grpc_call_final_info* final_info, - grpc_closure* ignored) {} + grpc_closure* ignored) { + call_data* calld = static_cast<call_data*>(elem->call_data); + calld->~call_data(); +} static int default_size(const grpc_channel_args* args, int without_minimal_stack) { diff --git a/src/core/ext/transport/chttp2/client/chttp2_connector.cc b/src/core/ext/transport/chttp2/client/chttp2_connector.cc index e7522ffba8..60a32022f5 100644 --- a/src/core/ext/transport/chttp2/client/chttp2_connector.cc +++ b/src/core/ext/transport/chttp2/client/chttp2_connector.cc @@ -117,6 +117,8 @@ static void on_handshake_done(void* arg, grpc_error* error) { c->args.interested_parties); c->result->transport = grpc_create_chttp2_transport(args->args, args->endpoint, true); + c->result->socket_uuid = + grpc_chttp2_transport_get_socket_uuid(c->result->transport); GPR_ASSERT(c->result->transport); // TODO(roth): We ideally want to wait until we receive HTTP/2 // settings from the server before we consider the connection @@ -158,12 +160,11 @@ static void on_handshake_done(void* arg, grpc_error* error) { static void start_handshake_locked(chttp2_connector* c) { c->handshake_mgr = grpc_handshake_manager_create(); grpc_handshakers_add(HANDSHAKER_CLIENT, c->args.channel_args, - c->handshake_mgr); + c->args.interested_parties, c->handshake_mgr); grpc_endpoint_add_to_pollset_set(c->endpoint, c->args.interested_parties); grpc_handshake_manager_do_handshake( - c->handshake_mgr, c->args.interested_parties, c->endpoint, - c->args.channel_args, c->args.deadline, nullptr /* acceptor */, - on_handshake_done, c); + c->handshake_mgr, c->endpoint, c->args.channel_args, c->args.deadline, + nullptr /* acceptor */, on_handshake_done, c); c->endpoint = nullptr; // Endpoint handed off to handshake manager. } @@ -211,9 +212,17 @@ static void chttp2_connector_connect(grpc_connector* con, GRPC_CLOSURE_INIT(&c->connected, connected, c, grpc_schedule_on_exec_ctx); GPR_ASSERT(!c->connecting); c->connecting = true; - grpc_tcp_client_connect(&c->connected, &c->endpoint, args->interested_parties, - args->channel_args, &addr, args->deadline); + grpc_closure* closure = &c->connected; + grpc_endpoint** ep = &c->endpoint; gpr_mu_unlock(&c->mu); + // In some implementations, the closure can be flushed before + // grpc_tcp_client_connect returns, and since the closure requires access to + // c->mu, this can result in a deadlock. Refer to + // https://github.com/grpc/grpc/issues/16427. + // grpc_tcp_client_connect fills c->endpoint with the proper contents, and we + // make sure that we still exist at that point by taking a ref.
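The comment above records the fix for grpc/grpc#16427, which the connect call below completes: the connected closure may be invoked inline, before grpc_tcp_client_connect returns, and it acquires c->mu, so the caller must capture what it needs and drop the lock first. A minimal sketch of the hazard and the fix, with a hypothetical connector and a synchronous stand-in for the connect call:

    #include <mutex>

    struct Connector {
      std::mutex mu;
    };

    // Stand-in for grpc_tcp_client_connect: on some platforms the callback
    // can run inline, before this function returns.
    static void ConnectAsync(void (*cb)(Connector*), Connector* c) { cb(c); }

    static void OnConnected(Connector* c) {
      // The real `connected` closure locks c->mu; if the caller still held
      // the lock when the callback ran inline, this would deadlock.
      std::lock_guard<std::mutex> lock(c->mu);
    }

    int main() {
      Connector c;
      c.mu.lock();
      void (*cb)(Connector*) = OnConnected;  // capture while holding the lock
      c.mu.unlock();  // release BEFORE starting the connect
      ConnectAsync(cb, &c);  // an inline callback can now take mu safely
      return 0;
    }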
+ grpc_tcp_client_connect(closure, ep, args->interested_parties, + args->channel_args, &addr, args->deadline); } static const grpc_connector_vtable chttp2_connector_vtable = { diff --git a/src/core/ext/transport/chttp2/client/secure/secure_channel_create.cc b/src/core/ext/transport/chttp2/client/secure/secure_channel_create.cc index 5ce73a95d7..e73eee4353 100644 --- a/src/core/ext/transport/chttp2/client/secure/secure_channel_create.cc +++ b/src/core/ext/transport/chttp2/client/secure/secure_channel_create.cc @@ -27,7 +27,6 @@ #include "src/core/ext/filters/client_channel/client_channel.h" #include "src/core/ext/filters/client_channel/resolver_registry.h" -#include "src/core/ext/filters/client_channel/uri_parser.h" #include "src/core/ext/transport/chttp2/client/chttp2_connector.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/gprpp/memory.h" @@ -39,6 +38,7 @@ #include "src/core/lib/slice/slice_internal.h" #include "src/core/lib/surface/api_trace.h" #include "src/core/lib/surface/channel.h" +#include "src/core/lib/uri/uri_parser.h" static void client_channel_factory_ref( grpc_client_channel_factory* cc_factory) {} diff --git a/src/core/ext/transport/chttp2/server/chttp2_server.cc b/src/core/ext/transport/chttp2/server/chttp2_server.cc index 3f8a26ae32..33d2b22aa5 100644 --- a/src/core/ext/transport/chttp2/server/chttp2_server.cc +++ b/src/core/ext/transport/chttp2/server/chttp2_server.cc @@ -37,8 +37,10 @@ #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/channel/handshaker.h" #include "src/core/lib/channel/handshaker_registry.h" +#include "src/core/lib/gpr/host_port.h" #include "src/core/lib/iomgr/endpoint.h" #include "src/core/lib/iomgr/resolve_address.h" +#include "src/core/lib/iomgr/resource_quota.h" #include "src/core/lib/iomgr/tcp_server.h" #include "src/core/lib/slice/slice_internal.h" #include "src/core/lib/surface/api_trace.h" @@ -53,6 +55,8 @@ typedef struct { grpc_closure tcp_server_shutdown_complete; grpc_closure* server_destroy_listener_done; grpc_handshake_manager* pending_handshake_mgrs; + grpc_core::RefCountedPtr<grpc_core::channelz::ListenSocketNode> + channelz_listen_socket; } server_state; typedef struct { @@ -67,6 +71,7 @@ typedef struct { grpc_timer timer; grpc_closure on_timeout; grpc_closure on_receive_settings; + grpc_pollset_set* interested_parties; } server_connection_state; static void server_connection_state_unref( @@ -76,6 +81,9 @@ static void server_connection_state_unref( GRPC_CHTTP2_UNREF_TRANSPORT(connection_state->transport, "receive settings timeout"); } + grpc_pollset_set_del_pollset(connection_state->interested_parties, + connection_state->accepting_pollset); + grpc_pollset_set_destroy(connection_state->interested_parties); gpr_free(connection_state); } } @@ -108,9 +116,16 @@ static void on_handshake_done(void* arg, grpc_error* error) { server_connection_state* connection_state = static_cast<server_connection_state*>(args->user_data); gpr_mu_lock(&connection_state->svr_state->mu); + grpc_resource_user* resource_user = grpc_server_get_default_resource_user( + connection_state->svr_state->server); if (error != GRPC_ERROR_NONE || connection_state->svr_state->shutdown) { const char* error_str = grpc_error_string(error); gpr_log(GPR_DEBUG, "Handshaking failed: %s", error_str); + if (resource_user != nullptr) { + grpc_resource_user_free(resource_user, GRPC_RESOURCE_QUOTA_CHANNEL_SIZE); + } if (error ==
GRPC_ERROR_NONE && args->endpoint != nullptr) { // We were shut down after handshaking completed successfully, so // destroy the endpoint here. @@ -129,11 +144,12 @@ static void on_handshake_done(void* arg, grpc_error* error) { // handshaker may have handed off the connection to some external // code, so we can just clean up here without creating a transport. if (args->endpoint != nullptr) { - grpc_transport* transport = - grpc_create_chttp2_transport(args->args, args->endpoint, false); + grpc_transport* transport = grpc_create_chttp2_transport( + args->args, args->endpoint, false, resource_user); grpc_server_setup_transport( connection_state->svr_state->server, transport, - connection_state->accepting_pollset, args->args); + connection_state->accepting_pollset, args->args, + grpc_chttp2_transport_get_socket_uuid(transport), resource_user); // Use notify_on_receive_settings callback to enforce the // handshake deadline. connection_state->transport = @@ -152,6 +168,11 @@ static void on_handshake_done(void* arg, grpc_error* error) { connection_state, grpc_schedule_on_exec_ctx); grpc_timer_init(&connection_state->timer, connection_state->deadline, &connection_state->on_timeout); + } else { + if (resource_user != nullptr) { + grpc_resource_user_free(resource_user, + GRPC_RESOURCE_QUOTA_CHANNEL_SIZE); + } } } grpc_handshake_manager_pending_list_remove( @@ -176,6 +197,20 @@ static void on_accept(void* arg, grpc_endpoint* tcp, gpr_free(acceptor); return; } + grpc_resource_user* resource_user = + grpc_server_get_default_resource_user(state->server); + if (resource_user != nullptr && + !grpc_resource_user_safe_alloc(resource_user, + GRPC_RESOURCE_QUOTA_CHANNEL_SIZE)) { + gpr_log( + GPR_ERROR, + "Memory quota exhausted, rejecting the connection, no handshaking."); + gpr_mu_unlock(&state->mu); + grpc_endpoint_shutdown(tcp, GRPC_ERROR_NONE); + grpc_endpoint_destroy(tcp); + gpr_free(acceptor); + return; + } grpc_handshake_manager* handshake_mgr = grpc_handshake_manager_create(); grpc_handshake_manager_pending_list_add(&state->pending_handshake_mgrs, handshake_mgr); @@ -189,7 +224,11 @@ static void on_accept(void* arg, grpc_endpoint* tcp, connection_state->accepting_pollset = accepting_pollset; connection_state->acceptor = acceptor; connection_state->handshake_mgr = handshake_mgr; + connection_state->interested_parties = grpc_pollset_set_create(); + grpc_pollset_set_add_pollset(connection_state->interested_parties, + connection_state->accepting_pollset); grpc_handshakers_add(HANDSHAKER_SERVER, state->args, + connection_state->interested_parties, connection_state->handshake_mgr); const grpc_arg* timeout_arg = grpc_channel_args_find(state->args, GRPC_ARG_SERVER_HANDSHAKE_TIMEOUT_MS); @@ -197,10 +236,10 @@ static void on_accept(void* arg, grpc_endpoint* tcp, grpc_core::ExecCtx::Get()->Now() + grpc_channel_arg_get_integer(timeout_arg, {120 * GPR_MS_PER_SEC, 1, INT_MAX}); - grpc_handshake_manager_do_handshake( - connection_state->handshake_mgr, nullptr /* interested_parties */, tcp, - state->args, connection_state->deadline, acceptor, on_handshake_done, - connection_state); + grpc_handshake_manager_do_handshake(connection_state->handshake_mgr, tcp, + state->args, connection_state->deadline, + acceptor, on_handshake_done, + connection_state); } /* Server callback: start listening on our ports */ @@ -223,6 +262,7 @@ static void tcp_server_shutdown_complete(void* arg, grpc_error* error) { GPR_ASSERT(state->shutdown); grpc_handshake_manager_pending_list_shutdown_all( state->pending_handshake_mgrs, 
GRPC_ERROR_REF(error)); + state->channelz_listen_socket.reset(); gpr_mu_unlock(&state->mu); // Flush queued work before destroying handshaker factory, since that // may do a synchronous unref. @@ -262,6 +302,8 @@ grpc_error* grpc_chttp2_server_add_port(grpc_server* server, const char* addr, server_state* state = nullptr; grpc_error** errors = nullptr; size_t naddrs = 0; + const grpc_arg* arg = nullptr; + intptr_t socket_uuid = 0; *port_num = -1; @@ -323,9 +365,17 @@ grpc_error* grpc_chttp2_server_add_port(grpc_server* server, const char* addr, } grpc_resolved_addresses_destroy(resolved); + arg = grpc_channel_args_find(args, GRPC_ARG_ENABLE_CHANNELZ); + if (grpc_channel_arg_get_bool(arg, GRPC_ENABLE_CHANNELZ_DEFAULT)) { + state->channelz_listen_socket = + grpc_core::MakeRefCounted<grpc_core::channelz::ListenSocketNode>( + grpc_core::UniquePtr<char>(gpr_strdup(addr))); + socket_uuid = state->channelz_listen_socket->uuid(); + } + /* Register with the server only upon success */ grpc_server_add_listener(server, state, server_start_listener, - server_destroy_listener); + server_destroy_listener, socket_uuid); goto done; /* Error path: cleanup and return */ diff --git a/src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.cc b/src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.cc index e4bd91d07b..b9024a87e2 100644 --- a/src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.cc +++ b/src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.cc @@ -61,7 +61,7 @@ void grpc_server_add_insecure_channel_from_fd(grpc_server* server, grpc_endpoint_add_to_pollset(server_endpoint, pollsets[i]); } - grpc_server_setup_transport(server, transport, nullptr, server_args); + grpc_server_setup_transport(server, transport, nullptr, server_args, 0); grpc_chttp2_transport_start_reading(transport, nullptr, nullptr); } diff --git a/src/core/ext/transport/chttp2/transport/chttp2_transport.cc b/src/core/ext/transport/chttp2/transport/chttp2_transport.cc index 8e07e3e4f9..da29ff1b37 100644 --- a/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +++ b/src/core/ext/transport/chttp2/transport/chttp2_transport.cc @@ -54,6 +54,7 @@ #include "src/core/lib/transport/timeout_encoding.h" #include "src/core/lib/transport/transport.h" #include "src/core/lib/transport/transport_impl.h" +#include "src/core/lib/uri/uri_parser.h" #define DEFAULT_CONNECTION_WINDOW_TARGET (1024 * 1024) #define MAX_WINDOW 0x7fffffffu @@ -155,51 +156,55 @@ bool g_flow_control_enabled = true; * CONSTRUCTION/DESTRUCTION/REFCOUNTING */ -static void destruct_transport(grpc_chttp2_transport* t) { +grpc_chttp2_transport::~grpc_chttp2_transport() { gpr_log(GPR_INFO, "destruct transport %p", this); size_t i; - grpc_endpoint_destroy(t->ep); + if (channelz_socket != nullptr) { + channelz_socket.reset(); + } + + grpc_endpoint_destroy(ep); - grpc_slice_buffer_destroy_internal(&t->qbuf); + grpc_slice_buffer_destroy_internal(&qbuf); - grpc_slice_buffer_destroy_internal(&t->outbuf); - grpc_chttp2_hpack_compressor_destroy(&t->hpack_compressor); + grpc_slice_buffer_destroy_internal(&outbuf); + grpc_chttp2_hpack_compressor_destroy(&hpack_compressor); - grpc_core::ContextList::Execute(t->cl, nullptr, GRPC_ERROR_NONE); + grpc_core::ContextList::Execute(cl, nullptr, GRPC_ERROR_NONE); + grpc_slice_buffer_destroy_internal(&read_buffer); + grpc_chttp2_hpack_parser_destroy(&hpack_parser); + grpc_chttp2_goaway_parser_destroy(&goaway_parser); - grpc_slice_buffer_destroy_internal(&t->read_buffer); -
grpc_chttp2_hpack_parser_destroy(&t->hpack_parser); - grpc_chttp2_goaway_parser_destroy(&t->goaway_parser); for (i = 0; i < STREAM_LIST_COUNT; i++) { - GPR_ASSERT(t->lists[i].head == nullptr); - GPR_ASSERT(t->lists[i].tail == nullptr); + GPR_ASSERT(lists[i].head == nullptr); + GPR_ASSERT(lists[i].tail == nullptr); } - GRPC_ERROR_UNREF(t->goaway_error); + GRPC_ERROR_UNREF(goaway_error); - GPR_ASSERT(grpc_chttp2_stream_map_size(&t->stream_map) == 0); + GPR_ASSERT(grpc_chttp2_stream_map_size(&stream_map) == 0); - grpc_chttp2_stream_map_destroy(&t->stream_map); - grpc_connectivity_state_destroy(&t->channel_callback.state_tracker); + grpc_chttp2_stream_map_destroy(&stream_map); + grpc_connectivity_state_destroy(&channel_callback.state_tracker); - GRPC_COMBINER_UNREF(t->combiner, "chttp2_transport"); + GRPC_COMBINER_UNREF(combiner, "chttp2_transport"); - cancel_pings(t, GRPC_ERROR_CREATE_FROM_STATIC_STRING("Transport destroyed")); + cancel_pings(this, + GRPC_ERROR_CREATE_FROM_STATIC_STRING("Transport destroyed")); - while (t->write_cb_pool) { - grpc_chttp2_write_cb* next = t->write_cb_pool->next; - gpr_free(t->write_cb_pool); - t->write_cb_pool = next; + while (write_cb_pool) { + grpc_chttp2_write_cb* next = write_cb_pool->next; + gpr_free(write_cb_pool); + write_cb_pool = next; } - t->flow_control.Destroy(); + flow_control.Destroy(); - GRPC_ERROR_UNREF(t->closed_with_error); - gpr_free(t->ping_acks); - gpr_free(t->peer_string); - gpr_free(t); + GRPC_ERROR_UNREF(closed_with_error); + gpr_free(ping_acks); + gpr_free(peer_string); } #ifndef NDEBUG @@ -211,7 +216,8 @@ void grpc_chttp2_unref_transport(grpc_chttp2_transport* t, const char* reason, t, val, val - 1, reason, file, line); } if (!gpr_unref(&t->refs)) return; - destruct_transport(t); + t->~grpc_chttp2_transport(); + gpr_free(t); } void grpc_chttp2_ref_transport(grpc_chttp2_transport* t, const char* reason, @@ -226,7 +232,8 @@ void grpc_chttp2_ref_transport(grpc_chttp2_transport* t, const char* reason, #else void grpc_chttp2_unref_transport(grpc_chttp2_transport* t) { if (!gpr_unref(&t->refs)) return; - destruct_transport(t); + t->~grpc_chttp2_transport(); + gpr_free(t); } void grpc_chttp2_ref_transport(grpc_chttp2_transport* t) { gpr_ref(&t->refs); } @@ -234,35 +241,178 @@ void grpc_chttp2_ref_transport(grpc_chttp2_transport* t) { gpr_ref(&t->refs); } static const grpc_transport_vtable* get_vtable(void); -static void init_transport(grpc_chttp2_transport* t, - const grpc_channel_args* channel_args, - grpc_endpoint* ep, bool is_client) { +/* Returns whether bdp is enabled */ +static bool read_channel_args(grpc_chttp2_transport* t, + const grpc_channel_args* channel_args, + bool is_client) { + bool enable_bdp = true; + bool channelz_enabled = GRPC_ENABLE_CHANNELZ_DEFAULT; size_t i; int j; - GPR_ASSERT(strlen(GRPC_CHTTP2_CLIENT_CONNECT_STRING) == - GRPC_CHTTP2_CLIENT_CONNECT_STRLEN); - - t->base.vtable = get_vtable(); - t->ep = ep; - /* one ref is for destroy */ - gpr_ref_init(&t->refs, 1); - t->combiner = grpc_combiner_create(); - t->peer_string = grpc_endpoint_get_peer(ep); - t->endpoint_reading = 1; - t->next_stream_id = is_client ? 1 : 2; - t->is_client = is_client; - t->deframe_state = is_client ? GRPC_DTS_FH_0 : GRPC_DTS_CLIENT_PREFIX_0; - t->is_first_frame = true; - grpc_connectivity_state_init( - &t->channel_callback.state_tracker, GRPC_CHANNEL_READY, - is_client ? 
"client_transport" : "server_transport"); - - grpc_slice_buffer_init(&t->qbuf); - - grpc_slice_buffer_init(&t->outbuf); - grpc_chttp2_hpack_compressor_init(&t->hpack_compressor); + for (i = 0; i < channel_args->num_args; i++) { + if (0 == strcmp(channel_args->args[i].key, + GRPC_ARG_HTTP2_INITIAL_SEQUENCE_NUMBER)) { + const grpc_integer_options options = {-1, 0, INT_MAX}; + const int value = + grpc_channel_arg_get_integer(&channel_args->args[i], options); + if (value >= 0) { + if ((t->next_stream_id & 1) != (value & 1)) { + gpr_log(GPR_ERROR, "%s: low bit must be %d on %s", + GRPC_ARG_HTTP2_INITIAL_SEQUENCE_NUMBER, t->next_stream_id & 1, + is_client ? "client" : "server"); + } else { + t->next_stream_id = static_cast<uint32_t>(value); + } + } + } else if (0 == strcmp(channel_args->args[i].key, + GRPC_ARG_HTTP2_HPACK_TABLE_SIZE_ENCODER)) { + const grpc_integer_options options = {-1, 0, INT_MAX}; + const int value = + grpc_channel_arg_get_integer(&channel_args->args[i], options); + if (value >= 0) { + grpc_chttp2_hpack_compressor_set_max_usable_size( + &t->hpack_compressor, static_cast<uint32_t>(value)); + } + } else if (0 == strcmp(channel_args->args[i].key, + GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA)) { + t->ping_policy.max_pings_without_data = grpc_channel_arg_get_integer( + &channel_args->args[i], + {g_default_max_pings_without_data, 0, INT_MAX}); + } else if (0 == strcmp(channel_args->args[i].key, + GRPC_ARG_HTTP2_MAX_PING_STRIKES)) { + t->ping_policy.max_ping_strikes = grpc_channel_arg_get_integer( + &channel_args->args[i], {g_default_max_ping_strikes, 0, INT_MAX}); + } else if (0 == + strcmp(channel_args->args[i].key, + GRPC_ARG_HTTP2_MIN_SENT_PING_INTERVAL_WITHOUT_DATA_MS)) { + t->ping_policy.min_sent_ping_interval_without_data = + grpc_channel_arg_get_integer( + &channel_args->args[i], + grpc_integer_options{ + g_default_min_sent_ping_interval_without_data_ms, 0, + INT_MAX}); + } else if (0 == + strcmp(channel_args->args[i].key, + GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS)) { + t->ping_policy.min_recv_ping_interval_without_data = + grpc_channel_arg_get_integer( + &channel_args->args[i], + grpc_integer_options{ + g_default_min_recv_ping_interval_without_data_ms, 0, + INT_MAX}); + } else if (0 == strcmp(channel_args->args[i].key, + GRPC_ARG_HTTP2_WRITE_BUFFER_SIZE)) { + t->write_buffer_size = static_cast<uint32_t>(grpc_channel_arg_get_integer( + &channel_args->args[i], {0, 0, MAX_WRITE_BUFFER_SIZE})); + } else if (0 == + strcmp(channel_args->args[i].key, GRPC_ARG_HTTP2_BDP_PROBE)) { + enable_bdp = grpc_channel_arg_get_bool(&channel_args->args[i], true); + } else if (0 == + strcmp(channel_args->args[i].key, GRPC_ARG_KEEPALIVE_TIME_MS)) { + const int value = grpc_channel_arg_get_integer( + &channel_args->args[i], + grpc_integer_options{t->is_client + ? g_default_client_keepalive_time_ms + : g_default_server_keepalive_time_ms, + 1, INT_MAX}); + t->keepalive_time = value == INT_MAX ? GRPC_MILLIS_INF_FUTURE : value; + } else if (0 == strcmp(channel_args->args[i].key, + GRPC_ARG_KEEPALIVE_TIMEOUT_MS)) { + const int value = grpc_channel_arg_get_integer( + &channel_args->args[i], + grpc_integer_options{t->is_client + ? g_default_client_keepalive_timeout_ms + : g_default_server_keepalive_timeout_ms, + 0, INT_MAX}); + t->keepalive_timeout = value == INT_MAX ? 
GRPC_MILLIS_INF_FUTURE : value; + } else if (0 == strcmp(channel_args->args[i].key, + GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS)) { + t->keepalive_permit_without_calls = static_cast<uint32_t>( + grpc_channel_arg_get_integer(&channel_args->args[i], {0, 0, 1})); + } else if (0 == strcmp(channel_args->args[i].key, + GRPC_ARG_OPTIMIZATION_TARGET)) { + if (channel_args->args[i].type != GRPC_ARG_STRING) { + gpr_log(GPR_ERROR, "%s should be a string", + GRPC_ARG_OPTIMIZATION_TARGET); + } else if (0 == strcmp(channel_args->args[i].value.string, "blend")) { + t->opt_target = GRPC_CHTTP2_OPTIMIZE_FOR_LATENCY; + } else if (0 == strcmp(channel_args->args[i].value.string, "latency")) { + t->opt_target = GRPC_CHTTP2_OPTIMIZE_FOR_LATENCY; + } else if (0 == + strcmp(channel_args->args[i].value.string, "throughput")) { + t->opt_target = GRPC_CHTTP2_OPTIMIZE_FOR_THROUGHPUT; + } else { + gpr_log(GPR_ERROR, "%s value '%s' unknown, assuming 'blend'", + GRPC_ARG_OPTIMIZATION_TARGET, + channel_args->args[i].value.string); + } + } else if (0 == + strcmp(channel_args->args[i].key, GRPC_ARG_ENABLE_CHANNELZ)) { + channelz_enabled = grpc_channel_arg_get_bool( + &channel_args->args[i], GRPC_ENABLE_CHANNELZ_DEFAULT); + } else { + static const struct { + const char* channel_arg_name; + grpc_chttp2_setting_id setting_id; + grpc_integer_options integer_options; + bool availability[2] /* server, client */; + } settings_map[] = {{GRPC_ARG_MAX_CONCURRENT_STREAMS, + GRPC_CHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, + {-1, 0, INT32_MAX}, + {true, false}}, + {GRPC_ARG_HTTP2_HPACK_TABLE_SIZE_DECODER, + GRPC_CHTTP2_SETTINGS_HEADER_TABLE_SIZE, + {-1, 0, INT32_MAX}, + {true, true}}, + {GRPC_ARG_MAX_METADATA_SIZE, + GRPC_CHTTP2_SETTINGS_MAX_HEADER_LIST_SIZE, + {-1, 0, INT32_MAX}, + {true, true}}, + {GRPC_ARG_HTTP2_MAX_FRAME_SIZE, + GRPC_CHTTP2_SETTINGS_MAX_FRAME_SIZE, + {-1, 16384, 16777215}, + {true, true}}, + {GRPC_ARG_HTTP2_ENABLE_TRUE_BINARY, + GRPC_CHTTP2_SETTINGS_GRPC_ALLOW_TRUE_BINARY_METADATA, + {1, 0, 1}, + {true, true}}, + {GRPC_ARG_HTTP2_STREAM_LOOKAHEAD_BYTES, + GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE, + {-1, 5, INT32_MAX}, + {true, true}}}; + for (j = 0; j < static_cast<int> GPR_ARRAY_SIZE(settings_map); j++) { + if (0 == strcmp(channel_args->args[i].key, + settings_map[j].channel_arg_name)) { + if (!settings_map[j].availability[is_client]) { + gpr_log(GPR_DEBUG, "%s is not available on %s", + settings_map[j].channel_arg_name, + is_client ? "clients" : "servers"); + } else { + int value = grpc_channel_arg_get_integer( + &channel_args->args[i], settings_map[j].integer_options); + if (value >= 0) { + queue_setting_update(t, settings_map[j].setting_id, + static_cast<uint32_t>(value)); + } + } + break; + } + } + } + } + if (channelz_enabled) { + // TODO(ncteisen): add an API to endpoint to query for local addr, and pass + // it in here, so SocketNode knows its own address. 
+ t->channelz_socket = + grpc_core::MakeRefCounted<grpc_core::channelz::SocketNode>( + grpc_core::UniquePtr<char>(), + grpc_core::UniquePtr<char>(gpr_strdup(t->peer_string))); + } + return enable_bdp; +} +static void init_transport_closures(grpc_chttp2_transport* t) { GRPC_CLOSURE_INIT(&t->read_action_locked, read_action_locked, t, grpc_combiner_scheduler(t->combiner)); GRPC_CLOSURE_INIT(&t->benign_reclaimer_locked, benign_reclaimer_locked, t, @@ -290,56 +440,9 @@ static void init_transport(grpc_chttp2_transport* t, GRPC_CLOSURE_INIT(&t->keepalive_watchdog_fired_locked, keepalive_watchdog_fired_locked, t, grpc_combiner_scheduler(t->combiner)); +} - t->goaway_error = GRPC_ERROR_NONE; - grpc_chttp2_goaway_parser_init(&t->goaway_parser); - grpc_chttp2_hpack_parser_init(&t->hpack_parser); - - grpc_slice_buffer_init(&t->read_buffer); - - /* 8 is a random stab in the dark as to a good initial size: it's small enough - that it shouldn't waste memory for infrequently used connections, yet - large enough that the exponential growth should happen nicely when it's - needed. - TODO(ctiller): tune this */ - grpc_chttp2_stream_map_init(&t->stream_map, 8); - - /* copy in initial settings to all setting sets */ - for (i = 0; i < GRPC_CHTTP2_NUM_SETTINGS; i++) { - for (j = 0; j < GRPC_NUM_SETTING_SETS; j++) { - t->settings[j][i] = grpc_chttp2_settings_parameters[i].default_value; - } - } - t->dirtied_local_settings = 1; - /* Hack: it's common for implementations to assume 65536 bytes initial send - window -- this should by rights be 0 */ - t->force_send_settings = 1 << GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE; - t->sent_local_settings = 0; - t->write_buffer_size = grpc_core::chttp2::kDefaultWindow; - - if (is_client) { - grpc_slice_buffer_add(&t->outbuf, grpc_slice_from_copied_string( - GRPC_CHTTP2_CLIENT_CONNECT_STRING)); - } - - /* configure http2 the way we like it */ - if (is_client) { - queue_setting_update(t, GRPC_CHTTP2_SETTINGS_ENABLE_PUSH, 0); - queue_setting_update(t, GRPC_CHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 0); - } - queue_setting_update(t, GRPC_CHTTP2_SETTINGS_MAX_HEADER_LIST_SIZE, - DEFAULT_MAX_HEADER_LIST_SIZE); - queue_setting_update(t, GRPC_CHTTP2_SETTINGS_GRPC_ALLOW_TRUE_BINARY_METADATA, - 1); - - t->ping_policy.max_pings_without_data = g_default_max_pings_without_data; - t->ping_policy.min_sent_ping_interval_without_data = - g_default_min_sent_ping_interval_without_data_ms; - t->ping_policy.max_ping_strikes = g_default_max_ping_strikes; - t->ping_policy.min_recv_ping_interval_without_data = - g_default_min_recv_ping_interval_without_data_ms; - - /* Keepalive setting */ +static void init_transport_keepalive_settings(grpc_chttp2_transport* t) { if (t->is_client) { t->keepalive_time = g_default_client_keepalive_time_ms == INT_MAX ? 
GRPC_MILLIS_INF_FUTURE @@ -359,205 +462,122 @@ static void init_transport(grpc_chttp2_transport* t, t->keepalive_permit_without_calls = g_default_server_keepalive_permit_without_calls; } +} - t->opt_target = GRPC_CHTTP2_OPTIMIZE_FOR_LATENCY; +static void configure_transport_ping_policy(grpc_chttp2_transport* t) { + t->ping_policy.max_pings_without_data = g_default_max_pings_without_data; + t->ping_policy.min_sent_ping_interval_without_data = + g_default_min_sent_ping_interval_without_data_ms; + t->ping_policy.max_ping_strikes = g_default_max_ping_strikes; + t->ping_policy.min_recv_ping_interval_without_data = + g_default_min_recv_ping_interval_without_data_ms; +} - bool enable_bdp = true; +static void init_keepalive_pings_if_enabled(grpc_chttp2_transport* t) { + if (t->keepalive_time != GRPC_MILLIS_INF_FUTURE) { + t->keepalive_state = GRPC_CHTTP2_KEEPALIVE_STATE_WAITING; + GRPC_CHTTP2_REF_TRANSPORT(t, "init keepalive ping"); + grpc_timer_init(&t->keepalive_ping_timer, + grpc_core::ExecCtx::Get()->Now() + t->keepalive_time, + &t->init_keepalive_ping_locked); + } else { + /* Use GRPC_CHTTP2_KEEPALIVE_STATE_DISABLED to indicate there are no + inflight keepalive timers */ + t->keepalive_state = GRPC_CHTTP2_KEEPALIVE_STATE_DISABLED; + } +} - if (channel_args) { - for (i = 0; i < channel_args->num_args; i++) { - if (0 == strcmp(channel_args->args[i].key, - GRPC_ARG_HTTP2_INITIAL_SEQUENCE_NUMBER)) { - const grpc_integer_options options = {-1, 0, INT_MAX}; - const int value = - grpc_channel_arg_get_integer(&channel_args->args[i], options); - if (value >= 0) { - if ((t->next_stream_id & 1) != (value & 1)) { - gpr_log(GPR_ERROR, "%s: low bit must be %d on %s", - GRPC_ARG_HTTP2_INITIAL_SEQUENCE_NUMBER, - t->next_stream_id & 1, is_client ? "client" : "server"); - } else { - t->next_stream_id = static_cast<uint32_t>(value); - } - } - } else if (0 == strcmp(channel_args->args[i].key, - GRPC_ARG_HTTP2_HPACK_TABLE_SIZE_ENCODER)) { - const grpc_integer_options options = {-1, 0, INT_MAX}; - const int value = - grpc_channel_arg_get_integer(&channel_args->args[i], options); - if (value >= 0) { - grpc_chttp2_hpack_compressor_set_max_usable_size( - &t->hpack_compressor, static_cast<uint32_t>(value)); - } - } else if (0 == strcmp(channel_args->args[i].key, - GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA)) { - t->ping_policy.max_pings_without_data = grpc_channel_arg_get_integer( - &channel_args->args[i], - {g_default_max_pings_without_data, 0, INT_MAX}); - } else if (0 == strcmp(channel_args->args[i].key, - GRPC_ARG_HTTP2_MAX_PING_STRIKES)) { - t->ping_policy.max_ping_strikes = grpc_channel_arg_get_integer( - &channel_args->args[i], {g_default_max_ping_strikes, 0, INT_MAX}); - } else if (0 == - strcmp( - channel_args->args[i].key, - GRPC_ARG_HTTP2_MIN_SENT_PING_INTERVAL_WITHOUT_DATA_MS)) { - t->ping_policy.min_sent_ping_interval_without_data = - grpc_channel_arg_get_integer( - &channel_args->args[i], - grpc_integer_options{ - g_default_min_sent_ping_interval_without_data_ms, 0, - INT_MAX}); - } else if (0 == - strcmp( - channel_args->args[i].key, - GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS)) { - t->ping_policy.min_recv_ping_interval_without_data = - grpc_channel_arg_get_integer( - &channel_args->args[i], - grpc_integer_options{ - g_default_min_recv_ping_interval_without_data_ms, 0, - INT_MAX}); - } else if (0 == strcmp(channel_args->args[i].key, - GRPC_ARG_HTTP2_WRITE_BUFFER_SIZE)) { - t->write_buffer_size = - static_cast<uint32_t>(grpc_channel_arg_get_integer( - &channel_args->args[i], {0, 0,
MAX_WRITE_BUFFER_SIZE})); - } else if (0 == - strcmp(channel_args->args[i].key, GRPC_ARG_HTTP2_BDP_PROBE)) { - enable_bdp = grpc_channel_arg_get_bool(&channel_args->args[i], true); - } else if (0 == strcmp(channel_args->args[i].key, - GRPC_ARG_KEEPALIVE_TIME_MS)) { - const int value = grpc_channel_arg_get_integer( - &channel_args->args[i], - grpc_integer_options{t->is_client - ? g_default_client_keepalive_time_ms - : g_default_server_keepalive_time_ms, - 1, INT_MAX}); - t->keepalive_time = value == INT_MAX ? GRPC_MILLIS_INF_FUTURE : value; - } else if (0 == strcmp(channel_args->args[i].key, - GRPC_ARG_KEEPALIVE_TIMEOUT_MS)) { - const int value = grpc_channel_arg_get_integer( - &channel_args->args[i], - grpc_integer_options{t->is_client - ? g_default_client_keepalive_timeout_ms - : g_default_server_keepalive_timeout_ms, - 0, INT_MAX}); - t->keepalive_timeout = - value == INT_MAX ? GRPC_MILLIS_INF_FUTURE : value; - } else if (0 == strcmp(channel_args->args[i].key, - GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS)) { - t->keepalive_permit_without_calls = static_cast<uint32_t>( - grpc_channel_arg_get_integer(&channel_args->args[i], {0, 0, 1})); - } else if (0 == strcmp(channel_args->args[i].key, - GRPC_ARG_OPTIMIZATION_TARGET)) { - if (channel_args->args[i].type != GRPC_ARG_STRING) { - gpr_log(GPR_ERROR, "%s should be a string", - GRPC_ARG_OPTIMIZATION_TARGET); - } else if (0 == strcmp(channel_args->args[i].value.string, "blend")) { - t->opt_target = GRPC_CHTTP2_OPTIMIZE_FOR_LATENCY; - } else if (0 == strcmp(channel_args->args[i].value.string, "latency")) { - t->opt_target = GRPC_CHTTP2_OPTIMIZE_FOR_LATENCY; - } else if (0 == - strcmp(channel_args->args[i].value.string, "throughput")) { - t->opt_target = GRPC_CHTTP2_OPTIMIZE_FOR_THROUGHPUT; - } else { - gpr_log(GPR_ERROR, "%s value '%s' unknown, assuming 'blend'", - GRPC_ARG_OPTIMIZATION_TARGET, - channel_args->args[i].value.string); - } - } else { - static const struct { - const char* channel_arg_name; - grpc_chttp2_setting_id setting_id; - grpc_integer_options integer_options; - bool availability[2] /* server, client */; - } settings_map[] = { - {GRPC_ARG_MAX_CONCURRENT_STREAMS, - GRPC_CHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, - {-1, 0, INT32_MAX}, - {true, false}}, - {GRPC_ARG_HTTP2_HPACK_TABLE_SIZE_DECODER, - GRPC_CHTTP2_SETTINGS_HEADER_TABLE_SIZE, - {-1, 0, INT32_MAX}, - {true, true}}, - {GRPC_ARG_MAX_METADATA_SIZE, - GRPC_CHTTP2_SETTINGS_MAX_HEADER_LIST_SIZE, - {-1, 0, INT32_MAX}, - {true, true}}, - {GRPC_ARG_HTTP2_MAX_FRAME_SIZE, - GRPC_CHTTP2_SETTINGS_MAX_FRAME_SIZE, - {-1, 16384, 16777215}, - {true, true}}, - {GRPC_ARG_HTTP2_ENABLE_TRUE_BINARY, - GRPC_CHTTP2_SETTINGS_GRPC_ALLOW_TRUE_BINARY_METADATA, - {1, 0, 1}, - {true, true}}, - {GRPC_ARG_HTTP2_STREAM_LOOKAHEAD_BYTES, - GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE, - {-1, 5, INT32_MAX}, - {true, true}}}; - for (j = 0; j < static_cast<int> GPR_ARRAY_SIZE(settings_map); j++) { - if (0 == strcmp(channel_args->args[i].key, - settings_map[j].channel_arg_name)) { - if (!settings_map[j].availability[is_client]) { - gpr_log(GPR_DEBUG, "%s is not available on %s", - settings_map[j].channel_arg_name, - is_client ? 
"clients" : "servers"); - } else { - int value = grpc_channel_arg_get_integer( - &channel_args->args[i], settings_map[j].integer_options); - if (value >= 0) { - queue_setting_update(t, settings_map[j].setting_id, - static_cast<uint32_t>(value)); - } - } - break; - } - } - } +grpc_chttp2_transport::grpc_chttp2_transport( + const grpc_channel_args* channel_args, grpc_endpoint* ep, bool is_client, + grpc_resource_user* resource_user) + : ep(ep), + peer_string(grpc_endpoint_get_peer(ep)), + resource_user(resource_user), + combiner(grpc_combiner_create()), + is_client(is_client), + next_stream_id(is_client ? 1 : 2), + deframe_state(is_client ? GRPC_DTS_FH_0 : GRPC_DTS_CLIENT_PREFIX_0) { + GPR_ASSERT(strlen(GRPC_CHTTP2_CLIENT_CONNECT_STRING) == + GRPC_CHTTP2_CLIENT_CONNECT_STRLEN); + base.vtable = get_vtable(); + /* one ref is for destroy */ + gpr_ref_init(&refs, 1); + /* 8 is a random stab in the dark as to a good initial size: it's small enough + that it shouldn't waste memory for infrequently used connections, yet + large enough that the exponential growth should happen nicely when it's + needed. + TODO(ctiller): tune this */ + grpc_chttp2_stream_map_init(&stream_map, 8); + + grpc_slice_buffer_init(&read_buffer); + grpc_connectivity_state_init( + &channel_callback.state_tracker, GRPC_CHANNEL_READY, + is_client ? "client_transport" : "server_transport"); + grpc_slice_buffer_init(&outbuf); + if (is_client) { + grpc_slice_buffer_add(&outbuf, grpc_slice_from_copied_string( + GRPC_CHTTP2_CLIENT_CONNECT_STRING)); + } + grpc_chttp2_hpack_compressor_init(&hpack_compressor); + grpc_slice_buffer_init(&qbuf); + /* copy in initial settings to all setting sets */ + size_t i; + int j; + for (i = 0; i < GRPC_CHTTP2_NUM_SETTINGS; i++) { + for (j = 0; j < GRPC_NUM_SETTING_SETS; j++) { + settings[j][i] = grpc_chttp2_settings_parameters[i].default_value; } } + grpc_chttp2_hpack_parser_init(&hpack_parser); + grpc_chttp2_goaway_parser_init(&goaway_parser); + + init_transport_closures(this); + + /* configure http2 the way we like it */ + if (is_client) { + queue_setting_update(this, GRPC_CHTTP2_SETTINGS_ENABLE_PUSH, 0); + queue_setting_update(this, GRPC_CHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 0); + } + queue_setting_update(this, GRPC_CHTTP2_SETTINGS_MAX_HEADER_LIST_SIZE, + DEFAULT_MAX_HEADER_LIST_SIZE); + queue_setting_update(this, + GRPC_CHTTP2_SETTINGS_GRPC_ALLOW_TRUE_BINARY_METADATA, 1); + + configure_transport_ping_policy(this); + init_transport_keepalive_settings(this); + + bool enable_bdp = true; + if (channel_args) { + enable_bdp = read_channel_args(this, channel_args, is_client); + } if (g_flow_control_enabled) { - t->flow_control.Init<grpc_core::chttp2::TransportFlowControl>(t, - enable_bdp); + flow_control.Init<grpc_core::chttp2::TransportFlowControl>(this, + enable_bdp); } else { - t->flow_control.Init<grpc_core::chttp2::TransportFlowControlDisabled>(t); + flow_control.Init<grpc_core::chttp2::TransportFlowControlDisabled>(this); enable_bdp = false; } /* No pings allowed before receiving a header or data frame. 
*/ - t->ping_state.pings_before_data_required = 0; - t->ping_state.is_delayed_ping_timer_set = false; - t->ping_state.last_ping_sent_time = GRPC_MILLIS_INF_PAST; + ping_state.pings_before_data_required = 0; + ping_state.is_delayed_ping_timer_set = false; + ping_state.last_ping_sent_time = GRPC_MILLIS_INF_PAST; - t->ping_recv_state.last_ping_recv_time = GRPC_MILLIS_INF_PAST; - t->ping_recv_state.ping_strikes = 0; + ping_recv_state.last_ping_recv_time = GRPC_MILLIS_INF_PAST; + ping_recv_state.ping_strikes = 0; - /* Start keepalive pings */ - if (t->keepalive_time != GRPC_MILLIS_INF_FUTURE) { - t->keepalive_state = GRPC_CHTTP2_KEEPALIVE_STATE_WAITING; - GRPC_CHTTP2_REF_TRANSPORT(t, "init keepalive ping"); - grpc_timer_init(&t->keepalive_ping_timer, - grpc_core::ExecCtx::Get()->Now() + t->keepalive_time, - &t->init_keepalive_ping_locked); - } else { - /* Use GRPC_CHTTP2_KEEPALIVE_STATE_DISABLED to indicate there are no - inflight keeaplive timers */ - t->keepalive_state = GRPC_CHTTP2_KEEPALIVE_STATE_DISABLED; - } + init_keepalive_pings_if_enabled(this); if (enable_bdp) { - GRPC_CHTTP2_REF_TRANSPORT(t, "bdp_ping"); - schedule_bdp_ping_locked(t); - - grpc_chttp2_act_on_flowctl_action(t->flow_control->PeriodicUpdate(), t, + GRPC_CHTTP2_REF_TRANSPORT(this, "bdp_ping"); + schedule_bdp_ping_locked(this); + grpc_chttp2_act_on_flowctl_action(flow_control->PeriodicUpdate(), this, nullptr); } - grpc_chttp2_initiate_write(t, GRPC_CHTTP2_INITIATE_WRITE_INITIAL_WRITE); - post_benign_reclaimer(t); + grpc_chttp2_initiate_write(this, GRPC_CHTTP2_INITIATE_WRITE_INITIAL_WRITE); + post_benign_reclaimer(this); } static void destroy_transport_locked(void* tp, grpc_error* error) { @@ -567,6 +587,7 @@ static void destroy_transport_locked(void* tp, grpc_error* error) { t, grpc_error_set_int( GRPC_ERROR_CREATE_FROM_STATIC_STRING("Transport destroyed"), GRPC_ERROR_INT_OCCURRED_DURING_WRITE, t->write_state)); + // Must be the last line. GRPC_CHTTP2_UNREF_TRANSPORT(t, "destroy"); } @@ -651,103 +672,108 @@ void grpc_chttp2_stream_unref(grpc_chttp2_stream* s) { } #endif -static int init_stream(grpc_transport* gt, grpc_stream* gs, - grpc_stream_refcount* refcount, const void* server_data, - gpr_arena* arena) { - GPR_TIMER_SCOPE("init_stream", 0); - grpc_chttp2_transport* t = reinterpret_cast<grpc_chttp2_transport*>(gt); - grpc_chttp2_stream* s = reinterpret_cast<grpc_chttp2_stream*>(gs); - - s->t = t; - s->refcount = refcount; +grpc_chttp2_stream::grpc_chttp2_stream(grpc_chttp2_transport* t, + grpc_stream_refcount* refcount, + const void* server_data, + gpr_arena* arena) + : t(t), refcount(refcount), metadata_buffer{{arena}, {arena}} { /* We reserve one 'active stream' that's dropped when the stream is read-closed. 
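The "// Must be the last line" comment in destroy_transport_locked above is load-bearing: GRPC_CHTTP2_UNREF_TRANSPORT may drop the final reference and run the transport destructor, so no statement may touch *t after it. A stripped-down sketch of the underlying gpr_refcount idiom (Object and its helpers are hypothetical stand-ins, not gRPC APIs; gpr_ref_init/gpr_ref/gpr_unref are the real <grpc/support/sync.h> calls):

  #include <grpc/support/alloc.h>
  #include <grpc/support/sync.h>

  struct Object {
    gpr_refcount refs;
  };

  Object* object_create() {
    Object* obj = static_cast<Object*>(gpr_malloc(sizeof(Object)));
    gpr_ref_init(&obj->refs, 1);  // the creator owns the first ref
    return obj;
  }

  void object_ref(Object* obj) { gpr_ref(&obj->refs); }

  void object_unref(Object* obj) {
    if (gpr_unref(&obj->refs)) {  // nonzero once the count reaches zero
      gpr_free(obj);  // obj is gone: this must be the caller's last use of it
    }
  }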
@@ -651,103 +672,108 @@ void grpc_chttp2_stream_unref(grpc_chttp2_stream* s) {
 }
 #endif
 
-static int init_stream(grpc_transport* gt, grpc_stream* gs,
-                       grpc_stream_refcount* refcount, const void* server_data,
-                       gpr_arena* arena) {
-  GPR_TIMER_SCOPE("init_stream", 0);
-  grpc_chttp2_transport* t = reinterpret_cast<grpc_chttp2_transport*>(gt);
-  grpc_chttp2_stream* s = reinterpret_cast<grpc_chttp2_stream*>(gs);
-
-  s->t = t;
-  s->refcount = refcount;
+grpc_chttp2_stream::grpc_chttp2_stream(grpc_chttp2_transport* t,
+                                       grpc_stream_refcount* refcount,
+                                       const void* server_data,
+                                       gpr_arena* arena)
+    : t(t), refcount(refcount), metadata_buffer{{arena}, {arena}} {
   /* We reserve one 'active stream' that's dropped when the stream is
      read-closed. The others are for Chttp2IncomingByteStreams that are
     actively reading */
-  GRPC_CHTTP2_STREAM_REF(s, "chttp2");
-
-  grpc_chttp2_incoming_metadata_buffer_init(&s->metadata_buffer[0], arena);
-  grpc_chttp2_incoming_metadata_buffer_init(&s->metadata_buffer[1], arena);
-  grpc_chttp2_data_parser_init(&s->data_parser);
-  grpc_slice_buffer_init(&s->flow_controlled_buffer);
-  s->deadline = GRPC_MILLIS_INF_FUTURE;
-  GRPC_CLOSURE_INIT(&s->complete_fetch_locked, complete_fetch_locked, s,
-                    grpc_schedule_on_exec_ctx);
-  grpc_slice_buffer_init(&s->unprocessed_incoming_frames_buffer);
-  s->unprocessed_incoming_frames_buffer_cached_length = 0;
-  grpc_slice_buffer_init(&s->frame_storage);
-  grpc_slice_buffer_init(&s->compressed_data_buffer);
-  grpc_slice_buffer_init(&s->decompressed_data_buffer);
-  s->pending_byte_stream = false;
-  s->decompressed_header_bytes = 0;
-  GRPC_CLOSURE_INIT(&s->reset_byte_stream, reset_byte_stream, s,
-                    grpc_combiner_scheduler(t->combiner));
-
+  GRPC_CHTTP2_STREAM_REF(this, "chttp2");
   GRPC_CHTTP2_REF_TRANSPORT(t, "stream");
   if (server_data) {
-    s->id = static_cast<uint32_t>((uintptr_t)server_data);
-    *t->accepting_stream = s;
-    grpc_chttp2_stream_map_add(&t->stream_map, s->id, s);
+    id = static_cast<uint32_t>((uintptr_t)server_data);
+    *t->accepting_stream = this;
+    grpc_chttp2_stream_map_add(&t->stream_map, id, this);
     post_destructive_reclaimer(t);
   }
-
   if (t->flow_control->flow_control_enabled()) {
-    s->flow_control.Init<grpc_core::chttp2::StreamFlowControl>(
+    flow_control.Init<grpc_core::chttp2::StreamFlowControl>(
         static_cast<grpc_core::chttp2::TransportFlowControl*>(
             t->flow_control.get()),
-        s);
+        this);
   } else {
-    s->flow_control.Init<grpc_core::chttp2::StreamFlowControlDisabled>();
+    flow_control.Init<grpc_core::chttp2::StreamFlowControlDisabled>();
  }
-  return 0;
+  grpc_slice_buffer_init(&frame_storage);
+  grpc_slice_buffer_init(&unprocessed_incoming_frames_buffer);
+  grpc_slice_buffer_init(&flow_controlled_buffer);
+  grpc_slice_buffer_init(&compressed_data_buffer);
+  grpc_slice_buffer_init(&decompressed_data_buffer);
+
+  GRPC_CLOSURE_INIT(&complete_fetch_locked, ::complete_fetch_locked, this,
+                    grpc_schedule_on_exec_ctx);
+  GRPC_CLOSURE_INIT(&reset_byte_stream, ::reset_byte_stream, this,
+                    grpc_combiner_scheduler(t->combiner));
 }
 
-static void destroy_stream_locked(void* sp, grpc_error* error) {
-  GPR_TIMER_SCOPE("destroy_stream", 0);
-  grpc_chttp2_stream* s = static_cast<grpc_chttp2_stream*>(sp);
-  grpc_chttp2_transport* t = s->t;
+grpc_chttp2_stream::~grpc_chttp2_stream() {
+  if (t->channelz_socket != nullptr) {
+    if ((t->is_client && eos_received) || (!t->is_client && eos_sent)) {
+      t->channelz_socket->RecordStreamSucceeded();
+    } else {
+      t->channelz_socket->RecordStreamFailed();
+    }
+  }
-
-  GPR_ASSERT((s->write_closed && s->read_closed) || s->id == 0);
-  if (s->id != 0) {
-    GPR_ASSERT(grpc_chttp2_stream_map_find(&t->stream_map, s->id) == nullptr);
+  GPR_ASSERT((write_closed && read_closed) || id == 0);
+  if (id != 0) {
+    GPR_ASSERT(grpc_chttp2_stream_map_find(&t->stream_map, id) == nullptr);
   }
-  grpc_slice_buffer_destroy_internal(&s->unprocessed_incoming_frames_buffer);
-  grpc_slice_buffer_destroy_internal(&s->frame_storage);
-  grpc_slice_buffer_destroy_internal(&s->compressed_data_buffer);
-  grpc_slice_buffer_destroy_internal(&s->decompressed_data_buffer);
+  grpc_slice_buffer_destroy_internal(&unprocessed_incoming_frames_buffer);
+  grpc_slice_buffer_destroy_internal(&frame_storage);
+  grpc_slice_buffer_destroy_internal(&compressed_data_buffer);
+  grpc_slice_buffer_destroy_internal(&decompressed_data_buffer);
-
-  grpc_chttp2_list_remove_stalled_by_transport(t, s);
-  grpc_chttp2_list_remove_stalled_by_stream(t, s);
+  grpc_chttp2_list_remove_stalled_by_transport(t, this);
+  grpc_chttp2_list_remove_stalled_by_stream(t, this);
 
   for (int i = 0; i < STREAM_LIST_COUNT; i++) {
-    if (GPR_UNLIKELY(s->included[i])) {
+    if (GPR_UNLIKELY(included[i])) {
       gpr_log(GPR_ERROR, "%s stream %d still included in list %d",
-              t->is_client ? "client" : "server", s->id, i);
+              t->is_client ? "client" : "server", id, i);
       abort();
     }
   }
-
-  GPR_ASSERT(s->send_initial_metadata_finished == nullptr);
-  GPR_ASSERT(s->fetching_send_message == nullptr);
-  GPR_ASSERT(s->send_trailing_metadata_finished == nullptr);
-  GPR_ASSERT(s->recv_initial_metadata_ready == nullptr);
-  GPR_ASSERT(s->recv_message_ready == nullptr);
-  GPR_ASSERT(s->recv_trailing_metadata_finished == nullptr);
-  grpc_chttp2_data_parser_destroy(&s->data_parser);
-  grpc_chttp2_incoming_metadata_buffer_destroy(&s->metadata_buffer[0]);
-  grpc_chttp2_incoming_metadata_buffer_destroy(&s->metadata_buffer[1]);
-  grpc_slice_buffer_destroy_internal(&s->flow_controlled_buffer);
-  GRPC_ERROR_UNREF(s->read_closed_error);
-  GRPC_ERROR_UNREF(s->write_closed_error);
-  GRPC_ERROR_UNREF(s->byte_stream_error);
+  GPR_ASSERT(send_initial_metadata_finished == nullptr);
+  GPR_ASSERT(fetching_send_message == nullptr);
+  GPR_ASSERT(send_trailing_metadata_finished == nullptr);
+  GPR_ASSERT(recv_initial_metadata_ready == nullptr);
+  GPR_ASSERT(recv_message_ready == nullptr);
+  GPR_ASSERT(recv_trailing_metadata_finished == nullptr);
+  grpc_slice_buffer_destroy_internal(&flow_controlled_buffer);
+  GRPC_ERROR_UNREF(read_closed_error);
+  GRPC_ERROR_UNREF(write_closed_error);
+  GRPC_ERROR_UNREF(byte_stream_error);
+
+  flow_control.Destroy();
-
-  s->flow_control.Destroy();
+  if (t->resource_user != nullptr) {
+    grpc_resource_user_free(t->resource_user, GRPC_RESOURCE_QUOTA_CALL_SIZE);
+  }
 
   GRPC_CHTTP2_UNREF_TRANSPORT(t, "stream");
+  GRPC_CLOSURE_SCHED(destroy_stream_arg, GRPC_ERROR_NONE);
+}
-
-  GRPC_CLOSURE_SCHED(s->destroy_stream_arg, GRPC_ERROR_NONE);
+static int init_stream(grpc_transport* gt, grpc_stream* gs,
+                       grpc_stream_refcount* refcount, const void* server_data,
+                       gpr_arena* arena) {
+  GPR_TIMER_SCOPE("init_stream", 0);
+  grpc_chttp2_transport* t = reinterpret_cast<grpc_chttp2_transport*>(gt);
+  new (gs) grpc_chttp2_stream(t, refcount, server_data, arena);
+  return 0;
+}
+
+static void destroy_stream_locked(void* sp, grpc_error* error) {
+  GPR_TIMER_SCOPE("destroy_stream", 0);
+  grpc_chttp2_stream* s = static_cast<grpc_chttp2_stream*>(sp);
+  s->~grpc_chttp2_stream();
 }
 
 static void destroy_stream(grpc_transport* gt, grpc_stream* gs,
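init_stream and destroy_stream_locked above now run a real C++ constructor and destructor inside memory owned by the caller, via placement new and an explicit destructor call. A self-contained sketch of that lifecycle (Stream is a toy type, not the real grpc_chttp2_stream):

  #include <cstdlib>  // malloc/free stand in for the transport's allocator
  #include <new>      // placement new

  struct Stream {
    explicit Stream(int id) : id(id) {}  // fields get real initializers
    ~Stream() { /* release per-stream resources here */ }
    int id;
  };

  int main() {
    void* storage = std::malloc(sizeof(Stream));  // caller owns the memory
    Stream* s = new (storage) Stream(1);          // construct, no allocation
    s->~Stream();                                 // destroy, no deallocation
    std::free(storage);                           // memory freed separately
  }

The payoff is that field initialization can no longer be forgotten at one of several creation sites; the cost is that destruction and deallocation become two distinct steps that must both happen.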
@@ -783,7 +809,21 @@ grpc_chttp2_stream* grpc_chttp2_parsing_accept_stream(grpc_chttp2_transport* t,
   if (t->channel_callback.accept_stream == nullptr) {
     return nullptr;
   }
-  grpc_chttp2_stream* accepting;
+  // Don't accept the stream if memory quota doesn't allow. Note that we should
+  // simply refuse the stream here instead of canceling the stream after it's
+  // accepted since the latter will create the call which costs much memory.
+  if (t->resource_user != nullptr &&
+      !grpc_resource_user_safe_alloc(t->resource_user,
+                                     GRPC_RESOURCE_QUOTA_CALL_SIZE)) {
+    gpr_log(GPR_ERROR, "Memory exhausted, rejecting the stream.");
+    grpc_slice_buffer_add(
+        &t->qbuf,
+        grpc_chttp2_rst_stream_create(
+            id, static_cast<uint32_t>(GRPC_HTTP2_REFUSED_STREAM), nullptr));
+    grpc_chttp2_initiate_write(t, GRPC_CHTTP2_INITIATE_WRITE_RST_STREAM);
+    return nullptr;
+  }
+  grpc_chttp2_stream* accepting = nullptr;
   GPR_ASSERT(t->accepting_stream == nullptr);
   t->accepting_stream = &accepting;
   t->channel_callback.accept_stream(t->channel_callback.accept_stream_user_data,
@@ -1401,6 +1441,9 @@ static void perform_stream_op_locked(void* stream_op,
   }
 
   if (op->send_initial_metadata) {
+    if (t->is_client && t->channelz_socket != nullptr) {
+      t->channelz_socket->RecordStreamStartedFromLocal();
+    }
     GRPC_STATS_INC_HTTP2_OP_SEND_INITIAL_METADATA();
     GPR_ASSERT(s->send_initial_metadata_finished == nullptr);
     on_complete->next_data.scratch |= CLOSURE_BARRIER_MAY_COVER_WRITE;
@@ -1486,6 +1529,7 @@ static void perform_stream_op_locked(void* stream_op,
 
   if (op->send_message) {
     GRPC_STATS_INC_HTTP2_OP_SEND_MESSAGE();
+    t->num_messages_in_next_write++;
     GRPC_STATS_INC_HTTP2_SEND_MESSAGE_SIZE(
         op->payload->send_message.send_message->length());
     on_complete->next_data.scratch |= CLOSURE_BARRIER_MAY_COVER_WRITE;
@@ -1670,8 +1714,8 @@ static void perform_stream_op(grpc_transport* gt, grpc_stream* gs,
     gpr_free(str);
   }
 
-  op->handler_private.extra_arg = gs;
   GRPC_CHTTP2_STREAM_REF(s, "perform_stream_op");
+  op->handler_private.extra_arg = gs;
   GRPC_CLOSURE_SCHED(
       GRPC_CLOSURE_INIT(&op->handler_private.closure, perform_stream_op_locked,
                         op, grpc_combiner_scheduler(t->combiner)),
@@ -2097,8 +2141,7 @@ void grpc_chttp2_fake_status(grpc_chttp2_transport* t, grpc_chttp2_stream* s,
         "add_status_message",
         grpc_chttp2_incoming_metadata_buffer_replace_or_add(
             &s->metadata_buffer[1],
-            grpc_mdelem_from_slices(GRPC_MDSTR_GRPC_MESSAGE,
-                                    grpc_slice_ref_internal(slice))));
+            grpc_mdelem_create(GRPC_MDSTR_GRPC_MESSAGE, slice, nullptr)));
   }
   s->published_metadata[1] = GRPC_METADATA_SYNTHESIZED_FROM_FAKE;
   grpc_chttp2_maybe_complete_recv_trailing_metadata(t, s);
@@ -2678,6 +2721,7 @@ static void init_keepalive_ping_locked(void* arg, grpc_error* error) {
       grpc_chttp2_stream_map_size(&t->stream_map) > 0) {
     t->keepalive_state = GRPC_CHTTP2_KEEPALIVE_STATE_PINGING;
     GRPC_CHTTP2_REF_TRANSPORT(t, "keepalive ping end");
+    grpc_timer_init_unset(&t->keepalive_watchdog_timer);
     send_keepalive_ping_locked(t);
     grpc_chttp2_initiate_write(t, GRPC_CHTTP2_INITIATE_WRITE_KEEPALIVE_PING);
   } else {
@@ -2701,6 +2745,9 @@ static void start_keepalive_ping_locked(void* arg, grpc_error* error) {
   if (error != GRPC_ERROR_NONE) {
     return;
   }
+  if (t->channelz_socket != nullptr) {
+    t->channelz_socket->RecordKeepaliveSent();
+  }
   GRPC_CHTTP2_REF_TRANSPORT(t, "keepalive watchdog");
   grpc_timer_init(&t->keepalive_watchdog_timer,
                   grpc_core::ExecCtx::Get()->Now() + t->keepalive_timeout,
@@ -2728,10 +2775,10 @@ static void keepalive_watchdog_fired_locked(void* arg, grpc_error* error) {
   if (error == GRPC_ERROR_NONE) {
     t->keepalive_state = GRPC_CHTTP2_KEEPALIVE_STATE_DYING;
     close_transport_locked(
-        t,
-        grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
-                               "keepalive watchdog timeout"),
-                           GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_INTERNAL));
+        t, grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
+                                  "keepalive watchdog timeout"),
+                              GRPC_ERROR_INT_GRPC_STATUS,
+                              GRPC_STATUS_UNAVAILABLE));
   } else {
     /* The watchdog timer should have been cancelled by
@@ -2897,17 +2944,20 @@ bool Chttp2IncomingByteStream::Next(size_t max_size_hint,
   }
 }
 
+void Chttp2IncomingByteStream::MaybeCreateStreamDecompressionCtx() {
+  if (!stream_->stream_decompression_ctx) {
+    stream_->stream_decompression_ctx = grpc_stream_compression_context_create(
+        stream_->stream_decompression_method);
+  }
+}
+
 grpc_error* Chttp2IncomingByteStream::Pull(grpc_slice* slice) {
   GPR_TIMER_SCOPE("incoming_byte_stream_pull", 0);
   grpc_error* error;
   if (stream_->unprocessed_incoming_frames_buffer.length > 0) {
     if (!stream_->unprocessed_incoming_frames_decompressed) {
       bool end_of_context;
-      if (!stream_->stream_decompression_ctx) {
-        stream_->stream_decompression_ctx =
-            grpc_stream_compression_context_create(
-                stream_->stream_decompression_method);
-      }
+      MaybeCreateStreamDecompressionCtx();
       if (!grpc_stream_decompress(stream_->stream_decompression_ctx,
                                   &stream_->unprocessed_incoming_frames_buffer,
                                   &stream_->decompressed_data_buffer, nullptr,
@@ -3138,11 +3188,21 @@ static const grpc_transport_vtable vtable = {sizeof(grpc_chttp2_stream),
 
 static const grpc_transport_vtable* get_vtable(void) { return &vtable; }
 
+intptr_t grpc_chttp2_transport_get_socket_uuid(grpc_transport* transport) {
+  grpc_chttp2_transport* t =
+      reinterpret_cast<grpc_chttp2_transport*>(transport);
+  if (t->channelz_socket != nullptr) {
+    return t->channelz_socket->uuid();
+  } else {
+    return 0;
+  }
+}
+
 grpc_transport* grpc_create_chttp2_transport(
-    const grpc_channel_args* channel_args, grpc_endpoint* ep, bool is_client) {
-  grpc_chttp2_transport* t = static_cast<grpc_chttp2_transport*>(
-      gpr_zalloc(sizeof(grpc_chttp2_transport)));
-  init_transport(t, channel_args, ep, is_client);
+    const grpc_channel_args* channel_args, grpc_endpoint* ep, bool is_client,
+    grpc_resource_user* resource_user) {
+  auto t = new (gpr_malloc(sizeof(grpc_chttp2_transport)))
+      grpc_chttp2_transport(channel_args, ep, is_client, resource_user);
   return &t->base;
 }
diff --git a/src/core/ext/transport/chttp2/transport/chttp2_transport.h b/src/core/ext/transport/chttp2/transport/chttp2_transport.h
index 9d55b3f4b0..b3fe1c082e 100644
--- a/src/core/ext/transport/chttp2/transport/chttp2_transport.h
+++ b/src/core/ext/transport/chttp2/transport/chttp2_transport.h
@@ -32,7 +32,10 @@ extern grpc_core::DebugOnlyTraceFlag grpc_trace_chttp2_refcount;
 extern bool g_flow_control_enabled;
 
 grpc_transport* grpc_create_chttp2_transport(
-    const grpc_channel_args* channel_args, grpc_endpoint* ep, bool is_client);
+    const grpc_channel_args* channel_args, grpc_endpoint* ep, bool is_client,
+    grpc_resource_user* resource_user = nullptr);
+
+intptr_t grpc_chttp2_transport_get_socket_uuid(grpc_transport* transport);
 
 /// Takes ownership of \a read_buffer, which (if non-NULL) contains
 /// leftover bytes previously read from the endpoint (e.g., by handshakers).
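The new resource_user parameter (defaulted to nullptr in the header above) is what lets grpc_chttp2_parsing_accept_stream refuse streams under memory pressure instead of accepting calls it cannot afford. Applications steer the budget from outside through the public resource-quota API; a sketch under that assumption (the helper name and the size are illustrative):

  #include <grpc/grpc.h>

  grpc_server* create_server_with_quota() {
    grpc_resource_quota* quota = grpc_resource_quota_create("server_quota");
    grpc_resource_quota_resize(quota, 256 * 1024 * 1024);  // 256 MiB budget

    grpc_arg arg;
    arg.type = GRPC_ARG_POINTER;
    arg.key = const_cast<char*>(GRPC_ARG_RESOURCE_QUOTA);
    arg.value.pointer.p = quota;
    arg.value.pointer.vtable = grpc_resource_quota_arg_vtable();

    grpc_channel_args args = {1, &arg};
    grpc_server* server = grpc_server_create(&args, nullptr);
    grpc_resource_quota_unref(quota);  // the server keeps its own reference
    return server;
  }

When the quota is exhausted, the code above answers new streams with RST_STREAM(REFUSED_STREAM) rather than creating a call, which is the cheaper of the two rejection points.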
diff --git a/src/core/ext/transport/chttp2/transport/frame_data.cc b/src/core/ext/transport/chttp2/transport/frame_data.cc
index f8f06f6789..1de00735cf 100644
--- a/src/core/ext/transport/chttp2/transport/frame_data.cc
+++ b/src/core/ext/transport/chttp2/transport/frame_data.cc
@@ -32,18 +32,12 @@
 #include "src/core/lib/slice/slice_string_helpers.h"
 #include "src/core/lib/transport/transport.h"
 
-grpc_error* grpc_chttp2_data_parser_init(grpc_chttp2_data_parser* parser) {
-  parser->state = GRPC_CHTTP2_DATA_FH_0;
-  parser->parsing_frame = nullptr;
-  return GRPC_ERROR_NONE;
-}
-
-void grpc_chttp2_data_parser_destroy(grpc_chttp2_data_parser* parser) {
-  if (parser->parsing_frame != nullptr) {
-    GRPC_ERROR_UNREF(parser->parsing_frame->Finished(
+grpc_chttp2_data_parser::~grpc_chttp2_data_parser() {
+  if (parsing_frame != nullptr) {
+    GRPC_ERROR_UNREF(parsing_frame->Finished(
         GRPC_ERROR_CREATE_FROM_STATIC_STRING("Parser destroyed"), false));
   }
-  GRPC_ERROR_UNREF(parser->error);
+  GRPC_ERROR_UNREF(error);
 }
 
 grpc_error* grpc_chttp2_data_parser_begin_frame(grpc_chttp2_data_parser* parser,
@@ -62,6 +56,7 @@ grpc_error* grpc_chttp2_data_parser_begin_frame(grpc_chttp2_data_parser* parser,
 
   if (flags & GRPC_CHTTP2_DATA_FLAG_END_STREAM) {
     s->received_last_frame = true;
+    s->eos_received = true;
   } else {
     s->received_last_frame = false;
   }
@@ -191,6 +186,9 @@ grpc_error* grpc_deframe_unprocessed_incoming_frames(
           GPR_ASSERT(stream_out != nullptr);
           GPR_ASSERT(p->parsing_frame == nullptr);
           p->frame_size |= (static_cast<uint32_t>(*cur));
+          if (t->channelz_socket != nullptr) {
+            t->channelz_socket->RecordMessageReceived();
+          }
          p->state = GRPC_CHTTP2_DATA_FRAME;
           ++cur;
           message_flags = 0;
diff --git a/src/core/ext/transport/chttp2/transport/frame_data.h b/src/core/ext/transport/chttp2/transport/frame_data.h
index e5d01f764e..2c5da99fa6 100644
--- a/src/core/ext/transport/chttp2/transport/frame_data.h
+++ b/src/core/ext/transport/chttp2/transport/frame_data.h
@@ -43,20 +43,18 @@ namespace grpc_core {
 class Chttp2IncomingByteStream;
 }  // namespace grpc_core
 
-typedef struct {
-  grpc_chttp2_stream_state state;
-  uint8_t frame_type;
-  uint32_t frame_size;
-  grpc_error* error;
+struct grpc_chttp2_data_parser {
+  grpc_chttp2_data_parser() = default;
+  ~grpc_chttp2_data_parser();
 
-  bool is_frame_compressed;
-  grpc_core::Chttp2IncomingByteStream* parsing_frame;
-} grpc_chttp2_data_parser;
+  grpc_chttp2_stream_state state = GRPC_CHTTP2_DATA_FH_0;
+  uint8_t frame_type = 0;
+  uint32_t frame_size = 0;
+  grpc_error* error = GRPC_ERROR_NONE;
 
-/* initialize per-stream state for data frame parsing */
-grpc_error* grpc_chttp2_data_parser_init(grpc_chttp2_data_parser* parser);
-
-void grpc_chttp2_data_parser_destroy(grpc_chttp2_data_parser* parser);
+  bool is_frame_compressed = false;
+  grpc_core::Chttp2IncomingByteStream* parsing_frame = nullptr;
+};
 
 /* start processing a new data frame */
 grpc_error* grpc_chttp2_data_parser_begin_frame(grpc_chttp2_data_parser* parser,
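The frame_data change above is the pattern this merge applies repeatedly: a C-style init/destroy pair becomes a struct with default member initializers plus a destructor. A generic sketch of the before/after shape (a toy type, not the real parser):

  // Before: two free functions every creation site must remember to call;
  // a forgotten init leaves fields as garbage.
  //   grpc_error* parser_init(parser* p);
  //   void parser_destroy(parser* p);

  // After: the defaults live on the fields, and cleanup runs wherever the
  // object's lifetime ends.
  struct Parser {
    Parser() = default;              // member initializers below do the work
    ~Parser() { /* release owned resources */ }

    int state = 0;                   // was: parser->state = ...
    void* parsing_frame = nullptr;   // was: parser->parsing_frame = nullptr
  };

  int main() {
    Parser p;  // p.state == 0, p.parsing_frame == nullptr, no init call
  }            // ~Parser runs automatically here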
diff --git a/src/core/ext/transport/chttp2/transport/frame_rst_stream.cc b/src/core/ext/transport/chttp2/transport/frame_rst_stream.cc
index 4bdd4309a4..a0a7534594 100644
--- a/src/core/ext/transport/chttp2/transport/frame_rst_stream.cc
+++ b/src/core/ext/transport/chttp2/transport/frame_rst_stream.cc
@@ -32,7 +32,7 @@ grpc_slice grpc_chttp2_rst_stream_create(uint32_t id, uint32_t code,
                                          grpc_transport_one_way_stats* stats) {
   static const size_t frame_size = 13;
   grpc_slice slice = GRPC_SLICE_MALLOC(frame_size);
-  stats->framing_bytes += frame_size;
+  if (stats != nullptr) stats->framing_bytes += frame_size;
   uint8_t* p = GRPC_SLICE_START_PTR(slice);
 
   // Frame size.
diff --git a/src/core/ext/transport/chttp2/transport/hpack_encoder.cc b/src/core/ext/transport/chttp2/transport/hpack_encoder.cc
index 0eaf63f133..dbe9df6ae3 100644
--- a/src/core/ext/transport/chttp2/transport/hpack_encoder.cc
+++ b/src/core/ext/transport/chttp2/transport/hpack_encoder.cc
@@ -212,10 +212,6 @@ static uint32_t prepare_space_for_new_elem(grpc_chttp2_hpack_compressor* c,
   return new_index;
 }
 
-/* dummy function */
-static void add_nothing(grpc_chttp2_hpack_compressor* c, grpc_mdelem elem,
-                        size_t elem_size) {}
-
 // Add a key to the dynamic table. Both key and value will be added to table at
 // the decoder.
 static void add_key_with_index(grpc_chttp2_hpack_compressor* c,
@@ -524,17 +520,22 @@ static void hpack_enc(grpc_chttp2_hpack_compressor* c, grpc_mdelem elem,
   uint32_t indices_key;
 
   /* should this elem be in the table? */
-  size_t decoder_space_usage =
-      grpc_mdelem_get_size_in_hpack_table(elem, st->use_true_binary_metadata);
-  bool should_add_elem = elem_interned &&
-                         decoder_space_usage < MAX_DECODER_SPACE_USAGE &&
-                         c->filter_elems[HASH_FRAGMENT_1(elem_hash)] >=
-                             c->filter_elems_sum / ONE_ON_ADD_PROBABILITY;
-  void (*maybe_add)(grpc_chttp2_hpack_compressor*, grpc_mdelem, size_t) =
-      should_add_elem ? add_elem : add_nothing;
-  void (*emit)(grpc_chttp2_hpack_compressor*, uint32_t, grpc_mdelem,
-               framer_state*) =
-      should_add_elem ? emit_lithdr_incidx : emit_lithdr_noidx;
+  const size_t decoder_space_usage =
+      grpc_chttp2_get_size_in_hpack_table(elem, st->use_true_binary_metadata);
+  const bool should_add_elem = elem_interned &&
+                               decoder_space_usage < MAX_DECODER_SPACE_USAGE &&
+                               c->filter_elems[HASH_FRAGMENT_1(elem_hash)] >=
+                                   c->filter_elems_sum / ONE_ON_ADD_PROBABILITY;
+
+  auto emit_maybe_add = [&should_add_elem, &elem, &st, &c, &indices_key,
+                         &decoder_space_usage] {
+    if (should_add_elem) {
+      emit_lithdr_incidx(c, dynidx(c, indices_key), elem, st);
+      add_elem(c, elem, decoder_space_usage);
+    } else {
+      emit_lithdr_noidx(c, dynidx(c, indices_key), elem, st);
+    }
+  };
 
   /* no hits for the elem... maybe there's a key? */
   indices_key = c->indices_keys[HASH_FRAGMENT_2(key_hash)];
@@ -542,8 +543,7 @@ static void hpack_enc(grpc_chttp2_hpack_compressor* c, grpc_mdelem elem,
                    GRPC_MDKEY(elem)) &&
       indices_key > c->tail_remote_index) {
     /* HIT: key (first cuckoo hash) */
-    emit(c, dynidx(c, indices_key), elem, st);
-    maybe_add(c, elem, decoder_space_usage);
+    emit_maybe_add();
     return;
   }
 
@@ -552,20 +552,23 @@ static void hpack_enc(grpc_chttp2_hpack_compressor* c, grpc_mdelem elem,
                    GRPC_MDKEY(elem)) &&
       indices_key > c->tail_remote_index) {
     /* HIT: key (first cuckoo hash) */
-    emit(c, dynidx(c, indices_key), elem, st);
-    maybe_add(c, elem, decoder_space_usage);
+    emit_maybe_add();
     return;
   }
 
   /* no elem, key in the table... fall back to literal emission */
-  bool should_add_key =
+  const bool should_add_key =
       !elem_interned && decoder_space_usage < MAX_DECODER_SPACE_USAGE;
-  emit = (should_add_elem || should_add_key) ? emit_lithdr_incidx_v
-                                             : emit_lithdr_noidx_v;
-  maybe_add =
-      should_add_elem ? add_elem : (should_add_key ? add_key : add_nothing);
-  emit(c, 0, elem, st);
-  maybe_add(c, elem, decoder_space_usage);
+  if (should_add_elem || should_add_key) {
+    emit_lithdr_incidx_v(c, 0, elem, st);
+  } else {
+    emit_lithdr_noidx_v(c, 0, elem, st);
+  }
+  if (should_add_elem) {
+    add_elem(c, elem, decoder_space_usage);
+  } else if (should_add_key) {
+    add_key(c, elem, decoder_space_usage);
+  }
 }
 
 #define STRLEN_LIT(x) (sizeof(x) - 1)
@@ -688,11 +691,22 @@ void grpc_chttp2_encode_header(grpc_chttp2_hpack_compressor* c,
     emit_advertise_table_size_change(c, &st);
   }
   for (size_t i = 0; i < extra_headers_size; ++i) {
-    hpack_enc(c, *extra_headers[i], &st);
+    grpc_mdelem md = *extra_headers[i];
+    uint8_t static_index = grpc_chttp2_get_static_hpack_table_index(md);
+    if (static_index) {
+      emit_indexed(c, static_index, &st);
+    } else {
+      hpack_enc(c, md, &st);
+    }
   }
   grpc_metadata_batch_assert_ok(metadata);
   for (grpc_linked_mdelem* l = metadata->list.head; l; l = l->next) {
-    hpack_enc(c, l->md, &st);
+    uint8_t static_index = grpc_chttp2_get_static_hpack_table_index(l->md);
+    if (static_index) {
+      emit_indexed(c, static_index, &st);
+    } else {
+      hpack_enc(c, l->md, &st);
+    }
   }
   grpc_millis deadline = metadata->deadline;
   if (deadline != GRPC_MILLIS_INF_FUTURE) {
diff --git a/src/core/ext/transport/chttp2/transport/hpack_table.cc b/src/core/ext/transport/chttp2/transport/hpack_table.cc
index 7929258356..fcfb01872b 100644
--- a/src/core/ext/transport/chttp2/transport/hpack_table.cc
+++ b/src/core/ext/transport/chttp2/transport/hpack_table.cc
@@ -29,6 +29,7 @@
 
 #include "src/core/lib/debug/trace.h"
 #include "src/core/lib/gpr/murmur_hash.h"
+#include "src/core/lib/transport/static_metadata.h"
 
 extern grpc_core::TraceFlag grpc_http_trace;
 
@@ -366,3 +367,31 @@ grpc_chttp2_hptbl_find_result grpc_chttp2_hptbl_find(
 
   return r;
 }
+
+static size_t get_base64_encoded_size(size_t raw_length) {
+  static const uint8_t tail_xtra[3] = {0, 2, 3};
+  return raw_length / 3 * 4 + tail_xtra[raw_length % 3];
+}
+
+size_t grpc_chttp2_get_size_in_hpack_table(grpc_mdelem elem,
+                                           bool use_true_binary_metadata) {
+  size_t overhead_and_key = 32 + GRPC_SLICE_LENGTH(GRPC_MDKEY(elem));
+  size_t value_len = GRPC_SLICE_LENGTH(GRPC_MDVALUE(elem));
+  if (grpc_is_binary_header(GRPC_MDKEY(elem))) {
+    return overhead_and_key + (use_true_binary_metadata
+                                   ? value_len + 1
+                                   : get_base64_encoded_size(value_len));
+  } else {
+    return overhead_and_key + value_len;
+  }
+}
+
+uint8_t grpc_chttp2_get_static_hpack_table_index(grpc_mdelem md) {
+  if (GRPC_MDELEM_STORAGE(md) == GRPC_MDELEM_STORAGE_STATIC) {
+    uint8_t index = GRPC_MDELEM_DATA(md) - grpc_static_mdelem_table;
+    if (index < GRPC_CHTTP2_LAST_STATIC_ENTRY) {
+      return index + 1;  // Hpack static metadata element indices start at 1
+    }
+  }
+  return 0;
+}
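A note on the arithmetic in get_base64_encoded_size above: every full 3-byte group of a binary metadata value encodes to 4 base64 characters, and a trailing group of 1 or 2 bytes encodes to 2 or 3 characters respectively, which is exactly the tail_xtra table {0, 2, 3}. This reading assumes the encoding is unpadded, which the table implies. Working the formula for two sample lengths:

  raw_length = 5:  5 / 3 * 4 + tail_xtra[5 % 3] = 4 + tail_xtra[2] = 4 + 3 = 7
  raw_length = 6:  6 / 3 * 4 + tail_xtra[6 % 3] = 8 + tail_xtra[0] = 8 + 0 = 8

So in grpc_chttp2_get_size_in_hpack_table a 5-byte "-bin" value costs 32 (HPACK entry overhead) + key length + 7 toward the decoder's table budget, versus key length + 32 + 6 (value_len + 1) for the same value sent as true-binary metadata.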
diff --git a/src/core/ext/transport/chttp2/transport/hpack_table.h b/src/core/ext/transport/chttp2/transport/hpack_table.h
index 98026a4ba4..a0ffc6fab7 100644
--- a/src/core/ext/transport/chttp2/transport/hpack_table.h
+++ b/src/core/ext/transport/chttp2/transport/hpack_table.h
@@ -83,6 +83,15 @@ grpc_mdelem grpc_chttp2_hptbl_lookup(const grpc_chttp2_hptbl* tbl,
 /* add a table entry to the index */
 grpc_error* grpc_chttp2_hptbl_add(grpc_chttp2_hptbl* tbl,
                                   grpc_mdelem md) GRPC_MUST_USE_RESULT;
+
+size_t grpc_chttp2_get_size_in_hpack_table(grpc_mdelem elem,
+                                           bool use_true_binary_metadata);
+
+/* Returns the static hpack table index that corresponds to \a elem. Returns 0
+   if \a elem is not statically stored or if it is not in the static hpack
+   table */
+uint8_t grpc_chttp2_get_static_hpack_table_index(grpc_mdelem md);
+
 /* Find a key/value pair in the table... returns the index in the table of the
    most similar entry, or 0 if the value was not found */
 typedef struct {
diff --git a/src/core/ext/transport/chttp2/transport/incoming_metadata.cc b/src/core/ext/transport/chttp2/transport/incoming_metadata.cc
index 4d7dfd900f..dca15e7680 100644
--- a/src/core/ext/transport/chttp2/transport/incoming_metadata.cc
+++ b/src/core/ext/transport/chttp2/transport/incoming_metadata.cc
@@ -27,18 +27,6 @@
 #include <grpc/support/alloc.h>
 #include <grpc/support/log.h>
 
-void grpc_chttp2_incoming_metadata_buffer_init(
-    grpc_chttp2_incoming_metadata_buffer* buffer, gpr_arena* arena) {
-  buffer->arena = arena;
-  grpc_metadata_batch_init(&buffer->batch);
-  buffer->batch.deadline = GRPC_MILLIS_INF_FUTURE;
-}
-
-void grpc_chttp2_incoming_metadata_buffer_destroy(
-    grpc_chttp2_incoming_metadata_buffer* buffer) {
-  grpc_metadata_batch_destroy(&buffer->batch);
-}
-
 grpc_error* grpc_chttp2_incoming_metadata_buffer_add(
     grpc_chttp2_incoming_metadata_buffer* buffer, grpc_mdelem elem) {
   buffer->size += GRPC_MDELEM_LENGTH(elem);
diff --git a/src/core/ext/transport/chttp2/transport/incoming_metadata.h b/src/core/ext/transport/chttp2/transport/incoming_metadata.h
index d029cf00d4..c551b3cc8b 100644
--- a/src/core/ext/transport/chttp2/transport/incoming_metadata.h
+++ b/src/core/ext/transport/chttp2/transport/incoming_metadata.h
@@ -23,17 +23,20 @@
 
 #include "src/core/lib/transport/transport.h"
 
-typedef struct {
+struct grpc_chttp2_incoming_metadata_buffer {
+  grpc_chttp2_incoming_metadata_buffer(gpr_arena* arena) : arena(arena) {
+    grpc_metadata_batch_init(&batch);
+    batch.deadline = GRPC_MILLIS_INF_FUTURE;
+  }
+  ~grpc_chttp2_incoming_metadata_buffer() {
+    grpc_metadata_batch_destroy(&batch);
+  }
+
   gpr_arena* arena;
   grpc_metadata_batch batch;
-  size_t size;  // total size of metadata
-} grpc_chttp2_incoming_metadata_buffer;
-
-/** assumes everything initially zeroed */
-void grpc_chttp2_incoming_metadata_buffer_init(
-    grpc_chttp2_incoming_metadata_buffer* buffer, gpr_arena* arena);
-void grpc_chttp2_incoming_metadata_buffer_destroy(
-    grpc_chttp2_incoming_metadata_buffer* buffer);
+  size_t size = 0;  // total size of metadata
+};
 
 void grpc_chttp2_incoming_metadata_buffer_publish(
     grpc_chttp2_incoming_metadata_buffer* buffer, grpc_metadata_batch* batch);
diff --git a/src/core/ext/transport/chttp2/transport/internal.h b/src/core/ext/transport/chttp2/transport/internal.h
index 32a13df48c..8a83f4894c 100644
--- a/src/core/ext/transport/chttp2/transport/internal.h
+++ b/src/core/ext/transport/chttp2/transport/internal.h
@@ -36,6 +36,7 @@
 #include "src/core/ext/transport/chttp2/transport/hpack_parser.h"
 #include "src/core/ext/transport/chttp2/transport/incoming_metadata.h"
 #include "src/core/ext/transport/chttp2/transport/stream_map.h"
+#include "src/core/lib/channel/channelz.h"
 #include "src/core/lib/compression/stream_compression.h"
 #include "src/core/lib/gprpp/manual_constructor.h"
 #include "src/core/lib/iomgr/combiner.h"
@@ -106,8 +107,8 @@ const char* grpc_chttp2_initiate_write_reason_string(
     grpc_chttp2_initiate_write_reason reason);
 
 typedef struct {
-  grpc_closure_list lists[GRPC_CHTTP2_PCL_COUNT];
-  uint64_t inflight_id;
+  grpc_closure_list lists[GRPC_CHTTP2_PCL_COUNT] = {};
+  uint64_t inflight_id = 0;
 } grpc_chttp2_ping_queue;
 
 typedef struct {
@@ -250,6 +251,8 @@ class Chttp2IncomingByteStream : public ByteStream {
   static void NextLocked(void* arg, grpc_error* error_ignored);
   static void OrphanLocked(void* arg, grpc_error* error_ignored);
 
+  void MaybeCreateStreamDecompressionCtx();
+
   grpc_chttp2_transport* transport_;  // Immutable.
   grpc_chttp2_stream* stream_;        // Immutable.
@@ -281,34 +284,41 @@ typedef enum {
 } grpc_chttp2_keepalive_state;
 
 struct grpc_chttp2_transport {
+  grpc_chttp2_transport(const grpc_channel_args* channel_args,
+                        grpc_endpoint* ep, bool is_client,
+                        grpc_resource_user* resource_user);
+  ~grpc_chttp2_transport();
+
   grpc_transport base; /* must be first */
   gpr_refcount refs;
   grpc_endpoint* ep;
   char* peer_string;
+  grpc_resource_user* resource_user;
+
   grpc_combiner* combiner;
-  grpc_closure* notify_on_receive_settings;
+  grpc_closure* notify_on_receive_settings = nullptr;
 
   /** write execution state of the transport */
-  grpc_chttp2_write_state write_state;
+  grpc_chttp2_write_state write_state = GRPC_CHTTP2_WRITE_STATE_IDLE;
   /** is this the first write in a series of writes?
      set when we initiate writing from idle, cleared when we
     initiate writing from writing+more */
-  bool is_first_write_in_batch;
+  bool is_first_write_in_batch = false;
  /** is the transport destroying itself? */
-  uint8_t destroying;
+  uint8_t destroying = false;
  /** has the upper layer closed the transport? */
-  grpc_error* closed_with_error;
+  grpc_error* closed_with_error = GRPC_ERROR_NONE;
  /** is there a read request to the endpoint outstanding? */
-  uint8_t endpoint_reading;
+  uint8_t endpoint_reading = 1;
-  grpc_chttp2_optimization_target opt_target;
+  grpc_chttp2_optimization_target opt_target = GRPC_CHTTP2_OPTIMIZE_FOR_LATENCY;
  /** various lists of streams */
-  grpc_chttp2_stream_list lists[STREAM_LIST_COUNT];
+  grpc_chttp2_stream_list lists[STREAM_LIST_COUNT] = {};
  /** maps stream id to grpc_chttp2_stream objects */
   grpc_chttp2_stream_map stream_map;
@@ -325,7 +335,7 @@ struct grpc_chttp2_transport {
  /** address to place a newly accepted stream - set and unset by
      grpc_chttp2_parsing_accept_stream; used by init_stream to
     publish the accepted server stream */
-  grpc_chttp2_stream** accepting_stream;
+  grpc_chttp2_stream** accepting_stream = nullptr;
 
   struct {
     /* accept stream callback */
@@ -349,41 +359,43 @@ struct grpc_chttp2_transport {
  /** how much data are we willing to buffer when the WRITE_BUFFER_HINT is
      set? */
-  uint32_t write_buffer_size;
+  uint32_t write_buffer_size = grpc_core::chttp2::kDefaultWindow;
  /** Set to a grpc_error object if a goaway frame is received. By default, set
   * to GRPC_ERROR_NONE */
-  grpc_error* goaway_error;
+  grpc_error* goaway_error = GRPC_ERROR_NONE;
-  grpc_chttp2_sent_goaway_state sent_goaway_state;
+  grpc_chttp2_sent_goaway_state sent_goaway_state = GRPC_CHTTP2_NO_GOAWAY_SEND;
  /** are the local settings dirty and need to be sent? */
-  bool dirtied_local_settings;
+  bool dirtied_local_settings = true;
  /** have local settings been sent? */
-  bool sent_local_settings;
-  /** bitmask of setting indexes to send out */
-  uint32_t force_send_settings;
+  bool sent_local_settings = false;
+  /** bitmask of setting indexes to send out
+     Hack: it's common for implementations to assume 65536 bytes initial send
+     window -- this should by rights be 0 */
+  uint32_t force_send_settings = 1 << GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE;
  /** settings values */
   uint32_t settings[GRPC_NUM_SETTING_SETS][GRPC_CHTTP2_NUM_SETTINGS];
  /** what is the next stream id to be allocated by this peer?
      copied to next_stream_id in parsing when parsing commences */
-  uint32_t next_stream_id;
+  uint32_t next_stream_id = 0;
  /** last new stream id */
-  uint32_t last_new_stream_id;
+  uint32_t last_new_stream_id = 0;
  /** ping queues for various ping insertion points */
-  grpc_chttp2_ping_queue ping_queue;
+  grpc_chttp2_ping_queue ping_queue = grpc_chttp2_ping_queue();
   grpc_chttp2_repeated_ping_policy ping_policy;
   grpc_chttp2_repeated_ping_state ping_state;
-  uint64_t ping_ctr; /* unique id for pings */
+  uint64_t ping_ctr = 0; /* unique id for pings */
   grpc_closure retry_initiate_ping_locked;
  /** ping acks */
-  size_t ping_ack_count;
-  size_t ping_ack_capacity;
-  uint64_t* ping_acks;
+  size_t ping_ack_count = 0;
+  size_t ping_ack_capacity = 0;
+  uint64_t* ping_acks = nullptr;
   grpc_chttp2_server_ping_recv_state ping_recv_state;
  /** parser for headers */
@@ -409,22 +421,22 @@ struct grpc_chttp2_transport {
   int64_t initial_window_update = 0;
 
   /* deframing */
-  grpc_chttp2_deframe_transport_state deframe_state;
-  uint8_t incoming_frame_type;
-  uint8_t incoming_frame_flags;
-  uint8_t header_eof;
-  bool is_first_frame;
-  uint32_t expect_continuation_stream_id;
-  uint32_t incoming_frame_size;
-  uint32_t incoming_stream_id;
+  grpc_chttp2_deframe_transport_state deframe_state = GRPC_DTS_CLIENT_PREFIX_0;
+  uint8_t incoming_frame_type = 0;
+  uint8_t incoming_frame_flags = 0;
+  uint8_t header_eof = 0;
+  bool is_first_frame = true;
+  uint32_t expect_continuation_stream_id = 0;
+  uint32_t incoming_frame_size = 0;
+  uint32_t incoming_stream_id = 0;
 
   /* active parser */
-  void* parser_data;
-  grpc_chttp2_stream* incoming_stream;
+  void* parser_data = nullptr;
+  grpc_chttp2_stream* incoming_stream = nullptr;
   grpc_error* (*parser)(void* parser_user_data, grpc_chttp2_transport* t,
                         grpc_chttp2_stream* s, grpc_slice slice, int is_last);
-  grpc_chttp2_write_cb* write_cb_pool;
+  grpc_chttp2_write_cb* write_cb_pool = nullptr;
 
   /* bdp estimator */
   grpc_closure next_bdp_ping_timer_expired_locked;
@@ -433,23 +445,23 @@ struct grpc_chttp2_transport {
   /* if non-NULL, close the transport with this error when writes are finished
    */
-  grpc_error* close_transport_on_writes_finished;
+  grpc_error* close_transport_on_writes_finished = GRPC_ERROR_NONE;
 
   /* a list of closures to run after writes are finished */
-  grpc_closure_list run_after_write;
+  grpc_closure_list run_after_write = GRPC_CLOSURE_LIST_INIT;
 
   /* buffer pool state */
  /** have we scheduled a benign cleanup? */
-  bool benign_reclaimer_registered;
+  bool benign_reclaimer_registered = false;
  /** have we scheduled a destructive cleanup? */
-  bool destructive_reclaimer_registered;
+  bool destructive_reclaimer_registered = false;
  /** benign cleanup closure */
   grpc_closure benign_reclaimer_locked;
  /** destructive cleanup closure */
   grpc_closure destructive_reclaimer_locked;
 
   /* next bdp ping timer */
-  bool have_next_bdp_ping_timer;
+  bool have_next_bdp_ping_timer = false;
   grpc_timer next_bdp_ping_timer;
 
   /* keep-alive ping support */
@@ -470,10 +482,12 @@ struct grpc_chttp2_transport {
  /** grace period for a ping to complete before watchdog kicks in */
   grpc_millis keepalive_timeout;
  /** if keepalive pings are allowed when there's no outstanding streams */
-  bool keepalive_permit_without_calls;
+  bool keepalive_permit_without_calls = false;
  /** keep-alive state machine state */
   grpc_chttp2_keepalive_state keepalive_state;
   grpc_core::ContextList* cl;
+  grpc_core::RefCountedPtr<grpc_core::channelz::SocketNode> channelz_socket;
+  uint32_t num_messages_in_next_write = 0;
 };
 
 typedef enum {
@@ -484,6 +498,10 @@
 } grpc_published_metadata_method;
 
 struct grpc_chttp2_stream {
+  grpc_chttp2_stream(grpc_chttp2_transport* t, grpc_stream_refcount* refcount,
+                     const void* server_data, gpr_arena* arena);
+  ~grpc_chttp2_stream();
+
   void* context;
   grpc_chttp2_transport* t;
   grpc_stream_refcount* refcount;
@@ -492,59 +510,63 @@ struct grpc_chttp2_stream {
   grpc_closure* destroy_stream_arg;
 
   grpc_chttp2_stream_link links[STREAM_LIST_COUNT];
-  uint8_t included[STREAM_LIST_COUNT];
+  uint8_t included[STREAM_LIST_COUNT] = {};
  /** HTTP2 stream id for this stream, or zero if one has not been assigned */
-  uint32_t id;
+  uint32_t id = 0;
  /** things the upper layers would like to send */
-  grpc_metadata_batch* send_initial_metadata;
-  grpc_closure* send_initial_metadata_finished;
-  grpc_metadata_batch* send_trailing_metadata;
-  grpc_closure* send_trailing_metadata_finished;
+  grpc_metadata_batch* send_initial_metadata = nullptr;
+  grpc_closure* send_initial_metadata_finished = nullptr;
+  grpc_metadata_batch* send_trailing_metadata = nullptr;
+  grpc_closure* send_trailing_metadata_finished = nullptr;
 
   grpc_core::OrphanablePtr<grpc_core::ByteStream> fetching_send_message;
-  uint32_t fetched_send_message_length;
-  grpc_slice fetching_slice;
+  uint32_t fetched_send_message_length = 0;
+  grpc_slice fetching_slice = grpc_empty_slice();
   int64_t next_message_end_offset;
-  int64_t flow_controlled_bytes_written;
-  int64_t flow_controlled_bytes_flowed;
+  int64_t flow_controlled_bytes_written = 0;
+  int64_t flow_controlled_bytes_flowed = 0;
   grpc_closure complete_fetch_locked;
-  grpc_closure* fetching_send_message_finished;
+  grpc_closure* fetching_send_message_finished = nullptr;
 
   grpc_metadata_batch* recv_initial_metadata;
-  grpc_closure* recv_initial_metadata_ready;
-  bool* trailing_metadata_available;
+  grpc_closure* recv_initial_metadata_ready = nullptr;
+  bool* trailing_metadata_available = nullptr;
   grpc_core::OrphanablePtr<grpc_core::ByteStream>* recv_message;
-  grpc_closure* recv_message_ready;
+  grpc_closure* recv_message_ready = nullptr;
   grpc_metadata_batch* recv_trailing_metadata;
-  grpc_closure* recv_trailing_metadata_finished;
+  grpc_closure* recv_trailing_metadata_finished = nullptr;
-  grpc_transport_stream_stats* collecting_stats;
-  grpc_transport_stream_stats stats;
+  grpc_transport_stream_stats* collecting_stats = nullptr;
+  grpc_transport_stream_stats stats = grpc_transport_stream_stats();
  /** Is this stream closed for writing. */
-  bool write_closed;
+  bool write_closed = false;
  /** Is this stream reading half-closed. */
-  bool read_closed;
+  bool read_closed = false;
  /** Are all published incoming byte streams closed. */
-  bool all_incoming_byte_streams_finished;
+  bool all_incoming_byte_streams_finished = false;
  /** Has this stream seen an error. If true, then pending incoming frames
      can be thrown away. */
-  bool seen_error;
+  bool seen_error = false;
  /** Are we buffering writes on this stream? If yes, we won't become writable
      until there's enough queued up in the flow_controlled_buffer */
-  bool write_buffering;
+  bool write_buffering = false;
  /** Has trailing metadata been received. */
-  bool received_trailing_metadata;
+  bool received_trailing_metadata = false;
+
+  /* have we sent or received the EOS bit? */
+  bool eos_received = false;
+  bool eos_sent = false;
 
  /** the error that resulted in this stream being read-closed */
-  grpc_error* read_closed_error;
+  grpc_error* read_closed_error = GRPC_ERROR_NONE;
  /** the error that resulted in this stream being write-closed */
-  grpc_error* write_closed_error;
+  grpc_error* write_closed_error = GRPC_ERROR_NONE;
-  grpc_published_metadata_method published_metadata[2];
-  bool final_metadata_requested;
+  grpc_published_metadata_method published_metadata[2] = {};
+  bool final_metadata_requested = false;
 
   grpc_chttp2_incoming_metadata_buffer metadata_buffer[2];
 
@@ -554,33 +576,33 @@ struct grpc_chttp2_stream {
   * Accessed only by application thread when stream->pending_byte_stream ==
   * true */
   grpc_slice_buffer unprocessed_incoming_frames_buffer;
-  grpc_closure* on_next;    /* protected by t combiner */
-  bool pending_byte_stream; /* protected by t combiner */
+  grpc_closure* on_next = nullptr;  /* protected by t combiner */
+  bool pending_byte_stream = false; /* protected by t combiner */
   // cached length of buffer to be used by the transport thread in cases where
   // stream->pending_byte_stream == true. The value is saved before
   // application threads are allowed to modify
   // unprocessed_incoming_frames_buffer
-  size_t unprocessed_incoming_frames_buffer_cached_length;
+  size_t unprocessed_incoming_frames_buffer_cached_length = 0;
   grpc_closure reset_byte_stream;
-  grpc_error* byte_stream_error; /* protected by t combiner */
-  bool received_last_frame;      /* protected by t combiner */
+  grpc_error* byte_stream_error = GRPC_ERROR_NONE; /* protected by t combiner */
+  bool received_last_frame = false; /* protected by t combiner */
-  grpc_millis deadline;
+  grpc_millis deadline = GRPC_MILLIS_INF_FUTURE;
  /** saw some stream level error */
-  grpc_error* forced_close_error;
+  grpc_error* forced_close_error = GRPC_ERROR_NONE;
  /** how many header frames have we received? */
-  uint8_t header_frames_received;
+  uint8_t header_frames_received = 0;
  /** parsing state for data frames */
  /* Accessed only by transport thread when stream->pending_byte_stream == false
   * Accessed only by application thread when stream->pending_byte_stream ==
   * true */
   grpc_chttp2_data_parser data_parser;
  /** number of bytes received - reset at end of parse thread execution */
-  int64_t received_bytes;
+  int64_t received_bytes = 0;
-  bool sent_initial_metadata;
-  bool sent_trailing_metadata;
+  bool sent_initial_metadata = false;
+  bool sent_trailing_metadata = false;
 
   grpc_core::PolymorphicManualConstructor<
       grpc_core::chttp2::StreamFlowControlBase,
@@ -590,32 +612,34 @@ struct grpc_chttp2_stream {
 
   grpc_slice_buffer flow_controlled_buffer;
 
-  grpc_chttp2_write_cb* on_flow_controlled_cbs;
-  grpc_chttp2_write_cb* on_write_finished_cbs;
-  grpc_chttp2_write_cb* finish_after_write;
-  size_t sending_bytes;
+  grpc_chttp2_write_cb* on_flow_controlled_cbs = nullptr;
+  grpc_chttp2_write_cb* on_write_finished_cbs = nullptr;
+  grpc_chttp2_write_cb* finish_after_write = nullptr;
+  size_t sending_bytes = 0;
 
   /* Stream compression method to be used. */
-  grpc_stream_compression_method stream_compression_method;
+  grpc_stream_compression_method stream_compression_method =
+      GRPC_STREAM_COMPRESSION_IDENTITY_COMPRESS;
   /* Stream decompression method to be used. */
-  grpc_stream_compression_method stream_decompression_method;
+  grpc_stream_compression_method stream_decompression_method =
+      GRPC_STREAM_COMPRESSION_IDENTITY_COMPRESS;
  /** Stream compression decompress context */
-  grpc_stream_compression_context* stream_decompression_ctx;
+  grpc_stream_compression_context* stream_decompression_ctx = nullptr;
  /** Stream compression compress context */
-  grpc_stream_compression_context* stream_compression_ctx;
+  grpc_stream_compression_context* stream_compression_ctx = nullptr;
 
  /** Buffer storing data that is compressed but not sent */
   grpc_slice_buffer compressed_data_buffer;
  /** Amount of uncompressed bytes sent out when compressed_data_buffer is
   * emptied */
-  size_t uncompressed_data_size;
+  size_t uncompressed_data_size = 0;
  /** Temporary buffer storing decompressed data */
   grpc_slice_buffer decompressed_data_buffer;
  /** Whether bytes stored in unprocessed_incoming_byte_stream is decompressed
   */
-  bool unprocessed_incoming_frames_decompressed;
+  bool unprocessed_incoming_frames_decompressed = false;
  /** gRPC header bytes that are already decompressed */
-  size_t decompressed_header_bytes;
+  size_t decompressed_header_bytes = 0;
 };
 
 /** Transport writing call flow:
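The wall of "= 0" / "= nullptr" defaults added to internal.h above replaces the old convention where gpr_zalloc produced an all-zero struct. Once members like RefCountedPtr and PolymorphicManualConstructor have constructors of their own, zero-filling is no longer a valid way to create the object, which is why the creation sites in this merge switch to malloc plus placement new. A generic sketch of the trade-off (toy types, not gRPC's):

  #include <cstdlib>
  #include <new>

  // Trivial C-style struct: every creation site must remember to zero it.
  struct OldWidget {
    int count;
    void* parser;
  };

  // C++ style: defaults live on the fields, so no creation path can forget
  // them, but the struct now has a constructor that must actually run.
  struct NewWidget {
    int count = 0;
    void* parser = nullptr;
  };

  int main() {
    OldWidget* a = static_cast<OldWidget*>(std::calloc(1, sizeof(OldWidget)));
    std::free(a);

    // zalloc-then-use would skip NewWidget's constructor; malloc + placement
    // new runs it, paired later with an explicit destructor call + free.
    void* mem = std::malloc(sizeof(NewWidget));
    NewWidget* b = new (mem) NewWidget();
    b->~NewWidget();
    std::free(mem);
  }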
diff --git a/src/core/ext/transport/chttp2/transport/parsing.cc b/src/core/ext/transport/chttp2/transport/parsing.cc
index 1e491d2ef8..1ff96d3cd3 100644
--- a/src/core/ext/transport/chttp2/transport/parsing.cc
+++ b/src/core/ext/transport/chttp2/transport/parsing.cc
@@ -368,6 +368,7 @@ static grpc_error* init_data_frame_parser(grpc_chttp2_transport* t) {
         &s->data_parser, t->incoming_frame_flags, s->id, s);
   }
 error_handler:
+  intptr_t unused;
   if (err == GRPC_ERROR_NONE) {
     t->incoming_stream = s;
     /* t->parser = grpc_chttp2_data_parser_parse;*/
@@ -375,7 +376,7 @@ error_handler:
     t->parser_data = &s->data_parser;
     t->ping_state.last_ping_sent_time = GRPC_MILLIS_INF_PAST;
     return GRPC_ERROR_NONE;
-  } else if (grpc_error_get_int(err, GRPC_ERROR_INT_STREAM_ID, nullptr)) {
+  } else if (grpc_error_get_int(err, GRPC_ERROR_INT_STREAM_ID, &unused)) {
    /* handle stream errors by closing the stream */
    if (s != nullptr) {
      grpc_chttp2_mark_stream_closed(t, s, true, false, err);
@@ -409,67 +410,81 @@ static void on_initial_header(void* tp, grpc_mdelem md) {
     gpr_free(value);
   }
 
-  if (grpc_slice_eq(GRPC_MDKEY(md), GRPC_MDSTR_GRPC_STATUS) &&
-      !grpc_mdelem_eq(md, GRPC_MDELEM_GRPC_STATUS_0)) {
-    /* TODO(ctiller): check for a status like " 0" */
-    s->seen_error = true;
-  }
+  if (GRPC_MDELEM_STORAGE(md) == GRPC_MDELEM_STORAGE_STATIC) {
+    // We don't use grpc_mdelem_eq here to avoid executing additional
+    // instructions. The reasoning is if the payload is not equal, we already
+    // know that the metadata elements are not equal because the md is
+    // confirmed to be static. If we had used grpc_mdelem_eq here, then if the
+    // payloads are not equal, grpc_mdelem_eq executes more instructions to
+    // determine if they're equal or not.
+    if (md.payload == GRPC_MDELEM_GRPC_STATUS_1.payload ||
+        md.payload == GRPC_MDELEM_GRPC_STATUS_2.payload) {
+      s->seen_error = true;
+    }
+  } else {
+    if (grpc_slice_eq(GRPC_MDKEY(md), GRPC_MDSTR_GRPC_STATUS) &&
+        !grpc_mdelem_eq(md, GRPC_MDELEM_GRPC_STATUS_0)) {
+      /* TODO(ctiller): check for a status like " 0" */
+      s->seen_error = true;
+    }
 
-  if (grpc_slice_eq(GRPC_MDKEY(md), GRPC_MDSTR_GRPC_TIMEOUT)) {
-    grpc_millis* cached_timeout =
-        static_cast<grpc_millis*>(grpc_mdelem_get_user_data(md, free_timeout));
-    grpc_millis timeout;
-    if (cached_timeout != nullptr) {
-      timeout = *cached_timeout;
-    } else {
-      if (GPR_UNLIKELY(
-              !grpc_http2_decode_timeout(GRPC_MDVALUE(md), &timeout))) {
-        char* val = grpc_slice_to_c_string(GRPC_MDVALUE(md));
-        gpr_log(GPR_ERROR, "Ignoring bad timeout value '%s'", val);
-        gpr_free(val);
-        timeout = GRPC_MILLIS_INF_FUTURE;
+    if (grpc_slice_eq(GRPC_MDKEY(md), GRPC_MDSTR_GRPC_TIMEOUT)) {
+      grpc_millis* cached_timeout = static_cast<grpc_millis*>(
+          grpc_mdelem_get_user_data(md, free_timeout));
+      grpc_millis timeout;
+      if (cached_timeout != nullptr) {
+        timeout = *cached_timeout;
+      } else {
+        if (GPR_UNLIKELY(
+                !grpc_http2_decode_timeout(GRPC_MDVALUE(md), &timeout))) {
+          char* val = grpc_slice_to_c_string(GRPC_MDVALUE(md));
+          gpr_log(GPR_ERROR, "Ignoring bad timeout value '%s'", val);
+          gpr_free(val);
+          timeout = GRPC_MILLIS_INF_FUTURE;
+        }
+        if (GRPC_MDELEM_IS_INTERNED(md)) {
+          /* store the result */
+          cached_timeout =
+              static_cast<grpc_millis*>(gpr_malloc(sizeof(grpc_millis)));
+          *cached_timeout = timeout;
+          grpc_mdelem_set_user_data(md, free_timeout, cached_timeout);
+        }
       }
-      if (GRPC_MDELEM_IS_INTERNED(md)) {
-        /* store the result */
-        cached_timeout =
-            static_cast<grpc_millis*>(gpr_malloc(sizeof(grpc_millis)));
-        *cached_timeout = timeout;
-        grpc_mdelem_set_user_data(md, free_timeout, cached_timeout);
+      if (timeout != GRPC_MILLIS_INF_FUTURE) {
+        grpc_chttp2_incoming_metadata_buffer_set_deadline(
+            &s->metadata_buffer[0], grpc_core::ExecCtx::Get()->Now() + timeout);
       }
+      GRPC_MDELEM_UNREF(md);
+      return;
     }
-    if (timeout != GRPC_MILLIS_INF_FUTURE) {
-      grpc_chttp2_incoming_metadata_buffer_set_deadline(
-          &s->metadata_buffer[0], grpc_core::ExecCtx::Get()->Now() + timeout);
-    }
+  }
+
+  const size_t new_size = s->metadata_buffer[0].size + GRPC_MDELEM_LENGTH(md);
+  const size_t metadata_size_limit =
+      t->settings[GRPC_ACKED_SETTINGS]
+                 [GRPC_CHTTP2_SETTINGS_MAX_HEADER_LIST_SIZE];
+  if (new_size > metadata_size_limit) {
+    gpr_log(GPR_DEBUG,
+            "received initial metadata size exceeds limit (%" PRIuPTR
+            " vs. %" PRIuPTR ")",
+            new_size, metadata_size_limit);
+    grpc_chttp2_cancel_stream(
+        t, s,
+        grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
+                               "received initial metadata size exceeds limit"),
+                           GRPC_ERROR_INT_GRPC_STATUS,
+                           GRPC_STATUS_RESOURCE_EXHAUSTED));
+    grpc_chttp2_parsing_become_skip_parser(t);
+    s->seen_error = true;
     GRPC_MDELEM_UNREF(md);
   } else {
-    const size_t new_size = s->metadata_buffer[0].size + GRPC_MDELEM_LENGTH(md);
-    const size_t metadata_size_limit =
-        t->settings[GRPC_ACKED_SETTINGS]
-                   [GRPC_CHTTP2_SETTINGS_MAX_HEADER_LIST_SIZE];
-    if (new_size > metadata_size_limit) {
-      gpr_log(GPR_DEBUG,
-              "received initial metadata size exceeds limit (%" PRIuPTR
-              " vs. %" PRIuPTR ")",
-              new_size, metadata_size_limit);
-      grpc_chttp2_cancel_stream(
-          t, s,
-          grpc_error_set_int(
-              GRPC_ERROR_CREATE_FROM_STATIC_STRING(
-                  "received initial metadata size exceeds limit"),
-              GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_RESOURCE_EXHAUSTED));
+    grpc_error* error =
+        grpc_chttp2_incoming_metadata_buffer_add(&s->metadata_buffer[0], md);
+    if (error != GRPC_ERROR_NONE) {
+      grpc_chttp2_cancel_stream(t, s, error);
       grpc_chttp2_parsing_become_skip_parser(t);
       s->seen_error = true;
       GRPC_MDELEM_UNREF(md);
-    } else {
-      grpc_error* error =
-          grpc_chttp2_incoming_metadata_buffer_add(&s->metadata_buffer[0], md);
-      if (error != GRPC_ERROR_NONE) {
-        grpc_chttp2_cancel_stream(t, s, error);
-        grpc_chttp2_parsing_become_skip_parser(t);
-        s->seen_error = true;
-        GRPC_MDELEM_UNREF(md);
-      }
     }
   }
 }
+ if (md.payload == GRPC_MDELEM_GRPC_STATUS_1.payload || + md.payload == GRPC_MDELEM_GRPC_STATUS_2.payload) { + s->seen_error = true; + } + } else if (grpc_slice_eq(GRPC_MDKEY(md), GRPC_MDSTR_GRPC_STATUS) && + !grpc_mdelem_eq(md, GRPC_MDELEM_GRPC_STATUS_0)) { /* TODO(ctiller): check for a status like " 0" */ s->seen_error = true; } @@ -598,6 +624,9 @@ static grpc_error* init_header_frame_parser(grpc_chttp2_transport* t, gpr_log(GPR_ERROR, "grpc_chttp2_stream not accepted")); return init_skip_frame_parser(t, 1); } + if (t->channelz_socket != nullptr) { + t->channelz_socket->RecordStreamStartedFromRemote(); + } } else { t->incoming_stream = s; } @@ -611,6 +640,9 @@ static grpc_error* init_header_frame_parser(grpc_chttp2_transport* t, } t->parser = grpc_chttp2_header_parser_parse; t->parser_data = &t->hpack_parser; + if (t->header_eof) { + s->eos_received = true; + } switch (s->header_frames_received) { case 0: if (t->is_client && t->header_eof) { @@ -725,9 +757,10 @@ static grpc_error* parse_frame_slice(grpc_chttp2_transport* t, grpc_slice slice, int is_last) { grpc_chttp2_stream* s = t->incoming_stream; grpc_error* err = t->parser(t->parser_data, t, s, slice, is_last); + intptr_t unused; if (GPR_LIKELY(err == GRPC_ERROR_NONE)) { return err; - } else if (grpc_error_get_int(err, GRPC_ERROR_INT_STREAM_ID, nullptr)) { + } else if (grpc_error_get_int(err, GRPC_ERROR_INT_STREAM_ID, &unused)) { if (grpc_http_trace.enabled()) { const char* msg = grpc_error_string(err); gpr_log(GPR_ERROR, "%s", msg); diff --git a/src/core/ext/transport/chttp2/transport/writing.cc b/src/core/ext/transport/chttp2/transport/writing.cc index c9273f7e39..3b3367d0f3 100644 --- a/src/core/ext/transport/chttp2/transport/writing.cc +++ b/src/core/ext/transport/chttp2/transport/writing.cc @@ -574,6 +574,7 @@ class StreamWriteContext { void SentLastFrame() { s_->send_trailing_metadata = nullptr; s_->sent_trailing_metadata = true; + s_->eos_sent = true; if (!t_->is_client && !s_->read_closed) { grpc_slice_buffer_add( @@ -637,6 +638,11 @@ void grpc_chttp2_end_write(grpc_chttp2_transport* t, grpc_error* error) { GPR_TIMER_SCOPE("grpc_chttp2_end_write", 0); grpc_chttp2_stream* s; + if (t->channelz_socket != nullptr) { + t->channelz_socket->RecordMessagesSent(t->num_messages_in_next_write); + } + t->num_messages_in_next_write = 0; + while (grpc_chttp2_list_pop_writing_stream(t, &s)) { if (s->sending_bytes != 0) { update_list(t, s, static_cast<int64_t>(s->sending_bytes), diff --git a/src/core/ext/transport/cronet/transport/cronet_transport.cc b/src/core/ext/transport/cronet/transport/cronet_transport.cc index 4a252d972d..349d8681d5 100644 --- a/src/core/ext/transport/cronet/transport/cronet_transport.cc +++ b/src/core/ext/transport/cronet/transport/cronet_transport.cc @@ -111,16 +111,21 @@ typedef struct grpc_cronet_transport grpc_cronet_transport; /* TODO (makdharma): reorder structure for memory efficiency per http://www.catb.org/esr/structure-packing/#_structure_reordering: */ struct read_state { + read_state(gpr_arena* arena) + : trailing_metadata(arena), initial_metadata(arena) { + grpc_slice_buffer_init(&read_slice_buffer); + } + /* vars to store data coming from server */ - char* read_buffer; - bool length_field_received; - int received_bytes; - int remaining_bytes; - int length_field; - bool compressed; - char grpc_header_bytes[GRPC_HEADER_SIZE_IN_BYTES]; - char* payload_field; - bool read_stream_closed; + char* read_buffer = nullptr; + bool length_field_received = false; + int received_bytes = 0; + int remaining_bytes = 0; 
+ int length_field = 0; + bool compressed = false; + char grpc_header_bytes[GRPC_HEADER_SIZE_IN_BYTES] = {}; + char* payload_field = nullptr; + bool read_stream_closed = false; /* vars for holding data destined for the application */ grpc_core::ManualConstructor<grpc_core::SliceBufferByteStream> sbs; @@ -128,59 +133,71 @@ struct read_state { /* vars for trailing metadata */ grpc_chttp2_incoming_metadata_buffer trailing_metadata; - bool trailing_metadata_valid; + bool trailing_metadata_valid = false; /* vars for initial metadata */ grpc_chttp2_incoming_metadata_buffer initial_metadata; }; struct write_state { - char* write_buffer; + char* write_buffer = nullptr; }; /* track state of one stream op */ struct op_state { - bool state_op_done[OP_NUM_OPS]; - bool state_callback_received[OP_NUM_OPS]; + op_state(gpr_arena* arena) : rs(arena) {} + + bool state_op_done[OP_NUM_OPS] = {}; + bool state_callback_received[OP_NUM_OPS] = {}; /* A non-zero gRPC status code has been seen */ - bool fail_state; + bool fail_state = false; /* Transport is discarding all buffered messages */ - bool flush_read; - bool flush_cronet_when_ready; - bool pending_write_for_trailer; - bool pending_send_message; + bool flush_read = false; + bool flush_cronet_when_ready = false; + bool pending_write_for_trailer = false; + bool pending_send_message = false; /* User requested RECV_TRAILING_METADATA */ - bool pending_recv_trailing_metadata; + bool pending_recv_trailing_metadata = false; /* Cronet has not issued a callback of a bidirectional read */ - bool pending_read_from_cronet; - grpc_error* cancel_error; + bool pending_read_from_cronet = false; + grpc_error* cancel_error = GRPC_ERROR_NONE; /* data structure for storing data coming from server */ struct read_state rs; /* data structure for storing data going to the server */ struct write_state ws; }; +struct stream_obj; + struct op_and_state { + op_and_state(stream_obj* s, const grpc_transport_stream_op_batch& op); + grpc_transport_stream_op_batch op; struct op_state state; - bool done; - struct stream_obj* s; /* Pointer back to the stream object */ - struct op_and_state* next; /* next op_and_state in the linked list */ + bool done = false; + struct stream_obj* s; /* Pointer back to the stream object */ + /* next op_and_state in the linked list */ + struct op_and_state* next; }; struct op_storage { - int num_pending_ops; - struct op_and_state* head; + int num_pending_ops = 0; + struct op_and_state* head = nullptr; }; struct stream_obj { + stream_obj(grpc_transport* gt, grpc_stream* gs, + grpc_stream_refcount* refcount, gpr_arena* arena); + ~stream_obj(); + gpr_arena* arena; - struct op_and_state* oas; - grpc_transport_stream_op_batch* curr_op; + struct op_and_state* oas = nullptr; + grpc_transport_stream_op_batch* curr_op = nullptr; grpc_cronet_transport* curr_ct; grpc_stream* curr_gs; - bidirectional_stream* cbs; - bidirectional_stream_header_array header_array; + bidirectional_stream* cbs = nullptr; + bidirectional_stream_header_array header_array = + bidirectional_stream_header_array(); // Zero-initialize the structure. /* Stream level state. 
Some state will be tracked both at stream and stream_op * level */ @@ -195,7 +212,6 @@ struct stream_obj { /* Refcount object of the stream */ grpc_stream_refcount* refcount; }; -typedef struct stream_obj stream_obj; #ifndef NDEBUG #define GRPC_CRONET_STREAM_REF(stream, reason) \ @@ -306,6 +322,10 @@ static grpc_error* make_error_with_desc(int error_code, const char* desc) { return error; } +inline op_and_state::op_and_state(stream_obj* s, + const grpc_transport_stream_op_batch& op) + : op(op), state(s->arena), s(s), next(s->storage.head) {} + /* Add a new stream op to op storage. */ @@ -314,14 +334,8 @@ static void add_to_storage(struct stream_obj* s, struct op_storage* storage = &s->storage; /* add new op at the beginning of the linked list. The memory is freed in remove_from_storage */ - struct op_and_state* new_op = static_cast<struct op_and_state*>( - gpr_malloc(sizeof(struct op_and_state))); - memcpy(&new_op->op, op, sizeof(grpc_transport_stream_op_batch)); - memset(&new_op->state, 0, sizeof(new_op->state)); - new_op->s = s; - new_op->done = false; + op_and_state* new_op = grpc_core::New<op_and_state>(s, *op); gpr_mu_lock(&s->mu); - new_op->next = storage->head; storage->head = new_op; storage->num_pending_ops++; if (op->send_message) { @@ -347,7 +361,7 @@ static void remove_from_storage(struct stream_obj* s, } if (s->storage.head == oas) { s->storage.head = oas->next; - gpr_free(oas); + grpc_core::Delete(oas); s->storage.num_pending_ops--; CRONET_LOG(GPR_DEBUG, "Freed %p. Now %d in the queue", oas, s->storage.num_pending_ops); @@ -358,7 +372,7 @@ static void remove_from_storage(struct stream_obj* s, s->storage.num_pending_ops--; CRONET_LOG(GPR_DEBUG, "Freed %p. Now %d in the queue", oas, s->storage.num_pending_ops); - gpr_free(oas); + grpc_core::Delete(oas); break; } else if (GPR_UNLIKELY(curr->next == nullptr)) { CRONET_LOG(GPR_ERROR, "Reached end of LL and did not find op to free"); @@ -540,10 +554,6 @@ static void on_response_headers_received( } gpr_mu_lock(&s->mu); - memset(&s->state.rs.initial_metadata, 0, - sizeof(s->state.rs.initial_metadata)); - grpc_chttp2_incoming_metadata_buffer_init(&s->state.rs.initial_metadata, - s->arena); convert_cronet_array_to_metadata(headers, &s->state.rs.initial_metadata); s->state.state_callback_received[OP_RECV_INITIAL_METADATA] = true; if (!(s->state.state_op_done[OP_CANCEL_ERROR] || @@ -634,11 +644,7 @@ static void on_response_trailers_received( stream_obj* s = static_cast<stream_obj*>(stream->annotation); grpc_cronet_transport* t = s->curr_ct; gpr_mu_lock(&s->mu); - memset(&s->state.rs.trailing_metadata, 0, - sizeof(s->state.rs.trailing_metadata)); s->state.rs.trailing_metadata_valid = false; - grpc_chttp2_incoming_metadata_buffer_init(&s->state.rs.trailing_metadata, - s->arena); convert_cronet_array_to_metadata(trailers, &s->state.rs.trailing_metadata); if (trailers->count > 0) { s->state.rs.trailing_metadata_valid = true; @@ -1287,7 +1293,7 @@ static enum e_op_result execute_stream_op(struct op_and_state* oas) { grpc_error* error = GRPC_ERROR_NONE; if (stream_state->state_op_done[OP_CANCEL_ERROR]) { error = GRPC_ERROR_REF(stream_state->cancel_error); - } else if (stream_state->state_op_done[OP_FAILED]) { + } else if (stream_state->state_callback_received[OP_FAILED]) { error = make_error_with_desc(GRPC_STATUS_UNAVAILABLE, "Unavailable."); } else if (oas->s->state.rs.trailing_metadata_valid) { grpc_chttp2_incoming_metadata_buffer_publish( @@ -1354,36 +1360,28 @@ static enum e_op_result execute_stream_op(struct op_and_state* oas) { Functions 
used by upper layers to access transport functionality. */ +inline stream_obj::stream_obj(grpc_transport* gt, grpc_stream* gs, + grpc_stream_refcount* refcount, gpr_arena* arena) + : arena(arena), + curr_ct(reinterpret_cast<grpc_cronet_transport*>(gt)), + curr_gs(gs), + state(arena), + refcount(refcount) { + GRPC_CRONET_STREAM_REF(this, "cronet transport"); + gpr_mu_init(&mu); +} + +inline stream_obj::~stream_obj() { + null_and_maybe_free_read_buffer(this); + /* Clean up read_slice_buffer in case there is unread data. */ + grpc_slice_buffer_destroy_internal(&state.rs.read_slice_buffer); + GRPC_ERROR_UNREF(state.cancel_error); +} + static int init_stream(grpc_transport* gt, grpc_stream* gs, grpc_stream_refcount* refcount, const void* server_data, gpr_arena* arena) { - stream_obj* s = reinterpret_cast<stream_obj*>(gs); - - s->refcount = refcount; - GRPC_CRONET_STREAM_REF(s, "cronet transport"); - memset(&s->storage, 0, sizeof(s->storage)); - s->storage.head = nullptr; - memset(&s->state, 0, sizeof(s->state)); - s->curr_op = nullptr; - s->cbs = nullptr; - memset(&s->header_array, 0, sizeof(s->header_array)); - memset(&s->state.rs, 0, sizeof(s->state.rs)); - memset(&s->state.ws, 0, sizeof(s->state.ws)); - memset(s->state.state_op_done, 0, sizeof(s->state.state_op_done)); - memset(s->state.state_callback_received, 0, - sizeof(s->state.state_callback_received)); - s->state.fail_state = s->state.flush_read = false; - s->state.cancel_error = nullptr; - s->state.flush_cronet_when_ready = s->state.pending_write_for_trailer = false; - s->state.pending_send_message = false; - s->state.pending_recv_trailing_metadata = false; - s->state.pending_read_from_cronet = false; - - s->curr_gs = gs; - s->curr_ct = reinterpret_cast<grpc_cronet_transport*>(gt); - s->arena = arena; - - gpr_mu_init(&s->mu); + new (gs) stream_obj(gt, gs, refcount, arena); return 0; } @@ -1426,10 +1424,7 @@ static void perform_stream_op(grpc_transport* gt, grpc_stream* gs, static void destroy_stream(grpc_transport* gt, grpc_stream* gs, grpc_closure* then_schedule_closure) { stream_obj* s = reinterpret_cast<stream_obj*>(gs); - null_and_maybe_free_read_buffer(s); - /* Clean up read_slice_buffer in case there is unread data. 
*/ - grpc_slice_buffer_destroy_internal(&s->state.rs.read_slice_buffer); - GRPC_ERROR_UNREF(s->state.cancel_error); + s->~stream_obj(); GRPC_CLOSURE_SCHED(then_schedule_closure, GRPC_ERROR_NONE); } diff --git a/src/core/ext/transport/inproc/inproc_transport.cc b/src/core/ext/transport/inproc/inproc_transport.cc index b0ca7f8207..61968de4d5 100644 --- a/src/core/ext/transport/inproc/inproc_transport.cc +++ b/src/core/ext/transport/inproc/inproc_transport.cc @@ -40,18 +40,68 @@ if (grpc_inproc_trace.enabled()) gpr_log(__VA_ARGS__); \ } while (0) -static grpc_slice g_empty_slice; -static grpc_slice g_fake_path_key; -static grpc_slice g_fake_path_value; -static grpc_slice g_fake_auth_key; -static grpc_slice g_fake_auth_value; +namespace { +grpc_slice g_empty_slice; +grpc_slice g_fake_path_key; +grpc_slice g_fake_path_value; +grpc_slice g_fake_auth_key; +grpc_slice g_fake_auth_value; + +struct inproc_stream; +bool cancel_stream_locked(inproc_stream* s, grpc_error* error); +void op_state_machine(void* arg, grpc_error* error); +void log_metadata(const grpc_metadata_batch* md_batch, bool is_client, + bool is_initial); +grpc_error* fill_in_metadata(inproc_stream* s, + const grpc_metadata_batch* metadata, + uint32_t flags, grpc_metadata_batch* out_md, + uint32_t* outflags, bool* markfilled); + +struct shared_mu { + shared_mu() { + // Share one lock between both sides since both sides get affected + gpr_mu_init(&mu); + gpr_ref_init(&refs, 2); + } -typedef struct { gpr_mu mu; gpr_refcount refs; -} shared_mu; +}; + +struct inproc_transport { + inproc_transport(const grpc_transport_vtable* vtable, shared_mu* mu, + bool is_client) + : mu(mu), is_client(is_client) { + base.vtable = vtable; + // Start each side of transport with 2 refs since they each have a ref + // to the other + gpr_ref_init(&refs, 2); + grpc_connectivity_state_init(&connectivity, GRPC_CHANNEL_READY, + is_client ? "inproc_client" : "inproc_server"); + } + + ~inproc_transport() { + grpc_connectivity_state_destroy(&connectivity); + if (gpr_unref(&mu->refs)) { + gpr_free(mu); + } + } + + void ref() { + INPROC_LOG(GPR_INFO, "ref_transport %p", this); + gpr_ref(&refs); + } + + void unref() { + INPROC_LOG(GPR_INFO, "unref_transport %p", this); + if (!gpr_unref(&refs)) { + return; + } + INPROC_LOG(GPR_INFO, "really_destroy_transport %p", this); + this->~inproc_transport(); + gpr_free(this); + } -typedef struct inproc_transport { grpc_transport base; shared_mu* mu; gpr_refcount refs; @@ -60,128 +110,174 @@ typedef struct inproc_transport { void (*accept_stream_cb)(void* user_data, grpc_transport* transport, const void* server_data); void* accept_stream_data; - bool is_closed; + bool is_closed = false; struct inproc_transport* other_side; - struct inproc_stream* stream_list; -} inproc_transport; + struct inproc_stream* stream_list = nullptr; +}; + +struct inproc_stream { + inproc_stream(inproc_transport* t, grpc_stream_refcount* refcount, + const void* server_data, gpr_arena* arena) + : t(t), refs(refcount), arena(arena) { + // Ref this stream right now for ctor and list. 
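+ // These refs are balanced by the unrefs in close_stream_locked: + // "close_stream:list" when the stream leaves the transport's stream + // list and "close_stream:closing" when the stream is closed. 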
+ ref("inproc_init_stream:init"); + ref("inproc_init_stream:list"); + + grpc_metadata_batch_init(&to_read_initial_md); + grpc_metadata_batch_init(&to_read_trailing_md); + GRPC_CLOSURE_INIT(&op_closure, op_state_machine, this, + grpc_schedule_on_exec_ctx); + grpc_metadata_batch_init(&write_buffer_initial_md); + grpc_metadata_batch_init(&write_buffer_trailing_md); + + stream_list_prev = nullptr; + gpr_mu_lock(&t->mu->mu); + stream_list_next = t->stream_list; + if (t->stream_list) { + t->stream_list->stream_list_prev = this; + } + t->stream_list = this; + gpr_mu_unlock(&t->mu->mu); + + if (!server_data) { + t->ref(); + inproc_transport* st = t->other_side; + st->ref(); + other_side = nullptr; // will get filled in soon + // Pass the client-side stream address to the server-side for a ref + ref("inproc_init_stream:clt"); // ref it now on behalf of server + // side to avoid destruction + INPROC_LOG(GPR_INFO, "calling accept stream cb %p %p", + st->accept_stream_cb, st->accept_stream_data); + (*st->accept_stream_cb)(st->accept_stream_data, &st->base, (void*)this); + } else { + // This is the server-side and is being called through accept_stream_cb + inproc_stream* cs = (inproc_stream*)server_data; + other_side = cs; + // Ref the server-side stream on behalf of the client now + ref("inproc_init_stream:srv"); + + // Now we are about to affect the other side, so lock the transport + // to make sure that it doesn't get destroyed + gpr_mu_lock(&t->mu->mu); + cs->other_side = this; + // Now transfer from the other side's write_buffer if any to the to_read + // buffer + if (cs->write_buffer_initial_md_filled) { + fill_in_metadata(this, &cs->write_buffer_initial_md, + cs->write_buffer_initial_md_flags, &to_read_initial_md, + &to_read_initial_md_flags, &to_read_initial_md_filled); + deadline = GPR_MIN(deadline, cs->write_buffer_deadline); + grpc_metadata_batch_clear(&cs->write_buffer_initial_md); + cs->write_buffer_initial_md_filled = false; + } + if (cs->write_buffer_trailing_md_filled) { + fill_in_metadata(this, &cs->write_buffer_trailing_md, 0, + &to_read_trailing_md, nullptr, + &to_read_trailing_md_filled); + grpc_metadata_batch_clear(&cs->write_buffer_trailing_md); + cs->write_buffer_trailing_md_filled = false; + } + if (cs->write_buffer_cancel_error != GRPC_ERROR_NONE) { + cancel_other_error = cs->write_buffer_cancel_error; + cs->write_buffer_cancel_error = GRPC_ERROR_NONE; + } + + gpr_mu_unlock(&t->mu->mu); + } + } + + ~inproc_stream() { + GRPC_ERROR_UNREF(write_buffer_cancel_error); + GRPC_ERROR_UNREF(cancel_self_error); + GRPC_ERROR_UNREF(cancel_other_error); + + if (recv_inited) { + grpc_slice_buffer_destroy_internal(&recv_message); + } + + t->unref(); + + if (closure_at_destroy) { + GRPC_CLOSURE_SCHED(closure_at_destroy, GRPC_ERROR_NONE); + } + } + +#ifndef NDEBUG +#define STREAM_REF(refs, reason) grpc_stream_ref(refs, reason) +#define STREAM_UNREF(refs, reason) grpc_stream_unref(refs, reason) +#else +#define STREAM_REF(refs, reason) grpc_stream_ref(refs) +#define STREAM_UNREF(refs, reason) grpc_stream_unref(refs) +#endif + void ref(const char* reason) { + INPROC_LOG(GPR_INFO, "ref_stream %p %s", this, reason); + STREAM_REF(refs, reason); + } + + void unref(const char* reason) { + INPROC_LOG(GPR_INFO, "unref_stream %p %s", this, reason); + STREAM_UNREF(refs, reason); + } +#undef STREAM_REF +#undef STREAM_UNREF -typedef struct inproc_stream { inproc_transport* t; grpc_metadata_batch to_read_initial_md; - uint32_t to_read_initial_md_flags; - bool to_read_initial_md_filled; + uint32_t 
to_read_initial_md_flags = 0; + bool to_read_initial_md_filled = false; grpc_metadata_batch to_read_trailing_md; - bool to_read_trailing_md_filled; - bool ops_needed; - bool op_closure_scheduled; + bool to_read_trailing_md_filled = false; + bool ops_needed = false; + bool op_closure_scheduled = false; grpc_closure op_closure; // Write buffer used only during gap at init time when client-side // stream is set up but server side stream is not yet set up grpc_metadata_batch write_buffer_initial_md; - bool write_buffer_initial_md_filled; - uint32_t write_buffer_initial_md_flags; - grpc_millis write_buffer_deadline; + bool write_buffer_initial_md_filled = false; + uint32_t write_buffer_initial_md_flags = 0; + grpc_millis write_buffer_deadline = GRPC_MILLIS_INF_FUTURE; grpc_metadata_batch write_buffer_trailing_md; - bool write_buffer_trailing_md_filled; - grpc_error* write_buffer_cancel_error; + bool write_buffer_trailing_md_filled = false; + grpc_error* write_buffer_cancel_error = GRPC_ERROR_NONE; struct inproc_stream* other_side; - bool other_side_closed; // won't talk anymore - bool write_buffer_other_side_closed; // on hold + bool other_side_closed = false; // won't talk anymore + bool write_buffer_other_side_closed = false; // on hold grpc_stream_refcount* refs; - grpc_closure* closure_at_destroy; + grpc_closure* closure_at_destroy = nullptr; gpr_arena* arena; - grpc_transport_stream_op_batch* send_message_op; - grpc_transport_stream_op_batch* send_trailing_md_op; - grpc_transport_stream_op_batch* recv_initial_md_op; - grpc_transport_stream_op_batch* recv_message_op; - grpc_transport_stream_op_batch* recv_trailing_md_op; + grpc_transport_stream_op_batch* send_message_op = nullptr; + grpc_transport_stream_op_batch* send_trailing_md_op = nullptr; + grpc_transport_stream_op_batch* recv_initial_md_op = nullptr; + grpc_transport_stream_op_batch* recv_message_op = nullptr; + grpc_transport_stream_op_batch* recv_trailing_md_op = nullptr; grpc_slice_buffer recv_message; grpc_core::ManualConstructor<grpc_core::SliceBufferByteStream> recv_stream; - bool recv_inited; + bool recv_inited = false; - bool initial_md_sent; - bool trailing_md_sent; - bool initial_md_recvd; - bool trailing_md_recvd; + bool initial_md_sent = false; + bool trailing_md_sent = false; + bool initial_md_recvd = false; + bool trailing_md_recvd = false; - bool closed; + bool closed = false; - grpc_error* cancel_self_error; - grpc_error* cancel_other_error; + grpc_error* cancel_self_error = GRPC_ERROR_NONE; + grpc_error* cancel_other_error = GRPC_ERROR_NONE; - grpc_millis deadline; + grpc_millis deadline = GRPC_MILLIS_INF_FUTURE; - bool listed; + bool listed = true; struct inproc_stream* stream_list_prev; struct inproc_stream* stream_list_next; -} inproc_stream; - -static bool cancel_stream_locked(inproc_stream* s, grpc_error* error); -static void op_state_machine(void* arg, grpc_error* error); - -static void ref_transport(inproc_transport* t) { - INPROC_LOG(GPR_INFO, "ref_transport %p", t); - gpr_ref(&t->refs); -} - -static void really_destroy_transport(inproc_transport* t) { - INPROC_LOG(GPR_INFO, "really_destroy_transport %p", t); - grpc_connectivity_state_destroy(&t->connectivity); - if (gpr_unref(&t->mu->refs)) { - gpr_free(t->mu); - } - gpr_free(t); -} - -static void unref_transport(inproc_transport* t) { - INPROC_LOG(GPR_INFO, "unref_transport %p", t); - if (gpr_unref(&t->refs)) { - really_destroy_transport(t); - } -} - -#ifndef NDEBUG -#define STREAM_REF(refs, reason) grpc_stream_ref(refs, reason) -#define STREAM_UNREF(refs, 
reason) grpc_stream_unref(refs, reason) -#else -#define STREAM_REF(refs, reason) grpc_stream_ref(refs) -#define STREAM_UNREF(refs, reason) grpc_stream_unref(refs) -#endif - -static void ref_stream(inproc_stream* s, const char* reason) { - INPROC_LOG(GPR_INFO, "ref_stream %p %s", s, reason); - STREAM_REF(s->refs, reason); -} - -static void unref_stream(inproc_stream* s, const char* reason) { - INPROC_LOG(GPR_INFO, "unref_stream %p %s", s, reason); - STREAM_UNREF(s->refs, reason); -} - -static void really_destroy_stream(inproc_stream* s) { - INPROC_LOG(GPR_INFO, "really_destroy_stream %p", s); +}; - GRPC_ERROR_UNREF(s->write_buffer_cancel_error); - GRPC_ERROR_UNREF(s->cancel_self_error); - GRPC_ERROR_UNREF(s->cancel_other_error); - - if (s->recv_inited) { - grpc_slice_buffer_destroy_internal(&s->recv_message); - } - - unref_transport(s->t); - - if (s->closure_at_destroy) { - GRPC_CLOSURE_SCHED(s->closure_at_destroy, GRPC_ERROR_NONE); - } -} - -static void log_metadata(const grpc_metadata_batch* md_batch, bool is_client, - bool is_initial) { +void log_metadata(const grpc_metadata_batch* md_batch, bool is_client, + bool is_initial) { for (grpc_linked_mdelem* md = md_batch->list.head; md != nullptr; md = md->next) { char* key = grpc_slice_to_c_string(GRPC_MDKEY(md->md)); @@ -193,10 +289,10 @@ static void log_metadata(const grpc_metadata_batch* md_batch, bool is_client, } } -static grpc_error* fill_in_metadata(inproc_stream* s, - const grpc_metadata_batch* metadata, - uint32_t flags, grpc_metadata_batch* out_md, - uint32_t* outflags, bool* markfilled) { +grpc_error* fill_in_metadata(inproc_stream* s, + const grpc_metadata_batch* metadata, + uint32_t flags, grpc_metadata_batch* out_md, + uint32_t* outflags, bool* markfilled) { if (grpc_inproc_trace.enabled()) { log_metadata(metadata, s->t->is_client, outflags != nullptr); } @@ -221,109 +317,16 @@ static grpc_error* fill_in_metadata(inproc_stream* s, return error; } -static int init_stream(grpc_transport* gt, grpc_stream* gs, - grpc_stream_refcount* refcount, const void* server_data, - gpr_arena* arena) { +int init_stream(grpc_transport* gt, grpc_stream* gs, + grpc_stream_refcount* refcount, const void* server_data, + gpr_arena* arena) { INPROC_LOG(GPR_INFO, "init_stream %p %p %p", gt, gs, server_data); inproc_transport* t = reinterpret_cast<inproc_transport*>(gt); - inproc_stream* s = reinterpret_cast<inproc_stream*>(gs); - s->arena = arena; - - s->refs = refcount; - // Ref this stream right now - ref_stream(s, "inproc_init_stream:init"); - - grpc_metadata_batch_init(&s->to_read_initial_md); - s->to_read_initial_md_flags = 0; - s->to_read_initial_md_filled = false; - grpc_metadata_batch_init(&s->to_read_trailing_md); - s->to_read_trailing_md_filled = false; - grpc_metadata_batch_init(&s->write_buffer_initial_md); - s->write_buffer_initial_md_flags = 0; - s->write_buffer_initial_md_filled = false; - grpc_metadata_batch_init(&s->write_buffer_trailing_md); - s->write_buffer_trailing_md_filled = false; - s->ops_needed = false; - s->op_closure_scheduled = false; - GRPC_CLOSURE_INIT(&s->op_closure, op_state_machine, s, - grpc_schedule_on_exec_ctx); - s->t = t; - s->closure_at_destroy = nullptr; - s->other_side_closed = false; - - s->initial_md_sent = s->trailing_md_sent = s->initial_md_recvd = - s->trailing_md_recvd = false; - - s->closed = false; - - s->cancel_self_error = GRPC_ERROR_NONE; - s->cancel_other_error = GRPC_ERROR_NONE; - s->write_buffer_cancel_error = GRPC_ERROR_NONE; - s->deadline = GRPC_MILLIS_INF_FUTURE; - s->write_buffer_deadline = 
GRPC_MILLIS_INF_FUTURE; - - s->stream_list_prev = nullptr; - gpr_mu_lock(&t->mu->mu); - s->listed = true; - ref_stream(s, "inproc_init_stream:list"); - s->stream_list_next = t->stream_list; - if (t->stream_list) { - t->stream_list->stream_list_prev = s; - } - t->stream_list = s; - gpr_mu_unlock(&t->mu->mu); - - if (!server_data) { - ref_transport(t); - inproc_transport* st = t->other_side; - ref_transport(st); - s->other_side = nullptr; // will get filled in soon - // Pass the client-side stream address to the server-side for a ref - ref_stream(s, "inproc_init_stream:clt"); // ref it now on behalf of server - // side to avoid destruction - INPROC_LOG(GPR_INFO, "calling accept stream cb %p %p", st->accept_stream_cb, - st->accept_stream_data); - (*st->accept_stream_cb)(st->accept_stream_data, &st->base, (void*)s); - } else { - // This is the server-side and is being called through accept_stream_cb - inproc_stream* cs = (inproc_stream*)server_data; - s->other_side = cs; - // Ref the server-side stream on behalf of the client now - ref_stream(s, "inproc_init_stream:srv"); - - // Now we are about to affect the other side, so lock the transport - // to make sure that it doesn't get destroyed - gpr_mu_lock(&s->t->mu->mu); - cs->other_side = s; - // Now transfer from the other side's write_buffer if any to the to_read - // buffer - if (cs->write_buffer_initial_md_filled) { - fill_in_metadata(s, &cs->write_buffer_initial_md, - cs->write_buffer_initial_md_flags, - &s->to_read_initial_md, &s->to_read_initial_md_flags, - &s->to_read_initial_md_filled); - s->deadline = GPR_MIN(s->deadline, cs->write_buffer_deadline); - grpc_metadata_batch_clear(&cs->write_buffer_initial_md); - cs->write_buffer_initial_md_filled = false; - } - if (cs->write_buffer_trailing_md_filled) { - fill_in_metadata(s, &cs->write_buffer_trailing_md, 0, - &s->to_read_trailing_md, nullptr, - &s->to_read_trailing_md_filled); - grpc_metadata_batch_clear(&cs->write_buffer_trailing_md); - cs->write_buffer_trailing_md_filled = false; - } - if (cs->write_buffer_cancel_error != GRPC_ERROR_NONE) { - s->cancel_other_error = cs->write_buffer_cancel_error; - cs->write_buffer_cancel_error = GRPC_ERROR_NONE; - } - - gpr_mu_unlock(&s->t->mu->mu); - } + new (gs) inproc_stream(t, refcount, server_data, arena); return 0; // return value is not important } -static void close_stream_locked(inproc_stream* s) { +void close_stream_locked(inproc_stream* s) { if (!s->closed) { // Release the metadata that we would have written out grpc_metadata_batch_destroy(&s->write_buffer_initial_md); @@ -341,21 +344,21 @@ static void close_stream_locked(inproc_stream* s) { n->stream_list_prev = p; } s->listed = false; - unref_stream(s, "close_stream:list"); + s->unref("close_stream:list"); } s->closed = true; - unref_stream(s, "close_stream:closing"); + s->unref("close_stream:closing"); } } // This function means that we are done talking/listening to the other side -static void close_other_side_locked(inproc_stream* s, const char* reason) { +void close_other_side_locked(inproc_stream* s, const char* reason) { if (s->other_side != nullptr) { // First release the metadata that came from the other side's arena grpc_metadata_batch_destroy(&s->to_read_initial_md); grpc_metadata_batch_destroy(&s->to_read_trailing_md); - unref_stream(s->other_side, reason); + s->other_side->unref(reason); s->other_side_closed = true; s->other_side = nullptr; } else if (!s->other_side_closed) { @@ -367,9 +370,9 @@ static void close_other_side_locked(inproc_stream* s, const char* reason) { // 
this stream_op_batch is only one of the pending operations for this // stream. This is called when one of the pending operations for the stream // is done and about to be NULLed out -static void complete_if_batch_end_locked(inproc_stream* s, grpc_error* error, - grpc_transport_stream_op_batch* op, - const char* msg) { +void complete_if_batch_end_locked(inproc_stream* s, grpc_error* error, + grpc_transport_stream_op_batch* op, + const char* msg) { int is_sm = static_cast<int>(op == s->send_message_op); int is_stm = static_cast<int>(op == s->send_trailing_md_op); // TODO(vjpai): We should not consider the recv ops here, since they @@ -386,8 +389,7 @@ static void complete_if_batch_end_locked(inproc_stream* s, grpc_error* error, } } -static void maybe_schedule_op_closure_locked(inproc_stream* s, - grpc_error* error) { +void maybe_schedule_op_closure_locked(inproc_stream* s, grpc_error* error) { if (s && s->ops_needed && !s->op_closure_scheduled) { GRPC_CLOSURE_SCHED(&s->op_closure, GRPC_ERROR_REF(error)); s->op_closure_scheduled = true; @@ -395,7 +397,7 @@ static void maybe_schedule_op_closure_locked(inproc_stream* s, } } -static void fail_helper_locked(inproc_stream* s, grpc_error* error) { +void fail_helper_locked(inproc_stream* s, grpc_error* error) { INPROC_LOG(GPR_INFO, "op_state_machine %p fail_helper", s); // If we're failing this side, we need to make sure that // we also send or have already sent trailing metadata @@ -525,8 +527,7 @@ static void fail_helper_locked(inproc_stream* s, grpc_error* error) { // that the incoming byte stream's next() call will always return // synchronously. That assumption is true today but may not always be // true in the future. -static void message_transfer_locked(inproc_stream* sender, - inproc_stream* receiver) { +void message_transfer_locked(inproc_stream* sender, inproc_stream* receiver) { size_t remaining = sender->send_message_op->payload->send_message.send_message->length(); if (receiver->recv_inited) { @@ -572,7 +573,7 @@ static void message_transfer_locked(inproc_stream* sender, sender->send_message_op = nullptr; } -static void op_state_machine(void* arg, grpc_error* error) { +void op_state_machine(void* arg, grpc_error* error) { // This function gets called when we have contents in the unprocessed reads // Get what we want based on our ops wanted // Schedule our appropriate closures @@ -607,10 +608,8 @@ static void op_state_machine(void* arg, grpc_error* error) { if (other->recv_message_op) { message_transfer_locked(s, other); maybe_schedule_op_closure_locked(other, GRPC_ERROR_NONE); - } else if (!s->t->is_client && - (s->trailing_md_sent || other->recv_trailing_md_op)) { - // A server send will never be matched if the client is waiting - // for trailing metadata already + } else if (!s->t->is_client && s->trailing_md_sent) { + // A server send will never be matched if the server already sent status s->send_message_op->payload->send_message.send_message.reset(); complete_if_batch_end_locked( s, GRPC_ERROR_NONE, s->send_message_op, @@ -621,11 +620,15 @@ static void op_state_machine(void* arg, grpc_error* error) { // Pause a send trailing metadata if there is still an outstanding // send message unless we know that the send message will never get // matched to a receive. This happens on the client if the server has - // already sent status. 
+ // already sent status or on the server if the client has requested + // status if (s->send_trailing_md_op && (!s->send_message_op || (s->t->is_client && - (s->trailing_md_recvd || s->to_read_trailing_md_filled)))) { + (s->trailing_md_recvd || s->to_read_trailing_md_filled)) || + (!s->t->is_client && other && + (other->trailing_md_recvd || other->to_read_trailing_md_filled || + other->recv_trailing_md_op)))) { grpc_metadata_batch* dest = (other == nullptr) ? &s->write_buffer_trailing_md : &other->to_read_trailing_md; @@ -723,16 +726,6 @@ static void op_state_machine(void* arg, grpc_error* error) { maybe_schedule_op_closure_locked(other, GRPC_ERROR_NONE); } } - if (s->recv_trailing_md_op && s->t->is_client && other && - other->send_message_op) { - INPROC_LOG(GPR_INFO, - "op_state_machine %p scheduling trailing-metadata-ready %p", s, - GRPC_ERROR_NONE); - GRPC_CLOSURE_SCHED(s->recv_trailing_md_op->payload->recv_trailing_metadata - .recv_trailing_metadata_ready, - GRPC_ERROR_NONE); - maybe_schedule_op_closure_locked(other, GRPC_ERROR_NONE); - } if (s->to_read_trailing_md_filled) { if (s->trailing_md_recvd) { new_err = @@ -748,6 +741,7 @@ static void op_state_machine(void* arg, grpc_error* error) { if (s->recv_message_op != nullptr) { // This message needs to be wrapped up because it will never be // satisfied + *s->recv_message_op->payload->recv_message.recv_message = nullptr; INPROC_LOG(GPR_INFO, "op_state_machine %p scheduling message-ready", s); GRPC_CLOSURE_SCHED( s->recv_message_op->payload->recv_message.recv_message_ready, @@ -810,6 +804,7 @@ static void op_state_machine(void* arg, grpc_error* error) { // No further message will come on this stream, so finish off the // recv_message_op INPROC_LOG(GPR_INFO, "op_state_machine %p scheduling message-ready", s); + *s->recv_message_op->payload->recv_message.recv_message = nullptr; GRPC_CLOSURE_SCHED( s->recv_message_op->payload->recv_message.recv_message_ready, GRPC_ERROR_NONE); @@ -847,7 +842,7 @@ done: GRPC_ERROR_UNREF(new_err); } -static bool cancel_stream_locked(inproc_stream* s, grpc_error* error) { +bool cancel_stream_locked(inproc_stream* s, grpc_error* error) { bool ret = false; // was the cancel accepted INPROC_LOG(GPR_INFO, "cancel_stream %p with %s", s, grpc_error_string(error)); if (s->cancel_self_error == GRPC_ERROR_NONE) { @@ -900,10 +895,10 @@ static bool cancel_stream_locked(inproc_stream* s, grpc_error* error) { return ret; } -static void do_nothing(void* arg, grpc_error* error) {} +void do_nothing(void* arg, grpc_error* error) {} -static void perform_stream_op(grpc_transport* gt, grpc_stream* gs, - grpc_transport_stream_op_batch* op) { +void perform_stream_op(grpc_transport* gt, grpc_stream* gs, + grpc_transport_stream_op_batch* op) { INPROC_LOG(GPR_INFO, "perform_stream_op %p %p %p", gt, gs, op); inproc_stream* s = reinterpret_cast<inproc_stream*>(gs); gpr_mu* mu = &s->t->mu->mu; // save aside in case s gets closed @@ -1012,18 +1007,18 @@ static void perform_stream_op(grpc_transport* gt, grpc_stream* gs, } // We want to initiate the closure if: - // 1. We want to send a message and the other side wants to receive or end + // 1. We want to send a message and the other side wants to receive // 2. We want to send trailing metadata and there isn't an unmatched send + // or the other side wants trailing metadata // 3. We want initial metadata and the other side has sent it // 4. We want to receive a message and there is a message ready // 5. 
There is trailing metadata, even if nothing specifically wants // that because that can shut down the receive message as well - if ((op->send_message && other && - ((other->recv_message_op != nullptr) || - (other->recv_trailing_md_op != nullptr))) || - (op->send_trailing_metadata && !op->send_message) || + if ((op->send_message && other && other->recv_message_op != nullptr) || + (op->send_trailing_metadata && + (!s->send_message_op || (other && other->recv_trailing_md_op))) || (op->recv_initial_metadata && s->to_read_initial_md_filled) || - (op->recv_message && other && (other->send_message_op != nullptr)) || + (op->recv_message && other && other->send_message_op != nullptr) || (s->to_read_trailing_md_filled || s->trailing_md_recvd)) { if (!s->op_closure_scheduled) { GRPC_CLOSURE_SCHED(&s->op_closure, GRPC_ERROR_NONE); @@ -1083,7 +1078,7 @@ static void perform_stream_op(grpc_transport* gt, grpc_stream* gs, GRPC_ERROR_UNREF(error); } -static void close_transport_locked(inproc_transport* t) { +void close_transport_locked(inproc_transport* t) { INPROC_LOG(GPR_INFO, "close_transport %p %d", t, t->is_closed); grpc_connectivity_state_set( &t->connectivity, GRPC_CHANNEL_SHUTDOWN, @@ -1103,7 +1098,7 @@ static void close_transport_locked(inproc_transport* t) { } } -static void perform_transport_op(grpc_transport* gt, grpc_transport_op* op) { +void perform_transport_op(grpc_transport* gt, grpc_transport_op* op) { inproc_transport* t = reinterpret_cast<inproc_transport*>(gt); INPROC_LOG(GPR_INFO, "perform_transport_op %p %p", t, op); gpr_mu_lock(&t->mu->mu); @@ -1136,39 +1131,64 @@ static void perform_transport_op(grpc_transport* gt, grpc_transport_op* op) { gpr_mu_unlock(&t->mu->mu); } -static void destroy_stream(grpc_transport* gt, grpc_stream* gs, - grpc_closure* then_schedule_closure) { +void destroy_stream(grpc_transport* gt, grpc_stream* gs, + grpc_closure* then_schedule_closure) { INPROC_LOG(GPR_INFO, "destroy_stream %p %p", gs, then_schedule_closure); inproc_stream* s = reinterpret_cast<inproc_stream*>(gs); s->closure_at_destroy = then_schedule_closure; - really_destroy_stream(s); + s->~inproc_stream(); } -static void destroy_transport(grpc_transport* gt) { +void destroy_transport(grpc_transport* gt) { inproc_transport* t = reinterpret_cast<inproc_transport*>(gt); INPROC_LOG(GPR_INFO, "destroy_transport %p", t); gpr_mu_lock(&t->mu->mu); close_transport_locked(t); gpr_mu_unlock(&t->mu->mu); - unref_transport(t->other_side); - unref_transport(t); + t->other_side->unref(); + t->unref(); } /******************************************************************************* * INTEGRATION GLUE */ -static void set_pollset(grpc_transport* gt, grpc_stream* gs, - grpc_pollset* pollset) { +void set_pollset(grpc_transport* gt, grpc_stream* gs, grpc_pollset* pollset) { // Nothing to do here } -static void set_pollset_set(grpc_transport* gt, grpc_stream* gs, - grpc_pollset_set* pollset_set) { +void set_pollset_set(grpc_transport* gt, grpc_stream* gs, + grpc_pollset_set* pollset_set) { // Nothing to do here } -static grpc_endpoint* get_endpoint(grpc_transport* t) { return nullptr; } +grpc_endpoint* get_endpoint(grpc_transport* t) { return nullptr; } + +const grpc_transport_vtable inproc_vtable = { + sizeof(inproc_stream), "inproc", init_stream, + set_pollset, set_pollset_set, perform_stream_op, + perform_transport_op, destroy_stream, destroy_transport, + get_endpoint}; + +/******************************************************************************* + * Main inproc transport functions + */ +void 
inproc_transports_create(grpc_transport** server_transport, + const grpc_channel_args* server_args, + grpc_transport** client_transport, + const grpc_channel_args* client_args) { + INPROC_LOG(GPR_INFO, "inproc_transports_create"); + shared_mu* mu = new (gpr_malloc(sizeof(*mu))) shared_mu(); + inproc_transport* st = new (gpr_malloc(sizeof(*st))) + inproc_transport(&inproc_vtable, mu, /*is_client=*/false); + inproc_transport* ct = new (gpr_malloc(sizeof(*ct))) + inproc_transport(&inproc_vtable, mu, /*is_client=*/true); + st->other_side = ct; + ct->other_side = st; + *server_transport = reinterpret_cast<grpc_transport*>(st); + *client_transport = reinterpret_cast<grpc_transport*>(ct); +} +} // namespace /******************************************************************************* * GLOBAL INIT AND DESTROY @@ -1190,48 +1210,6 @@ void grpc_inproc_transport_init(void) { g_fake_auth_value = grpc_slice_from_static_string("inproc-fail"); } -static const grpc_transport_vtable inproc_vtable = { - sizeof(inproc_stream), "inproc", init_stream, - set_pollset, set_pollset_set, perform_stream_op, - perform_transport_op, destroy_stream, destroy_transport, - get_endpoint}; - -/******************************************************************************* - * Main inproc transport functions - */ -static void inproc_transports_create(grpc_transport** server_transport, - const grpc_channel_args* server_args, - grpc_transport** client_transport, - const grpc_channel_args* client_args) { - INPROC_LOG(GPR_INFO, "inproc_transports_create"); - inproc_transport* st = - static_cast<inproc_transport*>(gpr_zalloc(sizeof(*st))); - inproc_transport* ct = - static_cast<inproc_transport*>(gpr_zalloc(sizeof(*ct))); - // Share one lock between both sides since both sides get affected - st->mu = ct->mu = static_cast<shared_mu*>(gpr_malloc(sizeof(*st->mu))); - gpr_mu_init(&st->mu->mu); - gpr_ref_init(&st->mu->refs, 2); - st->base.vtable = &inproc_vtable; - ct->base.vtable = &inproc_vtable; - // Start each side of transport with 2 refs since they each have a ref - // to the other - gpr_ref_init(&st->refs, 2); - gpr_ref_init(&ct->refs, 2); - st->is_client = false; - ct->is_client = true; - grpc_connectivity_state_init(&st->connectivity, GRPC_CHANNEL_READY, - "inproc_server"); - grpc_connectivity_state_init(&ct->connectivity, GRPC_CHANNEL_READY, - "inproc_client"); - st->other_side = ct; - ct->other_side = st; - st->stream_list = nullptr; - ct->stream_list = nullptr; - *server_transport = reinterpret_cast<grpc_transport*>(st); - *client_transport = reinterpret_cast<grpc_transport*>(ct); -} - grpc_channel* grpc_inproc_channel_create(grpc_server* server, grpc_channel_args* args, void* reserved) { @@ -1256,7 +1234,9 @@ grpc_channel* grpc_inproc_channel_create(grpc_server* server, inproc_transports_create(&server_transport, server_args, &client_transport, client_args); - grpc_server_setup_transport(server, server_transport, nullptr, server_args); + // TODO(ncteisen): design and support channelz GetSocket for inproc. + grpc_server_setup_transport(server, server_transport, nullptr, server_args, + 0); grpc_channel* channel = grpc_channel_create( "inproc", client_args, GRPC_CLIENT_DIRECT_CHANNEL, client_transport);
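The recurring pattern in this change, shown in isolation: both the cronet and inproc transports now construct their stream and transport objects with placement new into storage the caller already owns, with default member initializers replacing memset()-based zeroing, and tear the objects down with an explicit destructor call. A minimal self-contained C++ sketch of that pattern; the widget type and the make_widget/unmake_widget helpers are hypothetical stand-ins, not code from the gRPC tree:

  #include <cstdlib>  // std::malloc/std::free as stand-ins for gpr_malloc/gpr_free
  #include <new>      // placement new

  // Hypothetical stream-like type. Default member initializers replace the
  // memset()-based zeroing that the old C-style structs needed.
  struct widget {
    explicit widget(int id) : id(id) {}
    ~widget() { /* release owned resources here */ }
    int id;
    bool closed = false;      // previously: memset + explicit assignment
    void* payload = nullptr;
  };

  // The caller owns the storage (the transports use arena- or
  // gpr_malloc-backed memory); construct the object in place...
  widget* make_widget(void* storage, int id) {
    return new (storage) widget(id);
  }

  // ...and tear it down with an explicit destructor call, mirroring
  // destroy_stream calling s->~stream_obj(). The storage itself is
  // released separately by whoever allocated it.
  void unmake_widget(widget* w) { w->~widget(); }

  int main() {
    void* storage = std::malloc(sizeof(widget));
    widget* w = make_widget(storage, 42);
    unmake_widget(w);
    std::free(storage);
    return 0;
  }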