about summary refs log tree commit diff homepage
path: root/src/core
diff options
context:
space:
mode:
author Vizerai <jsking@google.com> 2017-05-23 14:10:10 -0700
committer Vizerai <jsking@google.com> 2017-05-23 14:10:10 -0700
commit d74dbd3889d4cbd3f756d0d6392569bf358a88d8 (patch)
tree d64647b9fa65c5996ce56203f3698cfbefc4ab46 /src/core
parent a194aab223af6558713b6482976a407b816ce15a (diff)
parent 0a94f3c8ab55dfd12c14058d57f33121c8d6c411 (diff)
Merge branch 'master' of https://github.com/Vizerai/grpc into intrusive_hash_map
Diffstat (limited to 'src/core')
-rw-r--r--src/core/ext/filters/client_channel/channel_connectivity.c2
-rw-r--r--src/core/ext/filters/client_channel/client_channel.c150
-rw-r--r--src/core/ext/filters/client_channel/lb_policy.c3
-rw-r--r--src/core/ext/filters/client_channel/lb_policy.h15
-rw-r--r--src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.c153
-rw-r--r--src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h42
-rw-r--r--src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c481
-rw-r--r--src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.c133
-rw-r--r--src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h65
-rw-r--r--src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.c139
-rw-r--r--src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h8
-rw-r--r--src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.c3
-rw-r--r--src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c482
-rw-r--r--src/core/ext/filters/client_channel/parse_address.c129
-rw-r--r--src/core/ext/filters/client_channel/parse_address.h6
-rw-r--r--src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.c18
-rw-r--r--src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.c73
-rw-r--r--src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h6
-rw-r--r--src/core/ext/filters/client_channel/subchannel.c4
-rw-r--r--src/core/ext/filters/client_channel/subchannel.h1
-rw-r--r--src/core/ext/filters/client_channel/subchannel_index.c12
-rw-r--r--src/core/ext/filters/http/http_filters_plugin.c1
-rw-r--r--src/core/ext/filters/http/message_compress/message_compress_filter.c5
-rw-r--r--src/core/ext/filters/http/message_compress/message_compress_filter.h2
-rw-r--r--src/core/ext/filters/http/server/http_server_filter.c2
-rw-r--r--src/core/ext/filters/workarounds/workaround_cronet_compression_filter.c223
-rw-r--r--src/core/ext/filters/workarounds/workaround_cronet_compression_filter.h40
-rw-r--r--src/core/ext/filters/workarounds/workaround_utils.c65
-rw-r--r--src/core/ext/filters/workarounds/workaround_utils.h52
-rw-r--r--src/core/ext/transport/chttp2/client/insecure/channel_create.c2
-rw-r--r--src/core/ext/transport/chttp2/transport/chttp2_transport.c97
-rw-r--r--src/core/ext/transport/chttp2/transport/chttp2_transport.h5
-rw-r--r--src/core/ext/transport/chttp2/transport/frame_settings.c6
-rw-r--r--src/core/ext/transport/chttp2/transport/hpack_encoder.c6
-rw-r--r--src/core/ext/transport/chttp2/transport/hpack_parser.c6
-rw-r--r--src/core/ext/transport/chttp2/transport/hpack_table.c7
-rw-r--r--src/core/ext/transport/chttp2/transport/internal.h31
-rw-r--r--src/core/ext/transport/chttp2/transport/parsing.c16
-rw-r--r--src/core/ext/transport/chttp2/transport/writing.c45
-rw-r--r--src/core/ext/transport/cronet/transport/cronet_transport.c4
-rw-r--r--src/core/lib/channel/channel_args.c2
-rw-r--r--src/core/lib/channel/channel_stack.c2
-rw-r--r--src/core/lib/channel/channel_stack.h4
-rw-r--r--src/core/lib/channel/channel_stack_builder.c3
-rw-r--r--src/core/lib/channel/channel_stack_builder.h2
-rw-r--r--src/core/lib/channel/context.h3
-rw-r--r--src/core/lib/debug/trace.c19
-rw-r--r--src/core/lib/debug/trace.h28
-rw-r--r--src/core/lib/http/httpcli.c2
-rw-r--r--src/core/lib/http/parser.c4
-rw-r--r--src/core/lib/http/parser.h3
-rw-r--r--src/core/lib/iomgr/combiner.c12
-rw-r--r--src/core/lib/iomgr/combiner.h3
-rw-r--r--src/core/lib/iomgr/error.c2
-rw-r--r--src/core/lib/iomgr/ev_epoll1_linux.c984
-rw-r--r--src/core/lib/iomgr/ev_epoll1_linux.h44
-rw-r--r--src/core/lib/iomgr/ev_epoll_limited_pollers_linux.c2146
-rw-r--r--src/core/lib/iomgr/ev_epoll_limited_pollers_linux.h43
-rw-r--r--src/core/lib/iomgr/ev_epoll_thread_pool_linux.c1337
-rw-r--r--src/core/lib/iomgr/ev_epoll_thread_pool_linux.h43
-rw-r--r--src/core/lib/iomgr/ev_epollex_linux.c1511
-rw-r--r--src/core/lib/iomgr/ev_epollex_linux.h43
-rw-r--r--src/core/lib/iomgr/ev_epollsig_linux.c (renamed from src/core/lib/iomgr/ev_epoll_linux.c)74
-rw-r--r--src/core/lib/iomgr/ev_epollsig_linux.h (renamed from src/core/lib/iomgr/ev_epoll_linux.h)8
-rw-r--r--src/core/lib/iomgr/ev_poll_posix.c39
-rw-r--r--src/core/lib/iomgr/ev_poll_posix.h4
-rw-r--r--src/core/lib/iomgr/ev_posix.c30
-rw-r--r--src/core/lib/iomgr/ev_posix.h7
-rw-r--r--src/core/lib/iomgr/ev_windows.c43
-rw-r--r--src/core/lib/iomgr/exec_ctx.c5
-rw-r--r--src/core/lib/iomgr/exec_ctx.h2
-rw-r--r--src/core/lib/iomgr/iomgr.c4
-rw-r--r--src/core/lib/iomgr/iomgr.h3
-rw-r--r--src/core/lib/iomgr/is_epollexclusive_available.c116
-rw-r--r--src/core/lib/iomgr/is_epollexclusive_available.h41
-rw-r--r--src/core/lib/iomgr/lockfree_event.c16
-rw-r--r--src/core/lib/iomgr/pollset.h7
-rw-r--r--src/core/lib/iomgr/pollset_uv.c2
-rw-r--r--src/core/lib/iomgr/pollset_windows.c6
-rw-r--r--src/core/lib/iomgr/port.h1
-rw-r--r--src/core/lib/iomgr/resource_quota.c17
-rw-r--r--src/core/lib/iomgr/resource_quota.h3
-rw-r--r--src/core/lib/iomgr/sys_epoll_wrapper.h43
-rw-r--r--src/core/lib/iomgr/tcp_client_posix.c12
-rw-r--r--src/core/lib/iomgr/tcp_client_uv.c6
-rw-r--r--src/core/lib/iomgr/tcp_posix.c14
-rw-r--r--src/core/lib/iomgr/tcp_posix.h3
-rw-r--r--src/core/lib/iomgr/tcp_server_posix.c2
-rw-r--r--src/core/lib/iomgr/tcp_server_uv.c32
-rw-r--r--src/core/lib/iomgr/tcp_uv.c26
-rw-r--r--src/core/lib/iomgr/tcp_uv.h3
-rw-r--r--src/core/lib/iomgr/timer_generic.c49
-rw-r--r--src/core/lib/iomgr/timer_manager.c276
-rw-r--r--src/core/lib/iomgr/timer_manager.h52
-rw-r--r--src/core/lib/iomgr/timer_uv.c6
-rw-r--r--src/core/lib/security/credentials/google_default/google_default_credentials.c2
-rw-r--r--src/core/lib/security/credentials/jwt/jwt_credentials.c2
-rw-r--r--src/core/lib/security/credentials/oauth2/oauth2_credentials.c2
-rw-r--r--src/core/lib/security/transport/client_auth_filter.c2
-rw-r--r--src/core/lib/security/transport/secure_endpoint.c6
-rw-r--r--src/core/lib/security/transport/secure_endpoint.h2
-rw-r--r--src/core/lib/support/cmdline.c6
-rw-r--r--src/core/lib/support/cpu_linux.c8
-rw-r--r--src/core/lib/support/histogram.c4
-rw-r--r--src/core/lib/support/host_port.c2
-rw-r--r--src/core/lib/support/mpscq.c38
-rw-r--r--src/core/lib/support/mpscq.h29
-rw-r--r--src/core/lib/support/string.c24
-rw-r--r--src/core/lib/support/string_posix.c2
-rw-r--r--src/core/lib/support/subprocess_posix.c4
-rw-r--r--src/core/lib/support/thd_posix.c2
-rw-r--r--src/core/lib/support/wrap_memcpy.c4
-rw-r--r--src/core/lib/surface/alarm.c4
-rw-r--r--src/core/lib/surface/api_trace.c3
-rw-r--r--src/core/lib/surface/api_trace.h4
-rw-r--r--src/core/lib/surface/call.c13
-rw-r--r--src/core/lib/surface/call.h6
-rw-r--r--src/core/lib/surface/completion_queue.c803
-rw-r--r--src/core/lib/surface/completion_queue.h33
-rw-r--r--src/core/lib/surface/init.c3
-rw-r--r--src/core/lib/surface/server.c118
-rw-r--r--src/core/lib/surface/server.h3
-rw-r--r--src/core/lib/transport/bdp_estimator.c34
-rw-r--r--src/core/lib/transport/bdp_estimator.h8
-rw-r--r--src/core/lib/transport/connectivity_state.c12
-rw-r--r--src/core/lib/transport/connectivity_state.h3
-rw-r--r--src/core/plugin_registry/grpc_plugin_registry.c4
-rw-r--r--src/core/plugin_registry/grpc_unsecure_plugin_registry.c4
-rw-r--r--src/core/tsi/fake_transport_security.c8
-rw-r--r--src/core/tsi/ssl_transport_security.c2
-rw-r--r--src/core/tsi/transport_security.c2
-rw-r--r--src/core/tsi/transport_security.h3
-rw-r--r--src/core/tsi/transport_security_interface.h5
133 files changed, 9633 insertions, 1313 deletions
diff --git a/src/core/ext/filters/client_channel/channel_connectivity.c b/src/core/ext/filters/client_channel/channel_connectivity.c
index 62f58fb278..f83670db82 100644
--- a/src/core/ext/filters/client_channel/channel_connectivity.c
+++ b/src/core/ext/filters/client_channel/channel_connectivity.c
@@ -132,7 +132,7 @@ static void partly_done(grpc_exec_ctx *exec_ctx, state_watcher *w,
gpr_mu_lock(&w->mu);
if (due_to_completion) {
- if (grpc_trace_operation_failures) {
+ if (GRPC_TRACER_ON(grpc_trace_operation_failures)) {
GRPC_LOG_IF_ERROR("watch_completion_error", GRPC_ERROR_REF(error));
}
GRPC_ERROR_UNREF(error);
diff --git a/src/core/ext/filters/client_channel/client_channel.c b/src/core/ext/filters/client_channel/client_channel.c
index 0463b25412..f2f27b9175 100644
--- a/src/core/ext/filters/client_channel/client_channel.c
+++ b/src/core/ext/filters/client_channel/client_channel.c
@@ -760,12 +760,6 @@ static void cc_destroy_channel_elem(grpc_exec_ctx *exec_ctx,
#define CANCELLED_CALL ((grpc_subchannel_call *)1)
-typedef enum {
- /* zero so that it can be default-initialized */
- GRPC_SUBCHANNEL_CALL_HOLDER_NOT_CREATING = 0,
- GRPC_SUBCHANNEL_CALL_HOLDER_PICKING_SUBCHANNEL
-} subchannel_creation_phase;
-
/** Call data. Holds a pointer to grpc_subchannel_call and the
associated machinery to create such a pointer.
Handles queueing of stream ops until a call object is ready, waiting
@@ -793,8 +787,9 @@ typedef struct client_channel_call_data {
gpr_atm subchannel_call;
gpr_arena *arena;
- subchannel_creation_phase creation_phase;
+ bool pick_pending;
grpc_connected_subchannel *connected_subchannel;
+ grpc_call_context_element subchannel_call_context[GRPC_CONTEXT_COUNT];
grpc_polling_entity *pollent;
grpc_transport_stream_op_batch **waiting_ops;
@@ -914,16 +909,18 @@ static void subchannel_ready_locked(grpc_exec_ctx *exec_ctx, void *arg,
grpc_call_element *elem = arg;
call_data *calld = elem->call_data;
channel_data *chand = elem->channel_data;
- GPR_ASSERT(calld->creation_phase ==
- GRPC_SUBCHANNEL_CALL_HOLDER_PICKING_SUBCHANNEL);
+ GPR_ASSERT(calld->pick_pending);
+ calld->pick_pending = false;
grpc_polling_entity_del_from_pollset_set(exec_ctx, calld->pollent,
chand->interested_parties);
- calld->creation_phase = GRPC_SUBCHANNEL_CALL_HOLDER_NOT_CREATING;
if (calld->connected_subchannel == NULL) {
- gpr_atm_no_barrier_store(&calld->subchannel_call, 1);
+ gpr_atm_no_barrier_store(&calld->subchannel_call, (gpr_atm)CANCELLED_CALL);
fail_locked(exec_ctx, calld,
- GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
- "Failed to create subchannel", &error, 1));
+ error == GRPC_ERROR_NONE
+ ? GRPC_ERROR_CREATE_FROM_STATIC_STRING(
+ "Call dropped by load balancing policy")
+ : GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
+ "Failed to create subchannel", &error, 1));
} else if (GET_CALL(calld) == CANCELLED_CALL) {
/* already cancelled before subchannel became ready */
grpc_error *cancellation_error =
@@ -944,7 +941,8 @@ static void subchannel_ready_locked(grpc_exec_ctx *exec_ctx, void *arg,
.path = calld->path,
.start_time = calld->call_start_time,
.deadline = calld->deadline,
- .arena = calld->arena};
+ .arena = calld->arena,
+ .context = calld->subchannel_call_context};
grpc_error *new_error = grpc_connected_subchannel_create_call(
exec_ctx, calld->connected_subchannel, &call_args, &subchannel_call);
gpr_atm_rel_store(&calld->subchannel_call,
@@ -973,6 +971,7 @@ typedef struct {
grpc_metadata_batch *initial_metadata;
uint32_t initial_metadata_flags;
grpc_connected_subchannel **connected_subchannel;
+ grpc_call_context_element *subchannel_call_context;
grpc_closure *on_ready;
grpc_call_element *elem;
grpc_closure closure;
@@ -984,8 +983,8 @@ typedef struct {
static bool pick_subchannel_locked(
grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
grpc_metadata_batch *initial_metadata, uint32_t initial_metadata_flags,
- grpc_connected_subchannel **connected_subchannel, grpc_closure *on_ready,
- grpc_error *error);
+ grpc_connected_subchannel **connected_subchannel,
+ grpc_call_context_element *subchannel_call_context, grpc_closure *on_ready);
static void continue_picking_locked(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error *error) {
@@ -997,49 +996,49 @@ static void continue_picking_locked(grpc_exec_ctx *exec_ctx, void *arg,
} else {
if (pick_subchannel_locked(exec_ctx, cpa->elem, cpa->initial_metadata,
cpa->initial_metadata_flags,
- cpa->connected_subchannel, cpa->on_ready,
- GRPC_ERROR_NONE)) {
+ cpa->connected_subchannel,
+ cpa->subchannel_call_context, cpa->on_ready)) {
grpc_closure_sched(exec_ctx, cpa->on_ready, GRPC_ERROR_NONE);
}
}
gpr_free(cpa);
}
+static void cancel_pick_locked(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
+ grpc_error *error) {
+ channel_data *chand = elem->channel_data;
+ call_data *calld = elem->call_data;
+ if (chand->lb_policy != NULL) {
+ grpc_lb_policy_cancel_pick_locked(exec_ctx, chand->lb_policy,
+ &calld->connected_subchannel,
+ GRPC_ERROR_REF(error));
+ }
+ for (grpc_closure *closure = chand->waiting_for_config_closures.head;
+ closure != NULL; closure = closure->next_data.next) {
+ continue_picking_args *cpa = closure->cb_arg;
+ if (cpa->connected_subchannel == &calld->connected_subchannel) {
+ cpa->connected_subchannel = NULL;
+ grpc_closure_sched(exec_ctx, cpa->on_ready,
+ GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
+ "Pick cancelled", &error, 1));
+ }
+ }
+ GRPC_ERROR_UNREF(error);
+}
+
static bool pick_subchannel_locked(
grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
grpc_metadata_batch *initial_metadata, uint32_t initial_metadata_flags,
- grpc_connected_subchannel **connected_subchannel, grpc_closure *on_ready,
- grpc_error *error) {
+ grpc_connected_subchannel **connected_subchannel,
+ grpc_call_context_element *subchannel_call_context,
+ grpc_closure *on_ready) {
GPR_TIMER_BEGIN("pick_subchannel", 0);
channel_data *chand = elem->channel_data;
call_data *calld = elem->call_data;
- continue_picking_args *cpa;
- grpc_closure *closure;
GPR_ASSERT(connected_subchannel);
- if (initial_metadata == NULL) {
- if (chand->lb_policy != NULL) {
- grpc_lb_policy_cancel_pick_locked(exec_ctx, chand->lb_policy,
- connected_subchannel,
- GRPC_ERROR_REF(error));
- }
- for (closure = chand->waiting_for_config_closures.head; closure != NULL;
- closure = closure->next_data.next) {
- cpa = closure->cb_arg;
- if (cpa->connected_subchannel == connected_subchannel) {
- cpa->connected_subchannel = NULL;
- grpc_closure_sched(exec_ctx, cpa->on_ready,
- GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
- "Pick cancelled", &error, 1));
- }
- }
- GPR_TIMER_END("pick_subchannel", 0);
- GRPC_ERROR_UNREF(error);
- return true;
- }
- GPR_ASSERT(error == GRPC_ERROR_NONE);
if (chand->lb_policy != NULL) {
apply_final_configuration_locked(exec_ctx, elem);
grpc_lb_policy *lb_policy = chand->lb_policy;
@@ -1062,8 +1061,7 @@ static bool pick_subchannel_locked(
}
}
const grpc_lb_policy_pick_args inputs = {
- initial_metadata, initial_metadata_flags, &calld->lb_token_mdelem,
- gpr_inf_future(GPR_CLOCK_MONOTONIC)};
+ initial_metadata, initial_metadata_flags, &calld->lb_token_mdelem};
// Wrap the user-provided callback in order to hold a strong reference to
// the LB policy for the duration of the pick.
@@ -1076,8 +1074,8 @@ static bool pick_subchannel_locked(
GRPC_LB_POLICY_REF(lb_policy, "pick_subchannel_wrapping");
w_on_pick_arg->lb_policy = lb_policy;
const bool pick_done = grpc_lb_policy_pick_locked(
- exec_ctx, lb_policy, &inputs, connected_subchannel, NULL,
- &w_on_pick_arg->wrapper_closure);
+ exec_ctx, lb_policy, &inputs, connected_subchannel,
+ subchannel_call_context, NULL, &w_on_pick_arg->wrapper_closure);
if (pick_done) {
/* synchronous grpc_lb_policy_pick call. Unref the LB policy. */
GRPC_LB_POLICY_UNREF(exec_ctx, w_on_pick_arg->lb_policy,
@@ -1096,10 +1094,11 @@ static bool pick_subchannel_locked(
&chand->on_resolver_result_changed);
}
if (chand->resolver != NULL) {
- cpa = gpr_malloc(sizeof(*cpa));
+ continue_picking_args *cpa = gpr_malloc(sizeof(*cpa));
cpa->initial_metadata = initial_metadata;
cpa->initial_metadata_flags = initial_metadata_flags;
cpa->connected_subchannel = connected_subchannel;
+ cpa->subchannel_call_context = subchannel_call_context;
cpa->on_ready = on_ready;
cpa->elem = elem;
grpc_closure_init(&cpa->closure, continue_picking_locked, cpa,
@@ -1151,16 +1150,13 @@ static void start_transport_stream_op_batch_locked_inner(
error to the caller when the first op does get passed down. */
calld->cancel_error =
GRPC_ERROR_REF(op->payload->cancel_stream.cancel_error);
- switch (calld->creation_phase) {
- case GRPC_SUBCHANNEL_CALL_HOLDER_NOT_CREATING:
- fail_locked(exec_ctx, calld,
- GRPC_ERROR_REF(op->payload->cancel_stream.cancel_error));
- break;
- case GRPC_SUBCHANNEL_CALL_HOLDER_PICKING_SUBCHANNEL:
- pick_subchannel_locked(
- exec_ctx, elem, NULL, 0, &calld->connected_subchannel, NULL,
- GRPC_ERROR_REF(op->payload->cancel_stream.cancel_error));
- break;
+ if (calld->pick_pending) {
+ cancel_pick_locked(
+ exec_ctx, elem,
+ GRPC_ERROR_REF(op->payload->cancel_stream.cancel_error));
+ } else {
+ fail_locked(exec_ctx, calld,
+ GRPC_ERROR_REF(op->payload->cancel_stream.cancel_error));
}
grpc_transport_stream_op_batch_finish_with_failure(
exec_ctx, op,
@@ -1170,9 +1166,9 @@ static void start_transport_stream_op_batch_locked_inner(
}
}
/* if we don't have a subchannel, try to get one */
- if (calld->creation_phase == GRPC_SUBCHANNEL_CALL_HOLDER_NOT_CREATING &&
- calld->connected_subchannel == NULL && op->send_initial_metadata) {
- calld->creation_phase = GRPC_SUBCHANNEL_CALL_HOLDER_PICKING_SUBCHANNEL;
+ if (!calld->pick_pending && calld->connected_subchannel == NULL &&
+ op->send_initial_metadata) {
+ calld->pick_pending = true;
grpc_closure_init(&calld->next_step, subchannel_ready_locked, elem,
grpc_combiner_scheduler(chand->combiner, true));
GRPC_CALL_STACK_REF(calld->owning_call, "pick_subchannel");
@@ -1183,24 +1179,34 @@ static void start_transport_stream_op_batch_locked_inner(
exec_ctx, elem,
op->payload->send_initial_metadata.send_initial_metadata,
op->payload->send_initial_metadata.send_initial_metadata_flags,
- &calld->connected_subchannel, &calld->next_step, GRPC_ERROR_NONE)) {
- calld->creation_phase = GRPC_SUBCHANNEL_CALL_HOLDER_NOT_CREATING;
+ &calld->connected_subchannel, calld->subchannel_call_context,
+ &calld->next_step)) {
+ calld->pick_pending = false;
GRPC_CALL_STACK_UNREF(exec_ctx, calld->owning_call, "pick_subchannel");
+ if (calld->connected_subchannel == NULL) {
+ gpr_atm_no_barrier_store(&calld->subchannel_call,
+ (gpr_atm)CANCELLED_CALL);
+ grpc_error *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING(
+ "Call dropped by load balancing policy");
+ fail_locked(exec_ctx, calld, GRPC_ERROR_REF(error));
+ grpc_transport_stream_op_batch_finish_with_failure(exec_ctx, op, error);
+ return; // Early out.
+ }
} else {
grpc_polling_entity_add_to_pollset_set(exec_ctx, calld->pollent,
chand->interested_parties);
}
}
/* if we've got a subchannel, then let's ask it to create a call */
- if (calld->creation_phase == GRPC_SUBCHANNEL_CALL_HOLDER_NOT_CREATING &&
- calld->connected_subchannel != NULL) {
+ if (!calld->pick_pending && calld->connected_subchannel != NULL) {
grpc_subchannel_call *subchannel_call = NULL;
const grpc_connected_subchannel_call_args call_args = {
.pollent = calld->pollent,
.path = calld->path,
.start_time = calld->call_start_time,
.deadline = calld->deadline,
- .arena = calld->arena};
+ .arena = calld->arena,
+ .context = calld->subchannel_call_context};
grpc_error *error = grpc_connected_subchannel_create_call(
exec_ctx, calld->connected_subchannel, &call_args, &subchannel_call);
gpr_atm_rel_store(&calld->subchannel_call,
@@ -1349,12 +1355,18 @@ static void cc_destroy_call_elem(grpc_exec_ctx *exec_ctx,
then_schedule_closure = NULL;
GRPC_SUBCHANNEL_CALL_UNREF(exec_ctx, call, "client_channel_destroy_call");
}
- GPR_ASSERT(calld->creation_phase == GRPC_SUBCHANNEL_CALL_HOLDER_NOT_CREATING);
+ GPR_ASSERT(!calld->pick_pending);
GPR_ASSERT(calld->waiting_ops_count == 0);
if (calld->connected_subchannel != NULL) {
GRPC_CONNECTED_SUBCHANNEL_UNREF(exec_ctx, calld->connected_subchannel,
"picked");
}
+ for (size_t i = 0; i < GRPC_CONTEXT_COUNT; ++i) {
+ if (calld->subchannel_call_context[i].value != NULL) {
+ calld->subchannel_call_context[i].destroy(
+ calld->subchannel_call_context[i].value);
+ }
+ }
gpr_free(calld->waiting_ops);
grpc_closure_sched(exec_ctx, then_schedule_closure, GRPC_ERROR_NONE);
}
@@ -1450,12 +1462,12 @@ static void watch_connectivity_state_locked(grpc_exec_ctx *exec_ctx, void *arg,
void grpc_client_channel_watch_connectivity_state(
grpc_exec_ctx *exec_ctx, grpc_channel_element *elem, grpc_pollset *pollset,
- grpc_connectivity_state *state, grpc_closure *on_complete) {
+ grpc_connectivity_state *state, grpc_closure *closure) {
channel_data *chand = elem->channel_data;
external_connectivity_watcher *w = gpr_malloc(sizeof(*w));
w->chand = chand;
w->pollset = pollset;
- w->on_complete = on_complete;
+ w->on_complete = closure;
w->state = state;
grpc_pollset_set_add_pollset(exec_ctx, chand->interested_parties, pollset);
GRPC_CHANNEL_STACK_REF(w->chand->owning_stack,
diff --git a/src/core/ext/filters/client_channel/lb_policy.c b/src/core/ext/filters/client_channel/lb_policy.c
index 2d31499d13..112ba40658 100644
--- a/src/core/ext/filters/client_channel/lb_policy.c
+++ b/src/core/ext/filters/client_channel/lb_policy.c
@@ -119,9 +119,10 @@ void grpc_lb_policy_weak_unref(grpc_exec_ctx *exec_ctx,
int grpc_lb_policy_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *policy,
const grpc_lb_policy_pick_args *pick_args,
grpc_connected_subchannel **target,
+ grpc_call_context_element *context,
void **user_data, grpc_closure *on_complete) {
return policy->vtable->pick_locked(exec_ctx, policy, pick_args, target,
- user_data, on_complete);
+ context, user_data, on_complete);
}
void grpc_lb_policy_cancel_pick_locked(grpc_exec_ctx *exec_ctx,
diff --git a/src/core/ext/filters/client_channel/lb_policy.h b/src/core/ext/filters/client_channel/lb_policy.h
index 25427666ae..fb4aa084a6 100644
--- a/src/core/ext/filters/client_channel/lb_policy.h
+++ b/src/core/ext/filters/client_channel/lb_policy.h
@@ -43,9 +43,6 @@
typedef struct grpc_lb_policy grpc_lb_policy;
typedef struct grpc_lb_policy_vtable grpc_lb_policy_vtable;
-typedef void (*grpc_lb_completion)(void *cb_arg, grpc_subchannel *subchannel,
- grpc_status_code status, const char *errmsg);
-
struct grpc_lb_policy {
const grpc_lb_policy_vtable *vtable;
gpr_atm ref_pair;
@@ -65,8 +62,6 @@ typedef struct grpc_lb_policy_pick_args {
uint32_t initial_metadata_flags;
/** Storage for LB token in \a initial_metadata, or NULL if not used */
grpc_linked_mdelem *lb_token_mdelem_storage;
- /** Deadline for the call to the LB server */
- gpr_timespec deadline;
} grpc_lb_policy_pick_args;
struct grpc_lb_policy_vtable {
@@ -76,7 +71,8 @@ struct grpc_lb_policy_vtable {
/** \see grpc_lb_policy_pick */
int (*pick_locked)(grpc_exec_ctx *exec_ctx, grpc_lb_policy *policy,
const grpc_lb_policy_pick_args *pick_args,
- grpc_connected_subchannel **target, void **user_data,
+ grpc_connected_subchannel **target,
+ grpc_call_context_element *context, void **user_data,
grpc_closure *on_complete);
/** \see grpc_lb_policy_cancel_pick */
@@ -153,9 +149,13 @@ void grpc_lb_policy_init(grpc_lb_policy *policy,
/** Finds an appropriate subchannel for a call, based on \a pick_args.
- \a target will be set to the selected subchannel, or NULL on failure.
+ \a target will be set to the selected subchannel, or NULL on failure
+ or when the LB policy decides to drop the call.
+
Upon success, \a user_data will be set to whatever opaque information
may need to be propagated from the LB policy, or NULL if not needed.
+ \a context will be populated with context to pass to the subchannel
+ call, if needed.
If the pick succeeds and a result is known immediately, a non-zero
value will be returned. Otherwise, \a on_complete will be invoked
@@ -167,6 +167,7 @@ void grpc_lb_policy_init(grpc_lb_policy *policy,
int grpc_lb_policy_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *policy,
const grpc_lb_policy_pick_args *pick_args,
grpc_connected_subchannel **target,
+ grpc_call_context_element *context,
void **user_data, grpc_closure *on_complete);
/** Perform a connected subchannel ping (see \a grpc_connected_subchannel_ping)
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.c b/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.c
new file mode 100644
index 0000000000..67baa46de7
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h"
+
+#include <grpc/support/atm.h>
+#include <grpc/support/log.h>
+
+#include "src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h"
+#include "src/core/lib/iomgr/error.h"
+#include "src/core/lib/profiling/timers.h"
+
+static grpc_error *init_channel_elem(grpc_exec_ctx *exec_ctx,
+ grpc_channel_element *elem,
+ grpc_channel_element_args *args) {
+ return GRPC_ERROR_NONE;
+}
+
+static void destroy_channel_elem(grpc_exec_ctx *exec_ctx,
+ grpc_channel_element *elem) {}
+
+typedef struct {
+ // Stats object to update.
+ grpc_grpclb_client_stats *client_stats;
+ // State for intercepting send_initial_metadata.
+ grpc_closure on_complete_for_send;
+ grpc_closure *original_on_complete_for_send;
+ bool send_initial_metadata_succeeded;
+ // State for intercepting recv_initial_metadata.
+ grpc_closure recv_initial_metadata_ready;
+ grpc_closure *original_recv_initial_metadata_ready;
+ bool recv_initial_metadata_succeeded;
+} call_data;
+
+static void on_complete_for_send(grpc_exec_ctx *exec_ctx, void *arg,
+ grpc_error *error) {
+ call_data *calld = arg;
+ if (error == GRPC_ERROR_NONE) {
+ calld->send_initial_metadata_succeeded = true;
+ }
+ grpc_closure_run(exec_ctx, calld->original_on_complete_for_send,
+ GRPC_ERROR_REF(error));
+}
+
+static void recv_initial_metadata_ready(grpc_exec_ctx *exec_ctx, void *arg,
+ grpc_error *error) {
+ call_data *calld = arg;
+ if (error == GRPC_ERROR_NONE) {
+ calld->recv_initial_metadata_succeeded = true;
+ }
+ grpc_closure_run(exec_ctx, calld->original_recv_initial_metadata_ready,
+ GRPC_ERROR_REF(error));
+}
+
+static grpc_error *init_call_elem(grpc_exec_ctx *exec_ctx,
+ grpc_call_element *elem,
+ const grpc_call_element_args *args) {
+ call_data *calld = elem->call_data;
+ // Get stats object from context and take a ref.
+ GPR_ASSERT(args->context != NULL);
+ GPR_ASSERT(args->context[GRPC_GRPCLB_CLIENT_STATS].value != NULL);
+ calld->client_stats = grpc_grpclb_client_stats_ref(
+ args->context[GRPC_GRPCLB_CLIENT_STATS].value);
+ // Record call started.
+ grpc_grpclb_client_stats_add_call_started(calld->client_stats);
+ return GRPC_ERROR_NONE;
+}
+
+static void destroy_call_elem(grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
+ const grpc_call_final_info *final_info,
+ grpc_closure *ignored) {
+ call_data *calld = elem->call_data;
+ // Record call finished, optionally setting client_failed_to_send and
+ // received.
+ grpc_grpclb_client_stats_add_call_finished(
+ false /* drop_for_rate_limiting */, false /* drop_for_load_balancing */,
+ !calld->send_initial_metadata_succeeded /* client_failed_to_send */,
+ calld->recv_initial_metadata_succeeded /* known_received */,
+ calld->client_stats);
+ // All done, so unref the stats object.
+ grpc_grpclb_client_stats_unref(calld->client_stats);
+}
+
+static void start_transport_stream_op_batch(
+ grpc_exec_ctx *exec_ctx, grpc_call_element *elem,
+ grpc_transport_stream_op_batch *batch) {
+ call_data *calld = elem->call_data;
+ GPR_TIMER_BEGIN("clr_start_transport_stream_op_batch", 0);
+ // Intercept send_initial_metadata.
+ if (batch->send_initial_metadata) {
+ calld->original_on_complete_for_send = batch->on_complete;
+ grpc_closure_init(&calld->on_complete_for_send, on_complete_for_send, calld,
+ grpc_schedule_on_exec_ctx);
+ batch->on_complete = &calld->on_complete_for_send;
+ }
+ // Intercept recv_initial_metadata.
+ if (batch->recv_initial_metadata) {
+ calld->original_recv_initial_metadata_ready =
+ batch->payload->recv_initial_metadata.recv_initial_metadata_ready;
+ grpc_closure_init(&calld->recv_initial_metadata_ready,
+ recv_initial_metadata_ready, calld,
+ grpc_schedule_on_exec_ctx);
+ batch->payload->recv_initial_metadata.recv_initial_metadata_ready =
+ &calld->recv_initial_metadata_ready;
+ }
+ // Chain to next filter.
+ grpc_call_next_op(exec_ctx, elem, batch);
+ GPR_TIMER_END("clr_start_transport_stream_op_batch", 0);
+}
+
+const grpc_channel_filter grpc_client_load_reporting_filter = {
+ start_transport_stream_op_batch,
+ grpc_channel_next_op,
+ sizeof(call_data),
+ init_call_elem,
+ grpc_call_stack_ignore_set_pollset_or_pollset_set,
+ destroy_call_elem,
+ 0, // sizeof(channel_data)
+ init_channel_elem,
+ destroy_channel_elem,
+ grpc_call_next_get_peer,
+ grpc_channel_next_get_info,
+ "client_load_reporting"};
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h b/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h
new file mode 100644
index 0000000000..28b313d874
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_GRPCLB_CLIENT_LOAD_REPORTING_FILTER_H
+#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_GRPCLB_CLIENT_LOAD_REPORTING_FILTER_H
+
+#include "src/core/lib/channel/channel_stack.h"
+
+extern const grpc_channel_filter grpc_client_load_reporting_filter;
+
+#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_GRPCLB_CLIENT_LOAD_REPORTING_FILTER_H \
+ */
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c
index 9e158a94ad..d2a2856a18 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c
@@ -95,8 +95,7 @@
headers. Therefore, sockaddr.h must always be included first */
#include "src/core/lib/iomgr/sockaddr.h"
-#include <errno.h>
-
+#include <limits.h>
#include <string.h>
#include <grpc/byte_buffer_reader.h>
@@ -108,13 +107,16 @@
#include "src/core/ext/filters/client_channel/client_channel.h"
#include "src/core/ext/filters/client_channel/client_channel_factory.h"
+#include "src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h"
#include "src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h"
#include "src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel.h"
+#include "src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h"
#include "src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h"
#include "src/core/ext/filters/client_channel/lb_policy_factory.h"
#include "src/core/ext/filters/client_channel/lb_policy_registry.h"
#include "src/core/ext/filters/client_channel/parse_address.h"
#include "src/core/lib/channel/channel_args.h"
+#include "src/core/lib/channel/channel_stack.h"
#include "src/core/lib/iomgr/combiner.h"
#include "src/core/lib/iomgr/sockaddr.h"
#include "src/core/lib/iomgr/sockaddr_utils.h"
@@ -126,6 +128,7 @@
#include "src/core/lib/support/string.h"
#include "src/core/lib/surface/call.h"
#include "src/core/lib/surface/channel.h"
+#include "src/core/lib/surface/channel_init.h"
#include "src/core/lib/transport/static_metadata.h"
#define GRPC_GRPCLB_MIN_CONNECT_TIMEOUT_SECONDS 20
@@ -134,7 +137,7 @@
#define GRPC_GRPCLB_RECONNECT_MAX_BACKOFF_SECONDS 120
#define GRPC_GRPCLB_RECONNECT_JITTER 0.2
-int grpc_lb_glb_trace = 0;
+grpc_tracer_flag grpc_lb_glb_trace = GRPC_TRACER_INITIALIZER(false);
/* add lb_token of selected subchannel (address) to the call's initial
* metadata */
@@ -147,6 +150,10 @@ static grpc_error *initial_metadata_add_lb_token(
lb_token_mdelem_storage, lb_token);
}
+static void destroy_client_stats(void *arg) {
+ grpc_grpclb_client_stats_unref(arg);
+}
+
typedef struct wrapped_rr_closure_arg {
/* the closure instance using this struct as argument */
grpc_closure wrapper_closure;
@@ -163,6 +170,13 @@ typedef struct wrapped_rr_closure_arg {
* initial metadata */
grpc_connected_subchannel **target;
+ /* the context to be populated for the subchannel call */
+ grpc_call_context_element *context;
+
+ /* Stats for client-side load reporting. Note that this holds a
+ * reference, which must be either passed on via context or unreffed. */
+ grpc_grpclb_client_stats *client_stats;
+
/* the LB token associated with the pick */
grpc_mdelem lb_token;
@@ -202,8 +216,14 @@ static void wrapped_rr_closure(grpc_exec_ctx *exec_ctx, void *arg,
(void *)*wc_arg->target, (void *)wc_arg->rr_policy);
abort();
}
+ // Pass on client stats via context. Passes ownership of the reference.
+ GPR_ASSERT(wc_arg->client_stats != NULL);
+ wc_arg->context[GRPC_GRPCLB_CLIENT_STATS].value = wc_arg->client_stats;
+ wc_arg->context[GRPC_GRPCLB_CLIENT_STATS].destroy = destroy_client_stats;
+ } else {
+ grpc_grpclb_client_stats_unref(wc_arg->client_stats);
}
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO, "Unreffing RR %p", (void *)wc_arg->rr_policy);
}
GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "wrapped_rr_closure");
@@ -237,6 +257,7 @@ typedef struct pending_pick {
static void add_pending_pick(pending_pick **root,
const grpc_lb_policy_pick_args *pick_args,
grpc_connected_subchannel **target,
+ grpc_call_context_element *context,
grpc_closure *on_complete) {
pending_pick *pp = gpr_zalloc(sizeof(*pp));
pp->next = *root;
@@ -244,6 +265,7 @@ static void add_pending_pick(pending_pick **root,
pp->target = target;
pp->wrapped_on_complete_arg.wrapped_closure = on_complete;
pp->wrapped_on_complete_arg.target = target;
+ pp->wrapped_on_complete_arg.context = context;
pp->wrapped_on_complete_arg.initial_metadata = pick_args->initial_metadata;
pp->wrapped_on_complete_arg.lb_token_mdelem_storage =
pick_args->lb_token_mdelem_storage;
@@ -287,8 +309,8 @@ typedef struct glb_lb_policy {
grpc_client_channel_factory *cc_factory;
grpc_channel_args *args;
- /** deadline for the LB's call */
- gpr_timespec deadline;
+ /** timeout in milliseconds for the LB call. 0 means no deadline. */
+ int lb_call_timeout_ms;
/** for communicating with the LB server */
grpc_channel *lb_channel;
@@ -305,6 +327,11 @@ typedef struct glb_lb_policy {
* response has arrived. */
grpc_grpclb_serverlist *serverlist;
+ /** Index into serverlist for next pick.
+ * If the server at this index is a drop, we return a drop.
+ * Otherwise, we delegate to the RR policy. */
+ size_t serverlist_index;
+
/** list of picks that are waiting on RR's policy connectivity */
pending_pick *pending_picks;
@@ -316,6 +343,10 @@ typedef struct glb_lb_policy {
/************************************************************/
/* client data associated with the LB server communication */
/************************************************************/
+
+ /* Finished sending initial request. */
+ grpc_closure lb_on_sent_initial_request;
+
/* Status from the LB server has been received. This signals the end of the LB
* call. */
grpc_closure lb_on_server_status_received;
@@ -348,6 +379,23 @@ typedef struct glb_lb_policy {
/** LB call retry timer */
grpc_timer lb_call_retry_timer;
+
+ bool initial_request_sent;
+ bool seen_initial_response;
+
+ /* Stats for client-side load reporting. Should be unreffed and
+ * recreated whenever lb_call is replaced. */
+ grpc_grpclb_client_stats *client_stats;
+ /* Interval and timer for next client load report. */
+ gpr_timespec client_stats_report_interval;
+ grpc_timer client_load_report_timer;
+ bool client_load_report_timer_pending;
+ bool last_client_load_report_counters_were_zero;
+ /* Closure used for either the load report timer or the callback for
+ * completion of sending the load report. */
+ grpc_closure client_load_report_closure;
+ /* Client load report message payload. */
+ grpc_byte_buffer *client_load_report_payload;
} glb_lb_policy;
/* Keeps track and reacts to changes in connectivity of the RR instance */
@@ -359,6 +407,9 @@ struct rr_connectivity_data {
static bool is_server_valid(const grpc_grpclb_server *server, size_t idx,
bool log) {
+ if (server->drop_for_rate_limiting || server->drop_for_load_balancing) {
+ return false;
+ }
const grpc_grpclb_ip_address *ip = &server->ip_address;
if (server->port >> 16 != 0) {
if (log) {
@@ -368,7 +419,6 @@ static bool is_server_valid(const grpc_grpclb_server *server, size_t idx,
}
return false;
}
-
if (ip->size != 4 && ip->size != 16) {
if (log) {
gpr_log(GPR_ERROR,
@@ -402,11 +452,12 @@ static const grpc_lb_user_data_vtable lb_token_vtable = {
static void parse_server(const grpc_grpclb_server *server,
grpc_resolved_address *addr) {
+ memset(addr, 0, sizeof(*addr));
+ if (server->drop_for_rate_limiting || server->drop_for_load_balancing) return;
const uint16_t netorder_port = htons((uint16_t)server->port);
/* the addresses are given in binary format (a in(6)_addr struct) in
* server->ip_address.bytes. */
const grpc_grpclb_ip_address *ip = &server->ip_address;
- memset(addr, 0, sizeof(*addr));
if (ip->size == 4) {
addr->len = sizeof(struct sockaddr_in);
struct sockaddr_in *addr4 = (struct sockaddr_in *)&addr->addr;
@@ -531,7 +582,7 @@ static bool update_lb_connectivity_status_locked(
GPR_ASSERT(new_rr_state_error == GRPC_ERROR_NONE);
}
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO,
"Setting grpclb's state to %s from new RR policy %p state.",
grpc_connectivity_state_name(new_rr_state),
@@ -543,31 +594,74 @@ static bool update_lb_connectivity_status_locked(
return true;
}
-/* perform a pick over \a rr_policy. Given that a pick can return immediately
- * (ignoring its completion callback) we need to perform the cleanups this
- * callback would be otherwise resposible for */
+/* Perform a pick over \a glb_policy->rr_policy. Given that a pick can return
+ * immediately (ignoring its completion callback), we need to perform the
+ * cleanups this callback would otherwise be responsible for.
+ * If \a force_async is true, then we will manually schedule the
+ * completion callback even if the pick is available immediately. */
static bool pick_from_internal_rr_locked(
- grpc_exec_ctx *exec_ctx, grpc_lb_policy *rr_policy,
- const grpc_lb_policy_pick_args *pick_args,
+ grpc_exec_ctx *exec_ctx, glb_lb_policy *glb_policy,
+ const grpc_lb_policy_pick_args *pick_args, bool force_async,
grpc_connected_subchannel **target, wrapped_rr_closure_arg *wc_arg) {
- GPR_ASSERT(rr_policy != NULL);
+ // Look at the index into the serverlist to see if we should drop this call.
+ grpc_grpclb_server *server =
+ glb_policy->serverlist->servers[glb_policy->serverlist_index++];
+ if (glb_policy->serverlist_index == glb_policy->serverlist->num_servers) {
+ glb_policy->serverlist_index = 0; // Wrap-around.
+ }
+ if (server->drop_for_rate_limiting || server->drop_for_load_balancing) {
+ // Not using the RR policy, so unref it.
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
+ gpr_log(GPR_INFO, "Unreffing RR for drop (0x%" PRIxPTR ")",
+ (intptr_t)wc_arg->rr_policy);
+ }
+ GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "glb_pick_sync");
+ // Update client load reporting stats to indicate the number of
+ // dropped calls. Note that we have to do this here instead of in
+ // the client_load_reporting filter, because we do not create a
+ // subchannel call (and therefore no client_load_reporting filter)
+ // for dropped calls.
+ grpc_grpclb_client_stats_add_call_started(wc_arg->client_stats);
+ grpc_grpclb_client_stats_add_call_finished(
+ server->drop_for_rate_limiting, server->drop_for_load_balancing,
+ false /* failed_to_send */, false /* known_received */,
+ wc_arg->client_stats);
+ grpc_grpclb_client_stats_unref(wc_arg->client_stats);
+ if (force_async) {
+ GPR_ASSERT(wc_arg->wrapped_closure != NULL);
+ grpc_closure_sched(exec_ctx, wc_arg->wrapped_closure, GRPC_ERROR_NONE);
+ gpr_free(wc_arg->free_when_done);
+ return false;
+ }
+ gpr_free(wc_arg->free_when_done);
+ return true;
+ }
+ // Pick via the RR policy.
const bool pick_done = grpc_lb_policy_pick_locked(
- exec_ctx, rr_policy, pick_args, target, (void **)&wc_arg->lb_token,
- &wc_arg->wrapper_closure);
+ exec_ctx, wc_arg->rr_policy, pick_args, target, wc_arg->context,
+ (void **)&wc_arg->lb_token, &wc_arg->wrapper_closure);
if (pick_done) {
/* synchronous grpc_lb_policy_pick call. Unref the RR policy. */
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO, "Unreffing RR (0x%" PRIxPTR ")",
(intptr_t)wc_arg->rr_policy);
}
GRPC_LB_POLICY_UNREF(exec_ctx, wc_arg->rr_policy, "glb_pick_sync");
-
/* add the load reporting initial metadata */
initial_metadata_add_lb_token(exec_ctx, pick_args->initial_metadata,
pick_args->lb_token_mdelem_storage,
GRPC_MDELEM_REF(wc_arg->lb_token));
-
- gpr_free(wc_arg);
+ // Pass on client stats via context. Passes ownership of the reference.
+ GPR_ASSERT(wc_arg->client_stats != NULL);
+ wc_arg->context[GRPC_GRPCLB_CLIENT_STATS].value = wc_arg->client_stats;
+ wc_arg->context[GRPC_GRPCLB_CLIENT_STATS].destroy = destroy_client_stats;
+ if (force_async) {
+ GPR_ASSERT(wc_arg->wrapped_closure != NULL);
+ grpc_closure_sched(exec_ctx, wc_arg->wrapped_closure, GRPC_ERROR_NONE);
+ gpr_free(wc_arg->free_when_done);
+ return false;
+ }
+ gpr_free(wc_arg->free_when_done);
}
/* else, the pending pick will be registered and taken care of by the
* pending pick list inside the RR policy (glb_policy->rr_policy).
@@ -637,7 +731,7 @@ static void rr_handover_locked(grpc_exec_ctx *exec_ctx,
if (!replace_old_rr) {
/* dispose of the new RR policy that won't be used after all */
GRPC_LB_POLICY_UNREF(exec_ctx, new_rr_policy, "rr_handover_no_replace");
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO,
"Keeping old RR policy (%p) despite new serverlist: new RR "
"policy was in %s connectivity state.",
@@ -647,7 +741,7 @@ static void rr_handover_locked(grpc_exec_ctx *exec_ctx,
return;
}
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO, "Created RR policy (%p) to replace old RR (%p)",
(void *)new_rr_policy, (void *)glb_policy->rr_policy);
}
@@ -690,12 +784,14 @@ static void rr_handover_locked(grpc_exec_ctx *exec_ctx,
glb_policy->pending_picks = pp->next;
GRPC_LB_POLICY_REF(glb_policy->rr_policy, "rr_handover_pending_pick");
pp->wrapped_on_complete_arg.rr_policy = glb_policy->rr_policy;
- if (grpc_lb_glb_trace) {
+ pp->wrapped_on_complete_arg.client_stats =
+ grpc_grpclb_client_stats_ref(glb_policy->client_stats);
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO, "Pending pick about to PICK from 0x%" PRIxPTR "",
(intptr_t)glb_policy->rr_policy);
}
- pick_from_internal_rr_locked(exec_ctx, glb_policy->rr_policy,
- &pp->pick_args, pp->target,
+ pick_from_internal_rr_locked(exec_ctx, glb_policy, &pp->pick_args,
+ true /* force_async */, pp->target,
&pp->wrapped_on_complete_arg);
}
@@ -704,7 +800,7 @@ static void rr_handover_locked(grpc_exec_ctx *exec_ctx,
glb_policy->pending_pings = pping->next;
GRPC_LB_POLICY_REF(glb_policy->rr_policy, "rr_handover_pending_ping");
pping->wrapped_notify_arg.rr_policy = glb_policy->rr_policy;
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO, "Pending ping about to PING from 0x%" PRIxPTR "",
(intptr_t)glb_policy->rr_policy);
}
@@ -857,16 +953,29 @@ static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx,
GPR_ASSERT(uri->path[0] != '\0');
glb_policy->server_name =
gpr_strdup(uri->path[0] == '/' ? uri->path + 1 : uri->path);
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO, "Will use '%s' as the server name for LB request.",
glb_policy->server_name);
}
grpc_uri_destroy(uri);
glb_policy->cc_factory = args->client_channel_factory;
- glb_policy->args = grpc_channel_args_copy(args->args);
GPR_ASSERT(glb_policy->cc_factory != NULL);
+ arg = grpc_channel_args_find(args->args, GRPC_ARG_GRPCLB_CALL_TIMEOUT_MS);
+ glb_policy->lb_call_timeout_ms =
+ grpc_channel_arg_get_integer(arg, (grpc_integer_options){0, 0, INT_MAX});
+
+ // Make sure that GRPC_ARG_LB_POLICY_NAME is set in channel args,
+ // since we use this to trigger the client_load_reporting filter.
+ grpc_arg new_arg;
+ new_arg.key = GRPC_ARG_LB_POLICY_NAME;
+ new_arg.type = GRPC_ARG_STRING;
+ new_arg.value.string = "grpclb";
+ static const char *args_to_remove[] = {GRPC_ARG_LB_POLICY_NAME};
+ glb_policy->args = grpc_channel_args_copy_and_add_and_remove(
+ args->args, args_to_remove, GPR_ARRAY_SIZE(args_to_remove), &new_arg, 1);
+
grpc_slice_hash_table *targets_info = NULL;
/* Create a client channel over them to communicate with a LB service */
char *lb_service_target_addresses =
@@ -880,6 +989,8 @@ static grpc_lb_policy *glb_create(grpc_exec_ctx *exec_ctx,
grpc_channel_args_destroy(exec_ctx, lb_channel_args);
gpr_free(lb_service_target_addresses);
if (glb_policy->lb_channel == NULL) {
+ gpr_free((void *)glb_policy->server_name);
+ grpc_channel_args_destroy(exec_ctx, glb_policy->args);
gpr_free(glb_policy);
return NULL;
}
@@ -895,6 +1006,9 @@ static void glb_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
GPR_ASSERT(glb_policy->pending_pings == NULL);
gpr_free((void *)glb_policy->server_name);
grpc_channel_args_destroy(exec_ctx, glb_policy->args);
+ if (glb_policy->client_stats != NULL) {
+ grpc_grpclb_client_stats_unref(glb_policy->client_stats);
+ }
grpc_channel_destroy(glb_policy->lb_channel);
glb_policy->lb_channel = NULL;
grpc_connectivity_state_destroy(exec_ctx, &glb_policy->state_tracker);
@@ -1011,7 +1125,8 @@ static void glb_exit_idle_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
static int glb_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
const grpc_lb_policy_pick_args *pick_args,
- grpc_connected_subchannel **target, void **user_data,
+ grpc_connected_subchannel **target,
+ grpc_call_context_element *context, void **user_data,
grpc_closure *on_complete) {
if (pick_args->lb_token_mdelem_storage == NULL) {
*target = NULL;
@@ -1023,11 +1138,10 @@ static int glb_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
}
glb_lb_policy *glb_policy = (glb_lb_policy *)pol;
- glb_policy->deadline = pick_args->deadline;
bool pick_done;
if (glb_policy->rr_policy != NULL) {
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO, "grpclb %p about to PICK from RR %p",
(void *)glb_policy, (void *)glb_policy->rr_policy);
}
@@ -1039,20 +1153,25 @@ static int glb_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
grpc_schedule_on_exec_ctx);
wc_arg->rr_policy = glb_policy->rr_policy;
wc_arg->target = target;
+ wc_arg->context = context;
+ GPR_ASSERT(glb_policy->client_stats != NULL);
+ wc_arg->client_stats =
+ grpc_grpclb_client_stats_ref(glb_policy->client_stats);
wc_arg->wrapped_closure = on_complete;
wc_arg->lb_token_mdelem_storage = pick_args->lb_token_mdelem_storage;
wc_arg->initial_metadata = pick_args->initial_metadata;
wc_arg->free_when_done = wc_arg;
- pick_done = pick_from_internal_rr_locked(exec_ctx, glb_policy->rr_policy,
- pick_args, target, wc_arg);
+ pick_done =
+ pick_from_internal_rr_locked(exec_ctx, glb_policy, pick_args,
+ false /* force_async */, target, wc_arg);
} else {
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_DEBUG,
"No RR policy in grpclb instance %p. Adding to grpclb's pending "
"picks",
(void *)(glb_policy));
}
- add_pending_pick(&glb_policy->pending_picks, pick_args, target,
+ add_pending_pick(&glb_policy->pending_picks, pick_args, target, context,
on_complete);
if (!glb_policy->started_picking) {
@@ -1093,6 +1212,104 @@ static void glb_notify_on_state_change_locked(grpc_exec_ctx *exec_ctx,
exec_ctx, &glb_policy->state_tracker, current, notify);
}
+static void send_client_load_report_locked(grpc_exec_ctx *exec_ctx, void *arg,
+ grpc_error *error);
+
+static void schedule_next_client_load_report(grpc_exec_ctx *exec_ctx,
+ glb_lb_policy *glb_policy) {
+ const gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC);
+ const gpr_timespec next_client_load_report_time =
+ gpr_time_add(now, glb_policy->client_stats_report_interval);
+ grpc_closure_init(&glb_policy->client_load_report_closure,
+ send_client_load_report_locked, glb_policy,
+ grpc_combiner_scheduler(glb_policy->base.combiner, false));
+ grpc_timer_init(exec_ctx, &glb_policy->client_load_report_timer,
+ next_client_load_report_time,
+ &glb_policy->client_load_report_closure, now);
+}
+
+static void client_load_report_done_locked(grpc_exec_ctx *exec_ctx, void *arg,
+ grpc_error *error) {
+ glb_lb_policy *glb_policy = arg;
+ grpc_byte_buffer_destroy(glb_policy->client_load_report_payload);
+ glb_policy->client_load_report_payload = NULL;
+ if (error != GRPC_ERROR_NONE || glb_policy->lb_call == NULL) {
+ glb_policy->client_load_report_timer_pending = false;
+ GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base,
+ "client_load_report");
+ return;
+ }
+ schedule_next_client_load_report(exec_ctx, glb_policy);
+}
+
+static void do_send_client_load_report_locked(grpc_exec_ctx *exec_ctx,
+ glb_lb_policy *glb_policy) {
+ grpc_op op;
+ memset(&op, 0, sizeof(op));
+ op.op = GRPC_OP_SEND_MESSAGE;
+ op.data.send_message.send_message = glb_policy->client_load_report_payload;
+ grpc_closure_init(&glb_policy->client_load_report_closure,
+ client_load_report_done_locked, glb_policy,
+ grpc_combiner_scheduler(glb_policy->base.combiner, false));
+ grpc_call_error call_error = grpc_call_start_batch_and_execute(
+ exec_ctx, glb_policy->lb_call, &op, 1,
+ &glb_policy->client_load_report_closure);
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
+}
+
+static bool load_report_counters_are_zero(grpc_grpclb_request *request) {
+ return request->client_stats.num_calls_started == 0 &&
+ request->client_stats.num_calls_finished == 0 &&
+ request->client_stats.num_calls_finished_with_drop_for_rate_limiting ==
+ 0 &&
+ request->client_stats
+ .num_calls_finished_with_drop_for_load_balancing == 0 &&
+ request->client_stats.num_calls_finished_with_client_failed_to_send ==
+ 0 &&
+ request->client_stats.num_calls_finished_known_received == 0;
+}
+
+static void send_client_load_report_locked(grpc_exec_ctx *exec_ctx, void *arg,
+ grpc_error *error) {
+ glb_lb_policy *glb_policy = arg;
+ if (error == GRPC_ERROR_CANCELLED || glb_policy->lb_call == NULL) {
+ glb_policy->client_load_report_timer_pending = false;
+ GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base,
+ "client_load_report");
+ return;
+ }
+ // Construct message payload.
+ GPR_ASSERT(glb_policy->client_load_report_payload == NULL);
+ grpc_grpclb_request *request =
+ grpc_grpclb_load_report_request_create(glb_policy->client_stats);
+ // Skip client load report if the counters were all zero in the last
+ // report and they are still zero in this one.
+ if (load_report_counters_are_zero(request)) {
+ if (glb_policy->last_client_load_report_counters_were_zero) {
+ grpc_grpclb_request_destroy(request);
+ schedule_next_client_load_report(exec_ctx, glb_policy);
+ return;
+ }
+ glb_policy->last_client_load_report_counters_were_zero = true;
+ } else {
+ glb_policy->last_client_load_report_counters_were_zero = false;
+ }
+ grpc_slice request_payload_slice = grpc_grpclb_request_encode(request);
+ glb_policy->client_load_report_payload =
+ grpc_raw_byte_buffer_create(&request_payload_slice, 1);
+ grpc_slice_unref_internal(exec_ctx, request_payload_slice);
+ grpc_grpclb_request_destroy(request);
+ // If we've already sent the initial request, then we can go ahead and
+ // send the load report. Otherwise, we need to wait until the initial
+ // request has been sent to send this
+ // (see lb_on_sent_initial_request_locked() below).
+ if (glb_policy->initial_request_sent) {
+ do_send_client_load_report_locked(exec_ctx, glb_policy);
+ }
+}
+
+static void lb_on_sent_initial_request_locked(grpc_exec_ctx *exec_ctx,
+ void *arg, grpc_error *error);
static void lb_on_server_status_received_locked(grpc_exec_ctx *exec_ctx,
void *arg, grpc_error *error);
static void lb_on_response_received_locked(grpc_exec_ctx *exec_ctx, void *arg,
@@ -1107,13 +1324,24 @@ static void lb_call_init_locked(grpc_exec_ctx *exec_ctx,
* glb_policy->base.interested_parties, which is comprised of the polling
* entities from \a client_channel. */
grpc_slice host = grpc_slice_from_copied_string(glb_policy->server_name);
+ gpr_timespec deadline =
+ glb_policy->lb_call_timeout_ms == 0
+ ? gpr_inf_future(GPR_CLOCK_MONOTONIC)
+ : gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC),
+ gpr_time_from_millis(glb_policy->lb_call_timeout_ms,
+ GPR_TIMESPAN));
glb_policy->lb_call = grpc_channel_create_pollset_set_call(
exec_ctx, glb_policy->lb_channel, NULL, GRPC_PROPAGATE_DEFAULTS,
glb_policy->base.interested_parties,
GRPC_MDSTR_SLASH_GRPC_DOT_LB_DOT_V1_DOT_LOADBALANCER_SLASH_BALANCELOAD,
- &host, glb_policy->deadline, NULL);
+ &host, deadline, NULL);
grpc_slice_unref_internal(exec_ctx, host);
+ if (glb_policy->client_stats != NULL) {
+ grpc_grpclb_client_stats_unref(glb_policy->client_stats);
+ }
+ glb_policy->client_stats = grpc_grpclb_client_stats_create();
+
grpc_metadata_array_init(&glb_policy->lb_initial_metadata_recv);
grpc_metadata_array_init(&glb_policy->lb_trailing_metadata_recv);
@@ -1125,6 +1353,9 @@ static void lb_call_init_locked(grpc_exec_ctx *exec_ctx,
grpc_slice_unref_internal(exec_ctx, request_payload_slice);
grpc_grpclb_request_destroy(request);
+ grpc_closure_init(&glb_policy->lb_on_sent_initial_request,
+ lb_on_sent_initial_request_locked, glb_policy,
+ grpc_combiner_scheduler(glb_policy->base.combiner, false));
grpc_closure_init(&glb_policy->lb_on_server_status_received,
lb_on_server_status_received_locked, glb_policy,
grpc_combiner_scheduler(glb_policy->base.combiner, false));
@@ -1138,6 +1369,10 @@ static void lb_call_init_locked(grpc_exec_ctx *exec_ctx,
GRPC_GRPCLB_RECONNECT_JITTER,
GRPC_GRPCLB_MIN_CONNECT_TIMEOUT_SECONDS * 1000,
GRPC_GRPCLB_RECONNECT_MAX_BACKOFF_SECONDS * 1000);
+
+ glb_policy->initial_request_sent = false;
+ glb_policy->seen_initial_response = false;
+ glb_policy->last_client_load_report_counters_were_zero = false;
}
static void lb_call_destroy_locked(grpc_exec_ctx *exec_ctx,
@@ -1151,6 +1386,10 @@ static void lb_call_destroy_locked(grpc_exec_ctx *exec_ctx,
grpc_byte_buffer_destroy(glb_policy->lb_request_payload);
grpc_slice_unref_internal(exec_ctx, glb_policy->lb_call_status_details);
+
+ if (!glb_policy->client_load_report_timer_pending) {
+ grpc_timer_cancel(exec_ctx, &glb_policy->client_load_report_timer);
+ }
}
/*
@@ -1163,7 +1402,7 @@ static void query_for_backends_locked(grpc_exec_ctx *exec_ctx,
lb_call_init_locked(exec_ctx, glb_policy);
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO, "Query for backends (grpclb: %p, lb_call: %p)",
(void *)glb_policy, (void *)glb_policy->lb_call);
}
@@ -1179,21 +1418,27 @@ static void query_for_backends_locked(grpc_exec_ctx *exec_ctx,
op->flags = 0;
op->reserved = NULL;
op++;
-
op->op = GRPC_OP_RECV_INITIAL_METADATA;
op->data.recv_initial_metadata.recv_initial_metadata =
&glb_policy->lb_initial_metadata_recv;
op->flags = 0;
op->reserved = NULL;
op++;
-
GPR_ASSERT(glb_policy->lb_request_payload != NULL);
op->op = GRPC_OP_SEND_MESSAGE;
op->data.send_message.send_message = glb_policy->lb_request_payload;
op->flags = 0;
op->reserved = NULL;
op++;
+ /* take a weak ref (won't prevent calling of \a glb_shutdown if the strong ref
+ * count goes to zero) to be unref'd in lb_on_sent_initial_request_locked() */
+ GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "lb_on_server_status_received");
+ call_error = grpc_call_start_batch_and_execute(
+ exec_ctx, glb_policy->lb_call, ops, (size_t)(op - ops),
+ &glb_policy->lb_on_sent_initial_request);
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
+ op = ops;
op->op = GRPC_OP_RECV_STATUS_ON_CLIENT;
op->data.recv_status_on_client.trailing_metadata =
&glb_policy->lb_trailing_metadata_recv;
@@ -1225,6 +1470,19 @@ static void query_for_backends_locked(grpc_exec_ctx *exec_ctx,
GPR_ASSERT(GRPC_CALL_OK == call_error);
}
+static void lb_on_sent_initial_request_locked(grpc_exec_ctx *exec_ctx,
+ void *arg, grpc_error *error) {
+ glb_lb_policy *glb_policy = arg;
+ glb_policy->initial_request_sent = true;
+ // If we attempted to send a client load report before the initial
+ // request was sent, send the load report now.
+ if (glb_policy->client_load_report_payload != NULL) {
+ do_send_client_load_report_locked(exec_ctx, glb_policy);
+ }
+ GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &glb_policy->base,
+ "lb_on_response_received_locked");
+}
+
static void lb_on_response_received_locked(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error *error) {
glb_lb_policy *glb_policy = arg;
@@ -1240,58 +1498,91 @@ static void lb_on_response_received_locked(grpc_exec_ctx *exec_ctx, void *arg,
grpc_byte_buffer_reader_init(&bbr, glb_policy->lb_response_payload);
grpc_slice response_slice = grpc_byte_buffer_reader_readall(&bbr);
grpc_byte_buffer_destroy(glb_policy->lb_response_payload);
- grpc_grpclb_serverlist *serverlist =
- grpc_grpclb_response_parse_serverlist(response_slice);
- if (serverlist != NULL) {
- GPR_ASSERT(glb_policy->lb_call != NULL);
- grpc_slice_unref_internal(exec_ctx, response_slice);
- if (grpc_lb_glb_trace) {
- gpr_log(GPR_INFO, "Serverlist with %lu servers received",
- (unsigned long)serverlist->num_servers);
- for (size_t i = 0; i < serverlist->num_servers; ++i) {
- grpc_resolved_address addr;
- parse_server(serverlist->servers[i], &addr);
- char *ipport;
- grpc_sockaddr_to_string(&ipport, &addr, false);
- gpr_log(GPR_INFO, "Serverlist[%lu]: %s", (unsigned long)i, ipport);
- gpr_free(ipport);
+
+ grpc_grpclb_initial_response *response = NULL;
+ if (!glb_policy->seen_initial_response &&
+ (response = grpc_grpclb_initial_response_parse(response_slice)) !=
+ NULL) {
+ if (response->has_client_stats_report_interval) {
+ glb_policy->client_stats_report_interval =
+ gpr_time_max(gpr_time_from_seconds(1, GPR_TIMESPAN),
+ grpc_grpclb_duration_to_timespec(
+ &response->client_stats_report_interval));
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
+ gpr_log(GPR_INFO,
+ "received initial LB response message; "
+ "client load reporting interval = %" PRId64 ".%09d sec",
+ glb_policy->client_stats_report_interval.tv_sec,
+ glb_policy->client_stats_report_interval.tv_nsec);
}
+ /* take a weak ref (won't prevent calling of \a glb_shutdown() if the
+ * strong ref count goes to zero) to be unref'd in
+ * send_client_load_report_locked() or client_load_report_done_locked() */
+ glb_policy->client_load_report_timer_pending = true;
+ GRPC_LB_POLICY_WEAK_REF(&glb_policy->base, "client_load_report");
+ schedule_next_client_load_report(exec_ctx, glb_policy);
+ } else if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
+ gpr_log(GPR_INFO,
+ "received initial LB response message; "
+ "client load reporting NOT enabled");
}
+ grpc_grpclb_initial_response_destroy(response);
+ glb_policy->seen_initial_response = true;
+ } else {
+ grpc_grpclb_serverlist *serverlist =
+ grpc_grpclb_response_parse_serverlist(response_slice);
+ if (serverlist != NULL) {
+ GPR_ASSERT(glb_policy->lb_call != NULL);
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
+ gpr_log(GPR_INFO, "Serverlist with %lu servers received",
+ (unsigned long)serverlist->num_servers);
+ for (size_t i = 0; i < serverlist->num_servers; ++i) {
+ grpc_resolved_address addr;
+ parse_server(serverlist->servers[i], &addr);
+ char *ipport;
+ grpc_sockaddr_to_string(&ipport, &addr, false);
+ gpr_log(GPR_INFO, "Serverlist[%lu]: %s", (unsigned long)i, ipport);
+ gpr_free(ipport);
+ }
+ }
- /* update serverlist */
- if (serverlist->num_servers > 0) {
- if (grpc_grpclb_serverlist_equals(glb_policy->serverlist, serverlist)) {
- if (grpc_lb_glb_trace) {
+ /* update serverlist */
+ if (serverlist->num_servers > 0) {
+ if (grpc_grpclb_serverlist_equals(glb_policy->serverlist,
+ serverlist)) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
+ gpr_log(GPR_INFO,
+ "Incoming server list identical to current, ignoring.");
+ }
+ grpc_grpclb_destroy_serverlist(serverlist);
+ } else { /* new serverlist */
+ if (glb_policy->serverlist != NULL) {
+ /* dispose of the old serverlist */
+ grpc_grpclb_destroy_serverlist(glb_policy->serverlist);
+ }
+ /* and update the copy in the glb_lb_policy instance. This
+ * serverlist instance will be destroyed either upon the next
+ * update or in glb_destroy() */
+ glb_policy->serverlist = serverlist;
+ glb_policy->serverlist_index = 0;
+ rr_handover_locked(exec_ctx, glb_policy);
+ }
+ } else {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO,
- "Incoming server list identical to current, ignoring.");
+ "Received empty server list. Picks will stay pending until "
+ "a response with > 0 servers is received");
}
grpc_grpclb_destroy_serverlist(serverlist);
- } else { /* new serverlist */
- if (glb_policy->serverlist != NULL) {
- /* dispose of the old serverlist */
- grpc_grpclb_destroy_serverlist(glb_policy->serverlist);
- }
- /* and update the copy in the glb_lb_policy instance. This serverlist
- * instance will be destroyed either upon the next update or in
- * glb_destroy() */
- glb_policy->serverlist = serverlist;
-
- rr_handover_locked(exec_ctx, glb_policy);
- }
- } else {
- if (grpc_lb_glb_trace) {
- gpr_log(GPR_INFO,
- "Received empty server list. Picks will stay pending until a "
- "response with > 0 servers is received");
}
- grpc_grpclb_destroy_serverlist(serverlist);
+ } else { /* serverlist == NULL */
+ gpr_log(GPR_ERROR, "Invalid LB response received: '%s'. Ignoring.",
+ grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX));
}
- } else { /* serverlist == NULL */
- gpr_log(GPR_ERROR, "Invalid LB response received: '%s'. Ignoring.",
- grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX));
- grpc_slice_unref_internal(exec_ctx, response_slice);
}
+ grpc_slice_unref_internal(exec_ctx, response_slice);
+
if (!glb_policy->shutting_down) {
/* keep listening for serverlist updates */
op->op = GRPC_OP_RECV_MESSAGE;
@@ -1319,7 +1610,7 @@ static void lb_call_on_retry_timer_locked(grpc_exec_ctx *exec_ctx, void *arg,
glb_lb_policy *glb_policy = arg;
if (!glb_policy->shutting_down) {
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_INFO, "Restaring call to LB server (grpclb %p)",
(void *)glb_policy);
}
@@ -1336,7 +1627,7 @@ static void lb_on_server_status_received_locked(grpc_exec_ctx *exec_ctx,
GPR_ASSERT(glb_policy->lb_call != NULL);
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
char *status_details =
grpc_slice_to_c_string(glb_policy->lb_call_status_details);
gpr_log(GPR_DEBUG,
@@ -1355,7 +1646,7 @@ static void lb_on_server_status_received_locked(grpc_exec_ctx *exec_ctx,
gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC);
gpr_timespec next_try =
gpr_backoff_step(&glb_policy->lb_call_backoff_state, now);
- if (grpc_lb_glb_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_glb_trace)) {
gpr_log(GPR_DEBUG, "Connection to LB server lost (grpclb: %p)...",
(void *)glb_policy);
gpr_timespec timeout = gpr_time_sub(next_try, now);
@@ -1403,9 +1694,29 @@ grpc_lb_policy_factory *grpc_glb_lb_factory_create() {
}
/* Plugin registration */
+
+// Channel-init stage callback. Appends the filter passed through arg to the
+// stack being built, but only when the channel's LB policy name argument is
+// the string "grpclb". For every other configuration the builder is left
+// untouched and success is reported.
+static bool maybe_add_client_load_reporting_filter(
+    grpc_exec_ctx *exec_ctx, grpc_channel_stack_builder *builder, void *arg) {
+  const grpc_arg *lb_policy_arg = grpc_channel_args_find(
+      grpc_channel_stack_builder_get_channel_arguments(builder),
+      GRPC_ARG_LB_POLICY_NAME);
+  // Nothing to add unless the policy name is present, a string, and "grpclb".
+  if (lb_policy_arg == NULL) return true;
+  if (lb_policy_arg->type != GRPC_ARG_STRING) return true;
+  if (strcmp(lb_policy_arg->value.string, "grpclb") != 0) return true;
+  const grpc_channel_filter *filter = (const grpc_channel_filter *)arg;
+  return grpc_channel_stack_builder_append_filter(builder, filter, NULL, NULL);
+}
+
+/* Plugin init: registers the grpclb LB policy factory and the "glb" tracer,
+ * then installs a GRPC_CLIENT_SUBCHANNEL channel-init stage that runs
+ * maybe_add_client_load_reporting_filter() with the client load reporting
+ * filter as its argument. */
void grpc_lb_policy_grpclb_init() {
grpc_register_lb_policy(grpc_glb_lb_factory_create());
grpc_register_tracer("glb", &grpc_lb_glb_trace);
+ grpc_channel_init_register_stage(GRPC_CLIENT_SUBCHANNEL,
+ GRPC_CHANNEL_INIT_BUILTIN_PRIORITY,
+ maybe_add_client_load_reporting_filter,
+ (void *)&grpc_client_load_reporting_filter);
}
+/* Plugin shutdown hook. The body is empty: nothing is torn down here. */
void grpc_lb_policy_grpclb_shutdown() {}
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.c b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.c
new file mode 100644
index 0000000000..444c03b9aa
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.c
@@ -0,0 +1,133 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h"
+
+#include <grpc/support/alloc.h>
+#include <grpc/support/atm.h>
+#include <grpc/support/sync.h>
+#include <grpc/support/useful.h>
+
+#include "src/core/lib/channel/channel_args.h"
+
+#define GRPC_ARG_GRPCLB_CLIENT_STATS "grpc.grpclb_client_stats"
+
+/* Reference-counted bundle of call counters. Each counter is a gpr_atm that
+ * the add_call_* functions bump with atomic fetch-adds and that
+ * grpc_grpclb_client_stats_get() reads out and subtracts back down. */
+struct grpc_grpclb_client_stats {
+ /* Reference count; the object is freed by the unref that releases the last
+ * reference. */
+ gpr_refcount refs;
+ /* Incremented once per grpc_grpclb_client_stats_add_call_started(). */
+ gpr_atm num_calls_started;
+ /* Incremented once per grpc_grpclb_client_stats_add_call_finished(). */
+ gpr_atm num_calls_finished;
+ /* The four counters below are each incremented by add_call_finished() only
+ * when the matching boolean argument is true. */
+ gpr_atm num_calls_finished_with_drop_for_rate_limiting;
+ gpr_atm num_calls_finished_with_drop_for_load_balancing;
+ gpr_atm num_calls_finished_with_client_failed_to_send;
+ gpr_atm num_calls_finished_known_received;
+};
+
+/* Allocates a zero-filled stats object (all counters start at 0) whose
+ * reference count is initialized to 1, and returns it. */
+grpc_grpclb_client_stats* grpc_grpclb_client_stats_create() {
+  grpc_grpclb_client_stats* stats =
+      gpr_zalloc(sizeof(grpc_grpclb_client_stats));
+  gpr_ref_init(&stats->refs, 1);
+  return stats;
+}
+
+/* Takes an additional reference on client_stats and hands the same pointer
+ * back so the call can be used inline. */
+grpc_grpclb_client_stats* grpc_grpclb_client_stats_ref(
+    grpc_grpclb_client_stats* client_stats) {
+  gpr_refcount* refcount = &client_stats->refs;
+  gpr_ref(refcount);
+  return client_stats;
+}
+
+/* Drops one reference on client_stats; frees the object when gpr_unref()
+ * reports that this was the last one. */
+void grpc_grpclb_client_stats_unref(grpc_grpclb_client_stats* client_stats) {
+  if (!gpr_unref(&client_stats->refs)) {
+    return; /* other references remain */
+  }
+  gpr_free(client_stats);
+}
+
+/* Atomically adds one to the num_calls_started counter. */
+void grpc_grpclb_client_stats_add_call_started(
+    grpc_grpclb_client_stats* client_stats) {
+  gpr_atm* started = &client_stats->num_calls_started;
+  gpr_atm_full_fetch_add(started, (gpr_atm)1);
+}
+
+/* Atomically adds one to *counter when condition is true; otherwise leaves it
+ * alone. */
+static void increment_if(bool condition, gpr_atm* counter) {
+  if (condition) {
+    gpr_atm_full_fetch_add(counter, (gpr_atm)1);
+  }
+}
+
+/* Records one finished call: num_calls_finished is always incremented, and
+ * each of the four category counters is incremented only when its matching
+ * flag is set. The flags are independent of each other. */
+void grpc_grpclb_client_stats_add_call_finished(
+    bool finished_with_drop_for_rate_limiting,
+    bool finished_with_drop_for_load_balancing,
+    bool finished_with_client_failed_to_send, bool finished_known_received,
+    grpc_grpclb_client_stats* client_stats) {
+  increment_if(true, &client_stats->num_calls_finished);
+  increment_if(finished_with_drop_for_rate_limiting,
+               &client_stats->num_calls_finished_with_drop_for_rate_limiting);
+  increment_if(finished_with_drop_for_load_balancing,
+               &client_stats->num_calls_finished_with_drop_for_load_balancing);
+  increment_if(finished_with_client_failed_to_send,
+               &client_stats->num_calls_finished_with_client_failed_to_send);
+  increment_if(finished_known_received,
+               &client_stats->num_calls_finished_known_received);
+}
+
+/* Stores the counter's current value in *value, then subtracts exactly that
+ * amount from the counter. Because only the loaded amount is subtracted,
+ * increments that land between the load and the subtraction remain in the
+ * counter rather than being discarded. */
+static void atomic_get_and_reset_counter(int64_t* value, gpr_atm* counter) {
+  const int64_t snapshot = (int64_t)gpr_atm_acq_load(counter);
+  *value = snapshot;
+  gpr_atm_full_fetch_add(counter, (gpr_atm)(-snapshot));
+}
+
+/* Reads every counter of client_stats into the matching output parameter and
+ * subtracts the reported amount from the counter, in declaration order. Each
+ * counter is handled on its own; the six values are not captured as one
+ * atomic snapshot. */
+void grpc_grpclb_client_stats_get(
+    grpc_grpclb_client_stats* client_stats, int64_t* num_calls_started,
+    int64_t* num_calls_finished,
+    int64_t* num_calls_finished_with_drop_for_rate_limiting,
+    int64_t* num_calls_finished_with_drop_for_load_balancing,
+    int64_t* num_calls_finished_with_client_failed_to_send,
+    int64_t* num_calls_finished_known_received) {
+  /* Pairs each output slot with the counter it is filled from. */
+  const struct {
+    int64_t* out;
+    gpr_atm* counter;
+  } drains[] = {
+      {num_calls_started, &client_stats->num_calls_started},
+      {num_calls_finished, &client_stats->num_calls_finished},
+      {num_calls_finished_with_drop_for_rate_limiting,
+       &client_stats->num_calls_finished_with_drop_for_rate_limiting},
+      {num_calls_finished_with_drop_for_load_balancing,
+       &client_stats->num_calls_finished_with_drop_for_load_balancing},
+      {num_calls_finished_with_client_failed_to_send,
+       &client_stats->num_calls_finished_with_client_failed_to_send},
+      {num_calls_finished_known_received,
+       &client_stats->num_calls_finished_known_received},
+  };
+  for (size_t i = 0; i < sizeof(drains) / sizeof(drains[0]); ++i) {
+    atomic_get_and_reset_counter(drains[i].out, drains[i].counter);
+  }
+}
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h
new file mode 100644
index 0000000000..0af4a919f8
--- /dev/null
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h
@@ -0,0 +1,65 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_GRPCLB_GRPCLB_CLIENT_STATS_H
+#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_GRPCLB_GRPCLB_CLIENT_STATS_H
+
+#include <stdbool.h>
+
+#include <grpc/impl/codegen/grpc_types.h>
+
+/* Opaque, reference-counted set of atomic call counters. The layout is
+ * private to grpclb_client_stats.c. */
+typedef struct grpc_grpclb_client_stats grpc_grpclb_client_stats;
+
+/* Returns a new stats object with all counters at zero and a reference count
+ * of one. */
+grpc_grpclb_client_stats* grpc_grpclb_client_stats_create();
+/* Adds a reference to \a client_stats and returns the same pointer. */
+grpc_grpclb_client_stats* grpc_grpclb_client_stats_ref(
+ grpc_grpclb_client_stats* client_stats);
+/* Releases a reference; the object is freed when the last one is released. */
+void grpc_grpclb_client_stats_unref(grpc_grpclb_client_stats* client_stats);
+
+/* Atomically increments the "calls started" counter. */
+void grpc_grpclb_client_stats_add_call_started(
+ grpc_grpclb_client_stats* client_stats);
+/* Atomically increments the "calls finished" counter, plus one category
+ * counter for every boolean argument that is true. */
+void grpc_grpclb_client_stats_add_call_finished(
+ bool finished_with_drop_for_rate_limiting,
+ bool finished_with_drop_for_load_balancing,
+ bool finished_with_client_failed_to_send, bool finished_known_received,
+ grpc_grpclb_client_stats* client_stats);
+
+/* Writes the current value of each counter to the matching output parameter
+ * and subtracts that value from the counter, so a later call reports only
+ * what was added in between. All output pointers are written to
+ * unconditionally. */
+void grpc_grpclb_client_stats_get(
+ grpc_grpclb_client_stats* client_stats, int64_t* num_calls_started,
+ int64_t* num_calls_finished,
+ int64_t* num_calls_finished_with_drop_for_rate_limiting,
+ int64_t* num_calls_finished_with_drop_for_load_balancing,
+ int64_t* num_calls_finished_with_client_failed_to_send,
+ int64_t* num_calls_finished_known_received);
+
+#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_GRPCLB_GRPCLB_CLIENT_STATS_H \
+ */
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.c b/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.c
index 87549b78f0..90e7c2efe5 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.c
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.c
@@ -37,58 +37,83 @@
#include <grpc/support/alloc.h>
+/* First-pass decode callback, invoked once for every Server in ServerList.
+ * Decodes the entry into a throwaway struct and, on success, bumps
+ * num_servers on the serverlist passed through *arg. Logs and returns false
+ * when the entry cannot be decoded. */
+static bool count_serverlist(pb_istream_t *stream, const pb_field_t *field,
+                             void **arg) {
+  grpc_grpclb_serverlist *serverlist = *arg;
+  grpc_grpclb_server scratch;
+  if (pb_decode(stream, grpc_lb_v1_Server_fields, &scratch)) {
+    ++serverlist->num_servers;
+    return true;
+  }
+  gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(stream));
+  return false;
+}
+
typedef struct decode_serverlist_arg {
- /* The first pass counts the number of servers in the server list. The second
- * one allocates and decodes. */
- bool first_pass;
/* The decoding callback is invoked once per server in serverlist. Remember
* which index of the serverlist are we currently decoding */
size_t decoding_idx;
- /* Populated after the first pass. Number of server in the input serverlist */
- size_t num_servers;
/* The decoded serverlist */
- grpc_grpclb_server **servers;
+ grpc_grpclb_serverlist *serverlist;
} decode_serverlist_arg;
/* invoked once for every Server in ServerList */
+/* Second-pass callback: decodes one Server into a freshly allocated struct and
+ * stores it at serverlist->servers[decoding_idx], then advances decoding_idx.
+ * On a decode failure the allocation is released and false is returned. */
static bool decode_serverlist(pb_istream_t *stream, const pb_field_t *field,
void **arg) {
decode_serverlist_arg *dec_arg = *arg;
- if (dec_arg->first_pass) { /* count how many server do we have */
- grpc_grpclb_server server;
- if (!pb_decode(stream, grpc_lb_v1_Server_fields, &server)) {
- gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(stream));
- return false;
- }
- dec_arg->num_servers++;
- } else { /* second pass. Actually decode. */
- grpc_grpclb_server *server = gpr_zalloc(sizeof(grpc_grpclb_server));
- GPR_ASSERT(dec_arg->num_servers > 0);
- if (dec_arg->decoding_idx == 0) { /* first iteration of second pass */
- dec_arg->servers =
- gpr_malloc(sizeof(grpc_grpclb_server *) * dec_arg->num_servers);
- }
- if (!pb_decode(stream, grpc_lb_v1_Server_fields, server)) {
- gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(stream));
- return false;
- }
- dec_arg->servers[dec_arg->decoding_idx++] = server;
+ /* servers[decoding_idx] is written below, so the index must be strictly
+ * less than num_servers; equality would already be one past the end. */
+ GPR_ASSERT(dec_arg->serverlist->num_servers > dec_arg->decoding_idx);
+ grpc_grpclb_server *server = gpr_zalloc(sizeof(grpc_grpclb_server));
+ if (!pb_decode(stream, grpc_lb_v1_Server_fields, server)) {
+ gpr_free(server);
+ gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(stream));
+ return false;
}
-
+ dec_arg->serverlist->servers[dec_arg->decoding_idx++] = server;
return true;
}
+/* Allocates an initial LB request carrying \a lb_service_name: client stats
+ * are flagged absent, the initial request and its name are flagged present,
+ * and at most GRPC_GRPCLB_SERVICE_NAME_MAX_LENGTH bytes of the name are
+ * copied in. */
grpc_grpclb_request *grpc_grpclb_request_create(const char *lb_service_name) {
+ /* NOTE(review): the sibling load-report constructor uses gpr_zalloc();
+ * presumably gpr_malloc() leaves the fields not assigned below
+ * uninitialized -- confirm this is intended. */
grpc_grpclb_request *req = gpr_malloc(sizeof(grpc_grpclb_request));
-
- req->has_client_stats = 0; /* TODO(dgq): add support for stats once defined */
- req->has_initial_request = 1;
- req->initial_request.has_name = 1;
+ req->has_client_stats = false;
+ req->has_initial_request = true;
+ req->initial_request.has_name = true;
+ /* NOTE(review): strncpy() writes no terminating NUL when the source is
+ * GRPC_GRPCLB_SERVICE_NAME_MAX_LENGTH bytes or longer -- verify against the
+ * size of initial_request.name. */
strncpy(req->initial_request.name, lb_service_name,
GRPC_GRPCLB_SERVICE_NAME_MAX_LENGTH);
return req;
}
+/* Copies the seconds and nanoseconds of \a timestamp into \a timestamp_pb and
+ * flags both fields as present. */
+static void populate_timestamp(gpr_timespec timestamp,
+                               struct _grpc_lb_v1_Timestamp *timestamp_pb) {
+  /* values first ... */
+  timestamp_pb->seconds = timestamp.tv_sec;
+  timestamp_pb->nanos = timestamp.tv_nsec;
+  /* ... then the presence flags */
+  timestamp_pb->has_seconds = true;
+  timestamp_pb->has_nanos = true;
+}
+
+/* Builds a load-report request from \a client_stats. The request is
+ * zero-allocated, stamped with the current GPR_CLOCK_REALTIME time, and every
+ * counter field is flagged present and filled through
+ * grpc_grpclb_client_stats_get(), which also subtracts the reported amounts
+ * from \a client_stats. Returns the newly allocated request. */
+grpc_grpclb_request *grpc_grpclb_load_report_request_create(
+    grpc_grpclb_client_stats *client_stats) {
+  grpc_grpclb_request *req = gpr_zalloc(sizeof(grpc_grpclb_request));
+  req->has_client_stats = true;
+  req->client_stats.has_timestamp = true;
+  populate_timestamp(gpr_now(GPR_CLOCK_REALTIME), &req->client_stats.timestamp);
+  /* One presence flag per counter (the original set the client_failed_to_send
+   * flag twice; the duplicate is dropped). */
+  req->client_stats.has_num_calls_started = true;
+  req->client_stats.has_num_calls_finished = true;
+  req->client_stats.has_num_calls_finished_with_drop_for_rate_limiting = true;
+  req->client_stats.has_num_calls_finished_with_drop_for_load_balancing = true;
+  req->client_stats.has_num_calls_finished_with_client_failed_to_send = true;
+  req->client_stats.has_num_calls_finished_known_received = true;
+  grpc_grpclb_client_stats_get(
+      client_stats, &req->client_stats.num_calls_started,
+      &req->client_stats.num_calls_finished,
+      &req->client_stats.num_calls_finished_with_drop_for_rate_limiting,
+      &req->client_stats.num_calls_finished_with_drop_for_load_balancing,
+      &req->client_stats.num_calls_finished_with_client_failed_to_send,
+      &req->client_stats.num_calls_finished_known_received);
+  return req;
+}
+
grpc_slice grpc_grpclb_request_encode(const grpc_grpclb_request *request) {
size_t encoded_length;
pb_ostream_t sizestream;
@@ -122,6 +147,9 @@ grpc_grpclb_initial_response *grpc_grpclb_initial_response_parse(
gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(&stream));
return NULL;
}
+
+ if (!res.has_initial_response) return NULL;
+
grpc_grpclb_initial_response *initial_res =
gpr_malloc(sizeof(grpc_grpclb_initial_response));
memcpy(initial_res, &res.initial_response,
@@ -132,36 +160,38 @@ grpc_grpclb_initial_response *grpc_grpclb_initial_response_parse(
+/* Parses the server list out of an encoded LoadBalanceResponse in two passes
+ * over the same buffer: the first counts the servers, the second allocates
+ * the array and decodes each entry. Returns NULL (after freeing what was
+ * allocated) when either pass fails. */
grpc_grpclb_serverlist *grpc_grpclb_response_parse_serverlist(
grpc_slice encoded_grpc_grpclb_response) {
- bool status;
- decode_serverlist_arg arg;
pb_istream_t stream =
pb_istream_from_buffer(GRPC_SLICE_START_PTR(encoded_grpc_grpclb_response),
GRPC_SLICE_LENGTH(encoded_grpc_grpclb_response));
pb_istream_t stream_at_start = stream;
+ grpc_grpclb_serverlist *sl = gpr_zalloc(sizeof(grpc_grpclb_serverlist));
grpc_grpclb_response res;
memset(&res, 0, sizeof(grpc_grpclb_response));
- memset(&arg, 0, sizeof(decode_serverlist_arg));
-
- res.server_list.servers.funcs.decode = decode_serverlist;
- res.server_list.servers.arg = &arg;
- arg.first_pass = true;
- status = pb_decode(&stream, grpc_lb_v1_LoadBalanceResponse_fields, &res);
+ // First pass: count number of servers.
+ res.server_list.servers.funcs.decode = count_serverlist;
+ res.server_list.servers.arg = sl;
+ bool status = pb_decode(&stream, grpc_lb_v1_LoadBalanceResponse_fields, &res);
if (!status) {
+ gpr_free(sl);
gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(&stream));
return NULL;
}
-
- arg.first_pass = false;
- status =
- pb_decode(&stream_at_start, grpc_lb_v1_LoadBalanceResponse_fields, &res);
- if (!status) {
- gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(&stream));
- return NULL;
+ // Second pass: populate servers.
+ if (sl->num_servers > 0) {
+ sl->servers = gpr_zalloc(sizeof(grpc_grpclb_server *) * sl->num_servers);
+ decode_serverlist_arg decode_arg;
+ memset(&decode_arg, 0, sizeof(decode_arg));
+ decode_arg.serverlist = sl;
+ res.server_list.servers.funcs.decode = decode_serverlist;
+ res.server_list.servers.arg = &decode_arg;
+ status = pb_decode(&stream_at_start, grpc_lb_v1_LoadBalanceResponse_fields,
+ &res);
+ if (!status) {
+ grpc_grpclb_destroy_serverlist(sl);
+ // This pass ran on stream_at_start, so that is the stream whose error
+ // message describes the failure.
+ gpr_log(GPR_ERROR, "nanopb error: %s", PB_GET_ERROR(&stream_at_start));
+ return NULL;
+ }
}
-
- grpc_grpclb_serverlist *sl = gpr_zalloc(sizeof(grpc_grpclb_serverlist));
- sl->num_servers = arg.num_servers;
- sl->servers = arg.servers;
if (res.server_list.has_expiration_interval) {
sl->expiration_interval = res.server_list.expiration_interval;
}
@@ -195,7 +225,7 @@ grpc_grpclb_serverlist *grpc_grpclb_serverlist_copy(
bool grpc_grpclb_serverlist_equals(const grpc_grpclb_serverlist *lhs,
const grpc_grpclb_serverlist *rhs) {
- if ((lhs == NULL) || (rhs == NULL)) {
+ if (lhs == NULL || rhs == NULL) {
return false;
}
if (lhs->num_servers != rhs->num_servers) {
@@ -243,6 +273,15 @@ int grpc_grpclb_duration_compare(const grpc_grpclb_duration *lhs,
return 0;
}
+/* Converts \a duration_pb into a gpr_timespec of clock type GPR_TIMESPAN.
+ * A seconds or nanos field that is not flagged present contributes 0. */
+gpr_timespec grpc_grpclb_duration_to_timespec(
+    grpc_grpclb_duration *duration_pb) {
+  gpr_timespec result;
+  result.clock_type = GPR_TIMESPAN;
+  if (duration_pb->has_seconds) {
+    result.tv_sec = duration_pb->seconds;
+  } else {
+    result.tv_sec = 0;
+  }
+  if (duration_pb->has_nanos) {
+    result.tv_nsec = duration_pb->nanos;
+  } else {
+    result.tv_nsec = 0;
+  }
+  return result;
+}
+
void grpc_grpclb_initial_response_destroy(
grpc_grpclb_initial_response *response) {
gpr_free(response);
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h b/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h
index d014b8800c..7f596ce1f1 100644
--- a/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h
+++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h
@@ -36,6 +36,7 @@
#include <grpc/slice_buffer.h>
+#include "src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h"
#include "src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h"
#include "src/core/ext/filters/client_channel/lb_policy_factory.h"
@@ -50,7 +51,7 @@ typedef grpc_lb_v1_LoadBalanceRequest grpc_grpclb_request;
typedef grpc_lb_v1_InitialLoadBalanceResponse grpc_grpclb_initial_response;
typedef grpc_lb_v1_Server grpc_grpclb_server;
typedef grpc_lb_v1_Duration grpc_grpclb_duration;
-typedef struct grpc_grpclb_serverlist {
+typedef struct {
grpc_grpclb_server **servers;
size_t num_servers;
grpc_grpclb_duration expiration_interval;
@@ -58,6 +59,8 @@ typedef struct grpc_grpclb_serverlist {
/** Create a request for a gRPC LB service under \a lb_service_name */
grpc_grpclb_request *grpc_grpclb_request_create(const char *lb_service_name);
+grpc_grpclb_request *grpc_grpclb_load_report_request_create(
+ grpc_grpclb_client_stats *client_stats);
/** Protocol Buffers v3-encode \a request */
grpc_slice grpc_grpclb_request_encode(const grpc_grpclb_request *request);
@@ -93,6 +96,9 @@ void grpc_grpclb_destroy_serverlist(grpc_grpclb_serverlist *serverlist);
int grpc_grpclb_duration_compare(const grpc_grpclb_duration *lhs,
const grpc_grpclb_duration *rhs);
+gpr_timespec grpc_grpclb_duration_to_timespec(
+ grpc_grpclb_duration *duration_pb);
+
/** Destroy \a initial_response */
void grpc_grpclb_initial_response_destroy(
grpc_grpclb_initial_response *response);
diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.c b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.c
index 2b77cd39b8..b1c5dfc61c 100644
--- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.c
+++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.c
@@ -189,7 +189,8 @@ static void pf_exit_idle_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
static int pf_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
const grpc_lb_policy_pick_args *pick_args,
- grpc_connected_subchannel **target, void **user_data,
+ grpc_connected_subchannel **target,
+ grpc_call_context_element *context, void **user_data,
grpc_closure *on_complete) {
pick_first_lb_policy *p = (pick_first_lb_policy *)pol;
pending_pick *pp;
diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c
index ff41e61b3e..7ee6ffb787 100644
--- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c
+++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c
@@ -74,7 +74,7 @@
typedef struct round_robin_lb_policy round_robin_lb_policy;
-int grpc_lb_round_robin_trace = 0;
+grpc_tracer_flag grpc_lb_round_robin_trace = GRPC_TRACER_INITIALIZER(false);
/** List of entities waiting for a pick.
*
@@ -99,26 +99,13 @@ typedef struct pending_pick {
grpc_closure *on_complete;
} pending_pick;
-/** List of subchannels in a connectivity READY state */
-typedef struct ready_list {
- grpc_subchannel *subchannel;
- /* references namesake entry in subchannel_data */
- void *user_data;
- struct ready_list *next;
- struct ready_list *prev;
-} ready_list;
-
typedef struct {
- /** index within policy->subchannels */
- size_t index;
/** backpointer to owning policy */
round_robin_lb_policy *policy;
/** subchannel itself */
grpc_subchannel *subchannel;
/** notification that connectivity has changed on subchannel */
grpc_closure connectivity_changed_closure;
- /** this subchannels current position in subchannel->ready_list */
- ready_list *ready_list_node;
/** last observed connectivity. Not updated by
* \a grpc_subchannel_notify_on_state_change. Used to determine the previous
* state while processing the new state in \a rr_connectivity_changed */
@@ -126,6 +113,10 @@ typedef struct {
/** current connectivity state. Updated by \a
* grpc_subchannel_notify_on_state_change */
grpc_connectivity_state curr_connectivity_state;
+ /** connectivity state to be updated by the watcher, not guarded by
+ * the combiner. Will be moved to curr_connectivity_state inside of
+ * the combiner by rr_connectivity_changed_locked(). */
+ grpc_connectivity_state pending_connectivity_state_unsafe;
/** the subchannel's target user data */
void *user_data;
/** vtable to operate over \a user_data */
@@ -141,182 +132,106 @@ struct round_robin_lb_policy {
/** all our subchannels */
size_t num_subchannels;
- subchannel_data **subchannels;
+ subchannel_data *subchannels;
- /** how many subchannels are in TRANSIENT_FAILURE */
+ /** how many subchannels are in state READY */
+ size_t num_ready;
+ /** how many subchannels are in state TRANSIENT_FAILURE */
size_t num_transient_failures;
- /** how many subchannels are IDLE */
+ /** how many subchannels are in state IDLE */
size_t num_idle;
/** have we started picking? */
- int started_picking;
+ bool started_picking;
/** are we shutting down? */
- int shutdown;
+ bool shutdown;
/** List of picks that are waiting on connectivity */
pending_pick *pending_picks;
/** our connectivity state tracker */
grpc_connectivity_state_tracker state_tracker;
- /** (Dummy) root of the doubly linked list containing READY subchannels */
- ready_list ready_list;
- /** Last pick from the ready list. */
- ready_list *ready_list_last_pick;
+ // Index into subchannels for last pick.
+ size_t last_ready_subchannel_index;
};
-/** Returns the next subchannel from the connected list or NULL if the list is
- * empty.
+/** Returns the index into p->subchannels of the next subchannel in
+ * READY state, or p->num_subchannels if no subchannel is READY.
*
- * Note that this function does *not* advance p->ready_list_last_pick. Use \a
- * advance_last_picked_locked() for that. */
-static ready_list *peek_next_connected_locked(const round_robin_lb_policy *p) {
- ready_list *selected;
- selected = p->ready_list_last_pick->next;
-
- while (selected != NULL) {
- if (selected == &p->ready_list) {
- GPR_ASSERT(selected->subchannel == NULL);
- /* skip dummy root */
- selected = selected->next;
- } else {
- GPR_ASSERT(selected->subchannel != NULL);
- return selected;
- }
+ * Note that this function does *not* update p->last_ready_subchannel_index.
+ * The caller must do that if it returns a pick. */
+static size_t get_next_ready_subchannel_index_locked(
+ const round_robin_lb_policy *p) {
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
+ gpr_log(GPR_INFO,
+ "[RR: %p] getting next ready subchannel, "
+ "last_ready_subchannel_index=%lu",
+ p, (unsigned long)p->last_ready_subchannel_index);
}
- return NULL;
-}
-
-/** Advance the \a ready_list picking head. */
-static void advance_last_picked_locked(round_robin_lb_policy *p) {
- if (p->ready_list_last_pick->next != NULL) { /* non-empty list */
- p->ready_list_last_pick = p->ready_list_last_pick->next;
- if (p->ready_list_last_pick == &p->ready_list) {
- /* skip dummy root */
- p->ready_list_last_pick = p->ready_list_last_pick->next;
+ for (size_t i = 0; i < p->num_subchannels; ++i) {
+ const size_t index =
+ (i + p->last_ready_subchannel_index + 1) % p->num_subchannels;
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
+ gpr_log(GPR_DEBUG, "[RR %p] checking index %lu: state=%d", p,
+ (unsigned long)index,
+ p->subchannels[index].curr_connectivity_state);
+ }
+ if (p->subchannels[index].curr_connectivity_state == GRPC_CHANNEL_READY) {
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
+ gpr_log(GPR_DEBUG, "[RR %p] found next ready subchannel at index %lu",
+ p, (unsigned long)index);
+ }
+ return index;
}
- } else { /* should be an empty list */
- GPR_ASSERT(p->ready_list_last_pick == &p->ready_list);
- }
-
- if (grpc_lb_round_robin_trace) {
- gpr_log(GPR_DEBUG,
- "[READYLIST, RR: %p] ADVANCED LAST PICK. NOW AT NODE %p (SC %p, "
- "CSC %p)",
- (void *)p, (void *)p->ready_list_last_pick,
- (void *)p->ready_list_last_pick->subchannel,
- (void *)grpc_subchannel_get_connected_subchannel(
- p->ready_list_last_pick->subchannel));
- }
-}
-
-/** Prepends (relative to the root at p->ready_list) the connected subchannel \a
- * csc to the list of ready subchannels. */
-static ready_list *add_connected_sc_locked(round_robin_lb_policy *p,
- subchannel_data *sd) {
- ready_list *new_elem = gpr_zalloc(sizeof(ready_list));
- new_elem->subchannel = sd->subchannel;
- new_elem->user_data = sd->user_data;
- if (p->ready_list.prev == NULL) {
- /* first element */
- new_elem->next = &p->ready_list;
- new_elem->prev = &p->ready_list;
- p->ready_list.next = new_elem;
- p->ready_list.prev = new_elem;
- } else {
- new_elem->next = &p->ready_list;
- new_elem->prev = p->ready_list.prev;
- p->ready_list.prev->next = new_elem;
- p->ready_list.prev = new_elem;
}
- if (grpc_lb_round_robin_trace) {
- gpr_log(GPR_DEBUG, "[READYLIST] ADDING NODE %p (Conn. SC %p)",
- (void *)new_elem, (void *)sd->subchannel);
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
+ gpr_log(GPR_DEBUG, "[RR %p] no subchannels in ready state", p);
}
- return new_elem;
+ return p->num_subchannels;
}
-/** Removes \a node from the list of connected subchannels */
-static void remove_disconnected_sc_locked(round_robin_lb_policy *p,
- ready_list *node) {
- if (node == NULL) {
- return;
- }
- if (node == p->ready_list_last_pick) {
- p->ready_list_last_pick = p->ready_list_last_pick->prev;
- }
-
- /* removing last item */
- if (node->next == &p->ready_list && node->prev == &p->ready_list) {
- GPR_ASSERT(p->ready_list.next == node);
- GPR_ASSERT(p->ready_list.prev == node);
- p->ready_list.next = NULL;
- p->ready_list.prev = NULL;
- } else {
- node->prev->next = node->next;
- node->next->prev = node->prev;
- }
-
- if (grpc_lb_round_robin_trace) {
- gpr_log(GPR_DEBUG, "[READYLIST] REMOVED NODE %p (SC %p)", (void *)node,
- (void *)node->subchannel);
+// Sets p->last_ready_subchannel_index to last_ready_index.
+static void update_last_ready_subchannel_index_locked(round_robin_lb_policy *p,
+ size_t last_ready_index) {
+ GPR_ASSERT(last_ready_index < p->num_subchannels);
+ p->last_ready_subchannel_index = last_ready_index;
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
+ gpr_log(GPR_DEBUG,
+ "[RR: %p] setting last_ready_subchannel_index=%lu (SC %p, CSC %p)",
+ (void *)p, (unsigned long)last_ready_index,
+ (void *)p->subchannels[last_ready_index].subchannel,
+ (void *)grpc_subchannel_get_connected_subchannel(
+ p->subchannels[last_ready_index].subchannel));
}
-
- node->next = NULL;
- node->prev = NULL;
- node->subchannel = NULL;
-
- gpr_free(node);
-}
-
-static bool is_ready_list_empty(round_robin_lb_policy *p) {
- return p->ready_list.prev == NULL;
}
static void rr_destroy(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
round_robin_lb_policy *p = (round_robin_lb_policy *)pol;
- ready_list *elem;
-
- if (grpc_lb_round_robin_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
gpr_log(GPR_DEBUG, "Destroying Round Robin policy at %p", (void *)pol);
}
-
for (size_t i = 0; i < p->num_subchannels; i++) {
- subchannel_data *sd = p->subchannels[i];
- GRPC_SUBCHANNEL_UNREF(exec_ctx, sd->subchannel, "rr_destroy");
- if (sd->user_data != NULL) {
- GPR_ASSERT(sd->user_data_vtable != NULL);
- sd->user_data_vtable->destroy(exec_ctx, sd->user_data);
+ subchannel_data *sd = &p->subchannels[i];
+ if (sd->subchannel != NULL) {
+ GRPC_SUBCHANNEL_UNREF(exec_ctx, sd->subchannel, "rr_destroy");
+ if (sd->user_data != NULL) {
+ GPR_ASSERT(sd->user_data_vtable != NULL);
+ sd->user_data_vtable->destroy(exec_ctx, sd->user_data);
+ }
}
- gpr_free(sd);
}
-
grpc_connectivity_state_destroy(exec_ctx, &p->state_tracker);
gpr_free(p->subchannels);
-
- elem = p->ready_list.next;
- while (elem != NULL && elem != &p->ready_list) {
- ready_list *tmp;
- tmp = elem->next;
- elem->next = NULL;
- elem->prev = NULL;
- elem->subchannel = NULL;
- gpr_free(elem);
- elem = tmp;
- }
-
gpr_free(p);
}
static void rr_shutdown_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
round_robin_lb_policy *p = (round_robin_lb_policy *)pol;
- pending_pick *pp;
- size_t i;
-
- if (grpc_lb_round_robin_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
gpr_log(GPR_DEBUG, "Shutting down Round Robin policy at %p", (void *)pol);
}
-
- p->shutdown = 1;
+ p->shutdown = true;
+ pending_pick *pp;
while ((pp = p->pending_picks)) {
p->pending_picks = pp->next;
*pp->target = NULL;
@@ -328,10 +243,13 @@ static void rr_shutdown_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
grpc_connectivity_state_set(
exec_ctx, &p->state_tracker, GRPC_CHANNEL_SHUTDOWN,
GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel Shutdown"), "rr_shutdown");
- for (i = 0; i < p->num_subchannels; i++) {
- subchannel_data *sd = p->subchannels[i];
- grpc_subchannel_notify_on_state_change(exec_ctx, sd->subchannel, NULL, NULL,
- &sd->connectivity_changed_closure);
+ for (size_t i = 0; i < p->num_subchannels; i++) {
+ subchannel_data *sd = &p->subchannels[i];
+ if (sd->subchannel != NULL) {
+ grpc_subchannel_notify_on_state_change(exec_ctx, sd->subchannel, NULL,
+ NULL,
+ &sd->connectivity_changed_closure);
+ }
}
}
@@ -339,8 +257,7 @@ static void rr_cancel_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
grpc_connected_subchannel **target,
grpc_error *error) {
round_robin_lb_policy *p = (round_robin_lb_policy *)pol;
- pending_pick *pp;
- pp = p->pending_picks;
+ pending_pick *pp = p->pending_picks;
p->pending_picks = NULL;
while (pp != NULL) {
pending_pick *next = pp->next;
@@ -364,8 +281,7 @@ static void rr_cancel_picks_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
uint32_t initial_metadata_flags_eq,
grpc_error *error) {
round_robin_lb_policy *p = (round_robin_lb_policy *)pol;
- pending_pick *pp;
- pp = p->pending_picks;
+ pending_pick *pp = p->pending_picks;
p->pending_picks = NULL;
while (pp != NULL) {
pending_pick *next = pp->next;
@@ -387,21 +303,16 @@ static void rr_cancel_picks_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
static void start_picking_locked(grpc_exec_ctx *exec_ctx,
round_robin_lb_policy *p) {
- size_t i;
- p->started_picking = 1;
-
- for (i = 0; i < p->num_subchannels; i++) {
- subchannel_data *sd = p->subchannels[i];
- /* use some sentinel value outside of the range of grpc_connectivity_state
- * to signal an undefined previous state. We won't be referring to this
- * value again and it'll be overwritten after the first call to
- * rr_connectivity_changed */
- sd->prev_connectivity_state = GRPC_CHANNEL_INIT;
- sd->curr_connectivity_state = GRPC_CHANNEL_IDLE;
- GRPC_LB_POLICY_WEAK_REF(&p->base, "rr_connectivity");
- grpc_subchannel_notify_on_state_change(
- exec_ctx, sd->subchannel, p->base.interested_parties,
- &sd->curr_connectivity_state, &sd->connectivity_changed_closure);
+ p->started_picking = true;
+ for (size_t i = 0; i < p->num_subchannels; i++) {
+ subchannel_data *sd = &p->subchannels[i];
+ if (sd->subchannel != NULL) {
+ GRPC_LB_POLICY_WEAK_REF(&p->base, "rr_connectivity");
+ grpc_subchannel_notify_on_state_change(
+ exec_ctx, sd->subchannel, p->base.interested_parties,
+ &sd->pending_connectivity_state_unsafe,
+ &sd->connectivity_changed_closure);
+ }
}
}
@@ -414,39 +325,36 @@ static void rr_exit_idle_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol) {
static int rr_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
const grpc_lb_policy_pick_args *pick_args,
- grpc_connected_subchannel **target, void **user_data,
+ grpc_connected_subchannel **target,
+ grpc_call_context_element *context, void **user_data,
grpc_closure *on_complete) {
round_robin_lb_policy *p = (round_robin_lb_policy *)pol;
- pending_pick *pp;
- ready_list *selected;
-
- if (grpc_lb_round_robin_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO, "Round Robin %p trying to pick", (void *)pol);
}
-
- if ((selected = peek_next_connected_locked(p))) {
+ const size_t next_ready_index = get_next_ready_subchannel_index_locked(p);
+ if (next_ready_index < p->num_subchannels) {
/* readily available, report right away */
+ subchannel_data *sd = &p->subchannels[next_ready_index];
*target = GRPC_CONNECTED_SUBCHANNEL_REF(
- grpc_subchannel_get_connected_subchannel(selected->subchannel),
- "rr_picked");
-
+ grpc_subchannel_get_connected_subchannel(sd->subchannel), "rr_picked");
if (user_data != NULL) {
- *user_data = selected->user_data;
+ *user_data = sd->user_data;
}
- if (grpc_lb_round_robin_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
gpr_log(GPR_DEBUG,
- "[RR PICK] TARGET <-- CONNECTED SUBCHANNEL %p (NODE %p)",
- (void *)*target, (void *)selected);
+ "[RR PICK] TARGET <-- CONNECTED SUBCHANNEL %p (INDEX %lu)",
+ (void *)*target, (unsigned long)next_ready_index);
}
/* only advance the last picked pointer if the selection was used */
- advance_last_picked_locked(p);
+ update_last_ready_subchannel_index_locked(p, next_ready_index);
return 1;
} else {
/* no pick currently available. Save for later in list of pending picks */
if (!p->started_picking) {
start_picking_locked(exec_ctx, p);
}
- pp = gpr_malloc(sizeof(*pp));
+ pending_pick *pp = gpr_malloc(sizeof(*pp));
pp->next = p->pending_picks;
pp->target = target;
pp->on_complete = on_complete;
@@ -457,25 +365,31 @@ static int rr_pick_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
}
}
-static void update_state_counters(subchannel_data *sd) {
+static void update_state_counters_locked(subchannel_data *sd) {
round_robin_lb_policy *p = sd->policy;
-
- /* update p->num_transient_failures (resp. p->num_idle): if the previous
- * state was TRANSIENT_FAILURE (resp. IDLE), decrement
- * p->num_transient_failures (resp. p->num_idle). */
- if (sd->prev_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
+ if (sd->prev_connectivity_state == GRPC_CHANNEL_READY) {
+ GPR_ASSERT(p->num_ready > 0);
+ --p->num_ready;
+ } else if (sd->prev_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
GPR_ASSERT(p->num_transient_failures > 0);
--p->num_transient_failures;
} else if (sd->prev_connectivity_state == GRPC_CHANNEL_IDLE) {
GPR_ASSERT(p->num_idle > 0);
--p->num_idle;
}
+ if (sd->curr_connectivity_state == GRPC_CHANNEL_READY) {
+ ++p->num_ready;
+ } else if (sd->curr_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
+ ++p->num_transient_failures;
+ } else if (sd->curr_connectivity_state == GRPC_CHANNEL_IDLE) {
+ ++p->num_idle;
+ }
}
/* sd is the subchannel_data associted with the updated subchannel.
* shutdown_error will only be used upon policy transition to TRANSIENT_FAILURE
* or SHUTDOWN */
-static grpc_connectivity_state update_lb_connectivity_status(
+static grpc_connectivity_state update_lb_connectivity_status_locked(
grpc_exec_ctx *exec_ctx, subchannel_data *sd, grpc_error *error) {
/* In priority order. The first rule to match terminates the search (ie, if we
* are on rule n, all previous rules were unfulfilled).
@@ -497,7 +411,7 @@ static grpc_connectivity_state update_lb_connectivity_status(
* CHECK: p->num_idle == p->num_subchannels.
*/
round_robin_lb_policy *p = sd->policy;
- if (!is_ready_list_empty(p)) { /* 1) READY */
+ if (p->num_ready > 0) { /* 1) READY */
grpc_connectivity_state_set(exec_ctx, &p->state_tracker, GRPC_CHANNEL_READY,
GRPC_ERROR_NONE, "rr_ready");
return GRPC_CHANNEL_READY;
@@ -531,32 +445,62 @@ static void rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error *error) {
subchannel_data *sd = arg;
round_robin_lb_policy *p = sd->policy;
- pending_pick *pp;
-
- GRPC_ERROR_REF(error);
-
+ // Now that we're inside the combiner, copy the pending connectivity
+ // state (which was set by the connectivity state watcher) to
+ // curr_connectivity_state, which is what we use inside of the combiner.
+ sd->curr_connectivity_state = sd->pending_connectivity_state_unsafe;
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
+ gpr_log(GPR_DEBUG,
+ "[RR %p] connectivity changed for subchannel %p: "
+ "prev_state=%d new_state=%d",
+ p, sd->subchannel, sd->prev_connectivity_state,
+ sd->curr_connectivity_state);
+ }
+ // If we're shutting down, unref and return.
if (p->shutdown) {
GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &p->base, "rr_connectivity");
- GRPC_ERROR_UNREF(error);
return;
}
- switch (sd->curr_connectivity_state) {
- case GRPC_CHANNEL_INIT:
- GPR_UNREACHABLE_CODE(return );
- case GRPC_CHANNEL_READY:
- /* add the newly connected subchannel to the list of connected ones.
- * Note that it goes to the "end of the line". */
- sd->ready_list_node = add_connected_sc_locked(p, sd);
+ // Update state counters and determine new overall state.
+ update_state_counters_locked(sd);
+ sd->prev_connectivity_state = sd->curr_connectivity_state;
+ grpc_connectivity_state new_connectivity_state =
+ update_lb_connectivity_status_locked(exec_ctx, sd, GRPC_ERROR_REF(error));
+ // If the new state is SHUTDOWN, unref the subchannel, and if the new
+ // overall state is SHUTDOWN, clean up.
+ if (sd->curr_connectivity_state == GRPC_CHANNEL_SHUTDOWN) {
+ GRPC_SUBCHANNEL_UNREF(exec_ctx, sd->subchannel, "rr_subchannel_shutdown");
+ sd->subchannel = NULL;
+ if (sd->user_data != NULL) {
+ GPR_ASSERT(sd->user_data_vtable != NULL);
+ sd->user_data_vtable->destroy(exec_ctx, sd->user_data);
+ }
+ if (new_connectivity_state == GRPC_CHANNEL_SHUTDOWN) {
+ /* the policy is shutting down. Flush all the pending picks... */
+ pending_pick *pp;
+ while ((pp = p->pending_picks)) {
+ p->pending_picks = pp->next;
+ *pp->target = NULL;
+ grpc_closure_sched(exec_ctx, pp->on_complete, GRPC_ERROR_NONE);
+ gpr_free(pp);
+ }
+ }
+ /* unref the "rr_connectivity" weak ref from start_picking */
+ GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &p->base, "rr_connectivity");
+ } else {
+ if (sd->curr_connectivity_state == GRPC_CHANNEL_READY) {
/* at this point we know there's at least one suitable subchannel. Go
* ahead and pick one and notify the pending suitors in
* p->pending_picks. This preemtively replicates rr_pick()'s actions. */
- ready_list *selected = peek_next_connected_locked(p);
- GPR_ASSERT(selected != NULL);
+ const size_t next_ready_index = get_next_ready_subchannel_index_locked(p);
+ GPR_ASSERT(next_ready_index < p->num_subchannels);
+ subchannel_data *selected = &p->subchannels[next_ready_index];
if (p->pending_picks != NULL) {
/* if the selected subchannel is going to be used for the pending
* picks, update the last picked pointer */
- advance_last_picked_locked(p);
+ update_last_ready_subchannel_index_locked(p, next_ready_index);
}
+ pending_pick *pp;
while ((pp = p->pending_picks)) {
p->pending_picks = pp->next;
*pp->target = GRPC_CONNECTED_SUBCHANNEL_REF(
@@ -565,74 +509,22 @@ static void rr_connectivity_changed_locked(grpc_exec_ctx *exec_ctx, void *arg,
if (pp->user_data != NULL) {
*pp->user_data = selected->user_data;
}
- if (grpc_lb_round_robin_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
gpr_log(GPR_DEBUG,
- "[RR CONN CHANGED] TARGET <-- SUBCHANNEL %p (NODE %p)",
- (void *)selected->subchannel, (void *)selected);
+ "[RR CONN CHANGED] TARGET <-- SUBCHANNEL %p (INDEX %lu)",
+ (void *)selected->subchannel,
+ (unsigned long)next_ready_index);
}
grpc_closure_sched(exec_ctx, pp->on_complete, GRPC_ERROR_NONE);
gpr_free(pp);
}
- update_lb_connectivity_status(exec_ctx, sd, error);
- sd->prev_connectivity_state = sd->curr_connectivity_state;
- /* renew notification: reuses the "rr_connectivity" weak ref */
- grpc_subchannel_notify_on_state_change(
- exec_ctx, sd->subchannel, p->base.interested_parties,
- &sd->curr_connectivity_state, &sd->connectivity_changed_closure);
- break;
- case GRPC_CHANNEL_IDLE:
- ++p->num_idle;
- /* fallthrough */
- case GRPC_CHANNEL_CONNECTING:
- update_state_counters(sd);
- update_lb_connectivity_status(exec_ctx, sd, error);
- sd->prev_connectivity_state = sd->curr_connectivity_state;
- /* renew notification: reuses the "rr_connectivity" weak ref */
- grpc_subchannel_notify_on_state_change(
- exec_ctx, sd->subchannel, p->base.interested_parties,
- &sd->curr_connectivity_state, &sd->connectivity_changed_closure);
- break;
- case GRPC_CHANNEL_TRANSIENT_FAILURE:
- ++p->num_transient_failures;
- /* remove from ready list if still present */
- if (sd->ready_list_node != NULL) {
- remove_disconnected_sc_locked(p, sd->ready_list_node);
- sd->ready_list_node = NULL;
- }
- update_lb_connectivity_status(exec_ctx, sd, error);
- sd->prev_connectivity_state = sd->curr_connectivity_state;
- /* renew notification: reuses the "rr_connectivity" weak ref */
- grpc_subchannel_notify_on_state_change(
- exec_ctx, sd->subchannel, p->base.interested_parties,
- &sd->curr_connectivity_state, &sd->connectivity_changed_closure);
- break;
- case GRPC_CHANNEL_SHUTDOWN:
- update_state_counters(sd);
- if (sd->ready_list_node != NULL) {
- remove_disconnected_sc_locked(p, sd->ready_list_node);
- sd->ready_list_node = NULL;
- }
- --p->num_subchannels;
- GPR_SWAP(subchannel_data *, p->subchannels[sd->index],
- p->subchannels[p->num_subchannels]);
- GRPC_SUBCHANNEL_UNREF(exec_ctx, sd->subchannel, "rr_subchannel_shutdown");
- p->subchannels[sd->index]->index = sd->index;
- if (update_lb_connectivity_status(exec_ctx, sd, error) ==
- GRPC_CHANNEL_SHUTDOWN) {
- /* the policy is shutting down. Flush all the pending picks... */
- while ((pp = p->pending_picks)) {
- p->pending_picks = pp->next;
- *pp->target = NULL;
- grpc_closure_sched(exec_ctx, pp->on_complete, GRPC_ERROR_NONE);
- gpr_free(pp);
- }
- }
- gpr_free(sd);
- /* unref the "rr_connectivity" weak ref from start_picking */
- GRPC_LB_POLICY_WEAK_UNREF(exec_ctx, &p->base, "rr_connectivity");
- break;
+ }
+ /* renew notification: reuses the "rr_connectivity" weak ref */
+ grpc_subchannel_notify_on_state_change(
+ exec_ctx, sd->subchannel, p->base.interested_parties,
+ &sd->pending_connectivity_state_unsafe,
+ &sd->connectivity_changed_closure);
}
- GRPC_ERROR_UNREF(error);
}
static grpc_connectivity_state rr_check_connectivity_locked(
@@ -653,10 +545,10 @@ static void rr_notify_on_state_change_locked(grpc_exec_ctx *exec_ctx,
static void rr_ping_one_locked(grpc_exec_ctx *exec_ctx, grpc_lb_policy *pol,
grpc_closure *closure) {
round_robin_lb_policy *p = (round_robin_lb_policy *)pol;
- ready_list *selected;
- grpc_connected_subchannel *target;
- if ((selected = peek_next_connected_locked(p))) {
- target = GRPC_CONNECTED_SUBCHANNEL_REF(
+ const size_t next_ready_index = get_next_ready_subchannel_index_locked(p);
+ if (next_ready_index < p->num_subchannels) {
+ subchannel_data *selected = &p->subchannels[next_ready_index];
+ grpc_connected_subchannel *target = GRPC_CONNECTED_SUBCHANNEL_REF(
grpc_subchannel_get_connected_subchannel(selected->subchannel),
"rr_picked");
grpc_connected_subchannel_ping(exec_ctx, target, closure);
@@ -707,7 +599,7 @@ static grpc_lb_policy *round_robin_create(grpc_exec_ctx *exec_ctx,
p->subchannels = gpr_zalloc(sizeof(*p->subchannels) * num_addrs);
grpc_subchannel_args sc_args;
- size_t subchannel_idx = 0;
+ size_t subchannel_index = 0;
for (size_t i = 0; i < addresses->num_addresses; i++) {
/* Skip balancer addresses, since we only know how to handle backends. */
if (addresses->addresses[i].is_balancer) continue;
@@ -723,51 +615,53 @@ static grpc_lb_policy *round_robin_create(grpc_exec_ctx *exec_ctx,
sc_args.args = new_args;
grpc_subchannel *subchannel = grpc_client_channel_factory_create_subchannel(
exec_ctx, args->client_channel_factory, &sc_args);
- if (grpc_lb_round_robin_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
char *address_uri =
grpc_sockaddr_to_uri(&addresses->addresses[i].address);
- gpr_log(GPR_DEBUG, "Created subchannel %p for address uri %s",
- (void *)subchannel, address_uri);
+ gpr_log(GPR_DEBUG, "index %lu: Created subchannel %p for address uri %s",
+ (unsigned long)subchannel_index, (void *)subchannel, address_uri);
gpr_free(address_uri);
}
grpc_channel_args_destroy(exec_ctx, new_args);
if (subchannel != NULL) {
- subchannel_data *sd = gpr_zalloc(sizeof(*sd));
- p->subchannels[subchannel_idx] = sd;
+ subchannel_data *sd = &p->subchannels[subchannel_index];
sd->policy = p;
- sd->index = subchannel_idx;
sd->subchannel = subchannel;
+ /* use some sentinel value outside of the range of grpc_connectivity_state
+ * to signal an undefined previous state. We won't be referring to this
+ * value again and it'll be overwritten after the first call to
+ * rr_connectivity_changed */
+ sd->prev_connectivity_state = GRPC_CHANNEL_INIT;
+ sd->curr_connectivity_state = GRPC_CHANNEL_IDLE;
sd->user_data_vtable = addresses->user_data_vtable;
if (sd->user_data_vtable != NULL) {
sd->user_data =
sd->user_data_vtable->copy(addresses->addresses[i].user_data);
}
- ++subchannel_idx;
grpc_closure_init(&sd->connectivity_changed_closure,
rr_connectivity_changed_locked, sd,
grpc_combiner_scheduler(args->combiner, false));
+ ++subchannel_index;
}
}
- if (subchannel_idx == 0) {
+ if (subchannel_index == 0) {
/* couldn't create any subchannel. Bail out */
gpr_free(p->subchannels);
gpr_free(p);
return NULL;
}
- p->num_subchannels = subchannel_idx;
+ p->num_subchannels = subchannel_index;
- /* The (dummy node) root of the ready list */
- p->ready_list.subchannel = NULL;
- p->ready_list.prev = NULL;
- p->ready_list.next = NULL;
- p->ready_list_last_pick = &p->ready_list;
+ // Initialize the last pick index to the last subchannel, so that the
+ // first pick will start at the beginning of the list.
+ p->last_ready_subchannel_index = subchannel_index - 1;
grpc_lb_policy_init(&p->base, &round_robin_lb_policy_vtable, args->combiner);
grpc_connectivity_state_init(&p->state_tracker, GRPC_CHANNEL_IDLE,
"round_robin");
- if (grpc_lb_round_robin_trace) {
+ if (GRPC_TRACER_ON(grpc_lb_round_robin_trace)) {
gpr_log(GPR_DEBUG, "Created RR policy at %p with %lu subchannels",
(void *)p, (unsigned long)p->num_subchannels);
}
diff --git a/src/core/ext/filters/client_channel/parse_address.c b/src/core/ext/filters/client_channel/parse_address.c
index edc6ce697d..18381eec55 100644
--- a/src/core/ext/filters/client_channel/parse_address.c
+++ b/src/core/ext/filters/client_channel/parse_address.c
@@ -57,11 +57,11 @@ bool grpc_parse_unix(const grpc_uri *uri,
struct sockaddr_un *un = (struct sockaddr_un *)resolved_addr->addr;
const size_t maxlen = sizeof(un->sun_path);
const size_t path_len = strnlen(uri->path, maxlen);
- if (path_len == maxlen) return 0;
+ if (path_len == maxlen) return false;
un->sun_family = AF_UNIX;
strcpy(un->sun_path, uri->path);
resolved_addr->len = sizeof(*un);
- return 1;
+ return true;
}
#else /* GRPC_HAVE_UNIX_SOCKET */
@@ -73,74 +73,65 @@ bool grpc_parse_unix(const grpc_uri *uri,
#endif /* GRPC_HAVE_UNIX_SOCKET */
-bool grpc_parse_ipv4(const grpc_uri *uri,
- grpc_resolved_address *resolved_addr) {
- if (strcmp("ipv4", uri->scheme) != 0) {
- gpr_log(GPR_ERROR, "Expected 'ipv4' scheme, got '%s'", uri->scheme);
- return false;
- }
- const char *host_port = uri->path;
+bool grpc_parse_ipv4_hostport(const char *hostport, grpc_resolved_address *addr,
+ bool log_errors) {
+ bool success = false;
+ // Split host and port.
char *host;
char *port;
- int port_num;
- bool result = false;
- struct sockaddr_in *in = (struct sockaddr_in *)resolved_addr->addr;
-
- if (*host_port == '/') ++host_port;
- if (!gpr_split_host_port(host_port, &host, &port)) {
- return false;
- }
-
- memset(resolved_addr, 0, sizeof(grpc_resolved_address));
- resolved_addr->len = sizeof(struct sockaddr_in);
+ if (!gpr_split_host_port(hostport, &host, &port)) return false;
+ // Parse IP address.
+ memset(addr, 0, sizeof(*addr));
+ addr->len = sizeof(struct sockaddr_in);
+ struct sockaddr_in *in = (struct sockaddr_in *)addr->addr;
in->sin_family = AF_INET;
if (inet_pton(AF_INET, host, &in->sin_addr) == 0) {
- gpr_log(GPR_ERROR, "invalid ipv4 address: '%s'", host);
+ if (log_errors) gpr_log(GPR_ERROR, "invalid ipv4 address: '%s'", host);
goto done;
}
-
- if (port != NULL) {
- if (sscanf(port, "%d", &port_num) != 1 || port_num < 0 ||
- port_num > 65535) {
- gpr_log(GPR_ERROR, "invalid ipv4 port: '%s'", port);
- goto done;
- }
- in->sin_port = htons((uint16_t)port_num);
- } else {
- gpr_log(GPR_ERROR, "no port given for ipv4 scheme");
+ // Parse port.
+ if (port == NULL) {
+ if (log_errors) gpr_log(GPR_ERROR, "no port given for ipv4 scheme");
goto done;
}
-
- result = true;
+ int port_num;
+ if (sscanf(port, "%d", &port_num) != 1 || port_num < 0 || port_num > 65535) {
+ if (log_errors) gpr_log(GPR_ERROR, "invalid ipv4 port: '%s'", port);
+ goto done;
+ }
+ in->sin_port = htons((uint16_t)port_num);
+ success = true;
done:
gpr_free(host);
gpr_free(port);
- return result;
+ return success;
}
-bool grpc_parse_ipv6(const grpc_uri *uri,
+bool grpc_parse_ipv4(const grpc_uri *uri,
grpc_resolved_address *resolved_addr) {
- if (strcmp("ipv6", uri->scheme) != 0) {
- gpr_log(GPR_ERROR, "Expected 'ipv6' scheme, got '%s'", uri->scheme);
+ if (strcmp("ipv4", uri->scheme) != 0) {
+ gpr_log(GPR_ERROR, "Expected 'ipv4' scheme, got '%s'", uri->scheme);
return false;
}
const char *host_port = uri->path;
- char *host;
- char *port;
- int port_num;
- int result = 0;
- struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)resolved_addr->addr;
-
if (*host_port == '/') ++host_port;
- if (!gpr_split_host_port(host_port, &host, &port)) {
- return 0;
- }
+ return grpc_parse_ipv4_hostport(host_port, resolved_addr,
+ true /* log_errors */);
+}
- memset(in6, 0, sizeof(*in6));
- resolved_addr->len = sizeof(*in6);
+bool grpc_parse_ipv6_hostport(const char *hostport, grpc_resolved_address *addr,
+ bool log_errors) {
+ bool success = false;
+ // Split host and port.
+ char *host;
+ char *port;
+ if (!gpr_split_host_port(hostport, &host, &port)) return false;
+ // Parse IP address.
+ memset(addr, 0, sizeof(*addr));
+ addr->len = sizeof(struct sockaddr_in6);
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)addr->addr;
in6->sin6_family = AF_INET6;
-
- /* Handle the RFC6874 syntax for IPv6 zone identifiers. */
+ // Handle the RFC6874 syntax for IPv6 zone identifiers.
char *host_end = (char *)gpr_memrchr(host, '%', strlen(host));
if (host_end != NULL) {
GPR_ASSERT(host_end >= host);
@@ -159,7 +150,7 @@ bool grpc_parse_ipv6(const grpc_uri *uri,
gpr_log(GPR_ERROR, "invalid ipv6 scope id: '%s'", host_end + 1);
goto done;
}
- // Handle "sin6_scope_id" being type "u_long". See grpc issue ##10027.
+ // Handle "sin6_scope_id" being type "u_long". See grpc issue #10027.
in6->sin6_scope_id = sin6_scope_id;
} else {
if (inet_pton(AF_INET6, host, &in6->sin6_addr) == 0) {
@@ -167,24 +158,34 @@ bool grpc_parse_ipv6(const grpc_uri *uri,
goto done;
}
}
-
- if (port != NULL) {
- if (sscanf(port, "%d", &port_num) != 1 || port_num < 0 ||
- port_num > 65535) {
- gpr_log(GPR_ERROR, "invalid ipv6 port: '%s'", port);
- goto done;
- }
- in6->sin6_port = htons((uint16_t)port_num);
- } else {
- gpr_log(GPR_ERROR, "no port given for ipv6 scheme");
+ // Parse port.
+ if (port == NULL) {
+ if (log_errors) gpr_log(GPR_ERROR, "no port given for ipv6 scheme");
goto done;
}
-
- result = 1;
+ int port_num;
+ if (sscanf(port, "%d", &port_num) != 1 || port_num < 0 || port_num > 65535) {
+ if (log_errors) gpr_log(GPR_ERROR, "invalid ipv6 port: '%s'", port);
+ goto done;
+ }
+ in6->sin6_port = htons((uint16_t)port_num);
+ success = true;
done:
gpr_free(host);
gpr_free(port);
- return result;
+ return success;
+}
+
+bool grpc_parse_ipv6(const grpc_uri *uri,
+ grpc_resolved_address *resolved_addr) {
+ if (strcmp("ipv6", uri->scheme) != 0) {
+ gpr_log(GPR_ERROR, "Expected 'ipv6' scheme, got '%s'", uri->scheme);
+ return false;
+ }
+ const char *host_port = uri->path;
+ if (*host_port == '/') ++host_port;
+ return grpc_parse_ipv6_hostport(host_port, resolved_addr,
+ true /* log_errors */);
}
bool grpc_parse_uri(const grpc_uri *uri, grpc_resolved_address *resolved_addr) {
diff --git a/src/core/ext/filters/client_channel/parse_address.h b/src/core/ext/filters/client_channel/parse_address.h
index fa7ea33a00..1a203a3b26 100644
--- a/src/core/ext/filters/client_channel/parse_address.h
+++ b/src/core/ext/filters/client_channel/parse_address.h
@@ -54,4 +54,10 @@ bool grpc_parse_ipv6(const grpc_uri *uri, grpc_resolved_address *resolved_addr);
/** Populate \a resolved_addr from \a uri. Returns true upon success. */
bool grpc_parse_uri(const grpc_uri *uri, grpc_resolved_address *resolved_addr);
+/** Parse bare IPv4 or IPv6 "IP:port" strings. */
+bool grpc_parse_ipv4_hostport(const char *hostport, grpc_resolved_address *addr,
+ bool log_errors);
+bool grpc_parse_ipv6_hostport(const char *hostport, grpc_resolved_address *addr,
+ bool log_errors);
+
#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_PARSE_ADDRESS_H */
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.c b/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.c
index ffaeeed324..578e8d697f 100644
--- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.c
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.c
@@ -61,6 +61,8 @@
typedef struct {
/** base class: must be first */
grpc_resolver base;
+ /** DNS server to use (if not system default) */
+ char *dns_server;
/** name to resolve (usually the same as target_name) */
char *name_to_resolve;
/** default port to use */
@@ -172,6 +174,8 @@ static void dns_ares_on_resolved_locked(grpc_exec_ctx *exec_ctx, void *arg,
grpc_resolved_addresses_destroy(r->addresses);
grpc_lb_addresses_destroy(exec_ctx, addresses);
} else {
+ const char *msg = grpc_error_string(error);
+ gpr_log(GPR_DEBUG, "dns resolution failed: %s", msg);
gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC);
gpr_timespec next_try = gpr_backoff_step(&r->backoff_state, now);
gpr_timespec timeout = gpr_time_sub(next_try, now);
@@ -221,9 +225,9 @@ static void dns_ares_start_resolving_locked(grpc_exec_ctx *exec_ctx,
GPR_ASSERT(!r->resolving);
r->resolving = true;
r->addresses = NULL;
- grpc_resolve_address(exec_ctx, r->name_to_resolve, r->default_port,
- r->interested_parties, &r->dns_ares_on_resolved_locked,
- &r->addresses);
+ grpc_dns_lookup_ares(exec_ctx, r->dns_server, r->name_to_resolve,
+ r->default_port, r->interested_parties,
+ &r->dns_ares_on_resolved_locked, &r->addresses);
}
static void dns_ares_maybe_finish_next_locked(grpc_exec_ctx *exec_ctx,
@@ -246,6 +250,7 @@ static void dns_ares_destroy(grpc_exec_ctx *exec_ctx, grpc_resolver *gr) {
grpc_channel_args_destroy(exec_ctx, r->resolved_result);
}
grpc_pollset_set_destroy(exec_ctx, r->interested_parties);
+ gpr_free(r->dns_server);
gpr_free(r->name_to_resolve);
gpr_free(r->default_port);
grpc_channel_args_destroy(exec_ctx, r->channel_args);
@@ -257,14 +262,13 @@ static grpc_resolver *dns_ares_create(grpc_exec_ctx *exec_ctx,
const char *default_port) {
// Get name from args.
const char *path = args->uri->path;
- if (0 != strcmp(args->uri->authority, "")) {
- gpr_log(GPR_ERROR, "authority based dns uri's not supported");
- return NULL;
- }
if (path[0] == '/') ++path;
// Create resolver.
ares_dns_resolver *r = gpr_zalloc(sizeof(ares_dns_resolver));
grpc_resolver_init(&r->base, &dns_ares_resolver_vtable, args->combiner);
+ if (0 != strcmp(args->uri->authority, "")) {
+ r->dns_server = gpr_strdup(args->uri->authority);
+ }
r->name_to_resolve = gpr_strdup(path);
r->default_port = gpr_strdup(default_port);
r->channel_args = grpc_channel_args_copy(args->args);
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.c b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.c
index 09c46a66e0..e0cfd8b629 100644
--- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.c
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.c
@@ -48,7 +48,10 @@
#include <grpc/support/string_util.h>
#include <grpc/support/time.h>
#include <grpc/support/useful.h>
+
+#include "src/core/ext/filters/client_channel/parse_address.h"
#include "src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.h"
+#include "src/core/lib/iomgr/error.h"
#include "src/core/lib/iomgr/executor.h"
#include "src/core/lib/iomgr/iomgr_internal.h"
#include "src/core/lib/iomgr/sockaddr_utils.h"
@@ -58,6 +61,8 @@ static gpr_once g_basic_init = GPR_ONCE_INIT;
static gpr_mu g_init_mu;
typedef struct grpc_ares_request {
+ /** indicates the DNS server to use, if specified */
+ struct ares_addr_port_node dns_server_addr;
/** following members are set in grpc_resolve_address_ares_impl */
/** host to resolve, parsed from the name to resolve */
char *host;
@@ -192,11 +197,12 @@ static void on_done_cb(void *arg, int status, int timeouts,
grpc_ares_request_unref(NULL, r);
}
-void grpc_resolve_address_ares_impl(grpc_exec_ctx *exec_ctx, const char *name,
- const char *default_port,
- grpc_pollset_set *interested_parties,
- grpc_closure *on_done,
- grpc_resolved_addresses **addrs) {
+void grpc_dns_lookup_ares(grpc_exec_ctx *exec_ctx, const char *dns_server,
+ const char *name, const char *default_port,
+ grpc_pollset_set *interested_parties,
+ grpc_closure *on_done,
+ grpc_resolved_addresses **addrs) {
+ grpc_error *error = GRPC_ERROR_NONE;
/* TODO(zyc): Enable tracing after #9603 is checked in */
/* if (grpc_dns_trace) {
gpr_log(GPR_DEBUG, "resolve_address (blocking): name=%s, default_port=%s",
@@ -208,28 +214,23 @@ void grpc_resolve_address_ares_impl(grpc_exec_ctx *exec_ctx, const char *name,
char *port;
gpr_split_host_port(name, &host, &port);
if (host == NULL) {
- grpc_error *err = grpc_error_set_str(
+ error = grpc_error_set_str(
GRPC_ERROR_CREATE_FROM_STATIC_STRING("unparseable host:port"),
GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(name));
- grpc_closure_sched(exec_ctx, on_done, err);
goto error_cleanup;
} else if (port == NULL) {
if (default_port == NULL) {
- grpc_error *err = grpc_error_set_str(
+ error = grpc_error_set_str(
GRPC_ERROR_CREATE_FROM_STATIC_STRING("no port in name"),
GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(name));
- grpc_closure_sched(exec_ctx, on_done, err);
goto error_cleanup;
}
port = gpr_strdup(default_port);
}
grpc_ares_ev_driver *ev_driver;
- grpc_error *err = grpc_ares_ev_driver_create(&ev_driver, interested_parties);
- if (err != GRPC_ERROR_NONE) {
- GRPC_LOG_IF_ERROR("grpc_ares_ev_driver_create() failed", err);
- goto error_cleanup;
- }
+ error = grpc_ares_ev_driver_create(&ev_driver, interested_parties);
+ if (error != GRPC_ERROR_NONE) goto error_cleanup;
grpc_ares_request *r = gpr_malloc(sizeof(grpc_ares_request));
gpr_mu_init(&r->mu);
@@ -242,6 +243,40 @@ void grpc_resolve_address_ares_impl(grpc_exec_ctx *exec_ctx, const char *name,
r->success = false;
r->error = GRPC_ERROR_NONE;
ares_channel *channel = grpc_ares_ev_driver_get_channel(r->ev_driver);
+
+ // If dns_server is specified, use it.
+ if (dns_server != NULL) {
+ gpr_log(GPR_INFO, "Using DNS server %s", dns_server);
+ grpc_resolved_address addr;
+ if (grpc_parse_ipv4_hostport(dns_server, &addr, false /* log_errors */)) {
+ r->dns_server_addr.family = AF_INET;
+ memcpy(&r->dns_server_addr.addr.addr4, addr.addr, addr.len);
+ r->dns_server_addr.tcp_port = grpc_sockaddr_get_port(&addr);
+ r->dns_server_addr.udp_port = grpc_sockaddr_get_port(&addr);
+ } else if (grpc_parse_ipv6_hostport(dns_server, &addr,
+ false /* log_errors */)) {
+ r->dns_server_addr.family = AF_INET6;
+ memcpy(&r->dns_server_addr.addr.addr6, addr.addr, addr.len);
+ r->dns_server_addr.tcp_port = grpc_sockaddr_get_port(&addr);
+ r->dns_server_addr.udp_port = grpc_sockaddr_get_port(&addr);
+ } else {
+ error = grpc_error_set_str(
+ GRPC_ERROR_CREATE_FROM_STATIC_STRING("cannot parse authority"),
+ GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(name));
+ goto error_cleanup;
+ }
+ int status = ares_set_servers_ports(*channel, &r->dns_server_addr);
+ if (status != ARES_SUCCESS) {
+ char *error_msg;
+ gpr_asprintf(&error_msg, "C-ares status is not ARES_SUCCESS: %s",
+ ares_strerror(status));
+ error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(error_msg);
+ gpr_free(error_msg);
+ goto error_cleanup;
+ }
+ }
+ // An extra reference is put here to avoid destroying the request in
+ // on_done_cb before calling grpc_ares_ev_driver_start.
gpr_ref_init(&r->pending_queries, 2);
if (grpc_ipv6_loopback_available()) {
gpr_ref(&r->pending_queries);
@@ -254,10 +289,20 @@ void grpc_resolve_address_ares_impl(grpc_exec_ctx *exec_ctx, const char *name,
return;
error_cleanup:
+ grpc_closure_sched(exec_ctx, on_done, error);
gpr_free(host);
gpr_free(port);
}
+/* Resolves |name| through c-ares without overriding the DNS server: every
+   argument is forwarded unchanged to grpc_dns_lookup_ares(), with NULL passed
+   as its dns_server. */
+void grpc_resolve_address_ares_impl(grpc_exec_ctx *exec_ctx, const char *name,
+                                    const char *default_port,
+                                    grpc_pollset_set *interested_parties,
+                                    grpc_closure *on_done,
+                                    grpc_resolved_addresses **addrs) {
+  const char *no_dns_server = NULL;
+  grpc_dns_lookup_ares(exec_ctx, no_dns_server, name, default_port,
+                       interested_parties, on_done, addrs);
+}
+
void (*grpc_resolve_address_ares)(
grpc_exec_ctx *exec_ctx, const char *name, const char *default_port,
grpc_pollset_set *interested_parties, grpc_closure *on_done,
diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h
index 3dd40ea268..84fd7fcbd6 100644
--- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h
+++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h
@@ -51,6 +51,12 @@ extern void (*grpc_resolve_address_ares)(grpc_exec_ctx *exec_ctx,
grpc_closure *on_done,
grpc_resolved_addresses **addresses);
+void grpc_dns_lookup_ares(grpc_exec_ctx *exec_ctx, const char *dns_server,
+ const char *addr, const char *default_port,
+ grpc_pollset_set *interested_parties,
+ grpc_closure *on_done,
+ grpc_resolved_addresses **addresses);
+
/* Initialize gRPC ares wrapper. Must be called at least once before
grpc_resolve_address_ares(). */
grpc_error *grpc_ares_init(void);
diff --git a/src/core/ext/filters/client_channel/subchannel.c b/src/core/ext/filters/client_channel/subchannel.c
index b2de85c4a1..dd14bf1d02 100644
--- a/src/core/ext/filters/client_channel/subchannel.c
+++ b/src/core/ext/filters/client_channel/subchannel.c
@@ -283,6 +283,7 @@ static void disconnect(grpc_exec_ctx *exec_ctx, grpc_subchannel *c) {
void grpc_subchannel_unref(grpc_exec_ctx *exec_ctx,
grpc_subchannel *c GRPC_SUBCHANNEL_REF_EXTRA_ARGS) {
gpr_atm old_refs;
+ // add a weak ref and subtract a strong ref (atomically)
old_refs = ref_mutate(c, (gpr_atm)1 - (gpr_atm)(1 << INTERNAL_REF_BITS),
1 REF_MUTATE_PURPOSE("STRONG_UNREF"));
if ((old_refs & STRONG_REF_MASK) == (1 << INTERNAL_REF_BITS)) {
@@ -656,7 +657,6 @@ static bool publish_transport_locked(grpc_exec_ctx *exec_ctx,
gpr_free(sw_subchannel);
grpc_channel_stack_destroy(exec_ctx, stk);
gpr_free(con);
- GRPC_SUBCHANNEL_WEAK_UNREF(exec_ctx, c, "connecting");
return false;
}
@@ -781,7 +781,7 @@ grpc_error *grpc_connected_subchannel_create_call(
(*call)->connection = GRPC_CONNECTED_SUBCHANNEL_REF(con, "subchannel_call");
const grpc_call_element_args call_args = {.call_stack = callstk,
.server_transport_data = NULL,
- .context = NULL,
+ .context = args->context,
.path = args->path,
.start_time = args->start_time,
.deadline = args->deadline,
diff --git a/src/core/ext/filters/client_channel/subchannel.h b/src/core/ext/filters/client_channel/subchannel.h
index 6473de49b0..e433c33e40 100644
--- a/src/core/ext/filters/client_channel/subchannel.h
+++ b/src/core/ext/filters/client_channel/subchannel.h
@@ -119,6 +119,7 @@ typedef struct {
gpr_timespec start_time;
gpr_timespec deadline;
gpr_arena *arena;
+ grpc_call_context_element *context;
} grpc_connected_subchannel_call_args;
grpc_error *grpc_connected_subchannel_create_call(
diff --git a/src/core/ext/filters/client_channel/subchannel_index.c b/src/core/ext/filters/client_channel/subchannel_index.c
index f6ef4a845e..b25dbfcf51 100644
--- a/src/core/ext/filters/client_channel/subchannel_index.c
+++ b/src/core/ext/filters/client_channel/subchannel_index.c
@@ -183,8 +183,11 @@ grpc_subchannel *grpc_subchannel_index_register(grpc_exec_ctx *exec_ctx,
enter_ctx(exec_ctx);
grpc_subchannel *c = NULL;
+ bool need_to_unref_constructed;
while (c == NULL) {
+ need_to_unref_constructed = false;
+
// Compare and swap loop:
// - take a reference to the current index
gpr_mu_lock(&g_mu);
@@ -194,8 +197,11 @@ grpc_subchannel *grpc_subchannel_index_register(grpc_exec_ctx *exec_ctx,
// - Check to see if a subchannel already exists
c = gpr_avl_get(index, key);
if (c != NULL) {
+ c = GRPC_SUBCHANNEL_REF_FROM_WEAK_REF(c, "index_register");
+ }
+ if (c != NULL) {
// yes -> we're done
- GRPC_SUBCHANNEL_WEAK_UNREF(exec_ctx, constructed, "index_register");
+ need_to_unref_constructed = true;
} else {
// no -> update the avl and compare/swap
gpr_avl updated =
@@ -219,6 +225,10 @@ grpc_subchannel *grpc_subchannel_index_register(grpc_exec_ctx *exec_ctx,
leave_ctx(exec_ctx);
+ if (need_to_unref_constructed) {
+ GRPC_SUBCHANNEL_UNREF(exec_ctx, constructed, "index_register");
+ }
+
return c;
}
diff --git a/src/core/ext/filters/http/http_filters_plugin.c b/src/core/ext/filters/http/http_filters_plugin.c
index 195a1a8119..856a7dbd91 100644
--- a/src/core/ext/filters/http/http_filters_plugin.c
+++ b/src/core/ext/filters/http/http_filters_plugin.c
@@ -37,6 +37,7 @@
#include "src/core/ext/filters/http/message_compress/message_compress_filter.h"
#include "src/core/ext/filters/http/server/http_server_filter.h"
#include "src/core/lib/channel/channel_stack_builder.h"
+#include "src/core/lib/surface/call.h"
#include "src/core/lib/surface/channel_init.h"
#include "src/core/lib/transport/transport_impl.h"
diff --git a/src/core/ext/filters/http/message_compress/message_compress_filter.c b/src/core/ext/filters/http/message_compress/message_compress_filter.c
index 1da8cf69cb..5a54a6ed15 100644
--- a/src/core/ext/filters/http/message_compress/message_compress_filter.c
+++ b/src/core/ext/filters/http/message_compress/message_compress_filter.c
@@ -47,6 +47,7 @@
#include "src/core/lib/slice/slice_internal.h"
#include "src/core/lib/slice/slice_string_helpers.h"
#include "src/core/lib/support/string.h"
+#include "src/core/lib/surface/call.h"
#include "src/core/lib/transport/static_metadata.h"
#define INITIAL_METADATA_UNSEEN 0
@@ -197,7 +198,7 @@ static void finish_send_message(grpc_exec_ctx *exec_ctx,
did_compress = grpc_msg_compress(exec_ctx, calld->compression_algorithm,
&calld->slices, &tmp);
if (did_compress) {
- if (grpc_compression_trace) {
+ if (GRPC_TRACER_ON(grpc_compression_trace)) {
char *algo_name;
const size_t before_size = calld->slices.length;
const size_t after_size = tmp.length;
@@ -211,7 +212,7 @@ static void finish_send_message(grpc_exec_ctx *exec_ctx,
grpc_slice_buffer_swap(&calld->slices, &tmp);
calld->send_flags |= GRPC_WRITE_INTERNAL_COMPRESS;
} else {
- if (grpc_compression_trace) {
+ if (GRPC_TRACER_ON(grpc_compression_trace)) {
char *algo_name;
GPR_ASSERT(grpc_compression_algorithm_name(calld->compression_algorithm,
&algo_name));
diff --git a/src/core/ext/filters/http/message_compress/message_compress_filter.h b/src/core/ext/filters/http/message_compress/message_compress_filter.h
index 75bfa17fba..135da4da62 100644
--- a/src/core/ext/filters/http/message_compress/message_compress_filter.h
+++ b/src/core/ext/filters/http/message_compress/message_compress_filter.h
@@ -38,8 +38,6 @@
#include "src/core/lib/channel/channel_stack.h"
-extern int grpc_compression_trace;
-
/** Compression filter for outgoing data.
*
* See <grpc/compression.h> for the available compression settings.
diff --git a/src/core/ext/filters/http/server/http_server_filter.c b/src/core/ext/filters/http/server/http_server_filter.c
index ff857878e4..9e495f4d42 100644
--- a/src/core/ext/filters/http/server/http_server_filter.c
+++ b/src/core/ext/filters/http/server/http_server_filter.c
@@ -46,8 +46,6 @@
#define EXPECTED_CONTENT_TYPE "application/grpc"
#define EXPECTED_CONTENT_TYPE_LENGTH sizeof(EXPECTED_CONTENT_TYPE) - 1
-extern int grpc_http_trace;
-
typedef struct call_data {
grpc_linked_mdelem status;
grpc_linked_mdelem content_type;
diff --git a/src/core/ext/filters/workarounds/workaround_cronet_compression_filter.c b/src/core/ext/filters/workarounds/workaround_cronet_compression_filter.c
new file mode 100644
index 0000000000..7fb75e3a4f
--- /dev/null
+++ b/src/core/ext/filters/workarounds/workaround_cronet_compression_filter.c
@@ -0,0 +1,223 @@
+//
+// Copyright 2017, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+#include "src/core/ext/filters/workarounds/workaround_cronet_compression_filter.h"
+
+#include <string.h>
+
+#include <grpc/support/alloc.h>
+
+#include "src/core/ext/filters/workarounds/workaround_utils.h"
+#include "src/core/lib/channel/channel_stack_builder.h"
+#include "src/core/lib/surface/channel_init.h"
+#include "src/core/lib/transport/metadata.h"
+
+// Per-call state of the cronet compression workaround filter.
+typedef struct call_data {
+ // Receive closures are chained: we inject this closure as the
+ // recv_initial_metadata_ready up-call on transport_stream_op, and remember to
+ // call our next_recv_initial_metadata_ready member after handling it.
+ grpc_closure recv_initial_metadata_ready;
+ // Used by recv_initial_metadata_ready. Captured from the op payload in
+ // start_transport_stream_op_batch() when the batch receives initial metadata.
+ grpc_metadata_batch* recv_initial_metadata;
+ // Original recv_initial_metadata_ready callback, invoked after our own.
+ grpc_closure* next_recv_initial_metadata_ready;
+
+ // Marks whether the workaround is active. Starts out false, is switched on
+ // by recv_initial_metadata_ready() when the parsed user agent asks for it,
+ // and is consulted by start_transport_stream_op_batch() on send_message.
+ bool workaround_active;
+} call_data;
+
+// Looks up the user-agent entry of |batch|. When present, copies its metadata
+// element into |*md| and returns true; otherwise returns false and leaves
+// |*md| untouched.
+static bool get_user_agent_mdelem(const grpc_metadata_batch* batch,
+                                  grpc_mdelem* md) {
+  if (batch->idx.named.user_agent == NULL) {
+    return false;
+  }
+  *md = batch->idx.named.user_agent->md;
+  return true;
+}
+
+// Closure run once the call's initial metadata has arrived. When there is no
+// error and the batch carries a user-agent whose parsed verdict enables the
+// cronet compression workaround, the call is flagged accordingly. The closure
+// that was originally registered is then run in every case.
+static void recv_initial_metadata_ready(grpc_exec_ctx* exec_ctx,
+                                        void* user_data, grpc_error* error) {
+  grpc_call_element* elem = user_data;
+  call_data* calld = elem->call_data;
+  grpc_mdelem user_agent;
+
+  if (error == GRPC_ERROR_NONE &&
+      get_user_agent_mdelem(calld->recv_initial_metadata, &user_agent)) {
+    const grpc_workaround_user_agent_md* verdicts =
+        grpc_parse_user_agent(user_agent);
+    if (verdicts->workaround_active[GRPC_WORKAROUND_ID_CRONET_COMPRESSION]) {
+      calld->workaround_active = true;
+    }
+  }
+
+  // Hand control back to whoever originally asked to be notified.
+  grpc_closure_run(exec_ctx, calld->next_recv_initial_metadata_ready,
+                   GRPC_ERROR_REF(error));
+}
+
+// Intercepts stream op batches on their way down the stack: hooks the
+// recv_initial_metadata notification so the user agent can be inspected, and
+// tags outgoing messages with GRPC_WRITE_NO_COMPRESS once the workaround has
+// been switched on for this call.
+static void start_transport_stream_op_batch(
+    grpc_exec_ctx* exec_ctx, grpc_call_element* elem,
+    grpc_transport_stream_op_batch* op) {
+  call_data* calld = elem->call_data;
+
+  if (op->recv_initial_metadata) {
+    // Remember where the metadata will land and who wanted to be notified,
+    // then substitute our own closure as the notification target.
+    calld->recv_initial_metadata =
+        op->payload->recv_initial_metadata.recv_initial_metadata;
+    calld->next_recv_initial_metadata_ready =
+        op->payload->recv_initial_metadata.recv_initial_metadata_ready;
+    op->payload->recv_initial_metadata.recv_initial_metadata_ready =
+        &calld->recv_initial_metadata_ready;
+  }
+
+  /* Send message happens after client's user-agent (initial metadata) is
+   * received, so workaround_active must be set already */
+  if (op->send_message && calld->workaround_active) {
+    op->payload->send_message.send_message->flags |= GRPC_WRITE_NO_COMPRESS;
+  }
+
+  // Pass the (possibly modified) batch on to the next filter.
+  grpc_call_next_op(exec_ctx, elem, op);
+}
+
+// Sets up call_data for a new call: binds the interception closure to this
+// element and starts with the workaround off and no chained callback.
+static grpc_error* init_call_elem(grpc_exec_ctx* exec_ctx,
+                                  grpc_call_element* elem,
+                                  const grpc_call_element_args* args) {
+  call_data* calld = elem->call_data;
+  grpc_closure_init(&calld->recv_initial_metadata_ready,
+                    recv_initial_metadata_ready, elem,
+                    grpc_schedule_on_exec_ctx);
+  calld->workaround_active = false;
+  calld->next_recv_initial_metadata_ready = NULL;
+  return GRPC_ERROR_NONE;
+}
+
+// Destructor for call_data. Intentionally empty: init_call_elem() only fills
+// in fields embedded in call_data, so this filter has nothing to release.
+// All arguments are unused.
+static void destroy_call_elem(grpc_exec_ctx* exec_ctx, grpc_call_element* elem,
+ const grpc_call_final_info* final_info,
+ grpc_closure* ignored) {}
+
+// Constructor for channel_data. Performs no setup and always reports
+// GRPC_ERROR_NONE; all arguments are unused.
+static grpc_error* init_channel_elem(grpc_exec_ctx* exec_ctx,
+ grpc_channel_element* elem,
+ grpc_channel_element_args* args) {
+ return GRPC_ERROR_NONE;
+}
+
+// Destructor for channel_data. Intentionally empty, mirroring
+// init_channel_elem() which sets nothing up.
+static void destroy_channel_elem(grpc_exec_ctx* exec_ctx,
+ grpc_channel_element* elem) {}
+
+// Decides from the user-agent metadata value whether the cronet compression
+// workaround applies to this peer. It applies when a space-separated token
+// starting with "grpc-objc/" is followed by a later token starting with
+// "cronet_http", and the version after "grpc-objc/" is 1.3 or older.
+//
+// The user agent is supplied by the peer, so it is parsed defensively:
+//  - a missing or non-numeric major/minor component counts as 0 (the same
+//    value numeric conversion gives for non-numeric text) instead of passing
+//    a NULL pointer to the number parser;
+//  - tokens are located with strspn()/strcspn() on our private copy of the
+//    string rather than strtok(), which keeps hidden static state shared by
+//    every caller;
+//  - strtol() is used for conversion because it saturates on overflow.
+//
+// Returns true when the workaround should be active for |md|.
+static bool parse_user_agent(grpc_mdelem md) {
+  const char grpc_objc_specifier[] = "grpc-objc/";
+  const size_t grpc_objc_specifier_len = sizeof(grpc_objc_specifier) - 1;
+  const char cronet_specifier[] = "cronet_http";
+  const size_t cronet_specifier_len = sizeof(cronet_specifier) - 1;
+
+  char* user_agent_str = grpc_slice_to_c_string(GRPC_MDVALUE(md));
+  char* version_str = NULL;  // text right after "grpc-objc/", once seen
+  bool cronet_specifier_seen = false;
+  bool workaround_needed = false;
+
+  // Walk the space-separated tokens; runs of spaces are skipped.
+  char* head = user_agent_str + strspn(user_agent_str, " ");
+  while (*head != '\0') {
+    if (version_str == NULL) {
+      if (0 == strncmp(head, grpc_objc_specifier, grpc_objc_specifier_len)) {
+        version_str = head + grpc_objc_specifier_len;
+      }
+    } else if (0 == strncmp(head, cronet_specifier, cronet_specifier_len)) {
+      cronet_specifier_seen = true;
+      break;
+    }
+    head += strcspn(head, " ");
+    head += strspn(head, " ");
+  }
+
+  if (version_str != NULL && cronet_specifier_seen) {
+    // Cut the version token out of our copy, then split "<major>.<minor>...".
+    version_str[strcspn(version_str, " ")] = '\0';
+    char* major_str = version_str + strspn(version_str, ".");
+    char* minor_str = major_str + strcspn(major_str, ".");
+    if (*minor_str != '\0') {
+      *minor_str++ = '\0';
+      minor_str += strspn(minor_str, ".");
+      minor_str[strcspn(minor_str, ".")] = '\0';
+    }
+    // Either string may be empty here; strtol() then yields 0.
+    const long major_version = strtol(major_str, NULL, 10);
+    const long minor_version = strtol(minor_str, NULL, 10);
+    workaround_needed =
+        major_version < 1 || (major_version == 1 && minor_version <= 3);
+  }
+
+  gpr_free(user_agent_str);
+  return workaround_needed;
+}
+
+// Filter definition for the cronet compression workaround. Only stream op
+// batches are intercepted (start_transport_stream_op_batch); every other
+// operation is delegated to the grpc_*_next_* / ignore helpers. The entries
+// are positional, so their order must match grpc_channel_filter.
+// NOTE(review): the literal 0 presumably is the per-channel data size (this
+// filter keeps per-call state only) -- confirm against grpc_channel_filter.
+const grpc_channel_filter grpc_workaround_cronet_compression_filter = {
+ start_transport_stream_op_batch,
+ grpc_channel_next_op,
+ sizeof(call_data),
+ init_call_elem,
+ grpc_call_stack_ignore_set_pollset_or_pollset_set,
+ destroy_call_elem,
+ 0,
+ init_channel_elem,
+ destroy_channel_elem,
+ grpc_call_next_get_peer,
+ grpc_channel_next_get_info,
+ "workaround_cronet_compression"};
+
+// Channel-init stage: prepends the workaround filter to the stack being built,
+// but only when the channel args contain
+// GRPC_ARG_WORKAROUND_CRONET_COMPRESSION set to true. Returns true when the
+// filter is not wanted, otherwise the result of prepending it.
+static bool register_workaround_cronet_compression(
+    grpc_exec_ctx* exec_ctx, grpc_channel_stack_builder* builder, void* arg) {
+  const grpc_arg* workaround_arg = grpc_channel_args_find(
+      grpc_channel_stack_builder_get_channel_arguments(builder),
+      GRPC_ARG_WORKAROUND_CRONET_COMPRESSION);
+  const bool enabled =
+      workaround_arg != NULL && grpc_channel_arg_get_bool(workaround_arg, false);
+  if (!enabled) {
+    return true;
+  }
+  return grpc_channel_stack_builder_prepend_filter(
+      builder, &grpc_workaround_cronet_compression_filter, NULL, NULL);
+}
+
+// Plugin initialisation. Registers register_workaround_cronet_compression as
+// a GRPC_SERVER_CHANNEL init stage at GRPC_WORKAROUND_PRIORITY_HIGH, and
+// registers parse_user_agent as the user-agent parser for
+// GRPC_WORKAROUND_ID_CRONET_COMPRESSION.
+void grpc_workaround_cronet_compression_filter_init(void) {
+ grpc_channel_init_register_stage(
+ GRPC_SERVER_CHANNEL, GRPC_WORKAROUND_PRIORITY_HIGH,
+ register_workaround_cronet_compression, NULL);
+ grpc_register_workaround(GRPC_WORKAROUND_ID_CRONET_COMPRESSION,
+ parse_user_agent);
+}
+
+// Plugin shutdown counterpart of the init function above; currently a no-op.
+void grpc_workaround_cronet_compression_filter_shutdown(void) {}
diff --git a/src/core/ext/filters/workarounds/workaround_cronet_compression_filter.h b/src/core/ext/filters/workarounds/workaround_cronet_compression_filter.h
new file mode 100644
index 0000000000..58c79a0c00
--- /dev/null
+++ b/src/core/ext/filters/workarounds/workaround_cronet_compression_filter.h
@@ -0,0 +1,40 @@
+//
+// Copyright 2017, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef GRPC_CORE_EXT_FILTERS_WORKAROUNDS_WORKAROUND_CRONET_COMPRESSION_FILTER_H
+#define GRPC_CORE_EXT_FILTERS_WORKAROUNDS_WORKAROUND_CRONET_COMPRESSION_FILTER_H
+
+#include "src/core/lib/channel/channel_stack.h"
+
+extern const grpc_channel_filter grpc_workaround_cronet_compression_filter;
+
+#endif /* GRPC_CORE_EXT_FILTERS_WORKAROUNDS_WORKAROUND_CRONET_COMPRESSION_FILTER_H \
+ */
diff --git a/src/core/ext/filters/workarounds/workaround_utils.c b/src/core/ext/filters/workarounds/workaround_utils.c
new file mode 100644
index 0000000000..1c565388e1
--- /dev/null
+++ b/src/core/ext/filters/workarounds/workaround_utils.c
@@ -0,0 +1,65 @@
+//
+// Copyright 2017, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+#include "src/core/ext/filters/workarounds/workaround_utils.h"
+
+#include <grpc/support/alloc.h>
+#include <grpc/support/log.h>
+
+// User-agent parsers indexed by workaround id. Slots are filled in by
+// grpc_register_workaround(); as a file-scope array it starts out all-NULL,
+// and grpc_parse_user_agent() skips slots that are still NULL.
+user_agent_parser ua_parser[GRPC_MAX_WORKAROUND_ID];
+
+// Destroy callback handed to grpc_mdelem_{get,set}_user_data(): frees a
+// grpc_workaround_user_agent_md that grpc_parse_user_agent() allocated with
+// gpr_malloc().
+static void destroy_user_agent_md(void *user_agent_md) {
+ gpr_free(user_agent_md);
+}
+
+// Returns the per-workaround verdicts for the user-agent element |md|.
+//
+// The verdicts are cached as user data on |md| (keyed by
+// destroy_user_agent_md), so the registered parsers only run the first time a
+// given element is seen; later calls return the cached struct.
+grpc_workaround_user_agent_md *grpc_parse_user_agent(grpc_mdelem md) {
+  grpc_workaround_user_agent_md *user_agent_md =
+      (grpc_workaround_user_agent_md *)grpc_mdelem_get_user_data(
+          md, destroy_user_agent_md);
+
+  if (NULL != user_agent_md) {
+    return user_agent_md;
+  }
+  user_agent_md = gpr_malloc(sizeof(grpc_workaround_user_agent_md));
+  for (int i = 0; i < GRPC_MAX_WORKAROUND_ID; i++) {
+    // The freshly allocated struct is not initialised anywhere else, so give
+    // every slot a defined value -- ids without a registered parser are off.
+    user_agent_md->workaround_active[i] =
+        (ua_parser[i] != NULL) && ua_parser[i](md);
+  }
+  // NOTE(review): the return value of grpc_mdelem_set_user_data() is ignored.
+  // If it can decline to store (and destroy) the supplied data -- e.g. when
+  // user data is already attached -- the pointer returned below would dangle.
+  // Confirm against its contract.
+  grpc_mdelem_set_user_data(md, destroy_user_agent_md, (void *)user_agent_md);
+
+  return user_agent_md;
+}
+
+// Installs |parser| as the user-agent parser for workaround |id|, replacing
+// any parser stored there before. Asserts that |id| is below
+// GRPC_MAX_WORKAROUND_ID.
+void grpc_register_workaround(uint32_t id, user_agent_parser parser) {
+ GPR_ASSERT(id < GRPC_MAX_WORKAROUND_ID);
+ ua_parser[id] = parser;
+}
diff --git a/src/core/ext/filters/workarounds/workaround_utils.h b/src/core/ext/filters/workarounds/workaround_utils.h
new file mode 100644
index 0000000000..7cd70c12d8
--- /dev/null
+++ b/src/core/ext/filters/workarounds/workaround_utils.h
@@ -0,0 +1,52 @@
+//
+// Copyright 2017, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef GRPC_CORE_EXT_FILTERS_WORKAROUNDS_WORKAROUND_UTILS_H
+#define GRPC_CORE_EXT_FILTERS_WORKAROUNDS_WORKAROUND_UTILS_H
+
+#include <grpc/support/workaround_list.h>
+
+#include "src/core/lib/transport/metadata.h"
+
+// Channel-init stage priorities used when registering workaround filters.
+#define GRPC_WORKAROUND_PRIORITY_HIGH 10001
+#define GRPC_WORKAROUND_PRIORITY_LOW 9999
+// Original, misspelled name of GRPC_WORKAROUND_PRIORITY_LOW; kept as an alias
+// so existing users keep compiling.
+#define GRPC_WORKAROUND_PROIRITY_LOW GRPC_WORKAROUND_PRIORITY_LOW
+
+// Parsed verdicts for one user-agent metadata element: workaround_active[id]
+// holds what the parser registered for workaround |id| returned.
+typedef struct grpc_workaround_user_agent_md {
+ bool workaround_active[GRPC_MAX_WORKAROUND_ID];
+} grpc_workaround_user_agent_md;
+
+// Returns the workaround verdicts for the user-agent metadata element |md|.
+grpc_workaround_user_agent_md *grpc_parse_user_agent(grpc_mdelem md);
+
+// Signature of a per-workaround parser: inspects a user-agent metadata element
+// and returns whether that workaround should be active.
+typedef bool (*user_agent_parser)(grpc_mdelem);
+
+// Registers |parser| for workaround |id|; |id| must be below
+// GRPC_MAX_WORKAROUND_ID.
+void grpc_register_workaround(uint32_t id, user_agent_parser parser);
+
+#endif
diff --git a/src/core/ext/transport/chttp2/client/insecure/channel_create.c b/src/core/ext/transport/chttp2/client/insecure/channel_create.c
index 9c8505ddfa..ad674b8eb4 100644
--- a/src/core/ext/transport/chttp2/client/insecure/channel_create.c
+++ b/src/core/ext/transport/chttp2/client/insecure/channel_create.c
@@ -101,7 +101,7 @@ grpc_channel *grpc_insecure_channel_create(const char *target,
void *reserved) {
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
GRPC_API_TRACE(
- "grpc_insecure_channel_create(target=%p, args=%p, reserved=%p)", 3,
+ "grpc_insecure_channel_create(target=%s, args=%p, reserved=%p)", 3,
(target, args, reserved));
GPR_ASSERT(reserved == NULL);
// Add channel arg containing the client channel factory.
diff --git a/src/core/ext/transport/chttp2/transport/chttp2_transport.c b/src/core/ext/transport/chttp2/transport/chttp2_transport.c
index d6b79bd492..f3268bcfca 100644
--- a/src/core/ext/transport/chttp2/transport/chttp2_transport.c
+++ b/src/core/ext/transport/chttp2/transport/chttp2_transport.c
@@ -89,8 +89,8 @@ static bool g_default_keepalive_permit_without_calls =
DEFAULT_KEEPALIVE_PERMIT_WITHOUT_CALLS;
#define MAX_CLIENT_STREAM_ID 0x7fffffffu
-int grpc_http_trace = 0;
-int grpc_flowctl_trace = 0;
+grpc_tracer_flag grpc_http_trace = GRPC_TRACER_INITIALIZER(false);
+grpc_tracer_flag grpc_flowctl_trace = GRPC_TRACER_INITIALIZER(false);
static const grpc_transport_vtable vtable;
@@ -884,14 +884,23 @@ static void write_action_begin_locked(grpc_exec_ctx *exec_ctx, void *gt,
GPR_TIMER_BEGIN("write_action_begin_locked", 0);
grpc_chttp2_transport *t = gt;
GPR_ASSERT(t->write_state != GRPC_CHTTP2_WRITE_STATE_IDLE);
- if (!t->closed && grpc_chttp2_begin_write(exec_ctx, t)) {
- set_write_state(exec_ctx, t, GRPC_CHTTP2_WRITE_STATE_WRITING,
- "begin writing");
- grpc_closure_sched(exec_ctx, &t->write_action, GRPC_ERROR_NONE);
- } else {
- set_write_state(exec_ctx, t, GRPC_CHTTP2_WRITE_STATE_IDLE,
- "begin writing nothing");
- GRPC_CHTTP2_UNREF_TRANSPORT(exec_ctx, t, "writing");
+ switch (t->closed ? GRPC_CHTTP2_NOTHING_TO_WRITE
+ : grpc_chttp2_begin_write(exec_ctx, t)) {
+ case GRPC_CHTTP2_NOTHING_TO_WRITE:
+ set_write_state(exec_ctx, t, GRPC_CHTTP2_WRITE_STATE_IDLE,
+ "begin writing nothing");
+ GRPC_CHTTP2_UNREF_TRANSPORT(exec_ctx, t, "writing");
+ break;
+ case GRPC_CHTTP2_PARTIAL_WRITE:
+ set_write_state(exec_ctx, t, GRPC_CHTTP2_WRITE_STATE_WRITING_WITH_MORE,
+ "begin writing partial");
+ grpc_closure_sched(exec_ctx, &t->write_action, GRPC_ERROR_NONE);
+ break;
+ case GRPC_CHTTP2_FULL_WRITE:
+ set_write_state(exec_ctx, t, GRPC_CHTTP2_WRITE_STATE_WRITING,
+ "begin writing");
+ grpc_closure_sched(exec_ctx, &t->write_action, GRPC_ERROR_NONE);
+ break;
}
GPR_TIMER_END("write_action_begin_locked", 0);
}
@@ -988,7 +997,7 @@ void grpc_chttp2_add_incoming_goaway(grpc_exec_ctx *exec_ctx,
t->seen_goaway = 1;
/* When a client receives a GOAWAY with error code ENHANCE_YOUR_CALM and debug
- * data equal to “too_many_pings”, it should log the occurrence at a log level
+ * data equal to "too_many_pings", it should log the occurrence at a log level
* that is enabled by default and double the configured KEEPALIVE_TIME used
* for new connections on that channel. */
if (t->is_client && goaway_error == GRPC_HTTP2_ENHANCE_YOUR_CALM &&
@@ -1095,7 +1104,7 @@ void grpc_chttp2_complete_closure_step(grpc_exec_ctx *exec_ctx,
return;
}
closure->next_data.scratch -= CLOSURE_BARRIER_FIRST_REF_BIT;
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
const char *errstr = grpc_error_string(error);
gpr_log(GPR_DEBUG,
"complete_closure_step: %p refs=%d flags=0x%04x desc=%s err=%s",
@@ -1240,7 +1249,7 @@ static void perform_stream_op_locked(grpc_exec_ctx *exec_ctx, void *stream_op,
grpc_transport_stream_op_batch_payload *op_payload = op->payload;
grpc_chttp2_transport *t = s->t;
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
char *str = grpc_transport_stream_op_batch_string(op);
gpr_log(GPR_DEBUG, "perform_stream_op_locked: %s; on_complete = %p", str,
op->on_complete);
@@ -1483,9 +1492,9 @@ static void perform_stream_op(grpc_exec_ctx *exec_ctx, grpc_transport *gt,
grpc_chttp2_transport *t = (grpc_chttp2_transport *)gt;
grpc_chttp2_stream *s = (grpc_chttp2_stream *)gs;
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
char *str = grpc_transport_stream_op_batch_string(op);
- gpr_log(GPR_DEBUG, "perform_stream_op[s=%p/%d]: %s", s, s->id, str);
+ gpr_log(GPR_DEBUG, "perform_stream_op[s=%p]: %s", s, str);
gpr_free(str);
}
@@ -2130,27 +2139,41 @@ static void end_all_the_calls(grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t,
+// Pushes a new local INITIAL_WINDOW_SIZE setting derived from the
+// bandwidth-delay-product guess |bdp_dbl|. The guess is saturated to
+// [128, INT32_MAX]; nothing is sent when the new value is within 10% of the
+// currently configured one.
static void update_bdp(grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t,
double bdp_dbl) {
- uint32_t bdp;
- if (bdp_dbl <= 0) {
- bdp = 0;
- } else if (bdp_dbl > UINT32_MAX) {
- bdp = UINT32_MAX;
- } else {
- bdp = (uint32_t)(bdp_dbl);
- }
+ // initial window size bounded [1,2^31-1], but we set the min to 128.
+ // Saturate while the value is still a double: converting an out-of-range
+ // (or NaN) double straight to int32_t is undefined behaviour.
+ int32_t bdp =
+ !(bdp_dbl > 128.0)
+ ? 128
+ : (bdp_dbl >= (double)INT32_MAX ? INT32_MAX : (int32_t)bdp_dbl);
int64_t delta =
(int64_t)bdp -
(int64_t)t->settings[GRPC_LOCAL_SETTINGS]
[GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE];
- if (delta == 0 || (bdp != 0 && delta > -1024 && delta < 1024)) {
+ if (delta == 0 || (delta > -bdp / 10 && delta < bdp / 10)) {
return;
}
- if (grpc_bdp_estimator_trace) {
+ if (GRPC_TRACER_ON(grpc_bdp_estimator_trace)) {
gpr_log(GPR_DEBUG, "%s: update initial window size to %d", t->peer_string,
(int)bdp);
}
- push_setting(exec_ctx, t, GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE, bdp);
- push_setting(exec_ctx, t, GRPC_CHTTP2_SETTINGS_MAX_FRAME_SIZE, bdp);
+ push_setting(exec_ctx, t, GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE,
+ (uint32_t)bdp);
+}
+
+// Pushes a new local MAX_FRAME_SIZE setting. The target is the larger of the
+// bandwidth estimate |bw_dbl| divided by 1000 and the (saturated) BDP guess
+// |bdp_dbl|, bounded to the legal frame-size range. Nothing is sent when the
+// new value is within 10% of the currently configured one. A non-positive
+// |bdp_dbl| (the caller passes -1 when it has no guess) becomes the 128 floor.
+static void update_frame(grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t,
+                         double bw_dbl, double bdp_dbl) {
+  // frame size is bounded [2^14,2^24-1]
+  const int32_t min_frame_size = 16384;
+  const int32_t max_frame_size = 16777215;
+
+  // Saturate while the values are still doubles: converting an out-of-range
+  // (or NaN) double straight to int32_t is undefined behaviour.
+  int32_t bdp =
+      !(bdp_dbl > 128.0)
+          ? 128
+          : (bdp_dbl >= (double)INT32_MAX ? INT32_MAX : (int32_t)bdp_dbl);
+  // Anything at or above max_frame_size * 1000 ends up clamped to
+  // max_frame_size below, so it is safe to cap the quotient there; smaller
+  // values fit an int64_t and are divided exactly as integers.
+  int32_t bw_per_1000 =
+      !(bw_dbl > 0.0)
+          ? 0
+          : (bw_dbl >= (double)max_frame_size * 1000.0
+                 ? max_frame_size
+                 : (int32_t)((int64_t)bw_dbl / 1000));
+  int32_t target = GPR_MAX(bw_per_1000, bdp);
+  int32_t frame_size = GPR_CLAMP(target, min_frame_size, max_frame_size);
+  int64_t delta = (int64_t)frame_size -
+                  (int64_t)t->settings[GRPC_LOCAL_SETTINGS]
+                                      [GRPC_CHTTP2_SETTINGS_MAX_FRAME_SIZE];
+  if (delta == 0 || (delta > -frame_size / 10 && delta < frame_size / 10)) {
+    return;
+  }
+  if (GRPC_TRACER_ON(grpc_bdp_estimator_trace)) {
+    gpr_log(GPR_DEBUG, "%s: update max_frame size to %d", t->peer_string,
+            (int)frame_size);
+  }
+  push_setting(exec_ctx, t, GRPC_CHTTP2_SETTINGS_MAX_FRAME_SIZE,
+               (uint32_t)frame_size);
}
static grpc_error *try_http_parsing(grpc_exec_ctx *exec_ctx,
@@ -2269,6 +2292,7 @@ static void read_action_locked(grpc_exec_ctx *exec_ctx, void *tp,
}
int64_t estimate = -1;
+ double bdp_guess = -1;
if (grpc_bdp_estimator_get_estimate(&t->bdp_estimator, &estimate)) {
double target = 1 + log2((double)estimate);
double memory_pressure = grpc_resource_quota_get_memory_pressure(
@@ -2286,9 +2310,15 @@ static void read_action_locked(grpc_exec_ctx *exec_ctx, void *tp,
}
double log2_bdp_guess =
grpc_pid_controller_update(&t->pid_controller, bdp_error, dt);
- update_bdp(exec_ctx, t, pow(2, log2_bdp_guess));
+ bdp_guess = pow(2, log2_bdp_guess);
+ update_bdp(exec_ctx, t, bdp_guess);
t->last_pid_update = now;
}
+
+ double bw = -1;
+ if (grpc_bdp_estimator_get_bw(&t->bdp_estimator, &bw)) {
+ update_frame(exec_ctx, t, bw, bdp_guess);
+ }
}
GRPC_CHTTP2_UNREF_TRANSPORT(exec_ctx, t, "keep_reading");
} else {
@@ -2305,7 +2335,7 @@ static void read_action_locked(grpc_exec_ctx *exec_ctx, void *tp,
static void start_bdp_ping_locked(grpc_exec_ctx *exec_ctx, void *tp,
grpc_error *error) {
grpc_chttp2_transport *t = tp;
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
gpr_log(GPR_DEBUG, "%s: Start BDP ping", t->peer_string);
}
/* Reset the keepalive ping timer */
@@ -2318,7 +2348,7 @@ static void start_bdp_ping_locked(grpc_exec_ctx *exec_ctx, void *tp,
static void finish_bdp_ping_locked(grpc_exec_ctx *exec_ctx, void *tp,
grpc_error *error) {
grpc_chttp2_transport *t = tp;
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
gpr_log(GPR_DEBUG, "%s: Complete BDP ping", t->peer_string);
}
grpc_bdp_estimator_complete_ping(&t->bdp_estimator);
@@ -2779,7 +2809,7 @@ static void benign_reclaimer_locked(grpc_exec_ctx *exec_ctx, void *arg,
grpc_chttp2_stream_map_size(&t->stream_map) == 0) {
/* Channel with no active streams: send a goaway to try and make it
* disconnect cleanly */
- if (grpc_resource_quota_trace) {
+ if (GRPC_TRACER_ON(grpc_resource_quota_trace)) {
gpr_log(GPR_DEBUG, "HTTP2: %s - send goaway to free memory",
t->peer_string);
}
@@ -2787,7 +2817,8 @@ static void benign_reclaimer_locked(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error_set_int(
GRPC_ERROR_CREATE_FROM_STATIC_STRING("Buffers full"),
GRPC_ERROR_INT_HTTP2_ERROR, GRPC_HTTP2_ENHANCE_YOUR_CALM));
- } else if (error == GRPC_ERROR_NONE && grpc_resource_quota_trace) {
+ } else if (error == GRPC_ERROR_NONE &&
+ GRPC_TRACER_ON(grpc_resource_quota_trace)) {
gpr_log(GPR_DEBUG,
"HTTP2: %s - skip benign reclamation, there are still %" PRIdPTR
" streams",
@@ -2808,7 +2839,7 @@ static void destructive_reclaimer_locked(grpc_exec_ctx *exec_ctx, void *arg,
t->destructive_reclaimer_registered = false;
if (error == GRPC_ERROR_NONE && n > 0) {
grpc_chttp2_stream *s = grpc_chttp2_stream_map_rand(&t->stream_map);
- if (grpc_resource_quota_trace) {
+ if (GRPC_TRACER_ON(grpc_resource_quota_trace)) {
gpr_log(GPR_DEBUG, "HTTP2: %s - abandon stream id %d", t->peer_string,
s->id);
}
diff --git a/src/core/ext/transport/chttp2/transport/chttp2_transport.h b/src/core/ext/transport/chttp2/transport/chttp2_transport.h
index c372174f2d..83b17d1936 100644
--- a/src/core/ext/transport/chttp2/transport/chttp2_transport.h
+++ b/src/core/ext/transport/chttp2/transport/chttp2_transport.h
@@ -34,11 +34,12 @@
#ifndef GRPC_CORE_EXT_TRANSPORT_CHTTP2_TRANSPORT_CHTTP2_TRANSPORT_H
#define GRPC_CORE_EXT_TRANSPORT_CHTTP2_TRANSPORT_CHTTP2_TRANSPORT_H
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/iomgr/endpoint.h"
#include "src/core/lib/transport/transport.h"
-extern int grpc_http_trace;
-extern int grpc_flowctl_trace;
+extern grpc_tracer_flag grpc_http_trace;
+extern grpc_tracer_flag grpc_flowctl_trace;
grpc_transport *grpc_create_chttp2_transport(
grpc_exec_ctx *exec_ctx, const grpc_channel_args *channel_args,
diff --git a/src/core/ext/transport/chttp2/transport/frame_settings.c b/src/core/ext/transport/chttp2/transport/frame_settings.c
index e3cd70d3f3..dbaafb5929 100644
--- a/src/core/ext/transport/chttp2/transport/frame_settings.c
+++ b/src/core/ext/transport/chttp2/transport/frame_settings.c
@@ -218,18 +218,18 @@ grpc_error *grpc_chttp2_settings_parser_parse(grpc_exec_ctx *exec_ctx, void *p,
parser->incoming_settings[id] != parser->value) {
t->initial_window_update +=
(int64_t)parser->value - parser->incoming_settings[id];
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
gpr_log(GPR_DEBUG, "adding %d for initial_window change",
(int)t->initial_window_update);
}
}
parser->incoming_settings[id] = parser->value;
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
gpr_log(GPR_DEBUG, "CHTTP2:%s:%s: got setting %s = %d",
t->is_client ? "CLI" : "SVR", t->peer_string, sp->name,
parser->value);
}
- } else if (grpc_http_trace) {
+ } else if (GRPC_TRACER_ON(grpc_http_trace)) {
gpr_log(GPR_ERROR, "CHTTP2: Ignoring unknown setting %d (value %d)",
parser->id, parser->value);
}
diff --git a/src/core/ext/transport/chttp2/transport/hpack_encoder.c b/src/core/ext/transport/chttp2/transport/hpack_encoder.c
index 8fdd4ee77c..126e012aac 100644
--- a/src/core/ext/transport/chttp2/transport/hpack_encoder.c
+++ b/src/core/ext/transport/chttp2/transport/hpack_encoder.c
@@ -69,7 +69,7 @@ static grpc_slice_refcount terminal_slice_refcount = {NULL, NULL};
static const grpc_slice terminal_slice = {&terminal_slice_refcount,
.data.refcounted = {0, 0}};
-extern int grpc_http_trace;
+extern grpc_tracer_flag grpc_http_trace;
typedef struct {
int is_first_frame;
@@ -425,7 +425,7 @@ static void hpack_enc(grpc_exec_ctx *exec_ctx, grpc_chttp2_hpack_compressor *c,
"Reserved header (colon-prefixed) happening after regular ones.");
}
- if (grpc_http_trace && !GRPC_MDELEM_IS_INTERNED(elem)) {
+ if (GRPC_TRACER_ON(grpc_http_trace) && !GRPC_MDELEM_IS_INTERNED(elem)) {
char *k = grpc_slice_to_c_string(GRPC_MDKEY(elem));
char *v = grpc_slice_to_c_string(GRPC_MDVALUE(elem));
gpr_log(
@@ -616,7 +616,7 @@ void grpc_chttp2_hpack_compressor_set_max_table_size(
}
}
c->advertise_table_size_change = 1;
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
gpr_log(GPR_DEBUG, "set max table size from encoder to %d", max_table_size);
}
}
diff --git a/src/core/ext/transport/chttp2/transport/hpack_parser.c b/src/core/ext/transport/chttp2/transport/hpack_parser.c
index 1846a85fc6..bb98bc4a79 100644
--- a/src/core/ext/transport/chttp2/transport/hpack_parser.c
+++ b/src/core/ext/transport/chttp2/transport/hpack_parser.c
@@ -50,8 +50,6 @@
#include "src/core/lib/support/string.h"
#include "src/core/lib/transport/http2_errors.h"
-extern int grpc_http_trace;
-
typedef enum {
NOT_BINARY,
BINARY_BEGIN,
@@ -666,7 +664,7 @@ static const uint8_t inverse_base64[256] = {
/* emission helpers */
static grpc_error *on_hdr(grpc_exec_ctx *exec_ctx, grpc_chttp2_hpack_parser *p,
grpc_mdelem md, int add_to_table) {
- if (grpc_http_trace && !GRPC_MDELEM_IS_INTERNED(md)) {
+ if (GRPC_TRACER_ON(grpc_http_trace) && !GRPC_MDELEM_IS_INTERNED(md)) {
char *k = grpc_slice_to_c_string(GRPC_MDKEY(md));
char *v = grpc_slice_to_c_string(GRPC_MDVALUE(md));
gpr_log(
@@ -1052,7 +1050,7 @@ static grpc_error *parse_lithdr_nvridx_v(grpc_exec_ctx *exec_ctx,
static grpc_error *finish_max_tbl_size(grpc_exec_ctx *exec_ctx,
grpc_chttp2_hpack_parser *p,
const uint8_t *cur, const uint8_t *end) {
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
gpr_log(GPR_INFO, "MAX TABLE SIZE: %d", p->index);
}
grpc_error *err =
diff --git a/src/core/ext/transport/chttp2/transport/hpack_table.c b/src/core/ext/transport/chttp2/transport/hpack_table.c
index 9dd41fdbe1..7aaff55339 100644
--- a/src/core/ext/transport/chttp2/transport/hpack_table.c
+++ b/src/core/ext/transport/chttp2/transport/hpack_table.c
@@ -40,9 +40,10 @@
#include <grpc/support/log.h>
#include <grpc/support/string_util.h>
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/support/murmur_hash.h"
-extern int grpc_http_trace;
+extern grpc_tracer_flag grpc_http_trace;
static struct {
const char *key;
@@ -260,7 +261,7 @@ void grpc_chttp2_hptbl_set_max_bytes(grpc_exec_ctx *exec_ctx,
if (tbl->max_bytes == max_bytes) {
return;
}
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
gpr_log(GPR_DEBUG, "Update hpack parser max size to %d", max_bytes);
}
while (tbl->mem_used > max_bytes) {
@@ -284,7 +285,7 @@ grpc_error *grpc_chttp2_hptbl_set_current_table_size(grpc_exec_ctx *exec_ctx,
gpr_free(msg);
return err;
}
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
gpr_log(GPR_DEBUG, "Update hpack parser table size to %d", bytes);
}
while (tbl->mem_used > bytes) {
diff --git a/src/core/ext/transport/chttp2/transport/internal.h b/src/core/ext/transport/chttp2/transport/internal.h
index 0aaa4aebe5..8d66e396ee 100644
--- a/src/core/ext/transport/chttp2/transport/internal.h
+++ b/src/core/ext/transport/chttp2/transport/internal.h
@@ -552,9 +552,14 @@ void grpc_chttp2_initiate_write(grpc_exec_ctx *exec_ctx,
grpc_chttp2_transport *t,
bool covered_by_poller, const char *reason);
-/** Someone is unlocking the transport mutex: check to see if writes
- are required, and frame them if so */
-bool grpc_chttp2_begin_write(grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t);
+typedef enum {
+ GRPC_CHTTP2_NOTHING_TO_WRITE,
+ GRPC_CHTTP2_PARTIAL_WRITE,
+ GRPC_CHTTP2_FULL_WRITE,
+} grpc_chttp2_begin_write_result;
+
+grpc_chttp2_begin_write_result grpc_chttp2_begin_write(
+ grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t);
void grpc_chttp2_end_write(grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t,
grpc_error *error);
@@ -629,13 +634,13 @@ void grpc_chttp2_complete_closure_step(grpc_exec_ctx *exec_ctx,
#define GRPC_CHTTP2_CLIENT_CONNECT_STRLEN \
(sizeof(GRPC_CHTTP2_CLIENT_CONNECT_STRING) - 1)
-extern int grpc_http_trace;
-extern int grpc_flowctl_trace;
+extern grpc_tracer_flag grpc_http_trace;
+extern grpc_tracer_flag grpc_flowctl_trace;
-#define GRPC_CHTTP2_IF_TRACING(stmt) \
- if (!(grpc_http_trace)) \
- ; \
- else \
+#define GRPC_CHTTP2_IF_TRACING(stmt) \
+ if (!(GRPC_TRACER_ON(grpc_http_trace))) \
+ ; \
+ else \
stmt
typedef enum {
@@ -648,7 +653,7 @@ typedef enum {
dst_var, src_context, src_var) \
do { \
assert(id1 == id2); \
- if (grpc_flowctl_trace) { \
+ if (GRPC_TRACER_ON(grpc_flowctl_trace)) { \
grpc_chttp2_flowctl_trace( \
__FILE__, __LINE__, phase, GRPC_CHTTP2_FLOWCTL_MOVE, #dst_context, \
#dst_var, #src_context, #src_var, transport->is_client, id1, \
@@ -671,7 +676,7 @@ typedef enum {
#define GRPC_CHTTP2_FLOW_CREDIT_COMMON(phase, transport, id, dst_context, \
dst_var, amount) \
do { \
- if (grpc_flowctl_trace) { \
+ if (GRPC_TRACER_ON(grpc_flowctl_trace)) { \
grpc_chttp2_flowctl_trace(__FILE__, __LINE__, phase, \
GRPC_CHTTP2_FLOWCTL_CREDIT, #dst_context, \
#dst_var, NULL, #amount, transport->is_client, \
@@ -729,7 +734,7 @@ typedef enum {
#define GRPC_CHTTP2_FLOW_DEBIT_COMMON(phase, transport, id, dst_context, \
dst_var, amount) \
do { \
- if (grpc_flowctl_trace) { \
+ if (GRPC_TRACER_ON(grpc_flowctl_trace)) { \
grpc_chttp2_flowctl_trace(__FILE__, __LINE__, phase, \
GRPC_CHTTP2_FLOWCTL_DEBIT, #dst_context, \
#dst_var, NULL, #amount, transport->is_client, \
@@ -815,7 +820,7 @@ void grpc_chttp2_ack_ping(grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t,
/** Add a new ping strike to ping_recv_state.ping_strikes. If
ping_recv_state.ping_strikes > ping_policy.max_ping_strikes, it sends GOAWAY
with error code ENHANCE_YOUR_CALM and additional debug data resembling
- “too_many_pings” followed by immediately closing the connection. */
+ "too_many_pings" followed by immediately closing the connection. */
void grpc_chttp2_add_ping_strike(grpc_exec_ctx *exec_ctx,
grpc_chttp2_transport *t);
diff --git a/src/core/ext/transport/chttp2/transport/parsing.c b/src/core/ext/transport/chttp2/transport/parsing.c
index 638b137316..1a676e2259 100644
--- a/src/core/ext/transport/chttp2/transport/parsing.c
+++ b/src/core/ext/transport/chttp2/transport/parsing.c
@@ -324,7 +324,7 @@ static grpc_error *init_frame_parser(grpc_exec_ctx *exec_ctx,
case GRPC_CHTTP2_FRAME_GOAWAY:
return init_goaway_parser(exec_ctx, t);
default:
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
gpr_log(GPR_ERROR, "Unknown frame type %02x", t->incoming_frame_type);
}
return init_skip_frame_parser(exec_ctx, t, 0);
@@ -418,11 +418,9 @@ static grpc_error *update_incoming_window(grpc_exec_ctx *exec_ctx,
GRPC_CHTTP2_FLOW_DEBIT_STREAM_INCOMING_WINDOW_DELTA("parse", t, s,
incoming_frame_size);
- if ((int64_t)t->settings[GRPC_SENT_SETTINGS]
- [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE] +
- (int64_t)s->incoming_window_delta - (int64_t)s->announce_window <=
- (int64_t)t->settings[GRPC_SENT_SETTINGS]
- [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE] /
+ if ((int64_t)s->incoming_window_delta - (int64_t)s->announce_window <=
+ -(int64_t)t->settings[GRPC_SENT_SETTINGS]
+ [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE] /
2) {
grpc_chttp2_become_writable(exec_ctx, t, s,
GRPC_CHTTP2_STREAM_WRITE_INITIATE_UNCOVERED,
@@ -494,7 +492,7 @@ static void on_initial_header(grpc_exec_ctx *exec_ctx, void *tp,
GPR_ASSERT(s != NULL);
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
char *key = grpc_slice_to_c_string(GRPC_MDKEY(md));
char *value =
grpc_dump_slice(GRPC_MDVALUE(md), GPR_DUMP_HEX | GPR_DUMP_ASCII);
@@ -574,7 +572,7 @@ static void on_trailing_header(grpc_exec_ctx *exec_ctx, void *tp,
GPR_ASSERT(s != NULL);
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
char *key = grpc_slice_to_c_string(GRPC_MDKEY(md));
char *value =
grpc_dump_slice(GRPC_MDVALUE(md), GPR_DUMP_HEX | GPR_DUMP_ASCII);
@@ -807,7 +805,7 @@ static grpc_error *parse_frame_slice(grpc_exec_ctx *exec_ctx,
if (err == GRPC_ERROR_NONE) {
return err;
} else if (grpc_error_get_int(err, GRPC_ERROR_INT_STREAM_ID, NULL)) {
- if (grpc_http_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace)) {
const char *msg = grpc_error_string(err);
gpr_log(GPR_ERROR, "%s", msg);
}
diff --git a/src/core/ext/transport/chttp2/transport/writing.c b/src/core/ext/transport/chttp2/transport/writing.c
index 069780ae5a..5be1092946 100644
--- a/src/core/ext/transport/chttp2/transport/writing.c
+++ b/src/core/ext/transport/chttp2/transport/writing.c
@@ -74,7 +74,8 @@ static void maybe_initiate_ping(grpc_exec_ctx *exec_ctx,
}
if (!grpc_closure_list_empty(pq->lists[GRPC_CHTTP2_PCL_INFLIGHT])) {
/* ping already in-flight: wait */
- if (grpc_http_trace || grpc_bdp_estimator_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace) ||
+ GRPC_TRACER_ON(grpc_bdp_estimator_trace)) {
gpr_log(GPR_DEBUG, "Ping delayed [%p]: already pinging", t->peer_string);
}
return;
@@ -82,7 +83,8 @@ static void maybe_initiate_ping(grpc_exec_ctx *exec_ctx,
if (t->ping_state.pings_before_data_required == 0 &&
t->ping_policy.max_pings_without_data != 0) {
/* need to send something of substance before sending a ping again */
- if (grpc_http_trace || grpc_bdp_estimator_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace) ||
+ GRPC_TRACER_ON(grpc_bdp_estimator_trace)) {
gpr_log(GPR_DEBUG, "Ping delayed [%p]: too many recent pings: %d/%d",
t->peer_string, t->ping_state.pings_before_data_required,
t->ping_policy.max_pings_without_data);
@@ -96,7 +98,8 @@ static void maybe_initiate_ping(grpc_exec_ctx *exec_ctx,
(int)t->ping_policy.min_time_between_pings.tv_nsec);*/
if (gpr_time_cmp(elapsed, t->ping_policy.min_time_between_pings) < 0) {
/* not enough elapsed time between successive pings */
- if (grpc_http_trace || grpc_bdp_estimator_trace) {
+ if (GRPC_TRACER_ON(grpc_http_trace) ||
+ GRPC_TRACER_ON(grpc_bdp_estimator_trace)) {
gpr_log(GPR_DEBUG,
"Ping delayed [%p]: not enough time elapsed since last ping",
t->peer_string);
@@ -160,19 +163,22 @@ static bool stream_ref_if_not_destroyed(gpr_refcount *r) {
return true;
}
+/* How many bytes of incoming flow control would we like to advertise */
uint32_t grpc_chttp2_target_incoming_window(grpc_chttp2_transport *t) {
- return (uint32_t)GPR_MAX(
+ return (uint32_t)GPR_MIN(
(int64_t)((1u << 31) - 1),
t->stream_total_over_incoming_window +
- (int64_t)GPR_MAX(
- t->settings[GRPC_SENT_SETTINGS]
- [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE] -
- t->stream_total_under_incoming_window,
- 0));
+ t->settings[GRPC_SENT_SETTINGS]
+ [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE]);
}
-bool grpc_chttp2_begin_write(grpc_exec_ctx *exec_ctx,
- grpc_chttp2_transport *t) {
+/* How many bytes would we like to put on the wire during a single syscall */
+static uint32_t target_write_size(grpc_chttp2_transport *t) {
+ return 1024 * 1024;
+}
+
+grpc_chttp2_begin_write_result grpc_chttp2_begin_write(
+ grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t) {
grpc_chttp2_stream *s;
GPR_TIMER_BEGIN("grpc_chttp2_begin_write", 0);
@@ -206,9 +212,20 @@ bool grpc_chttp2_begin_write(grpc_exec_ctx *exec_ctx,
}
}
+ bool partial_write = false;
+
/* for each grpc_chttp2_stream that's become writable, frame it's data
(according to available window sizes) and add to the output buffer */
- while (grpc_chttp2_list_pop_writable_stream(t, &s)) {
+ while (true) {
+ if (t->outbuf.length > target_write_size(t)) {
+ partial_write = true;
+ break;
+ }
+
+ if (!grpc_chttp2_list_pop_writable_stream(t, &s)) {
+ break;
+ }
+
bool sent_initial_metadata = s->sent_initial_metadata;
bool now_writing = false;
@@ -395,7 +412,9 @@ bool grpc_chttp2_begin_write(grpc_exec_ctx *exec_ctx,
GPR_TIMER_END("grpc_chttp2_begin_write", 0);
- return t->outbuf.count > 0;
+ return t->outbuf.count > 0 ? (partial_write ? GRPC_CHTTP2_PARTIAL_WRITE
+ : GRPC_CHTTP2_FULL_WRITE)
+ : GRPC_CHTTP2_NOTHING_TO_WRITE;
}
void grpc_chttp2_end_write(grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t,
diff --git a/src/core/ext/transport/cronet/transport/cronet_transport.c b/src/core/ext/transport/cronet/transport/cronet_transport.c
index d4e89d6a6c..67974b0b6a 100644
--- a/src/core/ext/transport/cronet/transport/cronet_transport.c
+++ b/src/core/ext/transport/cronet/transport/cronet_transport.c
@@ -886,6 +886,10 @@ static bool op_can_be_run(grpc_transport_stream_op_batch *curr_op,
!stream_state->state_op_done[OP_RECV_MESSAGE]) {
CRONET_LOG(GPR_DEBUG, "Because");
result = false;
+ } else if (curr_op->cancel_stream &&
+ !stream_state->state_callback_received[OP_CANCELED]) {
+ CRONET_LOG(GPR_DEBUG, "Because");
+ result = false;
} else if (curr_op->recv_trailing_metadata) {
/* We aren't done with trailing metadata yet */
if (!stream_state->state_op_done[OP_RECV_TRAILING_METADATA]) {
diff --git a/src/core/lib/channel/channel_args.c b/src/core/lib/channel/channel_args.c
index 238d176dfa..247b134938 100644
--- a/src/core/lib/channel/channel_args.c
+++ b/src/core/lib/channel/channel_args.c
@@ -31,6 +31,8 @@
*
*/
+#include <grpc/support/port_platform.h>
+
#include <limits.h>
#include <string.h>
diff --git a/src/core/lib/channel/channel_stack.c b/src/core/lib/channel/channel_stack.c
index 94382980eb..7db54d1107 100644
--- a/src/core/lib/channel/channel_stack.c
+++ b/src/core/lib/channel/channel_stack.c
@@ -38,7 +38,7 @@
#include <stdlib.h>
#include <string.h>
-int grpc_trace_channel = 0;
+grpc_tracer_flag grpc_trace_channel = GRPC_TRACER_INITIALIZER(false);
/* Memory layouts.
diff --git a/src/core/lib/channel/channel_stack.h b/src/core/lib/channel/channel_stack.h
index fdbcbdb018..c26d61b2ef 100644
--- a/src/core/lib/channel/channel_stack.h
+++ b/src/core/lib/channel/channel_stack.h
@@ -307,10 +307,10 @@ void grpc_call_element_signal_error(grpc_exec_ctx *exec_ctx,
grpc_call_element *cur_elem,
grpc_error *error);
-extern int grpc_trace_channel;
+extern grpc_tracer_flag grpc_trace_channel;
#define GRPC_CALL_LOG_OP(sev, elem, op) \
- if (grpc_trace_channel) grpc_call_log_op(sev, elem, op)
+ if (GRPC_TRACER_ON(grpc_trace_channel)) grpc_call_log_op(sev, elem, op)
#ifdef __cplusplus
}
diff --git a/src/core/lib/channel/channel_stack_builder.c b/src/core/lib/channel/channel_stack_builder.c
index 88c02edb70..44b030e4d1 100644
--- a/src/core/lib/channel/channel_stack_builder.c
+++ b/src/core/lib/channel/channel_stack_builder.c
@@ -38,7 +38,8 @@
#include <grpc/support/alloc.h>
#include <grpc/support/string_util.h>
-int grpc_trace_channel_stack_builder = 0;
+grpc_tracer_flag grpc_trace_channel_stack_builder =
+ GRPC_TRACER_INITIALIZER(false);
typedef struct filter_node {
struct filter_node *next;
diff --git a/src/core/lib/channel/channel_stack_builder.h b/src/core/lib/channel/channel_stack_builder.h
index c78111b00d..8cb36eb117 100644
--- a/src/core/lib/channel/channel_stack_builder.h
+++ b/src/core/lib/channel/channel_stack_builder.h
@@ -165,7 +165,7 @@ grpc_error *grpc_channel_stack_builder_finish(
void grpc_channel_stack_builder_destroy(grpc_exec_ctx *exec_ctx,
grpc_channel_stack_builder *builder);
-extern int grpc_trace_channel_stack_builder;
+extern grpc_tracer_flag grpc_trace_channel_stack_builder;
#ifdef __cplusplus
}
diff --git a/src/core/lib/channel/context.h b/src/core/lib/channel/context.h
index 6c931ad28a..f0a21113c5 100644
--- a/src/core/lib/channel/context.h
+++ b/src/core/lib/channel/context.h
@@ -50,6 +50,9 @@ typedef enum {
/// Reserved for traffic_class_context.
GRPC_CONTEXT_TRAFFIC,
+ /// Value is a \a grpc_grpclb_client_stats.
+ GRPC_GRPCLB_CLIENT_STATS,
+
GRPC_CONTEXT_COUNT
} grpc_context_index;
diff --git a/src/core/lib/debug/trace.c b/src/core/lib/debug/trace.c
index c56046785b..4dfea44c57 100644
--- a/src/core/lib/debug/trace.c
+++ b/src/core/lib/debug/trace.c
@@ -35,24 +35,31 @@
#include <string.h>
-#include <grpc/grpc.h>
#include <grpc/support/alloc.h>
#include <grpc/support/log.h>
#include "src/core/lib/support/env.h"
+int grpc_tracer_set_enabled(const char *name, int enabled);
+
typedef struct tracer {
const char *name;
- int *flag;
+ grpc_tracer_flag *flag;
struct tracer *next;
} tracer;
static tracer *tracers;
-void grpc_register_tracer(const char *name, int *flag) {
+#ifdef GRPC_THREADSAFE_TRACER
+#define TRACER_SET(flag, on) gpr_atm_no_barrier_store(&(flag).value, (on))
+#else
+#define TRACER_SET(flag, on) (flag).value = (on)
+#endif
+
+void grpc_register_tracer(const char *name, grpc_tracer_flag *flag) {
tracer *t = gpr_malloc(sizeof(*t));
t->name = name;
t->flag = flag;
t->next = tracers;
- *flag = 0;
+ TRACER_SET(*flag, false);
tracers = t;
}
@@ -121,13 +128,13 @@ int grpc_tracer_set_enabled(const char *name, int enabled) {
tracer *t;
if (0 == strcmp(name, "all")) {
for (t = tracers; t; t = t->next) {
- *t->flag = enabled;
+ TRACER_SET(*t->flag, enabled);
}
} else {
int found = 0;
for (t = tracers; t; t = t->next) {
if (0 == strcmp(name, t->name)) {
- *t->flag = enabled;
+ TRACER_SET(*t->flag, enabled);
found = 1;
}
}
diff --git a/src/core/lib/debug/trace.h b/src/core/lib/debug/trace.h
index 7afc38db7e..ba432574d0 100644
--- a/src/core/lib/debug/trace.h
+++ b/src/core/lib/debug/trace.h
@@ -34,9 +34,35 @@
#ifndef GRPC_CORE_LIB_DEBUG_TRACE_H
#define GRPC_CORE_LIB_DEBUG_TRACE_H
+#include <grpc/support/atm.h>
#include <grpc/support/port_platform.h>
+#include <stdbool.h>
-void grpc_register_tracer(const char *name, int *flag);
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#define GRPC_THREADSAFE_TRACER
+#endif
+#endif
+
+typedef struct {
+#ifdef GRPC_THREADSAFE_TRACER
+ gpr_atm value;
+#else
+ bool value;
+#endif
+} grpc_tracer_flag;
+
+#ifdef GRPC_THREADSAFE_TRACER
+#define GRPC_TRACER_ON(flag) (gpr_atm_no_barrier_load(&(flag).value) != 0)
+#define GRPC_TRACER_INITIALIZER(on) \
+ { (gpr_atm)(on) }
+#else
+#define GRPC_TRACER_ON(flag) ((flag).value)
+#define GRPC_TRACER_INITIALIZER(on) \
+ { (on) }
+#endif
+
+void grpc_register_tracer(const char *name, grpc_tracer_flag *flag);
void grpc_tracer_init(const char *env_var_name);
void grpc_tracer_shutdown(void);
diff --git a/src/core/lib/http/httpcli.c b/src/core/lib/http/httpcli.c
index 453a64b049..0ac2c2ad52 100644
--- a/src/core/lib/http/httpcli.c
+++ b/src/core/lib/http/httpcli.c
@@ -105,7 +105,7 @@ static void finish(grpc_exec_ctx *exec_ctx, internal_request *req,
grpc_error *error) {
grpc_polling_entity_del_from_pollset_set(exec_ctx, req->pollent,
req->context->pollset_set);
- grpc_closure_sched(exec_ctx, req->on_done, error);
+ grpc_closure_sched(exec_ctx, req->on_done, GRPC_ERROR_REF(error));
grpc_http_parser_destroy(&req->parser);
if (req->addresses != NULL) {
grpc_resolved_addresses_destroy(req->addresses);
diff --git a/src/core/lib/http/parser.c b/src/core/lib/http/parser.c
index aac506b800..a4357978e4 100644
--- a/src/core/lib/http/parser.c
+++ b/src/core/lib/http/parser.c
@@ -40,7 +40,7 @@
#include <grpc/support/log.h>
#include <grpc/support/useful.h>
-int grpc_http1_trace = 0;
+grpc_tracer_flag grpc_http1_trace = GRPC_TRACER_INITIALIZER(false);
static char *buf2str(void *buffer, size_t length) {
char *out = gpr_malloc(length + 1);
@@ -308,7 +308,7 @@ static grpc_error *addbyte(grpc_http_parser *parser, uint8_t byte,
case GRPC_HTTP_FIRST_LINE:
case GRPC_HTTP_HEADERS:
if (parser->cur_line_length >= GRPC_HTTP_PARSER_MAX_HEADER_LENGTH) {
- if (grpc_http1_trace)
+ if (GRPC_TRACER_ON(grpc_http1_trace))
gpr_log(GPR_ERROR, "HTTP header max line length (%d) exceeded",
GRPC_HTTP_PARSER_MAX_HEADER_LENGTH);
return GRPC_ERROR_CREATE_FROM_STATIC_STRING(
diff --git a/src/core/lib/http/parser.h b/src/core/lib/http/parser.h
index a68011dd43..a155fecf11 100644
--- a/src/core/lib/http/parser.h
+++ b/src/core/lib/http/parser.h
@@ -36,6 +36,7 @@
#include <grpc/slice.h>
#include <grpc/support/port_platform.h>
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/iomgr/error.h"
/* Maximum length of a header string of the form 'Key: Value\r\n' */
@@ -121,6 +122,6 @@ grpc_error *grpc_http_parser_eof(grpc_http_parser *parser);
void grpc_http_request_destroy(grpc_http_request *request);
void grpc_http_response_destroy(grpc_http_response *response);
-extern int grpc_http1_trace;
+extern grpc_tracer_flag grpc_http1_trace;
#endif /* GRPC_CORE_LIB_HTTP_PARSER_H */
diff --git a/src/core/lib/iomgr/combiner.c b/src/core/lib/iomgr/combiner.c
index 05cdbdad2b..863f22c614 100644
--- a/src/core/lib/iomgr/combiner.c
+++ b/src/core/lib/iomgr/combiner.c
@@ -42,13 +42,13 @@
#include "src/core/lib/iomgr/workqueue.h"
#include "src/core/lib/profiling/timers.h"
-int grpc_combiner_trace = 0;
+grpc_tracer_flag grpc_combiner_trace = GRPC_TRACER_INITIALIZER(false);
-#define GRPC_COMBINER_TRACE(fn) \
- do { \
- if (grpc_combiner_trace) { \
- fn; \
- } \
+#define GRPC_COMBINER_TRACE(fn) \
+ do { \
+ if (GRPC_TRACER_ON(grpc_combiner_trace)) { \
+ fn; \
+ } \
} while (0)
#define STATE_UNORPHANED 1
diff --git a/src/core/lib/iomgr/combiner.h b/src/core/lib/iomgr/combiner.h
index 75dcb0b70a..6ab7a2b26b 100644
--- a/src/core/lib/iomgr/combiner.h
+++ b/src/core/lib/iomgr/combiner.h
@@ -37,6 +37,7 @@
#include <stddef.h>
#include <grpc/support/atm.h>
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/iomgr/exec_ctx.h"
#include "src/core/lib/support/mpscq.h"
@@ -78,6 +79,6 @@ grpc_closure_scheduler *grpc_combiner_finally_scheduler(grpc_combiner *lock,
bool grpc_combiner_continue_exec_ctx(grpc_exec_ctx *exec_ctx);
-extern int grpc_combiner_trace;
+extern grpc_tracer_flag grpc_combiner_trace;
#endif /* GRPC_CORE_LIB_IOMGR_COMBINER_H */
diff --git a/src/core/lib/iomgr/error.c b/src/core/lib/iomgr/error.c
index 5f2c989aad..685581b5cb 100644
--- a/src/core/lib/iomgr/error.c
+++ b/src/core/lib/iomgr/error.c
@@ -769,7 +769,7 @@ grpc_error *grpc_os_error(const char *file, int line, int err,
GRPC_ERROR_INT_ERRNO, err),
GRPC_ERROR_STR_OS_ERROR,
grpc_slice_from_static_string(strerror(err))),
- GRPC_ERROR_STR_SYSCALL, grpc_slice_from_static_string(call_name));
+ GRPC_ERROR_STR_SYSCALL, grpc_slice_from_copied_string(call_name));
}
#ifdef GPR_WINDOWS
diff --git a/src/core/lib/iomgr/ev_epoll1_linux.c b/src/core/lib/iomgr/ev_epoll1_linux.c
new file mode 100644
index 0000000000..ad69f808cd
--- /dev/null
+++ b/src/core/lib/iomgr/ev_epoll1_linux.c
@@ -0,0 +1,984 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "src/core/lib/iomgr/port.h"
+
+/* This polling engine is only relevant on linux kernels supporting epoll() */
+#ifdef GRPC_LINUX_EPOLL
+
+#include "src/core/lib/iomgr/ev_epoll1_linux.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <poll.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <grpc/support/alloc.h>
+#include <grpc/support/cpu.h>
+#include <grpc/support/log.h>
+#include <grpc/support/string_util.h>
+#include <grpc/support/tls.h>
+#include <grpc/support/useful.h>
+
+#include "src/core/lib/iomgr/ev_posix.h"
+#include "src/core/lib/iomgr/iomgr_internal.h"
+#include "src/core/lib/iomgr/lockfree_event.h"
+#include "src/core/lib/iomgr/wakeup_fd_posix.h"
+#include "src/core/lib/iomgr/workqueue.h"
+#include "src/core/lib/profiling/timers.h"
+#include "src/core/lib/support/block_annotate.h"
+
+static grpc_wakeup_fd global_wakeup_fd;
+static int g_epfd;
+
+/*******************************************************************************
+ * Fd Declarations
+ */
+
+struct grpc_fd {
+ int fd;
+
+ gpr_atm read_closure;
+ gpr_atm write_closure;
+
+ struct grpc_fd *freelist_next;
+
+ /* The pollset that last noticed that the fd is readable. The actual type
+ * stored in this is (grpc_pollset *) */
+ gpr_atm read_notifier_pollset;
+
+ grpc_iomgr_object iomgr_object;
+};
+
+static void fd_global_init(void);
+static void fd_global_shutdown(void);
+
+/*******************************************************************************
+ * Pollset Declarations
+ */
+
+typedef enum { UNKICKED, KICKED, DESIGNATED_POLLER } kick_state;
+
+struct grpc_pollset_worker {
+ kick_state kick_state;
+ bool initialized_cv;
+ grpc_pollset_worker *next;
+ grpc_pollset_worker *prev;
+ gpr_cv cv;
+ grpc_closure_list schedule_on_end_work;
+};
+
+#define MAX_NEIGHBOURHOODS 1024
+
+typedef struct pollset_neighbourhood {
+ gpr_mu mu;
+ grpc_pollset *active_root;
+ char pad[GPR_CACHELINE_SIZE];
+} pollset_neighbourhood;
+
+struct grpc_pollset {
+ gpr_mu mu;
+ pollset_neighbourhood *neighbourhood;
+ bool reassigning_neighbourhood;
+ grpc_pollset_worker *root_worker;
+ bool kicked_without_poller;
+ bool seen_inactive;
+ bool shutting_down; /* Is the pollset shutting down ? */
+ bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
+ grpc_closure *shutdown_closure; /* Called after shutdown is complete */
+ int begin_refs;
+
+ grpc_pollset *next;
+ grpc_pollset *prev;
+};
+
+/*******************************************************************************
+ * Pollset-set Declarations
+ */
+
+struct grpc_pollset_set {};
+
+/*******************************************************************************
+ * Common helpers
+ */
+
+static bool append_error(grpc_error **composite, grpc_error *error,
+ const char *desc) {
+ if (error == GRPC_ERROR_NONE) return true;
+ if (*composite == GRPC_ERROR_NONE) {
+ *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
+ }
+ *composite = grpc_error_add_child(*composite, error);
+ return false;
+}
+
+/*******************************************************************************
+ * Fd Definitions
+ */
+
+/* We need to keep a freelist not because of any concerns of malloc performance
+ * but instead so that implementations with multiple threads in (for example)
+ * epoll_wait deal with the race between pollset removal and incoming poll
+ * notifications.
+ *
+ * The problem is that the poller ultimately holds a reference to this
+ * object, so it is very difficult to know when it is safe to free it, at least
+ * without some expensive synchronization.
+ *
+ * If we keep the object freelisted, in the worst case losing this race just
+ * becomes a spurious read notification on a reused fd.
+ */
+
+/* The alarm system needs to be able to wake up 'some poller' sometimes
+ * (specifically when a new alarm needs to be triggered earlier than the next
+ * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
+ * case occurs. */
+
+static grpc_fd *fd_freelist = NULL;
+static gpr_mu fd_freelist_mu;
+
+static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }
+
+ /* Frees every grpc_fd parked on the freelist and destroys the freelist
+  * mutex. */
+ static void fd_global_shutdown(void) {
+ /* NOTE(review): the mutex is taken and released immediately, and the list is
+    then walked without it - presumably this only waits for a concurrent
+    holder to leave; confirm no other thread can touch the list afterwards. */
+ gpr_mu_lock(&fd_freelist_mu);
+ gpr_mu_unlock(&fd_freelist_mu);
+ while (fd_freelist != NULL) {
+ grpc_fd *fd = fd_freelist;
+ fd_freelist = fd_freelist->freelist_next;
+ gpr_free(fd);
+ }
+ gpr_mu_destroy(&fd_freelist_mu);
+ }
+
+ /* Wraps the OS descriptor `fd` in a grpc_fd and adds it to the global epoll
+  * set (g_epfd) for edge-triggered read and write events.
+  *
+  * The grpc_fd is popped from the freelist when one is available and
+  * allocated with gpr_malloc otherwise. `name` is only used to build the label
+  * under which the object is registered with iomgr. A failing epoll_ctl is
+  * logged, but the grpc_fd is still returned. */
+ static grpc_fd *fd_create(int fd, const char *name) {
+ grpc_fd *new_fd = NULL;
+
+ /* Prefer a recycled structure over a fresh allocation. */
+ gpr_mu_lock(&fd_freelist_mu);
+ if (fd_freelist != NULL) {
+ new_fd = fd_freelist;
+ fd_freelist = fd_freelist->freelist_next;
+ }
+ gpr_mu_unlock(&fd_freelist_mu);
+
+ if (new_fd == NULL) {
+ new_fd = gpr_malloc(sizeof(grpc_fd));
+ }
+
+ /* Recycled and freshly allocated structures are both (re)initialized field
+    by field below. */
+ new_fd->fd = fd;
+ grpc_lfev_init(&new_fd->read_closure);
+ grpc_lfev_init(&new_fd->write_closure);
+ gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);
+
+ new_fd->freelist_next = NULL;
+
+ /* fd_name is a temporary label: it is freed right after registration. */
+ char *fd_name;
+ gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
+ grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
+ #ifdef GRPC_FD_REF_COUNT_DEBUG
+ gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, (void *)new_fd, fd_name);
+ #endif
+ gpr_free(fd_name);
+
+ /* data.ptr carries the grpc_fd so pollset_epoll() can map an event back to
+    its owner. */
+ struct epoll_event ev = {.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET),
+ .data.ptr = new_fd};
+ if (epoll_ctl(g_epfd, EPOLL_CTL_ADD, fd, &ev) != 0) {
+ gpr_log(GPR_ERROR, "epoll_ctl failed: %s", strerror(errno));
+ }
+
+ return new_fd;
+ }
+
+ /* Returns the raw OS file descriptor stored in `fd`. */
+ static int fd_wrapped_fd(grpc_fd *fd) {
+   const int os_fd = fd->fd;
+   return os_fd;
+ }
+
+ /* Marks both event slots of `fd` as shut down with error `why` and shuts the
+  * socket down in both directions.
+  *
+  * Might be called multiple times: the shutdown() call and the write-side
+  * update only run when grpc_lfev_set_shutdown() on the read side returns
+  * true. A reference to `why` is taken for each event slot and the caller's
+  * own reference is dropped before returning. */
+ static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
+ if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure,
+ GRPC_ERROR_REF(why))) {
+ shutdown(fd->fd, SHUT_RDWR);
+ grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
+ }
+ GRPC_ERROR_UNREF(why);
+ }
+
+ /* Gives up `fd`: shuts it down if that has not happened yet, closes or
+  * releases the OS descriptor, schedules `on_done`, and parks the grpc_fd on
+  * the freelist for reuse.
+  *
+  * exec_ctx:   execution context used to schedule `on_done`.
+  * on_done:    closure scheduled (with GRPC_ERROR_NONE) once the descriptor has
+  *             been closed or handed back.
+  * release_fd: when non-NULL, receives the OS descriptor, which is then left
+  *             open for the caller; when NULL the descriptor is closed here.
+  * reason:     text for the shutdown error created when the fd is not already
+  *             shut down. */
+ static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+                       grpc_closure *on_done, int *release_fd,
+                       const char *reason) {
+   grpc_error *error = GRPC_ERROR_NONE;
+
+   if (!grpc_lfev_is_shutdown(&fd->read_closure)) {
+     fd_shutdown(exec_ctx, fd, GRPC_ERROR_CREATE_FROM_COPIED_STRING(reason));
+   }
+
+   /* If release_fd is not NULL, we should be relinquishing control of the file
+      descriptor fd->fd (but we still own the grpc_fd structure). */
+   if (release_fd != NULL) {
+     /* A released descriptor stays open, so the kernel keeps it registered in
+        the epoll set with data.ptr pointing at this grpc_fd, which is about to
+        be recycled. Remove the registration explicitly so later activity on
+        the descriptor is not reported against an unrelated grpc_fd. The event
+        argument is ignored for EPOLL_CTL_DEL but must be non-NULL on old
+        kernels. */
+     struct epoll_event ev_unused = {.events = 0};
+     if (epoll_ctl(g_epfd, EPOLL_CTL_DEL, fd->fd, &ev_unused) != 0) {
+       gpr_log(GPR_ERROR, "epoll_ctl failed: %s", strerror(errno));
+     }
+     *release_fd = fd->fd;
+   } else {
+     /* Closing the descriptor is relied upon to drop it from the epoll set. */
+     close(fd->fd);
+   }
+
+   grpc_closure_sched(exec_ctx, on_done, GRPC_ERROR_REF(error));
+
+   grpc_iomgr_unregister_object(&fd->iomgr_object);
+   grpc_lfev_destroy(&fd->read_closure);
+   grpc_lfev_destroy(&fd->write_closure);
+
+   /* Recycle rather than free: a poller may still hold a pointer to this
+      structure from an in-flight epoll_wait() result. */
+   gpr_mu_lock(&fd_freelist_mu);
+   fd->freelist_next = fd_freelist;
+   fd_freelist = fd;
+   gpr_mu_unlock(&fd_freelist_mu);
+ }
+
+ /* Returns the pollset most recently stored by fd_become_readable(), or NULL
+  * if none has been stored since the fd was created. The acquire load pairs
+  * with the release store performed there. */
+ static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
+                                                   grpc_fd *fd) {
+   return (grpc_pollset *)gpr_atm_acq_load(&fd->read_notifier_pollset);
+ }
+
+ /* Reports whether `fd` has been shut down, judged by its read-side event
+  * slot only. */
+ static bool fd_is_shutdown(grpc_fd *fd) {
+ return grpc_lfev_is_shutdown(&fd->read_closure);
+ }
+
+ /* Registers `closure` on the read-side event slot of `fd`. */
+ static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+ grpc_closure *closure) {
+ grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure);
+ }
+
+ /* Registers `closure` on the write-side event slot of `fd`. */
+ static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+ grpc_closure *closure) {
+ grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure);
+ }
+
+ /* Returns a fixed, non-NULL placeholder pointer for every fd. The value is
+  * never dereferenced in this file: workqueue_ref(), workqueue_unref() and
+  * workqueue_scheduler() all ignore the contents of their workqueue
+  * argument. */
+ static grpc_workqueue *fd_get_workqueue(grpc_fd *fd) {
+ return (grpc_workqueue *)0xb0b51ed;
+ }
+
+ /* Marks the read side of `fd` ready and records `notifier` as the pollset
+  * that observed the readability event. */
+ static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+ grpc_pollset *notifier) {
+ grpc_lfev_set_ready(exec_ctx, &fd->read_closure);
+
+ /* The most recent caller wins: read_notifier_pollset is simply overwritten,
+    so if this runs more than once for the same fd the last notifier is the
+    one later returned by fd_get_read_notifier_pollset(). */
+ /* Use release store to match with acquire load in
+    fd_get_read_notifier_pollset */
+ gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
+ }
+
+ /* Marks the write side of `fd` ready. */
+ static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
+ grpc_lfev_set_ready(exec_ctx, &fd->write_closure);
+ }
+
+/*******************************************************************************
+ * Pollset Definitions
+ */
+
+ /* Per-thread: the pollset / worker the calling thread is currently running
+  * pollset_work() for (0 when outside of it); consulted by pollset_kick(). */
+ GPR_TLS_DECL(g_current_thread_pollset);
+ GPR_TLS_DECL(g_current_thread_worker);
+ /* Pointer (as gpr_atm) to the worker currently designated to call
+  * epoll_wait(), or 0 when there is none. */
+ static gpr_atm g_active_poller;
+ /* Array of g_num_neighbourhoods neighbourhoods, allocated in
+  * pollset_global_init(). */
+ static pollset_neighbourhood *g_neighbourhoods;
+ static size_t g_num_neighbourhoods;
+ /* Closures handed to wq_sched() that could not be given to a worker; guarded
+  * by g_wq_mu and drained in pollset_epoll(). */
+ static gpr_mu g_wq_mu;
+ static grpc_closure_list g_wq_items;
+
+ /* Links `worker` into the pollset's circular worker list, immediately before
+  * the current root. Returns true only when the list was empty, in which case
+  * `worker` becomes the root and points at itself in both directions. */
+ static bool worker_insert(grpc_pollset *pollset, grpc_pollset_worker *worker) {
+   grpc_pollset_worker *root = pollset->root_worker;
+   if (root != NULL) {
+     grpc_pollset_worker *tail = root->prev;
+     worker->next = root;
+     worker->prev = tail;
+     root->prev = worker;
+     tail->next = worker;
+     return false;
+   }
+   pollset->root_worker = worker;
+   worker->prev = worker;
+   worker->next = worker;
+   return true;
+ }
+
+ /* Outcome of worker_remove():
+  *   EMPTIED  - the worker was the only one; the pollset has no workers left.
+  *   NEW_ROOT - the worker was the root; its successor is the new root.
+  *   REMOVED  - a non-root worker was unlinked. */
+ typedef enum { EMPTIED, NEW_ROOT, REMOVED } worker_remove_result;
+
+ /* Unlinks `worker` from the pollset's circular worker list and reports how
+  * the list changed. */
+ static worker_remove_result worker_remove(grpc_pollset *pollset,
+                                           grpc_pollset_worker *worker) {
+   const bool was_root = (worker == pollset->root_worker);
+   if (was_root && worker->next == worker) {
+     pollset->root_worker = NULL;
+     return EMPTIED;
+   }
+   if (was_root) {
+     pollset->root_worker = worker->next;
+   }
+   worker->prev->next = worker->next;
+   worker->next->prev = worker->prev;
+   return was_root ? NEW_ROOT : REMOVED;
+ }
+
+ /* Maps the CPU the caller is currently running on to a neighbourhood index in
+  * [0, g_num_neighbourhoods). */
+ static size_t choose_neighbourhood(void) {
+   const size_t cpu = (size_t)gpr_cpu_current_cpu();
+   return cpu % g_num_neighbourhoods;
+ }
+
+ /* Sets up the engine-wide pollset state: thread-local slots, the active
+  * poller marker, the global wakeup fd (registered edge-triggered with
+  * g_epfd), the workqueue overflow list and the neighbourhood array.
+  *
+  * Returns GRPC_ERROR_NONE on success. On failure the error is returned as
+  * soon as it is detected; whatever was initialized before that point is left
+  * as is and the neighbourhood array is not allocated. */
+ static grpc_error *pollset_global_init(void) {
+ gpr_tls_init(&g_current_thread_pollset);
+ gpr_tls_init(&g_current_thread_worker);
+ gpr_atm_no_barrier_store(&g_active_poller, 0);
+ /* -1 lets pollset_global_shutdown() tell whether the wakeup fd was created. */
+ global_wakeup_fd.read_fd = -1;
+ grpc_error *err = grpc_wakeup_fd_init(&global_wakeup_fd);
+ gpr_mu_init(&g_wq_mu);
+ g_wq_items = (grpc_closure_list)GRPC_CLOSURE_LIST_INIT;
+ if (err != GRPC_ERROR_NONE) return err;
+ struct epoll_event ev = {.events = (uint32_t)(EPOLLIN | EPOLLET),
+ .data.ptr = &global_wakeup_fd};
+ if (epoll_ctl(g_epfd, EPOLL_CTL_ADD, global_wakeup_fd.read_fd, &ev) != 0) {
+ return GRPC_OS_ERROR(errno, "epoll_ctl");
+ }
+ /* One neighbourhood per core, bounded by MAX_NEIGHBOURHOODS. */
+ g_num_neighbourhoods = GPR_CLAMP(gpr_cpu_num_cores(), 1, MAX_NEIGHBOURHOODS);
+ g_neighbourhoods =
+ gpr_zalloc(sizeof(*g_neighbourhoods) * g_num_neighbourhoods);
+ for (size_t i = 0; i < g_num_neighbourhoods; i++) {
+ gpr_mu_init(&g_neighbourhoods[i].mu);
+ }
+ return GRPC_ERROR_NONE;
+ }
+
+ /* Tears down what pollset_global_init() set up: thread-local slots, the
+  * workqueue mutex, the global wakeup fd (only if it was created) and the
+  * neighbourhood array. g_neighbourhoods and g_num_neighbourhoods are not
+  * reset afterwards. */
+ static void pollset_global_shutdown(void) {
+ gpr_tls_destroy(&g_current_thread_pollset);
+ gpr_tls_destroy(&g_current_thread_worker);
+ gpr_mu_destroy(&g_wq_mu);
+ if (global_wakeup_fd.read_fd != -1) grpc_wakeup_fd_destroy(&global_wakeup_fd);
+ for (size_t i = 0; i < g_num_neighbourhoods; i++) {
+ gpr_mu_destroy(&g_neighbourhoods[i].mu);
+ }
+ gpr_free(g_neighbourhoods);
+ }
+
+ /* Initializes `pollset` in caller-provided storage and returns its mutex
+  * through `*mu`.
+  *
+  * The pollset starts out inactive (seen_inactive == true) with no workers,
+  * no pending kick and no shutdown in progress. Every field that other
+  * functions in this file read before writing is set explicitly, so
+  * correctness does not depend on the caller having zeroed the memory. */
+ static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
+   gpr_mu_init(&pollset->mu);
+   *mu = &pollset->mu;
+   pollset->neighbourhood = &g_neighbourhoods[choose_neighbourhood()];
+   pollset->reassigning_neighbourhood = false;
+   pollset->root_worker = NULL;
+   pollset->kicked_without_poller = false;
+   pollset->seen_inactive = true;
+   pollset->finish_shutdown_called = false;
+   pollset->shutdown_closure = NULL;
+   pollset->begin_refs = 0;
+   /* Not on any neighbourhood's active list yet; begin_worker() links it. */
+   pollset->next = pollset->prev = NULL;
+ }
+
+ /* Destroys `pollset`: unlinks it from its neighbourhood's active list if it
+  * is still on one, then destroys its mutex.
+  *
+  * Lock order is neighbourhood->mu before pollset->mu, so the pollset lock is
+  * dropped and both are re-taken in that order. Because the pollset is
+  * unlocked in between, its state is re-checked afterwards and the locking is
+  * retried if it moved to a different neighbourhood. */
+ static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
+   gpr_mu_lock(&pollset->mu);
+   if (!pollset->seen_inactive) {
+     pollset_neighbourhood *neighbourhood = pollset->neighbourhood;
+     gpr_mu_unlock(&pollset->mu);
+   retry_lock_neighbourhood:
+     gpr_mu_lock(&neighbourhood->mu);
+     gpr_mu_lock(&pollset->mu);
+     if (!pollset->seen_inactive) {
+       if (pollset->neighbourhood != neighbourhood) {
+         gpr_mu_unlock(&neighbourhood->mu);
+         neighbourhood = pollset->neighbourhood;
+         gpr_mu_unlock(&pollset->mu);
+         goto retry_lock_neighbourhood;
+       }
+       /* Here neighbourhood == pollset->neighbourhood and both locks are
+          held: unlink from the circular active list. */
+       pollset->prev->next = pollset->next;
+       pollset->next->prev = pollset->prev;
+       if (pollset == neighbourhood->active_root) {
+         neighbourhood->active_root =
+             pollset->next == pollset ? NULL : pollset->next;
+       }
+     }
+     /* Release exactly the mutex acquired above. When the pollset turned
+        inactive while unlocked, pollset->neighbourhood is not guaranteed to
+        still equal the neighbourhood that was locked. */
+     gpr_mu_unlock(&neighbourhood->mu);
+   }
+   gpr_mu_unlock(&pollset->mu);
+   gpr_mu_destroy(&pollset->mu);
+ }
+
+ /* Marks every worker of `pollset` as KICKED and wakes it: workers with an
+  * initialized condition variable are signalled, the others are woken through
+  * the global wakeup fd. Returns the accumulated wakeup errors, or
+  * GRPC_ERROR_NONE. */
+ static grpc_error *pollset_kick_all(grpc_pollset *pollset) {
+   grpc_error *error = GRPC_ERROR_NONE;
+   if (pollset->root_worker == NULL) {
+     return error;
+   }
+   grpc_pollset_worker *w = pollset->root_worker;
+   do {
+     w->kick_state = KICKED;
+     if (w->initialized_cv) {
+       gpr_cv_signal(&w->cv);
+     } else {
+       append_error(&error, grpc_wakeup_fd_wakeup(&global_wakeup_fd),
+                    "pollset_shutdown");
+     }
+     w = w->next;
+   } while (w != pollset->root_worker);
+   return error;
+ }
+
+ /* Completes a pending shutdown once nothing is using the pollset any more:
+  * a shutdown closure must be set, the worker list must be empty and no
+  * worker may be between entry and insertion in begin_worker() (begin_refs).
+  * The closure is scheduled once and then cleared. */
+ static void pollset_maybe_finish_shutdown(grpc_exec_ctx *exec_ctx,
+                                           grpc_pollset *pollset) {
+   if (pollset->shutdown_closure == NULL) return;
+   if (pollset->root_worker != NULL) return;
+   if (pollset->begin_refs != 0) return;
+
+   grpc_closure_sched(exec_ctx, pollset->shutdown_closure, GRPC_ERROR_NONE);
+   pollset->shutdown_closure = NULL;
+ }
+
+ /* Starts shutting `pollset` down: records `closure` to be scheduled when the
+  * shutdown completes, kicks every worker, and finishes immediately if the
+  * pollset is already idle. Must not be called while a shutdown closure is
+  * still pending (asserted). Errors from kicking are only logged. */
+ static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+ grpc_closure *closure) {
+ GPR_ASSERT(pollset->shutdown_closure == NULL);
+ pollset->shutdown_closure = closure;
+ GRPC_LOG_IF_ERROR("pollset_shutdown", pollset_kick_all(pollset));
+ pollset_maybe_finish_shutdown(exec_ctx, pollset);
+ }
+
+ /* Maximum number of events fetched by a single epoll_wait() call in
+  * pollset_epoll(). */
+ #define MAX_EPOLL_EVENTS 100
+
+ /* Converts an absolute `deadline` into an epoll_wait() timeout relative to
+  * `now`:
+  *   -1  when the deadline is the infinite future (block indefinitely),
+  *    0  when the deadline has already passed (do not block),
+  *   otherwise the remaining time in milliseconds, rounded up and never less
+  *   than 1. */
+ static int poll_deadline_to_millis_timeout(gpr_timespec deadline,
+                                            gpr_timespec now) {
+   /* Adding (1ms - 1ns) before truncating rounds up to the next millisecond. */
+   static const gpr_timespec round_up = {
+       .clock_type = GPR_TIMESPAN, .tv_sec = 0, .tv_nsec = GPR_NS_PER_MS - 1};
+
+   if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) {
+     return -1;
+   }
+   if (gpr_time_cmp(deadline, now) <= 0) {
+     return 0;
+   }
+
+   gpr_timespec timeout = gpr_time_sub(deadline, now);
+   int millis = gpr_time_to_millis(gpr_time_add(timeout, round_up));
+   if (millis < 1) {
+     return 1;
+   }
+   return millis;
+ }
+
+ /* Runs one epoll_wait() on the global epoll set on behalf of `pollset` and
+  * dispatches the returned events.
+  *
+  * The timeout is derived from `deadline` relative to `now`; EINTR is retried.
+  * Events for the global wakeup fd move the pending workqueue items onto
+  * exec_ctx's closure list and consume the wakeup; all other events are
+  * treated as belonging to a grpc_fd. Returns the epoll_wait() error, the
+  * accumulated wakeup-consumption errors, or GRPC_ERROR_NONE. */
+ static grpc_error *pollset_epoll(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+ gpr_timespec now, gpr_timespec deadline) {
+ struct epoll_event events[MAX_EPOLL_EVENTS];
+ static const char *err_desc = "pollset_poll";
+
+ int timeout = poll_deadline_to_millis_timeout(deadline, now);
+
+ /* A zero timeout never blocks, so the blocking-region markers are skipped. */
+ if (timeout != 0) {
+ GRPC_SCHEDULING_START_BLOCKING_REGION;
+ }
+ int r;
+ do {
+ r = epoll_wait(g_epfd, events, MAX_EPOLL_EVENTS, timeout);
+ } while (r < 0 && errno == EINTR);
+ if (timeout != 0) {
+ GRPC_SCHEDULING_END_BLOCKING_REGION;
+ }
+
+ if (r < 0) return GRPC_OS_ERROR(errno, "epoll_wait");
+
+ grpc_error *error = GRPC_ERROR_NONE;
+ for (int i = 0; i < r; i++) {
+ void *data_ptr = events[i].data.ptr;
+ if (data_ptr == &global_wakeup_fd) {
+ /* Pick up closures queued by wq_sched() when no worker could take them. */
+ gpr_mu_lock(&g_wq_mu);
+ grpc_closure_list_move(&g_wq_items, &exec_ctx->closure_list);
+ gpr_mu_unlock(&g_wq_mu);
+ append_error(&error, grpc_wakeup_fd_consume_wakeup(&global_wakeup_fd),
+ err_desc);
+ } else {
+ grpc_fd *fd = (grpc_fd *)(data_ptr);
+ /* Error/hangup is reported to both the read and the write side. */
+ bool cancel = (events[i].events & (EPOLLERR | EPOLLHUP)) != 0;
+ bool read_ev = (events[i].events & (EPOLLIN | EPOLLPRI)) != 0;
+ bool write_ev = (events[i].events & EPOLLOUT) != 0;
+ if (read_ev || cancel) {
+ fd_become_readable(exec_ctx, fd, pollset);
+ }
+ if (write_ev || cancel) {
+ fd_become_writable(exec_ctx, fd);
+ }
+ }
+ }
+
+ return error;
+ }
+
+ /* Registers `worker` on `pollset` and, unless it immediately becomes the
+  * designated poller, blocks on its condition variable until it is kicked,
+  * promoted, the deadline expires, or the pollset starts shutting down.
+  *
+  * Entered with pollset->mu held; the lock may be dropped and re-taken while
+  * the pollset is moved back onto a neighbourhood's active list, and is held
+  * again on return. `*worker_hdl`, when non-NULL, is set to `worker`. `*now`
+  * is refreshed after waiting.
+  *
+  * Returns true iff the worker is the DESIGNATED_POLLER and no shutdown is
+  * pending, i.e. the caller should go on to poll. */
+ static bool begin_worker(grpc_pollset *pollset, grpc_pollset_worker *worker,
+ grpc_pollset_worker **worker_hdl, gpr_timespec *now,
+ gpr_timespec deadline) {
+ if (worker_hdl != NULL) *worker_hdl = worker;
+ worker->initialized_cv = false;
+ worker->kick_state = UNKICKED;
+ worker->schedule_on_end_work = (grpc_closure_list)GRPC_CLOSURE_LIST_INIT;
+ /* Counted until the worker is on the worker list so that
+    pollset_maybe_finish_shutdown() does not complete in the meantime. */
+ pollset->begin_refs++;
+
+ if (pollset->seen_inactive) {
+ // pollset has been observed to be inactive, we need to move back to the
+ // active list
+ bool is_reassigning = false;
+ /* Only one worker at a time picks a new neighbourhood for the pollset. */
+ if (!pollset->reassigning_neighbourhood) {
+ is_reassigning = true;
+ pollset->reassigning_neighbourhood = true;
+ pollset->neighbourhood = &g_neighbourhoods[choose_neighbourhood()];
+ }
+ pollset_neighbourhood *neighbourhood = pollset->neighbourhood;
+ gpr_mu_unlock(&pollset->mu);
+ // pollset unlocked: state may change (even worker->kick_state)
+ retry_lock_neighbourhood:
+ /* Lock order: neighbourhood->mu first, then pollset->mu. */
+ gpr_mu_lock(&neighbourhood->mu);
+ gpr_mu_lock(&pollset->mu);
+ if (pollset->seen_inactive) {
+ if (neighbourhood != pollset->neighbourhood) {
+ gpr_mu_unlock(&neighbourhood->mu);
+ neighbourhood = pollset->neighbourhood;
+ gpr_mu_unlock(&pollset->mu);
+ goto retry_lock_neighbourhood;
+ }
+ pollset->seen_inactive = false;
+ if (neighbourhood->active_root == NULL) {
+ neighbourhood->active_root = pollset->next = pollset->prev = pollset;
+ /* First active pollset in this neighbourhood: try to claim the poller
+    role if nobody holds it. */
+ if (gpr_atm_no_barrier_cas(&g_active_poller, 0, (gpr_atm)worker)) {
+ worker->kick_state = DESIGNATED_POLLER;
+ }
+ } else {
+ /* Insert just before the current active root of the circular list. */
+ pollset->next = neighbourhood->active_root;
+ pollset->prev = pollset->next->prev;
+ pollset->next->prev = pollset->prev->next = pollset;
+ }
+ }
+ if (is_reassigning) {
+ GPR_ASSERT(pollset->reassigning_neighbourhood);
+ pollset->reassigning_neighbourhood = false;
+ }
+ gpr_mu_unlock(&neighbourhood->mu);
+ }
+ worker_insert(pollset, worker);
+ pollset->begin_refs--;
+ if (worker->kick_state == UNKICKED) {
+ GPR_ASSERT(gpr_atm_no_barrier_load(&g_active_poller) != (gpr_atm)worker);
+ worker->initialized_cv = true;
+ gpr_cv_init(&worker->cv);
+ while (worker->kick_state == UNKICKED &&
+ pollset->shutdown_closure == NULL) {
+ /* A non-zero return from gpr_cv_wait is handled as a kick, unless the
+    state already changed while waiting. */
+ if (gpr_cv_wait(&worker->cv, &pollset->mu, deadline) &&
+ worker->kick_state == UNKICKED) {
+ worker->kick_state = KICKED;
+ }
+ }
+ *now = gpr_now(now->clock_type);
+ }
+
+ return worker->kick_state == DESIGNATED_POLLER &&
+ pollset->shutdown_closure == NULL;
+ }
+
+ /* Scans the active pollsets of `neighbourhood` for a worker that is, or can
+  * become, the designated poller.
+  *
+  * The callers in this file hold neighbourhood->mu around the call; each
+  * inspected pollset's own mutex is taken here. An UNKICKED worker is promoted
+  * (and signalled if it waits on a condition variable) when the CAS on
+  * g_active_poller succeeds. A pollset with no UNKICKED or DESIGNATED_POLLER
+  * worker is marked seen_inactive and unlinked from the active list, and the
+  * scan moves on to the new active root.
+  *
+  * Returns true if such a worker was found, false once the active list is
+  * empty. */
+ static bool check_neighbourhood_for_available_poller(
+ pollset_neighbourhood *neighbourhood) {
+ bool found_worker = false;
+ do {
+ grpc_pollset *inspect = neighbourhood->active_root;
+ if (inspect == NULL) {
+ break;
+ }
+ gpr_mu_lock(&inspect->mu);
+ GPR_ASSERT(!inspect->seen_inactive);
+ grpc_pollset_worker *inspect_worker = inspect->root_worker;
+ if (inspect_worker != NULL) {
+ do {
+ switch (inspect_worker->kick_state) {
+ case UNKICKED:
+ if (gpr_atm_no_barrier_cas(&g_active_poller, 0,
+ (gpr_atm)inspect_worker)) {
+ inspect_worker->kick_state = DESIGNATED_POLLER;
+ if (inspect_worker->initialized_cv) {
+ gpr_cv_signal(&inspect_worker->cv);
+ }
+ }
+ // even if we didn't win the cas, there's a worker, we can stop
+ found_worker = true;
+ break;
+ case KICKED:
+ break;
+ case DESIGNATED_POLLER:
+ found_worker = true; // ok, so someone else found the worker, but
+ // we'll accept that
+ break;
+ }
+ /* The `break`s above only leave the switch: the remaining workers of
+    this pollset are still visited. */
+ inspect_worker = inspect_worker->next;
+ } while (inspect_worker != inspect->root_worker);
+ }
+ if (!found_worker) {
+ /* Nothing usable here: retire the pollset from the active list. */
+ inspect->seen_inactive = true;
+ if (inspect == neighbourhood->active_root) {
+ neighbourhood->active_root =
+ inspect->next == inspect ? NULL : inspect->next;
+ }
+ inspect->next->prev = inspect->prev;
+ inspect->prev->next = inspect->next;
+ inspect->next = inspect->prev = NULL;
+ }
+ gpr_mu_unlock(&inspect->mu);
+ } while (!found_worker);
+ return found_worker;
+ }
+
+ /* Retires `worker` from `pollset` at the end of pollset_work().
+  *
+  * Entered and left with pollset->mu held; the lock is dropped while closures
+  * are flushed and while other neighbourhoods are scanned. If the worker is
+  * the active poller, the role is handed to the next worker of this pollset
+  * when that one is UNKICKED; otherwise the role is cleared and the
+  * neighbourhoods are searched for a replacement. Closures queued on the
+  * worker are moved to exec_ctx, the worker's condition variable is destroyed
+  * if it was initialized, and a pending pollset shutdown is completed when the
+  * last worker leaves. `*worker_hdl`, when non-NULL, is reset to NULL. */
+ static void end_worker(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+                        grpc_pollset_worker *worker,
+                        grpc_pollset_worker **worker_hdl) {
+   if (worker_hdl != NULL) *worker_hdl = NULL;
+   worker->kick_state = KICKED;
+   grpc_closure_list_move(&worker->schedule_on_end_work,
+                          &exec_ctx->closure_list);
+   if (gpr_atm_no_barrier_load(&g_active_poller) == (gpr_atm)worker) {
+     if (worker->next != worker && worker->next->kick_state == UNKICKED) {
+       /* Hand the poller role directly to the next worker of this pollset. */
+       GPR_ASSERT(worker->next->initialized_cv);
+       gpr_atm_no_barrier_store(&g_active_poller, (gpr_atm)worker->next);
+       worker->next->kick_state = DESIGNATED_POLLER;
+       gpr_cv_signal(&worker->next->cv);
+       if (grpc_exec_ctx_has_work(exec_ctx)) {
+         gpr_mu_unlock(&pollset->mu);
+         grpc_exec_ctx_flush(exec_ctx);
+         gpr_mu_lock(&pollset->mu);
+       }
+     } else {
+       gpr_atm_no_barrier_store(&g_active_poller, 0);
+       /* Read the neighbourhood while pollset->mu is still held:
+          begin_worker() may reassign pollset->neighbourhood under that lock
+          as soon as it is released. */
+       size_t poller_neighbourhood_idx =
+           (size_t)(pollset->neighbourhood - g_neighbourhoods);
+       gpr_mu_unlock(&pollset->mu);
+       bool found_worker = false;
+       bool scan_state[MAX_NEIGHBOURHOODS];
+       /* First pass: only neighbourhoods whose lock is free, starting with
+          our own; remember which ones were skipped. */
+       for (size_t i = 0; !found_worker && i < g_num_neighbourhoods; i++) {
+         pollset_neighbourhood *neighbourhood =
+             &g_neighbourhoods[(poller_neighbourhood_idx + i) %
+                               g_num_neighbourhoods];
+         if (gpr_mu_trylock(&neighbourhood->mu)) {
+           found_worker =
+               check_neighbourhood_for_available_poller(neighbourhood);
+           gpr_mu_unlock(&neighbourhood->mu);
+           scan_state[i] = true;
+         } else {
+           scan_state[i] = false;
+         }
+       }
+       /* Second pass: block on the neighbourhoods skipped above. It only
+          runs when the first pass visited every index, so scan_state is
+          fully written. */
+       for (size_t i = 0; !found_worker && i < g_num_neighbourhoods; i++) {
+         if (scan_state[i]) continue;
+         pollset_neighbourhood *neighbourhood =
+             &g_neighbourhoods[(poller_neighbourhood_idx + i) %
+                               g_num_neighbourhoods];
+         gpr_mu_lock(&neighbourhood->mu);
+         found_worker = check_neighbourhood_for_available_poller(neighbourhood);
+         gpr_mu_unlock(&neighbourhood->mu);
+       }
+       grpc_exec_ctx_flush(exec_ctx);
+       gpr_mu_lock(&pollset->mu);
+     }
+   } else if (grpc_exec_ctx_has_work(exec_ctx)) {
+     gpr_mu_unlock(&pollset->mu);
+     grpc_exec_ctx_flush(exec_ctx);
+     gpr_mu_lock(&pollset->mu);
+   }
+   if (worker->initialized_cv) {
+     gpr_cv_destroy(&worker->cv);
+   }
+   if (EMPTIED == worker_remove(pollset, worker)) {
+     pollset_maybe_finish_shutdown(exec_ctx, pollset);
+   }
+   GPR_ASSERT(gpr_atm_no_barrier_load(&g_active_poller) != (gpr_atm)worker);
+ }
+
+ /* Does one unit of polling work on `pollset` on behalf of the calling thread.
+
+    pollset->mu must be held by the caller before calling this. The function
+    may temporarily release the lock during the course of its execution (while
+    waiting, polling or flushing closures) but it always re-acquires it and
+    ensures that it is held by the time the function returns.
+
+    If the pollset was kicked while it had no workers, that kick is consumed
+    and the function returns immediately. Otherwise a stack-allocated worker is
+    registered; it polls via pollset_epoll() only if begin_worker() reports it
+    as the designated poller. `*worker_hdl`, when non-NULL, names the worker
+    for the duration of the call. Returns the polling errors, if any. */
+ static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+ grpc_pollset_worker **worker_hdl,
+ gpr_timespec now, gpr_timespec deadline) {
+ grpc_pollset_worker worker;
+ grpc_error *error = GRPC_ERROR_NONE;
+ static const char *err_desc = "pollset_work";
+ if (pollset->kicked_without_poller) {
+ pollset->kicked_without_poller = false;
+ return GRPC_ERROR_NONE;
+ }
+ /* The thread-local markers let pollset_kick() recognise kicks issued from
+    this thread. */
+ gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
+ if (begin_worker(pollset, &worker, worker_hdl, &now, deadline)) {
+ gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
+ GPR_ASSERT(!pollset->shutdown_closure);
+ GPR_ASSERT(!pollset->seen_inactive);
+ /* epoll_wait() runs without the pollset lock. */
+ gpr_mu_unlock(&pollset->mu);
+ append_error(&error, pollset_epoll(exec_ctx, pollset, now, deadline),
+ err_desc);
+ gpr_mu_lock(&pollset->mu);
+ gpr_tls_set(&g_current_thread_worker, 0);
+ }
+ end_worker(exec_ctx, pollset, &worker, worker_hdl);
+ gpr_tls_set(&g_current_thread_pollset, 0);
+ return error;
+ }
+
+ /* Kicks a worker of `pollset` so that it returns from pollset_work().
+  *
+  * With specific_worker == NULL:
+  *   - nothing is done when the calling thread is itself inside
+  *     pollset_work() for this pollset;
+  *   - if the pollset has no workers the kick is remembered in
+  *     kicked_without_poller;
+  *   - if the root worker is the only worker and is the active poller, it is
+  *     marked KICKED and the global wakeup fd is woken;
+  *   - otherwise the worker after the root is marked KICKED and signalled, but
+  *     only if it is still UNKICKED.
+  * With a specific worker: it is marked KICKED unless it already is, and woken
+  * through the global wakeup fd (active poller) or its condition variable
+  * (when initialized); no wakeup is sent when it is the calling thread's own
+  * worker.
+  *
+  * Returns the wakeup-fd error, if any, else GRPC_ERROR_NONE.
+  * NOTE(review): worker state is modified without taking any lock here -
+  * presumably the caller holds pollset->mu; confirm. */
+ static grpc_error *pollset_kick(grpc_pollset *pollset,
+ grpc_pollset_worker *specific_worker) {
+ if (specific_worker == NULL) {
+ if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)pollset) {
+ grpc_pollset_worker *root_worker = pollset->root_worker;
+ if (root_worker == NULL) {
+ pollset->kicked_without_poller = true;
+ return GRPC_ERROR_NONE;
+ }
+ grpc_pollset_worker *next_worker = root_worker->next;
+ if (root_worker == next_worker &&
+ root_worker == (grpc_pollset_worker *)gpr_atm_no_barrier_load(
+ &g_active_poller)) {
+ root_worker->kick_state = KICKED;
+ return grpc_wakeup_fd_wakeup(&global_wakeup_fd);
+ } else if (next_worker->kick_state == UNKICKED) {
+ GPR_ASSERT(next_worker->initialized_cv);
+ next_worker->kick_state = KICKED;
+ gpr_cv_signal(&next_worker->cv);
+ return GRPC_ERROR_NONE;
+ } else {
+ return GRPC_ERROR_NONE;
+ }
+ } else {
+ return GRPC_ERROR_NONE;
+ }
+ } else if (specific_worker->kick_state == KICKED) {
+ return GRPC_ERROR_NONE;
+ } else if (gpr_tls_get(&g_current_thread_worker) ==
+ (intptr_t)specific_worker) {
+ specific_worker->kick_state = KICKED;
+ return GRPC_ERROR_NONE;
+ } else if (specific_worker ==
+ (grpc_pollset_worker *)gpr_atm_no_barrier_load(&g_active_poller)) {
+ specific_worker->kick_state = KICKED;
+ return grpc_wakeup_fd_wakeup(&global_wakeup_fd);
+ } else if (specific_worker->initialized_cv) {
+ specific_worker->kick_state = KICKED;
+ gpr_cv_signal(&specific_worker->cv);
+ return GRPC_ERROR_NONE;
+ } else {
+ specific_worker->kick_state = KICKED;
+ return GRPC_ERROR_NONE;
+ }
+ }
+
+ /* Intentionally empty: fds are added to the single global epoll set in
+  * fd_create(), so there is nothing to do per pollset. */
+ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+ grpc_fd *fd) {}
+
+/*******************************************************************************
+ * Workqueue Definitions
+ */
+
+ /* Workqueue reference counting is a no-op in this engine: workqueue_ref()
+  * returns its argument unchanged and workqueue_unref() does nothing. The two
+  * variants differ only in the extra file/line/reason parameters of the debug
+  * signature. */
+ #ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+ static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue,
+ const char *file, int line,
+ const char *reason) {
+ return workqueue;
+ }
+
+ static void workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+ const char *file, int line, const char *reason) {}
+ #else
+ static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue) {
+ return workqueue;
+ }
+
+ static void workqueue_unref(grpc_exec_ctx *exec_ctx,
+ grpc_workqueue *workqueue) {}
+ #endif
+
+ /* Scheduler entry point of the singleton workqueue: arranges for `closure`
+  * to run with `error` on some polling thread.
+  *
+  * Starting from the caller's neighbourhood, every neighbourhood and active
+  * pollset whose mutex can be taken without blocking (trylock) is searched for
+  * an UNKICKED worker. The first one found is marked KICKED, gets the closure
+  * appended to its schedule_on_end_work list (moved to the exec_ctx in
+  * end_worker()) and is signalled if it waits on a condition variable.
+  *
+  * If no such worker is found the closure is queued on g_wq_items and the
+  * global wakeup fd is woken, so that the thread handling that wakeup in
+  * pollset_epoll() picks it up. */
+ static void wq_sched(grpc_exec_ctx *exec_ctx, grpc_closure *closure,
+ grpc_error *error) {
+ // find a neighbourhood to wakeup
+ bool scheduled = false;
+ size_t initial_neighbourhood = choose_neighbourhood();
+ for (size_t i = 0; !scheduled && i < g_num_neighbourhoods; i++) {
+ pollset_neighbourhood *neighbourhood =
+ &g_neighbourhoods[(initial_neighbourhood + i) % g_num_neighbourhoods];
+ /* Contended neighbourhoods and pollsets are skipped, never waited for. */
+ if (gpr_mu_trylock(&neighbourhood->mu)) {
+ if (neighbourhood->active_root != NULL) {
+ grpc_pollset *inspect = neighbourhood->active_root;
+ do {
+ if (gpr_mu_trylock(&inspect->mu)) {
+ if (inspect->root_worker != NULL) {
+ grpc_pollset_worker *inspect_worker = inspect->root_worker;
+ do {
+ if (inspect_worker->kick_state == UNKICKED) {
+ inspect_worker->kick_state = KICKED;
+ grpc_closure_list_append(
+ &inspect_worker->schedule_on_end_work, closure, error);
+ if (inspect_worker->initialized_cv) {
+ gpr_cv_signal(&inspect_worker->cv);
+ }
+ scheduled = true;
+ }
+ inspect_worker = inspect_worker->next;
+ } while (!scheduled && inspect_worker != inspect->root_worker);
+ }
+ gpr_mu_unlock(&inspect->mu);
+ }
+ inspect = inspect->next;
+ } while (!scheduled && inspect != neighbourhood->active_root);
+ }
+ gpr_mu_unlock(&neighbourhood->mu);
+ }
+ }
+ if (!scheduled) {
+ /* Fallback: global queue plus a wakeup of whoever is in epoll_wait(). */
+ gpr_mu_lock(&g_wq_mu);
+ grpc_closure_list_append(&g_wq_items, closure, error);
+ gpr_mu_unlock(&g_wq_mu);
+ GRPC_LOG_IF_ERROR("workqueue_scheduler",
+ grpc_wakeup_fd_wakeup(&global_wakeup_fd));
+ }
+ }
+
+ /* Scheduler vtable of the single, engine-wide workqueue: wq_sched fills both
+  * function slots. */
+ static const grpc_closure_scheduler_vtable
+ singleton_workqueue_scheduler_vtable = {wq_sched, wq_sched,
+ "epoll1_workqueue"};
+
+ static grpc_closure_scheduler singleton_workqueue_scheduler = {
+ &singleton_workqueue_scheduler_vtable};
+
+ /* Returns the singleton workqueue scheduler; `workqueue` is ignored. */
+ static grpc_closure_scheduler *workqueue_scheduler(grpc_workqueue *workqueue) {
+ return &singleton_workqueue_scheduler;
+ }
+
+/*******************************************************************************
+ * Pollset-set Definitions
+ */
+
+ /* Pollset sets are stateless here. pollset_set_create() returns a fixed
+  * non-NULL placeholder pointer that none of the functions below dereference,
+  * and every other pollset_set operation is an empty stub. */
+ static grpc_pollset_set *pollset_set_create(void) {
+ return (grpc_pollset_set *)((intptr_t)0xdeafbeef);
+ }
+
+ /* No-op: pollset_set_create() allocates nothing. */
+ static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
+ grpc_pollset_set *pss) {}
+
+ /* No-op. */
+ static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
+ grpc_fd *fd) {}
+
+ /* No-op. */
+ static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
+ grpc_fd *fd) {}
+
+ /* No-op. */
+ static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
+ grpc_pollset_set *pss, grpc_pollset *ps) {}
+
+ /* No-op. */
+ static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
+ grpc_pollset_set *pss, grpc_pollset *ps) {}
+
+ /* No-op. */
+ static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
+ grpc_pollset_set *bag,
+ grpc_pollset_set *item) {}
+
+ /* No-op. */
+ static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
+ grpc_pollset_set *bag,
+ grpc_pollset_set *item) {}
+
+/*******************************************************************************
+ * Event engine binding
+ */
+
+ /* Engine shutdown hook installed in the vtable: releases the fd freelist,
+  * then the global pollset state. NOTE(review): g_epfd is not closed here -
+  * confirm whether that is intentional. */
+ static void shutdown_engine(void) {
+ fd_global_shutdown();
+ pollset_global_shutdown();
+ }
+
+ /* Event engine vtable for the epoll1 engine: binds the static functions
+  * defined in this file to the generic event-engine entry points. Handed out
+  * by grpc_init_epoll1_linux(). */
+ static const grpc_event_engine_vtable vtable = {
+ .pollset_size = sizeof(grpc_pollset),
+
+ .fd_create = fd_create,
+ .fd_wrapped_fd = fd_wrapped_fd,
+ .fd_orphan = fd_orphan,
+ .fd_shutdown = fd_shutdown,
+ .fd_is_shutdown = fd_is_shutdown,
+ .fd_notify_on_read = fd_notify_on_read,
+ .fd_notify_on_write = fd_notify_on_write,
+ .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset,
+ .fd_get_workqueue = fd_get_workqueue,
+
+ .pollset_init = pollset_init,
+ .pollset_shutdown = pollset_shutdown,
+ .pollset_destroy = pollset_destroy,
+ .pollset_work = pollset_work,
+ .pollset_kick = pollset_kick,
+ .pollset_add_fd = pollset_add_fd,
+
+ .pollset_set_create = pollset_set_create,
+ .pollset_set_destroy = pollset_set_destroy,
+ .pollset_set_add_pollset = pollset_set_add_pollset,
+ .pollset_set_del_pollset = pollset_set_del_pollset,
+ .pollset_set_add_pollset_set = pollset_set_add_pollset_set,
+ .pollset_set_del_pollset_set = pollset_set_del_pollset_set,
+ .pollset_set_add_fd = pollset_set_add_fd,
+ .pollset_set_del_fd = pollset_set_del_fd,
+
+ .workqueue_ref = workqueue_ref,
+ .workqueue_unref = workqueue_unref,
+ .workqueue_scheduler = workqueue_scheduler,
+
+ .shutdown_engine = shutdown_engine,
+ };
+
+ /* Initializes the epoll1 engine and returns its vtable, or NULL when the
+  * engine is unavailable or was not explicitly requested.
+  *
+  * It is possible that GLIBC has epoll but the underlying kernel doesn't, so
+  * the global epoll set is created here and a failure to create it is treated
+  * as "epoll unavailable". NULL is also returned when wakeup fds are not
+  * supported or when the global pollset state cannot be initialized; in the
+  * latter case the epoll fd is closed and the fd freelist state is shut down
+  * again. */
+ const grpc_event_engine_vtable *grpc_init_epoll1_linux(bool explicit_request) {
+ /* TODO(ctiller): temporary, until this stabilizes */
+ if (!explicit_request) return NULL;
+
+ if (!grpc_has_wakeup_fd()) {
+ return NULL;
+ }
+
+ g_epfd = epoll_create1(EPOLL_CLOEXEC);
+ if (g_epfd < 0) {
+ gpr_log(GPR_ERROR, "epoll unavailable");
+ return NULL;
+ }
+
+ fd_global_init();
+
+ /* GRPC_LOG_IF_ERROR logs the error; the failure branch runs when it
+    evaluates to false. */
+ if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
+ close(g_epfd);
+ fd_global_shutdown();
+ return NULL;
+ }
+
+ return &vtable;
+ }
+
+#else /* defined(GRPC_LINUX_EPOLL) */
+#if defined(GRPC_POSIX_SOCKET)
+#include "src/core/lib/iomgr/ev_posix.h"
+ /* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
+  * NULL regardless of `explicit_request` so that this engine is never
+  * selected. */
+ const grpc_event_engine_vtable *grpc_init_epoll1_linux(bool explicit_request) {
+ return NULL;
+ }
+#endif /* defined(GRPC_POSIX_SOCKET) */
+#endif /* !defined(GRPC_LINUX_EPOLL) */
diff --git a/src/core/lib/iomgr/ev_epoll1_linux.h b/src/core/lib/iomgr/ev_epoll1_linux.h
new file mode 100644
index 0000000000..bd52478a7c
--- /dev/null
+++ b/src/core/lib/iomgr/ev_epoll1_linux.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef GRPC_CORE_LIB_IOMGR_EV_EPOLL1_LINUX_H
+#define GRPC_CORE_LIB_IOMGR_EV_EPOLL1_LINUX_H
+
+#include "src/core/lib/iomgr/ev_posix.h"
+#include "src/core/lib/iomgr/port.h"
+
+ // a polling engine that utilizes a singleton epoll set and turnstile polling
+ //
+ // Returns the engine's vtable, or NULL when the engine cannot be used. The
+ // implementation in ev_epoll1_linux.c returns NULL unless explicit_request is
+ // true.
+
+ const grpc_event_engine_vtable *grpc_init_epoll1_linux(bool explicit_request);
+
+#endif /* GRPC_CORE_LIB_IOMGR_EV_EPOLL1_LINUX_H */
diff --git a/src/core/lib/iomgr/ev_epoll_limited_pollers_linux.c b/src/core/lib/iomgr/ev_epoll_limited_pollers_linux.c
new file mode 100644
index 0000000000..d23bf6c06c
--- /dev/null
+++ b/src/core/lib/iomgr/ev_epoll_limited_pollers_linux.c
@@ -0,0 +1,2146 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "src/core/lib/iomgr/port.h"
+
+/* This polling engine is only relevant on linux kernels supporting epoll() */
+#ifdef GRPC_LINUX_EPOLL
+
+#include "src/core/lib/iomgr/ev_epoll_limited_pollers_linux.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <grpc/support/alloc.h>
+#include <grpc/support/log.h>
+#include <grpc/support/string_util.h>
+#include <grpc/support/tls.h>
+#include <grpc/support/useful.h>
+
+#include "src/core/lib/debug/trace.h"
+#include "src/core/lib/iomgr/ev_posix.h"
+#include "src/core/lib/iomgr/iomgr_internal.h"
+#include "src/core/lib/iomgr/lockfree_event.h"
+#include "src/core/lib/iomgr/timer.h"
+#include "src/core/lib/iomgr/wakeup_fd_posix.h"
+#include "src/core/lib/iomgr/workqueue.h"
+#include "src/core/lib/profiling/timers.h"
+#include "src/core/lib/support/block_annotate.h"
+#include "src/core/lib/support/env.h"
+
+/* Logs a formatted message at INFO level when the grpc_polling_trace tracer is
+ on. The body is wrapped in do/while(0) so that the macro expands to a single
+ statement and can be used safely inside an unbraced if/else. At least one
+ argument must follow 'fmt'. */
+#define GRPC_POLLING_TRACE(fmt, ...) \
+ do { \
+ if (GRPC_TRACER_ON(grpc_polling_trace)) { \
+ gpr_log(GPR_INFO, (fmt), __VA_ARGS__); \
+ } \
+ } while (0)
+
+#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)
+
+/* Uncomment the following to enable extra checks on poll_object operations */
+/* #define PO_DEBUG */
+
+/* The maximum number of polling threads per polling island. By default no
+ limit */
+static int g_max_pollers_per_pi = INT_MAX;
+
+static int grpc_wakeup_signal = -1;
+static bool is_grpc_wakeup_signal_initialized = false;
+
+/* Records 'signum' as the signal this engine uses to wake up pollers and marks
+ * the choice as initialized. A negative signum (e.g. -1) disables the use of
+ * signals. Per the original note, this may be called even before grpc_init().
+ * NOTE(review): the original comment says this implements the function
+ * declared in grpc_posix.h, yet this definition is static -- confirm which
+ * definition external callers actually reach. */
+static void grpc_use_signal(int signum) {
+ grpc_wakeup_signal = signum;
+ is_grpc_wakeup_signal_initialized = true;
+
+ if (grpc_wakeup_signal >= 0) {
+ gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
+ grpc_wakeup_signal);
+ return;
+ }
+
+ gpr_log(GPR_INFO,
+ "Use of signals is disabled. Epoll engine will not be used");
+}
+
+struct polling_island;
+
+/* Tag naming the kind of object that embeds a poll_obj (stored in
+ poll_obj.obj_type when PO_DEBUG is defined). */
+typedef enum {
+ POLL_OBJ_FD,
+ POLL_OBJ_POLLSET,
+ POLL_OBJ_POLLSET_SET
+} poll_obj_type;
+
+/* Common header embedded as the first member ('po') of grpc_fd, grpc_pollset
+ and grpc_pollset_set. */
+typedef struct poll_obj {
+#ifdef PO_DEBUG
+ /* Kind of the enclosing object; only present in PO_DEBUG builds */
+ poll_obj_type obj_type;
+#endif
+ gpr_mu mu;
+ /* Polling island this object currently refers to (NULL if none) */
+ struct polling_island *pi;
+} poll_obj;
+
+/* Returns a short, static, human-readable name for 'po_type'. */
+static const char *poll_obj_string(poll_obj_type po_type) {
+ if (po_type == POLL_OBJ_FD) return "fd";
+ if (po_type == POLL_OBJ_POLLSET) return "pollset";
+ if (po_type == POLL_OBJ_POLLSET_SET) return "pollset_set";
+
+ /* Every enumerator is handled above */
+ GPR_UNREACHABLE_CODE(return "UNKNOWN");
+}
+
+/*******************************************************************************
+ * Fd Declarations
+ */
+
+#define FD_FROM_PO(po) ((grpc_fd *)(po))
+
+/* Per-file-descriptor state kept by this polling engine. 'po' must stay the
+ first member: FD_FROM_PO() casts a poll_obj pointer straight to grpc_fd. */
+struct grpc_fd {
+ poll_obj po;
+
+ int fd;
+ /* refst format:
+ bit 0 : 1=Active / 0=Orphaned
+ bits 1-n : refcount
+ Ref/Unref by two to avoid altering the orphaned bit */
+ gpr_atm refst;
+
+ /* The fd is either closed or we relinquished control of it. In either
+ case, this indicates that the 'fd' on this structure is no longer
+ valid */
+ bool orphaned;
+
+ /* Lock-free events, set up with grpc_lfev_init() and torn down with
+ grpc_lfev_destroy() */
+ gpr_atm read_closure;
+ gpr_atm write_closure;
+
+ /* Next entry while this object sits on the global fd freelist */
+ struct grpc_fd *freelist_next;
+ grpc_closure *on_done_closure;
+
+ /* The pollset that last noticed that the fd is readable. The actual type
+ * stored in this is (grpc_pollset *) */
+ gpr_atm read_notifier_pollset;
+
+ grpc_iomgr_object iomgr_object;
+};
+
+/* Reference counting for fds */
+// #define GRPC_FD_REF_COUNT_DEBUG
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line);
+static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
+ int line);
+#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
+#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
+#else
+static void fd_ref(grpc_fd *fd);
+static void fd_unref(grpc_fd *fd);
+#define GRPC_FD_REF(fd, reason) fd_ref(fd)
+#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
+#endif
+
+static void fd_global_init(void);
+static void fd_global_shutdown(void);
+
+/*******************************************************************************
+ * Polling island Declarations
+ */
+
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+
+#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
+#define PI_UNREF(exec_ctx, p, r) \
+ pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)
+
+#else /* defined(GRPC_WORKQUEUE_REFCOUNT_DEBUG) */
+
+#define PI_ADD_REF(p, r) pi_add_ref((p))
+#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))
+
+#endif /* !defined(GRPC_WORKQUEUE_REFCOUNT_DEBUG) */
+
+/* Link in a circular doubly-linked list of workers. A node whose next and prev
+ both point at itself is not on any list (see is_worker_node_detached()). */
+typedef struct worker_node {
+ struct worker_node *next;
+ struct worker_node *prev;
+} worker_node;
+
+/* A polling island: one epoll set together with the fds registered in it, its
+ workqueue and the workers waiting to poll it.
+ This is also used as grpc_workqueue (by directly casting it), which is why
+ workqueue_scheduler is the first member. */
+typedef struct polling_island {
+ grpc_closure_scheduler workqueue_scheduler;
+
+ gpr_mu mu;
+ /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
+ the refcount.
+ Once the ref count becomes zero, this structure is destroyed which means
+ we should ensure that there is never a scenario where a PI_ADD_REF() is
+ racing with a PI_UNREF() that just made the ref_count zero. */
+ gpr_atm ref_count;
+
+ /* Pointer to the polling_island this merged into.
+ * merged_to value is only set once in polling_island's lifetime (and that too
+ * only if the island is merged with another island). Because of this, we can
+ * use gpr_atm type here so that we can do atomic access on this and reduce
+ * lock contention on 'mu' mutex.
+ *
+ * Note that if this field is not NULL (i.e not 0), all the remaining fields
+ * (except mu and ref_count) are invalid and must be ignored. */
+ gpr_atm merged_to;
+
+ /* Number of threads currently polling on this island */
+ gpr_atm poller_count;
+ /* Mutex guarding the read end of the workqueue (must be held to pop from
+ * workqueue_items) */
+ gpr_mu workqueue_read_mu;
+ /* Queue of closures to be executed */
+ gpr_mpscq workqueue_items;
+ /* Count of items in workqueue_items */
+ gpr_atm workqueue_item_count;
+ /* Wakeup fd used to wake pollers to check the contents of workqueue_items */
+ grpc_wakeup_fd workqueue_wakeup_fd;
+
+ /* The list of workers waiting to do polling on this polling island */
+ gpr_mu worker_list_mu;
+ worker_node worker_list_head;
+
+ /* The fd of the underlying epoll set */
+ int epoll_fd;
+
+ /* The file descriptors in the epoll set */
+ size_t fd_cnt;
+ size_t fd_capacity;
+ grpc_fd **fds;
+} polling_island;
+
+/*******************************************************************************
+ * Pollset Declarations
+ */
+#define WORKER_FROM_WORKER_LIST_NODE(p) \
+ ((struct grpc_pollset_worker *)(((char *)(p)) - \
+ offsetof(grpc_pollset_worker, pi_list_link)))
+/* State of one worker thread. Use WORKER_FROM_WORKER_LIST_NODE() to get back
+ to this struct from its pi_list_link member. */
+struct grpc_pollset_worker {
+ /* Thread id of this worker */
+ pthread_t pt_id;
+
+ /* Used to prevent a worker from getting kicked multiple times */
+ gpr_atm is_kicked;
+
+ struct grpc_pollset_worker *next;
+ struct grpc_pollset_worker *prev;
+
+ /* Indicates if it is this worker's turn to do epoll */
+ gpr_atm is_polling_turn;
+
+ /* Node in the polling island's worker list. */
+ worker_node pi_list_link;
+};
+
+/* A pollset: a poll_obj header plus worker list root and shutdown state. */
+struct grpc_pollset {
+ poll_obj po;
+
+ grpc_pollset_worker root_worker;
+ bool kicked_without_pollers;
+
+ bool shutting_down; /* Is the pollset shutting down ? */
+ bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
+ grpc_closure *shutdown_done; /* Called after shutdown is complete */
+};
+
+/*******************************************************************************
+ * Pollset-set Declarations
+ */
+/* A pollset_set carries no state of its own beyond the common poll_obj
+ header. */
+struct grpc_pollset_set {
+ poll_obj po;
+};
+
+/*******************************************************************************
+ * Common helpers
+ */
+
+/* Folds 'error' into '*composite'.
+ Returns true (leaving '*composite' untouched) when 'error' is
+ GRPC_ERROR_NONE. Otherwise 'error' is added as a child of '*composite' --
+ which is first created from 'desc' if it was still GRPC_ERROR_NONE -- and
+ false is returned. */
+static bool append_error(grpc_error **composite, grpc_error *error,
+ const char *desc) {
+ if (error != GRPC_ERROR_NONE) {
+ grpc_error *parent = *composite;
+ if (parent == GRPC_ERROR_NONE) {
+ parent = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
+ }
+ *composite = grpc_error_add_child(parent, error);
+ return false;
+ }
+
+ return true;
+}
+
+/*******************************************************************************
+ * Polling island Definitions
+ */
+
+/* The wakeup fd that is used to wake up all threads in a Polling island. This
+ is useful in the polling island merge operation where we need to wakeup all
+ the threads currently polling the smaller polling island (so that they can
+ start polling the new/merged polling island)
+
+ NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
+ threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
+static grpc_wakeup_fd polling_island_wakeup_fd;
+
+/* The polling island being polled right now.
+ See comments in workqueue_maybe_wakeup for why this is tracked. */
+static __thread polling_island *g_current_thread_polling_island;
+
+/* Forward declaration */
+static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi);
+static void workqueue_enqueue(grpc_exec_ctx *exec_ctx, grpc_closure *closure,
+ grpc_error *error);
+
+#ifdef GRPC_TSAN
+/* Currently TSAN may incorrectly flag data races between epoll_ctl and
+ epoll_wait for any grpc_fd structs that are added to the epoll set via
+ epoll_ctl and are returned (within a very short window) via epoll_wait().
+
+ To work-around this race, we establish a happens-before relation between
+ the code just-before epoll_ctl() and the code after epoll_wait() by using
+ this atomic */
+gpr_atm g_epoll_sync;
+#endif /* defined(GRPC_TSAN) */
+
+static const grpc_closure_scheduler_vtable workqueue_scheduler_vtable = {
+ workqueue_enqueue, workqueue_enqueue, "workqueue"};
+
+static void pi_add_ref(polling_island *pi);
+static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi);
+
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+/* Debug variant of pi_add_ref(): takes the ref and logs the old and new counts
+ with the reason and call site. The count is sampled before the increment, so
+ the logged numbers are only approximate if other threads change it
+ concurrently. */
+static void pi_add_ref_dbg(polling_island *pi, const char *reason,
+ const char *file, int line) {
+ long old_cnt = gpr_atm_acq_load(&pi->ref_count);
+ pi_add_ref(pi);
+ gpr_log(GPR_DEBUG, "Add ref pi: %p, old: %ld -> new:%ld (%s) - (%s, %d)",
+ (void *)pi, old_cnt, old_cnt + 1, reason, file, line);
+}
+
+/* Debug variant of pi_unref(): drops the ref and logs the old and new counts.
+ pi_unref() may delete the island, so only the pointer value of 'pi' (never
+ its contents) is used afterwards. */
+static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi,
+ const char *reason, const char *file, int line) {
+ long old_cnt = gpr_atm_acq_load(&pi->ref_count);
+ pi_unref(exec_ctx, pi);
+ gpr_log(GPR_DEBUG, "Unref pi: %p, old:%ld -> new:%ld (%s) - (%s, %d)",
+ (void *)pi, old_cnt, (old_cnt - 1), reason, file, line);
+}
+
+/* Takes a ref on the polling island that 'workqueue' is a cast of (no-op for
+ NULL) and returns 'workqueue'. Debug signature: carries call-site info. */
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue,
+ const char *file, int line,
+ const char *reason) {
+ if (workqueue != NULL) {
+ pi_add_ref_dbg((polling_island *)workqueue, reason, file, line);
+ }
+ return workqueue;
+}
+
+/* Drops a ref on the polling island that 'workqueue' is a cast of (no-op for
+ NULL). Debug signature: carries call-site info. */
+static void workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+ const char *file, int line, const char *reason) {
+ if (workqueue != NULL) {
+ pi_unref_dbg(exec_ctx, (polling_island *)workqueue, reason, file, line);
+ }
+}
+#else
+/* Takes a ref on the polling island that 'workqueue' is a cast of (no-op for
+ NULL) and returns 'workqueue'. */
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue) {
+ if (workqueue != NULL) {
+ pi_add_ref((polling_island *)workqueue);
+ }
+ return workqueue;
+}
+
+/* Drops a ref on the polling island that 'workqueue' is a cast of (no-op for
+ NULL). */
+static void workqueue_unref(grpc_exec_ctx *exec_ctx,
+ grpc_workqueue *workqueue) {
+ if (workqueue != NULL) {
+ pi_unref(exec_ctx, (polling_island *)workqueue);
+ }
+}
+#endif
+
+/* Atomically increments pi->ref_count by one (no memory barrier). */
+static void pi_add_ref(polling_island *pi) {
+ gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
+}
+
+/* Atomically decrements pi->ref_count and deletes the island when the count
+ reaches zero, then drops the ref that the deleted island held on its
+ 'merged_to' target (if any). */
+static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
+ /* If ref count went to zero, delete the polling island.
+ Note that this deletion need not be done under a lock. Once the ref count
+ goes to zero, we are guaranteed that no one else holds a reference to the
+ polling island (and that there is no racing pi_add_ref() call either).
+
+ Also, if we are deleting the polling island and the merged_to field is
+ non-empty, we should remove a ref to the merged_to polling island
+ */
+ if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
+ /* Read merged_to before the island's memory is freed */
+ polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
+ polling_island_delete(exec_ctx, pi);
+ if (next != NULL) {
+ PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
+ }
+ }
+}
+
+/* Puts 'node' in the detached state: both links point back at the node. */
+static void worker_node_init(worker_node *node) {
+ node->prev = node;
+ node->next = node;
+}
+
+/* Links 'node' in as the last element of the circular list rooted at 'head'.
+ Not thread safe: call with the list-level lock held. */
+static void push_back_worker_node(worker_node *head, worker_node *node) {
+ worker_node *tail = head->prev;
+
+ node->prev = tail;
+ node->next = head;
+ tail->next = node;
+ head->prev = node;
+}
+
+/* Unlinks 'node' from whatever list it is on and leaves it detached.
+ Not thread safe: call with the list-level lock held. */
+static void remove_worker_node(worker_node *node) {
+ worker_node *before = node->prev;
+ worker_node *after = node->next;
+
+ after->prev = before;
+ before->next = after;
+
+ /* A node that links only to itself is, by convention, on no list */
+ node->next = node;
+ node->prev = node;
+}
+
+/* Detaches and returns the first node of the list rooted at 'head', or returns
+ NULL when the list is empty.
+ Not thread safe: call with the list-level lock held. */
+static worker_node *pop_front_worker_node(worker_node *head) {
+ worker_node *first = head->next;
+ if (first == head) {
+ return NULL;
+ }
+
+ remove_worker_node(first);
+ return first;
+}
+
+/* Tells whether 'node' is off every list, i.e. whether both its next and its
+ prev links point back at the node itself. */
+static bool is_worker_node_detached(worker_node *node) {
+ return node->next == node && node->prev == node;
+}
+
+/* Adds each of the 'fd_count' fds in 'fds' to pi's epoll set (EPOLLIN |
+ EPOLLOUT | EPOLLET) and appends it to pi->fds, growing that array as needed.
+ - pi: target island; the caller is expected to hold pi->mu.
+ - add_fd_refs: when true, a "polling_island" ref is taken on every fd added.
+ - error: epoll_ctl failures other than EEXIST are appended here. An fd whose
+ epoll_ctl call fails (EEXIST included) is skipped, not recorded. */
+static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds,
+ size_t fd_count, bool add_fd_refs,
+ grpc_error **error) {
+ int err;
+ size_t i;
+ struct epoll_event ev;
+ char *err_msg;
+ const char *err_desc = "polling_island_add_fds";
+
+#ifdef GRPC_TSAN
+ /* See the definition of g_epoll_sync for more context */
+ gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
+#endif /* defined(GRPC_TSAN) */
+
+ for (i = 0; i < fd_count; i++) {
+ ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
+ ev.data.ptr = fds[i];
+ err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);
+
+ if (err < 0) {
+ /* Capture errno right away: the formatting/allocation done below is
+ allowed to modify it before the error object is built. */
+ const int saved_errno = errno;
+ if (saved_errno != EEXIST) {
+ gpr_asprintf(
+ &err_msg,
+ "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
+ pi->epoll_fd, fds[i]->fd, saved_errno, strerror(saved_errno));
+ append_error(error, GRPC_OS_ERROR(saved_errno, err_msg), err_desc);
+ gpr_free(err_msg);
+ }
+
+ continue;
+ }
+
+ if (pi->fd_cnt == pi->fd_capacity) {
+ pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
+ pi->fds = gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity);
+ }
+
+ pi->fds[pi->fd_cnt++] = fds[i];
+ if (add_fd_refs) {
+ GRPC_FD_REF(fds[i], "polling_island");
+ }
+ }
+}
+
+/* Adds the read end of 'wakeup_fd' to pi's epoll set (EPOLLIN | EPOLLET), with
+ the wakeup_fd pointer as the event's user data.
+ The caller is expected to hold pi->mu. epoll_ctl failures other than EEXIST
+ are appended to '*error'. */
+static void polling_island_add_wakeup_fd_locked(polling_island *pi,
+ grpc_wakeup_fd *wakeup_fd,
+ grpc_error **error) {
+ struct epoll_event ev;
+ int err;
+ char *err_msg;
+ const char *err_desc = "polling_island_add_wakeup_fd";
+
+ ev.events = (uint32_t)(EPOLLIN | EPOLLET);
+ ev.data.ptr = wakeup_fd;
+ err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
+ GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
+ if (err < 0) {
+ /* Capture errno right away: the formatting/allocation done below is
+ allowed to modify it before the error object is built. */
+ const int saved_errno = errno;
+ if (saved_errno != EEXIST) {
+ gpr_asprintf(&err_msg,
+ "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
+ "error: %d (%s)",
+ pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd),
+ saved_errno, strerror(saved_errno));
+ append_error(error, GRPC_OS_ERROR(saved_errno, err_msg), err_desc);
+ gpr_free(err_msg);
+ }
+ }
+}
+
+/* Removes every fd recorded in pi->fds from pi's epoll set and resets
+ pi->fd_cnt to zero (the array itself is kept).
+ - pi: the caller is expected to hold pi->mu.
+ - remove_fd_refs: when true, the "polling_island" ref on each fd is dropped.
+ - error: epoll_ctl failures other than ENOENT are appended here; removal
+ continues with the remaining fds regardless. */
+static void polling_island_remove_all_fds_locked(polling_island *pi,
+ bool remove_fd_refs,
+ grpc_error **error) {
+ int err;
+ size_t i;
+ char *err_msg;
+ const char *err_desc = "polling_island_remove_fds";
+
+ for (i = 0; i < pi->fd_cnt; i++) {
+ err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL);
+ if (err < 0) {
+ /* Capture errno right away: the formatting/allocation done below is
+ allowed to modify it before the error object is built. */
+ const int saved_errno = errno;
+ if (saved_errno != ENOENT) {
+ gpr_asprintf(&err_msg,
+ "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
+ "error: %d (%s)",
+ pi->epoll_fd, i, pi->fds[i]->fd, saved_errno,
+ strerror(saved_errno));
+ append_error(error, GRPC_OS_ERROR(saved_errno, err_msg), err_desc);
+ gpr_free(err_msg);
+ }
+ }
+
+ if (remove_fd_refs) {
+ GRPC_FD_UNREF(pi->fds[i], "polling_island");
+ }
+ }
+
+ pi->fd_cnt = 0;
+}
+
+/* Removes 'fd' from pi: deletes it from the epoll set (unless it is already
+ closed) and, if it is found in pi->fds, drops it from that array together
+ with its "polling_island" ref.
+ - pi: the caller is expected to hold pi->mu.
+ - is_fd_closed: when true the epoll_ctl call is skipped.
+ - error: an epoll_ctl failure other than ENOENT is appended here. */
+static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd,
+ bool is_fd_closed,
+ grpc_error **error) {
+ int err;
+ size_t i;
+ char *err_msg;
+ const char *err_desc = "polling_island_remove_fd";
+
+ /* If fd is already closed, then it would have been automatically been removed
+ from the epoll set */
+ if (!is_fd_closed) {
+ err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
+ if (err < 0) {
+ /* Capture errno right away: the formatting/allocation done below is
+ allowed to modify it before the error object is built. */
+ const int saved_errno = errno;
+ if (saved_errno != ENOENT) {
+ gpr_asprintf(
+ &err_msg,
+ "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
+ pi->epoll_fd, fd->fd, saved_errno, strerror(saved_errno));
+ append_error(error, GRPC_OS_ERROR(saved_errno, err_msg), err_desc);
+ gpr_free(err_msg);
+ }
+ }
+ }
+
+ for (i = 0; i < pi->fd_cnt; i++) {
+ if (pi->fds[i] == fd) {
+ /* Order is not preserved: the last entry fills the vacated slot */
+ pi->fds[i] = pi->fds[--pi->fd_cnt];
+ GRPC_FD_UNREF(fd, "polling_island");
+ break;
+ }
+ }
+}
+
+/* Allocates and initializes a polling island: creates its workqueue wakeup fd
+ and epoll set, registers the wakeup fd in the epoll set and, if 'initial_fd'
+ is non-NULL, adds it (taking a "polling_island" ref on it).
+ '*error' is reset to GRPC_ERROR_NONE on entry. On any failure the partially
+ built island is deleted, '*error' describes the failure and NULL is
+ returned. The new island starts with a ref_count of zero.
+ NOTE(review): when grpc_wakeup_fd_init() fails, the cleanup path still
+ reaches grpc_wakeup_fd_destroy() (via polling_island_delete()) on that fd --
+ confirm that destroying a wakeup fd whose init failed is safe. */
+static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
+ grpc_fd *initial_fd,
+ grpc_error **error) {
+ polling_island *pi = NULL;
+ const char *err_desc = "polling_island_create";
+
+ *error = GRPC_ERROR_NONE;
+
+ pi = gpr_malloc(sizeof(*pi));
+ pi->workqueue_scheduler.vtable = &workqueue_scheduler_vtable;
+ gpr_mu_init(&pi->mu);
+ pi->fd_cnt = 0;
+ pi->fd_capacity = 0;
+ pi->fds = NULL;
+ pi->epoll_fd = -1;
+
+ gpr_mu_init(&pi->workqueue_read_mu);
+ gpr_mpscq_init(&pi->workqueue_items);
+ gpr_atm_rel_store(&pi->workqueue_item_count, 0);
+
+ gpr_atm_rel_store(&pi->ref_count, 0);
+ gpr_atm_rel_store(&pi->poller_count, 0);
+ gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);
+
+ gpr_mu_init(&pi->worker_list_mu);
+ worker_node_init(&pi->worker_list_head);
+
+ if (!append_error(error, grpc_wakeup_fd_init(&pi->workqueue_wakeup_fd),
+ err_desc)) {
+ goto done;
+ }
+
+ pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+
+ if (pi->epoll_fd < 0) {
+ append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
+ goto done;
+ }
+
+ polling_island_add_wakeup_fd_locked(pi, &pi->workqueue_wakeup_fd, error);
+
+ /* Only track (and take a ref on) initial_fd if everything so far succeeded.
+ Otherwise the cleanup below would call polling_island_delete() on an
+ island that still holds an fd, tripping its fd_cnt == 0 assertion and
+ leaking the fd ref. */
+ if (initial_fd != NULL && *error == GRPC_ERROR_NONE) {
+ polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
+ }
+
+done:
+ if (*error != GRPC_ERROR_NONE) {
+ polling_island_delete(exec_ctx, pi);
+ pi = NULL;
+ }
+ return pi;
+}
+
+/* Releases everything owned by 'pi' and frees it: closes the epoll fd (if one
+ was opened), destroys the mutexes, the workqueue and its wakeup fd, and
+ frees the fds array.
+ Preconditions (asserted): no fds are recorded in the island, the workqueue
+ is empty and the worker list is empty. */
+static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) {
+ GPR_ASSERT(pi->fd_cnt == 0);
+
+ /* epoll_fd is -1 when creation failed before the epoll set was opened */
+ if (pi->epoll_fd >= 0) {
+ close(pi->epoll_fd);
+ }
+ GPR_ASSERT(gpr_atm_no_barrier_load(&pi->workqueue_item_count) == 0);
+ gpr_mu_destroy(&pi->workqueue_read_mu);
+ gpr_mpscq_destroy(&pi->workqueue_items);
+ gpr_mu_destroy(&pi->mu);
+ grpc_wakeup_fd_destroy(&pi->workqueue_wakeup_fd);
+ gpr_mu_destroy(&pi->worker_list_mu);
+ GPR_ASSERT(is_worker_node_detached(&pi->worker_list_head));
+
+ gpr_free(pi->fds);
+ gpr_free(pi);
+}
+
+/* Follows the 'merged_to' links starting at 'pi' and returns the last island
+ * reached. No lock is taken, so a concurrent merge may mean that the island
+ * returned is no longer the last one by the time the caller looks at it. */
+static polling_island *polling_island_maybe_get_latest(polling_island *pi) {
+ polling_island *latest = pi;
+
+ for (;;) {
+ polling_island *successor =
+ (polling_island *)gpr_atm_acq_load(&latest->merged_to);
+ if (successor == NULL) {
+ return latest;
+ }
+ latest = successor;
+ }
+}
+
+/* Gets the lock on the *latest* polling island i.e the last polling island in
+ the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
+ returned polling island's mu.
+ Usage: To lock/unlock polling island "pi", do the following:
+ polling_island *pi_latest = polling_island_lock(pi);
+ ...
+ ... critical section ..
+ ...
+ gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
+static polling_island *polling_island_lock(polling_island *pi) {
+ polling_island *next = NULL;
+
+ while (true) {
+ next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
+ if (next == NULL) {
+ /* Looks like 'pi' is the last node in the linked list but unless we check
+ this by holding the pi->mu lock, we cannot be sure (i.e without the
+ pi->mu lock, we don't prevent island merges).
+ To be absolutely sure, check once more by holding the pi->mu lock */
+ gpr_mu_lock(&pi->mu);
+ next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
+ if (next == NULL) {
+ /* pi is in fact the last node and we have the pi->mu lock. We're done;
+ the function returns with pi->mu still held */
+ break;
+ }
+
+ /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
+ * isn't the lock we are interested in. Continue traversing the list */
+ gpr_mu_unlock(&pi->mu);
+ }
+
+ pi = next;
+ }
+
+ return pi;
+}
+
+/* Gets the lock on the *latest* polling islands in the linked lists pointed by
+ *p and *q (and also updates *p and *q to point to the latest polling islands)
+
+ This function is needed because calling the following block of code to obtain
+ locks on polling islands (*p and *q) is prone to deadlocks.
+ {
+ polling_island_lock(*p);
+ polling_island_lock(*q);
+ }
+
+ Usage/example:
+ polling_island *p1;
+ polling_island *p2;
+ ..
+ polling_island_lock_pair(&p1, &p2);
+ ..
+ .. Critical section with both p1 and p2 locked
+ ..
+ // Release locks: Always call polling_island_unlock_pair() to release locks
+ polling_island_unlock_pair(p1, p2);
+*/
+static void polling_island_lock_pair(polling_island **p, polling_island **q) {
+ polling_island *pi_1 = *p;
+ polling_island *pi_2 = *q;
+ polling_island *next_1 = NULL;
+ polling_island *next_2 = NULL;
+
+ /* The algorithm is simple:
+ - Go to the last polling islands in the linked lists starting at pi_1 and
+ pi_2 (and keep updating pi_1 and pi_2)
+ - Then obtain locks on the islands by following a lock order rule of
+ locking polling_island with lower address first
+ Special case: Before obtaining the locks, check if pi_1 and pi_2 are
+ pointing to the same island. If that is the case, we can just call
+ polling_island_lock()
+ - After obtaining both the locks, double check that the polling islands
+ are still the last polling islands in their respective linked lists
+ (this is because there might have been polling island merges before
+ we got the lock)
+ - If the polling islands are the last islands, we are done. If not,
+ release the locks and continue the process from the first step */
+ while (true) {
+ next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
+ while (next_1 != NULL) {
+ pi_1 = next_1;
+ next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
+ }
+
+ next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
+ while (next_2 != NULL) {
+ pi_2 = next_2;
+ next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
+ }
+
+ /* Both lists end at the same island: a single lock is enough */
+ if (pi_1 == pi_2) {
+ pi_1 = pi_2 = polling_island_lock(pi_1);
+ break;
+ }
+
+ /* Lock ordering rule: lower address first */
+ if (pi_1 < pi_2) {
+ gpr_mu_lock(&pi_1->mu);
+ gpr_mu_lock(&pi_2->mu);
+ } else {
+ gpr_mu_lock(&pi_2->mu);
+ gpr_mu_lock(&pi_1->mu);
+ }
+
+ /* Re-check under the locks that neither island was merged meanwhile */
+ next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
+ next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
+ if (next_1 == NULL && next_2 == NULL) {
+ break;
+ }
+
+ gpr_mu_unlock(&pi_1->mu);
+ gpr_mu_unlock(&pi_2->mu);
+ }
+
+ *p = pi_1;
+ *q = pi_2;
+}
+
+/* Releases the lock(s) obtained through polling_island_lock_pair(). When 'p'
+ and 'q' are the same island its mutex is unlocked only once. */
+static void polling_island_unlock_pair(polling_island *p, polling_island *q) {
+ gpr_mu_unlock(&p->mu);
+ if (p != q) {
+ gpr_mu_unlock(&q->mu);
+ }
+}
+
+/* Signals pi's workqueue wakeup fd, but only when some poller other than the
+ calling thread appears to be polling the island. A failed wakeup is logged. */
+static void workqueue_maybe_wakeup(polling_island *pi) {
+ /* When the calling thread is itself the poller of 'pi' it may be about to
+ decrement the poller count, so it must not be counted among the pollers
+ that could pick the work up. */
+ const gpr_atm wakeup_threshold =
+ (g_current_thread_polling_island == pi) ? 1 : 0;
+
+ /* Missing a wakeup here is acceptable: each poll loop does an anticipatory
+ mpscq_pop, so the next poller to enter gets the work item anyway. */
+ if (gpr_atm_no_barrier_load(&pi->poller_count) <= wakeup_threshold) {
+ return;
+ }
+
+ GRPC_LOG_IF_ERROR("workqueue_wakeup_fd",
+ grpc_wakeup_fd_wakeup(&pi->workqueue_wakeup_fd));
+}
+
+/* If 'q' has been merged into another island, moves every item queued on q's
+ workqueue to that island's workqueue, possibly wakes a poller there, and then
+ repeats the process from that island (so items end up at the end of the
+ 'merged_to' chain). Does nothing if 'q' has not been merged. */
+static void workqueue_move_items_to_parent(polling_island *q) {
+ polling_island *p = (polling_island *)gpr_atm_no_barrier_load(&q->merged_to);
+ if (p == NULL) {
+ return;
+ }
+ /* workqueue_read_mu must be held to pop from workqueue_items */
+ gpr_mu_lock(&q->workqueue_read_mu);
+ int num_added = 0;
+ while (gpr_atm_no_barrier_load(&q->workqueue_item_count) > 0) {
+ gpr_mpscq_node *n = gpr_mpscq_pop(&q->workqueue_items);
+ /* A NULL pop while the count is still positive simply retries the loop */
+ if (n != NULL) {
+ gpr_atm_no_barrier_fetch_add(&q->workqueue_item_count, -1);
+ gpr_atm_no_barrier_fetch_add(&p->workqueue_item_count, 1);
+ gpr_mpscq_push(&p->workqueue_items, n);
+ num_added++;
+ }
+ }
+ gpr_mu_unlock(&q->workqueue_read_mu);
+ if (num_added > 0) {
+ workqueue_maybe_wakeup(p);
+ }
+ /* p may itself have been merged; keep forwarding up the chain */
+ workqueue_move_items_to_parent(p);
+}
+
+/* Merges the latest islands reachable from 'p' and 'q': the fds of the island
+ with fewer fds are moved into the other one, the emptied island is linked to
+ the survivor through 'merged_to', and its queued work items are forwarded.
+ Errors from the underlying epoll operations are appended to '*error'.
+ Returns the surviving island (no merge happens if both resolve to the same
+ island). */
+static polling_island *polling_island_merge(polling_island *p,
+ polling_island *q,
+ grpc_error **error) {
+ /* Get locks on both the polling islands */
+ polling_island_lock_pair(&p, &q);
+
+ if (p != q) {
+ /* Make sure that p points to the polling island with fewer fds than q */
+ if (p->fd_cnt > q->fd_cnt) {
+ GPR_SWAP(polling_island *, p, q);
+ }
+
+ /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
+ Note that the refcounts on the fds being moved will not change here.
+ This is why the last param in the following two functions is 'false') */
+ polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
+ polling_island_remove_all_fds_locked(p, false, error);
+
+ /* Wakeup all the pollers (if any) on p so that they pickup this change */
+ polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);
+
+ /* Add the 'merged_to' link from p --> q */
+ gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
+ PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
+
+ workqueue_move_items_to_parent(p);
+ }
+ /* else if p == q, nothing needs to be done */
+
+ polling_island_unlock_pair(p, q);
+
+ /* Return the merged polling island (Note that no merge would have happened
+ if p == q which is ok) */
+ return q;
+}
+
+/* Scheduler entry point (used for both slots of workqueue_scheduler_vtable):
+ queues 'closure', with 'error' stored in it, on the workqueue of the polling
+ island that closure->scheduler points to. A poller may be woken when the
+ queue was empty beforehand, and items are forwarded if the island has been
+ merged into another one. */
+static void workqueue_enqueue(grpc_exec_ctx *exec_ctx, grpc_closure *closure,
+ grpc_error *error) {
+ GPR_TIMER_BEGIN("workqueue.enqueue", 0);
+ grpc_workqueue *workqueue = (grpc_workqueue *)closure->scheduler;
+ /* take a ref to the workqueue: otherwise it can happen that whatever events
+ * this kicks off ends up destroying the workqueue before this function
+ * completes */
+ GRPC_WORKQUEUE_REF(workqueue, "enqueue");
+ polling_island *pi = (polling_island *)workqueue;
+ /* 'last' is the item count before this closure was accounted for */
+ gpr_atm last = gpr_atm_no_barrier_fetch_add(&pi->workqueue_item_count, 1);
+ closure->error_data.error = error;
+ gpr_mpscq_push(&pi->workqueue_items, &closure->next_data.atm_next);
+ if (last == 0) {
+ workqueue_maybe_wakeup(pi);
+ }
+ workqueue_move_items_to_parent(pi);
+ GRPC_WORKQUEUE_UNREF(exec_ctx, workqueue, "enqueue");
+ GPR_TIMER_END("workqueue.enqueue", 0);
+}
+
+/* Returns the closure scheduler embedded in the polling island that
+ 'workqueue' is a cast of, or grpc_schedule_on_exec_ctx when 'workqueue' is
+ NULL. */
+static grpc_closure_scheduler *workqueue_scheduler(grpc_workqueue *workqueue) {
+ if (workqueue == NULL) {
+ return grpc_schedule_on_exec_ctx;
+ }
+
+ return &((polling_island *)workqueue)->workqueue_scheduler;
+}
+
+/* Creates the global polling_island_wakeup_fd and immediately signals it, so
+ that it starts out readable. Returns GRPC_ERROR_NONE on success, otherwise
+ the error from whichever of the two wakeup-fd calls failed first. */
+static grpc_error *polling_island_global_init(void) {
+ grpc_error *error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
+ if (error == GRPC_ERROR_NONE) {
+ error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
+ }
+
+ return error;
+}
+
+/* Destroys the global polling_island_wakeup_fd set up by
+ polling_island_global_init(). */
+static void polling_island_global_shutdown(void) {
+ grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
+}
+
+/*******************************************************************************
+ * Fd Definitions
+ */
+
+/* We need to keep a freelist not because of any concerns of malloc performance
+ * but instead so that implementations with multiple threads in (for example)
+ * epoll_wait deal with the race between pollset removal and incoming poll
+ * notifications.
+ *
+ * The problem is that the poller ultimately holds a reference to this
+ * object, so it is very difficult to know when is safe to free it, at least
+ * without some expensive synchronization.
+ *
+ * If we keep the object freelisted, in the worst case losing this race just
+ * becomes a spurious read notification on a reused fd.
+ */
+
+/* The alarm system needs to be able to wakeup 'some poller' sometimes
+ * (specifically when a new alarm needs to be triggered earlier than the next
+ * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
+ * case occurs. */
+
+static grpc_fd *fd_freelist = NULL;
+static gpr_mu fd_freelist_mu;
+
+/* ref_by(): atomically adds 'n' to fd->refst and asserts that the value before
+ the addition was positive (i.e. the fd was still referenced). With
+ GRPC_FD_REF_COUNT_DEBUG the function also takes a reason and call site and
+ logs the transition. Use the REF_BY()/UNREF_BY() macros so that call sites
+ work in both configurations. */
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
+#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
+static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file,
+ int line) {
+ gpr_log(GPR_DEBUG, "FD %d %p ref %d %ld -> %ld [%s; %s:%d]", fd->fd,
+ (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
+ gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
+#else
+#define REF_BY(fd, n, reason) ref_by(fd, n)
+#define UNREF_BY(fd, n, reason) unref_by(fd, n)
+static void ref_by(grpc_fd *fd, int n) {
+#endif
+ GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
+}
+
+/* unref_by(): atomically subtracts 'n' from fd->refst. If that removes the
+ last reference (the previous value was exactly 'n'), the fd object is pushed
+ onto the global freelist, unregistered from iomgr and its read/write
+ lock-free events are destroyed -- all while holding fd_freelist_mu. The
+ object itself is not freed here. Otherwise the previous value is asserted to
+ have been larger than 'n'. With GRPC_FD_REF_COUNT_DEBUG the transition is
+ also logged. */
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file,
+ int line) {
+ gpr_atm old;
+ gpr_log(GPR_DEBUG, "FD %d %p unref %d %ld -> %ld [%s; %s:%d]", fd->fd,
+ (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
+ gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
+#else
+static void unref_by(grpc_fd *fd, int n) {
+ gpr_atm old;
+#endif
+ old = gpr_atm_full_fetch_add(&fd->refst, -n);
+ if (old == n) {
+ /* Add the fd to the freelist */
+ gpr_mu_lock(&fd_freelist_mu);
+ fd->freelist_next = fd_freelist;
+ fd_freelist = fd;
+ grpc_iomgr_unregister_object(&fd->iomgr_object);
+
+ grpc_lfev_destroy(&fd->read_closure);
+ grpc_lfev_destroy(&fd->write_closure);
+
+ gpr_mu_unlock(&fd_freelist_mu);
+ } else {
+ GPR_ASSERT(old > n);
+ }
+}
+
+/* Public ref/unref helpers. They move the refcount in steps of two so that
+ * the lowest bit (the orphan bit) is never touched. */
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+static void fd_ref(grpc_fd *fd, const char *reason, const char *file,
+                   int line) {
+  ref_by(fd, 2, reason, file, line);
+}
+
+static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
+                     int line) {
+  unref_by(fd, 2, reason, file, line);
+}
+#else
+static void fd_ref(grpc_fd *fd) {
+  ref_by(fd, 2);
+}
+
+static void fd_unref(grpc_fd *fd) {
+  unref_by(fd, 2);
+}
+#endif
+
+/* One-time setup for the fd subsystem: creates the freelist mutex. */
+static void fd_global_init(void) {
+  gpr_mu_init(&fd_freelist_mu);
+}
+
+/* Frees every grpc_fd parked on the freelist (destroying each one's po.mu)
+ * and then destroys the freelist mutex itself. */
+static void fd_global_shutdown(void) {
+  /* Take and drop the freelist lock once before walking the list; the walk
+     itself runs without the lock. NOTE(review): presumably this waits out any
+     thread still inside unref_by()'s critical section - confirm. */
+  gpr_mu_lock(&fd_freelist_mu);
+  gpr_mu_unlock(&fd_freelist_mu);
+
+  grpc_fd *cur = fd_freelist;
+  while (cur != NULL) {
+    grpc_fd *next = cur->freelist_next;
+    gpr_mu_destroy(&cur->po.mu);
+    gpr_free(cur);
+    cur = next;
+  }
+  fd_freelist = NULL;
+
+  gpr_mu_destroy(&fd_freelist_mu);
+}
+
+/* Wraps the OS descriptor 'fd' in a grpc_fd. The struct is recycled from the
+ * freelist when one is available, otherwise freshly allocated. The returned
+ * object starts with refst == 1, is not orphaned, has no polling island and is
+ * registered with iomgr under the name "<name> fd=<fd>". */
+static grpc_fd *fd_create(int fd, const char *name) {
+  grpc_fd *new_fd;
+
+  /* Try to pop a recycled struct off the freelist */
+  gpr_mu_lock(&fd_freelist_mu);
+  new_fd = fd_freelist;
+  if (new_fd != NULL) {
+    fd_freelist = new_fd->freelist_next;
+  }
+  gpr_mu_unlock(&fd_freelist_mu);
+
+  /* Nothing to recycle: allocate, and create the mutex that survives reuse */
+  if (new_fd == NULL) {
+    new_fd = gpr_malloc(sizeof(grpc_fd));
+    gpr_mu_init(&new_fd->po.mu);
+  }
+
+  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
+   * is a newly created fd (or an fd we got from the freelist), no one else
+   * would be holding a lock to it anyway. */
+  gpr_mu_lock(&new_fd->po.mu);
+  new_fd->po.pi = NULL;
+#ifdef PO_DEBUG
+  new_fd->po.obj_type = POLL_OBJ_FD;
+#endif
+
+  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
+  new_fd->fd = fd;
+  new_fd->orphaned = false;
+  grpc_lfev_init(&new_fd->read_closure);
+  grpc_lfev_init(&new_fd->write_closure);
+  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);
+
+  new_fd->freelist_next = NULL;
+  new_fd->on_done_closure = NULL;
+
+  gpr_mu_unlock(&new_fd->po.mu);
+
+  /* Register with iomgr under a descriptive, temporary name string */
+  char *fd_name;
+  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
+  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+  gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, (void *)new_fd, fd_name);
+#endif
+  gpr_free(fd_name);
+
+  return new_fd;
+}
+
+/* Returns the wrapped OS descriptor, or -1 once the grpc_fd has been
+ * orphaned. The orphaned flag is read under fd->po.mu. */
+static int fd_wrapped_fd(grpc_fd *fd) {
+  gpr_mu_lock(&fd->po.mu);
+  const int ret_fd = fd->orphaned ? -1 : fd->fd;
+  gpr_mu_unlock(&fd->po.mu);
+  return ret_fd;
+}
+
+/* Detaches 'fd' from the polling machinery.
+ *  - release_fd == NULL: the OS descriptor is closed here.
+ *  - release_fd != NULL: the descriptor is handed back through *release_fd
+ *    and left open.
+ * The fd is marked orphaned, removed from its polling island (if any),
+ * 'on_done' is scheduled with any error from the removal, and the creation
+ * reference is dropped. 'reason' is only used by the refcount debug macros. */
+static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+                      grpc_closure *on_done, int *release_fd,
+                      const char *reason) {
+  grpc_error *error = GRPC_ERROR_NONE;
+  polling_island *unref_pi = NULL;
+
+  gpr_mu_lock(&fd->po.mu);
+  fd->on_done_closure = on_done;
+
+  /* If release_fd is not NULL, we should be relinquishing control of the file
+     descriptor fd->fd (but we still own the grpc_fd structure). */
+  const bool is_fd_closed = (release_fd == NULL);
+  if (is_fd_closed) {
+    close(fd->fd);
+  } else {
+    *release_fd = fd->fd;
+  }
+
+  fd->orphaned = true;
+
+  /* Remove the active status but keep referenced. We want this grpc_fd struct
+     to be alive (and not added to freelist) until the end of this function */
+  REF_BY(fd, 1, reason);
+
+  /* Remove the fd from the polling island:
+     - Get a lock on the latest polling island (i.e the last island in the
+       linked list pointed by fd->po.pi). This is the island that
+       would actually contain the fd
+     - Remove the fd from the latest polling island
+     - Unlock the latest polling island
+     - Set fd->po.pi to NULL (the ref on the polling island is released
+       further below, outside the fd lock) */
+  if (fd->po.pi != NULL) {
+    polling_island *pi_latest = polling_island_lock(fd->po.pi);
+    polling_island_remove_fd_locked(pi_latest, fd, is_fd_closed, &error);
+    gpr_mu_unlock(&pi_latest->mu);
+
+    unref_pi = fd->po.pi;
+    fd->po.pi = NULL;
+  }
+
+  grpc_closure_sched(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));
+
+  gpr_mu_unlock(&fd->po.mu);
+
+  /* Drop the reference */
+  UNREF_BY(fd, 2, reason);
+
+  /* Unref stale polling island here, outside the fd lock above.
+     The polling island owns a workqueue which owns an fd, and unreffing
+     inside the lock can cause an eventual lock loop that makes TSAN very
+     unhappy. */
+  if (unref_pi != NULL) {
+    PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
+  }
+
+  GRPC_LOG_IF_ERROR("fd_orphan", GRPC_ERROR_REF(error));
+  GRPC_ERROR_UNREF(error);
+}
+
+/* Returns the pollset most recently recorded by fd_become_readable() for this
+ * fd (NULL if none). Uses an acquire load of fd->read_notifier_pollset.
+ * exec_ctx is unused. */
+static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
+                                                  grpc_fd *fd) {
+  return (grpc_pollset *)gpr_atm_acq_load(&fd->read_notifier_pollset);
+}
+
+/* Reports whether the fd has been shut down, judged by the read-side event
+ * state alone (fd_shutdown() marks the read side first). */
+static bool fd_is_shutdown(grpc_fd *fd) {
+ return grpc_lfev_is_shutdown(&fd->read_closure);
+}
+
+/* Shuts the fd down for both directions. Might be called multiple times: the
+ * socket-level shutdown() and the write-side state change only happen when
+ * grpc_lfev_set_shutdown() on the read side returns true. Takes ownership of
+ * 'why' (it is unreffed before returning). */
+static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
+  const bool read_side_changed = grpc_lfev_set_shutdown(
+      exec_ctx, &fd->read_closure, GRPC_ERROR_REF(why));
+  if (read_side_changed) {
+    shutdown(fd->fd, SHUT_RDWR);
+    grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
+  }
+  GRPC_ERROR_UNREF(why);
+}
+
+/* Registers 'closure' with the fd's read-side event state through
+ * grpc_lfev_notify_on(). */
+static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+ grpc_closure *closure) {
+ grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure);
+}
+
+/* Registers 'closure' with the fd's write-side event state through
+ * grpc_lfev_notify_on(). */
+static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+ grpc_closure *closure) {
+ grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure);
+}
+
+/* Returns a new reference to the workqueue backing this fd. The fd's polling
+ * island pointer is reinterpreted as the workqueue; it is read and reffed
+ * while holding fd->po.mu. */
+static grpc_workqueue *fd_get_workqueue(grpc_fd *fd) {
+  grpc_workqueue *workqueue;
+
+  gpr_mu_lock(&fd->po.mu);
+  workqueue =
+      GRPC_WORKQUEUE_REF((grpc_workqueue *)fd->po.pi, "fd_get_workqueue");
+  gpr_mu_unlock(&fd->po.mu);
+
+  return workqueue;
+}
+
+/*******************************************************************************
+ * Pollset Definitions
+ */
+/* Per-thread: the pollset the thread is currently inside pollset_work() for
+ * (reset to 0 when pollset_work() returns). */
+GPR_TLS_DECL(g_current_thread_pollset);
+/* Per-thread: the worker created by the thread's current pollset_work() call
+ * (reset to 0 when pollset_work() returns). */
+GPR_TLS_DECL(g_current_thread_worker);
+/* Per-thread: true once pollset_work() has set up the two signal sets below. */
+static __thread bool g_initialized_sigmask;
+/* Per-thread: the thread's original signal mask with grpc_wakeup_signal
+ * removed; this is the mask passed to epoll_pwait(). */
+static __thread sigset_t g_orig_sigmask;
+/* Per-thread: set containing only grpc_wakeup_signal; blocked on the thread
+ * and waited on via sigwaitinfo()/sigtimedwait(). */
+static __thread sigset_t g_wakeup_sig_set;
+
+/* Handler installed for grpc_wakeup_signal by poller_kick_init(). It does
+ * nothing except log the signal number when GRPC_EPOLL_DEBUG is defined. */
+static void sig_handler(int sig_num) {
+#ifdef GRPC_EPOLL_DEBUG
+ gpr_log(GPR_INFO, "Received signal %d", sig_num);
+#endif
+}
+
+/* Prepares a worker for use by the calling thread: records the thread id,
+ * leaves it unlinked from any pollset list, clears both kick flags and
+ * initializes its polling-island wait-list node. */
+static void pollset_worker_init(grpc_pollset_worker *worker) {
+  worker->pt_id = pthread_self();
+
+  worker->next = NULL;
+  worker->prev = NULL;
+
+  gpr_atm_no_barrier_store(&worker->is_kicked, (gpr_atm)0);
+  gpr_atm_no_barrier_store(&worker->is_polling_turn, (gpr_atm)0);
+
+  worker_node_init(&worker->pi_list_link);
+}
+
+/* Installs sig_handler as the process-wide handler for grpc_wakeup_signal, the
+ * signal used to kick workers. Declared with an explicit (void) parameter list
+ * like the other no-argument functions in this file, so that the definition
+ * also serves as a prototype. */
+static void poller_kick_init(void) {
+  signal(grpc_wakeup_signal, sig_handler);
+}
+
+/* Global state management */
+/* Initializes the two thread-local slots used by pollset_work()/pollset_kick()
+ * and installs the wakeup signal handler. Always returns GRPC_ERROR_NONE. */
+static grpc_error *pollset_global_init(void) {
+ gpr_tls_init(&g_current_thread_pollset);
+ gpr_tls_init(&g_current_thread_worker);
+ poller_kick_init();
+ return GRPC_ERROR_NONE;
+}
+
+/* Destroys the thread-local slots created by pollset_global_init(). */
+static void pollset_global_shutdown(void) {
+ gpr_tls_destroy(&g_current_thread_pollset);
+ gpr_tls_destroy(&g_current_thread_worker);
+}
+
+/* Sends grpc_wakeup_signal to the worker's thread, at most once per flag.
+ * 'is_kicked' points to the flag that records the kick (either
+ * worker->is_kicked or worker->is_polling_turn); the signal is only sent by
+ * the caller that flips that flag from 0 to 1. Returns GRPC_ERROR_NONE, or an
+ * OS error if pthread_kill() fails. */
+static grpc_error *worker_kick(grpc_pollset_worker *worker,
+                               gpr_atm *is_kicked) {
+  /* Kick the worker only if it was not already kicked */
+  if (!gpr_atm_no_barrier_cas(is_kicked, (gpr_atm)0, (gpr_atm)1)) {
+    return GRPC_ERROR_NONE;
+  }
+
+  GRPC_POLLING_TRACE(
+      "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
+      (void *)worker, (long int)worker->pt_id);
+
+  const int kill_rc = pthread_kill(worker->pt_id, grpc_wakeup_signal);
+  if (kill_rc != 0) {
+    return GRPC_OS_ERROR(kill_rc, "pthread_kill");
+  }
+  return GRPC_ERROR_NONE;
+}
+
+/* Kicks 'worker', recording the kick in worker->is_kicked (used by
+ * pollset_kick()). */
+static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) {
+ return worker_kick(worker, &worker->is_kicked);
+}
+
+/* Kicks 'worker', recording the kick in worker->is_polling_turn (used by
+ * release_polling_lease() to hand the polling turn to a waiting worker). */
+static grpc_error *poller_kick(grpc_pollset_worker *worker) {
+ return worker_kick(worker, &worker->is_polling_turn);
+}
+
+/* Return 1 if the pollset has active threads in pollset_work (pollset must
+ * be locked), 0 otherwise. The worker list is circular with root_worker as
+ * sentinel, so it is empty exactly when the sentinel points at itself. */
+static int pollset_has_workers(grpc_pollset *p) {
+  const grpc_pollset_worker *first = p->root_worker.next;
+  return first != &p->root_worker;
+}
+
+/* Unlinks 'worker' from the circular worker list by joining its neighbours.
+ * The worker's own next/prev pointers are left untouched. 'p' is unused. */
+static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
+  grpc_pollset_worker *before = worker->prev;
+  grpc_pollset_worker *after = worker->next;
+  before->next = after;
+  after->prev = before;
+}
+
+/* Removes and returns the first worker of the pollset, or NULL when the
+ * pollset has no workers. */
+static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) {
+  if (!pollset_has_workers(p)) {
+    return NULL;
+  }
+  grpc_pollset_worker *front = p->root_worker.next;
+  remove_worker(p, front);
+  return front;
+}
+
+/* Links 'worker' in as the last element of the circular worker list, i.e.
+ * immediately before the root_worker sentinel. */
+static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
+  grpc_pollset_worker *root = &p->root_worker;
+  grpc_pollset_worker *old_tail = root->prev;
+
+  worker->next = root;
+  worker->prev = old_tail;
+  root->prev = worker;
+  old_tail->next = worker;
+}
+
+/* Links 'worker' in as the first element of the circular worker list, i.e.
+ * immediately after the root_worker sentinel. */
+static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
+  grpc_pollset_worker *root = &p->root_worker;
+  grpc_pollset_worker *old_head = root->next;
+
+  worker->prev = root;
+  worker->next = old_head;
+  old_head->prev = worker;
+  root->next = worker;
+}
+
+/* p->mu must be held before calling this function.
+ *
+ * Kicks workers of pollset 'p' depending on 'specific_worker':
+ *  - GRPC_POLLSET_KICK_BROADCAST: every worker except the calling thread's own
+ *    worker; if there are no workers, the kick is remembered in
+ *    p->kicked_without_pollers.
+ *  - any other non-NULL value: that worker, unless it is the calling thread's
+ *    own worker.
+ *  - NULL: any one worker (see the comment in the body); if there is none, the
+ *    kick is remembered in p->kicked_without_pollers.
+ * Returns the accumulated kick errors (also logged); the caller owns the
+ * returned error. */
+static grpc_error *pollset_kick(grpc_pollset *p,
+ grpc_pollset_worker *specific_worker) {
+ GPR_TIMER_BEGIN("pollset_kick", 0);
+ grpc_error *error = GRPC_ERROR_NONE;
+ const char *err_desc = "Kick Failure";
+ grpc_pollset_worker *worker = specific_worker;
+ if (worker != NULL) {
+ if (worker == GRPC_POLLSET_KICK_BROADCAST) {
+ if (pollset_has_workers(p)) {
+ GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
+ for (worker = p->root_worker.next; worker != &p->root_worker;
+ worker = worker->next) {
+ /* Skip the calling thread's own worker */
+ if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
+ append_error(&error, pollset_worker_kick(worker), err_desc);
+ }
+ }
+ GPR_TIMER_END("pollset_kick.broadcast", 0);
+ } else {
+ p->kicked_without_pollers = true;
+ }
+ } else {
+ GPR_TIMER_MARK("kicked_specifically", 0);
+ if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
+ append_error(&error, pollset_worker_kick(worker), err_desc);
+ }
+ }
+ } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
+ /* Since worker == NULL, it means that we can kick "any" worker on this
+ pollset 'p'. If 'p' happens to be the same pollset this thread is
+ currently polling (i.e in pollset_work() function), then there is no need
+ to kick any other worker since the current thread can just absorb the
+ kick. This is the reason why we enter this case only when
+ g_current_thread_pollset is != p */
+
+ GPR_TIMER_MARK("kick_anonymous", 0);
+ worker = pop_front_worker(p);
+ if (worker != NULL) {
+ GPR_TIMER_MARK("finally_kick", 0);
+ /* Rotate the kicked worker to the back of the list */
+ push_back_worker(p, worker);
+ append_error(&error, pollset_worker_kick(worker), err_desc);
+ } else {
+ GPR_TIMER_MARK("kicked_no_pollers", 0);
+ p->kicked_without_pollers = true;
+ }
+ }
+
+ GPR_TIMER_END("pollset_kick", 0);
+ GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
+ return error;
+}
+
+/* Initializes 'pollset' to an idle state (no polling island, no workers, not
+ * shutting down) and returns the address of its mutex through '*mu'. */
+static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
+  gpr_mu_init(&pollset->po.mu);
+  *mu = &pollset->po.mu;
+  pollset->po.pi = NULL;
+#ifdef PO_DEBUG
+  pollset->po.obj_type = POLL_OBJ_POLLSET;
+#endif
+
+  /* Empty circular worker list: the sentinel points at itself */
+  grpc_pollset_worker *root = &pollset->root_worker;
+  root->prev = root;
+  root->next = root;
+  pollset->kicked_without_pollers = false;
+
+  pollset->shutting_down = false;
+  pollset->finish_shutdown_called = false;
+  pollset->shutdown_done = NULL;
+}
+
+/* Convert millis to timespec (clock-type is assumed to be GPR_TIMESPAN).
+ * A value of -1 is treated as "infinite" and maps to gpr_inf_future(). */
+static struct timespec millis_to_timespec(int millis) {
+  const gpr_timespec gpr_ts = (millis == -1)
+                                  ? gpr_inf_future(GPR_TIMESPAN)
+                                  : gpr_time_from_millis(millis, GPR_TIMESPAN);
+
+  struct timespec linux_ts;
+  linux_ts.tv_sec = (time_t)gpr_ts.tv_sec;
+  linux_ts.tv_nsec = gpr_ts.tv_nsec;
+  return linux_ts;
+}
+
+/* Convert the interval between 'now' and 'deadline' into a poll timeout in
+   milliseconds:
+   - Infinite deadlines are converted to -1
+   - Deadlines at most max_spin_polling_us past 'now' (including deadlines
+     already in the past) yield 0, i.e. a non-blocking poll (which becomes spin
+     polling)
+   - Everything else is rounded up to the next whole millisecond, with a
+     minimum of one millisecond, to avoid spinning */
+static int poll_deadline_to_millis_timeout(gpr_timespec deadline,
+                                           gpr_timespec now) {
+  static const int64_t max_spin_polling_us = 10;
+
+  if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) {
+    return -1;
+  }
+
+  const gpr_timespec spin_limit = gpr_time_add(
+      now, gpr_time_from_micros(max_spin_polling_us, GPR_TIMESPAN));
+  if (gpr_time_cmp(deadline, spin_limit) <= 0) {
+    return 0;
+  }
+
+  /* Add (1ms - 1ns) before truncating so that the result is rounded up */
+  const gpr_timespec remaining = gpr_time_sub(deadline, now);
+  const gpr_timespec rounded_up = gpr_time_add(
+      remaining, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN));
+  const int millis = gpr_time_to_millis(rounded_up);
+  return millis < 1 ? 1 : millis;
+}
+
+/* Marks the fd's read-side event state as ready and records 'notifier' as the
+ * pollset that observed the read event. */
+static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+ grpc_pollset *notifier) {
+ grpc_lfev_set_ready(exec_ctx, &fd->read_closure);
+
+ /* Note, it is possible that fd_become_readable might be called twice with
+ different 'notifier's when an fd becomes readable and it is in two epoll
+ sets (This can happen briefly during polling island merges). In such cases
+ it does not really matter which notifier is set as the read_notifier_pollset
+ (They would both point to the same polling island anyway) */
+ /* Use release store to match with acquire load in
+ fd_get_read_notifier_pollset */
+ gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
+}
+
+/* Marks the fd's write-side event state as ready. */
+static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
+ grpc_lfev_set_ready(exec_ctx, &fd->write_closure);
+}
+
+/* Drops the pollset's reference on its polling island (if it has one) and
+ * leaves ps->po.pi set to NULL. 'reason' is forwarded to PI_UNREF. */
+static void pollset_release_polling_island(grpc_exec_ctx *exec_ctx,
+                                           grpc_pollset *ps, char *reason) {
+  if (ps->po.pi == NULL) {
+    return; /* Nothing to release; the pointer is already NULL */
+  }
+  PI_UNREF(exec_ctx, ps->po.pi, reason);
+  ps->po.pi = NULL;
+}
+
+/* Completes a pollset shutdown: marks it finished, releases the polling
+ * island and schedules the pollset->shutdown_done closure. Both callers in
+ * this file (pollset_shutdown and pollset_work) invoke it with
+ * pollset->po.mu held and only when the pollset has no workers. */
+static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx,
+ grpc_pollset *pollset) {
+ /* The pollset cannot have any workers if we are at this stage */
+ GPR_ASSERT(!pollset_has_workers(pollset));
+
+ pollset->finish_shutdown_called = true;
+
+ /* Release the ref and set pollset->po.pi to NULL */
+ pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
+ grpc_closure_sched(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
+}
+
+/* pollset->po.mu lock must be held by the caller before calling this.
+ *
+ * Starts shutting the pollset down: marks it as shutting down, remembers
+ * 'closure' as the shutdown-done callback and kicks every worker. If no
+ * workers are present the shutdown is completed right away; otherwise the
+ * last worker completes it from pollset_work(). Must be called at most once
+ * per pollset (asserted). */
+static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+                             grpc_closure *closure) {
+  GPR_TIMER_BEGIN("pollset_shutdown", 0);
+  GPR_ASSERT(!pollset->shutting_down);
+  pollset->shutting_down = true;
+  pollset->shutdown_done = closure;
+
+  /* pollset_kick() has already logged any failure, but it still hands the
+     error back to us as an owned reference. Release it instead of dropping
+     it on the floor, which would leak it whenever a kick fails. */
+  grpc_error *kick_error = pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);
+  GRPC_ERROR_UNREF(kick_error);
+
+  /* If the pollset has any workers, we cannot call finish_shutdown_locked()
+     because it would release the underlying polling island. In such a case, we
+     let the last worker call finish_shutdown_locked() from pollset_work() */
+  if (!pollset_has_workers(pollset)) {
+    GPR_ASSERT(!pollset->finish_shutdown_called);
+    GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
+    finish_shutdown_locked(exec_ctx, pollset);
+  }
+  GPR_TIMER_END("pollset_shutdown", 0);
+}
+
+/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
+ * than destroying the mutexes, there is nothing special that needs to be done
+ * here. Asserts that no worker is still attached to the pollset. exec_ctx is
+ * unused. */
+static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
+ GPR_ASSERT(!pollset_has_workers(pollset));
+ gpr_mu_destroy(&pollset->po.mu);
+}
+
+/* Tries to run one closure from the polling island's workqueue.
+ * Returns true if a closure was popped and executed, false if the read lock
+ * could not be taken or nothing could be popped. */
+static bool maybe_do_workqueue_work(grpc_exec_ctx *exec_ctx,
+                                    polling_island *pi) {
+  /* Another thread is already draining the queue: do not wait for it */
+  if (!gpr_mu_trylock(&pi->workqueue_read_mu)) {
+    return false;
+  }
+  gpr_mpscq_node *n = gpr_mpscq_pop(&pi->workqueue_items);
+  gpr_mu_unlock(&pi->workqueue_read_mu);
+
+  if (n == NULL) {
+    /* n == NULL might mean there's work but it's not available to be popped
+     * yet - try to ensure another workqueue wakes up to check shortly if so
+     */
+    if (gpr_atm_no_barrier_load(&pi->workqueue_item_count) > 0) {
+      workqueue_maybe_wakeup(pi);
+    }
+    return false;
+  }
+
+  /* More items remain after this one: make sure somebody picks them up */
+  if (gpr_atm_full_fetch_add(&pi->workqueue_item_count, -1) > 1) {
+    workqueue_maybe_wakeup(pi);
+  }
+
+  /* The queue node is the closure itself */
+  grpc_closure *c = (grpc_closure *)n;
+  grpc_error *error = c->error_data.error;
+#ifndef NDEBUG
+  c->scheduled = false;
+#endif
+  c->cb(exec_ctx, c->cb_arg, error);
+  GRPC_ERROR_UNREF(error);
+  return true;
+}
+
+/* NOTE: This function may modify 'now'.
+ *
+ * Tries to obtain one of the g_max_pollers_per_pi polling slots of 'pi' for
+ * 'worker'. If all slots are taken, the worker is queued on the island's
+ * worker list and the thread blocks in sigwaitinfo()/sigtimedwait() until it
+ * is signalled or the deadline passes, after which the slot count is checked
+ * once more. Returns true if a slot was obtained (pi->poller_count was
+ * incremented; pair with release_polling_lease()), false otherwise. */
+static bool acquire_polling_lease(grpc_pollset_worker *worker,
+ polling_island *pi, gpr_timespec deadline,
+ gpr_timespec *now) {
+ bool is_lease_acquired = false;
+
+ gpr_mu_lock(&pi->worker_list_mu); // LOCK
+ long num_pollers = gpr_atm_no_barrier_load(&pi->poller_count);
+
+ if (num_pollers >= g_max_pollers_per_pi) {
+ push_back_worker_node(&pi->worker_list_head, &worker->pi_list_link);
+ gpr_mu_unlock(&pi->worker_list_mu); // UNLOCK
+
+ bool is_timeout = false;
+ int ret;
+ int timeout_ms = poll_deadline_to_millis_timeout(deadline, *now);
+ if (timeout_ms == -1) {
+ ret = sigwaitinfo(&g_wakeup_sig_set, NULL);
+ } else {
+ struct timespec sigwait_timeout = millis_to_timespec(timeout_ms);
+ GRPC_SCHEDULING_START_BLOCKING_REGION;
+ ret = sigtimedwait(&g_wakeup_sig_set, NULL, &sigwait_timeout);
+ GRPC_SCHEDULING_END_BLOCKING_REGION;
+ }
+
+ if (ret == -1) {
+ if (errno == EAGAIN) {
+ is_timeout = true;
+ } else {
+ /* NOTE: This should not happen. If we see these log messages, it means
+ we are most likely doing something incorrect in the setup needed
+ for sigwaitinfo/sigtimedwait */
+ gpr_log(GPR_ERROR,
+ "sigtimedwait failed with retcode: %d (timeout_ms: %d)", errno,
+ timeout_ms);
+ }
+ }
+
+ /* Did the worker come out of sigtimedwait due to a thread that just
+ exited epoll and kicking it (in release_polling_lease function). */
+ bool is_polling_turn = gpr_atm_acq_load(&worker->is_polling_turn);
+
+ /* Did the worker come out of sigtimedwait due to a thread alerting it that
+ some completion event was (likely) available in the completion queue */
+ bool is_kicked = gpr_atm_no_barrier_load(&worker->is_kicked);
+
+ if (is_kicked || is_timeout) {
+ *now = deadline; /* Essentially make the epoll timeout = 0 */
+ } else if (is_polling_turn) {
+ *now = gpr_now(GPR_CLOCK_MONOTONIC); /* Reduce the epoll timeout */
+ }
+
+ gpr_mu_lock(&pi->worker_list_mu); // LOCK
+ /* The node might have already been removed from the list by the poller
+ that kicked this. However it is safe to call 'remove_worker_node' on
+ an already detached node */
+ remove_worker_node(&worker->pi_list_link);
+ /* It is important to read the num_pollers again under the lock so that we
+ * have the latest num_pollers value that doesn't change while we are doing
+ * the "(num_pollers < g_max_pollers_per_pi)" check a few lines below */
+ num_pollers = gpr_atm_no_barrier_load(&pi->poller_count);
+ }
+
+ /* worker_list_mu is held here on both paths */
+ if (num_pollers < g_max_pollers_per_pi) {
+ gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
+ is_lease_acquired = true;
+ }
+
+ gpr_mu_unlock(&pi->worker_list_mu); // UNLOCK
+ return is_lease_acquired;
+}
+
+/* Gives back a polling slot obtained with acquire_polling_lease(): decrements
+ * pi->poller_count and, if a worker is waiting on the island's worker list,
+ * pops the first one and kicks it so that it can take its polling turn. Any
+ * kick failure is appended to '*error'. */
+static void release_polling_lease(polling_island *pi, grpc_error **error) {
+  gpr_mu_lock(&pi->worker_list_mu);
+
+  gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
+
+  worker_node *waiting = pop_front_worker_node(&pi->worker_list_head);
+  if (waiting != NULL) {
+    append_error(error, poller_kick(WORKER_FROM_WORKER_LIST_NODE(waiting)),
+                 "poller kick error");
+  }
+
+  gpr_mu_unlock(&pi->worker_list_mu);
+}
+
+/* Maximum number of events fetched by a single epoll_pwait()/epoll_wait() */
+#define GRPC_EPOLL_MAX_EVENTS 100
+/* Performs one round of polling on 'epoll_fd' (the epoll set of 'pi') on
+ * behalf of 'worker' and dispatches the events that were returned:
+ *  - workqueue wakeup fd: consume the wakeup and try to run workqueue work
+ *  - polling_island_wakeup_fd: the island got merged; nothing to do here
+ *  - anything else: a grpc_fd that became readable and/or writable
+ * Returns without polling if no polling lease could be acquired. Errors are
+ * appended to '*error'. 'sig_mask' is the signal mask in effect during
+ * epoll_pwait(). */
+static void pollset_do_epoll_pwait(grpc_exec_ctx *exec_ctx, int epoll_fd,
+                                   grpc_pollset *pollset, polling_island *pi,
+                                   grpc_pollset_worker *worker,
+                                   gpr_timespec now, gpr_timespec deadline,
+                                   sigset_t *sig_mask, grpc_error **error) {
+  /* Only g_max_pollers_per_pi threads can be doing polling in parallel.
+     If we cannot get a lease, we cannot continue to do epoll_pwait() */
+  if (!acquire_polling_lease(worker, pi, deadline, &now)) {
+    return;
+  }
+
+  struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
+  int ep_rv;
+  int epoll_errno;
+  char *err_msg;
+  const char *err_desc = "pollset_work_and_unlock";
+
+  /* timeout_ms is the time between 'now' and 'deadline' */
+  int timeout_ms = poll_deadline_to_millis_timeout(deadline, now);
+
+  GRPC_SCHEDULING_START_BLOCKING_REGION;
+  ep_rv =
+      epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
+  /* Capture errno immediately: release_polling_lease() below takes a mutex,
+     may signal another thread and may log, any of which can overwrite errno
+     before we get to inspect it. */
+  epoll_errno = (ep_rv < 0) ? errno : 0;
+  GRPC_SCHEDULING_END_BLOCKING_REGION;
+
+  /* Give back the lease right away so that some other thread can enter */
+  release_polling_lease(pi, error);
+
+  if (ep_rv < 0) {
+    if (epoll_errno != EINTR) {
+      /* NOTE(review): err_msg is heap-allocated by gpr_asprintf() and is not
+         released here. Whether GRPC_OS_ERROR copies the string is not visible
+         from this function - confirm, and gpr_free(err_msg) if it does. */
+      gpr_asprintf(&err_msg,
+                   "epoll_wait() epoll fd: %d failed with error: %d (%s)",
+                   epoll_fd, epoll_errno, strerror(epoll_errno));
+      append_error(error, GRPC_OS_ERROR(epoll_errno, err_msg), err_desc);
+    } else {
+      /* We were interrupted. Save an iteration by doing a zero timeout
+         epoll_wait to see if there are any other events of interest */
+      GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
+                         (void *)pollset, (void *)worker);
+      ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
+    }
+  }
+
+#ifdef GRPC_TSAN
+  /* See the definition of g_epoll_sync for more details */
+  gpr_atm_acq_load(&g_epoll_sync);
+#endif /* defined(GRPC_TSAN) */
+
+  /* ep_rv is <= 0 when nothing was returned, so the loop is skipped then */
+  for (int i = 0; i < ep_rv; ++i) {
+    void *data_ptr = ep_ev[i].data.ptr;
+    if (data_ptr == &pi->workqueue_wakeup_fd) {
+      append_error(error,
+                   grpc_wakeup_fd_consume_wakeup(&pi->workqueue_wakeup_fd),
+                   err_desc);
+      maybe_do_workqueue_work(exec_ctx, pi);
+    } else if (data_ptr == &polling_island_wakeup_fd) {
+      GRPC_POLLING_TRACE(
+          "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
+          "%d) got merged",
+          (void *)pollset, (void *)worker, epoll_fd);
+      /* This means that our polling island is merged with a different
+         island. We do not have to do anything here since the subsequent call
+         to the function pollset_work_and_unlock() will pick up the correct
+         epoll_fd */
+    } else {
+      grpc_fd *fd = data_ptr;
+      /* Error/hang-up conditions are reported to both directions */
+      int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
+      int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
+      int write_ev = ep_ev[i].events & EPOLLOUT;
+      if (read_ev || cancel) {
+        fd_become_readable(exec_ctx, fd, pollset);
+      }
+      if (write_ev || cancel) {
+        fd_become_writable(exec_ctx, fd);
+      }
+    }
+  }
+}
+
+/* Does one round of work for 'worker' on 'pollset': runs one workqueue item
+ * if one is available, otherwise polls the pollset's (latest) polling island.
+ *
+ * Must be called with pollset->po.mu held; the lock is ALWAYS released by the
+ * time this function returns (the caller re-acquires it afterwards).
+ *
+ * Note: sig_mask contains the signal mask to use *during* epoll_wait().
+ * Errors are appended to '*error'. */
+static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
+                                    grpc_pollset *pollset,
+                                    grpc_pollset_worker *worker,
+                                    gpr_timespec now, gpr_timespec deadline,
+                                    sigset_t *sig_mask, grpc_error **error) {
+  int epoll_fd = -1;
+  polling_island *pi = NULL;
+  GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);
+
+  /* We need to get the epoll_fd to wait on. The epoll_fd is inside the
+     latest polling island pointed by pollset->po.pi
+
+     Since epoll_fd is immutable, it is safe to read it without a lock on the
+     polling island. There is however a possibility that the polling island from
+     which we got the epoll_fd, got merged with another island in the meantime.
+     This is okay because in such a case, we will wakeup right-away from
+     epoll_pwait() (because any merge will poison the old polling island's epoll
+     set 'polling_island_wakeup_fd') and then pick up the latest polling_island
+     the next time this function - pollset_work_and_unlock()) is called */
+
+  if (pollset->po.pi == NULL) {
+    pollset->po.pi = polling_island_create(exec_ctx, NULL, error);
+    if (pollset->po.pi == NULL) {
+      /* Fatal error. Cannot continue. Still honor the "and_unlock" contract:
+         the caller unconditionally re-locks pollset->po.mu after we return,
+         so returning with the lock held would deadlock the calling thread. */
+      gpr_mu_unlock(&pollset->po.mu);
+      GPR_TIMER_END("pollset_work_and_unlock", 0);
+      return;
+    }
+
+    PI_ADD_REF(pollset->po.pi, "ps");
+    GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
+                       (void *)pollset, (void *)pollset->po.pi);
+  }
+
+  pi = polling_island_maybe_get_latest(pollset->po.pi);
+  epoll_fd = pi->epoll_fd;
+
+  /* Update the pollset->po.pi since the island being pointed by
+     pollset->po.pi may be older than the one pointed by pi */
+  if (pollset->po.pi != pi) {
+    /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
+       polling island to be deleted */
+    PI_ADD_REF(pi, "ps");
+    PI_UNREF(exec_ctx, pollset->po.pi, "ps");
+    pollset->po.pi = pi;
+  }
+
+  /* Add an extra ref so that the island does not get destroyed (which means
+     the epoll_fd won't be closed) while we are doing an epoll_wait() on the
+     epoll_fd */
+  PI_ADD_REF(pi, "ps_work");
+  gpr_mu_unlock(&pollset->po.mu);
+
+  /* If we get some workqueue work to do, it might end up completing an item on
+     the completion queue, so there's no need to poll... so we skip that and
+     redo the complete loop to verify */
+  if (!maybe_do_workqueue_work(exec_ctx, pi)) {
+    g_current_thread_polling_island = pi;
+    pollset_do_epoll_pwait(exec_ctx, epoll_fd, pollset, pi, worker, now,
+                           deadline, sig_mask, error);
+    g_current_thread_polling_island = NULL;
+  }
+
+  GPR_ASSERT(pi != NULL);
+
+  /* Before leaving, release the extra ref we added to the polling island. It
+     is important to use "pi" here (i.e our old copy of pollset->po.pi
+     that we got before releasing the polling island lock). This is because
+     pollset->po.pi pointer might get updated in other parts of the
+     code when there is an island merge while we are doing epoll_wait() above */
+  PI_UNREF(exec_ctx, pi, "ps_work");
+
+  GPR_TIMER_END("pollset_work_and_unlock", 0);
+}
+
+/* pollset->po.mu lock must be held by the caller before calling this.
+ The function pollset_work() may temporarily release the lock (pollset->po.mu)
+ during the course of its execution but it will always re-acquire the lock and
+ ensure that it is held by the time the function returns.
+
+ If 'worker_hdl' is non-NULL, it is pointed at this call's worker (which lives
+ on this function's stack) for the duration of the call and reset to NULL
+ before returning. Returns the errors accumulated while polling (also
+ logged); the caller owns the returned error. */
+static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+ grpc_pollset_worker **worker_hdl,
+ gpr_timespec now, gpr_timespec deadline) {
+ GPR_TIMER_BEGIN("pollset_work", 0);
+ grpc_error *error = GRPC_ERROR_NONE;
+
+ grpc_pollset_worker worker;
+ pollset_worker_init(&worker);
+
+ if (worker_hdl) *worker_hdl = &worker;
+
+ /* Let pollset_kick() recognize kicks aimed at the calling thread itself */
+ gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
+ gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
+
+ if (pollset->kicked_without_pollers) {
+ /* If the pollset was kicked without pollers, pretend that the current
+ worker got the kick and skip polling. A kick indicates that there is some
+ work that needs attention like an event on the completion queue or an
+ alarm */
+ GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
+ pollset->kicked_without_pollers = 0;
+ } else if (!pollset->shutting_down) {
+ /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up
+ (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
+ worker that there is some pending work that needs immediate attention
+ (like an event on the completion queue, or a polling island merge that
+ results in a new epoll-fd to wait on) and that the worker should not
+ spend time waiting in epoll_pwait().
+
+ A worker can be kicked anytime from the point it is added to the pollset
+ via push_front_worker() (or push_back_worker()) to the point it is
+ removed via remove_worker().
+ If the worker is kicked before/during it calls epoll_pwait(), it should
+ immediately exit from epoll_wait(). If the worker is kicked after it
+ returns from epoll_wait(), then nothing really needs to be done.
+
+ To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
+ times *except* when it is in epoll_pwait(). This way, the worker never
+ misses acting on a kick */
+
+ if (!g_initialized_sigmask) {
+ sigemptyset(&g_wakeup_sig_set);
+ sigaddset(&g_wakeup_sig_set, grpc_wakeup_signal);
+ pthread_sigmask(SIG_BLOCK, &g_wakeup_sig_set, &g_orig_sigmask);
+ sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
+ g_initialized_sigmask = true;
+ /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'.
+ This is the mask used at all times *except during
+ epoll_wait()*"
+ g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
+ this is the mask to use *during epoll_wait()*
+
+ The new_mask is set on the worker before it is added to the pollset
+ (i.e before it can be kicked) */
+ }
+
+ push_front_worker(pollset, &worker); /* Add worker to pollset */
+
+ /* The callee is expected to release pollset->po.mu; it is re-acquired
+ below after flushing the exec_ctx */
+ pollset_work_and_unlock(exec_ctx, pollset, &worker, now, deadline,
+ &g_orig_sigmask, &error);
+ grpc_exec_ctx_flush(exec_ctx);
+
+ gpr_mu_lock(&pollset->po.mu);
+
+ /* Note: There is no need to reset worker.is_kicked to 0 since we are no
+ longer going to use this worker */
+ remove_worker(pollset, &worker);
+ }
+
+ /* If we are the last worker on the pollset (i.e pollset_has_workers() is
+ false at this point) and the pollset is shutting down, we may have to
+ finish the shutdown process by calling finish_shutdown_locked().
+ See pollset_shutdown() for more details.
+
+ Note: Continuing to access pollset here is safe; it is the caller's
+ responsibility to not destroy a pollset when it has outstanding calls to
+ pollset_work() */
+ if (pollset->shutting_down && !pollset_has_workers(pollset) &&
+ !pollset->finish_shutdown_called) {
+ GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
+ finish_shutdown_locked(exec_ctx, pollset);
+
+ /* Flush the exec_ctx with the pollset lock released */
+ gpr_mu_unlock(&pollset->po.mu);
+ grpc_exec_ctx_flush(exec_ctx);
+ gpr_mu_lock(&pollset->po.mu);
+ }
+
+ /* 'worker' is about to go out of scope: do not leave a dangling handle */
+ if (worker_hdl) *worker_hdl = NULL;
+
+ gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
+ gpr_tls_set(&g_current_thread_worker, (intptr_t)0);
+
+ GPR_TIMER_END("pollset_work", 0);
+
+ GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
+ return error;
+}
+
+/* Makes 'item' and 'bag' share one polling island, creating or merging
+ * islands as needed.
+ *
+ * bag / bag_type:   the containing poll object and its type
+ * item / item_type: the poll object being added and its type
+ *
+ * Locks bag->mu and then item->mu for the duration of the call (item->mu is
+ * briefly dropped while a new island is created for an fd). Does nothing if
+ * 'item' is an fd that has already been orphaned. Errors are logged, not
+ * returned. */
+static void add_poll_object(grpc_exec_ctx *exec_ctx, poll_obj *bag,
+                            poll_obj_type bag_type, poll_obj *item,
+                            poll_obj_type item_type) {
+  GPR_TIMER_BEGIN("add_poll_object", 0);
+
+#ifdef PO_DEBUG
+  GPR_ASSERT(item->obj_type == item_type);
+  GPR_ASSERT(bag->obj_type == bag_type);
+#endif
+
+  grpc_error *error = GRPC_ERROR_NONE;
+  polling_island *pi_new = NULL;
+
+  gpr_mu_lock(&bag->mu);
+  gpr_mu_lock(&item->mu);
+
+retry:
+  /*
+   * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
+   * 2) If item->pi and bag->pi are both NULL, create a new polling island (with
+   *    a refcount of 2) and point item->pi and bag->pi to the new island
+   * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
+   *    the other's non-NULL pi
+   * 4) Finally if item->pi and bag->pi are non-NULL and not-equal, merge the
+   *    polling islands and update item->pi and bag->pi to point to the new
+   *    island
+   */
+
+  /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
+   * orphaned */
+  if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
+    gpr_mu_unlock(&item->mu);
+    gpr_mu_unlock(&bag->mu);
+    /* We can get here after a 'goto retry' with an error already recorded:
+       log and release it exactly like the normal exit path does, and close
+       the timer scope opened at the top of the function. */
+    GRPC_LOG_IF_ERROR("add_poll_object", error);
+    GPR_TIMER_END("add_poll_object", 0);
+    return;
+  }
+
+  if (item->pi == bag->pi) {
+    pi_new = item->pi;
+    if (pi_new == NULL) {
+      /* GPR_ASSERT(item->pi == bag->pi == NULL) */
+
+      /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
+       * we need to do some extra work to make TSAN happy */
+      if (item_type == POLL_OBJ_FD) {
+        /* Unlock before creating a new polling island: the polling island will
+           create a workqueue which creates a file descriptor, and holding an fd
+           lock here can eventually cause a loop to appear to TSAN (making it
+           unhappy). We don't think it's a real loop (there's an epoch point
+           where that loop possibility disappears), but the advantages of
+           keeping TSAN happy outweigh any performance advantage we might have
+           by keeping the lock held. */
+        gpr_mu_unlock(&item->mu);
+        pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
+        gpr_mu_lock(&item->mu);
+
+        /* Need to reverify any assumptions made between the initial lock and
+           getting to this branch: if they've changed, we need to throw away our
+           work and figure things out again. */
+        if (item->pi != NULL) {
+          GRPC_POLLING_TRACE(
+              "add_poll_object: Raced creating new polling island. pi_new: %p "
+              "(fd: %d, %s: %p)",
+              (void *)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
+              (void *)bag);
+          /* polling_island_create() may have failed and returned NULL; there
+             is nothing to tear down in that case */
+          if (pi_new != NULL) {
+            /* No need to lock 'pi_new' here since this is a new polling island
+               and no one has a reference to it yet */
+            polling_island_remove_all_fds_locked(pi_new, true, &error);
+
+            /* Ref and unref so that the polling island gets deleted during
+               unref */
+            PI_ADD_REF(pi_new, "dance_of_destruction");
+            PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
+          }
+          goto retry;
+        }
+      } else {
+        pi_new = polling_island_create(exec_ctx, NULL, &error);
+      }
+
+      GRPC_POLLING_TRACE(
+          "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
+          "%s: %p)",
+          (void *)pi_new, poll_obj_string(item_type), (void *)item,
+          poll_obj_string(bag_type), (void *)bag);
+    } else {
+      GRPC_POLLING_TRACE(
+          "add_poll_object: Same polling island. pi: %p (%s, %s)",
+          (void *)pi_new, poll_obj_string(item_type),
+          poll_obj_string(bag_type));
+    }
+  } else if (item->pi == NULL) {
+    /* GPR_ASSERT(bag->pi != NULL) */
+    /* Make pi_new point to latest pi*/
+    pi_new = polling_island_lock(bag->pi);
+
+    if (item_type == POLL_OBJ_FD) {
+      grpc_fd *fd = FD_FROM_PO(item);
+      polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
+    }
+
+    gpr_mu_unlock(&pi_new->mu);
+    GRPC_POLLING_TRACE(
+        "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
+        "bag(%s): %p)",
+        (void *)pi_new, poll_obj_string(item_type), (void *)item,
+        poll_obj_string(bag_type), (void *)bag);
+  } else if (bag->pi == NULL) {
+    /* GPR_ASSERT(item->pi != NULL) */
+    /* Make pi_new to point to latest pi */
+    pi_new = polling_island_lock(item->pi);
+    gpr_mu_unlock(&pi_new->mu);
+    GRPC_POLLING_TRACE(
+        "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
+        "bag(%s): %p)",
+        (void *)pi_new, poll_obj_string(item_type), (void *)item,
+        poll_obj_string(bag_type), (void *)bag);
+  } else {
+    pi_new = polling_island_merge(item->pi, bag->pi, &error);
+    GRPC_POLLING_TRACE(
+        "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
+        "bag(%s): %p)",
+        (void *)pi_new, poll_obj_string(item_type), (void *)item,
+        poll_obj_string(bag_type), (void *)bag);
+  }
+
+  /* At this point, pi_new is the polling island that both item->pi and bag->pi
+     MUST be pointing to */
+
+  if (item->pi != pi_new) {
+    /* Take the new ref before dropping the old one */
+    PI_ADD_REF(pi_new, poll_obj_string(item_type));
+    if (item->pi != NULL) {
+      PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
+    }
+    item->pi = pi_new;
+  }
+
+  if (bag->pi != pi_new) {
+    PI_ADD_REF(pi_new, poll_obj_string(bag_type));
+    if (bag->pi != NULL) {
+      PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
+    }
+    bag->pi = pi_new;
+  }
+
+  gpr_mu_unlock(&item->mu);
+  gpr_mu_unlock(&bag->mu);
+
+  GRPC_LOG_IF_ERROR("add_poll_object", error);
+  GPR_TIMER_END("add_poll_object", 0);
+}
+
+static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+ grpc_fd *fd) { /* Adds 'fd' (the item) to 'pollset' (the bag). */
+ add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
+ POLL_OBJ_FD); /* arguments: bag object, bag type, item object, item type */
+}
+
+/*******************************************************************************
+ * Pollset-set Definitions
+ */
+
+/* Allocates a pollset_set whose poll object has an initialized mutex and no
+ * polling island attached yet. The object is released again by
+ * pollset_set_destroy(). */
+static grpc_pollset_set *pollset_set_create(void) {
+  grpc_pollset_set *new_set = gpr_malloc(sizeof(grpc_pollset_set));
+#ifdef PO_DEBUG
+  new_set->po.obj_type = POLL_OBJ_POLLSET_SET;
+#endif
+  new_set->po.pi = NULL;
+  gpr_mu_init(&new_set->po.mu);
+  return new_set;
+}
+
+static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
+ grpc_pollset_set *pss) { /* Destroys 'pss' and frees its memory. */
+ gpr_mu_destroy(&pss->po.mu);
+
+ if (pss->po.pi != NULL) { /* drop the polling island reference, if one is held */
+ PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
+ }
+
+ gpr_free(pss);
+}
+
+static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
+ grpc_fd *fd) { /* Adds 'fd' (the item) to the pollset_set 'pss' (the bag). */
+ add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
+ POLL_OBJ_FD); /* arguments: bag object, bag type, item object, item type */
+}
+
+static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
+ grpc_fd *fd) {
+ /* Nothing to do: this function is intentionally a no-op, so 'exec_ctx',
+ 'pss' and 'fd' are all unused. */
+}
+
+static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
+ grpc_pollset_set *pss, grpc_pollset *ps) { /* Adds pollset 'ps' (the item) to the pollset_set 'pss' (the bag). */
+ add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
+ POLL_OBJ_POLLSET); /* arguments: bag object, bag type, item object, item type */
+}
+
+static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
+ grpc_pollset_set *pss, grpc_pollset *ps) {
+ /* Nothing to do: this function is intentionally a no-op, so 'exec_ctx',
+ 'pss' and 'ps' are all unused. */
+}
+
+static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
+ grpc_pollset_set *bag,
+ grpc_pollset_set *item) { /* Adds the pollset_set 'item' to the pollset_set 'bag'. */
+ add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
+ POLL_OBJ_POLLSET_SET); /* arguments: bag object, bag type, item object, item type */
+}
+
+static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
+ grpc_pollset_set *bag,
+ grpc_pollset_set *item) {
+ /* Nothing to do: this function is intentionally a no-op, so 'exec_ctx',
+ 'bag' and 'item' are all unused. */
+}
+
+/*******************************************************************************
+ * Event engine binding
+ */
+
+static void shutdown_engine(void) { /* 'shutdown_engine' entry of the vtable: tears down the engine's global state */
+ fd_global_shutdown(); /* fd state first ... */
+ pollset_global_shutdown(); /* ... then pollset state ... */
+ polling_island_global_shutdown(); /* ... and polling island state last */
+}
+
+static const grpc_event_engine_vtable vtable = { /* Function table of this engine; grpc_init_epoll_limited_pollers_linux() returns its address on success. */
+ .pollset_size = sizeof(grpc_pollset),
+
+ .fd_create = fd_create, /* grpc_fd operations */
+ .fd_wrapped_fd = fd_wrapped_fd,
+ .fd_orphan = fd_orphan,
+ .fd_shutdown = fd_shutdown,
+ .fd_is_shutdown = fd_is_shutdown,
+ .fd_notify_on_read = fd_notify_on_read,
+ .fd_notify_on_write = fd_notify_on_write,
+ .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset,
+ .fd_get_workqueue = fd_get_workqueue,
+
+ .pollset_init = pollset_init, /* grpc_pollset operations */
+ .pollset_shutdown = pollset_shutdown,
+ .pollset_destroy = pollset_destroy,
+ .pollset_work = pollset_work,
+ .pollset_kick = pollset_kick,
+ .pollset_add_fd = pollset_add_fd,
+
+ .pollset_set_create = pollset_set_create, /* grpc_pollset_set operations */
+ .pollset_set_destroy = pollset_set_destroy,
+ .pollset_set_add_pollset = pollset_set_add_pollset,
+ .pollset_set_del_pollset = pollset_set_del_pollset,
+ .pollset_set_add_pollset_set = pollset_set_add_pollset_set,
+ .pollset_set_del_pollset_set = pollset_set_del_pollset_set,
+ .pollset_set_add_fd = pollset_set_add_fd,
+ .pollset_set_del_fd = pollset_set_del_fd,
+
+ .workqueue_ref = workqueue_ref, /* grpc_workqueue operations */
+ .workqueue_unref = workqueue_unref,
+ .workqueue_scheduler = workqueue_scheduler,
+
+ .shutdown_engine = shutdown_engine, /* global teardown */
+};
+
+/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
+ * Create a dummy epoll_fd to make sure epoll support is available.
+ *
+ * Returns true when epoll_create1() succeeds (the probe fd is closed again
+ * before returning); otherwise logs the failure and returns false. */
+static bool is_epoll_available() {
+  int fd = epoll_create1(EPOLL_CLOEXEC);
+  if (fd < 0) {
+    /* On failure 'fd' is just the error sentinel, so it says nothing about
+     * the cause. Report errno instead, captured before any other call can
+     * overwrite it. */
+    int saved_errno = errno;
+    gpr_log(
+        GPR_ERROR,
+        "epoll_create1 failed with error: %d. Not using epoll polling engine",
+        saved_errno);
+    return false;
+  }
+  close(fd);
+  return true;
+}
+
+/* This is mainly for testing purposes. Checks to see if environment variable
+ * GRPC_MAX_POLLERS_PER_PI is set and if so, assigns that value to
+ * g_max_pollers_per_pi (any negative value, and any value that does not fit
+ * in an int, is considered INT_MAX). When the variable is not set,
+ * g_max_pollers_per_pi is INT_MAX. The chosen value is logged at INFO level. */
+static void set_max_pollers_per_island() {
+  char *s = gpr_getenv("GRPC_MAX_POLLERS_PER_PI");
+  if (s) {
+    /* Range-check the long before narrowing it: a plain (int) cast would
+     * silently truncate values above INT_MAX. */
+    long requested = strtol(s, NULL, 10);
+    if (requested < 0 || requested > INT_MAX) {
+      g_max_pollers_per_pi = INT_MAX;
+    } else {
+      g_max_pollers_per_pi = (int)requested;
+    }
+    /* gpr_getenv() returns a newly allocated string that the caller has to
+     * release; without this the string leaks. */
+    gpr_free(s);
+  } else {
+    g_max_pollers_per_pi = INT_MAX;
+  }
+
+  gpr_log(GPR_INFO, "Max number of pollers per polling island: %d",
+          g_max_pollers_per_pi);
+}
+
+/* Entry point of the "limited pollers" epoll engine.
+ *
+ * Returns the engine's vtable once all global state is initialized, or NULL
+ * when the engine cannot or should not be used: it was not explicitly
+ * requested, the wakeup signal was initialized to a negative value, wakeup
+ * fds or epoll are unavailable, or one of the global init steps failed. When
+ * a global init step fails, the steps that already succeeded are shut down
+ * again so that a NULL return leaves no engine state behind. */
+const grpc_event_engine_vtable *grpc_init_epoll_limited_pollers_linux(
+    bool explicitly_requested) {
+  if (!explicitly_requested) {
+    return NULL;
+  }
+
+  /* If use of signals is disabled, we cannot use epoll engine*/
+  if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
+    return NULL;
+  }
+
+  if (!grpc_has_wakeup_fd()) {
+    return NULL;
+  }
+
+  if (!is_epoll_available()) {
+    return NULL;
+  }
+
+  if (!is_grpc_wakeup_signal_initialized) {
+    grpc_use_signal(SIGRTMIN + 6);
+  }
+
+  set_max_pollers_per_island();
+
+  fd_global_init();
+
+  if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
+    /* Undo fd_global_init(); shutdown_engine() is never run for an engine
+     * whose init returned NULL. */
+    fd_global_shutdown();
+    return NULL;
+  }
+
+  if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
+                         polling_island_global_init())) {
+    /* Undo the two init steps that succeeded, in reverse order. */
+    pollset_global_shutdown();
+    fd_global_shutdown();
+    return NULL;
+  }
+
+  return &vtable;
+}
+
+#else /* defined(GRPC_LINUX_EPOLL) */
+#if defined(GRPC_POSIX_SOCKET)
+#include "src/core/lib/iomgr/ev_posix.h"
+/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
+ * NULL regardless of 'explicitly_requested' (the parameter is unused in this
+ * fallback). */
+const grpc_event_engine_vtable *grpc_init_epoll_limited_pollers_linux(
+ bool explicitly_requested) {
+ return NULL;
+}
+#endif /* defined(GRPC_POSIX_SOCKET) */
+#endif /* !defined(GRPC_LINUX_EPOLL) */
diff --git a/src/core/lib/iomgr/ev_epoll_limited_pollers_linux.h b/src/core/lib/iomgr/ev_epoll_limited_pollers_linux.h
new file mode 100644
index 0000000000..379e1ded3b
--- /dev/null
+++ b/src/core/lib/iomgr/ev_epoll_limited_pollers_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * Copyright 2015, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef GRPC_CORE_LIB_IOMGR_EV_EPOLL_LIMITED_POLLERS_LINUX_H
+#define GRPC_CORE_LIB_IOMGR_EV_EPOLL_LIMITED_POLLERS_LINUX_H
+
+#include "src/core/lib/iomgr/ev_posix.h"
+#include "src/core/lib/iomgr/port.h"
+
+const grpc_event_engine_vtable *grpc_init_epoll_limited_pollers_linux(
+ bool explicitly_requested); /* Returns the engine's vtable, or NULL when the
+ engine is unavailable, fails to initialize, or was not explicitly
+ requested. */
+
+#endif /* GRPC_CORE_LIB_IOMGR_EV_EPOLL_LIMITED_POLLERS_LINUX_H */
diff --git a/src/core/lib/iomgr/ev_epoll_thread_pool_linux.c b/src/core/lib/iomgr/ev_epoll_thread_pool_linux.c
new file mode 100644
index 0000000000..bb44321922
--- /dev/null
+++ b/src/core/lib/iomgr/ev_epoll_thread_pool_linux.c
@@ -0,0 +1,1337 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "src/core/lib/iomgr/port.h"
+
+/* This polling engine is only relevant on linux kernels supporting epoll() */
+#ifdef GRPC_LINUX_EPOLL
+
+#include "src/core/lib/iomgr/ev_epoll_thread_pool_linux.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <poll.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <grpc/support/alloc.h>
+#include <grpc/support/cpu.h>
+#include <grpc/support/log.h>
+#include <grpc/support/string_util.h>
+#include <grpc/support/thd.h>
+#include <grpc/support/tls.h>
+#include <grpc/support/useful.h>
+
+#include "src/core/lib/iomgr/ev_posix.h"
+#include "src/core/lib/iomgr/iomgr_internal.h"
+#include "src/core/lib/iomgr/lockfree_event.h"
+#include "src/core/lib/iomgr/timer.h"
+#include "src/core/lib/iomgr/wakeup_fd_posix.h"
+#include "src/core/lib/iomgr/workqueue.h"
+#include "src/core/lib/profiling/timers.h"
+#include "src/core/lib/support/block_annotate.h"
+
+/* TODO: sreek - Move this to init.c and initialize this like other tracers. */
+#define GRPC_POLLING_TRACE(fmt, ...) \
+ if (GRPC_TRACER_ON(grpc_polling_trace)) { \
+ gpr_log(GPR_INFO, (fmt), __VA_ARGS__); \
+ }
+
+/* The alarm system needs to be able to wakeup 'some poller' sometimes
+ * (specifically when a new alarm needs to be triggered earlier than the next
+ * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
+ * case occurs. */
+
+struct epoll_set;
+
+#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)
+
+/*******************************************************************************
+ * Fd Declarations
+ */
+struct grpc_fd {
+ gpr_mu mu; /* held by fd_create(), fd_wrapped_fd() and fd_orphan() while they touch the fields below */
+ struct epoll_set *eps; /* reset to NULL by fd_create(); fd_orphan() removes the fd from this set and unrefs it when non-NULL */
+
+ int fd; /* the wrapped OS file descriptor */
+
+ /* The fd is either closed or we relinquished control of it. In either cases,
+ this indicates that the 'fd' on this structure is no longer valid */
+ bool orphaned;
+
+ gpr_atm read_closure; /* lock-free event state, managed through the grpc_lfev_* functions */
+ gpr_atm write_closure; /* same, for the write direction */
+
+ struct grpc_fd *freelist_next; /* next entry while this object sits on fd_freelist */
+ grpc_closure *on_done_closure; /* scheduled by fd_orphan() */
+
+ grpc_iomgr_object iomgr_object; /* registered in fd_create(), unregistered in add_fd_to_freelist() */
+};
+
+static void fd_global_init(void);
+static void fd_global_shutdown(void);
+
+/*******************************************************************************
+ * epoll set Declarations
+ */
+
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+
+#define EPS_ADD_REF(p, r) eps_add_ref_dbg((p), (r), __FILE__, __LINE__)
+#define EPS_UNREF(exec_ctx, p, r) \
+ eps_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)
+
+#else /* defined(GRPC_WORKQUEUE_REFCOUNT_DEBUG) */
+
+#define EPS_ADD_REF(p, r) eps_add_ref((p))
+#define EPS_UNREF(exec_ctx, p, r) eps_unref((exec_ctx), (p))
+
+#endif /* !defined(GRPC_WORKQUEUE_REFCOUNT_DEBUG) */
+
+/* This is also used as grpc_workqueue (by directly casting it).
+ 'workqueue_scheduler' has to remain the first member: workqueue_enqueue()
+ turns closure->scheduler back into an epoll_set pointer with a plain cast,
+ and workqueue_scheduler() hands out the address of that member. */
+typedef struct epoll_set {
+ grpc_closure_scheduler workqueue_scheduler;
+
+ /* Mutex poller should acquire to poll this. This enforces that only one
+ * poller can be polling on epoll_set at any time */
+ gpr_mu mu;
+
+ /* Ref count. Use EPS_ADD_REF() and EPS_UNREF() macros to increment/decrement
+ the refcount. Once the ref count becomes zero, this structure is destroyed
+ which means we should ensure that there is never a scenario where a
+ EPS_ADD_REF() is racing with a EPS_UNREF() that just made the ref_count
+ zero. */
+ gpr_atm ref_count;
+
+ /* Number of threads currently polling on this epoll set */
+ gpr_atm poller_count;
+ /* Mutex guarding the read end of the workqueue (must be held to pop from
+ * workqueue_items) */
+ gpr_mu workqueue_read_mu;
+ /* Queue of closures to be executed */
+ gpr_mpscq workqueue_items;
+ /* Count of items in workqueue_items */
+ gpr_atm workqueue_item_count;
+ /* Wakeup fd used to wake pollers to check the contents of workqueue_items */
+ grpc_wakeup_fd workqueue_wakeup_fd;
+
+ /* Is the epoll set shutdown */
+ gpr_atm is_shutdown;
+
+ /* The fd of the underlying epoll set (-1 until epoll_create1() has
+ succeeded in epoll_set_create()) */
+ int epoll_fd;
+} epoll_set;
+
+/*******************************************************************************
+ * Pollset Declarations
+ */
+struct grpc_pollset_worker {
+ gpr_cv kick_cv; /* signalled by pollset_worker_kick() */
+
+ struct grpc_pollset_worker *next; /* links of the doubly linked worker list rooted at grpc_pollset.root_worker */
+ struct grpc_pollset_worker *prev;
+};
+
+struct grpc_pollset {
+ gpr_mu mu; /* must be held when calling pollset_kick() or pollset_has_workers() */
+ struct epoll_set *eps;
+
+ grpc_pollset_worker root_worker; /* sentinel of the circular worker list; the list is empty when root_worker.next points back at root_worker */
+ bool kicked_without_pollers;
+
+ bool shutting_down; /* Is the pollset shutting down ? */
+ bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
+ grpc_closure *shutdown_done; /* Called after shutdown is complete */
+};
+
+/*******************************************************************************
+ * Pollset-set Declarations
+ */
+struct grpc_pollset_set {}; /* intentionally has no members in this engine */
+
+/*****************************************************************************
+ * Dedicated polling threads and pollsets - Declarations
+ */
+
+size_t g_num_eps = 1;
+struct epoll_set **g_epoll_sets = NULL;
+gpr_atm g_next_eps;
+size_t g_num_threads_per_eps = 1;
+gpr_thd_id *g_poller_threads = NULL;
+
+/* Used as read-notifier pollsets for fds. We won't be using read notifier
+ * pollsets with this polling engine. So it does not matter what pollset we
+ * return */
+grpc_pollset g_read_notifier;
+
+static void add_fd_to_eps(grpc_fd *fd);
+static bool init_epoll_sets();
+static void shutdown_epoll_sets();
+static void poller_thread_loop(void *arg);
+static void start_poller_threads();
+static void shutdown_poller_threads();
+
+/*******************************************************************************
+ * Common helpers
+ */
+
+static bool append_error(grpc_error **composite, grpc_error *error,
+ const char *desc) {
+ if (error == GRPC_ERROR_NONE) return true;
+ if (*composite == GRPC_ERROR_NONE) {
+ *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
+ }
+ *composite = grpc_error_add_child(*composite, error);
+ return false;
+}
+
+/*******************************************************************************
+ * epoll set Definitions
+ */
+
+/* The wakeup fd that is used to wake up all threads in an epoll_set, informing
+ them that the epoll set is shutdown. This wakeup fd is initialized to be
+ readable and MUST NOT be consumed, i.e. the threads that woke up MUST NOT
+ call grpc_wakeup_fd_consume_wakeup() */
+static grpc_wakeup_fd epoll_set_wakeup_fd;
+
+/* The epoll set being polled right now.
+ See comments in workqueue_maybe_wakeup for why this is tracked. */
+static __thread epoll_set *g_current_thread_epoll_set;
+
+/* Forward declaration */
+static void epoll_set_delete(epoll_set *eps);
+static void workqueue_enqueue(grpc_exec_ctx *exec_ctx, grpc_closure *closure,
+ grpc_error *error);
+
+#ifdef GRPC_TSAN
+/* Currently TSAN may incorrectly flag data races between epoll_ctl and
+ epoll_wait for any grpc_fd structs that are added to the epoll set via
+ epoll_ctl and are returned (within a very short window) via epoll_wait().
+
+ To work-around this race, we establish a happens-before relation between
+ the code just-before epoll_ctl() and the code after epoll_wait() by using
+ this atomic */
+gpr_atm g_epoll_sync;
+#endif /* defined(GRPC_TSAN) */
+
+static const grpc_closure_scheduler_vtable workqueue_scheduler_vtable = {
+ workqueue_enqueue, workqueue_enqueue, "workqueue"};
+
+static void eps_add_ref(epoll_set *eps);
+static void eps_unref(grpc_exec_ctx *exec_ctx, epoll_set *eps);
+
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+/* Debug flavour of eps_add_ref(): takes the reference and logs the count
+ * observed just before it, together with the caller-supplied reason and
+ * source location. */
+static void eps_add_ref_dbg(epoll_set *eps, const char *reason,
+                            const char *file, int line) {
+  const long before = gpr_atm_acq_load(&eps->ref_count);
+  const long after = before + 1;
+  eps_add_ref(eps);
+  gpr_log(GPR_DEBUG, "Add ref eps: %p, old: %ld -> new:%ld (%s) - (%s, %d)",
+          (void *)eps, before, after, reason, file, line);
+}
+
+/* Debug flavour of eps_unref(): drops the reference and logs the count
+ * observed just before it, together with the caller-supplied reason and
+ * source location. Only the pointer value of 'eps' is logged after the unref;
+ * the object itself is not touched again. */
+static void eps_unref_dbg(grpc_exec_ctx *exec_ctx, epoll_set *eps,
+                          const char *reason, const char *file, int line) {
+  const long before = gpr_atm_acq_load(&eps->ref_count);
+  const long after = before - 1;
+  eps_unref(exec_ctx, eps);
+  gpr_log(GPR_DEBUG, "Unref eps: %p, old:%ld -> new:%ld (%s) - (%s, %d)",
+          (void *)eps, before, after, reason, file, line);
+}
+
+/* Refcount-debug build: takes a reference on the epoll set behind
+ * 'workqueue' and returns 'workqueue'. A NULL workqueue is passed through
+ * untouched. */
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue,
+                                     const char *file, int line,
+                                     const char *reason) {
+  if (workqueue == NULL) {
+    return NULL;
+  }
+  eps_add_ref_dbg((epoll_set *)workqueue, reason, file, line);
+  return workqueue;
+}
+
+/* Refcount-debug build: drops one reference on the epoll set behind
+ * 'workqueue'. A NULL workqueue is ignored. */
+static void workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+                            const char *file, int line, const char *reason) {
+  if (workqueue == NULL) {
+    return;
+  }
+  eps_unref_dbg(exec_ctx, (epoll_set *)workqueue, reason, file, line);
+}
+#else
+/* Takes a reference on the epoll set behind 'workqueue' and returns
+ * 'workqueue'. A NULL workqueue is passed through untouched. */
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue) {
+  if (workqueue == NULL) {
+    return NULL;
+  }
+  eps_add_ref((epoll_set *)workqueue);
+  return workqueue;
+}
+
+/* Drops one reference on the epoll set behind 'workqueue'. A NULL workqueue
+ * is ignored. */
+static void workqueue_unref(grpc_exec_ctx *exec_ctx,
+                            grpc_workqueue *workqueue) {
+  if (workqueue == NULL) {
+    return;
+  }
+  eps_unref(exec_ctx, (epoll_set *)workqueue);
+}
+#endif
+
+static void eps_add_ref(epoll_set *eps) { /* Adds one reference to 'eps'. */
+ gpr_atm_no_barrier_fetch_add(&eps->ref_count, 1); /* the increment itself uses no memory barrier */
+}
+
+/* Drops one reference on 'eps' and deletes the epoll set when that was the
+ * last one. */
+static void eps_unref(grpc_exec_ctx *exec_ctx, epoll_set *eps) {
+  const gpr_atm refs_before = gpr_atm_full_fetch_add(&eps->ref_count, -1);
+  if (refs_before != 1) {
+    return;
+  }
+  /* The count just went from one to zero. The deletion is not done under a
+     lock: once the count is zero nobody else holds a reference to the epoll
+     set, and there is no racing eps_add_ref() call either. */
+  epoll_set_delete(eps);
+}
+
+/* Registers 'fd' with the epoll set 'eps' for edge-triggered read and write
+ * events; the grpc_fd pointer itself is stored as the event payload.
+ * An fd that is already registered (EEXIST) is not treated as an error. Any
+ * other epoll_ctl() failure is appended to '*error'. */
+static void epoll_set_add_fd_locked(epoll_set *eps, grpc_fd *fd,
+                                    grpc_error **error) {
+  int err;
+  struct epoll_event ev;
+  char *err_msg;
+  const char *err_desc = "epoll_set_add_fd_locked";
+
+#ifdef GRPC_TSAN
+  /* See the definition of g_epoll_sync for more context */
+  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
+#endif /* defined(GRPC_TSAN) */
+
+  ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
+  ev.data.ptr = fd;
+  err = epoll_ctl(eps->epoll_fd, EPOLL_CTL_ADD, fd->fd, &ev);
+  if (err < 0) {
+    /* Capture errno right away: building the message below allocates and
+     * formats, either of which may overwrite errno before it is used to
+     * construct the error object. */
+    const int saved_errno = errno;
+    if (saved_errno != EEXIST) {
+      gpr_asprintf(
+          &err_msg,
+          "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
+          eps->epoll_fd, fd->fd, saved_errno, strerror(saved_errno));
+      append_error(error, GRPC_OS_ERROR(saved_errno, err_msg), err_desc);
+      gpr_free(err_msg);
+    }
+  }
+}
+
+/* Registers the read end of 'wakeup_fd' with the epoll set 'eps' for
+ * edge-triggered read events; the grpc_wakeup_fd pointer is stored as the
+ * event payload. A wakeup fd that is already registered (EEXIST) is not
+ * treated as an error. Any other epoll_ctl() failure is appended to
+ * '*error'. */
+static void epoll_set_add_wakeup_fd_locked(epoll_set *eps,
+                                           grpc_wakeup_fd *wakeup_fd,
+                                           grpc_error **error) {
+  struct epoll_event ev;
+  int err;
+  char *err_msg;
+  const char *err_desc = "epoll_set_add_wakeup_fd";
+
+  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
+  ev.data.ptr = wakeup_fd;
+  err = epoll_ctl(eps->epoll_fd, EPOLL_CTL_ADD,
+                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
+  if (err < 0) {
+    /* Capture errno right away: building the message below allocates and
+     * formats, either of which may overwrite errno before it is used to
+     * construct the error object. */
+    const int saved_errno = errno;
+    if (saved_errno != EEXIST) {
+      gpr_asprintf(&err_msg,
+                   "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
+                   "error: %d (%s)",
+                   eps->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd),
+                   saved_errno, strerror(saved_errno));
+      append_error(error, GRPC_OS_ERROR(saved_errno, err_msg), err_desc);
+      gpr_free(err_msg);
+    }
+  }
+}
+
+/* Removes 'fd' from the epoll set 'eps'.
+ * When 'is_fd_closed' is true nothing is done. Otherwise the fd is removed
+ * with EPOLL_CTL_DEL; an fd that was not registered (ENOENT) is not treated
+ * as an error, any other failure is appended to '*error'. */
+static void epoll_set_remove_fd(epoll_set *eps, grpc_fd *fd, bool is_fd_closed,
+                                grpc_error **error) {
+  int err;
+  char *err_msg;
+  const char *err_desc = "epoll_set_remove_fd";
+
+  /* If fd is already closed, then it would have automatically been removed
+     from the epoll set */
+  if (!is_fd_closed) {
+    err = epoll_ctl(eps->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
+    if (err < 0) {
+      /* Capture errno right away: building the message below allocates and
+       * formats, either of which may overwrite errno before it is used to
+       * construct the error object. */
+      const int saved_errno = errno;
+      if (saved_errno != ENOENT) {
+        gpr_asprintf(
+            &err_msg,
+            "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
+            eps->epoll_fd, fd->fd, saved_errno, strerror(saved_errno));
+        append_error(error, GRPC_OS_ERROR(saved_errno, err_msg), err_desc);
+        gpr_free(err_msg);
+      }
+    }
+  }
+}
+
+/* Creates an epoll set: allocates the structure, initializes its mutexes,
+ * workqueue and counters, creates the workqueue wakeup fd and the epoll fd,
+ * and registers the wakeup fd with the epoll fd.
+ *
+ * '*error' is reset to GRPC_ERROR_NONE on entry. Might return NULL in case of
+ * an error; in that case '*error' describes the failure and everything that
+ * was set up so far has been released through epoll_set_delete(). The
+ * returned set starts with a ref count of zero. */
+static epoll_set *epoll_set_create(grpc_error **error) {
+  epoll_set *eps = NULL;
+  const char *err_desc = "epoll_set_create";
+
+  *error = GRPC_ERROR_NONE;
+
+  eps = gpr_malloc(sizeof(*eps));
+  /* Start from an all-zero structure. If grpc_wakeup_fd_init() fails below,
+     epoll_set_delete() still calls grpc_wakeup_fd_destroy() on
+     workqueue_wakeup_fd, and without this it would operate on whatever bytes
+     malloc returned. NOTE(review): relies on a zeroed grpc_wakeup_fd being
+     safe to destroy -- confirm against the wakeup_fd implementations. */
+  memset(eps, 0, sizeof(*eps));
+  eps->workqueue_scheduler.vtable = &workqueue_scheduler_vtable;
+  eps->epoll_fd = -1;
+
+  gpr_mu_init(&eps->mu);
+  gpr_mu_init(&eps->workqueue_read_mu);
+  gpr_mpscq_init(&eps->workqueue_items);
+  gpr_atm_rel_store(&eps->workqueue_item_count, 0);
+
+  gpr_atm_rel_store(&eps->ref_count, 0);
+  gpr_atm_rel_store(&eps->poller_count, 0);
+
+  gpr_atm_rel_store(&eps->is_shutdown, false);
+
+  if (!append_error(error, grpc_wakeup_fd_init(&eps->workqueue_wakeup_fd),
+                    err_desc)) {
+    goto done;
+  }
+
+  eps->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+
+  if (eps->epoll_fd < 0) {
+    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
+    goto done;
+  }
+
+  epoll_set_add_wakeup_fd_locked(eps, &eps->workqueue_wakeup_fd, error);
+
+done:
+  if (*error != GRPC_ERROR_NONE) {
+    epoll_set_delete(eps);
+    eps = NULL;
+  }
+  return eps;
+}
+
+static void epoll_set_delete(epoll_set *eps) { /* Releases everything owned by 'eps' and frees the structure itself. */
+ if (eps->epoll_fd >= 0) { /* epoll_fd is still -1 when epoll_set_create() bailed out before or at epoll_create1() */
+ close(eps->epoll_fd);
+ }
+
+ GPR_ASSERT(gpr_atm_no_barrier_load(&eps->workqueue_item_count) == 0); /* no closures may be left in the workqueue */
+ gpr_mu_destroy(&eps->mu);
+ gpr_mu_destroy(&eps->workqueue_read_mu);
+ gpr_mpscq_destroy(&eps->workqueue_items);
+ grpc_wakeup_fd_destroy(&eps->workqueue_wakeup_fd);
+
+ gpr_free(eps);
+}
+
+/* Wakes the workqueue wakeup fd of 'eps' when some poller other than the
+ * calling thread is likely to pick the work up. A failed wakeup is only
+ * logged. */
+static void workqueue_maybe_wakeup(epoll_set *eps) {
+  /* If this thread is the current poller, then it may be that it's about to
+     decrement the current poller count, so we need to look past this thread:
+     in that case more than one poller is required. */
+  const gpr_atm wakeup_threshold = (g_current_thread_epoll_set == eps) ? 1 : 0;
+  const gpr_atm pollers_now = gpr_atm_no_barrier_load(&eps->poller_count);
+
+  /* Only issue a wakeup if it's likely that some poller could come in and
+     take it right now. Note that since we do an anticipatory mpscq_pop every
+     poll loop, it's ok if we miss the wakeup here, as we'll get the work item
+     when the next poller enters anyway. */
+  if (pollers_now <= wakeup_threshold) {
+    return;
+  }
+  GRPC_LOG_IF_ERROR("workqueue_wakeup_fd",
+                    grpc_wakeup_fd_wakeup(&eps->workqueue_wakeup_fd));
+}
+
+static void workqueue_enqueue(grpc_exec_ctx *exec_ctx, grpc_closure *closure,
+ grpc_error *error) { /* Scheduler entry point: queues 'closure', carrying 'error', on the workqueue that closure->scheduler belongs to. */
+ GPR_TIMER_BEGIN("workqueue.enqueue", 0);
+ grpc_workqueue *workqueue = (grpc_workqueue *)closure->scheduler; /* the scheduler pointer doubles as the workqueue/epoll_set pointer */
+ /* take a ref to the workqueue: otherwise it can happen that whatever events
+ * this kicks off ends up destroying the workqueue before this function
+ * completes */
+ GRPC_WORKQUEUE_REF(workqueue, "enqueue");
+ epoll_set *eps = (epoll_set *)workqueue;
+ gpr_atm last = gpr_atm_no_barrier_fetch_add(&eps->workqueue_item_count, 1); /* 'last' is the item count before this push */
+ closure->error_data.error = error;
+ gpr_mpscq_push(&eps->workqueue_items, &closure->next_data.atm_next);
+ if (last == 0) { /* a wakeup is only considered when the queue was empty before this push */
+ workqueue_maybe_wakeup(eps);
+ }
+
+ GRPC_WORKQUEUE_UNREF(exec_ctx, workqueue, "enqueue");
+ GPR_TIMER_END("workqueue.enqueue", 0);
+}
+
+/* Returns the closure scheduler embedded in the epoll set behind 'workqueue',
+ * or grpc_schedule_on_exec_ctx when 'workqueue' is NULL. */
+static grpc_closure_scheduler *workqueue_scheduler(grpc_workqueue *workqueue) {
+  if (workqueue == NULL) {
+    return grpc_schedule_on_exec_ctx;
+  }
+  return &((epoll_set *)workqueue)->workqueue_scheduler;
+}
+
+/* Initializes the global epoll_set_wakeup_fd and immediately signals it.
+ * Returns the error of the first step that failed, or GRPC_ERROR_NONE. */
+static grpc_error *epoll_set_global_init() {
+  grpc_error *init_error = grpc_wakeup_fd_init(&epoll_set_wakeup_fd);
+  if (init_error != GRPC_ERROR_NONE) {
+    return init_error;
+  }
+  return grpc_wakeup_fd_wakeup(&epoll_set_wakeup_fd);
+}
+
+static void epoll_set_global_shutdown() { /* Counterpart of epoll_set_global_init(). */
+ grpc_wakeup_fd_destroy(&epoll_set_wakeup_fd); /* destroys the global wakeup fd created there */
+}
+
+/*******************************************************************************
+ * Fd Definitions
+ */
+
+/* We need to keep a freelist not because of any concerns of malloc performance
+ * but instead so that implementations with multiple threads in (for example)
+ * epoll_wait deal with the race between pollset removal and incoming poll
+ * notifications.
+ *
+ * The problem is that the poller ultimately holds a reference to this
+ * object, so it is very difficult to know when is safe to free it, at least
+ * without some expensive synchronization.
+ *
+ * If we keep the object freelisted, in the worst case losing this race just
+ * becomes a spurious read notification on a reused fd.
+ */
+
+static grpc_fd *fd_freelist = NULL;
+static gpr_mu fd_freelist_mu;
+
+/* Pops the head of fd_freelist under fd_freelist_mu.
+ * Returns NULL when the freelist is empty. */
+static grpc_fd *get_fd_from_freelist() {
+  gpr_mu_lock(&fd_freelist_mu);
+  grpc_fd *head = fd_freelist;
+  if (head != NULL) {
+    fd_freelist = head->freelist_next;
+  }
+  gpr_mu_unlock(&fd_freelist_mu);
+  return head;
+}
+
+static void add_fd_to_freelist(grpc_fd *fd) { /* Pushes 'fd' onto the front of fd_freelist and tears down its per-use state. */
+ gpr_mu_lock(&fd_freelist_mu);
+ fd->freelist_next = fd_freelist;
+ fd_freelist = fd;
+ grpc_iomgr_unregister_object(&fd->iomgr_object); /* undoes the registration done in fd_create() */
+
+ grpc_lfev_destroy(&fd->read_closure); /* re-initialized by fd_create() when the object is reused */
+ grpc_lfev_destroy(&fd->write_closure);
+
+ gpr_mu_unlock(&fd_freelist_mu); /* note: fd->mu is left initialized; it is destroyed in fd_global_shutdown() */
+}
+
+static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); } /* Initializes the mutex that guards fd_freelist. */
+
+/* Frees every grpc_fd that is parked on the freelist (destroying its mutex
+ * first) and then destroys the freelist mutex. */
+static void fd_global_shutdown(void) {
+  /* The freelist mutex is taken and released once before the list is walked;
+     the walk itself runs without holding it. */
+  gpr_mu_lock(&fd_freelist_mu);
+  gpr_mu_unlock(&fd_freelist_mu);
+  for (grpc_fd *cur = fd_freelist; cur != NULL; cur = fd_freelist) {
+    fd_freelist = cur->freelist_next;
+    gpr_mu_destroy(&cur->mu);
+    gpr_free(cur);
+  }
+  gpr_mu_destroy(&fd_freelist_mu);
+}
+
+static grpc_fd *fd_create(int fd, const char *name) { /* Wraps the OS descriptor 'fd' in a grpc_fd, registers it with iomgr under "<name> fd=<fd>" and associates it with an epoll set. */
+ grpc_fd *new_fd = get_fd_from_freelist(); /* reuse a parked object when possible; its mutex is still initialized */
+ if (new_fd == NULL) {
+ new_fd = gpr_malloc(sizeof(grpc_fd));
+ gpr_mu_init(&new_fd->mu);
+ }
+
+ /* Note: It is not really needed to get the new_fd->mu lock here. If this
+ * is a newly created fd (or an fd we got from the freelist), no one else
+ * would be holding a lock to it anyway. */
+ gpr_mu_lock(&new_fd->mu);
+ new_fd->eps = NULL;
+
+ new_fd->fd = fd;
+ new_fd->orphaned = false;
+ grpc_lfev_init(&new_fd->read_closure);
+ grpc_lfev_init(&new_fd->write_closure);
+
+ new_fd->freelist_next = NULL;
+ new_fd->on_done_closure = NULL;
+
+ gpr_mu_unlock(&new_fd->mu);
+
+ char *fd_name; /* temporary label, freed again below */
+ gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
+ grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
+ gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, (void *)new_fd, fd_name);
+ gpr_free(fd_name);
+
+ /* Associate the fd with one of the eps */
+ add_fd_to_eps(new_fd);
+ return new_fd;
+}
+
+/* Returns the OS file descriptor wrapped by 'fd', or -1 once the grpc_fd has
+ * been orphaned. The fields are read under fd->mu. */
+static int fd_wrapped_fd(grpc_fd *fd) {
+  gpr_mu_lock(&fd->mu);
+  const int wrapped = fd->orphaned ? -1 : fd->fd;
+  gpr_mu_unlock(&fd->mu);
+  return wrapped;
+}
+
+static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+ grpc_closure *on_done, int *release_fd,
+ const char *reason) { /* Gives up 'fd': closes or releases the descriptor, detaches it from its epoll set, schedules 'on_done' and parks the grpc_fd on the freelist. 'reason' is not used. */
+ bool is_fd_closed = false;
+ grpc_error *error = GRPC_ERROR_NONE;
+ epoll_set *unref_eps = NULL; /* epoll set to unref once fd->mu has been released */
+
+ gpr_mu_lock(&fd->mu);
+ fd->on_done_closure = on_done;
+
+ /* If release_fd is not NULL, we should be relinquishing control of the file
+ descriptor fd->fd (but we still own the grpc_fd structure). */
+ if (release_fd != NULL) {
+ *release_fd = fd->fd;
+ } else {
+ close(fd->fd);
+ is_fd_closed = true;
+ }
+
+ fd->orphaned = true;
+
+ /* Remove the fd from the epoll set */
+ if (fd->eps != NULL) {
+ epoll_set_remove_fd(fd->eps, fd, is_fd_closed, &error);
+ unref_eps = fd->eps;
+ fd->eps = NULL;
+ }
+
+ grpc_closure_sched(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error)); /* the closure receives its own reference to any removal error */
+
+ gpr_mu_unlock(&fd->mu);
+
+ /* We are done with this fd. Release it (i.e add back to freelist) */
+ add_fd_to_freelist(fd);
+
+ if (unref_eps != NULL) {
+ /* Unref stale epoll set here, outside the fd lock above.
+ The epoll set owns a workqueue which owns an fd, and unreffing
+ inside the lock can cause an eventual lock loop that makes TSAN very
+ unhappy. */
+ EPS_UNREF(exec_ctx, unref_eps, "fd_orphan");
+ }
+ GRPC_LOG_IF_ERROR("fd_orphan", GRPC_ERROR_REF(error));
+ GRPC_ERROR_UNREF(error); /* drop this function's own reference to the error */
+}
+
+/* This polling engine doesn't really need the read notifier functionality. So
+ * it just returns a dummy read notifier pollset: the address of the global
+ * g_read_notifier, independent of 'exec_ctx' and 'fd'. */
+static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
+ grpc_fd *fd) {
+ return &g_read_notifier;
+}
+
+static bool fd_is_shutdown(grpc_fd *fd) { /* Reports the shutdown state of 'fd' as recorded on its read event. */
+ return grpc_lfev_is_shutdown(&fd->read_closure);
+}
+
+/* Might be called multiple times. The socket is shut down (SHUT_RDWR) and the
+ write event is marked as shut down only when grpc_lfev_set_shutdown() on the
+ read event returns true. The reference on 'why' that was passed in is
+ released before returning. */
+static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
+ if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure,
+ GRPC_ERROR_REF(why))) {
+ shutdown(fd->fd, SHUT_RDWR);
+ grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
+ }
+ GRPC_ERROR_UNREF(why);
+}
+
+static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+ grpc_closure *closure) { /* Registers 'closure' on the read event of 'fd'. */
+ grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure);
+}
+
+static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+ grpc_closure *closure) { /* Registers 'closure' on the write event of 'fd'. */
+ grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure);
+}
+
+static grpc_workqueue *fd_get_workqueue(grpc_fd *fd) { return NULL; } /* Always NULL: no workqueue is handed out for an fd here. */
+
+/*******************************************************************************
+ * Pollset Definitions
+ */
+ /* TODO: sreek - Not needed anymore
+  * NOTE(review): both keys are still written by pollset_work() and read by
+  * pollset_kick() in this file, so they cannot simply be deleted yet.
+  *  - g_current_thread_pollset: pollset the calling thread is working on.
+  *  - g_current_thread_worker: that thread's stack-allocated worker. */
+ GPR_TLS_DECL(g_current_thread_pollset);
+ GPR_TLS_DECL(g_current_thread_worker);
+
+ /* Prepares a worker for use: clears its list links and initializes the
+  * condition variable it will be kicked through. */
+ static void pollset_worker_init(grpc_pollset_worker *worker) {
+   worker->prev = NULL;
+   worker->next = NULL;
+   gpr_cv_init(&worker->kick_cv);
+ }
+
+ /* Global state management: creates the two thread-local keys used by the
+  * pollset code. Always returns GRPC_ERROR_NONE. */
+ static grpc_error *pollset_global_init(void) {
+   grpc_error *result = GRPC_ERROR_NONE;
+   gpr_tls_init(&g_current_thread_pollset);
+   gpr_tls_init(&g_current_thread_worker);
+   return result;
+ }
+
+ /* Releases the thread-local keys created by pollset_global_init(). */
+ static void pollset_global_shutdown(void) {
+   gpr_tls_destroy(&(g_current_thread_pollset));
+   gpr_tls_destroy(&(g_current_thread_worker));
+ }
+
+ /* Signals the worker's condition variable. Always returns GRPC_ERROR_NONE. */
+ static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) {
+   grpc_error *result = GRPC_ERROR_NONE;
+   gpr_cv_signal(&worker->kick_cv);
+   return result;
+ }
+
+ /* Returns 1 when at least one worker is linked into the pollset's worker
+  * list (i.e. a thread is inside pollset_work), 0 when the list holds only the
+  * sentinel root. The pollset must be locked by the caller. */
+ static int pollset_has_workers(grpc_pollset *p) {
+   const grpc_pollset_worker *sentinel = &p->root_worker;
+   return (sentinel->next == sentinel) ? 0 : 1;
+ }
+
+ /* Unlinks 'worker' from the doubly linked worker list by joining its two
+  * neighbours. The worker's own next/prev pointers are left untouched. */
+ static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
+   grpc_pollset_worker *before = worker->prev;
+   grpc_pollset_worker *after = worker->next;
+   before->next = after;
+   after->prev = before;
+ }
+
+ /* Detaches and returns the first worker of 'p', or NULL when 'p' has none. */
+ static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) {
+   if (!pollset_has_workers(p)) {
+     return NULL;
+   }
+   grpc_pollset_worker *front = p->root_worker.next;
+   remove_worker(p, front);
+   return front;
+ }
+
+ /* Links 'worker' in just before the sentinel root, i.e. at the tail. */
+ static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
+   grpc_pollset_worker *root = &p->root_worker;
+   grpc_pollset_worker *old_tail = root->prev;
+   worker->next = root;
+   worker->prev = old_tail;
+   root->prev = worker;
+   old_tail->next = worker;
+ }
+
+ /* Links 'worker' in just after the sentinel root, i.e. at the head. */
+ static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
+   grpc_pollset_worker *root = &p->root_worker;
+   grpc_pollset_worker *old_head = root->next;
+   worker->prev = root;
+   worker->next = old_head;
+   old_head->prev = worker;
+   root->next = worker;
+ }
+
+ /* Wakes worker(s) waiting in pollset_work() on pollset 'p' by signalling their
+  * condition variables. p->mu must be held before calling this function.
+  *
+  * 'specific_worker' selects the target:
+  *  - GRPC_POLLSET_KICK_BROADCAST: every worker on 'p' other than the calling
+  *    thread's own worker is signalled; when 'p' has no workers the kick is
+  *    remembered in p->kicked_without_pollers instead.
+  *  - any other non-NULL worker: that worker is signalled unless it is the
+  *    calling thread's own worker.
+  *  - NULL: see the comment inside the last branch below.
+  *
+  * Returns the accumulated kick error (GRPC_ERROR_NONE when nothing failed).
+  * A separate reference to the error is handed to GRPC_LOG_IF_ERROR. */
+ static grpc_error *pollset_kick(grpc_pollset *p,
+ grpc_pollset_worker *specific_worker) {
+ GPR_TIMER_BEGIN("pollset_kick", 0);
+ grpc_error *error = GRPC_ERROR_NONE;
+ const char *err_desc = "Kick Failure";
+ grpc_pollset_worker *worker = specific_worker;
+ if (worker != NULL) {
+ if (worker == GRPC_POLLSET_KICK_BROADCAST) {
+ if (pollset_has_workers(p)) {
+ GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
+ for (worker = p->root_worker.next; worker != &p->root_worker;
+ worker = worker->next) {
+ if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
+ append_error(&error, pollset_worker_kick(worker), err_desc);
+ }
+ }
+ GPR_TIMER_END("pollset_kick.broadcast", 0);
+ } else {
+ p->kicked_without_pollers = true;
+ }
+ } else {
+ GPR_TIMER_MARK("kicked_specifically", 0);
+ if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
+ append_error(&error, pollset_worker_kick(worker), err_desc);
+ }
+ }
+ } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
+ /* Since worker == NULL, it means that we can kick "any" worker on this
+ pollset 'p'. If 'p' happens to be the same pollset this thread is
+ currently polling (i.e in pollset_work() function), then there is no need
+ to kick any other worker since the current thread can just absorb the
+ kick. This is the reason why we enter this case only when
+ g_current_thread_pollset is != p.
+
+ The front worker is popped and re-queued at the back before being
+ signalled; with no workers at all the kick is remembered in
+ p->kicked_without_pollers. */
+
+ GPR_TIMER_MARK("kick_anonymous", 0);
+ worker = pop_front_worker(p);
+ if (worker != NULL) {
+ GPR_TIMER_MARK("finally_kick", 0);
+ push_back_worker(p, worker);
+ append_error(&error, pollset_worker_kick(worker), err_desc);
+ } else {
+ GPR_TIMER_MARK("kicked_no_pollers", 0);
+ p->kicked_without_pollers = true;
+ }
+ }
+
+ GPR_TIMER_END("pollset_kick", 0);
+ GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
+ return error;
+ }
+
+ /* Puts 'pollset' into its initial state: fresh mutex (also returned through
+  * '*mu'), no epoll set, an empty worker list and all shutdown/kick flags
+  * cleared. */
+ static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
+   grpc_pollset_worker *sentinel = &pollset->root_worker;
+
+   gpr_mu_init(&pollset->mu);
+   *mu = &pollset->mu;
+   pollset->eps = NULL;
+
+   /* An empty list is a sentinel that points at itself in both directions. */
+   sentinel->prev = sentinel;
+   sentinel->next = sentinel;
+   pollset->kicked_without_pollers = false;
+
+   pollset->shutting_down = false;
+   pollset->finish_shutdown_called = false;
+   pollset->shutdown_done = NULL;
+ }
+
+ /* Marks the read-side lock-free event of 'fd' as ready. */
+ static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
+   grpc_lfev_set_ready(exec_ctx, &(fd->read_closure));
+ }
+
+ /* Marks the write-side lock-free event of 'fd' as ready. */
+ static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
+   grpc_lfev_set_ready(exec_ctx, &(fd->write_closure));
+ }
+
+ /* Drops the pollset's reference on its epoll set (if it holds one) and leaves
+  * ps->eps set to NULL. 'reason' is forwarded to EPS_UNREF. */
+ static void pollset_release_epoll_set(grpc_exec_ctx *exec_ctx, grpc_pollset *ps,
+                                       char *reason) {
+   if (ps->eps == NULL) {
+     return; /* nothing held; the field is already NULL */
+   }
+   EPS_UNREF(exec_ctx, ps->eps, reason);
+   ps->eps = NULL;
+ }
+
+ /* Final step of pollset shutdown: records that it ran, releases the epoll
+  * set reference and schedules the caller-supplied shutdown_done closure.
+  * Asserts that no worker is still attached to the pollset. */
+ static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx,
+                                    grpc_pollset *pollset) {
+   /* Reaching this point with workers still attached is a programming error */
+   GPR_ASSERT(!pollset_has_workers(pollset));
+
+   pollset->finish_shutdown_called = true;
+
+   /* Drops the reference and resets pollset->eps to NULL */
+   pollset_release_epoll_set(exec_ctx, pollset, "ps_shutdown");
+
+   grpc_closure *done = pollset->shutdown_done;
+   grpc_closure_sched(exec_ctx, done, GRPC_ERROR_NONE);
+ }
+
+ /* Begins shutting down 'pollset'; 'closure' is scheduled once shutdown has
+  * finished. pollset->mu lock must be held by the caller before calling this.
+  *
+  * All workers are kicked. If none are attached the shutdown is completed
+  * right here; otherwise the last worker to leave pollset_work() completes it.
+  * Must be called at most once per pollset (asserted). */
+ static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+                              grpc_closure *closure) {
+   GPR_TIMER_BEGIN("pollset_shutdown", 0);
+   GPR_ASSERT(!pollset->shutting_down);
+   pollset->shutting_down = true;
+   pollset->shutdown_done = closure;
+
+   /* pollset_kick() hands ownership of its result to the caller and has
+      already logged it, so the reference must be released here rather than
+      silently dropped. */
+   grpc_error *kick_error = pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);
+   GRPC_ERROR_UNREF(kick_error);
+
+   /* If the pollset has any workers, we cannot call finish_shutdown_locked()
+      because it would release the underlying epoll set. In such a case, we
+      let the last worker call finish_shutdown_locked() from pollset_work() */
+   if (!pollset_has_workers(pollset)) {
+     GPR_ASSERT(!pollset->finish_shutdown_called);
+     GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
+     finish_shutdown_locked(exec_ctx, pollset);
+   }
+   GPR_TIMER_END("pollset_shutdown", 0);
+ }
+
+ /* By contract pollset_shutdown has already run when this is called, so the
+  * only remaining work is to verify that no worker is attached and to destroy
+  * the pollset mutex. */
+ static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
+   const int still_has_workers = pollset_has_workers(pollset);
+   GPR_ASSERT(!still_has_workers);
+   gpr_mu_destroy(&pollset->mu);
+ }
+
+ /* Tries to pop one closure from eps->workqueue_items and run it.
+  *
+  * Returns true if a closure was executed, false otherwise (including when
+  * workqueue_read_mu could not be taken without blocking). The read mutex is
+  * released before the closure runs. workqueue_maybe_wakeup(eps) is called
+  * when more items remain after the pop, or when the pop yielded nothing even
+  * though the item count is still positive. */
+ static bool maybe_do_workqueue_work(grpc_exec_ctx *exec_ctx, epoll_set *eps) {
+ if (gpr_mu_trylock(&eps->workqueue_read_mu)) {
+ gpr_mpscq_node *n = gpr_mpscq_pop(&eps->workqueue_items);
+ gpr_mu_unlock(&eps->workqueue_read_mu);
+ if (n != NULL) {
+ if (gpr_atm_full_fetch_add(&eps->workqueue_item_count, -1) > 1) {
+ workqueue_maybe_wakeup(eps);
+ }
+ grpc_closure *c = (grpc_closure *)n;
+ grpc_error *error = c->error_data.error;
+ #ifndef NDEBUG
+ c->scheduled = false;
+ #endif
+ c->cb(exec_ctx, c->cb_arg, error);
+ GRPC_ERROR_UNREF(error);
+ return true;
+ } else if (gpr_atm_no_barrier_load(&eps->workqueue_item_count) > 0) {
+ /* n == NULL might mean there's work but it's not available to be popped
+ * yet - try to ensure another workqueue wakes up to check shortly if so
+ */
+ workqueue_maybe_wakeup(eps);
+ }
+ }
+ return false;
+ }
+
+ /* Blocking call. When more than one thread serves each epoll set, takes
+  * eps->mu; with a single thread per set no locking is performed. */
+ static void acquire_epoll_lease(epoll_set *eps) {
+   if (g_num_threads_per_eps <= 1) {
+     return;
+   }
+   gpr_mu_lock(&eps->mu);
+ }
+
+ /* Counterpart of acquire_epoll_lease(): releases eps->mu when more than one
+  * thread serves each epoll set, otherwise does nothing. */
+ static void release_epoll_lease(epoll_set *eps) {
+   if (g_num_threads_per_eps <= 1) {
+     return;
+   }
+   gpr_mu_unlock(&eps->mu);
+ }
+
+ #define GRPC_EPOLL_MAX_EVENTS 100
+ /* Blocks in epoll_wait() on 'epoll_fd' (no timeout) while holding the epoll
+  * lease of 'eps', then dispatches every returned event:
+  *  - the set's workqueue wakeup fd: consume the wakeup and try to run one
+  *    workqueue item;
+  *  - the global epoll_set_wakeup_fd: mark the set as shut down;
+  *  - anything else is treated as a grpc_fd whose read/write closures are made
+  *    ready (errors and hang-ups make both ready).
+  *
+  * Failures are appended to '*error'. An epoll_wait() interrupted by a signal
+  * (EINTR) is not a failure: the poller loop simply calls again, and
+  * reporting it would grow the accumulated error without bound. */
+ static void do_epoll_wait(grpc_exec_ctx *exec_ctx, int epoll_fd, epoll_set *eps,
+                           grpc_error **error) {
+   struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
+   int ep_rv;
+   int saved_errno = 0;
+   char *err_msg;
+   const char *err_desc = "do_epoll_wait";
+
+   int timeout_ms = -1;
+
+   GRPC_SCHEDULING_START_BLOCKING_REGION;
+   acquire_epoll_lease(eps);
+   ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms);
+   if (ep_rv < 0) {
+     /* Capture errno before the unlock below gets a chance to overwrite it */
+     saved_errno = errno;
+   }
+   release_epoll_lease(eps);
+   GRPC_SCHEDULING_END_BLOCKING_REGION;
+
+   if (ep_rv < 0 && saved_errno != EINTR) {
+     /* NOTE(review): err_msg is heap-allocated by gpr_asprintf and is not
+        freed here; confirm whether GRPC_OS_ERROR copies the string before
+        adding a gpr_free. */
+     gpr_asprintf(&err_msg,
+                  "epoll_wait() epoll fd: %d failed with error: %d (%s)",
+                  epoll_fd, saved_errno, strerror(saved_errno));
+     append_error(error, GRPC_OS_ERROR(saved_errno, err_msg), err_desc);
+   }
+
+ #ifdef GRPC_TSAN
+   /* See the definition of g_epoll_sync for more details */
+   gpr_atm_acq_load(&g_epoll_sync);
+ #endif /* defined(GRPC_TSAN) */
+
+   /* ep_rv is negative on failure, so this loop is skipped in that case */
+   for (int i = 0; i < ep_rv; ++i) {
+     void *data_ptr = ep_ev[i].data.ptr;
+     if (data_ptr == &eps->workqueue_wakeup_fd) {
+       append_error(error,
+                    grpc_wakeup_fd_consume_wakeup(&eps->workqueue_wakeup_fd),
+                    err_desc);
+       maybe_do_workqueue_work(exec_ctx, eps);
+     } else if (data_ptr == &epoll_set_wakeup_fd) {
+       gpr_atm_rel_store(&eps->is_shutdown, 1);
+       gpr_log(GPR_INFO, "pollset poller: shutdown set");
+     } else {
+       grpc_fd *fd = data_ptr;
+       int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
+       int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
+       int write_ev = ep_ev[i].events & EPOLLOUT;
+       if (read_ev || cancel) {
+         fd_become_readable(exec_ctx, fd);
+       }
+       if (write_ev || cancel) {
+         fd_become_writable(exec_ctx, fd);
+       }
+     }
+   }
+ }
+
+ /* One iteration of a poller thread: first tries to run a pending workqueue
+  * item; only when there was none does the thread block in epoll_wait() on
+  * the set. Errors from the wait are appended to '*error'. */
+ static void epoll_set_work(grpc_exec_ctx *exec_ctx, epoll_set *eps,
+                            grpc_error **error) {
+   GPR_TIMER_BEGIN("epoll_set_work", 0);
+
+   /* epoll_fd never changes after creation, so reading it without taking a
+      lock on the epoll set is safe. */
+   const int epoll_fd = eps->epoll_fd;
+
+   /* Running a workqueue item may itself complete something on the completion
+      queue, in which case polling is pointless; skip the wait and let the
+      caller loop around again. */
+   const bool did_workqueue_work = maybe_do_workqueue_work(exec_ctx, eps);
+   if (!did_workqueue_work) {
+     gpr_atm_no_barrier_fetch_add(&eps->poller_count, 1);
+     g_current_thread_epoll_set = eps;
+
+     do_epoll_wait(exec_ctx, epoll_fd, eps, error);
+
+     g_current_thread_epoll_set = NULL;
+     gpr_atm_no_barrier_fetch_add(&eps->poller_count, -1);
+   }
+
+   GPR_TIMER_END("epoll_set_work", 0);
+ }
+
+ /* Parks the calling thread on 'pollset' until it is kicked or 'deadline'
+    passes. No polling happens here: the dedicated poller threads do that.
+
+    pollset->mu lock must be held by the caller before calling this.
+    The function pollset_work() may temporarily release the lock (pollset->mu)
+    during the course of its execution but it will always re-acquire the lock and
+    ensure that it is held by the time the function returns.
+
+    While the call is in progress '*worker_hdl' (when worker_hdl is non-NULL)
+    points at this call's stack-allocated worker; it is reset to NULL before
+    returning. 'now' is not used. The returned error is never set to anything
+    other than GRPC_ERROR_NONE by this function. */
+ static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+                                 grpc_pollset_worker **worker_hdl,
+                                 gpr_timespec now, gpr_timespec deadline) {
+   GPR_TIMER_BEGIN("pollset_work", 0);
+   grpc_error *error = GRPC_ERROR_NONE;
+
+   grpc_pollset_worker worker;
+   pollset_worker_init(&worker);
+
+   if (worker_hdl) *worker_hdl = &worker;
+
+   gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
+   gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
+
+   if (pollset->kicked_without_pollers) {
+     /* If the pollset was kicked without pollers, pretend that the current
+        worker got the kick and skip polling. A kick indicates that there is some
+        work that needs attention like an event on the completion queue or an
+        alarm */
+     GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
+     pollset->kicked_without_pollers = false;
+   } else if (!pollset->shutting_down) {
+     push_front_worker(pollset, &worker);
+
+     gpr_cv_wait(&worker.kick_cv, &pollset->mu,
+                 gpr_convert_clock_type(deadline, GPR_CLOCK_REALTIME));
+     /* pollset->mu locked here */
+
+     remove_worker(pollset, &worker);
+   }
+
+   /* If we are the last worker on the pollset (i.e pollset_has_workers() is
+      false at this point) and the pollset is shutting down, we may have to
+      finish the shutdown process by calling finish_shutdown_locked().
+      See pollset_shutdown() for more details.
+
+      Note: Continuing to access pollset here is safe; it is the caller's
+      responsibility to not destroy a pollset when it has outstanding calls to
+      pollset_work() */
+   if (pollset->shutting_down && !pollset_has_workers(pollset) &&
+       !pollset->finish_shutdown_called) {
+     GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
+     finish_shutdown_locked(exec_ctx, pollset);
+
+     gpr_mu_unlock(&pollset->mu);
+     grpc_exec_ctx_flush(exec_ctx);
+     gpr_mu_lock(&pollset->mu);
+   }
+
+   if (worker_hdl) *worker_hdl = NULL;
+
+   gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
+   gpr_tls_set(&g_current_thread_worker, (intptr_t)0);
+
+   /* The worker lives on this stack frame and has already been unlinked from
+      the pollset, so nothing can signal it any more: release the condition
+      variable created by pollset_worker_init(). */
+   gpr_cv_destroy(&worker.kick_cv);
+
+   GPR_TIMER_END("pollset_work", 0);
+
+   GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
+   return error;
+ }
+
+ /* Intentionally a no-op in this engine. */
+ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+                            grpc_fd *fd) {
+   (void)exec_ctx;
+   (void)pollset;
+   (void)fd;
+ }
+
+/*******************************************************************************
+ * Pollset-set Definitions
+ */
+ grpc_pollset_set g_dummy_pollset_set;
+ /* Pollset sets carry no state in this engine: every caller receives the
+  * address of the same shared placeholder object. */
+ static grpc_pollset_set *pollset_set_create(void) {
+   grpc_pollset_set *shared = &g_dummy_pollset_set;
+   return shared;
+ }
+
+ /* Intentionally a no-op in this engine. */
+ static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
+                                 grpc_pollset_set *pss) {
+   (void)exec_ctx;
+   (void)pss;
+ }
+
+ /* Intentionally a no-op in this engine. */
+ static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
+                                grpc_fd *fd) {
+   (void)exec_ctx;
+   (void)pss;
+   (void)fd;
+ }
+
+ /* Intentionally a no-op in this engine. */
+ static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
+                                grpc_fd *fd) {
+   (void)exec_ctx;
+   (void)pss;
+   (void)fd;
+ }
+
+ /* Intentionally a no-op in this engine. */
+ static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
+                                     grpc_pollset_set *pss, grpc_pollset *ps) {
+   (void)exec_ctx;
+   (void)pss;
+   (void)ps;
+ }
+
+ /* Intentionally a no-op in this engine. */
+ static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
+                                     grpc_pollset_set *pss, grpc_pollset *ps) {
+   (void)exec_ctx;
+   (void)pss;
+   (void)ps;
+ }
+
+ /* Intentionally a no-op in this engine. */
+ static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
+                                         grpc_pollset_set *bag,
+                                         grpc_pollset_set *item) {
+   (void)exec_ctx;
+   (void)bag;
+   (void)item;
+ }
+
+ /* Intentionally a no-op in this engine. */
+ static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
+                                         grpc_pollset_set *bag,
+                                         grpc_pollset_set *item) {
+   (void)exec_ctx;
+   (void)bag;
+   (void)item;
+ }
+
+/*******************************************************************************
+ * Event engine binding
+ */
+
+ /* Tears the whole engine down. The order matters: the poller threads are
+  * stopped first, then the epoll sets are released, and only afterwards is
+  * the per-module global state destroyed. */
+ static void shutdown_engine(void) {
+   /* 1. stop the threads, then drop the epoll sets they were serving */
+   shutdown_poller_threads();
+   shutdown_epoll_sets();
+
+   /* 2. module-level global state */
+   fd_global_shutdown();
+   pollset_global_shutdown();
+   epoll_set_global_shutdown();
+
+   gpr_log(GPR_INFO, "ev-epoll-threadpool engine shutdown complete");
+ }
+
+ /* Function table exposing this engine's implementation of the event-engine
+  * interface. grpc_init_epoll_thread_pool_linux() returns its address when
+  * the engine initializes successfully. */
+ static const grpc_event_engine_vtable vtable = {
+ .pollset_size = sizeof(grpc_pollset),
+
+ .fd_create = fd_create,
+ .fd_wrapped_fd = fd_wrapped_fd,
+ .fd_orphan = fd_orphan,
+ .fd_shutdown = fd_shutdown,
+ .fd_is_shutdown = fd_is_shutdown,
+ .fd_notify_on_read = fd_notify_on_read,
+ .fd_notify_on_write = fd_notify_on_write,
+ .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset,
+ .fd_get_workqueue = fd_get_workqueue,
+
+ .pollset_init = pollset_init,
+ .pollset_shutdown = pollset_shutdown,
+ .pollset_destroy = pollset_destroy,
+ .pollset_work = pollset_work,
+ .pollset_kick = pollset_kick,
+ .pollset_add_fd = pollset_add_fd,
+
+ .pollset_set_create = pollset_set_create,
+ .pollset_set_destroy = pollset_set_destroy,
+ .pollset_set_add_pollset = pollset_set_add_pollset,
+ .pollset_set_del_pollset = pollset_set_del_pollset,
+ .pollset_set_add_pollset_set = pollset_set_add_pollset_set,
+ .pollset_set_del_pollset_set = pollset_set_del_pollset_set,
+ .pollset_set_add_fd = pollset_set_add_fd,
+ .pollset_set_del_fd = pollset_set_del_fd,
+
+ .workqueue_ref = workqueue_ref,
+ .workqueue_unref = workqueue_unref,
+ .workqueue_scheduler = workqueue_scheduler,
+
+ .shutdown_engine = shutdown_engine,
+ };
+
+/*****************************************************************************
+ * Dedicated polling threads and pollsets - Definitions
+ */
+ /* Assigns 'fd' to one of the global epoll sets, chosen round-robin through
+  * the g_next_eps counter, registers it with that set and stores the set
+  * (with a new reference) in fd->eps. Does nothing if the fd has already
+  * been orphaned. The fd must not yet belong to an epoll set (asserted). */
+ static void add_fd_to_eps(grpc_fd *fd) {
+   GPR_ASSERT(fd->eps == NULL);
+   GPR_TIMER_BEGIN("add_fd_to_eps", 0);
+
+   grpc_error *error = GRPC_ERROR_NONE;
+   size_t idx = (size_t)gpr_atm_no_barrier_fetch_add(&g_next_eps, 1) % g_num_eps;
+   epoll_set *eps = g_epoll_sets[idx];
+
+   gpr_mu_lock(&fd->mu);
+
+   if (fd->orphaned) {
+     gpr_mu_unlock(&fd->mu);
+     /* Early out; close the timer region opened above so begin/end markers
+        stay balanced on this path too. */
+     GPR_TIMER_END("add_fd_to_eps", 0);
+     return;
+   }
+
+   epoll_set_add_fd_locked(eps, fd, &error);
+   EPS_ADD_REF(eps, "fd");
+   fd->eps = eps;
+
+   /* idx is an unsigned size_t, hence the unsigned conversion specifier */
+   GRPC_POLLING_TRACE("add_fd_to_eps (fd: %d, eps idx = %" PRIuPTR ")", fd->fd,
+                      idx);
+   gpr_mu_unlock(&fd->mu);
+
+   GRPC_LOG_IF_ERROR("add_fd_to_eps", error);
+   GPR_TIMER_END("add_fd_to_eps", 0);
+ }
+
+ /* Allocates the global g_epoll_sets array, creates g_num_eps epoll sets (each
+  * holding one "init_epoll_sets" reference), resets the round-robin counter
+  * and initializes the shared g_read_notifier pollset.
+  *
+  * Returns true on success. On failure everything created so far is released
+  * through shutdown_epoll_sets(), g_num_eps is left at the number of sets
+  * that had been created, and false is returned. */
+ static bool init_epoll_sets() {
+   grpc_error *error = GRPC_ERROR_NONE;
+   bool is_success = true;
+   gpr_mu *mu;
+
+   /* Initialize the notifier pollset before anything that can fail:
+      shutdown_epoll_sets() on the failure path below unconditionally calls
+      pollset_destroy() on it, which asserts on (and destroys the mutex of) a
+      pollset and therefore must never see an uninitialized one. */
+   pollset_init(&g_read_notifier, &mu);
+
+   /* Allocated with gpr_malloc because shutdown_epoll_sets() releases the
+      array with gpr_free. */
+   g_epoll_sets = (epoll_set **)gpr_malloc(g_num_eps * sizeof(epoll_set *));
+
+   for (size_t i = 0; i < g_num_eps; i++) {
+     g_epoll_sets[i] = epoll_set_create(&error);
+     if (g_epoll_sets[i] == NULL) {
+       gpr_log(GPR_ERROR, "Error in creating a epoll set");
+       g_num_eps = i; /* Helps cleanup */
+       shutdown_epoll_sets();
+       is_success = false;
+       goto done;
+     }
+
+     EPS_ADD_REF(g_epoll_sets[i], "init_epoll_sets");
+   }
+
+   gpr_atm_no_barrier_store(&g_next_eps, 0);
+
+ done:
+   GRPC_LOG_IF_ERROR("init_epoll_sets", error);
+   return is_success;
+ }
+
+ /* Drops the "shutdown_epoll_sets" reference on each of the g_num_eps global
+  * epoll sets, frees the g_epoll_sets array and destroys the shared
+  * g_read_notifier pollset. Does nothing when g_epoll_sets is NULL. */
+ static void shutdown_epoll_sets() {
+   if (g_epoll_sets == NULL) {
+     return;
+   }
+
+   grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
+   size_t idx = 0;
+   while (idx < g_num_eps) {
+     EPS_UNREF(&exec_ctx, g_epoll_sets[idx], "shutdown_epoll_sets");
+     idx++;
+   }
+   grpc_exec_ctx_flush(&exec_ctx);
+
+   gpr_free(g_epoll_sets);
+   g_epoll_sets = NULL;
+   pollset_destroy(&exec_ctx, &g_read_notifier);
+   grpc_exec_ctx_finish(&exec_ctx);
+ }
+
+ /* Body of every dedicated poller thread. 'arg' is the epoll_set the thread
+  * serves. The thread keeps working on the set, flushing its exec_ctx after
+  * each round, until the set's is_shutdown flag becomes non-zero. Errors
+  * accumulated across rounds are logged once when the loop ends. */
+ static void poller_thread_loop(void *arg) {
+   epoll_set *eps = (epoll_set *)arg;
+   grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
+   grpc_error *error = GRPC_ERROR_NONE;
+
+   for (;;) {
+     if (gpr_atm_acq_load(&eps->is_shutdown)) {
+       break;
+     }
+     epoll_set_work(&exec_ctx, eps, &error);
+     grpc_exec_ctx_flush(&exec_ctx);
+   }
+
+   grpc_exec_ctx_finish(&exec_ctx);
+   GRPC_LOG_IF_ERROR("poller_thread_loop", error);
+ }
+
+ /* Starts g_num_eps * g_num_threads_per_eps joinable poller threads and
+  * records their ids in g_poller_threads. Thread i serves epoll set
+  * (i % g_num_eps). g_epoll_sets MUST be initialized before calling this. */
+ static void start_poller_threads() {
+   GPR_ASSERT(g_epoll_sets);
+
+   gpr_log(GPR_INFO, "Starting poller threads");
+
+   size_t num_threads = g_num_eps * g_num_threads_per_eps;
+   /* Allocated with gpr_malloc because shutdown_poller_threads() releases the
+      array with gpr_free. */
+   g_poller_threads =
+       (gpr_thd_id *)gpr_malloc(num_threads * sizeof(gpr_thd_id));
+   gpr_thd_options options = gpr_thd_options_default();
+   gpr_thd_options_set_joinable(&options);
+
+   for (size_t i = 0; i < num_threads; i++) {
+     /* Every recorded id is joined at shutdown, so a thread that failed to
+        start must not go unnoticed: its slot would hold an indeterminate id. */
+     int started = gpr_thd_new(&g_poller_threads[i], poller_thread_loop,
+                               (void *)g_epoll_sets[i % g_num_eps], &options);
+     GPR_ASSERT(started);
+   }
+ }
+
+ /* Asks every poller thread to stop and waits for all of them to exit, then
+  * frees g_poller_threads. Shutdown is requested by adding the global
+  * epoll_set_wakeup_fd to each epoll set; a poller that sees that fd in
+  * do_epoll_wait() marks its set as shut down.
+  *
+  * NOTE(review): with more than one thread per epoll set this relies on the
+  * wakeup fd waking each thread serving the set in turn - confirm
+  * against epoll_set_add_wakeup_fd_locked(). */
+ static void shutdown_poller_threads() {
+   GPR_ASSERT(g_poller_threads);
+   GPR_ASSERT(g_epoll_sets);
+   grpc_error *error = GRPC_ERROR_NONE;
+
+   gpr_log(GPR_INFO, "Shutting down pollers");
+
+   /* g_epoll_sets holds exactly g_num_eps entries: iterate over the sets, not
+      over the (possibly larger) number of threads. */
+   for (size_t i = 0; i < g_num_eps; i++) {
+     epoll_set *eps = g_epoll_sets[i];
+     epoll_set_add_wakeup_fd_locked(eps, &epoll_set_wakeup_fd, &error);
+   }
+
+   /* Join every thread that start_poller_threads() created */
+   size_t num_threads = g_num_eps * g_num_threads_per_eps;
+   for (size_t i = 0; i < num_threads; i++) {
+     gpr_thd_join(g_poller_threads[i]);
+   }
+
+   GRPC_LOG_IF_ERROR("shutdown_poller_threads", error);
+   gpr_free(g_poller_threads);
+   g_poller_threads = NULL;
+ }
+
+/****************************************************************************/
+
+ /* It is possible that GLIBC has epoll but the underlying kernel doesn't.
+  * Create a dummy epoll_fd to make sure epoll support is available.
+  * Returns true when the probe succeeds; the probe fd is closed again. */
+ static bool is_epoll_available() {
+   int fd = epoll_create1(EPOLL_CLOEXEC);
+   if (fd < 0) {
+     /* On failure the return value is always -1; the actual reason is in
+        errno, so that is what gets logged. */
+     gpr_log(
+         GPR_ERROR,
+         "epoll_create1 failed with error: %d. Not using epoll polling engine",
+         errno);
+     return false;
+   }
+   close(fd);
+   return true;
+ }
+
+ /* Entry point of the epoll thread-pool engine. Returns the engine vtable, or
+  * NULL when the engine was not requested explicitly, when wakeup fds or
+  * epoll are unavailable, or when initialization fails. On success the
+  * dedicated poller threads are already running when this returns.
+  *
+  * When a later initialization step fails, the steps that already succeeded
+  * are undone in the same order shutdown_engine() uses, so a rejected engine
+  * does not leave global state behind. */
+ const grpc_event_engine_vtable *grpc_init_epoll_thread_pool_linux(
+     bool requested_explicitly) {
+   if (!requested_explicitly) return NULL;
+
+   if (!grpc_has_wakeup_fd()) {
+     return NULL;
+   }
+
+   if (!is_epoll_available()) {
+     return NULL;
+   }
+
+   fd_global_init();
+
+   if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
+     fd_global_shutdown();
+     return NULL;
+   }
+
+   if (!GRPC_LOG_IF_ERROR("epoll_set_global_init", epoll_set_global_init())) {
+     fd_global_shutdown();
+     pollset_global_shutdown();
+     return NULL;
+   }
+
+   if (!init_epoll_sets()) {
+     /* init_epoll_sets() has already released the epoll sets themselves */
+     fd_global_shutdown();
+     pollset_global_shutdown();
+     epoll_set_global_shutdown();
+     return NULL;
+   }
+
+   /* TODO (sreek): It may not be a good idea to start threads here (especially
+    * if this engine doesn't get picked). Consider introducing an engine_init
+    * function in the vtable */
+   start_poller_threads();
+   return &vtable;
+ }
+
+#else /* defined(GRPC_LINUX_EPOLL) */
+#if defined(GRPC_POSIX_SOCKET)
+#include "src/core/lib/iomgr/ev_posix.h"
+ /* Fallback used when GRPC_LINUX_EPOLL is not defined, i.e. epoll is not
+  * available: the engine can never be selected, so always return NULL. */
+ const grpc_event_engine_vtable *grpc_init_epoll_thread_pool_linux(
+     bool requested_explicitly) {
+   (void)requested_explicitly;
+   return NULL;
+ }
+#endif /* defined(GRPC_POSIX_SOCKET) */
+#endif /* !defined(GRPC_LINUX_EPOLL) */
diff --git a/src/core/lib/iomgr/ev_epoll_thread_pool_linux.h b/src/core/lib/iomgr/ev_epoll_thread_pool_linux.h
new file mode 100644
index 0000000000..9af776a52e
--- /dev/null
+++ b/src/core/lib/iomgr/ev_epoll_thread_pool_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+ #ifndef GRPC_CORE_LIB_IOMGR_EV_EPOLL_THREAD_POOL_LINUX_H
+ #define GRPC_CORE_LIB_IOMGR_EV_EPOLL_THREAD_POOL_LINUX_H
+
+ #include "src/core/lib/iomgr/ev_posix.h"
+ #include "src/core/lib/iomgr/port.h"
+
+ /* Initializes the epoll thread-pool polling engine and returns its vtable.
+  * Returns NULL when the engine cannot or should not be used; in particular
+  * the implementation returns NULL unless 'requested_explicitly' is true. */
+ const grpc_event_engine_vtable *grpc_init_epoll_thread_pool_linux(
+ bool requested_explicitly);
+
+ #endif /* GRPC_CORE_LIB_IOMGR_EV_EPOLL_THREAD_POOL_LINUX_H */
diff --git a/src/core/lib/iomgr/ev_epollex_linux.c b/src/core/lib/iomgr/ev_epollex_linux.c
new file mode 100644
index 0000000000..7cb6085e25
--- /dev/null
+++ b/src/core/lib/iomgr/ev_epollex_linux.c
@@ -0,0 +1,1511 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "src/core/lib/iomgr/port.h"
+
+/* This polling engine is only relevant on linux kernels supporting epoll() */
+#ifdef GRPC_LINUX_EPOLL
+
+#include "src/core/lib/iomgr/ev_epollex_linux.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <poll.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <grpc/support/alloc.h>
+#include <grpc/support/log.h>
+#include <grpc/support/string_util.h>
+#include <grpc/support/tls.h>
+#include <grpc/support/useful.h>
+
+#include "src/core/lib/iomgr/ev_posix.h"
+#include "src/core/lib/iomgr/iomgr_internal.h"
+#include "src/core/lib/iomgr/is_epollexclusive_available.h"
+#include "src/core/lib/iomgr/lockfree_event.h"
+#include "src/core/lib/iomgr/sys_epoll_wrapper.h"
+#include "src/core/lib/iomgr/timer.h"
+#include "src/core/lib/iomgr/wakeup_fd_posix.h"
+#include "src/core/lib/iomgr/workqueue.h"
+#include "src/core/lib/profiling/timers.h"
+#include "src/core/lib/support/block_annotate.h"
+#include "src/core/lib/support/spinlock.h"
+
+/*******************************************************************************
+ * Pollset-set sibling link
+ */
+
+ /* Kind tag stored in every polling_obj (see polling_obj.type). */
+ typedef enum {
+ PO_POLLING_GROUP,
+ PO_POLLSET_SET,
+ PO_POLLSET,
+ PO_FD, /* ordering is important: we always want to lock pollsets before fds:
+ this guarantees that using an fd as a pollable is safe */
+ PO_EMPTY_POLLABLE,
+ PO_COUNT /* last enumerator: equals the number of kinds listed above */
+ } polling_obj_type;
+
+ typedef struct polling_obj polling_obj;
+ typedef struct polling_group polling_group;
+
+ /* Common header of everything that can take part in a polling group. It
+  * carries its own mutex, its kind tag, a pointer to the group it belongs to
+  * and next/prev links to other polling objects.
+  * NOTE(review): presumably next/prev chain the members of 'group' - confirm
+  * against the po_ and pg_ functions defined later in the file. */
+ struct polling_obj {
+ gpr_mu mu;
+ polling_obj_type type;
+ polling_group *group;
+ struct polling_obj *next;
+ struct polling_obj *prev;
+ };
+
+ /* A reference-counted polling object (see pg_ref / pg_unref). */
+ struct polling_group {
+ polling_obj po;
+ gpr_refcount refs;
+ };
+
+ /* Forward declarations of the polling-object (po_) and polling-group (pg_)
+  * helpers; the definitions are not part of this section of the file. */
+ static void po_init(polling_obj *po, polling_obj_type type);
+ static void po_destroy(polling_obj *po);
+ static void po_join(grpc_exec_ctx *exec_ctx, polling_obj *a, polling_obj *b);
+ static int po_cmp(polling_obj *a, polling_obj *b);
+
+ static void pg_create(grpc_exec_ctx *exec_ctx, polling_obj **initial_po,
+ size_t initial_po_count);
+ static polling_group *pg_ref(polling_group *pg);
+ static void pg_unref(polling_group *pg);
+ static void pg_merge(grpc_exec_ctx *exec_ctx, polling_group *a,
+ polling_group *b);
+ static void pg_join(grpc_exec_ctx *exec_ctx, polling_group *pg,
+ polling_obj *po);
+
+/*******************************************************************************
+ * pollable Declarations
+ */
+
+ /* A polling object that can additionally be polled: it carries an epoll fd,
+  * a wakeup fd and the root of a list of workers. Both grpc_fd and
+  * grpc_pollset embed one as their first member. */
+ typedef struct pollable {
+ polling_obj po;
+ int epfd;
+ grpc_wakeup_fd wakeup;
+ grpc_pollset_worker *root_worker;
+ } pollable;
+
+ /* NOTE(review): presumably the pollable used by pollsets that have nothing
+  * to poll yet (cf. PO_EMPTY_POLLABLE) - confirm against its users. */
+ static pollable g_empty_pollable;
+
+ static void pollable_init(pollable *p, polling_obj_type type);
+ static void pollable_destroy(pollable *p);
+ /* ensure that p->epfd, p->wakeup are initialized; p->po.mu must be held */
+ static grpc_error *pollable_materialize(pollable *p);
+
+/*******************************************************************************
+ * Fd Declarations
+ */
+
+ /* Engine-side wrapper around one OS file descriptor. Instances are recycled
+  * through fd_freelist rather than returned to the allocator (see the
+  * freelist comment in the Fd Definitions section). */
+ struct grpc_fd {
+ pollable pollable;
+ int fd;
+ /* refst format:
+ bit 0 : 1=Active / 0=Orphaned
+ bits 1-n : refcount
+ Ref/Unref by two to avoid altering the orphaned bit */
+ gpr_atm refst;
+
+ /* Wakeup fd used to wake pollers to check the contents of workqueue_items */
+ grpc_wakeup_fd workqueue_wakeup_fd;
+ grpc_closure_scheduler workqueue_scheduler;
+ /* Spinlock guarding the read end of the workqueue (must be held to pop from
+ * workqueue_items) */
+ gpr_spinlock workqueue_read_mu;
+ /* Queue of closures to be executed */
+ gpr_mpscq workqueue_items;
+ /* Count of items in workqueue_items */
+ gpr_atm workqueue_item_count;
+
+ /* The fd is either closed or we relinquished control of it. In either
+ cases, this indicates that the 'fd' on this structure is no longer
+ valid */
+ gpr_mu orphaned_mu;
+ bool orphaned;
+
+ gpr_atm read_closure;
+ gpr_atm write_closure;
+
+ struct grpc_fd *freelist_next;
+ grpc_closure *on_done_closure;
+
+ /* The pollset that last noticed that the fd is readable. The actual type
+ * stored in this is (grpc_pollset *) */
+ gpr_atm read_notifier_pollset;
+
+ grpc_iomgr_object iomgr_object;
+ };
+
+ static void fd_global_init(void);
+ static void fd_global_shutdown(void);
+
+ static void workqueue_enqueue(grpc_exec_ctx *exec_ctx, grpc_closure *closure,
+ grpc_error *error);
+
+ /* Scheduler vtable installed on every grpc_fd's workqueue_scheduler by
+  * fd_create(); both function slots point at workqueue_enqueue. */
+ static const grpc_closure_scheduler_vtable workqueue_scheduler_vtable = {
+ workqueue_enqueue, workqueue_enqueue, "workqueue"};
+
+/*******************************************************************************
+ * Pollset Declarations
+ */
+
+ /* One pair of next/prev pointers; a worker owns one pair per list it can be
+  * on (see pollset_worker_links). */
+ typedef struct pollset_worker_link {
+ grpc_pollset_worker *next;
+ grpc_pollset_worker *prev;
+ } pollset_worker_link;
+
+ /* Indices into grpc_pollset_worker.links.
+  * NOTE(review): presumably PWL_POLLSET links the worker into its pollset's
+  * list and PWL_POLLABLE into its pollable's list - confirm against the
+  * worker list helpers. */
+ typedef enum {
+ PWL_POLLSET,
+ PWL_POLLABLE,
+ POLLSET_WORKER_LINK_COUNT
+ } pollset_worker_links;
+
+ struct grpc_pollset_worker {
+ bool kicked;
+ bool initialized_cv;
+ pollset_worker_link links[POLLSET_WORKER_LINK_COUNT];
+ gpr_cv cv;
+ grpc_pollset *pollset;
+ pollable *pollable;
+ };
+
+ /* A pollset embeds its own pollable and additionally points at a "current"
+  * pollable, which need not be the embedded one. */
+ struct grpc_pollset {
+ pollable pollable;
+ pollable *current_pollable;
+ bool kicked_without_poller;
+ grpc_closure *shutdown_closure;
+ grpc_pollset_worker *root_worker;
+ };
+
+/*******************************************************************************
+ * Pollset-set Declarations
+ */
+ /* A pollset set is nothing more than a polling object. */
+ struct grpc_pollset_set {
+ polling_obj po;
+ };
+
+/*******************************************************************************
+ * Common helpers
+ */
+
+static bool append_error(grpc_error **composite, grpc_error *error,
+ const char *desc) {
+ if (error == GRPC_ERROR_NONE) return true;
+ if (*composite == GRPC_ERROR_NONE) {
+ *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
+ }
+ *composite = grpc_error_add_child(*composite, error);
+ return false;
+}
+
+/*******************************************************************************
+ * Fd Definitions
+ */
+
+/* We need to keep a freelist not because of any concerns of malloc performance
+ * but instead so that implementations with multiple threads in (for example)
+ * epoll_wait deal with the race between pollset removal and incoming poll
+ * notifications.
+ *
+ * The problem is that the poller ultimately holds a reference to this
+ * object, so it is very difficult to know when it is safe to free it, at
+ * least without some expensive synchronization.
+ *
+ * If we keep the object freelisted, in the worst case losing this race just
+ * becomes a spurious read notification on a reused fd.
+ */
+
+/* The alarm system needs to be able to wakeup 'some poller' sometimes
+ * (specifically when a new alarm needs to be triggered earlier than the next
+ * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
+ * case occurs. */
+
+static grpc_fd *fd_freelist = NULL;
+static gpr_mu fd_freelist_mu;
+
+ /* REF_BY / UNREF_BY adjust the reference count held in fd->refst by 'n'.
+  * With GRPC_FD_REF_COUNT_DEBUG defined they also forward 'reason' and the
+  * call site (__FILE__, __LINE__) so each change is logged; otherwise
+  * 'reason' is dropped.
+  *
+  * ref_by() adds 'n' to fd->refst and asserts that the value before the
+  * addition was positive. NOTE(review): the atomic add sits inside
+  * GPR_ASSERT, so this relies on GPR_ASSERT always evaluating its argument. */
+ #ifdef GRPC_FD_REF_COUNT_DEBUG
+ #define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
+ #define UNREF_BY(ec, fd, n, reason) \
+ unref_by(ec, fd, n, reason, __FILE__, __LINE__)
+ static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file,
+ int line) {
+ gpr_log(GPR_DEBUG, "FD %d %p ref %d %ld -> %ld [%s; %s:%d]", fd->fd,
+ (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
+ gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
+ #else
+ #define REF_BY(fd, n, reason) ref_by(fd, n)
+ #define UNREF_BY(ec, fd, n, reason) unref_by(ec, fd, n)
+ static void ref_by(grpc_fd *fd, int n) {
+ #endif
+ GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
+ }
+
+ /* Closure callback scheduled by unref_by() once the last reference on an fd
+  * is gone. 'arg' is the grpc_fd. The fd is unregistered from iomgr, its
+  * pollable and orphaned_mu are destroyed, and the object is then pushed
+  * onto the freelist for reuse; its read/write events are destroyed while
+  * the freelist lock is still held. 'error' is not used. */
+ static void fd_destroy(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) {
+   grpc_fd *dead_fd = arg;
+
+   grpc_iomgr_unregister_object(&dead_fd->iomgr_object);
+   pollable_destroy(&dead_fd->pollable);
+   gpr_mu_destroy(&dead_fd->orphaned_mu);
+
+   /* Add the fd to the freelist */
+   gpr_mu_lock(&fd_freelist_mu);
+   dead_fd->freelist_next = fd_freelist;
+   fd_freelist = dead_fd;
+
+   grpc_lfev_destroy(&dead_fd->read_closure);
+   grpc_lfev_destroy(&dead_fd->write_closure);
+
+   gpr_mu_unlock(&fd_freelist_mu);
+ }
+
+ /* Subtracts 'n' from fd->refst. When the value before the subtraction was
+  * exactly 'n' (i.e. the count has just dropped to zero) fd_destroy is
+  * scheduled on 'exec_ctx'; otherwise the previous value is asserted to have
+  * been greater than 'n'. The GRPC_FD_REF_COUNT_DEBUG variant additionally
+  * logs the transition together with 'reason' and the call site. */
+ #ifdef GRPC_FD_REF_COUNT_DEBUG
+ static void unref_by(grpc_exec_ctx *exec_ctx, grpc_fd *fd, int n,
+ const char *reason, const char *file, int line) {
+ gpr_atm old;
+ gpr_log(GPR_DEBUG, "FD %d %p unref %d %ld -> %ld [%s; %s:%d]", fd->fd,
+ (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
+ gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
+ #else
+ static void unref_by(grpc_exec_ctx *exec_ctx, grpc_fd *fd, int n) {
+ gpr_atm old;
+ #endif
+ old = gpr_atm_full_fetch_add(&fd->refst, -n);
+ if (old == n) {
+ grpc_closure_sched(exec_ctx, grpc_closure_create(fd_destroy, fd,
+ grpc_schedule_on_exec_ctx),
+ GRPC_ERROR_NONE);
+ } else {
+ GPR_ASSERT(old > n);
+ }
+ }
+
+/* One-time setup for the fd layer: creates the mutex that fd_create() and
+ * fd_destroy() take around the grpc_fd freelist. */
+static void fd_global_init(void) {
+  gpr_mu_init(&fd_freelist_mu);
+}
+
+/* Frees every grpc_fd parked on the freelist, then destroys the freelist
+ * mutex. */
+static void fd_global_shutdown(void) {
+  /* The lock is acquired and released once before the list is walked without
+   * it. NOTE(review): presumably this waits out an in-flight fd_destroy() -
+   * confirm. */
+  gpr_mu_lock(&fd_freelist_mu);
+  gpr_mu_unlock(&fd_freelist_mu);
+  for (grpc_fd *node = fd_freelist; node != NULL; node = fd_freelist) {
+    fd_freelist = node->freelist_next;
+    gpr_free(node);
+  }
+  gpr_mu_destroy(&fd_freelist_mu);
+}
+
+/* Returns an initialized grpc_fd wrapping the OS descriptor `fd`.
+ *
+ * The struct is taken from the freelist when one is available and allocated
+ * otherwise. The reference count starts at 1. `name` is only used to build
+ * the label under which the object is registered with the iomgr. */
+static grpc_fd *fd_create(int fd, const char *name) {
+  /* Prefer recycling a parked struct over allocating a new one. */
+  gpr_mu_lock(&fd_freelist_mu);
+  grpc_fd *gfd = fd_freelist;
+  if (gfd != NULL) {
+    fd_freelist = gfd->freelist_next;
+  }
+  gpr_mu_unlock(&fd_freelist_mu);
+
+  if (gfd == NULL) {
+    gfd = gpr_malloc(sizeof(grpc_fd));
+  }
+
+  pollable_init(&gfd->pollable, PO_FD);
+
+  /* Core descriptor state. */
+  gpr_atm_rel_store(&gfd->refst, (gpr_atm)1);
+  gfd->fd = fd;
+  gpr_mu_init(&gfd->orphaned_mu);
+  gfd->orphaned = false;
+  grpc_lfev_init(&gfd->read_closure);
+  grpc_lfev_init(&gfd->write_closure);
+  gpr_atm_no_barrier_store(&gfd->read_notifier_pollset, (gpr_atm)NULL);
+
+  /* Workqueue embedded in the fd. */
+  GRPC_LOG_IF_ERROR("fd_create",
+                    grpc_wakeup_fd_init(&gfd->workqueue_wakeup_fd));
+  gfd->workqueue_scheduler.vtable = &workqueue_scheduler_vtable;
+  gfd->workqueue_read_mu = GPR_SPINLOCK_INITIALIZER;
+  gpr_mpscq_init(&gfd->workqueue_items);
+  gpr_atm_no_barrier_store(&gfd->workqueue_item_count, 0);
+
+  gfd->freelist_next = NULL;
+  gfd->on_done_closure = NULL;
+
+  /* Register with the iomgr under "<name> fd=<fd>". */
+  char *label;
+  gpr_asprintf(&label, "%s fd=%d", name, fd);
+  grpc_iomgr_register_object(&gfd->iomgr_object, label);
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+  gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, (void *)gfd, label);
+#endif
+  gpr_free(label);
+  return gfd;
+}
+
+/* Returns the OS descriptor wrapped by `fd`, or -1 once the grpc_fd has been
+ * orphaned. The orphaned flag is read under fd->orphaned_mu. */
+static int fd_wrapped_fd(grpc_fd *fd) {
+  gpr_mu_lock(&fd->orphaned_mu);
+  const int wrapped = fd->orphaned ? -1 : fd->fd;
+  gpr_mu_unlock(&fd->orphaned_mu);
+  return wrapped;
+}
+/* Gives up the wrapped descriptor of `fd` and marks the grpc_fd as orphaned.
+ *
+ * on_done    - closure scheduled (with GRPC_ERROR_NONE) before the locks are
+ *              released.
+ * release_fd - if non-NULL, receives fd->fd and the descriptor is left open;
+ *              if NULL, the descriptor is close()d here.
+ * reason     - label forwarded to REF_BY / UNREF_BY.
+ *
+ * fd->pollable.po.mu and then fd->orphaned_mu are held while the state is
+ * updated. The count is raised by 1 and later lowered by 2, a net change of
+ * -1 (fd_create() stores an initial count of 1). */
+static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+ grpc_closure *on_done, int *release_fd,
+ const char *reason) {
+ bool is_fd_closed = false;
+ grpc_error *error = GRPC_ERROR_NONE; /* never reassigned below */
+
+ gpr_mu_lock(&fd->pollable.po.mu);
+ gpr_mu_lock(&fd->orphaned_mu);
+ fd->on_done_closure = on_done;
+
+ /* If release_fd is not NULL, we should be relinquishing control of the file
+ descriptor fd->fd (but we still own the grpc_fd structure). */
+ if (release_fd != NULL) {
+ *release_fd = fd->fd;
+ } else {
+ close(fd->fd);
+ is_fd_closed = true;
+ }
+
+ fd->orphaned = true;
+
+ if (!is_fd_closed) { /* descriptor was handed back to the caller */
+ gpr_log(GPR_DEBUG, "TODO: handle fd removal?");
+ }
+
+ /* Remove the active status but keep referenced. We want this grpc_fd struct
+ to be alive (and not added to freelist) until the end of this function */
+ REF_BY(fd, 1, reason);
+
+ grpc_closure_sched(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));
+
+ gpr_mu_unlock(&fd->orphaned_mu);
+ gpr_mu_unlock(&fd->pollable.po.mu);
+ UNREF_BY(exec_ctx, fd, 2, reason); /* Drop the reference */
+ GRPC_LOG_IF_ERROR("fd_orphan", GRPC_ERROR_REF(error));
+ GRPC_ERROR_UNREF(error);
+}
+
+/* Returns the pollset most recently stored by fd_become_readable() for this
+ * fd (NULL right after fd_create()). Uses an acquire load. exec_ctx is
+ * unused. */
+static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
+                                                  grpc_fd *fd) {
+  return (grpc_pollset *)gpr_atm_acq_load(&fd->read_notifier_pollset);
+}
+
+/* Reports whether `fd` has been shut down, as recorded on its read event. */
+static bool fd_is_shutdown(grpc_fd *fd) {
+  const bool read_side_shut = grpc_lfev_is_shutdown(&fd->read_closure);
+  return read_side_shut;
+}
+
+/* Shuts `fd` down with reason `why`. Might be called multiple times: the
+ * socket-level shutdown() and the write-event shutdown only happen when
+ * setting the read event to shutdown reports success. Consumes one ref of
+ * `why`. */
+static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
+  const bool do_socket_shutdown =
+      grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure, GRPC_ERROR_REF(why));
+  if (do_socket_shutdown) {
+    shutdown(fd->fd, SHUT_RDWR);
+    grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
+  }
+  GRPC_ERROR_UNREF(why);
+}
+
+/* Registers `closure` on the read event of `fd` (see grpc_lfev_notify_on). */
+static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+                              grpc_closure *closure) {
+  grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure);
+}
+
+/* Registers `closure` on the write event of `fd` (see grpc_lfev_notify_on). */
+static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+                               grpc_closure *closure) {
+  grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure);
+}
+
+/* Returns `fd` viewed as a grpc_workqueue (the same pointer, cast), taking a
+ * reference (2 units) on behalf of the caller. */
+static grpc_workqueue *fd_get_workqueue(grpc_fd *fd) {
+  grpc_workqueue *as_workqueue = (grpc_workqueue *)fd;
+  REF_BY(fd, 2, "return_workqueue");
+  return as_workqueue;
+}
+
+/* Reference counting for the grpc_workqueue view of a grpc_fd. A workqueue
+ * pointer is a grpc_fd pointer in disguise, so both helpers forward to
+ * ref_by / unref_by with 2 units. A NULL workqueue is accepted and ignored.
+ *
+ * ref_by / unref_by only accept (reason, file, line) when
+ * GRPC_FD_REF_COUNT_DEBUG is defined, so the debug variants below forward the
+ * caller's location in that order when it is, and drop it otherwise. */
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue,
+                                     const char *file, int line,
+                                     const char *reason) {
+  if (workqueue != NULL) {
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+    ref_by((grpc_fd *)workqueue, 2, reason, file, line);
+#else
+    (void)file;
+    (void)line;
+    (void)reason;
+    ref_by((grpc_fd *)workqueue, 2);
+#endif
+  }
+  return workqueue;
+}
+
+static void workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+                            const char *file, int line, const char *reason) {
+  if (workqueue != NULL) {
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+    unref_by(exec_ctx, (grpc_fd *)workqueue, 2, reason, file, line);
+#else
+    (void)file;
+    (void)line;
+    (void)reason;
+    unref_by(exec_ctx, (grpc_fd *)workqueue, 2);
+#endif
+  }
+}
+#else
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue) {
+  if (workqueue != NULL) {
+    ref_by((grpc_fd *)workqueue, 2);
+  }
+  return workqueue;
+}
+
+static void workqueue_unref(grpc_exec_ctx *exec_ctx,
+                            grpc_workqueue *workqueue) {
+  if (workqueue != NULL) {
+    unref_by(exec_ctx, (grpc_fd *)workqueue, 2);
+  }
+}
+#endif
+
+/* Signals the wakeup fd of the workqueue embedded in `fd`; a failure is only
+ * logged. */
+static void workqueue_wakeup(grpc_fd *fd) {
+  grpc_error *wakeup_err = grpc_wakeup_fd_wakeup(&fd->workqueue_wakeup_fd);
+  GRPC_LOG_IF_ERROR("workqueue_enqueue", wakeup_err);
+}
+
+static void workqueue_enqueue(grpc_exec_ctx *exec_ctx, grpc_closure *closure,
+ grpc_error *error) {
+ GPR_TIMER_BEGIN("workqueue.enqueue", 0);
+ grpc_fd *fd = (grpc_fd *)(((char *)closure->scheduler) -
+ offsetof(grpc_fd, workqueue_scheduler));
+ REF_BY(fd, 2, "workqueue_enqueue");
+ gpr_atm last = gpr_atm_no_barrier_fetch_add(&fd->workqueue_item_count, 1);
+ closure->error_data.error = error;
+ gpr_mpscq_push(&fd->workqueue_items, &closure->next_data.atm_next);
+ if (last == 0) {
+ workqueue_wakeup(fd);
+ }
+ UNREF_BY(exec_ctx, fd, 2, "workqueue_enqueue");
+}
+/* Pops at most one closure from the workqueue of `fd` and runs it.
+ *
+ * Returns immediately if workqueue_read_mu cannot be taken. After a
+ * successful pop the wakeup fd is signalled again if the item count was
+ * above 1 before the decrement. */
+static void fd_invoke_workqueue(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
+ /* handle spurious wakeups */
+ if (!gpr_spinlock_trylock(&fd->workqueue_read_mu)) return;
+ gpr_mpscq_node *n = gpr_mpscq_pop(&fd->workqueue_items);
+ gpr_spinlock_unlock(&fd->workqueue_read_mu);
+ if (n != NULL) {
+ if (gpr_atm_full_fetch_add(&fd->workqueue_item_count, -1) > 1) {
+ workqueue_wakeup(fd); /* more items remain queued */
+ }
+ grpc_closure *c = (grpc_closure *)n;
+ grpc_error *error = c->error_data.error; /* stored by workqueue_enqueue */
+#ifndef NDEBUG
+ c->scheduled = false;
+#endif
+ c->cb(exec_ctx, c->cb_arg, error);
+ GRPC_ERROR_UNREF(error);
+ } else if (gpr_atm_no_barrier_load(&fd->workqueue_item_count) > 0) {
+ /* n == NULL might mean there's work but it's not available to be popped
+ * yet - try to ensure another workqueue wakes up to check shortly if so
+ */
+ workqueue_wakeup(fd);
+ }
+}
+
+/* Returns the closure scheduler embedded in the grpc_fd that backs
+ * `workqueue`. */
+static grpc_closure_scheduler *workqueue_scheduler(grpc_workqueue *workqueue) {
+  grpc_fd *fd = (grpc_fd *)workqueue;
+  return &fd->workqueue_scheduler;
+}
+
+/*******************************************************************************
+ * Pollable Definitions
+ */
+
+/* Initializes `p` as a polling object of the given type with no workers and
+ * no epoll set yet (epfd == -1 until pollable_materialize() creates one). */
+static void pollable_init(pollable *p, polling_obj_type type) {
+  po_init(&p->po, type);
+  p->epfd = -1;
+  p->root_worker = NULL;
+}
+
+/* Destroys the polling object inside `p` and, if the pollable was ever
+ * materialized, closes its epoll fd and destroys its wakeup fd. */
+static void pollable_destroy(pollable *p) {
+  po_destroy(&p->po);
+  if (p->epfd == -1) {
+    return; /* never materialized: nothing else to release */
+  }
+  close(p->epfd);
+  grpc_wakeup_fd_destroy(&p->wakeup);
+}
+
+/* Ensures p->epfd and p->wakeup exist, creating them on first use; p->po.mu
+ * must be held. The wakeup fd is registered in the new epoll set
+ * (edge-triggered, readable). On failure everything created so far is
+ * released, p->epfd stays -1 and the error is returned. */
+static grpc_error *pollable_materialize(pollable *p) {
+  if (p->epfd != -1) {
+    return GRPC_ERROR_NONE; /* already materialized */
+  }
+
+  const int epfd = epoll_create1(EPOLL_CLOEXEC);
+  if (epfd < 0) {
+    return GRPC_OS_ERROR(errno, "epoll_create1");
+  }
+
+  grpc_error *error = grpc_wakeup_fd_init(&p->wakeup);
+  if (error != GRPC_ERROR_NONE) {
+    close(epfd);
+    return error;
+  }
+
+  struct epoll_event wakeup_ev = {.events = (uint32_t)(EPOLLIN | EPOLLET),
+                                  .data.ptr = &p->wakeup};
+  if (epoll_ctl(epfd, EPOLL_CTL_ADD, p->wakeup.read_fd, &wakeup_ev) != 0) {
+    /* capture errno before close() can overwrite it */
+    error = GRPC_OS_ERROR(errno, "epoll_ctl");
+    close(epfd);
+    grpc_wakeup_fd_destroy(&p->wakeup);
+    return error;
+  }
+
+  p->epfd = epfd;
+  return GRPC_ERROR_NONE;
+}
+
+/* Registers `fd` (and the wakeup fd of its workqueue) in the epoll set of
+ * `p`; the pollable must be materialized.
+ *
+ * Nothing is done for an orphaned fd. The descriptor is tagged with the
+ * grpc_fd pointer; the workqueue wakeup fd is tagged with the same pointer
+ * plus one. EEXIST from epoll_ctl is not treated as an error. */
+static grpc_error *pollable_add_fd(pollable *p, grpc_fd *fd) {
+  static const char *err_desc = "pollable_add_fd";
+  grpc_error *error = GRPC_ERROR_NONE;
+  const int epfd = p->epfd;
+  GPR_ASSERT(epfd != -1);
+
+  if (GRPC_TRACER_ON(grpc_polling_trace)) {
+    gpr_log(GPR_DEBUG, "add fd %p to pollable %p", fd, p);
+  }
+
+  gpr_mu_lock(&fd->orphaned_mu);
+  if (fd->orphaned) {
+    gpr_mu_unlock(&fd->orphaned_mu);
+    return GRPC_ERROR_NONE;
+  }
+
+  struct epoll_event ev_fd = {
+      .events = (uint32_t)(EPOLLET | EPOLLIN | EPOLLOUT | EPOLLEXCLUSIVE),
+      .data.ptr = fd};
+  if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd->fd, &ev_fd) != 0) {
+    if (errno == EEXIST) {
+      /* if this fd is already in the epoll set, the workqueue fd must also
+         be - just return */
+      gpr_mu_unlock(&fd->orphaned_mu);
+      return GRPC_ERROR_NONE;
+    }
+    append_error(&error, GRPC_OS_ERROR(errno, "epoll_ctl"), err_desc);
+  }
+
+  struct epoll_event ev_wq = {
+      .events = (uint32_t)(EPOLLET | EPOLLIN | EPOLLEXCLUSIVE),
+      .data.ptr = (void *)(1 + (intptr_t)fd)};
+  /* if the workqueue fd is already in the epoll set we're ok - no need to do
+     anything special */
+  if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd->workqueue_wakeup_fd.read_fd,
+                &ev_wq) != 0 &&
+      errno != EEXIST) {
+    append_error(&error, GRPC_OS_ERROR(errno, "epoll_ctl"), err_desc);
+  }
+  gpr_mu_unlock(&fd->orphaned_mu);
+
+  return error;
+}
+
+/*******************************************************************************
+ * Pollset Definitions
+ */
+
+GPR_TLS_DECL(g_current_thread_pollset); /* set by pollset_work() to the
+                                         * pollset being polled by this
+                                         * thread, reset to 0 afterwards */
+GPR_TLS_DECL(g_current_thread_worker); /* set by pollset_work() to this
+                                        * thread's worker while it polls,
+                                        * reset to 0 afterwards */
+
+/* Global state management: sets up the two thread-local slots used by the
+ * pollset code and the shared empty pollable. Always returns
+ * GRPC_ERROR_NONE. */
+static grpc_error *pollset_global_init(void) {
+  gpr_tls_init(&g_current_thread_pollset);
+  gpr_tls_init(&g_current_thread_worker);
+
+  pollable_init(&g_empty_pollable, PO_EMPTY_POLLABLE);
+
+  return GRPC_ERROR_NONE;
+}
+
+/* Releases what pollset_global_init() set up: the shared empty pollable and
+ * the two thread-local slots. */
+static void pollset_global_shutdown(void) {
+  pollable_destroy(&g_empty_pollable);
+
+  gpr_tls_destroy(&g_current_thread_pollset);
+  gpr_tls_destroy(&g_current_thread_worker);
+}
+
+/* Wakes every worker currently attached to `pollset`.
+ *
+ * Workers waiting on their condition variable are marked kicked and
+ * signalled; the others are woken through the wakeup fd of their pollable.
+ * The mutex of a worker's pollable is taken around the kick unless that
+ * pollable is the pollset's own. Wakeup failures are accumulated into the
+ * returned error. */
+static grpc_error *pollset_kick_all(grpc_pollset *pollset) {
+  grpc_error *error = GRPC_ERROR_NONE;
+  if (pollset->root_worker == NULL) {
+    return error; /* nobody to kick */
+  }
+
+  grpc_pollset_worker *worker = pollset->root_worker;
+  do {
+    if (worker->pollable != &pollset->pollable) {
+      gpr_mu_lock(&worker->pollable->po.mu);
+    }
+
+    if (!worker->initialized_cv) {
+      append_error(&error, grpc_wakeup_fd_wakeup(&worker->pollable->wakeup),
+                   "pollset_shutdown");
+    } else {
+      worker->kicked = true;
+      gpr_cv_signal(&worker->cv);
+    }
+
+    if (worker->pollable != &pollset->pollable) {
+      gpr_mu_unlock(&worker->pollable->po.mu);
+    }
+
+    worker = worker->links[PWL_POLLSET].next;
+  } while (worker != pollset->root_worker);
+
+  return error;
+}
+/* Core of pollset_kick(): wakes a worker of `pollset` that polls on pollable
+ * `p`.
+ *
+ * specific_worker == NULL means "any worker"; otherwise only that worker is
+ * targeted. Returns GRPC_ERROR_NONE, or the error from materializing `p` or
+ * from signalling its wakeup fd. pollset_kick() calls this with p->po.mu
+ * held (or, when `p` is the pollset's own pollable, without taking it). */
+static grpc_error *pollset_kick_inner(grpc_pollset *pollset, pollable *p,
+ grpc_pollset_worker *specific_worker) {
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG,
+ "PS:%p kick %p tls_pollset=%p tls_worker=%p "
+ "root_worker=(pollset:%p pollable:%p)",
+ p, specific_worker, (void *)gpr_tls_get(&g_current_thread_pollset),
+ (void *)gpr_tls_get(&g_current_thread_worker), pollset->root_worker,
+ p->root_worker);
+ }
+ if (specific_worker == NULL) { /* kick any worker */
+ if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)pollset) {
+ if (pollset->root_worker == NULL) {
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p kicked_any_without_poller", p);
+ }
+ pollset->kicked_without_poller = true; /* consumed by pollset_work() */
+ return GRPC_ERROR_NONE;
+ } else {
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p kicked_any_via_wakeup_fd", p);
+ }
+ grpc_error *err = pollable_materialize(p);
+ if (err != GRPC_ERROR_NONE) return err;
+ return grpc_wakeup_fd_wakeup(&p->wakeup);
+ }
+ } else { /* the calling thread is itself polling this pollset */
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p kicked_any_but_awake", p);
+ }
+ return GRPC_ERROR_NONE;
+ }
+ } else if (specific_worker->kicked) {
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p kicked_specific_but_already_kicked", p);
+ }
+ return GRPC_ERROR_NONE;
+ } else if (gpr_tls_get(&g_current_thread_worker) ==
+ (intptr_t)specific_worker) { /* the target is the calling thread */
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p kicked_specific_but_awake", p);
+ }
+ specific_worker->kicked = true;
+ return GRPC_ERROR_NONE;
+ } else if (specific_worker == p->root_worker) {
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p kicked_specific_via_wakeup_fd", p);
+ }
+ grpc_error *err = pollable_materialize(p);
+ if (err != GRPC_ERROR_NONE) return err;
+ specific_worker->kicked = true;
+ return grpc_wakeup_fd_wakeup(&p->wakeup);
+ } else { /* non-root worker: it waits on its condition variable */
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p kicked_specific_via_cv", p);
+ }
+ specific_worker->kicked = true;
+ gpr_cv_signal(&specific_worker->cv);
+ return GRPC_ERROR_NONE;
+ }
+}
+
+/* Kicks `specific_worker` (or any worker when NULL) of `pollset`.
+ *
+ * The mutex of the pollset's current pollable is taken around the kick,
+ * except when that pollable is the pollset's own embedded one, in which case
+ * no lock is taken here. NOTE(review): the caller presumably already holds
+ * pollset->pollable.po.mu - confirm against callers. */
+static grpc_error *pollset_kick(grpc_pollset *pollset,
+                                grpc_pollset_worker *specific_worker) {
+  pollable *p = pollset->current_pollable;
+  const bool needs_lock = (p != &pollset->pollable);
+
+  if (needs_lock) {
+    gpr_mu_lock(&p->po.mu);
+  }
+  grpc_error *error = pollset_kick_inner(pollset, p, specific_worker);
+  if (needs_lock) {
+    gpr_mu_unlock(&p->po.mu);
+  }
+  return error;
+}
+
+/* Initializes `pollset` with no workers, no pending kick and no shutdown
+ * closure, polling on the shared empty pollable. *mu receives the mutex that
+ * guards the pollset (the one inside its embedded pollable). */
+static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
+  pollable_init(&pollset->pollable, PO_POLLSET);
+  *mu = &pollset->pollable.po.mu;
+
+  pollset->current_pollable = &g_empty_pollable;
+  pollset->root_worker = NULL;
+  pollset->shutdown_closure = NULL;
+  pollset->kicked_without_poller = false;
+}
+
+/* Converts an absolute deadline into an epoll-style millisecond timeout:
+   - an infinite deadline becomes -1
+   - a deadline that is not after `now` becomes 0 (non-blocking poll)
+   - anything else is rounded up to the next whole millisecond, and is never
+     less than 1, to avoid spinning */
+static int poll_deadline_to_millis_timeout(gpr_timespec deadline,
+                                           gpr_timespec now) {
+  static const gpr_timespec round_up = {
+      .clock_type = GPR_TIMESPAN, .tv_sec = 0, .tv_nsec = GPR_NS_PER_MS - 1};
+
+  if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) {
+    return -1;
+  }
+  if (gpr_time_cmp(deadline, now) <= 0) {
+    return 0;
+  }
+
+  const gpr_timespec remaining = gpr_time_sub(deadline, now);
+  const int millis = gpr_time_to_millis(gpr_time_add(remaining, round_up));
+  if (millis < 1) {
+    return 1;
+  }
+  return millis;
+}
+
+/* Marks the read event of `fd` ready and records `notifier` as the pollset
+ * that observed the readability. */
+static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
+                               grpc_pollset *notifier) {
+  grpc_lfev_set_ready(exec_ctx, &fd->read_closure);
+
+  /* This may run more than once, with different notifiers, if the fd is
+     present in more than one epoll set; whichever store lands last wins.
+     The release store pairs with the acquire load in
+     fd_get_read_notifier_pollset(). */
+  const gpr_atm recorded = (gpr_atm)notifier;
+  gpr_atm_rel_store(&fd->read_notifier_pollset, recorded);
+}
+
+/* Marks the write event of `fd` ready. */
+static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
+  grpc_lfev_set_ready(exec_ctx, &fd->write_closure);
+}
+
+/* Turns the pollable embedded in `fd` into a usable epoll set that contains
+ * the fd itself: materializes it and, only if that succeeded, adds the fd.
+ * Per the _locked suffix the caller is expected to hold fd->pollable.po.mu
+ * (pollable_materialize requires it). */
+static grpc_error *fd_become_pollable_locked(grpc_fd *fd) {
+  static const char *err_desc = "fd_become_pollable";
+  grpc_error *error = GRPC_ERROR_NONE;
+
+  if (!append_error(&error, pollable_materialize(&fd->pollable), err_desc)) {
+    return error;
+  }
+  append_error(&error, pollable_add_fd(&fd->pollable, fd), err_desc);
+  return error;
+}
+
+/* Completes a pending shutdown of `pollset` once no workers remain: the
+ * stored shutdown closure is scheduled and then cleared. Does nothing if no
+ * shutdown is pending or workers are still attached. */
+static void pollset_maybe_finish_shutdown(grpc_exec_ctx *exec_ctx,
+                                          grpc_pollset *pollset) {
+  if (pollset->shutdown_closure == NULL || pollset->root_worker != NULL) {
+    return;
+  }
+  grpc_closure_sched(exec_ctx, pollset->shutdown_closure, GRPC_ERROR_NONE);
+  pollset->shutdown_closure = NULL;
+}
+
+/* Begins shutting down `pollset`; the pollset's mutex must be held by the
+ * caller. `closure` is remembered, every worker is kicked (failures are only
+ * logged), and the closure is scheduled right away if no worker is attached.
+ * Must not be called while a shutdown is already pending. */
+static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+                             grpc_closure *closure) {
+  GPR_ASSERT(pollset->shutdown_closure == NULL);
+  pollset->shutdown_closure = closure;
+
+  grpc_error *kick_err = pollset_kick_all(pollset);
+  GRPC_LOG_IF_ERROR("pollset_shutdown", kick_err);
+
+  pollset_maybe_finish_shutdown(exec_ctx, pollset);
+}
+
+/* True when `p` is neither the shared empty pollable nor the pollable
+ * embedded in `pollset`; callers then treat `p` as belonging to a grpc_fd. */
+static bool pollset_is_pollable_fd(grpc_pollset *pollset, pollable *p) {
+  const bool is_empty = (p == &g_empty_pollable);
+  const bool is_own = (p == &pollset->pollable);
+  return !(is_empty || is_own);
+}
+
+/* Destroys `pollset` (pollset_shutdown is guaranteed to have been called
+ * first). If the pollset was polling through an fd's pollable, the
+ * "pollset_pollable" reference on that fd is dropped. */
+static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
+  pollable_destroy(&pollset->pollable);
+
+  pollable *current = pollset->current_pollable;
+  if (!pollset_is_pollable_fd(pollset, current)) {
+    return;
+  }
+  UNREF_BY(exec_ctx, (grpc_fd *)current, 2, "pollset_pollable");
+}
+/* Upper bound on the number of events harvested by one epoll_wait() call in
+ * pollset_epoll(). */
+#define MAX_EPOLL_EVENTS 100
+/* Waits on the epoll set of `p` until `deadline` and dispatches the events.
+ *
+ * For each returned event the data pointer selects the action:
+ *   - &p->wakeup: the pollable's wakeup fd is consumed;
+ *   - a pointer with the low bit set: the workqueue of that grpc_fd is
+ *     drained by one item (see pollable_add_fd for the tagging);
+ *   - otherwise a grpc_fd whose read / write events are marked ready.
+ *
+ * Returns the epoll_wait error, or the accumulated wakeup-consume errors. */
+static grpc_error *pollset_epoll(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+ pollable *p, gpr_timespec now,
+ gpr_timespec deadline) {
+ struct epoll_event events[MAX_EPOLL_EVENTS];
+ static const char *err_desc = "pollset_poll";
+
+ int timeout = poll_deadline_to_millis_timeout(deadline, now);
+
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p poll %p for %dms", pollset, p, timeout);
+ }
+
+ if (timeout != 0) { /* only a non-zero timeout can block */
+ GRPC_SCHEDULING_START_BLOCKING_REGION;
+ }
+ int r;
+ do { /* retry when interrupted by a signal */
+ r = epoll_wait(p->epfd, events, MAX_EPOLL_EVENTS, timeout);
+ } while (r < 0 && errno == EINTR);
+ if (timeout != 0) {
+ GRPC_SCHEDULING_END_BLOCKING_REGION;
+ }
+
+ if (r < 0) return GRPC_OS_ERROR(errno, "epoll_wait");
+
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p poll %p got %d events", pollset, p, r);
+ }
+
+ grpc_error *error = GRPC_ERROR_NONE;
+ for (int i = 0; i < r; i++) {
+ void *data_ptr = events[i].data.ptr;
+ if (data_ptr == &p->wakeup) {
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p poll %p got pollset_wakeup", pollset, p);
+ }
+ append_error(&error, grpc_wakeup_fd_consume_wakeup(&p->wakeup), err_desc);
+ } else {
+ grpc_fd *fd = (grpc_fd *)(((intptr_t)data_ptr) & ~(intptr_t)1);
+ bool is_workqueue = (((intptr_t)data_ptr) & 1) != 0;
+ bool cancel = (events[i].events & (EPOLLERR | EPOLLHUP)) != 0;
+ bool read_ev = (events[i].events & (EPOLLIN | EPOLLPRI)) != 0;
+ bool write_ev = (events[i].events & EPOLLOUT) != 0;
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG,
+ "PS:%p poll %p got fd %p: is_wq=%d cancel=%d read=%d "
+ "write=%d",
+ pollset, p, fd, is_workqueue, cancel, read_ev, write_ev);
+ }
+ if (is_workqueue) {
+ append_error(&error,
+ grpc_wakeup_fd_consume_wakeup(&fd->workqueue_wakeup_fd),
+ err_desc);
+ fd_invoke_workqueue(exec_ctx, fd);
+ } else {
+ if (read_ev || cancel) { /* error / hang-up wakes readers too */
+ fd_become_readable(exec_ctx, fd, pollset);
+ }
+ if (write_ev || cancel) { /* ... and writers */
+ fd_become_writable(exec_ctx, fd);
+ }
+ }
+ }
+ }
+
+ return error;
+}
+
+/* Appends `worker` to the circular doubly linked list rooted at *root, using
+ * the link slot selected by `link`. Returns true if the list was empty (the
+ * worker became the root), false if it was linked in just before the
+ * existing root. */
+static bool worker_insert(grpc_pollset_worker **root, pollset_worker_links link,
+                          grpc_pollset_worker *worker) {
+  grpc_pollset_worker *head = *root;
+  if (head == NULL) {
+    worker->links[link].prev = worker;
+    worker->links[link].next = worker;
+    *root = worker;
+    return true;
+  }
+
+  grpc_pollset_worker *tail = head->links[link].prev;
+  worker->links[link].next = head;
+  worker->links[link].prev = tail;
+  head->links[link].prev = worker;
+  tail->links[link].next = worker;
+  return false;
+}
+
+/* Outcome of worker_remove():
+     EMPTIED  - the worker was the only element; the list is now empty
+     NEW_ROOT - the worker was the root; its successor became the root
+     REMOVED  - a non-root worker was unlinked */
+typedef enum { EMPTIED, NEW_ROOT, REMOVED } worker_remove_result;
+
+/* Unlinks `worker` from the circular list rooted at *root (link slot `link`)
+ * and reports what happened to the list. */
+static worker_remove_result worker_remove(grpc_pollset_worker **root,
+                                          pollset_worker_links link,
+                                          grpc_pollset_worker *worker) {
+  grpc_pollset_worker *after = worker->links[link].next;
+  grpc_pollset_worker *before = worker->links[link].prev;
+  const bool was_root = (worker == *root);
+
+  if (was_root && worker == after) {
+    *root = NULL;
+    return EMPTIED;
+  }
+
+  if (was_root) {
+    *root = after;
+  }
+  before->links[link].next = after;
+  after->links[link].prev = before;
+  return was_root ? NEW_ROOT : REMOVED;
+}
+
+/* Registers `worker` with `pollset` and with the pollset's current pollable.
+ * Return true if this thread should poll.
+ *
+ * If the worker is not the first one on the pollable it waits on its own
+ * condition variable until it becomes the pollable's root worker, is kicked,
+ * or `deadline` passes; *now is refreshed after such a wait. The result is
+ * false after a timeout or kick, when a shutdown is pending, or when the
+ * pollset switched to a different pollable in the meantime.
+ *
+ * *worker_hdl (if non-NULL) is set to `worker`. */
+static bool begin_worker(grpc_pollset *pollset, grpc_pollset_worker *worker,
+ grpc_pollset_worker **worker_hdl, gpr_timespec *now,
+ gpr_timespec deadline) {
+ bool do_poll = true;
+ if (worker_hdl != NULL) *worker_hdl = worker;
+ worker->initialized_cv = false;
+ worker->kicked = false;
+ worker->pollset = pollset;
+ worker->pollable = pollset->current_pollable;
+ /* keep the fd that owns the pollable alive while this worker uses it */
+ if (pollset_is_pollable_fd(pollset, worker->pollable)) {
+ REF_BY((grpc_fd *)worker->pollable, 2, "one_poll");
+ }
+
+ worker_insert(&pollset->root_worker, PWL_POLLSET, worker);
+ if (!worker_insert(&worker->pollable->root_worker, PWL_POLLABLE, worker)) {
+ worker->initialized_cv = true; /* not the pollable's root: wait on a cv */
+ gpr_cv_init(&worker->cv);
+ if (worker->pollable != &pollset->pollable) {
+ gpr_mu_unlock(&pollset->pollable.po.mu);
+ }
+ if (GRPC_TRACER_ON(grpc_polling_trace) &&
+ worker->pollable->root_worker != worker) {
+ gpr_log(GPR_DEBUG, "PS:%p wait %p w=%p for %dms", pollset,
+ worker->pollable, worker,
+ poll_deadline_to_millis_timeout(deadline, *now));
+ }
+ while (do_poll && worker->pollable->root_worker != worker) {
+ if (gpr_cv_wait(&worker->cv, &worker->pollable->po.mu, deadline)) {
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p timeout_wait %p w=%p", pollset,
+ worker->pollable, worker);
+ }
+ do_poll = false;
+ } else if (worker->kicked) {
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "PS:%p wakeup %p w=%p", pollset, worker->pollable,
+ worker);
+ }
+ do_poll = false;
+ } else if (GRPC_TRACER_ON(grpc_polling_trace) &&
+ worker->pollable->root_worker != worker) {
+ gpr_log(GPR_DEBUG, "PS:%p spurious_wakeup %p w=%p", pollset,
+ worker->pollable, worker);
+ }
+ }
+ if (worker->pollable != &pollset->pollable) {
+ /* re-take both mutexes, pollset first and pollable second */
+ gpr_mu_unlock(&worker->pollable->po.mu);
+ gpr_mu_lock(&pollset->pollable.po.mu);
+ gpr_mu_lock(&worker->pollable->po.mu);
+ }
+ *now = gpr_now(now->clock_type);
+ }
+
+ return do_poll && pollset->shutdown_closure == NULL &&
+ pollset->current_pollable == worker->pollable;
+}
+/* Undoes begin_worker(): unlinks `worker` from its pollable and from
+ * `pollset`, releasing what begin_worker() acquired.
+ *
+ * If removing the worker promoted another worker to root of the pollable,
+ * that worker's condition variable is signalled. If the pollset has no
+ * workers left afterwards, a pending shutdown is completed. worker_hdl is
+ * not used. */
+static void end_worker(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+ grpc_pollset_worker *worker,
+ grpc_pollset_worker **worker_hdl) {
+ if (NEW_ROOT ==
+ worker_remove(&worker->pollable->root_worker, PWL_POLLABLE, worker)) {
+ gpr_cv_signal(&worker->pollable->root_worker->cv);
+ }
+ if (worker->initialized_cv) {
+ gpr_cv_destroy(&worker->cv);
+ }
+ if (pollset_is_pollable_fd(pollset, worker->pollable)) {
+ UNREF_BY(exec_ctx, (grpc_fd *)worker->pollable, 2, "one_poll");
+ }
+ if (EMPTIED == worker_remove(&pollset->root_worker, PWL_POLLSET, worker)) {
+ pollset_maybe_finish_shutdown(exec_ctx, pollset);
+ }
+}
+
+/* Runs one polling iteration on `pollset` until work arrives, the worker is
+ kicked, or `deadline` passes.
+
+ The pollset's mutex (pollset->pollable.po.mu) must be held by the caller
+ before calling this. The function pollset_work() may temporarily release the
+ lock during the course of its execution but it will always re-acquire the
+ lock and ensure that it is held by the time the function returns.
+
+ A kick recorded while no worker was present (kicked_without_poller) is
+ consumed and makes the call return immediately with GRPC_ERROR_NONE.
+ Otherwise errors from materializing the pollable and from pollset_epoll()
+ are accumulated and returned. */
+static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+ grpc_pollset_worker **worker_hdl,
+ gpr_timespec now, gpr_timespec deadline) {
+ grpc_pollset_worker worker;
+ if (0 && GRPC_TRACER_ON(grpc_polling_trace)) { /* disabled by the "0 &&" */
+ gpr_log(GPR_DEBUG, "PS:%p work hdl=%p worker=%p now=%" PRId64
+ ".%09d deadline=%" PRId64 ".%09d kwp=%d root_worker=%p",
+ pollset, worker_hdl, &worker, now.tv_sec, now.tv_nsec,
+ deadline.tv_sec, deadline.tv_nsec, pollset->kicked_without_poller,
+ pollset->root_worker);
+ }
+ grpc_error *error = GRPC_ERROR_NONE;
+ static const char *err_desc = "pollset_work";
+ if (pollset->kicked_without_poller) {
+ pollset->kicked_without_poller = false;
+ return GRPC_ERROR_NONE;
+ }
+ if (pollset->current_pollable != &pollset->pollable) {
+ gpr_mu_lock(&pollset->current_pollable->po.mu);
+ }
+ if (begin_worker(pollset, &worker, worker_hdl, &now, deadline)) {
+ gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
+ gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
+ GPR_ASSERT(!pollset->shutdown_closure);
+ append_error(&error, pollable_materialize(worker.pollable), err_desc);
+ if (worker.pollable != &pollset->pollable) {
+ gpr_mu_unlock(&worker.pollable->po.mu);
+ }
+ gpr_mu_unlock(&pollset->pollable.po.mu); /* poll without any lock held */
+ append_error(&error, pollset_epoll(exec_ctx, pollset, worker.pollable, now,
+ deadline),
+ err_desc);
+ grpc_exec_ctx_flush(exec_ctx);
+ gpr_mu_lock(&pollset->pollable.po.mu); /* pollset first, then pollable */
+ if (worker.pollable != &pollset->pollable) {
+ gpr_mu_lock(&worker.pollable->po.mu);
+ }
+ gpr_tls_set(&g_current_thread_pollset, 0);
+ gpr_tls_set(&g_current_thread_worker, 0);
+ pollset_maybe_finish_shutdown(exec_ctx, pollset);
+ }
+ end_worker(exec_ctx, pollset, &worker, worker_hdl);
+ if (worker.pollable != &pollset->pollable) {
+ gpr_mu_unlock(&worker.pollable->po.mu);
+ }
+ return error;
+}
+
+/* Closure callback: drops the "pollset_pollable" reference on the grpc_fd
+ * passed as `arg`. `error` is unused. */
+static void unref_fd_no_longer_poller(grpc_exec_ctx *exec_ctx, void *arg,
+                                      grpc_error *error) {
+  grpc_fd *former_poller = arg;
+  UNREF_BY(exec_ctx, former_poller, 2, "pollset_pollable");
+}
+
+/* Makes `pollset` poll `fd`, switching the pollset's polling strategy when
+ * needed. Expects the pollset locked; `fd_locked` flags whether
+ * fd->pollable.po.mu is already held by the caller.
+ *
+ * Three cases, depending on pollset->current_pollable:
+ *   - the shared empty pollable: the pollset starts polling through the
+ *     fd's own pollable and takes a "pollset_pollable" reference on the fd;
+ *   - the pollset's own pollable (multipoller): the fd is added to it;
+ *   - another fd's pollable: the pollset switches to its own pollable, adds
+ *     both the previous fd and the new one, and schedules the release of
+ *     the reference held on the previous fd.
+ * Nothing happens if the pollset already polls through this fd's pollable.
+ *
+ * Existing workers are kicked on every transition. All failures are
+ * accumulated into the returned error. */
+static grpc_error *pollset_add_fd_locked(grpc_exec_ctx *exec_ctx,
+                                         grpc_pollset *pollset, grpc_fd *fd,
+                                         bool fd_locked) {
+  static const char *err_desc = "pollset_add_fd";
+  grpc_error *error = GRPC_ERROR_NONE;
+  if (pollset->current_pollable == &g_empty_pollable) {
+    if (GRPC_TRACER_ON(grpc_polling_trace)) {
+      gpr_log(GPR_DEBUG,
+              "PS:%p add fd %p; transition pollable from empty to fd", pollset,
+              fd);
+    }
+    /* empty pollable --> single fd pollable */
+    append_error(&error, pollset_kick_all(pollset), err_desc);
+    pollset->current_pollable = &fd->pollable;
+    if (!fd_locked) gpr_mu_lock(&fd->pollable.po.mu);
+    append_error(&error, fd_become_pollable_locked(fd), err_desc);
+    if (!fd_locked) gpr_mu_unlock(&fd->pollable.po.mu);
+    REF_BY(fd, 2, "pollset_pollable");
+  } else if (pollset->current_pollable == &pollset->pollable) {
+    if (GRPC_TRACER_ON(grpc_polling_trace)) {
+      gpr_log(GPR_DEBUG, "PS:%p add fd %p; already multipolling", pollset, fd);
+    }
+    append_error(&error, pollable_add_fd(pollset->current_pollable, fd),
+                 err_desc);
+  } else if (pollset->current_pollable != &fd->pollable) {
+    grpc_fd *had_fd = (grpc_fd *)pollset->current_pollable;
+    if (GRPC_TRACER_ON(grpc_polling_trace)) {
+      gpr_log(GPR_DEBUG,
+              "PS:%p add fd %p; transition pollable from fd %p to multipoller",
+              pollset, fd, had_fd);
+    }
+    append_error(&error, pollset_kick_all(pollset), err_desc);
+    pollset->current_pollable = &pollset->pollable;
+    if (append_error(&error, pollable_materialize(&pollset->pollable),
+                     err_desc)) {
+      /* Fold registration failures into the returned error instead of
+         dropping (and leaking) the grpc_error objects. */
+      append_error(&error, pollable_add_fd(&pollset->pollable, had_fd),
+                   err_desc);
+      append_error(&error, pollable_add_fd(&pollset->pollable, fd), err_desc);
+    }
+    /* the reference on the previous fd is released asynchronously */
+    grpc_closure_sched(exec_ctx,
+                       grpc_closure_create(unref_fd_no_longer_poller, had_fd,
+                                           grpc_schedule_on_exec_ctx),
+                       GRPC_ERROR_NONE);
+  }
+  return error;
+}
+
+/* Locking wrapper around pollset_add_fd_locked(): takes the pollset's mutex,
+ * adds `fd` (whose own mutex is not held), and logs any resulting error. */
+static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
+                           grpc_fd *fd) {
+  gpr_mu *pollset_mu = &pollset->pollable.po.mu;
+
+  gpr_mu_lock(pollset_mu);
+  grpc_error *error = pollset_add_fd_locked(exec_ctx, pollset, fd, false);
+  gpr_mu_unlock(pollset_mu);
+
+  GRPC_LOG_IF_ERROR("pollset_add_fd", error);
+}
+
+/*******************************************************************************
+ * Pollset-set Definitions
+ */
+
+/* Allocates a zeroed pollset_set and initializes its polling object. The
+ * result is released with pollset_set_destroy(). */
+static grpc_pollset_set *pollset_set_create(void) {
+  grpc_pollset_set *set = gpr_zalloc(sizeof(*set));
+  po_init(&set->po, PO_POLLSET_SET);
+  return set;
+}
+
+/* Destroys the polling object inside `pss` and frees the pollset_set.
+ * exec_ctx is unused. */
+static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
+                                grpc_pollset_set *pss) {
+  polling_obj *obj = &pss->po;
+  po_destroy(obj);
+  gpr_free(pss);
+}
+
+/* Adds `fd` to `pss` by joining their polling objects. */
+static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
+                               grpc_fd *fd) {
+  polling_obj *set_obj = &pss->po;
+  polling_obj *fd_obj = &fd->pollable.po;
+  po_join(exec_ctx, set_obj, fd_obj);
+}
+
+/* No-op: removing an fd from a pollset_set is not implemented here. */
+static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
+                               grpc_fd *fd) {
+}
+
+/* Adds pollset `ps` to `pss` by joining their polling objects. */
+static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
+                                    grpc_pollset_set *pss, grpc_pollset *ps) {
+  polling_obj *set_obj = &pss->po;
+  polling_obj *pollset_obj = &ps->pollable.po;
+  po_join(exec_ctx, set_obj, pollset_obj);
+}
+
+/* No-op: removing a pollset from a pollset_set is not implemented here. */
+static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
+                                    grpc_pollset_set *pss, grpc_pollset *ps) {
+}
+
+/* Adds pollset_set `item` to pollset_set `bag` by joining their polling
+ * objects. */
+static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
+                                        grpc_pollset_set *bag,
+                                        grpc_pollset_set *item) {
+  polling_obj *bag_obj = &bag->po;
+  polling_obj *item_obj = &item->po;
+  po_join(exec_ctx, bag_obj, item_obj);
+}
+
+/* No-op: removing a pollset_set from another pollset_set is not implemented
+ * here. */
+static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
+                                        grpc_pollset_set *bag,
+                                        grpc_pollset_set *item) {
+}
+
+/* Initializes a polling object of the given type: fresh mutex, no group, and
+ * list links pointing back at the object itself. */
+static void po_init(polling_obj *po, polling_obj_type type) {
+  gpr_mu_init(&po->mu);
+  po->type = type;
+  po->group = NULL;
+  po->prev = po;
+  po->next = po;
+}
+/* Follows the po.group chain starting at `pg` until it reaches a group whose
+ * own group pointer is NULL, and returns that group with its mutex held.
+ * At each hop a ref is taken on the next group before the current one is
+ * unlocked and unreffed. */
+static polling_group *pg_lock_latest(polling_group *pg) {
+ /* assumes pg unlocked; consumes ref, returns ref */
+ gpr_mu_lock(&pg->po.mu);
+ while (pg->po.group != NULL) {
+ polling_group *new_pg = pg_ref(pg->po.group);
+ gpr_mu_unlock(&pg->po.mu);
+ pg_unref(pg);
+ pg = new_pg;
+ gpr_mu_lock(&pg->po.mu);
+ }
+ return pg;
+}
+/* Destroys a polling object. If it belongs to a group, it is first unlinked
+ * from the group's member list while the latest group in the chain is locked
+ * (see pg_lock_latest), and the ref returned by pg_lock_latest is dropped.
+ * The object's mutex is destroyed in all cases. */
+static void po_destroy(polling_obj *po) {
+ if (po->group != NULL) {
+ polling_group *pg = pg_lock_latest(po->group);
+ po->prev->next = po->next; /* unlink from the doubly linked list */
+ po->next->prev = po->prev;
+ gpr_mu_unlock(&pg->po.mu);
+ pg_unref(pg);
+ }
+ gpr_mu_destroy(&po->mu);
+}
+
+/* Takes one reference on `pg` and returns it, so the call can be used inline
+ * in an expression. */
+static polling_group *pg_ref(polling_group *pg) {
+  gpr_refcount *counter = &pg->refs;
+  gpr_ref(counter);
+  return pg;
+}
+
+/* Drops one reference on `pg`; when gpr_unref() reports the last reference
+ * gone, the group's polling object is destroyed and the group is freed. */
+static void pg_unref(polling_group *pg) {
+  if (!gpr_unref(&pg->refs)) {
+    return; /* still referenced elsewhere */
+  }
+  po_destroy(&pg->po);
+  gpr_free(pg);
+}
+
+/* Total order over polling objects: first by type, then by address.
+ * Returns 0 for the same object, -1 if `a` sorts first, 1 otherwise. Used to
+ * pick a consistent lock order for two objects. */
+static int po_cmp(polling_obj *a, polling_obj *b) {
+  if (a == b) {
+    return 0;
+  }
+  if (a->type != b->type) {
+    return a->type < b->type ? -1 : 1;
+  }
+  if (a < b) {
+    return -1;
+  }
+  assert(a > b);
+  return 1;
+}
+/* Puts polling objects `a` and `b` into the same polling group.
+ *
+ * The two are first ordered with po_cmp (swapping them if needed) so their
+ * mutexes are always taken in the same order. Then, depending on which of
+ * them already has a group: a new group is created for both, the group-less
+ * one joins the other's group, nothing happens (same group already), or the
+ * two groups are merged. Group refs are taken before the object mutexes are
+ * released and handed to pg_join / pg_merge. Joining an object with itself
+ * is a no-op. */
+static void po_join(grpc_exec_ctx *exec_ctx, polling_obj *a, polling_obj *b) {
+ switch (po_cmp(a, b)) {
+ case 0:
+ return;
+ case 1:
+ GPR_SWAP(polling_obj *, a, b);
+ /* fall through */
+ case -1:
+ gpr_mu_lock(&a->mu);
+ gpr_mu_lock(&b->mu);
+
+ if (a->group == NULL) {
+ if (b->group == NULL) { /* neither has a group: create one for both */
+ polling_obj *initial_po[] = {a, b};
+ pg_create(exec_ctx, initial_po, GPR_ARRAY_SIZE(initial_po));
+ gpr_mu_unlock(&a->mu);
+ gpr_mu_unlock(&b->mu);
+ } else { /* only b has a group: a joins it */
+ polling_group *b_group = pg_ref(b->group);
+ gpr_mu_unlock(&b->mu);
+ gpr_mu_unlock(&a->mu);
+ pg_join(exec_ctx, b_group, a);
+ }
+ } else if (b->group == NULL) { /* only a has a group: b joins it */
+ polling_group *a_group = pg_ref(a->group);
+ gpr_mu_unlock(&a->mu);
+ gpr_mu_unlock(&b->mu);
+ pg_join(exec_ctx, a_group, b);
+ } else if (a->group == b->group) {
+ /* nothing to do */
+ gpr_mu_unlock(&a->mu);
+ gpr_mu_unlock(&b->mu);
+ } else { /* two different groups: merge them */
+ polling_group *a_group = pg_ref(a->group);
+ polling_group *b_group = pg_ref(b->group);
+ gpr_mu_unlock(&a->mu);
+ gpr_mu_unlock(&b->mu);
+ pg_merge(exec_ctx, a_group, b_group);
+ }
+ }
+}
+
+/* Tells two members of the same polling group about each other. Only the
+ * (fd, pollset) pairing has an effect, in either argument order: the fd is
+ * added to the pollset. Every other combination of types is ignored.
+ *
+ * pollset_add_fd_locked() is called with fd_locked == true, i.e. the fd's
+ * mutex is treated as already held by the caller. Its error result is logged
+ * and released here rather than silently dropped, which would leak the
+ * grpc_error. */
+static void pg_notify(grpc_exec_ctx *exec_ctx, polling_obj *a, polling_obj *b) {
+  if (a->type == PO_FD && b->type == PO_POLLSET) {
+    GRPC_LOG_IF_ERROR("pg_notify",
+                      pollset_add_fd_locked(exec_ctx, (grpc_pollset *)b,
+                                            (grpc_fd *)a, true));
+  } else if (a->type == PO_POLLSET && b->type == PO_FD) {
+    GRPC_LOG_IF_ERROR("pg_notify",
+                      pollset_add_fd_locked(exec_ctx, (grpc_pollset *)a,
+                                            (grpc_fd *)b, true));
+  }
+}
+
+/* Notifies every member of group `from` about every member of group `to`
+ * (see pg_notify). For each pair both member mutexes are held during the
+ * notification, acquired in po_cmp order; a pair consisting of the same
+ * object twice trips an assertion. */
+static void pg_broadcast(grpc_exec_ctx *exec_ctx, polling_group *from,
+                         polling_group *to) {
+  for (polling_obj *src = from->po.next; src != &from->po; src = src->next) {
+    for (polling_obj *dst = to->po.next; dst != &to->po; dst = dst->next) {
+      const int order = po_cmp(src, dst);
+      GPR_ASSERT(order != 0);
+      polling_obj *lock_first = order < 0 ? src : dst;
+      polling_obj *lock_second = order < 0 ? dst : src;
+
+      gpr_mu_lock(&lock_first->mu);
+      gpr_mu_lock(&lock_second->mu);
+      pg_notify(exec_ctx, src, dst);
+      gpr_mu_unlock(&src->mu);
+      gpr_mu_unlock(&dst->mu);
+    }
+  }
+}
+
+/* Creates a polling group whose members are the `initial_po_count` objects
+ * in `initial_po`; assumes all of them are locked and none has a group yet.
+ *
+ * The group starts with one ref per member. Members are chained, in array
+ * order, into a circular list that runs through the group's own stub polling
+ * object, and finally every pair of members is introduced via pg_notify. */
+static void pg_create(grpc_exec_ctx *exec_ctx, polling_obj **initial_po,
+                      size_t initial_po_count) {
+  polling_group *group = gpr_malloc(sizeof(*group));
+  po_init(&group->po, PO_POLLING_GROUP);
+  gpr_ref_init(&group->refs, (int)initial_po_count);
+
+  for (size_t i = 0; i < initial_po_count; i++) {
+    GPR_ASSERT(initial_po[i]->group == NULL);
+    initial_po[i]->group = group;
+  }
+
+  /* link each member to its neighbour in array order */
+  for (size_t i = 1; i < initial_po_count; i++) {
+    initial_po[i]->prev = initial_po[i - 1];
+    initial_po[i - 1]->next = initial_po[i];
+  }
+
+  /* close the ring through the group's stub object */
+  polling_obj *first = initial_po[0];
+  polling_obj *last = initial_po[initial_po_count - 1];
+  first->prev = &group->po;
+  last->next = &group->po;
+  group->po.next = first;
+  group->po.prev = last;
+
+  for (size_t i = 1; i < initial_po_count; i++) {
+    for (size_t j = 0; j < i; j++) {
+      pg_notify(exec_ctx, initial_po[i], initial_po[j]);
+    }
+  }
+}
+/* Adds the ungrouped polling object po to polling group pg.
+ * The group is locked through pg_lock_latest(), po is then notified against
+ * every existing member (locking po and that member in po_cmp order), and
+ * finally po is appended to the tail of the group's list with po->group set.
+ * po->group is re-checked each time po is locked; if po has acquired a group
+ * in the meantime the function hands over to pg_merge() and returns.
+ * NOTE(review): on the normal path the ref on pg passed in by the caller is
+ * not released - presumably it becomes the ref held through po->group;
+ * confirm against pg_ref/pg_unref usage. */
+static void pg_join(grpc_exec_ctx *exec_ctx, polling_group *pg,
+ polling_obj *po) {
+ /* assumes neither pg nor po are locked; consumes one ref to pg */
+ pg = pg_lock_latest(pg);
+ /* pg locked */
+ for (polling_obj *existing = pg->po.next /* skip pg - it's just a stub */;
+ existing != &pg->po; existing = existing->next) {
+ if (po_cmp(po, existing) < 0) {
+ gpr_mu_lock(&po->mu);
+ gpr_mu_lock(&existing->mu);
+ } else {
+ GPR_ASSERT(po_cmp(po, existing) != 0); /* po must not already be a member */
+ gpr_mu_lock(&existing->mu);
+ gpr_mu_lock(&po->mu);
+ }
+ /* pg, po, existing locked */
+ if (po->group != NULL) {
+ gpr_mu_unlock(&pg->po.mu);
+ polling_group *po_group = pg_ref(po->group); /* taken while po is still locked */
+ gpr_mu_unlock(&po->mu);
+ gpr_mu_unlock(&existing->mu);
+ pg_merge(exec_ctx, pg, po_group);
+ /* early exit: polling obj picked up a group during joining: we needed
+ to do a full merge */
+ return;
+ }
+ pg_notify(exec_ctx, po, existing);
+ gpr_mu_unlock(&po->mu);
+ gpr_mu_unlock(&existing->mu);
+ }
+ gpr_mu_lock(&po->mu); /* pg is still locked; po was released at the end of the loop body */
+ if (po->group != NULL) {
+ gpr_mu_unlock(&pg->po.mu);
+ polling_group *po_group = pg_ref(po->group);
+ gpr_mu_unlock(&po->mu);
+ pg_merge(exec_ctx, pg, po_group);
+ /* early exit: polling obj picked up a group during joining: we needed
+ to do a full merge */
+ return;
+ }
+ po->group = pg;
+ po->next = &pg->po; /* append po just before the group's own po, i.e. at the tail */
+ po->prev = pg->po.prev;
+ po->prev->next = po->next->prev = po;
+ gpr_mu_unlock(&pg->po.mu);
+ gpr_mu_unlock(&po->mu);
+}
+/* Merges two polling groups into one.
+ * Phase 1 (the for loop): the groups are ordered by address so the lower one
+ * is a, and both group mutexes are taken in that order. If either group's
+ * po.group is already set, that pointer is followed (ref the target, unlock
+ * both, unref the old group) and the loop retries. If a and b turn out to be
+ * the same group both refs are dropped and the function returns.
+ * Phase 2: with both groups locked, b->po.group is pointed at a, the members
+ * of the two groups are notified about each other in both directions, and
+ * every member of b is relinked onto the tail of a's list with a fresh ref
+ * on a. The members' previous group pointers are collected and unreffed only
+ * after both group mutexes have been released, followed by an unref of b.
+ * NOTE(review): b->po.group = a stores a without pg_ref - presumably it takes
+ * over the ref on a that this function was handed; confirm. */
+static void pg_merge(grpc_exec_ctx *exec_ctx, polling_group *a,
+ polling_group *b) {
+ for (;;) {
+ if (a == b) {
+ pg_unref(a);
+ pg_unref(b);
+ return;
+ }
+ if (a > b) GPR_SWAP(polling_group *, a, b); /* lock order is by group address */
+ gpr_mu_lock(&a->po.mu);
+ gpr_mu_lock(&b->po.mu);
+ if (a->po.group != NULL) {
+ polling_group *m2 = pg_ref(a->po.group); /* follow a's pointer to another group */
+ gpr_mu_unlock(&a->po.mu);
+ gpr_mu_unlock(&b->po.mu);
+ pg_unref(a);
+ a = m2;
+ } else if (b->po.group != NULL) {
+ polling_group *m2 = pg_ref(b->po.group); /* follow b's pointer to another group */
+ gpr_mu_unlock(&a->po.mu);
+ gpr_mu_unlock(&b->po.mu);
+ pg_unref(b);
+ b = m2;
+ } else {
+ break; /* leaves the loop with both group mutexes held */
+ }
+ }
+ polling_group **unref = NULL; /* growable array of groups to unref after unlocking */
+ size_t unref_count = 0;
+ size_t unref_cap = 0;
+ b->po.group = a;
+ pg_broadcast(exec_ctx, a, b);
+ pg_broadcast(exec_ctx, b, a);
+ while (b->po.next != &b->po) { /* until b's member list is empty */
+ polling_obj *po = b->po.next;
+ gpr_mu_lock(&po->mu);
+ if (unref_count == unref_cap) {
+ unref_cap = GPR_MAX(8, 3 * unref_cap / 2); /* start at 8 entries, then grow by 1.5x */
+ unref = gpr_realloc(unref, unref_cap * sizeof(*unref));
+ }
+ unref[unref_count++] = po->group;
+ po->group = pg_ref(a);
+ // unlink from b
+ po->prev->next = po->next;
+ po->next->prev = po->prev;
+ // link to a
+ po->next = &a->po;
+ po->prev = a->po.prev;
+ po->next->prev = po->prev->next = po;
+ gpr_mu_unlock(&po->mu);
+ }
+ gpr_mu_unlock(&a->po.mu);
+ gpr_mu_unlock(&b->po.mu);
+ for (size_t i = 0; i < unref_count; i++) { /* deferred until no mutex is held */
+ pg_unref(unref[i]);
+ }
+ gpr_free(unref);
+ pg_unref(b);
+}
+
+/*******************************************************************************
+ * Event engine binding
+ */
+/* Engine teardown hook installed as vtable.shutdown_engine: shuts down the fd
+ * globals first and the pollset globals second. */
+static void shutdown_engine(void) {
+ fd_global_shutdown();
+ pollset_global_shutdown();
+}
+/* Event engine function table for this polling engine. Each slot is bound to
+ * the static function of the same name in this file; grpc_init_epollex_linux()
+ * returns a pointer to this table when the engine is selected. */
+static const grpc_event_engine_vtable vtable = {
+ .pollset_size = sizeof(grpc_pollset),
+
+ .fd_create = fd_create,
+ .fd_wrapped_fd = fd_wrapped_fd,
+ .fd_orphan = fd_orphan,
+ .fd_shutdown = fd_shutdown,
+ .fd_is_shutdown = fd_is_shutdown,
+ .fd_notify_on_read = fd_notify_on_read,
+ .fd_notify_on_write = fd_notify_on_write,
+ .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset,
+ .fd_get_workqueue = fd_get_workqueue,
+
+ .pollset_init = pollset_init,
+ .pollset_shutdown = pollset_shutdown,
+ .pollset_destroy = pollset_destroy,
+ .pollset_work = pollset_work,
+ .pollset_kick = pollset_kick,
+ .pollset_add_fd = pollset_add_fd,
+
+ .pollset_set_create = pollset_set_create,
+ .pollset_set_destroy = pollset_set_destroy,
+ .pollset_set_add_pollset = pollset_set_add_pollset,
+ .pollset_set_del_pollset = pollset_set_del_pollset,
+ .pollset_set_add_pollset_set = pollset_set_add_pollset_set,
+ .pollset_set_del_pollset_set = pollset_set_del_pollset_set,
+ .pollset_set_add_fd = pollset_set_add_fd,
+ .pollset_set_del_fd = pollset_set_del_fd,
+
+ .workqueue_ref = workqueue_ref,
+ .workqueue_unref = workqueue_unref,
+ .workqueue_scheduler = workqueue_scheduler,
+
+ .shutdown_engine = shutdown_engine,
+};
+/* Factory for the epollex polling engine.
+ * Returns NULL without initialising anything unless the engine was explicitly
+ * requested, wakeup fds are available and grpc_is_epollexclusive_available()
+ * reports true. Otherwise initialises the fd and pollset globals; if
+ * pollset_global_init() fails the error is logged via GRPC_LOG_IF_ERROR, both
+ * sets of globals are shut down again and NULL is returned.
+ * On success returns a pointer to this file's vtable. */
+const grpc_event_engine_vtable *grpc_init_epollex_linux(
+ bool explicitly_requested) {
+ if (!explicitly_requested) return NULL;
+
+ if (!grpc_has_wakeup_fd()) {
+ return NULL;
+ }
+
+ if (!grpc_is_epollexclusive_available()) {
+ return NULL;
+ }
+
+ fd_global_init();
+
+ if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
+ pollset_global_shutdown(); /* undo the partial initialisation before giving up */
+ fd_global_shutdown();
+ return NULL;
+ }
+
+ return &vtable;
+}
+
+#else /* defined(GRPC_LINUX_EPOLL) */
+#if defined(GRPC_POSIX_SOCKET)
+#include "src/core/lib/iomgr/ev_posix.h"
+/* Fallback for POSIX-socket builds where GRPC_LINUX_EPOLL is not defined:
+ * epoll is not available, so this engine can never be selected and the
+ * factory returns NULL regardless of explicitly_requested. */
+const grpc_event_engine_vtable *grpc_init_epollex_linux(
+ bool explicitly_requested) {
+ return NULL;
+}
+#endif /* defined(GRPC_POSIX_SOCKET) */
+
+#endif /* !defined(GRPC_LINUX_EPOLL) */
diff --git a/src/core/lib/iomgr/ev_epollex_linux.h b/src/core/lib/iomgr/ev_epollex_linux.h
new file mode 100644
index 0000000000..a078a7f19a
--- /dev/null
+++ b/src/core/lib/iomgr/ev_epollex_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * Copyright 2015, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef GRPC_CORE_LIB_IOMGR_EV_EPOLLEX_LINUX_H
+#define GRPC_CORE_LIB_IOMGR_EV_EPOLLEX_LINUX_H
+
+#include "src/core/lib/iomgr/ev_posix.h"
+#include "src/core/lib/iomgr/port.h"
+/* Factory for the epollex polling engine. Returns the engine's vtable, or NULL
+ * when the engine is unavailable or explicitly_requested is false. */
+const grpc_event_engine_vtable *grpc_init_epollex_linux(
+ bool explicitly_requested);
+
+#endif /* GRPC_CORE_LIB_IOMGR_EV_EPOLLEX_LINUX_H */
diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epollsig_linux.c
index e603a75593..92c555b7ea 100644
--- a/src/core/lib/iomgr/ev_epoll_linux.c
+++ b/src/core/lib/iomgr/ev_epollsig_linux.c
@@ -36,7 +36,7 @@
/* This polling engine is only relevant on linux kernels supporting epoll() */
#ifdef GRPC_LINUX_EPOLL
-#include "src/core/lib/iomgr/ev_epoll_linux.h"
+#include "src/core/lib/iomgr/ev_epollsig_linux.h"
#include <assert.h>
#include <errno.h>
@@ -63,11 +63,11 @@
#include "src/core/lib/profiling/timers.h"
#include "src/core/lib/support/block_annotate.h"
-/* TODO: sreek - Move this to init.c and initialize this like other tracers. */
-static int grpc_polling_trace = 0; /* Disabled by default */
-#define GRPC_POLLING_TRACE(fmt, ...) \
- if (grpc_polling_trace) { \
- gpr_log(GPR_INFO, (fmt), __VA_ARGS__); \
+#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)
+
+#define GRPC_POLLING_TRACE(...) \
+ if (GRPC_TRACER_ON(grpc_polling_trace)) { \
+ gpr_log(GPR_INFO, __VA_ARGS__); \
}
/* Uncomment the following to enable extra checks on poll_object operations */
@@ -76,11 +76,6 @@ static int grpc_polling_trace = 0; /* Disabled by default */
static int grpc_wakeup_signal = -1;
static bool is_grpc_wakeup_signal_initialized = false;
-/* TODO: sreek: Right now, this wakes up all pollers. In future we should make
- * sure to wake up one polling thread (which can wake up other threads if
- * needed) */
-static grpc_wakeup_fd global_wakeup_fd;
-
/* Implements the function defined in grpc_posix.h. This function might be
* called before even calling grpc_init() to set either a different signal to
* use. If signum == -1, then the use of signals is disabled */
@@ -454,8 +449,8 @@ static void polling_island_add_wakeup_fd_locked(polling_island *pi,
gpr_asprintf(&err_msg,
"epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
"error: %d (%s)",
- pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(&global_wakeup_fd),
- errno, strerror(errno));
+ pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
+ strerror(errno));
append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
gpr_free(err_msg);
}
@@ -558,7 +553,6 @@ static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
goto done;
}
- polling_island_add_wakeup_fd_locked(pi, &global_wakeup_fd, error);
polling_island_add_wakeup_fd_locked(pi, &pi->workqueue_wakeup_fd, error);
if (initial_fd != NULL) {
@@ -738,7 +732,7 @@ static void workqueue_maybe_wakeup(polling_island *pi) {
it right now. Note that since we do an anticipatory mpscq_pop every poll
loop, it's ok if we miss the wakeup here, as we'll get the work item when
the next poller enters anyway. */
- if (current_pollers > min_current_pollers_for_wakeup) {
+ if (current_pollers >= min_current_pollers_for_wakeup) {
GRPC_LOG_IF_ERROR("workqueue_wakeup_fd",
grpc_wakeup_fd_wakeup(&pi->workqueue_wakeup_fd));
}
@@ -1116,11 +1110,10 @@ static grpc_error *pollset_global_init(void) {
gpr_tls_init(&g_current_thread_pollset);
gpr_tls_init(&g_current_thread_worker);
poller_kick_init();
- return grpc_wakeup_fd_init(&global_wakeup_fd);
+ return GRPC_ERROR_NONE;
}
static void pollset_global_shutdown(void) {
- grpc_wakeup_fd_destroy(&global_wakeup_fd);
gpr_tls_destroy(&g_current_thread_pollset);
gpr_tls_destroy(&g_current_thread_worker);
}
@@ -1226,10 +1219,6 @@ static grpc_error *pollset_kick(grpc_pollset *p,
return error;
}
-static grpc_error *kick_poller(void) {
- return grpc_wakeup_fd_wakeup(&global_wakeup_fd);
-}
-
static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
gpr_mu_init(&pollset->po.mu);
*mu = &pollset->po.mu;
@@ -1332,7 +1321,7 @@ static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
* than destroying the mutexes, there is nothing special that needs to be done
* here */
-static void pollset_destroy(grpc_pollset *pollset) {
+static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
GPR_ASSERT(!pollset_has_workers(pollset));
gpr_mu_destroy(&pollset->po.mu);
}
@@ -1343,7 +1332,13 @@ static bool maybe_do_workqueue_work(grpc_exec_ctx *exec_ctx,
gpr_mpscq_node *n = gpr_mpscq_pop(&pi->workqueue_items);
gpr_mu_unlock(&pi->workqueue_read_mu);
if (n != NULL) {
- if (gpr_atm_full_fetch_add(&pi->workqueue_item_count, -1) > 1) {
+ gpr_atm remaining =
+ gpr_atm_full_fetch_add(&pi->workqueue_item_count, -1) - 1;
+ GRPC_POLLING_TRACE(
+ "maybe_do_workqueue_work: pi: %p: got closure %p, remaining = "
+ "%" PRIdPTR,
+ pi, n, remaining);
+ if (remaining > 0) {
workqueue_maybe_wakeup(pi);
}
grpc_closure *c = (grpc_closure *)n;
@@ -1358,8 +1353,13 @@ static bool maybe_do_workqueue_work(grpc_exec_ctx *exec_ctx,
/* n == NULL might mean there's work but it's not available to be popped
* yet - try to ensure another workqueue wakes up to check shortly if so
*/
+ GRPC_POLLING_TRACE(
+ "maybe_do_workqueue_work: pi: %p: more to do, but not yet", pi);
workqueue_maybe_wakeup(pi);
}
+ } else {
+ GRPC_POLLING_TRACE("maybe_do_workqueue_work: pi: %p: read already locked",
+ pi);
}
return false;
}
@@ -1422,7 +1422,10 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
/* If we get some workqueue work to do, it might end up completing an item on
the completion queue, so there's no need to poll... so we skip that and
redo the complete loop to verify */
+ GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker %p, pi %p", pollset,
+ worker, pi);
if (!maybe_do_workqueue_work(exec_ctx, pi)) {
+ GRPC_POLLING_TRACE("pollset_work: begins");
gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
g_current_thread_polling_island = pi;
@@ -1453,11 +1456,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
for (int i = 0; i < ep_rv; ++i) {
void *data_ptr = ep_ev[i].data.ptr;
- if (data_ptr == &global_wakeup_fd) {
- grpc_timer_consume_kick();
- append_error(error, grpc_wakeup_fd_consume_wakeup(&global_wakeup_fd),
- err_desc);
- } else if (data_ptr == &pi->workqueue_wakeup_fd) {
+ if (data_ptr == &pi->workqueue_wakeup_fd) {
append_error(error,
grpc_wakeup_fd_consume_wakeup(&pi->workqueue_wakeup_fd),
err_desc);
@@ -1487,6 +1486,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
g_current_thread_polling_island = NULL;
gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
+ GRPC_POLLING_TRACE("pollset_work: ends");
}
GPR_ASSERT(pi != NULL);
@@ -1897,8 +1897,6 @@ static const grpc_event_engine_vtable vtable = {
.pollset_set_add_fd = pollset_set_add_fd,
.pollset_set_del_fd = pollset_set_del_fd,
- .kick_poller = kick_poller,
-
.workqueue_ref = workqueue_ref,
.workqueue_unref = workqueue_unref,
.workqueue_scheduler = workqueue_scheduler,
@@ -1921,7 +1919,8 @@ static bool is_epoll_available() {
return true;
}
-const grpc_event_engine_vtable *grpc_init_epoll_linux(void) {
+const grpc_event_engine_vtable *grpc_init_epollsig_linux(
+ bool explicit_request) {
/* If use of signals is disabled, we cannot use epoll engine*/
if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
return NULL;
@@ -1936,7 +1935,13 @@ const grpc_event_engine_vtable *grpc_init_epoll_linux(void) {
}
if (!is_grpc_wakeup_signal_initialized) {
- grpc_use_signal(SIGRTMIN + 6);
+ /* TODO(ctiller): when other epoll engines are ready, remove the true || to
+ * force this to be explicitly chosen if needed */
+ if (true || explicit_request) {
+ grpc_use_signal(SIGRTMIN + 6);
+ } else {
+ return NULL;
+ }
}
fd_global_init();
@@ -1958,7 +1963,10 @@ const grpc_event_engine_vtable *grpc_init_epoll_linux(void) {
#include "src/core/lib/iomgr/ev_posix.h"
/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
* NULL */
-const grpc_event_engine_vtable *grpc_init_epoll_linux(void) { return NULL; }
+const grpc_event_engine_vtable *grpc_init_epollsig_linux(
+ bool explicit_request) {
+ return NULL;
+}
#endif /* defined(GRPC_POSIX_SOCKET) */
void grpc_use_signal(int signum) {}
diff --git a/src/core/lib/iomgr/ev_epoll_linux.h b/src/core/lib/iomgr/ev_epollsig_linux.h
index 8fc3ff59a3..9e4034f2a7 100644
--- a/src/core/lib/iomgr/ev_epoll_linux.h
+++ b/src/core/lib/iomgr/ev_epollsig_linux.h
@@ -31,13 +31,13 @@
*
*/
-#ifndef GRPC_CORE_LIB_IOMGR_EV_EPOLL_LINUX_H
-#define GRPC_CORE_LIB_IOMGR_EV_EPOLL_LINUX_H
+#ifndef GRPC_CORE_LIB_IOMGR_EV_EPOLLSIG_LINUX_H
+#define GRPC_CORE_LIB_IOMGR_EV_EPOLLSIG_LINUX_H
#include "src/core/lib/iomgr/ev_posix.h"
#include "src/core/lib/iomgr/port.h"
-const grpc_event_engine_vtable *grpc_init_epoll_linux(void);
+const grpc_event_engine_vtable *grpc_init_epollsig_linux(bool explicit_request);
#ifdef GRPC_LINUX_EPOLL
void *grpc_fd_get_polling_island(grpc_fd *fd);
@@ -45,4 +45,4 @@ void *grpc_pollset_get_polling_island(grpc_pollset *ps);
bool grpc_are_polling_islands_equal(void *p, void *q);
#endif /* defined(GRPC_LINUX_EPOLL) */
-#endif /* GRPC_CORE_LIB_IOMGR_EV_EPOLL_LINUX_H */
+#endif /* GRPC_CORE_LIB_IOMGR_EV_EPOLLSIG_LINUX_H */
diff --git a/src/core/lib/iomgr/ev_poll_posix.c b/src/core/lib/iomgr/ev_poll_posix.c
index 9834cdd197..3a7648ac32 100644
--- a/src/core/lib/iomgr/ev_poll_posix.c
+++ b/src/core/lib/iomgr/ev_poll_posix.c
@@ -58,6 +58,8 @@
#include "src/core/lib/profiling/timers.h"
#include "src/core/lib/support/block_annotate.h"
+#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)
+
/*******************************************************************************
* FD declarations
*/
@@ -122,8 +124,6 @@ struct grpc_fd {
grpc_pollset *read_notifier_pollset;
};
-static grpc_wakeup_fd global_wakeup_fd;
-
/* Begin polling on an fd.
Registers that the given pollset is interested in this fd - so that if read
or writability interest changes, the pollset can be kicked to pick up that
@@ -784,19 +784,14 @@ static grpc_error *pollset_kick(grpc_pollset *p,
static grpc_error *pollset_global_init(void) {
gpr_tls_init(&g_current_thread_poller);
gpr_tls_init(&g_current_thread_worker);
- return grpc_wakeup_fd_init(&global_wakeup_fd);
+ return GRPC_ERROR_NONE;
}
static void pollset_global_shutdown(void) {
- grpc_wakeup_fd_destroy(&global_wakeup_fd);
gpr_tls_destroy(&g_current_thread_poller);
gpr_tls_destroy(&g_current_thread_worker);
}
-static grpc_error *kick_poller(void) {
- return grpc_wakeup_fd_wakeup(&global_wakeup_fd);
-}
-
/* main interface */
static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
@@ -815,7 +810,7 @@ static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
pollset->pollset_set_count = 0;
}
-static void pollset_destroy(grpc_pollset *pollset) {
+static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
GPR_ASSERT(!pollset_has_workers(pollset));
GPR_ASSERT(pollset->idle_jobs.head == pollset->idle_jobs.tail);
while (pollset->local_wakeup_cache) {
@@ -952,13 +947,10 @@ static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
}
fd_count = 0;
- pfd_count = 2;
- pfds[0].fd = GRPC_WAKEUP_FD_GET_READ_FD(&global_wakeup_fd);
+ pfd_count = 1;
+ pfds[0].fd = GRPC_WAKEUP_FD_GET_READ_FD(&worker.wakeup_fd->fd);
pfds[0].events = POLLIN;
pfds[0].revents = 0;
- pfds[1].fd = GRPC_WAKEUP_FD_GET_READ_FD(&worker.wakeup_fd->fd);
- pfds[1].events = POLLIN;
- pfds[1].revents = 0;
for (i = 0; i < pollset->fd_count; i++) {
if (fd_is_orphaned(pollset->fds[i])) {
GRPC_FD_UNREF(pollset->fds[i], "multipoller");
@@ -974,7 +966,7 @@ static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
pollset->fd_count = fd_count;
gpr_mu_unlock(&pollset->mu);
- for (i = 2; i < pfd_count; i++) {
+ for (i = 1; i < pfd_count; i++) {
grpc_fd *fd = watchers[i].fd;
pfds[i].events = (short)fd_begin_poll(fd, pollset, &worker, POLLIN,
POLLOUT, &watchers[i]);
@@ -992,7 +984,7 @@ static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
work_combine_error(&error, GRPC_OS_ERROR(errno, "poll"));
}
- for (i = 2; i < pfd_count; i++) {
+ for (i = 1; i < pfd_count; i++) {
if (watchers[i].fd == NULL) {
fd_end_poll(exec_ctx, &watchers[i], 0, 0, NULL);
} else {
@@ -1002,20 +994,15 @@ static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
}
}
} else if (r == 0) {
- for (i = 2; i < pfd_count; i++) {
+ for (i = 1; i < pfd_count; i++) {
fd_end_poll(exec_ctx, &watchers[i], 0, 0, NULL);
}
} else {
if (pfds[0].revents & POLLIN_CHECK) {
- grpc_timer_consume_kick();
- work_combine_error(&error,
- grpc_wakeup_fd_consume_wakeup(&global_wakeup_fd));
- }
- if (pfds[1].revents & POLLIN_CHECK) {
work_combine_error(
&error, grpc_wakeup_fd_consume_wakeup(&worker.wakeup_fd->fd));
}
- for (i = 2; i < pfd_count; i++) {
+ for (i = 1; i < pfd_count; i++) {
if (watchers[i].fd == NULL) {
fd_end_poll(exec_ctx, &watchers[i], 0, 0, NULL);
} else {
@@ -1560,8 +1547,6 @@ static const grpc_event_engine_vtable vtable = {
.pollset_set_add_fd = pollset_set_add_fd,
.pollset_set_del_fd = pollset_set_del_fd,
- .kick_poller = kick_poller,
-
.workqueue_ref = workqueue_ref,
.workqueue_unref = workqueue_unref,
.workqueue_scheduler = workqueue_scheduler,
@@ -1569,7 +1554,7 @@ static const grpc_event_engine_vtable vtable = {
.shutdown_engine = shutdown_engine,
};
-const grpc_event_engine_vtable *grpc_init_poll_posix(void) {
+const grpc_event_engine_vtable *grpc_init_poll_posix(bool explicit_request) {
if (!grpc_has_wakeup_fd()) {
return NULL;
}
@@ -1579,7 +1564,7 @@ const grpc_event_engine_vtable *grpc_init_poll_posix(void) {
return &vtable;
}
-const grpc_event_engine_vtable *grpc_init_poll_cv_posix(void) {
+const grpc_event_engine_vtable *grpc_init_poll_cv_posix(bool explicit_request) {
global_cv_fd_table_init();
grpc_enable_cv_wakeup_fds(1);
if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
diff --git a/src/core/lib/iomgr/ev_poll_posix.h b/src/core/lib/iomgr/ev_poll_posix.h
index 202ffca14c..2890e93ead 100644
--- a/src/core/lib/iomgr/ev_poll_posix.h
+++ b/src/core/lib/iomgr/ev_poll_posix.h
@@ -36,7 +36,7 @@
#include "src/core/lib/iomgr/ev_posix.h"
-const grpc_event_engine_vtable *grpc_init_poll_posix(void);
-const grpc_event_engine_vtable *grpc_init_poll_cv_posix(void);
+const grpc_event_engine_vtable *grpc_init_poll_posix(bool explicit_request);
+const grpc_event_engine_vtable *grpc_init_poll_cv_posix(bool explicit_request);
#endif /* GRPC_CORE_LIB_IOMGR_EV_POLL_POSIX_H */
diff --git a/src/core/lib/iomgr/ev_posix.c b/src/core/lib/iomgr/ev_posix.c
index 13409a4de8..c4d2f23e29 100644
--- a/src/core/lib/iomgr/ev_posix.c
+++ b/src/core/lib/iomgr/ev_posix.c
@@ -44,10 +44,18 @@
#include <grpc/support/string_util.h>
#include <grpc/support/useful.h>
-#include "src/core/lib/iomgr/ev_epoll_linux.h"
+#include "src/core/lib/debug/trace.h"
+#include "src/core/lib/iomgr/ev_epoll1_linux.h"
+#include "src/core/lib/iomgr/ev_epoll_limited_pollers_linux.h"
+#include "src/core/lib/iomgr/ev_epoll_thread_pool_linux.h"
+#include "src/core/lib/iomgr/ev_epollex_linux.h"
+#include "src/core/lib/iomgr/ev_epollsig_linux.h"
#include "src/core/lib/iomgr/ev_poll_posix.h"
#include "src/core/lib/support/env.h"
+grpc_tracer_flag grpc_polling_trace =
+ GRPC_TRACER_INITIALIZER(false); /* Disabled by default */
+
/** Default poll() function - a pointer so that it can be overridden by some
* tests */
grpc_poll_function_type grpc_poll_function = poll;
@@ -57,7 +65,8 @@ grpc_wakeup_fd grpc_global_wakeup_fd;
static const grpc_event_engine_vtable *g_event_engine;
static const char *g_poll_strategy_name = NULL;
-typedef const grpc_event_engine_vtable *(*event_engine_factory_fn)(void);
+typedef const grpc_event_engine_vtable *(*event_engine_factory_fn)(
+ bool explicit_request);
typedef struct {
const char *name;
@@ -65,7 +74,11 @@ typedef struct {
} event_engine_factory;
static const event_engine_factory g_factories[] = {
- {"epoll", grpc_init_epoll_linux},
+ {"epollex", grpc_init_epollex_linux},
+ {"epollsig", grpc_init_epollsig_linux},
+ {"epoll1", grpc_init_epoll1_linux},
+ {"epoll-threadpool", grpc_init_epoll_thread_pool_linux},
+ {"epoll-limited", grpc_init_epoll_limited_pollers_linux},
{"poll", grpc_init_poll_posix},
{"poll-cv", grpc_init_poll_cv_posix},
};
@@ -102,7 +115,8 @@ static bool is(const char *want, const char *have) {
static void try_engine(const char *engine) {
for (size_t i = 0; i < GPR_ARRAY_SIZE(g_factories); i++) {
if (is(engine, g_factories[i].name)) {
- if ((g_event_engine = g_factories[i].factory())) {
+ if ((g_event_engine = g_factories[i].factory(
+ 0 == strcmp(engine, g_factories[i].name)))) {
g_poll_strategy_name = g_factories[i].name;
gpr_log(GPR_DEBUG, "Using polling engine: %s", g_factories[i].name);
return;
@@ -121,6 +135,8 @@ void grpc_set_event_engine_test_only(
const char *grpc_get_poll_strategy_name() { return g_poll_strategy_name; }
void grpc_event_engine_init(void) {
+ grpc_register_tracer("polling", &grpc_polling_trace);
+
char *s = gpr_getenv("GRPC_POLL_STRATEGY");
if (s == NULL) {
s = gpr_strdup("all");
@@ -197,8 +213,8 @@ void grpc_pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
g_event_engine->pollset_shutdown(exec_ctx, pollset, closure);
}
-void grpc_pollset_destroy(grpc_pollset *pollset) {
- g_event_engine->pollset_destroy(pollset);
+void grpc_pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
+ g_event_engine->pollset_destroy(exec_ctx, pollset);
}
grpc_error *grpc_pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
@@ -260,8 +276,6 @@ void grpc_pollset_set_del_fd(grpc_exec_ctx *exec_ctx,
g_event_engine->pollset_set_del_fd(exec_ctx, pollset_set, fd);
}
-grpc_error *grpc_kick_poller(void) { return g_event_engine->kick_poller(); }
-
#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
grpc_workqueue *grpc_workqueue_ref(grpc_workqueue *workqueue, const char *file,
int line, const char *reason) {
diff --git a/src/core/lib/iomgr/ev_posix.h b/src/core/lib/iomgr/ev_posix.h
index becc4d359e..80619aab5f 100644
--- a/src/core/lib/iomgr/ev_posix.h
+++ b/src/core/lib/iomgr/ev_posix.h
@@ -36,12 +36,15 @@
#include <poll.h>
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/iomgr/exec_ctx.h"
#include "src/core/lib/iomgr/pollset.h"
#include "src/core/lib/iomgr/pollset_set.h"
#include "src/core/lib/iomgr/wakeup_fd_posix.h"
#include "src/core/lib/iomgr/workqueue.h"
+extern grpc_tracer_flag grpc_polling_trace; /* Disabled by default */
+
typedef struct grpc_fd grpc_fd;
typedef struct grpc_event_engine_vtable {
@@ -64,7 +67,7 @@ typedef struct grpc_event_engine_vtable {
void (*pollset_init)(grpc_pollset *pollset, gpr_mu **mu);
void (*pollset_shutdown)(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
grpc_closure *closure);
- void (*pollset_destroy)(grpc_pollset *pollset);
+ void (*pollset_destroy)(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset);
grpc_error *(*pollset_work)(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
grpc_pollset_worker **worker, gpr_timespec now,
gpr_timespec deadline);
@@ -93,8 +96,6 @@ typedef struct grpc_event_engine_vtable {
void (*pollset_set_del_fd)(grpc_exec_ctx *exec_ctx,
grpc_pollset_set *pollset_set, grpc_fd *fd);
- grpc_error *(*kick_poller)(void);
-
void (*shutdown_engine)(void);
#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
diff --git a/src/core/lib/iomgr/ev_windows.c b/src/core/lib/iomgr/ev_windows.c
new file mode 100644
index 0000000000..7bf7327823
--- /dev/null
+++ b/src/core/lib/iomgr/ev_windows.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * Copyright 2015, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "src/core/lib/iomgr/port.h"
+
+#ifdef GRPC_WINSOCK_SOCKET
+
+#include "src/core/lib/debug/trace.h"
+/* Definition of the polling tracer flag for builds where GRPC_WINSOCK_SOCKET
+ * is defined (this whole definition sits inside that #ifdef). */
+grpc_tracer_flag grpc_polling_trace =
+ GRPC_TRACER_INITIALIZER(false); /* Disabled by default */
+
+#endif // GRPC_WINSOCK_SOCKET
diff --git a/src/core/lib/iomgr/exec_ctx.c b/src/core/lib/iomgr/exec_ctx.c
index 2532a708e7..318bb2b713 100644
--- a/src/core/lib/iomgr/exec_ctx.c
+++ b/src/core/lib/iomgr/exec_ctx.c
@@ -62,6 +62,11 @@ bool grpc_always_ready_to_finish(grpc_exec_ctx *exec_ctx, void *arg_ignored) {
return true;
}
+bool grpc_exec_ctx_has_work(grpc_exec_ctx *exec_ctx) { /* Returns true when exec_ctx
+ * has an active combiner or its closure list is not empty. */
+ return exec_ctx->active_combiner != NULL ||
+ !grpc_closure_list_empty(exec_ctx->closure_list);
+}
+
bool grpc_exec_ctx_flush(grpc_exec_ctx *exec_ctx) {
bool did_something = 0;
GPR_TIMER_BEGIN("grpc_exec_ctx_flush", 0);
diff --git a/src/core/lib/iomgr/exec_ctx.h b/src/core/lib/iomgr/exec_ctx.h
index f99a0fee5f..759a3ae2d5 100644
--- a/src/core/lib/iomgr/exec_ctx.h
+++ b/src/core/lib/iomgr/exec_ctx.h
@@ -93,6 +93,8 @@ struct grpc_exec_ctx {
extern grpc_closure_scheduler *grpc_schedule_on_exec_ctx;
+bool grpc_exec_ctx_has_work(grpc_exec_ctx *exec_ctx);
+
/** Flush any work that has been enqueued onto this grpc_exec_ctx.
* Caller must guarantee that no interfering locks are held.
* Returns true if work was performed, false otherwise. */
diff --git a/src/core/lib/iomgr/iomgr.c b/src/core/lib/iomgr/iomgr.c
index 001e528409..1fd41c2f88 100644
--- a/src/core/lib/iomgr/iomgr.c
+++ b/src/core/lib/iomgr/iomgr.c
@@ -47,6 +47,7 @@
#include "src/core/lib/iomgr/iomgr_internal.h"
#include "src/core/lib/iomgr/network_status_tracker.h"
#include "src/core/lib/iomgr/timer.h"
+#include "src/core/lib/iomgr/timer_manager.h"
#include "src/core/lib/support/env.h"
#include "src/core/lib/support/string.h"
@@ -67,6 +68,8 @@ void grpc_iomgr_init(void) {
grpc_iomgr_platform_init();
}
+void grpc_iomgr_start(void) { grpc_timer_manager_init(); } /* the only thing started here is the timer manager */
+
static size_t count_objects(void) {
grpc_iomgr_object *obj;
size_t n = 0;
@@ -88,6 +91,7 @@ void grpc_iomgr_shutdown(grpc_exec_ctx *exec_ctx) {
gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_seconds(10, GPR_TIMESPAN));
gpr_timespec last_warning_time = gpr_now(GPR_CLOCK_REALTIME);
+ grpc_timer_manager_shutdown();
grpc_iomgr_platform_flush();
gpr_mu_lock(&g_mu);
diff --git a/src/core/lib/iomgr/iomgr.h b/src/core/lib/iomgr/iomgr.h
index 245a1e08aa..6e2e023615 100644
--- a/src/core/lib/iomgr/iomgr.h
+++ b/src/core/lib/iomgr/iomgr.h
@@ -40,6 +40,9 @@
/** Initializes the iomgr. */
void grpc_iomgr_init(void);
+/** Starts any background threads for iomgr. */
+void grpc_iomgr_start(void);
+
/** Signals the intention to shutdown the iomgr. Expects to be able to flush
* exec_ctx. */
void grpc_iomgr_shutdown(grpc_exec_ctx *exec_ctx);
diff --git a/src/core/lib/iomgr/is_epollexclusive_available.c b/src/core/lib/iomgr/is_epollexclusive_available.c
new file mode 100644
index 0000000000..34fcf313e5
--- /dev/null
+++ b/src/core/lib/iomgr/is_epollexclusive_available.c
@@ -0,0 +1,116 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "src/core/lib/iomgr/port.h"
+
+#include "src/core/lib/iomgr/is_epollexclusive_available.h"
+
+#ifdef GRPC_LINUX_EPOLL
+
+#include <grpc/support/log.h>
+
+#include <errno.h>
+#include <sys/eventfd.h>
+#include <unistd.h>
+
+#include "src/core/lib/iomgr/sys_epoll_wrapper.h"
+
+/* Reports whether EPOLLEXCLUSIVE can be used, which decides whether the
+ * epollex polling engine is selected (only relevant on linux kernels
+ * supporting epoll()).
+ *
+ * The probe adds an eventfd to a fresh epoll set with the flag combination
+ * EPOLLEXCLUSIVE | EPOLLONESHOT, which should cause an error on EPOLLEXCLUSIVE
+ * enabled kernels:
+ *   - epoll_ctl fails with EINVAL      -> returns true;
+ *   - epoll_ctl succeeds               -> taken as evidence of no
+ *                                         EPOLLEXCLUSIVE support, returns false;
+ *   - any other failure along the way  -> returns false.
+ *
+ * The reason for a negative answer is logged at most once (guarded by a
+ * function-local static flag). Every descriptor opened by the probe is closed
+ * before returning. */
+bool grpc_is_epollexclusive_available(void) {
+  static bool logged_why_not = false;
+
+  int fd = epoll_create1(EPOLL_CLOEXEC);
+  if (fd < 0) {
+    /* Log errno (the actual failure reason) instead of the failed call's
+       return value; capture it before any other call can overwrite it. */
+    int err = errno;
+    if (!logged_why_not) {
+      gpr_log(GPR_ERROR,
+              "epoll_create1 failed with error: %d. Not using epollex polling "
+              "engine.",
+              err);
+      logged_why_not = true;
+    }
+    return false;
+  }
+  int evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+  if (evfd < 0) {
+    /* Previously this logged `fd`, i.e. the valid epoll descriptor, which says
+       nothing about why eventfd() failed. */
+    int err = errno;
+    if (!logged_why_not) {
+      gpr_log(GPR_ERROR,
+              "eventfd failed with error: %d. Not using epollex polling "
+              "engine.",
+              err);
+      logged_why_not = true;
+    }
+    close(fd);
+    return false;
+  }
+  struct epoll_event ev = {
+      /* choose events that should cause an error on
+         EPOLLEXCLUSIVE enabled kernels - specifically the combination of
+         EPOLLONESHOT and EPOLLEXCLUSIVE */
+      .events = (uint32_t)(EPOLLET | EPOLLIN | EPOLLEXCLUSIVE | EPOLLONESHOT),
+      .data.ptr = NULL};
+  if (epoll_ctl(fd, EPOLL_CTL_ADD, evfd, &ev) != 0) {
+    int err = errno;
+    if (err != EINVAL) {
+      /* Failed for a reason other than the expected rejection of the flag
+         combination: the probe is inconclusive, so report "unavailable". */
+      if (!logged_why_not) {
+        gpr_log(
+            GPR_ERROR,
+            "epoll_ctl with EPOLLEXCLUSIVE | EPOLLONESHOT failed with error: "
+            "%d. Not using epollex polling engine.",
+            err);
+        logged_why_not = true;
+      }
+      close(fd);
+      close(evfd);
+      return false;
+    }
+    /* EINVAL: the expected rejection; fall through to the success path. */
+  } else {
+    if (!logged_why_not) {
+      gpr_log(GPR_ERROR,
+              "epoll_ctl with EPOLLEXCLUSIVE | EPOLLONESHOT succeeded. This is "
+              "evidence of no EPOLLEXCLUSIVE support. Not using "
+              "epollex polling engine.");
+      logged_why_not = true;
+    }
+    close(fd);
+    close(evfd);
+    return false;
+  }
+  close(evfd);
+  close(fd);
+  return true;
+}
+
+#else
+
+/* Build without GRPC_LINUX_EPOLL: there is nothing to probe, so
+   EPOLLEXCLUSIVE is always reported as unavailable. */
+bool grpc_is_epollexclusive_available(void) {
+  return false;
+}
+
+#endif
diff --git a/src/core/lib/iomgr/is_epollexclusive_available.h b/src/core/lib/iomgr/is_epollexclusive_available.h
new file mode 100644
index 0000000000..b65b819e74
--- /dev/null
+++ b/src/core/lib/iomgr/is_epollexclusive_available.h
@@ -0,0 +1,41 @@
+/*
+ *
+ * Copyright 2015, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef GRPC_CORE_LIB_IOMGR_IS_EPOLLEXCLUSIVE_AVAILABLE_H
+#define GRPC_CORE_LIB_IOMGR_IS_EPOLLEXCLUSIVE_AVAILABLE_H
+
+#include <stdbool.h>
+
+bool grpc_is_epollexclusive_available(void); /* true when the EPOLLEXCLUSIVE
+   probe indicates the epollex polling engine can be used; false otherwise,
+   including on builds without GRPC_LINUX_EPOLL */
+
+#endif /* GRPC_CORE_LIB_IOMGR_IS_EPOLLEXCLUSIVE_AVAILABLE_H */
diff --git a/src/core/lib/iomgr/lockfree_event.c b/src/core/lib/iomgr/lockfree_event.c
index 17e3bbf727..898ec1cb1b 100644
--- a/src/core/lib/iomgr/lockfree_event.c
+++ b/src/core/lib/iomgr/lockfree_event.c
@@ -35,6 +35,10 @@
#include <grpc/support/log.h>
+#include "src/core/lib/debug/trace.h"
+
+extern grpc_tracer_flag grpc_polling_trace;
+
/* 'state' holds the to call when the fd is readable or writable respectively.
It can contain one of the following values:
CLOSURE_READY : The fd has an I/O event of interest but there is no
@@ -93,6 +97,10 @@ void grpc_lfev_notify_on(grpc_exec_ctx *exec_ctx, gpr_atm *state,
grpc_closure *closure) {
while (true) {
gpr_atm curr = gpr_atm_no_barrier_load(state);
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "lfev_notify_on: %p curr=%p closure=%p", state,
+ (void *)curr, closure);
+ }
switch (curr) {
case CLOSURE_NOT_READY: {
/* CLOSURE_NOT_READY -> <closure>.
@@ -155,6 +163,10 @@ bool grpc_lfev_set_shutdown(grpc_exec_ctx *exec_ctx, gpr_atm *state,
while (true) {
gpr_atm curr = gpr_atm_no_barrier_load(state);
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "lfev_set_shutdown: %p curr=%p err=%s", state,
+ (void *)curr, grpc_error_string(shutdown_err));
+ }
switch (curr) {
case CLOSURE_READY:
case CLOSURE_NOT_READY:
@@ -200,6 +212,10 @@ void grpc_lfev_set_ready(grpc_exec_ctx *exec_ctx, gpr_atm *state) {
while (true) {
gpr_atm curr = gpr_atm_no_barrier_load(state);
+ if (GRPC_TRACER_ON(grpc_polling_trace)) {
+ gpr_log(GPR_DEBUG, "lfev_set_ready: %p curr=%p", state, (void *)curr);
+ }
+
switch (curr) {
case CLOSURE_READY: {
/* Already ready. We are done here */
diff --git a/src/core/lib/iomgr/pollset.h b/src/core/lib/iomgr/pollset.h
index 9bf3cdac89..69e20098d7 100644
--- a/src/core/lib/iomgr/pollset.h
+++ b/src/core/lib/iomgr/pollset.h
@@ -40,8 +40,6 @@
#include "src/core/lib/iomgr/exec_ctx.h"
-#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)
-
/* A grpc_pollset is a set of file descriptors that a higher level item is
interested in. For example:
- a server will typically keep a pollset containing all connected channels,
@@ -59,7 +57,7 @@ void grpc_pollset_init(grpc_pollset *pollset, gpr_mu **mu);
* pollset's mutex must be held */
void grpc_pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
grpc_closure *closure);
-void grpc_pollset_destroy(grpc_pollset *pollset);
+void grpc_pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset);
/* Do some work on a pollset.
May involve invoking asynchronous callbacks, or actually polling file
@@ -88,8 +86,7 @@ grpc_error *grpc_pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
gpr_timespec deadline) GRPC_MUST_USE_RESULT;
/* Break one polling thread out of polling work for this pollset.
- If specific_worker is GRPC_POLLSET_KICK_BROADCAST, kick ALL the workers.
- Otherwise, if specific_worker is non-NULL, then kick that worker. */
+ If specific_worker is non-NULL, then kick that worker. */
grpc_error *grpc_pollset_kick(grpc_pollset *pollset,
grpc_pollset_worker *specific_worker)
GRPC_MUST_USE_RESULT;
diff --git a/src/core/lib/iomgr/pollset_uv.c b/src/core/lib/iomgr/pollset_uv.c
index a2f81bcd78..5923da98e2 100644
--- a/src/core/lib/iomgr/pollset_uv.c
+++ b/src/core/lib/iomgr/pollset_uv.c
@@ -106,7 +106,7 @@ void grpc_pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
grpc_closure_sched(exec_ctx, closure, GRPC_ERROR_NONE);
}
-void grpc_pollset_destroy(grpc_pollset *pollset) {
+void grpc_pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
uv_close((uv_handle_t *)&pollset->timer, timer_close_cb);
// timer.data is a boolean indicating that the timer has finished closing
pollset->timer.data = (void *)0;
diff --git a/src/core/lib/iomgr/pollset_windows.c b/src/core/lib/iomgr/pollset_windows.c
index 04c6b71747..b5f454cfa9 100644
--- a/src/core/lib/iomgr/pollset_windows.c
+++ b/src/core/lib/iomgr/pollset_windows.c
@@ -43,6 +43,8 @@
#include "src/core/lib/iomgr/pollset.h"
#include "src/core/lib/iomgr/pollset_windows.h"
+#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)
+
gpr_mu grpc_polling_mu;
static grpc_pollset_worker *g_active_poller;
static grpc_pollset_worker g_global_root_worker;
@@ -114,7 +116,7 @@ void grpc_pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
}
}
-void grpc_pollset_destroy(grpc_pollset *pollset) {}
+/* Intentionally empty: this implementation performs no work when a pollset is
+   destroyed, and neither parameter is used. */
+void grpc_pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
+  /* nothing to do */
+}
grpc_error *grpc_pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
grpc_pollset_worker **worker_hdl,
@@ -227,6 +229,4 @@ grpc_error *grpc_pollset_kick(grpc_pollset *p,
return GRPC_ERROR_NONE;
}
-void grpc_kick_poller(void) { grpc_iocp_kick(); }
-
#endif /* GRPC_WINSOCK_SOCKET */
diff --git a/src/core/lib/iomgr/port.h b/src/core/lib/iomgr/port.h
index 269dc35003..2a553f4114 100644
--- a/src/core/lib/iomgr/port.h
+++ b/src/core/lib/iomgr/port.h
@@ -88,6 +88,7 @@
#ifndef __GLIBC__
#define GRPC_LINUX_EPOLL 1
#define GRPC_LINUX_EVENTFD 1
+#define GRPC_MSG_IOVLEN_TYPE int
#endif
#ifndef GRPC_LINUX_EVENTFD
#define GRPC_POSIX_NO_SPECIAL_WAKEUP_FD 1
diff --git a/src/core/lib/iomgr/resource_quota.c b/src/core/lib/iomgr/resource_quota.c
index c3ee878651..6b2b85cce0 100644
--- a/src/core/lib/iomgr/resource_quota.c
+++ b/src/core/lib/iomgr/resource_quota.c
@@ -44,7 +44,7 @@
#include "src/core/lib/iomgr/combiner.h"
-int grpc_resource_quota_trace = 0;
+grpc_tracer_flag grpc_resource_quota_trace = GRPC_TRACER_INITIALIZER(false);
#define MEMORY_USAGE_ESTIMATION_MAX 65536
@@ -307,13 +307,14 @@ static bool rq_alloc(grpc_exec_ctx *exec_ctx,
resource_user->free_pool = 0;
resource_quota->free_pool -= amt;
rq_update_estimate(resource_quota);
- if (grpc_resource_quota_trace) {
+ if (GRPC_TRACER_ON(grpc_resource_quota_trace)) {
gpr_log(GPR_DEBUG, "RQ %s %s: grant alloc %" PRId64
" bytes; rq_free_pool -> %" PRId64,
resource_quota->name, resource_user->name, amt,
resource_quota->free_pool);
}
- } else if (grpc_resource_quota_trace && resource_user->free_pool >= 0) {
+ } else if (GRPC_TRACER_ON(grpc_resource_quota_trace) &&
+ resource_user->free_pool >= 0) {
gpr_log(GPR_DEBUG, "RQ %s %s: discard already satisfied alloc request",
resource_quota->name, resource_user->name);
}
@@ -342,7 +343,7 @@ static bool rq_reclaim_from_per_user_free_pool(
resource_user->free_pool = 0;
resource_quota->free_pool += amt;
rq_update_estimate(resource_quota);
- if (grpc_resource_quota_trace) {
+ if (GRPC_TRACER_ON(grpc_resource_quota_trace)) {
gpr_log(GPR_DEBUG, "RQ %s %s: reclaim_from_per_user_free_pool %" PRId64
" bytes; rq_free_pool -> %" PRId64,
resource_quota->name, resource_user->name, amt,
@@ -365,7 +366,7 @@ static bool rq_reclaim(grpc_exec_ctx *exec_ctx,
: GRPC_RULIST_RECLAIMER_BENIGN;
grpc_resource_user *resource_user = rulist_pop_head(resource_quota, list);
if (resource_user == NULL) return false;
- if (grpc_resource_quota_trace) {
+ if (GRPC_TRACER_ON(grpc_resource_quota_trace)) {
gpr_log(GPR_DEBUG, "RQ %s %s: initiate %s reclamation",
resource_quota->name, resource_user->name,
destructive ? "destructive" : "benign");
@@ -786,7 +787,7 @@ void grpc_resource_user_alloc(grpc_exec_ctx *exec_ctx,
gpr_mu_lock(&resource_user->mu);
ru_ref_by(resource_user, (gpr_atm)size);
resource_user->free_pool -= (int64_t)size;
- if (grpc_resource_quota_trace) {
+ if (GRPC_TRACER_ON(grpc_resource_quota_trace)) {
gpr_log(GPR_DEBUG, "RQ %s %s: alloc %" PRIdPTR "; free_pool -> %" PRId64,
resource_user->resource_quota->name, resource_user->name, size,
resource_user->free_pool);
@@ -810,7 +811,7 @@ void grpc_resource_user_free(grpc_exec_ctx *exec_ctx,
gpr_mu_lock(&resource_user->mu);
bool was_zero_or_negative = resource_user->free_pool <= 0;
resource_user->free_pool += (int64_t)size;
- if (grpc_resource_quota_trace) {
+ if (GRPC_TRACER_ON(grpc_resource_quota_trace)) {
gpr_log(GPR_DEBUG, "RQ %s %s: free %" PRIdPTR "; free_pool -> %" PRId64,
resource_user->resource_quota->name, resource_user->name, size,
resource_user->free_pool);
@@ -839,7 +840,7 @@ void grpc_resource_user_post_reclaimer(grpc_exec_ctx *exec_ctx,
void grpc_resource_user_finish_reclamation(grpc_exec_ctx *exec_ctx,
grpc_resource_user *resource_user) {
- if (grpc_resource_quota_trace) {
+ if (GRPC_TRACER_ON(grpc_resource_quota_trace)) {
gpr_log(GPR_DEBUG, "RQ %s %s: reclamation complete",
resource_user->resource_quota->name, resource_user->name);
}
diff --git a/src/core/lib/iomgr/resource_quota.h b/src/core/lib/iomgr/resource_quota.h
index 6f99be0d51..51122dad01 100644
--- a/src/core/lib/iomgr/resource_quota.h
+++ b/src/core/lib/iomgr/resource_quota.h
@@ -36,6 +36,7 @@
#include <grpc/grpc.h>
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/iomgr/exec_ctx.h"
/** \file Tracks resource usage against a pool.
@@ -75,7 +76,7 @@
maintain lists of users (which users arrange to leave before they are
destroyed) */
-extern int grpc_resource_quota_trace;
+extern grpc_tracer_flag grpc_resource_quota_trace;
grpc_resource_quota *grpc_resource_quota_ref_internal(
grpc_resource_quota *resource_quota);
diff --git a/src/core/lib/iomgr/sys_epoll_wrapper.h b/src/core/lib/iomgr/sys_epoll_wrapper.h
new file mode 100644
index 0000000000..2f08423193
--- /dev/null
+++ b/src/core/lib/iomgr/sys_epoll_wrapper.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef GRPC_CORE_LIB_IOMGR_SYS_EPOLL_WRAPPER_H
+#define GRPC_CORE_LIB_IOMGR_SYS_EPOLL_WRAPPER_H
+
+#include <sys/epoll.h>
+
+#ifndef EPOLLEXCLUSIVE /* <sys/epoll.h> above did not define the flag */
+#define EPOLLEXCLUSIVE (1 << 28) /* fallback so includers can always name it; NOTE(review): presumably the kernel's bit value - confirm */
+#endif
+
+#endif /* GRPC_CORE_LIB_IOMGR_SYS_EPOLL_WRAPPER_H */
diff --git a/src/core/lib/iomgr/tcp_client_posix.c b/src/core/lib/iomgr/tcp_client_posix.c
index a2692707d9..5c7da999e0 100644
--- a/src/core/lib/iomgr/tcp_client_posix.c
+++ b/src/core/lib/iomgr/tcp_client_posix.c
@@ -58,7 +58,7 @@
#include "src/core/lib/iomgr/unix_sockets_posix.h"
#include "src/core/lib/support/string.h"
-extern int grpc_tcp_trace;
+extern grpc_tracer_flag grpc_tcp_trace;
typedef struct {
gpr_mu mu;
@@ -114,7 +114,7 @@ done:
static void tc_on_alarm(grpc_exec_ctx *exec_ctx, void *acp, grpc_error *error) {
int done;
async_connect *ac = acp;
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
const char *str = grpc_error_string(error);
gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: on_alarm: error=%s", ac->addr_str,
str);
@@ -152,7 +152,7 @@ static void on_writable(grpc_exec_ctx *exec_ctx, void *acp, grpc_error *error) {
GRPC_ERROR_REF(error);
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
const char *str = grpc_error_string(error);
gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: on_writable: error=%s",
ac->addr_str, str);
@@ -330,9 +330,9 @@ static void tcp_client_connect_impl(grpc_exec_ctx *exec_ctx,
grpc_schedule_on_exec_ctx);
ac->channel_args = grpc_channel_args_copy(channel_args);
- if (grpc_tcp_trace) {
- gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: asynchronously connecting",
- ac->addr_str);
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
+ gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: asynchronously connecting fd %p",
+ ac->addr_str, fdobj);
}
gpr_mu_lock(&ac->mu);
diff --git a/src/core/lib/iomgr/tcp_client_uv.c b/src/core/lib/iomgr/tcp_client_uv.c
index 682c24ed56..f0856a76d4 100644
--- a/src/core/lib/iomgr/tcp_client_uv.c
+++ b/src/core/lib/iomgr/tcp_client_uv.c
@@ -46,7 +46,7 @@
#include "src/core/lib/iomgr/tcp_uv.h"
#include "src/core/lib/iomgr/timer.h"
-extern int grpc_tcp_trace;
+extern grpc_tracer_flag grpc_tcp_trace;
typedef struct grpc_uv_tcp_connect {
uv_connect_t connect_req;
@@ -72,7 +72,7 @@ static void uv_tc_on_alarm(grpc_exec_ctx *exec_ctx, void *acp,
grpc_error *error) {
int done;
grpc_uv_tcp_connect *connect = acp;
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
const char *str = grpc_error_string(error);
gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: on_alarm: error=%s",
connect->addr_name, str);
@@ -156,7 +156,7 @@ static void tcp_client_connect_impl(grpc_exec_ctx *exec_ctx,
uv_tcp_init(uv_default_loop(), connect->tcp_handle);
connect->connect_req.data = connect;
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: asynchronously connecting",
connect->addr_name);
}
diff --git a/src/core/lib/iomgr/tcp_posix.c b/src/core/lib/iomgr/tcp_posix.c
index 5f4b38de2b..5d360b0b80 100644
--- a/src/core/lib/iomgr/tcp_posix.c
+++ b/src/core/lib/iomgr/tcp_posix.c
@@ -74,7 +74,7 @@ typedef GRPC_MSG_IOVLEN_TYPE msg_iovlen_type;
typedef size_t msg_iovlen_type;
#endif
-int grpc_tcp_trace = 0;
+grpc_tracer_flag grpc_tcp_trace = GRPC_TRACER_INITIALIZER(false);
typedef struct {
grpc_endpoint base;
@@ -221,7 +221,7 @@ static void call_read_cb(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp,
grpc_error *error) {
grpc_closure *cb = tcp->read_cb;
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
size_t i;
const char *str = grpc_error_string(error);
gpr_log(GPR_DEBUG, "read: error=%s", str);
@@ -468,14 +468,14 @@ static void tcp_handle_write(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */,
}
if (!tcp_flush(tcp, &error)) {
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
gpr_log(GPR_DEBUG, "write: delayed");
}
grpc_fd_notify_on_write(exec_ctx, tcp->em_fd, &tcp->write_closure);
} else {
cb = tcp->write_cb;
tcp->write_cb = NULL;
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
const char *str = grpc_error_string(error);
gpr_log(GPR_DEBUG, "write: %s", str);
}
@@ -490,7 +490,7 @@ static void tcp_write(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep,
grpc_tcp *tcp = (grpc_tcp *)ep;
grpc_error *error = GRPC_ERROR_NONE;
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
size_t i;
for (i = 0; i < buf->count; i++) {
@@ -521,12 +521,12 @@ static void tcp_write(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep,
if (!tcp_flush(tcp, &error)) {
TCP_REF(tcp, "write");
tcp->write_cb = cb;
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
gpr_log(GPR_DEBUG, "write: delayed");
}
grpc_fd_notify_on_write(exec_ctx, tcp->em_fd, &tcp->write_closure);
} else {
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
const char *str = grpc_error_string(error);
gpr_log(GPR_DEBUG, "write: %s", str);
}
diff --git a/src/core/lib/iomgr/tcp_posix.h b/src/core/lib/iomgr/tcp_posix.h
index 1ad5788331..4ad60c116e 100644
--- a/src/core/lib/iomgr/tcp_posix.h
+++ b/src/core/lib/iomgr/tcp_posix.h
@@ -44,10 +44,11 @@
otherwise specified.
*/
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/iomgr/endpoint.h"
#include "src/core/lib/iomgr/ev_posix.h"
-extern int grpc_tcp_trace;
+extern grpc_tracer_flag grpc_tcp_trace;
/* Create a tcp endpoint given a file desciptor and a read slice size.
Takes ownership of fd. */
diff --git a/src/core/lib/iomgr/tcp_server_posix.c b/src/core/lib/iomgr/tcp_server_posix.c
index e66ffc9b1c..08997b5e2b 100644
--- a/src/core/lib/iomgr/tcp_server_posix.c
+++ b/src/core/lib/iomgr/tcp_server_posix.c
@@ -257,7 +257,7 @@ static void on_read(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *err) {
addr_str = grpc_sockaddr_to_uri(&addr);
gpr_asprintf(&name, "tcp-server-connection:%s", addr_str);
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
gpr_log(GPR_DEBUG, "SERVER_CONNECT: incoming connection: %s", addr_str);
}
diff --git a/src/core/lib/iomgr/tcp_server_uv.c b/src/core/lib/iomgr/tcp_server_uv.c
index e9246948f5..d446e5312a 100644
--- a/src/core/lib/iomgr/tcp_server_uv.c
+++ b/src/core/lib/iomgr/tcp_server_uv.c
@@ -56,6 +56,8 @@ struct grpc_tcp_listener {
int port;
/* linked list */
struct grpc_tcp_listener *next;
+
+ bool closed;
};
struct grpc_tcp_server {
@@ -77,6 +79,8 @@ struct grpc_tcp_server {
/* shutdown callback */
grpc_closure *shutdown_complete;
+ bool shutdown;
+
grpc_resource_quota *resource_quota;
};
@@ -109,6 +113,7 @@ grpc_error *grpc_tcp_server_create(grpc_exec_ctx *exec_ctx,
s->shutdown_starting.head = NULL;
s->shutdown_starting.tail = NULL;
s->shutdown_complete = shutdown_complete;
+ s->shutdown = false;
*server = s;
return GRPC_ERROR_NONE;
}
@@ -125,6 +130,7 @@ void grpc_tcp_server_shutdown_starting_add(grpc_tcp_server *s,
}
static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) {
+ GPR_ASSERT(s->shutdown);
if (s->shutdown_complete != NULL) {
grpc_closure_sched(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE);
}
@@ -144,21 +150,31 @@ static void handle_close_callback(uv_handle_t *handle) {
grpc_tcp_listener *sp = (grpc_tcp_listener *)handle->data;
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
sp->server->open_ports--;
- if (sp->server->open_ports == 0) {
+ if (sp->server->open_ports == 0 && sp->server->shutdown) {
finish_shutdown(&exec_ctx, sp->server);
}
grpc_exec_ctx_finish(&exec_ctx);
}
+/* Requests closure of the listener's uv handle at most once. The `closed`
+   flag is set before calling uv_close(), so a repeated call for the same
+   listener returns without touching the handle again. handle_close_callback
+   is passed to uv_close() as the close callback. */
+static void close_listener(grpc_tcp_listener *sp) {
+  if (sp->closed) {
+    return;
+  }
+  sp->closed = true;
+  uv_close((uv_handle_t *)sp->handle, handle_close_callback);
+}
+
static void tcp_server_destroy(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) {
int immediately_done = 0;
grpc_tcp_listener *sp;
+ GPR_ASSERT(!s->shutdown);
+ s->shutdown = true;
+
if (s->open_ports == 0) {
immediately_done = 1;
}
for (sp = s->head; sp; sp = sp->next) {
- uv_close((uv_handle_t *)sp->handle, handle_close_callback);
+ close_listener(sp);
}
if (immediately_done) {
@@ -196,9 +212,14 @@ static void on_connect(uv_stream_t *server, int status) {
int err;
if (status < 0) {
- gpr_log(GPR_INFO, "Skipping on_accept due to error: %s",
- uv_strerror(status));
- return;
+ switch (status) {
+ case UV_EINTR:
+ case UV_EAGAIN:
+ return;
+ default:
+ close_listener(sp);
+ return;
+ }
}
client = gpr_malloc(sizeof(uv_tcp_t));
@@ -287,6 +308,7 @@ static grpc_error *add_socket_to_server(grpc_tcp_server *s, uv_tcp_t *handle,
sp->handle = handle;
sp->port = port;
sp->port_index = port_index;
+ sp->closed = false;
handle->data = sp;
s->open_ports++;
GPR_ASSERT(sp->handle);
diff --git a/src/core/lib/iomgr/tcp_uv.c b/src/core/lib/iomgr/tcp_uv.c
index 8e8db9f7b4..dc23e4f521 100644
--- a/src/core/lib/iomgr/tcp_uv.c
+++ b/src/core/lib/iomgr/tcp_uv.c
@@ -52,7 +52,7 @@
#include "src/core/lib/slice/slice_string_helpers.h"
#include "src/core/lib/support/string.h"
-int grpc_tcp_trace = 0;
+grpc_tracer_flag grpc_tcp_trace = GRPC_TRACER_INITIALIZER(false);
typedef struct {
grpc_endpoint base;
@@ -88,12 +88,12 @@ static void tcp_free(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) {
#ifdef GRPC_TCP_REFCOUNT_DEBUG
#define TCP_UNREF(exec_ctx, tcp, reason) \
tcp_unref((exec_ctx), (tcp), (reason), __FILE__, __LINE__)
-#define TCP_REF(tcp, reason) \
- tcp_ref((exec_ctx), (tcp), (reason), __FILE__, __LINE__)
+#define TCP_REF(tcp, reason) tcp_ref((tcp), (reason), __FILE__, __LINE__)
static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp,
const char *reason, const char *file, int line) {
- gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, "TCP unref %p : %s %d -> %d", tcp,
- reason, tcp->refcount.count, tcp->refcount.count - 1);
+ gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG,
+ "TCP unref %p : %s %" PRIiPTR " -> %" PRIiPTR, tcp, reason,
+ tcp->refcount.count, tcp->refcount.count - 1);
if (gpr_unref(&tcp->refcount)) {
tcp_free(exec_ctx, tcp);
}
@@ -101,8 +101,9 @@ static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp,
static void tcp_ref(grpc_tcp *tcp, const char *reason, const char *file,
int line) {
- gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, "TCP ref %p : %s %d -> %d", tcp,
- reason, tcp->refcount.count, tcp->refcount.count + 1);
+ gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG,
+ "TCP ref %p : %s %" PRIiPTR " -> %" PRIiPTR, tcp, reason,
+ tcp->refcount.count, tcp->refcount.count + 1);
gpr_ref(&tcp->refcount);
}
#else
@@ -158,7 +159,7 @@ static void read_callback(uv_stream_t *stream, ssize_t nread,
sub = grpc_slice_sub_no_ref(tcp->read_slice, 0, (size_t)nread);
grpc_slice_buffer_add(tcp->read_slices, sub);
error = GRPC_ERROR_NONE;
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
size_t i;
const char *str = grpc_error_string(error);
gpr_log(GPR_DEBUG, "read: error=%s", str);
@@ -199,7 +200,7 @@ static void uv_endpoint_read(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep,
grpc_slice_from_static_string(uv_strerror(status)));
grpc_closure_sched(exec_ctx, cb, error);
}
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
const char *str = grpc_error_string(error);
gpr_log(GPR_DEBUG, "Initiating read on %p: error=%s", tcp, str);
}
@@ -217,7 +218,7 @@ static void write_callback(uv_write_t *req, int status) {
} else {
error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("TCP Write failed");
}
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
const char *str = grpc_error_string(error);
gpr_log(GPR_DEBUG, "write complete on %p: error=%s", tcp, str);
}
@@ -238,7 +239,7 @@ static void uv_endpoint_write(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep,
grpc_slice *slice;
uv_write_t *write_req;
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
size_t j;
for (j = 0; j < write_slices->count; j++) {
@@ -311,6 +312,7 @@ static void uv_endpoint_shutdown(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep,
tcp->shutting_down = true;
uv_shutdown_t *req = &tcp->shutdown_req;
uv_shutdown(req, (uv_stream_t *)tcp->handle, shutdown_callback);
+ grpc_resource_user_shutdown(exec_ctx, tcp->resource_user);
}
GRPC_ERROR_UNREF(why);
}
@@ -346,7 +348,7 @@ grpc_endpoint *grpc_tcp_create(uv_tcp_t *handle,
char *peer_string) {
grpc_tcp *tcp = (grpc_tcp *)gpr_malloc(sizeof(grpc_tcp));
- if (grpc_tcp_trace) {
+ if (GRPC_TRACER_ON(grpc_tcp_trace)) {
gpr_log(GPR_DEBUG, "Creating TCP endpoint %p", tcp);
}
diff --git a/src/core/lib/iomgr/tcp_uv.h b/src/core/lib/iomgr/tcp_uv.h
index 970fcafe4a..106bec5eca 100644
--- a/src/core/lib/iomgr/tcp_uv.h
+++ b/src/core/lib/iomgr/tcp_uv.h
@@ -44,11 +44,12 @@
otherwise specified.
*/
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/iomgr/endpoint.h"
#include <uv.h>
-extern int grpc_tcp_trace;
+extern grpc_tracer_flag grpc_tcp_trace;
#define GRPC_TCP_DEFAULT_READ_SLICE_SIZE 8192
diff --git a/src/core/lib/iomgr/timer_generic.c b/src/core/lib/iomgr/timer_generic.c
index d8e6068431..b28340b71c 100644
--- a/src/core/lib/iomgr/timer_generic.c
+++ b/src/core/lib/iomgr/timer_generic.c
@@ -56,8 +56,8 @@
#define MIN_QUEUE_WINDOW_DURATION 0.01
#define MAX_QUEUE_WINDOW_DURATION 1
-int grpc_timer_trace = 0;
-int grpc_timer_check_trace = 0;
+grpc_tracer_flag grpc_timer_trace = GRPC_TRACER_INITIALIZER(false);
+grpc_tracer_flag grpc_timer_check_trace = GRPC_TRACER_INITIALIZER(false);
typedef struct {
gpr_mu mu;
@@ -232,14 +232,13 @@ void grpc_timer_init(grpc_exec_ctx *exec_ctx, grpc_timer *timer,
GPR_ASSERT(deadline.clock_type == g_clock_type);
GPR_ASSERT(now.clock_type == g_clock_type);
timer->closure = closure;
- timer->deadline = timespec_to_atm_round_up(deadline);
+ gpr_atm deadline_atm = timer->deadline = timespec_to_atm_round_up(deadline);
- if (grpc_timer_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_trace)) {
gpr_log(GPR_DEBUG, "TIMER %p: SET %" PRId64 ".%09d [%" PRIdPTR
"] now %" PRId64 ".%09d [%" PRIdPTR "] call %p[%p]",
- timer, deadline.tv_sec, deadline.tv_nsec, timer->deadline,
- now.tv_sec, now.tv_nsec, timespec_to_atm_round_down(now), closure,
- closure->cb);
+ timer, deadline.tv_sec, deadline.tv_nsec, deadline_atm, now.tv_sec,
+ now.tv_nsec, timespec_to_atm_round_down(now), closure, closure->cb);
}
if (!g_shared_mutables.initialized) {
@@ -262,13 +261,13 @@ void grpc_timer_init(grpc_exec_ctx *exec_ctx, grpc_timer *timer,
grpc_time_averaged_stats_add_sample(&shard->stats,
ts_to_dbl(gpr_time_sub(deadline, now)));
- if (timer->deadline < shard->queue_deadline_cap) {
+ if (deadline_atm < shard->queue_deadline_cap) {
is_first_timer = grpc_timer_heap_add(&shard->heap, timer);
} else {
timer->heap_index = INVALID_HEAP_INDEX;
list_join(&shard->list, timer);
}
- if (grpc_timer_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_trace)) {
gpr_log(GPR_DEBUG, " .. add to shard %d with queue_deadline_cap=%" PRIdPTR
" => is_first_timer=%s",
(int)(shard - g_shards), shard->queue_deadline_cap,
@@ -289,16 +288,16 @@ void grpc_timer_init(grpc_exec_ctx *exec_ctx, grpc_timer *timer,
grpc_timer_check. */
if (is_first_timer) {
gpr_mu_lock(&g_shared_mutables.mu);
- if (grpc_timer_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_trace)) {
gpr_log(GPR_DEBUG, " .. old shard min_deadline=%" PRIdPTR,
shard->min_deadline);
}
- if (timer->deadline < shard->min_deadline) {
+ if (deadline_atm < shard->min_deadline) {
gpr_atm old_min_deadline = g_shard_queue[0]->min_deadline;
- shard->min_deadline = timer->deadline;
+ shard->min_deadline = deadline_atm;
note_deadline_change(shard);
- if (shard->shard_queue_index == 0 && timer->deadline < old_min_deadline) {
- gpr_atm_no_barrier_store(&g_shared_mutables.min_timer, timer->deadline);
+ if (shard->shard_queue_index == 0 && deadline_atm < old_min_deadline) {
+ gpr_atm_no_barrier_store(&g_shared_mutables.min_timer, deadline_atm);
grpc_kick_poller();
}
}
@@ -319,7 +318,7 @@ void grpc_timer_cancel(grpc_exec_ctx *exec_ctx, grpc_timer *timer) {
shard_type *shard = &g_shards[GPR_HASH_POINTER(timer, NUM_SHARDS)];
gpr_mu_lock(&shard->mu);
- if (grpc_timer_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_trace)) {
gpr_log(GPR_DEBUG, "TIMER %p: CANCEL pending=%s", timer,
timer->pending ? "true" : "false");
}
@@ -355,7 +354,7 @@ static int refill_queue(shard_type *shard, gpr_atm now) {
saturating_add(GPR_MAX(now, shard->queue_deadline_cap),
(gpr_atm)(deadline_delta * 1000.0));
- if (grpc_timer_check_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
gpr_log(GPR_DEBUG, " .. shard[%d]->queue_deadline_cap --> %" PRIdPTR,
(int)(shard - g_shards), shard->queue_deadline_cap);
}
@@ -363,7 +362,7 @@ static int refill_queue(shard_type *shard, gpr_atm now) {
next = timer->next;
if (timer->deadline < shard->queue_deadline_cap) {
- if (grpc_timer_check_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
gpr_log(GPR_DEBUG, " .. add timer with deadline %" PRIdPTR " to heap",
timer->deadline);
}
@@ -380,7 +379,7 @@ static int refill_queue(shard_type *shard, gpr_atm now) {
static grpc_timer *pop_one(shard_type *shard, gpr_atm now) {
grpc_timer *timer;
for (;;) {
- if (grpc_timer_check_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
gpr_log(GPR_DEBUG, " .. shard[%d]: heap_empty=%s",
(int)(shard - g_shards),
grpc_timer_heap_is_empty(&shard->heap) ? "true" : "false");
@@ -390,13 +389,13 @@ static grpc_timer *pop_one(shard_type *shard, gpr_atm now) {
if (!refill_queue(shard, now)) return NULL;
}
timer = grpc_timer_heap_top(&shard->heap);
- if (grpc_timer_check_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
gpr_log(GPR_DEBUG,
" .. check top timer deadline=%" PRIdPTR " now=%" PRIdPTR,
timer->deadline, now);
}
if (timer->deadline > now) return NULL;
- if (grpc_timer_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_trace)) {
gpr_log(GPR_DEBUG, "TIMER %p: FIRE %" PRIdPTR "ms late", timer,
now - timer->deadline);
}
@@ -436,7 +435,7 @@ static int run_some_expired_timers(grpc_exec_ctx *exec_ctx, gpr_atm now,
if (gpr_spinlock_trylock(&g_shared_mutables.checker_mu)) {
gpr_mu_lock(&g_shared_mutables.mu);
- if (grpc_timer_check_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
gpr_log(GPR_DEBUG, " .. shard[%d]->min_deadline = %" PRIdPTR,
(int)(g_shard_queue[0] - g_shards),
g_shard_queue[0]->min_deadline);
@@ -452,7 +451,7 @@ static int run_some_expired_timers(grpc_exec_ctx *exec_ctx, gpr_atm now,
n +=
pop_timers(exec_ctx, g_shard_queue[0], now, &new_min_deadline, error);
- if (grpc_timer_check_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
gpr_log(GPR_DEBUG, " .. popped --> %" PRIdPTR
", shard[%d]->min_deadline %" PRIdPTR
" --> %" PRIdPTR ", now=%" PRIdPTR,
@@ -509,7 +508,7 @@ bool grpc_timer_check(grpc_exec_ctx *exec_ctx, gpr_timespec now,
*next =
atm_to_timespec(GPR_MIN(timespec_to_atm_round_up(*next), min_timer));
}
- if (grpc_timer_check_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
gpr_log(GPR_DEBUG,
"TIMER CHECK SKIP: now_atm=%" PRIdPTR " min_timer=%" PRIdPTR,
now_atm, min_timer);
@@ -523,7 +522,7 @@ bool grpc_timer_check(grpc_exec_ctx *exec_ctx, gpr_timespec now,
: GRPC_ERROR_CREATE_FROM_STATIC_STRING("Shutting down timer system");
// tracing
- if (grpc_timer_check_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
char *next_str;
if (next == NULL) {
next_str = gpr_strdup("NULL");
@@ -549,7 +548,7 @@ bool grpc_timer_check(grpc_exec_ctx *exec_ctx, gpr_timespec now,
*next = atm_to_timespec(next_atm);
}
// tracing
- if (grpc_timer_check_trace) {
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
char *next_str;
if (next == NULL) {
next_str = gpr_strdup("NULL");
diff --git a/src/core/lib/iomgr/timer_manager.c b/src/core/lib/iomgr/timer_manager.c
new file mode 100644
index 0000000000..24085093e7
--- /dev/null
+++ b/src/core/lib/iomgr/timer_manager.c
@@ -0,0 +1,276 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "src/core/lib/iomgr/timer_manager.h"
+
+#include <grpc/support/alloc.h>
+#include <grpc/support/log.h>
+#include <grpc/support/thd.h>
+
+#include "src/core/lib/debug/trace.h"
+#include "src/core/lib/iomgr/timer.h"
+
+// Node in the singly linked list of timer threads that have exited and still
+// need to be joined (the list head is g_completed_threads).
+typedef struct completed_thread {
+ // id of the exited thread (recorded via gpr_thd_currentid() on exit)
+ gpr_thd_id t;
+ // next node in the list, or NULL at the tail
+ struct completed_thread *next;
+} completed_thread;
+
+// tracer flag gating the debug logging in this file (declared here, defined
+// elsewhere)
+extern grpc_tracer_flag grpc_timer_check_trace;
+
+// global mutex: taken by the functions below around accesses to the shared
+// state that follows (grpc_timer_manager_init is the exception - it
+// initializes that state without locking)
+static gpr_mu g_mu;
+// are we multi-threaded
+static bool g_threaded;
+// cv to wait until a thread is needed
+static gpr_cv g_cv_wait;
+// cv for notification when threading ends (signalled by the last exiting
+// timer thread)
+static gpr_cv g_cv_shutdown;
+// number of threads in the system
+static int g_thread_count;
+// number of threads sitting around waiting
+static int g_waiter_count;
+// linked list of threads that have completed (and need joining)
+static completed_thread *g_completed_threads;
+// was the manager kicked by the timer system
+static bool g_kicked;
+// is there a thread waiting until the next timer should fire?
+static bool g_has_timed_waiter;
+// generation counter to track which thread is waiting for the next timer:
+// incremented when a thread becomes the timed waiter and on every kick
+static uint64_t g_timed_waiter_generation;
+
+// forward declaration: start_timer_thread_and_unlock() needs the thread body
+static void timer_thread(void *unused);
+
+// Joins and frees every thread recorded in g_completed_threads.
+// Must be called with g_mu held. The lock is released while joining (the list
+// is detached first, so the joins operate on a private copy) and is held
+// again on return.
+static void gc_completed_threads(void) {
+  completed_thread *pending = g_completed_threads;
+  if (pending == NULL) return;
+
+  // take ownership of the whole list before dropping the lock
+  g_completed_threads = NULL;
+  gpr_mu_unlock(&g_mu);
+  while (pending != NULL) {
+    completed_thread *finished = pending;
+    gpr_thd_join(finished->t);
+    pending = finished->next;
+    gpr_free(finished);
+  }
+  gpr_mu_lock(&g_mu);
+}
+
+// Accounts for a new timer thread, releases g_mu, then spawns the thread.
+// Must be entered with g_mu held; returns with g_mu released.
+static void start_timer_thread_and_unlock(void) {
+  GPR_ASSERT(g_threaded);
+  // the new thread is counted (both as a thread and as a waiter) before it
+  // actually starts running
+  g_waiter_count += 1;
+  g_thread_count += 1;
+  gpr_mu_unlock(&g_mu);
+  if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
+    gpr_log(GPR_DEBUG, "Spawn timer thread");
+  }
+  // joinable, so that gc_completed_threads() can gpr_thd_join() it later
+  gpr_thd_options thread_opts = gpr_thd_options_default();
+  gpr_thd_options_set_joinable(&thread_opts);
+  gpr_thd_id new_thread;
+  gpr_thd_new(&new_thread, timer_thread, NULL, &thread_opts);
+}
+
+// Performs a single pass of the timer system on the calling thread: runs
+// grpc_timer_check() against the current monotonic time using a local
+// exec_ctx, then finishes that exec_ctx. The "next" deadline reported by
+// grpc_timer_check() is not used.
+//
+// Defined with an explicit (void) parameter list so the definition is a real
+// prototype matching the declaration in timer_manager.h.
+void grpc_timer_manager_tick(void) {
+  grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
+  gpr_timespec next = gpr_inf_future(GPR_CLOCK_MONOTONIC);
+  gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC);
+  grpc_timer_check(&exec_ctx, now, &next);
+  grpc_exec_ctx_finish(&exec_ctx);
+}
+
+// Body of every timer manager thread. Loops calling grpc_timer_check():
+// - when it returns true, the thread leaves the waiter pool (spawning a new
+// thread or kicking an untimed waiter to cover for it), flushes its
+// exec_ctx without the lock, and rejoins the pool;
+// - otherwise it sleeps on g_cv_wait, either until the next deadline (if it
+// becomes the single "timed waiter") or indefinitely.
+// The loop ends once g_threaded is false; the thread then updates the counts,
+// queues itself on g_completed_threads so it can be joined, and returns.
+static void timer_thread(void *unused) {
+ // this thread's exec_ctx: we try to run things through to completion here
+ // since it's easy to spin up new threads
+ grpc_exec_ctx exec_ctx =
+ GRPC_EXEC_CTX_INITIALIZER(0, grpc_never_ready_to_finish, NULL);
+ const gpr_timespec inf_future = gpr_inf_future(GPR_CLOCK_MONOTONIC);
+ for (;;) {
+ gpr_timespec next = inf_future;
+ gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC);
+ // check timer state, updates next to the next time to run a check
+ if (grpc_timer_check(&exec_ctx, now, &next)) {
+ // if there's something to execute...
+ gpr_mu_lock(&g_mu);
+ // remove a waiter from the pool, and start another thread if necessary
+ --g_waiter_count;
+ if (g_waiter_count == 0 && g_threaded) {
+ // (this call releases g_mu)
+ start_timer_thread_and_unlock();
+ } else {
+ // if there's no thread waiting with a timeout, kick an existing waiter
+ // so that the next deadline is not missed
+ if (!g_has_timed_waiter) {
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
+ gpr_log(GPR_DEBUG, "kick untimed waiter");
+ }
+ gpr_cv_signal(&g_cv_wait);
+ }
+ gpr_mu_unlock(&g_mu);
+ }
+ // without our lock, flush the exec_ctx
+ grpc_exec_ctx_flush(&exec_ctx);
+ gpr_mu_lock(&g_mu);
+ // garbage collect any threads hanging out that are dead
+ gc_completed_threads();
+ // get ready to wait again
+ ++g_waiter_count;
+ gpr_mu_unlock(&g_mu);
+ } else {
+ gpr_mu_lock(&g_mu);
+ // if we're not threaded anymore, leave
+ // (note: we break out with g_mu still held; the exit code after the loop
+ // relies on that and releases it)
+ if (!g_threaded) break;
+ // if there's no timed waiter, we should become one: that waiter waits
+ // only until the next timer should expire
+ // all other waiters wait forever
+ // start one behind the current generation: unless we become the timed
+ // waiter below, this value will not match g_timed_waiter_generation in
+ // the comparisons after the wait
+ uint64_t my_timed_waiter_generation = g_timed_waiter_generation - 1;
+ if (!g_has_timed_waiter) {
+ g_has_timed_waiter = true;
+ // we use a generation counter to track the timed waiter so we can
+ // cancel an existing one quickly (and when it actually times out it'll
+ // figure stuff out instead of incurring a wakeup)
+ my_timed_waiter_generation = ++g_timed_waiter_generation;
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
+ gpr_log(GPR_DEBUG, "sleep for a while");
+ }
+ } else {
+ // someone else is already the timed waiter: discard the deadline
+ // computed by grpc_timer_check and wait without a timeout
+ next = inf_future;
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
+ gpr_log(GPR_DEBUG, "sleep until kicked");
+ }
+ }
+ gpr_cv_wait(&g_cv_wait, &g_mu, next);
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
+ gpr_log(GPR_DEBUG, "wait ended: was_timed:%d kicked:%d",
+ my_timed_waiter_generation == g_timed_waiter_generation,
+ g_kicked);
+ }
+ // if this was the timed waiter, then we need to check timers, and flag
+ // that there's now no timed waiter... we'll look for a replacement if
+ // there's work to do after checking timers (code above)
+ if (my_timed_waiter_generation == g_timed_waiter_generation) {
+ g_has_timed_waiter = false;
+ }
+ // if this was a kick from the timer system, consume it (and don't stop
+ // this thread yet)
+ if (g_kicked) {
+ grpc_timer_consume_kick();
+ g_kicked = false;
+ }
+ gpr_mu_unlock(&g_mu);
+ }
+ }
+ // terminate the thread: drop the waiter count, thread count, and let whomever
+ // stopped the threading stuff know that we're done
+ // (g_mu is still held here - see the break above)
+ --g_waiter_count;
+ --g_thread_count;
+ if (0 == g_thread_count) {
+ gpr_cv_signal(&g_cv_shutdown);
+ }
+ // record ourselves on the completed list so another thread can join us
+ completed_thread *ct = gpr_malloc(sizeof(*ct));
+ ct->t = gpr_thd_currentid();
+ ct->next = g_completed_threads;
+ g_completed_threads = ct;
+ gpr_mu_unlock(&g_mu);
+ grpc_exec_ctx_finish(&exec_ctx);
+ if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
+ gpr_log(GPR_DEBUG, "End timer thread");
+ }
+}
+
+// Turns threading on: if the manager is not already threaded, marks it as
+// threaded and spawns the first timer thread. Calling this while threading is
+// already enabled is a no-op.
+//
+// The previous version cleared g_threaded in the already-threaded case, so a
+// redundant "enable" request silently disabled threading: running timer
+// threads would exit on their next wake-up, and a later stop_threads() would
+// see g_threaded == false and no longer wait for them.
+static void start_threads(void) {
+  gpr_mu_lock(&g_mu);
+  if (!g_threaded) {
+    g_threaded = true;
+    // releases g_mu
+    start_timer_thread_and_unlock();
+  } else {
+    // already running: leave the state untouched
+    gpr_mu_unlock(&g_mu);
+  }
+}
+
+// Initializes the timer manager: sets up the mutex and condition variables,
+// resets the bookkeeping state, and enables threading (which spawns the first
+// timer thread).
+void grpc_timer_manager_init(void) {
+  gpr_mu_init(&g_mu);
+  gpr_cv_init(&g_cv_wait);
+  gpr_cv_init(&g_cv_shutdown);
+  g_threaded = false;
+  g_thread_count = 0;
+  g_waiter_count = 0;
+  g_completed_threads = NULL;
+  // also reset the kick / timed-waiter flags, so that an init following an
+  // earlier shutdown does not inherit stale state (e.g. a kick recorded while
+  // no thread was around to consume it)
+  g_kicked = false;
+  g_has_timed_waiter = false;
+
+  start_threads();
+}
+
+// Turns threading off and waits for every timer thread to exit.
+// If threading is already off, only the trace line is emitted. Otherwise the
+// flag is cleared, all waiters are woken, and this function blocks on
+// g_cv_shutdown until g_thread_count reaches zero, joining finished threads
+// after each wake-up.
+static void stop_threads(void) {
+  gpr_mu_lock(&g_mu);
+  if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
+    gpr_log(GPR_DEBUG, "stop timer threads: threaded=%d", g_threaded);
+  }
+  if (!g_threaded) {
+    gpr_mu_unlock(&g_mu);
+    return;
+  }
+
+  g_threaded = false;
+  // wake every waiter so each one notices that g_threaded is now false
+  gpr_cv_broadcast(&g_cv_wait);
+  if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
+    gpr_log(GPR_DEBUG, "num timer threads: %d", g_thread_count);
+  }
+  while (g_thread_count > 0) {
+    gpr_cv_wait(&g_cv_shutdown, &g_mu, gpr_inf_future(GPR_CLOCK_REALTIME));
+    if (GRPC_TRACER_ON(grpc_timer_check_trace)) {
+      gpr_log(GPR_DEBUG, "num timer threads: %d", g_thread_count);
+    }
+    gc_completed_threads();
+  }
+  gpr_mu_unlock(&g_mu);
+}
+
+// Shuts the timer manager down: stops threading (stop_threads() waits for the
+// timer threads to exit), then destroys the mutex and condition variables
+// created by grpc_timer_manager_init().
+void grpc_timer_manager_shutdown(void) {
+ stop_threads();
+
+ gpr_mu_destroy(&g_mu);
+ gpr_cv_destroy(&g_cv_wait);
+ gpr_cv_destroy(&g_cv_shutdown);
+}
+
+// Enables (threaded == true) or disables (threaded == false) the timer
+// manager's background threads.
+void grpc_timer_manager_set_threading(bool threaded) {
+  if (!threaded) {
+    stop_threads();
+    return;
+  }
+  start_threads();
+}
+
+// Kick from the timer system (e.g. the timer code calls this when a newly
+// added timer becomes the earliest deadline). Under g_mu: records the kick,
+// marks that there is no timed waiter and bumps the generation so the thread
+// that was the timed waiter no longer matches it, then signals g_cv_wait so
+// that a waiting thread re-checks the timers.
+void grpc_kick_poller(void) {
+ gpr_mu_lock(&g_mu);
+ g_kicked = true;
+ g_has_timed_waiter = false;
+ ++g_timed_waiter_generation;
+ gpr_cv_signal(&g_cv_wait);
+ gpr_mu_unlock(&g_mu);
+}
diff --git a/src/core/lib/iomgr/timer_manager.h b/src/core/lib/iomgr/timer_manager.h
new file mode 100644
index 0000000000..46729ccea6
--- /dev/null
+++ b/src/core/lib/iomgr/timer_manager.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef GRPC_CORE_LIB_IOMGR_TIMER_MANAGER_H
+#define GRPC_CORE_LIB_IOMGR_TIMER_MANAGER_H
+
+#include <stdbool.h>
+
+/* Timer Manager tries to keep one thread waiting for the next timeout at all
+ times */
+
+/* set up the timer manager's synchronization state and enable threading */
+void grpc_timer_manager_init(void);
+/* stop threading (waiting for the timer threads to exit) and destroy the
+ * synchronization state */
+void grpc_timer_manager_shutdown(void);
+
+/* enable/disable threading - must be called after grpc_timer_manager_init and
+ * before grpc_timer_manager_shutdown */
+void grpc_timer_manager_set_threading(bool enabled);
+/* explicitly perform one tick of the timer system - for when threading is
+ * disabled */
+void grpc_timer_manager_tick(void);
+
+#endif /* GRPC_CORE_LIB_IOMGR_TIMER_MANAGER_H */
diff --git a/src/core/lib/iomgr/timer_uv.c b/src/core/lib/iomgr/timer_uv.c
index 8e8a07578c..2952e44b58 100644
--- a/src/core/lib/iomgr/timer_uv.c
+++ b/src/core/lib/iomgr/timer_uv.c
@@ -38,10 +38,14 @@
#include <grpc/support/alloc.h>
#include <grpc/support/log.h>
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/iomgr/timer.h"
#include <uv.h>
+grpc_tracer_flag grpc_timer_trace = GRPC_TRACER_INITIALIZER(false);
+grpc_tracer_flag grpc_timer_check_trace = GRPC_TRACER_INITIALIZER(false);
+
static void timer_close_callback(uv_handle_t *handle) { gpr_free(handle); }
static void stop_uv_timer(uv_timer_t *handle) {
@@ -100,4 +104,6 @@ bool grpc_timer_check(grpc_exec_ctx *exec_ctx, gpr_timespec now,
void grpc_timer_list_init(gpr_timespec now) {}
void grpc_timer_list_shutdown(grpc_exec_ctx *exec_ctx) {}
+void grpc_timer_consume_kick(void) {}
+
#endif /* GRPC_UV */
diff --git a/src/core/lib/security/credentials/google_default/google_default_credentials.c b/src/core/lib/security/credentials/google_default/google_default_credentials.c
index 97501e6788..4d8c451ea8 100644
--- a/src/core/lib/security/credentials/google_default/google_default_credentials.c
+++ b/src/core/lib/security/credentials/google_default/google_default_credentials.c
@@ -99,7 +99,7 @@ static void on_compute_engine_detection_http_response(grpc_exec_ctx *exec_ctx,
}
static void destroy_pollset(grpc_exec_ctx *exec_ctx, void *p, grpc_error *e) {
- grpc_pollset_destroy(p);
+ grpc_pollset_destroy(exec_ctx, p);
}
static int is_stack_running_on_compute_engine(grpc_exec_ctx *exec_ctx) {
diff --git a/src/core/lib/security/credentials/jwt/jwt_credentials.c b/src/core/lib/security/credentials/jwt/jwt_credentials.c
index 178ce89aa6..0e7c1afb02 100644
--- a/src/core/lib/security/credentials/jwt/jwt_credentials.c
+++ b/src/core/lib/security/credentials/jwt/jwt_credentials.c
@@ -171,7 +171,7 @@ static char *redact_private_key(const char *json_key) {
grpc_call_credentials *grpc_service_account_jwt_access_credentials_create(
const char *json_key, gpr_timespec token_lifetime, void *reserved) {
- if (grpc_api_trace) {
+ if (GRPC_TRACER_ON(grpc_api_trace)) {
char *clean_json = redact_private_key(json_key);
gpr_log(GPR_INFO,
"grpc_service_account_jwt_access_credentials_create("
diff --git a/src/core/lib/security/credentials/oauth2/oauth2_credentials.c b/src/core/lib/security/credentials/oauth2/oauth2_credentials.c
index ccfb3566c1..29235b6eb3 100644
--- a/src/core/lib/security/credentials/oauth2/oauth2_credentials.c
+++ b/src/core/lib/security/credentials/oauth2/oauth2_credentials.c
@@ -412,7 +412,7 @@ grpc_call_credentials *grpc_google_refresh_token_credentials_create(
const char *json_refresh_token, void *reserved) {
grpc_auth_refresh_token token =
grpc_auth_refresh_token_create_from_string(json_refresh_token);
- if (grpc_api_trace) {
+ if (GRPC_TRACER_ON(grpc_api_trace)) {
char *loggable_token = create_loggable_refresh_token(&token);
gpr_log(GPR_INFO,
"grpc_refresh_token_credentials_create(json_refresh_token=%s, "
diff --git a/src/core/lib/security/transport/client_auth_filter.c b/src/core/lib/security/transport/client_auth_filter.c
index 1f0daf7325..dff05633ec 100644
--- a/src/core/lib/security/transport/client_auth_filter.c
+++ b/src/core/lib/security/transport/client_auth_filter.c
@@ -253,7 +253,7 @@ static void auth_start_transport_op(grpc_exec_ctx *exec_ctx,
grpc_linked_mdelem *l;
grpc_client_security_context *sec_ctx = NULL;
- if (calld->security_context_set == 0 && !op->cancel_stream) {
+ if (!op->cancel_stream && calld->security_context_set == 0) {
calld->security_context_set = 1;
GPR_ASSERT(op->payload->context != NULL);
if (op->payload->context[GRPC_CONTEXT_SECURITY].value == NULL) {
diff --git a/src/core/lib/security/transport/secure_endpoint.c b/src/core/lib/security/transport/secure_endpoint.c
index 0d5c7432c6..48d368a2a7 100644
--- a/src/core/lib/security/transport/secure_endpoint.c
+++ b/src/core/lib/security/transport/secure_endpoint.c
@@ -75,7 +75,7 @@ typedef struct {
gpr_refcount ref;
} secure_endpoint;
-int grpc_trace_secure_endpoint = 0;
+grpc_tracer_flag grpc_trace_secure_endpoint = GRPC_TRACER_INITIALIZER(false);
static void destroy(grpc_exec_ctx *exec_ctx, secure_endpoint *secure_ep) {
secure_endpoint *ep = secure_ep;
@@ -137,7 +137,7 @@ static void flush_read_staging_buffer(secure_endpoint *ep, uint8_t **cur,
static void call_read_cb(grpc_exec_ctx *exec_ctx, secure_endpoint *ep,
grpc_error *error) {
- if (grpc_trace_secure_endpoint) {
+ if (GRPC_TRACER_ON(grpc_trace_secure_endpoint)) {
size_t i;
for (i = 0; i < ep->read_buffer->count; i++) {
char *data = grpc_dump_slice(ep->read_buffer->slices[i],
@@ -269,7 +269,7 @@ static void endpoint_write(grpc_exec_ctx *exec_ctx, grpc_endpoint *secure_ep,
grpc_slice_buffer_reset_and_unref_internal(exec_ctx, &ep->output_buffer);
- if (grpc_trace_secure_endpoint) {
+ if (GRPC_TRACER_ON(grpc_trace_secure_endpoint)) {
for (i = 0; i < slices->count; i++) {
char *data =
grpc_dump_slice(slices->slices[i], GPR_DUMP_HEX | GPR_DUMP_ASCII);
diff --git a/src/core/lib/security/transport/secure_endpoint.h b/src/core/lib/security/transport/secure_endpoint.h
index a61f40a4fa..f1a5c8cb6d 100644
--- a/src/core/lib/security/transport/secure_endpoint.h
+++ b/src/core/lib/security/transport/secure_endpoint.h
@@ -39,7 +39,7 @@
struct tsi_frame_protector;
-extern int grpc_trace_secure_endpoint;
+extern grpc_tracer_flag grpc_trace_secure_endpoint;
/* Takes ownership of protector and to_wrap, and refs leftover_slices. */
grpc_endpoint *grpc_secure_endpoint_create(
diff --git a/src/core/lib/support/cmdline.c b/src/core/lib/support/cmdline.c
index 88a65a8e2e..e5c9f3b84b 100644
--- a/src/core/lib/support/cmdline.c
+++ b/src/core/lib/support/cmdline.c
@@ -71,7 +71,7 @@ struct gpr_cmdline {
static int normal_state(gpr_cmdline *cl, char *arg);
gpr_cmdline *gpr_cmdline_create(const char *description) {
- gpr_cmdline *cl = gpr_zalloc(sizeof(gpr_cmdline));
+ gpr_cmdline *cl = (gpr_cmdline *)gpr_zalloc(sizeof(gpr_cmdline));
cl->description = description;
cl->state = normal_state;
@@ -100,7 +100,7 @@ static void add_arg(gpr_cmdline *cl, const char *name, const char *help,
GPR_ASSERT(0 != strcmp(a->name, name));
}
- a = gpr_zalloc(sizeof(arg));
+ a = (arg *)gpr_zalloc(sizeof(arg));
a->name = name;
a->help = help;
a->type = type;
@@ -302,7 +302,7 @@ static int normal_state(gpr_cmdline *cl, char *str) {
eq = strchr(str, '=');
if (eq != NULL) {
/* copy the string into a temp buffer and extract the name */
- tmp = arg_name = gpr_malloc((size_t)(eq - str + 1));
+ tmp = arg_name = (char *)gpr_malloc((size_t)(eq - str + 1));
memcpy(arg_name, str, (size_t)(eq - str));
arg_name[eq - str] = 0;
} else {
diff --git a/src/core/lib/support/cpu_linux.c b/src/core/lib/support/cpu_linux.c
index 1e50f59823..b826dde160 100644
--- a/src/core/lib/support/cpu_linux.c
+++ b/src/core/lib/support/cpu_linux.c
@@ -67,16 +67,16 @@ unsigned gpr_cpu_num_cores(void) {
}
unsigned gpr_cpu_current_cpu(void) {
-#ifdef __GLIBC__
+#ifdef GPR_MUSL_LIBC_COMPAT
+ // sched_getcpu() is undefined on musl
+ return 0;
+#else
int cpu = sched_getcpu();
if (cpu < 0) {
gpr_log(GPR_ERROR, "Error determining current CPU: %s\n", strerror(errno));
return 0;
}
return (unsigned)cpu;
-#else
- // sched_getcpu() is undefined on musl
- return 0;
#endif
}
diff --git a/src/core/lib/support/histogram.c b/src/core/lib/support/histogram.c
index ba8176bb05..c88695409d 100644
--- a/src/core/lib/support/histogram.c
+++ b/src/core/lib/support/histogram.c
@@ -88,7 +88,7 @@ static double bucket_start(gpr_histogram *h, double x) {
gpr_histogram *gpr_histogram_create(double resolution,
double max_bucket_start) {
- gpr_histogram *h = gpr_malloc(sizeof(gpr_histogram));
+ gpr_histogram *h = (gpr_histogram *)gpr_malloc(sizeof(gpr_histogram));
GPR_ASSERT(resolution > 0.0);
GPR_ASSERT(max_bucket_start > resolution);
h->sum = 0.0;
@@ -102,7 +102,7 @@ gpr_histogram *gpr_histogram_create(double resolution,
h->num_buckets = bucket_for_unchecked(h, max_bucket_start) + 1;
GPR_ASSERT(h->num_buckets > 1);
GPR_ASSERT(h->num_buckets < 100000000);
- h->buckets = gpr_zalloc(sizeof(uint32_t) * h->num_buckets);
+ h->buckets = (uint32_t *)gpr_zalloc(sizeof(uint32_t) * h->num_buckets);
return h;
}
diff --git a/src/core/lib/support/host_port.c b/src/core/lib/support/host_port.c
index f19bdbc835..bbd42c26e0 100644
--- a/src/core/lib/support/host_port.c
+++ b/src/core/lib/support/host_port.c
@@ -98,7 +98,7 @@ int gpr_split_host_port(const char *name, char **host, char **port) {
}
/* Allocate return values. */
- *host = gpr_malloc(host_len + 1);
+ *host = (char *)gpr_malloc(host_len + 1);
memcpy(*host, host_start, host_len);
(*host)[host_len] = '\0';
diff --git a/src/core/lib/support/mpscq.c b/src/core/lib/support/mpscq.c
index 5b9323275a..822abd075d 100644
--- a/src/core/lib/support/mpscq.c
+++ b/src/core/lib/support/mpscq.c
@@ -46,29 +46,40 @@ void gpr_mpscq_destroy(gpr_mpscq *q) {
GPR_ASSERT(q->tail == &q->stub);
}
-void gpr_mpscq_push(gpr_mpscq *q, gpr_mpscq_node *n) {
+bool gpr_mpscq_push(gpr_mpscq *q, gpr_mpscq_node *n) {
gpr_atm_no_barrier_store(&n->next, (gpr_atm)NULL);
gpr_mpscq_node *prev =
(gpr_mpscq_node *)gpr_atm_full_xchg(&q->head, (gpr_atm)n);
gpr_atm_rel_store(&prev->next, (gpr_atm)n);
+ return prev == &q->stub;
}
+// Pops a node by delegating to gpr_mpscq_pop_and_check_end() and discarding
+// the emptiness indication it reports.
gpr_mpscq_node *gpr_mpscq_pop(gpr_mpscq *q) {
+ bool empty;
+ return gpr_mpscq_pop_and_check_end(q, &empty);
+}
+
+gpr_mpscq_node *gpr_mpscq_pop_and_check_end(gpr_mpscq *q, bool *empty) {
gpr_mpscq_node *tail = q->tail;
gpr_mpscq_node *next = (gpr_mpscq_node *)gpr_atm_acq_load(&tail->next);
if (tail == &q->stub) {
// indicates the list is actually (ephemerally) empty
- if (next == NULL) return NULL;
+ if (next == NULL) {
+ *empty = true;
+ return NULL;
+ }
q->tail = next;
tail = next;
next = (gpr_mpscq_node *)gpr_atm_acq_load(&tail->next);
}
if (next != NULL) {
+ *empty = false;
q->tail = next;
return tail;
}
gpr_mpscq_node *head = (gpr_mpscq_node *)gpr_atm_acq_load(&q->head);
if (tail != head) {
+ *empty = false;
// indicates a retry is in order: we're still adding
return NULL;
}
@@ -79,5 +90,28 @@ gpr_mpscq_node *gpr_mpscq_pop(gpr_mpscq *q) {
return tail;
}
// indicates a retry is in order: we're still adding
+ *empty = false;
+ return NULL;
+}
+
+// Initializes the wrapped queue and puts the read spinlock in its initial
+// (GPR_SPINLOCK_INITIALIZER) state.
+void gpr_locked_mpscq_init(gpr_locked_mpscq *q) {
+ gpr_mpscq_init(&q->queue);
+ q->read_lock = GPR_SPINLOCK_INITIALIZER;
+}
+
+// Destroys the wrapped queue; the spinlock itself needs no teardown here.
+void gpr_locked_mpscq_destroy(gpr_locked_mpscq *q) {
+ gpr_mpscq_destroy(&q->queue);
+}
+
+// Pushes n onto the wrapped queue without taking the read lock; returns
+// whatever gpr_mpscq_push() returns.
+bool gpr_locked_mpscq_push(gpr_locked_mpscq *q, gpr_mpscq_node *n) {
+ return gpr_mpscq_push(&q->queue, n);
+}
+
+// Tries to take the read spinlock without blocking. On success, pops from the
+// wrapped queue under the lock and returns the result (which may be NULL).
+// If the lock is already held, returns NULL immediately - so NULL does not
+// imply that the queue is empty.
+gpr_mpscq_node *gpr_locked_mpscq_pop(gpr_locked_mpscq *q) {
+ if (gpr_spinlock_trylock(&q->read_lock)) {
+ gpr_mpscq_node *n = gpr_mpscq_pop(&q->queue);
+ gpr_spinlock_unlock(&q->read_lock);
+ return n;
+ }
return NULL;
}
diff --git a/src/core/lib/support/mpscq.h b/src/core/lib/support/mpscq.h
index 977a117952..b3a171678a 100644
--- a/src/core/lib/support/mpscq.h
+++ b/src/core/lib/support/mpscq.h
@@ -35,7 +35,9 @@
#define GRPC_CORE_LIB_SUPPORT_MPSCQ_H
#include <grpc/support/atm.h>
+#include <stdbool.h>
#include <stddef.h>
+#include "src/core/lib/support/spinlock.h"
// Multiple-producer single-consumer lock free queue, based upon the
// implementation from Dmitry Vyukov here:
@@ -57,9 +59,34 @@ typedef struct gpr_mpscq {
void gpr_mpscq_init(gpr_mpscq *q);
void gpr_mpscq_destroy(gpr_mpscq *q);
// Push a node
-void gpr_mpscq_push(gpr_mpscq *q, gpr_mpscq_node *n);
+// Thread safe - can be called from multiple threads concurrently
+// Returns true if this was possibly the first node (may return true
+// sporadically, will not return false sporadically)
+bool gpr_mpscq_push(gpr_mpscq *q, gpr_mpscq_node *n);
// Pop a node (returns NULL if no node is ready - which doesn't indicate that
// the queue is empty!!)
+// Thread compatible - can only be called from one thread at a time
gpr_mpscq_node *gpr_mpscq_pop(gpr_mpscq *q);
+// Pop a node; sets *empty to true if the queue is empty, or false if it is not
+gpr_mpscq_node *gpr_mpscq_pop_and_check_end(gpr_mpscq *q, bool *empty);
+
+// An mpscq with a spinlock: it's safe to pop from multiple threads, but only
+// one thread will succeed at a time
+typedef struct gpr_locked_mpscq {
+ gpr_mpscq queue;
+ gpr_spinlock read_lock;
+} gpr_locked_mpscq;
+
+void gpr_locked_mpscq_init(gpr_locked_mpscq *q);
+void gpr_locked_mpscq_destroy(gpr_locked_mpscq *q);
+// Push a node
+// Thread safe - can be called from multiple threads concurrently
+// Returns true if this was possibly the first node (may return true
+// sporadically, will not return false sporadically)
+bool gpr_locked_mpscq_push(gpr_locked_mpscq *q, gpr_mpscq_node *n);
+// Pop a node (returns NULL if no node is ready - which doesn't indicate that
+// the queue is empty!!)
+// Thread safe - can be called from multiple threads concurrently
+gpr_mpscq_node *gpr_locked_mpscq_pop(gpr_locked_mpscq *q);
#endif /* GRPC_CORE_LIB_SUPPORT_MPSCQ_H */
diff --git a/src/core/lib/support/string.c b/src/core/lib/support/string.c
index d20b86f7cf..11297c9ddb 100644
--- a/src/core/lib/support/string.c
+++ b/src/core/lib/support/string.c
@@ -53,7 +53,7 @@ char *gpr_strdup(const char *src) {
}
len = strlen(src) + 1;
- dst = gpr_malloc(len);
+ dst = (char *)gpr_malloc(len);
memcpy(dst, src, len);
@@ -74,13 +74,13 @@ static dump_out dump_out_create(void) {
static void dump_out_append(dump_out *out, char c) {
if (out->length == out->capacity) {
out->capacity = GPR_MAX(8, 2 * out->capacity);
- out->data = gpr_realloc(out->data, out->capacity);
+ out->data = (char *)gpr_realloc(out->data, out->capacity);
}
out->data[out->length++] = c;
}
static void hexdump(dump_out *out, const char *buf, size_t len) {
- static const char hex[16] = "0123456789abcdef";
+ static const char *hex = "0123456789abcdef";
const uint8_t *const beg = (const uint8_t *)buf;
const uint8_t *const end = beg + len;
@@ -124,16 +124,16 @@ char *gpr_dump(const char *buf, size_t len, uint32_t flags) {
int gpr_parse_bytes_to_uint32(const char *buf, size_t len, uint32_t *result) {
uint32_t out = 0;
- uint32_t new;
+ uint32_t new_val;
size_t i;
if (len == 0) return 0; /* must have some bytes */
for (i = 0; i < len; i++) {
if (buf[i] < '0' || buf[i] > '9') return 0; /* bad char */
- new = 10 * out + (uint32_t)(buf[i] - '0');
- if (new < out) return 0; /* overflow */
- out = new;
+ new_val = 10 * out + (uint32_t)(buf[i] - '0');
+ if (new_val < out) return 0; /* overflow */
+ out = new_val;
}
*result = out;
@@ -201,7 +201,7 @@ int gpr_parse_nonnegative_int(const char *value) {
char *gpr_leftpad(const char *str, char flag, size_t length) {
const size_t str_length = strlen(str);
const size_t out_length = str_length > length ? str_length : length;
- char *out = gpr_malloc(out_length + 1);
+ char *out = (char *)gpr_malloc(out_length + 1);
memset(out, flag, out_length - str_length);
memcpy(out + out_length - str_length, str, str_length);
out[out_length] = 0;
@@ -225,7 +225,7 @@ char *gpr_strjoin_sep(const char **strs, size_t nstrs, const char *sep,
if (nstrs > 0) {
out_length += sep_len * (nstrs - 1); /* separators */
}
- out = gpr_malloc(out_length);
+ out = (char *)gpr_malloc(out_length);
out_length = 0;
for (i = 0; i < nstrs; i++) {
const size_t slen = strlen(strs[i]);
@@ -256,7 +256,7 @@ void gpr_strvec_destroy(gpr_strvec *sv) {
void gpr_strvec_add(gpr_strvec *sv, char *str) {
if (sv->count == sv->capacity) {
sv->capacity = GPR_MAX(sv->capacity + 8, sv->capacity * 2);
- sv->strs = gpr_realloc(sv->strs, sizeof(char *) * sv->capacity);
+ sv->strs = (char **)gpr_realloc(sv->strs, sizeof(char *) * sv->capacity);
}
sv->strs[sv->count++] = str;
}
@@ -278,12 +278,12 @@ int gpr_stricmp(const char *a, const char *b) {
static void add_string_to_split(const char *beg, const char *end, char ***strs,
size_t *nstrs, size_t *capstrs) {
- char *out = gpr_malloc((size_t)(end - beg) + 1);
+ char *out = (char *)gpr_malloc((size_t)(end - beg) + 1);
memcpy(out, beg, (size_t)(end - beg));
out[end - beg] = 0;
if (*nstrs == *capstrs) {
*capstrs = GPR_MAX(8, 2 * *capstrs);
- *strs = gpr_realloc(*strs, sizeof(*strs) * *capstrs);
+ *strs = (char **)gpr_realloc(*strs, sizeof(*strs) * *capstrs);
}
(*strs)[*nstrs] = out;
++*nstrs;
diff --git a/src/core/lib/support/string_posix.c b/src/core/lib/support/string_posix.c
index c804ed5ded..2438b18d21 100644
--- a/src/core/lib/support/string_posix.c
+++ b/src/core/lib/support/string_posix.c
@@ -58,7 +58,7 @@ int gpr_asprintf(char **strp, const char *format, ...) {
/* Allocate a new buffer, with space for the NUL terminator. */
strp_buflen = (size_t)ret + 1;
- if ((*strp = gpr_malloc(strp_buflen)) == NULL) {
+ if ((*strp = (char *)gpr_malloc(strp_buflen)) == NULL) {
/* This shouldn't happen, because gpr_malloc() calls abort(). */
return -1;
}
diff --git a/src/core/lib/support/subprocess_posix.c b/src/core/lib/support/subprocess_posix.c
index ed653b9c2e..b9d0796b01 100644
--- a/src/core/lib/support/subprocess_posix.c
+++ b/src/core/lib/support/subprocess_posix.c
@@ -67,7 +67,7 @@ gpr_subprocess *gpr_subprocess_create(int argc, const char **argv) {
if (pid == -1) {
return NULL;
} else if (pid == 0) {
- exec_args = gpr_malloc(((size_t)argc + 1) * sizeof(char *));
+ exec_args = (char **)gpr_malloc(((size_t)argc + 1) * sizeof(char *));
memcpy(exec_args, argv, (size_t)argc * sizeof(char *));
exec_args[argc] = NULL;
execv(exec_args[0], exec_args);
@@ -76,7 +76,7 @@ gpr_subprocess *gpr_subprocess_create(int argc, const char **argv) {
_exit(1);
return NULL;
} else {
- r = gpr_zalloc(sizeof(gpr_subprocess));
+ r = (gpr_subprocess *)gpr_zalloc(sizeof(gpr_subprocess));
r->pid = pid;
return r;
}
diff --git a/src/core/lib/support/thd_posix.c b/src/core/lib/support/thd_posix.c
index 2fc23bffaf..16e645ad91 100644
--- a/src/core/lib/support/thd_posix.c
+++ b/src/core/lib/support/thd_posix.c
@@ -65,7 +65,7 @@ int gpr_thd_new(gpr_thd_id *t, void (*thd_body)(void *arg), void *arg,
pthread_t p;
/* don't use gpr_malloc as we may cause an infinite recursion with
* the profiling code */
- struct thd_arg *a = malloc(sizeof(*a));
+ struct thd_arg *a = (struct thd_arg *)malloc(sizeof(*a));
GPR_ASSERT(a != NULL);
a->body = thd_body;
a->arg = arg;
diff --git a/src/core/lib/support/wrap_memcpy.c b/src/core/lib/support/wrap_memcpy.c
index 050cc6db5e..deb8d6b198 100644
--- a/src/core/lib/support/wrap_memcpy.c
+++ b/src/core/lib/support/wrap_memcpy.c
@@ -31,6 +31,8 @@
*
*/
+#include <grpc/support/port_platform.h>
+
#include <string.h>
/* Provide a wrapped memcpy for targets that need to be backwards
@@ -40,7 +42,7 @@
*/
#ifdef __linux__
-#if defined(__x86_64__) && defined(__GNU_LIBRARY__)
+#if defined(__x86_64__) && !defined(GPR_MUSL_LIBC_COMPAT)
__asm__(".symver memcpy,memcpy@GLIBC_2.2.5");
void *__wrap_memcpy(void *destination, const void *source, size_t num) {
return memcpy(destination, source, num);
diff --git a/src/core/lib/surface/alarm.c b/src/core/lib/surface/alarm.c
index e71c0ebfc5..b72d534b7e 100644
--- a/src/core/lib/surface/alarm.c
+++ b/src/core/lib/surface/alarm.c
@@ -81,7 +81,9 @@ void grpc_alarm_cancel(grpc_alarm *alarm) {
}
void grpc_alarm_destroy(grpc_alarm *alarm) {
+ grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
grpc_alarm_cancel(alarm);
- GRPC_CQ_INTERNAL_UNREF(alarm->cq, "alarm");
+ GRPC_CQ_INTERNAL_UNREF(&exec_ctx, alarm->cq, "alarm");
gpr_free(alarm);
+ grpc_exec_ctx_finish(&exec_ctx);
}
diff --git a/src/core/lib/surface/api_trace.c b/src/core/lib/surface/api_trace.c
index 79e3e5ca9b..d8941cdf42 100644
--- a/src/core/lib/surface/api_trace.c
+++ b/src/core/lib/surface/api_trace.c
@@ -32,5 +32,6 @@
*/
#include "src/core/lib/surface/api_trace.h"
+#include "src/core/lib/debug/trace.h"
-int grpc_api_trace = 0;
+grpc_tracer_flag grpc_api_trace = GRPC_TRACER_INITIALIZER(false);
diff --git a/src/core/lib/surface/api_trace.h b/src/core/lib/surface/api_trace.h
index c60aaba5e9..d4fbc8d90d 100644
--- a/src/core/lib/surface/api_trace.h
+++ b/src/core/lib/surface/api_trace.h
@@ -37,7 +37,7 @@
#include <grpc/support/log.h>
#include "src/core/lib/debug/trace.h"
-extern int grpc_api_trace;
+extern grpc_tracer_flag grpc_api_trace;
/* Provide unwrapping macros because we're in C89 and variadic macros weren't
introduced until C99... */
@@ -58,7 +58,7 @@ extern int grpc_api_trace;
/* Due to the limitations of C89's preprocessor, the arity of the var-arg list
'nargs' must be specified. */
#define GRPC_API_TRACE(fmt, nargs, args) \
- if (grpc_api_trace) { \
+ if (GRPC_TRACER_ON(grpc_api_trace)) { \
gpr_log(GPR_INFO, fmt GRPC_API_TRACE_UNWRAP##nargs args); \
}
diff --git a/src/core/lib/surface/call.c b/src/core/lib/surface/call.c
index 7525806583..201969cd45 100644
--- a/src/core/lib/surface/call.c
+++ b/src/core/lib/surface/call.c
@@ -244,8 +244,8 @@ struct grpc_call {
void *saved_receiving_stream_ready_bctlp;
};
-int grpc_call_error_trace = 0;
-int grpc_compression_trace = 0;
+grpc_tracer_flag grpc_call_error_trace = GRPC_TRACER_INITIALIZER(false);
+grpc_tracer_flag grpc_compression_trace = GRPC_TRACER_INITIALIZER(false);
#define CALL_STACK_FROM_CALL(call) ((grpc_call_stack *)((call) + 1))
#define CALL_FROM_CALL_STACK(call_stack) (((grpc_call *)(call_stack)) - 1)
@@ -521,7 +521,7 @@ static void destroy_call(grpc_exec_ctx *exec_ctx, void *call,
}
}
if (c->cq) {
- GRPC_CQ_INTERNAL_UNREF(c->cq, "bind");
+ GRPC_CQ_INTERNAL_UNREF(exec_ctx, c->cq, "bind");
}
get_final_status(call, set_status_value_directly, &c->final_info.final_status,
@@ -702,7 +702,7 @@ static void get_final_status(grpc_call *call,
for (i = 0; i < STATUS_SOURCE_COUNT; i++) {
status[i] = unpack_received_status(gpr_atm_acq_load(&call->status[i]));
}
- if (grpc_call_error_trace) {
+ if (GRPC_TRACER_ON(grpc_call_error_trace)) {
gpr_log(GPR_DEBUG, "get_final_status %s", call->is_client ? "CLI" : "SVR");
for (i = 0; i < STATUS_SOURCE_COUNT; i++) {
if (status[i].is_set) {
@@ -1259,7 +1259,7 @@ static void receiving_slice_ready(grpc_exec_ctx *exec_ctx, void *bctlp,
}
if (error != GRPC_ERROR_NONE) {
- if (grpc_trace_operation_failures) {
+ if (GRPC_TRACER_ON(grpc_trace_operation_failures)) {
GRPC_LOG_IF_ERROR("receiving_slice_ready", GRPC_ERROR_REF(error));
}
grpc_byte_stream_destroy(exec_ctx, call->receiving_stream);
@@ -1355,8 +1355,7 @@ static void validate_filtered_metadata(grpc_exec_ctx *exec_ctx,
GPR_ASSERT(call->encodings_accepted_by_peer != 0);
if (!GPR_BITGET(call->encodings_accepted_by_peer,
call->incoming_compression_algorithm)) {
- extern int grpc_compression_trace;
- if (grpc_compression_trace) {
+ if (GRPC_TRACER_ON(grpc_compression_trace)) {
char *algo_name = NULL;
grpc_compression_algorithm_name(call->incoming_compression_algorithm,
&algo_name);
diff --git a/src/core/lib/surface/call.h b/src/core/lib/surface/call.h
index 7d4d0db28d..256a5fa2fe 100644
--- a/src/core/lib/surface/call.h
+++ b/src/core/lib/surface/call.h
@@ -117,7 +117,8 @@ void grpc_call_context_set(grpc_call *call, grpc_context_index elem,
void *grpc_call_context_get(grpc_call *call, grpc_context_index elem);
#define GRPC_CALL_LOG_BATCH(sev, call, ops, nops, tag) \
- if (grpc_api_trace) grpc_call_log_batch(sev, call, ops, nops, tag)
+ if (GRPC_TRACER_ON(grpc_api_trace)) \
+ grpc_call_log_batch(sev, call, ops, nops, tag)
uint8_t grpc_call_is_client(grpc_call *call);
@@ -126,7 +127,8 @@ uint8_t grpc_call_is_client(grpc_call *call);
grpc_compression_algorithm grpc_call_compression_for_level(
grpc_call *call, grpc_compression_level level);
-extern int grpc_call_error_trace;
+extern grpc_tracer_flag grpc_call_error_trace;
+extern grpc_tracer_flag grpc_compression_trace;
#ifdef __cplusplus
}
diff --git a/src/core/lib/surface/completion_queue.c b/src/core/lib/surface/completion_queue.c
index eae3f103b1..de905941c1 100644
--- a/src/core/lib/surface/completion_queue.c
+++ b/src/core/lib/surface/completion_queue.c
@@ -30,7 +30,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
-
#include "src/core/lib/surface/completion_queue.h"
#include <stdio.h>
@@ -45,14 +44,15 @@
#include "src/core/lib/iomgr/pollset.h"
#include "src/core/lib/iomgr/timer.h"
#include "src/core/lib/profiling/timers.h"
+#include "src/core/lib/support/spinlock.h"
#include "src/core/lib/support/string.h"
#include "src/core/lib/surface/api_trace.h"
#include "src/core/lib/surface/call.h"
#include "src/core/lib/surface/event_string.h"
-int grpc_trace_operation_failures;
+grpc_tracer_flag grpc_trace_operation_failures = GRPC_TRACER_INITIALIZER(false);
#ifndef NDEBUG
-int grpc_trace_pending_tags;
+grpc_tracer_flag grpc_trace_pending_tags = GRPC_TRACER_INITIALIZER(false);
#endif
typedef struct {
@@ -72,7 +72,7 @@ typedef struct {
gpr_timespec deadline);
void (*shutdown)(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
grpc_closure *closure);
- void (*destroy)(grpc_pollset *pollset);
+ void (*destroy)(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset);
} cq_poller_vtable;
typedef struct non_polling_worker {
@@ -98,7 +98,8 @@ static void non_polling_poller_init(grpc_pollset *pollset, gpr_mu **mu) {
*mu = &npp->mu;
}
-static void non_polling_poller_destroy(grpc_pollset *pollset) {
+static void non_polling_poller_destroy(grpc_exec_ctx *exec_ctx,
+ grpc_pollset *pollset) {
non_polling_poller *npp = (non_polling_poller *)pollset;
gpr_mu_destroy(&npp->mu);
}
@@ -200,33 +201,68 @@ static const cq_poller_vtable g_poller_vtable_by_poller_type[] = {
.destroy = non_polling_poller_destroy},
};
-/* Completion queue structure */
-struct grpc_completion_queue {
- /** owned by pollset */
+typedef struct cq_vtable {
+ grpc_cq_completion_type cq_completion_type;
+ size_t (*size)();
+ void (*begin_op)(grpc_completion_queue *cc, void *tag);
+ void (*end_op)(grpc_exec_ctx *exec_ctx, grpc_completion_queue *cc, void *tag,
+ grpc_error *error,
+ void (*done)(grpc_exec_ctx *exec_ctx, void *done_arg,
+ grpc_cq_completion *storage),
+ void *done_arg, grpc_cq_completion *storage);
+ grpc_event (*next)(grpc_completion_queue *cc, gpr_timespec deadline,
+ void *reserved);
+ grpc_event (*pluck)(grpc_completion_queue *cc, void *tag,
+ gpr_timespec deadline, void *reserved);
+} cq_vtable;
+
+/* Queue that holds the cq_completion_events. Internally uses gpr_mpscq queue
+ * (a lockfree multiproducer single consumer queue). It uses a queue_lock
+ * to support multiple consumers.
+ * Only used in completion queues whose completion_type is GRPC_CQ_NEXT */
+typedef struct grpc_cq_event_queue {
+ /* Spinlock to serialize consumers, i.e. pop() operations */
+ gpr_spinlock queue_lock;
+
+ gpr_mpscq queue;
+
+ /* A lazy counter of the number of items in the queue. This is NOT atomically
+ incremented/decremented along with push/pop operations and hence is only
+ eventually consistent */
+ gpr_atm num_queue_items;
+} grpc_cq_event_queue;
+
+/* TODO: sreek Refactor this based on the completion_type. Put completion-type
+ * specific data in a different structure (and co-allocate memory for it along
+ * with completion queue + pollset )*/
+typedef struct cq_data {
gpr_mu *mu;
- grpc_cq_completion_type completion_type;
-
- const cq_poller_vtable *poller_vtable;
-
- /** completed events */
+ /** Completed events for completion-queues of type GRPC_CQ_PLUCK */
grpc_cq_completion completed_head;
grpc_cq_completion *completed_tail;
+
+ /** Completed events for completion-queues of type GRPC_CQ_NEXT */
+ grpc_cq_event_queue queue;
+
/** Number of pending events (+1 if we're not shutdown) */
gpr_refcount pending_events;
+
/** Once owning_refs drops to zero, we will destroy the cq */
gpr_refcount owning_refs;
- /** counter of how many things have ever been queued on this completion queue
+
+ /** Counter of how many things have ever been queued on this completion queue
useful for avoiding locks to check the queue */
gpr_atm things_queued_ever;
+
/** 0 initially, 1 once we've begun shutting down */
- int shutdown;
+ gpr_atm shutdown;
int shutdown_called;
+
int is_server_cq;
- /** Can the server cq accept incoming channels */
- /* TODO: sreek - This will no longer be needed. Use polling_type set */
- int is_non_listening_server_cq;
+
int num_pluckers;
+ int num_polls;
plucker pluckers[GRPC_MAX_COMPLETION_QUEUE_PLUCKERS];
grpc_closure pollset_shutdown_done;
@@ -235,27 +271,122 @@ struct grpc_completion_queue {
size_t outstanding_tag_count;
size_t outstanding_tag_capacity;
#endif
+} cq_data;
- grpc_completion_queue *next_free;
+/* Completion queue structure */
+struct grpc_completion_queue {
+ cq_data data;
+ const cq_vtable *vtable;
+ const cq_poller_vtable *poller_vtable;
+};
+
+/* Forward declarations */
+static void cq_finish_shutdown(grpc_exec_ctx *exec_ctx,
+ grpc_completion_queue *cc);
+
+static size_t cq_size(grpc_completion_queue *cc);
+
+static void cq_begin_op(grpc_completion_queue *cc, void *tag);
+
+static void cq_end_op_for_next(grpc_exec_ctx *exec_ctx,
+ grpc_completion_queue *cc, void *tag,
+ grpc_error *error,
+ void (*done)(grpc_exec_ctx *exec_ctx,
+ void *done_arg,
+ grpc_cq_completion *storage),
+ void *done_arg, grpc_cq_completion *storage);
+
+static void cq_end_op_for_pluck(grpc_exec_ctx *exec_ctx,
+ grpc_completion_queue *cc, void *tag,
+ grpc_error *error,
+ void (*done)(grpc_exec_ctx *exec_ctx,
+ void *done_arg,
+ grpc_cq_completion *storage),
+ void *done_arg, grpc_cq_completion *storage);
+
+static grpc_event cq_next(grpc_completion_queue *cc, gpr_timespec deadline,
+ void *reserved);
+
+static grpc_event cq_pluck(grpc_completion_queue *cc, void *tag,
+ gpr_timespec deadline, void *reserved);
+
+/* Completion queue vtables based on the completion-type */
+static const cq_vtable g_cq_vtable[] = {
+ /* GRPC_CQ_NEXT */
+ {.cq_completion_type = GRPC_CQ_NEXT,
+ .size = cq_size,
+ .begin_op = cq_begin_op,
+ .end_op = cq_end_op_for_next,
+ .next = cq_next,
+ .pluck = NULL},
+ /* GRPC_CQ_PLUCK */
+ {.cq_completion_type = GRPC_CQ_PLUCK,
+ .size = cq_size,
+ .begin_op = cq_begin_op,
+ .end_op = cq_end_op_for_pluck,
+ .next = NULL,
+ .pluck = cq_pluck},
};
#define POLLSET_FROM_CQ(cq) ((grpc_pollset *)(cq + 1))
#define CQ_FROM_POLLSET(ps) (((grpc_completion_queue *)ps) - 1)
-int grpc_cq_pluck_trace;
-int grpc_cq_event_timeout_trace;
+grpc_tracer_flag grpc_cq_pluck_trace = GRPC_TRACER_INITIALIZER(true);
+grpc_tracer_flag grpc_cq_event_timeout_trace = GRPC_TRACER_INITIALIZER(true);
-#define GRPC_SURFACE_TRACE_RETURNED_EVENT(cq, event) \
- if (grpc_api_trace && \
- (grpc_cq_pluck_trace || (event)->type != GRPC_QUEUE_TIMEOUT)) { \
- char *_ev = grpc_event_string(event); \
- gpr_log(GPR_INFO, "RETURN_EVENT[%p]: %s", cq, _ev); \
- gpr_free(_ev); \
+#define GRPC_SURFACE_TRACE_RETURNED_EVENT(cq, event) \
+ if (GRPC_TRACER_ON(grpc_api_trace) && \
+ (GRPC_TRACER_ON(grpc_cq_pluck_trace) || \
+ (event)->type != GRPC_QUEUE_TIMEOUT)) { \
+ char *_ev = grpc_event_string(event); \
+ gpr_log(GPR_INFO, "RETURN_EVENT[%p]: %s", cq, _ev); \
+ gpr_free(_ev); \
}
static void on_pollset_shutdown_done(grpc_exec_ctx *exec_ctx, void *cc,
grpc_error *error);
+static void cq_event_queue_init(grpc_cq_event_queue *q) {
+ gpr_mpscq_init(&q->queue);
+ q->queue_lock = GPR_SPINLOCK_INITIALIZER;
+ gpr_atm_no_barrier_store(&q->num_queue_items, 0);
+}
+
+static void cq_event_queue_destroy(grpc_cq_event_queue *q) {
+ gpr_mpscq_destroy(&q->queue);
+}
+
+static void cq_event_queue_push(grpc_cq_event_queue *q, grpc_cq_completion *c) {
+ gpr_mpscq_push(&q->queue, (gpr_mpscq_node *)c);
+ gpr_atm_no_barrier_fetch_add(&q->num_queue_items, 1);
+}
+
+static grpc_cq_completion *cq_event_queue_pop(grpc_cq_event_queue *q) {
+ grpc_cq_completion *c = NULL;
+ if (gpr_spinlock_trylock(&q->queue_lock)) {
+ c = (grpc_cq_completion *)gpr_mpscq_pop(&q->queue);
+ gpr_spinlock_unlock(&q->queue_lock);
+ }
+
+ if (c) {
+ gpr_atm_no_barrier_fetch_add(&q->num_queue_items, -1);
+ }
+
+ return c;
+}
+
+/* Note: The counter is not incremented/decremented atomically with push/pop.
+ * The count is only eventually consistent */
+static long cq_event_queue_num_items(grpc_cq_event_queue *q) {
+ return (long)gpr_atm_no_barrier_load(&q->num_queue_items);
+}
+
+static size_t cq_size(grpc_completion_queue *cc) {
+ /* Size of the completion queue and the size of the pollset whose memory is
+ allocated right after that of completion queue */
+ return sizeof(grpc_completion_queue) + cc->poller_vtable->size();
+}
+
grpc_completion_queue *grpc_completion_queue_create_internal(
grpc_cq_completion_type completion_type,
grpc_cq_polling_type polling_type) {
@@ -268,35 +399,40 @@ grpc_completion_queue *grpc_completion_queue_create_internal(
"polling_type=%d)",
2, (completion_type, polling_type));
+ const cq_vtable *vtable = &g_cq_vtable[completion_type];
const cq_poller_vtable *poller_vtable =
&g_poller_vtable_by_poller_type[polling_type];
cc = gpr_zalloc(sizeof(grpc_completion_queue) + poller_vtable->size());
- poller_vtable->init(POLLSET_FROM_CQ(cc), &cc->mu);
-#ifndef NDEBUG
- cc->outstanding_tags = NULL;
- cc->outstanding_tag_capacity = 0;
-#endif
+ cq_data *cqd = &cc->data;
- cc->completion_type = completion_type;
+ cc->vtable = vtable;
cc->poller_vtable = poller_vtable;
+ poller_vtable->init(POLLSET_FROM_CQ(cc), &cc->data.mu);
+
+#ifndef NDEBUG
+ cqd->outstanding_tags = NULL;
+ cqd->outstanding_tag_capacity = 0;
+#endif
+
/* Initial ref is dropped by grpc_completion_queue_shutdown */
- gpr_ref_init(&cc->pending_events, 1);
+ gpr_ref_init(&cqd->pending_events, 1);
/* One for destroy(), one for pollset_shutdown */
- gpr_ref_init(&cc->owning_refs, 2);
- cc->completed_tail = &cc->completed_head;
- cc->completed_head.next = (uintptr_t)cc->completed_tail;
- cc->shutdown = 0;
- cc->shutdown_called = 0;
- cc->is_server_cq = 0;
- cc->is_non_listening_server_cq = 0;
- cc->num_pluckers = 0;
- gpr_atm_no_barrier_store(&cc->things_queued_ever, 0);
+ gpr_ref_init(&cqd->owning_refs, 2);
+ cqd->completed_tail = &cqd->completed_head;
+ cqd->completed_head.next = (uintptr_t)cqd->completed_tail;
+ gpr_atm_no_barrier_store(&cqd->shutdown, 0);
+ cqd->shutdown_called = 0;
+ cqd->is_server_cq = 0;
+ cqd->num_pluckers = 0;
+ cqd->num_polls = 0;
+ gpr_atm_no_barrier_store(&cqd->things_queued_ever, 0);
#ifndef NDEBUG
- cc->outstanding_tag_count = 0;
+ cqd->outstanding_tag_count = 0;
#endif
- grpc_closure_init(&cc->pollset_shutdown_done, on_pollset_shutdown_done, cc,
+ cq_event_queue_init(&cqd->queue);
+ grpc_closure_init(&cqd->pollset_shutdown_done, on_pollset_shutdown_done, cc,
grpc_schedule_on_exec_ctx);
GPR_TIMER_END("grpc_completion_queue_create_internal", 0);
@@ -305,123 +441,227 @@ grpc_completion_queue *grpc_completion_queue_create_internal(
}
grpc_cq_completion_type grpc_get_cq_completion_type(grpc_completion_queue *cc) {
- return cc->completion_type;
+ return cc->vtable->cq_completion_type;
+}
+
+int grpc_get_cq_poll_num(grpc_completion_queue *cc) {
+ int cur_num_polls;
+ gpr_mu_lock(cc->data.mu);
+ cur_num_polls = cc->data.num_polls;
+ gpr_mu_unlock(cc->data.mu);
+ return cur_num_polls;
}
#ifdef GRPC_CQ_REF_COUNT_DEBUG
void grpc_cq_internal_ref(grpc_completion_queue *cc, const char *reason,
const char *file, int line) {
+ cq_data *cqd = &cc->data;
gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, "CQ:%p ref %d -> %d %s", cc,
- (int)cc->owning_refs.count, (int)cc->owning_refs.count + 1, reason);
+ (int)cqd->owning_refs.count, (int)cqd->owning_refs.count + 1, reason);
#else
void grpc_cq_internal_ref(grpc_completion_queue *cc) {
+ cq_data *cqd = &cc->data;
#endif
- gpr_ref(&cc->owning_refs);
+ gpr_ref(&cqd->owning_refs);
}
static void on_pollset_shutdown_done(grpc_exec_ctx *exec_ctx, void *arg,
grpc_error *error) {
grpc_completion_queue *cc = arg;
- GRPC_CQ_INTERNAL_UNREF(cc, "pollset_destroy");
+ GRPC_CQ_INTERNAL_UNREF(exec_ctx, cc, "pollset_destroy");
}
#ifdef GRPC_CQ_REF_COUNT_DEBUG
void grpc_cq_internal_unref(grpc_completion_queue *cc, const char *reason,
const char *file, int line) {
+ cq_data *cqd = &cc->data;
gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, "CQ:%p unref %d -> %d %s", cc,
- (int)cc->owning_refs.count, (int)cc->owning_refs.count - 1, reason);
+ (int)cqd->owning_refs.count, (int)cqd->owning_refs.count - 1, reason);
#else
-void grpc_cq_internal_unref(grpc_completion_queue *cc) {
+void grpc_cq_internal_unref(grpc_exec_ctx *exec_ctx,
+ grpc_completion_queue *cc) {
+ cq_data *cqd = &cc->data;
#endif
- if (gpr_unref(&cc->owning_refs)) {
- GPR_ASSERT(cc->completed_head.next == (uintptr_t)&cc->completed_head);
- cc->poller_vtable->destroy(POLLSET_FROM_CQ(cc));
+ if (gpr_unref(&cqd->owning_refs)) {
+ GPR_ASSERT(cqd->completed_head.next == (uintptr_t)&cqd->completed_head);
+ cc->poller_vtable->destroy(exec_ctx, POLLSET_FROM_CQ(cc));
+ cq_event_queue_destroy(&cqd->queue);
#ifndef NDEBUG
- gpr_free(cc->outstanding_tags);
+ gpr_free(cqd->outstanding_tags);
#endif
gpr_free(cc);
}
}
-void grpc_cq_begin_op(grpc_completion_queue *cc, void *tag) {
+static void cq_begin_op(grpc_completion_queue *cc, void *tag) {
+ cq_data *cqd = &cc->data;
#ifndef NDEBUG
- gpr_mu_lock(cc->mu);
- GPR_ASSERT(!cc->shutdown_called);
- if (cc->outstanding_tag_count == cc->outstanding_tag_capacity) {
- cc->outstanding_tag_capacity = GPR_MAX(4, 2 * cc->outstanding_tag_capacity);
- cc->outstanding_tags =
- gpr_realloc(cc->outstanding_tags, sizeof(*cc->outstanding_tags) *
- cc->outstanding_tag_capacity);
+ gpr_mu_lock(cqd->mu);
+ GPR_ASSERT(!cqd->shutdown_called);
+ if (cqd->outstanding_tag_count == cqd->outstanding_tag_capacity) {
+ cqd->outstanding_tag_capacity =
+ GPR_MAX(4, 2 * cqd->outstanding_tag_capacity);
+ cqd->outstanding_tags =
+ gpr_realloc(cqd->outstanding_tags, sizeof(*cqd->outstanding_tags) *
+ cqd->outstanding_tag_capacity);
}
- cc->outstanding_tags[cc->outstanding_tag_count++] = tag;
- gpr_mu_unlock(cc->mu);
+ cqd->outstanding_tags[cqd->outstanding_tag_count++] = tag;
+ gpr_mu_unlock(cqd->mu);
#endif
- gpr_ref(&cc->pending_events);
+ gpr_ref(&cqd->pending_events);
+}
+
+void grpc_cq_begin_op(grpc_completion_queue *cc, void *tag) {
+ cc->vtable->begin_op(cc, tag);
}
-/* Signal the end of an operation - if this is the last waiting-to-be-queued
- event, then enter shutdown mode */
-/* Queue a GRPC_OP_COMPLETED operation */
-void grpc_cq_end_op(grpc_exec_ctx *exec_ctx, grpc_completion_queue *cc,
- void *tag, grpc_error *error,
- void (*done)(grpc_exec_ctx *exec_ctx, void *done_arg,
- grpc_cq_completion *storage),
- void *done_arg, grpc_cq_completion *storage) {
- int shutdown;
- int i;
- grpc_pollset_worker *pluck_worker;
#ifndef NDEBUG
+static void cq_check_tag(grpc_completion_queue *cc, void *tag, bool lock_cq) {
+ cq_data *cqd = &cc->data;
int found = 0;
+ if (lock_cq) {
+ gpr_mu_lock(cqd->mu);
+ }
+
+ for (int i = 0; i < (int)cqd->outstanding_tag_count; i++) {
+ if (cqd->outstanding_tags[i] == tag) {
+ cqd->outstanding_tag_count--;
+ GPR_SWAP(void *, cqd->outstanding_tags[i],
+ cqd->outstanding_tags[cqd->outstanding_tag_count]);
+ found = 1;
+ break;
+ }
+ }
+
+ if (lock_cq) {
+ gpr_mu_unlock(cqd->mu);
+ }
+
+ GPR_ASSERT(found);
+}
+#else
+static void cq_check_tag(grpc_completion_queue *cc, void *tag, bool lock_cq) {}
#endif
- GPR_TIMER_BEGIN("grpc_cq_end_op", 0);
- if (grpc_api_trace ||
- (grpc_trace_operation_failures && error != GRPC_ERROR_NONE)) {
+/* Queue a GRPC_OP_COMPLETED operation to a completion queue (with a completion
+ * type of GRPC_CQ_NEXT) */
+static void cq_end_op_for_next(grpc_exec_ctx *exec_ctx,
+ grpc_completion_queue *cc, void *tag,
+ grpc_error *error,
+ void (*done)(grpc_exec_ctx *exec_ctx,
+ void *done_arg,
+ grpc_cq_completion *storage),
+ void *done_arg, grpc_cq_completion *storage) {
+ GPR_TIMER_BEGIN("cq_end_op_for_next", 0);
+
+ if (GRPC_TRACER_ON(grpc_api_trace) ||
+ (GRPC_TRACER_ON(grpc_trace_operation_failures) &&
+ error != GRPC_ERROR_NONE)) {
const char *errmsg = grpc_error_string(error);
GRPC_API_TRACE(
- "grpc_cq_end_op(exec_ctx=%p, cc=%p, tag=%p, error=%s, done=%p, "
- "done_arg=%p, storage=%p)",
+ "cq_end_op_for_next(exec_ctx=%p, cc=%p, tag=%p, error=%s, "
+ "done=%p, done_arg=%p, storage=%p)",
7, (exec_ctx, cc, tag, errmsg, done, done_arg, storage));
- if (grpc_trace_operation_failures && error != GRPC_ERROR_NONE) {
+ if (GRPC_TRACER_ON(grpc_trace_operation_failures) &&
+ error != GRPC_ERROR_NONE) {
gpr_log(GPR_ERROR, "Operation failed: tag=%p, error=%s", tag, errmsg);
}
}
+ cq_data *cqd = &cc->data;
+ int is_success = (error == GRPC_ERROR_NONE);
+
storage->tag = tag;
storage->done = done;
storage->done_arg = done_arg;
- storage->next = ((uintptr_t)&cc->completed_head) |
- ((uintptr_t)(error == GRPC_ERROR_NONE));
+ storage->next = (uintptr_t)(is_success);
- gpr_mu_lock(cc->mu);
-#ifndef NDEBUG
- for (i = 0; i < (int)cc->outstanding_tag_count; i++) {
- if (cc->outstanding_tags[i] == tag) {
- cc->outstanding_tag_count--;
- GPR_SWAP(void *, cc->outstanding_tags[i],
- cc->outstanding_tags[cc->outstanding_tag_count]);
- found = 1;
- break;
+ cq_check_tag(cc, tag, true); /* Used in debug builds only */
+
+ /* Add the completion to the queue */
+ cq_event_queue_push(&cqd->queue, storage);
+ gpr_atm_no_barrier_fetch_add(&cqd->things_queued_ever, 1);
+
+ gpr_mu_lock(cqd->mu);
+
+ int shutdown = gpr_unref(&cqd->pending_events);
+ if (!shutdown) {
+ grpc_error *kick_error = cc->poller_vtable->kick(POLLSET_FROM_CQ(cc), NULL);
+ gpr_mu_unlock(cqd->mu);
+
+ if (kick_error != GRPC_ERROR_NONE) {
+ const char *msg = grpc_error_string(kick_error);
+ gpr_log(GPR_ERROR, "Kick failed: %s", msg);
+
+ GRPC_ERROR_UNREF(kick_error);
}
+ } else {
+ cq_finish_shutdown(exec_ctx, cc);
+ gpr_mu_unlock(cqd->mu);
}
- GPR_ASSERT(found);
-#endif
- shutdown = gpr_unref(&cc->pending_events);
- gpr_atm_no_barrier_fetch_add(&cc->things_queued_ever, 1);
+
+ GPR_TIMER_END("cq_end_op_for_next", 0);
+
+ GRPC_ERROR_UNREF(error);
+}
+
+/* Queue a GRPC_OP_COMPLETED operation to a completion queue (with a completion
+ * type of GRPC_CQ_PLUCK) */
+static void cq_end_op_for_pluck(grpc_exec_ctx *exec_ctx,
+ grpc_completion_queue *cc, void *tag,
+ grpc_error *error,
+ void (*done)(grpc_exec_ctx *exec_ctx,
+ void *done_arg,
+ grpc_cq_completion *storage),
+ void *done_arg, grpc_cq_completion *storage) {
+ cq_data *cqd = &cc->data;
+ int is_success = (error == GRPC_ERROR_NONE);
+
+ GPR_TIMER_BEGIN("cq_end_op_for_pluck", 0);
+
+ if (GRPC_TRACER_ON(grpc_api_trace) ||
+ (GRPC_TRACER_ON(grpc_trace_operation_failures) &&
+ error != GRPC_ERROR_NONE)) {
+ const char *errmsg = grpc_error_string(error);
+ GRPC_API_TRACE(
+ "cq_end_op_for_pluck(exec_ctx=%p, cc=%p, tag=%p, error=%s, "
+ "done=%p, done_arg=%p, storage=%p)",
+ 7, (exec_ctx, cc, tag, errmsg, done, done_arg, storage));
+ if (GRPC_TRACER_ON(grpc_trace_operation_failures) &&
+ error != GRPC_ERROR_NONE) {
+ gpr_log(GPR_ERROR, "Operation failed: tag=%p, error=%s", tag, errmsg);
+ }
+ }
+
+ storage->tag = tag;
+ storage->done = done;
+ storage->done_arg = done_arg;
+ storage->next = ((uintptr_t)&cqd->completed_head) | ((uintptr_t)(is_success));
+
+ gpr_mu_lock(cqd->mu);
+ cq_check_tag(cc, tag, false); /* Used in debug builds only */
+
+ /* Add to the list of completions */
+ gpr_atm_no_barrier_fetch_add(&cqd->things_queued_ever, 1);
+ cqd->completed_tail->next =
+ ((uintptr_t)storage) | (1u & (uintptr_t)cqd->completed_tail->next);
+ cqd->completed_tail = storage;
+
+ int shutdown = gpr_unref(&cqd->pending_events);
if (!shutdown) {
- cc->completed_tail->next =
- ((uintptr_t)storage) | (1u & (uintptr_t)cc->completed_tail->next);
- cc->completed_tail = storage;
- pluck_worker = NULL;
- for (i = 0; i < cc->num_pluckers; i++) {
- if (cc->pluckers[i].tag == tag) {
- pluck_worker = *cc->pluckers[i].worker;
+ grpc_pollset_worker *pluck_worker = NULL;
+ for (int i = 0; i < cqd->num_pluckers; i++) {
+ if (cqd->pluckers[i].tag == tag) {
+ pluck_worker = *cqd->pluckers[i].worker;
break;
}
}
+
grpc_error *kick_error =
cc->poller_vtable->kick(POLLSET_FROM_CQ(cc), pluck_worker);
- gpr_mu_unlock(cc->mu);
+
+ gpr_mu_unlock(cqd->mu);
+
if (kick_error != GRPC_ERROR_NONE) {
const char *msg = grpc_error_string(kick_error);
gpr_log(GPR_ERROR, "Kick failed: %s", msg);
@@ -429,22 +669,23 @@ void grpc_cq_end_op(grpc_exec_ctx *exec_ctx, grpc_completion_queue *cc,
GRPC_ERROR_UNREF(kick_error);
}
} else {
- cc->completed_tail->next =
- ((uintptr_t)storage) | (1u & (uintptr_t)cc->completed_tail->next);
- cc->completed_tail = storage;
- GPR_ASSERT(!cc->shutdown);
- GPR_ASSERT(cc->shutdown_called);
- cc->shutdown = 1;
- cc->poller_vtable->shutdown(exec_ctx, POLLSET_FROM_CQ(cc),
- &cc->pollset_shutdown_done);
- gpr_mu_unlock(cc->mu);
+ cq_finish_shutdown(exec_ctx, cc);
+ gpr_mu_unlock(cqd->mu);
}
- GPR_TIMER_END("grpc_cq_end_op", 0);
+ GPR_TIMER_END("cq_end_op_for_pluck", 0);
GRPC_ERROR_UNREF(error);
}
+void grpc_cq_end_op(grpc_exec_ctx *exec_ctx, grpc_completion_queue *cc,
+ void *tag, grpc_error *error,
+ void (*done)(grpc_exec_ctx *exec_ctx, void *done_arg,
+ grpc_cq_completion *storage),
+ void *done_arg, grpc_cq_completion *storage) {
+ cc->vtable->end_op(exec_ctx, cc, tag, error, done, done_arg, storage);
+}
+
typedef struct {
gpr_atm last_seen_things_queued_ever;
grpc_completion_queue *cq;
@@ -457,23 +698,24 @@ typedef struct {
static bool cq_is_next_finished(grpc_exec_ctx *exec_ctx, void *arg) {
cq_is_finished_arg *a = arg;
grpc_completion_queue *cq = a->cq;
+ cq_data *cqd = &cq->data;
GPR_ASSERT(a->stolen_completion == NULL);
+
gpr_atm current_last_seen_things_queued_ever =
- gpr_atm_no_barrier_load(&cq->things_queued_ever);
+ gpr_atm_no_barrier_load(&cqd->things_queued_ever);
+
if (current_last_seen_things_queued_ever != a->last_seen_things_queued_ever) {
- gpr_mu_lock(cq->mu);
a->last_seen_things_queued_ever =
- gpr_atm_no_barrier_load(&cq->things_queued_ever);
- if (cq->completed_tail != &cq->completed_head) {
- a->stolen_completion = (grpc_cq_completion *)cq->completed_head.next;
- cq->completed_head.next = a->stolen_completion->next & ~(uintptr_t)1;
- if (a->stolen_completion == cq->completed_tail) {
- cq->completed_tail = &cq->completed_head;
- }
- gpr_mu_unlock(cq->mu);
+ gpr_atm_no_barrier_load(&cqd->things_queued_ever);
+
+ /* Pop a cq_completion from the queue. Returns NULL if the queue is empty
+ * (might return NULL in some cases even if the queue is not empty; but that
+ * is ok and doesn't affect correctness. Might affect the tail latencies a
+ * bit) */
+ a->stolen_completion = cq_event_queue_pop(&cqd->queue);
+ if (a->stolen_completion != NULL) {
return true;
}
- gpr_mu_unlock(cq->mu);
}
return !a->first_loop &&
gpr_time_cmp(a->deadline, gpr_now(a->deadline.clock_type)) < 0;
@@ -481,18 +723,20 @@ static bool cq_is_next_finished(grpc_exec_ctx *exec_ctx, void *arg) {
#ifndef NDEBUG
static void dump_pending_tags(grpc_completion_queue *cc) {
- if (!grpc_trace_pending_tags) return;
+ if (!GRPC_TRACER_ON(grpc_trace_pending_tags)) return;
+
+ cq_data *cqd = &cc->data;
gpr_strvec v;
gpr_strvec_init(&v);
gpr_strvec_add(&v, gpr_strdup("PENDING TAGS:"));
- gpr_mu_lock(cc->mu);
- for (size_t i = 0; i < cc->outstanding_tag_count; i++) {
+ gpr_mu_lock(cqd->mu);
+ for (size_t i = 0; i < cqd->outstanding_tag_count; i++) {
char *s;
- gpr_asprintf(&s, " %p", cc->outstanding_tags[i]);
+ gpr_asprintf(&s, " %p", cqd->outstanding_tags[i]);
gpr_strvec_add(&v, s);
}
- gpr_mu_unlock(cc->mu);
+ gpr_mu_unlock(cqd->mu);
char *out = gpr_strvec_flatten(&v, NULL);
gpr_strvec_destroy(&v);
gpr_log(GPR_DEBUG, "%s", out);
@@ -502,17 +746,11 @@ static void dump_pending_tags(grpc_completion_queue *cc) {
static void dump_pending_tags(grpc_completion_queue *cc) {}
#endif
-grpc_event grpc_completion_queue_next(grpc_completion_queue *cc,
- gpr_timespec deadline, void *reserved) {
+static grpc_event cq_next(grpc_completion_queue *cc, gpr_timespec deadline,
+ void *reserved) {
grpc_event ret;
gpr_timespec now;
-
- if (cc->completion_type != GRPC_CQ_NEXT) {
- gpr_log(GPR_ERROR,
- "grpc_completion_queue_next() cannot be called on this completion "
- "queue since its completion type is not GRPC_CQ_NEXT");
- abort();
- }
+ cq_data *cqd = &cc->data;
GPR_TIMER_BEGIN("grpc_completion_queue_next", 0);
@@ -531,10 +769,10 @@ grpc_event grpc_completion_queue_next(grpc_completion_queue *cc,
deadline = gpr_convert_clock_type(deadline, GPR_CLOCK_MONOTONIC);
GRPC_CQ_INTERNAL_REF(cc, "next");
- gpr_mu_lock(cc->mu);
+
cq_is_finished_arg is_finished_arg = {
.last_seen_things_queued_ever =
- gpr_atm_no_barrier_load(&cc->things_queued_ever),
+ gpr_atm_no_barrier_load(&cqd->things_queued_ever),
.cq = cc,
.deadline = deadline,
.stolen_completion = NULL,
@@ -542,9 +780,11 @@ grpc_event grpc_completion_queue_next(grpc_completion_queue *cc,
.first_loop = true};
grpc_exec_ctx exec_ctx =
GRPC_EXEC_CTX_INITIALIZER(0, cq_is_next_finished, &is_finished_arg);
+
for (;;) {
+ gpr_timespec iteration_deadline = deadline;
+
if (is_finished_arg.stolen_completion != NULL) {
- gpr_mu_unlock(cc->mu);
grpc_cq_completion *c = is_finished_arg.stolen_completion;
is_finished_arg.stolen_completion = NULL;
ret.type = GRPC_OP_COMPLETE;
@@ -553,63 +793,73 @@ grpc_event grpc_completion_queue_next(grpc_completion_queue *cc,
c->done(&exec_ctx, c->done_arg, c);
break;
}
- if (cc->completed_tail != &cc->completed_head) {
- grpc_cq_completion *c = (grpc_cq_completion *)cc->completed_head.next;
- cc->completed_head.next = c->next & ~(uintptr_t)1;
- if (c == cc->completed_tail) {
- cc->completed_tail = &cc->completed_head;
- }
- gpr_mu_unlock(cc->mu);
+
+ grpc_cq_completion *c = cq_event_queue_pop(&cqd->queue);
+
+ if (c != NULL) {
ret.type = GRPC_OP_COMPLETE;
ret.success = c->next & 1u;
ret.tag = c->tag;
c->done(&exec_ctx, c->done_arg, c);
break;
+ } else {
+ /* If c == NULL it means either the queue is empty OR in a transient
+ inconsistent state. If it is the latter, we should do a 0-timeout poll
+ so that the thread comes back quickly from poll to make a second
+ attempt at popping. Not doing this can potentially deadlock this thread
+ forever (if the deadline is infinity) */
+ if (cq_event_queue_num_items(&cqd->queue) > 0) {
+ iteration_deadline = gpr_time_0(GPR_CLOCK_MONOTONIC);
+ }
}
- if (cc->shutdown) {
- gpr_mu_unlock(cc->mu);
+
+ if (gpr_atm_no_barrier_load(&cqd->shutdown)) {
+ /* Before returning, check if the queue has any items left over (since
+ gpr_mpscq_pop() can sometimes return NULL even if the queue is not
+ empty). If so, keep retrying but do not return GRPC_QUEUE_SHUTDOWN */
+ if (cq_event_queue_num_items(&cqd->queue) > 0) {
+ /* Go to the beginning of the loop. No point doing a poll because
+ (cqd->shutdown == true) is only possible when there is no pending work
+ (i.e. cqd->pending_events == 0) and any outstanding grpc_cq_completion
+ events are already queued on this cq */
+ continue;
+ }
+
memset(&ret, 0, sizeof(ret));
ret.type = GRPC_QUEUE_SHUTDOWN;
break;
}
+
now = gpr_now(GPR_CLOCK_MONOTONIC);
if (!is_finished_arg.first_loop && gpr_time_cmp(now, deadline) >= 0) {
- gpr_mu_unlock(cc->mu);
memset(&ret, 0, sizeof(ret));
ret.type = GRPC_QUEUE_TIMEOUT;
dump_pending_tags(cc);
break;
}
- /* Check alarms - these are a global resource so we just ping
- each time through on every pollset.
- May update deadline to ensure timely wakeups.
- TODO(ctiller): can this work be localized? */
- gpr_timespec iteration_deadline = deadline;
- if (grpc_timer_check(&exec_ctx, now, &iteration_deadline)) {
- GPR_TIMER_MARK("alarm_triggered", 0);
- gpr_mu_unlock(cc->mu);
- grpc_exec_ctx_flush(&exec_ctx);
- gpr_mu_lock(cc->mu);
- continue;
- } else {
- grpc_error *err = cc->poller_vtable->work(&exec_ctx, POLLSET_FROM_CQ(cc),
- NULL, now, iteration_deadline);
- if (err != GRPC_ERROR_NONE) {
- gpr_mu_unlock(cc->mu);
- const char *msg = grpc_error_string(err);
- gpr_log(GPR_ERROR, "Completion queue next failed: %s", msg);
-
- GRPC_ERROR_UNREF(err);
- memset(&ret, 0, sizeof(ret));
- ret.type = GRPC_QUEUE_TIMEOUT;
- dump_pending_tags(cc);
- break;
- }
+
+ /* The main polling work happens in grpc_pollset_work */
+ gpr_mu_lock(cqd->mu);
+ cqd->num_polls++;
+ grpc_error *err = cc->poller_vtable->work(&exec_ctx, POLLSET_FROM_CQ(cc),
+ NULL, now, iteration_deadline);
+ gpr_mu_unlock(cqd->mu);
+
+ if (err != GRPC_ERROR_NONE) {
+ const char *msg = grpc_error_string(err);
+ gpr_log(GPR_ERROR, "Completion queue next failed: %s", msg);
+
+ GRPC_ERROR_UNREF(err);
+ memset(&ret, 0, sizeof(ret));
+ ret.type = GRPC_QUEUE_TIMEOUT;
+ dump_pending_tags(cc);
+ break;
}
is_finished_arg.first_loop = false;
}
+
GRPC_SURFACE_TRACE_RETURNED_EVENT(cc, &ret);
- GRPC_CQ_INTERNAL_UNREF(cc, "next");
+ GRPC_CQ_INTERNAL_UNREF(&exec_ctx, cc, "next");
grpc_exec_ctx_finish(&exec_ctx);
GPR_ASSERT(is_finished_arg.stolen_completion == NULL);
@@ -618,24 +868,30 @@ grpc_event grpc_completion_queue_next(grpc_completion_queue *cc,
return ret;
}
+grpc_event grpc_completion_queue_next(grpc_completion_queue *cc,
+ gpr_timespec deadline, void *reserved) {
+ return cc->vtable->next(cc, deadline, reserved);
+}
+
static int add_plucker(grpc_completion_queue *cc, void *tag,
grpc_pollset_worker **worker) {
- if (cc->num_pluckers == GRPC_MAX_COMPLETION_QUEUE_PLUCKERS) {
+ cq_data *cqd = &cc->data;
+ if (cqd->num_pluckers == GRPC_MAX_COMPLETION_QUEUE_PLUCKERS) {
return 0;
}
- cc->pluckers[cc->num_pluckers].tag = tag;
- cc->pluckers[cc->num_pluckers].worker = worker;
- cc->num_pluckers++;
+ cqd->pluckers[cqd->num_pluckers].tag = tag;
+ cqd->pluckers[cqd->num_pluckers].worker = worker;
+ cqd->num_pluckers++;
return 1;
}
static void del_plucker(grpc_completion_queue *cc, void *tag,
grpc_pollset_worker **worker) {
- int i;
- for (i = 0; i < cc->num_pluckers; i++) {
- if (cc->pluckers[i].tag == tag && cc->pluckers[i].worker == worker) {
- cc->num_pluckers--;
- GPR_SWAP(plucker, cc->pluckers[i], cc->pluckers[cc->num_pluckers]);
+ cq_data *cqd = &cc->data;
+ for (int i = 0; i < cqd->num_pluckers; i++) {
+ if (cqd->pluckers[i].tag == tag && cqd->pluckers[i].worker == worker) {
+ cqd->num_pluckers--;
+ GPR_SWAP(plucker, cqd->pluckers[i], cqd->pluckers[cqd->num_pluckers]);
return;
}
}
@@ -645,52 +901,48 @@ static void del_plucker(grpc_completion_queue *cc, void *tag,
static bool cq_is_pluck_finished(grpc_exec_ctx *exec_ctx, void *arg) {
cq_is_finished_arg *a = arg;
grpc_completion_queue *cq = a->cq;
+ cq_data *cqd = &cq->data;
+
GPR_ASSERT(a->stolen_completion == NULL);
gpr_atm current_last_seen_things_queued_ever =
- gpr_atm_no_barrier_load(&cq->things_queued_ever);
+ gpr_atm_no_barrier_load(&cqd->things_queued_ever);
if (current_last_seen_things_queued_ever != a->last_seen_things_queued_ever) {
- gpr_mu_lock(cq->mu);
+ gpr_mu_lock(cqd->mu);
a->last_seen_things_queued_ever =
- gpr_atm_no_barrier_load(&cq->things_queued_ever);
+ gpr_atm_no_barrier_load(&cqd->things_queued_ever);
grpc_cq_completion *c;
- grpc_cq_completion *prev = &cq->completed_head;
+ grpc_cq_completion *prev = &cqd->completed_head;
while ((c = (grpc_cq_completion *)(prev->next & ~(uintptr_t)1)) !=
- &cq->completed_head) {
+ &cqd->completed_head) {
if (c->tag == a->tag) {
prev->next = (prev->next & (uintptr_t)1) | (c->next & ~(uintptr_t)1);
- if (c == cq->completed_tail) {
- cq->completed_tail = prev;
+ if (c == cqd->completed_tail) {
+ cqd->completed_tail = prev;
}
- gpr_mu_unlock(cq->mu);
+ gpr_mu_unlock(cqd->mu);
a->stolen_completion = c;
return true;
}
prev = c;
}
- gpr_mu_unlock(cq->mu);
+ gpr_mu_unlock(cqd->mu);
}
return !a->first_loop &&
gpr_time_cmp(a->deadline, gpr_now(a->deadline.clock_type)) < 0;
}
-grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
- gpr_timespec deadline, void *reserved) {
+static grpc_event cq_pluck(grpc_completion_queue *cc, void *tag,
+ gpr_timespec deadline, void *reserved) {
grpc_event ret;
grpc_cq_completion *c;
grpc_cq_completion *prev;
grpc_pollset_worker *worker = NULL;
gpr_timespec now;
+ cq_data *cqd = &cc->data;
GPR_TIMER_BEGIN("grpc_completion_queue_pluck", 0);
- if (cc->completion_type != GRPC_CQ_PLUCK) {
- gpr_log(GPR_ERROR,
- "grpc_completion_queue_pluck() cannot be called on this completion "
- "queue since its completion type is not GRPC_CQ_PLUCK");
- abort();
- }
-
- if (grpc_cq_pluck_trace) {
+ if (GRPC_TRACER_ON(grpc_cq_pluck_trace)) {
GRPC_API_TRACE(
"grpc_completion_queue_pluck("
"cc=%p, tag=%p, "
@@ -707,10 +959,10 @@ grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
deadline = gpr_convert_clock_type(deadline, GPR_CLOCK_MONOTONIC);
GRPC_CQ_INTERNAL_REF(cc, "pluck");
- gpr_mu_lock(cc->mu);
+ gpr_mu_lock(cqd->mu);
cq_is_finished_arg is_finished_arg = {
.last_seen_things_queued_ever =
- gpr_atm_no_barrier_load(&cc->things_queued_ever),
+ gpr_atm_no_barrier_load(&cqd->things_queued_ever),
.cq = cc,
.deadline = deadline,
.stolen_completion = NULL,
@@ -720,7 +972,7 @@ grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
GRPC_EXEC_CTX_INITIALIZER(0, cq_is_pluck_finished, &is_finished_arg);
for (;;) {
if (is_finished_arg.stolen_completion != NULL) {
- gpr_mu_unlock(cc->mu);
+ gpr_mu_unlock(cqd->mu);
c = is_finished_arg.stolen_completion;
is_finished_arg.stolen_completion = NULL;
ret.type = GRPC_OP_COMPLETE;
@@ -729,15 +981,15 @@ grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
c->done(&exec_ctx, c->done_arg, c);
break;
}
- prev = &cc->completed_head;
+ prev = &cqd->completed_head;
while ((c = (grpc_cq_completion *)(prev->next & ~(uintptr_t)1)) !=
- &cc->completed_head) {
+ &cqd->completed_head) {
if (c->tag == tag) {
prev->next = (prev->next & (uintptr_t)1) | (c->next & ~(uintptr_t)1);
- if (c == cc->completed_tail) {
- cc->completed_tail = prev;
+ if (c == cqd->completed_tail) {
+ cqd->completed_tail = prev;
}
- gpr_mu_unlock(cc->mu);
+ gpr_mu_unlock(cqd->mu);
ret.type = GRPC_OP_COMPLETE;
ret.success = c->next & 1u;
ret.tag = c->tag;
@@ -746,8 +998,8 @@ grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
}
prev = c;
}
- if (cc->shutdown) {
- gpr_mu_unlock(cc->mu);
+ if (gpr_atm_no_barrier_load(&cqd->shutdown)) {
+ gpr_mu_unlock(cqd->mu);
memset(&ret, 0, sizeof(ret));
ret.type = GRPC_QUEUE_SHUTDOWN;
break;
@@ -757,7 +1009,7 @@ grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
"Too many outstanding grpc_completion_queue_pluck calls: maximum "
"is %d",
GRPC_MAX_COMPLETION_QUEUE_PLUCKERS);
- gpr_mu_unlock(cc->mu);
+ gpr_mu_unlock(cqd->mu);
memset(&ret, 0, sizeof(ret));
/* TODO(ctiller): should we use a different result here */
ret.type = GRPC_QUEUE_TIMEOUT;
@@ -767,44 +1019,34 @@ grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
now = gpr_now(GPR_CLOCK_MONOTONIC);
if (!is_finished_arg.first_loop && gpr_time_cmp(now, deadline) >= 0) {
del_plucker(cc, tag, &worker);
- gpr_mu_unlock(cc->mu);
+ gpr_mu_unlock(cqd->mu);
memset(&ret, 0, sizeof(ret));
ret.type = GRPC_QUEUE_TIMEOUT;
dump_pending_tags(cc);
break;
}
- /* Check alarms - these are a global resource so we just ping
- each time through on every pollset.
- May update deadline to ensure timely wakeups.
- TODO(ctiller): can this work be localized? */
- gpr_timespec iteration_deadline = deadline;
- if (grpc_timer_check(&exec_ctx, now, &iteration_deadline)) {
- GPR_TIMER_MARK("alarm_triggered", 0);
- gpr_mu_unlock(cc->mu);
- grpc_exec_ctx_flush(&exec_ctx);
- gpr_mu_lock(cc->mu);
- } else {
- grpc_error *err = cc->poller_vtable->work(
- &exec_ctx, POLLSET_FROM_CQ(cc), &worker, now, iteration_deadline);
- if (err != GRPC_ERROR_NONE) {
- del_plucker(cc, tag, &worker);
- gpr_mu_unlock(cc->mu);
- const char *msg = grpc_error_string(err);
- gpr_log(GPR_ERROR, "Completion queue next failed: %s", msg);
-
- GRPC_ERROR_UNREF(err);
- memset(&ret, 0, sizeof(ret));
- ret.type = GRPC_QUEUE_TIMEOUT;
- dump_pending_tags(cc);
- break;
- }
+
+ cqd->num_polls++;
+ grpc_error *err = cc->poller_vtable->work(&exec_ctx, POLLSET_FROM_CQ(cc),
+ &worker, now, deadline);
+ if (err != GRPC_ERROR_NONE) {
+ del_plucker(cc, tag, &worker);
+ gpr_mu_unlock(cqd->mu);
+ const char *msg = grpc_error_string(err);
+ gpr_log(GPR_ERROR, "Completion queue pluck failed: %s", msg);
+
+ GRPC_ERROR_UNREF(err);
+ memset(&ret, 0, sizeof(ret));
+ ret.type = GRPC_QUEUE_TIMEOUT;
+ dump_pending_tags(cc);
+ break;
}
is_finished_arg.first_loop = false;
del_plucker(cc, tag, &worker);
}
done:
GRPC_SURFACE_TRACE_RETURNED_EVENT(cc, &ret);
- GRPC_CQ_INTERNAL_UNREF(cc, "pluck");
+ GRPC_CQ_INTERNAL_UNREF(&exec_ctx, cc, "pluck");
grpc_exec_ctx_finish(&exec_ctx);
GPR_ASSERT(is_finished_arg.stolen_completion == NULL);
@@ -813,26 +1055,48 @@ done:
return ret;
}
+grpc_event grpc_completion_queue_pluck(grpc_completion_queue *cc, void *tag,
+ gpr_timespec deadline, void *reserved) {
+ return cc->vtable->pluck(cc, tag, deadline, reserved);
+}
+
+/* Finishes the completion queue shutdown. This means that there are no more
+ completion events / tags expected from the completion queue
+ - Must be called under completion queue lock
+ - Must be called only once in completion queue's lifetime
+ - grpc_completion_queue_shutdown() MUST have been called before calling
+ this function */
+static void cq_finish_shutdown(grpc_exec_ctx *exec_ctx,
+ grpc_completion_queue *cc) {
+ cq_data *cqd = &cc->data;
+
+ GPR_ASSERT(cqd->shutdown_called);
+ GPR_ASSERT(!gpr_atm_no_barrier_load(&cqd->shutdown));
+ gpr_atm_no_barrier_store(&cqd->shutdown, 1);
+
+ cc->poller_vtable->shutdown(exec_ctx, POLLSET_FROM_CQ(cc),
+ &cqd->pollset_shutdown_done);
+}
+
/* Shutdown simply drops a ref that we reserved at creation time; if we drop
to zero here, then enter shutdown mode and wake up any waiters */
void grpc_completion_queue_shutdown(grpc_completion_queue *cc) {
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
GPR_TIMER_BEGIN("grpc_completion_queue_shutdown", 0);
GRPC_API_TRACE("grpc_completion_queue_shutdown(cc=%p)", 1, (cc));
- gpr_mu_lock(cc->mu);
- if (cc->shutdown_called) {
- gpr_mu_unlock(cc->mu);
+ cq_data *cqd = &cc->data;
+
+ gpr_mu_lock(cqd->mu);
+ if (cqd->shutdown_called) {
+ gpr_mu_unlock(cqd->mu);
GPR_TIMER_END("grpc_completion_queue_shutdown", 0);
return;
}
- cc->shutdown_called = 1;
- if (gpr_unref(&cc->pending_events)) {
- GPR_ASSERT(!cc->shutdown);
- cc->shutdown = 1;
- cc->poller_vtable->shutdown(&exec_ctx, POLLSET_FROM_CQ(cc),
- &cc->pollset_shutdown_done);
+ cqd->shutdown_called = 1;
+ if (gpr_unref(&cqd->pending_events)) {
+ cq_finish_shutdown(&exec_ctx, cc);
}
- gpr_mu_unlock(cc->mu);
+ gpr_mu_unlock(cqd->mu);
grpc_exec_ctx_finish(&exec_ctx);
GPR_TIMER_END("grpc_completion_queue_shutdown", 0);
}
@@ -841,7 +1105,16 @@ void grpc_completion_queue_destroy(grpc_completion_queue *cc) {
GRPC_API_TRACE("grpc_completion_queue_destroy(cc=%p)", 1, (cc));
GPR_TIMER_BEGIN("grpc_completion_queue_destroy", 0);
grpc_completion_queue_shutdown(cc);
- GRPC_CQ_INTERNAL_UNREF(cc, "destroy");
+
+ /* TODO (sreek): This should not ideally be here. Refactor it into the
+ * cq_vtable (perhaps have create/destroy methods in the cq vtable) */
+ if (cc->vtable->cq_completion_type == GRPC_CQ_NEXT) {
+ GPR_ASSERT(cq_event_queue_num_items(&cc->data.queue) == 0);
+ }
+
+ grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
+ GRPC_CQ_INTERNAL_UNREF(&exec_ctx, cc, "destroy");
+ grpc_exec_ctx_finish(&exec_ctx);
GPR_TIMER_END("grpc_completion_queue_destroy", 0);
}
@@ -853,22 +1126,12 @@ grpc_completion_queue *grpc_cq_from_pollset(grpc_pollset *ps) {
return CQ_FROM_POLLSET(ps);
}
-void grpc_cq_mark_non_listening_server_cq(grpc_completion_queue *cc) {
- /* TODO: sreek - use cc->polling_type field here and add a validation check
- (i.e grpc_cq_mark_non_listening_server_cq can only be called on a cc whose
- polling_type is set to GRPC_CQ_NON_LISTENING */
- cc->is_non_listening_server_cq = 1;
+void grpc_cq_mark_server_cq(grpc_completion_queue *cc) {
+ cc->data.is_server_cq = 1;
}
-bool grpc_cq_is_non_listening_server_cq(grpc_completion_queue *cc) {
- /* TODO (sreek) - return (cc->polling_type == GRPC_CQ_NON_LISTENING) */
- return (cc->is_non_listening_server_cq == 1);
-}
-
-void grpc_cq_mark_server_cq(grpc_completion_queue *cc) { cc->is_server_cq = 1; }
-
bool grpc_cq_is_server_cq(grpc_completion_queue *cc) {
- return cc->is_server_cq;
+ return cc->data.is_server_cq;
}
bool grpc_cq_can_listen(grpc_completion_queue *cc) {
diff --git a/src/core/lib/surface/completion_queue.h b/src/core/lib/surface/completion_queue.h
index a932087939..7963ea75e7 100644
--- a/src/core/lib/surface/completion_queue.h
+++ b/src/core/lib/surface/completion_queue.h
@@ -37,18 +37,25 @@
/* Internal API for completion queues */
#include <grpc/grpc.h>
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/iomgr/pollset.h"
/* These trace flags default to 1. The corresponding lines are only traced
if grpc_api_trace is also truthy */
-extern int grpc_cq_pluck_trace;
-extern int grpc_cq_event_timeout_trace;
-extern int grpc_trace_operation_failures;
+extern grpc_tracer_flag grpc_cq_pluck_trace;
+extern grpc_tracer_flag grpc_cq_event_timeout_trace;
+extern grpc_tracer_flag grpc_trace_operation_failures;
#ifndef NDEBUG
-extern int grpc_trace_pending_tags;
+extern grpc_tracer_flag grpc_trace_pending_tags;
+#endif
+
+#ifdef __cplusplus
+extern "C" {
#endif
typedef struct grpc_cq_completion {
+ gpr_mpscq_node node;
+
/** user supplied tag */
void *tag;
/** done callback - called when this queue element is no longer
@@ -65,17 +72,17 @@ typedef struct grpc_cq_completion {
#ifdef GRPC_CQ_REF_COUNT_DEBUG
void grpc_cq_internal_ref(grpc_completion_queue *cc, const char *reason,
const char *file, int line);
-void grpc_cq_internal_unref(grpc_completion_queue *cc, const char *reason,
- const char *file, int line);
+void grpc_cq_internal_unref(grpc_exec_ctx *exec_ctx, grpc_completion_queue *cc,
+ const char *reason, const char *file, int line);
#define GRPC_CQ_INTERNAL_REF(cc, reason) \
grpc_cq_internal_ref(cc, reason, __FILE__, __LINE__)
-#define GRPC_CQ_INTERNAL_UNREF(cc, reason) \
- grpc_cq_internal_unref(cc, reason, __FILE__, __LINE__)
+#define GRPC_CQ_INTERNAL_UNREF(ec, cc, reason) \
+ grpc_cq_internal_unref(ec, cc, reason, __FILE__, __LINE__)
#else
void grpc_cq_internal_ref(grpc_completion_queue *cc);
-void grpc_cq_internal_unref(grpc_completion_queue *cc);
+void grpc_cq_internal_unref(grpc_exec_ctx *exec_ctx, grpc_completion_queue *cc);
#define GRPC_CQ_INTERNAL_REF(cc, reason) grpc_cq_internal_ref(cc)
-#define GRPC_CQ_INTERNAL_UNREF(cc, reason) grpc_cq_internal_unref(cc)
+#define GRPC_CQ_INTERNAL_UNREF(ec, cc, reason) grpc_cq_internal_unref(ec, cc)
#endif
/* Flag that an operation is beginning: the completion channel will not finish
@@ -100,7 +107,13 @@ bool grpc_cq_can_listen(grpc_completion_queue *cc);
grpc_cq_completion_type grpc_get_cq_completion_type(grpc_completion_queue *cc);
+int grpc_get_cq_poll_num(grpc_completion_queue *cc);
+
grpc_completion_queue *grpc_completion_queue_create_internal(
grpc_cq_completion_type completion_type, grpc_cq_polling_type polling_type);
+#ifdef __cplusplus
+}
+#endif
+
#endif /* GRPC_CORE_LIB_SURFACE_COMPLETION_QUEUE_H */
diff --git a/src/core/lib/surface/init.c b/src/core/lib/surface/init.c
index 4b381b1954..6163776152 100644
--- a/src/core/lib/surface/init.c
+++ b/src/core/lib/surface/init.c
@@ -145,10 +145,8 @@ void grpc_init(void) {
grpc_register_tracer("server_channel", &grpc_server_channel_trace);
grpc_register_tracer("bdp_estimator", &grpc_bdp_estimator_trace);
// Default pluck trace to 1
- grpc_cq_pluck_trace = 1;
grpc_register_tracer("queue_timeout", &grpc_cq_event_timeout_trace);
// Default timeout trace to 1
- grpc_cq_event_timeout_trace = 1;
grpc_register_tracer("op_failure", &grpc_trace_operation_failures);
grpc_register_tracer("resource_quota", &grpc_resource_quota_trace);
grpc_register_tracer("call_error", &grpc_call_error_trace);
@@ -173,6 +171,7 @@ void grpc_init(void) {
grpc_tracer_init("GRPC_TRACE");
/* no more changes to channel init pipelines */
grpc_channel_init_finalize();
+ grpc_iomgr_start();
}
gpr_mu_unlock(&g_init_mu);
GRPC_API_TRACE("grpc_init(void)", 0, ());
diff --git a/src/core/lib/surface/server.c b/src/core/lib/surface/server.c
index 934ca0431a..7e4ae421a0 100644
--- a/src/core/lib/surface/server.c
+++ b/src/core/lib/surface/server.c
@@ -47,7 +47,8 @@
#include "src/core/lib/iomgr/executor.h"
#include "src/core/lib/iomgr/iomgr.h"
#include "src/core/lib/slice/slice_internal.h"
-#include "src/core/lib/support/stack_lockfree.h"
+#include "src/core/lib/support/mpscq.h"
+#include "src/core/lib/support/spinlock.h"
#include "src/core/lib/support/string.h"
#include "src/core/lib/surface/api_trace.h"
#include "src/core/lib/surface/call.h"
@@ -73,9 +74,10 @@ typedef struct registered_method registered_method;
typedef enum { BATCH_CALL, REGISTERED_CALL } requested_call_type;
-int grpc_server_channel_trace = 0;
+grpc_tracer_flag grpc_server_channel_trace = GRPC_TRACER_INITIALIZER(false);
typedef struct requested_call {
+ gpr_mpscq_node request_link; /* must be first */
requested_call_type type;
size_t cq_idx;
void *tag;
@@ -175,7 +177,7 @@ struct request_matcher {
grpc_server *server;
call_data *pending_head;
call_data *pending_tail;
- gpr_stack_lockfree **requests_per_cq;
+ gpr_locked_mpscq *requests_per_cq;
};
struct registered_method {
@@ -220,11 +222,6 @@ struct grpc_server {
registered_method *registered_methods;
/** one request matcher for unregistered methods */
request_matcher unregistered_request_matcher;
- /** free list of available requested_calls_per_cq indices */
- gpr_stack_lockfree **request_freelist_per_cq;
- /** requested call backing data */
- requested_call **requested_calls_per_cq;
- int max_requested_calls_per_cq;
gpr_atm shutdown_flag;
uint8_t shutdown_published;
@@ -324,21 +321,20 @@ static void channel_broadcaster_shutdown(grpc_exec_ctx *exec_ctx,
* request_matcher
*/
-static void request_matcher_init(request_matcher *rm, size_t entries,
- grpc_server *server) {
+static void request_matcher_init(request_matcher *rm, grpc_server *server) {
memset(rm, 0, sizeof(*rm));
rm->server = server;
rm->requests_per_cq =
gpr_malloc(sizeof(*rm->requests_per_cq) * server->cq_count);
for (size_t i = 0; i < server->cq_count; i++) {
- rm->requests_per_cq[i] = gpr_stack_lockfree_create(entries);
+ gpr_locked_mpscq_init(&rm->requests_per_cq[i]);
}
}
static void request_matcher_destroy(request_matcher *rm) {
for (size_t i = 0; i < rm->server->cq_count; i++) {
- GPR_ASSERT(gpr_stack_lockfree_pop(rm->requests_per_cq[i]) == -1);
- gpr_stack_lockfree_destroy(rm->requests_per_cq[i]);
+ GPR_ASSERT(gpr_locked_mpscq_pop(&rm->requests_per_cq[i]) == NULL);
+ gpr_locked_mpscq_destroy(&rm->requests_per_cq[i]);
}
gpr_free(rm->requests_per_cq);
}
@@ -368,13 +364,17 @@ static void request_matcher_kill_requests(grpc_exec_ctx *exec_ctx,
grpc_server *server,
request_matcher *rm,
grpc_error *error) {
- int request_id;
+ requested_call *rc;
for (size_t i = 0; i < server->cq_count; i++) {
- while ((request_id = gpr_stack_lockfree_pop(rm->requests_per_cq[i])) !=
- -1) {
- fail_call(exec_ctx, server, i,
- &server->requested_calls_per_cq[i][request_id],
- GRPC_ERROR_REF(error));
+ /* Here we know:
+ 1. no requests are being added (since the server is shut down)
+ 2. no other threads are pulling (since the shut down process is single
+ threaded)
+ So, we can ignore the queue lock and just pop, with the guarantee that a
+ NULL returned here truly means that the queue is empty */
+ while ((rc = (requested_call *)gpr_mpscq_pop(
+ &rm->requests_per_cq[i].queue)) != NULL) {
+ fail_call(exec_ctx, server, i, rc, GRPC_ERROR_REF(error));
}
}
GRPC_ERROR_UNREF(error);
@@ -408,14 +408,8 @@ static void server_delete(grpc_exec_ctx *exec_ctx, grpc_server *server) {
request_matcher_destroy(&server->unregistered_request_matcher);
}
for (i = 0; i < server->cq_count; i++) {
- GRPC_CQ_INTERNAL_UNREF(server->cqs[i], "server");
- if (server->started) {
- gpr_stack_lockfree_destroy(server->request_freelist_per_cq[i]);
- gpr_free(server->requested_calls_per_cq[i]);
- }
+ GRPC_CQ_INTERNAL_UNREF(exec_ctx, server->cqs[i], "server");
}
- gpr_free(server->request_freelist_per_cq);
- gpr_free(server->requested_calls_per_cq);
gpr_free(server->cqs);
gpr_free(server->pollsets);
gpr_free(server->shutdown_tags);
@@ -456,7 +450,7 @@ static void destroy_channel(grpc_exec_ctx *exec_ctx, channel_data *chand,
grpc_closure_init(&chand->finish_destroy_channel_closure,
finish_destroy_channel, chand, grpc_schedule_on_exec_ctx);
- if (grpc_server_channel_trace && error != GRPC_ERROR_NONE) {
+ if (GRPC_TRACER_ON(grpc_server_channel_trace) && error != GRPC_ERROR_NONE) {
const char *msg = grpc_error_string(error);
gpr_log(GPR_INFO, "Disconnected client: %s", msg);
}
@@ -473,21 +467,7 @@ static void destroy_channel(grpc_exec_ctx *exec_ctx, channel_data *chand,
static void done_request_event(grpc_exec_ctx *exec_ctx, void *req,
grpc_cq_completion *c) {
- requested_call *rc = req;
- grpc_server *server = rc->server;
-
- if (rc >= server->requested_calls_per_cq[rc->cq_idx] &&
- rc < server->requested_calls_per_cq[rc->cq_idx] +
- server->max_requested_calls_per_cq) {
- GPR_ASSERT(rc - server->requested_calls_per_cq[rc->cq_idx] <= INT_MAX);
- gpr_stack_lockfree_push(
- server->request_freelist_per_cq[rc->cq_idx],
- (int)(rc - server->requested_calls_per_cq[rc->cq_idx]));
- } else {
- gpr_free(req);
- }
-
- server_unref(exec_ctx, server);
+ gpr_free(req);
}
static void publish_call(grpc_exec_ctx *exec_ctx, grpc_server *server,
@@ -516,10 +496,6 @@ static void publish_call(grpc_exec_ctx *exec_ctx, grpc_server *server,
GPR_UNREACHABLE_CODE(return );
}
- grpc_call_element *elem =
- grpc_call_stack_element(grpc_call_get_call_stack(call), 0);
- channel_data *chand = elem->channel_data;
- server_ref(chand->server);
grpc_cq_end_op(exec_ctx, calld->cq_new, rc->tag, GRPC_ERROR_NONE,
done_request_event, rc, &rc->completion);
}
@@ -547,15 +523,15 @@ static void publish_new_rpc(grpc_exec_ctx *exec_ctx, void *arg,
for (size_t i = 0; i < server->cq_count; i++) {
size_t cq_idx = (chand->cq_idx + i) % server->cq_count;
- int request_id = gpr_stack_lockfree_pop(rm->requests_per_cq[cq_idx]);
- if (request_id == -1) {
+ requested_call *rc =
+ (requested_call *)gpr_locked_mpscq_pop(&rm->requests_per_cq[cq_idx]);
+ if (rc == NULL) {
continue;
} else {
gpr_mu_lock(&calld->mu_state);
calld->state = ACTIVATED;
gpr_mu_unlock(&calld->mu_state);
- publish_call(exec_ctx, server, calld, cq_idx,
- &server->requested_calls_per_cq[cq_idx][request_id]);
+ publish_call(exec_ctx, server, calld, cq_idx, rc);
return; /* early out */
}
}
@@ -1029,8 +1005,6 @@ grpc_server *grpc_server_create(const grpc_channel_args *args, void *reserved) {
server->root_channel_data.next = server->root_channel_data.prev =
&server->root_channel_data;
- /* TODO(ctiller): expose a channel_arg for this */
- server->max_requested_calls_per_cq = 32768;
server->channel_args = grpc_channel_args_copy(args);
return server;
@@ -1103,29 +1077,15 @@ void grpc_server_start(grpc_server *server) {
server->started = true;
server->pollset_count = 0;
server->pollsets = gpr_malloc(sizeof(grpc_pollset *) * server->cq_count);
- server->request_freelist_per_cq =
- gpr_malloc(sizeof(*server->request_freelist_per_cq) * server->cq_count);
- server->requested_calls_per_cq =
- gpr_malloc(sizeof(*server->requested_calls_per_cq) * server->cq_count);
for (i = 0; i < server->cq_count; i++) {
if (grpc_cq_can_listen(server->cqs[i])) {
server->pollsets[server->pollset_count++] =
grpc_cq_pollset(server->cqs[i]);
}
- server->request_freelist_per_cq[i] =
- gpr_stack_lockfree_create((size_t)server->max_requested_calls_per_cq);
- for (int j = 0; j < server->max_requested_calls_per_cq; j++) {
- gpr_stack_lockfree_push(server->request_freelist_per_cq[i], j);
- }
- server->requested_calls_per_cq[i] =
- gpr_malloc((size_t)server->max_requested_calls_per_cq *
- sizeof(*server->requested_calls_per_cq[i]));
}
- request_matcher_init(&server->unregistered_request_matcher,
- (size_t)server->max_requested_calls_per_cq, server);
+ request_matcher_init(&server->unregistered_request_matcher, server);
for (registered_method *rm = server->registered_methods; rm; rm = rm->next) {
- request_matcher_init(&rm->request_matcher,
- (size_t)server->max_requested_calls_per_cq, server);
+ request_matcher_init(&rm->request_matcher, server);
}
server_ref(server);
@@ -1379,21 +1339,11 @@ static grpc_call_error queue_call_request(grpc_exec_ctx *exec_ctx,
requested_call *rc) {
call_data *calld = NULL;
request_matcher *rm = NULL;
- int request_id;
if (gpr_atm_acq_load(&server->shutdown_flag)) {
fail_call(exec_ctx, server, cq_idx, rc,
GRPC_ERROR_CREATE_FROM_STATIC_STRING("Server Shutdown"));
return GRPC_CALL_OK;
}
- request_id = gpr_stack_lockfree_pop(server->request_freelist_per_cq[cq_idx]);
- if (request_id == -1) {
- /* out of request ids: just fail this one */
- fail_call(exec_ctx, server, cq_idx, rc,
- grpc_error_set_int(
- GRPC_ERROR_CREATE_FROM_STATIC_STRING("Out of request ids"),
- GRPC_ERROR_INT_LIMIT, server->max_requested_calls_per_cq));
- return GRPC_CALL_OK;
- }
switch (rc->type) {
case BATCH_CALL:
rm = &server->unregistered_request_matcher;
@@ -1402,15 +1352,13 @@ static grpc_call_error queue_call_request(grpc_exec_ctx *exec_ctx,
rm = &rc->data.registered.registered_method->request_matcher;
break;
}
- server->requested_calls_per_cq[cq_idx][request_id] = *rc;
- gpr_free(rc);
- if (gpr_stack_lockfree_push(rm->requests_per_cq[cq_idx], request_id)) {
+ if (gpr_locked_mpscq_push(&rm->requests_per_cq[cq_idx], &rc->request_link)) {
/* this was the first queued request: we need to lock and start
matching calls */
gpr_mu_lock(&server->mu_call);
while ((calld = rm->pending_head) != NULL) {
- request_id = gpr_stack_lockfree_pop(rm->requests_per_cq[cq_idx]);
- if (request_id == -1) break;
+ rc = (requested_call *)gpr_locked_mpscq_pop(&rm->requests_per_cq[cq_idx]);
+ if (rc == NULL) break;
rm->pending_head = calld->pending_next;
gpr_mu_unlock(&server->mu_call);
gpr_mu_lock(&calld->mu_state);
@@ -1426,8 +1374,7 @@ static grpc_call_error queue_call_request(grpc_exec_ctx *exec_ctx,
GPR_ASSERT(calld->state == PENDING);
calld->state = ACTIVATED;
gpr_mu_unlock(&calld->mu_state);
- publish_call(exec_ctx, server, calld, cq_idx,
- &server->requested_calls_per_cq[cq_idx][request_id]);
+ publish_call(exec_ctx, server, calld, cq_idx, rc);
}
gpr_mu_lock(&server->mu_call);
}
@@ -1534,7 +1481,6 @@ static void fail_call(grpc_exec_ctx *exec_ctx, grpc_server *server,
rc->initial_metadata->count = 0;
GPR_ASSERT(error != GRPC_ERROR_NONE);
- server_ref(server);
grpc_cq_end_op(exec_ctx, server->cqs[cq_idx], rc->tag, error,
done_request_event, rc, &rc->completion);
}
diff --git a/src/core/lib/surface/server.h b/src/core/lib/surface/server.h
index a85d9f4964..cd2fca0fe0 100644
--- a/src/core/lib/surface/server.h
+++ b/src/core/lib/surface/server.h
@@ -36,12 +36,13 @@
#include <grpc/grpc.h>
#include "src/core/lib/channel/channel_stack.h"
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/transport/transport.h"
extern const grpc_channel_filter grpc_server_top_filter;
/** Lightweight tracing of server channel state */
-extern int grpc_server_channel_trace;
+extern grpc_tracer_flag grpc_server_channel_trace;
/* Add a listener to the server: when the server starts, it will call start,
and when it shuts down, it will call destroy */
diff --git a/src/core/lib/transport/bdp_estimator.c b/src/core/lib/transport/bdp_estimator.c
index e1483677fd..e3a82b492a 100644
--- a/src/core/lib/transport/bdp_estimator.c
+++ b/src/core/lib/transport/bdp_estimator.c
@@ -38,12 +38,13 @@
#include <grpc/support/log.h>
#include <grpc/support/useful.h>
-int grpc_bdp_estimator_trace = 0;
+grpc_tracer_flag grpc_bdp_estimator_trace = GRPC_TRACER_INITIALIZER(false);
void grpc_bdp_estimator_init(grpc_bdp_estimator *estimator, const char *name) {
estimator->estimate = 65536;
estimator->ping_state = GRPC_BDP_PING_UNSCHEDULED;
estimator->name = name;
+ estimator->bw_est = 0;
}
bool grpc_bdp_estimator_get_estimate(grpc_bdp_estimator *estimator,
@@ -52,6 +53,11 @@ bool grpc_bdp_estimator_get_estimate(grpc_bdp_estimator *estimator,
return true;
}
+bool grpc_bdp_estimator_get_bw(grpc_bdp_estimator *estimator, double *bw) {
+ *bw = estimator->bw_est;
+ return true;
+}
+
bool grpc_bdp_estimator_add_incoming_bytes(grpc_bdp_estimator *estimator,
int64_t num_bytes) {
estimator->accumulator += num_bytes;
@@ -67,7 +73,7 @@ bool grpc_bdp_estimator_add_incoming_bytes(grpc_bdp_estimator *estimator,
}
void grpc_bdp_estimator_schedule_ping(grpc_bdp_estimator *estimator) {
- if (grpc_bdp_estimator_trace) {
+ if (GRPC_TRACER_ON(grpc_bdp_estimator_trace)) {
gpr_log(GPR_DEBUG, "bdp[%s]:sched acc=%" PRId64 " est=%" PRId64,
estimator->name, estimator->accumulator, estimator->estimate);
}
@@ -77,24 +83,34 @@ void grpc_bdp_estimator_schedule_ping(grpc_bdp_estimator *estimator) {
}
void grpc_bdp_estimator_start_ping(grpc_bdp_estimator *estimator) {
- if (grpc_bdp_estimator_trace) {
+ if (GRPC_TRACER_ON(grpc_bdp_estimator_trace)) {
gpr_log(GPR_DEBUG, "bdp[%s]:start acc=%" PRId64 " est=%" PRId64,
estimator->name, estimator->accumulator, estimator->estimate);
}
GPR_ASSERT(estimator->ping_state == GRPC_BDP_PING_SCHEDULED);
estimator->ping_state = GRPC_BDP_PING_STARTED;
estimator->accumulator = 0;
+ estimator->ping_start_time = gpr_now(GPR_CLOCK_MONOTONIC);
}
void grpc_bdp_estimator_complete_ping(grpc_bdp_estimator *estimator) {
- if (grpc_bdp_estimator_trace) {
- gpr_log(GPR_DEBUG, "bdp[%s]:complete acc=%" PRId64 " est=%" PRId64,
- estimator->name, estimator->accumulator, estimator->estimate);
+ gpr_timespec dt_ts =
+ gpr_time_sub(gpr_now(GPR_CLOCK_MONOTONIC), estimator->ping_start_time);
+ double dt = (double)dt_ts.tv_sec + 1e-9 * (double)dt_ts.tv_nsec;
+ double bw = dt > 0 ? ((double)estimator->accumulator / dt) : 0;
+ if (GRPC_TRACER_ON(grpc_bdp_estimator_trace)) {
+ gpr_log(GPR_DEBUG, "bdp[%s]:complete acc=%" PRId64 " est=%" PRId64
+ " dt=%lf bw=%lfMbs bw_est=%lfMbs",
+ estimator->name, estimator->accumulator, estimator->estimate, dt,
+ bw / 125000.0, estimator->bw_est / 125000.0);
}
GPR_ASSERT(estimator->ping_state == GRPC_BDP_PING_STARTED);
- if (estimator->accumulator > 2 * estimator->estimate / 3) {
- estimator->estimate *= 2;
- if (grpc_bdp_estimator_trace) {
+ if (estimator->accumulator > 2 * estimator->estimate / 3 &&
+ bw > estimator->bw_est) {
+ estimator->estimate =
+ GPR_MAX(estimator->accumulator, estimator->estimate * 2);
+ estimator->bw_est = bw;
+ if (GRPC_TRACER_ON(grpc_bdp_estimator_trace)) {
gpr_log(GPR_DEBUG, "bdp[%s]: estimate increased to %" PRId64,
estimator->name, estimator->estimate);
}
diff --git a/src/core/lib/transport/bdp_estimator.h b/src/core/lib/transport/bdp_estimator.h
index df8d1f6fc0..b9a7fc84bb 100644
--- a/src/core/lib/transport/bdp_estimator.h
+++ b/src/core/lib/transport/bdp_estimator.h
@@ -34,13 +34,15 @@
#ifndef GRPC_CORE_LIB_TRANSPORT_BDP_ESTIMATOR_H
#define GRPC_CORE_LIB_TRANSPORT_BDP_ESTIMATOR_H
+#include <grpc/support/time.h>
#include <stdbool.h>
#include <stdint.h>
+#include "src/core/lib/debug/trace.h"
#define GRPC_BDP_SAMPLES 16
#define GRPC_BDP_MIN_SAMPLES_FOR_ESTIMATE 3
-extern int grpc_bdp_estimator_trace;
+extern grpc_tracer_flag grpc_bdp_estimator_trace;
typedef enum {
GRPC_BDP_PING_UNSCHEDULED,
@@ -52,6 +54,8 @@ typedef struct grpc_bdp_estimator {
grpc_bdp_estimator_ping_state ping_state;
int64_t accumulator;
int64_t estimate;
+ gpr_timespec ping_start_time;
+ double bw_est;
const char *name;
} grpc_bdp_estimator;
@@ -60,6 +64,8 @@ void grpc_bdp_estimator_init(grpc_bdp_estimator *estimator, const char *name);
// Returns true if a reasonable estimate could be obtained
bool grpc_bdp_estimator_get_estimate(grpc_bdp_estimator *estimator,
int64_t *estimate);
+// Returns true if a reasonable bandwidth estimate (bytes per second) could be obtained
+bool grpc_bdp_estimator_get_bw(grpc_bdp_estimator *estimator, double *bw);
// Returns true if the user should schedule a ping
bool grpc_bdp_estimator_add_incoming_bytes(grpc_bdp_estimator *estimator,
int64_t num_bytes);
diff --git a/src/core/lib/transport/connectivity_state.c b/src/core/lib/transport/connectivity_state.c
index 3757b25267..e30cd523fa 100644
--- a/src/core/lib/transport/connectivity_state.c
+++ b/src/core/lib/transport/connectivity_state.c
@@ -39,7 +39,7 @@
#include <grpc/support/log.h>
#include <grpc/support/string_util.h>
-int grpc_connectivity_state_trace = 0;
+grpc_tracer_flag grpc_connectivity_state_trace = GRPC_TRACER_INITIALIZER(false);
const char *grpc_connectivity_state_name(grpc_connectivity_state state) {
switch (state) {
@@ -94,7 +94,7 @@ grpc_connectivity_state grpc_connectivity_state_check(
grpc_connectivity_state cur =
(grpc_connectivity_state)gpr_atm_no_barrier_load(
&tracker->current_state_atm);
- if (grpc_connectivity_state_trace) {
+ if (GRPC_TRACER_ON(grpc_connectivity_state_trace)) {
gpr_log(GPR_DEBUG, "CONWATCH: %p %s: get %s", tracker, tracker->name,
grpc_connectivity_state_name(cur));
}
@@ -106,7 +106,7 @@ grpc_connectivity_state grpc_connectivity_state_get(
grpc_connectivity_state cur =
(grpc_connectivity_state)gpr_atm_no_barrier_load(
&tracker->current_state_atm);
- if (grpc_connectivity_state_trace) {
+ if (GRPC_TRACER_ON(grpc_connectivity_state_trace)) {
gpr_log(GPR_DEBUG, "CONWATCH: %p %s: get %s", tracker, tracker->name,
grpc_connectivity_state_name(cur));
}
@@ -127,7 +127,7 @@ bool grpc_connectivity_state_notify_on_state_change(
grpc_connectivity_state cur =
(grpc_connectivity_state)gpr_atm_no_barrier_load(
&tracker->current_state_atm);
- if (grpc_connectivity_state_trace) {
+ if (GRPC_TRACER_ON(grpc_connectivity_state_trace)) {
if (current == NULL) {
gpr_log(GPR_DEBUG, "CONWATCH: %p %s: unsubscribe notify=%p", tracker,
tracker->name, notify);
@@ -180,7 +180,7 @@ void grpc_connectivity_state_set(grpc_exec_ctx *exec_ctx,
(grpc_connectivity_state)gpr_atm_no_barrier_load(
&tracker->current_state_atm);
grpc_connectivity_state_watcher *w;
- if (grpc_connectivity_state_trace) {
+ if (GRPC_TRACER_ON(grpc_connectivity_state_trace)) {
const char *error_string = grpc_error_string(error);
gpr_log(GPR_DEBUG, "SET: %p %s: %s --> %s [%s] error=%p %s", tracker,
tracker->name, grpc_connectivity_state_name(cur),
@@ -208,7 +208,7 @@ void grpc_connectivity_state_set(grpc_exec_ctx *exec_ctx,
while ((w = tracker->watchers) != NULL) {
*w->current = state;
tracker->watchers = w->next;
- if (grpc_connectivity_state_trace) {
+ if (GRPC_TRACER_ON(grpc_connectivity_state_trace)) {
gpr_log(GPR_DEBUG, "NOTIFY: %p %s: %p", tracker, tracker->name,
w->notify);
}
diff --git a/src/core/lib/transport/connectivity_state.h b/src/core/lib/transport/connectivity_state.h
index c9604c34dd..cdc2930c11 100644
--- a/src/core/lib/transport/connectivity_state.h
+++ b/src/core/lib/transport/connectivity_state.h
@@ -35,6 +35,7 @@
#define GRPC_CORE_LIB_TRANSPORT_CONNECTIVITY_STATE_H
#include <grpc/grpc.h>
+#include "src/core/lib/debug/trace.h"
#include "src/core/lib/iomgr/exec_ctx.h"
typedef struct grpc_connectivity_state_watcher {
@@ -57,7 +58,7 @@ typedef struct {
char *name;
} grpc_connectivity_state_tracker;
-extern int grpc_connectivity_state_trace;
+extern grpc_tracer_flag grpc_connectivity_state_trace;
/** enum --> string conversion */
const char *grpc_connectivity_state_name(grpc_connectivity_state state);
diff --git a/src/core/plugin_registry/grpc_plugin_registry.c b/src/core/plugin_registry/grpc_plugin_registry.c
index 25bda7a262..510cf5d5a0 100644
--- a/src/core/plugin_registry/grpc_plugin_registry.c
+++ b/src/core/plugin_registry/grpc_plugin_registry.c
@@ -61,6 +61,8 @@ extern void grpc_max_age_filter_init(void);
extern void grpc_max_age_filter_shutdown(void);
extern void grpc_message_size_filter_init(void);
extern void grpc_message_size_filter_shutdown(void);
+extern void grpc_workaround_cronet_compression_filter_init(void);
+extern void grpc_workaround_cronet_compression_filter_shutdown(void);
void grpc_register_built_in_plugins(void) {
grpc_register_plugin(grpc_http_filters_init,
@@ -91,4 +93,6 @@ void grpc_register_built_in_plugins(void) {
grpc_max_age_filter_shutdown);
grpc_register_plugin(grpc_message_size_filter_init,
grpc_message_size_filter_shutdown);
+ grpc_register_plugin(grpc_workaround_cronet_compression_filter_init,
+ grpc_workaround_cronet_compression_filter_shutdown);
}
diff --git a/src/core/plugin_registry/grpc_unsecure_plugin_registry.c b/src/core/plugin_registry/grpc_unsecure_plugin_registry.c
index 05d4771bce..e5eb68f934 100644
--- a/src/core/plugin_registry/grpc_unsecure_plugin_registry.c
+++ b/src/core/plugin_registry/grpc_unsecure_plugin_registry.c
@@ -61,6 +61,8 @@ extern void grpc_max_age_filter_init(void);
extern void grpc_max_age_filter_shutdown(void);
extern void grpc_message_size_filter_init(void);
extern void grpc_message_size_filter_shutdown(void);
+extern void grpc_workaround_cronet_compression_filter_init(void);
+extern void grpc_workaround_cronet_compression_filter_shutdown(void);
void grpc_register_built_in_plugins(void) {
grpc_register_plugin(grpc_http_filters_init,
@@ -91,4 +93,6 @@ void grpc_register_built_in_plugins(void) {
grpc_max_age_filter_shutdown);
grpc_register_plugin(grpc_message_size_filter_init,
grpc_message_size_filter_shutdown);
+ grpc_register_plugin(grpc_workaround_cronet_compression_filter_init,
+ grpc_workaround_cronet_compression_filter_shutdown);
}
diff --git a/src/core/tsi/fake_transport_security.c b/src/core/tsi/fake_transport_security.c
index 1836beefc4..4925d19f96 100644
--- a/src/core/tsi/fake_transport_security.c
+++ b/src/core/tsi/fake_transport_security.c
@@ -396,7 +396,7 @@ static tsi_result fake_handshaker_get_bytes_to_send_to_peer(
if (next_message_to_send > TSI_FAKE_HANDSHAKE_MESSAGE_MAX) {
next_message_to_send = TSI_FAKE_HANDSHAKE_MESSAGE_MAX;
}
- if (tsi_tracing_enabled) {
+ if (GRPC_TRACER_ON(tsi_tracing_enabled)) {
gpr_log(GPR_INFO, "%s prepared %s.",
impl->is_client ? "Client" : "Server",
tsi_fake_handshake_message_to_string(impl->next_message_to_send));
@@ -408,7 +408,7 @@ static tsi_result fake_handshaker_get_bytes_to_send_to_peer(
if (!impl->is_client &&
impl->next_message_to_send == TSI_FAKE_HANDSHAKE_MESSAGE_MAX) {
/* We're done. */
- if (tsi_tracing_enabled) {
+ if (GRPC_TRACER_ON(tsi_tracing_enabled)) {
gpr_log(GPR_INFO, "Server is done.");
}
impl->result = TSI_OK;
@@ -445,7 +445,7 @@ static tsi_result fake_handshaker_process_bytes_from_peer(
tsi_fake_handshake_message_to_string(received_msg),
tsi_fake_handshake_message_to_string(expected_msg));
}
- if (tsi_tracing_enabled) {
+ if (GRPC_TRACER_ON(tsi_tracing_enabled)) {
gpr_log(GPR_INFO, "%s received %s.", impl->is_client ? "Client" : "Server",
tsi_fake_handshake_message_to_string(received_msg));
}
@@ -453,7 +453,7 @@ static tsi_result fake_handshaker_process_bytes_from_peer(
impl->needs_incoming_message = 0;
if (impl->next_message_to_send == TSI_FAKE_HANDSHAKE_MESSAGE_MAX) {
/* We're done. */
- if (tsi_tracing_enabled) {
+ if (GRPC_TRACER_ON(tsi_tracing_enabled)) {
gpr_log(GPR_INFO, "%s is done.", impl->is_client ? "Client" : "Server");
}
impl->result = TSI_OK;
diff --git a/src/core/tsi/ssl_transport_security.c b/src/core/tsi/ssl_transport_security.c
index e1d634a1fa..59fd2b1c93 100644
--- a/src/core/tsi/ssl_transport_security.c
+++ b/src/core/tsi/ssl_transport_security.c
@@ -180,7 +180,7 @@ static const char *ssl_error_string(int error) {
/* TODO(jboeuf): Remove when we are past the debugging phase with this code. */
static void ssl_log_where_info(const SSL *ssl, int where, int flag,
const char *msg) {
- if ((where & flag) && tsi_tracing_enabled) {
+ if ((where & flag) && GRPC_TRACER_ON(tsi_tracing_enabled)) {
gpr_log(GPR_INFO, "%20.20s - %30.30s - %5.10s", msg,
SSL_state_string_long(ssl), SSL_state_string(ssl));
}
diff --git a/src/core/tsi/transport_security.c b/src/core/tsi/transport_security.c
index b11c00c43c..4efcf8f43d 100644
--- a/src/core/tsi/transport_security.c
+++ b/src/core/tsi/transport_security.c
@@ -41,7 +41,7 @@
/* --- Tracing. --- */
-int tsi_tracing_enabled = 0;
+grpc_tracer_flag tsi_tracing_enabled = GRPC_TRACER_INITIALIZER(false);
/* --- tsi_result common implementation. --- */
diff --git a/src/core/tsi/transport_security.h b/src/core/tsi/transport_security.h
index a4c9cbc001..2422f92076 100644
--- a/src/core/tsi/transport_security.h
+++ b/src/core/tsi/transport_security.h
@@ -36,13 +36,14 @@
#include <stdbool.h>
+#include "src/core/lib/debug/trace.h"
#include "src/core/tsi/transport_security_interface.h"
#ifdef __cplusplus
extern "C" {
#endif
-extern int tsi_tracing_enabled;
+extern grpc_tracer_flag tsi_tracing_enabled;
/* Base for tsi_frame_protector implementations.
See transport_security_interface.h for documentation. */
diff --git a/src/core/tsi/transport_security_interface.h b/src/core/tsi/transport_security_interface.h
index f2112b62b6..8a3fff6a17 100644
--- a/src/core/tsi/transport_security_interface.h
+++ b/src/core/tsi/transport_security_interface.h
@@ -37,6 +37,8 @@
#include <stdint.h>
#include <stdlib.h>
+#include "src/core/lib/debug/trace.h"
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -73,8 +75,7 @@ const char *tsi_result_to_string(tsi_result result);
/* --- tsi tracing --- */
-/* Set this early to avoid races */
-extern int tsi_tracing_enabled;
+extern grpc_tracer_flag tsi_tracing_enabled;
/* --- tsi_frame_protector object ---