diff options
author | David G. Quintas <dgq@google.com> | 2017-11-03 15:05:23 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-11-03 15:05:23 +0100 |
commit | ef68fe7239a89095f1eaa89c1dd28b2b7be2a3c7 (patch) | |
tree | 34f22ce323db985f5599943e2cbe8a4bf80cfe6c /src | |
parent | fb57c665c643508cf1593ee33e7544b7f2447a86 (diff) | |
parent | eca25f374681ee19fb02bf1bb6f765009242fee4 (diff) |
Merge pull request #13121 from dgquintas/backoff_spec
Improvements to C Core's backoff code and API
Diffstat (limited to 'src')
6 files changed, 101 insertions, 73 deletions
diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc index 85e76e68b5..03116b420c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc @@ -1266,7 +1266,8 @@ static void maybe_restart_lb_call(grpc_exec_ctx *exec_ctx, } else if (!glb_policy->shutting_down) { /* if we aren't shutting down, restart the LB client call after some time */ grpc_millis next_try = - grpc_backoff_step(exec_ctx, &glb_policy->lb_call_backoff_state); + grpc_backoff_step(exec_ctx, &glb_policy->lb_call_backoff_state) + .next_attempt_start_time; if (GRPC_TRACER_ON(grpc_lb_glb_trace)) { gpr_log(GPR_DEBUG, "Connection to LB server lost (grpclb: %p)...", (void *)glb_policy); @@ -1431,7 +1432,7 @@ static void lb_call_init_locked(grpc_exec_ctx *exec_ctx, grpc_combiner_scheduler(glb_policy->base.combiner)); grpc_backoff_init(&glb_policy->lb_call_backoff_state, - GRPC_GRPCLB_INITIAL_CONNECT_BACKOFF_SECONDS, + GRPC_GRPCLB_INITIAL_CONNECT_BACKOFF_SECONDS * 1000, GRPC_GRPCLB_RECONNECT_BACKOFF_MULTIPLIER, GRPC_GRPCLB_RECONNECT_JITTER, GRPC_GRPCLB_MIN_CONNECT_TIMEOUT_SECONDS * 1000, diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc index 5f7ab987cb..a1ddaee499 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc @@ -271,7 +271,8 @@ static void dns_ares_on_resolved_locked(grpc_exec_ctx *exec_ctx, void *arg, } else { const char *msg = grpc_error_string(error); gpr_log(GPR_DEBUG, "dns resolution failed: %s", msg); - grpc_millis next_try = grpc_backoff_step(exec_ctx, &r->backoff_state); + grpc_millis next_try = + grpc_backoff_step(exec_ctx, &r->backoff_state).next_attempt_start_time; grpc_millis timeout = next_try - grpc_exec_ctx_now(exec_ctx); gpr_log(GPR_INFO, "dns resolution failed (will retry): %s", grpc_error_string(error)); @@ -379,11 +380,11 @@ static grpc_resolver *dns_ares_create(grpc_exec_ctx *exec_ctx, grpc_pollset_set_add_pollset_set(exec_ctx, r->interested_parties, args->pollset_set); } - grpc_backoff_init(&r->backoff_state, GRPC_DNS_INITIAL_CONNECT_BACKOFF_SECONDS, - GRPC_DNS_RECONNECT_BACKOFF_MULTIPLIER, - GRPC_DNS_RECONNECT_JITTER, - GRPC_DNS_MIN_CONNECT_TIMEOUT_SECONDS * 1000, - GRPC_DNS_RECONNECT_MAX_BACKOFF_SECONDS * 1000); + grpc_backoff_init( + &r->backoff_state, GRPC_DNS_INITIAL_CONNECT_BACKOFF_SECONDS * 1000, + GRPC_DNS_RECONNECT_BACKOFF_MULTIPLIER, GRPC_DNS_RECONNECT_JITTER, + GRPC_DNS_MIN_CONNECT_TIMEOUT_SECONDS * 1000, + GRPC_DNS_RECONNECT_MAX_BACKOFF_SECONDS * 1000); GRPC_CLOSURE_INIT(&r->dns_ares_on_retry_timer_locked, dns_ares_on_retry_timer_locked, r, grpc_combiner_scheduler(r->base.combiner)); diff --git a/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc b/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc index e669b6dfc7..62aead5517 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc @@ -170,7 +170,8 @@ static void dns_on_resolved_locked(grpc_exec_ctx *exec_ctx, void *arg, grpc_resolved_addresses_destroy(r->addresses); grpc_lb_addresses_destroy(exec_ctx, addresses); } else { - grpc_millis next_try = grpc_backoff_step(exec_ctx, &r->backoff_state); + grpc_millis next_try = + grpc_backoff_step(exec_ctx, &r->backoff_state).next_attempt_start_time; grpc_millis timeout = next_try - grpc_exec_ctx_now(exec_ctx); gpr_log(GPR_INFO, "dns resolution failed (will retry): %s", grpc_error_string(error)); @@ -256,11 +257,11 @@ static grpc_resolver *dns_create(grpc_exec_ctx *exec_ctx, grpc_pollset_set_add_pollset_set(exec_ctx, r->interested_parties, args->pollset_set); } - grpc_backoff_init(&r->backoff_state, GRPC_DNS_INITIAL_CONNECT_BACKOFF_SECONDS, - GRPC_DNS_RECONNECT_BACKOFF_MULTIPLIER, - GRPC_DNS_RECONNECT_JITTER, - GRPC_DNS_MIN_CONNECT_TIMEOUT_SECONDS * 1000, - GRPC_DNS_RECONNECT_MAX_BACKOFF_SECONDS * 1000); + grpc_backoff_init( + &r->backoff_state, GRPC_DNS_INITIAL_CONNECT_BACKOFF_SECONDS * 1000, + GRPC_DNS_RECONNECT_BACKOFF_MULTIPLIER, GRPC_DNS_RECONNECT_JITTER, + GRPC_DNS_MIN_CONNECT_TIMEOUT_SECONDS * 1000, + GRPC_DNS_RECONNECT_MAX_BACKOFF_SECONDS * 1000); return &r->base; } diff --git a/src/core/ext/filters/client_channel/subchannel.cc b/src/core/ext/filters/client_channel/subchannel.cc index 5710a22178..b954e1b879 100644 --- a/src/core/ext/filters/client_channel/subchannel.cc +++ b/src/core/ext/filters/client_channel/subchannel.cc @@ -117,10 +117,10 @@ struct grpc_subchannel { external_state_watcher root_external_state_watcher; - /** next connect attempt time */ - grpc_millis next_attempt; /** backoff state */ grpc_backoff backoff_state; + grpc_backoff_result backoff_result; + /** do we have an active alarm? */ bool have_alarm; /** have we started the backoff loop */ @@ -380,7 +380,7 @@ static void continue_connect_locked(grpc_exec_ctx *exec_ctx, grpc_connect_in_args args; args.interested_parties = c->pollset_set; - args.deadline = c->next_attempt; + args.deadline = c->backoff_result.current_deadline; args.channel_args = c->args; grpc_connectivity_state_set(exec_ctx, &c->state_tracker, @@ -428,7 +428,7 @@ static void on_alarm(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { } if (error == GRPC_ERROR_NONE) { gpr_log(GPR_INFO, "Failed to connect to channel, retrying"); - c->next_attempt = grpc_backoff_step(exec_ctx, &c->backoff_state); + c->backoff_result = grpc_backoff_step(exec_ctx, &c->backoff_state); continue_connect_locked(exec_ctx, c); gpr_mu_unlock(&c->mu); } else { @@ -465,20 +465,21 @@ static void maybe_start_connecting_locked(grpc_exec_ctx *exec_ctx, if (!c->backoff_begun) { c->backoff_begun = true; - c->next_attempt = grpc_backoff_begin(exec_ctx, &c->backoff_state); + c->backoff_result = grpc_backoff_begin(exec_ctx, &c->backoff_state); continue_connect_locked(exec_ctx, c); } else { GPR_ASSERT(!c->have_alarm); c->have_alarm = true; const grpc_millis time_til_next = - c->next_attempt - grpc_exec_ctx_now(exec_ctx); + c->backoff_result.next_attempt_start_time - grpc_exec_ctx_now(exec_ctx); if (time_til_next <= 0) { gpr_log(GPR_INFO, "Retry immediately"); } else { gpr_log(GPR_INFO, "Retry in %" PRIdPTR " milliseconds", time_til_next); } GRPC_CLOSURE_INIT(&c->on_alarm, on_alarm, c, grpc_schedule_on_exec_ctx); - grpc_timer_init(exec_ctx, &c->alarm, c->next_attempt, &c->on_alarm); + grpc_timer_init(exec_ctx, &c->alarm, + c->backoff_result.next_attempt_start_time, &c->on_alarm); } } diff --git a/src/core/lib/backoff/backoff.cc b/src/core/lib/backoff/backoff.cc index fe0a751817..5dd91da4f3 100644 --- a/src/core/lib/backoff/backoff.cc +++ b/src/core/lib/backoff/backoff.cc @@ -20,23 +20,27 @@ #include <grpc/support/useful.h> -void grpc_backoff_init(grpc_backoff *backoff, - grpc_millis initial_connect_timeout, double multiplier, - double jitter, grpc_millis min_timeout_millis, - grpc_millis max_timeout_millis) { - backoff->initial_connect_timeout = initial_connect_timeout; +void grpc_backoff_init(grpc_backoff *backoff, grpc_millis initial_backoff, + double multiplier, double jitter, + grpc_millis min_connect_timeout, + grpc_millis max_backoff) { + backoff->initial_backoff = initial_backoff; backoff->multiplier = multiplier; backoff->jitter = jitter; - backoff->min_timeout_millis = min_timeout_millis; - backoff->max_timeout_millis = max_timeout_millis; + backoff->min_connect_timeout = min_connect_timeout; + backoff->max_backoff = max_backoff; backoff->rng_state = (uint32_t)gpr_now(GPR_CLOCK_REALTIME).tv_nsec; } -grpc_millis grpc_backoff_begin(grpc_exec_ctx *exec_ctx, grpc_backoff *backoff) { - backoff->current_timeout_millis = backoff->initial_connect_timeout; - const grpc_millis first_timeout = - GPR_MAX(backoff->current_timeout_millis, backoff->min_timeout_millis); - return grpc_exec_ctx_now(exec_ctx) + first_timeout; +grpc_backoff_result grpc_backoff_begin(grpc_exec_ctx *exec_ctx, + grpc_backoff *backoff) { + backoff->current_backoff = backoff->initial_backoff; + const grpc_millis initial_timeout = + GPR_MAX(backoff->initial_backoff, backoff->min_connect_timeout); + const grpc_millis now = grpc_exec_ctx_now(exec_ctx); + const grpc_backoff_result result = {now + initial_timeout, + now + backoff->current_backoff}; + return result; } /* Generate a random number between 0 and 1. */ @@ -45,29 +49,32 @@ static double generate_uniform_random_number(uint32_t *rng_state) { return *rng_state / (double)((uint32_t)1 << 31); } -grpc_millis grpc_backoff_step(grpc_exec_ctx *exec_ctx, grpc_backoff *backoff) { - const double new_timeout_millis = - backoff->multiplier * (double)backoff->current_timeout_millis; - backoff->current_timeout_millis = - GPR_MIN((grpc_millis)new_timeout_millis, backoff->max_timeout_millis); - - const double jitter_range_width = backoff->jitter * new_timeout_millis; - const double jitter = - (2 * generate_uniform_random_number(&backoff->rng_state) - 1) * - jitter_range_width; - - backoff->current_timeout_millis = - (grpc_millis)((double)(backoff->current_timeout_millis) + jitter); - - const grpc_millis current_deadline = - grpc_exec_ctx_now(exec_ctx) + backoff->current_timeout_millis; - - const grpc_millis min_deadline = - grpc_exec_ctx_now(exec_ctx) + backoff->min_timeout_millis; +static double generate_uniform_random_number_between(uint32_t *rng_state, + double a, double b) { + if (a == b) return a; + if (a > b) GPR_SWAP(double, a, b); // make sure a < b + const double range = b - a; + return a + generate_uniform_random_number(rng_state) * range; +} - return GPR_MAX(current_deadline, min_deadline); +grpc_backoff_result grpc_backoff_step(grpc_exec_ctx *exec_ctx, + grpc_backoff *backoff) { + backoff->current_backoff = (grpc_millis)(GPR_MIN( + backoff->current_backoff * backoff->multiplier, backoff->max_backoff)); + const double jitter = generate_uniform_random_number_between( + &backoff->rng_state, -backoff->jitter * backoff->current_backoff, + backoff->jitter * backoff->current_backoff); + const grpc_millis current_timeout = + GPR_MAX((grpc_millis)(backoff->current_backoff + jitter), + backoff->min_connect_timeout); + const grpc_millis next_timeout = GPR_MIN( + (grpc_millis)(backoff->current_backoff + jitter), backoff->max_backoff); + const grpc_millis now = grpc_exec_ctx_now(exec_ctx); + const grpc_backoff_result result = {now + current_timeout, + now + next_timeout}; + return result; } void grpc_backoff_reset(grpc_backoff *backoff) { - backoff->current_timeout_millis = backoff->initial_connect_timeout; + backoff->current_backoff = backoff->initial_backoff; } diff --git a/src/core/lib/backoff/backoff.h b/src/core/lib/backoff/backoff.h index 80e49ea52a..8becf4aab8 100644 --- a/src/core/lib/backoff/backoff.h +++ b/src/core/lib/backoff/backoff.h @@ -27,36 +27,53 @@ extern "C" { typedef struct { /// const: how long to wait after the first failure before retrying - grpc_millis initial_connect_timeout; + grpc_millis initial_backoff; + /// const: factor with which to multiply backoff after a failed retry double multiplier; + /// const: amount to randomize backoffs double jitter; - /// const: minimum time between retries in milliseconds - grpc_millis min_timeout_millis; - /// const: maximum time between retries in milliseconds - grpc_millis max_timeout_millis; + + /// const: minimum time between retries + grpc_millis min_connect_timeout; + + /// const: maximum time between retries + grpc_millis max_backoff; + + /// current delay before retries + grpc_millis current_backoff; /// random number generator uint32_t rng_state; - - /// current retry timeout in milliseconds - grpc_millis current_timeout_millis; } grpc_backoff; +typedef struct { + /// Deadline to be used for the current attempt. + grpc_millis current_deadline; + + /// Deadline to be used for the next attempt, following the backoff strategy. + grpc_millis next_attempt_start_time; +} grpc_backoff_result; + /// Initialize backoff machinery - does not need to be destroyed -void grpc_backoff_init(grpc_backoff *backoff, - grpc_millis initial_connect_timeout, double multiplier, - double jitter, grpc_millis min_timeout_millis, - grpc_millis max_timeout_millis); - -/// Begin retry loop: returns a timespec for the NEXT retry -grpc_millis grpc_backoff_begin(grpc_exec_ctx *exec_ctx, grpc_backoff *backoff); -/// Step a retry loop: returns a timespec for the NEXT retry -grpc_millis grpc_backoff_step(grpc_exec_ctx *exec_ctx, grpc_backoff *backoff); +void grpc_backoff_init(grpc_backoff *backoff, grpc_millis initial_backoff, + double multiplier, double jitter, + grpc_millis min_connect_timeout, + grpc_millis max_backoff); + +/// Begin retry loop: returns the deadlines to be used for the current attempt +/// and the subsequent retry, if any. +grpc_backoff_result grpc_backoff_begin(grpc_exec_ctx *exec_ctx, + grpc_backoff *backoff); + +/// Step a retry loop: returns the deadlines to be used for the current attempt +/// and the subsequent retry, if any. +grpc_backoff_result grpc_backoff_step(grpc_exec_ctx *exec_ctx, + grpc_backoff *backoff); + /// Reset the backoff, so the next grpc_backoff_step will be a -/// grpc_backoff_begin -/// instead +/// grpc_backoff_begin. void grpc_backoff_reset(grpc_backoff *backoff); #ifdef __cplusplus |