diff options
Diffstat (limited to 'src/core/lib/iomgr/tcp_server_posix.c')
-rw-r--r-- | src/core/lib/iomgr/tcp_server_posix.c | 280 |
1 files changed, 208 insertions, 72 deletions
diff --git a/src/core/lib/iomgr/tcp_server_posix.c b/src/core/lib/iomgr/tcp_server_posix.c index aaeb384f6e..a1a463550a 100644 --- a/src/core/lib/iomgr/tcp_server_posix.c +++ b/src/core/lib/iomgr/tcp_server_posix.c @@ -59,6 +59,8 @@ #include <grpc/support/string_util.h> #include <grpc/support/sync.h> #include <grpc/support/time.h> +#include <grpc/support/useful.h> + #include "src/core/lib/iomgr/resolve_address.h" #include "src/core/lib/iomgr/sockaddr_utils.h" #include "src/core/lib/iomgr/socket_utils_posix.h" @@ -110,8 +112,10 @@ struct grpc_tcp_server { /* destroyed port count: how many ports are completely destroyed */ size_t destroyed_ports; - /* is this server shutting down? (boolean) */ - int shutdown; + /* is this server shutting down? */ + bool shutdown; + /* use SO_REUSEPORT */ + bool so_reuseport; /* linked list of server ports */ grpc_tcp_listener *head; @@ -128,15 +132,47 @@ struct grpc_tcp_server { grpc_pollset **pollsets; /* number of pollsets in the pollsets array */ size_t pollset_count; + + /* next pollset to assign a channel to */ + size_t next_pollset_to_assign; }; -grpc_tcp_server *grpc_tcp_server_create(grpc_closure *shutdown_complete) { +static gpr_once check_init = GPR_ONCE_INIT; +static bool has_so_reuseport; + +static void init(void) { + int s = socket(AF_INET, SOCK_STREAM, 0); + if (s >= 0) { + has_so_reuseport = GRPC_LOG_IF_ERROR("check for SO_REUSEPORT", + grpc_set_socket_reuse_port(s, 1)); + close(s); + } +} + +grpc_error *grpc_tcp_server_create(grpc_closure *shutdown_complete, + const grpc_channel_args *args, + grpc_tcp_server **server) { + gpr_once_init(&check_init, init); + grpc_tcp_server *s = gpr_malloc(sizeof(grpc_tcp_server)); + s->so_reuseport = has_so_reuseport; + for (size_t i = 0; i < (args == NULL ? 0 : args->num_args); i++) { + if (0 == strcmp(GRPC_ARG_ALLOW_REUSEPORT, args->args[i].key)) { + if (args->args[i].type == GRPC_ARG_INTEGER) { + s->so_reuseport = + has_so_reuseport && (args->args[i].value.integer != 0); + } else { + gpr_free(s); + return GRPC_ERROR_CREATE(GRPC_ARG_ALLOW_REUSEPORT + " must be an integer"); + } + } + } gpr_ref_init(&s->refs, 1); gpr_mu_init(&s->mu); s->active_ports = 0; s->destroyed_ports = 0; - s->shutdown = 0; + s->shutdown = false; s->shutdown_starting.head = NULL; s->shutdown_starting.tail = NULL; s->shutdown_complete = shutdown_complete; @@ -145,12 +181,14 @@ grpc_tcp_server *grpc_tcp_server_create(grpc_closure *shutdown_complete) { s->head = NULL; s->tail = NULL; s->nports = 0; - return s; + s->next_pollset_to_assign = 0; + *server = s; + return GRPC_ERROR_NONE; } static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { if (s->shutdown_complete != NULL) { - grpc_exec_ctx_enqueue(exec_ctx, s->shutdown_complete, true, NULL); + grpc_exec_ctx_sched(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE, NULL); } gpr_mu_destroy(&s->mu); @@ -165,7 +203,7 @@ static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { } static void destroyed_port(grpc_exec_ctx *exec_ctx, void *server, - bool success) { + grpc_error *error) { grpc_tcp_server *s = server; gpr_mu_lock(&s->mu); s->destroyed_ports++; @@ -210,7 +248,7 @@ static void tcp_server_destroy(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { gpr_mu_lock(&s->mu); GPR_ASSERT(!s->shutdown); - s->shutdown = 1; + s->shutdown = true; /* shutdown all fd's */ if (s->active_ports) { @@ -259,64 +297,81 @@ static int get_max_accept_queue_size(void) { } /* Prepare a recently-created socket for listening. */ -static int prepare_socket(int fd, const struct sockaddr *addr, - size_t addr_len) { +static grpc_error *prepare_socket(int fd, const struct sockaddr *addr, + size_t addr_len, bool so_reuseport, + int *port) { struct sockaddr_storage sockname_temp; socklen_t sockname_len; + grpc_error *err = GRPC_ERROR_NONE; - if (fd < 0) { - goto error; + GPR_ASSERT(fd >= 0); + + if (so_reuseport) { + err = grpc_set_socket_reuse_port(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; } - if (!grpc_set_socket_nonblocking(fd, 1) || !grpc_set_socket_cloexec(fd, 1) || - (!grpc_is_unix_socket(addr) && (!grpc_set_socket_low_latency(fd, 1) || - !grpc_set_socket_reuse_addr(fd, 1))) || - !grpc_set_socket_no_sigpipe_if_possible(fd)) { - gpr_log(GPR_ERROR, "Unable to configure socket %d: %s", fd, - strerror(errno)); - goto error; + err = grpc_set_socket_nonblocking(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; + err = grpc_set_socket_cloexec(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; + if (!grpc_is_unix_socket(addr)) { + err = grpc_set_socket_low_latency(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; + err = grpc_set_socket_reuse_addr(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; } + err = grpc_set_socket_no_sigpipe_if_possible(fd); + if (err != GRPC_ERROR_NONE) goto error; GPR_ASSERT(addr_len < ~(socklen_t)0); if (bind(fd, addr, (socklen_t)addr_len) < 0) { - char *addr_str; - grpc_sockaddr_to_string(&addr_str, addr, 0); - gpr_log(GPR_ERROR, "bind addr=%s: %s", addr_str, strerror(errno)); - gpr_free(addr_str); + err = GRPC_OS_ERROR(errno, "bind"); goto error; } if (listen(fd, get_max_accept_queue_size()) < 0) { - gpr_log(GPR_ERROR, "listen: %s", strerror(errno)); + err = GRPC_OS_ERROR(errno, "listen"); goto error; } sockname_len = sizeof(sockname_temp); if (getsockname(fd, (struct sockaddr *)&sockname_temp, &sockname_len) < 0) { + err = GRPC_OS_ERROR(errno, "getsockname"); goto error; } - return grpc_sockaddr_get_port((struct sockaddr *)&sockname_temp); + *port = grpc_sockaddr_get_port((struct sockaddr *)&sockname_temp); + return GRPC_ERROR_NONE; error: + GPR_ASSERT(err != GRPC_ERROR_NONE); if (fd >= 0) { close(fd); } - return -1; + grpc_error *ret = grpc_error_set_int( + GRPC_ERROR_CREATE_REFERENCING("Unable to configure socket", &err, 1), + GRPC_ERROR_INT_FD, fd); + GRPC_ERROR_UNREF(err); + return ret; } /* event manager callback when reads are ready */ -static void on_read(grpc_exec_ctx *exec_ctx, void *arg, bool success) { +static void on_read(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *err) { grpc_tcp_listener *sp = arg; grpc_tcp_server_acceptor acceptor = {sp->server, sp->port_index, sp->fd_index}; + grpc_pollset *read_notifier_pollset = NULL; grpc_fd *fdobj; - size_t i; - if (!success) { + if (err != GRPC_ERROR_NONE) { goto error; } + read_notifier_pollset = + sp->server->pollsets[(sp->server->next_pollset_to_assign++) % + sp->server->pollset_count]; + /* loop until accept4 returns EAGAIN, and then re-arm notification */ for (;;) { struct sockaddr_storage addr; @@ -349,16 +404,18 @@ static void on_read(grpc_exec_ctx *exec_ctx, void *arg, bool success) { } fdobj = grpc_fd_create(fd, name); - /* TODO(ctiller): revise this when we have server-side sharding - of channels -- we certainly should not be automatically adding every - incoming channel to every pollset owned by the server */ - for (i = 0; i < sp->server->pollset_count; i++) { - grpc_pollset_add_fd(exec_ctx, sp->server->pollsets[i], fdobj); + + if (read_notifier_pollset == NULL) { + gpr_log(GPR_ERROR, "Read notifier pollset is not set on the fd"); + goto error; } + + grpc_pollset_add_fd(exec_ctx, read_notifier_pollset, fdobj); + sp->server->on_accept_cb( exec_ctx, sp->server->on_accept_cb_arg, grpc_tcp_create(fdobj, GRPC_TCP_DEFAULT_READ_SLICE_SIZE, addr_str), - &acceptor); + read_notifier_pollset, &acceptor); gpr_free(name); gpr_free(addr_str); @@ -376,18 +433,19 @@ error: } } -static grpc_tcp_listener *add_socket_to_server(grpc_tcp_server *s, int fd, - const struct sockaddr *addr, - size_t addr_len, - unsigned port_index, - unsigned fd_index) { +static grpc_error *add_socket_to_server(grpc_tcp_server *s, int fd, + const struct sockaddr *addr, + size_t addr_len, unsigned port_index, + unsigned fd_index, + grpc_tcp_listener **listener) { grpc_tcp_listener *sp = NULL; - int port; + int port = -1; char *addr_str; char *name; - port = prepare_socket(fd, addr, addr_len); - if (port >= 0) { + grpc_error *err = prepare_socket(fd, addr, addr_len, s->so_reuseport, &port); + if (err == GRPC_ERROR_NONE) { + GPR_ASSERT(port > 0); grpc_sockaddr_to_string(&addr_str, (struct sockaddr *)&addr, 1); gpr_asprintf(&name, "tcp-server-listener:%s", addr_str); gpr_mu_lock(&s->mu); @@ -417,11 +475,58 @@ static grpc_tcp_listener *add_socket_to_server(grpc_tcp_server *s, int fd, gpr_free(name); } - return sp; + *listener = sp; + return err; } -int grpc_tcp_server_add_port(grpc_tcp_server *s, const void *addr, - size_t addr_len) { +static grpc_error *clone_port(grpc_tcp_listener *listener, unsigned count) { + grpc_tcp_listener *sp = NULL; + char *addr_str; + char *name; + grpc_error *err; + + for (grpc_tcp_listener *l = listener->next; l && l->is_sibling; l = l->next) { + l->fd_index += count; + } + + for (unsigned i = 0; i < count; i++) { + int fd, port; + grpc_dualstack_mode dsmode; + err = grpc_create_dualstack_socket(&listener->addr.sockaddr, SOCK_STREAM, 0, + &dsmode, &fd); + if (err != GRPC_ERROR_NONE) return err; + err = prepare_socket(fd, &listener->addr.sockaddr, listener->addr_len, true, + &port); + if (err != GRPC_ERROR_NONE) return err; + listener->server->nports++; + grpc_sockaddr_to_string(&addr_str, &listener->addr.sockaddr, 1); + gpr_asprintf(&name, "tcp-server-listener:%s/clone-%d", addr_str, i); + sp = gpr_malloc(sizeof(grpc_tcp_listener)); + sp->next = listener->next; + listener->next = sp; + sp->server = listener->server; + sp->fd = fd; + sp->emfd = grpc_fd_create(fd, name); + memcpy(sp->addr.untyped, listener->addr.untyped, listener->addr_len); + sp->addr_len = listener->addr_len; + sp->port = port; + sp->port_index = listener->port_index; + sp->fd_index = listener->fd_index + count - i; + sp->is_sibling = 1; + sp->sibling = listener->is_sibling ? listener->sibling : listener; + GPR_ASSERT(sp->emfd); + while (listener->server->tail->next != NULL) { + listener->server->tail = listener->server->tail->next; + } + gpr_free(addr_str); + gpr_free(name); + } + + return GRPC_ERROR_NONE; +} + +grpc_error *grpc_tcp_server_add_port(grpc_tcp_server *s, const void *addr, + size_t addr_len, int *out_port) { grpc_tcp_listener *sp; grpc_tcp_listener *sp2 = NULL; int fd; @@ -436,6 +541,7 @@ int grpc_tcp_server_add_port(grpc_tcp_server *s, const void *addr, int port; unsigned port_index = 0; unsigned fd_index = 0; + grpc_error *errs[2] = {GRPC_ERROR_NONE, GRPC_ERROR_NONE}; if (s->tail != NULL) { port_index = s->tail->port_index + 1; } @@ -474,33 +580,35 @@ int grpc_tcp_server_add_port(grpc_tcp_server *s, const void *addr, /* Try listening on IPv6 first. */ addr = (struct sockaddr *)&wild6; addr_len = sizeof(wild6); - fd = grpc_create_dualstack_socket(addr, SOCK_STREAM, 0, &dsmode); - sp = add_socket_to_server(s, fd, addr, addr_len, port_index, fd_index); - if (fd >= 0 && dsmode == GRPC_DSMODE_DUALSTACK) { - goto done; - } - if (sp != NULL) { - ++fd_index; - } - /* If we didn't get a dualstack socket, also listen on 0.0.0.0. */ - if (port == 0 && sp != NULL) { - grpc_sockaddr_set_port((struct sockaddr *)&wild4, sp->port); + errs[0] = grpc_create_dualstack_socket(addr, SOCK_STREAM, 0, &dsmode, &fd); + if (errs[0] == GRPC_ERROR_NONE) { + errs[0] = add_socket_to_server(s, fd, addr, addr_len, port_index, + fd_index, &sp); + if (fd >= 0 && dsmode == GRPC_DSMODE_DUALSTACK) { + goto done; + } + if (sp != NULL) { + ++fd_index; + } + /* If we didn't get a dualstack socket, also listen on 0.0.0.0. */ + if (port == 0 && sp != NULL) { + grpc_sockaddr_set_port((struct sockaddr *)&wild4, sp->port); + } } addr = (struct sockaddr *)&wild4; addr_len = sizeof(wild4); } - fd = grpc_create_dualstack_socket(addr, SOCK_STREAM, 0, &dsmode); - if (fd < 0) { - gpr_log(GPR_ERROR, "Unable to create socket: %s", strerror(errno)); - } else { + errs[1] = grpc_create_dualstack_socket(addr, SOCK_STREAM, 0, &dsmode, &fd); + if (errs[1] == GRPC_ERROR_NONE) { if (dsmode == GRPC_DSMODE_IPV4 && grpc_sockaddr_is_v4mapped(addr, &addr4_copy)) { addr = (struct sockaddr *)&addr4_copy; addr_len = sizeof(addr4_copy); } sp2 = sp; - sp = add_socket_to_server(s, fd, addr, addr_len, port_index, fd_index); + errs[1] = + add_socket_to_server(s, fd, addr, addr_len, port_index, fd_index, &sp); if (sp2 != NULL && sp != NULL) { sp2->sibling = sp; sp->is_sibling = 1; @@ -510,9 +618,21 @@ int grpc_tcp_server_add_port(grpc_tcp_server *s, const void *addr, done: gpr_free(allocated_addr); if (sp != NULL) { - return sp->port; + *out_port = sp->port; + GRPC_ERROR_UNREF(errs[0]); + GRPC_ERROR_UNREF(errs[1]); + return GRPC_ERROR_NONE; } else { - return -1; + *out_port = -1; + char *addr_str = grpc_sockaddr_to_uri(addr); + grpc_error *err = grpc_error_set_str( + GRPC_ERROR_CREATE_REFERENCING("Failed to add port to server", errs, + GPR_ARRAY_SIZE(errs)), + GRPC_ERROR_STR_TARGET_ADDRESS, addr_str); + GRPC_ERROR_UNREF(errs[0]); + GRPC_ERROR_UNREF(errs[1]); + gpr_free(addr_str); + return err; } } @@ -561,14 +681,29 @@ void grpc_tcp_server_start(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s, s->on_accept_cb_arg = on_accept_cb_arg; s->pollsets = pollsets; s->pollset_count = pollset_count; - for (sp = s->head; sp; sp = sp->next) { - for (i = 0; i < pollset_count; i++) { - grpc_pollset_add_fd(exec_ctx, pollsets[i], sp->emfd); + sp = s->head; + while (sp != NULL) { + if (s->so_reuseport && pollset_count > 1) { + GPR_ASSERT(GRPC_LOG_IF_ERROR( + "clone_port", clone_port(sp, (unsigned)(pollset_count - 1)))); + for (i = 0; i < pollset_count; i++) { + grpc_pollset_add_fd(exec_ctx, pollsets[i], sp->emfd); + sp->read_closure.cb = on_read; + sp->read_closure.cb_arg = sp; + grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); + s->active_ports++; + sp = sp->next; + } + } else { + for (i = 0; i < pollset_count; i++) { + grpc_pollset_add_fd(exec_ctx, pollsets[i], sp->emfd); + } + sp->read_closure.cb = on_read; + sp->read_closure.cb_arg = sp; + grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); + s->active_ports++; + sp = sp->next; } - sp->read_closure.cb = on_read; - sp->read_closure.cb_arg = sp; - grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); - s->active_ports++; } gpr_mu_unlock(&s->mu); } @@ -581,7 +716,8 @@ grpc_tcp_server *grpc_tcp_server_ref(grpc_tcp_server *s) { void grpc_tcp_server_shutdown_starting_add(grpc_tcp_server *s, grpc_closure *shutdown_starting) { gpr_mu_lock(&s->mu); - grpc_closure_list_add(&s->shutdown_starting, shutdown_starting, 1); + grpc_closure_list_append(&s->shutdown_starting, shutdown_starting, + GRPC_ERROR_NONE); gpr_mu_unlock(&s->mu); } |