From e48b1bc011e2b5783ce75f4122dfd5aba01f0d97 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 11 May 2016 15:17:22 -0700 Subject: Base changes. Create ev_epoll_posix.{c,h} files by making a copy of ev_poll_and_epoll.c file --- src/core/lib/iomgr/ev_epoll_posix.c | 1733 +++++++++++++++++++++++++++++++++++ src/core/lib/iomgr/ev_epoll_posix.h | 41 + src/core/lib/iomgr/ev_posix.c | 3 +- 3 files changed, 1776 insertions(+), 1 deletion(-) create mode 100644 src/core/lib/iomgr/ev_epoll_posix.c create mode 100644 src/core/lib/iomgr/ev_epoll_posix.h (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_posix.c b/src/core/lib/iomgr/ev_epoll_posix.c new file mode 100644 index 0000000000..ce8d3981b3 --- /dev/null +++ b/src/core/lib/iomgr/ev_epoll_posix.c @@ -0,0 +1,1733 @@ +/* + * + * Copyright 2016, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +#ifdef GPR_POSIX_SOCKET + +#include "src/core/lib/iomgr/ev_epoll_posix.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/wakeup_fd_posix.h" +#include "src/core/lib/profiling/timers.h" +#include "src/core/lib/support/block_annotate.h" + +/******************************************************************************* + * FD declarations + */ + +/* TODO(sreek) : Check if grpc_fd_watcher is needed (and if so, check if we can + * share this between ev_poll_posix.h and ev_epoll_posix versions */ + +typedef struct grpc_fd_watcher { + struct grpc_fd_watcher *next; + struct grpc_fd_watcher *prev; + grpc_pollset *pollset; + grpc_pollset_worker *worker; + grpc_fd *fd; +} grpc_fd_watcher; + +struct grpc_fd { + int fd; + /* refst format: + bit0: 1=active/0=orphaned + bit1-n: refcount + meaning that mostly we ref by two to avoid altering the orphaned bit, + and just unref by 1 when we're ready to flag the object as orphaned */ + gpr_atm refst; + + gpr_mu mu; + int shutdown; + int closed; + int released; + + /* The watcher list. + + The following watcher related fields are protected by watcher_mu. + + An fd_watcher is an ephemeral object created when an fd wants to + begin polling, and destroyed after the poll. + + It denotes the fd's interest in whether to read poll or write poll + or both or neither on this fd. + + If a watcher is asked to poll for reads or writes, the read_watcher + or write_watcher fields are set respectively. A watcher may be asked + to poll for both, in which case both fields will be set. + + read_watcher and write_watcher may be NULL if no watcher has been + asked to poll for reads or writes. + + If an fd_watcher is not asked to poll for reads or writes, it's added + to a linked list of inactive watchers, rooted at inactive_watcher_root. + If at a later time there becomes need of a poller to poll, one of + the inactive pollers may be kicked out of their poll loops to take + that responsibility. */ + grpc_fd_watcher inactive_watcher_root; + grpc_fd_watcher *read_watcher; + grpc_fd_watcher *write_watcher; + + grpc_closure *read_closure; + grpc_closure *write_closure; + + struct grpc_fd *freelist_next; + + grpc_closure *on_done_closure; + + grpc_iomgr_object iomgr_object; +}; + +/* Begin polling on an fd. + Registers that the given pollset is interested in this fd - so that if read + or writability interest changes, the pollset can be kicked to pick up that + new interest. + Return value is: + (fd_needs_read? read_mask : 0) | (fd_needs_write? write_mask : 0) + i.e. a combination of read_mask and write_mask determined by the fd's current + interest in said events. + Polling strategies that do not need to alter their behavior depending on the + fd's current interest (such as epoll) do not need to call this function. + MUST NOT be called with a pollset lock taken */ +static uint32_t fd_begin_poll(grpc_fd *fd, grpc_pollset *pollset, + grpc_pollset_worker *worker, uint32_t read_mask, + uint32_t write_mask, grpc_fd_watcher *rec); +/* Complete polling previously started with fd_begin_poll + MUST NOT be called with a pollset lock taken + if got_read or got_write are 1, also does the become_{readable,writable} as + appropriate. 
*/ +static void fd_end_poll(grpc_exec_ctx *exec_ctx, grpc_fd_watcher *rec, + int got_read, int got_write); + +/* Return 1 if this fd is orphaned, 0 otherwise */ +static bool fd_is_orphaned(grpc_fd *fd); + +/* Reference counting for fds */ +/*#define GRPC_FD_REF_COUNT_DEBUG*/ +#ifdef GRPC_FD_REF_COUNT_DEBUG +static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line); +static void fd_unref(grpc_fd *fd, const char *reason, const char *file, + int line); +#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__) +#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__) +#else +static void fd_ref(grpc_fd *fd); +static void fd_unref(grpc_fd *fd); +#define GRPC_FD_REF(fd, reason) fd_ref(fd) +#define GRPC_FD_UNREF(fd, reason) fd_unref(fd) +#endif + +static void fd_global_init(void); +static void fd_global_shutdown(void); + +#define CLOSURE_NOT_READY ((grpc_closure *)0) +#define CLOSURE_READY ((grpc_closure *)1) + +/******************************************************************************* + * pollset declarations + */ + +typedef struct grpc_pollset_vtable grpc_pollset_vtable; + +typedef struct grpc_cached_wakeup_fd { + grpc_wakeup_fd fd; + struct grpc_cached_wakeup_fd *next; +} grpc_cached_wakeup_fd; + +struct grpc_pollset_worker { + grpc_cached_wakeup_fd *wakeup_fd; + int reevaluate_polling_on_wakeup; + int kicked_specifically; + struct grpc_pollset_worker *next; + struct grpc_pollset_worker *prev; +}; + +struct grpc_pollset { + /* pollsets under posix can mutate representation as fds are added and + removed. + For example, we may choose a poll() based implementation on linux for + few fds, and an epoll() based implementation for many fds */ + const grpc_pollset_vtable *vtable; + gpr_mu mu; + grpc_pollset_worker root_worker; + int in_flight_cbs; + int shutting_down; + int called_shutdown; + int kicked_without_pollers; + grpc_closure *shutdown_done; + grpc_closure_list idle_jobs; + union { + int fd; + void *ptr; + } data; + /* Local cache of eventfds for workers */ + grpc_cached_wakeup_fd *local_wakeup_cache; +}; + +struct grpc_pollset_vtable { + void (*add_fd)(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + struct grpc_fd *fd, int and_unlock_pollset); + void (*maybe_work_and_unlock)(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker *worker, + gpr_timespec deadline, gpr_timespec now); + void (*finish_shutdown)(grpc_pollset *pollset); + void (*destroy)(grpc_pollset *pollset); +}; + +/* Add an fd to a pollset */ +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + struct grpc_fd *fd); + +static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd); + +/* Convert a timespec to milliseconds: + - very small or negative poll times are clamped to zero to do a + non-blocking poll (which becomes spin polling) + - other small values are rounded up to one millisecond + - longer than a millisecond polls are rounded up to the next nearest + millisecond to avoid spinning + - infinite timeouts are converted to -1 */ +static int poll_deadline_to_millis_timeout(gpr_timespec deadline, + gpr_timespec now); + +/* Allow kick to wakeup the currently polling worker */ +#define GRPC_POLLSET_CAN_KICK_SELF 1 +/* Force the wakee to repoll when awoken */ +#define GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP 2 +/* As per pollset_kick, with an extended set of flags (defined above) + -- mostly for fd_posix's use. 
*/ +static void pollset_kick_ext(grpc_pollset *p, + grpc_pollset_worker *specific_worker, + uint32_t flags); + +/* turn a pollset into a multipoller: platform specific */ +typedef void (*platform_become_multipoller_type)(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, + struct grpc_fd **fds, + size_t fd_count); +static platform_become_multipoller_type platform_become_multipoller; + + +/* Return 1 if the pollset has active threads in pollset_work (pollset must + * be locked) */ +static int pollset_has_workers(grpc_pollset *pollset); + +static void remove_fd_from_all_epoll_sets(int fd); + +/******************************************************************************* + * pollset_set definitions + */ + +struct grpc_pollset_set { + gpr_mu mu; + + size_t pollset_count; + size_t pollset_capacity; + grpc_pollset **pollsets; + + size_t pollset_set_count; + size_t pollset_set_capacity; + struct grpc_pollset_set **pollset_sets; + + size_t fd_count; + size_t fd_capacity; + grpc_fd **fds; +}; + +/******************************************************************************* + * fd_posix.c + */ + +/* We need to keep a freelist not because of any concerns of malloc performance + * but instead so that implementations with multiple threads in (for example) + * epoll_wait deal with the race between pollset removal and incoming poll + * notifications. + * + * The problem is that the poller ultimately holds a reference to this + * object, so it is very difficult to know when is safe to free it, at least + * without some expensive synchronization. + * + * If we keep the object freelisted, in the worst case losing this race just + * becomes a spurious read notification on a reused fd. + */ +/* TODO(klempner): We could use some form of polling generation count to know + * when these are safe to free. 
*/ +/* TODO(klempner): Consider disabling freelisting if we don't have multiple + * threads in poll on the same fd */ +/* TODO(klempner): Batch these allocations to reduce fragmentation */ +static grpc_fd *fd_freelist = NULL; +static gpr_mu fd_freelist_mu; + +static void freelist_fd(grpc_fd *fd) { + gpr_mu_lock(&fd_freelist_mu); + fd->freelist_next = fd_freelist; + fd_freelist = fd; + grpc_iomgr_unregister_object(&fd->iomgr_object); + gpr_mu_unlock(&fd_freelist_mu); +} + +static grpc_fd *alloc_fd(int fd) { + grpc_fd *r = NULL; + gpr_mu_lock(&fd_freelist_mu); + if (fd_freelist != NULL) { + r = fd_freelist; + fd_freelist = fd_freelist->freelist_next; + } + gpr_mu_unlock(&fd_freelist_mu); + if (r == NULL) { + r = gpr_malloc(sizeof(grpc_fd)); + gpr_mu_init(&r->mu); + } + + gpr_mu_lock(&r->mu); + gpr_atm_rel_store(&r->refst, 1); + r->shutdown = 0; + r->read_closure = CLOSURE_NOT_READY; + r->write_closure = CLOSURE_NOT_READY; + r->fd = fd; + r->inactive_watcher_root.next = r->inactive_watcher_root.prev = + &r->inactive_watcher_root; + r->freelist_next = NULL; + r->read_watcher = r->write_watcher = NULL; + r->on_done_closure = NULL; + r->closed = 0; + r->released = 0; + gpr_mu_unlock(&r->mu); + return r; +} + +static void destroy(grpc_fd *fd) { + gpr_mu_destroy(&fd->mu); + gpr_free(fd); +} + +#ifdef GRPC_FD_REF_COUNT_DEBUG +#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__) +#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__) +static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file, + int line) { + gpr_log(GPR_DEBUG, "FD %d %p ref %d %d -> %d [%s; %s:%d]", fd->fd, fd, n, + gpr_atm_no_barrier_load(&fd->refst), + gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line); +#else +#define REF_BY(fd, n, reason) ref_by(fd, n) +#define UNREF_BY(fd, n, reason) unref_by(fd, n) +static void ref_by(grpc_fd *fd, int n) { +#endif + GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0); +} + +#ifdef GRPC_FD_REF_COUNT_DEBUG +static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file, + int line) { + gpr_atm old; + gpr_log(GPR_DEBUG, "FD %d %p unref %d %d -> %d [%s; %s:%d]", fd->fd, fd, n, + gpr_atm_no_barrier_load(&fd->refst), + gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line); +#else +static void unref_by(grpc_fd *fd, int n) { + gpr_atm old; +#endif + old = gpr_atm_full_fetch_add(&fd->refst, -n); + if (old == n) { + freelist_fd(fd); + } else { + GPR_ASSERT(old > n); + } +} + +static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); } + +static void fd_global_shutdown(void) { + gpr_mu_lock(&fd_freelist_mu); + gpr_mu_unlock(&fd_freelist_mu); + while (fd_freelist != NULL) { + grpc_fd *fd = fd_freelist; + fd_freelist = fd_freelist->freelist_next; + destroy(fd); + } + gpr_mu_destroy(&fd_freelist_mu); +} + +static grpc_fd *fd_create(int fd, const char *name) { + grpc_fd *r = alloc_fd(fd); + char *name2; + gpr_asprintf(&name2, "%s fd=%d", name, fd); + grpc_iomgr_register_object(&r->iomgr_object, name2); + gpr_free(name2); +#ifdef GRPC_FD_REF_COUNT_DEBUG + gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, r, name); +#endif + return r; +} + +static bool fd_is_orphaned(grpc_fd *fd) { + return (gpr_atm_acq_load(&fd->refst) & 1) == 0; +} + +static void pollset_kick_locked(grpc_fd_watcher *watcher) { + gpr_mu_lock(&watcher->pollset->mu); + GPR_ASSERT(watcher->worker); + pollset_kick_ext(watcher->pollset, watcher->worker, + GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP); + gpr_mu_unlock(&watcher->pollset->mu); +} + 
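A minimal, standalone sketch (editorial addition, not part of this commit) of the refst encoding documented on grpc_fd above and implemented by ref_by()/unref_by(): bit 0 is the active/orphaned flag and the remaining bits count references, so ordinary refs and unrefs move the value by 2, while fd_orphan()'s net -1 clears only the active bit. The names fake_fd and sketch_lifecycle are hypothetical.

#include <assert.h>

typedef struct { long refst; } fake_fd; /* hypothetical stand-in for grpc_fd */

static void sketch_lifecycle(void) {
  fake_fd f = {1};            /* alloc_fd(): refst = 1 -> active bit set, no refs   */
  f.refst += 2;               /* GRPC_FD_REF(fd, "poll"): one reference adds 2      */
  f.refst += 1;               /* fd_orphan(): REF_BY(fd, 1) ...                     */
  f.refst -= 2;               /* ... then UNREF_BY(fd, 2); net -1 clears bit 0 only */
  assert((f.refst & 1) == 0); /* fd_is_orphaned() now reports true                  */
  f.refst -= 2;               /* GRPC_FD_UNREF(fd, "poll"): count hits 0 -> freed   */
  assert(f.refst == 0);
}

int main(void) { sketch_lifecycle(); return 0; }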
+static void maybe_wake_one_watcher_locked(grpc_fd *fd) { + if (fd->inactive_watcher_root.next != &fd->inactive_watcher_root) { + pollset_kick_locked(fd->inactive_watcher_root.next); + } else if (fd->read_watcher) { + pollset_kick_locked(fd->read_watcher); + } else if (fd->write_watcher) { + pollset_kick_locked(fd->write_watcher); + } +} + +static void wake_all_watchers_locked(grpc_fd *fd) { + grpc_fd_watcher *watcher; + for (watcher = fd->inactive_watcher_root.next; + watcher != &fd->inactive_watcher_root; watcher = watcher->next) { + pollset_kick_locked(watcher); + } + if (fd->read_watcher) { + pollset_kick_locked(fd->read_watcher); + } + if (fd->write_watcher && fd->write_watcher != fd->read_watcher) { + pollset_kick_locked(fd->write_watcher); + } +} + +static int has_watchers(grpc_fd *fd) { + return fd->read_watcher != NULL || fd->write_watcher != NULL || + fd->inactive_watcher_root.next != &fd->inactive_watcher_root; +} + +static void close_fd_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + fd->closed = 1; + if (!fd->released) { + close(fd->fd); + } else { + remove_fd_from_all_epoll_sets(fd->fd); + } + grpc_exec_ctx_enqueue(exec_ctx, fd->on_done_closure, true, NULL); +} + +static int fd_wrapped_fd(grpc_fd *fd) { + if (fd->released || fd->closed) { + return -1; + } else { + return fd->fd; + } +} + +static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *on_done, int *release_fd, + const char *reason) { + fd->on_done_closure = on_done; + fd->released = release_fd != NULL; + if (!fd->released) { + shutdown(fd->fd, SHUT_RDWR); + } else { + *release_fd = fd->fd; + } + gpr_mu_lock(&fd->mu); + REF_BY(fd, 1, reason); /* remove active status, but keep referenced */ + if (!has_watchers(fd)) { + close_fd_locked(exec_ctx, fd); + } else { + wake_all_watchers_locked(fd); + } + gpr_mu_unlock(&fd->mu); + UNREF_BY(fd, 2, reason); /* drop the reference */ +} + +/* increment refcount by two to avoid changing the orphan bit */ +#ifdef GRPC_FD_REF_COUNT_DEBUG +static void fd_ref(grpc_fd *fd, const char *reason, const char *file, + int line) { + ref_by(fd, 2, reason, file, line); +} + +static void fd_unref(grpc_fd *fd, const char *reason, const char *file, + int line) { + unref_by(fd, 2, reason, file, line); +} +#else +static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); } + +static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); } +#endif + +static void notify_on_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure **st, grpc_closure *closure) { + if (*st == CLOSURE_NOT_READY) { + /* not ready ==> switch to a waiting state by setting the closure */ + *st = closure; + } else if (*st == CLOSURE_READY) { + /* already ready ==> queue the closure to run immediately */ + *st = CLOSURE_NOT_READY; + grpc_exec_ctx_enqueue(exec_ctx, closure, !fd->shutdown, NULL); + maybe_wake_one_watcher_locked(fd); + } else { + /* upcallptr was set to a different closure. This is an error! 
*/ + gpr_log(GPR_ERROR, + "User called a notify_on function with a previous callback still " + "pending"); + abort(); + } +} + +/* returns 1 if state becomes not ready */ +static int set_ready_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure **st) { + if (*st == CLOSURE_READY) { + /* duplicate ready ==> ignore */ + return 0; + } else if (*st == CLOSURE_NOT_READY) { + /* not ready, and not waiting ==> flag ready */ + *st = CLOSURE_READY; + return 0; + } else { + /* waiting ==> queue closure */ + grpc_exec_ctx_enqueue(exec_ctx, *st, !fd->shutdown, NULL); + *st = CLOSURE_NOT_READY; + return 1; + } +} + +static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + gpr_mu_lock(&fd->mu); + GPR_ASSERT(!fd->shutdown); + fd->shutdown = 1; + set_ready_locked(exec_ctx, fd, &fd->read_closure); + set_ready_locked(exec_ctx, fd, &fd->write_closure); + gpr_mu_unlock(&fd->mu); +} + +static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + gpr_mu_lock(&fd->mu); + notify_on_locked(exec_ctx, fd, &fd->read_closure, closure); + gpr_mu_unlock(&fd->mu); +} + +static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + gpr_mu_lock(&fd->mu); + notify_on_locked(exec_ctx, fd, &fd->write_closure, closure); + gpr_mu_unlock(&fd->mu); +} + +static uint32_t fd_begin_poll(grpc_fd *fd, grpc_pollset *pollset, + grpc_pollset_worker *worker, uint32_t read_mask, + uint32_t write_mask, grpc_fd_watcher *watcher) { + uint32_t mask = 0; + grpc_closure *cur; + int requested; + /* keep track of pollers that have requested our events, in case they change + */ + GRPC_FD_REF(fd, "poll"); + + gpr_mu_lock(&fd->mu); + + /* if we are shutdown, then don't add to the watcher set */ + if (fd->shutdown) { + watcher->fd = NULL; + watcher->pollset = NULL; + watcher->worker = NULL; + gpr_mu_unlock(&fd->mu); + GRPC_FD_UNREF(fd, "poll"); + return 0; + } + + /* if there is nobody polling for read, but we need to, then start doing so */ + cur = fd->read_closure; + requested = cur != CLOSURE_READY; + if (read_mask && fd->read_watcher == NULL && requested) { + fd->read_watcher = watcher; + mask |= read_mask; + } + /* if there is nobody polling for write, but we need to, then start doing so + */ + cur = fd->write_closure; + requested = cur != CLOSURE_READY; + if (write_mask && fd->write_watcher == NULL && requested) { + fd->write_watcher = watcher; + mask |= write_mask; + } + /* if not polling, remember this watcher in case we need someone to later */ + if (mask == 0 && worker != NULL) { + watcher->next = &fd->inactive_watcher_root; + watcher->prev = watcher->next->prev; + watcher->next->prev = watcher->prev->next = watcher; + } + watcher->pollset = pollset; + watcher->worker = worker; + watcher->fd = fd; + gpr_mu_unlock(&fd->mu); + + return mask; +} + +static void fd_end_poll(grpc_exec_ctx *exec_ctx, grpc_fd_watcher *watcher, + int got_read, int got_write) { + int was_polling = 0; + int kick = 0; + grpc_fd *fd = watcher->fd; + + if (fd == NULL) { + return; + } + + gpr_mu_lock(&fd->mu); + + if (watcher == fd->read_watcher) { + /* remove read watcher, kick if we still need a read */ + was_polling = 1; + if (!got_read) { + kick = 1; + } + fd->read_watcher = NULL; + } + if (watcher == fd->write_watcher) { + /* remove write watcher, kick if we still need a write */ + was_polling = 1; + if (!got_write) { + kick = 1; + } + fd->write_watcher = NULL; + } + if (!was_polling && watcher->worker != NULL) { + /* remove from inactive list */ + watcher->next->prev = 
watcher->prev; + watcher->prev->next = watcher->next; + } + if (got_read) { + if (set_ready_locked(exec_ctx, fd, &fd->read_closure)) { + kick = 1; + } + } + if (got_write) { + if (set_ready_locked(exec_ctx, fd, &fd->write_closure)) { + kick = 1; + } + } + if (kick) { + maybe_wake_one_watcher_locked(fd); + } + if (fd_is_orphaned(fd) && !has_watchers(fd) && !fd->closed) { + close_fd_locked(exec_ctx, fd); + } + gpr_mu_unlock(&fd->mu); + + GRPC_FD_UNREF(fd, "poll"); +} + +/******************************************************************************* + * pollset_posix.c + */ + +GPR_TLS_DECL(g_current_thread_poller); +GPR_TLS_DECL(g_current_thread_worker); + +/** The alarm system needs to be able to wakeup 'some poller' sometimes + * (specifically when a new alarm needs to be triggered earlier than the next + * alarm 'epoch'). + * This wakeup_fd gives us something to alert on when such a case occurs. */ +grpc_wakeup_fd grpc_global_wakeup_fd; + +static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->prev->next = worker->next; + worker->next->prev = worker->prev; +} + +static int pollset_has_workers(grpc_pollset *p) { + return p->root_worker.next != &p->root_worker; +} + +static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) { + if (pollset_has_workers(p)) { + grpc_pollset_worker *w = p->root_worker.next; + remove_worker(p, w); + return w; + } else { + return NULL; + } +} + +static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->next = &p->root_worker; + worker->prev = worker->next->prev; + worker->prev->next = worker->next->prev = worker; +} + +static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->prev = &p->root_worker; + worker->next = worker->prev->next; + worker->prev->next = worker->next->prev = worker; +} + +static void pollset_kick_ext(grpc_pollset *p, + grpc_pollset_worker *specific_worker, + uint32_t flags) { + GPR_TIMER_BEGIN("pollset_kick_ext", 0); + + /* pollset->mu already held */ + if (specific_worker != NULL) { + if (specific_worker == GRPC_POLLSET_KICK_BROADCAST) { + GPR_TIMER_BEGIN("pollset_kick_ext.broadcast", 0); + GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); + for (specific_worker = p->root_worker.next; + specific_worker != &p->root_worker; + specific_worker = specific_worker->next) { + grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + } + p->kicked_without_pollers = 1; + GPR_TIMER_END("pollset_kick_ext.broadcast", 0); + } else if (gpr_tls_get(&g_current_thread_worker) != + (intptr_t)specific_worker) { + GPR_TIMER_MARK("different_thread_worker", 0); + if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { + specific_worker->reevaluate_polling_on_wakeup = 1; + } + specific_worker->kicked_specifically = 1; + grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + } else if ((flags & GRPC_POLLSET_CAN_KICK_SELF) != 0) { + GPR_TIMER_MARK("kick_yoself", 0); + if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { + specific_worker->reevaluate_polling_on_wakeup = 1; + } + specific_worker->kicked_specifically = 1; + grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + } + } else if (gpr_tls_get(&g_current_thread_poller) != (intptr_t)p) { + GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); + GPR_TIMER_MARK("kick_anonymous", 0); + specific_worker = pop_front_worker(p); + if (specific_worker != NULL) { + if (gpr_tls_get(&g_current_thread_worker) == (intptr_t)specific_worker) { + 
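+        /* the worker just popped is the calling thread: rotate it to the back
+           and try the next one; only fall back to kicking ourselves when
+           GRPC_POLLSET_CAN_KICK_SELF was passed */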
GPR_TIMER_MARK("kick_anonymous_not_self", 0); + push_back_worker(p, specific_worker); + specific_worker = pop_front_worker(p); + if ((flags & GRPC_POLLSET_CAN_KICK_SELF) == 0 && + gpr_tls_get(&g_current_thread_worker) == + (intptr_t)specific_worker) { + push_back_worker(p, specific_worker); + specific_worker = NULL; + } + } + if (specific_worker != NULL) { + GPR_TIMER_MARK("finally_kick", 0); + push_back_worker(p, specific_worker); + grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + } + } else { + GPR_TIMER_MARK("kicked_no_pollers", 0); + p->kicked_without_pollers = 1; + } + } + + GPR_TIMER_END("pollset_kick_ext", 0); +} + +static void pollset_kick(grpc_pollset *p, + grpc_pollset_worker *specific_worker) { + pollset_kick_ext(p, specific_worker, 0); +} + +/* global state management */ + +static void pollset_global_init(void) { + gpr_tls_init(&g_current_thread_poller); + gpr_tls_init(&g_current_thread_worker); + grpc_wakeup_fd_init(&grpc_global_wakeup_fd); +} + +static void pollset_global_shutdown(void) { + grpc_wakeup_fd_destroy(&grpc_global_wakeup_fd); + gpr_tls_destroy(&g_current_thread_poller); + gpr_tls_destroy(&g_current_thread_worker); +} + +static void kick_poller(void) { grpc_wakeup_fd_wakeup(&grpc_global_wakeup_fd); } + +/* main interface */ + +static void become_basic_pollset(grpc_pollset *pollset, grpc_fd *fd_or_null); + +static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { + gpr_mu_init(&pollset->mu); + *mu = &pollset->mu; + pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker; + pollset->in_flight_cbs = 0; + pollset->shutting_down = 0; + pollset->called_shutdown = 0; + pollset->kicked_without_pollers = 0; + pollset->idle_jobs.head = pollset->idle_jobs.tail = NULL; + pollset->local_wakeup_cache = NULL; + pollset->kicked_without_pollers = 0; + become_basic_pollset(pollset, NULL); +} + +static void pollset_destroy(grpc_pollset *pollset) { + GPR_ASSERT(pollset->in_flight_cbs == 0); + GPR_ASSERT(!pollset_has_workers(pollset)); + GPR_ASSERT(pollset->idle_jobs.head == pollset->idle_jobs.tail); + pollset->vtable->destroy(pollset); + while (pollset->local_wakeup_cache) { + grpc_cached_wakeup_fd *next = pollset->local_wakeup_cache->next; + grpc_wakeup_fd_destroy(&pollset->local_wakeup_cache->fd); + gpr_free(pollset->local_wakeup_cache); + pollset->local_wakeup_cache = next; + } + gpr_mu_destroy(&pollset->mu); +} + +static void pollset_reset(grpc_pollset *pollset) { + GPR_ASSERT(pollset->shutting_down); + GPR_ASSERT(pollset->in_flight_cbs == 0); + GPR_ASSERT(!pollset_has_workers(pollset)); + GPR_ASSERT(pollset->idle_jobs.head == pollset->idle_jobs.tail); + pollset->vtable->destroy(pollset); + pollset->shutting_down = 0; + pollset->called_shutdown = 0; + pollset->kicked_without_pollers = 0; + become_basic_pollset(pollset, NULL); +} + +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_fd *fd) { + gpr_mu_lock(&pollset->mu); + pollset->vtable->add_fd(exec_ctx, pollset, fd, 1); +/* the following (enabled only in debug) will reacquire and then release + our lock - meaning that if the unlocking flag passed to add_fd above is + not respected, the code will deadlock (in a way that we have a chance of + debugging) */ +#ifndef NDEBUG + gpr_mu_lock(&pollset->mu); + gpr_mu_unlock(&pollset->mu); +#endif +} + +static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { + GPR_ASSERT(grpc_closure_list_empty(pollset->idle_jobs)); + pollset->vtable->finish_shutdown(pollset); + grpc_exec_ctx_enqueue(exec_ctx, 
pollset->shutdown_done, true, NULL); +} + +static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker **worker_hdl, gpr_timespec now, + gpr_timespec deadline) { + grpc_pollset_worker worker; + *worker_hdl = &worker; + + /* pollset->mu already held */ + int added_worker = 0; + int locked = 1; + int queued_work = 0; + int keep_polling = 0; + GPR_TIMER_BEGIN("pollset_work", 0); + /* this must happen before we (potentially) drop pollset->mu */ + worker.next = worker.prev = NULL; + worker.reevaluate_polling_on_wakeup = 0; + if (pollset->local_wakeup_cache != NULL) { + worker.wakeup_fd = pollset->local_wakeup_cache; + pollset->local_wakeup_cache = worker.wakeup_fd->next; + } else { + worker.wakeup_fd = gpr_malloc(sizeof(*worker.wakeup_fd)); + grpc_wakeup_fd_init(&worker.wakeup_fd->fd); + } + worker.kicked_specifically = 0; + /* If there's work waiting for the pollset to be idle, and the + pollset is idle, then do that work */ + if (!pollset_has_workers(pollset) && + !grpc_closure_list_empty(pollset->idle_jobs)) { + GPR_TIMER_MARK("pollset_work.idle_jobs", 0); + grpc_exec_ctx_enqueue_list(exec_ctx, &pollset->idle_jobs, NULL); + goto done; + } + /* If we're shutting down then we don't execute any extended work */ + if (pollset->shutting_down) { + GPR_TIMER_MARK("pollset_work.shutting_down", 0); + goto done; + } + /* Give do_promote priority so we don't starve it out */ + if (pollset->in_flight_cbs) { + GPR_TIMER_MARK("pollset_work.in_flight_cbs", 0); + gpr_mu_unlock(&pollset->mu); + locked = 0; + goto done; + } + /* Start polling, and keep doing so while we're being asked to + re-evaluate our pollers (this allows poll() based pollers to + ensure they don't miss wakeups) */ + keep_polling = 1; + while (keep_polling) { + keep_polling = 0; + if (!pollset->kicked_without_pollers) { + if (!added_worker) { + push_front_worker(pollset, &worker); + added_worker = 1; + gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); + } + gpr_tls_set(&g_current_thread_poller, (intptr_t)pollset); + GPR_TIMER_BEGIN("maybe_work_and_unlock", 0); + pollset->vtable->maybe_work_and_unlock(exec_ctx, pollset, &worker, + deadline, now); + GPR_TIMER_END("maybe_work_and_unlock", 0); + locked = 0; + gpr_tls_set(&g_current_thread_poller, 0); + } else { + GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); + pollset->kicked_without_pollers = 0; + } + /* Finished execution - start cleaning up. + Note that we may arrive here from outside the enclosing while() loop. + In that case we won't loop though as we haven't added worker to the + worker list, which means nobody could ask us to re-evaluate polling). 
*/ + done: + if (!locked) { + queued_work |= grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&pollset->mu); + locked = 1; + } + /* If we're forced to re-evaluate polling (via pollset_kick with + GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) then we land here and force + a loop */ + if (worker.reevaluate_polling_on_wakeup) { + worker.reevaluate_polling_on_wakeup = 0; + pollset->kicked_without_pollers = 0; + if (queued_work || worker.kicked_specifically) { + /* If there's queued work on the list, then set the deadline to be + immediate so we get back out of the polling loop quickly */ + deadline = gpr_inf_past(GPR_CLOCK_MONOTONIC); + } + keep_polling = 1; + } + } + if (added_worker) { + remove_worker(pollset, &worker); + gpr_tls_set(&g_current_thread_worker, 0); + } + /* release wakeup fd to the local pool */ + worker.wakeup_fd->next = pollset->local_wakeup_cache; + pollset->local_wakeup_cache = worker.wakeup_fd; + /* check shutdown conditions */ + if (pollset->shutting_down) { + if (pollset_has_workers(pollset)) { + pollset_kick(pollset, NULL); + } else if (!pollset->called_shutdown && pollset->in_flight_cbs == 0) { + pollset->called_shutdown = 1; + gpr_mu_unlock(&pollset->mu); + finish_shutdown(exec_ctx, pollset); + grpc_exec_ctx_flush(exec_ctx); + /* Continuing to access pollset here is safe -- it is the caller's + * responsibility to not destroy when it has outstanding calls to + * pollset_work. + * TODO(dklempner): Can we refactor the shutdown logic to avoid this? */ + gpr_mu_lock(&pollset->mu); + } else if (!grpc_closure_list_empty(pollset->idle_jobs)) { + grpc_exec_ctx_enqueue_list(exec_ctx, &pollset->idle_jobs, NULL); + gpr_mu_unlock(&pollset->mu); + grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&pollset->mu); + } + } + *worker_hdl = NULL; + GPR_TIMER_END("pollset_work", 0); +} + +static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_closure *closure) { + GPR_ASSERT(!pollset->shutting_down); + pollset->shutting_down = 1; + pollset->shutdown_done = closure; + pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST); + if (!pollset_has_workers(pollset)) { + grpc_exec_ctx_enqueue_list(exec_ctx, &pollset->idle_jobs, NULL); + } + if (!pollset->called_shutdown && pollset->in_flight_cbs == 0 && + !pollset_has_workers(pollset)) { + pollset->called_shutdown = 1; + finish_shutdown(exec_ctx, pollset); + } +} + +static int poll_deadline_to_millis_timeout(gpr_timespec deadline, + gpr_timespec now) { + gpr_timespec timeout; + static const int64_t max_spin_polling_us = 10; + if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { + return -1; + } + if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros( + max_spin_polling_us, + GPR_TIMESPAN))) <= 0) { + return 0; + } + timeout = gpr_time_sub(deadline, now); + return gpr_time_to_millis(gpr_time_add( + timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); +} + +/* + * basic_pollset - a vtable that provides polling for zero or one file + * descriptor via poll() + */ + +typedef struct grpc_unary_promote_args { + const grpc_pollset_vtable *original_vtable; + grpc_pollset *pollset; + grpc_fd *fd; + grpc_closure promotion_closure; +} grpc_unary_promote_args; + +static void basic_do_promote(grpc_exec_ctx *exec_ctx, void *args, + bool success) { + grpc_unary_promote_args *up_args = args; + const grpc_pollset_vtable *original_vtable = up_args->original_vtable; + grpc_pollset *pollset = up_args->pollset; + grpc_fd *fd = up_args->fd; + + /* + * This is quite tricky. 
There are a number of cases to keep in mind here: + * 1. fd may have been orphaned + * 2. The pollset may no longer be a unary poller (and we can't let case #1 + * leak to other pollset types!) + * 3. pollset's fd (which may have changed) may have been orphaned + * 4. The pollset may be shutting down. + */ + + gpr_mu_lock(&pollset->mu); + /* First we need to ensure that nobody is polling concurrently */ + GPR_ASSERT(!pollset_has_workers(pollset)); + + gpr_free(up_args); + /* At this point the pollset may no longer be a unary poller. In that case + * we should just call the right add function and be done. */ + /* TODO(klempner): If we're not careful this could cause infinite recursion. + * That's not a problem for now because empty_pollset has a trivial poller + * and we don't have any mechanism to unbecome multipoller. */ + pollset->in_flight_cbs--; + if (pollset->shutting_down) { + /* We don't care about this pollset anymore. */ + if (pollset->in_flight_cbs == 0 && !pollset->called_shutdown) { + pollset->called_shutdown = 1; + finish_shutdown(exec_ctx, pollset); + } + } else if (fd_is_orphaned(fd)) { + /* Don't try to add it to anything, we'll drop our ref on it below */ + } else if (pollset->vtable != original_vtable) { + pollset->vtable->add_fd(exec_ctx, pollset, fd, 0); + } else if (fd != pollset->data.ptr) { + grpc_fd *fds[2]; + fds[0] = pollset->data.ptr; + fds[1] = fd; + + if (fds[0] && !fd_is_orphaned(fds[0])) { + platform_become_multipoller(exec_ctx, pollset, fds, GPR_ARRAY_SIZE(fds)); + GRPC_FD_UNREF(fds[0], "basicpoll"); + } else { + /* old fd is orphaned and we haven't cleaned it up until now, so remain a + * unary poller */ + /* Note that it is possible that fds[1] is also orphaned at this point. + * That's okay, we'll correct it at the next add or poll. */ + if (fds[0]) GRPC_FD_UNREF(fds[0], "basicpoll"); + pollset->data.ptr = fd; + GRPC_FD_REF(fd, "basicpoll"); + } + } + + gpr_mu_unlock(&pollset->mu); + + /* Matching ref in basic_pollset_add_fd */ + GRPC_FD_UNREF(fd, "basicpoll_add"); +} + +static void basic_pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_fd *fd, int and_unlock_pollset) { + grpc_unary_promote_args *up_args; + GPR_ASSERT(fd); + if (fd == pollset->data.ptr) goto exit; + + if (!pollset_has_workers(pollset)) { + /* Fast path -- no in flight cbs */ + /* TODO(klempner): Comment this out and fix any test failures or establish + * they are due to timing issues */ + grpc_fd *fds[2]; + fds[0] = pollset->data.ptr; + fds[1] = fd; + + if (fds[0] == NULL) { + pollset->data.ptr = fd; + GRPC_FD_REF(fd, "basicpoll"); + } else if (!fd_is_orphaned(fds[0])) { + platform_become_multipoller(exec_ctx, pollset, fds, GPR_ARRAY_SIZE(fds)); + GRPC_FD_UNREF(fds[0], "basicpoll"); + } else { + /* old fd is orphaned and we haven't cleaned it up until now, so remain a + * unary poller */ + GRPC_FD_UNREF(fds[0], "basicpoll"); + pollset->data.ptr = fd; + GRPC_FD_REF(fd, "basicpoll"); + } + goto exit; + } + + /* Now we need to promote. This needs to happen when we're not polling. Since + * this may be called from poll, the wait needs to happen asynchronously. 
*/ + GRPC_FD_REF(fd, "basicpoll_add"); + pollset->in_flight_cbs++; + up_args = gpr_malloc(sizeof(*up_args)); + up_args->fd = fd; + up_args->original_vtable = pollset->vtable; + up_args->pollset = pollset; + up_args->promotion_closure.cb = basic_do_promote; + up_args->promotion_closure.cb_arg = up_args; + + grpc_closure_list_add(&pollset->idle_jobs, &up_args->promotion_closure, 1); + pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST); + +exit: + if (and_unlock_pollset) { + gpr_mu_unlock(&pollset->mu); + } +} + +static void basic_pollset_maybe_work_and_unlock(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, + grpc_pollset_worker *worker, + gpr_timespec deadline, + gpr_timespec now) { +#define POLLOUT_CHECK (POLLOUT | POLLHUP | POLLERR) +#define POLLIN_CHECK (POLLIN | POLLHUP | POLLERR) + + struct pollfd pfd[3]; + grpc_fd *fd; + grpc_fd_watcher fd_watcher; + int timeout; + int r; + nfds_t nfds; + + fd = pollset->data.ptr; + if (fd && fd_is_orphaned(fd)) { + GRPC_FD_UNREF(fd, "basicpoll"); + fd = pollset->data.ptr = NULL; + } + timeout = poll_deadline_to_millis_timeout(deadline, now); + pfd[0].fd = GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd); + pfd[0].events = POLLIN; + pfd[0].revents = 0; + pfd[1].fd = GRPC_WAKEUP_FD_GET_READ_FD(&worker->wakeup_fd->fd); + pfd[1].events = POLLIN; + pfd[1].revents = 0; + nfds = 2; + if (fd) { + pfd[2].fd = fd->fd; + pfd[2].revents = 0; + GRPC_FD_REF(fd, "basicpoll_begin"); + gpr_mu_unlock(&pollset->mu); + pfd[2].events = + (short)fd_begin_poll(fd, pollset, worker, POLLIN, POLLOUT, &fd_watcher); + if (pfd[2].events != 0) { + nfds++; + } + } else { + gpr_mu_unlock(&pollset->mu); + } + + /* TODO(vpai): Consider first doing a 0 timeout poll here to avoid + even going into the blocking annotation if possible */ + /* poll fd count (argument 2) is shortened by one if we have no events + to poll on - such that it only includes the kicker */ + GPR_TIMER_BEGIN("poll", 0); + GRPC_SCHEDULING_START_BLOCKING_REGION; + r = grpc_poll_function(pfd, nfds, timeout); + GRPC_SCHEDULING_END_BLOCKING_REGION; + GPR_TIMER_END("poll", 0); + + if (r < 0) { + if (errno != EINTR) { + gpr_log(GPR_ERROR, "poll() failed: %s", strerror(errno)); + } + if (fd) { + fd_end_poll(exec_ctx, &fd_watcher, 0, 0); + } + } else if (r == 0) { + if (fd) { + fd_end_poll(exec_ctx, &fd_watcher, 0, 0); + } + } else { + if (pfd[0].revents & POLLIN_CHECK) { + grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); + } + if (pfd[1].revents & POLLIN_CHECK) { + grpc_wakeup_fd_consume_wakeup(&worker->wakeup_fd->fd); + } + if (nfds > 2) { + fd_end_poll(exec_ctx, &fd_watcher, pfd[2].revents & POLLIN_CHECK, + pfd[2].revents & POLLOUT_CHECK); + } else if (fd) { + fd_end_poll(exec_ctx, &fd_watcher, 0, 0); + } + } + + if (fd) { + GRPC_FD_UNREF(fd, "basicpoll_begin"); + } +} + +static void basic_pollset_destroy(grpc_pollset *pollset) { + if (pollset->data.ptr != NULL) { + GRPC_FD_UNREF(pollset->data.ptr, "basicpoll"); + pollset->data.ptr = NULL; + } +} + +static const grpc_pollset_vtable basic_pollset = { + basic_pollset_add_fd, basic_pollset_maybe_work_and_unlock, + basic_pollset_destroy, basic_pollset_destroy}; + +static void become_basic_pollset(grpc_pollset *pollset, grpc_fd *fd_or_null) { + pollset->vtable = &basic_pollset; + pollset->data.ptr = fd_or_null; + if (fd_or_null != NULL) { + GRPC_FD_REF(fd_or_null, "basicpoll"); + } +} + + +/******************************************************************************* + * pollset_multipoller_with_epoll_posix.c + */ + +#include +#include +#include +#include 
+#include + +#include +#include +#include + +#include "src/core/lib/iomgr/ev_posix.h" +#include "src/core/lib/profiling/timers.h" +#include "src/core/lib/support/block_annotate.h" + +static void set_ready(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure **st) { + /* only one set_ready can be active at once (but there may be a racing + notify_on) */ + gpr_mu_lock(&fd->mu); + set_ready_locked(exec_ctx, fd, st); + gpr_mu_unlock(&fd->mu); +} + +static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + set_ready(exec_ctx, fd, &fd->read_closure); +} + +static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + set_ready(exec_ctx, fd, &fd->write_closure); +} + +struct epoll_fd_list { + int *epoll_fds; + size_t count; + size_t capacity; +}; + +static struct epoll_fd_list epoll_fd_global_list; +static gpr_once init_epoll_fd_list_mu = GPR_ONCE_INIT; +static gpr_mu epoll_fd_list_mu; + +static void init_mu(void) { gpr_mu_init(&epoll_fd_list_mu); } + +static void add_epoll_fd_to_global_list(int epoll_fd) { + gpr_once_init(&init_epoll_fd_list_mu, init_mu); + + gpr_mu_lock(&epoll_fd_list_mu); + if (epoll_fd_global_list.count == epoll_fd_global_list.capacity) { + epoll_fd_global_list.capacity = + GPR_MAX((size_t)8, epoll_fd_global_list.capacity * 2); + epoll_fd_global_list.epoll_fds = + gpr_realloc(epoll_fd_global_list.epoll_fds, + epoll_fd_global_list.capacity * sizeof(int)); + } + epoll_fd_global_list.epoll_fds[epoll_fd_global_list.count++] = epoll_fd; + gpr_mu_unlock(&epoll_fd_list_mu); +} + +static void remove_epoll_fd_from_global_list(int epoll_fd) { + gpr_mu_lock(&epoll_fd_list_mu); + GPR_ASSERT(epoll_fd_global_list.count > 0); + for (size_t i = 0; i < epoll_fd_global_list.count; i++) { + if (epoll_fd == epoll_fd_global_list.epoll_fds[i]) { + epoll_fd_global_list.epoll_fds[i] = + epoll_fd_global_list.epoll_fds[--(epoll_fd_global_list.count)]; + break; + } + } + gpr_mu_unlock(&epoll_fd_list_mu); +} + +static void remove_fd_from_all_epoll_sets(int fd) { + int err; + gpr_once_init(&init_epoll_fd_list_mu, init_mu); + gpr_mu_lock(&epoll_fd_list_mu); + if (epoll_fd_global_list.count == 0) { + gpr_mu_unlock(&epoll_fd_list_mu); + return; + } + for (size_t i = 0; i < epoll_fd_global_list.count; i++) { + err = epoll_ctl(epoll_fd_global_list.epoll_fds[i], EPOLL_CTL_DEL, fd, NULL); + if (err < 0 && errno != ENOENT) { + gpr_log(GPR_ERROR, "epoll_ctl del for %d failed: %s", fd, + strerror(errno)); + } + } + gpr_mu_unlock(&epoll_fd_list_mu); +} + +typedef struct { + grpc_pollset *pollset; + grpc_fd *fd; + grpc_closure closure; +} delayed_add; + +typedef struct { int epoll_fd; } epoll_hdr; + +static void finally_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_fd *fd) { + epoll_hdr *h = pollset->data.ptr; + struct epoll_event ev; + int err; + grpc_fd_watcher watcher; + + /* We pretend to be polling whilst adding an fd to keep the fd from being + closed during the add. This may result in a spurious wakeup being assigned + to this pollset whilst adding, but that should be benign. */ + GPR_ASSERT(fd_begin_poll(fd, pollset, NULL, 0, 0, &watcher) == 0); + if (watcher.fd != NULL) { + ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); + ev.data.ptr = fd; + err = epoll_ctl(h->epoll_fd, EPOLL_CTL_ADD, fd->fd, &ev); + if (err < 0) { + /* FDs may be added to a pollset multiple times, so EEXIST is normal. 
*/ + if (errno != EEXIST) { + gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", fd->fd, + strerror(errno)); + } + } + } + fd_end_poll(exec_ctx, &watcher, 0, 0); +} + +static void perform_delayed_add(grpc_exec_ctx *exec_ctx, void *arg, + bool iomgr_status) { + delayed_add *da = arg; + + if (!fd_is_orphaned(da->fd)) { + finally_add_fd(exec_ctx, da->pollset, da->fd); + } + + gpr_mu_lock(&da->pollset->mu); + da->pollset->in_flight_cbs--; + if (da->pollset->shutting_down) { + /* We don't care about this pollset anymore. */ + if (da->pollset->in_flight_cbs == 0 && !da->pollset->called_shutdown) { + da->pollset->called_shutdown = 1; + grpc_exec_ctx_enqueue(exec_ctx, da->pollset->shutdown_done, true, NULL); + } + } + gpr_mu_unlock(&da->pollset->mu); + + GRPC_FD_UNREF(da->fd, "delayed_add"); + + gpr_free(da); +} + +static void multipoll_with_epoll_pollset_add_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, + grpc_fd *fd, + int and_unlock_pollset) { + if (and_unlock_pollset) { + gpr_mu_unlock(&pollset->mu); + finally_add_fd(exec_ctx, pollset, fd); + } else { + delayed_add *da = gpr_malloc(sizeof(*da)); + da->pollset = pollset; + da->fd = fd; + GRPC_FD_REF(fd, "delayed_add"); + grpc_closure_init(&da->closure, perform_delayed_add, da); + pollset->in_flight_cbs++; + grpc_exec_ctx_enqueue(exec_ctx, &da->closure, true, NULL); + } +} + +/* TODO(klempner): We probably want to turn this down a bit */ +#define GRPC_EPOLL_MAX_EVENTS 1000 + +static void multipoll_with_epoll_pollset_maybe_work_and_unlock( + grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker *worker, + gpr_timespec deadline, gpr_timespec now) { + struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; + int ep_rv; + int poll_rv; + epoll_hdr *h = pollset->data.ptr; + int timeout_ms; + struct pollfd pfds[2]; + + /* If you want to ignore epoll's ability to sanely handle parallel pollers, + * for a more apples-to-apples performance comparison with poll, add a + * if (pollset->counter != 0) { return 0; } + * here. 
+ */ + + gpr_mu_unlock(&pollset->mu); + + timeout_ms = poll_deadline_to_millis_timeout(deadline, now); + + pfds[0].fd = GRPC_WAKEUP_FD_GET_READ_FD(&worker->wakeup_fd->fd); + pfds[0].events = POLLIN; + pfds[0].revents = 0; + pfds[1].fd = h->epoll_fd; + pfds[1].events = POLLIN; + pfds[1].revents = 0; + + /* TODO(vpai): Consider first doing a 0 timeout poll here to avoid + even going into the blocking annotation if possible */ + GPR_TIMER_BEGIN("poll", 0); + GRPC_SCHEDULING_START_BLOCKING_REGION; + poll_rv = grpc_poll_function(pfds, 2, timeout_ms); + GRPC_SCHEDULING_END_BLOCKING_REGION; + GPR_TIMER_END("poll", 0); + + if (poll_rv < 0) { + if (errno != EINTR) { + gpr_log(GPR_ERROR, "poll() failed: %s", strerror(errno)); + } + } else if (poll_rv == 0) { + /* do nothing */ + } else { + if (pfds[0].revents) { + grpc_wakeup_fd_consume_wakeup(&worker->wakeup_fd->fd); + } + if (pfds[1].revents) { + do { + /* The following epoll_wait never blocks; it has a timeout of 0 */ + ep_rv = epoll_wait(h->epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); + if (ep_rv < 0) { + if (errno != EINTR) { + gpr_log(GPR_ERROR, "epoll_wait() failed: %s", strerror(errno)); + } + } else { + int i; + for (i = 0; i < ep_rv; ++i) { + grpc_fd *fd = ep_ev[i].data.ptr; + /* TODO(klempner): We might want to consider making err and pri + * separate events */ + int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); + int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); + int write_ev = ep_ev[i].events & EPOLLOUT; + if (fd == NULL) { + grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); + } else { + if (read_ev || cancel) { + fd_become_readable(exec_ctx, fd); + } + if (write_ev || cancel) { + fd_become_writable(exec_ctx, fd); + } + } + } + } + } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); + } + } +} + +static void multipoll_with_epoll_pollset_finish_shutdown( + grpc_pollset *pollset) {} + +static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset) { + epoll_hdr *h = pollset->data.ptr; + close(h->epoll_fd); + remove_epoll_fd_from_global_list(h->epoll_fd); + gpr_free(h); +} + +static const grpc_pollset_vtable multipoll_with_epoll_pollset = { + multipoll_with_epoll_pollset_add_fd, + multipoll_with_epoll_pollset_maybe_work_and_unlock, + multipoll_with_epoll_pollset_finish_shutdown, + multipoll_with_epoll_pollset_destroy}; + +static void epoll_become_multipoller(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, grpc_fd **fds, + size_t nfds) { + size_t i; + epoll_hdr *h = gpr_malloc(sizeof(epoll_hdr)); + struct epoll_event ev; + int err; + + pollset->vtable = &multipoll_with_epoll_pollset; + pollset->data.ptr = h; + h->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (h->epoll_fd < 0) { + /* TODO(klempner): Fall back to poll here, especially on ENOSYS */ + gpr_log(GPR_ERROR, "epoll_create1 failed: %s", strerror(errno)); + abort(); + } + add_epoll_fd_to_global_list(h->epoll_fd); + + ev.events = (uint32_t)(EPOLLIN | EPOLLET); + ev.data.ptr = NULL; + err = epoll_ctl(h->epoll_fd, EPOLL_CTL_ADD, + GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), &ev); + if (err < 0) { + gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", + GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), + strerror(errno)); + } + + for (i = 0; i < nfds; i++) { + multipoll_with_epoll_pollset_add_fd(exec_ctx, pollset, fds[i], 0); + } +} + +/******************************************************************************* + * pollset_set_posix.c + */ + +static grpc_pollset_set *pollset_set_create(void) { + grpc_pollset_set *pollset_set = gpr_malloc(sizeof(*pollset_set)); 
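+  /* start from an all-zero struct: the counts, capacities and the pollset/
+     pollset_set/fd arrays are grown lazily by the add functions below */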
+ memset(pollset_set, 0, sizeof(*pollset_set)); + gpr_mu_init(&pollset_set->mu); + return pollset_set; +} + +static void pollset_set_destroy(grpc_pollset_set *pollset_set) { + size_t i; + gpr_mu_destroy(&pollset_set->mu); + for (i = 0; i < pollset_set->fd_count; i++) { + GRPC_FD_UNREF(pollset_set->fds[i], "pollset_set"); + } + gpr_free(pollset_set->pollsets); + gpr_free(pollset_set->pollset_sets); + gpr_free(pollset_set->fds); + gpr_free(pollset_set); +} + +static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, + grpc_pollset *pollset) { + size_t i, j; + gpr_mu_lock(&pollset_set->mu); + if (pollset_set->pollset_count == pollset_set->pollset_capacity) { + pollset_set->pollset_capacity = + GPR_MAX(8, 2 * pollset_set->pollset_capacity); + pollset_set->pollsets = + gpr_realloc(pollset_set->pollsets, pollset_set->pollset_capacity * + sizeof(*pollset_set->pollsets)); + } + pollset_set->pollsets[pollset_set->pollset_count++] = pollset; + for (i = 0, j = 0; i < pollset_set->fd_count; i++) { + if (fd_is_orphaned(pollset_set->fds[i])) { + GRPC_FD_UNREF(pollset_set->fds[i], "pollset_set"); + } else { + pollset_add_fd(exec_ctx, pollset, pollset_set->fds[i]); + pollset_set->fds[j++] = pollset_set->fds[i]; + } + } + pollset_set->fd_count = j; + gpr_mu_unlock(&pollset_set->mu); +} + +static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, + grpc_pollset *pollset) { + size_t i; + gpr_mu_lock(&pollset_set->mu); + for (i = 0; i < pollset_set->pollset_count; i++) { + if (pollset_set->pollsets[i] == pollset) { + pollset_set->pollset_count--; + GPR_SWAP(grpc_pollset *, pollset_set->pollsets[i], + pollset_set->pollsets[pollset_set->pollset_count]); + break; + } + } + gpr_mu_unlock(&pollset_set->mu); +} + +static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) { + size_t i, j; + gpr_mu_lock(&bag->mu); + if (bag->pollset_set_count == bag->pollset_set_capacity) { + bag->pollset_set_capacity = GPR_MAX(8, 2 * bag->pollset_set_capacity); + bag->pollset_sets = + gpr_realloc(bag->pollset_sets, + bag->pollset_set_capacity * sizeof(*bag->pollset_sets)); + } + bag->pollset_sets[bag->pollset_set_count++] = item; + for (i = 0, j = 0; i < bag->fd_count; i++) { + if (fd_is_orphaned(bag->fds[i])) { + GRPC_FD_UNREF(bag->fds[i], "pollset_set"); + } else { + pollset_set_add_fd(exec_ctx, item, bag->fds[i]); + bag->fds[j++] = bag->fds[i]; + } + } + bag->fd_count = j; + gpr_mu_unlock(&bag->mu); +} + +static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) { + size_t i; + gpr_mu_lock(&bag->mu); + for (i = 0; i < bag->pollset_set_count; i++) { + if (bag->pollset_sets[i] == item) { + bag->pollset_set_count--; + GPR_SWAP(grpc_pollset_set *, bag->pollset_sets[i], + bag->pollset_sets[bag->pollset_set_count]); + break; + } + } + gpr_mu_unlock(&bag->mu); +} + +static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd) { + size_t i; + gpr_mu_lock(&pollset_set->mu); + if (pollset_set->fd_count == pollset_set->fd_capacity) { + pollset_set->fd_capacity = GPR_MAX(8, 2 * pollset_set->fd_capacity); + pollset_set->fds = gpr_realloc( + pollset_set->fds, pollset_set->fd_capacity * sizeof(*pollset_set->fds)); + } + GRPC_FD_REF(fd, "pollset_set"); + pollset_set->fds[pollset_set->fd_count++] = fd; + for (i = 0; i < pollset_set->pollset_count; i++) { + pollset_add_fd(exec_ctx, pollset_set->pollsets[i], fd); + } + for 
(i = 0; i < pollset_set->pollset_set_count; i++) { + pollset_set_add_fd(exec_ctx, pollset_set->pollset_sets[i], fd); + } + gpr_mu_unlock(&pollset_set->mu); +} + +static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd) { + size_t i; + gpr_mu_lock(&pollset_set->mu); + for (i = 0; i < pollset_set->fd_count; i++) { + if (pollset_set->fds[i] == fd) { + pollset_set->fd_count--; + GPR_SWAP(grpc_fd *, pollset_set->fds[i], + pollset_set->fds[pollset_set->fd_count]); + GRPC_FD_UNREF(fd, "pollset_set"); + break; + } + } + for (i = 0; i < pollset_set->pollset_set_count; i++) { + pollset_set_del_fd(exec_ctx, pollset_set->pollset_sets[i], fd); + } + gpr_mu_unlock(&pollset_set->mu); +} + +/******************************************************************************* + * event engine binding + */ + +static void shutdown_engine(void) { + fd_global_shutdown(); + pollset_global_shutdown(); +} + +static const grpc_event_engine_vtable vtable = { + .pollset_size = sizeof(grpc_pollset), + + .fd_create = fd_create, + .fd_wrapped_fd = fd_wrapped_fd, + .fd_orphan = fd_orphan, + .fd_shutdown = fd_shutdown, + .fd_notify_on_read = fd_notify_on_read, + .fd_notify_on_write = fd_notify_on_write, + + .pollset_init = pollset_init, + .pollset_shutdown = pollset_shutdown, + .pollset_reset = pollset_reset, + .pollset_destroy = pollset_destroy, + .pollset_work = pollset_work, + .pollset_kick = pollset_kick, + .pollset_add_fd = pollset_add_fd, + + .pollset_set_create = pollset_set_create, + .pollset_set_destroy = pollset_set_destroy, + .pollset_set_add_pollset = pollset_set_add_pollset, + .pollset_set_del_pollset = pollset_set_del_pollset, + .pollset_set_add_pollset_set = pollset_set_add_pollset_set, + .pollset_set_del_pollset_set = pollset_set_del_pollset_set, + .pollset_set_add_fd = pollset_set_add_fd, + .pollset_set_del_fd = pollset_set_del_fd, + + .kick_poller = kick_poller, + + .shutdown_engine = shutdown_engine, +}; + +const grpc_event_engine_vtable *grpc_init_epoll_posix(void) { + platform_become_multipoller = epoll_become_multipoller; + fd_global_init(); + pollset_global_init(); + return &vtable; +} + +#endif diff --git a/src/core/lib/iomgr/ev_epoll_posix.h b/src/core/lib/iomgr/ev_epoll_posix.h new file mode 100644 index 0000000000..35319b4fc5 --- /dev/null +++ b/src/core/lib/iomgr/ev_epoll_posix.h @@ -0,0 +1,41 @@ +/* + * + * Copyright 2015, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef GRPC_CORE_LIB_IOMGR_EV_EPOLL_POSIX_H +#define GRPC_CORE_LIB_IOMGR_EV_EPOLL_POSIX_H + +#include "src/core/lib/iomgr/ev_posix.h" + +const grpc_event_engine_vtable *grpc_init_epoll_posix(void); + +#endif /* GRPC_CORE_LIB_IOMGR_EV_EPOLL_POSIX_H */ diff --git a/src/core/lib/iomgr/ev_posix.c b/src/core/lib/iomgr/ev_posix.c index 95520b01d3..baa3b9856a 100644 --- a/src/core/lib/iomgr/ev_posix.c +++ b/src/core/lib/iomgr/ev_posix.c @@ -44,6 +44,7 @@ #include #include +#include "src/core/lib/iomgr/ev_epoll_posix.h" #include "src/core/lib/iomgr/ev_poll_posix.h" #include "src/core/lib/support/env.h" @@ -61,7 +62,7 @@ typedef struct { } event_engine_factory; static const event_engine_factory g_factories[] = { - {"poll", grpc_init_poll_posix}, + {"poll", grpc_init_poll_posix}, {"epoll", grpc_init_epoll_posix}, }; static void add(const char *beg, const char *end, char ***ss, size_t *ns) { -- cgit v1.2.3 From a7786001a22f511130a4292893cc8e2ab0ccdf75 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 11 May 2016 18:13:31 -0700 Subject: Remove basic_pollset and the promotion related code --- src/core/lib/iomgr/ev_epoll_posix.c | 355 ++++++++---------------------------- 1 file changed, 71 insertions(+), 284 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_posix.c b/src/core/lib/iomgr/ev_epoll_posix.c index ce8d3981b3..cb4a00a75c 100644 --- a/src/core/lib/iomgr/ev_epoll_posix.c +++ b/src/core/lib/iomgr/ev_epoll_posix.c @@ -185,11 +185,6 @@ struct grpc_pollset_worker { }; struct grpc_pollset { - /* pollsets under posix can mutate representation as fds are added and - removed. 
- For example, we may choose a poll() based implementation on linux for - few fds, and an epoll() based implementation for many fds */ - const grpc_pollset_vtable *vtable; gpr_mu mu; grpc_pollset_worker root_worker; int in_flight_cbs; @@ -248,8 +243,6 @@ typedef void (*platform_become_multipoller_type)(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, struct grpc_fd **fds, size_t fd_count); -static platform_become_multipoller_type platform_become_multipoller; - /* Return 1 if the pollset has active threads in pollset_work (pollset must * be locked) */ @@ -796,8 +789,6 @@ static void kick_poller(void) { grpc_wakeup_fd_wakeup(&grpc_global_wakeup_fd); } /* main interface */ -static void become_basic_pollset(grpc_pollset *pollset, grpc_fd *fd_or_null); - static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { gpr_mu_init(&pollset->mu); *mu = &pollset->mu; @@ -809,14 +800,20 @@ static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { pollset->idle_jobs.head = pollset->idle_jobs.tail = NULL; pollset->local_wakeup_cache = NULL; pollset->kicked_without_pollers = 0; - become_basic_pollset(pollset, NULL); + pollset->data.ptr = NULL; } +/* TODO(sreek): Maybe merge multipoll_*_destroy() with pollset_destroy() + * function */ +static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset); + static void pollset_destroy(grpc_pollset *pollset) { GPR_ASSERT(pollset->in_flight_cbs == 0); GPR_ASSERT(!pollset_has_workers(pollset)); GPR_ASSERT(pollset->idle_jobs.head == pollset->idle_jobs.tail); - pollset->vtable->destroy(pollset); + + multipoll_with_epoll_pollset_destroy(pollset); + while (pollset->local_wakeup_cache) { grpc_cached_wakeup_fd *next = pollset->local_wakeup_cache->next; grpc_wakeup_fd_destroy(&pollset->local_wakeup_cache->fd); @@ -831,17 +828,24 @@ static void pollset_reset(grpc_pollset *pollset) { GPR_ASSERT(pollset->in_flight_cbs == 0); GPR_ASSERT(!pollset_has_workers(pollset)); GPR_ASSERT(pollset->idle_jobs.head == pollset->idle_jobs.tail); - pollset->vtable->destroy(pollset); + + multipoll_with_epoll_pollset_destroy(pollset); + pollset->shutting_down = 0; pollset->called_shutdown = 0; pollset->kicked_without_pollers = 0; - become_basic_pollset(pollset, NULL); } +/* TODO (sreek): Remove multipoll_with_epoll_add_fd declaration*/ +static void multipoll_with_epoll_pollset_add_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, + grpc_fd *fd, + int and_unlock_pollset); + static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { gpr_mu_lock(&pollset->mu); - pollset->vtable->add_fd(exec_ctx, pollset, fd, 1); + multipoll_with_epoll_pollset_add_fd(exec_ctx, pollset, fd, 1); /* the following (enabled only in debug) will reacquire and then release our lock - meaning that if the unlocking flag passed to add_fd above is not respected, the code will deadlock (in a way that we have a chance of @@ -852,12 +856,21 @@ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, #endif } +/* TODO (sreek): Remove multipoll_with_epoll_finish_shutdown() declaration */ +static void multipoll_with_epoll_pollset_finish_shutdown(grpc_pollset *pollset); + static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { GPR_ASSERT(grpc_closure_list_empty(pollset->idle_jobs)); - pollset->vtable->finish_shutdown(pollset); + multipoll_with_epoll_pollset_finish_shutdown(pollset); grpc_exec_ctx_enqueue(exec_ctx, pollset->shutdown_done, true, NULL); } +/* TODO(sreek): Remove multipoll_with_epoll_*_maybe_work_and_unlock declaration + */ +static 
void multipoll_with_epoll_pollset_maybe_work_and_unlock( + grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker *worker, + gpr_timespec deadline, gpr_timespec now); + static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker **worker_hdl, gpr_timespec now, gpr_timespec deadline) { @@ -915,8 +928,10 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, } gpr_tls_set(&g_current_thread_poller, (intptr_t)pollset); GPR_TIMER_BEGIN("maybe_work_and_unlock", 0); - pollset->vtable->maybe_work_and_unlock(exec_ctx, pollset, &worker, - deadline, now); + + multipoll_with_epoll_pollset_maybe_work_and_unlock( + exec_ctx, pollset, &worker, deadline, now); + GPR_TIMER_END("maybe_work_and_unlock", 0); locked = 0; gpr_tls_set(&g_current_thread_poller, 0); @@ -1013,233 +1028,6 @@ static int poll_deadline_to_millis_timeout(gpr_timespec deadline, timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); } -/* - * basic_pollset - a vtable that provides polling for zero or one file - * descriptor via poll() - */ - -typedef struct grpc_unary_promote_args { - const grpc_pollset_vtable *original_vtable; - grpc_pollset *pollset; - grpc_fd *fd; - grpc_closure promotion_closure; -} grpc_unary_promote_args; - -static void basic_do_promote(grpc_exec_ctx *exec_ctx, void *args, - bool success) { - grpc_unary_promote_args *up_args = args; - const grpc_pollset_vtable *original_vtable = up_args->original_vtable; - grpc_pollset *pollset = up_args->pollset; - grpc_fd *fd = up_args->fd; - - /* - * This is quite tricky. There are a number of cases to keep in mind here: - * 1. fd may have been orphaned - * 2. The pollset may no longer be a unary poller (and we can't let case #1 - * leak to other pollset types!) - * 3. pollset's fd (which may have changed) may have been orphaned - * 4. The pollset may be shutting down. - */ - - gpr_mu_lock(&pollset->mu); - /* First we need to ensure that nobody is polling concurrently */ - GPR_ASSERT(!pollset_has_workers(pollset)); - - gpr_free(up_args); - /* At this point the pollset may no longer be a unary poller. In that case - * we should just call the right add function and be done. */ - /* TODO(klempner): If we're not careful this could cause infinite recursion. - * That's not a problem for now because empty_pollset has a trivial poller - * and we don't have any mechanism to unbecome multipoller. */ - pollset->in_flight_cbs--; - if (pollset->shutting_down) { - /* We don't care about this pollset anymore. */ - if (pollset->in_flight_cbs == 0 && !pollset->called_shutdown) { - pollset->called_shutdown = 1; - finish_shutdown(exec_ctx, pollset); - } - } else if (fd_is_orphaned(fd)) { - /* Don't try to add it to anything, we'll drop our ref on it below */ - } else if (pollset->vtable != original_vtable) { - pollset->vtable->add_fd(exec_ctx, pollset, fd, 0); - } else if (fd != pollset->data.ptr) { - grpc_fd *fds[2]; - fds[0] = pollset->data.ptr; - fds[1] = fd; - - if (fds[0] && !fd_is_orphaned(fds[0])) { - platform_become_multipoller(exec_ctx, pollset, fds, GPR_ARRAY_SIZE(fds)); - GRPC_FD_UNREF(fds[0], "basicpoll"); - } else { - /* old fd is orphaned and we haven't cleaned it up until now, so remain a - * unary poller */ - /* Note that it is possible that fds[1] is also orphaned at this point. - * That's okay, we'll correct it at the next add or poll. 
*/ - if (fds[0]) GRPC_FD_UNREF(fds[0], "basicpoll"); - pollset->data.ptr = fd; - GRPC_FD_REF(fd, "basicpoll"); - } - } - - gpr_mu_unlock(&pollset->mu); - - /* Matching ref in basic_pollset_add_fd */ - GRPC_FD_UNREF(fd, "basicpoll_add"); -} - -static void basic_pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_fd *fd, int and_unlock_pollset) { - grpc_unary_promote_args *up_args; - GPR_ASSERT(fd); - if (fd == pollset->data.ptr) goto exit; - - if (!pollset_has_workers(pollset)) { - /* Fast path -- no in flight cbs */ - /* TODO(klempner): Comment this out and fix any test failures or establish - * they are due to timing issues */ - grpc_fd *fds[2]; - fds[0] = pollset->data.ptr; - fds[1] = fd; - - if (fds[0] == NULL) { - pollset->data.ptr = fd; - GRPC_FD_REF(fd, "basicpoll"); - } else if (!fd_is_orphaned(fds[0])) { - platform_become_multipoller(exec_ctx, pollset, fds, GPR_ARRAY_SIZE(fds)); - GRPC_FD_UNREF(fds[0], "basicpoll"); - } else { - /* old fd is orphaned and we haven't cleaned it up until now, so remain a - * unary poller */ - GRPC_FD_UNREF(fds[0], "basicpoll"); - pollset->data.ptr = fd; - GRPC_FD_REF(fd, "basicpoll"); - } - goto exit; - } - - /* Now we need to promote. This needs to happen when we're not polling. Since - * this may be called from poll, the wait needs to happen asynchronously. */ - GRPC_FD_REF(fd, "basicpoll_add"); - pollset->in_flight_cbs++; - up_args = gpr_malloc(sizeof(*up_args)); - up_args->fd = fd; - up_args->original_vtable = pollset->vtable; - up_args->pollset = pollset; - up_args->promotion_closure.cb = basic_do_promote; - up_args->promotion_closure.cb_arg = up_args; - - grpc_closure_list_add(&pollset->idle_jobs, &up_args->promotion_closure, 1); - pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST); - -exit: - if (and_unlock_pollset) { - gpr_mu_unlock(&pollset->mu); - } -} - -static void basic_pollset_maybe_work_and_unlock(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset, - grpc_pollset_worker *worker, - gpr_timespec deadline, - gpr_timespec now) { -#define POLLOUT_CHECK (POLLOUT | POLLHUP | POLLERR) -#define POLLIN_CHECK (POLLIN | POLLHUP | POLLERR) - - struct pollfd pfd[3]; - grpc_fd *fd; - grpc_fd_watcher fd_watcher; - int timeout; - int r; - nfds_t nfds; - - fd = pollset->data.ptr; - if (fd && fd_is_orphaned(fd)) { - GRPC_FD_UNREF(fd, "basicpoll"); - fd = pollset->data.ptr = NULL; - } - timeout = poll_deadline_to_millis_timeout(deadline, now); - pfd[0].fd = GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd); - pfd[0].events = POLLIN; - pfd[0].revents = 0; - pfd[1].fd = GRPC_WAKEUP_FD_GET_READ_FD(&worker->wakeup_fd->fd); - pfd[1].events = POLLIN; - pfd[1].revents = 0; - nfds = 2; - if (fd) { - pfd[2].fd = fd->fd; - pfd[2].revents = 0; - GRPC_FD_REF(fd, "basicpoll_begin"); - gpr_mu_unlock(&pollset->mu); - pfd[2].events = - (short)fd_begin_poll(fd, pollset, worker, POLLIN, POLLOUT, &fd_watcher); - if (pfd[2].events != 0) { - nfds++; - } - } else { - gpr_mu_unlock(&pollset->mu); - } - - /* TODO(vpai): Consider first doing a 0 timeout poll here to avoid - even going into the blocking annotation if possible */ - /* poll fd count (argument 2) is shortened by one if we have no events - to poll on - such that it only includes the kicker */ - GPR_TIMER_BEGIN("poll", 0); - GRPC_SCHEDULING_START_BLOCKING_REGION; - r = grpc_poll_function(pfd, nfds, timeout); - GRPC_SCHEDULING_END_BLOCKING_REGION; - GPR_TIMER_END("poll", 0); - - if (r < 0) { - if (errno != EINTR) { - gpr_log(GPR_ERROR, "poll() failed: %s", strerror(errno)); - } - if (fd) { - 
fd_end_poll(exec_ctx, &fd_watcher, 0, 0); - } - } else if (r == 0) { - if (fd) { - fd_end_poll(exec_ctx, &fd_watcher, 0, 0); - } - } else { - if (pfd[0].revents & POLLIN_CHECK) { - grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); - } - if (pfd[1].revents & POLLIN_CHECK) { - grpc_wakeup_fd_consume_wakeup(&worker->wakeup_fd->fd); - } - if (nfds > 2) { - fd_end_poll(exec_ctx, &fd_watcher, pfd[2].revents & POLLIN_CHECK, - pfd[2].revents & POLLOUT_CHECK); - } else if (fd) { - fd_end_poll(exec_ctx, &fd_watcher, 0, 0); - } - } - - if (fd) { - GRPC_FD_UNREF(fd, "basicpoll_begin"); - } -} - -static void basic_pollset_destroy(grpc_pollset *pollset) { - if (pollset->data.ptr != NULL) { - GRPC_FD_UNREF(pollset->data.ptr, "basicpoll"); - pollset->data.ptr = NULL; - } -} - -static const grpc_pollset_vtable basic_pollset = { - basic_pollset_add_fd, basic_pollset_maybe_work_and_unlock, - basic_pollset_destroy, basic_pollset_destroy}; - -static void become_basic_pollset(grpc_pollset *pollset, grpc_fd *fd_or_null) { - pollset->vtable = &basic_pollset; - pollset->data.ptr = fd_or_null; - if (fd_or_null != NULL) { - GRPC_FD_REF(fd_or_null, "basicpoll"); - } -} - - /******************************************************************************* * pollset_multipoller_with_epoll_posix.c */ @@ -1274,6 +1062,7 @@ static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { set_ready(exec_ctx, fd, &fd->write_closure); } +/* TODO (sreek): Maybe this global list is not required. Double check*/ struct epoll_fd_list { int *epoll_fds; size_t count; @@ -1390,10 +1179,48 @@ static void perform_delayed_add(grpc_exec_ctx *exec_ctx, void *arg, gpr_free(da); } +/* Creates an epoll fd and initializes the pollset */ +static void multipoll_with_epoll_pollset_create_efd(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset) { + epoll_hdr *h = gpr_malloc(sizeof(epoll_hdr)); + struct epoll_event ev; + int err; + + /* Ensuring that the pollset is infact empty (with no epoll fd either) */ + GPR_ASSERT(pollset->data.ptr == NULL); + + pollset->data.ptr = h; + h->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (h->epoll_fd < 0) { + gpr_log(GPR_ERROR, "epoll_create1 failed: %s", strerror(errno)); + abort(); + } + add_epoll_fd_to_global_list(h->epoll_fd); + + ev.events = (uint32_t)(EPOLLIN | EPOLLET); + ev.data.ptr = NULL; + + /* TODO (sreek): Double-check the use of grpc_global_wakeup_fd here (right now + * I do not know why this is used. 
I just copied this code from + * epoll_become_mutipoller() function in ev_poll_and_epoll_posix.c file */ + err = epoll_ctl(h->epoll_fd, EPOLL_CTL_ADD, + GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), &ev); + if (err < 0) { + gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", + GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), + strerror(errno)); + } +} + static void multipoll_with_epoll_pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd, int and_unlock_pollset) { + /* If there is no epoll fd on the pollset, create one */ + if (pollset->data.ptr == NULL) { + multipoll_with_epoll_pollset_create_efd(exec_ctx, pollset); + } + if (and_unlock_pollset) { gpr_mu_unlock(&pollset->mu); finally_add_fd(exec_ctx, pollset, fd); @@ -1500,45 +1327,6 @@ static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset) { gpr_free(h); } -static const grpc_pollset_vtable multipoll_with_epoll_pollset = { - multipoll_with_epoll_pollset_add_fd, - multipoll_with_epoll_pollset_maybe_work_and_unlock, - multipoll_with_epoll_pollset_finish_shutdown, - multipoll_with_epoll_pollset_destroy}; - -static void epoll_become_multipoller(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset, grpc_fd **fds, - size_t nfds) { - size_t i; - epoll_hdr *h = gpr_malloc(sizeof(epoll_hdr)); - struct epoll_event ev; - int err; - - pollset->vtable = &multipoll_with_epoll_pollset; - pollset->data.ptr = h; - h->epoll_fd = epoll_create1(EPOLL_CLOEXEC); - if (h->epoll_fd < 0) { - /* TODO(klempner): Fall back to poll here, especially on ENOSYS */ - gpr_log(GPR_ERROR, "epoll_create1 failed: %s", strerror(errno)); - abort(); - } - add_epoll_fd_to_global_list(h->epoll_fd); - - ev.events = (uint32_t)(EPOLLIN | EPOLLET); - ev.data.ptr = NULL; - err = epoll_ctl(h->epoll_fd, EPOLL_CTL_ADD, - GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), &ev); - if (err < 0) { - gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", - GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), - strerror(errno)); - } - - for (i = 0; i < nfds; i++) { - multipoll_with_epoll_pollset_add_fd(exec_ctx, pollset, fds[i], 0); - } -} - /******************************************************************************* * pollset_set_posix.c */ @@ -1724,7 +1512,6 @@ static const grpc_event_engine_vtable vtable = { }; const grpc_event_engine_vtable *grpc_init_epoll_posix(void) { - platform_become_multipoller = epoll_become_multipoller; fd_global_init(); pollset_global_init(); return &vtable; -- cgit v1.2.3 From ab7f10ed61c7e0d104ea839206d2cd0340f5fed8 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Thu, 12 May 2016 20:21:37 -0700 Subject: Remove delayed_add --- src/core/lib/iomgr/ev_epoll_posix.c | 61 ++++++++----------------------------- 1 file changed, 13 insertions(+), 48 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_posix.c b/src/core/lib/iomgr/ev_epoll_posix.c index cb4a00a75c..03c544b30c 100644 --- a/src/core/lib/iomgr/ev_epoll_posix.c +++ b/src/core/lib/iomgr/ev_epoll_posix.c @@ -187,7 +187,7 @@ struct grpc_pollset_worker { struct grpc_pollset { gpr_mu mu; grpc_pollset_worker root_worker; - int in_flight_cbs; + int in_flight_cbs; /* TODO (sreek): Most likely this isn't needed anymore */ int shutting_down; int called_shutdown; int kicked_without_pollers; @@ -839,13 +839,12 @@ static void pollset_reset(grpc_pollset *pollset) { /* TODO (sreek): Remove multipoll_with_epoll_add_fd declaration*/ static void multipoll_with_epoll_pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_fd *fd, - 
int and_unlock_pollset); + grpc_fd *fd); static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { gpr_mu_lock(&pollset->mu); - multipoll_with_epoll_pollset_add_fd(exec_ctx, pollset, fd, 1); + multipoll_with_epoll_pollset_add_fd(exec_ctx, pollset, fd); /* the following (enabled only in debug) will reacquire and then release our lock - meaning that if the unlocking flag passed to add_fd above is not respected, the code will deadlock (in a way that we have a chance of @@ -1121,12 +1120,6 @@ static void remove_fd_from_all_epoll_sets(int fd) { gpr_mu_unlock(&epoll_fd_list_mu); } -typedef struct { - grpc_pollset *pollset; - grpc_fd *fd; - grpc_closure closure; -} delayed_add; - typedef struct { int epoll_fd; } epoll_hdr; static void finally_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, @@ -1139,6 +1132,13 @@ static void finally_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, /* We pretend to be polling whilst adding an fd to keep the fd from being closed during the add. This may result in a spurious wakeup being assigned to this pollset whilst adding, but that should be benign. */ + /* TODO (sreek). This fd_begin_poll() really seem to accomplish adding + * GRPC_FD_REF() (i.e adding a refcount to the fd) and checking that the + * fd is not shutting down (in which case watcher.fd will be NULL and no + * refcount is added). The ref count is added only durng hte duration of + * adding it to the epoll set (after which fd_end_poll would be called and + * the fd's ref count is decremented by 1. So do we still need fd_begin_poll + * ??? */ GPR_ASSERT(fd_begin_poll(fd, pollset, NULL, 0, 0, &watcher) == 0); if (watcher.fd != NULL) { ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); @@ -1155,30 +1155,6 @@ static void finally_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, fd_end_poll(exec_ctx, &watcher, 0, 0); } -static void perform_delayed_add(grpc_exec_ctx *exec_ctx, void *arg, - bool iomgr_status) { - delayed_add *da = arg; - - if (!fd_is_orphaned(da->fd)) { - finally_add_fd(exec_ctx, da->pollset, da->fd); - } - - gpr_mu_lock(&da->pollset->mu); - da->pollset->in_flight_cbs--; - if (da->pollset->shutting_down) { - /* We don't care about this pollset anymore. 
*/ - if (da->pollset->in_flight_cbs == 0 && !da->pollset->called_shutdown) { - da->pollset->called_shutdown = 1; - grpc_exec_ctx_enqueue(exec_ctx, da->pollset->shutdown_done, true, NULL); - } - } - gpr_mu_unlock(&da->pollset->mu); - - GRPC_FD_UNREF(da->fd, "delayed_add"); - - gpr_free(da); -} - /* Creates an epoll fd and initializes the pollset */ static void multipoll_with_epoll_pollset_create_efd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { @@ -1214,25 +1190,14 @@ static void multipoll_with_epoll_pollset_create_efd(grpc_exec_ctx *exec_ctx, static void multipoll_with_epoll_pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_fd *fd, - int and_unlock_pollset) { + grpc_fd *fd) { /* If there is no epoll fd on the pollset, create one */ if (pollset->data.ptr == NULL) { multipoll_with_epoll_pollset_create_efd(exec_ctx, pollset); } - if (and_unlock_pollset) { - gpr_mu_unlock(&pollset->mu); - finally_add_fd(exec_ctx, pollset, fd); - } else { - delayed_add *da = gpr_malloc(sizeof(*da)); - da->pollset = pollset; - da->fd = fd; - GRPC_FD_REF(fd, "delayed_add"); - grpc_closure_init(&da->closure, perform_delayed_add, da); - pollset->in_flight_cbs++; - grpc_exec_ctx_enqueue(exec_ctx, &da->closure, true, NULL); - } + gpr_mu_unlock(&pollset->mu); + finally_add_fd(exec_ctx, pollset, fd); } /* TODO(klempner): We probably want to turn this down a bit */ -- cgit v1.2.3 From f6a2adf0cfa14e473e05b6cb2d6b8fdc7667945b Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 13 May 2016 16:00:20 -0700 Subject: Pollset_reset should not destroy the epoll_fd --- src/core/lib/iomgr/ev_epoll_posix.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_posix.c b/src/core/lib/iomgr/ev_epoll_posix.c index 03c544b30c..19465280a1 100644 --- a/src/core/lib/iomgr/ev_epoll_posix.c +++ b/src/core/lib/iomgr/ev_epoll_posix.c @@ -828,9 +828,6 @@ static void pollset_reset(grpc_pollset *pollset) { GPR_ASSERT(pollset->in_flight_cbs == 0); GPR_ASSERT(!pollset_has_workers(pollset)); GPR_ASSERT(pollset->idle_jobs.head == pollset->idle_jobs.tail); - - multipoll_with_epoll_pollset_destroy(pollset); - pollset->shutting_down = 0; pollset->called_shutdown = 0; pollset->kicked_without_pollers = 0; -- cgit v1.2.3 From 24f0f57ea16b77e710f7ff4ae8a048c888ab6b12 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 13 May 2016 19:22:44 -0700 Subject: Moving the creation of epoll_fd to pollset_init() instead of pollset_add_fd() [Verified stable. All tests pass] --- src/core/lib/iomgr/ev_epoll_posix.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_posix.c b/src/core/lib/iomgr/ev_epoll_posix.c index 19465280a1..0ee0ee58a8 100644 --- a/src/core/lib/iomgr/ev_epoll_posix.c +++ b/src/core/lib/iomgr/ev_epoll_posix.c @@ -787,6 +787,9 @@ static void pollset_global_shutdown(void) { static void kick_poller(void) { grpc_wakeup_fd_wakeup(&grpc_global_wakeup_fd); } +/* TODO: sreek. 
Try to Remove this forward declaration*/ +static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset); + /* main interface */ static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { @@ -800,7 +803,9 @@ static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { pollset->idle_jobs.head = pollset->idle_jobs.tail = NULL; pollset->local_wakeup_cache = NULL; pollset->kicked_without_pollers = 0; + pollset->data.ptr = NULL; + multipoll_with_epoll_pollset_create_efd(pollset); } /* TODO(sreek): Maybe merge multipoll_*_destroy() with pollset_destroy() @@ -1153,13 +1158,15 @@ static void finally_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, } /* Creates an epoll fd and initializes the pollset */ -static void multipoll_with_epoll_pollset_create_efd(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset) { +/* TODO: This has to be called ONLY from pollset_init function. and hence it + * does not acquire any lock */ +static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset) { epoll_hdr *h = gpr_malloc(sizeof(epoll_hdr)); struct epoll_event ev; int err; - /* Ensuring that the pollset is infact empty (with no epoll fd either) */ + /* TODO (sreek). remove this assert. Currently added this just to ensure that + * we do not overwrite h->epoll_fd without freeing the older one*/ GPR_ASSERT(pollset->data.ptr == NULL); pollset->data.ptr = h; @@ -1188,10 +1195,10 @@ static void multipoll_with_epoll_pollset_create_efd(grpc_exec_ctx *exec_ctx, static void multipoll_with_epoll_pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { - /* If there is no epoll fd on the pollset, create one */ - if (pollset->data.ptr == NULL) { - multipoll_with_epoll_pollset_create_efd(exec_ctx, pollset); - } + GPR_ASSERT(pollset->data.ptr != NULL); + + /* TODO(sreek). 
Remove this unlock code (and also the code that acquires the + * lock before calling multipoll_with_epoll_add_fd() function */ gpr_mu_unlock(&pollset->mu); finally_add_fd(exec_ctx, pollset, fd); -- cgit v1.2.3 From 9ff57f67a0920e8e39baedfec5671fb6e662d257 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Sat, 14 May 2016 15:56:57 -0700 Subject: Remove idle_jobs and in_flight_cbs from pollset --- src/core/lib/iomgr/ev_epoll_posix.c | 50 +++---------------------------------- 1 file changed, 3 insertions(+), 47 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_posix.c b/src/core/lib/iomgr/ev_epoll_posix.c index 0ee0ee58a8..5776b6c1cf 100644 --- a/src/core/lib/iomgr/ev_epoll_posix.c +++ b/src/core/lib/iomgr/ev_epoll_posix.c @@ -169,8 +169,6 @@ static void fd_global_shutdown(void); * pollset declarations */ -typedef struct grpc_pollset_vtable grpc_pollset_vtable; - typedef struct grpc_cached_wakeup_fd { grpc_wakeup_fd fd; struct grpc_cached_wakeup_fd *next; @@ -187,12 +185,10 @@ struct grpc_pollset_worker { struct grpc_pollset { gpr_mu mu; grpc_pollset_worker root_worker; - int in_flight_cbs; /* TODO (sreek): Most likely this isn't needed anymore */ int shutting_down; int called_shutdown; int kicked_without_pollers; grpc_closure *shutdown_done; - grpc_closure_list idle_jobs; union { int fd; void *ptr; @@ -201,16 +197,6 @@ struct grpc_pollset { grpc_cached_wakeup_fd *local_wakeup_cache; }; -struct grpc_pollset_vtable { - void (*add_fd)(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - struct grpc_fd *fd, int and_unlock_pollset); - void (*maybe_work_and_unlock)(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker *worker, - gpr_timespec deadline, gpr_timespec now); - void (*finish_shutdown)(grpc_pollset *pollset); - void (*destroy)(grpc_pollset *pollset); -}; - /* Add an fd to a pollset */ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, struct grpc_fd *fd); @@ -796,11 +782,9 @@ static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { gpr_mu_init(&pollset->mu); *mu = &pollset->mu; pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker; - pollset->in_flight_cbs = 0; pollset->shutting_down = 0; pollset->called_shutdown = 0; pollset->kicked_without_pollers = 0; - pollset->idle_jobs.head = pollset->idle_jobs.tail = NULL; pollset->local_wakeup_cache = NULL; pollset->kicked_without_pollers = 0; @@ -813,9 +797,7 @@ static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset); static void pollset_destroy(grpc_pollset *pollset) { - GPR_ASSERT(pollset->in_flight_cbs == 0); GPR_ASSERT(!pollset_has_workers(pollset)); - GPR_ASSERT(pollset->idle_jobs.head == pollset->idle_jobs.tail); multipoll_with_epoll_pollset_destroy(pollset); @@ -830,9 +812,7 @@ static void pollset_destroy(grpc_pollset *pollset) { static void pollset_reset(grpc_pollset *pollset) { GPR_ASSERT(pollset->shutting_down); - GPR_ASSERT(pollset->in_flight_cbs == 0); GPR_ASSERT(!pollset_has_workers(pollset)); - GPR_ASSERT(pollset->idle_jobs.head == pollset->idle_jobs.tail); pollset->shutting_down = 0; pollset->called_shutdown = 0; pollset->kicked_without_pollers = 0; @@ -861,7 +841,6 @@ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, static void multipoll_with_epoll_pollset_finish_shutdown(grpc_pollset *pollset); static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { - 
GPR_ASSERT(grpc_closure_list_empty(pollset->idle_jobs)); multipoll_with_epoll_pollset_finish_shutdown(pollset); grpc_exec_ctx_enqueue(exec_ctx, pollset->shutdown_done, true, NULL); } @@ -895,26 +874,11 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_wakeup_fd_init(&worker.wakeup_fd->fd); } worker.kicked_specifically = 0; - /* If there's work waiting for the pollset to be idle, and the - pollset is idle, then do that work */ - if (!pollset_has_workers(pollset) && - !grpc_closure_list_empty(pollset->idle_jobs)) { - GPR_TIMER_MARK("pollset_work.idle_jobs", 0); - grpc_exec_ctx_enqueue_list(exec_ctx, &pollset->idle_jobs, NULL); - goto done; - } /* If we're shutting down then we don't execute any extended work */ if (pollset->shutting_down) { GPR_TIMER_MARK("pollset_work.shutting_down", 0); goto done; } - /* Give do_promote priority so we don't starve it out */ - if (pollset->in_flight_cbs) { - GPR_TIMER_MARK("pollset_work.in_flight_cbs", 0); - gpr_mu_unlock(&pollset->mu); - locked = 0; - goto done; - } /* Start polling, and keep doing so while we're being asked to re-evaluate our pollers (this allows poll() based pollers to ensure they don't miss wakeups) */ @@ -975,7 +939,7 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, if (pollset->shutting_down) { if (pollset_has_workers(pollset)) { pollset_kick(pollset, NULL); - } else if (!pollset->called_shutdown && pollset->in_flight_cbs == 0) { + } else if (!pollset->called_shutdown) { pollset->called_shutdown = 1; gpr_mu_unlock(&pollset->mu); finish_shutdown(exec_ctx, pollset); @@ -985,11 +949,6 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, * pollset_work. * TODO(dklempner): Can we refactor the shutdown logic to avoid this? 
*/ gpr_mu_lock(&pollset->mu); - } else if (!grpc_closure_list_empty(pollset->idle_jobs)) { - grpc_exec_ctx_enqueue_list(exec_ctx, &pollset->idle_jobs, NULL); - gpr_mu_unlock(&pollset->mu); - grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&pollset->mu); } } *worker_hdl = NULL; @@ -1002,11 +961,8 @@ static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, pollset->shutting_down = 1; pollset->shutdown_done = closure; pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST); - if (!pollset_has_workers(pollset)) { - grpc_exec_ctx_enqueue_list(exec_ctx, &pollset->idle_jobs, NULL); - } - if (!pollset->called_shutdown && pollset->in_flight_cbs == 0 && - !pollset_has_workers(pollset)) { + + if (!pollset->called_shutdown && !pollset_has_workers(pollset)) { pollset->called_shutdown = 1; finish_shutdown(exec_ctx, pollset); } -- cgit v1.2.3 From 97c2d6a269472a8a6e56d9e3d88f89bd27aff5ac Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Sat, 14 May 2016 16:33:16 -0700 Subject: Remove grpc_fd_watcher and related code from ev_epoll_posix.c --- src/core/lib/iomgr/ev_epoll_posix.c | 271 +++++------------------------------- 1 file changed, 38 insertions(+), 233 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_posix.c b/src/core/lib/iomgr/ev_epoll_posix.c index 5776b6c1cf..920be1951f 100644 --- a/src/core/lib/iomgr/ev_epoll_posix.c +++ b/src/core/lib/iomgr/ev_epoll_posix.c @@ -59,17 +59,6 @@ * FD declarations */ -/* TODO(sreek) : Check if grpc_fd_watcher is needed (and if so, check if we can - * share this between ev_poll_posix.h and ev_epoll_posix versions */ - -typedef struct grpc_fd_watcher { - struct grpc_fd_watcher *next; - struct grpc_fd_watcher *prev; - grpc_pollset *pollset; - grpc_pollset_worker *worker; - grpc_fd *fd; -} grpc_fd_watcher; - struct grpc_fd { int fd; /* refst format: @@ -84,32 +73,6 @@ struct grpc_fd { int closed; int released; - /* The watcher list. - - The following watcher related fields are protected by watcher_mu. - - An fd_watcher is an ephemeral object created when an fd wants to - begin polling, and destroyed after the poll. - - It denotes the fd's interest in whether to read poll or write poll - or both or neither on this fd. - - If a watcher is asked to poll for reads or writes, the read_watcher - or write_watcher fields are set respectively. A watcher may be asked - to poll for both, in which case both fields will be set. - - read_watcher and write_watcher may be NULL if no watcher has been - asked to poll for reads or writes. - - If an fd_watcher is not asked to poll for reads or writes, it's added - to a linked list of inactive watchers, rooted at inactive_watcher_root. - If at a later time there becomes need of a poller to poll, one of - the inactive pollers may be kicked out of their poll loops to take - that responsibility. */ - grpc_fd_watcher inactive_watcher_root; - grpc_fd_watcher *read_watcher; - grpc_fd_watcher *write_watcher; - grpc_closure *read_closure; grpc_closure *write_closure; @@ -120,27 +83,6 @@ struct grpc_fd { grpc_iomgr_object iomgr_object; }; -/* Begin polling on an fd. - Registers that the given pollset is interested in this fd - so that if read - or writability interest changes, the pollset can be kicked to pick up that - new interest. - Return value is: - (fd_needs_read? read_mask : 0) | (fd_needs_write? write_mask : 0) - i.e. a combination of read_mask and write_mask determined by the fd's current - interest in said events. 
- Polling strategies that do not need to alter their behavior depending on the - fd's current interest (such as epoll) do not need to call this function. - MUST NOT be called with a pollset lock taken */ -static uint32_t fd_begin_poll(grpc_fd *fd, grpc_pollset *pollset, - grpc_pollset_worker *worker, uint32_t read_mask, - uint32_t write_mask, grpc_fd_watcher *rec); -/* Complete polling previously started with fd_begin_poll - MUST NOT be called with a pollset lock taken - if got_read or got_write are 1, also does the become_{readable,writable} as - appropriate. */ -static void fd_end_poll(grpc_exec_ctx *exec_ctx, grpc_fd_watcher *rec, - int got_read, int got_write); - /* Return 1 if this fd is orphaned, 0 otherwise */ static bool fd_is_orphaned(grpc_fd *fd); @@ -307,10 +249,7 @@ static grpc_fd *alloc_fd(int fd) { r->read_closure = CLOSURE_NOT_READY; r->write_closure = CLOSURE_NOT_READY; r->fd = fd; - r->inactive_watcher_root.next = r->inactive_watcher_root.prev = - &r->inactive_watcher_root; r->freelist_next = NULL; - r->read_watcher = r->write_watcher = NULL; r->on_done_closure = NULL; r->closed = 0; r->released = 0; @@ -387,43 +326,6 @@ static bool fd_is_orphaned(grpc_fd *fd) { return (gpr_atm_acq_load(&fd->refst) & 1) == 0; } -static void pollset_kick_locked(grpc_fd_watcher *watcher) { - gpr_mu_lock(&watcher->pollset->mu); - GPR_ASSERT(watcher->worker); - pollset_kick_ext(watcher->pollset, watcher->worker, - GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP); - gpr_mu_unlock(&watcher->pollset->mu); -} - -static void maybe_wake_one_watcher_locked(grpc_fd *fd) { - if (fd->inactive_watcher_root.next != &fd->inactive_watcher_root) { - pollset_kick_locked(fd->inactive_watcher_root.next); - } else if (fd->read_watcher) { - pollset_kick_locked(fd->read_watcher); - } else if (fd->write_watcher) { - pollset_kick_locked(fd->write_watcher); - } -} - -static void wake_all_watchers_locked(grpc_fd *fd) { - grpc_fd_watcher *watcher; - for (watcher = fd->inactive_watcher_root.next; - watcher != &fd->inactive_watcher_root; watcher = watcher->next) { - pollset_kick_locked(watcher); - } - if (fd->read_watcher) { - pollset_kick_locked(fd->read_watcher); - } - if (fd->write_watcher && fd->write_watcher != fd->read_watcher) { - pollset_kick_locked(fd->write_watcher); - } -} - -static int has_watchers(grpc_fd *fd) { - return fd->read_watcher != NULL || fd->write_watcher != NULL || - fd->inactive_watcher_root.next != &fd->inactive_watcher_root; -} - static void close_fd_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { fd->closed = 1; if (!fd->released) { @@ -454,11 +356,7 @@ static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, } gpr_mu_lock(&fd->mu); REF_BY(fd, 1, reason); /* remove active status, but keep referenced */ - if (!has_watchers(fd)) { - close_fd_locked(exec_ctx, fd); - } else { - wake_all_watchers_locked(fd); - } + close_fd_locked(exec_ctx, fd); gpr_mu_unlock(&fd->mu); UNREF_BY(fd, 2, reason); /* drop the reference */ } @@ -489,7 +387,6 @@ static void notify_on_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, /* already ready ==> queue the closure to run immediately */ *st = CLOSURE_NOT_READY; grpc_exec_ctx_enqueue(exec_ctx, closure, !fd->shutdown, NULL); - maybe_wake_one_watcher_locked(fd); } else { /* upcallptr was set to a different closure. This is an error! 
*/ gpr_log(GPR_ERROR, @@ -540,111 +437,6 @@ static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, gpr_mu_unlock(&fd->mu); } -static uint32_t fd_begin_poll(grpc_fd *fd, grpc_pollset *pollset, - grpc_pollset_worker *worker, uint32_t read_mask, - uint32_t write_mask, grpc_fd_watcher *watcher) { - uint32_t mask = 0; - grpc_closure *cur; - int requested; - /* keep track of pollers that have requested our events, in case they change - */ - GRPC_FD_REF(fd, "poll"); - - gpr_mu_lock(&fd->mu); - - /* if we are shutdown, then don't add to the watcher set */ - if (fd->shutdown) { - watcher->fd = NULL; - watcher->pollset = NULL; - watcher->worker = NULL; - gpr_mu_unlock(&fd->mu); - GRPC_FD_UNREF(fd, "poll"); - return 0; - } - - /* if there is nobody polling for read, but we need to, then start doing so */ - cur = fd->read_closure; - requested = cur != CLOSURE_READY; - if (read_mask && fd->read_watcher == NULL && requested) { - fd->read_watcher = watcher; - mask |= read_mask; - } - /* if there is nobody polling for write, but we need to, then start doing so - */ - cur = fd->write_closure; - requested = cur != CLOSURE_READY; - if (write_mask && fd->write_watcher == NULL && requested) { - fd->write_watcher = watcher; - mask |= write_mask; - } - /* if not polling, remember this watcher in case we need someone to later */ - if (mask == 0 && worker != NULL) { - watcher->next = &fd->inactive_watcher_root; - watcher->prev = watcher->next->prev; - watcher->next->prev = watcher->prev->next = watcher; - } - watcher->pollset = pollset; - watcher->worker = worker; - watcher->fd = fd; - gpr_mu_unlock(&fd->mu); - - return mask; -} - -static void fd_end_poll(grpc_exec_ctx *exec_ctx, grpc_fd_watcher *watcher, - int got_read, int got_write) { - int was_polling = 0; - int kick = 0; - grpc_fd *fd = watcher->fd; - - if (fd == NULL) { - return; - } - - gpr_mu_lock(&fd->mu); - - if (watcher == fd->read_watcher) { - /* remove read watcher, kick if we still need a read */ - was_polling = 1; - if (!got_read) { - kick = 1; - } - fd->read_watcher = NULL; - } - if (watcher == fd->write_watcher) { - /* remove write watcher, kick if we still need a write */ - was_polling = 1; - if (!got_write) { - kick = 1; - } - fd->write_watcher = NULL; - } - if (!was_polling && watcher->worker != NULL) { - /* remove from inactive list */ - watcher->next->prev = watcher->prev; - watcher->prev->next = watcher->next; - } - if (got_read) { - if (set_ready_locked(exec_ctx, fd, &fd->read_closure)) { - kick = 1; - } - } - if (got_write) { - if (set_ready_locked(exec_ctx, fd, &fd->write_closure)) { - kick = 1; - } - } - if (kick) { - maybe_wake_one_watcher_locked(fd); - } - if (fd_is_orphaned(fd) && !has_watchers(fd) && !fd->closed) { - close_fd_locked(exec_ctx, fd); - } - gpr_mu_unlock(&fd->mu); - - GRPC_FD_UNREF(fd, "poll"); -} - /******************************************************************************* * pollset_posix.c */ @@ -1085,32 +877,45 @@ static void finally_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, epoll_hdr *h = pollset->data.ptr; struct epoll_event ev; int err; - grpc_fd_watcher watcher; - - /* We pretend to be polling whilst adding an fd to keep the fd from being - closed during the add. This may result in a spurious wakeup being assigned - to this pollset whilst adding, but that should be benign. */ - /* TODO (sreek). 
This fd_begin_poll() really seem to accomplish adding - * GRPC_FD_REF() (i.e adding a refcount to the fd) and checking that the - * fd is not shutting down (in which case watcher.fd will be NULL and no - * refcount is added). The ref count is added only durng hte duration of - * adding it to the epoll set (after which fd_end_poll would be called and - * the fd's ref count is decremented by 1. So do we still need fd_begin_poll - * ??? */ - GPR_ASSERT(fd_begin_poll(fd, pollset, NULL, 0, 0, &watcher) == 0); - if (watcher.fd != NULL) { - ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); - ev.data.ptr = fd; - err = epoll_ctl(h->epoll_fd, EPOLL_CTL_ADD, fd->fd, &ev); - if (err < 0) { - /* FDs may be added to a pollset multiple times, so EEXIST is normal. */ - if (errno != EEXIST) { - gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", fd->fd, - strerror(errno)); - } + + /* Hold a ref to the fd to keep it from being closed during the add. This may + result in a spurious wakeup being assigned to this pollset whilst adding, + but that should be benign. */ + /* TODO: (sreek): Understand how a spurious wake up migh be assinged to this + * pollset..and how holding a reference will prevent the fd from being closed + * (and perhaps more importantly, see how can an fd be closed while being + * added to the epollset */ + GRPC_FD_REF(fd, "add fd"); + + gpr_mu_lock(&fd->mu); + if (fd->shutdown) { + gpr_mu_unlock(&fd->mu); + GRPC_FD_UNREF(fd, "add fd"); + return; + } + gpr_mu_unlock(&fd->mu); + + ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); + ev.data.ptr = fd; + err = epoll_ctl(h->epoll_fd, EPOLL_CTL_ADD, fd->fd, &ev); + if (err < 0) { + /* FDs may be added to a pollset multiple times, so EEXIST is normal. */ + if (errno != EEXIST) { + gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", fd->fd, + strerror(errno)); } } - fd_end_poll(exec_ctx, &watcher, 0, 0); + + /* The fd might have been orphaned while we were adding it to the epoll set. + Close the fd in such a case (which will also take care of removing it from + the epoll set */ + gpr_mu_lock(&fd->mu); + if (fd_is_orphaned(fd) && !fd->closed) { + close_fd_locked(exec_ctx, fd); + } + gpr_mu_unlock(&fd->mu); + + GRPC_FD_UNREF(fd, "add fd"); } /* Creates an epoll fd and initializes the pollset */ -- cgit v1.2.3 From e9ee1f34b8efdc47ea12821351e5cc23125d62b2 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Sat, 14 May 2016 17:22:10 -0700 Subject: Minor refactor of add_fd path --- src/core/lib/iomgr/ev_epoll_posix.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_posix.c b/src/core/lib/iomgr/ev_epoll_posix.c index 920be1951f..15126b3b62 100644 --- a/src/core/lib/iomgr/ev_epoll_posix.c +++ b/src/core/lib/iomgr/ev_epoll_posix.c @@ -617,16 +617,13 @@ static void multipoll_with_epoll_pollset_add_fd(grpc_exec_ctx *exec_ctx, static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { + /* TODO (sreek) - Does reading pollset->data.ptr need pollset->mu lock ? + * because finally_add_fd() also reads it but without the lock! 
*/ gpr_mu_lock(&pollset->mu); - multipoll_with_epoll_pollset_add_fd(exec_ctx, pollset, fd); -/* the following (enabled only in debug) will reacquire and then release - our lock - meaning that if the unlocking flag passed to add_fd above is - not respected, the code will deadlock (in a way that we have a chance of - debugging) */ -#ifndef NDEBUG - gpr_mu_lock(&pollset->mu); + GPR_ASSERT(pollset->data.ptr != NULL); gpr_mu_unlock(&pollset->mu); -#endif + + multipoll_with_epoll_pollset_add_fd(exec_ctx, pollset, fd); } /* TODO (sreek): Remove multipoll_with_epoll_finish_shutdown() declaration */ @@ -874,6 +871,8 @@ typedef struct { int epoll_fd; } epoll_hdr; static void finally_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { + /*TODO: (sree) Shouldn't this read (pollset->data.ptr) be done under a + pollset lock - i.e pollset->mu ? */ epoll_hdr *h = pollset->data.ptr; struct epoll_event ev; int err; @@ -941,9 +940,6 @@ static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset) { ev.events = (uint32_t)(EPOLLIN | EPOLLET); ev.data.ptr = NULL; - /* TODO (sreek): Double-check the use of grpc_global_wakeup_fd here (right now - * I do not know why this is used. I just copied this code from - * epoll_become_mutipoller() function in ev_poll_and_epoll_posix.c file */ err = epoll_ctl(h->epoll_fd, EPOLL_CTL_ADD, GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), &ev); if (err < 0) { @@ -956,12 +952,6 @@ static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset) { static void multipoll_with_epoll_pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { - GPR_ASSERT(pollset->data.ptr != NULL); - - /* TODO(sreek). Remove this unlock code (and also the code that acquires the - * lock before calling multipoll_with_epoll_add_fd() function */ - - gpr_mu_unlock(&pollset->mu); finally_add_fd(exec_ctx, pollset, fd); } -- cgit v1.2.3 From 2ea165911b23099499da0af48088c47c47d659ba Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Tue, 17 May 2016 09:37:48 -0700 Subject: experiment with signals --- src/core/lib/iomgr/ev_epoll_posix.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_posix.c b/src/core/lib/iomgr/ev_epoll_posix.c index 15126b3b62..4481bab438 100644 --- a/src/core/lib/iomgr/ev_epoll_posix.c +++ b/src/core/lib/iomgr/ev_epoll_posix.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -120,6 +121,7 @@ struct grpc_pollset_worker { grpc_cached_wakeup_fd *wakeup_fd; int reevaluate_polling_on_wakeup; int kicked_specifically; + pthread_t pt_id; struct grpc_pollset_worker *next; struct grpc_pollset_worker *prev; }; @@ -506,6 +508,8 @@ static void pollset_kick_ext(grpc_pollset *p, } specific_worker->kicked_specifically = 1; grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + /* TODO (sreek): Refactor this into a separate file*/ + pthread_kill(specific_worker->pt_id, SIGUSR1); } else if ((flags & GRPC_POLLSET_CAN_KICK_SELF) != 0) { GPR_TIMER_MARK("kick_yoself", 0); if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { @@ -551,10 +555,15 @@ static void pollset_kick(grpc_pollset *p, /* global state management */ +static void sig_handler(int sig_num) { + gpr_log(GPR_INFO, "Received signal %d", sig_num); +} + static void pollset_global_init(void) { gpr_tls_init(&g_current_thread_poller); gpr_tls_init(&g_current_thread_worker); grpc_wakeup_fd_init(&grpc_global_wakeup_fd); + signal(SIGUSR1, sig_handler); } static void 
pollset_global_shutdown(void) { @@ -663,6 +672,9 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_wakeup_fd_init(&worker.wakeup_fd->fd); } worker.kicked_specifically = 0; + + /* TODO(sreek): Abstract this thread id stuff out into a separate file */ + worker.pt_id = pthread_self(); /* If we're shutting down then we don't execute any extended work */ if (pollset->shutting_down) { GPR_TIMER_MARK("pollset_work.shutting_down", 0); -- cgit v1.2.3 From f448c34a6839f75476900a4a2b24b2160fe4d164 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Thu, 19 May 2016 10:51:24 -0700 Subject: Remove union { } data and epoll_hdr structures. Added ev_epoll_linux files --- BUILD | 6 + Makefile | 2 + binding.gyp | 1 + build.yaml | 2 + config.m4 | 1 + gRPC.podspec | 3 + grpc.gemspec | 2 + package.xml | 2 + src/core/lib/iomgr/ev_epoll_linux.c | 1335 ++++++++++++++++++++ src/core/lib/iomgr/ev_epoll_linux.h | 41 + src/core/lib/iomgr/ev_epoll_posix.c | 87 +- src/core/lib/iomgr/ev_posix.c | 7 +- src/python/grpcio/grpc_core_dependencies.py | 1 + tools/doxygen/Doxyfile.core.internal | 2 + tools/run_tests/sources_and_headers.json | 3 + vsprojects/vcxproj/grpc/grpc.vcxproj | 3 + vsprojects/vcxproj/grpc/grpc.vcxproj.filters | 6 + .../vcxproj/grpc_unsecure/grpc_unsecure.vcxproj | 3 + .../grpc_unsecure/grpc_unsecure.vcxproj.filters | 6 + 19 files changed, 1442 insertions(+), 71 deletions(-) create mode 100644 src/core/lib/iomgr/ev_epoll_linux.c create mode 100644 src/core/lib/iomgr/ev_epoll_linux.h (limited to 'src/core/lib') diff --git a/BUILD b/BUILD index 0be8f27a01..a32352ebb3 100644 --- a/BUILD +++ b/BUILD @@ -178,6 +178,7 @@ cc_library( "src/core/lib/iomgr/closure.h", "src/core/lib/iomgr/endpoint.h", "src/core/lib/iomgr/endpoint_pair.h", + "src/core/lib/iomgr/ev_epoll_linux.h", "src/core/lib/iomgr/ev_epoll_posix.h", "src/core/lib/iomgr/ev_poll_posix.h", "src/core/lib/iomgr/ev_posix.h", @@ -322,6 +323,7 @@ cc_library( "src/core/lib/iomgr/endpoint.c", "src/core/lib/iomgr/endpoint_pair_posix.c", "src/core/lib/iomgr/endpoint_pair_windows.c", + "src/core/lib/iomgr/ev_epoll_linux.c", "src/core/lib/iomgr/ev_epoll_posix.c", "src/core/lib/iomgr/ev_poll_posix.c", "src/core/lib/iomgr/ev_posix.c", @@ -548,6 +550,7 @@ cc_library( "src/core/lib/iomgr/closure.h", "src/core/lib/iomgr/endpoint.h", "src/core/lib/iomgr/endpoint_pair.h", + "src/core/lib/iomgr/ev_epoll_linux.h", "src/core/lib/iomgr/ev_epoll_posix.h", "src/core/lib/iomgr/ev_poll_posix.h", "src/core/lib/iomgr/ev_posix.h", @@ -669,6 +672,7 @@ cc_library( "src/core/lib/iomgr/endpoint.c", "src/core/lib/iomgr/endpoint_pair_posix.c", "src/core/lib/iomgr/endpoint_pair_windows.c", + "src/core/lib/iomgr/ev_epoll_linux.c", "src/core/lib/iomgr/ev_epoll_posix.c", "src/core/lib/iomgr/ev_poll_posix.c", "src/core/lib/iomgr/ev_posix.c", @@ -1362,6 +1366,7 @@ objc_library( "src/core/lib/iomgr/endpoint.c", "src/core/lib/iomgr/endpoint_pair_posix.c", "src/core/lib/iomgr/endpoint_pair_windows.c", + "src/core/lib/iomgr/ev_epoll_linux.c", "src/core/lib/iomgr/ev_epoll_posix.c", "src/core/lib/iomgr/ev_poll_posix.c", "src/core/lib/iomgr/ev_posix.c", @@ -1567,6 +1572,7 @@ objc_library( "src/core/lib/iomgr/closure.h", "src/core/lib/iomgr/endpoint.h", "src/core/lib/iomgr/endpoint_pair.h", + "src/core/lib/iomgr/ev_epoll_linux.h", "src/core/lib/iomgr/ev_epoll_posix.h", "src/core/lib/iomgr/ev_poll_posix.h", "src/core/lib/iomgr/ev_posix.h", diff --git a/Makefile b/Makefile index 29ebc0e5ad..063698d943 100644 --- a/Makefile +++ b/Makefile @@ -2486,6 +2486,7 @@ 
LIBGRPC_SRC = \ src/core/lib/iomgr/endpoint.c \ src/core/lib/iomgr/endpoint_pair_posix.c \ src/core/lib/iomgr/endpoint_pair_windows.c \ + src/core/lib/iomgr/ev_epoll_linux.c \ src/core/lib/iomgr/ev_epoll_posix.c \ src/core/lib/iomgr/ev_poll_posix.c \ src/core/lib/iomgr/ev_posix.c \ @@ -2841,6 +2842,7 @@ LIBGRPC_UNSECURE_SRC = \ src/core/lib/iomgr/endpoint.c \ src/core/lib/iomgr/endpoint_pair_posix.c \ src/core/lib/iomgr/endpoint_pair_windows.c \ + src/core/lib/iomgr/ev_epoll_linux.c \ src/core/lib/iomgr/ev_epoll_posix.c \ src/core/lib/iomgr/ev_poll_posix.c \ src/core/lib/iomgr/ev_posix.c \ diff --git a/binding.gyp b/binding.gyp index 89774ead4d..41e1b5bb41 100644 --- a/binding.gyp +++ b/binding.gyp @@ -581,6 +581,7 @@ 'src/core/lib/iomgr/endpoint.c', 'src/core/lib/iomgr/endpoint_pair_posix.c', 'src/core/lib/iomgr/endpoint_pair_windows.c', + 'src/core/lib/iomgr/ev_epoll_linux.c', 'src/core/lib/iomgr/ev_epoll_posix.c', 'src/core/lib/iomgr/ev_poll_posix.c', 'src/core/lib/iomgr/ev_posix.c', diff --git a/build.yaml b/build.yaml index 7ba6533297..2f3d07071d 100644 --- a/build.yaml +++ b/build.yaml @@ -165,6 +165,7 @@ filegroups: - src/core/lib/iomgr/closure.h - src/core/lib/iomgr/endpoint.h - src/core/lib/iomgr/endpoint_pair.h + - src/core/lib/iomgr/ev_epoll_linux.h - src/core/lib/iomgr/ev_epoll_posix.h - src/core/lib/iomgr/ev_poll_posix.h - src/core/lib/iomgr/ev_posix.h @@ -240,6 +241,7 @@ filegroups: - src/core/lib/iomgr/endpoint.c - src/core/lib/iomgr/endpoint_pair_posix.c - src/core/lib/iomgr/endpoint_pair_windows.c + - src/core/lib/iomgr/ev_epoll_linux.c - src/core/lib/iomgr/ev_epoll_posix.c - src/core/lib/iomgr/ev_poll_posix.c - src/core/lib/iomgr/ev_posix.c diff --git a/config.m4 b/config.m4 index 6987c74154..4308295afd 100644 --- a/config.m4 +++ b/config.m4 @@ -100,6 +100,7 @@ if test "$PHP_GRPC" != "no"; then src/core/lib/iomgr/endpoint.c \ src/core/lib/iomgr/endpoint_pair_posix.c \ src/core/lib/iomgr/endpoint_pair_windows.c \ + src/core/lib/iomgr/ev_epoll_linux.c \ src/core/lib/iomgr/ev_epoll_posix.c \ src/core/lib/iomgr/ev_poll_posix.c \ src/core/lib/iomgr/ev_posix.c \ diff --git a/gRPC.podspec b/gRPC.podspec index 3b4dd52380..de55880125 100644 --- a/gRPC.podspec +++ b/gRPC.podspec @@ -181,6 +181,7 @@ Pod::Spec.new do |s| 'src/core/lib/iomgr/closure.h', 'src/core/lib/iomgr/endpoint.h', 'src/core/lib/iomgr/endpoint_pair.h', + 'src/core/lib/iomgr/ev_epoll_linux.h', 'src/core/lib/iomgr/ev_epoll_posix.h', 'src/core/lib/iomgr/ev_poll_posix.h', 'src/core/lib/iomgr/ev_posix.h', @@ -359,6 +360,7 @@ Pod::Spec.new do |s| 'src/core/lib/iomgr/endpoint.c', 'src/core/lib/iomgr/endpoint_pair_posix.c', 'src/core/lib/iomgr/endpoint_pair_windows.c', + 'src/core/lib/iomgr/ev_epoll_linux.c', 'src/core/lib/iomgr/ev_epoll_posix.c', 'src/core/lib/iomgr/ev_poll_posix.c', 'src/core/lib/iomgr/ev_posix.c', @@ -548,6 +550,7 @@ Pod::Spec.new do |s| 'src/core/lib/iomgr/closure.h', 'src/core/lib/iomgr/endpoint.h', 'src/core/lib/iomgr/endpoint_pair.h', + 'src/core/lib/iomgr/ev_epoll_linux.h', 'src/core/lib/iomgr/ev_epoll_posix.h', 'src/core/lib/iomgr/ev_poll_posix.h', 'src/core/lib/iomgr/ev_posix.h', diff --git a/grpc.gemspec b/grpc.gemspec index 71cccb6ca8..54ae2eb68d 100755 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -190,6 +190,7 @@ Gem::Specification.new do |s| s.files += %w( src/core/lib/iomgr/closure.h ) s.files += %w( src/core/lib/iomgr/endpoint.h ) s.files += %w( src/core/lib/iomgr/endpoint_pair.h ) + s.files += %w( src/core/lib/iomgr/ev_epoll_linux.h ) s.files += %w( src/core/lib/iomgr/ev_epoll_posix.h ) 
s.files += %w( src/core/lib/iomgr/ev_poll_posix.h ) s.files += %w( src/core/lib/iomgr/ev_posix.h ) @@ -338,6 +339,7 @@ Gem::Specification.new do |s| s.files += %w( src/core/lib/iomgr/endpoint.c ) s.files += %w( src/core/lib/iomgr/endpoint_pair_posix.c ) s.files += %w( src/core/lib/iomgr/endpoint_pair_windows.c ) + s.files += %w( src/core/lib/iomgr/ev_epoll_linux.c ) s.files += %w( src/core/lib/iomgr/ev_epoll_posix.c ) s.files += %w( src/core/lib/iomgr/ev_poll_posix.c ) s.files += %w( src/core/lib/iomgr/ev_posix.c ) diff --git a/package.xml b/package.xml index 0fc5d0dee4..d8e82a8bc3 100644 --- a/package.xml +++ b/package.xml @@ -197,6 +197,7 @@ + @@ -345,6 +346,7 @@ + diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c new file mode 100644 index 0000000000..f257ac8a1d --- /dev/null +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -0,0 +1,1335 @@ +/* + * + * Copyright 2016, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +#ifdef GPR_POSIX_SOCKET + +#include "src/core/lib/iomgr/ev_epoll_posix.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "src/core/lib/iomgr/ev_posix.h" +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/wakeup_fd_posix.h" +#include "src/core/lib/profiling/timers.h" +#include "src/core/lib/support/block_annotate.h" + +struct polling_island; + +/******************************************************************************* + * FD declarations + */ + +struct grpc_fd { + int fd; + /* refst format: + bit0: 1=active/0=orphaned + bit1-n: refcount + meaning that mostly we ref by two to avoid altering the orphaned bit, + and just unref by 1 when we're ready to flag the object as orphaned */ + gpr_atm refst; + + gpr_mu mu; + int shutdown; + int closed; + int released; + + grpc_closure *read_closure; + grpc_closure *write_closure; + + /* Mutex protecting the 'polling_island' field */ + gpr_mu pi_mu; + + /* The polling island to which this fd belongs to. An fd belongs to exactly + one polling island */ + struct polling_island *polling_island; + + struct grpc_fd *freelist_next; + + grpc_closure *on_done_closure; + + grpc_iomgr_object iomgr_object; +}; + +/* Return 1 if this fd is orphaned, 0 otherwise */ +static bool fd_is_orphaned(grpc_fd *fd); + +/* Reference counting for fds */ +/*#define GRPC_FD_REF_COUNT_DEBUG*/ +#ifdef GRPC_FD_REF_COUNT_DEBUG +static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line); +static void fd_unref(grpc_fd *fd, const char *reason, const char *file, + int line); +#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__) +#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__) +#else +static void fd_ref(grpc_fd *fd); +static void fd_unref(grpc_fd *fd); +#define GRPC_FD_REF(fd, reason) fd_ref(fd) +#define GRPC_FD_UNREF(fd, reason) fd_unref(fd) +#endif + +static void fd_global_init(void); +static void fd_global_shutdown(void); + +#define CLOSURE_NOT_READY ((grpc_closure *)0) +#define CLOSURE_READY ((grpc_closure *)1) + +/******************************************************************************* + * Polling Island + */ +typedef struct polling_island { + gpr_mu mu; + int ref_cnt; + + /* Pointer to the polling_island this merged into. If this is not NULL, all + the remaining fields in this pollset (i.e all fields except mu and ref_cnt) + are considered invalid and must be ignored */ + struct polling_island *merged_to; + + /* The fd of the underlying epoll set */ + int epoll_fd; + + /* The file descriptors in the epoll set */ + size_t fd_cnt; + size_t fd_capacity; + grpc_fd **fds; + + /* Polling islands that are no longer needed are kept in a freelist so that + they can be reused. This field points to the next polling island in the + free list. Note that this is only used if the polling island is in the + free list */ + struct polling_island *next_free; +} polling_island; + +/* Polling island freelist */ +static gpr_mu g_pi_freelist_mu; +static polling_island *g_pi_freelist = NULL; + +/* TODO: sreek - Should we hold a lock on fd or add a ref to the fd ? 
*/ +static void add_fd_to_polling_island_locked(polling_island *pi, grpc_fd *fd) { + int err; + struct epoll_event ev; + + ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); + ev.data.ptr = fd; + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fd->fd, &ev); + + if (err < 0 && errno != EEXIST) { + gpr_log(GPR_ERROR, "epoll_ctl add for fd: %d failed with error: %s", fd->fd, + strerror(errno)); + return; + } + + pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2); + pi->fds = gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity); + pi->fds[pi->fd_cnt++] = fd; +} + +static polling_island *polling_island_create(int initial_ref_cnt, + grpc_fd *initial_fd) { + polling_island *pi = NULL; + gpr_mu_lock(&g_pi_freelist_mu); + if (g_pi_freelist != NULL) { + pi = g_pi_freelist; + g_pi_freelist = g_pi_freelist->next_free; + pi->next_free = NULL; + } + gpr_mu_unlock(&g_pi_freelist_mu); + + /* Create new polling island if we could not get one from the free list */ + if (pi == NULL) { + pi = gpr_malloc(sizeof(*pi)); + gpr_mu_init(&pi->mu); + pi->fd_cnt = 0; + pi->fd_capacity = 0; + pi->fds = NULL; + + pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (pi->epoll_fd < 0) { + gpr_log(GPR_ERROR, "epoll_create1() failed with error: %s", + strerror(errno)); + } + GPR_ASSERT(pi->epoll_fd >= 0); + } + + pi->ref_cnt = initial_ref_cnt; + pi->merged_to = NULL; + pi->next_free = NULL; + + if (initial_fd != NULL) { + /* add_fd_to_polling_island_locked() expects the caller to hold a pi->mu + * lock. However, since this is a new polling island (and no one has a + * reference to it yet), it is okay to not acquire pi->mu here */ + add_fd_to_polling_island_locked(pi, initial_fd); + } + + return pi; +} + +static void polling_island_global_init() { + polling_island_create(0, NULL); /* TODO(sreek): Delete this line */ + gpr_mu_init(&g_pi_freelist_mu); + g_pi_freelist = NULL; +} + +/******************************************************************************* + * pollset declarations + */ + +typedef struct grpc_cached_wakeup_fd { + grpc_wakeup_fd fd; + struct grpc_cached_wakeup_fd *next; +} grpc_cached_wakeup_fd; + +struct grpc_pollset_worker { + grpc_cached_wakeup_fd *wakeup_fd; + int reevaluate_polling_on_wakeup; + int kicked_specifically; + pthread_t pt_id; + struct grpc_pollset_worker *next; + struct grpc_pollset_worker *prev; +}; + +struct grpc_pollset { + gpr_mu mu; + grpc_pollset_worker root_worker; + int shutting_down; + int called_shutdown; + int kicked_without_pollers; + grpc_closure *shutdown_done; + + int epoll_fd; + + /* Mutex protecting the 'polling_island' field */ + gpr_mu pi_mu; + + /* The polling island to which this fd belongs to. 
An fd belongs to exactly + one polling island */ + struct polling_island *polling_island; + + /* Local cache of eventfds for workers */ + grpc_cached_wakeup_fd *local_wakeup_cache; +}; + +/* Add an fd to a pollset */ +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + struct grpc_fd *fd); + +static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd); + +/* Convert a timespec to milliseconds: + - very small or negative poll times are clamped to zero to do a + non-blocking poll (which becomes spin polling) + - other small values are rounded up to one millisecond + - longer than a millisecond polls are rounded up to the next nearest + millisecond to avoid spinning + - infinite timeouts are converted to -1 */ +static int poll_deadline_to_millis_timeout(gpr_timespec deadline, + gpr_timespec now); + +/* Allow kick to wakeup the currently polling worker */ +#define GRPC_POLLSET_CAN_KICK_SELF 1 +/* Force the wakee to repoll when awoken */ +#define GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP 2 +/* As per pollset_kick, with an extended set of flags (defined above) + -- mostly for fd_posix's use. */ +static void pollset_kick_ext(grpc_pollset *p, + grpc_pollset_worker *specific_worker, + uint32_t flags); + +/* turn a pollset into a multipoller: platform specific */ +typedef void (*platform_become_multipoller_type)(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, + struct grpc_fd **fds, + size_t fd_count); + +/* Return 1 if the pollset has active threads in pollset_work (pollset must + * be locked) */ +static int pollset_has_workers(grpc_pollset *pollset); + +static void remove_fd_from_all_epoll_sets(int fd); + +/******************************************************************************* + * pollset_set definitions + */ + +struct grpc_pollset_set { + gpr_mu mu; + + size_t pollset_count; + size_t pollset_capacity; + grpc_pollset **pollsets; + + size_t pollset_set_count; + size_t pollset_set_capacity; + struct grpc_pollset_set **pollset_sets; + + size_t fd_count; + size_t fd_capacity; + grpc_fd **fds; +}; + +/******************************************************************************* + * fd_posix.c + */ + +/* We need to keep a freelist not because of any concerns of malloc performance + * but instead so that implementations with multiple threads in (for example) + * epoll_wait deal with the race between pollset removal and incoming poll + * notifications. + * + * The problem is that the poller ultimately holds a reference to this + * object, so it is very difficult to know when is safe to free it, at least + * without some expensive synchronization. + * + * If we keep the object freelisted, in the worst case losing this race just + * becomes a spurious read notification on a reused fd. + */ +/* TODO(klempner): We could use some form of polling generation count to know + * when these are safe to free. 
*/ +/* TODO(klempner): Consider disabling freelisting if we don't have multiple + * threads in poll on the same fd */ +/* TODO(klempner): Batch these allocations to reduce fragmentation */ +static grpc_fd *fd_freelist = NULL; +static gpr_mu fd_freelist_mu; + +static void freelist_fd(grpc_fd *fd) { + gpr_mu_lock(&fd_freelist_mu); + fd->freelist_next = fd_freelist; + fd_freelist = fd; + grpc_iomgr_unregister_object(&fd->iomgr_object); + gpr_mu_unlock(&fd_freelist_mu); +} + +static grpc_fd *alloc_fd(int fd) { + grpc_fd *r = NULL; + + gpr_mu_lock(&fd_freelist_mu); + if (fd_freelist != NULL) { + r = fd_freelist; + fd_freelist = fd_freelist->freelist_next; + } + gpr_mu_unlock(&fd_freelist_mu); + + if (r == NULL) { + r = gpr_malloc(sizeof(grpc_fd)); + gpr_mu_init(&r->mu); + gpr_mu_init(&r->pi_mu); + } + + /* TODO: sreek - check with ctiller on why we need to acquire a lock here */ + gpr_mu_lock(&r->mu); + gpr_atm_rel_store(&r->refst, 1); + r->shutdown = 0; + r->read_closure = CLOSURE_NOT_READY; + r->write_closure = CLOSURE_NOT_READY; + r->fd = fd; + r->polling_island = NULL; + r->freelist_next = NULL; + r->on_done_closure = NULL; + r->closed = 0; + r->released = 0; + gpr_mu_unlock(&r->mu); + return r; +} + +static void destroy(grpc_fd *fd) { + gpr_mu_destroy(&fd->mu); + gpr_free(fd); +} + +#ifdef GRPC_FD_REF_COUNT_DEBUG +#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__) +#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__) +static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file, + int line) { + gpr_log(GPR_DEBUG, "FD %d %p ref %d %d -> %d [%s; %s:%d]", fd->fd, fd, n, + gpr_atm_no_barrier_load(&fd->refst), + gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line); +#else +#define REF_BY(fd, n, reason) ref_by(fd, n) +#define UNREF_BY(fd, n, reason) unref_by(fd, n) +static void ref_by(grpc_fd *fd, int n) { +#endif + GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0); +} + +#ifdef GRPC_FD_REF_COUNT_DEBUG +static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file, + int line) { + gpr_atm old; + gpr_log(GPR_DEBUG, "FD %d %p unref %d %d -> %d [%s; %s:%d]", fd->fd, fd, n, + gpr_atm_no_barrier_load(&fd->refst), + gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line); +#else +static void unref_by(grpc_fd *fd, int n) { + gpr_atm old; +#endif + old = gpr_atm_full_fetch_add(&fd->refst, -n); + if (old == n) { + freelist_fd(fd); + } else { + GPR_ASSERT(old > n); + } +} + +static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); } + +static void fd_global_shutdown(void) { + gpr_mu_lock(&fd_freelist_mu); + gpr_mu_unlock(&fd_freelist_mu); + while (fd_freelist != NULL) { + grpc_fd *fd = fd_freelist; + fd_freelist = fd_freelist->freelist_next; + destroy(fd); + } + gpr_mu_destroy(&fd_freelist_mu); +} + +static grpc_fd *fd_create(int fd, const char *name) { + grpc_fd *r = alloc_fd(fd); + char *name2; + gpr_asprintf(&name2, "%s fd=%d", name, fd); + grpc_iomgr_register_object(&r->iomgr_object, name2); + gpr_free(name2); +#ifdef GRPC_FD_REF_COUNT_DEBUG + gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, r, name); +#endif + return r; +} + +static bool fd_is_orphaned(grpc_fd *fd) { + return (gpr_atm_acq_load(&fd->refst) & 1) == 0; +} + +static void close_fd_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + fd->closed = 1; + if (!fd->released) { + close(fd->fd); + } else { + remove_fd_from_all_epoll_sets(fd->fd); + } + grpc_exec_ctx_enqueue(exec_ctx, fd->on_done_closure, true, NULL); +} + +static int 
fd_wrapped_fd(grpc_fd *fd) { + if (fd->released || fd->closed) { + return -1; + } else { + return fd->fd; + } +} + +/* TODO: sreek - do something here with the pollset island link */ +static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *on_done, int *release_fd, + const char *reason) { + fd->on_done_closure = on_done; + fd->released = release_fd != NULL; + if (!fd->released) { + shutdown(fd->fd, SHUT_RDWR); + } else { + *release_fd = fd->fd; + } + gpr_mu_lock(&fd->mu); + REF_BY(fd, 1, reason); /* remove active status, but keep referenced */ + close_fd_locked(exec_ctx, fd); + gpr_mu_unlock(&fd->mu); + UNREF_BY(fd, 2, reason); /* drop the reference */ +} + +/* increment refcount by two to avoid changing the orphan bit */ +#ifdef GRPC_FD_REF_COUNT_DEBUG +static void fd_ref(grpc_fd *fd, const char *reason, const char *file, + int line) { + ref_by(fd, 2, reason, file, line); +} + +static void fd_unref(grpc_fd *fd, const char *reason, const char *file, + int line) { + unref_by(fd, 2, reason, file, line); +} +#else +static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); } + +static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); } +#endif + +static void notify_on_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure **st, grpc_closure *closure) { + if (*st == CLOSURE_NOT_READY) { + /* not ready ==> switch to a waiting state by setting the closure */ + *st = closure; + } else if (*st == CLOSURE_READY) { + /* already ready ==> queue the closure to run immediately */ + *st = CLOSURE_NOT_READY; + grpc_exec_ctx_enqueue(exec_ctx, closure, !fd->shutdown, NULL); + } else { + /* upcallptr was set to a different closure. This is an error! */ + gpr_log(GPR_ERROR, + "User called a notify_on function with a previous callback still " + "pending"); + abort(); + } +} + +/* returns 1 if state becomes not ready */ +static int set_ready_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure **st) { + if (*st == CLOSURE_READY) { + /* duplicate ready ==> ignore */ + return 0; + } else if (*st == CLOSURE_NOT_READY) { + /* not ready, and not waiting ==> flag ready */ + *st = CLOSURE_READY; + return 0; + } else { + /* waiting ==> queue closure */ + grpc_exec_ctx_enqueue(exec_ctx, *st, !fd->shutdown, NULL); + *st = CLOSURE_NOT_READY; + return 1; + } +} + +/* Do something here with the pollset island link (?) */ +static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + gpr_mu_lock(&fd->mu); + GPR_ASSERT(!fd->shutdown); + fd->shutdown = 1; + set_ready_locked(exec_ctx, fd, &fd->read_closure); + set_ready_locked(exec_ctx, fd, &fd->write_closure); + gpr_mu_unlock(&fd->mu); +} + +static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + gpr_mu_lock(&fd->mu); + notify_on_locked(exec_ctx, fd, &fd->read_closure, closure); + gpr_mu_unlock(&fd->mu); +} + +static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + gpr_mu_lock(&fd->mu); + notify_on_locked(exec_ctx, fd, &fd->write_closure, closure); + gpr_mu_unlock(&fd->mu); +} + +/******************************************************************************* + * pollset_posix.c + */ + +GPR_TLS_DECL(g_current_thread_poller); +GPR_TLS_DECL(g_current_thread_worker); + +/** The alarm system needs to be able to wakeup 'some poller' sometimes + * (specifically when a new alarm needs to be triggered earlier than the next + * alarm 'epoch'). + * This wakeup_fd gives us something to alert on when such a case occurs. 
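 *
 * (Roughly: kick_poller() writes to grpc_global_wakeup_fd; each pollset's
 * epoll set has the read end of this fd registered with a NULL data.ptr,
 * so a worker blocked in poll()/epoll_wait() wakes up, sees the NULL
 * pointer, consumes the wakeup and returns to pollset_work() to
 * re-evaluate its deadline.)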
*/ +grpc_wakeup_fd grpc_global_wakeup_fd; + +static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->prev->next = worker->next; + worker->next->prev = worker->prev; +} + +static int pollset_has_workers(grpc_pollset *p) { + return p->root_worker.next != &p->root_worker; +} + +static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) { + if (pollset_has_workers(p)) { + grpc_pollset_worker *w = p->root_worker.next; + remove_worker(p, w); + return w; + } else { + return NULL; + } +} + +static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->next = &p->root_worker; + worker->prev = worker->next->prev; + worker->prev->next = worker->next->prev = worker; +} + +static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->prev = &p->root_worker; + worker->next = worker->prev->next; + worker->prev->next = worker->next->prev = worker; +} + +static void pollset_kick_ext(grpc_pollset *p, + grpc_pollset_worker *specific_worker, + uint32_t flags) { + GPR_TIMER_BEGIN("pollset_kick_ext", 0); + + /* pollset->mu already held */ + if (specific_worker != NULL) { + if (specific_worker == GRPC_POLLSET_KICK_BROADCAST) { + GPR_TIMER_BEGIN("pollset_kick_ext.broadcast", 0); + GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); + for (specific_worker = p->root_worker.next; + specific_worker != &p->root_worker; + specific_worker = specific_worker->next) { + grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + } + p->kicked_without_pollers = 1; + GPR_TIMER_END("pollset_kick_ext.broadcast", 0); + } else if (gpr_tls_get(&g_current_thread_worker) != + (intptr_t)specific_worker) { + GPR_TIMER_MARK("different_thread_worker", 0); + if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { + specific_worker->reevaluate_polling_on_wakeup = 1; + } + specific_worker->kicked_specifically = 1; + grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + /* TODO (sreek): Refactor this into a separate file*/ + pthread_kill(specific_worker->pt_id, SIGUSR1); + } else if ((flags & GRPC_POLLSET_CAN_KICK_SELF) != 0) { + GPR_TIMER_MARK("kick_yoself", 0); + if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { + specific_worker->reevaluate_polling_on_wakeup = 1; + } + specific_worker->kicked_specifically = 1; + grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + } + } else if (gpr_tls_get(&g_current_thread_poller) != (intptr_t)p) { + GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); + GPR_TIMER_MARK("kick_anonymous", 0); + specific_worker = pop_front_worker(p); + if (specific_worker != NULL) { + if (gpr_tls_get(&g_current_thread_worker) == (intptr_t)specific_worker) { + GPR_TIMER_MARK("kick_anonymous_not_self", 0); + push_back_worker(p, specific_worker); + specific_worker = pop_front_worker(p); + if ((flags & GRPC_POLLSET_CAN_KICK_SELF) == 0 && + gpr_tls_get(&g_current_thread_worker) == + (intptr_t)specific_worker) { + push_back_worker(p, specific_worker); + specific_worker = NULL; + } + } + if (specific_worker != NULL) { + GPR_TIMER_MARK("finally_kick", 0); + push_back_worker(p, specific_worker); + grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + } + } else { + GPR_TIMER_MARK("kicked_no_pollers", 0); + p->kicked_without_pollers = 1; + } + } + + GPR_TIMER_END("pollset_kick_ext", 0); +} + +static void pollset_kick(grpc_pollset *p, + grpc_pollset_worker *specific_worker) { + pollset_kick_ext(p, specific_worker, 0); +} + +/* global state management */ + +static void sig_handler(int 
sig_num) { + gpr_log(GPR_INFO, "Received signal %d", sig_num); +} + +static void pollset_global_init(void) { + gpr_tls_init(&g_current_thread_poller); + gpr_tls_init(&g_current_thread_worker); + grpc_wakeup_fd_init(&grpc_global_wakeup_fd); + signal(SIGUSR1, sig_handler); +} + +static void pollset_global_shutdown(void) { + grpc_wakeup_fd_destroy(&grpc_global_wakeup_fd); + gpr_tls_destroy(&g_current_thread_poller); + gpr_tls_destroy(&g_current_thread_worker); +} + +static void kick_poller(void) { grpc_wakeup_fd_wakeup(&grpc_global_wakeup_fd); } + +/* TODO: sreek. Try to Remove this forward declaration*/ +static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset); + +/* main interface */ + +static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { + gpr_mu_init(&pollset->mu); + *mu = &pollset->mu; + pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker; + gpr_mu_init(&pollset->pi_mu); + pollset->polling_island = NULL; + pollset->shutting_down = 0; + pollset->called_shutdown = 0; + pollset->kicked_without_pollers = 0; + pollset->local_wakeup_cache = NULL; + pollset->kicked_without_pollers = 0; + + multipoll_with_epoll_pollset_create_efd(pollset); +} + +/* TODO(sreek): Maybe merge multipoll_*_destroy() with pollset_destroy() + * function */ +static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset); + +static void pollset_destroy(grpc_pollset *pollset) { + GPR_ASSERT(!pollset_has_workers(pollset)); + + multipoll_with_epoll_pollset_destroy(pollset); + + while (pollset->local_wakeup_cache) { + grpc_cached_wakeup_fd *next = pollset->local_wakeup_cache->next; + grpc_wakeup_fd_destroy(&pollset->local_wakeup_cache->fd); + gpr_free(pollset->local_wakeup_cache); + pollset->local_wakeup_cache = next; + } + gpr_mu_destroy(&pollset->pi_mu); + gpr_mu_destroy(&pollset->mu); +} + +/* TODO(sreek) - Do something with the pollset island link (??) 
*/ +static void pollset_reset(grpc_pollset *pollset) { + GPR_ASSERT(pollset->shutting_down); + GPR_ASSERT(!pollset_has_workers(pollset)); + pollset->shutting_down = 0; + pollset->called_shutdown = 0; + pollset->kicked_without_pollers = 0; +} + +/* TODO (sreek): Remove multipoll_with_epoll_finish_shutdown() declaration */ +static void multipoll_with_epoll_pollset_finish_shutdown(grpc_pollset *pollset); + +static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { + multipoll_with_epoll_pollset_finish_shutdown(pollset); + grpc_exec_ctx_enqueue(exec_ctx, pollset->shutdown_done, true, NULL); +} + +/* TODO(sreek): Remove multipoll_with_epoll_*_maybe_work_and_unlock declaration + */ +static void multipoll_with_epoll_pollset_maybe_work_and_unlock( + grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker *worker, + gpr_timespec deadline, gpr_timespec now); + +static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker **worker_hdl, gpr_timespec now, + gpr_timespec deadline) { + grpc_pollset_worker worker; + *worker_hdl = &worker; + + /* pollset->mu already held */ + int added_worker = 0; + int locked = 1; + int queued_work = 0; + int keep_polling = 0; + GPR_TIMER_BEGIN("pollset_work", 0); + /* this must happen before we (potentially) drop pollset->mu */ + worker.next = worker.prev = NULL; + worker.reevaluate_polling_on_wakeup = 0; + if (pollset->local_wakeup_cache != NULL) { + worker.wakeup_fd = pollset->local_wakeup_cache; + pollset->local_wakeup_cache = worker.wakeup_fd->next; + } else { + worker.wakeup_fd = gpr_malloc(sizeof(*worker.wakeup_fd)); + grpc_wakeup_fd_init(&worker.wakeup_fd->fd); + } + worker.kicked_specifically = 0; + + /* TODO(sreek): Abstract this thread id stuff out into a separate file */ + worker.pt_id = pthread_self(); + /* If we're shutting down then we don't execute any extended work */ + if (pollset->shutting_down) { + GPR_TIMER_MARK("pollset_work.shutting_down", 0); + goto done; + } + /* Start polling, and keep doing so while we're being asked to + re-evaluate our pollers (this allows poll() based pollers to + ensure they don't miss wakeups) */ + keep_polling = 1; + while (keep_polling) { + keep_polling = 0; + if (!pollset->kicked_without_pollers) { + if (!added_worker) { + push_front_worker(pollset, &worker); + added_worker = 1; + gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); + } + gpr_tls_set(&g_current_thread_poller, (intptr_t)pollset); + GPR_TIMER_BEGIN("maybe_work_and_unlock", 0); + + multipoll_with_epoll_pollset_maybe_work_and_unlock( + exec_ctx, pollset, &worker, deadline, now); + + GPR_TIMER_END("maybe_work_and_unlock", 0); + locked = 0; + gpr_tls_set(&g_current_thread_poller, 0); + } else { + GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); + pollset->kicked_without_pollers = 0; + } + /* Finished execution - start cleaning up. + Note that we may arrive here from outside the enclosing while() loop. + In that case we won't loop though as we haven't added worker to the + worker list, which means nobody could ask us to re-evaluate polling). 
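 *
 * (In outline: the worker is pushed onto the pollset's worker list,
 * pollset->mu is dropped inside
 * multipoll_with_epoll_pollset_maybe_work_and_unlock() for the actual
 * poll/epoll_wait, and the loop is re-entered only if this worker was
 * kicked with GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP.)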
*/ + done: + if (!locked) { + queued_work |= grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&pollset->mu); + locked = 1; + } + /* If we're forced to re-evaluate polling (via pollset_kick with + GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) then we land here and force + a loop */ + if (worker.reevaluate_polling_on_wakeup) { + worker.reevaluate_polling_on_wakeup = 0; + pollset->kicked_without_pollers = 0; + if (queued_work || worker.kicked_specifically) { + /* If there's queued work on the list, then set the deadline to be + immediate so we get back out of the polling loop quickly */ + deadline = gpr_inf_past(GPR_CLOCK_MONOTONIC); + } + keep_polling = 1; + } + } + if (added_worker) { + remove_worker(pollset, &worker); + gpr_tls_set(&g_current_thread_worker, 0); + } + /* release wakeup fd to the local pool */ + worker.wakeup_fd->next = pollset->local_wakeup_cache; + pollset->local_wakeup_cache = worker.wakeup_fd; + /* check shutdown conditions */ + if (pollset->shutting_down) { + if (pollset_has_workers(pollset)) { + pollset_kick(pollset, NULL); + } else if (!pollset->called_shutdown) { + pollset->called_shutdown = 1; + gpr_mu_unlock(&pollset->mu); + finish_shutdown(exec_ctx, pollset); + grpc_exec_ctx_flush(exec_ctx); + /* Continuing to access pollset here is safe -- it is the caller's + * responsibility to not destroy when it has outstanding calls to + * pollset_work. + * TODO(dklempner): Can we refactor the shutdown logic to avoid this? */ + gpr_mu_lock(&pollset->mu); + } + } + *worker_hdl = NULL; + GPR_TIMER_END("pollset_work", 0); +} + +/* TODO: (sreek) Do something with the pollset island link */ +static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_closure *closure) { + GPR_ASSERT(!pollset->shutting_down); + pollset->shutting_down = 1; + pollset->shutdown_done = closure; + pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST); + + if (!pollset->called_shutdown && !pollset_has_workers(pollset)) { + pollset->called_shutdown = 1; + finish_shutdown(exec_ctx, pollset); + } +} + +static int poll_deadline_to_millis_timeout(gpr_timespec deadline, + gpr_timespec now) { + gpr_timespec timeout; + static const int64_t max_spin_polling_us = 10; + if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { + return -1; + } + if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros( + max_spin_polling_us, + GPR_TIMESPAN))) <= 0) { + return 0; + } + timeout = gpr_time_sub(deadline, now); + return gpr_time_to_millis(gpr_time_add( + timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); +} + +/******************************************************************************* + * pollset_multipoller_with_epoll_posix.c + */ + +static void set_ready(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure **st) { + /* only one set_ready can be active at once (but there may be a racing + notify_on) */ + gpr_mu_lock(&fd->mu); + set_ready_locked(exec_ctx, fd, st); + gpr_mu_unlock(&fd->mu); +} + +static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + set_ready(exec_ctx, fd, &fd->read_closure); +} + +static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + set_ready(exec_ctx, fd, &fd->write_closure); +} + +/* TODO (sreek): Maybe this global list is not required. 
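 * (Today the list exists so that remove_fd_from_all_epoll_sets() can issue
 * an EPOLL_CTL_DEL against every known epoll set when an fd is released
 * back to its owner instead of being closed; a closed fd is dropped from
 * its epoll sets by the kernel, a released one is not.)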
Double check*/ +struct epoll_fd_list { + int *epoll_fds; + size_t count; + size_t capacity; +}; + +static struct epoll_fd_list epoll_fd_global_list; +static gpr_once init_epoll_fd_list_mu = GPR_ONCE_INIT; +static gpr_mu epoll_fd_list_mu; + +static void init_mu(void) { gpr_mu_init(&epoll_fd_list_mu); } + +static void add_epoll_fd_to_global_list(int epoll_fd) { + gpr_once_init(&init_epoll_fd_list_mu, init_mu); + + gpr_mu_lock(&epoll_fd_list_mu); + if (epoll_fd_global_list.count == epoll_fd_global_list.capacity) { + epoll_fd_global_list.capacity = + GPR_MAX((size_t)8, epoll_fd_global_list.capacity * 2); + epoll_fd_global_list.epoll_fds = + gpr_realloc(epoll_fd_global_list.epoll_fds, + epoll_fd_global_list.capacity * sizeof(int)); + } + epoll_fd_global_list.epoll_fds[epoll_fd_global_list.count++] = epoll_fd; + gpr_mu_unlock(&epoll_fd_list_mu); +} + +static void remove_epoll_fd_from_global_list(int epoll_fd) { + gpr_mu_lock(&epoll_fd_list_mu); + GPR_ASSERT(epoll_fd_global_list.count > 0); + for (size_t i = 0; i < epoll_fd_global_list.count; i++) { + if (epoll_fd == epoll_fd_global_list.epoll_fds[i]) { + epoll_fd_global_list.epoll_fds[i] = + epoll_fd_global_list.epoll_fds[--(epoll_fd_global_list.count)]; + break; + } + } + gpr_mu_unlock(&epoll_fd_list_mu); +} + +static void remove_fd_from_all_epoll_sets(int fd) { + int err; + gpr_once_init(&init_epoll_fd_list_mu, init_mu); + gpr_mu_lock(&epoll_fd_list_mu); + if (epoll_fd_global_list.count == 0) { + gpr_mu_unlock(&epoll_fd_list_mu); + return; + } + for (size_t i = 0; i < epoll_fd_global_list.count; i++) { + err = epoll_ctl(epoll_fd_global_list.epoll_fds[i], EPOLL_CTL_DEL, fd, NULL); + if (err < 0 && errno != ENOENT) { + gpr_log(GPR_ERROR, "epoll_ctl del for %d failed: %s", fd, + strerror(errno)); + } + } + gpr_mu_unlock(&epoll_fd_list_mu); +} + +/* TODO: sreek - This function multipoll_with_epoll_pollset_add_fd() and + * finally_add_fd() in ev_poll_and_epoll_posix.c */ +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_fd *fd) { + + /* TODO sreek - Check if we need to get a pollset->mu lock here */ + + struct epoll_event ev; + int err; + + /* Hold a ref to the fd to keep it from being closed during the add. This may + result in a spurious wakeup being assigned to this pollset whilst adding, + but that should be benign. */ + /* TODO: (sreek): Understand how a spurious wake up migh be assinged to this + * pollset..and how holding a reference will prevent the fd from being closed + * (and perhaps more importantly, see how can an fd be closed while being + * added to the epollset */ + GRPC_FD_REF(fd, "add fd"); + + gpr_mu_lock(&fd->mu); + if (fd->shutdown) { + gpr_mu_unlock(&fd->mu); + GRPC_FD_UNREF(fd, "add fd"); + return; + } + gpr_mu_unlock(&fd->mu); + + ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); + ev.data.ptr = fd; + err = epoll_ctl(pollset->epoll_fd, EPOLL_CTL_ADD, fd->fd, &ev); + if (err < 0) { + /* FDs may be added to a pollset multiple times, so EEXIST is normal. */ + if (errno != EEXIST) { + gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", fd->fd, + strerror(errno)); + } + } + + /* The fd might have been orphaned while we were adding it to the epoll set. 
+ Close the fd in such a case (which will also take care of removing it from + the epoll set */ + gpr_mu_lock(&fd->mu); + if (fd_is_orphaned(fd) && !fd->closed) { + close_fd_locked(exec_ctx, fd); + } + gpr_mu_unlock(&fd->mu); + + GRPC_FD_UNREF(fd, "add fd"); +} + +/* Creates an epoll fd and initializes the pollset */ +/* TODO: This has to be called ONLY from pollset_init function. and hence it + * does not acquire any lock */ +static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset) { + struct epoll_event ev; + int err; + + pollset->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (pollset->epoll_fd < 0) { + gpr_log(GPR_ERROR, "epoll_create1 failed: %s", strerror(errno)); + abort(); + } + add_epoll_fd_to_global_list(pollset->epoll_fd); + + ev.events = (uint32_t)(EPOLLIN | EPOLLET); + ev.data.ptr = NULL; + + err = epoll_ctl(pollset->epoll_fd, EPOLL_CTL_ADD, + GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), &ev); + if (err < 0) { + gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", + GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), + strerror(errno)); + } +} + +/* TODO(klempner): We probably want to turn this down a bit */ +#define GRPC_EPOLL_MAX_EVENTS 1000 + +static void multipoll_with_epoll_pollset_maybe_work_and_unlock( + grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker *worker, + gpr_timespec deadline, gpr_timespec now) { + struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; + int epoll_fd = pollset->epoll_fd; + int ep_rv; + int poll_rv; + int timeout_ms; + struct pollfd pfds[2]; + + /* If you want to ignore epoll's ability to sanely handle parallel pollers, + * for a more apples-to-apples performance comparison with poll, add a + * if (pollset->counter != 0) { return 0; } + * here. + */ + + gpr_mu_unlock(&pollset->mu); + + timeout_ms = poll_deadline_to_millis_timeout(deadline, now); + + pfds[0].fd = GRPC_WAKEUP_FD_GET_READ_FD(&worker->wakeup_fd->fd); + pfds[0].events = POLLIN; + pfds[0].revents = 0; + pfds[1].fd = epoll_fd; + pfds[1].events = POLLIN; + pfds[1].revents = 0; + + /* TODO(vpai): Consider first doing a 0 timeout poll here to avoid + even going into the blocking annotation if possible */ + GPR_TIMER_BEGIN("poll", 0); + GRPC_SCHEDULING_START_BLOCKING_REGION; + poll_rv = grpc_poll_function(pfds, 2, timeout_ms); + GRPC_SCHEDULING_END_BLOCKING_REGION; + GPR_TIMER_END("poll", 0); + + if (poll_rv < 0) { + if (errno != EINTR) { + gpr_log(GPR_ERROR, "poll() failed: %s", strerror(errno)); + } + } else if (poll_rv == 0) { + /* do nothing */ + } else { + if (pfds[0].revents) { + grpc_wakeup_fd_consume_wakeup(&worker->wakeup_fd->fd); + } + if (pfds[1].revents) { + do { + /* The following epoll_wait never blocks; it has a timeout of 0 */ + ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); + if (ep_rv < 0) { + if (errno != EINTR) { + gpr_log(GPR_ERROR, "epoll_wait() failed: %s", strerror(errno)); + } + } else { + int i; + for (i = 0; i < ep_rv; ++i) { + grpc_fd *fd = ep_ev[i].data.ptr; + /* TODO(klempner): We might want to consider making err and pri + * separate events */ + int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); + int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); + int write_ev = ep_ev[i].events & EPOLLOUT; + if (fd == NULL) { + grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); + } else { + if (read_ev || cancel) { + fd_become_readable(exec_ctx, fd); + } + if (write_ev || cancel) { + fd_become_writable(exec_ctx, fd); + } + } + } + } + } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); + } + } +} + +static void 
multipoll_with_epoll_pollset_finish_shutdown( + grpc_pollset *pollset) {} + +static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset) { + close(pollset->epoll_fd); + remove_epoll_fd_from_global_list(pollset->epoll_fd); +} + +/******************************************************************************* + * pollset_set_posix.c + */ + +static grpc_pollset_set *pollset_set_create(void) { + grpc_pollset_set *pollset_set = gpr_malloc(sizeof(*pollset_set)); + memset(pollset_set, 0, sizeof(*pollset_set)); + gpr_mu_init(&pollset_set->mu); + return pollset_set; +} + +static void pollset_set_destroy(grpc_pollset_set *pollset_set) { + size_t i; + gpr_mu_destroy(&pollset_set->mu); + for (i = 0; i < pollset_set->fd_count; i++) { + GRPC_FD_UNREF(pollset_set->fds[i], "pollset_set"); + } + gpr_free(pollset_set->pollsets); + gpr_free(pollset_set->pollset_sets); + gpr_free(pollset_set->fds); + gpr_free(pollset_set); +} + +static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, + grpc_pollset *pollset) { + size_t i, j; + gpr_mu_lock(&pollset_set->mu); + if (pollset_set->pollset_count == pollset_set->pollset_capacity) { + pollset_set->pollset_capacity = + GPR_MAX(8, 2 * pollset_set->pollset_capacity); + pollset_set->pollsets = + gpr_realloc(pollset_set->pollsets, pollset_set->pollset_capacity * + sizeof(*pollset_set->pollsets)); + } + pollset_set->pollsets[pollset_set->pollset_count++] = pollset; + for (i = 0, j = 0; i < pollset_set->fd_count; i++) { + if (fd_is_orphaned(pollset_set->fds[i])) { + GRPC_FD_UNREF(pollset_set->fds[i], "pollset_set"); + } else { + pollset_add_fd(exec_ctx, pollset, pollset_set->fds[i]); + pollset_set->fds[j++] = pollset_set->fds[i]; + } + } + pollset_set->fd_count = j; + gpr_mu_unlock(&pollset_set->mu); +} + +static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, + grpc_pollset *pollset) { + size_t i; + gpr_mu_lock(&pollset_set->mu); + for (i = 0; i < pollset_set->pollset_count; i++) { + if (pollset_set->pollsets[i] == pollset) { + pollset_set->pollset_count--; + GPR_SWAP(grpc_pollset *, pollset_set->pollsets[i], + pollset_set->pollsets[pollset_set->pollset_count]); + break; + } + } + gpr_mu_unlock(&pollset_set->mu); +} + +static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) { + size_t i, j; + gpr_mu_lock(&bag->mu); + if (bag->pollset_set_count == bag->pollset_set_capacity) { + bag->pollset_set_capacity = GPR_MAX(8, 2 * bag->pollset_set_capacity); + bag->pollset_sets = + gpr_realloc(bag->pollset_sets, + bag->pollset_set_capacity * sizeof(*bag->pollset_sets)); + } + bag->pollset_sets[bag->pollset_set_count++] = item; + for (i = 0, j = 0; i < bag->fd_count; i++) { + if (fd_is_orphaned(bag->fds[i])) { + GRPC_FD_UNREF(bag->fds[i], "pollset_set"); + } else { + pollset_set_add_fd(exec_ctx, item, bag->fds[i]); + bag->fds[j++] = bag->fds[i]; + } + } + bag->fd_count = j; + gpr_mu_unlock(&bag->mu); +} + +static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) { + size_t i; + gpr_mu_lock(&bag->mu); + for (i = 0; i < bag->pollset_set_count; i++) { + if (bag->pollset_sets[i] == item) { + bag->pollset_set_count--; + GPR_SWAP(grpc_pollset_set *, bag->pollset_sets[i], + bag->pollset_sets[bag->pollset_set_count]); + break; + } + } + gpr_mu_unlock(&bag->mu); +} + +static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd) { + size_t i; + 
gpr_mu_lock(&pollset_set->mu); + if (pollset_set->fd_count == pollset_set->fd_capacity) { + pollset_set->fd_capacity = GPR_MAX(8, 2 * pollset_set->fd_capacity); + pollset_set->fds = gpr_realloc( + pollset_set->fds, pollset_set->fd_capacity * sizeof(*pollset_set->fds)); + } + GRPC_FD_REF(fd, "pollset_set"); + pollset_set->fds[pollset_set->fd_count++] = fd; + for (i = 0; i < pollset_set->pollset_count; i++) { + pollset_add_fd(exec_ctx, pollset_set->pollsets[i], fd); + } + for (i = 0; i < pollset_set->pollset_set_count; i++) { + pollset_set_add_fd(exec_ctx, pollset_set->pollset_sets[i], fd); + } + gpr_mu_unlock(&pollset_set->mu); +} + +static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd) { + size_t i; + gpr_mu_lock(&pollset_set->mu); + for (i = 0; i < pollset_set->fd_count; i++) { + if (pollset_set->fds[i] == fd) { + pollset_set->fd_count--; + GPR_SWAP(grpc_fd *, pollset_set->fds[i], + pollset_set->fds[pollset_set->fd_count]); + GRPC_FD_UNREF(fd, "pollset_set"); + break; + } + } + for (i = 0; i < pollset_set->pollset_set_count; i++) { + pollset_set_del_fd(exec_ctx, pollset_set->pollset_sets[i], fd); + } + gpr_mu_unlock(&pollset_set->mu); +} + +/******************************************************************************* + * event engine binding + */ + +static void shutdown_engine(void) { + fd_global_shutdown(); + pollset_global_shutdown(); +} + +static const grpc_event_engine_vtable vtable = { + .pollset_size = sizeof(grpc_pollset), + + .fd_create = fd_create, + .fd_wrapped_fd = fd_wrapped_fd, + .fd_orphan = fd_orphan, + .fd_shutdown = fd_shutdown, + .fd_notify_on_read = fd_notify_on_read, + .fd_notify_on_write = fd_notify_on_write, + + .pollset_init = pollset_init, + .pollset_shutdown = pollset_shutdown, + .pollset_reset = pollset_reset, + .pollset_destroy = pollset_destroy, + .pollset_work = pollset_work, + .pollset_kick = pollset_kick, + .pollset_add_fd = pollset_add_fd, + + .pollset_set_create = pollset_set_create, + .pollset_set_destroy = pollset_set_destroy, + .pollset_set_add_pollset = pollset_set_add_pollset, + .pollset_set_del_pollset = pollset_set_del_pollset, + .pollset_set_add_pollset_set = pollset_set_add_pollset_set, + .pollset_set_del_pollset_set = pollset_set_del_pollset_set, + .pollset_set_add_fd = pollset_set_add_fd, + .pollset_set_del_fd = pollset_set_del_fd, + + .kick_poller = kick_poller, + + .shutdown_engine = shutdown_engine, +}; + +const grpc_event_engine_vtable *grpc_init_epoll_linux(void) { + fd_global_init(); + pollset_global_init(); + polling_island_global_init(); + return &vtable; +} + +#endif diff --git a/src/core/lib/iomgr/ev_epoll_linux.h b/src/core/lib/iomgr/ev_epoll_linux.h new file mode 100644 index 0000000000..8c819975a4 --- /dev/null +++ b/src/core/lib/iomgr/ev_epoll_linux.h @@ -0,0 +1,41 @@ +/* + * + * Copyright 2015, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef GRPC_CORE_LIB_IOMGR_EV_EPOLL_LINUX_H +#define GRPC_CORE_LIB_IOMGR_EV_EPOLL_LINUX_H + +#include "src/core/lib/iomgr/ev_posix.h" + +const grpc_event_engine_vtable *grpc_init_epoll_linux(void); + +#endif /* GRPC_CORE_LIB_IOMGR_EV_EPOLL_LINUX_H */ diff --git a/src/core/lib/iomgr/ev_epoll_posix.c b/src/core/lib/iomgr/ev_epoll_posix.c index 4481bab438..5abd5b2a94 100644 --- a/src/core/lib/iomgr/ev_epoll_posix.c +++ b/src/core/lib/iomgr/ev_epoll_posix.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -51,11 +52,13 @@ #include #include +#include "src/core/lib/iomgr/ev_posix.h" #include "src/core/lib/iomgr/iomgr_internal.h" #include "src/core/lib/iomgr/wakeup_fd_posix.h" #include "src/core/lib/profiling/timers.h" #include "src/core/lib/support/block_annotate.h" + /******************************************************************************* * FD declarations */ @@ -133,10 +136,9 @@ struct grpc_pollset { int called_shutdown; int kicked_without_pollers; grpc_closure *shutdown_done; - union { - int fd; - void *ptr; - } data; + + int epoll_fd; + /* Local cache of eventfds for workers */ grpc_cached_wakeup_fd *local_wakeup_cache; }; @@ -589,7 +591,6 @@ static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { pollset->local_wakeup_cache = NULL; pollset->kicked_without_pollers = 0; - pollset->data.ptr = NULL; multipoll_with_epoll_pollset_create_efd(pollset); } @@ -619,22 +620,6 @@ static void pollset_reset(grpc_pollset *pollset) { pollset->kicked_without_pollers = 0; } -/* TODO (sreek): Remove multipoll_with_epoll_add_fd declaration*/ -static void multipoll_with_epoll_pollset_add_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset, - grpc_fd *fd); - -static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_fd *fd) { - /* TODO (sreek) - Does reading pollset->data.ptr need pollset->mu lock ? - * because finally_add_fd() also reads it but without the lock! 
*/ - gpr_mu_lock(&pollset->mu); - GPR_ASSERT(pollset->data.ptr != NULL); - gpr_mu_unlock(&pollset->mu); - - multipoll_with_epoll_pollset_add_fd(exec_ctx, pollset, fd); -} - /* TODO (sreek): Remove multipoll_with_epoll_finish_shutdown() declaration */ static void multipoll_with_epoll_pollset_finish_shutdown(grpc_pollset *pollset); @@ -790,20 +775,6 @@ static int poll_deadline_to_millis_timeout(gpr_timespec deadline, * pollset_multipoller_with_epoll_posix.c */ -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "src/core/lib/iomgr/ev_posix.h" -#include "src/core/lib/profiling/timers.h" -#include "src/core/lib/support/block_annotate.h" - static void set_ready(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure **st) { /* only one set_ready can be active at once (but there may be a racing notify_on) */ @@ -879,13 +850,13 @@ static void remove_fd_from_all_epoll_sets(int fd) { gpr_mu_unlock(&epoll_fd_list_mu); } -typedef struct { int epoll_fd; } epoll_hdr; - -static void finally_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, +/* TODO: sreek - This function multipoll_with_epoll_pollset_add_fd() and + * finally_add_fd() in ev_poll_and_epoll_posix.c */ +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { - /*TODO: (sree) Shouldn't this read (pollset->data.ptr) be done under a - pollset lock - i.e pollset->mu ? */ - epoll_hdr *h = pollset->data.ptr; + + /* TODO sreek - Check if we need to get a pollset->mu lock here */ + struct epoll_event ev; int err; @@ -908,7 +879,7 @@ static void finally_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); ev.data.ptr = fd; - err = epoll_ctl(h->epoll_fd, EPOLL_CTL_ADD, fd->fd, &ev); + err = epoll_ctl(pollset->epoll_fd, EPOLL_CTL_ADD, fd->fd, &ev); if (err < 0) { /* FDs may be added to a pollset multiple times, so EEXIST is normal. */ if (errno != EEXIST) { @@ -933,26 +904,20 @@ static void finally_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, /* TODO: This has to be called ONLY from pollset_init function. and hence it * does not acquire any lock */ static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset) { - epoll_hdr *h = gpr_malloc(sizeof(epoll_hdr)); struct epoll_event ev; int err; - /* TODO (sreek). remove this assert. 
Currently added this just to ensure that - * we do not overwrite h->epoll_fd without freeing the older one*/ - GPR_ASSERT(pollset->data.ptr == NULL); - - pollset->data.ptr = h; - h->epoll_fd = epoll_create1(EPOLL_CLOEXEC); - if (h->epoll_fd < 0) { + pollset->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (pollset->epoll_fd < 0) { gpr_log(GPR_ERROR, "epoll_create1 failed: %s", strerror(errno)); abort(); } - add_epoll_fd_to_global_list(h->epoll_fd); + add_epoll_fd_to_global_list(pollset->epoll_fd); ev.events = (uint32_t)(EPOLLIN | EPOLLET); ev.data.ptr = NULL; - err = epoll_ctl(h->epoll_fd, EPOLL_CTL_ADD, + err = epoll_ctl(pollset->epoll_fd, EPOLL_CTL_ADD, GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), &ev); if (err < 0) { gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", @@ -961,12 +926,6 @@ static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset) { } } -static void multipoll_with_epoll_pollset_add_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset, - grpc_fd *fd) { - finally_add_fd(exec_ctx, pollset, fd); -} - /* TODO(klempner): We probably want to turn this down a bit */ #define GRPC_EPOLL_MAX_EVENTS 1000 @@ -974,9 +933,9 @@ static void multipoll_with_epoll_pollset_maybe_work_and_unlock( grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker *worker, gpr_timespec deadline, gpr_timespec now) { struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; + int epoll_fd = pollset->epoll_fd; int ep_rv; int poll_rv; - epoll_hdr *h = pollset->data.ptr; int timeout_ms; struct pollfd pfds[2]; @@ -993,7 +952,7 @@ static void multipoll_with_epoll_pollset_maybe_work_and_unlock( pfds[0].fd = GRPC_WAKEUP_FD_GET_READ_FD(&worker->wakeup_fd->fd); pfds[0].events = POLLIN; pfds[0].revents = 0; - pfds[1].fd = h->epoll_fd; + pfds[1].fd = epoll_fd; pfds[1].events = POLLIN; pfds[1].revents = 0; @@ -1018,7 +977,7 @@ static void multipoll_with_epoll_pollset_maybe_work_and_unlock( if (pfds[1].revents) { do { /* The following epoll_wait never blocks; it has a timeout of 0 */ - ep_rv = epoll_wait(h->epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); + ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); if (ep_rv < 0) { if (errno != EINTR) { gpr_log(GPR_ERROR, "epoll_wait() failed: %s", strerror(errno)); @@ -1053,10 +1012,8 @@ static void multipoll_with_epoll_pollset_finish_shutdown( grpc_pollset *pollset) {} static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset) { - epoll_hdr *h = pollset->data.ptr; - close(h->epoll_fd); - remove_epoll_fd_from_global_list(h->epoll_fd); - gpr_free(h); + close(pollset->epoll_fd); + remove_epoll_fd_from_global_list(pollset->epoll_fd); } /******************************************************************************* diff --git a/src/core/lib/iomgr/ev_posix.c b/src/core/lib/iomgr/ev_posix.c index baa3b9856a..404ef2a64b 100644 --- a/src/core/lib/iomgr/ev_posix.c +++ b/src/core/lib/iomgr/ev_posix.c @@ -44,7 +44,7 @@ #include #include -#include "src/core/lib/iomgr/ev_epoll_posix.h" +#include "src/core/lib/iomgr/ev_epoll_linux.h" #include "src/core/lib/iomgr/ev_poll_posix.h" #include "src/core/lib/support/env.h" @@ -163,11 +163,6 @@ void grpc_fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, g_event_engine->fd_notify_on_write(exec_ctx, fd, closure); } -grpc_pollset *grpc_fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, - grpc_fd *fd) { - return g_event_engine->fd_get_read_notifier_pollset(exec_ctx, fd); -} - size_t grpc_pollset_size(void) { return g_event_engine->pollset_size; } void grpc_pollset_init(grpc_pollset *pollset, 
gpr_mu **mu) { diff --git a/src/python/grpcio/grpc_core_dependencies.py b/src/python/grpcio/grpc_core_dependencies.py index 49b4ddc457..13bc6888d6 100644 --- a/src/python/grpcio/grpc_core_dependencies.py +++ b/src/python/grpcio/grpc_core_dependencies.py @@ -94,6 +94,7 @@ CORE_SOURCE_FILES = [ 'src/core/lib/iomgr/endpoint.c', 'src/core/lib/iomgr/endpoint_pair_posix.c', 'src/core/lib/iomgr/endpoint_pair_windows.c', + 'src/core/lib/iomgr/ev_epoll_linux.c', 'src/core/lib/iomgr/ev_epoll_posix.c', 'src/core/lib/iomgr/ev_poll_posix.c', 'src/core/lib/iomgr/ev_posix.c', diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index 36b25cca16..d968278f2a 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -807,6 +807,7 @@ src/core/lib/http/parser.h \ src/core/lib/iomgr/closure.h \ src/core/lib/iomgr/endpoint.h \ src/core/lib/iomgr/endpoint_pair.h \ +src/core/lib/iomgr/ev_epoll_linux.h \ src/core/lib/iomgr/ev_epoll_posix.h \ src/core/lib/iomgr/ev_poll_posix.h \ src/core/lib/iomgr/ev_posix.h \ @@ -955,6 +956,7 @@ src/core/lib/iomgr/closure.c \ src/core/lib/iomgr/endpoint.c \ src/core/lib/iomgr/endpoint_pair_posix.c \ src/core/lib/iomgr/endpoint_pair_windows.c \ +src/core/lib/iomgr/ev_epoll_linux.c \ src/core/lib/iomgr/ev_epoll_posix.c \ src/core/lib/iomgr/ev_poll_posix.c \ src/core/lib/iomgr/ev_posix.c \ diff --git a/tools/run_tests/sources_and_headers.json b/tools/run_tests/sources_and_headers.json index 4394049586..97cc55db36 100644 --- a/tools/run_tests/sources_and_headers.json +++ b/tools/run_tests/sources_and_headers.json @@ -5530,6 +5530,7 @@ "src/core/lib/iomgr/closure.h", "src/core/lib/iomgr/endpoint.h", "src/core/lib/iomgr/endpoint_pair.h", + "src/core/lib/iomgr/ev_epoll_linux.h", "src/core/lib/iomgr/ev_epoll_posix.h", "src/core/lib/iomgr/ev_poll_posix.h", "src/core/lib/iomgr/ev_posix.h", @@ -5630,6 +5631,8 @@ "src/core/lib/iomgr/endpoint_pair.h", "src/core/lib/iomgr/endpoint_pair_posix.c", "src/core/lib/iomgr/endpoint_pair_windows.c", + "src/core/lib/iomgr/ev_epoll_linux.c", + "src/core/lib/iomgr/ev_epoll_linux.h", "src/core/lib/iomgr/ev_epoll_posix.c", "src/core/lib/iomgr/ev_epoll_posix.h", "src/core/lib/iomgr/ev_poll_posix.c", diff --git a/vsprojects/vcxproj/grpc/grpc.vcxproj b/vsprojects/vcxproj/grpc/grpc.vcxproj index 55304af586..a67e4d16da 100644 --- a/vsprojects/vcxproj/grpc/grpc.vcxproj +++ b/vsprojects/vcxproj/grpc/grpc.vcxproj @@ -316,6 +316,7 @@ + @@ -484,6 +485,8 @@ + + diff --git a/vsprojects/vcxproj/grpc/grpc.vcxproj.filters b/vsprojects/vcxproj/grpc/grpc.vcxproj.filters index 7d1c90fda7..bf9b7dc7dc 100644 --- a/vsprojects/vcxproj/grpc/grpc.vcxproj.filters +++ b/vsprojects/vcxproj/grpc/grpc.vcxproj.filters @@ -55,6 +55,9 @@ src\core\lib\iomgr + + src\core\lib\iomgr + src\core\lib\iomgr @@ -677,6 +680,9 @@ src\core\lib\iomgr + + src\core\lib\iomgr + src\core\lib\iomgr diff --git a/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj b/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj index 3d0cdfc668..afc9a2ca1b 100644 --- a/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj +++ b/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj @@ -304,6 +304,7 @@ + @@ -450,6 +451,8 @@ + + diff --git a/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj.filters b/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj.filters index d2ff4c630f..b7507f9a96 100644 --- a/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj.filters +++ b/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj.filters @@ 
-58,6 +58,9 @@ src\core\lib\iomgr + + src\core\lib\iomgr + src\core\lib\iomgr @@ -575,6 +578,9 @@ src\core\lib\iomgr + + src\core\lib\iomgr + src\core\lib\iomgr -- cgit v1.2.3 From 9442bab5d303d7bd33e9406129ad897588d07111 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 20 May 2016 17:54:06 -0700 Subject: Write most of the methods in the new epoll implementation --- src/core/lib/iomgr/ev_epoll_linux.c | 301 +++++++++++++++++++++++++++++------- 1 file changed, 244 insertions(+), 57 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index f257ac8a1d..0d30bb659b 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -150,28 +150,84 @@ typedef struct polling_island { static gpr_mu g_pi_freelist_mu; static polling_island *g_pi_freelist = NULL; -/* TODO: sreek - Should we hold a lock on fd or add a ref to the fd ? */ -static void add_fd_to_polling_island_locked(polling_island *pi, grpc_fd *fd) { +/* TODO: sreek - Should we hold a lock on fd or add a ref to the fd ? + * TODO: sreek - Should this add a ref to the grpc_fd ? */ +/* The caller is expected to hold pi->mu lock before calling this function */ +static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, + size_t fd_count) { int err; + size_t i; struct epoll_event ev; - ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); - ev.data.ptr = fd; - err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fd->fd, &ev); + for (i = 0; i < fd_count; i++) { + ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); + ev.data.ptr = fds[i]; + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev); + + if (err < 0 && errno != EEXIST) { + gpr_log(GPR_ERROR, "epoll_ctl add for fd: %d failed with error: %s", + fds[i]->fd, strerror(errno)); + /* TODO: sreek - Not sure if it is a good idea to continue here. We need a + * better way to bubble up this error instead of doing an abort() */ + continue; + } - if (err < 0 && errno != EEXIST) { - gpr_log(GPR_ERROR, "epoll_ctl add for fd: %d failed with error: %s", fd->fd, - strerror(errno)); - return; + if (pi->fd_cnt == pi->fd_capacity) { + pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2); + pi->fds = gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity); + } + + pi->fds[pi->fd_cnt++] = fds[i]; + } +} + +/* TODO: sreek - Should we hold a lock on fd or add a ref to the fd ? + * TODO: sreek - Might have to unref the fds (assuming whether we add a ref to + * the fd when adding it to the epollset) */ +/* The caller is expected to hold pi->mu lock before calling this function */ +static void polling_island_clear_fds_locked(polling_island *pi) { + int err; + size_t i; + + for (i = 0; i < pi->fd_cnt; i++) { + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL); + + if (err < 0 && errno != ENOENT) { + gpr_log(GPR_ERROR, + "epoll_ctl delete for fds[i]: %d failed with error: %s", i, + pi->fds[i]->fd, strerror(errno)); + /* TODO: sreek - Not sure if it is a good idea to continue here. We need a + * better way to bubble up this error instead of doing an abort() */ + continue; + } + } + + pi->fd_cnt = 0; +} + +/* TODO: sreek - Should we hold a lock on fd or add a ref to the fd ? 
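 * (Removal below is best-effort: an ENOENT from EPOLL_CTL_DEL is ignored,
 * since the fd may already have been closed or may never have been added,
 * and the fd's slot in pi->fds is reclaimed by swapping in the last
 * element.)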
+ * TODO: sreek - Might have to unref the fd (assuming whether we add a ref to + * the fd when adding it to the epollset) */ +/* The caller is expected to hold pi->mu lock before calling this function */ +static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd) { + int err; + size_t i; + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL); + if (err < 0 && errno != ENOENT) { + gpr_log(GPR_ERROR, "epoll_ctl delete for fd: %d failed with error; %s", + fd->fd, strerror(errno)); } - pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2); - pi->fds = gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity); - pi->fds[pi->fd_cnt++] = fd; + for (i = 0; i < pi->fd_cnt; i++) { + if (pi->fds[i] == fd) { + pi->fds[i] = pi->fds[--pi->fd_cnt]; + break; + } + } } -static polling_island *polling_island_create(int initial_ref_cnt, - grpc_fd *initial_fd) { +static polling_island *polling_island_create(grpc_fd *initial_fd, + int initial_ref_cnt) { polling_island *pi = NULL; gpr_mu_lock(&g_pi_freelist_mu); if (g_pi_freelist != NULL) { @@ -202,17 +258,151 @@ static polling_island *polling_island_create(int initial_ref_cnt, pi->next_free = NULL; if (initial_fd != NULL) { - /* add_fd_to_polling_island_locked() expects the caller to hold a pi->mu + /* polling_island_add_fds_locked() expects the caller to hold a pi->mu * lock. However, since this is a new polling island (and no one has a * reference to it yet), it is okay to not acquire pi->mu here */ - add_fd_to_polling_island_locked(pi, initial_fd); + polling_island_add_fds_locked(pi, &initial_fd, 1); } return pi; } +static void polling_island_delete(polling_island *pi) { + GPR_ASSERT(pi->ref_cnt == 0); + GPR_ASSERT(pi->fd_cnt == 0); + + pi->merged_to = NULL; + + gpr_mu_lock(&g_pi_freelist_mu); + pi->next_free = g_pi_freelist; + g_pi_freelist = pi; + gpr_mu_unlock(&g_pi_freelist_mu); +} + +void polling_island_unref_and_unlock(polling_island *pi, int unref_by) { + pi->ref_cnt -= unref_by; + int ref_cnt = pi->ref_cnt; + GPR_ASSERT(ref_cnt >= 0); + + gpr_mu_unlock(&pi->mu); + + if (ref_cnt == 0) { + polling_island_delete(pi); + } +} + +polling_island *polling_island_update_and_lock(polling_island *pi, int unref_by, + int add_ref_by) { + polling_island *next = NULL; + gpr_mu_lock(&pi->mu); + while (pi->merged_to != NULL) { + next = pi->merged_to; + polling_island_unref_and_unlock(pi, unref_by); + pi = next; + gpr_mu_lock(&pi->mu); + } + + pi->ref_cnt += add_ref_by; + return pi; +} + +void polling_island_pair_update_and_lock(polling_island **p, + polling_island **q) { + polling_island *pi_1 = *p; + polling_island *pi_2 = *q; + polling_island *temp = NULL; + bool pi_1_locked = false; + bool pi_2_locked = false; + int num_swaps = 0; + + while (pi_1 != pi_2 && !(pi_1_locked && pi_2_locked)) { + // pi_1 is NOT equal to pi_2 + // pi_1 MAY be locked + + if (pi_1 > pi_2) { + if (pi_1_locked) { + gpr_mu_unlock(&pi_1->mu); + pi_1_locked = false; + } + + GPR_SWAP(polling_island *, pi_1, pi_2); + num_swaps++; + } + + // p1 < p2 + // p1 MAY BE locked + // p2 is NOT locked + + if (!pi_1_locked) { + gpr_mu_lock(&pi_1->mu); + pi_1_locked = true; + + if (pi_1->merged_to != NULL) { + temp = pi_1->merged_to; + polling_island_unref_and_unlock(pi_1, 1); + pi_1 = temp; + pi_1_locked = false; + + continue; + } + } + + // p1 is LOCKED + // p2 is UNLOCKED + // p1 != p2 + + gpr_mu_lock(&pi_2->mu); + pi_2_locked = true; + + if (pi_2->merged_to != NULL) { + temp = pi_2->merged_to; + polling_island_unref_and_unlock(pi_2, 1); + pi_2 = temp; + pi_2_locked 
= false; + } + } + + // Either pi_1 == pi_2 OR we got both locks! + if (pi_1 == pi_2) { + GPR_ASSERT(pi_1_locked || (!pi_1_locked && !pi_2_locked)); + if (!pi_1_locked) { + pi_1 = pi_2 = polling_island_update_and_lock(pi_1, 2, 0); + } + } else { + GPR_ASSERT(pi_1_locked && pi_2_locked); + if (num_swaps % 2 > 0) { + GPR_SWAP(polling_island *, pi_1, pi_2); + } + } + + *p = pi_1; + *q = pi_2; +} + +polling_island *polling_island_merge(polling_island *p, polling_island *q) { + polling_island *merged = NULL; + + polling_island_pair_update_and_lock(&p, &q); + + /* TODO: sreek: Think about this scenario some more. Is it possible ?. what + * does it mean, when would this happen */ + if (p == q) { + merged = p; + } + + // Move all the fds from polling_island p to polling_island q + polling_island_add_fds_locked(q, p->fds, p->fd_cnt); + polling_island_clear_fds_locked(p); + + q->ref_cnt += p->ref_cnt; + + gpr_mu_unlock(&p->mu); + gpr_mu_unlock(&q->mu); + + return merged; +} + static void polling_island_global_init() { - polling_island_create(0, NULL); /* TODO(sreek): Delete this line */ gpr_mu_init(&g_pi_freelist_mu); g_pi_freelist = NULL; } @@ -245,7 +435,7 @@ struct grpc_pollset { int epoll_fd; - /* Mutex protecting the 'polling_island' field */ + /* Mutex protecting the 'polling_island' field */ gpr_mu pi_mu; /* The polling island to which this fd belongs to. An fd belongs to exactly @@ -319,7 +509,8 @@ struct grpc_pollset_set { * fd_posix.c */ -/* We need to keep a freelist not because of any concerns of malloc performance +/* We need to keep a freelist not because of any concerns of malloc + * performance * but instead so that implementations with multiple threads in (for example) * epoll_wait deal with the race between pollset removal and incoming poll * notifications. 
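
The hunk above introduces address-ordered, hand-over-hand locking for pairs of polling islands: whenever two islands must be locked together, the one at the lower address is locked first, and the code walks each island's merged_to chain until it holds the terminal node of both. Below is a minimal standalone sketch of just the lock-ordering discipline, with plain pthread mutexes standing in for gpr_mu; the names island and lock_pair are illustrative only, and the merged_to walking and swap tracking done by the real polling_island_pair_update_and_lock() are deliberately omitted.

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-in for polling_island: only the mutex matters here. */
typedef struct island {
  pthread_mutex_t mu;
  int id;
} island;

/* Deadlock avoidance: always take the lower-address lock first, regardless
   of the order in which the caller passed the two islands. */
static void lock_pair(island *a, island *b) {
  if (a == b) {
    pthread_mutex_lock(&a->mu);
    return;
  }
  if (a > b) {
    island *tmp = a;
    a = b;
    b = tmp;
  }
  pthread_mutex_lock(&a->mu);
  pthread_mutex_lock(&b->mu);
}

static void unlock_pair(island *a, island *b) {
  pthread_mutex_unlock(&a->mu);
  if (a != b) {
    pthread_mutex_unlock(&b->mu);
  }
}

int main(void) {
  island x = {PTHREAD_MUTEX_INITIALIZER, 1};
  island y = {PTHREAD_MUTEX_INITIALIZER, 2};
  /* Whichever order the caller uses, the actual lock order is identical,
     so two threads merging the same pair cannot deadlock. */
  lock_pair(&x, &y);
  printf("locked islands %d and %d\n", x.id, y.id);
  unlock_pair(&x, &y);
  lock_pair(&y, &x);
  printf("locked islands %d and %d again\n", y.id, x.id);
  unlock_pair(&y, &x);
  return 0;
}

The real code additionally counts swaps (num_swaps) so that, after locking, *p and *q still point at the same logical islands the caller passed in.
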
@@ -434,6 +625,7 @@ static void fd_global_shutdown(void) { static grpc_fd *fd_create(int fd, const char *name) { grpc_fd *r = alloc_fd(fd); + char *name2; gpr_asprintf(&name2, "%s fd=%d", name, fd); grpc_iomgr_register_object(&r->iomgr_object, name2); @@ -453,6 +645,20 @@ static void close_fd_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { if (!fd->released) { close(fd->fd); } else { + /* TODO: sreek - Check for deadlocks */ + + gpr_mu_lock(&fd->pi_mu); + fd->polling_island = + polling_island_update_and_lock(fd->polling_island, 1, 0); + + polling_island_remove_fd_locked(fd->polling_island, fd); + polling_island_unref_and_unlock(fd->polling_island, 1); + + fd->polling_island = NULL; + gpr_mu_unlock(&fd->pi_mu); + + + /* TODO: sreek - This should be no longer needed */ remove_fd_from_all_epoll_sets(fd->fd); } grpc_exec_ctx_enqueue(exec_ctx, fd->on_done_closure, true, NULL); @@ -752,7 +958,8 @@ static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { grpc_exec_ctx_enqueue(exec_ctx, pollset->shutdown_done, true, NULL); } -/* TODO(sreek): Remove multipoll_with_epoll_*_maybe_work_and_unlock declaration +/* TODO(sreek): Remove multipoll_with_epoll_*_maybe_work_and_unlock + * declaration */ static void multipoll_with_epoll_pollset_maybe_work_and_unlock( grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker *worker, @@ -979,50 +1186,30 @@ static void remove_fd_from_all_epoll_sets(int fd) { * finally_add_fd() in ev_poll_and_epoll_posix.c */ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { - /* TODO sreek - Check if we need to get a pollset->mu lock here */ + gpr_mu_lock(&pollset->pi_mu); + gpr_mu_lock(&fd->pi_mu); - struct epoll_event ev; - int err; - - /* Hold a ref to the fd to keep it from being closed during the add. This may - result in a spurious wakeup being assigned to this pollset whilst adding, - but that should be benign. */ - /* TODO: (sreek): Understand how a spurious wake up migh be assinged to this - * pollset..and how holding a reference will prevent the fd from being closed - * (and perhaps more importantly, see how can an fd be closed while being - * added to the epollset */ - GRPC_FD_REF(fd, "add fd"); + polling_island *pi_new = NULL; - gpr_mu_lock(&fd->mu); - if (fd->shutdown) { - gpr_mu_unlock(&fd->mu); - GRPC_FD_UNREF(fd, "add fd"); - return; - } - gpr_mu_unlock(&fd->mu); - - ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); - ev.data.ptr = fd; - err = epoll_ctl(pollset->epoll_fd, EPOLL_CTL_ADD, fd->fd, &ev); - if (err < 0) { - /* FDs may be added to a pollset multiple times, so EEXIST is normal. */ - if (errno != EEXIST) { - gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", fd->fd, - strerror(errno)); + if (fd->polling_island == pollset->polling_island) { + pi_new = fd->polling_island; + if (pi_new == NULL) { + pi_new = polling_island_create(fd, 2); } - } + } else if (fd->polling_island == NULL) { + pi_new = polling_island_update_and_lock(pollset->polling_island, 1, 1); - /* The fd might have been orphaned while we were adding it to the epoll set. 
- Close the fd in such a case (which will also take care of removing it from - the epoll set */ - gpr_mu_lock(&fd->mu); - if (fd_is_orphaned(fd) && !fd->closed) { - close_fd_locked(exec_ctx, fd); + } else if (pollset->polling_island == NULL) { + pi_new = polling_island_update_and_lock(fd->polling_island, 1, 1); + } else { // Non null and different + pi_new = polling_island_merge(fd->polling_island, pollset->polling_island); } - gpr_mu_unlock(&fd->mu); - GRPC_FD_UNREF(fd, "add fd"); + fd->polling_island = pollset->polling_island = pi_new; + + gpr_mu_unlock(&fd->pi_mu); + gpr_mu_unlock(&pollset->pi_mu); } /* Creates an epoll fd and initializes the pollset */ -- cgit v1.2.3 From d806145573f8e78a52012b4b8ab94ff46b855d58 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 20 May 2016 18:12:30 -0700 Subject: Removed epoll_fd_global_list --- src/core/lib/iomgr/ev_epoll_linux.c | 68 +------------------------------------ 1 file changed, 1 insertion(+), 67 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 0d30bb659b..7793a95201 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -483,8 +483,6 @@ typedef void (*platform_become_multipoller_type)(grpc_exec_ctx *exec_ctx, * be locked) */ static int pollset_has_workers(grpc_pollset *pollset); -static void remove_fd_from_all_epoll_sets(int fd); - /******************************************************************************* * pollset_set definitions */ @@ -656,11 +654,8 @@ static void close_fd_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { fd->polling_island = NULL; gpr_mu_unlock(&fd->pi_mu); - - - /* TODO: sreek - This should be no longer needed */ - remove_fd_from_all_epoll_sets(fd->fd); } + grpc_exec_ctx_enqueue(exec_ctx, fd->on_done_closure, true, NULL); } @@ -1123,65 +1118,6 @@ static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { set_ready(exec_ctx, fd, &fd->write_closure); } -/* TODO (sreek): Maybe this global list is not required. 
Double check*/ -struct epoll_fd_list { - int *epoll_fds; - size_t count; - size_t capacity; -}; - -static struct epoll_fd_list epoll_fd_global_list; -static gpr_once init_epoll_fd_list_mu = GPR_ONCE_INIT; -static gpr_mu epoll_fd_list_mu; - -static void init_mu(void) { gpr_mu_init(&epoll_fd_list_mu); } - -static void add_epoll_fd_to_global_list(int epoll_fd) { - gpr_once_init(&init_epoll_fd_list_mu, init_mu); - - gpr_mu_lock(&epoll_fd_list_mu); - if (epoll_fd_global_list.count == epoll_fd_global_list.capacity) { - epoll_fd_global_list.capacity = - GPR_MAX((size_t)8, epoll_fd_global_list.capacity * 2); - epoll_fd_global_list.epoll_fds = - gpr_realloc(epoll_fd_global_list.epoll_fds, - epoll_fd_global_list.capacity * sizeof(int)); - } - epoll_fd_global_list.epoll_fds[epoll_fd_global_list.count++] = epoll_fd; - gpr_mu_unlock(&epoll_fd_list_mu); -} - -static void remove_epoll_fd_from_global_list(int epoll_fd) { - gpr_mu_lock(&epoll_fd_list_mu); - GPR_ASSERT(epoll_fd_global_list.count > 0); - for (size_t i = 0; i < epoll_fd_global_list.count; i++) { - if (epoll_fd == epoll_fd_global_list.epoll_fds[i]) { - epoll_fd_global_list.epoll_fds[i] = - epoll_fd_global_list.epoll_fds[--(epoll_fd_global_list.count)]; - break; - } - } - gpr_mu_unlock(&epoll_fd_list_mu); -} - -static void remove_fd_from_all_epoll_sets(int fd) { - int err; - gpr_once_init(&init_epoll_fd_list_mu, init_mu); - gpr_mu_lock(&epoll_fd_list_mu); - if (epoll_fd_global_list.count == 0) { - gpr_mu_unlock(&epoll_fd_list_mu); - return; - } - for (size_t i = 0; i < epoll_fd_global_list.count; i++) { - err = epoll_ctl(epoll_fd_global_list.epoll_fds[i], EPOLL_CTL_DEL, fd, NULL); - if (err < 0 && errno != ENOENT) { - gpr_log(GPR_ERROR, "epoll_ctl del for %d failed: %s", fd, - strerror(errno)); - } - } - gpr_mu_unlock(&epoll_fd_list_mu); -} - /* TODO: sreek - This function multipoll_with_epoll_pollset_add_fd() and * finally_add_fd() in ev_poll_and_epoll_posix.c */ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, @@ -1224,7 +1160,6 @@ static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset) { gpr_log(GPR_ERROR, "epoll_create1 failed: %s", strerror(errno)); abort(); } - add_epoll_fd_to_global_list(pollset->epoll_fd); ev.events = (uint32_t)(EPOLLIN | EPOLLET); ev.data.ptr = NULL; @@ -1325,7 +1260,6 @@ static void multipoll_with_epoll_pollset_finish_shutdown( static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset) { close(pollset->epoll_fd); - remove_epoll_fd_from_global_list(pollset->epoll_fd); } /******************************************************************************* -- cgit v1.2.3 From 96b2554313120949dd26e3c0968e8aea9b8a650f Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 20 May 2016 18:14:48 -0700 Subject: ctiller's ev_epoll_linux.c file (for reference) --- src/core/lib/iomgr/ctiller_ev_epoll_linux.c | 461 ++++++++++++++++++++++++++++ 1 file changed, 461 insertions(+) create mode 100644 src/core/lib/iomgr/ctiller_ev_epoll_linux.c (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ctiller_ev_epoll_linux.c b/src/core/lib/iomgr/ctiller_ev_epoll_linux.c new file mode 100644 index 0000000000..23c20a77aa --- /dev/null +++ b/src/core/lib/iomgr/ctiller_ev_epoll_linux.c @@ -0,0 +1,461 @@ +/* + * + * Copyright 2015-2016, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "src/core/lib/iomgr/ev_epoll_linux.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "src/core/lib/iomgr/iomgr_internal.h" + +/* TODO(sreek) Remove this file */ + + +//////////////////////////////////////////////////////////////////////////////// +// Definitions + +#define STATE_NOT_READY ((gpr_atm)0) +#define STATE_READY ((gpr_atm)1) + +typedef enum { POLLABLE_FD, POLLABLE_EPOLL_SET } pollable_type; + +typedef struct { + pollable_type type; + int fd; + grpc_iomgr_object iomgr_object; +} pollable_object; + +typedef struct polling_island { + pollable_object pollable; + gpr_mu mu; + int refs; + grpc_fd *only_fd; + struct polling_island *became; + struct polling_island *next; +} polling_island; + +struct grpc_fd { + pollable_object pollable; + + // each event atomic is a tri state: + // STATE_NOT_READY - no event received, nobody waiting for it either + // STATE_READY - event received, nobody waiting for it + // closure pointer - no event received, upper layer is waiting for it + gpr_atm on_readable; + gpr_atm on_writable; + + // mutex guarding set_ready & shutdown state + gpr_mu set_ready_mu; + bool shutdown; + + // mutex protecting polling_island + gpr_mu polling_island_mu; + // current polling island + polling_island *polling_island; + + grpc_fd *next_free; +}; + +struct grpc_pollset_worker {}; + +struct grpc_pollset { + gpr_mu mu; + // current polling island + polling_island *polling_island; +}; + +//////////////////////////////////////////////////////////////////////////////// +// Polling island implementation + +static gpr_mu g_pi_freelist_mu; +static polling_island *g_first_free_pi; + +static void add_pollable_to_epoll_set(pollable_object *pollable, int epoll_set, + uint32_t events) { + struct epoll_event ev; + ev.events = events; + ev.data.ptr = pollable; + int err = epoll_ctl(epoll_set, EPOLL_CTL_ADD, pollable->fd, &ev); + if (err < 0) { + gpr_log(GPR_ERROR, "epoll_ctl add for %d faild: %s", pollable->fd, + strerror(errno)); + } 
+} + +static void add_fd_to_epoll_set(grpc_fd *fd, int epoll_set) { + add_pollable_to_epoll_set(&fd->pollable, epoll_set, + EPOLLIN | EPOLLOUT | EPOLLET); +} + +static void add_island_to_epoll_set(polling_island *pi, int epoll_set) { + add_pollable_to_epoll_set(&pi->pollable, epoll_set, EPOLLIN | EPOLLET); +} + +static polling_island *polling_island_create(grpc_fd *initial_fd) { + polling_island *r = NULL; + gpr_mu_lock(&g_pi_freelist_mu); + if (g_first_free_pi == NULL) { + r = gpr_malloc(sizeof(*r)); + r->pollable.type = POLLABLE_EPOLL_SET; + gpr_mu_init(&r->mu); + } else { + r = g_first_free_pi; + g_first_free_pi = r->next; + } + gpr_mu_unlock(&g_pi_freelist_mu); + + r->pollable.fd = epoll_create1(EPOLL_CLOEXEC); + GPR_ASSERT(r->pollable.fd >= 0); + + gpr_mu_lock(&r->mu); + r->only_fd = initial_fd; + r->refs = 2; // creation of a polling island => a referencing pollset & fd + gpr_mu_unlock(&r->mu); + + add_fd_to_epoll_set(initial_fd, r->pollable.fd); + return r; +} + +static void polling_island_delete(polling_island *p) { + gpr_mu_lock(&g_pi_freelist_mu); + p->next = g_first_free_pi; + g_first_free_pi = p; + gpr_mu_unlock(&g_pi_freelist_mu); +} + +static polling_island *polling_island_add(polling_island *p, grpc_fd *fd) { + gpr_mu_lock(&p->mu); + p->only_fd = NULL; + p->refs++; // new fd picks up a ref + gpr_mu_unlock(&p->mu); + + add_fd_to_epoll_set(fd, p->pollable.fd); + + return p; +} + +static void add_siblings_to(polling_island *siblings, polling_island *dest) { + polling_island *sibling_tail = dest; + while (sibling_tail->next != NULL) { + sibling_tail = sibling_tail->next; + } + sibling_tail->next = siblings; +} + +static polling_island *polling_island_merge(polling_island *a, + polling_island *b) { + GPR_ASSERT(a != b); + polling_island *out; + + gpr_mu_lock(&GPR_MIN(a, b)->mu); + gpr_mu_lock(&GPR_MAX(a, b)->mu); + + GPR_ASSERT(a->became == NULL); + GPR_ASSERT(b->became == NULL); + + if (a->only_fd == NULL && b->only_fd == NULL) { + b->became = a; + add_siblings_to(b, a); + add_island_to_epoll_set(b, a->pollable.fd); + out = a; + } else if (a->only_fd == NULL) { + GPR_ASSERT(b->only_fd != NULL); + add_fd_to_epoll_set(b->only_fd, a->pollable.fd); + b->became = a; + out = a; + } else if (b->only_fd == NULL) { + GPR_ASSERT(a->only_fd != NULL); + add_fd_to_epoll_set(a->only_fd, b->pollable.fd); + a->became = b; + out = b; + } else { + add_fd_to_epoll_set(b->only_fd, a->pollable.fd); + a->only_fd = NULL; + b->only_fd = NULL; + b->became = a; + out = a; + } + + gpr_mu_unlock(&a->mu); + gpr_mu_unlock(&b->mu); + + return out; +} + +static polling_island *polling_island_update_and_lock(polling_island *p) { + gpr_mu_lock(&p->mu); + if (p->became != NULL) { + do { + polling_island *from = p; + p = p->became; + gpr_mu_lock(&p->mu); + bool delete_from = 0 == --from->refs; + p->refs++; + gpr_mu_unlock(&from->mu); + if (delete_from) { + polling_island_delete(from); + } + } while (p->became != NULL); + } + return p; +} + +static polling_island *polling_island_ref(polling_island *p) { + gpr_mu_lock(&p->mu); + gpr_mu_unlock(&p->mu); + return p; +} + +static void polling_island_drop(polling_island *p) {} + +static polling_island *polling_island_update(polling_island *p, + int updating_owner_count) { + p = polling_island_update_and_lock(p); + GPR_ASSERT(p->refs != 0); + p->refs += updating_owner_count; + gpr_mu_unlock(&p->mu); + return p; +} + +//////////////////////////////////////////////////////////////////////////////// +// FD implementation + +static gpr_mu g_fd_freelist_mu; +static grpc_fd 
*g_first_free_fd; + +static grpc_fd *fd_create(int fd, const char *name) { + grpc_fd *r = NULL; + gpr_mu_lock(&g_fd_freelist_mu); + if (g_first_free_fd == NULL) { + r = gpr_malloc(sizeof(*r)); + r->pollable.type = POLLABLE_FD; + gpr_atm_rel_store(&r->on_readable, 0); + gpr_atm_rel_store(&r->on_writable, 0); + gpr_mu_init(&r->polling_island_mu); + gpr_mu_init(&r->set_ready_mu); + } else { + r = g_first_free_fd; + g_first_free_fd = r->next_free; + } + gpr_mu_unlock(&g_fd_freelist_mu); + + r->pollable.fd = fd; + grpc_iomgr_register_object(&r->pollable.iomgr_object, name); + r->next_free = NULL; + return r; +} + +static int fd_wrapped_fd(grpc_fd *fd) { return fd->pollable.fd; } + +static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *on_done, int *release_fd, + const char *reason) { + if (release_fd != NULL) { + *release_fd = fd->pollable.fd; + } else { + close(fd->pollable.fd); + } + + gpr_mu_lock(&fd->polling_island_mu); + if (fd->polling_island != NULL) { + polling_island_drop(fd->polling_island); + } + gpr_mu_unlock(&fd->polling_island_mu); + + gpr_mu_lock(&g_fd_freelist_mu); + fd->next_free = g_first_free_fd; + g_first_free_fd = fd; + grpc_iomgr_unregister_object(&fd->pollable.iomgr_object); + gpr_mu_unlock(&g_fd_freelist_mu); + + grpc_exec_ctx_enqueue(exec_ctx, on_done, true, NULL); +} + +static void notify_on(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure, gpr_atm *state) { + if (gpr_atm_acq_cas(state, STATE_NOT_READY, (gpr_atm)closure)) { + // state was not ready, and is now the closure - we're done */ + } else { + // cas failed - we MUST be in STATE_READY (can't request two notifications + // for the same event) + // flip back to not ready, enqueue the closure directly + GPR_ASSERT(gpr_atm_rel_cas(state, STATE_READY, STATE_NOT_READY)); + grpc_exec_ctx_enqueue(exec_ctx, closure, true, NULL); + } +} + +static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + notify_on(exec_ctx, fd, closure, &fd->on_readable); +} + +static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + notify_on(exec_ctx, fd, closure, &fd->on_readable); +} + +static void destroy_fd_freelist(void) { + while (g_first_free_fd) { + grpc_fd *next = g_first_free_fd->next_free; + gpr_mu_destroy(&g_first_free_fd->polling_island_mu); + gpr_free(next); + g_first_free_fd = next; + } +} + +static void set_ready_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + gpr_atm *state) { + if (gpr_atm_acq_cas(state, STATE_NOT_READY, STATE_READY)) { + // state was not ready, and is now ready - we're done + } else { + // cas failed - either there's a closure queued which we should consume OR + // the state was already STATE_READY + gpr_atm cur_state = gpr_atm_acq_load(state); + if (cur_state != STATE_READY) { + // state wasn't STATE_READY - it *must* have been a closure + // since it's illegal to ask for notification twice, it's safe to assume + // that we'll resume being the closure + GPR_ASSERT(gpr_atm_rel_cas(state, cur_state, STATE_NOT_READY)); + grpc_exec_ctx_enqueue(exec_ctx, (grpc_closure *)cur_state, !fd->shutdown, + NULL); + } + } +} + +static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + gpr_mu_lock(&fd->set_ready_mu); + GPR_ASSERT(!fd->shutdown); + fd->shutdown = 1; + set_ready_locked(exec_ctx, fd, &fd->on_readable); + set_ready_locked(exec_ctx, fd, &fd->on_writable); + gpr_mu_unlock(&fd->set_ready_mu); +} + +//////////////////////////////////////////////////////////////////////////////// +// Pollset 
implementation + +static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { + gpr_mu_init(&pollset->mu); + *mu = &pollset->mu; + pollset->polling_island = NULL; +} + +static void pollset_destroy(grpc_pollset *pollset) { + gpr_mu_destroy(&pollset->mu); + if (pollset->polling_island) { + polling_island_drop(pollset->polling_island); + } +} + +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + struct grpc_fd *fd) { + gpr_mu_lock(&pollset->mu); + gpr_mu_lock(&fd->polling_island_mu); + + polling_island *new; + + if (fd->polling_island == NULL) { + if (pollset->polling_island == NULL) { + new = polling_island_create(fd); + } else { + new = polling_island_add(pollset->polling_island, fd); + } + } else if (pollset->polling_island == NULL) { + new = polling_island_ref(fd->polling_island); + } else if (pollset->polling_island != fd->polling_island) { + new = polling_island_merge(pollset->polling_island, fd->polling_island); + } else { + new = polling_island_update(pollset->polling_island, 1); + } + + fd->polling_island = pollset->polling_island = new; + + gpr_mu_unlock(&fd->polling_island_mu); + gpr_mu_unlock(&pollset->mu); +} + +//////////////////////////////////////////////////////////////////////////////// +// Engine binding + +static void shutdown_engine(void) { destroy_fd_freelist(); } + +static const grpc_event_engine_vtable vtable = { + .pollset_size = sizeof(grpc_pollset), + + .fd_create = fd_create, + .fd_wrapped_fd = fd_wrapped_fd, + .fd_orphan = fd_orphan, + .fd_shutdown = fd_shutdown, + .fd_notify_on_read = fd_notify_on_read, + .fd_notify_on_write = fd_notify_on_write, + + .pollset_init = pollset_init, + .pollset_shutdown = pollset_shutdown, + .pollset_reset = pollset_reset, + .pollset_destroy = pollset_destroy, + .pollset_work = pollset_work, + .pollset_kick = pollset_kick, + .pollset_add_fd = pollset_add_fd, + + .pollset_set_create = pollset_set_create, + .pollset_set_destroy = pollset_set_destroy, + .pollset_set_add_pollset = pollset_set_add_pollset, + .pollset_set_del_pollset = pollset_set_del_pollset, + .pollset_set_add_pollset_set = pollset_set_add_pollset_set, + .pollset_set_del_pollset_set = pollset_set_del_pollset_set, + .pollset_set_add_fd = pollset_set_add_fd, + .pollset_set_del_fd = pollset_set_del_fd, + + .kick_poller = kick_poller, + + .shutdown_engine = shutdown_engine, +}; + +static bool is_epoll_available(void) { + abort(); + return false; +} + +const grpc_event_engine_vtable *grpc_init_poll_posix(void) { + if (!is_epoll_available()) { + return NULL; + } + return &vtable; +} -- cgit v1.2.3 From d7d6eed78822ed8de14fdf9b39255365946cf8c4 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Tue, 31 May 2016 09:44:08 -0700 Subject: Correct typo --- src/core/lib/iomgr/ev_posix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_posix.c b/src/core/lib/iomgr/ev_posix.c index 404ef2a64b..96399ef837 100644 --- a/src/core/lib/iomgr/ev_posix.c +++ b/src/core/lib/iomgr/ev_posix.c @@ -44,7 +44,7 @@ #include #include -#include "src/core/lib/iomgr/ev_epoll_linux.h" +#include "src/core/lib/iomgr/ev_epoll_posix.h" #include "src/core/lib/iomgr/ev_poll_posix.h" #include "src/core/lib/support/env.h" -- cgit v1.2.3 From 5098f91159f2a5c0494688b8cfaff4debef5686f Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Tue, 31 May 2016 10:58:17 -0700 Subject: Rewrite all the pollset and fd functions in ev_epoll_linux.c --- src/core/lib/iomgr/ev_epoll_linux.c | 161 +++++++++--------------------------- 1 
file changed, 38 insertions(+), 123 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 7793a95201..1201c10a7e 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -67,10 +67,10 @@ struct polling_island; struct grpc_fd { int fd; /* refst format: - bit0: 1=active/0=orphaned - bit1-n: refcount - meaning that mostly we ref by two to avoid altering the orphaned bit, - and just unref by 1 when we're ready to flag the object as orphaned */ + bit 0 : 1=Active / 0=Orphaned + bits 1-n : refcount + - ref/unref by two to avoid altering the orphaned bit + - To orphan, unref by 1 */ gpr_atm refst; gpr_mu mu; @@ -84,12 +84,11 @@ struct grpc_fd { /* Mutex protecting the 'polling_island' field */ gpr_mu pi_mu; - /* The polling island to which this fd belongs to. An fd belongs to exactly - one polling island */ + /* The polling island to which this fd belongs to. + * An fd belongs to exactly one polling island */ struct polling_island *polling_island; struct grpc_fd *freelist_next; - grpc_closure *on_done_closure; grpc_iomgr_object iomgr_object; @@ -141,7 +140,6 @@ typedef struct polling_island { /* Polling islands that are no longer needed are kept in a freelist so that they can be reused. This field points to the next polling island in the - free list. Note that this is only used if the polling island is in the free list */ struct polling_island *next_free; } polling_island; @@ -185,7 +183,7 @@ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, * TODO: sreek - Might have to unref the fds (assuming whether we add a ref to * the fd when adding it to the epollset) */ /* The caller is expected to hold pi->mu lock before calling this function */ -static void polling_island_clear_fds_locked(polling_island *pi) { +static void polling_island_remove_all_fds_locked(polling_island *pi) { int err; size_t i; @@ -392,7 +390,7 @@ polling_island *polling_island_merge(polling_island *p, polling_island *q) { // Move all the fds from polling_island p to polling_island q polling_island_add_fds_locked(q, p->fds, p->fd_cnt); - polling_island_clear_fds_locked(p); + polling_island_remove_all_fds_locked(p); q->ref_cnt += p->ref_cnt; @@ -411,14 +409,7 @@ static void polling_island_global_init() { * pollset declarations */ -typedef struct grpc_cached_wakeup_fd { - grpc_wakeup_fd fd; - struct grpc_cached_wakeup_fd *next; -} grpc_cached_wakeup_fd; - struct grpc_pollset_worker { - grpc_cached_wakeup_fd *wakeup_fd; - int reevaluate_polling_on_wakeup; int kicked_specifically; pthread_t pt_id; struct grpc_pollset_worker *next; @@ -441,9 +432,6 @@ struct grpc_pollset { /* The polling island to which this fd belongs to. An fd belongs to exactly one polling island */ struct polling_island *polling_island; - - /* Local cache of eventfds for workers */ - grpc_cached_wakeup_fd *local_wakeup_cache; }; /* Add an fd to a pollset */ @@ -465,8 +453,6 @@ static int poll_deadline_to_millis_timeout(gpr_timespec deadline, /* Allow kick to wakeup the currently polling worker */ #define GRPC_POLLSET_CAN_KICK_SELF 1 -/* Force the wakee to repoll when awoken */ -#define GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP 2 /* As per pollset_kick, with an extended set of flags (defined above) -- mostly for fd_posix's use. 
*/ static void pollset_kick_ext(grpc_pollset *p, @@ -815,34 +801,25 @@ static void pollset_kick_ext(grpc_pollset *p, if (specific_worker != NULL) { if (specific_worker == GRPC_POLLSET_KICK_BROADCAST) { GPR_TIMER_BEGIN("pollset_kick_ext.broadcast", 0); - GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); for (specific_worker = p->root_worker.next; specific_worker != &p->root_worker; specific_worker = specific_worker->next) { - grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + pthread_kill(specific_worker->pt_id, SIGUSR1); } p->kicked_without_pollers = 1; GPR_TIMER_END("pollset_kick_ext.broadcast", 0); } else if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)specific_worker) { GPR_TIMER_MARK("different_thread_worker", 0); - if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { - specific_worker->reevaluate_polling_on_wakeup = 1; - } specific_worker->kicked_specifically = 1; - grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); /* TODO (sreek): Refactor this into a separate file*/ pthread_kill(specific_worker->pt_id, SIGUSR1); } else if ((flags & GRPC_POLLSET_CAN_KICK_SELF) != 0) { GPR_TIMER_MARK("kick_yoself", 0); - if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { - specific_worker->reevaluate_polling_on_wakeup = 1; - } specific_worker->kicked_specifically = 1; - grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + pthread_kill(specific_worker->pt_id, SIGUSR1); } } else if (gpr_tls_get(&g_current_thread_poller) != (intptr_t)p) { - GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); GPR_TIMER_MARK("kick_anonymous", 0); specific_worker = pop_front_worker(p); if (specific_worker != NULL) { @@ -860,7 +837,7 @@ static void pollset_kick_ext(grpc_pollset *p, if (specific_worker != NULL) { GPR_TIMER_MARK("finally_kick", 0); push_back_worker(p, specific_worker); - grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); + pthread_kill(specific_worker->pt_id, SIGUSR1); } } else { GPR_TIMER_MARK("kicked_no_pollers", 0); @@ -911,8 +888,6 @@ static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { pollset->shutting_down = 0; pollset->called_shutdown = 0; pollset->kicked_without_pollers = 0; - pollset->local_wakeup_cache = NULL; - pollset->kicked_without_pollers = 0; multipoll_with_epoll_pollset_create_efd(pollset); } @@ -926,12 +901,6 @@ static void pollset_destroy(grpc_pollset *pollset) { multipoll_with_epoll_pollset_destroy(pollset); - while (pollset->local_wakeup_cache) { - grpc_cached_wakeup_fd *next = pollset->local_wakeup_cache->next; - grpc_wakeup_fd_destroy(&pollset->local_wakeup_cache->fd); - gpr_free(pollset->local_wakeup_cache); - pollset->local_wakeup_cache = next; - } gpr_mu_destroy(&pollset->pi_mu); gpr_mu_destroy(&pollset->mu); } @@ -974,14 +943,6 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, GPR_TIMER_BEGIN("pollset_work", 0); /* this must happen before we (potentially) drop pollset->mu */ worker.next = worker.prev = NULL; - worker.reevaluate_polling_on_wakeup = 0; - if (pollset->local_wakeup_cache != NULL) { - worker.wakeup_fd = pollset->local_wakeup_cache; - pollset->local_wakeup_cache = worker.wakeup_fd->next; - } else { - worker.wakeup_fd = gpr_malloc(sizeof(*worker.wakeup_fd)); - grpc_wakeup_fd_init(&worker.wakeup_fd->fd); - } worker.kicked_specifically = 0; /* TODO(sreek): Abstract this thread id stuff out into a separate file */ @@ -1026,27 +987,12 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, gpr_mu_lock(&pollset->mu); locked = 1; } - 
/* If we're forced to re-evaluate polling (via pollset_kick with - GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) then we land here and force - a loop */ - if (worker.reevaluate_polling_on_wakeup) { - worker.reevaluate_polling_on_wakeup = 0; - pollset->kicked_without_pollers = 0; - if (queued_work || worker.kicked_specifically) { - /* If there's queued work on the list, then set the deadline to be - immediate so we get back out of the polling loop quickly */ - deadline = gpr_inf_past(GPR_CLOCK_MONOTONIC); - } - keep_polling = 1; - } } if (added_worker) { remove_worker(pollset, &worker); gpr_tls_set(&g_current_thread_worker, 0); } - /* release wakeup fd to the local pool */ - worker.wakeup_fd->next = pollset->local_wakeup_cache; - pollset->local_wakeup_cache = worker.wakeup_fd; + /* check shutdown conditions */ if (pollset->shutting_down) { if (pollset_has_workers(pollset)) { @@ -1135,10 +1081,9 @@ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, } } else if (fd->polling_island == NULL) { pi_new = polling_island_update_and_lock(pollset->polling_island, 1, 1); - } else if (pollset->polling_island == NULL) { pi_new = polling_island_update_and_lock(fd->polling_island, 1, 1); - } else { // Non null and different + } else { pi_new = polling_island_merge(fd->polling_island, pollset->polling_island); } @@ -1182,9 +1127,7 @@ static void multipoll_with_epoll_pollset_maybe_work_and_unlock( struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; int epoll_fd = pollset->epoll_fd; int ep_rv; - int poll_rv; int timeout_ms; - struct pollfd pfds[2]; /* If you want to ignore epoll's ability to sanely handle parallel pollers, * for a more apples-to-apples performance comparison with poll, add a @@ -1196,63 +1139,35 @@ static void multipoll_with_epoll_pollset_maybe_work_and_unlock( timeout_ms = poll_deadline_to_millis_timeout(deadline, now); - pfds[0].fd = GRPC_WAKEUP_FD_GET_READ_FD(&worker->wakeup_fd->fd); - pfds[0].events = POLLIN; - pfds[0].revents = 0; - pfds[1].fd = epoll_fd; - pfds[1].events = POLLIN; - pfds[1].revents = 0; - - /* TODO(vpai): Consider first doing a 0 timeout poll here to avoid - even going into the blocking annotation if possible */ - GPR_TIMER_BEGIN("poll", 0); - GRPC_SCHEDULING_START_BLOCKING_REGION; - poll_rv = grpc_poll_function(pfds, 2, timeout_ms); - GRPC_SCHEDULING_END_BLOCKING_REGION; - GPR_TIMER_END("poll", 0); - - if (poll_rv < 0) { - if (errno != EINTR) { - gpr_log(GPR_ERROR, "poll() failed: %s", strerror(errno)); - } - } else if (poll_rv == 0) { - /* do nothing */ - } else { - if (pfds[0].revents) { - grpc_wakeup_fd_consume_wakeup(&worker->wakeup_fd->fd); - } - if (pfds[1].revents) { - do { - /* The following epoll_wait never blocks; it has a timeout of 0 */ - ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); - if (ep_rv < 0) { - if (errno != EINTR) { - gpr_log(GPR_ERROR, "epoll_wait() failed: %s", strerror(errno)); - } + do { + /* The following epoll_wait never blocks; it has a timeout of 0 */ + ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms); + if (ep_rv < 0) { + if (errno != EINTR) { + gpr_log(GPR_ERROR, "epoll_wait() failed: %s", strerror(errno)); + } + } else { + int i; + for (i = 0; i < ep_rv; ++i) { + grpc_fd *fd = ep_ev[i].data.ptr; + /* TODO(klempner): We might want to consider making err and pri + * separate events */ + int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); + int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); + int write_ev = ep_ev[i].events & EPOLLOUT; + if (fd == NULL) { + 
grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); } else { - int i; - for (i = 0; i < ep_rv; ++i) { - grpc_fd *fd = ep_ev[i].data.ptr; - /* TODO(klempner): We might want to consider making err and pri - * separate events */ - int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); - int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); - int write_ev = ep_ev[i].events & EPOLLOUT; - if (fd == NULL) { - grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); - } else { - if (read_ev || cancel) { - fd_become_readable(exec_ctx, fd); - } - if (write_ev || cancel) { - fd_become_writable(exec_ctx, fd); - } - } + if (read_ev || cancel) { + fd_become_readable(exec_ctx, fd); + } + if (write_ev || cancel) { + fd_become_writable(exec_ctx, fd); } } - } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); + } } - } + } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); } static void multipoll_with_epoll_pollset_finish_shutdown( -- cgit v1.2.3 From 0bcbd79baa57c16d4ab64a070b5fbfc93293f543 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 1 Jun 2016 15:43:03 -0700 Subject: Functionality complete in ev_epoll_linux.c --- src/core/lib/iomgr/ev_epoll_linux.c | 1202 +++++++++++++++++------------------ 1 file changed, 591 insertions(+), 611 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 1201c10a7e..ce42a9e7ce 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -61,31 +61,27 @@ struct polling_island; /******************************************************************************* - * FD declarations + * Fd Declarations */ - struct grpc_fd { int fd; /* refst format: bit 0 : 1=Active / 0=Orphaned bits 1-n : refcount - - ref/unref by two to avoid altering the orphaned bit - - To orphan, unref by 1 */ + Ref/Unref by two to avoid altering the orphaned bit */ gpr_atm refst; gpr_mu mu; - int shutdown; + bool shutdown; int closed; - int released; + bool released; grpc_closure *read_closure; grpc_closure *write_closure; - /* Mutex protecting the 'polling_island' field */ + /* The polling island to which this fd belongs to and the mutex protecting the + the field */ gpr_mu pi_mu; - - /* The polling island to which this fd belongs to. - * An fd belongs to exactly one polling island */ struct polling_island *polling_island; struct grpc_fd *freelist_next; @@ -94,11 +90,7 @@ struct grpc_fd { grpc_iomgr_object iomgr_object; }; -/* Return 1 if this fd is orphaned, 0 otherwise */ -static bool fd_is_orphaned(grpc_fd *fd); - /* Reference counting for fds */ -/*#define GRPC_FD_REF_COUNT_DEBUG*/ #ifdef GRPC_FD_REF_COUNT_DEBUG static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line); static void fd_unref(grpc_fd *fd, const char *reason, const char *file, @@ -119,15 +111,15 @@ static void fd_global_shutdown(void); #define CLOSURE_READY ((grpc_closure *)1) /******************************************************************************* - * Polling Island + * Polling-island Declarations */ typedef struct polling_island { gpr_mu mu; int ref_cnt; - /* Pointer to the polling_island this merged into. If this is not NULL, all - the remaining fields in this pollset (i.e all fields except mu and ref_cnt) - are considered invalid and must be ignored */ + /* Points to the polling_island this merged into. 
+ * If merged_to is not NULL, all the remaining fields (except mu and ref_cnt) + * are invalid and must be ignored */ struct polling_island *merged_to; /* The fd of the underlying epoll set */ @@ -144,15 +136,62 @@ typedef struct polling_island { struct polling_island *next_free; } polling_island; +/******************************************************************************* + * Pollset Declarations + */ + +struct grpc_pollset_worker { + int kicked_specifically; + pthread_t pt_id; /* TODO (sreek) - Add an abstraction here */ + struct grpc_pollset_worker *next; + struct grpc_pollset_worker *prev; +}; + +struct grpc_pollset { + gpr_mu mu; + grpc_pollset_worker root_worker; + bool kicked_without_pollers; + + bool shutting_down; /* Is the pollset shutting down ? */ + bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */ + grpc_closure *shutdown_done; /* Called after after shutdown is complete */ + + /* The polling island to which this pollset belongs to and the mutex + protecting the field */ + gpr_mu pi_mu; + struct polling_island *polling_island; +}; + +/******************************************************************************* + * Pollset-set Declarations + */ +struct grpc_pollset_set { + gpr_mu mu; + + size_t pollset_count; + size_t pollset_capacity; + grpc_pollset **pollsets; + + size_t pollset_set_count; + size_t pollset_set_capacity; + struct grpc_pollset_set **pollset_sets; + + size_t fd_count; + size_t fd_capacity; + grpc_fd **fds; +}; + +/******************************************************************************* + * Polling-island Definitions + */ + /* Polling island freelist */ static gpr_mu g_pi_freelist_mu; static polling_island *g_pi_freelist = NULL; -/* TODO: sreek - Should we hold a lock on fd or add a ref to the fd ? - * TODO: sreek - Should this add a ref to the grpc_fd ? */ /* The caller is expected to hold pi->mu lock before calling this function */ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, - size_t fd_count) { + size_t fd_count, bool add_fd_refs) { int err; size_t i; struct epoll_event ev; @@ -162,11 +201,14 @@ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, ev.data.ptr = fds[i]; err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev); - if (err < 0 && errno != EEXIST) { - gpr_log(GPR_ERROR, "epoll_ctl add for fd: %d failed with error: %s", - fds[i]->fd, strerror(errno)); - /* TODO: sreek - Not sure if it is a good idea to continue here. We need a - * better way to bubble up this error instead of doing an abort() */ + if (err < 0) { + if (errno != EEXIST) { + /* TODO: sreek - We need a better way to bubble up this error instead of + just logging a message */ + gpr_log(GPR_ERROR, "epoll_ctl add for fd: %d failed with error: %s", + fds[i]->fd, strerror(errno)); + } + continue; } @@ -176,26 +218,30 @@ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, } pi->fds[pi->fd_cnt++] = fds[i]; + if (add_fd_refs) { + GRPC_FD_REF(fds[i], "polling_island"); + } } } -/* TODO: sreek - Should we hold a lock on fd or add a ref to the fd ? 
- * TODO: sreek - Might have to unref the fds (assuming whether we add a ref to - * the fd when adding it to the epollset) */ /* The caller is expected to hold pi->mu lock before calling this function */ -static void polling_island_remove_all_fds_locked(polling_island *pi) { +static void polling_island_remove_all_fds_locked(polling_island *pi, + bool remove_fd_refs) { int err; size_t i; for (i = 0; i < pi->fd_cnt; i++) { - err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL); + if (remove_fd_refs) { + GRPC_FD_UNREF(pi->fds[i], "polling_island"); + } + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL); if (err < 0 && errno != ENOENT) { gpr_log(GPR_ERROR, "epoll_ctl delete for fds[i]: %d failed with error: %s", i, pi->fds[i]->fd, strerror(errno)); - /* TODO: sreek - Not sure if it is a good idea to continue here. We need a - * better way to bubble up this error instead of doing an abort() */ + /* TODO: sreek - We need a better way to bubble up this error instead of + * just logging a message */ continue; } } @@ -203,22 +249,31 @@ static void polling_island_remove_all_fds_locked(polling_island *pi) { pi->fd_cnt = 0; } -/* TODO: sreek - Should we hold a lock on fd or add a ref to the fd ? - * TODO: sreek - Might have to unref the fd (assuming whether we add a ref to - * the fd when adding it to the epollset) */ /* The caller is expected to hold pi->mu lock before calling this function */ -static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd) { +static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd, + bool close_fd, bool remove_fd_ref) { int err; size_t i; - err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL); - if (err < 0 && errno != ENOENT) { - gpr_log(GPR_ERROR, "epoll_ctl delete for fd: %d failed with error; %s", - fd->fd, strerror(errno)); + + /* Calling close() on the fd will automatically remove it from the epoll set. 
+ If not calling close(), the fd must be explicitly removed from the epoll + set */ + if (close_fd) { + close(fd->fd); + } else { + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL); + if (err < 0 && errno != ENOENT) { + gpr_log(GPR_ERROR, "epoll_ctl delete for fd: %d failed with error; %s", + fd->fd, strerror(errno)); + } } for (i = 0; i < pi->fd_cnt; i++) { if (pi->fds[i] == fd) { pi->fds[i] = pi->fds[--pi->fd_cnt]; + if (remove_fd_ref) { + GRPC_FD_UNREF(fd, "polling_island"); + } break; } } @@ -227,6 +282,10 @@ static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd) { static polling_island *polling_island_create(grpc_fd *initial_fd, int initial_ref_cnt) { polling_island *pi = NULL; + struct epoll_event ev; + int err; + + /* Try to get one from the polling island freelist */ gpr_mu_lock(&g_pi_freelist_mu); if (g_pi_freelist != NULL) { pi = g_pi_freelist; @@ -242,13 +301,25 @@ static polling_island *polling_island_create(grpc_fd *initial_fd, pi->fd_cnt = 0; pi->fd_capacity = 0; pi->fds = NULL; + } - pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); - if (pi->epoll_fd < 0) { - gpr_log(GPR_ERROR, "epoll_create1() failed with error: %s", - strerror(errno)); - } - GPR_ASSERT(pi->epoll_fd >= 0); + pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (pi->epoll_fd < 0) { + gpr_log(GPR_ERROR, "epoll_create1() failed with error: %s", + strerror(errno)); + } + GPR_ASSERT(pi->epoll_fd >= 0); + + ev.events = (uint32_t)(EPOLLIN | EPOLLET); + ev.data.ptr = NULL; + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, + GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), &ev); + if (err < 0) { + gpr_log(GPR_ERROR, + "Failed to add grpc_global_wake_up_fd (%d) to the epoll set " + "(epoll_fd: %d) with error: %s", + GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), pi->epoll_fd, + strerror(errno)); } pi->ref_cnt = initial_ref_cnt; @@ -256,10 +327,12 @@ static polling_island *polling_island_create(grpc_fd *initial_fd, pi->next_free = NULL; if (initial_fd != NULL) { - /* polling_island_add_fds_locked() expects the caller to hold a pi->mu - * lock. However, since this is a new polling island (and no one has a - * reference to it yet), it is okay to not acquire pi->mu here */ - polling_island_add_fds_locked(pi, &initial_fd, 1); + /* It is not really needed to get the pi->mu lock here. If this is a newly + created polling island (or one that we got from the freelist), no one + else would be holding a lock to it anyway */ + gpr_mu_lock(&pi->mu); + polling_island_add_fds_locked(pi, &initial_fd, 1, true); + gpr_mu_unlock(&pi->mu); } return pi; @@ -269,6 +342,9 @@ static void polling_island_delete(polling_island *pi) { GPR_ASSERT(pi->ref_cnt == 0); GPR_ASSERT(pi->fd_cnt == 0); + close(pi->epoll_fd); + pi->epoll_fd = -1; + pi->merged_to = NULL; gpr_mu_lock(&g_pi_freelist_mu); @@ -313,10 +389,20 @@ void polling_island_pair_update_and_lock(polling_island **p, bool pi_2_locked = false; int num_swaps = 0; + /* Loop until either pi_1 == pi_2 or until we acquired locks on both pi_1 + and pi_2 */ while (pi_1 != pi_2 && !(pi_1_locked && pi_2_locked)) { - // pi_1 is NOT equal to pi_2 - // pi_1 MAY be locked - + /* The following assertions are true at this point: + - pi_1 != pi_2 (else, the while loop would have exited) + - pi_1 MAY be locked + - pi_2 is NOT locked */ + + /* To maintain lock order consistency, always lock polling_island node with + lower address first. + First, make sure pi_1 < pi_2 before proceeding any further. 
If it turns + out that pi_1 > pi_2, unlock pi_1 if locked (because pi_2 is not locked + at this point and having pi_1 locked would violate the lock order) and + swap pi_1 and pi_2 so that pi_1 becomes less than pi_2 */ if (pi_1 > pi_2) { if (pi_1_locked) { gpr_mu_unlock(&pi_1->mu); @@ -327,14 +413,22 @@ void polling_island_pair_update_and_lock(polling_island **p, num_swaps++; } - // p1 < p2 - // p1 MAY BE locked - // p2 is NOT locked + /* The following assertions are true at this point: + - pi_1 != pi_2 + - pi_1 < pi_2 (address of pi_1 is less than that of pi_2) + - pi_1 MAYBE locked + - pi_2 is NOT locked */ + /* Lock pi_1 (if pi_1 is pointing to the terminal node in the list) */ if (!pi_1_locked) { gpr_mu_lock(&pi_1->mu); pi_1_locked = true; + /* If pi_1 is not terminal node (i.e pi_1->merged_to != NULL), we are not + done locking this polling_island yet. Release the lock on this node and + advance pi_1 to the next node in the list; and go to the beginning of + the loop (we can't proceed to locking pi_2 unless we locked pi_1 first) + */ if (pi_1->merged_to != NULL) { temp = pi_1->merged_to; polling_island_unref_and_unlock(pi_1, 1); @@ -345,13 +439,16 @@ void polling_island_pair_update_and_lock(polling_island **p, } } - // p1 is LOCKED - // p2 is UNLOCKED - // p1 != p2 + /* The following assertions are true at this point: + - pi_1 is locked + - pi_2 is unlocked + - pi_1 != pi_2 */ gpr_mu_lock(&pi_2->mu); pi_2_locked = true; + /* If pi_2 is not terminal node, we are not done locking this polling_island + yet. Release the lock and update pi_2 to the next node in the list */ if (pi_2->merged_to != NULL) { temp = pi_2->merged_to; polling_island_unref_and_unlock(pi_2, 1); @@ -360,14 +457,19 @@ void polling_island_pair_update_and_lock(polling_island **p, } } - // Either pi_1 == pi_2 OR we got both locks! + /* At this point, either pi_1 == pi_2 AND/OR we got both locks */ if (pi_1 == pi_2) { + /* We may or may not have gotten the lock. If we didn't, walk the rest of + the polling_island list and get the lock */ GPR_ASSERT(pi_1_locked || (!pi_1_locked && !pi_2_locked)); if (!pi_1_locked) { pi_1 = pi_2 = polling_island_update_and_lock(pi_1, 2, 0); } } else { GPR_ASSERT(pi_1_locked && pi_2_locked); + /* If we swapped pi_1 and pi_2 odd number of times, do one more swap so that + pi_1 and pi_2 point to the same polling_island lists they started off + with at the beginning of this function (i.e *p and *q respectively) */ if (num_swaps % 2 > 0) { GPR_SWAP(polling_island *, pi_1, pi_2); } @@ -378,26 +480,37 @@ void polling_island_pair_update_and_lock(polling_island **p, } polling_island *polling_island_merge(polling_island *p, polling_island *q) { - polling_island *merged = NULL; - + /* Get locks on both the polling islands */ polling_island_pair_update_and_lock(&p, &q); /* TODO: sreek: Think about this scenario some more. Is it possible ?. what * does it mean, when would this happen */ if (p == q) { - merged = p; + /* Nothing needs to be done here */ + gpr_mu_unlock(&p->mu); + return p; } - // Move all the fds from polling_island p to polling_island q - polling_island_add_fds_locked(q, p->fds, p->fd_cnt); - polling_island_remove_all_fds_locked(p); + /* Make sure that p points to the polling island with fewer fds than q */ + if (p->fd_cnt > q->fd_cnt) { + GPR_SWAP(polling_island *, p, q); + } + + /* "Merge" p with q i.e move all the fds from p (the polling_island with fewer + fds) to q. 
+ Note: Not altering the ref counts on the affected fds here because they + would effectively remain unchanged */ + polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false); + polling_island_remove_all_fds_locked(p, false); + /* The merged polling island inherits all the ref counts of the island merging + with it */ q->ref_cnt += p->ref_cnt; gpr_mu_unlock(&p->mu); gpr_mu_unlock(&q->mu); - return merged; + return q; } static void polling_island_global_init() { @@ -406,95 +519,10 @@ static void polling_island_global_init() { } /******************************************************************************* - * pollset declarations - */ - -struct grpc_pollset_worker { - int kicked_specifically; - pthread_t pt_id; - struct grpc_pollset_worker *next; - struct grpc_pollset_worker *prev; -}; - -struct grpc_pollset { - gpr_mu mu; - grpc_pollset_worker root_worker; - int shutting_down; - int called_shutdown; - int kicked_without_pollers; - grpc_closure *shutdown_done; - - int epoll_fd; - - /* Mutex protecting the 'polling_island' field */ - gpr_mu pi_mu; - - /* The polling island to which this fd belongs to. An fd belongs to exactly - one polling island */ - struct polling_island *polling_island; -}; - -/* Add an fd to a pollset */ -static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - struct grpc_fd *fd); - -static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, grpc_fd *fd); - -/* Convert a timespec to milliseconds: - - very small or negative poll times are clamped to zero to do a - non-blocking poll (which becomes spin polling) - - other small values are rounded up to one millisecond - - longer than a millisecond polls are rounded up to the next nearest - millisecond to avoid spinning - - infinite timeouts are converted to -1 */ -static int poll_deadline_to_millis_timeout(gpr_timespec deadline, - gpr_timespec now); - -/* Allow kick to wakeup the currently polling worker */ -#define GRPC_POLLSET_CAN_KICK_SELF 1 -/* As per pollset_kick, with an extended set of flags (defined above) - -- mostly for fd_posix's use. */ -static void pollset_kick_ext(grpc_pollset *p, - grpc_pollset_worker *specific_worker, - uint32_t flags); - -/* turn a pollset into a multipoller: platform specific */ -typedef void (*platform_become_multipoller_type)(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset, - struct grpc_fd **fds, - size_t fd_count); - -/* Return 1 if the pollset has active threads in pollset_work (pollset must - * be locked) */ -static int pollset_has_workers(grpc_pollset *pollset); - -/******************************************************************************* - * pollset_set definitions - */ - -struct grpc_pollset_set { - gpr_mu mu; - - size_t pollset_count; - size_t pollset_capacity; - grpc_pollset **pollsets; - - size_t pollset_set_count; - size_t pollset_set_capacity; - struct grpc_pollset_set **pollset_sets; - - size_t fd_count; - size_t fd_capacity; - grpc_fd **fds; -}; - -/******************************************************************************* - * fd_posix.c + * Fd Definitions */ -/* We need to keep a freelist not because of any concerns of malloc - * performance +/* We need to keep a freelist not because of any concerns of malloc performance * but instead so that implementations with multiple threads in (for example) * epoll_wait deal with the race between pollset removal and incoming poll * notifications. 
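
The fd reference-counting scheme used throughout these hunks packs an active/orphaned flag into bit 0 of refst and keeps the reference count in the remaining bits: ordinary refs add and drop 2 so the flag is never disturbed, while orphaning nets out to -1 and clears the flag, and the fd is freelisted when the count reaches zero. Below is a minimal standalone sketch of that encoding, using C11 stdatomic as a stand-in for gpr_atm; fake_fd and these helpers are illustrative names, not gRPC API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for grpc_fd: only the refst field is modeled.
   Bit 0 is the active/orphaned flag, bits 1..n hold the refcount. */
typedef struct {
  atomic_long refst;
} fake_fd;

/* Ordinary references always add/subtract 2 so bit 0 is never disturbed. */
static void ref_by(fake_fd *fd, long n) { atomic_fetch_add(&fd->refst, n); }

/* Returns true when the count reaches zero and the fd may be freelisted. */
static bool unref_by(fake_fd *fd, long n) {
  long old = atomic_fetch_sub(&fd->refst, n);
  return old == n;
}

static bool fd_is_orphaned(fake_fd *fd) {
  return (atomic_load(&fd->refst) & 1) == 0;
}

int main(void) {
  fake_fd fd;
  atomic_store(&fd.refst, 1);  /* created: active bit set, no references yet */
  ref_by(&fd, 2);              /* a normal reference                         */
  printf("orphaned after create+ref: %d\n", fd_is_orphaned(&fd)); /* 0 */
  ref_by(&fd, 1);              /* orphan: keep a reference...                */
  unref_by(&fd, 2);            /* ...then drop 2; the net -1 clears bit 0    */
  printf("orphaned after orphan:     %d\n", fd_is_orphaned(&fd)); /* 1 */
  if (unref_by(&fd, 2)) {      /* final reference dropped                    */
    printf("refcount hit zero; fd would go back to the freelist\n");
  }
  return 0;
}

This mirrors the ref_by/unref_by pair in the hunks that follow, where fd_orphan does REF_BY(fd, 1) and later UNREF_BY(fd, 2) so the orphan bit is cleared without the count prematurely reaching zero.
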
@@ -506,58 +534,16 @@ struct grpc_pollset_set { * If we keep the object freelisted, in the worst case losing this race just * becomes a spurious read notification on a reused fd. */ -/* TODO(klempner): We could use some form of polling generation count to know - * when these are safe to free. */ -/* TODO(klempner): Consider disabling freelisting if we don't have multiple - * threads in poll on the same fd */ -/* TODO(klempner): Batch these allocations to reduce fragmentation */ -static grpc_fd *fd_freelist = NULL; -static gpr_mu fd_freelist_mu; - -static void freelist_fd(grpc_fd *fd) { - gpr_mu_lock(&fd_freelist_mu); - fd->freelist_next = fd_freelist; - fd_freelist = fd; - grpc_iomgr_unregister_object(&fd->iomgr_object); - gpr_mu_unlock(&fd_freelist_mu); -} - -static grpc_fd *alloc_fd(int fd) { - grpc_fd *r = NULL; - - gpr_mu_lock(&fd_freelist_mu); - if (fd_freelist != NULL) { - r = fd_freelist; - fd_freelist = fd_freelist->freelist_next; - } - gpr_mu_unlock(&fd_freelist_mu); - - if (r == NULL) { - r = gpr_malloc(sizeof(grpc_fd)); - gpr_mu_init(&r->mu); - gpr_mu_init(&r->pi_mu); - } - /* TODO: sreek - check with ctiller on why we need to acquire a lock here */ - gpr_mu_lock(&r->mu); - gpr_atm_rel_store(&r->refst, 1); - r->shutdown = 0; - r->read_closure = CLOSURE_NOT_READY; - r->write_closure = CLOSURE_NOT_READY; - r->fd = fd; - r->polling_island = NULL; - r->freelist_next = NULL; - r->on_done_closure = NULL; - r->closed = 0; - r->released = 0; - gpr_mu_unlock(&r->mu); - return r; -} +/* The alarm system needs to be able to wakeup 'some poller' sometimes + * (specifically when a new alarm needs to be triggered earlier than the next + * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a + * case occurs. */ +/* TODO: sreek: Right now, this wakes up all pollers */ +grpc_wakeup_fd grpc_global_wakeup_fd; -static void destroy(grpc_fd *fd) { - gpr_mu_destroy(&fd->mu); - gpr_free(fd); -} +static grpc_fd *fd_freelist = NULL; +static gpr_mu fd_freelist_mu; #ifdef GRPC_FD_REF_COUNT_DEBUG #define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__) @@ -588,12 +574,33 @@ static void unref_by(grpc_fd *fd, int n) { #endif old = gpr_atm_full_fetch_add(&fd->refst, -n); if (old == n) { - freelist_fd(fd); + /* Add the fd to the freelist */ + gpr_mu_lock(&fd_freelist_mu); + fd->freelist_next = fd_freelist; + fd_freelist = fd; + grpc_iomgr_unregister_object(&fd->iomgr_object); + gpr_mu_unlock(&fd_freelist_mu); } else { GPR_ASSERT(old > n); } } +/* Increment refcount by two to avoid changing the orphan bit */ +#ifdef GRPC_FD_REF_COUNT_DEBUG +static void fd_ref(grpc_fd *fd, const char *reason, const char *file, + int line) { + ref_by(fd, 2, reason, file, line); +} + +static void fd_unref(grpc_fd *fd, const char *reason, const char *file, + int line) { + unref_by(fd, 2, reason, file, line); +} +#else +static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); } +static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); } +#endif + static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); } static void fd_global_shutdown(void) { @@ -602,91 +609,111 @@ static void fd_global_shutdown(void) { while (fd_freelist != NULL) { grpc_fd *fd = fd_freelist; fd_freelist = fd_freelist->freelist_next; - destroy(fd); + gpr_mu_destroy(&fd->mu); + gpr_free(fd); } gpr_mu_destroy(&fd_freelist_mu); } static grpc_fd *fd_create(int fd, const char *name) { - grpc_fd *r = alloc_fd(fd); + grpc_fd *new_fd = NULL; + + gpr_mu_lock(&fd_freelist_mu); + if (fd_freelist != NULL) { + new_fd = fd_freelist; + fd_freelist = 
fd_freelist->freelist_next; + } + gpr_mu_unlock(&fd_freelist_mu); + + if (new_fd == NULL) { + new_fd = gpr_malloc(sizeof(grpc_fd)); + gpr_mu_init(&new_fd->mu); + gpr_mu_init(&new_fd->pi_mu); + } - char *name2; - gpr_asprintf(&name2, "%s fd=%d", name, fd); - grpc_iomgr_register_object(&r->iomgr_object, name2); - gpr_free(name2); + /* Note: It is not really necessary to acquire the new_fd->mu lock here. If this is a + newly created fd (or an fd we got from the freelist), no one else would be + holding a lock to it anyway. */ + gpr_mu_lock(&new_fd->mu); + + gpr_atm_rel_store(&new_fd->refst, 1); + new_fd->shutdown = false; + new_fd->read_closure = CLOSURE_NOT_READY; + new_fd->write_closure = CLOSURE_NOT_READY; + new_fd->fd = fd; + new_fd->polling_island = NULL; + new_fd->freelist_next = NULL; + new_fd->on_done_closure = NULL; + new_fd->closed = 0; + new_fd->released = false; + + gpr_mu_unlock(&new_fd->mu); + + char *fd_name; + gpr_asprintf(&fd_name, "%s fd=%d", name, fd); + grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name); + gpr_free(fd_name); #ifdef GRPC_FD_REF_COUNT_DEBUG - gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, r, name); + gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, new_fd, fd_name); #endif - return r; + return new_fd; } static bool fd_is_orphaned(grpc_fd *fd) { return (gpr_atm_acq_load(&fd->refst) & 1) == 0; } -static void close_fd_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - fd->closed = 1; - if (!fd->released) { - close(fd->fd); - } else { - /* TODO: sreek - Check for deadlocks */ - - gpr_mu_lock(&fd->pi_mu); - fd->polling_island = - polling_island_update_and_lock(fd->polling_island, 1, 0); - - polling_island_remove_fd_locked(fd->polling_island, fd); - polling_island_unref_and_unlock(fd->polling_island, 1); - - fd->polling_island = NULL; - gpr_mu_unlock(&fd->pi_mu); - } - - grpc_exec_ctx_enqueue(exec_ctx, fd->on_done_closure, true, NULL); -} - static int fd_wrapped_fd(grpc_fd *fd) { - if (fd->released || fd->closed) { - return -1; - } else { - return fd->fd; + int ret_fd = -1; + gpr_mu_lock(&fd->mu); + if (!fd->released && !fd->closed) { + ret_fd = fd->fd; } + gpr_mu_unlock(&fd->mu); + + return ret_fd; }
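/* A brief standalone sketch (not part of this patch) of the refcounting
   convention used here: bit 0 of refst doubles as the active/orphaned flag
   that fd_is_orphaned() tests with (refst & 1), so ordinary references are
   added and dropped in steps of 2, and the REF_BY(fd, 1) / UNREF_BY(fd, 2)
   pair in fd_orphan() below is what clears the flag. The walk-through uses
   plain integers; every name in it is invented for the example. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint64_t refst = 1;       /* fd_create(): flag set, no extra references    */
  refst += 2;               /* fd_ref(): count changes, bit 0 is untouched   */
  refst += 2;               /* a second fd_ref()                             */
  assert((refst & 1) == 1); /* fd_is_orphaned() would still report false     */
  refst += 1;               /* fd_orphan(): REF_BY(fd, 1) makes refst even   */
  assert((refst & 1) == 0); /* ...so fd_is_orphaned() now reports true       */
  refst -= 2;               /* UNREF_BY(fd, 2) at the end of fd_orphan()     */
  refst -= 2;               /* fd_unref()                                    */
  refst -= 2;               /* last fd_unref(): nothing left, fd is freed    */
  assert(refst == 0);
  printf("refst ended at %llu\n", (unsigned long long)refst);
  return 0;
}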
-/* TODO: sreek - do something here with the pollset island link */ static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure *on_done, int *release_fd, const char *reason) { + /* TODO(sreek) In ev_poll_posix.c, the lock is acquired a little later. Why? */ + gpr_mu_lock(&fd->mu); fd->on_done_closure = on_done; + + /* If release_fd is not NULL, we should be relinquishing control of the file + descriptor fd->fd (but we still own the grpc_fd structure). */ fd->released = release_fd != NULL; if (!fd->released) { shutdown(fd->fd, SHUT_RDWR); } else { *release_fd = fd->fd; } - gpr_mu_lock(&fd->mu); - REF_BY(fd, 1, reason); /* remove active status, but keep referenced */ - close_fd_locked(exec_ctx, fd); - gpr_mu_unlock(&fd->mu); - UNREF_BY(fd, 2, reason); /* drop the reference */ -} -/* increment refcount by two to avoid changing the orphan bit */ -#ifdef GRPC_FD_REF_COUNT_DEBUG -static void fd_ref(grpc_fd *fd, const char *reason, const char *file, - int line) { - ref_by(fd, 2, reason, file, line); -} + REF_BY(fd, 1, reason); /* Remove active status, but keep referenced */ + fd->closed = 1; -static void fd_unref(grpc_fd *fd, const char *reason, const char *file, - int line) { - unref_by(fd, 2, reason, file, line); -} -#else -static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); } + /* Remove the fd from the polling island: + - Update the fd->polling_island to point to the latest polling island + - Remove the fd from the polling island. Also, call close() on the file + descriptor fd->fd ONLY if we haven't relinquished control (i.e. + fd->released is 'false') + - Decrement the ref count on the polling island and set fd->polling_island + to NULL */ + gpr_mu_lock(&fd->pi_mu); -static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); } -#endif + fd->polling_island = polling_island_update_and_lock(fd->polling_island, 1, 0); + polling_island_remove_fd_locked(fd->polling_island, fd, !fd->released, true); + polling_island_unref_and_unlock(fd->polling_island, 1); + fd->polling_island = NULL; + + gpr_mu_unlock(&fd->pi_mu); + + grpc_exec_ctx_enqueue(exec_ctx, fd->on_done_closure, true, NULL); + + gpr_mu_unlock(&fd->mu); + UNREF_BY(fd, 2, reason); /* Drop the reference */ +} static void notify_on_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure **st, grpc_closure *closure) { @@ -724,11 +751,13 @@ static int set_ready_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, } } -/* Do something here with the pollset island link (?) */ static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { gpr_mu_lock(&fd->mu); GPR_ASSERT(!fd->shutdown); - fd->shutdown = 1; + fd->shutdown = true; + + /* Flush any pending read and write closures. Since fd->shutdown is 'true' at + this point, the closures would be called with 'success = false' */ set_ready_locked(exec_ctx, fd, &fd->read_closure); set_ready_locked(exec_ctx, fd, &fd->write_closure); gpr_mu_unlock(&fd->mu); @@ -749,27 +778,39 @@ static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, } /******************************************************************************* - * pollset_posix.c + * Pollset Definitions */ -GPR_TLS_DECL(g_current_thread_poller); -GPR_TLS_DECL(g_current_thread_worker); +static void sig_handler(int sig_num) { + /* TODO: sreek - Remove this expensive log line */ + gpr_log(GPR_INFO, "Received signal %d", sig_num); +} -/** The alarm system needs to be able to wakeup 'some poller' sometimes - * (specifically when a new alarm needs to be triggered earlier than the next - * alarm 'epoch'). - * This wakeup_fd gives us something to alert on when such a case occurs. 
*/ -grpc_wakeup_fd grpc_global_wakeup_fd; +/* Global state management */ +static void pollset_global_init(void) { + gpr_tls_init(&g_current_thread_poller); + gpr_tls_init(&g_current_thread_worker); + grpc_wakeup_fd_init(&grpc_global_wakeup_fd); + signal(SIGUSR1, sig_handler); +} -static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) { - worker->prev->next = worker->next; - worker->next->prev = worker->prev; +static void pollset_global_shutdown(void) { + grpc_wakeup_fd_destroy(&grpc_global_wakeup_fd); + gpr_tls_destroy(&g_current_thread_poller); + gpr_tls_destroy(&g_current_thread_worker); } +/* Return 1 if the pollset has active threads in pollset_work (pollset must + * be locked) */ static int pollset_has_workers(grpc_pollset *p) { return p->root_worker.next != &p->root_worker; } +static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->prev->next = worker->next; + worker->next->prev = worker->prev; +} + static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) { if (pollset_has_workers(p)) { grpc_pollset_worker *w = p->root_worker.next; @@ -792,241 +833,69 @@ static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) { worker->prev->next = worker->next->prev = worker; } -static void pollset_kick_ext(grpc_pollset *p, - grpc_pollset_worker *specific_worker, - uint32_t flags) { - GPR_TIMER_BEGIN("pollset_kick_ext", 0); - - /* pollset->mu already held */ - if (specific_worker != NULL) { - if (specific_worker == GRPC_POLLSET_KICK_BROADCAST) { - GPR_TIMER_BEGIN("pollset_kick_ext.broadcast", 0); - for (specific_worker = p->root_worker.next; - specific_worker != &p->root_worker; - specific_worker = specific_worker->next) { - pthread_kill(specific_worker->pt_id, SIGUSR1); +/* p->mu must be held before calling this function */ +static void pollset_kick(grpc_pollset *p, + grpc_pollset_worker *specific_worker) { + GPR_TIMER_BEGIN("pollset_kick", 0); + + grpc_pollset_worker *worker = specific_worker; + if (worker != NULL) { + if (worker == GRPC_POLLSET_KICK_BROADCAST) { + GPR_TIMER_BEGIN("pollset_kick.broadcast", 0); + if (pollset_has_workers(p)) { + for (worker = p->root_worker.next; worker != &p->root_worker; + worker = worker->next) { + pthread_kill(worker->pt_id, SIGUSR1); + } + } else { + p->kicked_without_pollers = true; } - p->kicked_without_pollers = 1; - GPR_TIMER_END("pollset_kick_ext.broadcast", 0); - } else if (gpr_tls_get(&g_current_thread_worker) != - (intptr_t)specific_worker) { - GPR_TIMER_MARK("different_thread_worker", 0); - specific_worker->kicked_specifically = 1; - /* TODO (sreek): Refactor this into a separate file*/ - pthread_kill(specific_worker->pt_id, SIGUSR1); - } else if ((flags & GRPC_POLLSET_CAN_KICK_SELF) != 0) { - GPR_TIMER_MARK("kick_yoself", 0); - specific_worker->kicked_specifically = 1; - pthread_kill(specific_worker->pt_id, SIGUSR1); + GPR_TIMER_END("pollset_kick.broadcast", 0); + } else { + GPR_TIMER_MARK("kicked_specifically", 0); + worker->kicked_specifically = true; + pthread_kill(worker->pt_id, SIGUSR1); } - } else if (gpr_tls_get(&g_current_thread_poller) != (intptr_t)p) { + } else { GPR_TIMER_MARK("kick_anonymous", 0); - specific_worker = pop_front_worker(p); - if (specific_worker != NULL) { - if (gpr_tls_get(&g_current_thread_worker) == (intptr_t)specific_worker) { - GPR_TIMER_MARK("kick_anonymous_not_self", 0); - push_back_worker(p, specific_worker); - specific_worker = pop_front_worker(p); - if ((flags & GRPC_POLLSET_CAN_KICK_SELF) == 0 && - gpr_tls_get(&g_current_thread_worker) == - 
(intptr_t)specific_worker) { - push_back_worker(p, specific_worker); - specific_worker = NULL; - } - } - if (specific_worker != NULL) { - GPR_TIMER_MARK("finally_kick", 0); - push_back_worker(p, specific_worker); - pthread_kill(specific_worker->pt_id, SIGUSR1); - } + worker = pop_front_worker(p); + if (worker != NULL) { + GPR_TIMER_MARK("finally_kick", 0); + push_back_worker(p, worker); + pthread_kill(worker->pt_id, SIGUSR1); } else { GPR_TIMER_MARK("kicked_no_pollers", 0); - p->kicked_without_pollers = 1; + p->kicked_without_pollers = true; } } - GPR_TIMER_END("pollset_kick_ext", 0); + GPR_TIMER_END("pollset_kick", 0); } -static void pollset_kick(grpc_pollset *p, - grpc_pollset_worker *specific_worker) { - pollset_kick_ext(p, specific_worker, 0); -} +static void kick_poller(void) { grpc_wakeup_fd_wakeup(&grpc_global_wakeup_fd); } -/* global state management */ +static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { + gpr_mu_init(&pollset->mu); + *mu = &pollset->mu; -static void sig_handler(int sig_num) { - gpr_log(GPR_INFO, "Received signal %d", sig_num); -} + pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker; + pollset->kicked_without_pollers = false; -static void pollset_global_init(void) { - gpr_tls_init(&g_current_thread_poller); - gpr_tls_init(&g_current_thread_worker); - grpc_wakeup_fd_init(&grpc_global_wakeup_fd); - signal(SIGUSR1, sig_handler); -} + pollset->shutting_down = false; + pollset->finish_shutdown_called = false; + pollset->shutdown_done = NULL; -static void pollset_global_shutdown(void) { - grpc_wakeup_fd_destroy(&grpc_global_wakeup_fd); - gpr_tls_destroy(&g_current_thread_poller); - gpr_tls_destroy(&g_current_thread_worker); -} - -static void kick_poller(void) { grpc_wakeup_fd_wakeup(&grpc_global_wakeup_fd); } - -/* TODO: sreek. Try to Remove this forward declaration*/ -static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset); - -/* main interface */ - -static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { - gpr_mu_init(&pollset->mu); - *mu = &pollset->mu; - pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker; gpr_mu_init(&pollset->pi_mu); pollset->polling_island = NULL; - pollset->shutting_down = 0; - pollset->called_shutdown = 0; - pollset->kicked_without_pollers = 0; - - multipoll_with_epoll_pollset_create_efd(pollset); -} - -/* TODO(sreek): Maybe merge multipoll_*_destroy() with pollset_destroy() - * function */ -static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset); - -static void pollset_destroy(grpc_pollset *pollset) { - GPR_ASSERT(!pollset_has_workers(pollset)); - - multipoll_with_epoll_pollset_destroy(pollset); - - gpr_mu_destroy(&pollset->pi_mu); - gpr_mu_destroy(&pollset->mu); -} - -/* TODO(sreek) - Do something with the pollset island link (??) 
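/* A standalone sketch (not part of this patch) of the kick mechanism that
   pollset_kick() and pollset_work() rely on: the worker thread blocks SIGUSR1
   with pthread_sigmask() so a kick delivered outside the wait stays pending,
   and epoll_pwait() atomically swaps in a mask with SIGUSR1 unblocked, so a
   pthread_kill(worker, SIGUSR1) interrupts only the wait with EINTR.
   Everything below (thread function, variable names, the timeout) is invented
   for the example; only the sigmask/epoll_pwait()/pthread_kill() usage mirrors
   the code in this patch. Build with -pthread. */
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

static void on_sigusr1(int sig) { (void)sig; /* present only to override the default action */ }

static void *worker_thread(void *arg) {
  int epoll_fd = *(int *)arg;
  sigset_t blocked, wait_mask;
  sigemptyset(&blocked);
  sigaddset(&blocked, SIGUSR1);
  pthread_sigmask(SIG_BLOCK, &blocked, &wait_mask); /* blocked outside the wait */
  sigdelset(&wait_mask, SIGUSR1);                   /* ...but not inside it     */

  struct epoll_event ev;
  int n = epoll_pwait(epoll_fd, &ev, 1, 10000 /* ms */, &wait_mask);
  if (n < 0 && errno == EINTR) {
    printf("worker: kicked\n");
  }
  return NULL;
}

int main(void) {
  signal(SIGUSR1, on_sigusr1);
  int epoll_fd = epoll_create1(EPOLL_CLOEXEC);
  pthread_t worker;
  pthread_create(&worker, NULL, worker_thread, &epoll_fd);
  sleep(1);                      /* let the worker reach epoll_pwait() */
  pthread_kill(worker, SIGUSR1); /* the kick */
  pthread_join(worker, NULL);
  close(epoll_fd);
  return 0;
}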
*/ -static void pollset_reset(grpc_pollset *pollset) { - GPR_ASSERT(pollset->shutting_down); - GPR_ASSERT(!pollset_has_workers(pollset)); - pollset->shutting_down = 0; - pollset->called_shutdown = 0; - pollset->kicked_without_pollers = 0; -} - -/* TODO (sreek): Remove multipoll_with_epoll_finish_shutdown() declaration */ -static void multipoll_with_epoll_pollset_finish_shutdown(grpc_pollset *pollset); - -static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { - multipoll_with_epoll_pollset_finish_shutdown(pollset); - grpc_exec_ctx_enqueue(exec_ctx, pollset->shutdown_done, true, NULL); -} - -/* TODO(sreek): Remove multipoll_with_epoll_*_maybe_work_and_unlock - * declaration - */ -static void multipoll_with_epoll_pollset_maybe_work_and_unlock( - grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker *worker, - gpr_timespec deadline, gpr_timespec now); - -static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker **worker_hdl, gpr_timespec now, - gpr_timespec deadline) { - grpc_pollset_worker worker; - *worker_hdl = &worker; - - /* pollset->mu already held */ - int added_worker = 0; - int locked = 1; - int queued_work = 0; - int keep_polling = 0; - GPR_TIMER_BEGIN("pollset_work", 0); - /* this must happen before we (potentially) drop pollset->mu */ - worker.next = worker.prev = NULL; - worker.kicked_specifically = 0; - - /* TODO(sreek): Abstract this thread id stuff out into a separate file */ - worker.pt_id = pthread_self(); - /* If we're shutting down then we don't execute any extended work */ - if (pollset->shutting_down) { - GPR_TIMER_MARK("pollset_work.shutting_down", 0); - goto done; - } - /* Start polling, and keep doing so while we're being asked to - re-evaluate our pollers (this allows poll() based pollers to - ensure they don't miss wakeups) */ - keep_polling = 1; - while (keep_polling) { - keep_polling = 0; - if (!pollset->kicked_without_pollers) { - if (!added_worker) { - push_front_worker(pollset, &worker); - added_worker = 1; - gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); - } - gpr_tls_set(&g_current_thread_poller, (intptr_t)pollset); - GPR_TIMER_BEGIN("maybe_work_and_unlock", 0); - - multipoll_with_epoll_pollset_maybe_work_and_unlock( - exec_ctx, pollset, &worker, deadline, now); - - GPR_TIMER_END("maybe_work_and_unlock", 0); - locked = 0; - gpr_tls_set(&g_current_thread_poller, 0); - } else { - GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); - pollset->kicked_without_pollers = 0; - } - /* Finished execution - start cleaning up. - Note that we may arrive here from outside the enclosing while() loop. - In that case we won't loop though as we haven't added worker to the - worker list, which means nobody could ask us to re-evaluate polling). */ - done: - if (!locked) { - queued_work |= grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&pollset->mu); - locked = 1; - } - } - if (added_worker) { - remove_worker(pollset, &worker); - gpr_tls_set(&g_current_thread_worker, 0); - } - - /* check shutdown conditions */ - if (pollset->shutting_down) { - if (pollset_has_workers(pollset)) { - pollset_kick(pollset, NULL); - } else if (!pollset->called_shutdown) { - pollset->called_shutdown = 1; - gpr_mu_unlock(&pollset->mu); - finish_shutdown(exec_ctx, pollset); - grpc_exec_ctx_flush(exec_ctx); - /* Continuing to access pollset here is safe -- it is the caller's - * responsibility to not destroy when it has outstanding calls to - * pollset_work. 
- * TODO(dklempner): Can we refactor the shutdown logic to avoid this? */ - gpr_mu_lock(&pollset->mu); - } - } - *worker_hdl = NULL; - GPR_TIMER_END("pollset_work", 0); -} - -/* TODO: (sreek) Do something with the pollset island link */ -static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_closure *closure) { - GPR_ASSERT(!pollset->shutting_down); - pollset->shutting_down = 1; - pollset->shutdown_done = closure; - pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST); - - if (!pollset->called_shutdown && !pollset_has_workers(pollset)) { - pollset->called_shutdown = 1; - finish_shutdown(exec_ctx, pollset); - } } +/* Convert a timespec to milliseconds: + - Very small or negative poll times are clamped to zero to do a non-blocking + poll (which becomes spin polling) + - Other small values are rounded up to one millisecond + - Longer than a millisecond polls are rounded up to the next nearest + millisecond to avoid spinning + - Infinite timeouts are converted to -1 */ static int poll_deadline_to_millis_timeout(gpr_timespec deadline, gpr_timespec now) { gpr_timespec timeout; @@ -1034,6 +903,7 @@ static int poll_deadline_to_millis_timeout(gpr_timespec deadline, if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { return -1; } + if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros( max_spin_polling_us, GPR_TIMESPAN))) <= 0) { @@ -1044,10 +914,6 @@ static int poll_deadline_to_millis_timeout(gpr_timespec deadline, timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); } -/******************************************************************************* - * pollset_multipoller_with_epoll_posix.c - */ - static void set_ready(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure **st) { /* only one set_ready can be active at once (but there may be a racing notify_on) */ @@ -1064,94 +930,46 @@ static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { set_ready(exec_ctx, fd, &fd->write_closure); } -/* TODO: sreek - This function multipoll_with_epoll_pollset_add_fd() and - * finally_add_fd() in ev_poll_and_epoll_posix.c */ -static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_fd *fd) { - /* TODO sreek - Check if we need to get a pollset->mu lock here */ - gpr_mu_lock(&pollset->pi_mu); - gpr_mu_lock(&fd->pi_mu); - - polling_island *pi_new = NULL; - - if (fd->polling_island == pollset->polling_island) { - pi_new = fd->polling_island; - if (pi_new == NULL) { - pi_new = polling_island_create(fd, 2); - } - } else if (fd->polling_island == NULL) { - pi_new = polling_island_update_and_lock(pollset->polling_island, 1, 1); - } else if (pollset->polling_island == NULL) { - pi_new = polling_island_update_and_lock(fd->polling_island, 1, 1); - } else { - pi_new = polling_island_merge(fd->polling_island, pollset->polling_island); - } - - fd->polling_island = pollset->polling_island = pi_new; - - gpr_mu_unlock(&fd->pi_mu); - gpr_mu_unlock(&pollset->pi_mu); -} - -/* Creates an epoll fd and initializes the pollset */ -/* TODO: This has to be called ONLY from pollset_init function. 
and hence it - * does not acquire any lock */ -static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset) { - struct epoll_event ev; - int err; - - pollset->epoll_fd = epoll_create1(EPOLL_CLOEXEC); - if (pollset->epoll_fd < 0) { - gpr_log(GPR_ERROR, "epoll_create1 failed: %s", strerror(errno)); - abort(); - } - - ev.events = (uint32_t)(EPOLLIN | EPOLLET); - ev.data.ptr = NULL; - - err = epoll_ctl(pollset->epoll_fd, EPOLL_CTL_ADD, - GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), &ev); - if (err < 0) { - gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", - GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), - strerror(errno)); - } -} - /* TODO(klempner): We probably want to turn this down a bit */ #define GRPC_EPOLL_MAX_EVENTS 1000 - -static void multipoll_with_epoll_pollset_maybe_work_and_unlock( - grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker *worker, - gpr_timespec deadline, gpr_timespec now) { +static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, int timeout_ms, + sigset_t *sig_mask) { struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; - int epoll_fd = pollset->epoll_fd; + int epoll_fd; int ep_rv; - int timeout_ms; + GPR_TIMER_BEGIN("pollset_work_and_unlock", 0); + + /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the + polling island pointed by pollset->polling_island. + Acquire the following locks: + - pollset->mu (which we already have) + - pollset->pi_mu + - pollset->polling_island->mu */ + gpr_mu_lock(&pollset->pi_mu); + pollset->polling_island = + polling_island_update_and_lock(pollset->polling_island, 1, 0); - /* If you want to ignore epoll's ability to sanely handle parallel pollers, - * for a more apples-to-apples performance comparison with poll, add a - * if (pollset->counter != 0) { return 0; } - * here. - */ + epoll_fd = pollset->polling_island->epoll_fd; + /* Release the locks */ + polling_island_unref_and_unlock(pollset->polling_island, 0); /* Keep the ref*/ + gpr_mu_unlock(&pollset->pi_mu); gpr_mu_unlock(&pollset->mu); - timeout_ms = poll_deadline_to_millis_timeout(deadline, now); - do { - /* The following epoll_wait never blocks; it has a timeout of 0 */ - ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms); + ep_rv = epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, + sig_mask); + if (ep_rv < 0) { if (errno != EINTR) { - gpr_log(GPR_ERROR, "epoll_wait() failed: %s", strerror(errno)); + /* TODO (sreek) - Check for bad file descriptor error */ + gpr_log(GPR_ERROR, "epoll_pwait() failed: %s", strerror(errno)); } } else { int i; for (i = 0; i < ep_rv; ++i) { grpc_fd *fd = ep_ev[i].data.ptr; - /* TODO(klempner): We might want to consider making err and pri - * separate events */ int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); int write_ev = ep_ev[i].events & EPOLLOUT; @@ -1168,17 +986,179 @@ static void multipoll_with_epoll_pollset_maybe_work_and_unlock( } } } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); + + GPR_TIMER_END("pollset_work_and_unlock", 0); +} + +/* Release the reference to pollset->polling_island and set it to NULL. 
+ pollset->mu must be held */ +static void pollset_release_polling_island_locked(grpc_pollset *pollset) { + gpr_mu_lock(&pollset->pi_mu); + if (pollset->polling_island) { + pollset->polling_island = + polling_island_update_and_lock(pollset->polling_island, 1, 0); + polling_island_unref_and_unlock(pollset->polling_island, 1); + pollset->polling_island = NULL; + } + gpr_mu_unlock(&pollset->pi_mu); +} + +static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset) { + /* The pollset cannot have any workers if we are at this stage */ + GPR_ASSERT(!pollset_has_workers(pollset)); + + pollset->finish_shutdown_called = true; + pollset_release_polling_island_locked(pollset); + + grpc_exec_ctx_enqueue(exec_ctx, pollset->shutdown_done, true, NULL); +} + +/* pollset->mu lock must be held by the caller before calling this */ +static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_closure *closure) { + GPR_TIMER_BEGIN("pollset_shutdown", 0); + GPR_ASSERT(!pollset->shutting_down); + pollset->shutting_down = true; + pollset->shutdown_done = closure; + pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST); + + /* If the pollset has any workers, we cannot call finish_shutdown_locked() + because it would release the underlying polling island. In such a case, we + let the last worker call finish_shutdown_locked() from pollset_work() */ + if (!pollset_has_workers(pollset)) { + GPR_ASSERT(!pollset->finish_shutdown_called); + GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0); + finish_shutdown_locked(exec_ctx, pollset); + } + GPR_TIMER_END("pollset_shutdown", 0); } -static void multipoll_with_epoll_pollset_finish_shutdown( - grpc_pollset *pollset) {} +/* TODO(sreek) Is pollset_shutdown() guranteed to be called before this? */ +static void pollset_destroy(grpc_pollset *pollset) { + GPR_ASSERT(!pollset_has_workers(pollset)); + gpr_mu_destroy(&pollset->pi_mu); + gpr_mu_destroy(&pollset->mu); +} -static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset) { - close(pollset->epoll_fd); +static void pollset_reset(grpc_pollset *pollset) { + GPR_ASSERT(pollset->shutting_down); + GPR_ASSERT(!pollset_has_workers(pollset)); + pollset->shutting_down = false; + pollset->finish_shutdown_called = false; + pollset->kicked_without_pollers = false; + /* TODO(sreek) - Should pollset->shutdown closure be set to NULL here? */ + pollset_release_polling_island_locked(pollset); +} + +/* pollset->mu lock must be held by the caller before calling this. + The function pollset_work() may temporarily release the lock (pollset->mu) + during the course of its execution but it will always re-acquire the lock and + ensure that it is held by the time the function returns */ +static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker **worker_hdl, gpr_timespec now, + gpr_timespec deadline) { + GPR_TIMER_BEGIN("pollset_work", 0); + + int timeout_ms = poll_deadline_to_millis_timeout(deadline, now); + + sigset_t new_mask; + sigset_t orig_mask; + + grpc_pollset_worker worker; + worker.next = worker.prev = NULL; + worker.kicked_specifically = 0; + worker.pt_id = pthread_self(); + + *worker_hdl = &worker; + + if (pollset->kicked_without_pollers) { + /* If the pollset was kicked without pollers, pretend that the current + worker got the kick and skip polling. 
A kick indicates that there is some + work that needs attention like an event on the completion queue or an + alarm */ + GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); + pollset->kicked_without_pollers = 0; + } else if (!pollset->shutting_down) { + sigemptyset(&new_mask); + sigaddset(&new_mask, SIGUSR1); + pthread_sigmask(SIG_BLOCK, &new_mask, &orig_mask); + sigdelset(&orig_mask, SIGUSR1); + + push_front_worker(pollset, &worker); + + pollset_work_and_unlock(exec_ctx, pollset, timeout_ms, &orig_mask); + grpc_exec_ctx_flush(exec_ctx); + + gpr_mu_lock(&pollset->mu); + remove_worker(pollset, &worker); + } + + /* If we are the last worker on the pollset (i.e pollset_has_workers() is + false at this point) and the pollset is shutting down, we may have to + finish the shutdown process by calling finish_shutdown_locked(). + See pollset_shutdown() for more details. + + Note: Continuing to access pollset here is safe; it is the caller's + responsibility to not destroy a pollset when it has outstanding calls to + pollset_work() */ + if (pollset->shutting_down && !pollset_has_workers(pollset) && + !pollset->finish_shutdown_called) { + GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0); + finish_shutdown_locked(exec_ctx, pollset); + + gpr_mu_unlock(&pollset->mu); + grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&pollset->mu); + } + + *worker_hdl = NULL; + GPR_TIMER_END("pollset_work", 0); +} + +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_fd *fd) { + /* TODO sreek - Check if we need to get a pollset->mu lock here */ + gpr_mu_lock(&pollset->pi_mu); + gpr_mu_lock(&fd->pi_mu); + + polling_island *pi_new = NULL; + + /* 1) If fd->polling_island and pollset->polling_island are both non-NULL and + * equal, do nothing. + * 2) If fd->polling_island and pollset->polling_island are both NULL, create + * a new polling island (with a refcount of 2) and make the polling_island + * fields in both fd and pollset to point to the new island + * 3) If one of fd->polling_island or pollset->polling_island is NULL, update + * the NULL polling_island field to point to the non-NULL polling_island + * field (ensure that the refcount on the polling island is incremented by + * 1 to account for the newly added reference) + * 4) Finally, if fd->polling_island and pollset->polling_island are non-NULL + * and different, merge both the polling islands and update the + * polling_island fields in both fd and pollset to point to the merged + * polling island. 
+ */ + if (fd->polling_island == pollset->polling_island) { + pi_new = fd->polling_island; + if (pi_new == NULL) { + pi_new = polling_island_create(fd, 2); + } + } else if (fd->polling_island == NULL) { + pi_new = polling_island_update_and_lock(pollset->polling_island, 1, 1); + } else if (pollset->polling_island == NULL) { + pi_new = polling_island_update_and_lock(fd->polling_island, 1, 1); + } else { + pi_new = polling_island_merge(fd->polling_island, pollset->polling_island); + } + + fd->polling_island = pollset->polling_island = pi_new; + + gpr_mu_unlock(&fd->pi_mu); + gpr_mu_unlock(&pollset->pi_mu); } /******************************************************************************* - * pollset_set_posix.c + * Pollset-set Definitions */ static grpc_pollset_set *pollset_set_create(void) { @@ -1200,6 +1180,45 @@ static void pollset_set_destroy(grpc_pollset_set *pollset_set) { gpr_free(pollset_set); } +static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd) { + size_t i; + gpr_mu_lock(&pollset_set->mu); + if (pollset_set->fd_count == pollset_set->fd_capacity) { + pollset_set->fd_capacity = GPR_MAX(8, 2 * pollset_set->fd_capacity); + pollset_set->fds = gpr_realloc( + pollset_set->fds, pollset_set->fd_capacity * sizeof(*pollset_set->fds)); + } + GRPC_FD_REF(fd, "pollset_set"); + pollset_set->fds[pollset_set->fd_count++] = fd; + for (i = 0; i < pollset_set->pollset_count; i++) { + pollset_add_fd(exec_ctx, pollset_set->pollsets[i], fd); + } + for (i = 0; i < pollset_set->pollset_set_count; i++) { + pollset_set_add_fd(exec_ctx, pollset_set->pollset_sets[i], fd); + } + gpr_mu_unlock(&pollset_set->mu); +} + +static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd) { + size_t i; + gpr_mu_lock(&pollset_set->mu); + for (i = 0; i < pollset_set->fd_count; i++) { + if (pollset_set->fds[i] == fd) { + pollset_set->fd_count--; + GPR_SWAP(grpc_fd *, pollset_set->fds[i], + pollset_set->fds[pollset_set->fd_count]); + GRPC_FD_UNREF(fd, "pollset_set"); + break; + } + } + for (i = 0; i < pollset_set->pollset_set_count; i++) { + pollset_set_del_fd(exec_ctx, pollset_set->pollset_sets[i], fd); + } + gpr_mu_unlock(&pollset_set->mu); +} + static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pollset_set, grpc_pollset *pollset) { @@ -1281,47 +1300,8 @@ static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, gpr_mu_unlock(&bag->mu); } -static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, grpc_fd *fd) { - size_t i; - gpr_mu_lock(&pollset_set->mu); - if (pollset_set->fd_count == pollset_set->fd_capacity) { - pollset_set->fd_capacity = GPR_MAX(8, 2 * pollset_set->fd_capacity); - pollset_set->fds = gpr_realloc( - pollset_set->fds, pollset_set->fd_capacity * sizeof(*pollset_set->fds)); - } - GRPC_FD_REF(fd, "pollset_set"); - pollset_set->fds[pollset_set->fd_count++] = fd; - for (i = 0; i < pollset_set->pollset_count; i++) { - pollset_add_fd(exec_ctx, pollset_set->pollsets[i], fd); - } - for (i = 0; i < pollset_set->pollset_set_count; i++) { - pollset_set_add_fd(exec_ctx, pollset_set->pollset_sets[i], fd); - } - gpr_mu_unlock(&pollset_set->mu); -} - -static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, grpc_fd *fd) { - size_t i; - gpr_mu_lock(&pollset_set->mu); - for (i = 0; i < pollset_set->fd_count; i++) { - if (pollset_set->fds[i] == fd) { - pollset_set->fd_count--; - GPR_SWAP(grpc_fd *, pollset_set->fds[i], - 
pollset_set->fds[pollset_set->fd_count]); - GRPC_FD_UNREF(fd, "pollset_set"); - break; - } - } - for (i = 0; i < pollset_set->pollset_set_count; i++) { - pollset_set_del_fd(exec_ctx, pollset_set->pollset_sets[i], fd); - } - gpr_mu_unlock(&pollset_set->mu); -} - /******************************************************************************* - * event engine binding + * Event engine binding */ static void shutdown_engine(void) { -- cgit v1.2.3 From 73ef9154024290e86a3566a574c04992afc93d00 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 3 Jun 2016 15:25:05 -0700 Subject: epoll polling strategy now points to the new code --- src/core/lib/iomgr/ev_epoll_linux.c | 4 ---- src/core/lib/iomgr/ev_posix.c | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index ce42a9e7ce..ab4224b2d5 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -788,16 +788,12 @@ static void sig_handler(int sig_num) { /* Global state management */ static void pollset_global_init(void) { - gpr_tls_init(&g_current_thread_poller); - gpr_tls_init(&g_current_thread_worker); grpc_wakeup_fd_init(&grpc_global_wakeup_fd); signal(SIGUSR1, sig_handler); } static void pollset_global_shutdown(void) { grpc_wakeup_fd_destroy(&grpc_global_wakeup_fd); - gpr_tls_destroy(&g_current_thread_poller); - gpr_tls_destroy(&g_current_thread_worker); } /* Return 1 if the pollset has active threads in pollset_work (pollset must diff --git a/src/core/lib/iomgr/ev_posix.c b/src/core/lib/iomgr/ev_posix.c index 96399ef837..b6b113aed3 100644 --- a/src/core/lib/iomgr/ev_posix.c +++ b/src/core/lib/iomgr/ev_posix.c @@ -44,7 +44,7 @@ #include #include -#include "src/core/lib/iomgr/ev_epoll_posix.h" +#include "src/core/lib/iomgr/ev_epoll_linux.h" #include "src/core/lib/iomgr/ev_poll_posix.h" #include "src/core/lib/support/env.h" @@ -62,7 +62,7 @@ typedef struct { } event_engine_factory; static const event_engine_factory g_factories[] = { - {"poll", grpc_init_poll_posix}, {"epoll", grpc_init_epoll_posix}, + {"poll", grpc_init_poll_posix}, {"epoll", grpc_init_epoll_linux}, }; static void add(const char *beg, const char *end, char ***ss, size_t *ns) { -- cgit v1.2.3 From 88ee12fbe98685e736366d6a151a10ed103f8979 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 3 Jun 2016 19:26:48 -0700 Subject: Handle pollsets and fds witn no polling islands and fix locking bug in pollset_add_fd --- src/core/lib/iomgr/ev_epoll_linux.c | 83 ++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 38 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index ab4224b2d5..0fb1ccfa0f 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -701,12 +701,14 @@ static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - Decrement the ref count on the polling island and det fd->polling_island to NULL */ gpr_mu_lock(&fd->pi_mu); - - fd->polling_island = polling_island_update_and_lock(fd->polling_island, 1, 0); - polling_island_remove_fd_locked(fd->polling_island, fd, !fd->released, true); - polling_island_unref_and_unlock(fd->polling_island, 1); - fd->polling_island = NULL; - + if (fd->polling_island != NULL) { + fd->polling_island = + polling_island_update_and_lock(fd->polling_island, 1, 0); + polling_island_remove_fd_locked(fd->polling_island, fd, !fd->released, + true); + 
polling_island_unref_and_unlock(fd->polling_island, 1); + fd->polling_island = NULL; + } gpr_mu_unlock(&fd->pi_mu); grpc_exec_ctx_enqueue(exec_ctx, fd->on_done_closure, true, NULL); @@ -926,13 +928,12 @@ static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { set_ready(exec_ctx, fd, &fd->write_closure); } -/* TODO(klempner): We probably want to turn this down a bit */ #define GRPC_EPOLL_MAX_EVENTS 1000 static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, int timeout_ms, sigset_t *sig_mask) { struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; - int epoll_fd; + int epoll_fd = -1; int ep_rv; GPR_TIMER_BEGIN("pollset_work_and_unlock", 0); @@ -943,45 +944,49 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, - pollset->pi_mu - pollset->polling_island->mu */ gpr_mu_lock(&pollset->pi_mu); - pollset->polling_island = - polling_island_update_and_lock(pollset->polling_island, 1, 0); - epoll_fd = pollset->polling_island->epoll_fd; + if (pollset->polling_island != NULL) { + pollset->polling_island = + polling_island_update_and_lock(pollset->polling_island, 1, 0); + epoll_fd = pollset->polling_island->epoll_fd; + gpr_mu_unlock(&pollset->polling_island->mu); + } - /* Release the locks */ - polling_island_unref_and_unlock(pollset->polling_island, 0); /* Keep the ref*/ gpr_mu_unlock(&pollset->pi_mu); gpr_mu_unlock(&pollset->mu); - do { - ep_rv = epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, - sig_mask); + /* If epoll_fd == -1, this is a blank pollset and does not have any fds yet */ + if (epoll_fd != -1) { + do { + ep_rv = epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, + sig_mask); - if (ep_rv < 0) { - if (errno != EINTR) { - /* TODO (sreek) - Check for bad file descriptor error */ - gpr_log(GPR_ERROR, "epoll_pwait() failed: %s", strerror(errno)); - } - } else { - int i; - for (i = 0; i < ep_rv; ++i) { - grpc_fd *fd = ep_ev[i].data.ptr; - int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); - int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); - int write_ev = ep_ev[i].events & EPOLLOUT; - if (fd == NULL) { - grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); - } else { - if (read_ev || cancel) { - fd_become_readable(exec_ctx, fd); - } - if (write_ev || cancel) { - fd_become_writable(exec_ctx, fd); + if (ep_rv < 0) { + if (errno != EINTR) { + /* TODO (sreek) - Check for bad file descriptor error */ + gpr_log(GPR_ERROR, "epoll_pwait() failed: %s", strerror(errno)); + } + } else { + int i; + for (i = 0; i < ep_rv; ++i) { + grpc_fd *fd = ep_ev[i].data.ptr; + int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); + int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); + int write_ev = ep_ev[i].events & EPOLLOUT; + if (fd == NULL) { + grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); + } else { + if (read_ev || cancel) { + fd_become_readable(exec_ctx, fd); + } + if (write_ev || cancel) { + fd_become_writable(exec_ctx, fd); + } } } } - } - } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); + } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); + } GPR_TIMER_END("pollset_work_and_unlock", 0); } @@ -1141,8 +1146,10 @@ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, } } else if (fd->polling_island == NULL) { pi_new = polling_island_update_and_lock(pollset->polling_island, 1, 1); + gpr_mu_unlock(&pi_new->mu); } else if (pollset->polling_island == NULL) { pi_new = polling_island_update_and_lock(fd->polling_island, 1, 1); + gpr_mu_unlock(&pi_new->mu); } else { pi_new = polling_island_merge(fd->polling_island, 
pollset->polling_island); } -- cgit v1.2.3 From 79a6233bef501e1f51250351a55abc61cc024827 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Sat, 4 Jun 2016 14:01:03 -0700 Subject: Fix a few bugs in ev_epoll_linux.c 1. pollset_add_fd: Add fd to epoll set if fd->polling_island == NULL 2. close(fd) in fd_orphan instead of polling_island_remove_fd_locked() since fd->polling_island may be NULL 3. If pollset work() is interrupted, do a zero timeout epoll_wait(). pollset_work may be called without a polling island --- src/core/lib/iomgr/ev_epoll_linux.c | 125 ++++++++++++++++++++++-------------- 1 file changed, 78 insertions(+), 47 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 0fb1ccfa0f..3aa26109f2 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -72,9 +72,14 @@ struct grpc_fd { gpr_atm refst; gpr_mu mu; + + /* Indicates that the fd is shutdown and that any pending read/write closures + should fail */ bool shutdown; - int closed; - bool released; + + /* The fd is either closed or we relinquished control of it. In either cases, + this indicates that the 'fd' on this structure is no longer valid */ + bool orphaned; grpc_closure *read_closure; grpc_closure *write_closure; @@ -251,16 +256,13 @@ static void polling_island_remove_all_fds_locked(polling_island *pi, /* The caller is expected to hold pi->mu lock before calling this function */ static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd, - bool close_fd, bool remove_fd_ref) { + bool is_fd_closed) { int err; size_t i; - /* Calling close() on the fd will automatically remove it from the epoll set. - If not calling close(), the fd must be explicitly removed from the epoll - set */ - if (close_fd) { - close(fd->fd); - } else { + /* If fd is already closed, then it would have been automatically been removed + from the epoll set */ + if (!is_fd_closed) { err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL); if (err < 0 && errno != ENOENT) { gpr_log(GPR_ERROR, "epoll_ctl delete for fd: %d failed with error; %s", @@ -271,9 +273,7 @@ static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd, for (i = 0; i < pi->fd_cnt; i++) { if (pi->fds[i] == fd) { pi->fds[i] = pi->fds[--pi->fd_cnt]; - if (remove_fd_ref) { - GRPC_FD_UNREF(fd, "polling_island"); - } + GRPC_FD_UNREF(fd, "polling_island"); break; } } @@ -644,8 +644,7 @@ static grpc_fd *fd_create(int fd, const char *name) { new_fd->polling_island = NULL; new_fd->freelist_next = NULL; new_fd->on_done_closure = NULL; - new_fd->closed = 0; - new_fd->released = false; + new_fd->orphaned = false; gpr_mu_unlock(&new_fd->mu); @@ -666,7 +665,7 @@ static bool fd_is_orphaned(grpc_fd *fd) { static int fd_wrapped_fd(grpc_fd *fd) { int ret_fd = -1; gpr_mu_lock(&fd->mu); - if (!fd->released && !fd->closed) { + if (!fd->orphaned) { ret_fd = fd->fd; } gpr_mu_unlock(&fd->mu); @@ -678,34 +677,35 @@ static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure *on_done, int *release_fd, const char *reason) { /* TODO(sreek) In ev_poll_posix.c,the lock is acquired a little later. Why? */ + bool is_fd_closed = false; gpr_mu_lock(&fd->mu); fd->on_done_closure = on_done; /* If release_fd is not NULL, we should be relinquishing control of the file descriptor fd->fd (but we still own the grpc_fd structure). 
*/ - fd->released = release_fd != NULL; - if (!fd->released) { - shutdown(fd->fd, SHUT_RDWR); - } else { + if (release_fd != NULL) { *release_fd = fd->fd; + } else { + close(fd->fd); + is_fd_closed = true; } - REF_BY(fd, 1, reason); /* Remove active status, but keep referenced */ - fd->closed = 1; + fd->orphaned = true; + + /* Remove the active status but keep referenced. We want this grpc_fd struct + to be alive (and not added to freelist) until the end of this function */ + REF_BY(fd, 1, reason); /* Remove the fd from the polling island: - Update the fd->polling_island to point to the latest polling island - - Remove the fd from the polling island. Also, call close() on the file - descriptor fd->fd ONLY if we haven't relinquised control (i.e - fd->released is 'false') - - Decrement the ref count on the polling island and det fd->polling_island - to NULL */ + - Remove the fd from the polling island. + - Remove a ref to the polling island and set fd->polling_island to NULL */ gpr_mu_lock(&fd->pi_mu); if (fd->polling_island != NULL) { fd->polling_island = polling_island_update_and_lock(fd->polling_island, 1, 0); - polling_island_remove_fd_locked(fd->polling_island, fd, !fd->released, - true); + polling_island_remove_fd_locked(fd->polling_island, fd, is_fd_closed); + polling_island_unref_and_unlock(fd->polling_island, 1); fd->polling_island = NULL; } @@ -839,17 +839,20 @@ static void pollset_kick(grpc_pollset *p, grpc_pollset_worker *worker = specific_worker; if (worker != NULL) { if (worker == GRPC_POLLSET_KICK_BROADCAST) { - GPR_TIMER_BEGIN("pollset_kick.broadcast", 0); + gpr_log(GPR_DEBUG, "pollset_kick: broadcast!"); if (pollset_has_workers(p)) { + GPR_TIMER_BEGIN("pollset_kick.broadcast", 0); for (worker = p->root_worker.next; worker != &p->root_worker; worker = worker->next) { pthread_kill(worker->pt_id, SIGUSR1); } } else { + gpr_log(GPR_DEBUG, "pollset_kick: (broadcast) Kicked without pollers"); p->kicked_without_pollers = true; } GPR_TIMER_END("pollset_kick.broadcast", 0); } else { + gpr_log(GPR_DEBUG, "pollset_kick: kicked kicked_specifically"); GPR_TIMER_MARK("kicked_specifically", 0); worker->kicked_specifically = true; pthread_kill(worker->pt_id, SIGUSR1); @@ -860,9 +863,11 @@ static void pollset_kick(grpc_pollset *p, if (worker != NULL) { GPR_TIMER_MARK("finally_kick", 0); push_back_worker(p, worker); + gpr_log(GPR_DEBUG, "pollset_kick: anonymous kick"); pthread_kill(worker->pt_id, SIGUSR1); } else { GPR_TIMER_MARK("kicked_no_pollers", 0); + gpr_log(GPR_DEBUG, "pollset_kick: kicked without pollers"); p->kicked_without_pollers = true; } } @@ -935,6 +940,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; int epoll_fd = -1; int ep_rv; + gpr_log(GPR_DEBUG, "pollset_work_and_unlock: Entering.."); GPR_TIMER_BEGIN("pollset_work_and_unlock", 0); /* We need to get the epoll_fd to wait on. 
The epoll_fd is in inside the @@ -949,6 +955,16 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, pollset->polling_island = polling_island_update_and_lock(pollset->polling_island, 1, 0); epoll_fd = pollset->polling_island->epoll_fd; + if (pollset->polling_island->fd_cnt == 0) { + gpr_log(GPR_DEBUG, "pollset_work_and_unlock: epoll_fd: %d, No other fds", + epoll_fd); + } + for (size_t i = 0; i < pollset->polling_island->fd_cnt; i++) { + gpr_log(GPR_DEBUG, + "pollset_work_and_unlock: epoll_fd: %d, fd_count: %d, fd[%d]: %d", + epoll_fd, pollset->polling_island->fd_cnt, i, + pollset->polling_island->fds[i]->fd); + } gpr_mu_unlock(&pollset->polling_island->mu); } @@ -958,36 +974,47 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, /* If epoll_fd == -1, this is a blank pollset and does not have any fds yet */ if (epoll_fd != -1) { do { + gpr_timespec before_epoll = gpr_now(GPR_CLOCK_PRECISE); + gpr_log(GPR_DEBUG, "pollset_work_and_unlock: epoll_wait()...."); ep_rv = epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask); + gpr_timespec after_epoll = gpr_now(GPR_CLOCK_PRECISE); + int dur = gpr_time_to_millis(gpr_time_sub(after_epoll, before_epoll)); + gpr_log(GPR_DEBUG, + "pollset_work_and_unlock: DONE epoll_wait() : %d ms, ep_rv: %d", + dur, ep_rv); if (ep_rv < 0) { if (errno != EINTR) { /* TODO (sreek) - Check for bad file descriptor error */ gpr_log(GPR_ERROR, "epoll_pwait() failed: %s", strerror(errno)); + } else { + gpr_log(GPR_DEBUG, "pollset_work_and_unlock: 0-timeout epoll_wait()"); + ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); + gpr_log(GPR_DEBUG, "pollset_work_and_unlock: ep_rv: %d", ep_rv); } - } else { - int i; - for (i = 0; i < ep_rv; ++i) { - grpc_fd *fd = ep_ev[i].data.ptr; - int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); - int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); - int write_ev = ep_ev[i].events & EPOLLOUT; - if (fd == NULL) { - grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); - } else { - if (read_ev || cancel) { - fd_become_readable(exec_ctx, fd); - } - if (write_ev || cancel) { - fd_become_writable(exec_ctx, fd); - } + } + + int i; + for (i = 0; i < ep_rv; ++i) { + grpc_fd *fd = ep_ev[i].data.ptr; + int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); + int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); + int write_ev = ep_ev[i].events & EPOLLOUT; + if (fd == NULL) { + grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); + } else { + if (read_ev || cancel) { + fd_become_readable(exec_ctx, fd); + } + if (write_ev || cancel) { + fd_become_writable(exec_ctx, fd); } } } } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); } - + gpr_log(GPR_DEBUG, "pollset_work_and_unlock: Leaving.."); GPR_TIMER_END("pollset_work_and_unlock", 0); } @@ -1060,7 +1087,7 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker **worker_hdl, gpr_timespec now, gpr_timespec deadline) { GPR_TIMER_BEGIN("pollset_work", 0); - + gpr_log(GPR_DEBUG, "pollset_work: enter"); int timeout_ms = poll_deadline_to_millis_timeout(deadline, now); sigset_t new_mask; @@ -1079,6 +1106,7 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, work that needs attention like an event on the completion queue or an alarm */ GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); + gpr_log(GPR_INFO, "pollset_work: kicked without pollers.."); pollset->kicked_without_pollers = 0; } else if (!pollset->shutting_down) { sigemptyset(&new_mask); @@ -1113,12 +1141,14 @@ static void 
pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, gpr_mu_lock(&pollset->mu); } + gpr_log(GPR_DEBUG, "pollset_work(): leaving"); *worker_hdl = NULL; GPR_TIMER_END("pollset_work", 0); } static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { + gpr_log(GPR_DEBUG, "pollset_add_fd: pollset: %p, fd: %d", pollset, fd->fd); /* TODO sreek - Check if we need to get a pollset->mu lock here */ gpr_mu_lock(&pollset->pi_mu); gpr_mu_lock(&fd->pi_mu); @@ -1146,6 +1176,7 @@ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, } } else if (fd->polling_island == NULL) { pi_new = polling_island_update_and_lock(pollset->polling_island, 1, 1); + polling_island_add_fds_locked(pollset->polling_island, &fd, 1, true); gpr_mu_unlock(&pi_new->mu); } else if (pollset->polling_island == NULL) { pi_new = polling_island_update_and_lock(fd->polling_island, 1, 1); -- cgit v1.2.3 From 4c11a20bf0b6b1d64a2800bcb06f76404294aa84 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Mon, 6 Jun 2016 09:23:25 -0700 Subject: Remove unused files --- BUILD | 6 - Makefile | 38 - binding.gyp | 1 - build.yaml | 14 - config.m4 | 1 - gRPC.podspec | 3 - grpc.gemspec | 2 - package.xml | 2 - src/core/lib/iomgr/ctiller_ev_epoll_linux.c | 461 -------- src/core/lib/iomgr/ev_epoll_linux.c | 2 +- src/core/lib/iomgr/ev_epoll_posix.c | 1209 -------------------- src/core/lib/iomgr/ev_epoll_posix.h | 41 - src/python/grpcio/grpc_core_dependencies.py | 1 - test/core/network_benchmarks/epoll_test.c | 263 ----- test/core/network_benchmarks/low_level_ping_pong.c | 2 - tools/doxygen/Doxyfile.core.internal | 2 - tools/run_tests/sources_and_headers.json | 19 - tools/run_tests/tests.json | 15 - vsprojects/vcxproj/grpc/grpc.vcxproj | 3 - vsprojects/vcxproj/grpc/grpc.vcxproj.filters | 6 - .../vcxproj/grpc_unsecure/grpc_unsecure.vcxproj | 3 - .../grpc_unsecure/grpc_unsecure.vcxproj.filters | 6 - 22 files changed, 1 insertion(+), 2099 deletions(-) delete mode 100644 src/core/lib/iomgr/ctiller_ev_epoll_linux.c delete mode 100644 src/core/lib/iomgr/ev_epoll_posix.c delete mode 100644 src/core/lib/iomgr/ev_epoll_posix.h delete mode 100644 test/core/network_benchmarks/epoll_test.c (limited to 'src/core/lib') diff --git a/BUILD b/BUILD index a32352ebb3..70354a8810 100644 --- a/BUILD +++ b/BUILD @@ -179,7 +179,6 @@ cc_library( "src/core/lib/iomgr/endpoint.h", "src/core/lib/iomgr/endpoint_pair.h", "src/core/lib/iomgr/ev_epoll_linux.h", - "src/core/lib/iomgr/ev_epoll_posix.h", "src/core/lib/iomgr/ev_poll_posix.h", "src/core/lib/iomgr/ev_posix.h", "src/core/lib/iomgr/exec_ctx.h", @@ -324,7 +323,6 @@ cc_library( "src/core/lib/iomgr/endpoint_pair_posix.c", "src/core/lib/iomgr/endpoint_pair_windows.c", "src/core/lib/iomgr/ev_epoll_linux.c", - "src/core/lib/iomgr/ev_epoll_posix.c", "src/core/lib/iomgr/ev_poll_posix.c", "src/core/lib/iomgr/ev_posix.c", "src/core/lib/iomgr/exec_ctx.c", @@ -551,7 +549,6 @@ cc_library( "src/core/lib/iomgr/endpoint.h", "src/core/lib/iomgr/endpoint_pair.h", "src/core/lib/iomgr/ev_epoll_linux.h", - "src/core/lib/iomgr/ev_epoll_posix.h", "src/core/lib/iomgr/ev_poll_posix.h", "src/core/lib/iomgr/ev_posix.h", "src/core/lib/iomgr/exec_ctx.h", @@ -673,7 +670,6 @@ cc_library( "src/core/lib/iomgr/endpoint_pair_posix.c", "src/core/lib/iomgr/endpoint_pair_windows.c", "src/core/lib/iomgr/ev_epoll_linux.c", - "src/core/lib/iomgr/ev_epoll_posix.c", "src/core/lib/iomgr/ev_poll_posix.c", "src/core/lib/iomgr/ev_posix.c", "src/core/lib/iomgr/exec_ctx.c", @@ -1367,7 +1363,6 @@ objc_library( 
"src/core/lib/iomgr/endpoint_pair_posix.c", "src/core/lib/iomgr/endpoint_pair_windows.c", "src/core/lib/iomgr/ev_epoll_linux.c", - "src/core/lib/iomgr/ev_epoll_posix.c", "src/core/lib/iomgr/ev_poll_posix.c", "src/core/lib/iomgr/ev_posix.c", "src/core/lib/iomgr/exec_ctx.c", @@ -1573,7 +1568,6 @@ objc_library( "src/core/lib/iomgr/endpoint.h", "src/core/lib/iomgr/endpoint_pair.h", "src/core/lib/iomgr/ev_epoll_linux.h", - "src/core/lib/iomgr/ev_epoll_posix.h", "src/core/lib/iomgr/ev_poll_posix.h", "src/core/lib/iomgr/ev_posix.h", "src/core/lib/iomgr/exec_ctx.h", diff --git a/Makefile b/Makefile index 235f32d9a3..1c83aec21e 100644 --- a/Makefile +++ b/Makefile @@ -903,7 +903,6 @@ dns_resolver_connectivity_test: $(BINDIR)/$(CONFIG)/dns_resolver_connectivity_te dns_resolver_test: $(BINDIR)/$(CONFIG)/dns_resolver_test dualstack_socket_test: $(BINDIR)/$(CONFIG)/dualstack_socket_test endpoint_pair_test: $(BINDIR)/$(CONFIG)/endpoint_pair_test -epoll_test: $(BINDIR)/$(CONFIG)/epoll_test fd_conservation_posix_test: $(BINDIR)/$(CONFIG)/fd_conservation_posix_test fd_posix_test: $(BINDIR)/$(CONFIG)/fd_posix_test fling_client: $(BINDIR)/$(CONFIG)/fling_client @@ -1235,7 +1234,6 @@ buildtests_c: privatelibs_c \ $(BINDIR)/$(CONFIG)/dns_resolver_test \ $(BINDIR)/$(CONFIG)/dualstack_socket_test \ $(BINDIR)/$(CONFIG)/endpoint_pair_test \ - $(BINDIR)/$(CONFIG)/epoll_test \ $(BINDIR)/$(CONFIG)/fd_conservation_posix_test \ $(BINDIR)/$(CONFIG)/fd_posix_test \ $(BINDIR)/$(CONFIG)/fling_client \ @@ -1499,8 +1497,6 @@ test_c: buildtests_c $(Q) $(BINDIR)/$(CONFIG)/dualstack_socket_test || ( echo test dualstack_socket_test failed ; exit 1 ) $(E) "[RUN] Testing endpoint_pair_test" $(Q) $(BINDIR)/$(CONFIG)/endpoint_pair_test || ( echo test endpoint_pair_test failed ; exit 1 ) - $(E) "[RUN] Testing epoll_test" - $(Q) $(BINDIR)/$(CONFIG)/epoll_test || ( echo test epoll_test failed ; exit 1 ) $(E) "[RUN] Testing fd_conservation_posix_test" $(Q) $(BINDIR)/$(CONFIG)/fd_conservation_posix_test || ( echo test fd_conservation_posix_test failed ; exit 1 ) $(E) "[RUN] Testing fd_posix_test" @@ -2491,7 +2487,6 @@ LIBGRPC_SRC = \ src/core/lib/iomgr/endpoint_pair_posix.c \ src/core/lib/iomgr/endpoint_pair_windows.c \ src/core/lib/iomgr/ev_epoll_linux.c \ - src/core/lib/iomgr/ev_epoll_posix.c \ src/core/lib/iomgr/ev_poll_posix.c \ src/core/lib/iomgr/ev_posix.c \ src/core/lib/iomgr/exec_ctx.c \ @@ -2847,7 +2842,6 @@ LIBGRPC_UNSECURE_SRC = \ src/core/lib/iomgr/endpoint_pair_posix.c \ src/core/lib/iomgr/endpoint_pair_windows.c \ src/core/lib/iomgr/ev_epoll_linux.c \ - src/core/lib/iomgr/ev_epoll_posix.c \ src/core/lib/iomgr/ev_poll_posix.c \ src/core/lib/iomgr/ev_posix.c \ src/core/lib/iomgr/exec_ctx.c \ @@ -6581,38 +6575,6 @@ endif endif -EPOLL_TEST_SRC = \ - test/core/network_benchmarks/epoll_test.c \ - -EPOLL_TEST_OBJS = $(addprefix $(OBJDIR)/$(CONFIG)/, $(addsuffix .o, $(basename $(EPOLL_TEST_SRC)))) -ifeq ($(NO_SECURE),true) - -# You can't build secure targets if you don't have OpenSSL. 
- -$(BINDIR)/$(CONFIG)/epoll_test: openssl_dep_error - -else - - - -$(BINDIR)/$(CONFIG)/epoll_test: $(EPOLL_TEST_OBJS) $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a - $(E) "[LD] Linking $@" - $(Q) mkdir -p `dirname $@` - $(Q) $(LD) $(LDFLAGS) $(EPOLL_TEST_OBJS) $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a $(LDLIBS) $(LDLIBS_SECURE) -o $(BINDIR)/$(CONFIG)/epoll_test - -endif - -$(OBJDIR)/$(CONFIG)/test/core/network_benchmarks/epoll_test.o: $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a - -deps_epoll_test: $(EPOLL_TEST_OBJS:.o=.dep) - -ifneq ($(NO_SECURE),true) -ifneq ($(NO_DEPS),true) --include $(EPOLL_TEST_OBJS:.o=.dep) -endif -endif - - FD_CONSERVATION_POSIX_TEST_SRC = \ test/core/iomgr/fd_conservation_posix_test.c \ diff --git a/binding.gyp b/binding.gyp index 41e1b5bb41..255998aafd 100644 --- a/binding.gyp +++ b/binding.gyp @@ -582,7 +582,6 @@ 'src/core/lib/iomgr/endpoint_pair_posix.c', 'src/core/lib/iomgr/endpoint_pair_windows.c', 'src/core/lib/iomgr/ev_epoll_linux.c', - 'src/core/lib/iomgr/ev_epoll_posix.c', 'src/core/lib/iomgr/ev_poll_posix.c', 'src/core/lib/iomgr/ev_posix.c', 'src/core/lib/iomgr/exec_ctx.c', diff --git a/build.yaml b/build.yaml index db9787546a..75c7a76bdb 100644 --- a/build.yaml +++ b/build.yaml @@ -166,7 +166,6 @@ filegroups: - src/core/lib/iomgr/endpoint.h - src/core/lib/iomgr/endpoint_pair.h - src/core/lib/iomgr/ev_epoll_linux.h - - src/core/lib/iomgr/ev_epoll_posix.h - src/core/lib/iomgr/ev_poll_posix.h - src/core/lib/iomgr/ev_posix.h - src/core/lib/iomgr/exec_ctx.h @@ -242,7 +241,6 @@ filegroups: - src/core/lib/iomgr/endpoint_pair_posix.c - src/core/lib/iomgr/endpoint_pair_windows.c - src/core/lib/iomgr/ev_epoll_linux.c - - src/core/lib/iomgr/ev_epoll_posix.c - src/core/lib/iomgr/ev_poll_posix.c - src/core/lib/iomgr/ev_posix.c - src/core/lib/iomgr/exec_ctx.c @@ -1321,18 +1319,6 @@ targets: - grpc - gpr_test_util - gpr -- name: epoll_test - build: test - language: c - src: - - test/core/network_benchmarks/epoll_test.c - deps: - - grpc_test_util - - grpc - - gpr_test_util - - gpr - platforms: - - linux - name: fd_conservation_posix_test build: test language: c diff --git a/config.m4 b/config.m4 index 4308295afd..e2d1c00b6e 100644 --- a/config.m4 +++ b/config.m4 @@ -101,7 +101,6 @@ if test "$PHP_GRPC" != "no"; then src/core/lib/iomgr/endpoint_pair_posix.c \ src/core/lib/iomgr/endpoint_pair_windows.c \ src/core/lib/iomgr/ev_epoll_linux.c \ - src/core/lib/iomgr/ev_epoll_posix.c \ src/core/lib/iomgr/ev_poll_posix.c \ src/core/lib/iomgr/ev_posix.c \ src/core/lib/iomgr/exec_ctx.c \ diff --git a/gRPC.podspec b/gRPC.podspec index de55880125..736ae98b54 100644 --- a/gRPC.podspec +++ b/gRPC.podspec @@ -182,7 +182,6 @@ Pod::Spec.new do |s| 'src/core/lib/iomgr/endpoint.h', 'src/core/lib/iomgr/endpoint_pair.h', 'src/core/lib/iomgr/ev_epoll_linux.h', - 'src/core/lib/iomgr/ev_epoll_posix.h', 'src/core/lib/iomgr/ev_poll_posix.h', 'src/core/lib/iomgr/ev_posix.h', 'src/core/lib/iomgr/exec_ctx.h', @@ -361,7 +360,6 @@ Pod::Spec.new do |s| 'src/core/lib/iomgr/endpoint_pair_posix.c', 'src/core/lib/iomgr/endpoint_pair_windows.c', 'src/core/lib/iomgr/ev_epoll_linux.c', - 'src/core/lib/iomgr/ev_epoll_posix.c', 'src/core/lib/iomgr/ev_poll_posix.c', 'src/core/lib/iomgr/ev_posix.c', 'src/core/lib/iomgr/exec_ctx.c', @@ 
-551,7 +549,6 @@ Pod::Spec.new do |s| 'src/core/lib/iomgr/endpoint.h', 'src/core/lib/iomgr/endpoint_pair.h', 'src/core/lib/iomgr/ev_epoll_linux.h', - 'src/core/lib/iomgr/ev_epoll_posix.h', 'src/core/lib/iomgr/ev_poll_posix.h', 'src/core/lib/iomgr/ev_posix.h', 'src/core/lib/iomgr/exec_ctx.h', diff --git a/grpc.gemspec b/grpc.gemspec index 54ae2eb68d..01b2890493 100755 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -191,7 +191,6 @@ Gem::Specification.new do |s| s.files += %w( src/core/lib/iomgr/endpoint.h ) s.files += %w( src/core/lib/iomgr/endpoint_pair.h ) s.files += %w( src/core/lib/iomgr/ev_epoll_linux.h ) - s.files += %w( src/core/lib/iomgr/ev_epoll_posix.h ) s.files += %w( src/core/lib/iomgr/ev_poll_posix.h ) s.files += %w( src/core/lib/iomgr/ev_posix.h ) s.files += %w( src/core/lib/iomgr/exec_ctx.h ) @@ -340,7 +339,6 @@ Gem::Specification.new do |s| s.files += %w( src/core/lib/iomgr/endpoint_pair_posix.c ) s.files += %w( src/core/lib/iomgr/endpoint_pair_windows.c ) s.files += %w( src/core/lib/iomgr/ev_epoll_linux.c ) - s.files += %w( src/core/lib/iomgr/ev_epoll_posix.c ) s.files += %w( src/core/lib/iomgr/ev_poll_posix.c ) s.files += %w( src/core/lib/iomgr/ev_posix.c ) s.files += %w( src/core/lib/iomgr/exec_ctx.c ) diff --git a/package.xml b/package.xml index d8e82a8bc3..ba6e11fadc 100644 --- a/package.xml +++ b/package.xml @@ -198,7 +198,6 @@ - @@ -347,7 +346,6 @@ - diff --git a/src/core/lib/iomgr/ctiller_ev_epoll_linux.c b/src/core/lib/iomgr/ctiller_ev_epoll_linux.c deleted file mode 100644 index 23c20a77aa..0000000000 --- a/src/core/lib/iomgr/ctiller_ev_epoll_linux.c +++ /dev/null @@ -1,461 +0,0 @@ -/* - * - * Copyright 2015-2016, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - */ - -#include "src/core/lib/iomgr/ev_epoll_linux.h" - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "src/core/lib/iomgr/iomgr_internal.h" - -/* TODO(sreek) Remove this file */ - - -//////////////////////////////////////////////////////////////////////////////// -// Definitions - -#define STATE_NOT_READY ((gpr_atm)0) -#define STATE_READY ((gpr_atm)1) - -typedef enum { POLLABLE_FD, POLLABLE_EPOLL_SET } pollable_type; - -typedef struct { - pollable_type type; - int fd; - grpc_iomgr_object iomgr_object; -} pollable_object; - -typedef struct polling_island { - pollable_object pollable; - gpr_mu mu; - int refs; - grpc_fd *only_fd; - struct polling_island *became; - struct polling_island *next; -} polling_island; - -struct grpc_fd { - pollable_object pollable; - - // each event atomic is a tri state: - // STATE_NOT_READY - no event received, nobody waiting for it either - // STATE_READY - event received, nobody waiting for it - // closure pointer - no event received, upper layer is waiting for it - gpr_atm on_readable; - gpr_atm on_writable; - - // mutex guarding set_ready & shutdown state - gpr_mu set_ready_mu; - bool shutdown; - - // mutex protecting polling_island - gpr_mu polling_island_mu; - // current polling island - polling_island *polling_island; - - grpc_fd *next_free; -}; - -struct grpc_pollset_worker {}; - -struct grpc_pollset { - gpr_mu mu; - // current polling island - polling_island *polling_island; -}; - -//////////////////////////////////////////////////////////////////////////////// -// Polling island implementation - -static gpr_mu g_pi_freelist_mu; -static polling_island *g_first_free_pi; - -static void add_pollable_to_epoll_set(pollable_object *pollable, int epoll_set, - uint32_t events) { - struct epoll_event ev; - ev.events = events; - ev.data.ptr = pollable; - int err = epoll_ctl(epoll_set, EPOLL_CTL_ADD, pollable->fd, &ev); - if (err < 0) { - gpr_log(GPR_ERROR, "epoll_ctl add for %d faild: %s", pollable->fd, - strerror(errno)); - } -} - -static void add_fd_to_epoll_set(grpc_fd *fd, int epoll_set) { - add_pollable_to_epoll_set(&fd->pollable, epoll_set, - EPOLLIN | EPOLLOUT | EPOLLET); -} - -static void add_island_to_epoll_set(polling_island *pi, int epoll_set) { - add_pollable_to_epoll_set(&pi->pollable, epoll_set, EPOLLIN | EPOLLET); -} - -static polling_island *polling_island_create(grpc_fd *initial_fd) { - polling_island *r = NULL; - gpr_mu_lock(&g_pi_freelist_mu); - if (g_first_free_pi == NULL) { - r = gpr_malloc(sizeof(*r)); - r->pollable.type = POLLABLE_EPOLL_SET; - gpr_mu_init(&r->mu); - } else { - r = g_first_free_pi; - g_first_free_pi = r->next; - } - gpr_mu_unlock(&g_pi_freelist_mu); - - r->pollable.fd = epoll_create1(EPOLL_CLOEXEC); - GPR_ASSERT(r->pollable.fd >= 0); - - gpr_mu_lock(&r->mu); - r->only_fd = initial_fd; - r->refs = 2; // creation of a polling island => a referencing pollset & fd - gpr_mu_unlock(&r->mu); - - add_fd_to_epoll_set(initial_fd, r->pollable.fd); - return r; -} - -static void polling_island_delete(polling_island *p) { - gpr_mu_lock(&g_pi_freelist_mu); - p->next = g_first_free_pi; - g_first_free_pi = p; - gpr_mu_unlock(&g_pi_freelist_mu); -} - -static polling_island *polling_island_add(polling_island *p, grpc_fd *fd) { - gpr_mu_lock(&p->mu); - p->only_fd = NULL; - p->refs++; // new fd picks up a ref - gpr_mu_unlock(&p->mu); - - add_fd_to_epoll_set(fd, p->pollable.fd); - - return p; -} - -static void add_siblings_to(polling_island *siblings, polling_island *dest) { - polling_island 
*sibling_tail = dest; - while (sibling_tail->next != NULL) { - sibling_tail = sibling_tail->next; - } - sibling_tail->next = siblings; -} - -static polling_island *polling_island_merge(polling_island *a, - polling_island *b) { - GPR_ASSERT(a != b); - polling_island *out; - - gpr_mu_lock(&GPR_MIN(a, b)->mu); - gpr_mu_lock(&GPR_MAX(a, b)->mu); - - GPR_ASSERT(a->became == NULL); - GPR_ASSERT(b->became == NULL); - - if (a->only_fd == NULL && b->only_fd == NULL) { - b->became = a; - add_siblings_to(b, a); - add_island_to_epoll_set(b, a->pollable.fd); - out = a; - } else if (a->only_fd == NULL) { - GPR_ASSERT(b->only_fd != NULL); - add_fd_to_epoll_set(b->only_fd, a->pollable.fd); - b->became = a; - out = a; - } else if (b->only_fd == NULL) { - GPR_ASSERT(a->only_fd != NULL); - add_fd_to_epoll_set(a->only_fd, b->pollable.fd); - a->became = b; - out = b; - } else { - add_fd_to_epoll_set(b->only_fd, a->pollable.fd); - a->only_fd = NULL; - b->only_fd = NULL; - b->became = a; - out = a; - } - - gpr_mu_unlock(&a->mu); - gpr_mu_unlock(&b->mu); - - return out; -} - -static polling_island *polling_island_update_and_lock(polling_island *p) { - gpr_mu_lock(&p->mu); - if (p->became != NULL) { - do { - polling_island *from = p; - p = p->became; - gpr_mu_lock(&p->mu); - bool delete_from = 0 == --from->refs; - p->refs++; - gpr_mu_unlock(&from->mu); - if (delete_from) { - polling_island_delete(from); - } - } while (p->became != NULL); - } - return p; -} - -static polling_island *polling_island_ref(polling_island *p) { - gpr_mu_lock(&p->mu); - gpr_mu_unlock(&p->mu); - return p; -} - -static void polling_island_drop(polling_island *p) {} - -static polling_island *polling_island_update(polling_island *p, - int updating_owner_count) { - p = polling_island_update_and_lock(p); - GPR_ASSERT(p->refs != 0); - p->refs += updating_owner_count; - gpr_mu_unlock(&p->mu); - return p; -} - -//////////////////////////////////////////////////////////////////////////////// -// FD implementation - -static gpr_mu g_fd_freelist_mu; -static grpc_fd *g_first_free_fd; - -static grpc_fd *fd_create(int fd, const char *name) { - grpc_fd *r = NULL; - gpr_mu_lock(&g_fd_freelist_mu); - if (g_first_free_fd == NULL) { - r = gpr_malloc(sizeof(*r)); - r->pollable.type = POLLABLE_FD; - gpr_atm_rel_store(&r->on_readable, 0); - gpr_atm_rel_store(&r->on_writable, 0); - gpr_mu_init(&r->polling_island_mu); - gpr_mu_init(&r->set_ready_mu); - } else { - r = g_first_free_fd; - g_first_free_fd = r->next_free; - } - gpr_mu_unlock(&g_fd_freelist_mu); - - r->pollable.fd = fd; - grpc_iomgr_register_object(&r->pollable.iomgr_object, name); - r->next_free = NULL; - return r; -} - -static int fd_wrapped_fd(grpc_fd *fd) { return fd->pollable.fd; } - -static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *on_done, int *release_fd, - const char *reason) { - if (release_fd != NULL) { - *release_fd = fd->pollable.fd; - } else { - close(fd->pollable.fd); - } - - gpr_mu_lock(&fd->polling_island_mu); - if (fd->polling_island != NULL) { - polling_island_drop(fd->polling_island); - } - gpr_mu_unlock(&fd->polling_island_mu); - - gpr_mu_lock(&g_fd_freelist_mu); - fd->next_free = g_first_free_fd; - g_first_free_fd = fd; - grpc_iomgr_unregister_object(&fd->pollable.iomgr_object); - gpr_mu_unlock(&g_fd_freelist_mu); - - grpc_exec_ctx_enqueue(exec_ctx, on_done, true, NULL); -} - -static void notify_on(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure, gpr_atm *state) { - if (gpr_atm_acq_cas(state, STATE_NOT_READY, (gpr_atm)closure)) { - // state 
was not ready, and is now the closure - we're done */ - } else { - // cas failed - we MUST be in STATE_READY (can't request two notifications - // for the same event) - // flip back to not ready, enqueue the closure directly - GPR_ASSERT(gpr_atm_rel_cas(state, STATE_READY, STATE_NOT_READY)); - grpc_exec_ctx_enqueue(exec_ctx, closure, true, NULL); - } -} - -static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - notify_on(exec_ctx, fd, closure, &fd->on_readable); -} - -static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - notify_on(exec_ctx, fd, closure, &fd->on_readable); -} - -static void destroy_fd_freelist(void) { - while (g_first_free_fd) { - grpc_fd *next = g_first_free_fd->next_free; - gpr_mu_destroy(&g_first_free_fd->polling_island_mu); - gpr_free(next); - g_first_free_fd = next; - } -} - -static void set_ready_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - gpr_atm *state) { - if (gpr_atm_acq_cas(state, STATE_NOT_READY, STATE_READY)) { - // state was not ready, and is now ready - we're done - } else { - // cas failed - either there's a closure queued which we should consume OR - // the state was already STATE_READY - gpr_atm cur_state = gpr_atm_acq_load(state); - if (cur_state != STATE_READY) { - // state wasn't STATE_READY - it *must* have been a closure - // since it's illegal to ask for notification twice, it's safe to assume - // that we'll resume being the closure - GPR_ASSERT(gpr_atm_rel_cas(state, cur_state, STATE_NOT_READY)); - grpc_exec_ctx_enqueue(exec_ctx, (grpc_closure *)cur_state, !fd->shutdown, - NULL); - } - } -} - -static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - gpr_mu_lock(&fd->set_ready_mu); - GPR_ASSERT(!fd->shutdown); - fd->shutdown = 1; - set_ready_locked(exec_ctx, fd, &fd->on_readable); - set_ready_locked(exec_ctx, fd, &fd->on_writable); - gpr_mu_unlock(&fd->set_ready_mu); -} - -//////////////////////////////////////////////////////////////////////////////// -// Pollset implementation - -static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { - gpr_mu_init(&pollset->mu); - *mu = &pollset->mu; - pollset->polling_island = NULL; -} - -static void pollset_destroy(grpc_pollset *pollset) { - gpr_mu_destroy(&pollset->mu); - if (pollset->polling_island) { - polling_island_drop(pollset->polling_island); - } -} - -static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - struct grpc_fd *fd) { - gpr_mu_lock(&pollset->mu); - gpr_mu_lock(&fd->polling_island_mu); - - polling_island *new; - - if (fd->polling_island == NULL) { - if (pollset->polling_island == NULL) { - new = polling_island_create(fd); - } else { - new = polling_island_add(pollset->polling_island, fd); - } - } else if (pollset->polling_island == NULL) { - new = polling_island_ref(fd->polling_island); - } else if (pollset->polling_island != fd->polling_island) { - new = polling_island_merge(pollset->polling_island, fd->polling_island); - } else { - new = polling_island_update(pollset->polling_island, 1); - } - - fd->polling_island = pollset->polling_island = new; - - gpr_mu_unlock(&fd->polling_island_mu); - gpr_mu_unlock(&pollset->mu); -} - -//////////////////////////////////////////////////////////////////////////////// -// Engine binding - -static void shutdown_engine(void) { destroy_fd_freelist(); } - -static const grpc_event_engine_vtable vtable = { - .pollset_size = sizeof(grpc_pollset), - - .fd_create = fd_create, - .fd_wrapped_fd = fd_wrapped_fd, - .fd_orphan = fd_orphan, - 
.fd_shutdown = fd_shutdown, - .fd_notify_on_read = fd_notify_on_read, - .fd_notify_on_write = fd_notify_on_write, - - .pollset_init = pollset_init, - .pollset_shutdown = pollset_shutdown, - .pollset_reset = pollset_reset, - .pollset_destroy = pollset_destroy, - .pollset_work = pollset_work, - .pollset_kick = pollset_kick, - .pollset_add_fd = pollset_add_fd, - - .pollset_set_create = pollset_set_create, - .pollset_set_destroy = pollset_set_destroy, - .pollset_set_add_pollset = pollset_set_add_pollset, - .pollset_set_del_pollset = pollset_set_del_pollset, - .pollset_set_add_pollset_set = pollset_set_add_pollset_set, - .pollset_set_del_pollset_set = pollset_set_del_pollset_set, - .pollset_set_add_fd = pollset_set_add_fd, - .pollset_set_del_fd = pollset_set_del_fd, - - .kick_poller = kick_poller, - - .shutdown_engine = shutdown_engine, -}; - -static bool is_epoll_available(void) { - abort(); - return false; -} - -const grpc_event_engine_vtable *grpc_init_poll_posix(void) { - if (!is_epoll_available()) { - return NULL; - } - return &vtable; -} diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 3aa26109f2..61106faef9 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -35,7 +35,7 @@ #ifdef GPR_POSIX_SOCKET -#include "src/core/lib/iomgr/ev_epoll_posix.h" +#include "src/core/lib/iomgr/ev_epoll_linux.h" #include #include diff --git a/src/core/lib/iomgr/ev_epoll_posix.c b/src/core/lib/iomgr/ev_epoll_posix.c deleted file mode 100644 index 5abd5b2a94..0000000000 --- a/src/core/lib/iomgr/ev_epoll_posix.c +++ /dev/null @@ -1,1209 +0,0 @@ -/* - * - * Copyright 2016, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
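The removed ctiller_ev_epoll_linux.c above drives readiness through a tri-state atomic: 0 (STATE_NOT_READY) means no event and no waiter, 1 (STATE_READY) means an event arrived with nobody waiting, and any other value is the closure pointer the upper layer parked there. The sketch below restates that protocol in isolation; it is not the gRPC API — gpr_atm and grpc_closure are replaced by C11 stdatomic and a bare callback, and it assumes function pointers fit in a uintptr_t, which holds on the POSIX targets this engine is aimed at.

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define STATE_NOT_READY ((uintptr_t)0)
#define STATE_READY ((uintptr_t)1)

typedef void (*callback_fn)(void);

/* Upper layer asks to be notified: either park the callback, or, if the
   event already fired, flip back to NOT_READY and run it immediately. */
static void notify_on(atomic_uintptr_t *state, callback_fn cb) {
  uintptr_t expected = STATE_NOT_READY;
  if (atomic_compare_exchange_strong(state, &expected, (uintptr_t)cb)) {
    return; /* parked: the poller runs it when the event arrives */
  }
  /* CAS failed: the only legal other value is STATE_READY, since asking for
     two notifications on the same event is a caller bug. Consume and run. */
  expected = STATE_READY;
  int ok = atomic_compare_exchange_strong(state, &expected, STATE_NOT_READY);
  assert(ok);
  (void)ok;
  cb();
}

/* Poller reports an event: either mark READY, or run the parked callback. */
static void set_ready(atomic_uintptr_t *state) {
  uintptr_t expected = STATE_NOT_READY;
  if (atomic_compare_exchange_strong(state, &expected, STATE_READY)) {
    return; /* nobody was waiting; the next notify_on fires at once */
  }
  uintptr_t cur = atomic_load(state);
  if (cur != STATE_READY &&
      atomic_compare_exchange_strong(state, &cur, STATE_NOT_READY)) {
    ((callback_fn)cur)(); /* a closure was parked: deliver it */
  }
}

static void on_readable(void) { puts("readable"); }

int main(void) {
  atomic_uintptr_t st = STATE_NOT_READY;
  notify_on(&st, on_readable); /* parks the callback */
  set_ready(&st);              /* runs it: prints "readable" */
  set_ready(&st);              /* no waiter: leaves READY behind */
  notify_on(&st, on_readable); /* consumes READY, runs immediately */
  return 0;
}

Either side can arrive first; whoever comes second observes the other's CAS and delivers the callback, which is what lets the poller report events without holding the fd's mutex.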
- * - */ - -#include - -#ifdef GPR_POSIX_SOCKET - -#include "src/core/lib/iomgr/ev_epoll_posix.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "src/core/lib/iomgr/ev_posix.h" -#include "src/core/lib/iomgr/iomgr_internal.h" -#include "src/core/lib/iomgr/wakeup_fd_posix.h" -#include "src/core/lib/profiling/timers.h" -#include "src/core/lib/support/block_annotate.h" - - -/******************************************************************************* - * FD declarations - */ - -struct grpc_fd { - int fd; - /* refst format: - bit0: 1=active/0=orphaned - bit1-n: refcount - meaning that mostly we ref by two to avoid altering the orphaned bit, - and just unref by 1 when we're ready to flag the object as orphaned */ - gpr_atm refst; - - gpr_mu mu; - int shutdown; - int closed; - int released; - - grpc_closure *read_closure; - grpc_closure *write_closure; - - struct grpc_fd *freelist_next; - - grpc_closure *on_done_closure; - - grpc_iomgr_object iomgr_object; -}; - -/* Return 1 if this fd is orphaned, 0 otherwise */ -static bool fd_is_orphaned(grpc_fd *fd); - -/* Reference counting for fds */ -/*#define GRPC_FD_REF_COUNT_DEBUG*/ -#ifdef GRPC_FD_REF_COUNT_DEBUG -static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line); -static void fd_unref(grpc_fd *fd, const char *reason, const char *file, - int line); -#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__) -#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__) -#else -static void fd_ref(grpc_fd *fd); -static void fd_unref(grpc_fd *fd); -#define GRPC_FD_REF(fd, reason) fd_ref(fd) -#define GRPC_FD_UNREF(fd, reason) fd_unref(fd) -#endif - -static void fd_global_init(void); -static void fd_global_shutdown(void); - -#define CLOSURE_NOT_READY ((grpc_closure *)0) -#define CLOSURE_READY ((grpc_closure *)1) - -/******************************************************************************* - * pollset declarations - */ - -typedef struct grpc_cached_wakeup_fd { - grpc_wakeup_fd fd; - struct grpc_cached_wakeup_fd *next; -} grpc_cached_wakeup_fd; - -struct grpc_pollset_worker { - grpc_cached_wakeup_fd *wakeup_fd; - int reevaluate_polling_on_wakeup; - int kicked_specifically; - pthread_t pt_id; - struct grpc_pollset_worker *next; - struct grpc_pollset_worker *prev; -}; - -struct grpc_pollset { - gpr_mu mu; - grpc_pollset_worker root_worker; - int shutting_down; - int called_shutdown; - int kicked_without_pollers; - grpc_closure *shutdown_done; - - int epoll_fd; - - /* Local cache of eventfds for workers */ - grpc_cached_wakeup_fd *local_wakeup_cache; -}; - -/* Add an fd to a pollset */ -static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - struct grpc_fd *fd); - -static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, grpc_fd *fd); - -/* Convert a timespec to milliseconds: - - very small or negative poll times are clamped to zero to do a - non-blocking poll (which becomes spin polling) - - other small values are rounded up to one millisecond - - longer than a millisecond polls are rounded up to the next nearest - millisecond to avoid spinning - - infinite timeouts are converted to -1 */ -static int poll_deadline_to_millis_timeout(gpr_timespec deadline, - gpr_timespec now); - -/* Allow kick to wakeup the currently polling worker */ -#define GRPC_POLLSET_CAN_KICK_SELF 1 -/* Force the wakee to repoll when awoken */ -#define 
GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP 2 -/* As per pollset_kick, with an extended set of flags (defined above) - -- mostly for fd_posix's use. */ -static void pollset_kick_ext(grpc_pollset *p, - grpc_pollset_worker *specific_worker, - uint32_t flags); - -/* turn a pollset into a multipoller: platform specific */ -typedef void (*platform_become_multipoller_type)(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset, - struct grpc_fd **fds, - size_t fd_count); - -/* Return 1 if the pollset has active threads in pollset_work (pollset must - * be locked) */ -static int pollset_has_workers(grpc_pollset *pollset); - -static void remove_fd_from_all_epoll_sets(int fd); - -/******************************************************************************* - * pollset_set definitions - */ - -struct grpc_pollset_set { - gpr_mu mu; - - size_t pollset_count; - size_t pollset_capacity; - grpc_pollset **pollsets; - - size_t pollset_set_count; - size_t pollset_set_capacity; - struct grpc_pollset_set **pollset_sets; - - size_t fd_count; - size_t fd_capacity; - grpc_fd **fds; -}; - -/******************************************************************************* - * fd_posix.c - */ - -/* We need to keep a freelist not because of any concerns of malloc performance - * but instead so that implementations with multiple threads in (for example) - * epoll_wait deal with the race between pollset removal and incoming poll - * notifications. - * - * The problem is that the poller ultimately holds a reference to this - * object, so it is very difficult to know when is safe to free it, at least - * without some expensive synchronization. - * - * If we keep the object freelisted, in the worst case losing this race just - * becomes a spurious read notification on a reused fd. - */ -/* TODO(klempner): We could use some form of polling generation count to know - * when these are safe to free. 
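The freelist rationale above is easier to see in miniature: retired fd records go back onto a locked free list instead of to free(), so a poller that still holds a stale pointer dereferences a live (if recycled) object, and the race decays into a spurious wakeup rather than a use-after-free. A minimal sketch of that recycling under stated assumptions — my_fd, fd_alloc and fd_retire are invented names, and pthread mutexes stand in for gpr_mu:

#include <pthread.h>
#include <stdlib.h>

typedef struct my_fd {
  int fd;
  struct my_fd *freelist_next;
} my_fd;

static my_fd *g_freelist = NULL;
static pthread_mutex_t g_freelist_mu = PTHREAD_MUTEX_INITIALIZER;

/* Reuse a retired record when one is available; malloc only on a cold cache. */
static my_fd *fd_alloc(int fd) {
  pthread_mutex_lock(&g_freelist_mu);
  my_fd *r = g_freelist;
  if (r != NULL) g_freelist = r->freelist_next;
  pthread_mutex_unlock(&g_freelist_mu);
  if (r == NULL) r = malloc(sizeof(*r));
  r->fd = fd;
  r->freelist_next = NULL;
  return r;
}

/* Records are never handed back to free() while the engine runs, so a racing
   poller holding a stale pointer still reads a live object; the worst case is
   a spurious wakeup reported against whichever fd has reused the slot. */
static void fd_retire(my_fd *f) {
  pthread_mutex_lock(&g_freelist_mu);
  f->freelist_next = g_freelist;
  g_freelist = f;
  pthread_mutex_unlock(&g_freelist_mu);
}

int main(void) {
  my_fd *a = fd_alloc(3);
  fd_retire(a);
  my_fd *b = fd_alloc(4); /* recycles the same record: b == a */
  return b == a ? 0 : 1;
}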
*/ -/* TODO(klempner): Consider disabling freelisting if we don't have multiple - * threads in poll on the same fd */ -/* TODO(klempner): Batch these allocations to reduce fragmentation */ -static grpc_fd *fd_freelist = NULL; -static gpr_mu fd_freelist_mu; - -static void freelist_fd(grpc_fd *fd) { - gpr_mu_lock(&fd_freelist_mu); - fd->freelist_next = fd_freelist; - fd_freelist = fd; - grpc_iomgr_unregister_object(&fd->iomgr_object); - gpr_mu_unlock(&fd_freelist_mu); -} - -static grpc_fd *alloc_fd(int fd) { - grpc_fd *r = NULL; - gpr_mu_lock(&fd_freelist_mu); - if (fd_freelist != NULL) { - r = fd_freelist; - fd_freelist = fd_freelist->freelist_next; - } - gpr_mu_unlock(&fd_freelist_mu); - if (r == NULL) { - r = gpr_malloc(sizeof(grpc_fd)); - gpr_mu_init(&r->mu); - } - - gpr_mu_lock(&r->mu); - gpr_atm_rel_store(&r->refst, 1); - r->shutdown = 0; - r->read_closure = CLOSURE_NOT_READY; - r->write_closure = CLOSURE_NOT_READY; - r->fd = fd; - r->freelist_next = NULL; - r->on_done_closure = NULL; - r->closed = 0; - r->released = 0; - gpr_mu_unlock(&r->mu); - return r; -} - -static void destroy(grpc_fd *fd) { - gpr_mu_destroy(&fd->mu); - gpr_free(fd); -} - -#ifdef GRPC_FD_REF_COUNT_DEBUG -#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__) -#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__) -static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file, - int line) { - gpr_log(GPR_DEBUG, "FD %d %p ref %d %d -> %d [%s; %s:%d]", fd->fd, fd, n, - gpr_atm_no_barrier_load(&fd->refst), - gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line); -#else -#define REF_BY(fd, n, reason) ref_by(fd, n) -#define UNREF_BY(fd, n, reason) unref_by(fd, n) -static void ref_by(grpc_fd *fd, int n) { -#endif - GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0); -} - -#ifdef GRPC_FD_REF_COUNT_DEBUG -static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file, - int line) { - gpr_atm old; - gpr_log(GPR_DEBUG, "FD %d %p unref %d %d -> %d [%s; %s:%d]", fd->fd, fd, n, - gpr_atm_no_barrier_load(&fd->refst), - gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line); -#else -static void unref_by(grpc_fd *fd, int n) { - gpr_atm old; -#endif - old = gpr_atm_full_fetch_add(&fd->refst, -n); - if (old == n) { - freelist_fd(fd); - } else { - GPR_ASSERT(old > n); - } -} - -static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); } - -static void fd_global_shutdown(void) { - gpr_mu_lock(&fd_freelist_mu); - gpr_mu_unlock(&fd_freelist_mu); - while (fd_freelist != NULL) { - grpc_fd *fd = fd_freelist; - fd_freelist = fd_freelist->freelist_next; - destroy(fd); - } - gpr_mu_destroy(&fd_freelist_mu); -} - -static grpc_fd *fd_create(int fd, const char *name) { - grpc_fd *r = alloc_fd(fd); - char *name2; - gpr_asprintf(&name2, "%s fd=%d", name, fd); - grpc_iomgr_register_object(&r->iomgr_object, name2); - gpr_free(name2); -#ifdef GRPC_FD_REF_COUNT_DEBUG - gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, r, name); -#endif - return r; -} - -static bool fd_is_orphaned(grpc_fd *fd) { - return (gpr_atm_acq_load(&fd->refst) & 1) == 0; -} - -static void close_fd_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - fd->closed = 1; - if (!fd->released) { - close(fd->fd); - } else { - remove_fd_from_all_epoll_sets(fd->fd); - } - grpc_exec_ctx_enqueue(exec_ctx, fd->on_done_closure, true, NULL); -} - -static int fd_wrapped_fd(grpc_fd *fd) { - if (fd->released || fd->closed) { - return -1; - } else { - return fd->fd; - } -} - -static void fd_orphan(grpc_exec_ctx 
*exec_ctx, grpc_fd *fd, - grpc_closure *on_done, int *release_fd, - const char *reason) { - fd->on_done_closure = on_done; - fd->released = release_fd != NULL; - if (!fd->released) { - shutdown(fd->fd, SHUT_RDWR); - } else { - *release_fd = fd->fd; - } - gpr_mu_lock(&fd->mu); - REF_BY(fd, 1, reason); /* remove active status, but keep referenced */ - close_fd_locked(exec_ctx, fd); - gpr_mu_unlock(&fd->mu); - UNREF_BY(fd, 2, reason); /* drop the reference */ -} - -/* increment refcount by two to avoid changing the orphan bit */ -#ifdef GRPC_FD_REF_COUNT_DEBUG -static void fd_ref(grpc_fd *fd, const char *reason, const char *file, - int line) { - ref_by(fd, 2, reason, file, line); -} - -static void fd_unref(grpc_fd *fd, const char *reason, const char *file, - int line) { - unref_by(fd, 2, reason, file, line); -} -#else -static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); } - -static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); } -#endif - -static void notify_on_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure **st, grpc_closure *closure) { - if (*st == CLOSURE_NOT_READY) { - /* not ready ==> switch to a waiting state by setting the closure */ - *st = closure; - } else if (*st == CLOSURE_READY) { - /* already ready ==> queue the closure to run immediately */ - *st = CLOSURE_NOT_READY; - grpc_exec_ctx_enqueue(exec_ctx, closure, !fd->shutdown, NULL); - } else { - /* upcallptr was set to a different closure. This is an error! */ - gpr_log(GPR_ERROR, - "User called a notify_on function with a previous callback still " - "pending"); - abort(); - } -} - -/* returns 1 if state becomes not ready */ -static int set_ready_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure **st) { - if (*st == CLOSURE_READY) { - /* duplicate ready ==> ignore */ - return 0; - } else if (*st == CLOSURE_NOT_READY) { - /* not ready, and not waiting ==> flag ready */ - *st = CLOSURE_READY; - return 0; - } else { - /* waiting ==> queue closure */ - grpc_exec_ctx_enqueue(exec_ctx, *st, !fd->shutdown, NULL); - *st = CLOSURE_NOT_READY; - return 1; - } -} - -static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - gpr_mu_lock(&fd->mu); - GPR_ASSERT(!fd->shutdown); - fd->shutdown = 1; - set_ready_locked(exec_ctx, fd, &fd->read_closure); - set_ready_locked(exec_ctx, fd, &fd->write_closure); - gpr_mu_unlock(&fd->mu); -} - -static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - gpr_mu_lock(&fd->mu); - notify_on_locked(exec_ctx, fd, &fd->read_closure, closure); - gpr_mu_unlock(&fd->mu); -} - -static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - gpr_mu_lock(&fd->mu); - notify_on_locked(exec_ctx, fd, &fd->write_closure, closure); - gpr_mu_unlock(&fd->mu); -} - -/******************************************************************************* - * pollset_posix.c - */ - -GPR_TLS_DECL(g_current_thread_poller); -GPR_TLS_DECL(g_current_thread_worker); - -/** The alarm system needs to be able to wakeup 'some poller' sometimes - * (specifically when a new alarm needs to be triggered earlier than the next - * alarm 'epoch'). - * This wakeup_fd gives us something to alert on when such a case occurs. 
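The refst scheme used above (bit 0 = active/orphaned, the remaining bits a reference count) is why ordinary refs are taken and released in units of two: adding or subtracting 2 can never disturb the low bit, and the odd unit installed at creation is what orphaning ultimately strips off. A standalone restatement with C11 atomics — fd_rec and the helper names are illustrative, and the orphan step is simplified to a single unref by 1 rather than the ref-by-1/unref-by-2 dance the real fd_orphan does while closing:

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

typedef struct {
  atomic_long refst; /* bit0: 1 = active, 0 = orphaned; bits 1..n: refcount */
} fd_rec;

static void fd_init(fd_rec *f) { atomic_store(&f->refst, 1); } /* active, no extra refs */

static void fd_ref(fd_rec *f) { atomic_fetch_add(&f->refst, 2); } /* low bit untouched */

static bool fd_is_orphaned(fd_rec *f) {
  return (atomic_load(&f->refst) & 1) == 0;
}

/* Drop n units; when the count reaches zero the record can be recycled. */
static bool fd_unref_by(fd_rec *f, long n) {
  long old = atomic_fetch_sub(&f->refst, n);
  assert(old >= n);
  return old == n; /* true => last reference gone */
}

int main(void) {
  fd_rec f;
  fd_init(&f);                /* refst = 1 */
  fd_ref(&f);                 /* refst = 3 */
  assert(!fd_is_orphaned(&f));
  (void)fd_unref_by(&f, 1);   /* orphan: clears the active bit, refst = 2 */
  assert(fd_is_orphaned(&f));
  assert(fd_unref_by(&f, 2)); /* last holder unrefs: safe to recycle */
  return 0;
}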
*/ -grpc_wakeup_fd grpc_global_wakeup_fd; - -static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) { - worker->prev->next = worker->next; - worker->next->prev = worker->prev; -} - -static int pollset_has_workers(grpc_pollset *p) { - return p->root_worker.next != &p->root_worker; -} - -static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) { - if (pollset_has_workers(p)) { - grpc_pollset_worker *w = p->root_worker.next; - remove_worker(p, w); - return w; - } else { - return NULL; - } -} - -static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) { - worker->next = &p->root_worker; - worker->prev = worker->next->prev; - worker->prev->next = worker->next->prev = worker; -} - -static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) { - worker->prev = &p->root_worker; - worker->next = worker->prev->next; - worker->prev->next = worker->next->prev = worker; -} - -static void pollset_kick_ext(grpc_pollset *p, - grpc_pollset_worker *specific_worker, - uint32_t flags) { - GPR_TIMER_BEGIN("pollset_kick_ext", 0); - - /* pollset->mu already held */ - if (specific_worker != NULL) { - if (specific_worker == GRPC_POLLSET_KICK_BROADCAST) { - GPR_TIMER_BEGIN("pollset_kick_ext.broadcast", 0); - GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); - for (specific_worker = p->root_worker.next; - specific_worker != &p->root_worker; - specific_worker = specific_worker->next) { - grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); - } - p->kicked_without_pollers = 1; - GPR_TIMER_END("pollset_kick_ext.broadcast", 0); - } else if (gpr_tls_get(&g_current_thread_worker) != - (intptr_t)specific_worker) { - GPR_TIMER_MARK("different_thread_worker", 0); - if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { - specific_worker->reevaluate_polling_on_wakeup = 1; - } - specific_worker->kicked_specifically = 1; - grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); - /* TODO (sreek): Refactor this into a separate file*/ - pthread_kill(specific_worker->pt_id, SIGUSR1); - } else if ((flags & GRPC_POLLSET_CAN_KICK_SELF) != 0) { - GPR_TIMER_MARK("kick_yoself", 0); - if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { - specific_worker->reevaluate_polling_on_wakeup = 1; - } - specific_worker->kicked_specifically = 1; - grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); - } - } else if (gpr_tls_get(&g_current_thread_poller) != (intptr_t)p) { - GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); - GPR_TIMER_MARK("kick_anonymous", 0); - specific_worker = pop_front_worker(p); - if (specific_worker != NULL) { - if (gpr_tls_get(&g_current_thread_worker) == (intptr_t)specific_worker) { - GPR_TIMER_MARK("kick_anonymous_not_self", 0); - push_back_worker(p, specific_worker); - specific_worker = pop_front_worker(p); - if ((flags & GRPC_POLLSET_CAN_KICK_SELF) == 0 && - gpr_tls_get(&g_current_thread_worker) == - (intptr_t)specific_worker) { - push_back_worker(p, specific_worker); - specific_worker = NULL; - } - } - if (specific_worker != NULL) { - GPR_TIMER_MARK("finally_kick", 0); - push_back_worker(p, specific_worker); - grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd); - } - } else { - GPR_TIMER_MARK("kicked_no_pollers", 0); - p->kicked_without_pollers = 1; - } - } - - GPR_TIMER_END("pollset_kick_ext", 0); -} - -static void pollset_kick(grpc_pollset *p, - grpc_pollset_worker *specific_worker) { - pollset_kick_ext(p, specific_worker, 0); -} - -/* global state management */ - -static void sig_handler(int 
sig_num) { - gpr_log(GPR_INFO, "Received signal %d", sig_num); -} - -static void pollset_global_init(void) { - gpr_tls_init(&g_current_thread_poller); - gpr_tls_init(&g_current_thread_worker); - grpc_wakeup_fd_init(&grpc_global_wakeup_fd); - signal(SIGUSR1, sig_handler); -} - -static void pollset_global_shutdown(void) { - grpc_wakeup_fd_destroy(&grpc_global_wakeup_fd); - gpr_tls_destroy(&g_current_thread_poller); - gpr_tls_destroy(&g_current_thread_worker); -} - -static void kick_poller(void) { grpc_wakeup_fd_wakeup(&grpc_global_wakeup_fd); } - -/* TODO: sreek. Try to Remove this forward declaration*/ -static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset); - -/* main interface */ - -static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { - gpr_mu_init(&pollset->mu); - *mu = &pollset->mu; - pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker; - pollset->shutting_down = 0; - pollset->called_shutdown = 0; - pollset->kicked_without_pollers = 0; - pollset->local_wakeup_cache = NULL; - pollset->kicked_without_pollers = 0; - - multipoll_with_epoll_pollset_create_efd(pollset); -} - -/* TODO(sreek): Maybe merge multipoll_*_destroy() with pollset_destroy() - * function */ -static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset); - -static void pollset_destroy(grpc_pollset *pollset) { - GPR_ASSERT(!pollset_has_workers(pollset)); - - multipoll_with_epoll_pollset_destroy(pollset); - - while (pollset->local_wakeup_cache) { - grpc_cached_wakeup_fd *next = pollset->local_wakeup_cache->next; - grpc_wakeup_fd_destroy(&pollset->local_wakeup_cache->fd); - gpr_free(pollset->local_wakeup_cache); - pollset->local_wakeup_cache = next; - } - gpr_mu_destroy(&pollset->mu); -} - -static void pollset_reset(grpc_pollset *pollset) { - GPR_ASSERT(pollset->shutting_down); - GPR_ASSERT(!pollset_has_workers(pollset)); - pollset->shutting_down = 0; - pollset->called_shutdown = 0; - pollset->kicked_without_pollers = 0; -} - -/* TODO (sreek): Remove multipoll_with_epoll_finish_shutdown() declaration */ -static void multipoll_with_epoll_pollset_finish_shutdown(grpc_pollset *pollset); - -static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { - multipoll_with_epoll_pollset_finish_shutdown(pollset); - grpc_exec_ctx_enqueue(exec_ctx, pollset->shutdown_done, true, NULL); -} - -/* TODO(sreek): Remove multipoll_with_epoll_*_maybe_work_and_unlock declaration - */ -static void multipoll_with_epoll_pollset_maybe_work_and_unlock( - grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker *worker, - gpr_timespec deadline, gpr_timespec now); - -static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker **worker_hdl, gpr_timespec now, - gpr_timespec deadline) { - grpc_pollset_worker worker; - *worker_hdl = &worker; - - /* pollset->mu already held */ - int added_worker = 0; - int locked = 1; - int queued_work = 0; - int keep_polling = 0; - GPR_TIMER_BEGIN("pollset_work", 0); - /* this must happen before we (potentially) drop pollset->mu */ - worker.next = worker.prev = NULL; - worker.reevaluate_polling_on_wakeup = 0; - if (pollset->local_wakeup_cache != NULL) { - worker.wakeup_fd = pollset->local_wakeup_cache; - pollset->local_wakeup_cache = worker.wakeup_fd->next; - } else { - worker.wakeup_fd = gpr_malloc(sizeof(*worker.wakeup_fd)); - grpc_wakeup_fd_init(&worker.wakeup_fd->fd); - } - worker.kicked_specifically = 0; - - /* TODO(sreek): Abstract this thread id stuff out into a separate file */ - 
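pollset_kick_ext above wakes a specific worker two ways: it writes to the worker's wakeup fd and, on this experimental path, pthread_kill()s the worker's thread with SIGUSR1 so a poller parked in the kernel returns with EINTR. That only works safely if the signal stays blocked everywhere except during the wait itself, which is what the sigmask argument of epoll_pwait provides (the deleted epoll_test benchmark further down exercises exactly this pairing). A minimal standalone sketch of the handshake; worker_main and the 5-second timeout are invented for the illustration:

#define _GNU_SOURCE
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

static void on_sigusr1(int sig) { (void)sig; /* no-op: only interrupts the wait */ }

static void *worker_main(void *arg) {
  int epfd = *(int *)arg;
  struct epoll_event ev[8];

  /* Keep SIGUSR1 blocked except inside epoll_pwait, so a kick can only
     interrupt the wait, never some unrelated critical section. */
  sigset_t block, during_wait;
  sigemptyset(&block);
  sigaddset(&block, SIGUSR1);
  pthread_sigmask(SIG_BLOCK, &block, &during_wait);
  sigdelset(&during_wait, SIGUSR1);

  int n = epoll_pwait(epfd, ev, 8, 5000 /* ms */, &during_wait);
  if (n < 0 && errno == EINTR) {
    printf("worker kicked out of epoll_pwait\n");
  }
  return NULL;
}

int main(void) {
  signal(SIGUSR1, on_sigusr1);
  int epfd = epoll_create1(EPOLL_CLOEXEC);
  pthread_t tid;
  pthread_create(&tid, NULL, worker_main, &epfd);
  sleep(1);                   /* let the worker park in epoll_pwait */
  pthread_kill(tid, SIGUSR1); /* the "kick": the wait returns with EINTR */
  pthread_join(tid, NULL);
  close(epfd);
  return 0;
}

Compile with -pthread; the expected output is a single "worker kicked out of epoll_pwait" line roughly one second in.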
worker.pt_id = pthread_self(); - /* If we're shutting down then we don't execute any extended work */ - if (pollset->shutting_down) { - GPR_TIMER_MARK("pollset_work.shutting_down", 0); - goto done; - } - /* Start polling, and keep doing so while we're being asked to - re-evaluate our pollers (this allows poll() based pollers to - ensure they don't miss wakeups) */ - keep_polling = 1; - while (keep_polling) { - keep_polling = 0; - if (!pollset->kicked_without_pollers) { - if (!added_worker) { - push_front_worker(pollset, &worker); - added_worker = 1; - gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); - } - gpr_tls_set(&g_current_thread_poller, (intptr_t)pollset); - GPR_TIMER_BEGIN("maybe_work_and_unlock", 0); - - multipoll_with_epoll_pollset_maybe_work_and_unlock( - exec_ctx, pollset, &worker, deadline, now); - - GPR_TIMER_END("maybe_work_and_unlock", 0); - locked = 0; - gpr_tls_set(&g_current_thread_poller, 0); - } else { - GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); - pollset->kicked_without_pollers = 0; - } - /* Finished execution - start cleaning up. - Note that we may arrive here from outside the enclosing while() loop. - In that case we won't loop though as we haven't added worker to the - worker list, which means nobody could ask us to re-evaluate polling). */ - done: - if (!locked) { - queued_work |= grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&pollset->mu); - locked = 1; - } - /* If we're forced to re-evaluate polling (via pollset_kick with - GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) then we land here and force - a loop */ - if (worker.reevaluate_polling_on_wakeup) { - worker.reevaluate_polling_on_wakeup = 0; - pollset->kicked_without_pollers = 0; - if (queued_work || worker.kicked_specifically) { - /* If there's queued work on the list, then set the deadline to be - immediate so we get back out of the polling loop quickly */ - deadline = gpr_inf_past(GPR_CLOCK_MONOTONIC); - } - keep_polling = 1; - } - } - if (added_worker) { - remove_worker(pollset, &worker); - gpr_tls_set(&g_current_thread_worker, 0); - } - /* release wakeup fd to the local pool */ - worker.wakeup_fd->next = pollset->local_wakeup_cache; - pollset->local_wakeup_cache = worker.wakeup_fd; - /* check shutdown conditions */ - if (pollset->shutting_down) { - if (pollset_has_workers(pollset)) { - pollset_kick(pollset, NULL); - } else if (!pollset->called_shutdown) { - pollset->called_shutdown = 1; - gpr_mu_unlock(&pollset->mu); - finish_shutdown(exec_ctx, pollset); - grpc_exec_ctx_flush(exec_ctx); - /* Continuing to access pollset here is safe -- it is the caller's - * responsibility to not destroy when it has outstanding calls to - * pollset_work. - * TODO(dklempner): Can we refactor the shutdown logic to avoid this? 
*/ - gpr_mu_lock(&pollset->mu); - } - } - *worker_hdl = NULL; - GPR_TIMER_END("pollset_work", 0); -} - -static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_closure *closure) { - GPR_ASSERT(!pollset->shutting_down); - pollset->shutting_down = 1; - pollset->shutdown_done = closure; - pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST); - - if (!pollset->called_shutdown && !pollset_has_workers(pollset)) { - pollset->called_shutdown = 1; - finish_shutdown(exec_ctx, pollset); - } -} - -static int poll_deadline_to_millis_timeout(gpr_timespec deadline, - gpr_timespec now) { - gpr_timespec timeout; - static const int64_t max_spin_polling_us = 10; - if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { - return -1; - } - if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros( - max_spin_polling_us, - GPR_TIMESPAN))) <= 0) { - return 0; - } - timeout = gpr_time_sub(deadline, now); - return gpr_time_to_millis(gpr_time_add( - timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); -} - -/******************************************************************************* - * pollset_multipoller_with_epoll_posix.c - */ - -static void set_ready(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure **st) { - /* only one set_ready can be active at once (but there may be a racing - notify_on) */ - gpr_mu_lock(&fd->mu); - set_ready_locked(exec_ctx, fd, st); - gpr_mu_unlock(&fd->mu); -} - -static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - set_ready(exec_ctx, fd, &fd->read_closure); -} - -static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - set_ready(exec_ctx, fd, &fd->write_closure); -} - -/* TODO (sreek): Maybe this global list is not required. Double check*/ -struct epoll_fd_list { - int *epoll_fds; - size_t count; - size_t capacity; -}; - -static struct epoll_fd_list epoll_fd_global_list; -static gpr_once init_epoll_fd_list_mu = GPR_ONCE_INIT; -static gpr_mu epoll_fd_list_mu; - -static void init_mu(void) { gpr_mu_init(&epoll_fd_list_mu); } - -static void add_epoll_fd_to_global_list(int epoll_fd) { - gpr_once_init(&init_epoll_fd_list_mu, init_mu); - - gpr_mu_lock(&epoll_fd_list_mu); - if (epoll_fd_global_list.count == epoll_fd_global_list.capacity) { - epoll_fd_global_list.capacity = - GPR_MAX((size_t)8, epoll_fd_global_list.capacity * 2); - epoll_fd_global_list.epoll_fds = - gpr_realloc(epoll_fd_global_list.epoll_fds, - epoll_fd_global_list.capacity * sizeof(int)); - } - epoll_fd_global_list.epoll_fds[epoll_fd_global_list.count++] = epoll_fd; - gpr_mu_unlock(&epoll_fd_list_mu); -} - -static void remove_epoll_fd_from_global_list(int epoll_fd) { - gpr_mu_lock(&epoll_fd_list_mu); - GPR_ASSERT(epoll_fd_global_list.count > 0); - for (size_t i = 0; i < epoll_fd_global_list.count; i++) { - if (epoll_fd == epoll_fd_global_list.epoll_fds[i]) { - epoll_fd_global_list.epoll_fds[i] = - epoll_fd_global_list.epoll_fds[--(epoll_fd_global_list.count)]; - break; - } - } - gpr_mu_unlock(&epoll_fd_list_mu); -} - -static void remove_fd_from_all_epoll_sets(int fd) { - int err; - gpr_once_init(&init_epoll_fd_list_mu, init_mu); - gpr_mu_lock(&epoll_fd_list_mu); - if (epoll_fd_global_list.count == 0) { - gpr_mu_unlock(&epoll_fd_list_mu); - return; - } - for (size_t i = 0; i < epoll_fd_global_list.count; i++) { - err = epoll_ctl(epoll_fd_global_list.epoll_fds[i], EPOLL_CTL_DEL, fd, NULL); - if (err < 0 && errno != ENOENT) { - gpr_log(GPR_ERROR, "epoll_ctl del for %d failed: %s", fd, - strerror(errno)); - } - } - 
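poll_deadline_to_millis_timeout above encodes three rules: an infinite deadline maps to -1 (block indefinitely), anything within a ~10us spin-polling window — or already in the past — maps to 0 (non-blocking poll), and everything else rounds up to the next whole millisecond so the poller never wakes early and busy-loops. The same arithmetic on plain nanosecond counts, as a self-contained illustration (gpr_timespec and its helpers are replaced by int64_t nanoseconds, and INT64_MAX stands in for the infinite-future timestamp):

#include <assert.h>
#include <stdint.h>

#define NS_PER_MS 1000000LL
#define NS_PER_US 1000LL

/* deadline_ns/now_ns are absolute monotonic times; INT64_MAX means "never". */
static int deadline_to_millis_timeout(int64_t deadline_ns, int64_t now_ns) {
  const int64_t max_spin_polling_us = 10;
  if (deadline_ns == INT64_MAX) return -1; /* block indefinitely */
  if (deadline_ns <= now_ns + max_spin_polling_us * NS_PER_US) {
    return 0; /* spin: non-blocking poll */
  }
  int64_t delta = deadline_ns - now_ns;
  return (int)((delta + NS_PER_MS - 1) / NS_PER_MS); /* round up, never wake early */
}

int main(void) {
  int64_t now = 0;
  assert(deadline_to_millis_timeout(INT64_MAX, now) == -1);
  assert(deadline_to_millis_timeout(now + 5 * NS_PER_US, now) == 0); /* within spin window */
  assert(deadline_to_millis_timeout(now - 1, now) == 0);             /* already expired */
  assert(deadline_to_millis_timeout(now + 2500000, now) == 3);       /* 2.5ms -> 3ms */
  return 0;
}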
gpr_mu_unlock(&epoll_fd_list_mu); -} - -/* TODO: sreek - This function multipoll_with_epoll_pollset_add_fd() and - * finally_add_fd() in ev_poll_and_epoll_posix.c */ -static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_fd *fd) { - - /* TODO sreek - Check if we need to get a pollset->mu lock here */ - - struct epoll_event ev; - int err; - - /* Hold a ref to the fd to keep it from being closed during the add. This may - result in a spurious wakeup being assigned to this pollset whilst adding, - but that should be benign. */ - /* TODO: (sreek): Understand how a spurious wake up migh be assinged to this - * pollset..and how holding a reference will prevent the fd from being closed - * (and perhaps more importantly, see how can an fd be closed while being - * added to the epollset */ - GRPC_FD_REF(fd, "add fd"); - - gpr_mu_lock(&fd->mu); - if (fd->shutdown) { - gpr_mu_unlock(&fd->mu); - GRPC_FD_UNREF(fd, "add fd"); - return; - } - gpr_mu_unlock(&fd->mu); - - ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); - ev.data.ptr = fd; - err = epoll_ctl(pollset->epoll_fd, EPOLL_CTL_ADD, fd->fd, &ev); - if (err < 0) { - /* FDs may be added to a pollset multiple times, so EEXIST is normal. */ - if (errno != EEXIST) { - gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", fd->fd, - strerror(errno)); - } - } - - /* The fd might have been orphaned while we were adding it to the epoll set. - Close the fd in such a case (which will also take care of removing it from - the epoll set */ - gpr_mu_lock(&fd->mu); - if (fd_is_orphaned(fd) && !fd->closed) { - close_fd_locked(exec_ctx, fd); - } - gpr_mu_unlock(&fd->mu); - - GRPC_FD_UNREF(fd, "add fd"); -} - -/* Creates an epoll fd and initializes the pollset */ -/* TODO: This has to be called ONLY from pollset_init function. and hence it - * does not acquire any lock */ -static void multipoll_with_epoll_pollset_create_efd(grpc_pollset *pollset) { - struct epoll_event ev; - int err; - - pollset->epoll_fd = epoll_create1(EPOLL_CLOEXEC); - if (pollset->epoll_fd < 0) { - gpr_log(GPR_ERROR, "epoll_create1 failed: %s", strerror(errno)); - abort(); - } - add_epoll_fd_to_global_list(pollset->epoll_fd); - - ev.events = (uint32_t)(EPOLLIN | EPOLLET); - ev.data.ptr = NULL; - - err = epoll_ctl(pollset->epoll_fd, EPOLL_CTL_ADD, - GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), &ev); - if (err < 0) { - gpr_log(GPR_ERROR, "epoll_ctl add for %d failed: %s", - GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), - strerror(errno)); - } -} - -/* TODO(klempner): We probably want to turn this down a bit */ -#define GRPC_EPOLL_MAX_EVENTS 1000 - -static void multipoll_with_epoll_pollset_maybe_work_and_unlock( - grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker *worker, - gpr_timespec deadline, gpr_timespec now) { - struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; - int epoll_fd = pollset->epoll_fd; - int ep_rv; - int poll_rv; - int timeout_ms; - struct pollfd pfds[2]; - - /* If you want to ignore epoll's ability to sanely handle parallel pollers, - * for a more apples-to-apples performance comparison with poll, add a - * if (pollset->counter != 0) { return 0; } - * here. 
- */ - - gpr_mu_unlock(&pollset->mu); - - timeout_ms = poll_deadline_to_millis_timeout(deadline, now); - - pfds[0].fd = GRPC_WAKEUP_FD_GET_READ_FD(&worker->wakeup_fd->fd); - pfds[0].events = POLLIN; - pfds[0].revents = 0; - pfds[1].fd = epoll_fd; - pfds[1].events = POLLIN; - pfds[1].revents = 0; - - /* TODO(vpai): Consider first doing a 0 timeout poll here to avoid - even going into the blocking annotation if possible */ - GPR_TIMER_BEGIN("poll", 0); - GRPC_SCHEDULING_START_BLOCKING_REGION; - poll_rv = grpc_poll_function(pfds, 2, timeout_ms); - GRPC_SCHEDULING_END_BLOCKING_REGION; - GPR_TIMER_END("poll", 0); - - if (poll_rv < 0) { - if (errno != EINTR) { - gpr_log(GPR_ERROR, "poll() failed: %s", strerror(errno)); - } - } else if (poll_rv == 0) { - /* do nothing */ - } else { - if (pfds[0].revents) { - grpc_wakeup_fd_consume_wakeup(&worker->wakeup_fd->fd); - } - if (pfds[1].revents) { - do { - /* The following epoll_wait never blocks; it has a timeout of 0 */ - ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); - if (ep_rv < 0) { - if (errno != EINTR) { - gpr_log(GPR_ERROR, "epoll_wait() failed: %s", strerror(errno)); - } - } else { - int i; - for (i = 0; i < ep_rv; ++i) { - grpc_fd *fd = ep_ev[i].data.ptr; - /* TODO(klempner): We might want to consider making err and pri - * separate events */ - int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); - int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); - int write_ev = ep_ev[i].events & EPOLLOUT; - if (fd == NULL) { - grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); - } else { - if (read_ev || cancel) { - fd_become_readable(exec_ctx, fd); - } - if (write_ev || cancel) { - fd_become_writable(exec_ctx, fd); - } - } - } - } - } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); - } - } -} - -static void multipoll_with_epoll_pollset_finish_shutdown( - grpc_pollset *pollset) {} - -static void multipoll_with_epoll_pollset_destroy(grpc_pollset *pollset) { - close(pollset->epoll_fd); - remove_epoll_fd_from_global_list(pollset->epoll_fd); -} - -/******************************************************************************* - * pollset_set_posix.c - */ - -static grpc_pollset_set *pollset_set_create(void) { - grpc_pollset_set *pollset_set = gpr_malloc(sizeof(*pollset_set)); - memset(pollset_set, 0, sizeof(*pollset_set)); - gpr_mu_init(&pollset_set->mu); - return pollset_set; -} - -static void pollset_set_destroy(grpc_pollset_set *pollset_set) { - size_t i; - gpr_mu_destroy(&pollset_set->mu); - for (i = 0; i < pollset_set->fd_count; i++) { - GRPC_FD_UNREF(pollset_set->fds[i], "pollset_set"); - } - gpr_free(pollset_set->pollsets); - gpr_free(pollset_set->pollset_sets); - gpr_free(pollset_set->fds); - gpr_free(pollset_set); -} - -static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, - grpc_pollset *pollset) { - size_t i, j; - gpr_mu_lock(&pollset_set->mu); - if (pollset_set->pollset_count == pollset_set->pollset_capacity) { - pollset_set->pollset_capacity = - GPR_MAX(8, 2 * pollset_set->pollset_capacity); - pollset_set->pollsets = - gpr_realloc(pollset_set->pollsets, pollset_set->pollset_capacity * - sizeof(*pollset_set->pollsets)); - } - pollset_set->pollsets[pollset_set->pollset_count++] = pollset; - for (i = 0, j = 0; i < pollset_set->fd_count; i++) { - if (fd_is_orphaned(pollset_set->fds[i])) { - GRPC_FD_UNREF(pollset_set->fds[i], "pollset_set"); - } else { - pollset_add_fd(exec_ctx, pollset, pollset_set->fds[i]); - pollset_set->fds[j++] = pollset_set->fds[i]; - } - } - pollset_set->fd_count = j; - 
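The maybe_work_and_unlock path above blocks in poll() on exactly two descriptors — the worker's wakeup fd and the pollset's epoll fd — and only when the epoll fd turns readable does it drain events with zero-timeout epoll_wait calls, looping while a full batch of GRPC_EPOLL_MAX_EVENTS keeps coming back. A compressed sketch of that two-stage wait under stated assumptions: the wakeup fd is modelled as an eventfd, error handling is trimmed, and wait_and_drain is an invented name.

#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <unistd.h>

#define MAX_EVENTS 1000

/* Block in poll(); only if the epoll fd is readable, drain it without blocking. */
static void wait_and_drain(int wakeup_fd, int epfd, int timeout_ms) {
  struct pollfd pfds[2] = {
      {.fd = wakeup_fd, .events = POLLIN},
      {.fd = epfd, .events = POLLIN},
  };
  if (poll(pfds, 2, timeout_ms) <= 0) return; /* timeout or error */

  if (pfds[0].revents & POLLIN) {
    uint64_t val;
    (void)read(wakeup_fd, &val, sizeof(val)); /* consume the kick */
  }
  if (pfds[1].revents & POLLIN) {
    struct epoll_event ev[MAX_EVENTS];
    int n;
    do {
      n = epoll_wait(epfd, ev, MAX_EVENTS, 0); /* timeout 0: never blocks */
      for (int i = 0; i < n; i++) {
        int cancel = ev[i].events & (EPOLLERR | EPOLLHUP);
        int readable = ev[i].events & (EPOLLIN | EPOLLPRI);
        int writable = ev[i].events & EPOLLOUT;
        printf("fd event: read=%d write=%d cancel=%d\n",
               readable != 0, writable != 0, cancel != 0);
      }
    } while (n == MAX_EVENTS); /* a full batch may mean more are pending */
  }
}

int main(void) {
  int wakeup = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
  int epfd = epoll_create1(EPOLL_CLOEXEC);
  eventfd_write(wakeup, 1); /* pre-kick so the call returns immediately */
  wait_and_drain(wakeup, epfd, 1000);
  close(epfd);
  close(wakeup);
  return 0;
}

Real readiness dispatch would call the fd_become_readable/_writable equivalents instead of printf; folding EPOLLERR|EPOLLHUP into both directions matches the "cancel" handling above.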
gpr_mu_unlock(&pollset_set->mu); -} - -static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, - grpc_pollset *pollset) { - size_t i; - gpr_mu_lock(&pollset_set->mu); - for (i = 0; i < pollset_set->pollset_count; i++) { - if (pollset_set->pollsets[i] == pollset) { - pollset_set->pollset_count--; - GPR_SWAP(grpc_pollset *, pollset_set->pollsets[i], - pollset_set->pollsets[pollset_set->pollset_count]); - break; - } - } - gpr_mu_unlock(&pollset_set->mu); -} - -static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) { - size_t i, j; - gpr_mu_lock(&bag->mu); - if (bag->pollset_set_count == bag->pollset_set_capacity) { - bag->pollset_set_capacity = GPR_MAX(8, 2 * bag->pollset_set_capacity); - bag->pollset_sets = - gpr_realloc(bag->pollset_sets, - bag->pollset_set_capacity * sizeof(*bag->pollset_sets)); - } - bag->pollset_sets[bag->pollset_set_count++] = item; - for (i = 0, j = 0; i < bag->fd_count; i++) { - if (fd_is_orphaned(bag->fds[i])) { - GRPC_FD_UNREF(bag->fds[i], "pollset_set"); - } else { - pollset_set_add_fd(exec_ctx, item, bag->fds[i]); - bag->fds[j++] = bag->fds[i]; - } - } - bag->fd_count = j; - gpr_mu_unlock(&bag->mu); -} - -static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) { - size_t i; - gpr_mu_lock(&bag->mu); - for (i = 0; i < bag->pollset_set_count; i++) { - if (bag->pollset_sets[i] == item) { - bag->pollset_set_count--; - GPR_SWAP(grpc_pollset_set *, bag->pollset_sets[i], - bag->pollset_sets[bag->pollset_set_count]); - break; - } - } - gpr_mu_unlock(&bag->mu); -} - -static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, grpc_fd *fd) { - size_t i; - gpr_mu_lock(&pollset_set->mu); - if (pollset_set->fd_count == pollset_set->fd_capacity) { - pollset_set->fd_capacity = GPR_MAX(8, 2 * pollset_set->fd_capacity); - pollset_set->fds = gpr_realloc( - pollset_set->fds, pollset_set->fd_capacity * sizeof(*pollset_set->fds)); - } - GRPC_FD_REF(fd, "pollset_set"); - pollset_set->fds[pollset_set->fd_count++] = fd; - for (i = 0; i < pollset_set->pollset_count; i++) { - pollset_add_fd(exec_ctx, pollset_set->pollsets[i], fd); - } - for (i = 0; i < pollset_set->pollset_set_count; i++) { - pollset_set_add_fd(exec_ctx, pollset_set->pollset_sets[i], fd); - } - gpr_mu_unlock(&pollset_set->mu); -} - -static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, grpc_fd *fd) { - size_t i; - gpr_mu_lock(&pollset_set->mu); - for (i = 0; i < pollset_set->fd_count; i++) { - if (pollset_set->fds[i] == fd) { - pollset_set->fd_count--; - GPR_SWAP(grpc_fd *, pollset_set->fds[i], - pollset_set->fds[pollset_set->fd_count]); - GRPC_FD_UNREF(fd, "pollset_set"); - break; - } - } - for (i = 0; i < pollset_set->pollset_set_count; i++) { - pollset_set_del_fd(exec_ctx, pollset_set->pollset_sets[i], fd); - } - gpr_mu_unlock(&pollset_set->mu); -} - -/******************************************************************************* - * event engine binding - */ - -static void shutdown_engine(void) { - fd_global_shutdown(); - pollset_global_shutdown(); -} - -static const grpc_event_engine_vtable vtable = { - .pollset_size = sizeof(grpc_pollset), - - .fd_create = fd_create, - .fd_wrapped_fd = fd_wrapped_fd, - .fd_orphan = fd_orphan, - .fd_shutdown = fd_shutdown, - .fd_notify_on_read = fd_notify_on_read, - .fd_notify_on_write = fd_notify_on_write, - - .pollset_init = pollset_init, - .pollset_shutdown = 
pollset_shutdown, - .pollset_reset = pollset_reset, - .pollset_destroy = pollset_destroy, - .pollset_work = pollset_work, - .pollset_kick = pollset_kick, - .pollset_add_fd = pollset_add_fd, - - .pollset_set_create = pollset_set_create, - .pollset_set_destroy = pollset_set_destroy, - .pollset_set_add_pollset = pollset_set_add_pollset, - .pollset_set_del_pollset = pollset_set_del_pollset, - .pollset_set_add_pollset_set = pollset_set_add_pollset_set, - .pollset_set_del_pollset_set = pollset_set_del_pollset_set, - .pollset_set_add_fd = pollset_set_add_fd, - .pollset_set_del_fd = pollset_set_del_fd, - - .kick_poller = kick_poller, - - .shutdown_engine = shutdown_engine, -}; - -const grpc_event_engine_vtable *grpc_init_epoll_posix(void) { - fd_global_init(); - pollset_global_init(); - return &vtable; -} - -#endif diff --git a/src/core/lib/iomgr/ev_epoll_posix.h b/src/core/lib/iomgr/ev_epoll_posix.h deleted file mode 100644 index 35319b4fc5..0000000000 --- a/src/core/lib/iomgr/ev_epoll_posix.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * - * Copyright 2015, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
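Both deleted engines finish the same way: a file-static grpc_event_engine_vtable filled in with designated initializers, plus an init function that returns &vtable — or NULL when the mechanism is unavailable, so ev_posix.c can fall back to another engine (the abort()ing is_epoll_available stub earlier shows where that probe was meant to live). A self-contained sketch of the selection pattern; the two-field vtable and probe_epoll are invented for the illustration, not the real grpc_event_engine_vtable:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

typedef struct {
  size_t pollset_size;
  const char *name;
} event_engine_vtable; /* stand-in for grpc_event_engine_vtable */

static const event_engine_vtable epoll_vtable = {
    .pollset_size = 64,
    .name = "epoll",
};

/* Probe availability the cheap way: try to create an epoll instance. */
static bool probe_epoll(void) {
  int fd = epoll_create1(EPOLL_CLOEXEC);
  if (fd < 0) return false;
  close(fd);
  return true;
}

/* Mirrors grpc_init_epoll_posix(): return the vtable, or NULL so the
   caller can fall back to a poll()-based engine. */
static const event_engine_vtable *init_epoll_engine(void) {
  if (!probe_epoll()) return NULL;
  return &epoll_vtable;
}

int main(void) {
  const event_engine_vtable *engine = init_epoll_engine();
  printf("selected engine: %s\n", engine ? engine->name : "(none, fall back)");
  return 0;
}

Probing by actually creating and closing an epoll instance answers both "is the syscall present" and "does the kernel allow it here" in one step, which is why it is the usual availability check.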
- * - */ - -#ifndef GRPC_CORE_LIB_IOMGR_EV_EPOLL_POSIX_H -#define GRPC_CORE_LIB_IOMGR_EV_EPOLL_POSIX_H - -#include "src/core/lib/iomgr/ev_posix.h" - -const grpc_event_engine_vtable *grpc_init_epoll_posix(void); - -#endif /* GRPC_CORE_LIB_IOMGR_EV_EPOLL_POSIX_H */ diff --git a/src/python/grpcio/grpc_core_dependencies.py b/src/python/grpcio/grpc_core_dependencies.py index 13bc6888d6..50a6f196d8 100644 --- a/src/python/grpcio/grpc_core_dependencies.py +++ b/src/python/grpcio/grpc_core_dependencies.py @@ -95,7 +95,6 @@ CORE_SOURCE_FILES = [ 'src/core/lib/iomgr/endpoint_pair_posix.c', 'src/core/lib/iomgr/endpoint_pair_windows.c', 'src/core/lib/iomgr/ev_epoll_linux.c', - 'src/core/lib/iomgr/ev_epoll_posix.c', 'src/core/lib/iomgr/ev_poll_posix.c', 'src/core/lib/iomgr/ev_posix.c', 'src/core/lib/iomgr/exec_ctx.c', diff --git a/test/core/network_benchmarks/epoll_test.c b/test/core/network_benchmarks/epoll_test.c deleted file mode 100644 index a918dd9bb9..0000000000 --- a/test/core/network_benchmarks/epoll_test.c +++ /dev/null @@ -1,263 +0,0 @@ -/* - * - * Copyright 2015, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - */ - -/* TODO: sreek: REMOVE THIS FILE */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -int g_signal_num = SIGUSR1; - -int g_timeout_secs = 2; - -int g_eventfd_create = 1; -int g_eventfd_wakeup = 0; -int g_eventfd_teardown = 0; -int g_close_epoll_fd = 1; - -typedef struct thread_args { - gpr_thd_id id; - int epoll_fd; - int thread_num; -} thread_args; - -static int eventfd_create() { - if (!g_eventfd_create) { - return -1; - } - - int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - GPR_ASSERT(efd >= 0); - return efd; -} - -static void eventfd_wakeup(int efd) { - if (!g_eventfd_wakeup) { - return; - } - - int err; - do { - err = eventfd_write(efd, 1); - } while (err < 0 && errno == EINTR); -} - -static void epoll_teardown(int epoll_fd, int fd) { - if (!g_eventfd_teardown) { - return; - } - - if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, NULL) < 0) { - if (errno != ENOENT) { - gpr_log(GPR_ERROR, "epoll_ctl: %s", strerror(errno)); - GPR_ASSERT(0); - } - } -} - -/* Special case for epoll, where we need to create the fd ahead of time. */ -static int epoll_setup(int fd) { - int epoll_fd; - struct epoll_event ev; - - epoll_fd = epoll_create(1); - if (epoll_fd < 0) { - gpr_log(GPR_ERROR, "epoll_create: %s", strerror(errno)); - return -1; - } - - ev.events = (uint32_t)EPOLLIN; - ev.data.fd = fd; - if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) { - if (errno != EEXIST) { - gpr_log(GPR_ERROR, "epoll_ctl: %s", strerror(errno)); - return -1; - } - - gpr_log(GPR_ERROR, "epoll_ctl: The fd %d already exists", fd); - } - - return epoll_fd; -} - -#define GRPC_EPOLL_MAX_EVENTS 1000 -static void thread_main(void *args) { - int ep_rv; - struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; - int fd; - int i; - int cancel; - int read; - int write; - thread_args *thd_args = args; - sigset_t new_mask; - sigset_t orig_mask; - int keep_polling = 0; - - gpr_log(GPR_INFO, "Thread: %d Started", thd_args->thread_num); - - do { - keep_polling = 0; - - /* Mask the signal before getting the epoll_fd */ - gpr_log(GPR_INFO, "Thread: %d Blocking signal: %d", thd_args->thread_num, - g_signal_num); - sigemptyset(&new_mask); - sigaddset(&new_mask, g_signal_num); - pthread_sigmask(SIG_BLOCK, &new_mask, &orig_mask); - - gpr_log(GPR_INFO, "Thread: %d Waiting on epoll_wait()", - thd_args->thread_num); - ep_rv = epoll_pwait(thd_args->epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, - g_timeout_secs * 5000, &orig_mask); - gpr_log(GPR_INFO, "Thread: %d out of epoll_wait. ep_rv = %d", - thd_args->thread_num, ep_rv); - - if (ep_rv < 0) { - if (errno != EINTR) { - gpr_log(GPR_ERROR, "Thread: %d. epoll_wait failed with error: %d", - thd_args->thread_num, errno); - } else { - gpr_log(GPR_INFO, - "Thread: %d. epoll_wait was interrupted. Polling again >>>>>>>", - thd_args->thread_num); - keep_polling = 1; - } - } else { - if (ep_rv == 0) { - gpr_log(GPR_INFO, - "Thread: %d - epoll_wait returned 0. Most likely a timeout. " - "Polling again", - thd_args->thread_num); - keep_polling = 1; - } - - for (i = 0; i < ep_rv; i++) { - fd = ep_ev[i].data.fd; - cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); - read = ep_ev[i].events & (EPOLLIN | EPOLLPRI); - write = ep_ev[i].events & EPOLLOUT; - gpr_log(GPR_INFO, - "Thread: %d. epoll_wait returned that fd: %d has event of " - "interest. 
read: %d, write: %d, cancel: %d", - thd_args->thread_num, fd, read, write, cancel); - } - } - } while (keep_polling); -} - -static void close_fd(int fd) { - if (!g_close_epoll_fd) { - return; - } - - gpr_log(GPR_INFO, "*** Closing fd : %d ****", fd); - close(fd); - gpr_log(GPR_INFO, "*** Closed fd : %d ****", fd); -} - - -static void sig_handler(int sig_num) { - gpr_log(GPR_INFO, "<<<<< Received signal %d", sig_num); -} - -static void set_signal_handler() { - gpr_log(GPR_INFO, "Setting signal handler"); - signal(g_signal_num, sig_handler); -} - -#define NUM_THREADS 2 -int main(int argc, char **argv) { - int efd; - int epoll_fd; - int i; - thread_args thd_args[NUM_THREADS]; - gpr_thd_options options = gpr_thd_options_default(); - - set_signal_handler(); - - gpr_log(GPR_INFO, "Starting.."); - efd = eventfd_create(); - gpr_log(GPR_INFO, "Created event fd: %d", efd); - epoll_fd = epoll_setup(efd); - gpr_log(GPR_INFO, "Created epoll_fd: %d", epoll_fd); - - gpr_thd_options_set_joinable(&options); - for (i = 0; i < NUM_THREADS; i++) { - thd_args[i].thread_num = i; - thd_args[i].epoll_fd = epoll_fd; - gpr_log(GPR_INFO, "Starting thread: %d", i); - gpr_thd_new(&thd_args[i].id, thread_main, &thd_args[i], &options); - } - - sleep((unsigned)g_timeout_secs * 2); - - /* Send signals first */ - for (i = 0; i < NUM_THREADS; i++) { - gpr_log(GPR_INFO, "Sending signal to thread: %d", thd_args->thread_num); - pthread_kill(thd_args[i].id, g_signal_num); - gpr_log(GPR_INFO, "Sent signal to thread: %d >>>>>> ", - thd_args->thread_num); - } - - sleep((unsigned)g_timeout_secs * 2); - - close_fd(epoll_fd); - - sleep((unsigned)g_timeout_secs * 2); - - eventfd_wakeup(efd); - epoll_teardown(epoll_fd, efd); - - for (i = 0; i < NUM_THREADS; i++) { - gpr_thd_join(thd_args[i].id); - gpr_log(GPR_INFO, "Thread: %d joined", i); - } - - return 0; -} diff --git a/test/core/network_benchmarks/low_level_ping_pong.c b/test/core/network_benchmarks/low_level_ping_pong.c index b72a07778e..1b40895a71 100644 --- a/test/core/network_benchmarks/low_level_ping_pong.c +++ b/test/core/network_benchmarks/low_level_ping_pong.c @@ -44,7 +44,6 @@ #include #include #include -#include #ifdef __linux__ #include #endif @@ -85,7 +84,6 @@ typedef struct thread_args { static int read_bytes(int fd, char *buf, size_t read_size, int spin) { size_t bytes_read = 0; ssize_t err; - do { err = read(fd, buf + bytes_read, read_size - bytes_read); if (err < 0) { diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index d968278f2a..21ee1e6ff8 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -808,7 +808,6 @@ src/core/lib/iomgr/closure.h \ src/core/lib/iomgr/endpoint.h \ src/core/lib/iomgr/endpoint_pair.h \ src/core/lib/iomgr/ev_epoll_linux.h \ -src/core/lib/iomgr/ev_epoll_posix.h \ src/core/lib/iomgr/ev_poll_posix.h \ src/core/lib/iomgr/ev_posix.h \ src/core/lib/iomgr/exec_ctx.h \ @@ -957,7 +956,6 @@ src/core/lib/iomgr/endpoint.c \ src/core/lib/iomgr/endpoint_pair_posix.c \ src/core/lib/iomgr/endpoint_pair_windows.c \ src/core/lib/iomgr/ev_epoll_linux.c \ -src/core/lib/iomgr/ev_epoll_posix.c \ src/core/lib/iomgr/ev_poll_posix.c \ src/core/lib/iomgr/ev_posix.c \ src/core/lib/iomgr/exec_ctx.c \ diff --git a/tools/run_tests/sources_and_headers.json b/tools/run_tests/sources_and_headers.json index 85b71a8255..304a0e1e3a 100644 --- a/tools/run_tests/sources_and_headers.json +++ b/tools/run_tests/sources_and_headers.json @@ -301,22 +301,6 @@ "third_party": false, "type": "target" }, - 
{ - "deps": [ - "gpr", - "gpr_test_util", - "grpc", - "grpc_test_util" - ], - "headers": [], - "language": "c", - "name": "epoll_test", - "src": [ - "test/core/network_benchmarks/epoll_test.c" - ], - "third_party": false, - "type": "target" - }, { "deps": [ "gpr", @@ -5547,7 +5531,6 @@ "src/core/lib/iomgr/endpoint.h", "src/core/lib/iomgr/endpoint_pair.h", "src/core/lib/iomgr/ev_epoll_linux.h", - "src/core/lib/iomgr/ev_epoll_posix.h", "src/core/lib/iomgr/ev_poll_posix.h", "src/core/lib/iomgr/ev_posix.h", "src/core/lib/iomgr/exec_ctx.h", @@ -5649,8 +5632,6 @@ "src/core/lib/iomgr/endpoint_pair_windows.c", "src/core/lib/iomgr/ev_epoll_linux.c", "src/core/lib/iomgr/ev_epoll_linux.h", - "src/core/lib/iomgr/ev_epoll_posix.c", - "src/core/lib/iomgr/ev_epoll_posix.h", "src/core/lib/iomgr/ev_poll_posix.c", "src/core/lib/iomgr/ev_poll_posix.h", "src/core/lib/iomgr/ev_posix.c", diff --git a/tools/run_tests/tests.json b/tools/run_tests/tests.json index be7b72f61d..850f9474ae 100644 --- a/tools/run_tests/tests.json +++ b/tools/run_tests/tests.json @@ -356,21 +356,6 @@ "windows" ] }, - { - "args": [], - "ci_platforms": [ - "linux" - ], - "cpu_cost": 1.0, - "exclude_configs": [], - "flaky": false, - "gtest": false, - "language": "c", - "name": "epoll_test", - "platforms": [ - "linux" - ] - }, { "args": [], "ci_platforms": [ diff --git a/vsprojects/vcxproj/grpc/grpc.vcxproj b/vsprojects/vcxproj/grpc/grpc.vcxproj index a67e4d16da..ce523725e8 100644 --- a/vsprojects/vcxproj/grpc/grpc.vcxproj +++ b/vsprojects/vcxproj/grpc/grpc.vcxproj @@ -317,7 +317,6 @@ - @@ -487,8 +486,6 @@ - - diff --git a/vsprojects/vcxproj/grpc/grpc.vcxproj.filters b/vsprojects/vcxproj/grpc/grpc.vcxproj.filters index bf9b7dc7dc..d46676f229 100644 --- a/vsprojects/vcxproj/grpc/grpc.vcxproj.filters +++ b/vsprojects/vcxproj/grpc/grpc.vcxproj.filters @@ -58,9 +58,6 @@ src\core\lib\iomgr - - src\core\lib\iomgr - src\core\lib\iomgr @@ -683,9 +680,6 @@ src\core\lib\iomgr - - src\core\lib\iomgr - src\core\lib\iomgr diff --git a/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj b/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj index afc9a2ca1b..d4dd428c2d 100644 --- a/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj +++ b/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj @@ -305,7 +305,6 @@ - @@ -453,8 +452,6 @@ - - diff --git a/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj.filters b/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj.filters index b7507f9a96..d14e7e7ab4 100644 --- a/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj.filters +++ b/vsprojects/vcxproj/grpc_unsecure/grpc_unsecure.vcxproj.filters @@ -61,9 +61,6 @@ src\core\lib\iomgr - - src\core\lib\iomgr - src\core\lib\iomgr @@ -581,9 +578,6 @@ src\core\lib\iomgr - - src\core\lib\iomgr - src\core\lib\iomgr -- cgit v1.2.3 From 9bc3d2d67f32f4dad8cd1319dd4f3fce48c1abee Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Mon, 6 Jun 2016 10:27:56 -0700 Subject: Minor comments --- src/core/lib/iomgr/ev_epoll_linux.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 61106faef9..d3abf3bd84 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -144,10 +144,9 @@ typedef struct polling_island { /******************************************************************************* * Pollset Declarations */ - struct grpc_pollset_worker { int kicked_specifically; - 
pthread_t pt_id; /* TODO (sreek) - Add an abstraction here */ + pthread_t pt_id; /* Thread id of this worker */ struct grpc_pollset_worker *next; struct grpc_pollset_worker *prev; }; @@ -483,8 +482,7 @@ polling_island *polling_island_merge(polling_island *p, polling_island *q) { /* Get locks on both the polling islands */ polling_island_pair_update_and_lock(&p, &q); - /* TODO: sreek: Think about this scenario some more. Is it possible ?. what - * does it mean, when would this happen */ + /* TODO: sreek: Think about this scenario some more */ if (p == q) { /* Nothing needs to be done here */ gpr_mu_unlock(&p->mu); @@ -539,7 +537,10 @@ static void polling_island_global_init() { * (specifically when a new alarm needs to be triggered earlier than the next * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a * case occurs. */ -/* TODO: sreek: Right now, this wakes up all pollers */ + +/* TODO: sreek: Right now, this wakes up all pollers. In future we should make + * sure to wake up one polling thread (which can wake up other threads if + * needed) */ grpc_wakeup_fd grpc_global_wakeup_fd; static grpc_fd *fd_freelist = NULL; @@ -676,7 +677,6 @@ static int fd_wrapped_fd(grpc_fd *fd) { static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure *on_done, int *release_fd, const char *reason) { - /* TODO(sreek) In ev_poll_posix.c,the lock is acquired a little later. Why? */ bool is_fd_closed = false; gpr_mu_lock(&fd->mu); fd->on_done_closure = on_done; @@ -784,8 +784,9 @@ static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, */ static void sig_handler(int sig_num) { - /* TODO: sreek - Remove this expensive log line */ +#ifdef GPRC_EPOLL_DEBUG gpr_log(GPR_INFO, "Received signal %d", sig_num); +#endif } /* Global state management */ @@ -986,7 +987,10 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, if (ep_rv < 0) { if (errno != EINTR) { - /* TODO (sreek) - Check for bad file descriptor error */ + /* TODO (sreek) - Do not log an error in case of bad file descriptor + * (A bad file descriptor here would just mean that the epoll set was + * merged with another epoll set and that the current epoll_fd is + * closed) */ gpr_log(GPR_ERROR, "epoll_pwait() failed: %s", strerror(errno)); } else { gpr_log(GPR_DEBUG, "pollset_work_and_unlock: 0-timeout epoll_wait()"); @@ -1062,7 +1066,9 @@ static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, GPR_TIMER_END("pollset_shutdown", 0); } -/* TODO(sreek) Is pollset_shutdown() guranteed to be called before this? */ +/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other + * than destroying the mutexes, there is nothing special that needs to be done + * here */ static void pollset_destroy(grpc_pollset *pollset) { GPR_ASSERT(!pollset_has_workers(pollset)); gpr_mu_destroy(&pollset->pi_mu); @@ -1075,7 +1081,7 @@ static void pollset_reset(grpc_pollset *pollset) { pollset->shutting_down = false; pollset->finish_shutdown_called = false; pollset->kicked_without_pollers = false; - /* TODO(sreek) - Should pollset->shutdown closure be set to NULL here? 
*/ + pollset->shutdown_done = NULL; pollset_release_polling_island_locked(pollset); } @@ -1149,7 +1155,7 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { gpr_log(GPR_DEBUG, "pollset_add_fd: pollset: %p, fd: %d", pollset, fd->fd); - /* TODO sreek - Check if we need to get a pollset->mu lock here */ + /* TODO sreek - Double check if we need to get a pollset->mu lock here */ gpr_mu_lock(&pollset->pi_mu); gpr_mu_lock(&fd->pi_mu); -- cgit v1.2.3 From d627c105847f0d262ce3886fb5b463dc914ddbd7 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Mon, 6 Jun 2016 15:49:32 -0700 Subject: Fix asan failures (i.e add pollset_global_shutdown), remove debug log lines --- src/core/lib/iomgr/ev_epoll_linux.c | 76 ++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 38 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index d3abf3bd84..0e00d4d216 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -516,6 +516,21 @@ static void polling_island_global_init() { g_pi_freelist = NULL; } +static void polling_island_global_shutdown() { + polling_island *next; + gpr_mu_lock(&g_pi_freelist_mu); + gpr_mu_unlock(&g_pi_freelist_mu); + while (g_pi_freelist != NULL) { + next = g_pi_freelist->next_free; + gpr_mu_destroy(&g_pi_freelist->mu); + gpr_free(g_pi_freelist->fds); + gpr_free(g_pi_freelist); + g_pi_freelist = next; + } + + gpr_mu_destroy(&g_pi_freelist_mu); +} + /******************************************************************************* * Fd Definitions */ @@ -784,7 +799,7 @@ static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, */ static void sig_handler(int sig_num) { -#ifdef GPRC_EPOLL_DEBUG +#ifdef GRPC_EPOLL_DEBUG gpr_log(GPR_INFO, "Received signal %d", sig_num); #endif } @@ -792,7 +807,7 @@ static void sig_handler(int sig_num) { /* Global state management */ static void pollset_global_init(void) { grpc_wakeup_fd_init(&grpc_global_wakeup_fd); - signal(SIGUSR1, sig_handler); + signal(SIGUSR1, sig_handler); /* TODO: sreek - Do not hardcode SIGUSR1 */ } static void pollset_global_shutdown(void) { @@ -840,7 +855,6 @@ static void pollset_kick(grpc_pollset *p, grpc_pollset_worker *worker = specific_worker; if (worker != NULL) { if (worker == GRPC_POLLSET_KICK_BROADCAST) { - gpr_log(GPR_DEBUG, "pollset_kick: broadcast!"); if (pollset_has_workers(p)) { GPR_TIMER_BEGIN("pollset_kick.broadcast", 0); for (worker = p->root_worker.next; worker != &p->root_worker; @@ -848,12 +862,10 @@ static void pollset_kick(grpc_pollset *p, pthread_kill(worker->pt_id, SIGUSR1); } } else { - gpr_log(GPR_DEBUG, "pollset_kick: (broadcast) Kicked without pollers"); p->kicked_without_pollers = true; } GPR_TIMER_END("pollset_kick.broadcast", 0); } else { - gpr_log(GPR_DEBUG, "pollset_kick: kicked kicked_specifically"); GPR_TIMER_MARK("kicked_specifically", 0); worker->kicked_specifically = true; pthread_kill(worker->pt_id, SIGUSR1); @@ -864,11 +876,9 @@ static void pollset_kick(grpc_pollset *p, if (worker != NULL) { GPR_TIMER_MARK("finally_kick", 0); push_back_worker(p, worker); - gpr_log(GPR_DEBUG, "pollset_kick: anonymous kick"); pthread_kill(worker->pt_id, SIGUSR1); } else { GPR_TIMER_MARK("kicked_no_pollers", 0); - gpr_log(GPR_DEBUG, "pollset_kick: kicked without pollers"); p->kicked_without_pollers = true; } } @@ -941,7 +951,6 @@ static void 
pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; int epoll_fd = -1; int ep_rv; - gpr_log(GPR_DEBUG, "pollset_work_and_unlock: Entering.."); GPR_TIMER_BEGIN("pollset_work_and_unlock", 0); /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the @@ -952,22 +961,27 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, - pollset->polling_island->mu */ gpr_mu_lock(&pollset->pi_mu); - if (pollset->polling_island != NULL) { - pollset->polling_island = - polling_island_update_and_lock(pollset->polling_island, 1, 0); - epoll_fd = pollset->polling_island->epoll_fd; - if (pollset->polling_island->fd_cnt == 0) { - gpr_log(GPR_DEBUG, "pollset_work_and_unlock: epoll_fd: %d, No other fds", - epoll_fd); - } - for (size_t i = 0; i < pollset->polling_island->fd_cnt; i++) { - gpr_log(GPR_DEBUG, - "pollset_work_and_unlock: epoll_fd: %d, fd_count: %d, fd[%d]: %d", - epoll_fd, pollset->polling_island->fd_cnt, i, - pollset->polling_island->fds[i]->fd); - } - gpr_mu_unlock(&pollset->polling_island->mu); + if (pollset->polling_island == NULL) { + pollset->polling_island = polling_island_create(NULL, 1); + } + + pollset->polling_island = + polling_island_update_and_lock(pollset->polling_island, 1, 0); + epoll_fd = pollset->polling_island->epoll_fd; + +#ifdef GRPC_EPOLL_DEBUG + if (pollset->polling_island->fd_cnt == 0) { + gpr_log(GPR_DEBUG, "pollset_work_and_unlock: epoll_fd: %d, No other fds", + epoll_fd); + } + for (size_t i = 0; i < pollset->polling_island->fd_cnt; i++) { + gpr_log(GPR_DEBUG, + "pollset_work_and_unlock: epoll_fd: %d, fd_count: %d, fd[%d]: %d", + epoll_fd, pollset->polling_island->fd_cnt, i, + pollset->polling_island->fds[i]->fd); } +#endif + gpr_mu_unlock(&pollset->polling_island->mu); gpr_mu_unlock(&pollset->pi_mu); gpr_mu_unlock(&pollset->mu); @@ -975,16 +989,8 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, /* If epoll_fd == -1, this is a blank pollset and does not have any fds yet */ if (epoll_fd != -1) { do { - gpr_timespec before_epoll = gpr_now(GPR_CLOCK_PRECISE); - gpr_log(GPR_DEBUG, "pollset_work_and_unlock: epoll_wait()...."); ep_rv = epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask); - gpr_timespec after_epoll = gpr_now(GPR_CLOCK_PRECISE); - int dur = gpr_time_to_millis(gpr_time_sub(after_epoll, before_epoll)); - gpr_log(GPR_DEBUG, - "pollset_work_and_unlock: DONE epoll_wait() : %d ms, ep_rv: %d", - dur, ep_rv); - if (ep_rv < 0) { if (errno != EINTR) { /* TODO (sreek) - Do not log an error in case of bad file descriptor @@ -993,9 +999,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, * closed) */ gpr_log(GPR_ERROR, "epoll_pwait() failed: %s", strerror(errno)); } else { - gpr_log(GPR_DEBUG, "pollset_work_and_unlock: 0-timeout epoll_wait()"); ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); - gpr_log(GPR_DEBUG, "pollset_work_and_unlock: ep_rv: %d", ep_rv); } } @@ -1018,7 +1022,6 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, } } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); } - gpr_log(GPR_DEBUG, "pollset_work_and_unlock: Leaving.."); GPR_TIMER_END("pollset_work_and_unlock", 0); } @@ -1093,7 +1096,6 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker **worker_hdl, gpr_timespec now, gpr_timespec deadline) { GPR_TIMER_BEGIN("pollset_work", 0); - gpr_log(GPR_DEBUG, "pollset_work: enter"); int timeout_ms = poll_deadline_to_millis_timeout(deadline, now); sigset_t new_mask; @@ -1112,7 +1114,6 @@ 
static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, work that needs attention like an event on the completion queue or an alarm */ GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); - gpr_log(GPR_INFO, "pollset_work: kicked without pollers.."); pollset->kicked_without_pollers = 0; } else if (!pollset->shutting_down) { sigemptyset(&new_mask); @@ -1147,14 +1148,12 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, gpr_mu_lock(&pollset->mu); } - gpr_log(GPR_DEBUG, "pollset_work(): leaving"); *worker_hdl = NULL; GPR_TIMER_END("pollset_work", 0); } static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { - gpr_log(GPR_DEBUG, "pollset_add_fd: pollset: %p, fd: %d", pollset, fd->fd); /* TODO sreek - Double check if we need to get a pollset->mu lock here */ gpr_mu_lock(&pollset->pi_mu); gpr_mu_lock(&fd->pi_mu); @@ -1347,6 +1346,7 @@ static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, static void shutdown_engine(void) { fd_global_shutdown(); pollset_global_shutdown(); + polling_island_global_shutdown(); } static const grpc_event_engine_vtable vtable = { -- cgit v1.2.3 From e5012bac7a57df6b1993a1eaa9b8b3c4d7671975 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Mon, 6 Jun 2016 16:01:45 -0700 Subject: Remove redundant code --- src/core/lib/iomgr/ev_epoll_linux.c | 61 ++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 32 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 0e00d4d216..f7ac4ae1ff 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -986,42 +986,39 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, gpr_mu_unlock(&pollset->pi_mu); gpr_mu_unlock(&pollset->mu); - /* If epoll_fd == -1, this is a blank pollset and does not have any fds yet */ - if (epoll_fd != -1) { - do { - ep_rv = epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, - sig_mask); - if (ep_rv < 0) { - if (errno != EINTR) { - /* TODO (sreek) - Do not log an error in case of bad file descriptor - * (A bad file descriptor here would just mean that the epoll set was - * merged with another epoll set and that the current epoll_fd is - * closed) */ - gpr_log(GPR_ERROR, "epoll_pwait() failed: %s", strerror(errno)); - } else { - ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); - } + do { + ep_rv = epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, + sig_mask); + if (ep_rv < 0) { + if (errno != EINTR) { + /* TODO (sreek) - Do not log an error in case of bad file descriptor + * (A bad file descriptor here would just mean that the epoll set was + * merged with another epoll set and that the current epoll_fd is + * closed) */ + gpr_log(GPR_ERROR, "epoll_pwait() failed: %s", strerror(errno)); + } else { + ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); } + } - int i; - for (i = 0; i < ep_rv; ++i) { - grpc_fd *fd = ep_ev[i].data.ptr; - int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); - int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); - int write_ev = ep_ev[i].events & EPOLLOUT; - if (fd == NULL) { - grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); - } else { - if (read_ev || cancel) { - fd_become_readable(exec_ctx, fd); - } - if (write_ev || cancel) { - fd_become_writable(exec_ctx, fd); - } + int i; + for (i = 0; i < ep_rv; ++i) { + grpc_fd *fd = ep_ev[i].data.ptr; + int cancel = ep_ev[i].events & (EPOLLERR | 
EPOLLHUP); + int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); + int write_ev = ep_ev[i].events & EPOLLOUT; + if (fd == NULL) { + grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); + } else { + if (read_ev || cancel) { + fd_become_readable(exec_ctx, fd); + } + if (write_ev || cancel) { + fd_become_writable(exec_ctx, fd); } } - } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); - } + } + } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); GPR_TIMER_END("pollset_work_and_unlock", 0); } -- cgit v1.2.3 From ad162ba5a9f91481b71f233d1f4f8c15b01de644 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Mon, 6 Jun 2016 16:23:37 -0700 Subject: Core review comments and remove 'kicked_specifically' field as its not needed --- src/core/lib/iomgr/ev_epoll_linux.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index f7ac4ae1ff..d5aac96fa4 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -145,7 +145,6 @@ typedef struct polling_island { * Pollset Declarations */ struct grpc_pollset_worker { - int kicked_specifically; pthread_t pt_id; /* Thread id of this worker */ struct grpc_pollset_worker *next; struct grpc_pollset_worker *prev; @@ -235,18 +234,16 @@ static void polling_island_remove_all_fds_locked(polling_island *pi, size_t i; for (i = 0; i < pi->fd_cnt; i++) { - if (remove_fd_refs) { - GRPC_FD_UNREF(pi->fds[i], "polling_island"); - } - err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL); if (err < 0 && errno != ENOENT) { - gpr_log(GPR_ERROR, - "epoll_ctl delete for fds[i]: %d failed with error: %s", i, - pi->fds[i]->fd, strerror(errno)); /* TODO: sreek - We need a better way to bubble up this error instead of - * just logging a message */ - continue; + * just logging a message */ + gpr_log(GPR_ERROR, "epoll_ctl deleting fds[%d]: %d failed with error: %s", + i, pi->fds[i]->fd, strerror(errno)); + } + + if (remove_fd_refs) { + GRPC_FD_UNREF(pi->fds[i], "polling_island"); } } @@ -264,7 +261,7 @@ static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd, if (!is_fd_closed) { err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL); if (err < 0 && errno != ENOENT) { - gpr_log(GPR_ERROR, "epoll_ctl delete for fd: %d failed with error; %s", + gpr_log(GPR_ERROR, "epoll_ctl deleting fd: %d failed with error; %s", fd->fd, strerror(errno)); } } @@ -867,7 +864,6 @@ static void pollset_kick(grpc_pollset *p, GPR_TIMER_END("pollset_kick.broadcast", 0); } else { GPR_TIMER_MARK("kicked_specifically", 0); - worker->kicked_specifically = true; pthread_kill(worker->pt_id, SIGUSR1); } } else { @@ -1100,7 +1096,6 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_pollset_worker worker; worker.next = worker.prev = NULL; - worker.kicked_specifically = 0; worker.pt_id = pthread_self(); *worker_hdl = &worker; -- cgit v1.2.3 From 5855c478c69508f000baa4878f515d72b5f5a1e9 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 8 Jun 2016 12:56:56 -0700 Subject: Use poll if not linux, add read notifier pollset support and some groundwork for adding API that allows users to register custom kick signal number --- include/grpc/impl/codegen/port_platform.h | 1 + src/core/lib/iomgr/ev_epoll_linux.c | 72 ++++++++++++++++++++++--------- src/core/lib/iomgr/ev_posix.c | 2 +- 3 files changed, 54 insertions(+), 21 deletions(-) (limited to 'src/core/lib') diff --git 
a/include/grpc/impl/codegen/port_platform.h b/include/grpc/impl/codegen/port_platform.h index be4215a54b..7a6ec53fb4 100644 --- a/include/grpc/impl/codegen/port_platform.h +++ b/include/grpc/impl/codegen/port_platform.h @@ -189,6 +189,7 @@ #define GPR_GCC_ATOMIC 1 #define GPR_GCC_TLS 1 #define GPR_LINUX 1 +#define GPR_LINUX_EPOLL 1 #define GPR_LINUX_LOG #define GPR_LINUX_MULTIPOLL_WITH_EPOLL 1 #define GPR_POSIX_WAKEUP_FD 1 diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index d5aac96fa4..69ab665e15 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -33,7 +33,7 @@ #include -#ifdef GPR_POSIX_SOCKET +#ifdef GPR_LINUX_EPOLL #include "src/core/lib/iomgr/ev_epoll_linux.h" @@ -60,6 +60,8 @@ struct polling_island; +static int grpc_poller_kick_signum; + /******************************************************************************* * Fd Declarations */ @@ -92,6 +94,9 @@ struct grpc_fd { struct grpc_fd *freelist_next; grpc_closure *on_done_closure; + /* The pollset that last noticed that the fd is readable */ + grpc_pollset *read_notifier_pollset; + grpc_iomgr_object iomgr_object; }; @@ -650,14 +655,15 @@ static grpc_fd *fd_create(int fd, const char *name) { gpr_mu_lock(&new_fd->mu); gpr_atm_rel_store(&new_fd->refst, 1); + new_fd->fd = fd; new_fd->shutdown = false; + new_fd->orphaned = false; new_fd->read_closure = CLOSURE_NOT_READY; new_fd->write_closure = CLOSURE_NOT_READY; - new_fd->fd = fd; new_fd->polling_island = NULL; new_fd->freelist_next = NULL; new_fd->on_done_closure = NULL; - new_fd->orphaned = false; + new_fd->read_notifier_pollset = NULL; gpr_mu_unlock(&new_fd->mu); @@ -765,6 +771,17 @@ static int set_ready_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, } } +static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, + grpc_fd *fd) { + grpc_pollset *notifier = NULL; + + gpr_mu_lock(&fd->mu); + notifier = fd->read_notifier_pollset; + gpr_mu_unlock(&fd->mu); + + return notifier; +} + static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { gpr_mu_lock(&fd->mu); GPR_ASSERT(!fd->shutdown); @@ -801,16 +818,25 @@ static void sig_handler(int sig_num) { #endif } +static void poller_kick_init() { + grpc_poller_kick_signum = SIGRTMIN + 2; + signal(grpc_poller_kick_signum, sig_handler); +} + /* Global state management */ static void pollset_global_init(void) { grpc_wakeup_fd_init(&grpc_global_wakeup_fd); - signal(SIGUSR1, sig_handler); /* TODO: sreek - Do not hardcode SIGUSR1 */ + poller_kick_init(); } static void pollset_global_shutdown(void) { grpc_wakeup_fd_destroy(&grpc_global_wakeup_fd); } +static void pollset_worker_kick(grpc_pollset_worker *worker) { + pthread_kill(worker->pt_id, grpc_poller_kick_signum); +} + /* Return 1 if the pollset has active threads in pollset_work (pollset must * be locked) */ static int pollset_has_workers(grpc_pollset *p) { @@ -856,7 +882,7 @@ static void pollset_kick(grpc_pollset *p, GPR_TIMER_BEGIN("pollset_kick.broadcast", 0); for (worker = p->root_worker.next; worker != &p->root_worker; worker = worker->next) { - pthread_kill(worker->pt_id, SIGUSR1); + pollset_worker_kick(worker); } } else { p->kicked_without_pollers = true; @@ -864,7 +890,7 @@ static void pollset_kick(grpc_pollset *p, GPR_TIMER_END("pollset_kick.broadcast", 0); } else { GPR_TIMER_MARK("kicked_specifically", 0); - pthread_kill(worker->pt_id, SIGUSR1); + pollset_worker_kick(worker); } } else { GPR_TIMER_MARK("kick_anonymous", 0); @@ -872,7 +898,7 @@ static void pollset_kick(grpc_pollset *p, if 
(worker != NULL) { GPR_TIMER_MARK("finally_kick", 0); push_back_worker(p, worker); - pthread_kill(worker->pt_id, SIGUSR1); + pollset_worker_kick(worker); } else { GPR_TIMER_MARK("kicked_no_pollers", 0); p->kicked_without_pollers = true; @@ -924,20 +950,20 @@ static int poll_deadline_to_millis_timeout(gpr_timespec deadline, timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); } -static void set_ready(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure **st) { - /* only one set_ready can be active at once (but there may be a racing - notify_on) */ +static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_pollset *notifier) { + /* Need the fd->mu since we might be racing with fd_notify_on_read */ gpr_mu_lock(&fd->mu); - set_ready_locked(exec_ctx, fd, st); + set_ready_locked(exec_ctx, fd, &fd->read_closure); + fd->read_notifier_pollset = notifier; gpr_mu_unlock(&fd->mu); } -static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - set_ready(exec_ctx, fd, &fd->read_closure); -} - static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - set_ready(exec_ctx, fd, &fd->write_closure); + /* Need the fd->mu since we might be racing with fd_notify_on_write */ + gpr_mu_lock(&fd->mu); + set_ready_locked(exec_ctx, fd, &fd->write_closure); + gpr_mu_unlock(&fd->mu); } #define GRPC_EPOLL_MAX_EVENTS 1000 @@ -1007,7 +1033,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); } else { if (read_ev || cancel) { - fd_become_readable(exec_ctx, fd); + fd_become_readable(exec_ctx, fd, pollset); } if (write_ev || cancel) { fd_become_writable(exec_ctx, fd); @@ -1109,9 +1135,9 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, pollset->kicked_without_pollers = 0; } else if (!pollset->shutting_down) { sigemptyset(&new_mask); - sigaddset(&new_mask, SIGUSR1); + sigaddset(&new_mask, grpc_poller_kick_signum); pthread_sigmask(SIG_BLOCK, &new_mask, &orig_mask); - sigdelset(&orig_mask, SIGUSR1); + sigdelset(&orig_mask, grpc_poller_kick_signum); push_front_worker(pollset, &worker); @@ -1350,6 +1376,7 @@ static const grpc_event_engine_vtable vtable = { .fd_shutdown = fd_shutdown, .fd_notify_on_read = fd_notify_on_read, .fd_notify_on_write = fd_notify_on_write, + .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset, .pollset_init = pollset_init, .pollset_shutdown = pollset_shutdown, @@ -1380,4 +1407,9 @@ const grpc_event_engine_vtable *grpc_init_epoll_linux(void) { return &vtable; } -#endif +#else /* defined(GPR_LINUX_EPOLL) */ +/* If GPR_LINUX_EPOLL is not defined, it means epoll is not available. 
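/* A minimal, self-contained sketch of the wakeup mechanism the kick changes
   above rely on: each worker blocks the wakeup signal, re-enables it only for
   the duration of epoll_pwait(), and a kick interrupts that wait with
   pthread_kill(). The names (WAKEUP_SIG, worker_loop) are illustrative only;
   the real engine wires this up through poller_kick_init(),
   pollset_worker_kick() and the sigmask handling in pollset_work(), and
   defaults to SIGRTMIN + 2. */

#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

#define WAKEUP_SIG (SIGRTMIN + 2)

static void wakeup_handler(int signum) { (void)signum; /* the EINTR is the point */ }

static void *worker_loop(void *arg) {
  int epoll_fd = *(int *)arg;
  struct epoll_event events[16];
  sigset_t blocked, wait_mask;

  /* Keep the wakeup signal blocked during normal execution... */
  sigemptyset(&blocked);
  sigaddset(&blocked, WAKEUP_SIG);
  pthread_sigmask(SIG_BLOCK, &blocked, &wait_mask);
  /* ...and allow it only while parked inside epoll_pwait(). */
  sigdelset(&wait_mask, WAKEUP_SIG);

  int n = epoll_pwait(epoll_fd, events, 16, -1 /* no timeout */, &wait_mask);
  if (n < 0 && errno == EINTR) {
    printf("worker: kicked out of epoll_pwait()\n");
  }
  return NULL;
}

int main(void) {
  signal(WAKEUP_SIG, wakeup_handler); /* analogous to poller_kick_init() */

  int epoll_fd = epoll_create1(EPOLL_CLOEXEC);
  pthread_t worker;
  pthread_create(&worker, NULL, worker_loop, &epoll_fd);

  sleep(1);                         /* let the worker reach epoll_pwait() */
  pthread_kill(worker, WAKEUP_SIG); /* analogous to pollset_worker_kick() */

  pthread_join(worker, NULL);
  close(epoll_fd);
  return 0;
}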
Return + * NULL */ +const grpc_event_engine_vtable *grpc_init_epoll_linux(void) { return NULL; } + +#endif /* !defined(GPR_LINUX_EPOLL) */ diff --git a/src/core/lib/iomgr/ev_posix.c b/src/core/lib/iomgr/ev_posix.c index e0c3558a51..2b15967adc 100644 --- a/src/core/lib/iomgr/ev_posix.c +++ b/src/core/lib/iomgr/ev_posix.c @@ -63,8 +63,8 @@ typedef struct { } event_engine_factory; static const event_engine_factory g_factories[] = { - {"poll", grpc_init_poll_posix}, {"epoll", grpc_init_epoll_linux}, + {"poll", grpc_init_poll_posix}, {"legacy", grpc_init_poll_and_epoll_posix}, }; -- cgit v1.2.3 From 24b1062f42ef01bd47a458e94423f068ec1765f0 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 8 Jun 2016 15:20:17 -0700 Subject: Do not close epoll_fd while there are any pollers and add the ability to wake up all pollers when an island is merged --- src/core/lib/iomgr/ev_epoll_linux.c | 113 +++++++++++++++++++++++++----------- 1 file changed, 80 insertions(+), 33 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 69ab665e15..3a3c136a5a 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -190,9 +190,18 @@ struct grpc_pollset_set { }; /******************************************************************************* - * Polling-island Definitions + * Polling island Definitions */ +/* The wakeup fd that is used to wake up all threads in a Polling island. This + is useful in the polling island merge operation where we need to wakeup all + the threads currently polling the smaller polling island (so that they can + start polling the new/merged polling island) + + NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the + threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */ +static grpc_wakeup_fd polling_island_wakeup_fd; + /* Polling island freelist */ static gpr_mu g_pi_freelist_mu; static polling_island *g_pi_freelist = NULL; @@ -232,6 +241,25 @@ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, } } +/* The caller is expected to hold pi->mu before calling this */ +static void polling_island_add_wakeup_fd_locked(polling_island *pi, + grpc_wakeup_fd *wakeup_fd) { + struct epoll_event ev; + int err; + + ev.events = (uint32_t)(EPOLLIN | EPOLLET); + ev.data.ptr = wakeup_fd; + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, + GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev); + if (err < 0) { + gpr_log(GPR_ERROR, + "Failed to add grpc_wake_up_fd (%d) to the epoll set (epoll_fd: %d)" + ". 
Error: %s", + GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), pi->epoll_fd, + strerror(errno)); + } +} + /* The caller is expected to hold pi->mu lock before calling this function */ static void polling_island_remove_all_fds_locked(polling_island *pi, bool remove_fd_refs) { @@ -283,8 +311,6 @@ static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd, static polling_island *polling_island_create(grpc_fd *initial_fd, int initial_ref_cnt) { polling_island *pi = NULL; - struct epoll_event ev; - int err; /* Try to get one from the polling island freelist */ gpr_mu_lock(&g_pi_freelist_mu); @@ -311,17 +337,7 @@ static polling_island *polling_island_create(grpc_fd *initial_fd, } GPR_ASSERT(pi->epoll_fd >= 0); - ev.events = (uint32_t)(EPOLLIN | EPOLLET); - ev.data.ptr = NULL; - err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, - GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), &ev); - if (err < 0) { - gpr_log(GPR_ERROR, - "Failed to add grpc_global_wake_up_fd (%d) to the epoll set " - "(epoll_fd: %d) with error: %s", - GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), pi->epoll_fd, - strerror(errno)); - } + polling_island_add_wakeup_fd_locked(pi, &grpc_global_wakeup_fd); pi->ref_cnt = initial_ref_cnt; pi->merged_to = NULL; @@ -496,13 +512,15 @@ polling_island *polling_island_merge(polling_island *p, polling_island *q) { GPR_SWAP(polling_island *, p, q); } - /* "Merge" p with q i.e move all the fds from p (the polling_island with fewer - fds) to q. - Note: Not altering the ref counts on the affected fds here because they - would effectively remain unchanged */ + /* "Merge" p with q i.e move all the fds from p (The one with fewer fds) to q + )Note that the refcounts on the fds being moved will not change here. This + is why the last parameter in the following two functions is 'false') */ polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false); polling_island_remove_all_fds_locked(p, false); + /* Wakeup all the pollers (if any) on p so that they can pickup this change */ + polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd); + /* The merged polling island inherits all the ref counts of the island merging with it */ q->ref_cnt += p->ref_cnt; @@ -516,6 +534,8 @@ polling_island *polling_island_merge(polling_island *p, polling_island *q) { static void polling_island_global_init() { gpr_mu_init(&g_pi_freelist_mu); g_pi_freelist = NULL; + grpc_wakeup_fd_init(&polling_island_wakeup_fd); + grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd); } static void polling_island_global_shutdown() { @@ -529,8 +549,9 @@ static void polling_island_global_shutdown() { gpr_free(g_pi_freelist); g_pi_freelist = next; } - gpr_mu_destroy(&g_pi_freelist_mu); + + grpc_wakeup_fd_destroy(&polling_island_wakeup_fd); } /******************************************************************************* @@ -973,6 +994,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; int epoll_fd = -1; int ep_rv; + polling_island *pi = NULL; GPR_TIMER_BEGIN("pollset_work_and_unlock", 0); /* We need to get the epoll_fd to wait on. 
The epoll_fd is in inside the @@ -983,13 +1005,19 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, - pollset->polling_island->mu */ gpr_mu_lock(&pollset->pi_mu); - if (pollset->polling_island == NULL) { - pollset->polling_island = polling_island_create(NULL, 1); + pi = pollset->polling_island; + if (pi == NULL) { + pi = polling_island_create(NULL, 1); } - pollset->polling_island = - polling_island_update_and_lock(pollset->polling_island, 1, 0); - epoll_fd = pollset->polling_island->epoll_fd; + /* In addition to locking the polling island, add a ref so that the island + does not get destroyed (which means the epoll_fd won't be closed) while + we are are doing an epoll_wait() on the epoll_fd */ + pi = polling_island_update_and_lock(pi, 1, 1); + epoll_fd = pi->epoll_fd; + + /* Update the pollset->polling_island */ + pollset->polling_island = pi; #ifdef GRPC_EPOLL_DEBUG if (pollset->polling_island->fd_cnt == 0) { @@ -1013,25 +1041,29 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, sig_mask); if (ep_rv < 0) { if (errno != EINTR) { - /* TODO (sreek) - Do not log an error in case of bad file descriptor - * (A bad file descriptor here would just mean that the epoll set was - * merged with another epoll set and that the current epoll_fd is - * closed) */ gpr_log(GPR_ERROR, "epoll_pwait() failed: %s", strerror(errno)); } else { + /* We were interrupted. Save an interation by doing a zero timeout + epoll_wait to see if there are any other events of interest */ ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); } } int i; for (i = 0; i < ep_rv; ++i) { - grpc_fd *fd = ep_ev[i].data.ptr; - int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); - int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); - int write_ev = ep_ev[i].events & EPOLLOUT; - if (fd == NULL) { + void *data_ptr = ep_ev[i].data.ptr; + if (data_ptr == &grpc_global_wakeup_fd) { grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); + } else if (data_ptr == &polling_island_wakeup_fd) { + /* This means that our polling island is merged with a different + island. We do not have to do anything here since the subsequent call + to the function pollset_work_and_unlock() will pick up the correct + epoll_fd */ } else { + grpc_fd *fd = data_ptr; + int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); + int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); + int write_ev = ep_ev[i].events & EPOLLOUT; if (read_ev || cancel) { fd_become_readable(exec_ctx, fd, pollset); } @@ -1041,6 +1073,21 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, } } } while (ep_rv == GRPC_EPOLL_MAX_EVENTS); + + GPR_ASSERT(pi != NULL); + + /* Before leaving, release the extra ref we added to the polling island */ + /* It is important to note that at this point 'pi' may not be the same as + * pollset->polling_island. 
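/* A small, self-contained sketch of the trick the merge path above depends
   on: an eventfd that is written once and never drained stays permanently
   readable, so adding it to an epoll set immediately wakes any thread blocked
   in epoll_wait() on that set. The names here (merge_wakeup_fd,
   stale_island_poller) are illustrative; the real code wraps the fd in
   grpc_wakeup_fd, adds it via polling_island_add_wakeup_fd_locked(), and the
   woken pollers then re-read merged_to to find the new island. */

#include <pthread.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <unistd.h>

static void *stale_island_poller(void *arg) {
  int old_epoll_fd = *(int *)arg;
  struct epoll_event ev;
  /* Blocks until something - here, the merge wakeup fd - becomes ready. */
  if (epoll_wait(old_epoll_fd, &ev, 1, -1) == 1) {
    printf("poller woken: island merged, look up merged_to and poll again\n");
  }
  return NULL;
}

int main(void) {
  /* Permanently readable wakeup fd: written once, never consumed. */
  int merge_wakeup_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
  eventfd_write(merge_wakeup_fd, 1);

  int old_epoll_fd = epoll_create1(EPOLL_CLOEXEC);
  pthread_t poller;
  pthread_create(&poller, NULL, stale_island_poller, &old_epoll_fd);
  sleep(1); /* let the poller block in epoll_wait() */

  /* "Merge": wake everyone polling the old island by adding the ready fd.
     EPOLLET matches the real code; the add itself reports the ready state. */
  struct epoll_event ev;
  ev.events = EPOLLIN | EPOLLET;
  ev.data.ptr = &merge_wakeup_fd;
  epoll_ctl(old_epoll_fd, EPOLL_CTL_ADD, merge_wakeup_fd, &ev);

  pthread_join(poller, NULL);
  close(old_epoll_fd);
  close(merge_wakeup_fd);
  return 0;
}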
This is because pollset->polling_island pointer + * gets updated whenever the underlying polling island is merged with another + * island and while we are doing epoll_wait() above, the polling island may + * have been merged */ + + /* TODO (sreek) - Change the ref count on polling island to gpr_atm so that + * we do not have to do this here */ + gpr_mu_lock(&pi->mu); + polling_island_unref_and_unlock(pi, 1); + GPR_TIMER_END("pollset_work_and_unlock", 0); } -- cgit v1.2.3 From e682e46a9e451a94cfcb1cf9c927185abd81fceb Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 8 Jun 2016 15:40:21 -0700 Subject: Add TODOs --- src/core/lib/iomgr/ev_epoll_linux.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 3a3c136a5a..2e871a4f1b 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -83,6 +83,7 @@ struct grpc_fd { this indicates that the 'fd' on this structure is no longer valid */ bool orphaned; + /* TODO: sreek - Move this a lockfree implementation */ grpc_closure *read_closure; grpc_closure *write_closure; @@ -166,6 +167,9 @@ struct grpc_pollset { /* The polling island to which this pollset belongs to and the mutex protecting the field */ + /* TODO: sreek: This lock might actually be adding more overhead to the + critical path (i.e pollset_work() function). Consider removing this lock + and just using the overall pollset lock */ gpr_mu pi_mu; struct polling_island *polling_island; }; -- cgit v1.2.3 From 3dbf4d61b26e2364a974e47f16f3a655d3eda908 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 8 Jun 2016 16:26:45 -0700 Subject: More TODOs --- src/core/lib/iomgr/ev_epoll_linux.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 2e871a4f1b..046ec5e740 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -83,7 +83,7 @@ struct grpc_fd { this indicates that the 'fd' on this structure is no longer valid */ bool orphaned; - /* TODO: sreek - Move this a lockfree implementation */ + /* TODO: sreek - Move this to a lockfree implementation */ grpc_closure *read_closure; grpc_closure *write_closure; @@ -124,6 +124,8 @@ static void fd_global_shutdown(void); /******************************************************************************* * Polling-island Declarations */ +/* TODO: sree: Consider making ref_cnt and merged_to to gpr_atm - This would + * significantly reduce the number of mutex acquisition calls. */ typedef struct polling_island { gpr_mu mu; int ref_cnt; @@ -177,6 +179,12 @@ struct grpc_pollset { /******************************************************************************* * Pollset-set Declarations */ +/* TODO: sreek - Change the pollset_set implementation such that a pollset_set + * directly points to a polling_island (and adding an fd/pollset/pollset_set to + * the current pollset_set would result in polling island merges. This would + * remove the need to maintain fd_count here. 
This will also significantly + * simplify the grpc_fd structure since we would no longer need to explicitly + * maintain the orphaned state */ struct grpc_pollset_set { gpr_mu mu; -- cgit v1.2.3 From 8e4926c0eeb0df9e5c8029136e39f6c8700f0814 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 8 Jun 2016 20:33:19 -0700 Subject: pollset_kick optimization (do not kick any other thread if the current thread can be kicked) --- src/core/lib/iomgr/ev_epoll_linux.c | 153 ++++++++++++++++++++---------------- 1 file changed, 87 insertions(+), 66 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 046ec5e740..d45f87c2f8 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -844,6 +844,8 @@ static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, /******************************************************************************* * Pollset Definitions */ +GPR_TLS_DECL(g_current_thread_pollset); +GPR_TLS_DECL(g_current_thread_worker); static void sig_handler(int sig_num) { #ifdef GRPC_EPOLL_DEBUG @@ -859,11 +861,15 @@ static void poller_kick_init() { /* Global state management */ static void pollset_global_init(void) { grpc_wakeup_fd_init(&grpc_global_wakeup_fd); + gpr_tls_init(&g_current_thread_pollset); + gpr_tls_init(&g_current_thread_worker); poller_kick_init(); } static void pollset_global_shutdown(void) { grpc_wakeup_fd_destroy(&grpc_global_wakeup_fd); + gpr_tls_destroy(&g_current_thread_pollset); + gpr_tls_destroy(&g_current_thread_worker); } static void pollset_worker_kick(grpc_pollset_worker *worker) { @@ -915,7 +921,9 @@ static void pollset_kick(grpc_pollset *p, GPR_TIMER_BEGIN("pollset_kick.broadcast", 0); for (worker = p->root_worker.next; worker != &p->root_worker; worker = worker->next) { - pollset_worker_kick(worker); + if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) { + pollset_worker_kick(worker); + } } } else { p->kicked_without_pollers = true; @@ -923,9 +931,18 @@ static void pollset_kick(grpc_pollset *p, GPR_TIMER_END("pollset_kick.broadcast", 0); } else { GPR_TIMER_MARK("kicked_specifically", 0); - pollset_worker_kick(worker); + if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) { + pollset_worker_kick(worker); + } } - } else { + } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) { + /* Since worker == NULL, it means that we can kick "any" worker on this + pollset 'p'. If 'p' happens to be the same pollset this thread is + currently polling (i.e in pollset_work() function), then there is no need + to kick any other worker since the current thread can just absorb the + kick. This is the reason why we enter this case only when + g_current_thread_pollset is != p */ + GPR_TIMER_MARK("kick_anonymous", 0); worker = pop_front_worker(p); if (worker != NULL) { @@ -999,6 +1016,69 @@ static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { gpr_mu_unlock(&fd->mu); } +/* Release the reference to pollset->polling_island and set it to NULL. 
+ pollset->mu must be held */ +static void pollset_release_polling_island_locked(grpc_pollset *pollset) { + gpr_mu_lock(&pollset->pi_mu); + if (pollset->polling_island) { + pollset->polling_island = + polling_island_update_and_lock(pollset->polling_island, 1, 0); + polling_island_unref_and_unlock(pollset->polling_island, 1); + pollset->polling_island = NULL; + } + gpr_mu_unlock(&pollset->pi_mu); +} + +static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset) { + /* The pollset cannot have any workers if we are at this stage */ + GPR_ASSERT(!pollset_has_workers(pollset)); + + pollset->finish_shutdown_called = true; + pollset_release_polling_island_locked(pollset); + + grpc_exec_ctx_enqueue(exec_ctx, pollset->shutdown_done, true, NULL); +} + +/* pollset->mu lock must be held by the caller before calling this */ +static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_closure *closure) { + GPR_TIMER_BEGIN("pollset_shutdown", 0); + GPR_ASSERT(!pollset->shutting_down); + pollset->shutting_down = true; + pollset->shutdown_done = closure; + pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST); + + /* If the pollset has any workers, we cannot call finish_shutdown_locked() + because it would release the underlying polling island. In such a case, we + let the last worker call finish_shutdown_locked() from pollset_work() */ + if (!pollset_has_workers(pollset)) { + GPR_ASSERT(!pollset->finish_shutdown_called); + GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0); + finish_shutdown_locked(exec_ctx, pollset); + } + GPR_TIMER_END("pollset_shutdown", 0); +} + +/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other + * than destroying the mutexes, there is nothing special that needs to be done + * here */ +static void pollset_destroy(grpc_pollset *pollset) { + GPR_ASSERT(!pollset_has_workers(pollset)); + gpr_mu_destroy(&pollset->pi_mu); + gpr_mu_destroy(&pollset->mu); +} + +static void pollset_reset(grpc_pollset *pollset) { + GPR_ASSERT(pollset->shutting_down); + GPR_ASSERT(!pollset_has_workers(pollset)); + pollset->shutting_down = false; + pollset->finish_shutdown_called = false; + pollset->kicked_without_pollers = false; + pollset->shutdown_done = NULL; + pollset_release_polling_island_locked(pollset); +} + #define GRPC_EPOLL_MAX_EVENTS 1000 static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, int timeout_ms, @@ -1103,69 +1183,6 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, GPR_TIMER_END("pollset_work_and_unlock", 0); } -/* Release the reference to pollset->polling_island and set it to NULL. 
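/* A stripped-down sketch of the thread-local bookkeeping used by the kick
   optimization above: pollset_work() records which pollset/worker the current
   thread is driving, and a kick skips pthread_kill() when the caller is that
   very thread (the kick is simply "absorbed"). Assumptions: __thread (a
   GCC/Clang extension) stands in for GPR_TLS_DECL, and the pollset/worker
   structs and function names below are illustrative, not the gRPC ones. */

#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <stddef.h>

typedef struct worker { pthread_t pt_id; struct worker *next, *prev; } worker;
typedef struct pollset { worker root_worker; bool kicked_without_pollers; } pollset;

static __thread pollset *g_current_thread_pollset;
static __thread worker *g_current_thread_worker;

static void kick_worker(worker *w, int wakeup_sig) {
  /* Never signal ourselves: the calling thread is already inside
     pollset_work() and will notice any state change when it loops. */
  if (g_current_thread_worker != w) {
    pthread_kill(w->pt_id, wakeup_sig);
  }
}

static void kick_any(pollset *ps, worker *some_worker, int wakeup_sig) {
  if (some_worker == NULL && g_current_thread_pollset == ps) {
    return; /* this thread is polling ps already; it absorbs the kick */
  }
  if (some_worker != NULL) {
    kick_worker(some_worker, wakeup_sig);
  } else {
    ps->kicked_without_pollers = true; /* no pollers yet; remember the kick */
  }
}

int main(void) {
  pollset ps = {.kicked_without_pollers = false};
  g_current_thread_pollset = &ps; /* as if we were inside pollset_work(&ps) */
  kick_any(&ps, NULL, SIGTERM);   /* absorbed: nothing to do */
  g_current_thread_pollset = NULL;
  kick_any(&ps, NULL, SIGTERM);   /* no pollers: the kick is just recorded */
  return ps.kicked_without_pollers ? 0 : 1;
}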
- pollset->mu must be held */ -static void pollset_release_polling_island_locked(grpc_pollset *pollset) { - gpr_mu_lock(&pollset->pi_mu); - if (pollset->polling_island) { - pollset->polling_island = - polling_island_update_and_lock(pollset->polling_island, 1, 0); - polling_island_unref_and_unlock(pollset->polling_island, 1); - pollset->polling_island = NULL; - } - gpr_mu_unlock(&pollset->pi_mu); -} - -static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset) { - /* The pollset cannot have any workers if we are at this stage */ - GPR_ASSERT(!pollset_has_workers(pollset)); - - pollset->finish_shutdown_called = true; - pollset_release_polling_island_locked(pollset); - - grpc_exec_ctx_enqueue(exec_ctx, pollset->shutdown_done, true, NULL); -} - -/* pollset->mu lock must be held by the caller before calling this */ -static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_closure *closure) { - GPR_TIMER_BEGIN("pollset_shutdown", 0); - GPR_ASSERT(!pollset->shutting_down); - pollset->shutting_down = true; - pollset->shutdown_done = closure; - pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST); - - /* If the pollset has any workers, we cannot call finish_shutdown_locked() - because it would release the underlying polling island. In such a case, we - let the last worker call finish_shutdown_locked() from pollset_work() */ - if (!pollset_has_workers(pollset)) { - GPR_ASSERT(!pollset->finish_shutdown_called); - GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0); - finish_shutdown_locked(exec_ctx, pollset); - } - GPR_TIMER_END("pollset_shutdown", 0); -} - -/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other - * than destroying the mutexes, there is nothing special that needs to be done - * here */ -static void pollset_destroy(grpc_pollset *pollset) { - GPR_ASSERT(!pollset_has_workers(pollset)); - gpr_mu_destroy(&pollset->pi_mu); - gpr_mu_destroy(&pollset->mu); -} - -static void pollset_reset(grpc_pollset *pollset) { - GPR_ASSERT(pollset->shutting_down); - GPR_ASSERT(!pollset_has_workers(pollset)); - pollset->shutting_down = false; - pollset->finish_shutdown_called = false; - pollset->kicked_without_pollers = false; - pollset->shutdown_done = NULL; - pollset_release_polling_island_locked(pollset); -} - /* pollset->mu lock must be held by the caller before calling this. 
The function pollset_work() may temporarily release the lock (pollset->mu) during the course of its execution but it will always re-acquire the lock and @@ -1184,6 +1201,8 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, worker.pt_id = pthread_self(); *worker_hdl = &worker; + gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset); + gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); if (pollset->kicked_without_pollers) { /* If the pollset was kicked without pollers, pretend that the current @@ -1226,6 +1245,8 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, } *worker_hdl = NULL; + gpr_tls_set(&g_current_thread_pollset, (intptr_t)0); + gpr_tls_set(&g_current_thread_worker, (intptr_t)0); GPR_TIMER_END("pollset_work", 0); } -- cgit v1.2.3 From 0553a436610201b252cfce0ed5d2cea69da15e85 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Thu, 9 Jun 2016 00:42:41 -0700 Subject: Fix refcounting bug in polling_island_merge --- src/core/lib/iomgr/ev_epoll_linux.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index d45f87c2f8..66bbae52b2 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -512,7 +512,6 @@ polling_island *polling_island_merge(polling_island *p, polling_island *q) { /* Get locks on both the polling islands */ polling_island_pair_update_and_lock(&p, &q); - /* TODO: sreek: Think about this scenario some more */ if (p == q) { /* Nothing needs to be done here */ gpr_mu_unlock(&p->mu); @@ -525,7 +524,7 @@ polling_island *polling_island_merge(polling_island *p, polling_island *q) { } /* "Merge" p with q i.e move all the fds from p (The one with fewer fds) to q - )Note that the refcounts on the fds being moved will not change here. This + Note that the refcounts on the fds being moved will not change here. This is why the last parameter in the following two functions is 'false') */ polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false); polling_island_remove_all_fds_locked(p, false); @@ -533,9 +532,11 @@ polling_island *polling_island_merge(polling_island *p, polling_island *q) { /* Wakeup all the pollers (if any) on p so that they can pickup this change */ polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd); - /* The merged polling island inherits all the ref counts of the island merging - with it */ + /* - The merged polling island (i.e q) inherits all the ref counts of the + island merging with it (i.e p) + - The island p will lose a ref count */ q->ref_cnt += p->ref_cnt; + p->ref_cnt--; gpr_mu_unlock(&p->mu); gpr_mu_unlock(&q->mu); -- cgit v1.2.3 From 727440216553c01925c0f5bc88293108bb3f051f Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Thu, 9 Jun 2016 09:42:06 -0700 Subject: Check epoll is actually available. 
set GPR_LINUX_EPOLL only in GLIBC ver 2.9 and above --- include/grpc/impl/codegen/port_platform.h | 2 +- src/core/lib/iomgr/ev_epoll_linux.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'src/core/lib') diff --git a/include/grpc/impl/codegen/port_platform.h b/include/grpc/impl/codegen/port_platform.h index 7a6ec53fb4..affef9e66b 100644 --- a/include/grpc/impl/codegen/port_platform.h +++ b/include/grpc/impl/codegen/port_platform.h @@ -189,7 +189,6 @@ #define GPR_GCC_ATOMIC 1 #define GPR_GCC_TLS 1 #define GPR_LINUX 1 -#define GPR_LINUX_EPOLL 1 #define GPR_LINUX_LOG #define GPR_LINUX_MULTIPOLL_WITH_EPOLL 1 #define GPR_POSIX_WAKEUP_FD 1 @@ -201,6 +200,7 @@ #ifdef __GLIBC_PREREQ #if __GLIBC_PREREQ(2, 9) #define GPR_LINUX_EVENTFD 1 +#define GPR_LINUX_EPOLL 1 #endif #if __GLIBC_PREREQ(2, 10) #define GPR_LINUX_SOCKETUTILS 1 diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 66bbae52b2..d2d5d2852b 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -1481,7 +1481,26 @@ static const grpc_event_engine_vtable vtable = { .shutdown_engine = shutdown_engine, }; +/* It is possible that GLIBC has epoll but the underlying kernel doesn't. + * Create a dummy epoll_fd to make sure epoll support is available */ +static bool is_epoll_available() { + int fd = epoll_create1(EPOLL_CLOEXEC); + if (fd < 0) { + gpr_log( + GPR_ERROR, + "epoll_create1 failed with error: %d. Not using epoll polling engine", + fd); + return false; + } + close(fd); + return true; +} + const grpc_event_engine_vtable *grpc_init_epoll_linux(void) { + if (!is_epoll_available()) { + return NULL; + } + fd_global_init(); pollset_global_init(); polling_island_global_init(); -- cgit v1.2.3 From c7be7c688829281b543428ec22029d4d09bd2a9c Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Thu, 9 Jun 2016 17:08:50 -0700 Subject: Add an API at the core level to disable signals or use a different signal number --- include/grpc/grpc_posix.h | 8 +++++++ src/core/lib/iomgr/ev_epoll_linux.c | 45 ++++++++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 10 deletions(-) (limited to 'src/core/lib') diff --git a/include/grpc/grpc_posix.h b/include/grpc/grpc_posix.h index 9742b83374..5e89ae3b1e 100644 --- a/include/grpc/grpc_posix.h +++ b/include/grpc/grpc_posix.h @@ -63,6 +63,14 @@ GRPCAPI void grpc_server_add_insecure_channel_from_fd(grpc_server *server, grpc_completion_queue *cq, int fd); +/** GRPC Core POSIX library may internally use signals to optimize some work. + The library uses (SIGRTMIN + 2) signal by default. Use this API to instruct + the library to use a different signal i.e 'signum' instead. + Note: + - To prevent GRPC library from using any signals, pass a 'signum' of -1 + - This API is optional but if called, it MUST be called before grpc_init() */ +GRPCAPI void grpc_use_signal(int signum); + #ifdef __cplusplus } #endif diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index d2d5d2852b..7e01ac144f 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -31,6 +31,7 @@ * */ +#include #include #ifdef GPR_LINUX_EPOLL @@ -58,9 +59,26 @@ #include "src/core/lib/profiling/timers.h" #include "src/core/lib/support/block_annotate.h" -struct polling_island; +static int grpc_wakeup_signal = -1; +static bool is_grpc_wakeup_signal_initialized = false; + +/* Implements the function defined in grpc_posix.h. 
This function might be + * called before even calling grpc_init() to set either a different signal to + * use. If signum == -1, then the use of signals is disabled */ +void grpc_use_signal(int signum) { + grpc_wakeup_signal = signum; + is_grpc_wakeup_signal_initialized = true; -static int grpc_poller_kick_signum; + if (grpc_wakeup_signal < 0) { + gpr_log(GPR_INFO, + "Use of signals is disabled. Epoll engine will not be used"); + } else { + gpr_log(GPR_INFO, "epoll engine will be using signal: %d", + grpc_wakeup_signal); + } +} + +struct polling_island; /******************************************************************************* * Fd Declarations @@ -854,10 +872,7 @@ static void sig_handler(int sig_num) { #endif } -static void poller_kick_init() { - grpc_poller_kick_signum = SIGRTMIN + 2; - signal(grpc_poller_kick_signum, sig_handler); -} +static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); } /* Global state management */ static void pollset_global_init(void) { @@ -874,7 +889,7 @@ static void pollset_global_shutdown(void) { } static void pollset_worker_kick(grpc_pollset_worker *worker) { - pthread_kill(worker->pt_id, grpc_poller_kick_signum); + pthread_kill(worker->pt_id, grpc_wakeup_signal); } /* Return 1 if the pollset has active threads in pollset_work (pollset must @@ -1214,9 +1229,9 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, pollset->kicked_without_pollers = 0; } else if (!pollset->shutting_down) { sigemptyset(&new_mask); - sigaddset(&new_mask, grpc_poller_kick_signum); + sigaddset(&new_mask, grpc_wakeup_signal); pthread_sigmask(SIG_BLOCK, &new_mask, &orig_mask); - sigdelset(&orig_mask, grpc_poller_kick_signum); + sigdelset(&orig_mask, grpc_wakeup_signal); push_front_worker(pollset, &worker); @@ -1497,19 +1512,29 @@ static bool is_epoll_available() { } const grpc_event_engine_vtable *grpc_init_epoll_linux(void) { + /* If use of signals is disabled, we cannot use epoll engine*/ + if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) { + return NULL; + } + if (!is_epoll_available()) { return NULL; } + if (!is_grpc_wakeup_signal_initialized) { + grpc_use_signal(SIGRTMIN + 2); + } + fd_global_init(); pollset_global_init(); polling_island_global_init(); return &vtable; } -#else /* defined(GPR_LINUX_EPOLL) */ +#else /* defined(GPR_LINUX_EPOLL) */ /* If GPR_LINUX_EPOLL is not defined, it means epoll is not available. 
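A minimal usage sketch for the grpc_use_signal() API added by this commit (this snippet is not part of the patch; the signal number chosen is an arbitrary example, and per the API comment above the call must happen before grpc_init()):

#include <signal.h>
#include <grpc/grpc.h>
#include <grpc/grpc_posix.h>

int main(void) {
  grpc_use_signal(SIGRTMIN + 4); /* arbitrary example; must precede grpc_init() */
  /* Passing -1 instead disables signal use entirely, in which case
     grpc_init_epoll_linux() returns NULL and a different poller is picked. */
  grpc_init();
  /* ... application work ... */
  grpc_shutdown();
  return 0;
}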
Return * NULL */ const grpc_event_engine_vtable *grpc_init_epoll_linux(void) { return NULL; } +void grpc_use_signal(int signum) {} #endif /* !defined(GPR_LINUX_EPOLL) */ -- cgit v1.2.3 From eb16b3dc3cd579931d730ba3fef1f7008f649003 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 10 Jun 2016 23:06:25 -0700 Subject: Fix ref counting bug --- src/core/lib/iomgr/ev_epoll_linux.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 7e01ac144f..617afad197 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -1127,20 +1127,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, /* Update the pollset->polling_island */ pollset->polling_island = pi; -#ifdef GRPC_EPOLL_DEBUG - if (pollset->polling_island->fd_cnt == 0) { - gpr_log(GPR_DEBUG, "pollset_work_and_unlock: epoll_fd: %d, No other fds", - epoll_fd); - } - for (size_t i = 0; i < pollset->polling_island->fd_cnt; i++) { - gpr_log(GPR_DEBUG, - "pollset_work_and_unlock: epoll_fd: %d, fd_count: %d, fd[%d]: %d", - epoll_fd, pollset->polling_island->fd_cnt, i, - pollset->polling_island->fds[i]->fd); - } -#endif - gpr_mu_unlock(&pollset->polling_island->mu); - + polling_island_unref_and_unlock(pollset->polling_island, 0); /* Keep the ref*/ gpr_mu_unlock(&pollset->pi_mu); gpr_mu_unlock(&pollset->mu); @@ -1190,10 +1177,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, * gets updated whenever the underlying polling island is merged with another * island and while we are doing epoll_wait() above, the polling island may * have been merged */ - - /* TODO (sreek) - Change the ref count on polling island to gpr_atm so that - * we do not have to do this here */ - gpr_mu_lock(&pi->mu); + polling_island_update_and_lock(pi, 1, 0); /* No new ref added */ polling_island_unref_and_unlock(pi, 1); GPR_TIMER_END("pollset_work_and_unlock", 0); -- cgit v1.2.3 From 58e589644403b10afb31ffd45befabe13b652db8 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Mon, 13 Jun 2016 00:52:56 -0700 Subject: Fix bad merge --- src/core/lib/iomgr/ev_epoll_linux.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 617afad197..a8a874cd4b 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -550,14 +550,14 @@ polling_island *polling_island_merge(polling_island *p, polling_island *q) { /* Wakeup all the pollers (if any) on p so that they can pickup this change */ polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd); + p->merged_to = q; + /* - The merged polling island (i.e q) inherits all the ref counts of the island merging with it (i.e p) - The island p will lose a ref count */ q->ref_cnt += p->ref_cnt; - p->ref_cnt--; - - gpr_mu_unlock(&p->mu); - gpr_mu_unlock(&q->mu); + polling_island_unref_and_unlock(p, 1); /* Decrement refcount */ + polling_island_unref_and_unlock(q, 0); /* Just Unlock. 
Don't decrement ref */ return q; } @@ -1110,7 +1110,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, Acquire the following locks: - pollset->mu (which we already have) - pollset->pi_mu - - pollset->polling_island->mu */ + - pollset->polling_island->mu (call polling_island_update_and_lock())*/ gpr_mu_lock(&pollset->pi_mu); pi = pollset->polling_island; @@ -1144,8 +1144,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, } } - int i; - for (i = 0; i < ep_rv; ++i) { + for (int i = 0; i < ep_rv; ++i) { void *data_ptr = ep_ev[i].data.ptr; if (data_ptr == &grpc_global_wakeup_fd) { grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); @@ -1177,7 +1176,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, * gets updated whenever the underlying polling island is merged with another * island and while we are doing epoll_wait() above, the polling island may * have been merged */ - polling_island_update_and_lock(pi, 1, 0); /* No new ref added */ + pi = polling_island_update_and_lock(pi, 1, 0); /* No new ref added */ polling_island_unref_and_unlock(pi, 1); GPR_TIMER_END("pollset_work_and_unlock", 0); -- cgit v1.2.3 From 41622a8e389e8eda38d6d3bfbf34cbf35f437156 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Mon, 13 Jun 2016 16:43:14 -0700 Subject: Fix tsan failures --- Makefile | 1 + build.yaml | 1 + src/core/lib/iomgr/ev_epoll_linux.c | 27 ++++++++++++++++++++++++++- 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'src/core/lib') diff --git a/Makefile b/Makefile index 4d8b060760..28d6842c76 100644 --- a/Makefile +++ b/Makefile @@ -200,6 +200,7 @@ LD_tsan = clang LDXX_tsan = clang++ CPPFLAGS_tsan = -O0 -fsanitize=thread -fno-omit-frame-pointer -Wno-unused-command-line-argument -DGPR_NO_DIRECT_SYSCALLS LDFLAGS_tsan = -fsanitize=thread +DEFINES_tsan = _GRPC_TSAN DEFINES_tsan += GRPC_TEST_SLOWDOWN_BUILD_FACTOR=5 VALID_CONFIG_stapprof = 1 diff --git a/build.yaml b/build.yaml index 85b66d985b..139ab3e8bc 100644 --- a/build.yaml +++ b/build.yaml @@ -3231,6 +3231,7 @@ configs: CPPFLAGS: -O0 -fsanitize=thread -fno-omit-frame-pointer -Wno-unused-command-line-argument -DGPR_NO_DIRECT_SYSCALLS CXX: clang++ + DEFINES: _GRPC_TSAN LD: clang LDFLAGS: -fsanitize=thread LDXX: clang++ diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index a8a874cd4b..35a15e00c9 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -236,6 +236,17 @@ static grpc_wakeup_fd polling_island_wakeup_fd; static gpr_mu g_pi_freelist_mu; static polling_island *g_pi_freelist = NULL; +#ifdef _GRPC_TSAN +/* Currently TSAN may incorrectly flag data races between epoll_ctl and + epoll_wait for any grpc_fd structs that are added to the epoll set via + epoll_ctl and are returned (within a very short window) via epoll_wait(). 
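(The comment continues below: the work-around is a release store just before epoll_ctl() paired with an acquire load just after epoll_wait(). As a self-contained sketch of that pairing, not part of the patch and with illustrative names:)

#include <grpc/support/atm.h>

static gpr_atm g_sync_sketch;

/* Mirrors the gpr_atm_rel_store()/gpr_atm_acq_load() calls the hunks below
   add. The release store publishes every prior write; the acquire load after
   the wait gives TSAN the same happens-before edge the kernel provides. */
static void publisher_before_epoll_ctl(void) {
  gpr_atm_rel_store(&g_sync_sketch, 0);
  /* epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) would follow here */
}

static void consumer_after_epoll_wait(void) {
  /* epoll_wait()/epoll_pwait() has just returned */
  gpr_atm_acq_load(&g_sync_sketch);
  /* the grpc_fd pointers delivered by the wait may now be used
     without TSAN flagging a race against the publisher */
}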
+ + To work-around this race, we establish a happens-before relation between + the code just-before epoll_ctl() and the code after epoll_wait() by using + this atomic */ +gpr_atm g_epoll_sync; +#endif + /* The caller is expected to hold pi->mu lock before calling this function */ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, size_t fd_count, bool add_fd_refs) { @@ -243,6 +254,11 @@ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, size_t i; struct epoll_event ev; +#ifdef _GRPC_TSAN + /* See the definition of g_epoll_sync for more context */ + gpr_atm_rel_store(&g_epoll_sync, 0); +#endif + for (i = 0; i < fd_count; i++) { ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); ev.data.ptr = fds[i]; @@ -361,6 +377,7 @@ static polling_island *polling_island_create(grpc_fd *initial_fd, } pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (pi->epoll_fd < 0) { gpr_log(GPR_ERROR, "epoll_create1() failed with error: %s", strerror(errno)); @@ -1144,6 +1161,11 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, } } +#ifdef _GRPC_TSAN + /* See the definition of g_poll_sync for more details */ + gpr_atm_acq_load(&g_epoll_sync); +#endif + for (int i = 0; i < ep_rv; ++i) { void *data_ptr = ep_ev[i].data.ptr; if (data_ptr == &grpc_global_wakeup_fd) { @@ -1514,10 +1536,13 @@ const grpc_event_engine_vtable *grpc_init_epoll_linux(void) { return &vtable; } -#else /* defined(GPR_LINUX_EPOLL) */ +#else /* defined(GPR_LINUX_EPOLL) */ +#if defined(GPR_POSIX_SOCKET) +#include "src/core/lib/iomgr/ev_posix.h" /* If GPR_LINUX_EPOLL is not defined, it means epoll is not available. Return * NULL */ const grpc_event_engine_vtable *grpc_init_epoll_linux(void) { return NULL; } +#endif /* defined(GPR_POSIX_SOCKET) */ void grpc_use_signal(int signum) {} #endif /* !defined(GPR_LINUX_EPOLL) */ -- cgit v1.2.3 From ad2c4778fc560f10f38550428189c97c9e2bc5a1 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Mon, 13 Jun 2016 19:06:54 -0700 Subject: Rename _GRPC_TSAN to GRPC_TSAN --- Makefile | 2 +- build.yaml | 2 +- src/core/lib/iomgr/ev_epoll_linux.c | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src/core/lib') diff --git a/Makefile b/Makefile index 040ebd4102..e35e360785 100644 --- a/Makefile +++ b/Makefile @@ -200,7 +200,7 @@ LD_tsan = clang LDXX_tsan = clang++ CPPFLAGS_tsan = -O0 -fsanitize=thread -fno-omit-frame-pointer -Wno-unused-command-line-argument -DGPR_NO_DIRECT_SYSCALLS LDFLAGS_tsan = -fsanitize=thread -DEFINES_tsan = _GRPC_TSAN +DEFINES_tsan = GRPC_TSAN DEFINES_tsan += GRPC_TEST_SLOWDOWN_BUILD_FACTOR=5 VALID_CONFIG_stapprof = 1 diff --git a/build.yaml b/build.yaml index 0847232b50..3d327f8bff 100644 --- a/build.yaml +++ b/build.yaml @@ -3266,7 +3266,7 @@ configs: CPPFLAGS: -O0 -fsanitize=thread -fno-omit-frame-pointer -Wno-unused-command-line-argument -DGPR_NO_DIRECT_SYSCALLS CXX: clang++ - DEFINES: _GRPC_TSAN + DEFINES: GRPC_TSAN LD: clang LDFLAGS: -fsanitize=thread LDXX: clang++ diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 35a15e00c9..006c2a8ee7 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -236,7 +236,7 @@ static grpc_wakeup_fd polling_island_wakeup_fd; static gpr_mu g_pi_freelist_mu; static polling_island *g_pi_freelist = NULL; -#ifdef _GRPC_TSAN +#ifdef GRPC_TSAN /* Currently TSAN may incorrectly flag data races between epoll_ctl and epoll_wait for any grpc_fd structs that are added to the epoll set via epoll_ctl 
and are returned (within a very short window) via epoll_wait(). @@ -245,7 +245,7 @@ static polling_island *g_pi_freelist = NULL; the code just-before epoll_ctl() and the code after epoll_wait() by using this atomic */ gpr_atm g_epoll_sync; -#endif +#endif /* defined(GRPC_TSAN) */ /* The caller is expected to hold pi->mu lock before calling this function */ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, @@ -254,10 +254,10 @@ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, size_t i; struct epoll_event ev; -#ifdef _GRPC_TSAN +#ifdef GRPC_TSAN /* See the definition of g_epoll_sync for more context */ gpr_atm_rel_store(&g_epoll_sync, 0); -#endif +#endif /* defined(GRPC_TSAN) */ for (i = 0; i < fd_count; i++) { ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); @@ -1161,10 +1161,10 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, } } -#ifdef _GRPC_TSAN +#ifdef GRPC_TSAN /* See the definition of g_poll_sync for more details */ gpr_atm_acq_load(&g_epoll_sync); -#endif +#endif /* defined(GRPC_TSAN) */ for (int i = 0; i < ep_rv; ++i) { void *data_ptr = ep_ev[i].data.ptr; -- cgit v1.2.3 From cf4205dff54d8e3920f98dac28049770b5f8f044 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 15 Jun 2016 11:22:20 -0700 Subject: Compilation error --- src/core/lib/iomgr/ev_epoll_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 006c2a8ee7..1fb5947464 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -317,7 +317,7 @@ static void polling_island_remove_all_fds_locked(polling_island *pi, if (err < 0 && errno != ENOENT) { /* TODO: sreek - We need a better way to bubble up this error instead of * just logging a message */ - gpr_log(GPR_ERROR, "epoll_ctl deleting fds[%d]: %d failed with error: %s", + gpr_log(GPR_ERROR, "epoll_ctl deleting fds[%zu]: %d failed with error: %s", i, pi->fds[i]->fd, strerror(errno)); } -- cgit v1.2.3 From 2e12db9c319bcbdbb2fa570149f88e4b496b558c Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Thu, 16 Jun 2016 16:53:59 -0700 Subject: Test polling island merges --- Makefile | 36 +++++ build.yaml | 12 ++ src/core/lib/iomgr/ev_epoll_linux.c | 51 ++++++- src/core/lib/iomgr/ev_epoll_linux.h | 6 + src/core/lib/iomgr/ev_posix.c | 7 + src/core/lib/iomgr/ev_posix.h | 3 + test/core/iomgr/ev_epoll_linux_test.c | 222 +++++++++++++++++++++++++++++++ tools/run_tests/sources_and_headers.json | 16 +++ tools/run_tests/tests.json | 15 +++ 9 files changed, 366 insertions(+), 2 deletions(-) create mode 100644 test/core/iomgr/ev_epoll_linux_test.c (limited to 'src/core/lib') diff --git a/Makefile b/Makefile index e615704395..825684cc2d 100644 --- a/Makefile +++ b/Makefile @@ -905,6 +905,7 @@ dns_resolver_connectivity_test: $(BINDIR)/$(CONFIG)/dns_resolver_connectivity_te dns_resolver_test: $(BINDIR)/$(CONFIG)/dns_resolver_test dualstack_socket_test: $(BINDIR)/$(CONFIG)/dualstack_socket_test endpoint_pair_test: $(BINDIR)/$(CONFIG)/endpoint_pair_test +ev_epoll_linux_test: $(BINDIR)/$(CONFIG)/ev_epoll_linux_test fd_conservation_posix_test: $(BINDIR)/$(CONFIG)/fd_conservation_posix_test fd_posix_test: $(BINDIR)/$(CONFIG)/fd_posix_test fling_client: $(BINDIR)/$(CONFIG)/fling_client @@ -1242,6 +1243,7 @@ buildtests_c: privatelibs_c \ $(BINDIR)/$(CONFIG)/dns_resolver_test \ $(BINDIR)/$(CONFIG)/dualstack_socket_test \ $(BINDIR)/$(CONFIG)/endpoint_pair_test \ + 
$(BINDIR)/$(CONFIG)/ev_epoll_linux_test \ $(BINDIR)/$(CONFIG)/fd_conservation_posix_test \ $(BINDIR)/$(CONFIG)/fd_posix_test \ $(BINDIR)/$(CONFIG)/fling_client \ @@ -1512,6 +1514,8 @@ test_c: buildtests_c $(Q) $(BINDIR)/$(CONFIG)/dualstack_socket_test || ( echo test dualstack_socket_test failed ; exit 1 ) $(E) "[RUN] Testing endpoint_pair_test" $(Q) $(BINDIR)/$(CONFIG)/endpoint_pair_test || ( echo test endpoint_pair_test failed ; exit 1 ) + $(E) "[RUN] Testing ev_epoll_linux_test" + $(Q) $(BINDIR)/$(CONFIG)/ev_epoll_linux_test || ( echo test ev_epoll_linux_test failed ; exit 1 ) $(E) "[RUN] Testing fd_conservation_posix_test" $(Q) $(BINDIR)/$(CONFIG)/fd_conservation_posix_test || ( echo test fd_conservation_posix_test failed ; exit 1 ) $(E) "[RUN] Testing fd_posix_test" @@ -7130,6 +7134,38 @@ endif endif +EV_EPOLL_LINUX_TEST_SRC = \ + test/core/iomgr/ev_epoll_linux_test.c \ + +EV_EPOLL_LINUX_TEST_OBJS = $(addprefix $(OBJDIR)/$(CONFIG)/, $(addsuffix .o, $(basename $(EV_EPOLL_LINUX_TEST_SRC)))) +ifeq ($(NO_SECURE),true) + +# You can't build secure targets if you don't have OpenSSL. + +$(BINDIR)/$(CONFIG)/ev_epoll_linux_test: openssl_dep_error + +else + + + +$(BINDIR)/$(CONFIG)/ev_epoll_linux_test: $(EV_EPOLL_LINUX_TEST_OBJS) $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a + $(E) "[LD] Linking $@" + $(Q) mkdir -p `dirname $@` + $(Q) $(LD) $(LDFLAGS) $(EV_EPOLL_LINUX_TEST_OBJS) $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a $(LDLIBS) $(LDLIBS_SECURE) -o $(BINDIR)/$(CONFIG)/ev_epoll_linux_test + +endif + +$(OBJDIR)/$(CONFIG)/test/core/iomgr/ev_epoll_linux_test.o: $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a + +deps_ev_epoll_linux_test: $(EV_EPOLL_LINUX_TEST_OBJS:.o=.dep) + +ifneq ($(NO_SECURE),true) +ifneq ($(NO_DEPS),true) +-include $(EV_EPOLL_LINUX_TEST_OBJS:.o=.dep) +endif +endif + + FD_CONSERVATION_POSIX_TEST_SRC = \ test/core/iomgr/fd_conservation_posix_test.c \ diff --git a/build.yaml b/build.yaml index 7790e0c517..84f4ea521b 100644 --- a/build.yaml +++ b/build.yaml @@ -1407,6 +1407,18 @@ targets: - grpc - gpr_test_util - gpr +- name: ev_epoll_linux_test + build: test + language: c + src: + - test/core/iomgr/ev_epoll_linux_test.c + deps: + - grpc_test_util + - grpc + - gpr_test_util + - gpr + platforms: + - linux - name: fd_conservation_posix_test build: test language: c diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 1fb5947464..ed2c494b78 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -317,8 +317,9 @@ static void polling_island_remove_all_fds_locked(polling_island *pi, if (err < 0 && errno != ENOENT) { /* TODO: sreek - We need a better way to bubble up this error instead of * just logging a message */ - gpr_log(GPR_ERROR, "epoll_ctl deleting fds[%zu]: %d failed with error: %s", - i, pi->fds[i]->fd, strerror(errno)); + gpr_log(GPR_ERROR, + "epoll_ctl deleting fds[%zu]: %d failed with error: %s", i, + pi->fds[i]->fd, strerror(errno)); } if (remove_fd_refs) { @@ -1458,6 +1459,52 @@ static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, gpr_mu_unlock(&bag->mu); } +/* Test helper functions + * */ +void *grpc_fd_get_polling_island(grpc_fd *fd) { + polling_island *pi; + + gpr_mu_lock(&fd->pi_mu); + pi = fd->polling_island; + 
gpr_mu_unlock(&fd->pi_mu); + + return pi; +} + +void *grpc_pollset_get_polling_island(grpc_pollset *ps) { + polling_island *pi; + + gpr_mu_lock(&ps->pi_mu); + pi = ps->polling_island; + gpr_mu_unlock(&ps->pi_mu); + + return pi; +} + +static polling_island *get_polling_island(polling_island *p) { + if (p == NULL) { + return NULL; + } + + polling_island *next; + gpr_mu_lock(&p->mu); + while (p->merged_to != NULL) { + next = p->merged_to; + gpr_mu_unlock(&p->mu); + p = next; + gpr_mu_lock(&p->mu); + } + gpr_mu_unlock(&p->mu); + + return p; +} + +bool grpc_are_polling_islands_equal(void *p, void *q) { + p = get_polling_island(p); + q = get_polling_island(q); + return p == q; +} + /******************************************************************************* * Event engine binding */ diff --git a/src/core/lib/iomgr/ev_epoll_linux.h b/src/core/lib/iomgr/ev_epoll_linux.h index 8c819975a4..7a494aba19 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.h +++ b/src/core/lib/iomgr/ev_epoll_linux.h @@ -38,4 +38,10 @@ const grpc_event_engine_vtable *grpc_init_epoll_linux(void); +#ifdef GPR_LINUX_EPOLL +void *grpc_fd_get_polling_island(grpc_fd *fd); +void *grpc_pollset_get_polling_island(grpc_pollset *ps); +bool grpc_are_polling_islands_equal(void *p, void *q); +#endif /* defined(GPR_LINUX_EPOLL) */ + #endif /* GRPC_CORE_LIB_IOMGR_EV_EPOLL_LINUX_H */ diff --git a/src/core/lib/iomgr/ev_posix.c b/src/core/lib/iomgr/ev_posix.c index 2b15967adc..5b20600a6f 100644 --- a/src/core/lib/iomgr/ev_posix.c +++ b/src/core/lib/iomgr/ev_posix.c @@ -54,6 +54,7 @@ grpc_poll_function_type grpc_poll_function = poll; static const grpc_event_engine_vtable *g_event_engine; +static const char* g_poll_strategy_name = NULL; typedef const grpc_event_engine_vtable *(*event_engine_factory_fn)(void); @@ -101,6 +102,7 @@ static void try_engine(const char *engine) { for (size_t i = 0; i < GPR_ARRAY_SIZE(g_factories); i++) { if (is(engine, g_factories[i].name)) { if ((g_event_engine = g_factories[i].factory())) { + g_poll_strategy_name = g_factories[i].name; gpr_log(GPR_DEBUG, "Using polling engine: %s", g_factories[i].name); return; } @@ -108,6 +110,11 @@ static void try_engine(const char *engine) { } } +/* Call this only after calling grpc_event_engine_init() */ +const char *grpc_get_poll_strategy_name() { + return g_poll_strategy_name; +} + void grpc_event_engine_init(void) { char *s = gpr_getenv("GRPC_POLL_STRATEGY"); if (s == NULL) { diff --git a/src/core/lib/iomgr/ev_posix.h b/src/core/lib/iomgr/ev_posix.h index 344bf63438..3ed5a5f956 100644 --- a/src/core/lib/iomgr/ev_posix.h +++ b/src/core/lib/iomgr/ev_posix.h @@ -98,6 +98,9 @@ typedef struct grpc_event_engine_vtable { void grpc_event_engine_init(void); void grpc_event_engine_shutdown(void); +/* Return the name of the poll strategy */ +const char* grpc_get_poll_strategy_name(); + /* Create a wrapped file descriptor. Requires fd is a non-blocking file descriptor. This takes ownership of closing fd. */ diff --git a/test/core/iomgr/ev_epoll_linux_test.c b/test/core/iomgr/ev_epoll_linux_test.c new file mode 100644 index 0000000000..51da15faa7 --- /dev/null +++ b/test/core/iomgr/ev_epoll_linux_test.c @@ -0,0 +1,222 @@ +/* + * + * Copyright 2015, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "src/core/lib/iomgr/ev_epoll_linux.h" +#include "src/core/lib/iomgr/ev_posix.h" + +#include +#include +#include +#include + +#include +#include + +#include "src/core/lib/iomgr/iomgr.h" +#include "test/core/util/test_config.h" + +typedef struct test_pollset { + grpc_pollset *pollset; + gpr_mu *mu; +} test_pollset; + +typedef struct test_fd { + int inner_fd; + grpc_fd *fd; +} test_fd; + +static void test_fd_init(test_fd *fds, int num_fds) { + int i; + for (i = 0; i < num_fds; i++) { + fds[i].inner_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + fds[i].fd = grpc_fd_create(fds[i].inner_fd, "test_fd"); + } +} + +static void test_fd_cleanup(grpc_exec_ctx *exec_ctx, test_fd *fds, + int num_fds) { + int release_fd; + int i; + + for (i = 0; i < num_fds; i++) { + grpc_fd_shutdown(exec_ctx, fds[i].fd); + grpc_exec_ctx_flush(exec_ctx); + + grpc_fd_orphan(exec_ctx, fds[i].fd, NULL, &release_fd, "test_fd_cleanup"); + grpc_exec_ctx_flush(exec_ctx); + + GPR_ASSERT(release_fd == fds[i].inner_fd); + close(fds[i].inner_fd); + } +} + +static void test_pollset_init(test_pollset *pollsets, int num_pollsets) { + int i; + for (i = 0; i < num_pollsets; i++) { + pollsets[i].pollset = gpr_malloc(grpc_pollset_size()); + grpc_pollset_init(pollsets[i].pollset, &pollsets[i].mu); + } +} + +static void destroy_pollset(grpc_exec_ctx *exec_ctx, void *p, bool success) { + grpc_pollset_destroy(p); +} + +static void test_pollset_cleanup(grpc_exec_ctx *exec_ctx, + test_pollset *pollsets, int num_pollsets) { + grpc_closure destroyed; + int i; + + for (i = 0; i < num_pollsets; i++) { + grpc_closure_init(&destroyed, destroy_pollset, pollsets[i].pollset); + grpc_pollset_shutdown(exec_ctx, pollsets[i].pollset, &destroyed); + + grpc_exec_ctx_flush(exec_ctx); + gpr_free(pollsets[i].pollset); + } +} + +#define NUM_FDS 8 +#define NUM_POLLSETS 4 +/* + * Cases to test: + * case 1) Polling islands of both fd and pollset are NULL + * case 2) Polling island of fd is NULL but that of pollset is not-NULL + * case 3) Polling island of fd is not-NULL but that of pollset is NULL + * case 4) Polling islands of both fd and pollset are not-NULL and: + * case 4.1) Polling islands of fd and pollset are equal + * case 4.2) Polling islands of fd and pollset are NOT-equal (This results + * in a merge) + * */ +static void 
test_add_fd_to_pollset() { + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + test_fd fds[NUM_FDS]; + test_pollset pollsets[NUM_POLLSETS]; + void *expected_pi = NULL; + int i; + + test_fd_init(fds, NUM_FDS); + test_pollset_init(pollsets, NUM_POLLSETS); + + /*Step 1. + * Create three polling islands (This will exercise test case 1 and 2) with + * the following configuration: + * polling island 0 = { fds:0,1,2, pollsets:0} + * polling island 1 = { fds:3,4, pollsets:1} + * polling island 2 = { fds:5,6,7 pollsets:2} + * + *Step 2. + * Add pollset 3 to polling island 0 (by adding fds 0 and 1 to pollset 3) + * (This will exercise test cases 3 and 4.1). The configuration becomes: + * polling island 0 = { fds:0,1,2, pollsets:0,3} <<< pollset 3 added here + * polling island 1 = { fds:3,4, pollsets:1} + * polling island 2 = { fds:5,6,7 pollsets:2} + * + *Step 3. + * Merge polling islands 0 and 1 by adding fd 0 to pollset 1 (This will + * exercise test case 4.2). The configuration becomes: + * polling island (merged) = {fds: 0,1,2,3,4, pollsets: 0,1,3} + * polling island 2 = {fds: 5,6,7 pollsets: 2} + * + *Step 4. + * Finally do one more merge by adding fd 3 to pollset 2. + * polling island (merged) = {fds: 0,1,2,3,4,5,6,7, pollsets: 0,1,2,3} + */ + + /* == Step 1 == */ + for (i = 0; i <= 2; i++) { + grpc_pollset_add_fd(&exec_ctx, pollsets[0].pollset, fds[i].fd); + grpc_exec_ctx_flush(&exec_ctx); + } + + for (i = 3; i <= 4; i++) { + grpc_pollset_add_fd(&exec_ctx, pollsets[1].pollset, fds[i].fd); + grpc_exec_ctx_flush(&exec_ctx); + } + + for (i = 5; i <= 7; i++) { + grpc_pollset_add_fd(&exec_ctx, pollsets[2].pollset, fds[i].fd); + grpc_exec_ctx_flush(&exec_ctx); + } + + /* == Step 2 == */ + for (i = 0; i <= 1; i++) { + grpc_pollset_add_fd(&exec_ctx, pollsets[3].pollset, fds[i].fd); + grpc_exec_ctx_flush(&exec_ctx); + } + + /* == Step 3 == */ + grpc_pollset_add_fd(&exec_ctx, pollsets[1].pollset, fds[0].fd); + grpc_exec_ctx_flush(&exec_ctx); + + /* == Step 4 == */ + grpc_pollset_add_fd(&exec_ctx, pollsets[2].pollset, fds[3].fd); + grpc_exec_ctx_flush(&exec_ctx); + + /* All polling islands are merged at this point */ + + /* Compare Fd:0's polling island with that of all other Fds */ + expected_pi = grpc_fd_get_polling_island(fds[0].fd); + for (i = 1; i < NUM_FDS; i++) { + GPR_ASSERT(grpc_are_polling_islands_equal( + expected_pi, grpc_fd_get_polling_island(fds[i].fd))); + } + + /* Compare Fd:0's polling island with that of all other pollsets */ + for (i = 0; i < NUM_POLLSETS; i++) { + GPR_ASSERT(grpc_are_polling_islands_equal( + expected_pi, grpc_pollset_get_polling_island(pollsets[i].pollset))); + } + + test_fd_cleanup(&exec_ctx, fds, NUM_FDS); + test_pollset_cleanup(&exec_ctx, pollsets, NUM_POLLSETS); + grpc_exec_ctx_finish(&exec_ctx); +} + +int main(int argc, char **argv) { + const char *poll_strategy = NULL; + grpc_test_init(argc, argv); + grpc_iomgr_init(); + + poll_strategy = grpc_get_poll_strategy_name(); + if (poll_strategy != NULL && strcmp(poll_strategy, "epoll") == 0) { + test_add_fd_to_pollset(); + } else { + gpr_log(GPR_INFO, + "Skipping the test. The test is only relevant for 'epoll' " + "strategy. 
and the current strategy is: '%s'", + poll_strategy); + } + grpc_iomgr_shutdown(); + return 0; +} diff --git a/tools/run_tests/sources_and_headers.json b/tools/run_tests/sources_and_headers.json index e8ff61dc3f..e9df72e43a 100644 --- a/tools/run_tests/sources_and_headers.json +++ b/tools/run_tests/sources_and_headers.json @@ -315,6 +315,22 @@ "third_party": false, "type": "target" }, + { + "deps": [ + "gpr", + "gpr_test_util", + "grpc", + "grpc_test_util" + ], + "headers": [], + "language": "c", + "name": "ev_epoll_linux_test", + "src": [ + "test/core/iomgr/ev_epoll_linux_test.c" + ], + "third_party": false, + "type": "target" + }, { "deps": [ "gpr", diff --git a/tools/run_tests/tests.json b/tools/run_tests/tests.json index 5a84a41b63..ba661840da 100644 --- a/tools/run_tests/tests.json +++ b/tools/run_tests/tests.json @@ -377,6 +377,21 @@ "windows" ] }, + { + "args": [], + "ci_platforms": [ + "linux" + ], + "cpu_cost": 1.0, + "exclude_configs": [], + "flaky": false, + "gtest": false, + "language": "c", + "name": "ev_epoll_linux_test", + "platforms": [ + "linux" + ] + }, { "args": [], "ci_platforms": [ -- cgit v1.2.3 From 2f8ade0b9df48990e3617a302a5da946f032d4f6 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 17 Jun 2016 13:28:38 -0700 Subject: Significantly refactor the polling island locking and refcounting code --- src/core/lib/iomgr/ev_epoll_linux.c | 462 +++++++++++++++++++++--------------- 1 file changed, 270 insertions(+), 192 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index ed2c494b78..72288889c0 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -140,18 +140,40 @@ static void fd_global_shutdown(void); #define CLOSURE_READY ((grpc_closure *)1) /******************************************************************************* - * Polling-island Declarations + * Polling island Declarations */ -/* TODO: sree: Consider making ref_cnt and merged_to to gpr_atm - This would - * significantly reduce the number of mutex acquisition calls. */ + +// #define GRPC_PI_REF_COUNT_DEBUG +#ifdef GRPC_PI_REF_COUNT_DEBUG + +#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), 1, (r), __FILE__, __LINE__) +#define PI_UNREF(p, r) pi_unref_dbg((p), 1, (r), __FILE__, __LINE__) + +#else /* defined(GRPC_PI_REF_COUNT_DEBUG) */ + +#define PI_ADD_REF(p, r) pi_add_ref((p), 1) +#define PI_UNREF(p, r) pi_unref((p), 1) + +#endif /* !defined(GPRC_PI_REF_COUNT_DEBUG) */ + typedef struct polling_island { gpr_mu mu; - int ref_cnt; - - /* Points to the polling_island this merged into. - * If merged_to is not NULL, all the remaining fields (except mu and ref_cnt) - * are invalid and must be ignored */ - struct polling_island *merged_to; + /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement + the refcount. + Once the ref count becomes zero, this structure is destroyed which means + we should ensure that there is never a scenario where a PI_ADD_REF() is + racing with a PI_UNREF() that just made the ref_count zero. */ + gpr_atm ref_count; + + /* Pointer to the polling_island this merged into. + * merged_to value is only set once in polling_island's lifetime (and that too + * only if the island is merged with another island). Because of this, we can + * use gpr_atm type here so that we can do atomic access on this and reduce + * lock contention on 'mu' mutex. 
+ * + * Note that if this field is not NULL (i.e not 0), all the remaining fields + * (except mu and ref_count) are invalid and must be ignored. */ + gpr_atm merged_to; /* The fd of the underlying epoll set */ int epoll_fd; @@ -236,6 +258,8 @@ static grpc_wakeup_fd polling_island_wakeup_fd; static gpr_mu g_pi_freelist_mu; static polling_island *g_pi_freelist = NULL; +static void polling_island_delete(); /* Forward declaration */ + #ifdef GRPC_TSAN /* Currently TSAN may incorrectly flag data races between epoll_ctl and epoll_wait for any grpc_fd structs that are added to the epoll set via @@ -247,6 +271,51 @@ static polling_island *g_pi_freelist = NULL; gpr_atm g_epoll_sync; #endif /* defined(GRPC_TSAN) */ +#ifdef GRPC_PI_REF_COUNT_DEBUG +long pi_add_ref(polling_island *pi, int ref_cnt); +long pi_unref(polling_island *pi, int ref_cnt); + +void pi_add_ref_dbg(polling_island *pi, int ref_cnt, char *reason, char *file, + int line) { + long old_cnt = pi_add_ref(pi, ref_cnt); + gpr_log(GPR_DEBUG, "Add ref pi: %p, old:%ld -> new:%ld (%s) - (%s, %d)", + (void *)pi, old_cnt, (old_cnt + ref_cnt), reason, file, line); +} + +void pi_unref_dbg(polling_island *pi, int ref_cnt, char *reason, char *file, + int line) { + long old_cnt = pi_unref(pi, ref_cnt); + gpr_log(GPR_DEBUG, "Unref pi: %p, old:%ld -> new:%ld (%s) - (%s, %d)", + (void *)pi, old_cnt, (old_cnt - ref_cnt), reason, file, line); +} +#endif + +long pi_add_ref(polling_island *pi, int ref_cnt) { + return gpr_atm_no_barrier_fetch_add(&pi->ref_count, ref_cnt); +} + +long pi_unref(polling_island *pi, int ref_cnt) { + long old_cnt = gpr_atm_no_barrier_fetch_add(&pi->ref_count, -ref_cnt); + + /* If ref count went to zero, delete the polling island. Note that this need + not be done under a lock. Once the ref count goes to zero, we are + guaranteed that no one else holds a reference to the polling island (and + that there is no racing pi_add_ref() call either. + + Also, if we are deleting the polling island and the merged_to field is + non-empty, we should remove a ref to the merged_to polling island + */ + if (old_cnt == ref_cnt) { + polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); + polling_island_delete(pi); + if (next != NULL) { + PI_UNREF(next, "pi_delete"); /* Recursive call */ + } + } + + return old_cnt; +} + /* The caller is expected to hold pi->mu lock before calling this function */ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, size_t fd_count, bool add_fd_refs) { @@ -355,8 +424,7 @@ static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd, } } -static polling_island *polling_island_create(grpc_fd *initial_fd, - int initial_ref_cnt) { +static polling_island *polling_island_create(grpc_fd *initial_fd) { polling_island *pi = NULL; /* Try to get one from the polling island freelist */ @@ -377,6 +445,9 @@ static polling_island *polling_island_create(grpc_fd *initial_fd, pi->fds = NULL; } + gpr_atm_no_barrier_store(&pi->ref_count, 0); + gpr_atm_no_barrier_store(&pi->merged_to, NULL); + pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (pi->epoll_fd < 0) { @@ -387,14 +458,12 @@ static polling_island *polling_island_create(grpc_fd *initial_fd, polling_island_add_wakeup_fd_locked(pi, &grpc_global_wakeup_fd); - pi->ref_cnt = initial_ref_cnt; - pi->merged_to = NULL; pi->next_free = NULL; if (initial_fd != NULL) { - /* It is not really needed to get the pi->mu lock here. 
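A short worked trace of the refcounting scheme described above (editorial, not part of the patch; the merge-time ref is taken in the refactored polling_island_merge() further below):

/*  Islands P and Q each hold ref_count 1 (one pollset referencing each).
 *  merge P into Q : P->merged_to = Q, PI_ADD_REF(Q)          => Q: 2, P: 1
 *  P's pollset drops its ref : PI_UNREF(P)                   => P: 0
 *    -> polling_island_delete(P); since P->merged_to != NULL, the chained
 *       PI_UNREF(Q, "pi_delete") releases P's ref on Q       => Q: 1
 *  Q is freed only once its own pollsets/fds drop their remaining refs.
 */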
If this is a newly - created polling island (or one that we got from the freelist), no one - else would be holding a lock to it anyway */ + /* Lock the polling island here just in case we got this structure from the + freelist and the polling island lock was not released yet (by the code + that adds the polling island to the freelist) */ gpr_mu_lock(&pi->mu); polling_island_add_fds_locked(pi, &initial_fd, 1, true); gpr_mu_unlock(&pi->mu); @@ -404,140 +473,136 @@ static polling_island *polling_island_create(grpc_fd *initial_fd, } static void polling_island_delete(polling_island *pi) { - GPR_ASSERT(pi->ref_cnt == 0); GPR_ASSERT(pi->fd_cnt == 0); + gpr_atm_rel_store(&pi->merged_to, NULL); + close(pi->epoll_fd); pi->epoll_fd = -1; - pi->merged_to = NULL; - gpr_mu_lock(&g_pi_freelist_mu); pi->next_free = g_pi_freelist; g_pi_freelist = pi; gpr_mu_unlock(&g_pi_freelist_mu); } -void polling_island_unref_and_unlock(polling_island *pi, int unref_by) { - pi->ref_cnt -= unref_by; - int ref_cnt = pi->ref_cnt; - GPR_ASSERT(ref_cnt >= 0); - - gpr_mu_unlock(&pi->mu); - - if (ref_cnt == 0) { - polling_island_delete(pi); - } -} - -polling_island *polling_island_update_and_lock(polling_island *pi, int unref_by, - int add_ref_by) { +/* Gets the lock on the *latest* polling island i.e the last polling island in + the linked list (linked by 'merged_to' link). Call gpr_mu_unlock on the + returned polling island's mu. + Usage: To lock/unlock polling island "pi", do the following: + polling_island *pi_latest = polling_island_lock(pi); + ... + ... critical section .. + ... + gpr_mu_unlock(&pi_latest->mu); //NOTE: use pi_latest->mu. NOT pi->mu */ +polling_island *polling_island_lock(polling_island *pi) { polling_island *next = NULL; - gpr_mu_lock(&pi->mu); - while (pi->merged_to != NULL) { - next = pi->merged_to; - polling_island_unref_and_unlock(pi, unref_by); + while (true) { + next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); + if (next == NULL) { + /* pi is the last node in the linked list. Get the lock and check again + (under the pi->mu lock) that pi is still the last node (because a merge + may have happend after the (next == NULL) check above and before + getting the pi->mu lock. + If pi is the last node, we are done. If not, unlock and continue + traversing the list */ + gpr_mu_lock(&pi->mu); + next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); + if (next == NULL) { + break; + } + gpr_mu_unlock(&pi->mu); + } + pi = next; - gpr_mu_lock(&pi->mu); } - pi->ref_cnt += add_ref_by; return pi; } -void polling_island_pair_update_and_lock(polling_island **p, - polling_island **q) { +/* Gets the lock on the *latest* polling islands pointed by *p and *q. + This function is needed because calling the following block of code to obtain + locks on polling islands (*p and *q) is prone to deadlocks. + { + polling_island_lock(*p); + polling_island_lock(*q); + } + + Usage/exmaple: + polling_island *p1; + polling_island *p2; + .. + polling_island_lock_pair(&p1, &p2); + .. + .. Critical section with both p1 and p2 locked + .. + // Release locks + // **IMPORTANT**: Make sure you check p1 == p2 AFTER the function + // polling_island_lock_pair() was called and if so, release the lock only + // once. 
Note: Even if p1 != p2 beforec calling polling_island_lock_pair(), + // they might be after the function returns: + if (p1 == p2) { + gpr_mu_unlock(&p1->mu) + } else { + gpr_mu_unlock(&p1->mu); + gpr_mu_unlock(&p2->mu); + } + +*/ +void polling_island_lock_pair(polling_island **p, polling_island **q) { polling_island *pi_1 = *p; polling_island *pi_2 = *q; - polling_island *temp = NULL; - bool pi_1_locked = false; - bool pi_2_locked = false; - int num_swaps = 0; - - /* Loop until either pi_1 == pi_2 or until we acquired locks on both pi_1 - and pi_2 */ - while (pi_1 != pi_2 && !(pi_1_locked && pi_2_locked)) { - /* The following assertions are true at this point: - - pi_1 != pi_2 (else, the while loop would have exited) - - pi_1 MAY be locked - - pi_2 is NOT locked */ - - /* To maintain lock order consistency, always lock polling_island node with - lower address first. - First, make sure pi_1 < pi_2 before proceeding any further. If it turns - out that pi_1 > pi_2, unlock pi_1 if locked (because pi_2 is not locked - at this point and having pi_1 locked would violate the lock order) and - swap pi_1 and pi_2 so that pi_1 becomes less than pi_2 */ - if (pi_1 > pi_2) { - if (pi_1_locked) { - gpr_mu_unlock(&pi_1->mu); - pi_1_locked = false; - } + polling_island *next_1 = NULL; + polling_island *next_2 = NULL; + + /* The algorithm is simple: + - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and + keep updating pi_1 and pi_2) + - Then obtain locks on the islands by following a lock order rule of + locking polling_island with lower address first + Special case: Before obtaining the locks, check if pi_1 and pi_2 are + pointing to the same island. If that is the case, we can just call + polling_island_lock() + - After obtaining both the locks, double check that the polling islands + are still the last polling islands in their respective linked lists + (this is because there might have been polling island merges before + we got the lock) + - If the polling islands are the last islands, we are done. If not, + release the locks and continue the process from the first step */ + while (true) { + next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to); + while (next_1 != NULL) { + pi_1 = next_1; + next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to); + } - GPR_SWAP(polling_island *, pi_1, pi_2); - num_swaps++; + next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to); + while (next_2 != NULL) { + pi_2 = next_2; + next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to); } - /* The following assertions are true at this point: - - pi_1 != pi_2 - - pi_1 < pi_2 (address of pi_1 is less than that of pi_2) - - pi_1 MAYBE locked - - pi_2 is NOT locked */ + if (pi_1 == pi_2) { + pi_1 = pi_2 = polling_island_lock(pi_1); + break; + } - /* Lock pi_1 (if pi_1 is pointing to the terminal node in the list) */ - if (!pi_1_locked) { + if (pi_1 < pi_2) { + gpr_mu_lock(&pi_1->mu); + gpr_mu_lock(&pi_2->mu); + } else { + gpr_mu_lock(&pi_2->mu); gpr_mu_lock(&pi_1->mu); - pi_1_locked = true; - - /* If pi_1 is not terminal node (i.e pi_1->merged_to != NULL), we are not - done locking this polling_island yet. 
Release the lock on this node and - advance pi_1 to the next node in the list; and go to the beginning of - the loop (we can't proceed to locking pi_2 unless we locked pi_1 first) - */ - if (pi_1->merged_to != NULL) { - temp = pi_1->merged_to; - polling_island_unref_and_unlock(pi_1, 1); - pi_1 = temp; - pi_1_locked = false; - - continue; - } } - /* The following assertions are true at this point: - - pi_1 is locked - - pi_2 is unlocked - - pi_1 != pi_2 */ - - gpr_mu_lock(&pi_2->mu); - pi_2_locked = true; - - /* If pi_2 is not terminal node, we are not done locking this polling_island - yet. Release the lock and update pi_2 to the next node in the list */ - if (pi_2->merged_to != NULL) { - temp = pi_2->merged_to; - polling_island_unref_and_unlock(pi_2, 1); - pi_2 = temp; - pi_2_locked = false; + next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to); + next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to); + if (next_1 == NULL && next_2 == NULL) { + break; } - } - /* At this point, either pi_1 == pi_2 AND/OR we got both locks */ - if (pi_1 == pi_2) { - /* We may or may not have gotten the lock. If we didn't, walk the rest of - the polling_island list and get the lock */ - GPR_ASSERT(pi_1_locked || (!pi_1_locked && !pi_2_locked)); - if (!pi_1_locked) { - pi_1 = pi_2 = polling_island_update_and_lock(pi_1, 2, 0); - } - } else { - GPR_ASSERT(pi_1_locked && pi_2_locked); - /* If we swapped pi_1 and pi_2 odd number of times, do one more swap so that - pi_1 and pi_2 point to the same polling_island lists they started off - with at the beginning of this function (i.e *p and *q respectively) */ - if (num_swaps % 2 > 0) { - GPR_SWAP(polling_island *, pi_1, pi_2); - } + gpr_mu_unlock(&pi_1->mu); + gpr_mu_unlock(&pi_2->mu); } *p = pi_1; @@ -546,7 +611,7 @@ void polling_island_pair_update_and_lock(polling_island **p, polling_island *polling_island_merge(polling_island *p, polling_island *q) { /* Get locks on both the polling islands */ - polling_island_pair_update_and_lock(&p, &q); + polling_island_lock_pair(&p, &q); if (p == q) { /* Nothing needs to be done here */ @@ -568,15 +633,14 @@ polling_island *polling_island_merge(polling_island *p, polling_island *q) { /* Wakeup all the pollers (if any) on p so that they can pickup this change */ polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd); - p->merged_to = q; + /* Add the 'merged_to' link from p --> q */ + gpr_atm_rel_store(&p->merged_to, q); + PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */ - /* - The merged polling island (i.e q) inherits all the ref counts of the - island merging with it (i.e p) - - The island p will lose a ref count */ - q->ref_cnt += p->ref_cnt; - polling_island_unref_and_unlock(p, 1); /* Decrement refcount */ - polling_island_unref_and_unlock(q, 0); /* Just Unlock. Don't decrement ref */ + gpr_mu_unlock(&p->mu); + gpr_mu_unlock(&q->mu); + /* Return the merged polling island */ return q; } @@ -667,6 +731,7 @@ static void unref_by(grpc_fd *fd, int n) { fd->freelist_next = fd_freelist; fd_freelist = fd; grpc_iomgr_unregister_object(&fd->iomgr_object); + gpr_mu_unlock(&fd_freelist_mu); } else { GPR_ASSERT(old > n); @@ -785,16 +850,20 @@ static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, REF_BY(fd, 1, reason); /* Remove the fd from the polling island: - - Update the fd->polling_island to point to the latest polling island - - Remove the fd from the polling island. 
- - Remove a ref to the polling island and set fd->polling_island to NULL */ + - Get a lock on the latest polling island (i.e the last island in the + linked list pointed by fd->polling_island). This is the island that + would actually contain the fd + - Remove the fd from the latest polling island + - Unlock the latest polling island + - Set fd->polling_island to NULL (but remove the ref on the polling island + before doing this.) */ gpr_mu_lock(&fd->pi_mu); if (fd->polling_island != NULL) { - fd->polling_island = - polling_island_update_and_lock(fd->polling_island, 1, 0); - polling_island_remove_fd_locked(fd->polling_island, fd, is_fd_closed); + polling_island *pi_latest = polling_island_lock(fd->polling_island); + polling_island_remove_fd_locked(pi_latest, fd, is_fd_closed); + gpr_mu_unlock(&pi_latest->mu); - polling_island_unref_and_unlock(fd->polling_island, 1); + PI_UNREF(fd->polling_island, "fd_orphan"); fd->polling_island = NULL; } gpr_mu_unlock(&fd->pi_mu); @@ -1050,17 +1119,13 @@ static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { gpr_mu_unlock(&fd->mu); } -/* Release the reference to pollset->polling_island and set it to NULL. - pollset->mu must be held */ -static void pollset_release_polling_island_locked(grpc_pollset *pollset) { - gpr_mu_lock(&pollset->pi_mu); - if (pollset->polling_island) { - pollset->polling_island = - polling_island_update_and_lock(pollset->polling_island, 1, 0); - polling_island_unref_and_unlock(pollset->polling_island, 1); - pollset->polling_island = NULL; +static void pollset_release_polling_island(grpc_pollset *ps, char *reason) { + gpr_mu_lock(&ps->pi_mu); + if (ps->polling_island != NULL) { + PI_UNREF(ps->polling_island, reason); } - gpr_mu_unlock(&pollset->pi_mu); + ps->polling_island = NULL; + gpr_mu_unlock(&ps->pi_mu); } static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, @@ -1069,8 +1134,9 @@ static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, GPR_ASSERT(!pollset_has_workers(pollset)); pollset->finish_shutdown_called = true; - pollset_release_polling_island_locked(pollset); + /* Release the ref and set pollset->polling_island to NULL */ + pollset_release_polling_island(pollset, "ps_shutdown"); grpc_exec_ctx_enqueue(exec_ctx, pollset->shutdown_done, true, NULL); } @@ -1110,7 +1176,7 @@ static void pollset_reset(grpc_pollset *pollset) { pollset->finish_shutdown_called = false; pollset->kicked_without_pollers = false; pollset->shutdown_done = NULL; - pollset_release_polling_island_locked(pollset); + pollset_release_polling_island(pollset, "ps_reset"); } #define GRPC_EPOLL_MAX_EVENTS 1000 @@ -1124,28 +1190,37 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, GPR_TIMER_BEGIN("pollset_work_and_unlock", 0); /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the - polling island pointed by pollset->polling_island. + latest polling island pointed by pollset->polling_island. 
Acquire the following locks: - pollset->mu (which we already have) - pollset->pi_mu - - pollset->polling_island->mu (call polling_island_update_and_lock())*/ + - pollset->polling_island lock */ gpr_mu_lock(&pollset->pi_mu); - pi = pollset->polling_island; - if (pi == NULL) { - pi = polling_island_create(NULL, 1); + if (pollset->polling_island == NULL) { + pollset->polling_island = polling_island_create(NULL); + PI_ADD_REF(pollset->polling_island, "ps"); } - /* In addition to locking the polling island, add a ref so that the island - does not get destroyed (which means the epoll_fd won't be closed) while - we are are doing an epoll_wait() on the epoll_fd */ - pi = polling_island_update_and_lock(pi, 1, 1); + pi = polling_island_lock(pollset->polling_island); epoll_fd = pi->epoll_fd; - /* Update the pollset->polling_island */ - pollset->polling_island = pi; + /* Update the pollset->polling_island since the island being pointed by + pollset->polling_island may not be the latest (i.e pi) */ + if (pollset->polling_island != pi) { + /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the + polling island to be deleted */ + PI_ADD_REF(pi, "ps"); + PI_UNREF(pollset->polling_island, "ps"); + pollset->polling_island = pi; + } + + /* Add an extra ref so that the island does not get destroyed (which means + the epoll_fd won't be closed) while we are are doing an epoll_wait() on the + epoll_fd */ + PI_ADD_REF(pi, "ps_work"); - polling_island_unref_and_unlock(pollset->polling_island, 0); /* Keep the ref*/ + gpr_mu_unlock(&pi->mu); gpr_mu_unlock(&pollset->pi_mu); gpr_mu_unlock(&pollset->mu); @@ -1193,14 +1268,12 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, GPR_ASSERT(pi != NULL); - /* Before leaving, release the extra ref we added to the polling island */ - /* It is important to note that at this point 'pi' may not be the same as - * pollset->polling_island. This is because pollset->polling_island pointer - * gets updated whenever the underlying polling island is merged with another - * island and while we are doing epoll_wait() above, the polling island may - * have been merged */ - pi = polling_island_update_and_lock(pi, 1, 0); /* No new ref added */ - polling_island_unref_and_unlock(pi, 1); + /* Before leaving, release the extra ref we added to the polling island. It + is important to use "pi" here (i.e our old copy of pollset->polling_island + that we got before releasing the polling island lock). 
This is because + pollset->polling_island pointer might get udpated in other parts of the + code when there is an island merge while we are doing epoll_wait() above */ + PI_UNREF(pi, "ps_work"); GPR_TIMER_END("pollset_work_and_unlock", 0); } @@ -1297,20 +1370,34 @@ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, if (fd->polling_island == pollset->polling_island) { pi_new = fd->polling_island; if (pi_new == NULL) { - pi_new = polling_island_create(fd, 2); + pi_new = polling_island_create(fd); } } else if (fd->polling_island == NULL) { - pi_new = polling_island_update_and_lock(pollset->polling_island, 1, 1); - polling_island_add_fds_locked(pollset->polling_island, &fd, 1, true); + pi_new = polling_island_lock(pollset->polling_island); + polling_island_add_fds_locked(pi_new, &fd, 1, true); gpr_mu_unlock(&pi_new->mu); } else if (pollset->polling_island == NULL) { - pi_new = polling_island_update_and_lock(fd->polling_island, 1, 1); + pi_new = polling_island_lock(fd->polling_island); gpr_mu_unlock(&pi_new->mu); } else { pi_new = polling_island_merge(fd->polling_island, pollset->polling_island); } - fd->polling_island = pollset->polling_island = pi_new; + if (fd->polling_island != pi_new) { + PI_ADD_REF(pi_new, "fd"); + if (fd->polling_island != NULL) { + PI_UNREF(fd->polling_island, "fd"); + } + fd->polling_island = pi_new; + } + + if (pollset->polling_island != pi_new) { + PI_ADD_REF(pi_new, "ps"); + if (pollset->polling_island != NULL) { + PI_UNREF(pollset->polling_island, "ps"); + } + pollset->polling_island = pi_new; + } gpr_mu_unlock(&fd->pi_mu); gpr_mu_unlock(&pollset->pi_mu); @@ -1481,28 +1568,19 @@ void *grpc_pollset_get_polling_island(grpc_pollset *ps) { return pi; } -static polling_island *get_polling_island(polling_island *p) { - if (p == NULL) { - return NULL; - } +bool grpc_are_polling_islands_equal(void *p, void *q) { + polling_island *p1 = p; + polling_island *p2 = q; - polling_island *next; - gpr_mu_lock(&p->mu); - while (p->merged_to != NULL) { - next = p->merged_to; - gpr_mu_unlock(&p->mu); - p = next; - gpr_mu_lock(&p->mu); + polling_island_lock_pair(&p1, &p2); + if (p1 == p2) { + gpr_mu_unlock(&p1->mu); + } else { + gpr_mu_unlock(&p1->mu); + gpr_mu_unlock(&p2->mu); } - gpr_mu_unlock(&p->mu); - - return p; -} -bool grpc_are_polling_islands_equal(void *p, void *q) { - p = get_polling_island(p); - q = get_polling_island(q); - return p == q; + return p1 == p2; } /******************************************************************************* -- cgit v1.2.3 From cddf697ab44a7bab1821915e1e3f6a0f08ca1706 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Tue, 21 Jun 2016 08:27:07 -0700 Subject: Fix refcounting tsan failures and grab pollset lock in the function pollset_add_fd --- src/core/lib/iomgr/ev_epoll_linux.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 72288889c0..7cc69c876d 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -291,11 +291,11 @@ void pi_unref_dbg(polling_island *pi, int ref_cnt, char *reason, char *file, #endif long pi_add_ref(polling_island *pi, int ref_cnt) { - return gpr_atm_no_barrier_fetch_add(&pi->ref_count, ref_cnt); + return gpr_atm_full_fetch_add(&pi->ref_count, ref_cnt); } long pi_unref(polling_island *pi, int ref_cnt) { - long old_cnt = gpr_atm_no_barrier_fetch_add(&pi->ref_count, -ref_cnt); + long old_cnt = 
gpr_atm_full_fetch_add(&pi->ref_count, -ref_cnt); /* If ref count went to zero, delete the polling island. Note that this need not be done under a lock. Once the ref count goes to zero, we are @@ -311,6 +311,8 @@ long pi_unref(polling_island *pi, int ref_cnt) { if (next != NULL) { PI_UNREF(next, "pi_delete"); /* Recursive call */ } + } else { + GPR_ASSERT(old_cnt > ref_cnt); } return old_cnt; @@ -445,8 +447,8 @@ static polling_island *polling_island_create(grpc_fd *initial_fd) { pi->fds = NULL; } - gpr_atm_no_barrier_store(&pi->ref_count, 0); - gpr_atm_no_barrier_store(&pi->merged_to, NULL); + gpr_atm_rel_store(&pi->ref_count, 0); + gpr_atm_rel_store(&pi->merged_to, NULL); pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); @@ -1347,7 +1349,7 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { - /* TODO sreek - Double check if we need to get a pollset->mu lock here */ + gpr_mu_lock(&pollset->mu); gpr_mu_lock(&pollset->pi_mu); gpr_mu_lock(&fd->pi_mu); @@ -1401,6 +1403,7 @@ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, gpr_mu_unlock(&fd->pi_mu); gpr_mu_unlock(&pollset->pi_mu); + gpr_mu_unlock(&pollset->mu); } /******************************************************************************* -- cgit v1.2.3 From 3131c269c14f97294ebf8b6e3d1a235d4acf3317 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Tue, 21 Jun 2016 17:28:28 -0700 Subject: Integrate with unified error reporting --- src/core/lib/iomgr/ev_epoll_linux.c | 116 ++++++++++++++++++++++++++-------- test/core/iomgr/ev_epoll_linux_test.c | 3 +- 2 files changed, 90 insertions(+), 29 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 7cc69c876d..d625b096a1 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -646,11 +646,18 @@ polling_island *polling_island_merge(polling_island *p, polling_island *q) { return q; } -static void polling_island_global_init() { +static grpc_error *polling_island_global_init() { + grpc_error *error = GRPC_ERROR_NONE; + gpr_mu_init(&g_pi_freelist_mu); g_pi_freelist = NULL; - grpc_wakeup_fd_init(&polling_island_wakeup_fd); - grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd); + + error = grpc_wakeup_fd_init(&polling_island_wakeup_fd); + if (error == GRPC_ERROR_NONE) { + error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd); + } + + return error; } static void polling_island_global_shutdown() { @@ -870,21 +877,33 @@ static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, } gpr_mu_unlock(&fd->pi_mu); - grpc_exec_ctx_enqueue(exec_ctx, fd->on_done_closure, true, NULL); + grpc_exec_ctx_sched(exec_ctx, fd->on_done_closure, GRPC_ERROR_NONE, NULL); gpr_mu_unlock(&fd->mu); UNREF_BY(fd, 2, reason); /* Drop the reference */ } +static grpc_error *fd_shutdown_error(bool shutdown) { + if (!shutdown) { + return GRPC_ERROR_NONE; + } else { + return GRPC_ERROR_CREATE("FD shutdown"); + } +} + static void notify_on_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure **st, grpc_closure *closure) { - if (*st == CLOSURE_NOT_READY) { + if (fd->shutdown) { + grpc_exec_ctx_sched(exec_ctx, closure, GRPC_ERROR_CREATE("FD shutdown"), + NULL); + } else if (*st == CLOSURE_NOT_READY) { /* not ready ==> switch to a waiting state by setting the closure */ *st = closure; } else if (*st == CLOSURE_READY) { /* already ready ==> queue the closure to run immediately */ *st = 
CLOSURE_NOT_READY; - grpc_exec_ctx_enqueue(exec_ctx, closure, !fd->shutdown, NULL); + grpc_exec_ctx_sched(exec_ctx, closure, fd_shutdown_error(fd->shutdown), + NULL); } else { /* upcallptr was set to a different closure. This is an error! */ gpr_log(GPR_ERROR, @@ -906,7 +925,7 @@ static int set_ready_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, return 0; } else { /* waiting ==> queue closure */ - grpc_exec_ctx_enqueue(exec_ctx, *st, !fd->shutdown, NULL); + grpc_exec_ctx_sched(exec_ctx, *st, fd_shutdown_error(fd->shutdown), NULL); *st = CLOSURE_NOT_READY; return 1; } @@ -964,11 +983,11 @@ static void sig_handler(int sig_num) { static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); } /* Global state management */ -static void pollset_global_init(void) { - grpc_wakeup_fd_init(&grpc_global_wakeup_fd); +static grpc_error *pollset_global_init(void) { gpr_tls_init(&g_current_thread_pollset); gpr_tls_init(&g_current_thread_worker); poller_kick_init(); + return grpc_wakeup_fd_init(&grpc_global_wakeup_fd); } static void pollset_global_shutdown(void) { @@ -977,8 +996,13 @@ static void pollset_global_shutdown(void) { gpr_tls_destroy(&g_current_thread_worker); } -static void pollset_worker_kick(grpc_pollset_worker *worker) { - pthread_kill(worker->pt_id, grpc_wakeup_signal); +static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) { + grpc_error *err = GRPC_ERROR_NONE; + int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal); + if (err_num != 0) { + err = GRPC_OS_ERROR(err_num, "pthread_kill"); + } + return err; } /* Return 1 if the pollset has active threads in pollset_work (pollset must @@ -1014,10 +1038,19 @@ static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) { worker->prev->next = worker->next->prev = worker; } +static void kick_append_error(grpc_error **composite, grpc_error *error) { + if (error == GRPC_ERROR_NONE) return; + if (*composite == GRPC_ERROR_NONE) { + *composite = GRPC_ERROR_CREATE("Kick Failure"); + } + *composite = grpc_error_add_child(*composite, error); +} + /* p->mu must be held before calling this function */ -static void pollset_kick(grpc_pollset *p, - grpc_pollset_worker *specific_worker) { +static grpc_error *pollset_kick(grpc_pollset *p, + grpc_pollset_worker *specific_worker) { GPR_TIMER_BEGIN("pollset_kick", 0); + grpc_error *error = GRPC_ERROR_NONE; grpc_pollset_worker *worker = specific_worker; if (worker != NULL) { @@ -1027,7 +1060,7 @@ static void pollset_kick(grpc_pollset *p, for (worker = p->root_worker.next; worker != &p->root_worker; worker = worker->next) { if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) { - pollset_worker_kick(worker); + kick_append_error(&error, pollset_worker_kick(worker)); } } } else { @@ -1037,7 +1070,7 @@ static void pollset_kick(grpc_pollset *p, } else { GPR_TIMER_MARK("kicked_specifically", 0); if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) { - pollset_worker_kick(worker); + kick_append_error(&error, pollset_worker_kick(worker)); } } } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) { @@ -1053,7 +1086,7 @@ static void pollset_kick(grpc_pollset *p, if (worker != NULL) { GPR_TIMER_MARK("finally_kick", 0); push_back_worker(p, worker); - pollset_worker_kick(worker); + kick_append_error(&error, pollset_worker_kick(worker)); } else { GPR_TIMER_MARK("kicked_no_pollers", 0); p->kicked_without_pollers = true; @@ -1061,9 +1094,13 @@ static void pollset_kick(grpc_pollset *p, } GPR_TIMER_END("pollset_kick", 0); + 
GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error)); + return error; } -static void kick_poller(void) { grpc_wakeup_fd_wakeup(&grpc_global_wakeup_fd); } +static grpc_error *kick_poller(void) { + return grpc_wakeup_fd_wakeup(&grpc_global_wakeup_fd); +} static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { gpr_mu_init(&pollset->mu); @@ -1139,7 +1176,7 @@ static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, /* Release the ref and set pollset->polling_island to NULL */ pollset_release_polling_island(pollset, "ps_shutdown"); - grpc_exec_ctx_enqueue(exec_ctx, pollset->shutdown_done, true, NULL); + grpc_exec_ctx_sched(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE, NULL); } /* pollset->mu lock must be held by the caller before calling this */ @@ -1181,14 +1218,23 @@ static void pollset_reset(grpc_pollset *pollset) { pollset_release_polling_island(pollset, "ps_reset"); } +static void work_combine_error(grpc_error **composite, grpc_error *error) { + if (error == GRPC_ERROR_NONE) return; + if (*composite == GRPC_ERROR_NONE) { + *composite = GRPC_ERROR_CREATE("pollset_work"); + } + *composite = grpc_error_add_child(*composite, error); +} + #define GRPC_EPOLL_MAX_EVENTS 1000 -static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset, int timeout_ms, - sigset_t *sig_mask) { +static grpc_error *pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, + int timeout_ms, sigset_t *sig_mask) { struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; int epoll_fd = -1; int ep_rv; polling_island *pi = NULL; + grpc_error *error = GRPC_ERROR_NONE; GPR_TIMER_BEGIN("pollset_work_and_unlock", 0); /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the @@ -1232,6 +1278,7 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, if (ep_rv < 0) { if (errno != EINTR) { gpr_log(GPR_ERROR, "epoll_pwait() failed: %s", strerror(errno)); + work_combine_error(&error, GRPC_OS_ERROR(errno, "epoll_pwait")); } else { /* We were interrupted. Save an interation by doing a zero timeout epoll_wait to see if there are any other events of interest */ @@ -1247,7 +1294,8 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, for (int i = 0; i < ep_rv; ++i) { void *data_ptr = ep_ev[i].data.ptr; if (data_ptr == &grpc_global_wakeup_fd) { - grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd); + work_combine_error( + &error, grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd)); } else if (data_ptr == &polling_island_wakeup_fd) { /* This means that our polling island is merged with a different island. We do not have to do anything here since the subsequent call @@ -1278,16 +1326,18 @@ static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, PI_UNREF(pi, "ps_work"); GPR_TIMER_END("pollset_work_and_unlock", 0); + return error; } /* pollset->mu lock must be held by the caller before calling this. 
The function pollset_work() may temporarily release the lock (pollset->mu) during the course of its execution but it will always re-acquire the lock and ensure that it is held by the time the function returns */ -static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker **worker_hdl, gpr_timespec now, - gpr_timespec deadline) { +static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker **worker_hdl, + gpr_timespec now, gpr_timespec deadline) { GPR_TIMER_BEGIN("pollset_work", 0); + grpc_error *error = GRPC_ERROR_NONE; int timeout_ms = poll_deadline_to_millis_timeout(deadline, now); sigset_t new_mask; @@ -1316,7 +1366,7 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, push_front_worker(pollset, &worker); - pollset_work_and_unlock(exec_ctx, pollset, timeout_ms, &orig_mask); + error = pollset_work_and_unlock(exec_ctx, pollset, timeout_ms, &orig_mask); grpc_exec_ctx_flush(exec_ctx); gpr_mu_lock(&pollset->mu); @@ -1345,6 +1395,8 @@ static void pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, gpr_tls_set(&g_current_thread_pollset, (intptr_t)0); gpr_tls_set(&g_current_thread_worker, (intptr_t)0); GPR_TIMER_END("pollset_work", 0); + GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error)); + return error; } static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, @@ -1659,8 +1711,16 @@ const grpc_event_engine_vtable *grpc_init_epoll_linux(void) { } fd_global_init(); - pollset_global_init(); - polling_island_global_init(); + + if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) { + return NULL; + } + + if (!GRPC_LOG_IF_ERROR("polling_island_global_init", + polling_island_global_init())) { + return NULL; + } + return &vtable; } diff --git a/test/core/iomgr/ev_epoll_linux_test.c b/test/core/iomgr/ev_epoll_linux_test.c index 51da15faa7..034f17fd58 100644 --- a/test/core/iomgr/ev_epoll_linux_test.c +++ b/test/core/iomgr/ev_epoll_linux_test.c @@ -88,7 +88,8 @@ static void test_pollset_init(test_pollset *pollsets, int num_pollsets) { } } -static void destroy_pollset(grpc_exec_ctx *exec_ctx, void *p, bool success) { +static void destroy_pollset(grpc_exec_ctx *exec_ctx, void *p, + grpc_error *error) { grpc_pollset_destroy(p); } -- cgit v1.2.3 From 0100b2f1c0b08800ba0f7f53fe9cb5fbec7881a7 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Tue, 21 Jun 2016 17:38:13 -0700 Subject: Make fd_shutdown idempotent --- src/core/lib/iomgr/ev_epoll_linux.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index d625b096a1..c077987c01 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -942,15 +942,19 @@ static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, return notifier; } +/* Might be called multiple times */ static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { gpr_mu_lock(&fd->mu); - GPR_ASSERT(!fd->shutdown); - fd->shutdown = true; - - /* Flush any pending read and write closures. Since fd->shutdown is 'true' at - this point, the closures would be called with 'success = false' */ - set_ready_locked(exec_ctx, fd, &fd->read_closure); - set_ready_locked(exec_ctx, fd, &fd->write_closure); + /* Do the actual shutdown only once */ + if (!fd->shutdown) { + fd->shutdown = true; + + shutdown(fd->fd, SHUT_RDWR); + /* Flush any pending read and write closures. 
Since fd->shutdown is 'true' + at this point, the closures would be called with 'success = false' */ + set_ready_locked(exec_ctx, fd, &fd->read_closure); + set_ready_locked(exec_ctx, fd, &fd->write_closure); + } gpr_mu_unlock(&fd->mu); } -- cgit v1.2.3 From 24b6eae1fc71a4f5d18eb2e7c1cbca5b4e54a46f Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Tue, 21 Jun 2016 18:01:14 -0700 Subject: Add missing function fd_is_shutdown --- src/core/lib/iomgr/ev_epoll_linux.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index c077987c01..3a774a8876 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -942,6 +942,13 @@ static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, return notifier; } +static bool fd_is_shutdown(grpc_fd *fd) { + gpr_mu_lock(&fd->mu); + const bool r = fd->shutdown; + gpr_mu_unlock(&fd->mu); + return r; +} + /* Might be called multiple times */ static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { gpr_mu_lock(&fd->mu); @@ -1659,6 +1666,7 @@ static const grpc_event_engine_vtable vtable = { .fd_wrapped_fd = fd_wrapped_fd, .fd_orphan = fd_orphan, .fd_shutdown = fd_shutdown, + .fd_is_shutdown = fd_is_shutdown, .fd_notify_on_read = fd_notify_on_read, .fd_notify_on_write = fd_notify_on_write, .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset, -- cgit v1.2.3 From 229533b1e68c4a4b8a67148f7fe25543584131f6 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Tue, 21 Jun 2016 20:42:52 -0700 Subject: Remove pollset->pi_mu since it is redundant. Also do not get polling island lock in the fast-path --- src/core/lib/iomgr/ev_epoll_linux.c | 82 ++++++++++++++++++++----------------- src/core/lib/iomgr/ev_posix.c | 6 +-- 2 files changed, 46 insertions(+), 42 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 3a774a8876..6464d3ba34 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -207,12 +207,7 @@ struct grpc_pollset { bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */ grpc_closure *shutdown_done; /* Called after after shutdown is complete */ - /* The polling island to which this pollset belongs to and the mutex - protecting the field */ - /* TODO: sreek: This lock might actually be adding more overhead to the - critical path (i.e pollset_work() function). Consider removing this lock - and just using the overall pollset lock */ - gpr_mu pi_mu; + /* The polling island to which this pollset belongs to */ struct polling_island *polling_island; }; @@ -488,31 +483,47 @@ static void polling_island_delete(polling_island *pi) { gpr_mu_unlock(&g_pi_freelist_mu); } +/* Attempts to gets the last polling island in the linked list (liked by the + * 'merged_to' field). Since this does not lock the polling island, there are no + * guarantees that the island returned is the last island */ +static polling_island *polling_island_maybe_get_latest(polling_island *pi) { + polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); + while (next != NULL) { + pi = next; + next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); + } + + return pi; +} + /* Gets the lock on the *latest* polling island i.e the last polling island in - the linked list (linked by 'merged_to' link). Call gpr_mu_unlock on the + the linked list (linked by the 'merged_to' field). 
Call gpr_mu_unlock on the returned polling island's mu. Usage: To lock/unlock polling island "pi", do the following: polling_island *pi_latest = polling_island_lock(pi); ... ... critical section .. ... - gpr_mu_unlock(&pi_latest->mu); //NOTE: use pi_latest->mu. NOT pi->mu */ -polling_island *polling_island_lock(polling_island *pi) { + gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */ +static polling_island *polling_island_lock(polling_island *pi) { polling_island *next = NULL; + while (true) { next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); if (next == NULL) { - /* pi is the last node in the linked list. Get the lock and check again - (under the pi->mu lock) that pi is still the last node (because a merge - may have happend after the (next == NULL) check above and before - getting the pi->mu lock. - If pi is the last node, we are done. If not, unlock and continue - traversing the list */ + /* Looks like 'pi' is the last node in the linked list but unless we check + this by holding the pi->mu lock, we cannot be sure (i.e without the + pi->mu lock, we don't prevent island merges). + To be absolutely sure, check once more by holding the pi->mu lock */ gpr_mu_lock(&pi->mu); next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); if (next == NULL) { + /* pi is infact the last node and we have the pi->mu lock. we're done */ break; } + + /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu + * isn't the lock we are interested in. Continue traversing the list */ gpr_mu_unlock(&pi->mu); } @@ -526,11 +537,11 @@ polling_island *polling_island_lock(polling_island *pi) { This function is needed because calling the following block of code to obtain locks on polling islands (*p and *q) is prone to deadlocks. { - polling_island_lock(*p); - polling_island_lock(*q); + polling_island_lock(*p, true); + polling_island_lock(*q, true); } - Usage/exmaple: + Usage/example: polling_island *p1; polling_island *p2; .. 
@@ -551,7 +562,7 @@ polling_island *polling_island_lock(polling_island *pi) { } */ -void polling_island_lock_pair(polling_island **p, polling_island **q) { +static void polling_island_lock_pair(polling_island **p, polling_island **q) { polling_island *pi_1 = *p; polling_island *pi_2 = *q; polling_island *next_1 = NULL; @@ -611,7 +622,8 @@ void polling_island_lock_pair(polling_island **p, polling_island **q) { *q = pi_2; } -polling_island *polling_island_merge(polling_island *p, polling_island *q) { +static polling_island *polling_island_merge(polling_island *p, + polling_island *q) { /* Get locks on both the polling islands */ polling_island_lock_pair(&p, &q); @@ -1124,7 +1136,6 @@ static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { pollset->finish_shutdown_called = false; pollset->shutdown_done = NULL; - gpr_mu_init(&pollset->pi_mu); pollset->polling_island = NULL; } @@ -1170,12 +1181,10 @@ static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { } static void pollset_release_polling_island(grpc_pollset *ps, char *reason) { - gpr_mu_lock(&ps->pi_mu); if (ps->polling_island != NULL) { PI_UNREF(ps->polling_island, reason); } ps->polling_island = NULL; - gpr_mu_unlock(&ps->pi_mu); } static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, @@ -1215,7 +1224,6 @@ static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, * here */ static void pollset_destroy(grpc_pollset *pollset) { GPR_ASSERT(!pollset_has_workers(pollset)); - gpr_mu_destroy(&pollset->pi_mu); gpr_mu_destroy(&pollset->mu); } @@ -1250,22 +1258,25 @@ static grpc_error *pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the latest polling island pointed by pollset->polling_island. - Acquire the following locks: - - pollset->mu (which we already have) - - pollset->pi_mu - - pollset->polling_island lock */ - gpr_mu_lock(&pollset->pi_mu); + + Since epoll_fd is immutable, we can read it without obtaining the polling + island lock. There is however a possibility that the polling island (from + which we got the epoll_fd) got merged with another island while we are + in this function. This is still okay because in such a case, we will wakeup + right-away from epoll_wait() and pick up the latest polling_island the next + this function (i.e pollset_work_and_unlock()) is called. 
+ */ if (pollset->polling_island == NULL) { pollset->polling_island = polling_island_create(NULL); PI_ADD_REF(pollset->polling_island, "ps"); } - pi = polling_island_lock(pollset->polling_island); + pi = polling_island_maybe_get_latest(pollset->polling_island); epoll_fd = pi->epoll_fd; /* Update the pollset->polling_island since the island being pointed by - pollset->polling_island may not be the latest (i.e pi) */ + pollset->polling_island maybe older than the one pointed by pi) */ if (pollset->polling_island != pi) { /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the polling island to be deleted */ @@ -1278,9 +1289,6 @@ static grpc_error *pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, the epoll_fd won't be closed) while we are are doing an epoll_wait() on the epoll_fd */ PI_ADD_REF(pi, "ps_work"); - - gpr_mu_unlock(&pi->mu); - gpr_mu_unlock(&pollset->pi_mu); gpr_mu_unlock(&pollset->mu); do { @@ -1413,7 +1421,6 @@ static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { gpr_mu_lock(&pollset->mu); - gpr_mu_lock(&pollset->pi_mu); gpr_mu_lock(&fd->pi_mu); polling_island *pi_new = NULL; @@ -1465,7 +1472,6 @@ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, } gpr_mu_unlock(&fd->pi_mu); - gpr_mu_unlock(&pollset->pi_mu); gpr_mu_unlock(&pollset->mu); } @@ -1627,9 +1633,9 @@ void *grpc_fd_get_polling_island(grpc_fd *fd) { void *grpc_pollset_get_polling_island(grpc_pollset *ps) { polling_island *pi; - gpr_mu_lock(&ps->pi_mu); + gpr_mu_lock(&ps->mu); pi = ps->polling_island; - gpr_mu_unlock(&ps->pi_mu); + gpr_mu_unlock(&ps->mu); return pi; } diff --git a/src/core/lib/iomgr/ev_posix.c b/src/core/lib/iomgr/ev_posix.c index 4cdd13bbdb..a3c1e9db9a 100644 --- a/src/core/lib/iomgr/ev_posix.c +++ b/src/core/lib/iomgr/ev_posix.c @@ -54,7 +54,7 @@ grpc_poll_function_type grpc_poll_function = poll; static const grpc_event_engine_vtable *g_event_engine; -static const char* g_poll_strategy_name = NULL; +static const char *g_poll_strategy_name = NULL; typedef const grpc_event_engine_vtable *(*event_engine_factory_fn)(void); @@ -111,9 +111,7 @@ static void try_engine(const char *engine) { } /* Call this only after calling grpc_event_engine_init() */ -const char *grpc_get_poll_strategy_name() { - return g_poll_strategy_name; -} +const char *grpc_get_poll_strategy_name() { return g_poll_strategy_name; } void grpc_event_engine_init(void) { char *s = gpr_getenv("GRPC_POLL_STRATEGY"); -- cgit v1.2.3 From 76a0795b73ad2632c435fc338bb49368d1d68d9f Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 22 Jun 2016 15:09:06 -0700 Subject: Fix build errors on some configurations --- src/core/lib/iomgr/ev_epoll_linux.c | 13 +++++++------ test/core/iomgr/ev_epoll_linux_test.c | 10 ++++++++-- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 6464d3ba34..88cbc58634 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -34,6 +34,7 @@ #include #include +/* This polling engine is only relevant on linux kernels supporting epoll() */ #ifdef GPR_LINUX_EPOLL #include "src/core/lib/iomgr/ev_epoll_linux.h" @@ -322,7 +323,7 @@ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, #ifdef GRPC_TSAN /* See the definition of g_epoll_sync for more context */ - gpr_atm_rel_store(&g_epoll_sync, 0); + 
gpr_atm_rel_store(&g_epoll_sync, (gpr_atm) 0); #endif /* defined(GRPC_TSAN) */ for (i = 0; i < fd_count; i++) { @@ -442,8 +443,8 @@ static polling_island *polling_island_create(grpc_fd *initial_fd) { pi->fds = NULL; } - gpr_atm_rel_store(&pi->ref_count, 0); - gpr_atm_rel_store(&pi->merged_to, NULL); + gpr_atm_rel_store(&pi->ref_count, (gpr_atm) 0); + gpr_atm_rel_store(&pi->merged_to, (gpr_atm) NULL); pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); @@ -472,7 +473,7 @@ static polling_island *polling_island_create(grpc_fd *initial_fd) { static void polling_island_delete(polling_island *pi) { GPR_ASSERT(pi->fd_cnt == 0); - gpr_atm_rel_store(&pi->merged_to, NULL); + gpr_atm_rel_store(&pi->merged_to, (gpr_atm) NULL); close(pi->epoll_fd); pi->epoll_fd = -1; @@ -648,7 +649,7 @@ static polling_island *polling_island_merge(polling_island *p, polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd); /* Add the 'merged_to' link from p --> q */ - gpr_atm_rel_store(&p->merged_to, q); + gpr_atm_rel_store(&p->merged_to, (gpr_atm) q); PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */ gpr_mu_unlock(&p->mu); @@ -810,7 +811,7 @@ static grpc_fd *fd_create(int fd, const char *name) { holding a lock to it anyway. */ gpr_mu_lock(&new_fd->mu); - gpr_atm_rel_store(&new_fd->refst, 1); + gpr_atm_rel_store(&new_fd->refst, (gpr_atm) 1); new_fd->fd = fd; new_fd->shutdown = false; new_fd->orphaned = false; diff --git a/test/core/iomgr/ev_epoll_linux_test.c b/test/core/iomgr/ev_epoll_linux_test.c index 35eb679130..66a69f52cd 100644 --- a/test/core/iomgr/ev_epoll_linux_test.c +++ b/test/core/iomgr/ev_epoll_linux_test.c @@ -30,7 +30,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ +#include +/* This test only relevant on linux systems where epoll() is available */ +#ifdef GPR_LINUX_EPOLL #include "src/core/lib/iomgr/ev_epoll_linux.h" #include "src/core/lib/iomgr/ev_posix.h" @@ -128,8 +131,10 @@ static void test_add_fd_to_pollset() { int i; int r; - /* Create some dummy file descriptors (using pipe fds for this test. Could be - anything). Also NUM_FDS should be even for this test. */ + /* Create some dummy file descriptors. Currently using pipe file descriptors + * for this test but we could use any other type of file descriptors. 
Also, + * since pipe() used in this test creates two fds in each call, NUM_FDS should + * be an even number */ for (i = 0; i < NUM_FDS; i = i + 2) { r = pipe(fds + i); if (r != 0) { @@ -234,3 +239,4 @@ int main(int argc, char **argv) { grpc_iomgr_shutdown(); return 0; } +#endif /* defined(GPR_LINUX_EPOLL) */ -- cgit v1.2.3 From 0224dcc2dcda932a171776de325fa2e66c95478f Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 22 Jun 2016 18:04:00 -0700 Subject: clang format --- src/core/lib/iomgr/ev_epoll_linux.c | 12 ++++++------ src/core/lib/iomgr/ev_posix.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index 88cbc58634..b1e9ac8a63 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -323,7 +323,7 @@ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, #ifdef GRPC_TSAN /* See the definition of g_epoll_sync for more context */ - gpr_atm_rel_store(&g_epoll_sync, (gpr_atm) 0); + gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0); #endif /* defined(GRPC_TSAN) */ for (i = 0; i < fd_count; i++) { @@ -443,8 +443,8 @@ static polling_island *polling_island_create(grpc_fd *initial_fd) { pi->fds = NULL; } - gpr_atm_rel_store(&pi->ref_count, (gpr_atm) 0); - gpr_atm_rel_store(&pi->merged_to, (gpr_atm) NULL); + gpr_atm_rel_store(&pi->ref_count, (gpr_atm)0); + gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL); pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); @@ -473,7 +473,7 @@ static polling_island *polling_island_create(grpc_fd *initial_fd) { static void polling_island_delete(polling_island *pi) { GPR_ASSERT(pi->fd_cnt == 0); - gpr_atm_rel_store(&pi->merged_to, (gpr_atm) NULL); + gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL); close(pi->epoll_fd); pi->epoll_fd = -1; @@ -649,7 +649,7 @@ static polling_island *polling_island_merge(polling_island *p, polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd); /* Add the 'merged_to' link from p --> q */ - gpr_atm_rel_store(&p->merged_to, (gpr_atm) q); + gpr_atm_rel_store(&p->merged_to, (gpr_atm)q); PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */ gpr_mu_unlock(&p->mu); @@ -811,7 +811,7 @@ static grpc_fd *fd_create(int fd, const char *name) { holding a lock to it anyway. */ gpr_mu_lock(&new_fd->mu); - gpr_atm_rel_store(&new_fd->refst, (gpr_atm) 1); + gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1); new_fd->fd = fd; new_fd->shutdown = false; new_fd->orphaned = false; diff --git a/src/core/lib/iomgr/ev_posix.h b/src/core/lib/iomgr/ev_posix.h index 32260fe2ee..579c84ef70 100644 --- a/src/core/lib/iomgr/ev_posix.h +++ b/src/core/lib/iomgr/ev_posix.h @@ -100,7 +100,7 @@ void grpc_event_engine_init(void); void grpc_event_engine_shutdown(void); /* Return the name of the poll strategy */ -const char* grpc_get_poll_strategy_name(); +const char *grpc_get_poll_strategy_name(); /* Create a wrapped file descriptor. Requires fd is a non-blocking file descriptor. 
-- cgit v1.2.3 From 20d0a167beb287f61a7f33943fddfc34cae75860 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Thu, 23 Jun 2016 15:14:03 -0700 Subject: Better error handling and add polling_island_unlock_pair() helper --- src/core/lib/iomgr/ev_epoll_linux.c | 300 ++++++++++++++++++++++-------------- 1 file changed, 182 insertions(+), 118 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index b1e9ac8a63..a77044edc5 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -237,6 +237,19 @@ struct grpc_pollset_set { grpc_fd **fds; }; +/******************************************************************************* + * Common helpers + */ + +static void append_error(grpc_error **composite, grpc_error *error, + const char *desc) { + if (error == GRPC_ERROR_NONE) return; + if (*composite == GRPC_ERROR_NONE) { + *composite = GRPC_ERROR_CREATE(desc); + } + *composite = grpc_error_add_child(*composite, error); +} + /******************************************************************************* * Polling island Definitions */ @@ -316,10 +329,13 @@ long pi_unref(polling_island *pi, int ref_cnt) { /* The caller is expected to hold pi->mu lock before calling this function */ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, - size_t fd_count, bool add_fd_refs) { + size_t fd_count, bool add_fd_refs, + grpc_error **error) { int err; size_t i; struct epoll_event ev; + char *err_msg; + const char *err_desc = "polling_island_add_fds"; #ifdef GRPC_TSAN /* See the definition of g_epoll_sync for more context */ @@ -333,10 +349,12 @@ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, if (err < 0) { if (errno != EEXIST) { - /* TODO: sreek - We need a better way to bubble up this error instead of - just logging a message */ - gpr_log(GPR_ERROR, "epoll_ctl add for fd: %d failed with error: %s", - fds[i]->fd, strerror(errno)); + gpr_asprintf( + &err_msg, + "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)", + pi->epoll_fd, fds[i]->fd, errno, strerror(errno)); + append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); + gpr_free(err_msg); } continue; @@ -356,37 +374,47 @@ static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, /* The caller is expected to hold pi->mu before calling this */ static void polling_island_add_wakeup_fd_locked(polling_island *pi, - grpc_wakeup_fd *wakeup_fd) { + grpc_wakeup_fd *wakeup_fd, + grpc_error **error) { struct epoll_event ev; int err; + char *err_msg; + const char *err_desc = "polling_island_add_wakeup_fd"; ev.events = (uint32_t)(EPOLLIN | EPOLLET); ev.data.ptr = wakeup_fd; err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev); - if (err < 0) { - gpr_log(GPR_ERROR, - "Failed to add grpc_wake_up_fd (%d) to the epoll set (epoll_fd: %d)" - ". 
Error: %s", - GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), pi->epoll_fd, - strerror(errno)); + if (err < 0 && errno != EEXIST) { + gpr_asprintf(&err_msg, + "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with " + "error: %d (%s)", + pi->epoll_fd, + GRPC_WAKEUP_FD_GET_READ_FD(&grpc_global_wakeup_fd), errno, + strerror(errno)); + append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); + gpr_free(err_msg); } } /* The caller is expected to hold pi->mu lock before calling this function */ static void polling_island_remove_all_fds_locked(polling_island *pi, - bool remove_fd_refs) { + bool remove_fd_refs, + grpc_error **error) { int err; size_t i; + char *err_msg; + const char *err_desc = "polling_island_remove_fds"; for (i = 0; i < pi->fd_cnt; i++) { err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL); if (err < 0 && errno != ENOENT) { - /* TODO: sreek - We need a better way to bubble up this error instead of - * just logging a message */ - gpr_log(GPR_ERROR, - "epoll_ctl deleting fds[%zu]: %d failed with error: %s", i, - pi->fds[i]->fd, strerror(errno)); + gpr_asprintf(&err_msg, + "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with " + "error: %d (%s)", + pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno)); + append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); + gpr_free(err_msg); } if (remove_fd_refs) { @@ -399,17 +427,24 @@ static void polling_island_remove_all_fds_locked(polling_island *pi, /* The caller is expected to hold pi->mu lock before calling this function */ static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd, - bool is_fd_closed) { + bool is_fd_closed, + grpc_error **error) { int err; size_t i; + char *err_msg; + const char *err_desc = "polling_island_remove_fd"; /* If fd is already closed, then it would have been automatically been removed from the epoll set */ if (!is_fd_closed) { err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL); if (err < 0 && errno != ENOENT) { - gpr_log(GPR_ERROR, "epoll_ctl deleting fd: %d failed with error; %s", - fd->fd, strerror(errno)); + gpr_asprintf( + &err_msg, + "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)", + pi->epoll_fd, fd->fd, errno, strerror(errno)); + append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); + gpr_free(err_msg); } } @@ -422,8 +457,12 @@ static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd, } } -static polling_island *polling_island_create(grpc_fd *initial_fd) { +/* Might return NULL in case of an error */ +static polling_island *polling_island_create(grpc_fd *initial_fd, + grpc_error **error) { polling_island *pi = NULL; + char *err_msg; + const char *err_desc = "polling_island_create"; /* Try to get one from the polling island freelist */ gpr_mu_lock(&g_pi_freelist_mu); @@ -449,22 +488,22 @@ static polling_island *polling_island_create(grpc_fd *initial_fd) { pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (pi->epoll_fd < 0) { - gpr_log(GPR_ERROR, "epoll_create1() failed with error: %s", - strerror(errno)); - } - GPR_ASSERT(pi->epoll_fd >= 0); - - polling_island_add_wakeup_fd_locked(pi, &grpc_global_wakeup_fd); - - pi->next_free = NULL; + gpr_asprintf(&err_msg, "epoll_create1 failed with error %d (%s)", errno, + strerror(errno)); + append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); + gpr_free(err_msg); + } else { + polling_island_add_wakeup_fd_locked(pi, &grpc_global_wakeup_fd, error); + pi->next_free = NULL; - if (initial_fd != NULL) { - /* Lock the polling island here just in case we 
got this structure from the - freelist and the polling island lock was not released yet (by the code - that adds the polling island to the freelist) */ - gpr_mu_lock(&pi->mu); - polling_island_add_fds_locked(pi, &initial_fd, 1, true); - gpr_mu_unlock(&pi->mu); + if (initial_fd != NULL) { + /* Lock the polling island here just in case we got this structure from + the freelist and the polling island lock was not released yet (by the + code that adds the polling island to the freelist) */ + gpr_mu_lock(&pi->mu); + polling_island_add_fds_locked(pi, &initial_fd, 1, true, error); + gpr_mu_unlock(&pi->mu); + } } return pi; @@ -534,7 +573,9 @@ static polling_island *polling_island_lock(polling_island *pi) { return pi; } -/* Gets the lock on the *latest* polling islands pointed by *p and *q. +/* Gets the lock on the *latest* polling islands in the linked lists pointed by + *p and *q (and also updates *p and *q to point to the latest polling islands) + This function is needed because calling the following block of code to obtain locks on polling islands (*p and *q) is prone to deadlocks. { @@ -550,18 +591,8 @@ static polling_island *polling_island_lock(polling_island *pi) { .. .. Critical section with both p1 and p2 locked .. - // Release locks - // **IMPORTANT**: Make sure you check p1 == p2 AFTER the function - // polling_island_lock_pair() was called and if so, release the lock only - // once. Note: Even if p1 != p2 beforec calling polling_island_lock_pair(), - // they might be after the function returns: - if (p1 == p2) { - gpr_mu_unlock(&p1->mu) - } else { - gpr_mu_unlock(&p1->mu); - gpr_mu_unlock(&p2->mu); - } - + // Release locks: Always call polling_island_unlock_pair() to release locks + polling_island_unlock_pair(p1, p2); */ static void polling_island_lock_pair(polling_island **p, polling_island **q) { polling_island *pi_1 = *p; @@ -623,39 +654,46 @@ static void polling_island_lock_pair(polling_island **p, polling_island **q) { *q = pi_2; } -static polling_island *polling_island_merge(polling_island *p, - polling_island *q) { - /* Get locks on both the polling islands */ - polling_island_lock_pair(&p, &q); - +static void polling_island_unlock_pair(polling_island *p, polling_island *q) { if (p == q) { - /* Nothing needs to be done here */ gpr_mu_unlock(&p->mu); - return p; + } else { + gpr_mu_unlock(&p->mu); + gpr_mu_unlock(&q->mu); } +} - /* Make sure that p points to the polling island with fewer fds than q */ - if (p->fd_cnt > q->fd_cnt) { - GPR_SWAP(polling_island *, p, q); - } +static polling_island *polling_island_merge(polling_island *p, + polling_island *q, + grpc_error **error) { + /* Get locks on both the polling islands */ + polling_island_lock_pair(&p, &q); - /* "Merge" p with q i.e move all the fds from p (The one with fewer fds) to q - Note that the refcounts on the fds being moved will not change here. This - is why the last parameter in the following two functions is 'false') */ - polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false); - polling_island_remove_all_fds_locked(p, false); + if (p != q) { + /* Make sure that p points to the polling island with fewer fds than q */ + if (p->fd_cnt > q->fd_cnt) { + GPR_SWAP(polling_island *, p, q); + } + + /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q + Note that the refcounts on the fds being moved will not change here. 
+ This is why the last param in the following two functions is 'false') */ + polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error); + polling_island_remove_all_fds_locked(p, false, error); - /* Wakeup all the pollers (if any) on p so that they can pickup this change */ - polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd); + /* Wakeup all the pollers (if any) on p so that they pickup this change */ + polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error); - /* Add the 'merged_to' link from p --> q */ - gpr_atm_rel_store(&p->merged_to, (gpr_atm)q); - PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */ + /* Add the 'merged_to' link from p --> q */ + gpr_atm_rel_store(&p->merged_to, (gpr_atm)q); + PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */ + } + /* else if p == q, nothing needs to be done */ - gpr_mu_unlock(&p->mu); - gpr_mu_unlock(&q->mu); + polling_island_unlock_pair(p, q); - /* Return the merged polling island */ + /* Return the merged polling island (Note that no merge would have happened + if p == q which is ok) */ return q; } @@ -853,6 +891,8 @@ static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure *on_done, int *release_fd, const char *reason) { bool is_fd_closed = false; + grpc_error *error = GRPC_ERROR_NONE; + gpr_mu_lock(&fd->mu); fd->on_done_closure = on_done; @@ -882,7 +922,7 @@ static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, gpr_mu_lock(&fd->pi_mu); if (fd->polling_island != NULL) { polling_island *pi_latest = polling_island_lock(fd->polling_island); - polling_island_remove_fd_locked(pi_latest, fd, is_fd_closed); + polling_island_remove_fd_locked(pi_latest, fd, is_fd_closed, &error); gpr_mu_unlock(&pi_latest->mu); PI_UNREF(fd->polling_island, "fd_orphan"); @@ -890,10 +930,11 @@ static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, } gpr_mu_unlock(&fd->pi_mu); - grpc_exec_ctx_sched(exec_ctx, fd->on_done_closure, GRPC_ERROR_NONE, NULL); + grpc_exec_ctx_sched(exec_ctx, fd->on_done_closure, error, NULL); gpr_mu_unlock(&fd->mu); UNREF_BY(fd, 2, reason); /* Drop the reference */ + GRPC_LOG_IF_ERROR("fd_orphan", GRPC_ERROR_REF(error)); } static grpc_error *fd_shutdown_error(bool shutdown) { @@ -1062,19 +1103,12 @@ static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) { worker->prev->next = worker->next->prev = worker; } -static void kick_append_error(grpc_error **composite, grpc_error *error) { - if (error == GRPC_ERROR_NONE) return; - if (*composite == GRPC_ERROR_NONE) { - *composite = GRPC_ERROR_CREATE("Kick Failure"); - } - *composite = grpc_error_add_child(*composite, error); -} - /* p->mu must be held before calling this function */ static grpc_error *pollset_kick(grpc_pollset *p, grpc_pollset_worker *specific_worker) { GPR_TIMER_BEGIN("pollset_kick", 0); grpc_error *error = GRPC_ERROR_NONE; + const char *err_desc = "Kick Failure"; grpc_pollset_worker *worker = specific_worker; if (worker != NULL) { @@ -1084,7 +1118,7 @@ static grpc_error *pollset_kick(grpc_pollset *p, for (worker = p->root_worker.next; worker != &p->root_worker; worker = worker->next) { if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) { - kick_append_error(&error, pollset_worker_kick(worker)); + append_error(&error, pollset_worker_kick(worker), err_desc); } } } else { @@ -1094,7 +1128,7 @@ static grpc_error *pollset_kick(grpc_pollset *p, } else { GPR_TIMER_MARK("kicked_specifically", 0); if (gpr_tls_get(&g_current_thread_worker) != 
(intptr_t)worker) { - kick_append_error(&error, pollset_worker_kick(worker)); + append_error(&error, pollset_worker_kick(worker), err_desc); } } } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) { @@ -1110,7 +1144,7 @@ static grpc_error *pollset_kick(grpc_pollset *p, if (worker != NULL) { GPR_TIMER_MARK("finally_kick", 0); push_back_worker(p, worker); - kick_append_error(&error, pollset_worker_kick(worker)); + append_error(&error, pollset_worker_kick(worker), err_desc); } else { GPR_TIMER_MARK("kicked_no_pollers", 0); p->kicked_without_pollers = true; @@ -1238,23 +1272,17 @@ static void pollset_reset(grpc_pollset *pollset) { pollset_release_polling_island(pollset, "ps_reset"); } -static void work_combine_error(grpc_error **composite, grpc_error *error) { - if (error == GRPC_ERROR_NONE) return; - if (*composite == GRPC_ERROR_NONE) { - *composite = GRPC_ERROR_CREATE("pollset_work"); - } - *composite = grpc_error_add_child(*composite, error); -} - #define GRPC_EPOLL_MAX_EVENTS 1000 -static grpc_error *pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset, - int timeout_ms, sigset_t *sig_mask) { +/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */ +static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, int timeout_ms, + sigset_t *sig_mask, grpc_error **error) { struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; int epoll_fd = -1; int ep_rv; polling_island *pi = NULL; - grpc_error *error = GRPC_ERROR_NONE; + char *err_msg; + const char *err_desc = "pollset_work_and_unlock"; GPR_TIMER_BEGIN("pollset_work_and_unlock", 0); /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the @@ -1265,11 +1293,15 @@ static grpc_error *pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, which we got the epoll_fd) got merged with another island while we are in this function. This is still okay because in such a case, we will wakeup right-away from epoll_wait() and pick up the latest polling_island the next - this function (i.e pollset_work_and_unlock()) is called. - */ + this function (i.e pollset_work_and_unlock()) is called */ if (pollset->polling_island == NULL) { - pollset->polling_island = polling_island_create(NULL); + pollset->polling_island = polling_island_create(NULL, error); + if (pollset->polling_island == NULL) { + GPR_TIMER_END("pollset_work_and_unlock", 0); + return; /* Fatal error. We cannot continue */ + } + PI_ADD_REF(pollset->polling_island, "ps"); } @@ -1297,8 +1329,10 @@ static grpc_error *pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, sig_mask); if (ep_rv < 0) { if (errno != EINTR) { - gpr_log(GPR_ERROR, "epoll_pwait() failed: %s", strerror(errno)); - work_combine_error(&error, GRPC_OS_ERROR(errno, "epoll_pwait")); + gpr_asprintf(&err_msg, + "epoll_wait() epoll fd: %d failed with error: %d (%s)", + epoll_fd, errno, strerror(errno)); + append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); } else { /* We were interrupted. 
Save an interation by doing a zero timeout epoll_wait to see if there are any other events of interest */ @@ -1314,8 +1348,9 @@ static grpc_error *pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, for (int i = 0; i < ep_rv; ++i) { void *data_ptr = ep_ev[i].data.ptr; if (data_ptr == &grpc_global_wakeup_fd) { - work_combine_error( - &error, grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd)); + append_error(error, + grpc_wakeup_fd_consume_wakeup(&grpc_global_wakeup_fd), + err_desc); } else if (data_ptr == &polling_island_wakeup_fd) { /* This means that our polling island is merged with a different island. We do not have to do anything here since the subsequent call @@ -1346,7 +1381,6 @@ static grpc_error *pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, PI_UNREF(pi, "ps_work"); GPR_TIMER_END("pollset_work_and_unlock", 0); - return error; } /* pollset->mu lock must be held by the caller before calling this. @@ -1368,6 +1402,7 @@ static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, worker.pt_id = pthread_self(); *worker_hdl = &worker; + gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset); gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); @@ -1379,14 +1414,37 @@ static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); pollset->kicked_without_pollers = 0; } else if (!pollset->shutting_down) { + /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up + (i.e 'kicking') a worker in the pollset. + A 'kick' is a way to inform that worker that there is some pending work + that needs immediate attention (like an event on the completion queue, + or a polling island merge that results in a new epoll-fd to wait on) and + that the worker should not spend time waiting in epoll_pwait(). + + A kick can come at anytime (i.e before/during or after the worker calls + epoll_pwait()) but in all cases we have to make sure that when a worker + gets a kick, it does not spend time in epoll_pwait(). In other words, one + kick should result in skipping/exiting of one epoll_pwait(); + + To accomplish this, we mask 'grpc_wakeup_signal' on this worker at all + times *except* when it is in epoll_pwait(). This way, the worker never + misses acting on a kick */ + sigemptyset(&new_mask); sigaddset(&new_mask, grpc_wakeup_signal); pthread_sigmask(SIG_BLOCK, &new_mask, &orig_mask); sigdelset(&orig_mask, grpc_wakeup_signal); + /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'. 
This is + the mask used at all times *except during epoll_wait()*" + orig_mask: The thread mask which allows 'grpc_wakeup_signal' and this is + the mask to use *during epoll_wait()* + + The new_mask is set on the worker before it is added to the pollset (i.e + before it can be kicked) */ - push_front_worker(pollset, &worker); + push_front_worker(pollset, &worker); /* Add worker to pollset */ - error = pollset_work_and_unlock(exec_ctx, pollset, timeout_ms, &orig_mask); + pollset_work_and_unlock(exec_ctx, pollset, timeout_ms, &orig_mask, &error); grpc_exec_ctx_flush(exec_ctx); gpr_mu_lock(&pollset->mu); @@ -1412,15 +1470,20 @@ static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, } *worker_hdl = NULL; + gpr_tls_set(&g_current_thread_pollset, (intptr_t)0); gpr_tls_set(&g_current_thread_worker, (intptr_t)0); + GPR_TIMER_END("pollset_work", 0); + GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error)); return error; } static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, grpc_fd *fd) { + grpc_error *error = GRPC_ERROR_NONE; + gpr_mu_lock(&pollset->mu); gpr_mu_lock(&fd->pi_mu); @@ -1443,19 +1506,23 @@ static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, if (fd->polling_island == pollset->polling_island) { pi_new = fd->polling_island; if (pi_new == NULL) { - pi_new = polling_island_create(fd); + pi_new = polling_island_create(fd, &error); } } else if (fd->polling_island == NULL) { pi_new = polling_island_lock(pollset->polling_island); - polling_island_add_fds_locked(pi_new, &fd, 1, true); + polling_island_add_fds_locked(pi_new, &fd, 1, true, &error); gpr_mu_unlock(&pi_new->mu); } else if (pollset->polling_island == NULL) { pi_new = polling_island_lock(fd->polling_island); gpr_mu_unlock(&pi_new->mu); } else { - pi_new = polling_island_merge(fd->polling_island, pollset->polling_island); + pi_new = polling_island_merge(fd->polling_island, pollset->polling_island, + &error); } + /* At this point, pi_new is the polling island that both fd->polling_island + and pollset->polling_island must be pointing to */ + if (fd->polling_island != pi_new) { PI_ADD_REF(pi_new, "fd"); if (fd->polling_island != NULL) { @@ -1645,13 +1712,10 @@ bool grpc_are_polling_islands_equal(void *p, void *q) { polling_island *p1 = p; polling_island *p2 = q; + /* Note: polling_island_lock_pair() may change p1 and p2 to point to the + latest polling islands in their respective linked lists */ polling_island_lock_pair(&p1, &p2); - if (p1 == p2) { - gpr_mu_unlock(&p1->mu); - } else { - gpr_mu_unlock(&p1->mu); - gpr_mu_unlock(&p2->mu); - } + polling_island_unlock_pair(p1, p2); return p1 == p2; } -- cgit v1.2.3 From 6a29545c8c5ef61346af3a9b0bdd2ddb39ba15c8 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Thu, 23 Jun 2016 15:53:10 -0700 Subject: Change the type of 'ref_count' in polling_island from gpr_atm to gpr_refcount --- src/core/lib/iomgr/ev_epoll_linux.c | 69 +++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 38 deletions(-) (limited to 'src/core/lib') diff --git a/src/core/lib/iomgr/ev_epoll_linux.c b/src/core/lib/iomgr/ev_epoll_linux.c index a77044edc5..4dca551e1e 100644 --- a/src/core/lib/iomgr/ev_epoll_linux.c +++ b/src/core/lib/iomgr/ev_epoll_linux.c @@ -121,6 +121,7 @@ struct grpc_fd { }; /* Reference counting for fds */ +// #define GRPC_FD_REF_COUNT_DEBUG #ifdef GRPC_FD_REF_COUNT_DEBUG static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line); static void fd_unref(grpc_fd *fd, const char *reason, const char 
*file, @@ -147,13 +148,13 @@ static void fd_global_shutdown(void); // #define GRPC_PI_REF_COUNT_DEBUG #ifdef GRPC_PI_REF_COUNT_DEBUG -#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), 1, (r), __FILE__, __LINE__) -#define PI_UNREF(p, r) pi_unref_dbg((p), 1, (r), __FILE__, __LINE__) +#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__) +#define PI_UNREF(p, r) pi_unref_dbg((p), (r), __FILE__, __LINE__) #else /* defined(GRPC_PI_REF_COUNT_DEBUG) */ -#define PI_ADD_REF(p, r) pi_add_ref((p), 1) -#define PI_UNREF(p, r) pi_unref((p), 1) +#define PI_ADD_REF(p, r) pi_add_ref((p)) +#define PI_UNREF(p, r) pi_unref((p)) #endif /* !defined(GPRC_PI_REF_COUNT_DEBUG) */ @@ -164,7 +165,7 @@ typedef struct polling_island { Once the ref count becomes zero, this structure is destroyed which means we should ensure that there is never a scenario where a PI_ADD_REF() is racing with a PI_UNREF() that just made the ref_count zero. */ - gpr_atm ref_count; + gpr_refcount ref_count; /* Pointer to the polling_island this merged into. * merged_to value is only set once in polling_island's lifetime (and that too @@ -281,50 +282,42 @@ gpr_atm g_epoll_sync; #endif /* defined(GRPC_TSAN) */ #ifdef GRPC_PI_REF_COUNT_DEBUG -long pi_add_ref(polling_island *pi, int ref_cnt); -long pi_unref(polling_island *pi, int ref_cnt); +void pi_add_ref(polling_island *pi); +void pi_unref(polling_island *pi); -void pi_add_ref_dbg(polling_island *pi, int ref_cnt, char *reason, char *file, - int line) { - long old_cnt = pi_add_ref(pi, ref_cnt); - gpr_log(GPR_DEBUG, "Add ref pi: %p, old:%ld -> new:%ld (%s) - (%s, %d)", - (void *)pi, old_cnt, (old_cnt + ref_cnt), reason, file, line); +void pi_add_ref_dbg(polling_island *pi, char *reason, char *file, int line) { + long old_cnt = gpr_atm_acq_load(&(pi->ref_count.count)); + pi_add_ref(pi); + gpr_log(GPR_DEBUG, "Add ref pi: %p, old: %ld -> new:%ld (%s) - (%s, %d)", + (void *)pi, old_cnt, old_cnt + 1, reason, file, line); } -void pi_unref_dbg(polling_island *pi, int ref_cnt, char *reason, char *file, - int line) { - long old_cnt = pi_unref(pi, ref_cnt); +void pi_unref_dbg(polling_island *pi, char *reason, char *file, int line) { + long old_cnt = gpr_atm_acq_load(&(pi->ref_count.count)); + pi_unref(pi); gpr_log(GPR_DEBUG, "Unref pi: %p, old:%ld -> new:%ld (%s) - (%s, %d)", - (void *)pi, old_cnt, (old_cnt - ref_cnt), reason, file, line); + (void *)pi, old_cnt, (old_cnt - 1), reason, file, line); } #endif -long pi_add_ref(polling_island *pi, int ref_cnt) { - return gpr_atm_full_fetch_add(&pi->ref_count, ref_cnt); -} - -long pi_unref(polling_island *pi, int ref_cnt) { - long old_cnt = gpr_atm_full_fetch_add(&pi->ref_count, -ref_cnt); +void pi_add_ref(polling_island *pi) { gpr_ref(&pi->ref_count); } - /* If ref count went to zero, delete the polling island. Note that this need - not be done under a lock. Once the ref count goes to zero, we are - guaranteed that no one else holds a reference to the polling island (and - that there is no racing pi_add_ref() call either. +void pi_unref(polling_island *pi) { + /* If ref count went to zero, delete the polling island. + Note that this deletion not be done under a lock. Once the ref count goes + to zero, we are guaranteed that no one else holds a reference to the + polling island (and that there is no racing pi_add_ref() call either). 
Also, if we are deleting the polling island and the merged_to field is non-empty, we should remove a ref to the merged_to polling island */ - if (old_cnt == ref_cnt) { + if (gpr_unref(&pi->ref_count)) { polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); polling_island_delete(pi); if (next != NULL) { PI_UNREF(next, "pi_delete"); /* Recursive call */ } - } else { - GPR_ASSERT(old_cnt > ref_cnt); } - - return old_cnt; } /* The caller is expected to hold pi->mu lock before calling this function */ @@ -482,7 +475,7 @@ static polling_island *polling_island_create(grpc_fd *initial_fd, pi->fds = NULL; } - gpr_atm_rel_store(&pi->ref_count, (gpr_atm)0); + gpr_ref_init(&pi->ref_count, 0); gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL); pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); @@ -762,8 +755,8 @@ static gpr_mu fd_freelist_mu; #define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__) static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file, int line) { - gpr_log(GPR_DEBUG, "FD %d %p ref %d %d -> %d [%s; %s:%d]", fd->fd, fd, n, - gpr_atm_no_barrier_load(&fd->refst), + gpr_log(GPR_DEBUG, "FD %d %p ref %d %ld -> %ld [%s; %s:%d]", fd->fd, + (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst), gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line); #else #define REF_BY(fd, n, reason) ref_by(fd, n) @@ -777,8 +770,8 @@ static void ref_by(grpc_fd *fd, int n) { static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file, int line) { gpr_atm old; - gpr_log(GPR_DEBUG, "FD %d %p unref %d %d -> %d [%s; %s:%d]", fd->fd, fd, n, - gpr_atm_no_barrier_load(&fd->refst), + gpr_log(GPR_DEBUG, "FD %d %p unref %d %ld -> %ld [%s; %s:%d]", fd->fd, + (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst), gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line); #else static void unref_by(grpc_fd *fd, int n) { @@ -865,10 +858,10 @@ static grpc_fd *fd_create(int fd, const char *name) { char *fd_name; gpr_asprintf(&fd_name, "%s fd=%d", name, fd); grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name); - gpr_free(fd_name); #ifdef GRPC_FD_REF_COUNT_DEBUG - gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, r, fd_name); + gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, (void *)new_fd, fd_name); #endif + gpr_free(fd_name); return new_fd; } -- cgit v1.2.3
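
A recurring concern in the patches above is taking locks on two polling islands at once without deadlocking, which is what the polling_island_lock_pair()/polling_island_unlock_pair() helpers are for. The snippet below is a minimal, self-contained sketch of that idea only: it uses plain pthread mutexes and a toy "island" struct rather than the actual gpr_mu/polling_island types, and it avoids deadlock by acquiring the two mutexes in a fixed (address) order, which is a simplified stand-in for what the real helpers achieve. The real helpers additionally chase each island's 'merged_to' chain to re-resolve the latest island before locking; this sketch deliberately leaves that out. All names here are illustrative, not part of the gRPC API.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for a polling island: just a mutex and an id.
   (The real polling_island also carries an epoll fd, an fd list and a
   'merged_to' chain, none of which this sketch models.) */
typedef struct island {
  pthread_mutex_t mu;
  int id;
} island;

/* Lock two islands without deadlocking by always acquiring the mutex at the
   lower address first; if both pointers name the same island, lock it once. */
static void island_lock_pair(island *a, island *b) {
  if (a == b) {
    pthread_mutex_lock(&a->mu);
  } else if ((uintptr_t)a < (uintptr_t)b) {
    pthread_mutex_lock(&a->mu);
    pthread_mutex_lock(&b->mu);
  } else {
    pthread_mutex_lock(&b->mu);
    pthread_mutex_lock(&a->mu);
  }
}

/* Matching unlock helper so callers never have to re-check whether the two
   pointers were equal (the same pitfall the real unlock_pair helper removes). */
static void island_unlock_pair(island *a, island *b) {
  pthread_mutex_unlock(&a->mu);
  if (a != b) {
    pthread_mutex_unlock(&b->mu);
  }
}

int main(void) {
  island x = {PTHREAD_MUTEX_INITIALIZER, 1};
  island y = {PTHREAD_MUTEX_INITIALIZER, 2};

  island_lock_pair(&x, &y);
  printf("holding islands %d and %d\n", x.id, y.id);
  island_unlock_pair(&x, &y);
  return 0;
}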
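
The pollset_add_fd() changes earlier in this series also follow a consistent discipline when re-pointing fd->polling_island and pollset->polling_island at a merged island: PI_ADD_REF on the new island is always taken before PI_UNREF on the old one, so neither island's refcount can transiently hit zero while the pointer is being swapped. Below is a minimal sketch of that "ref the new, then unref the old" pattern using a generic C11 atomic refcount; the obj type and retarget() helper are illustrative stand-ins, not gRPC code.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative refcounted object; not the gRPC polling_island type. */
typedef struct obj {
  atomic_long refs;
} obj;

static obj *obj_create(void) {
  obj *o = malloc(sizeof(*o));
  atomic_init(&o->refs, 1); /* creation reference */
  return o;
}

static void obj_ref(obj *o) { atomic_fetch_add(&o->refs, 1); }

static void obj_unref(obj *o) {
  /* fetch_sub returns the previous value; 1 means this was the last ref. */
  if (atomic_fetch_sub(&o->refs, 1) == 1) {
    free(o);
  }
}

/* Re-point *slot at 'target': take the new reference first, then drop the
   old one, so the target can never be freed out from under the swap. */
static void retarget(obj **slot, obj *target) {
  if (*slot == target) return;
  obj_ref(target);
  if (*slot != NULL) {
    obj_unref(*slot);
  }
  *slot = target;
}

int main(void) {
  obj *a = obj_create();
  obj *b = obj_create();
  obj *slot = NULL;

  retarget(&slot, a); /* slot takes a ref on a          (a:2, b:1) */
  retarget(&slot, b); /* ref b first, then unref a      (a:1, b:2) */

  obj_unref(slot);    /* drop slot's ref on b           (b:1) */
  obj_unref(a);       /* drop the creation ref; a freed */
  obj_unref(b);       /* drop the creation ref; b freed */
  printf("done\n");
  return 0;
}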