From 83062842c3601faeddcae8f901c515e3c78f3661 Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Thu, 21 Sep 2017 18:56:08 -0700 Subject: Changes for C to C++. Adding extern C to header files for compatibility. Also converting to .cc --- src/core/lib/iomgr/call_combiner.c | 202 --- src/core/lib/iomgr/call_combiner.cc | 202 +++ src/core/lib/iomgr/closure.c | 219 --- src/core/lib/iomgr/closure.cc | 219 +++ src/core/lib/iomgr/combiner.c | 370 ---- src/core/lib/iomgr/combiner.cc | 370 ++++ src/core/lib/iomgr/endpoint.c | 59 - src/core/lib/iomgr/endpoint.cc | 59 + src/core/lib/iomgr/endpoint_pair_posix.c | 72 - src/core/lib/iomgr/endpoint_pair_posix.cc | 72 + src/core/lib/iomgr/endpoint_pair_uv.c | 38 - src/core/lib/iomgr/endpoint_pair_uv.cc | 38 + src/core/lib/iomgr/endpoint_pair_windows.c | 86 - src/core/lib/iomgr/endpoint_pair_windows.cc | 86 + src/core/lib/iomgr/error.c | 801 --------- src/core/lib/iomgr/error.cc | 801 +++++++++ src/core/lib/iomgr/ev_epoll1_linux.c | 1267 -------------- src/core/lib/iomgr/ev_epoll1_linux.cc | 1267 ++++++++++++++ src/core/lib/iomgr/ev_epollex_linux.c | 1461 ---------------- src/core/lib/iomgr/ev_epollex_linux.cc | 1461 ++++++++++++++++ src/core/lib/iomgr/ev_epollsig_linux.c | 1769 -------------------- src/core/lib/iomgr/ev_epollsig_linux.cc | 1769 ++++++++++++++++++++ src/core/lib/iomgr/ev_poll_posix.c | 1746 ------------------- src/core/lib/iomgr/ev_poll_posix.cc | 1746 +++++++++++++++++++ src/core/lib/iomgr/ev_posix.c | 266 --- src/core/lib/iomgr/ev_posix.cc | 266 +++ src/core/lib/iomgr/ev_windows.c | 28 - src/core/lib/iomgr/ev_windows.cc | 28 + src/core/lib/iomgr/exec_ctx.c | 113 -- src/core/lib/iomgr/exec_ctx.cc | 113 ++ src/core/lib/iomgr/executor.c | 301 ---- src/core/lib/iomgr/executor.cc | 301 ++++ src/core/lib/iomgr/gethostname_fallback.c | 27 - src/core/lib/iomgr/gethostname_fallback.cc | 27 + src/core/lib/iomgr/gethostname_host_name_max.c | 37 - src/core/lib/iomgr/gethostname_host_name_max.cc | 37 + src/core/lib/iomgr/gethostname_sysconf.c | 37 - src/core/lib/iomgr/gethostname_sysconf.cc | 37 + src/core/lib/iomgr/iocp_windows.c | 155 -- src/core/lib/iomgr/iocp_windows.cc | 155 ++ src/core/lib/iomgr/iomgr.c | 170 -- src/core/lib/iomgr/iomgr.cc | 170 ++ src/core/lib/iomgr/iomgr_posix.c | 41 - src/core/lib/iomgr/iomgr_posix.cc | 41 + src/core/lib/iomgr/iomgr_uv.c | 42 - src/core/lib/iomgr/iomgr_uv.cc | 42 + src/core/lib/iomgr/iomgr_windows.c | 61 - src/core/lib/iomgr/iomgr_windows.cc | 61 + src/core/lib/iomgr/is_epollexclusive_available.c | 101 -- src/core/lib/iomgr/is_epollexclusive_available.cc | 101 ++ src/core/lib/iomgr/load_file.c | 78 - src/core/lib/iomgr/load_file.cc | 78 + src/core/lib/iomgr/lockfree_event.c | 241 --- src/core/lib/iomgr/lockfree_event.cc | 241 +++ src/core/lib/iomgr/network_status_tracker.c | 34 - src/core/lib/iomgr/network_status_tracker.cc | 34 + src/core/lib/iomgr/polling_entity.c | 89 - src/core/lib/iomgr/polling_entity.cc | 89 + src/core/lib/iomgr/pollset_set_uv.c | 48 - src/core/lib/iomgr/pollset_set_uv.cc | 48 + src/core/lib/iomgr/pollset_set_windows.c | 49 - src/core/lib/iomgr/pollset_set_windows.cc | 49 + src/core/lib/iomgr/pollset_uv.c | 155 -- src/core/lib/iomgr/pollset_uv.cc | 155 ++ src/core/lib/iomgr/pollset_windows.c | 222 --- src/core/lib/iomgr/pollset_windows.cc | 222 +++ src/core/lib/iomgr/resolve_address_posix.c | 193 --- src/core/lib/iomgr/resolve_address_posix.cc | 193 +++ src/core/lib/iomgr/resolve_address_uv.c | 280 ---- src/core/lib/iomgr/resolve_address_uv.cc | 280 ++++ src/core/lib/iomgr/resolve_address_windows.c | 175 -- src/core/lib/iomgr/resolve_address_windows.cc | 175 ++ src/core/lib/iomgr/resource_quota.c | 868 ---------- src/core/lib/iomgr/resource_quota.cc | 868 ++++++++++ src/core/lib/iomgr/sockaddr_utils.c | 262 --- src/core/lib/iomgr/sockaddr_utils.cc | 262 +++ src/core/lib/iomgr/socket_factory_posix.c | 92 - src/core/lib/iomgr/socket_factory_posix.cc | 92 + src/core/lib/iomgr/socket_mutator.c | 81 - src/core/lib/iomgr/socket_mutator.cc | 81 + src/core/lib/iomgr/socket_utils_common_posix.c | 315 ---- src/core/lib/iomgr/socket_utils_common_posix.cc | 315 ++++ src/core/lib/iomgr/socket_utils_linux.c | 42 - src/core/lib/iomgr/socket_utils_linux.cc | 42 + src/core/lib/iomgr/socket_utils_posix.c | 58 - src/core/lib/iomgr/socket_utils_posix.cc | 58 + src/core/lib/iomgr/socket_utils_uv.c | 34 - src/core/lib/iomgr/socket_utils_uv.cc | 34 + src/core/lib/iomgr/socket_utils_windows.c | 33 - src/core/lib/iomgr/socket_utils_windows.cc | 33 + src/core/lib/iomgr/socket_windows.c | 152 -- src/core/lib/iomgr/socket_windows.cc | 152 ++ src/core/lib/iomgr/tcp_client_posix.c | 356 ---- src/core/lib/iomgr/tcp_client_posix.cc | 356 ++++ src/core/lib/iomgr/tcp_client_uv.c | 183 -- src/core/lib/iomgr/tcp_client_uv.cc | 183 ++ src/core/lib/iomgr/tcp_client_windows.c | 245 --- src/core/lib/iomgr/tcp_client_windows.cc | 245 +++ src/core/lib/iomgr/tcp_posix.c | 819 --------- src/core/lib/iomgr/tcp_posix.cc | 819 +++++++++ src/core/lib/iomgr/tcp_server_posix.c | 565 ------- src/core/lib/iomgr/tcp_server_posix.cc | 565 +++++++ src/core/lib/iomgr/tcp_server_utils_posix_common.c | 206 --- .../lib/iomgr/tcp_server_utils_posix_common.cc | 206 +++ .../lib/iomgr/tcp_server_utils_posix_ifaddrs.c | 181 -- .../lib/iomgr/tcp_server_utils_posix_ifaddrs.cc | 181 ++ .../lib/iomgr/tcp_server_utils_posix_noifaddrs.c | 34 - .../lib/iomgr/tcp_server_utils_posix_noifaddrs.cc | 34 + src/core/lib/iomgr/tcp_server_uv.c | 454 ----- src/core/lib/iomgr/tcp_server_uv.cc | 454 +++++ src/core/lib/iomgr/tcp_server_windows.c | 543 ------ src/core/lib/iomgr/tcp_server_windows.cc | 543 ++++++ src/core/lib/iomgr/tcp_uv.c | 381 ----- src/core/lib/iomgr/tcp_uv.cc | 381 +++++ src/core/lib/iomgr/tcp_windows.c | 448 ----- src/core/lib/iomgr/tcp_windows.cc | 448 +++++ src/core/lib/iomgr/time_averaged_stats.c | 62 - src/core/lib/iomgr/time_averaged_stats.cc | 62 + src/core/lib/iomgr/timer_generic.c | 705 -------- src/core/lib/iomgr/timer_generic.cc | 705 ++++++++ src/core/lib/iomgr/timer_heap.c | 137 -- src/core/lib/iomgr/timer_heap.cc | 137 ++ src/core/lib/iomgr/timer_manager.c | 354 ---- src/core/lib/iomgr/timer_manager.cc | 354 ++++ src/core/lib/iomgr/timer_uv.c | 101 -- src/core/lib/iomgr/timer_uv.cc | 101 ++ src/core/lib/iomgr/udp_server.c | 549 ------ src/core/lib/iomgr/udp_server.cc | 549 ++++++ src/core/lib/iomgr/unix_sockets_posix.c | 95 -- src/core/lib/iomgr/unix_sockets_posix.cc | 95 ++ src/core/lib/iomgr/unix_sockets_posix_noop.c | 47 - src/core/lib/iomgr/unix_sockets_posix_noop.cc | 47 + src/core/lib/iomgr/wakeup_fd_cv.c | 104 -- src/core/lib/iomgr/wakeup_fd_cv.cc | 104 ++ src/core/lib/iomgr/wakeup_fd_eventfd.c | 82 - src/core/lib/iomgr/wakeup_fd_eventfd.cc | 82 + src/core/lib/iomgr/wakeup_fd_nospecial.c | 36 - src/core/lib/iomgr/wakeup_fd_nospecial.cc | 36 + src/core/lib/iomgr/wakeup_fd_pipe.c | 98 -- src/core/lib/iomgr/wakeup_fd_pipe.cc | 98 ++ src/core/lib/iomgr/wakeup_fd_posix.c | 85 - src/core/lib/iomgr/wakeup_fd_posix.cc | 85 + 142 files changed, 19840 insertions(+), 19840 deletions(-) delete mode 100644 src/core/lib/iomgr/call_combiner.c create mode 100644 src/core/lib/iomgr/call_combiner.cc delete mode 100644 src/core/lib/iomgr/closure.c create mode 100644 src/core/lib/iomgr/closure.cc delete mode 100644 src/core/lib/iomgr/combiner.c create mode 100644 src/core/lib/iomgr/combiner.cc delete mode 100644 src/core/lib/iomgr/endpoint.c create mode 100644 src/core/lib/iomgr/endpoint.cc delete mode 100644 src/core/lib/iomgr/endpoint_pair_posix.c create mode 100644 src/core/lib/iomgr/endpoint_pair_posix.cc delete mode 100644 src/core/lib/iomgr/endpoint_pair_uv.c create mode 100644 src/core/lib/iomgr/endpoint_pair_uv.cc delete mode 100644 src/core/lib/iomgr/endpoint_pair_windows.c create mode 100644 src/core/lib/iomgr/endpoint_pair_windows.cc delete mode 100644 src/core/lib/iomgr/error.c create mode 100644 src/core/lib/iomgr/error.cc delete mode 100644 src/core/lib/iomgr/ev_epoll1_linux.c create mode 100644 src/core/lib/iomgr/ev_epoll1_linux.cc delete mode 100644 src/core/lib/iomgr/ev_epollex_linux.c create mode 100644 src/core/lib/iomgr/ev_epollex_linux.cc delete mode 100644 src/core/lib/iomgr/ev_epollsig_linux.c create mode 100644 src/core/lib/iomgr/ev_epollsig_linux.cc delete mode 100644 src/core/lib/iomgr/ev_poll_posix.c create mode 100644 src/core/lib/iomgr/ev_poll_posix.cc delete mode 100644 src/core/lib/iomgr/ev_posix.c create mode 100644 src/core/lib/iomgr/ev_posix.cc delete mode 100644 src/core/lib/iomgr/ev_windows.c create mode 100644 src/core/lib/iomgr/ev_windows.cc delete mode 100644 src/core/lib/iomgr/exec_ctx.c create mode 100644 src/core/lib/iomgr/exec_ctx.cc delete mode 100644 src/core/lib/iomgr/executor.c create mode 100644 src/core/lib/iomgr/executor.cc delete mode 100644 src/core/lib/iomgr/gethostname_fallback.c create mode 100644 src/core/lib/iomgr/gethostname_fallback.cc delete mode 100644 src/core/lib/iomgr/gethostname_host_name_max.c create mode 100644 src/core/lib/iomgr/gethostname_host_name_max.cc delete mode 100644 src/core/lib/iomgr/gethostname_sysconf.c create mode 100644 src/core/lib/iomgr/gethostname_sysconf.cc delete mode 100644 src/core/lib/iomgr/iocp_windows.c create mode 100644 src/core/lib/iomgr/iocp_windows.cc delete mode 100644 src/core/lib/iomgr/iomgr.c create mode 100644 src/core/lib/iomgr/iomgr.cc delete mode 100644 src/core/lib/iomgr/iomgr_posix.c create mode 100644 src/core/lib/iomgr/iomgr_posix.cc delete mode 100644 src/core/lib/iomgr/iomgr_uv.c create mode 100644 src/core/lib/iomgr/iomgr_uv.cc delete mode 100644 src/core/lib/iomgr/iomgr_windows.c create mode 100644 src/core/lib/iomgr/iomgr_windows.cc delete mode 100644 src/core/lib/iomgr/is_epollexclusive_available.c create mode 100644 src/core/lib/iomgr/is_epollexclusive_available.cc delete mode 100644 src/core/lib/iomgr/load_file.c create mode 100644 src/core/lib/iomgr/load_file.cc delete mode 100644 src/core/lib/iomgr/lockfree_event.c create mode 100644 src/core/lib/iomgr/lockfree_event.cc delete mode 100644 src/core/lib/iomgr/network_status_tracker.c create mode 100644 src/core/lib/iomgr/network_status_tracker.cc delete mode 100644 src/core/lib/iomgr/polling_entity.c create mode 100644 src/core/lib/iomgr/polling_entity.cc delete mode 100644 src/core/lib/iomgr/pollset_set_uv.c create mode 100644 src/core/lib/iomgr/pollset_set_uv.cc delete mode 100644 src/core/lib/iomgr/pollset_set_windows.c create mode 100644 src/core/lib/iomgr/pollset_set_windows.cc delete mode 100644 src/core/lib/iomgr/pollset_uv.c create mode 100644 src/core/lib/iomgr/pollset_uv.cc delete mode 100644 src/core/lib/iomgr/pollset_windows.c create mode 100644 src/core/lib/iomgr/pollset_windows.cc delete mode 100644 src/core/lib/iomgr/resolve_address_posix.c create mode 100644 src/core/lib/iomgr/resolve_address_posix.cc delete mode 100644 src/core/lib/iomgr/resolve_address_uv.c create mode 100644 src/core/lib/iomgr/resolve_address_uv.cc delete mode 100644 src/core/lib/iomgr/resolve_address_windows.c create mode 100644 src/core/lib/iomgr/resolve_address_windows.cc delete mode 100644 src/core/lib/iomgr/resource_quota.c create mode 100644 src/core/lib/iomgr/resource_quota.cc delete mode 100644 src/core/lib/iomgr/sockaddr_utils.c create mode 100644 src/core/lib/iomgr/sockaddr_utils.cc delete mode 100644 src/core/lib/iomgr/socket_factory_posix.c create mode 100644 src/core/lib/iomgr/socket_factory_posix.cc delete mode 100644 src/core/lib/iomgr/socket_mutator.c create mode 100644 src/core/lib/iomgr/socket_mutator.cc delete mode 100644 src/core/lib/iomgr/socket_utils_common_posix.c create mode 100644 src/core/lib/iomgr/socket_utils_common_posix.cc delete mode 100644 src/core/lib/iomgr/socket_utils_linux.c create mode 100644 src/core/lib/iomgr/socket_utils_linux.cc delete mode 100644 src/core/lib/iomgr/socket_utils_posix.c create mode 100644 src/core/lib/iomgr/socket_utils_posix.cc delete mode 100644 src/core/lib/iomgr/socket_utils_uv.c create mode 100644 src/core/lib/iomgr/socket_utils_uv.cc delete mode 100644 src/core/lib/iomgr/socket_utils_windows.c create mode 100644 src/core/lib/iomgr/socket_utils_windows.cc delete mode 100644 src/core/lib/iomgr/socket_windows.c create mode 100644 src/core/lib/iomgr/socket_windows.cc delete mode 100644 src/core/lib/iomgr/tcp_client_posix.c create mode 100644 src/core/lib/iomgr/tcp_client_posix.cc delete mode 100644 src/core/lib/iomgr/tcp_client_uv.c create mode 100644 src/core/lib/iomgr/tcp_client_uv.cc delete mode 100644 src/core/lib/iomgr/tcp_client_windows.c create mode 100644 src/core/lib/iomgr/tcp_client_windows.cc delete mode 100644 src/core/lib/iomgr/tcp_posix.c create mode 100644 src/core/lib/iomgr/tcp_posix.cc delete mode 100644 src/core/lib/iomgr/tcp_server_posix.c create mode 100644 src/core/lib/iomgr/tcp_server_posix.cc delete mode 100644 src/core/lib/iomgr/tcp_server_utils_posix_common.c create mode 100644 src/core/lib/iomgr/tcp_server_utils_posix_common.cc delete mode 100644 src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.c create mode 100644 src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.cc delete mode 100644 src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.c create mode 100644 src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.cc delete mode 100644 src/core/lib/iomgr/tcp_server_uv.c create mode 100644 src/core/lib/iomgr/tcp_server_uv.cc delete mode 100644 src/core/lib/iomgr/tcp_server_windows.c create mode 100644 src/core/lib/iomgr/tcp_server_windows.cc delete mode 100644 src/core/lib/iomgr/tcp_uv.c create mode 100644 src/core/lib/iomgr/tcp_uv.cc delete mode 100644 src/core/lib/iomgr/tcp_windows.c create mode 100644 src/core/lib/iomgr/tcp_windows.cc delete mode 100644 src/core/lib/iomgr/time_averaged_stats.c create mode 100644 src/core/lib/iomgr/time_averaged_stats.cc delete mode 100644 src/core/lib/iomgr/timer_generic.c create mode 100644 src/core/lib/iomgr/timer_generic.cc delete mode 100644 src/core/lib/iomgr/timer_heap.c create mode 100644 src/core/lib/iomgr/timer_heap.cc delete mode 100644 src/core/lib/iomgr/timer_manager.c create mode 100644 src/core/lib/iomgr/timer_manager.cc delete mode 100644 src/core/lib/iomgr/timer_uv.c create mode 100644 src/core/lib/iomgr/timer_uv.cc delete mode 100644 src/core/lib/iomgr/udp_server.c create mode 100644 src/core/lib/iomgr/udp_server.cc delete mode 100644 src/core/lib/iomgr/unix_sockets_posix.c create mode 100644 src/core/lib/iomgr/unix_sockets_posix.cc delete mode 100644 src/core/lib/iomgr/unix_sockets_posix_noop.c create mode 100644 src/core/lib/iomgr/unix_sockets_posix_noop.cc delete mode 100644 src/core/lib/iomgr/wakeup_fd_cv.c create mode 100644 src/core/lib/iomgr/wakeup_fd_cv.cc delete mode 100644 src/core/lib/iomgr/wakeup_fd_eventfd.c create mode 100644 src/core/lib/iomgr/wakeup_fd_eventfd.cc delete mode 100644 src/core/lib/iomgr/wakeup_fd_nospecial.c create mode 100644 src/core/lib/iomgr/wakeup_fd_nospecial.cc delete mode 100644 src/core/lib/iomgr/wakeup_fd_pipe.c create mode 100644 src/core/lib/iomgr/wakeup_fd_pipe.cc delete mode 100644 src/core/lib/iomgr/wakeup_fd_posix.c create mode 100644 src/core/lib/iomgr/wakeup_fd_posix.cc (limited to 'src/core/lib/iomgr') diff --git a/src/core/lib/iomgr/call_combiner.c b/src/core/lib/iomgr/call_combiner.c deleted file mode 100644 index 48d8eaec18..0000000000 --- a/src/core/lib/iomgr/call_combiner.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/call_combiner.h" - -#include - -grpc_tracer_flag grpc_call_combiner_trace = - GRPC_TRACER_INITIALIZER(false, "call_combiner"); - -static grpc_error* decode_cancel_state_error(gpr_atm cancel_state) { - if (cancel_state & 1) { - return (grpc_error*)(cancel_state & ~(gpr_atm)1); - } - return GRPC_ERROR_NONE; -} - -static gpr_atm encode_cancel_state_error(grpc_error* error) { - return (gpr_atm)1 | (gpr_atm)error; -} - -void grpc_call_combiner_init(grpc_call_combiner* call_combiner) { - gpr_mpscq_init(&call_combiner->queue); -} - -void grpc_call_combiner_destroy(grpc_call_combiner* call_combiner) { - gpr_mpscq_destroy(&call_combiner->queue); - GRPC_ERROR_UNREF(decode_cancel_state_error(call_combiner->cancel_state)); -} - -#ifndef NDEBUG -#define DEBUG_ARGS , const char *file, int line -#define DEBUG_FMT_STR "%s:%d: " -#define DEBUG_FMT_ARGS , file, line -#else -#define DEBUG_ARGS -#define DEBUG_FMT_STR -#define DEBUG_FMT_ARGS -#endif - -void grpc_call_combiner_start(grpc_exec_ctx* exec_ctx, - grpc_call_combiner* call_combiner, - grpc_closure* closure, - grpc_error* error DEBUG_ARGS, - const char* reason) { - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, - "==> grpc_call_combiner_start() [%p] closure=%p [" DEBUG_FMT_STR - "%s] error=%s", - call_combiner, closure DEBUG_FMT_ARGS, reason, - grpc_error_string(error)); - } - size_t prev_size = - (size_t)gpr_atm_full_fetch_add(&call_combiner->size, (gpr_atm)1); - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, " size: %" PRIdPTR " -> %" PRIdPTR, prev_size, - prev_size + 1); - } - if (prev_size == 0) { - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, " EXECUTING IMMEDIATELY"); - } - // Queue was empty, so execute this closure immediately. - GRPC_CLOSURE_SCHED(exec_ctx, closure, error); - } else { - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_INFO, " QUEUING"); - } - // Queue was not empty, so add closure to queue. - closure->error_data.error = error; - gpr_mpscq_push(&call_combiner->queue, (gpr_mpscq_node*)closure); - } -} - -void grpc_call_combiner_stop(grpc_exec_ctx* exec_ctx, - grpc_call_combiner* call_combiner DEBUG_ARGS, - const char* reason) { - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, - "==> grpc_call_combiner_stop() [%p] [" DEBUG_FMT_STR "%s]", - call_combiner DEBUG_FMT_ARGS, reason); - } - size_t prev_size = - (size_t)gpr_atm_full_fetch_add(&call_combiner->size, (gpr_atm)-1); - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, " size: %" PRIdPTR " -> %" PRIdPTR, prev_size, - prev_size - 1); - } - GPR_ASSERT(prev_size >= 1); - if (prev_size > 1) { - while (true) { - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, " checking queue"); - } - bool empty; - grpc_closure* closure = (grpc_closure*)gpr_mpscq_pop_and_check_end( - &call_combiner->queue, &empty); - if (closure == NULL) { - // This can happen either due to a race condition within the mpscq - // code or because of a race with grpc_call_combiner_start(). - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, " queue returned no result; checking again"); - } - continue; - } - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, " EXECUTING FROM QUEUE: closure=%p error=%s", - closure, grpc_error_string(closure->error_data.error)); - } - GRPC_CLOSURE_SCHED(exec_ctx, closure, closure->error_data.error); - break; - } - } else if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, " queue empty"); - } -} - -void grpc_call_combiner_set_notify_on_cancel(grpc_exec_ctx* exec_ctx, - grpc_call_combiner* call_combiner, - grpc_closure* closure) { - while (true) { - // Decode original state. - gpr_atm original_state = gpr_atm_acq_load(&call_combiner->cancel_state); - grpc_error* original_error = decode_cancel_state_error(original_state); - // If error is set, invoke the cancellation closure immediately. - // Otherwise, store the new closure. - if (original_error != GRPC_ERROR_NONE) { - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, - "call_combiner=%p: scheduling notify_on_cancel callback=%p " - "for pre-existing cancellation", - call_combiner, closure); - } - GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_REF(original_error)); - break; - } else { - if (gpr_atm_full_cas(&call_combiner->cancel_state, original_state, - (gpr_atm)closure)) { - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, "call_combiner=%p: setting notify_on_cancel=%p", - call_combiner, closure); - } - // If we replaced an earlier closure, invoke the original - // closure with GRPC_ERROR_NONE. This allows callers to clean - // up any resources they may be holding for the callback. - if (original_state != 0) { - closure = (grpc_closure*)original_state; - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, - "call_combiner=%p: scheduling old cancel callback=%p", - call_combiner, closure); - } - GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); - } - break; - } - } - // cas failed, try again. - } -} - -void grpc_call_combiner_cancel(grpc_exec_ctx* exec_ctx, - grpc_call_combiner* call_combiner, - grpc_error* error) { - while (true) { - gpr_atm original_state = gpr_atm_acq_load(&call_combiner->cancel_state); - grpc_error* original_error = decode_cancel_state_error(original_state); - if (original_error != GRPC_ERROR_NONE) { - GRPC_ERROR_UNREF(error); - break; - } - if (gpr_atm_full_cas(&call_combiner->cancel_state, original_state, - encode_cancel_state_error(error))) { - if (original_state != 0) { - grpc_closure* notify_on_cancel = (grpc_closure*)original_state; - if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { - gpr_log(GPR_DEBUG, - "call_combiner=%p: scheduling notify_on_cancel callback=%p", - call_combiner, notify_on_cancel); - } - GRPC_CLOSURE_SCHED(exec_ctx, notify_on_cancel, GRPC_ERROR_REF(error)); - } - break; - } - // cas failed, try again. - } -} diff --git a/src/core/lib/iomgr/call_combiner.cc b/src/core/lib/iomgr/call_combiner.cc new file mode 100644 index 0000000000..48d8eaec18 --- /dev/null +++ b/src/core/lib/iomgr/call_combiner.cc @@ -0,0 +1,202 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/call_combiner.h" + +#include + +grpc_tracer_flag grpc_call_combiner_trace = + GRPC_TRACER_INITIALIZER(false, "call_combiner"); + +static grpc_error* decode_cancel_state_error(gpr_atm cancel_state) { + if (cancel_state & 1) { + return (grpc_error*)(cancel_state & ~(gpr_atm)1); + } + return GRPC_ERROR_NONE; +} + +static gpr_atm encode_cancel_state_error(grpc_error* error) { + return (gpr_atm)1 | (gpr_atm)error; +} + +void grpc_call_combiner_init(grpc_call_combiner* call_combiner) { + gpr_mpscq_init(&call_combiner->queue); +} + +void grpc_call_combiner_destroy(grpc_call_combiner* call_combiner) { + gpr_mpscq_destroy(&call_combiner->queue); + GRPC_ERROR_UNREF(decode_cancel_state_error(call_combiner->cancel_state)); +} + +#ifndef NDEBUG +#define DEBUG_ARGS , const char *file, int line +#define DEBUG_FMT_STR "%s:%d: " +#define DEBUG_FMT_ARGS , file, line +#else +#define DEBUG_ARGS +#define DEBUG_FMT_STR +#define DEBUG_FMT_ARGS +#endif + +void grpc_call_combiner_start(grpc_exec_ctx* exec_ctx, + grpc_call_combiner* call_combiner, + grpc_closure* closure, + grpc_error* error DEBUG_ARGS, + const char* reason) { + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, + "==> grpc_call_combiner_start() [%p] closure=%p [" DEBUG_FMT_STR + "%s] error=%s", + call_combiner, closure DEBUG_FMT_ARGS, reason, + grpc_error_string(error)); + } + size_t prev_size = + (size_t)gpr_atm_full_fetch_add(&call_combiner->size, (gpr_atm)1); + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, " size: %" PRIdPTR " -> %" PRIdPTR, prev_size, + prev_size + 1); + } + if (prev_size == 0) { + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, " EXECUTING IMMEDIATELY"); + } + // Queue was empty, so execute this closure immediately. + GRPC_CLOSURE_SCHED(exec_ctx, closure, error); + } else { + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_INFO, " QUEUING"); + } + // Queue was not empty, so add closure to queue. + closure->error_data.error = error; + gpr_mpscq_push(&call_combiner->queue, (gpr_mpscq_node*)closure); + } +} + +void grpc_call_combiner_stop(grpc_exec_ctx* exec_ctx, + grpc_call_combiner* call_combiner DEBUG_ARGS, + const char* reason) { + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, + "==> grpc_call_combiner_stop() [%p] [" DEBUG_FMT_STR "%s]", + call_combiner DEBUG_FMT_ARGS, reason); + } + size_t prev_size = + (size_t)gpr_atm_full_fetch_add(&call_combiner->size, (gpr_atm)-1); + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, " size: %" PRIdPTR " -> %" PRIdPTR, prev_size, + prev_size - 1); + } + GPR_ASSERT(prev_size >= 1); + if (prev_size > 1) { + while (true) { + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, " checking queue"); + } + bool empty; + grpc_closure* closure = (grpc_closure*)gpr_mpscq_pop_and_check_end( + &call_combiner->queue, &empty); + if (closure == NULL) { + // This can happen either due to a race condition within the mpscq + // code or because of a race with grpc_call_combiner_start(). + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, " queue returned no result; checking again"); + } + continue; + } + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, " EXECUTING FROM QUEUE: closure=%p error=%s", + closure, grpc_error_string(closure->error_data.error)); + } + GRPC_CLOSURE_SCHED(exec_ctx, closure, closure->error_data.error); + break; + } + } else if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, " queue empty"); + } +} + +void grpc_call_combiner_set_notify_on_cancel(grpc_exec_ctx* exec_ctx, + grpc_call_combiner* call_combiner, + grpc_closure* closure) { + while (true) { + // Decode original state. + gpr_atm original_state = gpr_atm_acq_load(&call_combiner->cancel_state); + grpc_error* original_error = decode_cancel_state_error(original_state); + // If error is set, invoke the cancellation closure immediately. + // Otherwise, store the new closure. + if (original_error != GRPC_ERROR_NONE) { + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, + "call_combiner=%p: scheduling notify_on_cancel callback=%p " + "for pre-existing cancellation", + call_combiner, closure); + } + GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_REF(original_error)); + break; + } else { + if (gpr_atm_full_cas(&call_combiner->cancel_state, original_state, + (gpr_atm)closure)) { + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, "call_combiner=%p: setting notify_on_cancel=%p", + call_combiner, closure); + } + // If we replaced an earlier closure, invoke the original + // closure with GRPC_ERROR_NONE. This allows callers to clean + // up any resources they may be holding for the callback. + if (original_state != 0) { + closure = (grpc_closure*)original_state; + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, + "call_combiner=%p: scheduling old cancel callback=%p", + call_combiner, closure); + } + GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); + } + break; + } + } + // cas failed, try again. + } +} + +void grpc_call_combiner_cancel(grpc_exec_ctx* exec_ctx, + grpc_call_combiner* call_combiner, + grpc_error* error) { + while (true) { + gpr_atm original_state = gpr_atm_acq_load(&call_combiner->cancel_state); + grpc_error* original_error = decode_cancel_state_error(original_state); + if (original_error != GRPC_ERROR_NONE) { + GRPC_ERROR_UNREF(error); + break; + } + if (gpr_atm_full_cas(&call_combiner->cancel_state, original_state, + encode_cancel_state_error(error))) { + if (original_state != 0) { + grpc_closure* notify_on_cancel = (grpc_closure*)original_state; + if (GRPC_TRACER_ON(grpc_call_combiner_trace)) { + gpr_log(GPR_DEBUG, + "call_combiner=%p: scheduling notify_on_cancel callback=%p", + call_combiner, notify_on_cancel); + } + GRPC_CLOSURE_SCHED(exec_ctx, notify_on_cancel, GRPC_ERROR_REF(error)); + } + break; + } + // cas failed, try again. + } +} diff --git a/src/core/lib/iomgr/closure.c b/src/core/lib/iomgr/closure.c deleted file mode 100644 index 00edefc6ae..0000000000 --- a/src/core/lib/iomgr/closure.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/closure.h" - -#include -#include -#include - -#include "src/core/lib/profiling/timers.h" - -#ifndef NDEBUG -grpc_tracer_flag grpc_trace_closure = GRPC_TRACER_INITIALIZER(false, "closure"); -#endif - -#ifndef NDEBUG -grpc_closure *grpc_closure_init(const char *file, int line, - grpc_closure *closure, grpc_iomgr_cb_func cb, - void *cb_arg, - grpc_closure_scheduler *scheduler) { -#else -grpc_closure *grpc_closure_init(grpc_closure *closure, grpc_iomgr_cb_func cb, - void *cb_arg, - grpc_closure_scheduler *scheduler) { -#endif - closure->cb = cb; - closure->cb_arg = cb_arg; - closure->scheduler = scheduler; -#ifndef NDEBUG - closure->scheduled = false; - closure->file_initiated = NULL; - closure->line_initiated = 0; - closure->run = false; - closure->file_created = file; - closure->line_created = line; -#endif - return closure; -} - -void grpc_closure_list_init(grpc_closure_list *closure_list) { - closure_list->head = closure_list->tail = NULL; -} - -bool grpc_closure_list_append(grpc_closure_list *closure_list, - grpc_closure *closure, grpc_error *error) { - if (closure == NULL) { - GRPC_ERROR_UNREF(error); - return false; - } - closure->error_data.error = error; - closure->next_data.next = NULL; - bool was_empty = (closure_list->head == NULL); - if (was_empty) { - closure_list->head = closure; - } else { - closure_list->tail->next_data.next = closure; - } - closure_list->tail = closure; - return was_empty; -} - -void grpc_closure_list_fail_all(grpc_closure_list *list, - grpc_error *forced_failure) { - for (grpc_closure *c = list->head; c != NULL; c = c->next_data.next) { - if (c->error_data.error == GRPC_ERROR_NONE) { - c->error_data.error = GRPC_ERROR_REF(forced_failure); - } - } - GRPC_ERROR_UNREF(forced_failure); -} - -bool grpc_closure_list_empty(grpc_closure_list closure_list) { - return closure_list.head == NULL; -} - -void grpc_closure_list_move(grpc_closure_list *src, grpc_closure_list *dst) { - if (src->head == NULL) { - return; - } - if (dst->head == NULL) { - *dst = *src; - } else { - dst->tail->next_data.next = src->head; - dst->tail = src->tail; - } - src->head = src->tail = NULL; -} - -typedef struct { - grpc_iomgr_cb_func cb; - void *cb_arg; - grpc_closure wrapper; -} wrapped_closure; - -static void closure_wrapper(grpc_exec_ctx *exec_ctx, void *arg, - grpc_error *error) { - wrapped_closure *wc = (wrapped_closure *)arg; - grpc_iomgr_cb_func cb = wc->cb; - void *cb_arg = wc->cb_arg; - gpr_free(wc); - cb(exec_ctx, cb_arg, error); -} - -#ifndef NDEBUG -grpc_closure *grpc_closure_create(const char *file, int line, - grpc_iomgr_cb_func cb, void *cb_arg, - grpc_closure_scheduler *scheduler) { -#else -grpc_closure *grpc_closure_create(grpc_iomgr_cb_func cb, void *cb_arg, - grpc_closure_scheduler *scheduler) { -#endif - wrapped_closure *wc = (wrapped_closure *)gpr_malloc(sizeof(*wc)); - wc->cb = cb; - wc->cb_arg = cb_arg; -#ifndef NDEBUG - grpc_closure_init(file, line, &wc->wrapper, closure_wrapper, wc, scheduler); -#else - grpc_closure_init(&wc->wrapper, closure_wrapper, wc, scheduler); -#endif - return &wc->wrapper; -} - -#ifndef NDEBUG -void grpc_closure_run(const char *file, int line, grpc_exec_ctx *exec_ctx, - grpc_closure *c, grpc_error *error) { -#else -void grpc_closure_run(grpc_exec_ctx *exec_ctx, grpc_closure *c, - grpc_error *error) { -#endif - GPR_TIMER_BEGIN("grpc_closure_run", 0); - if (c != NULL) { -#ifndef NDEBUG - c->file_initiated = file; - c->line_initiated = line; - c->run = true; -#endif - assert(c->cb); - c->scheduler->vtable->run(exec_ctx, c, error); - } else { - GRPC_ERROR_UNREF(error); - } - GPR_TIMER_END("grpc_closure_run", 0); -} - -#ifndef NDEBUG -void grpc_closure_sched(const char *file, int line, grpc_exec_ctx *exec_ctx, - grpc_closure *c, grpc_error *error) { -#else -void grpc_closure_sched(grpc_exec_ctx *exec_ctx, grpc_closure *c, - grpc_error *error) { -#endif - GPR_TIMER_BEGIN("grpc_closure_sched", 0); - if (c != NULL) { -#ifndef NDEBUG - if (c->scheduled) { - gpr_log(GPR_ERROR, - "Closure already scheduled. (closure: %p, created: [%s:%d], " - "previously scheduled at: [%s: %d] run?: %s", - c, c->file_created, c->line_created, c->file_initiated, - c->line_initiated, c->run ? "true" : "false"); - abort(); - } - c->scheduled = true; - c->file_initiated = file; - c->line_initiated = line; - c->run = false; -#endif - assert(c->cb); - c->scheduler->vtable->sched(exec_ctx, c, error); - } else { - GRPC_ERROR_UNREF(error); - } - GPR_TIMER_END("grpc_closure_sched", 0); -} - -#ifndef NDEBUG -void grpc_closure_list_sched(const char *file, int line, - grpc_exec_ctx *exec_ctx, grpc_closure_list *list) { -#else -void grpc_closure_list_sched(grpc_exec_ctx *exec_ctx, grpc_closure_list *list) { -#endif - grpc_closure *c = list->head; - while (c != NULL) { - grpc_closure *next = c->next_data.next; -#ifndef NDEBUG - if (c->scheduled) { - gpr_log(GPR_ERROR, - "Closure already scheduled. (closure: %p, created: [%s:%d], " - "previously scheduled at: [%s: %d] run?: %s", - c, c->file_created, c->line_created, c->file_initiated, - c->line_initiated, c->run ? "true" : "false"); - abort(); - } - c->scheduled = true; - c->file_initiated = file; - c->line_initiated = line; - c->run = false; -#endif - assert(c->cb); - c->scheduler->vtable->sched(exec_ctx, c, c->error_data.error); - c = next; - } - list->head = list->tail = NULL; -} diff --git a/src/core/lib/iomgr/closure.cc b/src/core/lib/iomgr/closure.cc new file mode 100644 index 0000000000..00edefc6ae --- /dev/null +++ b/src/core/lib/iomgr/closure.cc @@ -0,0 +1,219 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/closure.h" + +#include +#include +#include + +#include "src/core/lib/profiling/timers.h" + +#ifndef NDEBUG +grpc_tracer_flag grpc_trace_closure = GRPC_TRACER_INITIALIZER(false, "closure"); +#endif + +#ifndef NDEBUG +grpc_closure *grpc_closure_init(const char *file, int line, + grpc_closure *closure, grpc_iomgr_cb_func cb, + void *cb_arg, + grpc_closure_scheduler *scheduler) { +#else +grpc_closure *grpc_closure_init(grpc_closure *closure, grpc_iomgr_cb_func cb, + void *cb_arg, + grpc_closure_scheduler *scheduler) { +#endif + closure->cb = cb; + closure->cb_arg = cb_arg; + closure->scheduler = scheduler; +#ifndef NDEBUG + closure->scheduled = false; + closure->file_initiated = NULL; + closure->line_initiated = 0; + closure->run = false; + closure->file_created = file; + closure->line_created = line; +#endif + return closure; +} + +void grpc_closure_list_init(grpc_closure_list *closure_list) { + closure_list->head = closure_list->tail = NULL; +} + +bool grpc_closure_list_append(grpc_closure_list *closure_list, + grpc_closure *closure, grpc_error *error) { + if (closure == NULL) { + GRPC_ERROR_UNREF(error); + return false; + } + closure->error_data.error = error; + closure->next_data.next = NULL; + bool was_empty = (closure_list->head == NULL); + if (was_empty) { + closure_list->head = closure; + } else { + closure_list->tail->next_data.next = closure; + } + closure_list->tail = closure; + return was_empty; +} + +void grpc_closure_list_fail_all(grpc_closure_list *list, + grpc_error *forced_failure) { + for (grpc_closure *c = list->head; c != NULL; c = c->next_data.next) { + if (c->error_data.error == GRPC_ERROR_NONE) { + c->error_data.error = GRPC_ERROR_REF(forced_failure); + } + } + GRPC_ERROR_UNREF(forced_failure); +} + +bool grpc_closure_list_empty(grpc_closure_list closure_list) { + return closure_list.head == NULL; +} + +void grpc_closure_list_move(grpc_closure_list *src, grpc_closure_list *dst) { + if (src->head == NULL) { + return; + } + if (dst->head == NULL) { + *dst = *src; + } else { + dst->tail->next_data.next = src->head; + dst->tail = src->tail; + } + src->head = src->tail = NULL; +} + +typedef struct { + grpc_iomgr_cb_func cb; + void *cb_arg; + grpc_closure wrapper; +} wrapped_closure; + +static void closure_wrapper(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error) { + wrapped_closure *wc = (wrapped_closure *)arg; + grpc_iomgr_cb_func cb = wc->cb; + void *cb_arg = wc->cb_arg; + gpr_free(wc); + cb(exec_ctx, cb_arg, error); +} + +#ifndef NDEBUG +grpc_closure *grpc_closure_create(const char *file, int line, + grpc_iomgr_cb_func cb, void *cb_arg, + grpc_closure_scheduler *scheduler) { +#else +grpc_closure *grpc_closure_create(grpc_iomgr_cb_func cb, void *cb_arg, + grpc_closure_scheduler *scheduler) { +#endif + wrapped_closure *wc = (wrapped_closure *)gpr_malloc(sizeof(*wc)); + wc->cb = cb; + wc->cb_arg = cb_arg; +#ifndef NDEBUG + grpc_closure_init(file, line, &wc->wrapper, closure_wrapper, wc, scheduler); +#else + grpc_closure_init(&wc->wrapper, closure_wrapper, wc, scheduler); +#endif + return &wc->wrapper; +} + +#ifndef NDEBUG +void grpc_closure_run(const char *file, int line, grpc_exec_ctx *exec_ctx, + grpc_closure *c, grpc_error *error) { +#else +void grpc_closure_run(grpc_exec_ctx *exec_ctx, grpc_closure *c, + grpc_error *error) { +#endif + GPR_TIMER_BEGIN("grpc_closure_run", 0); + if (c != NULL) { +#ifndef NDEBUG + c->file_initiated = file; + c->line_initiated = line; + c->run = true; +#endif + assert(c->cb); + c->scheduler->vtable->run(exec_ctx, c, error); + } else { + GRPC_ERROR_UNREF(error); + } + GPR_TIMER_END("grpc_closure_run", 0); +} + +#ifndef NDEBUG +void grpc_closure_sched(const char *file, int line, grpc_exec_ctx *exec_ctx, + grpc_closure *c, grpc_error *error) { +#else +void grpc_closure_sched(grpc_exec_ctx *exec_ctx, grpc_closure *c, + grpc_error *error) { +#endif + GPR_TIMER_BEGIN("grpc_closure_sched", 0); + if (c != NULL) { +#ifndef NDEBUG + if (c->scheduled) { + gpr_log(GPR_ERROR, + "Closure already scheduled. (closure: %p, created: [%s:%d], " + "previously scheduled at: [%s: %d] run?: %s", + c, c->file_created, c->line_created, c->file_initiated, + c->line_initiated, c->run ? "true" : "false"); + abort(); + } + c->scheduled = true; + c->file_initiated = file; + c->line_initiated = line; + c->run = false; +#endif + assert(c->cb); + c->scheduler->vtable->sched(exec_ctx, c, error); + } else { + GRPC_ERROR_UNREF(error); + } + GPR_TIMER_END("grpc_closure_sched", 0); +} + +#ifndef NDEBUG +void grpc_closure_list_sched(const char *file, int line, + grpc_exec_ctx *exec_ctx, grpc_closure_list *list) { +#else +void grpc_closure_list_sched(grpc_exec_ctx *exec_ctx, grpc_closure_list *list) { +#endif + grpc_closure *c = list->head; + while (c != NULL) { + grpc_closure *next = c->next_data.next; +#ifndef NDEBUG + if (c->scheduled) { + gpr_log(GPR_ERROR, + "Closure already scheduled. (closure: %p, created: [%s:%d], " + "previously scheduled at: [%s: %d] run?: %s", + c, c->file_created, c->line_created, c->file_initiated, + c->line_initiated, c->run ? "true" : "false"); + abort(); + } + c->scheduled = true; + c->file_initiated = file; + c->line_initiated = line; + c->run = false; +#endif + assert(c->cb); + c->scheduler->vtable->sched(exec_ctx, c, c->error_data.error); + c = next; + } + list->head = list->tail = NULL; +} diff --git a/src/core/lib/iomgr/combiner.c b/src/core/lib/iomgr/combiner.c deleted file mode 100644 index f899b25f10..0000000000 --- a/src/core/lib/iomgr/combiner.c +++ /dev/null @@ -1,370 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/combiner.h" - -#include -#include - -#include -#include - -#include "src/core/lib/debug/stats.h" -#include "src/core/lib/iomgr/executor.h" -#include "src/core/lib/profiling/timers.h" - -grpc_tracer_flag grpc_combiner_trace = - GRPC_TRACER_INITIALIZER(false, "combiner"); - -#define GRPC_COMBINER_TRACE(fn) \ - do { \ - if (GRPC_TRACER_ON(grpc_combiner_trace)) { \ - fn; \ - } \ - } while (0) - -#define STATE_UNORPHANED 1 -#define STATE_ELEM_COUNT_LOW_BIT 2 - -struct grpc_combiner { - grpc_combiner *next_combiner_on_this_exec_ctx; - grpc_closure_scheduler scheduler; - grpc_closure_scheduler finally_scheduler; - gpr_mpscq queue; - // either: - // a pointer to the initiating exec ctx if that is the only exec_ctx that has - // ever queued to this combiner, or NULL. If this is non-null, it's not - // dereferencable (since the initiating exec_ctx may have gone out of scope) - gpr_atm initiating_exec_ctx_or_null; - // state is: - // lower bit - zero if orphaned (STATE_UNORPHANED) - // other bits - number of items queued on the lock (STATE_ELEM_COUNT_LOW_BIT) - gpr_atm state; - bool time_to_execute_final_list; - grpc_closure_list final_list; - grpc_closure offload; - gpr_refcount refs; -}; - -static void combiner_exec(grpc_exec_ctx *exec_ctx, grpc_closure *closure, - grpc_error *error); -static void combiner_finally_exec(grpc_exec_ctx *exec_ctx, - grpc_closure *closure, grpc_error *error); - -static const grpc_closure_scheduler_vtable scheduler = { - combiner_exec, combiner_exec, "combiner:immediately"}; -static const grpc_closure_scheduler_vtable finally_scheduler = { - combiner_finally_exec, combiner_finally_exec, "combiner:finally"}; - -static void offload(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error); - -grpc_combiner *grpc_combiner_create(void) { - grpc_combiner *lock = (grpc_combiner *)gpr_zalloc(sizeof(*lock)); - gpr_ref_init(&lock->refs, 1); - lock->scheduler.vtable = &scheduler; - lock->finally_scheduler.vtable = &finally_scheduler; - gpr_atm_no_barrier_store(&lock->state, STATE_UNORPHANED); - gpr_mpscq_init(&lock->queue); - grpc_closure_list_init(&lock->final_list); - GRPC_CLOSURE_INIT(&lock->offload, offload, lock, - grpc_executor_scheduler(GRPC_EXECUTOR_SHORT)); - GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, "C:%p create", lock)); - return lock; -} - -static void really_destroy(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) { - GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, "C:%p really_destroy", lock)); - GPR_ASSERT(gpr_atm_no_barrier_load(&lock->state) == 0); - gpr_mpscq_destroy(&lock->queue); - gpr_free(lock); -} - -static void start_destroy(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) { - gpr_atm old_state = gpr_atm_full_fetch_add(&lock->state, -STATE_UNORPHANED); - GRPC_COMBINER_TRACE(gpr_log( - GPR_DEBUG, "C:%p really_destroy old_state=%" PRIdPTR, lock, old_state)); - if (old_state == 1) { - really_destroy(exec_ctx, lock); - } -} - -#ifndef NDEBUG -#define GRPC_COMBINER_DEBUG_SPAM(op, delta) \ - if (GRPC_TRACER_ON(grpc_combiner_trace)) { \ - gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, \ - "C:%p %s %" PRIdPTR " --> %" PRIdPTR " %s", lock, (op), \ - gpr_atm_no_barrier_load(&lock->refs.count), \ - gpr_atm_no_barrier_load(&lock->refs.count) + (delta), reason); \ - } -#else -#define GRPC_COMBINER_DEBUG_SPAM(op, delta) -#endif - -void grpc_combiner_unref(grpc_exec_ctx *exec_ctx, - grpc_combiner *lock GRPC_COMBINER_DEBUG_ARGS) { - GRPC_COMBINER_DEBUG_SPAM("UNREF", -1); - if (gpr_unref(&lock->refs)) { - start_destroy(exec_ctx, lock); - } -} - -grpc_combiner *grpc_combiner_ref(grpc_combiner *lock GRPC_COMBINER_DEBUG_ARGS) { - GRPC_COMBINER_DEBUG_SPAM(" REF", 1); - gpr_ref(&lock->refs); - return lock; -} - -static void push_last_on_exec_ctx(grpc_exec_ctx *exec_ctx, - grpc_combiner *lock) { - lock->next_combiner_on_this_exec_ctx = NULL; - if (exec_ctx->active_combiner == NULL) { - exec_ctx->active_combiner = exec_ctx->last_combiner = lock; - } else { - exec_ctx->last_combiner->next_combiner_on_this_exec_ctx = lock; - exec_ctx->last_combiner = lock; - } -} - -static void push_first_on_exec_ctx(grpc_exec_ctx *exec_ctx, - grpc_combiner *lock) { - lock->next_combiner_on_this_exec_ctx = exec_ctx->active_combiner; - exec_ctx->active_combiner = lock; - if (lock->next_combiner_on_this_exec_ctx == NULL) { - exec_ctx->last_combiner = lock; - } -} - -#define COMBINER_FROM_CLOSURE_SCHEDULER(closure, scheduler_name) \ - ((grpc_combiner *)(((char *)((closure)->scheduler)) - \ - offsetof(grpc_combiner, scheduler_name))) - -static void combiner_exec(grpc_exec_ctx *exec_ctx, grpc_closure *cl, - grpc_error *error) { - GRPC_STATS_INC_COMBINER_LOCKS_SCHEDULED_ITEMS(exec_ctx); - GPR_TIMER_BEGIN("combiner.execute", 0); - grpc_combiner *lock = COMBINER_FROM_CLOSURE_SCHEDULER(cl, scheduler); - gpr_atm last = gpr_atm_full_fetch_add(&lock->state, STATE_ELEM_COUNT_LOW_BIT); - GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, - "C:%p grpc_combiner_execute c=%p last=%" PRIdPTR, - lock, cl, last)); - if (last == 1) { - GRPC_STATS_INC_COMBINER_LOCKS_INITIATED(exec_ctx); - gpr_atm_no_barrier_store(&lock->initiating_exec_ctx_or_null, - (gpr_atm)exec_ctx); - // first element on this list: add it to the list of combiner locks - // executing within this exec_ctx - push_last_on_exec_ctx(exec_ctx, lock); - } else { - // there may be a race with setting here: if that happens, we may delay - // offload for one or two actions, and that's fine - gpr_atm initiator = - gpr_atm_no_barrier_load(&lock->initiating_exec_ctx_or_null); - if (initiator != 0 && initiator != (gpr_atm)exec_ctx) { - gpr_atm_no_barrier_store(&lock->initiating_exec_ctx_or_null, 0); - } - } - GPR_ASSERT(last & STATE_UNORPHANED); // ensure lock has not been destroyed - assert(cl->cb); - cl->error_data.error = error; - gpr_mpscq_push(&lock->queue, &cl->next_data.atm_next); - GPR_TIMER_END("combiner.execute", 0); -} - -static void move_next(grpc_exec_ctx *exec_ctx) { - exec_ctx->active_combiner = - exec_ctx->active_combiner->next_combiner_on_this_exec_ctx; - if (exec_ctx->active_combiner == NULL) { - exec_ctx->last_combiner = NULL; - } -} - -static void offload(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { - grpc_combiner *lock = (grpc_combiner *)arg; - push_last_on_exec_ctx(exec_ctx, lock); -} - -static void queue_offload(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) { - GRPC_STATS_INC_COMBINER_LOCKS_OFFLOADED(exec_ctx); - move_next(exec_ctx); - GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, "C:%p queue_offload", lock)); - GRPC_CLOSURE_SCHED(exec_ctx, &lock->offload, GRPC_ERROR_NONE); -} - -bool grpc_combiner_continue_exec_ctx(grpc_exec_ctx *exec_ctx) { - GPR_TIMER_BEGIN("combiner.continue_exec_ctx", 0); - grpc_combiner *lock = exec_ctx->active_combiner; - if (lock == NULL) { - GPR_TIMER_END("combiner.continue_exec_ctx", 0); - return false; - } - - bool contended = - gpr_atm_no_barrier_load(&lock->initiating_exec_ctx_or_null) == 0; - - GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, - "C:%p grpc_combiner_continue_exec_ctx " - "contended=%d " - "exec_ctx_ready_to_finish=%d " - "time_to_execute_final_list=%d", - lock, contended, - grpc_exec_ctx_ready_to_finish(exec_ctx), - lock->time_to_execute_final_list)); - - if (contended && grpc_exec_ctx_ready_to_finish(exec_ctx) && - grpc_executor_is_threaded()) { - GPR_TIMER_MARK("offload_from_finished_exec_ctx", 0); - // this execution context wants to move on: schedule remaining work to be - // picked up on the executor - queue_offload(exec_ctx, lock); - GPR_TIMER_END("combiner.continue_exec_ctx", 0); - return true; - } - - if (!lock->time_to_execute_final_list || - // peek to see if something new has shown up, and execute that with - // priority - (gpr_atm_acq_load(&lock->state) >> 1) > 1) { - gpr_mpscq_node *n = gpr_mpscq_pop(&lock->queue); - GRPC_COMBINER_TRACE( - gpr_log(GPR_DEBUG, "C:%p maybe_finish_one n=%p", lock, n)); - if (n == NULL) { - // queue is in an inconsistent state: use this as a cue that we should - // go off and do something else for a while (and come back later) - GPR_TIMER_MARK("delay_busy", 0); - queue_offload(exec_ctx, lock); - GPR_TIMER_END("combiner.continue_exec_ctx", 0); - return true; - } - GPR_TIMER_BEGIN("combiner.exec1", 0); - grpc_closure *cl = (grpc_closure *)n; - grpc_error *cl_err = cl->error_data.error; -#ifndef NDEBUG - cl->scheduled = false; -#endif - cl->cb(exec_ctx, cl->cb_arg, cl_err); - GRPC_ERROR_UNREF(cl_err); - GPR_TIMER_END("combiner.exec1", 0); - } else { - grpc_closure *c = lock->final_list.head; - GPR_ASSERT(c != NULL); - grpc_closure_list_init(&lock->final_list); - int loops = 0; - while (c != NULL) { - GPR_TIMER_BEGIN("combiner.exec_1final", 0); - GRPC_COMBINER_TRACE( - gpr_log(GPR_DEBUG, "C:%p execute_final[%d] c=%p", lock, loops, c)); - grpc_closure *next = c->next_data.next; - grpc_error *error = c->error_data.error; -#ifndef NDEBUG - c->scheduled = false; -#endif - c->cb(exec_ctx, c->cb_arg, error); - GRPC_ERROR_UNREF(error); - c = next; - GPR_TIMER_END("combiner.exec_1final", 0); - } - } - - GPR_TIMER_MARK("unref", 0); - move_next(exec_ctx); - lock->time_to_execute_final_list = false; - gpr_atm old_state = - gpr_atm_full_fetch_add(&lock->state, -STATE_ELEM_COUNT_LOW_BIT); - GRPC_COMBINER_TRACE( - gpr_log(GPR_DEBUG, "C:%p finish old_state=%" PRIdPTR, lock, old_state)); -// Define a macro to ease readability of the following switch statement. -#define OLD_STATE_WAS(orphaned, elem_count) \ - (((orphaned) ? 0 : STATE_UNORPHANED) | \ - ((elem_count)*STATE_ELEM_COUNT_LOW_BIT)) - // Depending on what the previous state was, we need to perform different - // actions. - switch (old_state) { - default: - // we have multiple queued work items: just continue executing them - break; - case OLD_STATE_WAS(false, 2): - case OLD_STATE_WAS(true, 2): - // we're down to one queued item: if it's the final list we should do that - if (!grpc_closure_list_empty(lock->final_list)) { - lock->time_to_execute_final_list = true; - } - break; - case OLD_STATE_WAS(false, 1): - // had one count, one unorphaned --> unlocked unorphaned - GPR_TIMER_END("combiner.continue_exec_ctx", 0); - return true; - case OLD_STATE_WAS(true, 1): - // and one count, one orphaned --> unlocked and orphaned - really_destroy(exec_ctx, lock); - GPR_TIMER_END("combiner.continue_exec_ctx", 0); - return true; - case OLD_STATE_WAS(false, 0): - case OLD_STATE_WAS(true, 0): - // these values are illegal - representing an already unlocked or - // deleted lock - GPR_TIMER_END("combiner.continue_exec_ctx", 0); - GPR_UNREACHABLE_CODE(return true); - } - push_first_on_exec_ctx(exec_ctx, lock); - GPR_TIMER_END("combiner.continue_exec_ctx", 0); - return true; -} - -static void enqueue_finally(grpc_exec_ctx *exec_ctx, void *closure, - grpc_error *error); - -static void combiner_finally_exec(grpc_exec_ctx *exec_ctx, - grpc_closure *closure, grpc_error *error) { - GRPC_STATS_INC_COMBINER_LOCKS_SCHEDULED_FINAL_ITEMS(exec_ctx); - grpc_combiner *lock = - COMBINER_FROM_CLOSURE_SCHEDULER(closure, finally_scheduler); - GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, - "C:%p grpc_combiner_execute_finally c=%p; ac=%p", - lock, closure, exec_ctx->active_combiner)); - GPR_TIMER_BEGIN("combiner.execute_finally", 0); - if (exec_ctx->active_combiner != lock) { - GPR_TIMER_MARK("slowpath", 0); - GRPC_CLOSURE_SCHED(exec_ctx, - GRPC_CLOSURE_CREATE(enqueue_finally, closure, - grpc_combiner_scheduler(lock)), - error); - GPR_TIMER_END("combiner.execute_finally", 0); - return; - } - - if (grpc_closure_list_empty(lock->final_list)) { - gpr_atm_full_fetch_add(&lock->state, STATE_ELEM_COUNT_LOW_BIT); - } - grpc_closure_list_append(&lock->final_list, closure, error); - GPR_TIMER_END("combiner.execute_finally", 0); -} - -static void enqueue_finally(grpc_exec_ctx *exec_ctx, void *closure, - grpc_error *error) { - combiner_finally_exec(exec_ctx, (grpc_closure *)closure, - GRPC_ERROR_REF(error)); -} - -grpc_closure_scheduler *grpc_combiner_scheduler(grpc_combiner *combiner) { - return &combiner->scheduler; -} - -grpc_closure_scheduler *grpc_combiner_finally_scheduler( - grpc_combiner *combiner) { - return &combiner->finally_scheduler; -} diff --git a/src/core/lib/iomgr/combiner.cc b/src/core/lib/iomgr/combiner.cc new file mode 100644 index 0000000000..f899b25f10 --- /dev/null +++ b/src/core/lib/iomgr/combiner.cc @@ -0,0 +1,370 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/combiner.h" + +#include +#include + +#include +#include + +#include "src/core/lib/debug/stats.h" +#include "src/core/lib/iomgr/executor.h" +#include "src/core/lib/profiling/timers.h" + +grpc_tracer_flag grpc_combiner_trace = + GRPC_TRACER_INITIALIZER(false, "combiner"); + +#define GRPC_COMBINER_TRACE(fn) \ + do { \ + if (GRPC_TRACER_ON(grpc_combiner_trace)) { \ + fn; \ + } \ + } while (0) + +#define STATE_UNORPHANED 1 +#define STATE_ELEM_COUNT_LOW_BIT 2 + +struct grpc_combiner { + grpc_combiner *next_combiner_on_this_exec_ctx; + grpc_closure_scheduler scheduler; + grpc_closure_scheduler finally_scheduler; + gpr_mpscq queue; + // either: + // a pointer to the initiating exec ctx if that is the only exec_ctx that has + // ever queued to this combiner, or NULL. If this is non-null, it's not + // dereferencable (since the initiating exec_ctx may have gone out of scope) + gpr_atm initiating_exec_ctx_or_null; + // state is: + // lower bit - zero if orphaned (STATE_UNORPHANED) + // other bits - number of items queued on the lock (STATE_ELEM_COUNT_LOW_BIT) + gpr_atm state; + bool time_to_execute_final_list; + grpc_closure_list final_list; + grpc_closure offload; + gpr_refcount refs; +}; + +static void combiner_exec(grpc_exec_ctx *exec_ctx, grpc_closure *closure, + grpc_error *error); +static void combiner_finally_exec(grpc_exec_ctx *exec_ctx, + grpc_closure *closure, grpc_error *error); + +static const grpc_closure_scheduler_vtable scheduler = { + combiner_exec, combiner_exec, "combiner:immediately"}; +static const grpc_closure_scheduler_vtable finally_scheduler = { + combiner_finally_exec, combiner_finally_exec, "combiner:finally"}; + +static void offload(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error); + +grpc_combiner *grpc_combiner_create(void) { + grpc_combiner *lock = (grpc_combiner *)gpr_zalloc(sizeof(*lock)); + gpr_ref_init(&lock->refs, 1); + lock->scheduler.vtable = &scheduler; + lock->finally_scheduler.vtable = &finally_scheduler; + gpr_atm_no_barrier_store(&lock->state, STATE_UNORPHANED); + gpr_mpscq_init(&lock->queue); + grpc_closure_list_init(&lock->final_list); + GRPC_CLOSURE_INIT(&lock->offload, offload, lock, + grpc_executor_scheduler(GRPC_EXECUTOR_SHORT)); + GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, "C:%p create", lock)); + return lock; +} + +static void really_destroy(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) { + GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, "C:%p really_destroy", lock)); + GPR_ASSERT(gpr_atm_no_barrier_load(&lock->state) == 0); + gpr_mpscq_destroy(&lock->queue); + gpr_free(lock); +} + +static void start_destroy(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) { + gpr_atm old_state = gpr_atm_full_fetch_add(&lock->state, -STATE_UNORPHANED); + GRPC_COMBINER_TRACE(gpr_log( + GPR_DEBUG, "C:%p really_destroy old_state=%" PRIdPTR, lock, old_state)); + if (old_state == 1) { + really_destroy(exec_ctx, lock); + } +} + +#ifndef NDEBUG +#define GRPC_COMBINER_DEBUG_SPAM(op, delta) \ + if (GRPC_TRACER_ON(grpc_combiner_trace)) { \ + gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, \ + "C:%p %s %" PRIdPTR " --> %" PRIdPTR " %s", lock, (op), \ + gpr_atm_no_barrier_load(&lock->refs.count), \ + gpr_atm_no_barrier_load(&lock->refs.count) + (delta), reason); \ + } +#else +#define GRPC_COMBINER_DEBUG_SPAM(op, delta) +#endif + +void grpc_combiner_unref(grpc_exec_ctx *exec_ctx, + grpc_combiner *lock GRPC_COMBINER_DEBUG_ARGS) { + GRPC_COMBINER_DEBUG_SPAM("UNREF", -1); + if (gpr_unref(&lock->refs)) { + start_destroy(exec_ctx, lock); + } +} + +grpc_combiner *grpc_combiner_ref(grpc_combiner *lock GRPC_COMBINER_DEBUG_ARGS) { + GRPC_COMBINER_DEBUG_SPAM(" REF", 1); + gpr_ref(&lock->refs); + return lock; +} + +static void push_last_on_exec_ctx(grpc_exec_ctx *exec_ctx, + grpc_combiner *lock) { + lock->next_combiner_on_this_exec_ctx = NULL; + if (exec_ctx->active_combiner == NULL) { + exec_ctx->active_combiner = exec_ctx->last_combiner = lock; + } else { + exec_ctx->last_combiner->next_combiner_on_this_exec_ctx = lock; + exec_ctx->last_combiner = lock; + } +} + +static void push_first_on_exec_ctx(grpc_exec_ctx *exec_ctx, + grpc_combiner *lock) { + lock->next_combiner_on_this_exec_ctx = exec_ctx->active_combiner; + exec_ctx->active_combiner = lock; + if (lock->next_combiner_on_this_exec_ctx == NULL) { + exec_ctx->last_combiner = lock; + } +} + +#define COMBINER_FROM_CLOSURE_SCHEDULER(closure, scheduler_name) \ + ((grpc_combiner *)(((char *)((closure)->scheduler)) - \ + offsetof(grpc_combiner, scheduler_name))) + +static void combiner_exec(grpc_exec_ctx *exec_ctx, grpc_closure *cl, + grpc_error *error) { + GRPC_STATS_INC_COMBINER_LOCKS_SCHEDULED_ITEMS(exec_ctx); + GPR_TIMER_BEGIN("combiner.execute", 0); + grpc_combiner *lock = COMBINER_FROM_CLOSURE_SCHEDULER(cl, scheduler); + gpr_atm last = gpr_atm_full_fetch_add(&lock->state, STATE_ELEM_COUNT_LOW_BIT); + GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, + "C:%p grpc_combiner_execute c=%p last=%" PRIdPTR, + lock, cl, last)); + if (last == 1) { + GRPC_STATS_INC_COMBINER_LOCKS_INITIATED(exec_ctx); + gpr_atm_no_barrier_store(&lock->initiating_exec_ctx_or_null, + (gpr_atm)exec_ctx); + // first element on this list: add it to the list of combiner locks + // executing within this exec_ctx + push_last_on_exec_ctx(exec_ctx, lock); + } else { + // there may be a race with setting here: if that happens, we may delay + // offload for one or two actions, and that's fine + gpr_atm initiator = + gpr_atm_no_barrier_load(&lock->initiating_exec_ctx_or_null); + if (initiator != 0 && initiator != (gpr_atm)exec_ctx) { + gpr_atm_no_barrier_store(&lock->initiating_exec_ctx_or_null, 0); + } + } + GPR_ASSERT(last & STATE_UNORPHANED); // ensure lock has not been destroyed + assert(cl->cb); + cl->error_data.error = error; + gpr_mpscq_push(&lock->queue, &cl->next_data.atm_next); + GPR_TIMER_END("combiner.execute", 0); +} + +static void move_next(grpc_exec_ctx *exec_ctx) { + exec_ctx->active_combiner = + exec_ctx->active_combiner->next_combiner_on_this_exec_ctx; + if (exec_ctx->active_combiner == NULL) { + exec_ctx->last_combiner = NULL; + } +} + +static void offload(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { + grpc_combiner *lock = (grpc_combiner *)arg; + push_last_on_exec_ctx(exec_ctx, lock); +} + +static void queue_offload(grpc_exec_ctx *exec_ctx, grpc_combiner *lock) { + GRPC_STATS_INC_COMBINER_LOCKS_OFFLOADED(exec_ctx); + move_next(exec_ctx); + GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, "C:%p queue_offload", lock)); + GRPC_CLOSURE_SCHED(exec_ctx, &lock->offload, GRPC_ERROR_NONE); +} + +bool grpc_combiner_continue_exec_ctx(grpc_exec_ctx *exec_ctx) { + GPR_TIMER_BEGIN("combiner.continue_exec_ctx", 0); + grpc_combiner *lock = exec_ctx->active_combiner; + if (lock == NULL) { + GPR_TIMER_END("combiner.continue_exec_ctx", 0); + return false; + } + + bool contended = + gpr_atm_no_barrier_load(&lock->initiating_exec_ctx_or_null) == 0; + + GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, + "C:%p grpc_combiner_continue_exec_ctx " + "contended=%d " + "exec_ctx_ready_to_finish=%d " + "time_to_execute_final_list=%d", + lock, contended, + grpc_exec_ctx_ready_to_finish(exec_ctx), + lock->time_to_execute_final_list)); + + if (contended && grpc_exec_ctx_ready_to_finish(exec_ctx) && + grpc_executor_is_threaded()) { + GPR_TIMER_MARK("offload_from_finished_exec_ctx", 0); + // this execution context wants to move on: schedule remaining work to be + // picked up on the executor + queue_offload(exec_ctx, lock); + GPR_TIMER_END("combiner.continue_exec_ctx", 0); + return true; + } + + if (!lock->time_to_execute_final_list || + // peek to see if something new has shown up, and execute that with + // priority + (gpr_atm_acq_load(&lock->state) >> 1) > 1) { + gpr_mpscq_node *n = gpr_mpscq_pop(&lock->queue); + GRPC_COMBINER_TRACE( + gpr_log(GPR_DEBUG, "C:%p maybe_finish_one n=%p", lock, n)); + if (n == NULL) { + // queue is in an inconsistent state: use this as a cue that we should + // go off and do something else for a while (and come back later) + GPR_TIMER_MARK("delay_busy", 0); + queue_offload(exec_ctx, lock); + GPR_TIMER_END("combiner.continue_exec_ctx", 0); + return true; + } + GPR_TIMER_BEGIN("combiner.exec1", 0); + grpc_closure *cl = (grpc_closure *)n; + grpc_error *cl_err = cl->error_data.error; +#ifndef NDEBUG + cl->scheduled = false; +#endif + cl->cb(exec_ctx, cl->cb_arg, cl_err); + GRPC_ERROR_UNREF(cl_err); + GPR_TIMER_END("combiner.exec1", 0); + } else { + grpc_closure *c = lock->final_list.head; + GPR_ASSERT(c != NULL); + grpc_closure_list_init(&lock->final_list); + int loops = 0; + while (c != NULL) { + GPR_TIMER_BEGIN("combiner.exec_1final", 0); + GRPC_COMBINER_TRACE( + gpr_log(GPR_DEBUG, "C:%p execute_final[%d] c=%p", lock, loops, c)); + grpc_closure *next = c->next_data.next; + grpc_error *error = c->error_data.error; +#ifndef NDEBUG + c->scheduled = false; +#endif + c->cb(exec_ctx, c->cb_arg, error); + GRPC_ERROR_UNREF(error); + c = next; + GPR_TIMER_END("combiner.exec_1final", 0); + } + } + + GPR_TIMER_MARK("unref", 0); + move_next(exec_ctx); + lock->time_to_execute_final_list = false; + gpr_atm old_state = + gpr_atm_full_fetch_add(&lock->state, -STATE_ELEM_COUNT_LOW_BIT); + GRPC_COMBINER_TRACE( + gpr_log(GPR_DEBUG, "C:%p finish old_state=%" PRIdPTR, lock, old_state)); +// Define a macro to ease readability of the following switch statement. +#define OLD_STATE_WAS(orphaned, elem_count) \ + (((orphaned) ? 0 : STATE_UNORPHANED) | \ + ((elem_count)*STATE_ELEM_COUNT_LOW_BIT)) + // Depending on what the previous state was, we need to perform different + // actions. + switch (old_state) { + default: + // we have multiple queued work items: just continue executing them + break; + case OLD_STATE_WAS(false, 2): + case OLD_STATE_WAS(true, 2): + // we're down to one queued item: if it's the final list we should do that + if (!grpc_closure_list_empty(lock->final_list)) { + lock->time_to_execute_final_list = true; + } + break; + case OLD_STATE_WAS(false, 1): + // had one count, one unorphaned --> unlocked unorphaned + GPR_TIMER_END("combiner.continue_exec_ctx", 0); + return true; + case OLD_STATE_WAS(true, 1): + // and one count, one orphaned --> unlocked and orphaned + really_destroy(exec_ctx, lock); + GPR_TIMER_END("combiner.continue_exec_ctx", 0); + return true; + case OLD_STATE_WAS(false, 0): + case OLD_STATE_WAS(true, 0): + // these values are illegal - representing an already unlocked or + // deleted lock + GPR_TIMER_END("combiner.continue_exec_ctx", 0); + GPR_UNREACHABLE_CODE(return true); + } + push_first_on_exec_ctx(exec_ctx, lock); + GPR_TIMER_END("combiner.continue_exec_ctx", 0); + return true; +} + +static void enqueue_finally(grpc_exec_ctx *exec_ctx, void *closure, + grpc_error *error); + +static void combiner_finally_exec(grpc_exec_ctx *exec_ctx, + grpc_closure *closure, grpc_error *error) { + GRPC_STATS_INC_COMBINER_LOCKS_SCHEDULED_FINAL_ITEMS(exec_ctx); + grpc_combiner *lock = + COMBINER_FROM_CLOSURE_SCHEDULER(closure, finally_scheduler); + GRPC_COMBINER_TRACE(gpr_log(GPR_DEBUG, + "C:%p grpc_combiner_execute_finally c=%p; ac=%p", + lock, closure, exec_ctx->active_combiner)); + GPR_TIMER_BEGIN("combiner.execute_finally", 0); + if (exec_ctx->active_combiner != lock) { + GPR_TIMER_MARK("slowpath", 0); + GRPC_CLOSURE_SCHED(exec_ctx, + GRPC_CLOSURE_CREATE(enqueue_finally, closure, + grpc_combiner_scheduler(lock)), + error); + GPR_TIMER_END("combiner.execute_finally", 0); + return; + } + + if (grpc_closure_list_empty(lock->final_list)) { + gpr_atm_full_fetch_add(&lock->state, STATE_ELEM_COUNT_LOW_BIT); + } + grpc_closure_list_append(&lock->final_list, closure, error); + GPR_TIMER_END("combiner.execute_finally", 0); +} + +static void enqueue_finally(grpc_exec_ctx *exec_ctx, void *closure, + grpc_error *error) { + combiner_finally_exec(exec_ctx, (grpc_closure *)closure, + GRPC_ERROR_REF(error)); +} + +grpc_closure_scheduler *grpc_combiner_scheduler(grpc_combiner *combiner) { + return &combiner->scheduler; +} + +grpc_closure_scheduler *grpc_combiner_finally_scheduler( + grpc_combiner *combiner) { + return &combiner->finally_scheduler; +} diff --git a/src/core/lib/iomgr/endpoint.c b/src/core/lib/iomgr/endpoint.c deleted file mode 100644 index 37cce335ca..0000000000 --- a/src/core/lib/iomgr/endpoint.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/endpoint.h" - -void grpc_endpoint_read(grpc_exec_ctx* exec_ctx, grpc_endpoint* ep, - grpc_slice_buffer* slices, grpc_closure* cb) { - ep->vtable->read(exec_ctx, ep, slices, cb); -} - -void grpc_endpoint_write(grpc_exec_ctx* exec_ctx, grpc_endpoint* ep, - grpc_slice_buffer* slices, grpc_closure* cb) { - ep->vtable->write(exec_ctx, ep, slices, cb); -} - -void grpc_endpoint_add_to_pollset(grpc_exec_ctx* exec_ctx, grpc_endpoint* ep, - grpc_pollset* pollset) { - ep->vtable->add_to_pollset(exec_ctx, ep, pollset); -} - -void grpc_endpoint_add_to_pollset_set(grpc_exec_ctx* exec_ctx, - grpc_endpoint* ep, - grpc_pollset_set* pollset_set) { - ep->vtable->add_to_pollset_set(exec_ctx, ep, pollset_set); -} - -void grpc_endpoint_shutdown(grpc_exec_ctx* exec_ctx, grpc_endpoint* ep, - grpc_error* why) { - ep->vtable->shutdown(exec_ctx, ep, why); -} - -void grpc_endpoint_destroy(grpc_exec_ctx* exec_ctx, grpc_endpoint* ep) { - ep->vtable->destroy(exec_ctx, ep); -} - -char* grpc_endpoint_get_peer(grpc_endpoint* ep) { - return ep->vtable->get_peer(ep); -} - -int grpc_endpoint_get_fd(grpc_endpoint* ep) { return ep->vtable->get_fd(ep); } - -grpc_resource_user* grpc_endpoint_get_resource_user(grpc_endpoint* ep) { - return ep->vtable->get_resource_user(ep); -} diff --git a/src/core/lib/iomgr/endpoint.cc b/src/core/lib/iomgr/endpoint.cc new file mode 100644 index 0000000000..37cce335ca --- /dev/null +++ b/src/core/lib/iomgr/endpoint.cc @@ -0,0 +1,59 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/endpoint.h" + +void grpc_endpoint_read(grpc_exec_ctx* exec_ctx, grpc_endpoint* ep, + grpc_slice_buffer* slices, grpc_closure* cb) { + ep->vtable->read(exec_ctx, ep, slices, cb); +} + +void grpc_endpoint_write(grpc_exec_ctx* exec_ctx, grpc_endpoint* ep, + grpc_slice_buffer* slices, grpc_closure* cb) { + ep->vtable->write(exec_ctx, ep, slices, cb); +} + +void grpc_endpoint_add_to_pollset(grpc_exec_ctx* exec_ctx, grpc_endpoint* ep, + grpc_pollset* pollset) { + ep->vtable->add_to_pollset(exec_ctx, ep, pollset); +} + +void grpc_endpoint_add_to_pollset_set(grpc_exec_ctx* exec_ctx, + grpc_endpoint* ep, + grpc_pollset_set* pollset_set) { + ep->vtable->add_to_pollset_set(exec_ctx, ep, pollset_set); +} + +void grpc_endpoint_shutdown(grpc_exec_ctx* exec_ctx, grpc_endpoint* ep, + grpc_error* why) { + ep->vtable->shutdown(exec_ctx, ep, why); +} + +void grpc_endpoint_destroy(grpc_exec_ctx* exec_ctx, grpc_endpoint* ep) { + ep->vtable->destroy(exec_ctx, ep); +} + +char* grpc_endpoint_get_peer(grpc_endpoint* ep) { + return ep->vtable->get_peer(ep); +} + +int grpc_endpoint_get_fd(grpc_endpoint* ep) { return ep->vtable->get_fd(ep); } + +grpc_resource_user* grpc_endpoint_get_resource_user(grpc_endpoint* ep) { + return ep->vtable->get_resource_user(ep); +} diff --git a/src/core/lib/iomgr/endpoint_pair_posix.c b/src/core/lib/iomgr/endpoint_pair_posix.c deleted file mode 100644 index 3ade2148ba..0000000000 --- a/src/core/lib/iomgr/endpoint_pair_posix.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/iomgr/endpoint_pair.h" -#include "src/core/lib/iomgr/socket_utils_posix.h" -#include "src/core/lib/iomgr/unix_sockets_posix.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include "src/core/lib/iomgr/tcp_posix.h" -#include "src/core/lib/support/string.h" - -static void create_sockets(int sv[2]) { - int flags; - grpc_create_socketpair_if_unix(sv); - flags = fcntl(sv[0], F_GETFL, 0); - GPR_ASSERT(fcntl(sv[0], F_SETFL, flags | O_NONBLOCK) == 0); - flags = fcntl(sv[1], F_GETFL, 0); - GPR_ASSERT(fcntl(sv[1], F_SETFL, flags | O_NONBLOCK) == 0); - GPR_ASSERT(grpc_set_socket_no_sigpipe_if_possible(sv[0]) == GRPC_ERROR_NONE); - GPR_ASSERT(grpc_set_socket_no_sigpipe_if_possible(sv[1]) == GRPC_ERROR_NONE); -} - -grpc_endpoint_pair grpc_iomgr_create_endpoint_pair(const char *name, - grpc_channel_args *args) { - int sv[2]; - grpc_endpoint_pair p; - char *final_name; - create_sockets(sv); - - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - - gpr_asprintf(&final_name, "%s:client", name); - p.client = grpc_tcp_create(&exec_ctx, grpc_fd_create(sv[1], final_name), args, - "socketpair-server"); - gpr_free(final_name); - gpr_asprintf(&final_name, "%s:server", name); - p.server = grpc_tcp_create(&exec_ctx, grpc_fd_create(sv[0], final_name), args, - "socketpair-client"); - gpr_free(final_name); - - grpc_exec_ctx_finish(&exec_ctx); - return p; -} - -#endif diff --git a/src/core/lib/iomgr/endpoint_pair_posix.cc b/src/core/lib/iomgr/endpoint_pair_posix.cc new file mode 100644 index 0000000000..3ade2148ba --- /dev/null +++ b/src/core/lib/iomgr/endpoint_pair_posix.cc @@ -0,0 +1,72 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/iomgr/endpoint_pair.h" +#include "src/core/lib/iomgr/socket_utils_posix.h" +#include "src/core/lib/iomgr/unix_sockets_posix.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include "src/core/lib/iomgr/tcp_posix.h" +#include "src/core/lib/support/string.h" + +static void create_sockets(int sv[2]) { + int flags; + grpc_create_socketpair_if_unix(sv); + flags = fcntl(sv[0], F_GETFL, 0); + GPR_ASSERT(fcntl(sv[0], F_SETFL, flags | O_NONBLOCK) == 0); + flags = fcntl(sv[1], F_GETFL, 0); + GPR_ASSERT(fcntl(sv[1], F_SETFL, flags | O_NONBLOCK) == 0); + GPR_ASSERT(grpc_set_socket_no_sigpipe_if_possible(sv[0]) == GRPC_ERROR_NONE); + GPR_ASSERT(grpc_set_socket_no_sigpipe_if_possible(sv[1]) == GRPC_ERROR_NONE); +} + +grpc_endpoint_pair grpc_iomgr_create_endpoint_pair(const char *name, + grpc_channel_args *args) { + int sv[2]; + grpc_endpoint_pair p; + char *final_name; + create_sockets(sv); + + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + + gpr_asprintf(&final_name, "%s:client", name); + p.client = grpc_tcp_create(&exec_ctx, grpc_fd_create(sv[1], final_name), args, + "socketpair-server"); + gpr_free(final_name); + gpr_asprintf(&final_name, "%s:server", name); + p.server = grpc_tcp_create(&exec_ctx, grpc_fd_create(sv[0], final_name), args, + "socketpair-client"); + gpr_free(final_name); + + grpc_exec_ctx_finish(&exec_ctx); + return p; +} + +#endif diff --git a/src/core/lib/iomgr/endpoint_pair_uv.c b/src/core/lib/iomgr/endpoint_pair_uv.c deleted file mode 100644 index ff72fe0492..0000000000 --- a/src/core/lib/iomgr/endpoint_pair_uv.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_UV - -#include - -#include - -#include "src/core/lib/iomgr/endpoint_pair.h" - -grpc_endpoint_pair grpc_iomgr_create_endpoint_pair(const char *name, - grpc_channel_args *args) { - grpc_endpoint_pair endpoint_pair; - // TODO(mlumish): implement this properly under libuv - GPR_ASSERT(false && - "grpc_iomgr_create_endpoint_pair is not suppoted with libuv"); - return endpoint_pair; -} - -#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/endpoint_pair_uv.cc b/src/core/lib/iomgr/endpoint_pair_uv.cc new file mode 100644 index 0000000000..ff72fe0492 --- /dev/null +++ b/src/core/lib/iomgr/endpoint_pair_uv.cc @@ -0,0 +1,38 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_UV + +#include + +#include + +#include "src/core/lib/iomgr/endpoint_pair.h" + +grpc_endpoint_pair grpc_iomgr_create_endpoint_pair(const char *name, + grpc_channel_args *args) { + grpc_endpoint_pair endpoint_pair; + // TODO(mlumish): implement this properly under libuv + GPR_ASSERT(false && + "grpc_iomgr_create_endpoint_pair is not suppoted with libuv"); + return endpoint_pair; +} + +#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/endpoint_pair_windows.c b/src/core/lib/iomgr/endpoint_pair_windows.c deleted file mode 100644 index 782fa2fd69..0000000000 --- a/src/core/lib/iomgr/endpoint_pair_windows.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_WINSOCK_SOCKET -#include "src/core/lib/iomgr/endpoint_pair.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" - -#include -#include -#include - -#include -#include "src/core/lib/iomgr/socket_windows.h" -#include "src/core/lib/iomgr/tcp_windows.h" - -static void create_sockets(SOCKET sv[2]) { - SOCKET svr_sock = INVALID_SOCKET; - SOCKET lst_sock = INVALID_SOCKET; - SOCKET cli_sock = INVALID_SOCKET; - SOCKADDR_IN addr; - int addr_len = sizeof(addr); - - lst_sock = WSASocket(AF_INET, SOCK_STREAM, IPPROTO_TCP, NULL, 0, - WSA_FLAG_OVERLAPPED); - GPR_ASSERT(lst_sock != INVALID_SOCKET); - - memset(&addr, 0, sizeof(addr)); - addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - addr.sin_family = AF_INET; - GPR_ASSERT(bind(lst_sock, (struct sockaddr *)&addr, sizeof(addr)) != - SOCKET_ERROR); - GPR_ASSERT(listen(lst_sock, SOMAXCONN) != SOCKET_ERROR); - GPR_ASSERT(getsockname(lst_sock, (struct sockaddr *)&addr, &addr_len) != - SOCKET_ERROR); - - cli_sock = WSASocket(AF_INET, SOCK_STREAM, IPPROTO_TCP, NULL, 0, - WSA_FLAG_OVERLAPPED); - GPR_ASSERT(cli_sock != INVALID_SOCKET); - - GPR_ASSERT(WSAConnect(cli_sock, (struct sockaddr *)&addr, addr_len, NULL, - NULL, NULL, NULL) == 0); - svr_sock = accept(lst_sock, (struct sockaddr *)&addr, &addr_len); - GPR_ASSERT(svr_sock != INVALID_SOCKET); - - closesocket(lst_sock); - grpc_tcp_prepare_socket(cli_sock); - grpc_tcp_prepare_socket(svr_sock); - - sv[1] = cli_sock; - sv[0] = svr_sock; -} - -grpc_endpoint_pair grpc_iomgr_create_endpoint_pair( - const char *name, grpc_channel_args *channel_args) { - SOCKET sv[2]; - grpc_endpoint_pair p; - create_sockets(sv); - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - p.client = grpc_tcp_create(&exec_ctx, - grpc_winsocket_create(sv[1], "endpoint:client"), - channel_args, "endpoint:server"); - p.server = grpc_tcp_create(&exec_ctx, - grpc_winsocket_create(sv[0], "endpoint:server"), - channel_args, "endpoint:client"); - grpc_exec_ctx_finish(&exec_ctx); - return p; -} - -#endif diff --git a/src/core/lib/iomgr/endpoint_pair_windows.cc b/src/core/lib/iomgr/endpoint_pair_windows.cc new file mode 100644 index 0000000000..782fa2fd69 --- /dev/null +++ b/src/core/lib/iomgr/endpoint_pair_windows.cc @@ -0,0 +1,86 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_WINSOCK_SOCKET +#include "src/core/lib/iomgr/endpoint_pair.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" + +#include +#include +#include + +#include +#include "src/core/lib/iomgr/socket_windows.h" +#include "src/core/lib/iomgr/tcp_windows.h" + +static void create_sockets(SOCKET sv[2]) { + SOCKET svr_sock = INVALID_SOCKET; + SOCKET lst_sock = INVALID_SOCKET; + SOCKET cli_sock = INVALID_SOCKET; + SOCKADDR_IN addr; + int addr_len = sizeof(addr); + + lst_sock = WSASocket(AF_INET, SOCK_STREAM, IPPROTO_TCP, NULL, 0, + WSA_FLAG_OVERLAPPED); + GPR_ASSERT(lst_sock != INVALID_SOCKET); + + memset(&addr, 0, sizeof(addr)); + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.sin_family = AF_INET; + GPR_ASSERT(bind(lst_sock, (struct sockaddr *)&addr, sizeof(addr)) != + SOCKET_ERROR); + GPR_ASSERT(listen(lst_sock, SOMAXCONN) != SOCKET_ERROR); + GPR_ASSERT(getsockname(lst_sock, (struct sockaddr *)&addr, &addr_len) != + SOCKET_ERROR); + + cli_sock = WSASocket(AF_INET, SOCK_STREAM, IPPROTO_TCP, NULL, 0, + WSA_FLAG_OVERLAPPED); + GPR_ASSERT(cli_sock != INVALID_SOCKET); + + GPR_ASSERT(WSAConnect(cli_sock, (struct sockaddr *)&addr, addr_len, NULL, + NULL, NULL, NULL) == 0); + svr_sock = accept(lst_sock, (struct sockaddr *)&addr, &addr_len); + GPR_ASSERT(svr_sock != INVALID_SOCKET); + + closesocket(lst_sock); + grpc_tcp_prepare_socket(cli_sock); + grpc_tcp_prepare_socket(svr_sock); + + sv[1] = cli_sock; + sv[0] = svr_sock; +} + +grpc_endpoint_pair grpc_iomgr_create_endpoint_pair( + const char *name, grpc_channel_args *channel_args) { + SOCKET sv[2]; + grpc_endpoint_pair p; + create_sockets(sv); + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + p.client = grpc_tcp_create(&exec_ctx, + grpc_winsocket_create(sv[1], "endpoint:client"), + channel_args, "endpoint:server"); + p.server = grpc_tcp_create(&exec_ctx, + grpc_winsocket_create(sv[0], "endpoint:server"), + channel_args, "endpoint:client"); + grpc_exec_ctx_finish(&exec_ctx); + return p; +} + +#endif diff --git a/src/core/lib/iomgr/error.c b/src/core/lib/iomgr/error.c deleted file mode 100644 index aa05501537..0000000000 --- a/src/core/lib/iomgr/error.c +++ /dev/null @@ -1,801 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/error.h" - -#include - -#include -#include -#include -#include -#include - -#ifdef GPR_WINDOWS -#include -#endif - -#include "src/core/lib/debug/trace.h" -#include "src/core/lib/iomgr/error_internal.h" -#include "src/core/lib/profiling/timers.h" -#include "src/core/lib/slice/slice_internal.h" - -#ifndef NDEBUG -grpc_tracer_flag grpc_trace_error_refcount = - GRPC_TRACER_INITIALIZER(false, "error_refcount"); -#endif - -static const char *error_int_name(grpc_error_ints key) { - switch (key) { - case GRPC_ERROR_INT_ERRNO: - return "errno"; - case GRPC_ERROR_INT_FILE_LINE: - return "file_line"; - case GRPC_ERROR_INT_STREAM_ID: - return "stream_id"; - case GRPC_ERROR_INT_GRPC_STATUS: - return "grpc_status"; - case GRPC_ERROR_INT_OFFSET: - return "offset"; - case GRPC_ERROR_INT_INDEX: - return "index"; - case GRPC_ERROR_INT_SIZE: - return "size"; - case GRPC_ERROR_INT_HTTP2_ERROR: - return "http2_error"; - case GRPC_ERROR_INT_TSI_CODE: - return "tsi_code"; - case GRPC_ERROR_INT_SECURITY_STATUS: - return "security_status"; - case GRPC_ERROR_INT_FD: - return "fd"; - case GRPC_ERROR_INT_WSA_ERROR: - return "wsa_error"; - case GRPC_ERROR_INT_HTTP_STATUS: - return "http_status"; - case GRPC_ERROR_INT_LIMIT: - return "limit"; - case GRPC_ERROR_INT_OCCURRED_DURING_WRITE: - return "occurred_during_write"; - case GRPC_ERROR_INT_MAX: - GPR_UNREACHABLE_CODE(return "unknown"); - } - GPR_UNREACHABLE_CODE(return "unknown"); -} - -static const char *error_str_name(grpc_error_strs key) { - switch (key) { - case GRPC_ERROR_STR_KEY: - return "key"; - case GRPC_ERROR_STR_VALUE: - return "value"; - case GRPC_ERROR_STR_DESCRIPTION: - return "description"; - case GRPC_ERROR_STR_OS_ERROR: - return "os_error"; - case GRPC_ERROR_STR_TARGET_ADDRESS: - return "target_address"; - case GRPC_ERROR_STR_SYSCALL: - return "syscall"; - case GRPC_ERROR_STR_FILE: - return "file"; - case GRPC_ERROR_STR_GRPC_MESSAGE: - return "grpc_message"; - case GRPC_ERROR_STR_RAW_BYTES: - return "raw_bytes"; - case GRPC_ERROR_STR_TSI_ERROR: - return "tsi_error"; - case GRPC_ERROR_STR_FILENAME: - return "filename"; - case GRPC_ERROR_STR_QUEUED_BUFFERS: - return "queued_buffers"; - case GRPC_ERROR_STR_MAX: - GPR_UNREACHABLE_CODE(return "unknown"); - } - GPR_UNREACHABLE_CODE(return "unknown"); -} - -static const char *error_time_name(grpc_error_times key) { - switch (key) { - case GRPC_ERROR_TIME_CREATED: - return "created"; - case GRPC_ERROR_TIME_MAX: - GPR_UNREACHABLE_CODE(return "unknown"); - } - GPR_UNREACHABLE_CODE(return "unknown"); -} - -bool grpc_error_is_special(grpc_error *err) { - return err == GRPC_ERROR_NONE || err == GRPC_ERROR_OOM || - err == GRPC_ERROR_CANCELLED; -} - -#ifndef NDEBUG -grpc_error *grpc_error_ref(grpc_error *err, const char *file, int line) { - if (grpc_error_is_special(err)) return err; - if (GRPC_TRACER_ON(grpc_trace_error_refcount)) { - gpr_log(GPR_DEBUG, "%p: %" PRIdPTR " -> %" PRIdPTR " [%s:%d]", err, - gpr_atm_no_barrier_load(&err->atomics.refs.count), - gpr_atm_no_barrier_load(&err->atomics.refs.count) + 1, file, line); - } - gpr_ref(&err->atomics.refs); - return err; -} -#else -grpc_error *grpc_error_ref(grpc_error *err) { - if (grpc_error_is_special(err)) return err; - gpr_ref(&err->atomics.refs); - return err; -} -#endif - -static void unref_errs(grpc_error *err) { - uint8_t slot = err->first_err; - while (slot != UINT8_MAX) { - grpc_linked_error *lerr = (grpc_linked_error *)(err->arena + slot); - GRPC_ERROR_UNREF(lerr->err); - GPR_ASSERT(err->last_err == slot ? lerr->next == UINT8_MAX - : lerr->next != UINT8_MAX); - slot = lerr->next; - } -} - -static void unref_slice(grpc_slice slice) { - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - grpc_slice_unref_internal(&exec_ctx, slice); - grpc_exec_ctx_finish(&exec_ctx); -} - -static void unref_strs(grpc_error *err) { - for (size_t which = 0; which < GRPC_ERROR_STR_MAX; ++which) { - uint8_t slot = err->strs[which]; - if (slot != UINT8_MAX) { - unref_slice(*(grpc_slice *)(err->arena + slot)); - } - } -} - -static void error_destroy(grpc_error *err) { - GPR_ASSERT(!grpc_error_is_special(err)); - unref_errs(err); - unref_strs(err); - gpr_free((void *)gpr_atm_acq_load(&err->atomics.error_string)); - gpr_free(err); -} - -#ifndef NDEBUG -void grpc_error_unref(grpc_error *err, const char *file, int line) { - if (grpc_error_is_special(err)) return; - if (GRPC_TRACER_ON(grpc_trace_error_refcount)) { - gpr_log(GPR_DEBUG, "%p: %" PRIdPTR " -> %" PRIdPTR " [%s:%d]", err, - gpr_atm_no_barrier_load(&err->atomics.refs.count), - gpr_atm_no_barrier_load(&err->atomics.refs.count) - 1, file, line); - } - if (gpr_unref(&err->atomics.refs)) { - error_destroy(err); - } -} -#else -void grpc_error_unref(grpc_error *err) { - if (grpc_error_is_special(err)) return; - if (gpr_unref(&err->atomics.refs)) { - error_destroy(err); - } -} -#endif - -static uint8_t get_placement(grpc_error **err, size_t size) { - GPR_ASSERT(*err); - uint8_t slots = (uint8_t)(size / sizeof(intptr_t)); - if ((*err)->arena_size + slots > (*err)->arena_capacity) { - (*err)->arena_capacity = - (uint8_t)GPR_MIN(UINT8_MAX - 1, (3 * (*err)->arena_capacity / 2)); - if ((*err)->arena_size + slots > (*err)->arena_capacity) { - return UINT8_MAX; - } -#ifndef NDEBUG - grpc_error *orig = *err; -#endif - *err = (grpc_error *)gpr_realloc( - *err, sizeof(grpc_error) + (*err)->arena_capacity * sizeof(intptr_t)); -#ifndef NDEBUG - if (GRPC_TRACER_ON(grpc_trace_error_refcount)) { - if (*err != orig) { - gpr_log(GPR_DEBUG, "realloc %p -> %p", orig, *err); - } - } -#endif - } - uint8_t placement = (*err)->arena_size; - (*err)->arena_size = (uint8_t)((*err)->arena_size + slots); - return placement; -} - -static void internal_set_int(grpc_error **err, grpc_error_ints which, - intptr_t value) { - uint8_t slot = (*err)->ints[which]; - if (slot == UINT8_MAX) { - slot = get_placement(err, sizeof(value)); - if (slot == UINT8_MAX) { - gpr_log(GPR_ERROR, "Error %p is full, dropping int {\"%s\":%" PRIiPTR "}", - *err, error_int_name(which), value); - return; - } - } - (*err)->ints[which] = slot; - (*err)->arena[slot] = value; -} - -static void internal_set_str(grpc_error **err, grpc_error_strs which, - grpc_slice value) { - uint8_t slot = (*err)->strs[which]; - if (slot == UINT8_MAX) { - slot = get_placement(err, sizeof(value)); - if (slot == UINT8_MAX) { - const char *str = grpc_slice_to_c_string(value); - gpr_log(GPR_ERROR, "Error %p is full, dropping string {\"%s\":\"%s\"}", - *err, error_str_name(which), str); - gpr_free((void *)str); - return; - } - } else { - unref_slice(*(grpc_slice *)((*err)->arena + slot)); - } - (*err)->strs[which] = slot; - memcpy((*err)->arena + slot, &value, sizeof(value)); -} - -static char *fmt_time(gpr_timespec tm); -static void internal_set_time(grpc_error **err, grpc_error_times which, - gpr_timespec value) { - uint8_t slot = (*err)->times[which]; - if (slot == UINT8_MAX) { - slot = get_placement(err, sizeof(value)); - if (slot == UINT8_MAX) { - const char *time_str = fmt_time(value); - gpr_log(GPR_ERROR, "Error %p is full, dropping \"%s\":\"%s\"}", *err, - error_time_name(which), time_str); - gpr_free((void *)time_str); - return; - } - } - (*err)->times[which] = slot; - memcpy((*err)->arena + slot, &value, sizeof(value)); -} - -static void internal_add_error(grpc_error **err, grpc_error *new_err) { - grpc_linked_error new_last = {new_err, UINT8_MAX}; - uint8_t slot = get_placement(err, sizeof(grpc_linked_error)); - if (slot == UINT8_MAX) { - gpr_log(GPR_ERROR, "Error %p is full, dropping error %p = %s", *err, - new_err, grpc_error_string(new_err)); - GRPC_ERROR_UNREF(new_err); - return; - } - if ((*err)->first_err == UINT8_MAX) { - GPR_ASSERT((*err)->last_err == UINT8_MAX); - (*err)->last_err = slot; - (*err)->first_err = slot; - } else { - GPR_ASSERT((*err)->last_err != UINT8_MAX); - grpc_linked_error *old_last = - (grpc_linked_error *)((*err)->arena + (*err)->last_err); - old_last->next = slot; - (*err)->last_err = slot; - } - memcpy((*err)->arena + slot, &new_last, sizeof(grpc_linked_error)); -} - -#define SLOTS_PER_INT (sizeof(intptr_t) / sizeof(intptr_t)) -#define SLOTS_PER_STR (sizeof(grpc_slice) / sizeof(intptr_t)) -#define SLOTS_PER_TIME (sizeof(gpr_timespec) / sizeof(intptr_t)) -#define SLOTS_PER_LINKED_ERROR (sizeof(grpc_linked_error) / sizeof(intptr_t)) - -// size of storing one int and two slices and a timespec. For line, desc, file, -// and time created -#define DEFAULT_ERROR_CAPACITY \ - (SLOTS_PER_INT + (SLOTS_PER_STR * 2) + SLOTS_PER_TIME) - -// It is very common to include and extra int and string in an error -#define SURPLUS_CAPACITY (2 * SLOTS_PER_INT + SLOTS_PER_TIME) - -grpc_error *grpc_error_create(const char *file, int line, grpc_slice desc, - grpc_error **referencing, - size_t num_referencing) { - GPR_TIMER_BEGIN("grpc_error_create", 0); - uint8_t initial_arena_capacity = (uint8_t)( - DEFAULT_ERROR_CAPACITY + - (uint8_t)(num_referencing * SLOTS_PER_LINKED_ERROR) + SURPLUS_CAPACITY); - grpc_error *err = (grpc_error *)gpr_malloc( - sizeof(*err) + initial_arena_capacity * sizeof(intptr_t)); - if (err == NULL) { // TODO(ctiller): make gpr_malloc return NULL - return GRPC_ERROR_OOM; - } -#ifndef NDEBUG - if (GRPC_TRACER_ON(grpc_trace_error_refcount)) { - gpr_log(GPR_DEBUG, "%p create [%s:%d]", err, file, line); - } -#endif - - err->arena_size = 0; - err->arena_capacity = initial_arena_capacity; - err->first_err = UINT8_MAX; - err->last_err = UINT8_MAX; - - memset(err->ints, UINT8_MAX, GRPC_ERROR_INT_MAX); - memset(err->strs, UINT8_MAX, GRPC_ERROR_STR_MAX); - memset(err->times, UINT8_MAX, GRPC_ERROR_TIME_MAX); - - internal_set_int(&err, GRPC_ERROR_INT_FILE_LINE, line); - internal_set_str(&err, GRPC_ERROR_STR_FILE, - grpc_slice_from_static_string(file)); - internal_set_str(&err, GRPC_ERROR_STR_DESCRIPTION, desc); - - for (size_t i = 0; i < num_referencing; ++i) { - if (referencing[i] == GRPC_ERROR_NONE) continue; - internal_add_error( - &err, - GRPC_ERROR_REF( - referencing[i])); // TODO(ncteisen), change ownership semantics - } - - internal_set_time(&err, GRPC_ERROR_TIME_CREATED, gpr_now(GPR_CLOCK_REALTIME)); - - gpr_atm_no_barrier_store(&err->atomics.error_string, 0); - gpr_ref_init(&err->atomics.refs, 1); - GPR_TIMER_END("grpc_error_create", 0); - return err; -} - -static void ref_strs(grpc_error *err) { - for (size_t i = 0; i < GRPC_ERROR_STR_MAX; ++i) { - uint8_t slot = err->strs[i]; - if (slot != UINT8_MAX) { - grpc_slice_ref_internal(*(grpc_slice *)(err->arena + slot)); - } - } -} - -static void ref_errs(grpc_error *err) { - uint8_t slot = err->first_err; - while (slot != UINT8_MAX) { - grpc_linked_error *lerr = (grpc_linked_error *)(err->arena + slot); - GRPC_ERROR_REF(lerr->err); - slot = lerr->next; - } -} - -static grpc_error *copy_error_and_unref(grpc_error *in) { - GPR_TIMER_BEGIN("copy_error_and_unref", 0); - grpc_error *out; - if (grpc_error_is_special(in)) { - out = GRPC_ERROR_CREATE_FROM_STATIC_STRING("unknown"); - if (in == GRPC_ERROR_NONE) { - internal_set_str(&out, GRPC_ERROR_STR_DESCRIPTION, - grpc_slice_from_static_string("no error")); - internal_set_int(&out, GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_OK); - } else if (in == GRPC_ERROR_OOM) { - internal_set_str(&out, GRPC_ERROR_STR_DESCRIPTION, - grpc_slice_from_static_string("oom")); - } else if (in == GRPC_ERROR_CANCELLED) { - internal_set_str(&out, GRPC_ERROR_STR_DESCRIPTION, - grpc_slice_from_static_string("cancelled")); - internal_set_int(&out, GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_CANCELLED); - } - } else if (gpr_ref_is_unique(&in->atomics.refs)) { - out = in; - } else { - uint8_t new_arena_capacity = in->arena_capacity; - // the returned err will be added to, so we ensure this is room to avoid - // unneeded allocations. - if (in->arena_capacity - in->arena_size < (uint8_t)SLOTS_PER_STR) { - new_arena_capacity = (uint8_t)(3 * new_arena_capacity / 2); - } - out = (grpc_error *)gpr_malloc(sizeof(*in) + - new_arena_capacity * sizeof(intptr_t)); -#ifndef NDEBUG - if (GRPC_TRACER_ON(grpc_trace_error_refcount)) { - gpr_log(GPR_DEBUG, "%p create copying %p", out, in); - } -#endif - // bulk memcpy of the rest of the struct. - size_t skip = sizeof(&out->atomics); - memcpy((void *)((uintptr_t)out + skip), (void *)((uintptr_t)in + skip), - sizeof(*in) + (in->arena_size * sizeof(intptr_t)) - skip); - // manually set the atomics and the new capacity - gpr_atm_no_barrier_store(&out->atomics.error_string, 0); - gpr_ref_init(&out->atomics.refs, 1); - out->arena_capacity = new_arena_capacity; - ref_strs(out); - ref_errs(out); - GRPC_ERROR_UNREF(in); - } - GPR_TIMER_END("copy_error_and_unref", 0); - return out; -} - -grpc_error *grpc_error_set_int(grpc_error *src, grpc_error_ints which, - intptr_t value) { - GPR_TIMER_BEGIN("grpc_error_set_int", 0); - grpc_error *new_err = copy_error_and_unref(src); - internal_set_int(&new_err, which, value); - GPR_TIMER_END("grpc_error_set_int", 0); - return new_err; -} - -typedef struct { - grpc_error *error; - grpc_status_code code; - const char *msg; -} special_error_status_map; -static special_error_status_map error_status_map[] = { - {GRPC_ERROR_NONE, GRPC_STATUS_OK, ""}, - {GRPC_ERROR_CANCELLED, GRPC_STATUS_CANCELLED, "Cancelled"}, - {GRPC_ERROR_OOM, GRPC_STATUS_RESOURCE_EXHAUSTED, "Out of memory"}, -}; - -bool grpc_error_get_int(grpc_error *err, grpc_error_ints which, intptr_t *p) { - GPR_TIMER_BEGIN("grpc_error_get_int", 0); - if (grpc_error_is_special(err)) { - if (which == GRPC_ERROR_INT_GRPC_STATUS) { - for (size_t i = 0; i < GPR_ARRAY_SIZE(error_status_map); i++) { - if (error_status_map[i].error == err) { - if (p != NULL) *p = error_status_map[i].code; - GPR_TIMER_END("grpc_error_get_int", 0); - return true; - } - } - } - GPR_TIMER_END("grpc_error_get_int", 0); - return false; - } - uint8_t slot = err->ints[which]; - if (slot != UINT8_MAX) { - if (p != NULL) *p = err->arena[slot]; - GPR_TIMER_END("grpc_error_get_int", 0); - return true; - } - GPR_TIMER_END("grpc_error_get_int", 0); - return false; -} - -grpc_error *grpc_error_set_str(grpc_error *src, grpc_error_strs which, - grpc_slice str) { - GPR_TIMER_BEGIN("grpc_error_set_str", 0); - grpc_error *new_err = copy_error_and_unref(src); - internal_set_str(&new_err, which, str); - GPR_TIMER_END("grpc_error_set_str", 0); - return new_err; -} - -bool grpc_error_get_str(grpc_error *err, grpc_error_strs which, - grpc_slice *str) { - if (grpc_error_is_special(err)) { - if (which == GRPC_ERROR_STR_GRPC_MESSAGE) { - for (size_t i = 0; i < GPR_ARRAY_SIZE(error_status_map); i++) { - if (error_status_map[i].error == err) { - *str = grpc_slice_from_static_string(error_status_map[i].msg); - return true; - } - } - } - return false; - } - uint8_t slot = err->strs[which]; - if (slot != UINT8_MAX) { - *str = *(grpc_slice *)(err->arena + slot); - return true; - } else { - return false; - } -} - -grpc_error *grpc_error_add_child(grpc_error *src, grpc_error *child) { - GPR_TIMER_BEGIN("grpc_error_add_child", 0); - grpc_error *new_err = copy_error_and_unref(src); - internal_add_error(&new_err, child); - GPR_TIMER_END("grpc_error_add_child", 0); - return new_err; -} - -static const char *no_error_string = "\"No Error\""; -static const char *oom_error_string = "\"Out of memory\""; -static const char *cancelled_error_string = "\"Cancelled\""; - -typedef struct { - char *key; - char *value; -} kv_pair; - -typedef struct { - kv_pair *kvs; - size_t num_kvs; - size_t cap_kvs; -} kv_pairs; - -static void append_chr(char c, char **s, size_t *sz, size_t *cap) { - if (*sz == *cap) { - *cap = GPR_MAX(8, 3 * *cap / 2); - *s = (char *)gpr_realloc(*s, *cap); - } - (*s)[(*sz)++] = c; -} - -static void append_str(const char *str, char **s, size_t *sz, size_t *cap) { - for (const char *c = str; *c; c++) { - append_chr(*c, s, sz, cap); - } -} - -static void append_esc_str(const uint8_t *str, size_t len, char **s, size_t *sz, - size_t *cap) { - static const char *hex = "0123456789abcdef"; - append_chr('"', s, sz, cap); - for (size_t i = 0; i < len; i++, str++) { - if (*str < 32 || *str >= 127) { - append_chr('\\', s, sz, cap); - switch (*str) { - case '\b': - append_chr('b', s, sz, cap); - break; - case '\f': - append_chr('f', s, sz, cap); - break; - case '\n': - append_chr('n', s, sz, cap); - break; - case '\r': - append_chr('r', s, sz, cap); - break; - case '\t': - append_chr('t', s, sz, cap); - break; - default: - append_chr('u', s, sz, cap); - append_chr('0', s, sz, cap); - append_chr('0', s, sz, cap); - append_chr(hex[*str >> 4], s, sz, cap); - append_chr(hex[*str & 0x0f], s, sz, cap); - break; - } - } else { - append_chr((char)*str, s, sz, cap); - } - } - append_chr('"', s, sz, cap); -} - -static void append_kv(kv_pairs *kvs, char *key, char *value) { - if (kvs->num_kvs == kvs->cap_kvs) { - kvs->cap_kvs = GPR_MAX(3 * kvs->cap_kvs / 2, 4); - kvs->kvs = - (kv_pair *)gpr_realloc(kvs->kvs, sizeof(*kvs->kvs) * kvs->cap_kvs); - } - kvs->kvs[kvs->num_kvs].key = key; - kvs->kvs[kvs->num_kvs].value = value; - kvs->num_kvs++; -} - -static char *key_int(grpc_error_ints which) { - return gpr_strdup(error_int_name(which)); -} - -static char *fmt_int(intptr_t p) { - char *s; - gpr_asprintf(&s, "%" PRIdPTR, p); - return s; -} - -static void collect_ints_kvs(grpc_error *err, kv_pairs *kvs) { - for (size_t which = 0; which < GRPC_ERROR_INT_MAX; ++which) { - uint8_t slot = err->ints[which]; - if (slot != UINT8_MAX) { - append_kv(kvs, key_int((grpc_error_ints)which), - fmt_int(err->arena[slot])); - } - } -} - -static char *key_str(grpc_error_strs which) { - return gpr_strdup(error_str_name(which)); -} - -static char *fmt_str(grpc_slice slice) { - char *s = NULL; - size_t sz = 0; - size_t cap = 0; - append_esc_str((const uint8_t *)GRPC_SLICE_START_PTR(slice), - GRPC_SLICE_LENGTH(slice), &s, &sz, &cap); - append_chr(0, &s, &sz, &cap); - return s; -} - -static void collect_strs_kvs(grpc_error *err, kv_pairs *kvs) { - for (size_t which = 0; which < GRPC_ERROR_STR_MAX; ++which) { - uint8_t slot = err->strs[which]; - if (slot != UINT8_MAX) { - append_kv(kvs, key_str((grpc_error_strs)which), - fmt_str(*(grpc_slice *)(err->arena + slot))); - } - } -} - -static char *key_time(grpc_error_times which) { - return gpr_strdup(error_time_name(which)); -} - -static char *fmt_time(gpr_timespec tm) { - char *out; - const char *pfx = "!!"; - switch (tm.clock_type) { - case GPR_CLOCK_MONOTONIC: - pfx = "@monotonic:"; - break; - case GPR_CLOCK_REALTIME: - pfx = "@"; - break; - case GPR_CLOCK_PRECISE: - pfx = "@precise:"; - break; - case GPR_TIMESPAN: - pfx = ""; - break; - } - gpr_asprintf(&out, "\"%s%" PRId64 ".%09d\"", pfx, tm.tv_sec, tm.tv_nsec); - return out; -} - -static void collect_times_kvs(grpc_error *err, kv_pairs *kvs) { - for (size_t which = 0; which < GRPC_ERROR_TIME_MAX; ++which) { - uint8_t slot = err->times[which]; - if (slot != UINT8_MAX) { - append_kv(kvs, key_time((grpc_error_times)which), - fmt_time(*(gpr_timespec *)(err->arena + slot))); - } - } -} - -static void add_errs(grpc_error *err, char **s, size_t *sz, size_t *cap) { - uint8_t slot = err->first_err; - bool first = true; - while (slot != UINT8_MAX) { - grpc_linked_error *lerr = (grpc_linked_error *)(err->arena + slot); - if (!first) append_chr(',', s, sz, cap); - first = false; - const char *e = grpc_error_string(lerr->err); - append_str(e, s, sz, cap); - GPR_ASSERT(err->last_err == slot ? lerr->next == UINT8_MAX - : lerr->next != UINT8_MAX); - slot = lerr->next; - } -} - -static char *errs_string(grpc_error *err) { - char *s = NULL; - size_t sz = 0; - size_t cap = 0; - append_chr('[', &s, &sz, &cap); - add_errs(err, &s, &sz, &cap); - append_chr(']', &s, &sz, &cap); - append_chr(0, &s, &sz, &cap); - return s; -} - -static int cmp_kvs(const void *a, const void *b) { - const kv_pair *ka = (const kv_pair *)a; - const kv_pair *kb = (const kv_pair *)b; - return strcmp(ka->key, kb->key); -} - -static char *finish_kvs(kv_pairs *kvs) { - char *s = NULL; - size_t sz = 0; - size_t cap = 0; - - append_chr('{', &s, &sz, &cap); - for (size_t i = 0; i < kvs->num_kvs; i++) { - if (i != 0) append_chr(',', &s, &sz, &cap); - append_esc_str((const uint8_t *)kvs->kvs[i].key, strlen(kvs->kvs[i].key), - &s, &sz, &cap); - gpr_free(kvs->kvs[i].key); - append_chr(':', &s, &sz, &cap); - append_str(kvs->kvs[i].value, &s, &sz, &cap); - gpr_free(kvs->kvs[i].value); - } - append_chr('}', &s, &sz, &cap); - append_chr(0, &s, &sz, &cap); - - gpr_free(kvs->kvs); - return s; -} - -const char *grpc_error_string(grpc_error *err) { - GPR_TIMER_BEGIN("grpc_error_string", 0); - if (err == GRPC_ERROR_NONE) return no_error_string; - if (err == GRPC_ERROR_OOM) return oom_error_string; - if (err == GRPC_ERROR_CANCELLED) return cancelled_error_string; - - void *p = (void *)gpr_atm_acq_load(&err->atomics.error_string); - if (p != NULL) { - GPR_TIMER_END("grpc_error_string", 0); - return (const char *)p; - } - - kv_pairs kvs; - memset(&kvs, 0, sizeof(kvs)); - - collect_ints_kvs(err, &kvs); - collect_strs_kvs(err, &kvs); - collect_times_kvs(err, &kvs); - if (err->first_err != UINT8_MAX) { - append_kv(&kvs, gpr_strdup("referenced_errors"), errs_string(err)); - } - - qsort(kvs.kvs, kvs.num_kvs, sizeof(kv_pair), cmp_kvs); - - char *out = finish_kvs(&kvs); - - if (!gpr_atm_rel_cas(&err->atomics.error_string, 0, (gpr_atm)out)) { - gpr_free(out); - out = (char *)gpr_atm_no_barrier_load(&err->atomics.error_string); - } - - GPR_TIMER_END("grpc_error_string", 0); - return out; -} - -grpc_error *grpc_os_error(const char *file, int line, int err, - const char *call_name) { - return grpc_error_set_str( - grpc_error_set_str( - grpc_error_set_int( - grpc_error_create(file, line, - grpc_slice_from_static_string("OS Error"), NULL, - 0), - GRPC_ERROR_INT_ERRNO, err), - GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string(strerror(err))), - GRPC_ERROR_STR_SYSCALL, grpc_slice_from_copied_string(call_name)); -} - -#ifdef GPR_WINDOWS -grpc_error *grpc_wsa_error(const char *file, int line, int err, - const char *call_name) { - char *utf8_message = gpr_format_message(err); - grpc_error *error = grpc_error_set_str( - grpc_error_set_str( - grpc_error_set_int( - grpc_error_create(file, line, - grpc_slice_from_static_string("OS Error"), NULL, - 0), - GRPC_ERROR_INT_WSA_ERROR, err), - GRPC_ERROR_STR_OS_ERROR, grpc_slice_from_copied_string(utf8_message)), - GRPC_ERROR_STR_SYSCALL, grpc_slice_from_static_string(call_name)); - gpr_free(utf8_message); - return error; -} -#endif - -bool grpc_log_if_error(const char *what, grpc_error *error, const char *file, - int line) { - if (error == GRPC_ERROR_NONE) return true; - const char *msg = grpc_error_string(error); - gpr_log(file, line, GPR_LOG_SEVERITY_ERROR, "%s: %s", what, msg); - GRPC_ERROR_UNREF(error); - return false; -} diff --git a/src/core/lib/iomgr/error.cc b/src/core/lib/iomgr/error.cc new file mode 100644 index 0000000000..aa05501537 --- /dev/null +++ b/src/core/lib/iomgr/error.cc @@ -0,0 +1,801 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/error.h" + +#include + +#include +#include +#include +#include +#include + +#ifdef GPR_WINDOWS +#include +#endif + +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/iomgr/error_internal.h" +#include "src/core/lib/profiling/timers.h" +#include "src/core/lib/slice/slice_internal.h" + +#ifndef NDEBUG +grpc_tracer_flag grpc_trace_error_refcount = + GRPC_TRACER_INITIALIZER(false, "error_refcount"); +#endif + +static const char *error_int_name(grpc_error_ints key) { + switch (key) { + case GRPC_ERROR_INT_ERRNO: + return "errno"; + case GRPC_ERROR_INT_FILE_LINE: + return "file_line"; + case GRPC_ERROR_INT_STREAM_ID: + return "stream_id"; + case GRPC_ERROR_INT_GRPC_STATUS: + return "grpc_status"; + case GRPC_ERROR_INT_OFFSET: + return "offset"; + case GRPC_ERROR_INT_INDEX: + return "index"; + case GRPC_ERROR_INT_SIZE: + return "size"; + case GRPC_ERROR_INT_HTTP2_ERROR: + return "http2_error"; + case GRPC_ERROR_INT_TSI_CODE: + return "tsi_code"; + case GRPC_ERROR_INT_SECURITY_STATUS: + return "security_status"; + case GRPC_ERROR_INT_FD: + return "fd"; + case GRPC_ERROR_INT_WSA_ERROR: + return "wsa_error"; + case GRPC_ERROR_INT_HTTP_STATUS: + return "http_status"; + case GRPC_ERROR_INT_LIMIT: + return "limit"; + case GRPC_ERROR_INT_OCCURRED_DURING_WRITE: + return "occurred_during_write"; + case GRPC_ERROR_INT_MAX: + GPR_UNREACHABLE_CODE(return "unknown"); + } + GPR_UNREACHABLE_CODE(return "unknown"); +} + +static const char *error_str_name(grpc_error_strs key) { + switch (key) { + case GRPC_ERROR_STR_KEY: + return "key"; + case GRPC_ERROR_STR_VALUE: + return "value"; + case GRPC_ERROR_STR_DESCRIPTION: + return "description"; + case GRPC_ERROR_STR_OS_ERROR: + return "os_error"; + case GRPC_ERROR_STR_TARGET_ADDRESS: + return "target_address"; + case GRPC_ERROR_STR_SYSCALL: + return "syscall"; + case GRPC_ERROR_STR_FILE: + return "file"; + case GRPC_ERROR_STR_GRPC_MESSAGE: + return "grpc_message"; + case GRPC_ERROR_STR_RAW_BYTES: + return "raw_bytes"; + case GRPC_ERROR_STR_TSI_ERROR: + return "tsi_error"; + case GRPC_ERROR_STR_FILENAME: + return "filename"; + case GRPC_ERROR_STR_QUEUED_BUFFERS: + return "queued_buffers"; + case GRPC_ERROR_STR_MAX: + GPR_UNREACHABLE_CODE(return "unknown"); + } + GPR_UNREACHABLE_CODE(return "unknown"); +} + +static const char *error_time_name(grpc_error_times key) { + switch (key) { + case GRPC_ERROR_TIME_CREATED: + return "created"; + case GRPC_ERROR_TIME_MAX: + GPR_UNREACHABLE_CODE(return "unknown"); + } + GPR_UNREACHABLE_CODE(return "unknown"); +} + +bool grpc_error_is_special(grpc_error *err) { + return err == GRPC_ERROR_NONE || err == GRPC_ERROR_OOM || + err == GRPC_ERROR_CANCELLED; +} + +#ifndef NDEBUG +grpc_error *grpc_error_ref(grpc_error *err, const char *file, int line) { + if (grpc_error_is_special(err)) return err; + if (GRPC_TRACER_ON(grpc_trace_error_refcount)) { + gpr_log(GPR_DEBUG, "%p: %" PRIdPTR " -> %" PRIdPTR " [%s:%d]", err, + gpr_atm_no_barrier_load(&err->atomics.refs.count), + gpr_atm_no_barrier_load(&err->atomics.refs.count) + 1, file, line); + } + gpr_ref(&err->atomics.refs); + return err; +} +#else +grpc_error *grpc_error_ref(grpc_error *err) { + if (grpc_error_is_special(err)) return err; + gpr_ref(&err->atomics.refs); + return err; +} +#endif + +static void unref_errs(grpc_error *err) { + uint8_t slot = err->first_err; + while (slot != UINT8_MAX) { + grpc_linked_error *lerr = (grpc_linked_error *)(err->arena + slot); + GRPC_ERROR_UNREF(lerr->err); + GPR_ASSERT(err->last_err == slot ? lerr->next == UINT8_MAX + : lerr->next != UINT8_MAX); + slot = lerr->next; + } +} + +static void unref_slice(grpc_slice slice) { + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + grpc_slice_unref_internal(&exec_ctx, slice); + grpc_exec_ctx_finish(&exec_ctx); +} + +static void unref_strs(grpc_error *err) { + for (size_t which = 0; which < GRPC_ERROR_STR_MAX; ++which) { + uint8_t slot = err->strs[which]; + if (slot != UINT8_MAX) { + unref_slice(*(grpc_slice *)(err->arena + slot)); + } + } +} + +static void error_destroy(grpc_error *err) { + GPR_ASSERT(!grpc_error_is_special(err)); + unref_errs(err); + unref_strs(err); + gpr_free((void *)gpr_atm_acq_load(&err->atomics.error_string)); + gpr_free(err); +} + +#ifndef NDEBUG +void grpc_error_unref(grpc_error *err, const char *file, int line) { + if (grpc_error_is_special(err)) return; + if (GRPC_TRACER_ON(grpc_trace_error_refcount)) { + gpr_log(GPR_DEBUG, "%p: %" PRIdPTR " -> %" PRIdPTR " [%s:%d]", err, + gpr_atm_no_barrier_load(&err->atomics.refs.count), + gpr_atm_no_barrier_load(&err->atomics.refs.count) - 1, file, line); + } + if (gpr_unref(&err->atomics.refs)) { + error_destroy(err); + } +} +#else +void grpc_error_unref(grpc_error *err) { + if (grpc_error_is_special(err)) return; + if (gpr_unref(&err->atomics.refs)) { + error_destroy(err); + } +} +#endif + +static uint8_t get_placement(grpc_error **err, size_t size) { + GPR_ASSERT(*err); + uint8_t slots = (uint8_t)(size / sizeof(intptr_t)); + if ((*err)->arena_size + slots > (*err)->arena_capacity) { + (*err)->arena_capacity = + (uint8_t)GPR_MIN(UINT8_MAX - 1, (3 * (*err)->arena_capacity / 2)); + if ((*err)->arena_size + slots > (*err)->arena_capacity) { + return UINT8_MAX; + } +#ifndef NDEBUG + grpc_error *orig = *err; +#endif + *err = (grpc_error *)gpr_realloc( + *err, sizeof(grpc_error) + (*err)->arena_capacity * sizeof(intptr_t)); +#ifndef NDEBUG + if (GRPC_TRACER_ON(grpc_trace_error_refcount)) { + if (*err != orig) { + gpr_log(GPR_DEBUG, "realloc %p -> %p", orig, *err); + } + } +#endif + } + uint8_t placement = (*err)->arena_size; + (*err)->arena_size = (uint8_t)((*err)->arena_size + slots); + return placement; +} + +static void internal_set_int(grpc_error **err, grpc_error_ints which, + intptr_t value) { + uint8_t slot = (*err)->ints[which]; + if (slot == UINT8_MAX) { + slot = get_placement(err, sizeof(value)); + if (slot == UINT8_MAX) { + gpr_log(GPR_ERROR, "Error %p is full, dropping int {\"%s\":%" PRIiPTR "}", + *err, error_int_name(which), value); + return; + } + } + (*err)->ints[which] = slot; + (*err)->arena[slot] = value; +} + +static void internal_set_str(grpc_error **err, grpc_error_strs which, + grpc_slice value) { + uint8_t slot = (*err)->strs[which]; + if (slot == UINT8_MAX) { + slot = get_placement(err, sizeof(value)); + if (slot == UINT8_MAX) { + const char *str = grpc_slice_to_c_string(value); + gpr_log(GPR_ERROR, "Error %p is full, dropping string {\"%s\":\"%s\"}", + *err, error_str_name(which), str); + gpr_free((void *)str); + return; + } + } else { + unref_slice(*(grpc_slice *)((*err)->arena + slot)); + } + (*err)->strs[which] = slot; + memcpy((*err)->arena + slot, &value, sizeof(value)); +} + +static char *fmt_time(gpr_timespec tm); +static void internal_set_time(grpc_error **err, grpc_error_times which, + gpr_timespec value) { + uint8_t slot = (*err)->times[which]; + if (slot == UINT8_MAX) { + slot = get_placement(err, sizeof(value)); + if (slot == UINT8_MAX) { + const char *time_str = fmt_time(value); + gpr_log(GPR_ERROR, "Error %p is full, dropping \"%s\":\"%s\"}", *err, + error_time_name(which), time_str); + gpr_free((void *)time_str); + return; + } + } + (*err)->times[which] = slot; + memcpy((*err)->arena + slot, &value, sizeof(value)); +} + +static void internal_add_error(grpc_error **err, grpc_error *new_err) { + grpc_linked_error new_last = {new_err, UINT8_MAX}; + uint8_t slot = get_placement(err, sizeof(grpc_linked_error)); + if (slot == UINT8_MAX) { + gpr_log(GPR_ERROR, "Error %p is full, dropping error %p = %s", *err, + new_err, grpc_error_string(new_err)); + GRPC_ERROR_UNREF(new_err); + return; + } + if ((*err)->first_err == UINT8_MAX) { + GPR_ASSERT((*err)->last_err == UINT8_MAX); + (*err)->last_err = slot; + (*err)->first_err = slot; + } else { + GPR_ASSERT((*err)->last_err != UINT8_MAX); + grpc_linked_error *old_last = + (grpc_linked_error *)((*err)->arena + (*err)->last_err); + old_last->next = slot; + (*err)->last_err = slot; + } + memcpy((*err)->arena + slot, &new_last, sizeof(grpc_linked_error)); +} + +#define SLOTS_PER_INT (sizeof(intptr_t) / sizeof(intptr_t)) +#define SLOTS_PER_STR (sizeof(grpc_slice) / sizeof(intptr_t)) +#define SLOTS_PER_TIME (sizeof(gpr_timespec) / sizeof(intptr_t)) +#define SLOTS_PER_LINKED_ERROR (sizeof(grpc_linked_error) / sizeof(intptr_t)) + +// size of storing one int and two slices and a timespec. For line, desc, file, +// and time created +#define DEFAULT_ERROR_CAPACITY \ + (SLOTS_PER_INT + (SLOTS_PER_STR * 2) + SLOTS_PER_TIME) + +// It is very common to include and extra int and string in an error +#define SURPLUS_CAPACITY (2 * SLOTS_PER_INT + SLOTS_PER_TIME) + +grpc_error *grpc_error_create(const char *file, int line, grpc_slice desc, + grpc_error **referencing, + size_t num_referencing) { + GPR_TIMER_BEGIN("grpc_error_create", 0); + uint8_t initial_arena_capacity = (uint8_t)( + DEFAULT_ERROR_CAPACITY + + (uint8_t)(num_referencing * SLOTS_PER_LINKED_ERROR) + SURPLUS_CAPACITY); + grpc_error *err = (grpc_error *)gpr_malloc( + sizeof(*err) + initial_arena_capacity * sizeof(intptr_t)); + if (err == NULL) { // TODO(ctiller): make gpr_malloc return NULL + return GRPC_ERROR_OOM; + } +#ifndef NDEBUG + if (GRPC_TRACER_ON(grpc_trace_error_refcount)) { + gpr_log(GPR_DEBUG, "%p create [%s:%d]", err, file, line); + } +#endif + + err->arena_size = 0; + err->arena_capacity = initial_arena_capacity; + err->first_err = UINT8_MAX; + err->last_err = UINT8_MAX; + + memset(err->ints, UINT8_MAX, GRPC_ERROR_INT_MAX); + memset(err->strs, UINT8_MAX, GRPC_ERROR_STR_MAX); + memset(err->times, UINT8_MAX, GRPC_ERROR_TIME_MAX); + + internal_set_int(&err, GRPC_ERROR_INT_FILE_LINE, line); + internal_set_str(&err, GRPC_ERROR_STR_FILE, + grpc_slice_from_static_string(file)); + internal_set_str(&err, GRPC_ERROR_STR_DESCRIPTION, desc); + + for (size_t i = 0; i < num_referencing; ++i) { + if (referencing[i] == GRPC_ERROR_NONE) continue; + internal_add_error( + &err, + GRPC_ERROR_REF( + referencing[i])); // TODO(ncteisen), change ownership semantics + } + + internal_set_time(&err, GRPC_ERROR_TIME_CREATED, gpr_now(GPR_CLOCK_REALTIME)); + + gpr_atm_no_barrier_store(&err->atomics.error_string, 0); + gpr_ref_init(&err->atomics.refs, 1); + GPR_TIMER_END("grpc_error_create", 0); + return err; +} + +static void ref_strs(grpc_error *err) { + for (size_t i = 0; i < GRPC_ERROR_STR_MAX; ++i) { + uint8_t slot = err->strs[i]; + if (slot != UINT8_MAX) { + grpc_slice_ref_internal(*(grpc_slice *)(err->arena + slot)); + } + } +} + +static void ref_errs(grpc_error *err) { + uint8_t slot = err->first_err; + while (slot != UINT8_MAX) { + grpc_linked_error *lerr = (grpc_linked_error *)(err->arena + slot); + GRPC_ERROR_REF(lerr->err); + slot = lerr->next; + } +} + +static grpc_error *copy_error_and_unref(grpc_error *in) { + GPR_TIMER_BEGIN("copy_error_and_unref", 0); + grpc_error *out; + if (grpc_error_is_special(in)) { + out = GRPC_ERROR_CREATE_FROM_STATIC_STRING("unknown"); + if (in == GRPC_ERROR_NONE) { + internal_set_str(&out, GRPC_ERROR_STR_DESCRIPTION, + grpc_slice_from_static_string("no error")); + internal_set_int(&out, GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_OK); + } else if (in == GRPC_ERROR_OOM) { + internal_set_str(&out, GRPC_ERROR_STR_DESCRIPTION, + grpc_slice_from_static_string("oom")); + } else if (in == GRPC_ERROR_CANCELLED) { + internal_set_str(&out, GRPC_ERROR_STR_DESCRIPTION, + grpc_slice_from_static_string("cancelled")); + internal_set_int(&out, GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_CANCELLED); + } + } else if (gpr_ref_is_unique(&in->atomics.refs)) { + out = in; + } else { + uint8_t new_arena_capacity = in->arena_capacity; + // the returned err will be added to, so we ensure this is room to avoid + // unneeded allocations. + if (in->arena_capacity - in->arena_size < (uint8_t)SLOTS_PER_STR) { + new_arena_capacity = (uint8_t)(3 * new_arena_capacity / 2); + } + out = (grpc_error *)gpr_malloc(sizeof(*in) + + new_arena_capacity * sizeof(intptr_t)); +#ifndef NDEBUG + if (GRPC_TRACER_ON(grpc_trace_error_refcount)) { + gpr_log(GPR_DEBUG, "%p create copying %p", out, in); + } +#endif + // bulk memcpy of the rest of the struct. + size_t skip = sizeof(&out->atomics); + memcpy((void *)((uintptr_t)out + skip), (void *)((uintptr_t)in + skip), + sizeof(*in) + (in->arena_size * sizeof(intptr_t)) - skip); + // manually set the atomics and the new capacity + gpr_atm_no_barrier_store(&out->atomics.error_string, 0); + gpr_ref_init(&out->atomics.refs, 1); + out->arena_capacity = new_arena_capacity; + ref_strs(out); + ref_errs(out); + GRPC_ERROR_UNREF(in); + } + GPR_TIMER_END("copy_error_and_unref", 0); + return out; +} + +grpc_error *grpc_error_set_int(grpc_error *src, grpc_error_ints which, + intptr_t value) { + GPR_TIMER_BEGIN("grpc_error_set_int", 0); + grpc_error *new_err = copy_error_and_unref(src); + internal_set_int(&new_err, which, value); + GPR_TIMER_END("grpc_error_set_int", 0); + return new_err; +} + +typedef struct { + grpc_error *error; + grpc_status_code code; + const char *msg; +} special_error_status_map; +static special_error_status_map error_status_map[] = { + {GRPC_ERROR_NONE, GRPC_STATUS_OK, ""}, + {GRPC_ERROR_CANCELLED, GRPC_STATUS_CANCELLED, "Cancelled"}, + {GRPC_ERROR_OOM, GRPC_STATUS_RESOURCE_EXHAUSTED, "Out of memory"}, +}; + +bool grpc_error_get_int(grpc_error *err, grpc_error_ints which, intptr_t *p) { + GPR_TIMER_BEGIN("grpc_error_get_int", 0); + if (grpc_error_is_special(err)) { + if (which == GRPC_ERROR_INT_GRPC_STATUS) { + for (size_t i = 0; i < GPR_ARRAY_SIZE(error_status_map); i++) { + if (error_status_map[i].error == err) { + if (p != NULL) *p = error_status_map[i].code; + GPR_TIMER_END("grpc_error_get_int", 0); + return true; + } + } + } + GPR_TIMER_END("grpc_error_get_int", 0); + return false; + } + uint8_t slot = err->ints[which]; + if (slot != UINT8_MAX) { + if (p != NULL) *p = err->arena[slot]; + GPR_TIMER_END("grpc_error_get_int", 0); + return true; + } + GPR_TIMER_END("grpc_error_get_int", 0); + return false; +} + +grpc_error *grpc_error_set_str(grpc_error *src, grpc_error_strs which, + grpc_slice str) { + GPR_TIMER_BEGIN("grpc_error_set_str", 0); + grpc_error *new_err = copy_error_and_unref(src); + internal_set_str(&new_err, which, str); + GPR_TIMER_END("grpc_error_set_str", 0); + return new_err; +} + +bool grpc_error_get_str(grpc_error *err, grpc_error_strs which, + grpc_slice *str) { + if (grpc_error_is_special(err)) { + if (which == GRPC_ERROR_STR_GRPC_MESSAGE) { + for (size_t i = 0; i < GPR_ARRAY_SIZE(error_status_map); i++) { + if (error_status_map[i].error == err) { + *str = grpc_slice_from_static_string(error_status_map[i].msg); + return true; + } + } + } + return false; + } + uint8_t slot = err->strs[which]; + if (slot != UINT8_MAX) { + *str = *(grpc_slice *)(err->arena + slot); + return true; + } else { + return false; + } +} + +grpc_error *grpc_error_add_child(grpc_error *src, grpc_error *child) { + GPR_TIMER_BEGIN("grpc_error_add_child", 0); + grpc_error *new_err = copy_error_and_unref(src); + internal_add_error(&new_err, child); + GPR_TIMER_END("grpc_error_add_child", 0); + return new_err; +} + +static const char *no_error_string = "\"No Error\""; +static const char *oom_error_string = "\"Out of memory\""; +static const char *cancelled_error_string = "\"Cancelled\""; + +typedef struct { + char *key; + char *value; +} kv_pair; + +typedef struct { + kv_pair *kvs; + size_t num_kvs; + size_t cap_kvs; +} kv_pairs; + +static void append_chr(char c, char **s, size_t *sz, size_t *cap) { + if (*sz == *cap) { + *cap = GPR_MAX(8, 3 * *cap / 2); + *s = (char *)gpr_realloc(*s, *cap); + } + (*s)[(*sz)++] = c; +} + +static void append_str(const char *str, char **s, size_t *sz, size_t *cap) { + for (const char *c = str; *c; c++) { + append_chr(*c, s, sz, cap); + } +} + +static void append_esc_str(const uint8_t *str, size_t len, char **s, size_t *sz, + size_t *cap) { + static const char *hex = "0123456789abcdef"; + append_chr('"', s, sz, cap); + for (size_t i = 0; i < len; i++, str++) { + if (*str < 32 || *str >= 127) { + append_chr('\\', s, sz, cap); + switch (*str) { + case '\b': + append_chr('b', s, sz, cap); + break; + case '\f': + append_chr('f', s, sz, cap); + break; + case '\n': + append_chr('n', s, sz, cap); + break; + case '\r': + append_chr('r', s, sz, cap); + break; + case '\t': + append_chr('t', s, sz, cap); + break; + default: + append_chr('u', s, sz, cap); + append_chr('0', s, sz, cap); + append_chr('0', s, sz, cap); + append_chr(hex[*str >> 4], s, sz, cap); + append_chr(hex[*str & 0x0f], s, sz, cap); + break; + } + } else { + append_chr((char)*str, s, sz, cap); + } + } + append_chr('"', s, sz, cap); +} + +static void append_kv(kv_pairs *kvs, char *key, char *value) { + if (kvs->num_kvs == kvs->cap_kvs) { + kvs->cap_kvs = GPR_MAX(3 * kvs->cap_kvs / 2, 4); + kvs->kvs = + (kv_pair *)gpr_realloc(kvs->kvs, sizeof(*kvs->kvs) * kvs->cap_kvs); + } + kvs->kvs[kvs->num_kvs].key = key; + kvs->kvs[kvs->num_kvs].value = value; + kvs->num_kvs++; +} + +static char *key_int(grpc_error_ints which) { + return gpr_strdup(error_int_name(which)); +} + +static char *fmt_int(intptr_t p) { + char *s; + gpr_asprintf(&s, "%" PRIdPTR, p); + return s; +} + +static void collect_ints_kvs(grpc_error *err, kv_pairs *kvs) { + for (size_t which = 0; which < GRPC_ERROR_INT_MAX; ++which) { + uint8_t slot = err->ints[which]; + if (slot != UINT8_MAX) { + append_kv(kvs, key_int((grpc_error_ints)which), + fmt_int(err->arena[slot])); + } + } +} + +static char *key_str(grpc_error_strs which) { + return gpr_strdup(error_str_name(which)); +} + +static char *fmt_str(grpc_slice slice) { + char *s = NULL; + size_t sz = 0; + size_t cap = 0; + append_esc_str((const uint8_t *)GRPC_SLICE_START_PTR(slice), + GRPC_SLICE_LENGTH(slice), &s, &sz, &cap); + append_chr(0, &s, &sz, &cap); + return s; +} + +static void collect_strs_kvs(grpc_error *err, kv_pairs *kvs) { + for (size_t which = 0; which < GRPC_ERROR_STR_MAX; ++which) { + uint8_t slot = err->strs[which]; + if (slot != UINT8_MAX) { + append_kv(kvs, key_str((grpc_error_strs)which), + fmt_str(*(grpc_slice *)(err->arena + slot))); + } + } +} + +static char *key_time(grpc_error_times which) { + return gpr_strdup(error_time_name(which)); +} + +static char *fmt_time(gpr_timespec tm) { + char *out; + const char *pfx = "!!"; + switch (tm.clock_type) { + case GPR_CLOCK_MONOTONIC: + pfx = "@monotonic:"; + break; + case GPR_CLOCK_REALTIME: + pfx = "@"; + break; + case GPR_CLOCK_PRECISE: + pfx = "@precise:"; + break; + case GPR_TIMESPAN: + pfx = ""; + break; + } + gpr_asprintf(&out, "\"%s%" PRId64 ".%09d\"", pfx, tm.tv_sec, tm.tv_nsec); + return out; +} + +static void collect_times_kvs(grpc_error *err, kv_pairs *kvs) { + for (size_t which = 0; which < GRPC_ERROR_TIME_MAX; ++which) { + uint8_t slot = err->times[which]; + if (slot != UINT8_MAX) { + append_kv(kvs, key_time((grpc_error_times)which), + fmt_time(*(gpr_timespec *)(err->arena + slot))); + } + } +} + +static void add_errs(grpc_error *err, char **s, size_t *sz, size_t *cap) { + uint8_t slot = err->first_err; + bool first = true; + while (slot != UINT8_MAX) { + grpc_linked_error *lerr = (grpc_linked_error *)(err->arena + slot); + if (!first) append_chr(',', s, sz, cap); + first = false; + const char *e = grpc_error_string(lerr->err); + append_str(e, s, sz, cap); + GPR_ASSERT(err->last_err == slot ? lerr->next == UINT8_MAX + : lerr->next != UINT8_MAX); + slot = lerr->next; + } +} + +static char *errs_string(grpc_error *err) { + char *s = NULL; + size_t sz = 0; + size_t cap = 0; + append_chr('[', &s, &sz, &cap); + add_errs(err, &s, &sz, &cap); + append_chr(']', &s, &sz, &cap); + append_chr(0, &s, &sz, &cap); + return s; +} + +static int cmp_kvs(const void *a, const void *b) { + const kv_pair *ka = (const kv_pair *)a; + const kv_pair *kb = (const kv_pair *)b; + return strcmp(ka->key, kb->key); +} + +static char *finish_kvs(kv_pairs *kvs) { + char *s = NULL; + size_t sz = 0; + size_t cap = 0; + + append_chr('{', &s, &sz, &cap); + for (size_t i = 0; i < kvs->num_kvs; i++) { + if (i != 0) append_chr(',', &s, &sz, &cap); + append_esc_str((const uint8_t *)kvs->kvs[i].key, strlen(kvs->kvs[i].key), + &s, &sz, &cap); + gpr_free(kvs->kvs[i].key); + append_chr(':', &s, &sz, &cap); + append_str(kvs->kvs[i].value, &s, &sz, &cap); + gpr_free(kvs->kvs[i].value); + } + append_chr('}', &s, &sz, &cap); + append_chr(0, &s, &sz, &cap); + + gpr_free(kvs->kvs); + return s; +} + +const char *grpc_error_string(grpc_error *err) { + GPR_TIMER_BEGIN("grpc_error_string", 0); + if (err == GRPC_ERROR_NONE) return no_error_string; + if (err == GRPC_ERROR_OOM) return oom_error_string; + if (err == GRPC_ERROR_CANCELLED) return cancelled_error_string; + + void *p = (void *)gpr_atm_acq_load(&err->atomics.error_string); + if (p != NULL) { + GPR_TIMER_END("grpc_error_string", 0); + return (const char *)p; + } + + kv_pairs kvs; + memset(&kvs, 0, sizeof(kvs)); + + collect_ints_kvs(err, &kvs); + collect_strs_kvs(err, &kvs); + collect_times_kvs(err, &kvs); + if (err->first_err != UINT8_MAX) { + append_kv(&kvs, gpr_strdup("referenced_errors"), errs_string(err)); + } + + qsort(kvs.kvs, kvs.num_kvs, sizeof(kv_pair), cmp_kvs); + + char *out = finish_kvs(&kvs); + + if (!gpr_atm_rel_cas(&err->atomics.error_string, 0, (gpr_atm)out)) { + gpr_free(out); + out = (char *)gpr_atm_no_barrier_load(&err->atomics.error_string); + } + + GPR_TIMER_END("grpc_error_string", 0); + return out; +} + +grpc_error *grpc_os_error(const char *file, int line, int err, + const char *call_name) { + return grpc_error_set_str( + grpc_error_set_str( + grpc_error_set_int( + grpc_error_create(file, line, + grpc_slice_from_static_string("OS Error"), NULL, + 0), + GRPC_ERROR_INT_ERRNO, err), + GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string(strerror(err))), + GRPC_ERROR_STR_SYSCALL, grpc_slice_from_copied_string(call_name)); +} + +#ifdef GPR_WINDOWS +grpc_error *grpc_wsa_error(const char *file, int line, int err, + const char *call_name) { + char *utf8_message = gpr_format_message(err); + grpc_error *error = grpc_error_set_str( + grpc_error_set_str( + grpc_error_set_int( + grpc_error_create(file, line, + grpc_slice_from_static_string("OS Error"), NULL, + 0), + GRPC_ERROR_INT_WSA_ERROR, err), + GRPC_ERROR_STR_OS_ERROR, grpc_slice_from_copied_string(utf8_message)), + GRPC_ERROR_STR_SYSCALL, grpc_slice_from_static_string(call_name)); + gpr_free(utf8_message); + return error; +} +#endif + +bool grpc_log_if_error(const char *what, grpc_error *error, const char *file, + int line) { + if (error == GRPC_ERROR_NONE) return true; + const char *msg = grpc_error_string(error); + gpr_log(file, line, GPR_LOG_SEVERITY_ERROR, "%s: %s", what, msg); + GRPC_ERROR_UNREF(error); + return false; +} diff --git a/src/core/lib/iomgr/ev_epoll1_linux.c b/src/core/lib/iomgr/ev_epoll1_linux.c deleted file mode 100644 index 3ac12ab56f..0000000000 --- a/src/core/lib/iomgr/ev_epoll1_linux.c +++ /dev/null @@ -1,1267 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -/* This polling engine is only relevant on linux kernels supporting epoll() */ -#ifdef GRPC_LINUX_EPOLL - -#include "src/core/lib/iomgr/ev_epoll1_linux.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "src/core/lib/debug/stats.h" -#include "src/core/lib/iomgr/ev_posix.h" -#include "src/core/lib/iomgr/iomgr_internal.h" -#include "src/core/lib/iomgr/lockfree_event.h" -#include "src/core/lib/iomgr/wakeup_fd_posix.h" -#include "src/core/lib/profiling/timers.h" -#include "src/core/lib/support/block_annotate.h" -#include "src/core/lib/support/string.h" - -static grpc_wakeup_fd global_wakeup_fd; - -/******************************************************************************* - * Singleton epoll set related fields - */ - -#define MAX_EPOLL_EVENTS 100 -#define MAX_EPOLL_EVENTS_HANDLED_PER_ITERATION 1 - -/* NOTE ON SYNCHRONIZATION: - * - Fields in this struct are only modified by the designated poller. Hence - * there is no need for any locks to protect the struct. - * - num_events and cursor fields have to be of atomic type to provide memory - * visibility guarantees only. i.e In case of multiple pollers, the designated - * polling thread keeps changing; the thread that wrote these values may be - * different from the thread reading the values - */ -typedef struct epoll_set { - int epfd; - - /* The epoll_events after the last call to epoll_wait() */ - struct epoll_event events[MAX_EPOLL_EVENTS]; - - /* The number of epoll_events after the last call to epoll_wait() */ - gpr_atm num_events; - - /* Index of the first event in epoll_events that has to be processed. This - * field is only valid if num_events > 0 */ - gpr_atm cursor; -} epoll_set; - -/* The global singleton epoll set */ -static epoll_set g_epoll_set; - -/* Must be called *only* once */ -static bool epoll_set_init() { - g_epoll_set.epfd = epoll_create1(EPOLL_CLOEXEC); - if (g_epoll_set.epfd < 0) { - gpr_log(GPR_ERROR, "epoll unavailable"); - return false; - } - - gpr_log(GPR_INFO, "grpc epoll fd: %d", g_epoll_set.epfd); - gpr_atm_no_barrier_store(&g_epoll_set.num_events, 0); - gpr_atm_no_barrier_store(&g_epoll_set.cursor, 0); - return true; -} - -/* epoll_set_init() MUST be called before calling this. */ -static void epoll_set_shutdown() { - if (g_epoll_set.epfd >= 0) { - close(g_epoll_set.epfd); - g_epoll_set.epfd = -1; - } -} - -/******************************************************************************* - * Fd Declarations - */ - -struct grpc_fd { - int fd; - - gpr_atm read_closure; - gpr_atm write_closure; - - struct grpc_fd *freelist_next; - - /* The pollset that last noticed that the fd is readable. The actual type - * stored in this is (grpc_pollset *) */ - gpr_atm read_notifier_pollset; - - grpc_iomgr_object iomgr_object; -}; - -static void fd_global_init(void); -static void fd_global_shutdown(void); - -/******************************************************************************* - * Pollset Declarations - */ - -typedef enum { UNKICKED, KICKED, DESIGNATED_POLLER } kick_state; - -static const char *kick_state_string(kick_state st) { - switch (st) { - case UNKICKED: - return "UNKICKED"; - case KICKED: - return "KICKED"; - case DESIGNATED_POLLER: - return "DESIGNATED_POLLER"; - } - GPR_UNREACHABLE_CODE(return "UNKNOWN"); -} - -struct grpc_pollset_worker { - kick_state state; - int kick_state_mutator; // which line of code last changed kick state - bool initialized_cv; - grpc_pollset_worker *next; - grpc_pollset_worker *prev; - gpr_cv cv; - grpc_closure_list schedule_on_end_work; -}; - -#define SET_KICK_STATE(worker, kick_state) \ - do { \ - (worker)->state = (kick_state); \ - (worker)->kick_state_mutator = __LINE__; \ - } while (false) - -#define MAX_NEIGHBORHOODS 1024 - -typedef struct pollset_neighborhood { - gpr_mu mu; - grpc_pollset *active_root; - char pad[GPR_CACHELINE_SIZE]; -} pollset_neighborhood; - -struct grpc_pollset { - gpr_mu mu; - pollset_neighborhood *neighborhood; - bool reassigning_neighborhood; - grpc_pollset_worker *root_worker; - bool kicked_without_poller; - - /* Set to true if the pollset is observed to have no workers available to - poll */ - bool seen_inactive; - bool shutting_down; /* Is the pollset shutting down ? */ - grpc_closure *shutdown_closure; /* Called after after shutdown is complete */ - - /* Number of workers who are *about-to* attach themselves to the pollset - * worker list */ - int begin_refs; - - grpc_pollset *next; - grpc_pollset *prev; -}; - -/******************************************************************************* - * Pollset-set Declarations - */ - -struct grpc_pollset_set { - char unused; -}; - -/******************************************************************************* - * Common helpers - */ - -static bool append_error(grpc_error **composite, grpc_error *error, - const char *desc) { - if (error == GRPC_ERROR_NONE) return true; - if (*composite == GRPC_ERROR_NONE) { - *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc); - } - *composite = grpc_error_add_child(*composite, error); - return false; -} - -/******************************************************************************* - * Fd Definitions - */ - -/* We need to keep a freelist not because of any concerns of malloc performance - * but instead so that implementations with multiple threads in (for example) - * epoll_wait deal with the race between pollset removal and incoming poll - * notifications. - * - * The problem is that the poller ultimately holds a reference to this - * object, so it is very difficult to know when is safe to free it, at least - * without some expensive synchronization. - * - * If we keep the object freelisted, in the worst case losing this race just - * becomes a spurious read notification on a reused fd. - */ - -/* The alarm system needs to be able to wakeup 'some poller' sometimes - * (specifically when a new alarm needs to be triggered earlier than the next - * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a - * case occurs. */ - -static grpc_fd *fd_freelist = NULL; -static gpr_mu fd_freelist_mu; - -static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); } - -static void fd_global_shutdown(void) { - gpr_mu_lock(&fd_freelist_mu); - gpr_mu_unlock(&fd_freelist_mu); - while (fd_freelist != NULL) { - grpc_fd *fd = fd_freelist; - fd_freelist = fd_freelist->freelist_next; - gpr_free(fd); - } - gpr_mu_destroy(&fd_freelist_mu); -} - -static grpc_fd *fd_create(int fd, const char *name) { - grpc_fd *new_fd = NULL; - - gpr_mu_lock(&fd_freelist_mu); - if (fd_freelist != NULL) { - new_fd = fd_freelist; - fd_freelist = fd_freelist->freelist_next; - } - gpr_mu_unlock(&fd_freelist_mu); - - if (new_fd == NULL) { - new_fd = (grpc_fd *)gpr_malloc(sizeof(grpc_fd)); - } - - new_fd->fd = fd; - grpc_lfev_init(&new_fd->read_closure); - grpc_lfev_init(&new_fd->write_closure); - gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL); - - new_fd->freelist_next = NULL; - - char *fd_name; - gpr_asprintf(&fd_name, "%s fd=%d", name, fd); - grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name); -#ifndef NDEBUG - if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { - gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, new_fd, fd_name); - } -#endif - gpr_free(fd_name); - - struct epoll_event ev; - ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); - ev.data.ptr = new_fd; - if (epoll_ctl(g_epoll_set.epfd, EPOLL_CTL_ADD, fd, &ev) != 0) { - gpr_log(GPR_ERROR, "epoll_ctl failed: %s", strerror(errno)); - } - - return new_fd; -} - -static int fd_wrapped_fd(grpc_fd *fd) { return fd->fd; } - -/* if 'releasing_fd' is true, it means that we are going to detach the internal - * fd from grpc_fd structure (i.e which means we should not be calling - * shutdown() syscall on that fd) */ -static void fd_shutdown_internal(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_error *why, bool releasing_fd) { - if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure, - GRPC_ERROR_REF(why))) { - if (!releasing_fd) { - shutdown(fd->fd, SHUT_RDWR); - } - grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why)); - } - GRPC_ERROR_UNREF(why); -} - -/* Might be called multiple times */ -static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) { - fd_shutdown_internal(exec_ctx, fd, why, false); -} - -static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *on_done, int *release_fd, - bool already_closed, const char *reason) { - grpc_error *error = GRPC_ERROR_NONE; - bool is_release_fd = (release_fd != NULL); - - if (!grpc_lfev_is_shutdown(&fd->read_closure)) { - fd_shutdown_internal(exec_ctx, fd, - GRPC_ERROR_CREATE_FROM_COPIED_STRING(reason), - is_release_fd); - } - - /* If release_fd is not NULL, we should be relinquishing control of the file - descriptor fd->fd (but we still own the grpc_fd structure). */ - if (is_release_fd) { - *release_fd = fd->fd; - } else if (!already_closed) { - close(fd->fd); - } - - GRPC_CLOSURE_SCHED(exec_ctx, on_done, GRPC_ERROR_REF(error)); - - grpc_iomgr_unregister_object(&fd->iomgr_object); - grpc_lfev_destroy(&fd->read_closure); - grpc_lfev_destroy(&fd->write_closure); - - gpr_mu_lock(&fd_freelist_mu); - fd->freelist_next = fd_freelist; - fd_freelist = fd; - gpr_mu_unlock(&fd_freelist_mu); -} - -static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, - grpc_fd *fd) { - gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset); - return (grpc_pollset *)notifier; -} - -static bool fd_is_shutdown(grpc_fd *fd) { - return grpc_lfev_is_shutdown(&fd->read_closure); -} - -static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure, "read"); -} - -static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure, "write"); -} - -static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_pollset *notifier) { - grpc_lfev_set_ready(exec_ctx, &fd->read_closure, "read"); - /* Use release store to match with acquire load in fd_get_read_notifier */ - gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier); -} - -static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - grpc_lfev_set_ready(exec_ctx, &fd->write_closure, "write"); -} - -/******************************************************************************* - * Pollset Definitions - */ - -GPR_TLS_DECL(g_current_thread_pollset); -GPR_TLS_DECL(g_current_thread_worker); - -/* The designated poller */ -static gpr_atm g_active_poller; - -static pollset_neighborhood *g_neighborhoods; -static size_t g_num_neighborhoods; - -/* Return true if first in list */ -static bool worker_insert(grpc_pollset *pollset, grpc_pollset_worker *worker) { - if (pollset->root_worker == NULL) { - pollset->root_worker = worker; - worker->next = worker->prev = worker; - return true; - } else { - worker->next = pollset->root_worker; - worker->prev = worker->next->prev; - worker->next->prev = worker; - worker->prev->next = worker; - return false; - } -} - -/* Return true if last in list */ -typedef enum { EMPTIED, NEW_ROOT, REMOVED } worker_remove_result; - -static worker_remove_result worker_remove(grpc_pollset *pollset, - grpc_pollset_worker *worker) { - if (worker == pollset->root_worker) { - if (worker == worker->next) { - pollset->root_worker = NULL; - return EMPTIED; - } else { - pollset->root_worker = worker->next; - worker->prev->next = worker->next; - worker->next->prev = worker->prev; - return NEW_ROOT; - } - } else { - worker->prev->next = worker->next; - worker->next->prev = worker->prev; - return REMOVED; - } -} - -static size_t choose_neighborhood(void) { - return (size_t)gpr_cpu_current_cpu() % g_num_neighborhoods; -} - -static grpc_error *pollset_global_init(void) { - gpr_tls_init(&g_current_thread_pollset); - gpr_tls_init(&g_current_thread_worker); - gpr_atm_no_barrier_store(&g_active_poller, 0); - global_wakeup_fd.read_fd = -1; - grpc_error *err = grpc_wakeup_fd_init(&global_wakeup_fd); - if (err != GRPC_ERROR_NONE) return err; - struct epoll_event ev; - ev.events = (uint32_t)(EPOLLIN | EPOLLET); - ev.data.ptr = &global_wakeup_fd; - if (epoll_ctl(g_epoll_set.epfd, EPOLL_CTL_ADD, global_wakeup_fd.read_fd, - &ev) != 0) { - return GRPC_OS_ERROR(errno, "epoll_ctl"); - } - g_num_neighborhoods = GPR_CLAMP(gpr_cpu_num_cores(), 1, MAX_NEIGHBORHOODS); - g_neighborhoods = (pollset_neighborhood *)gpr_zalloc( - sizeof(*g_neighborhoods) * g_num_neighborhoods); - for (size_t i = 0; i < g_num_neighborhoods; i++) { - gpr_mu_init(&g_neighborhoods[i].mu); - } - return GRPC_ERROR_NONE; -} - -static void pollset_global_shutdown(void) { - gpr_tls_destroy(&g_current_thread_pollset); - gpr_tls_destroy(&g_current_thread_worker); - if (global_wakeup_fd.read_fd != -1) grpc_wakeup_fd_destroy(&global_wakeup_fd); - for (size_t i = 0; i < g_num_neighborhoods; i++) { - gpr_mu_destroy(&g_neighborhoods[i].mu); - } - gpr_free(g_neighborhoods); -} - -static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { - gpr_mu_init(&pollset->mu); - *mu = &pollset->mu; - pollset->neighborhood = &g_neighborhoods[choose_neighborhood()]; - pollset->reassigning_neighborhood = false; - pollset->root_worker = NULL; - pollset->kicked_without_poller = false; - pollset->seen_inactive = true; - pollset->shutting_down = false; - pollset->shutdown_closure = NULL; - pollset->begin_refs = 0; - pollset->next = pollset->prev = NULL; -} - -static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { - gpr_mu_lock(&pollset->mu); - if (!pollset->seen_inactive) { - pollset_neighborhood *neighborhood = pollset->neighborhood; - gpr_mu_unlock(&pollset->mu); - retry_lock_neighborhood: - gpr_mu_lock(&neighborhood->mu); - gpr_mu_lock(&pollset->mu); - if (!pollset->seen_inactive) { - if (pollset->neighborhood != neighborhood) { - gpr_mu_unlock(&neighborhood->mu); - neighborhood = pollset->neighborhood; - gpr_mu_unlock(&pollset->mu); - goto retry_lock_neighborhood; - } - pollset->prev->next = pollset->next; - pollset->next->prev = pollset->prev; - if (pollset == pollset->neighborhood->active_root) { - pollset->neighborhood->active_root = - pollset->next == pollset ? NULL : pollset->next; - } - } - gpr_mu_unlock(&pollset->neighborhood->mu); - } - gpr_mu_unlock(&pollset->mu); - gpr_mu_destroy(&pollset->mu); -} - -static grpc_error *pollset_kick_all(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset) { - GPR_TIMER_BEGIN("pollset_kick_all", 0); - grpc_error *error = GRPC_ERROR_NONE; - if (pollset->root_worker != NULL) { - grpc_pollset_worker *worker = pollset->root_worker; - do { - GRPC_STATS_INC_POLLSET_KICK(exec_ctx); - switch (worker->state) { - case KICKED: - GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx); - break; - case UNKICKED: - SET_KICK_STATE(worker, KICKED); - if (worker->initialized_cv) { - GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); - gpr_cv_signal(&worker->cv); - } - break; - case DESIGNATED_POLLER: - GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx); - SET_KICK_STATE(worker, KICKED); - append_error(&error, grpc_wakeup_fd_wakeup(&global_wakeup_fd), - "pollset_kick_all"); - break; - } - - worker = worker->next; - } while (worker != pollset->root_worker); - } - // TODO: sreek. Check if we need to set 'kicked_without_poller' to true here - // in the else case - GPR_TIMER_END("pollset_kick_all", 0); - return error; -} - -static void pollset_maybe_finish_shutdown(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset) { - if (pollset->shutdown_closure != NULL && pollset->root_worker == NULL && - pollset->begin_refs == 0) { - GPR_TIMER_MARK("pollset_finish_shutdown", 0); - GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_closure, GRPC_ERROR_NONE); - pollset->shutdown_closure = NULL; - } -} - -static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_closure *closure) { - GPR_TIMER_BEGIN("pollset_shutdown", 0); - GPR_ASSERT(pollset->shutdown_closure == NULL); - GPR_ASSERT(!pollset->shutting_down); - pollset->shutdown_closure = closure; - pollset->shutting_down = true; - GRPC_LOG_IF_ERROR("pollset_shutdown", pollset_kick_all(exec_ctx, pollset)); - pollset_maybe_finish_shutdown(exec_ctx, pollset); - GPR_TIMER_END("pollset_shutdown", 0); -} - -static int poll_deadline_to_millis_timeout(gpr_timespec deadline, - gpr_timespec now) { - gpr_timespec timeout; - if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { - return -1; - } - - if (gpr_time_cmp(deadline, now) <= 0) { - return 0; - } - - static const gpr_timespec round_up = { - 0, /* tv_sec */ - GPR_NS_PER_MS - 1, /* tv_nsec */ - GPR_TIMESPAN /* clock_type */ - }; - timeout = gpr_time_sub(deadline, now); - int millis = gpr_time_to_millis(gpr_time_add(timeout, round_up)); - return millis >= 1 ? millis : 1; -} - -/* Process the epoll events found by do_epoll_wait() function. - - g_epoll_set.cursor points to the index of the first event to be processed - - This function then processes up-to MAX_EPOLL_EVENTS_PER_ITERATION and - updates the g_epoll_set.cursor - - NOTE ON SYNCRHONIZATION: Similar to do_epoll_wait(), this function is only - called by g_active_poller thread. So there is no need for synchronization - when accessing fields in g_epoll_set */ -static grpc_error *process_epoll_events(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset) { - static const char *err_desc = "process_events"; - grpc_error *error = GRPC_ERROR_NONE; - - GPR_TIMER_BEGIN("process_epoll_events", 0); - long num_events = gpr_atm_acq_load(&g_epoll_set.num_events); - long cursor = gpr_atm_acq_load(&g_epoll_set.cursor); - for (int idx = 0; - (idx < MAX_EPOLL_EVENTS_HANDLED_PER_ITERATION) && cursor != num_events; - idx++) { - long c = cursor++; - struct epoll_event *ev = &g_epoll_set.events[c]; - void *data_ptr = ev->data.ptr; - - if (data_ptr == &global_wakeup_fd) { - append_error(&error, grpc_wakeup_fd_consume_wakeup(&global_wakeup_fd), - err_desc); - } else { - grpc_fd *fd = (grpc_fd *)(data_ptr); - bool cancel = (ev->events & (EPOLLERR | EPOLLHUP)) != 0; - bool read_ev = (ev->events & (EPOLLIN | EPOLLPRI)) != 0; - bool write_ev = (ev->events & EPOLLOUT) != 0; - - if (read_ev || cancel) { - fd_become_readable(exec_ctx, fd, pollset); - } - - if (write_ev || cancel) { - fd_become_writable(exec_ctx, fd); - } - } - } - gpr_atm_rel_store(&g_epoll_set.cursor, cursor); - GPR_TIMER_END("process_epoll_events", 0); - return error; -} - -/* Do epoll_wait and store the events in g_epoll_set.events field. This does not - "process" any of the events yet; that is done in process_epoll_events(). - *See process_epoll_events() function for more details. - - NOTE ON SYNCHRONIZATION: At any point of time, only the g_active_poller - (i.e the designated poller thread) will be calling this function. So there is - no need for any synchronization when accesing fields in g_epoll_set */ -static grpc_error *do_epoll_wait(grpc_exec_ctx *exec_ctx, grpc_pollset *ps, - gpr_timespec now, gpr_timespec deadline) { - GPR_TIMER_BEGIN("do_epoll_wait", 0); - - int r; - int timeout = poll_deadline_to_millis_timeout(deadline, now); - if (timeout != 0) { - GRPC_SCHEDULING_START_BLOCKING_REGION; - } - do { - GRPC_STATS_INC_SYSCALL_POLL(exec_ctx); - r = epoll_wait(g_epoll_set.epfd, g_epoll_set.events, MAX_EPOLL_EVENTS, - timeout); - } while (r < 0 && errno == EINTR); - if (timeout != 0) { - GRPC_SCHEDULING_END_BLOCKING_REGION; - } - - if (r < 0) return GRPC_OS_ERROR(errno, "epoll_wait"); - - GRPC_STATS_INC_POLL_EVENTS_RETURNED(exec_ctx, r); - - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "ps: %p poll got %d events", ps, r); - } - - gpr_atm_rel_store(&g_epoll_set.num_events, r); - gpr_atm_rel_store(&g_epoll_set.cursor, 0); - - GPR_TIMER_END("do_epoll_wait", 0); - return GRPC_ERROR_NONE; -} - -static bool begin_worker(grpc_pollset *pollset, grpc_pollset_worker *worker, - grpc_pollset_worker **worker_hdl, gpr_timespec *now, - gpr_timespec deadline) { - GPR_TIMER_BEGIN("begin_worker", 0); - if (worker_hdl != NULL) *worker_hdl = worker; - worker->initialized_cv = false; - SET_KICK_STATE(worker, UNKICKED); - worker->schedule_on_end_work = (grpc_closure_list)GRPC_CLOSURE_LIST_INIT; - pollset->begin_refs++; - - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, "PS:%p BEGIN_STARTS:%p", pollset, worker); - } - - if (pollset->seen_inactive) { - // pollset has been observed to be inactive, we need to move back to the - // active list - bool is_reassigning = false; - if (!pollset->reassigning_neighborhood) { - is_reassigning = true; - pollset->reassigning_neighborhood = true; - pollset->neighborhood = &g_neighborhoods[choose_neighborhood()]; - } - pollset_neighborhood *neighborhood = pollset->neighborhood; - gpr_mu_unlock(&pollset->mu); - // pollset unlocked: state may change (even worker->kick_state) - retry_lock_neighborhood: - gpr_mu_lock(&neighborhood->mu); - gpr_mu_lock(&pollset->mu); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, "PS:%p BEGIN_REORG:%p kick_state=%s is_reassigning=%d", - pollset, worker, kick_state_string(worker->state), - is_reassigning); - } - if (pollset->seen_inactive) { - if (neighborhood != pollset->neighborhood) { - gpr_mu_unlock(&neighborhood->mu); - neighborhood = pollset->neighborhood; - gpr_mu_unlock(&pollset->mu); - goto retry_lock_neighborhood; - } - - /* In the brief time we released the pollset locks above, the worker MAY - have been kicked. In this case, the worker should get out of this - pollset ASAP and hence this should neither add the pollset to - neighborhood nor mark the pollset as active. - - On a side note, the only way a worker's kick state could have changed - at this point is if it were "kicked specifically". Since the worker has - not added itself to the pollset yet (by calling worker_insert()), it is - not visible in the "kick any" path yet */ - if (worker->state == UNKICKED) { - pollset->seen_inactive = false; - if (neighborhood->active_root == NULL) { - neighborhood->active_root = pollset->next = pollset->prev = pollset; - /* Make this the designated poller if there isn't one already */ - if (worker->state == UNKICKED && - gpr_atm_no_barrier_cas(&g_active_poller, 0, (gpr_atm)worker)) { - SET_KICK_STATE(worker, DESIGNATED_POLLER); - } - } else { - pollset->next = neighborhood->active_root; - pollset->prev = pollset->next->prev; - pollset->next->prev = pollset->prev->next = pollset; - } - } - } - if (is_reassigning) { - GPR_ASSERT(pollset->reassigning_neighborhood); - pollset->reassigning_neighborhood = false; - } - gpr_mu_unlock(&neighborhood->mu); - } - - worker_insert(pollset, worker); - pollset->begin_refs--; - if (worker->state == UNKICKED && !pollset->kicked_without_poller) { - GPR_ASSERT(gpr_atm_no_barrier_load(&g_active_poller) != (gpr_atm)worker); - worker->initialized_cv = true; - gpr_cv_init(&worker->cv); - while (worker->state == UNKICKED && !pollset->shutting_down) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, "PS:%p BEGIN_WAIT:%p kick_state=%s shutdown=%d", - pollset, worker, kick_state_string(worker->state), - pollset->shutting_down); - } - - if (gpr_cv_wait(&worker->cv, &pollset->mu, deadline) && - worker->state == UNKICKED) { - /* If gpr_cv_wait returns true (i.e a timeout), pretend that the worker - received a kick */ - SET_KICK_STATE(worker, KICKED); - } - } - *now = gpr_now(now->clock_type); - } - - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, - "PS:%p BEGIN_DONE:%p kick_state=%s shutdown=%d " - "kicked_without_poller: %d", - pollset, worker, kick_state_string(worker->state), - pollset->shutting_down, pollset->kicked_without_poller); - } - - /* We release pollset lock in this function at a couple of places: - * 1. Briefly when assigning pollset to a neighborhood - * 2. When doing gpr_cv_wait() - * It is possible that 'kicked_without_poller' was set to true during (1) and - * 'shutting_down' is set to true during (1) or (2). If either of them is - * true, this worker cannot do polling */ - /* TODO(sreek): Perhaps there is a better way to handle kicked_without_poller - * case; especially when the worker is the DESIGNATED_POLLER */ - - if (pollset->kicked_without_poller) { - pollset->kicked_without_poller = false; - GPR_TIMER_END("begin_worker", 0); - return false; - } - - GPR_TIMER_END("begin_worker", 0); - return worker->state == DESIGNATED_POLLER && !pollset->shutting_down; -} - -static bool check_neighborhood_for_available_poller( - grpc_exec_ctx *exec_ctx, pollset_neighborhood *neighborhood) { - GPR_TIMER_BEGIN("check_neighborhood_for_available_poller", 0); - bool found_worker = false; - do { - grpc_pollset *inspect = neighborhood->active_root; - if (inspect == NULL) { - break; - } - gpr_mu_lock(&inspect->mu); - GPR_ASSERT(!inspect->seen_inactive); - grpc_pollset_worker *inspect_worker = inspect->root_worker; - if (inspect_worker != NULL) { - do { - switch (inspect_worker->state) { - case UNKICKED: - if (gpr_atm_no_barrier_cas(&g_active_poller, 0, - (gpr_atm)inspect_worker)) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, " .. choose next poller to be %p", - inspect_worker); - } - SET_KICK_STATE(inspect_worker, DESIGNATED_POLLER); - if (inspect_worker->initialized_cv) { - GPR_TIMER_MARK("signal worker", 0); - GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); - gpr_cv_signal(&inspect_worker->cv); - } - } else { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, " .. beaten to choose next poller"); - } - } - // even if we didn't win the cas, there's a worker, we can stop - found_worker = true; - break; - case KICKED: - break; - case DESIGNATED_POLLER: - found_worker = true; // ok, so someone else found the worker, but - // we'll accept that - break; - } - inspect_worker = inspect_worker->next; - } while (!found_worker && inspect_worker != inspect->root_worker); - } - if (!found_worker) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, " .. mark pollset %p inactive", inspect); - } - inspect->seen_inactive = true; - if (inspect == neighborhood->active_root) { - neighborhood->active_root = - inspect->next == inspect ? NULL : inspect->next; - } - inspect->next->prev = inspect->prev; - inspect->prev->next = inspect->next; - inspect->next = inspect->prev = NULL; - } - gpr_mu_unlock(&inspect->mu); - } while (!found_worker); - GPR_TIMER_END("check_neighborhood_for_available_poller", 0); - return found_worker; -} - -static void end_worker(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker *worker, - grpc_pollset_worker **worker_hdl) { - GPR_TIMER_BEGIN("end_worker", 0); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p END_WORKER:%p", pollset, worker); - } - if (worker_hdl != NULL) *worker_hdl = NULL; - /* Make sure we appear kicked */ - SET_KICK_STATE(worker, KICKED); - grpc_closure_list_move(&worker->schedule_on_end_work, - &exec_ctx->closure_list); - if (gpr_atm_no_barrier_load(&g_active_poller) == (gpr_atm)worker) { - if (worker->next != worker && worker->next->state == UNKICKED) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, " .. choose next poller to be peer %p", worker); - } - GPR_ASSERT(worker->next->initialized_cv); - gpr_atm_no_barrier_store(&g_active_poller, (gpr_atm)worker->next); - SET_KICK_STATE(worker->next, DESIGNATED_POLLER); - GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); - gpr_cv_signal(&worker->next->cv); - if (grpc_exec_ctx_has_work(exec_ctx)) { - gpr_mu_unlock(&pollset->mu); - grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&pollset->mu); - } - } else { - gpr_atm_no_barrier_store(&g_active_poller, 0); - size_t poller_neighborhood_idx = - (size_t)(pollset->neighborhood - g_neighborhoods); - gpr_mu_unlock(&pollset->mu); - bool found_worker = false; - bool scan_state[MAX_NEIGHBORHOODS]; - for (size_t i = 0; !found_worker && i < g_num_neighborhoods; i++) { - pollset_neighborhood *neighborhood = - &g_neighborhoods[(poller_neighborhood_idx + i) % - g_num_neighborhoods]; - if (gpr_mu_trylock(&neighborhood->mu)) { - found_worker = - check_neighborhood_for_available_poller(exec_ctx, neighborhood); - gpr_mu_unlock(&neighborhood->mu); - scan_state[i] = true; - } else { - scan_state[i] = false; - } - } - for (size_t i = 0; !found_worker && i < g_num_neighborhoods; i++) { - if (scan_state[i]) continue; - pollset_neighborhood *neighborhood = - &g_neighborhoods[(poller_neighborhood_idx + i) % - g_num_neighborhoods]; - gpr_mu_lock(&neighborhood->mu); - found_worker = - check_neighborhood_for_available_poller(exec_ctx, neighborhood); - gpr_mu_unlock(&neighborhood->mu); - } - grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&pollset->mu); - } - } else if (grpc_exec_ctx_has_work(exec_ctx)) { - gpr_mu_unlock(&pollset->mu); - grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&pollset->mu); - } - if (worker->initialized_cv) { - gpr_cv_destroy(&worker->cv); - } - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, " .. remove worker"); - } - if (EMPTIED == worker_remove(pollset, worker)) { - pollset_maybe_finish_shutdown(exec_ctx, pollset); - } - GPR_ASSERT(gpr_atm_no_barrier_load(&g_active_poller) != (gpr_atm)worker); - GPR_TIMER_END("end_worker", 0); -} - -/* pollset->po.mu lock must be held by the caller before calling this. - The function pollset_work() may temporarily release the lock (pollset->po.mu) - during the course of its execution but it will always re-acquire the lock and - ensure that it is held by the time the function returns */ -static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *ps, - grpc_pollset_worker **worker_hdl, - gpr_timespec now, gpr_timespec deadline) { - grpc_pollset_worker worker; - grpc_error *error = GRPC_ERROR_NONE; - static const char *err_desc = "pollset_work"; - GPR_TIMER_BEGIN("pollset_work", 0); - if (ps->kicked_without_poller) { - ps->kicked_without_poller = false; - GPR_TIMER_END("pollset_work", 0); - return GRPC_ERROR_NONE; - } - - if (begin_worker(ps, &worker, worker_hdl, &now, deadline)) { - gpr_tls_set(&g_current_thread_pollset, (intptr_t)ps); - gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); - GPR_ASSERT(!ps->shutting_down); - GPR_ASSERT(!ps->seen_inactive); - - gpr_mu_unlock(&ps->mu); /* unlock */ - /* This is the designated polling thread at this point and should ideally do - polling. However, if there are unprocessed events left from a previous - call to do_epoll_wait(), skip calling epoll_wait() in this iteration and - process the pending epoll events. - - The reason for decoupling do_epoll_wait and process_epoll_events is to - better distrubute the work (i.e handling epoll events) across multiple - threads - - process_epoll_events() returns very quickly: It just queues the work on - exec_ctx but does not execute it (the actual exectution or more - accurately grpc_exec_ctx_flush() happens in end_worker() AFTER selecting - a designated poller). So we are not waiting long periods without a - designated poller */ - if (gpr_atm_acq_load(&g_epoll_set.cursor) == - gpr_atm_acq_load(&g_epoll_set.num_events)) { - append_error(&error, do_epoll_wait(exec_ctx, ps, now, deadline), - err_desc); - } - append_error(&error, process_epoll_events(exec_ctx, ps), err_desc); - - gpr_mu_lock(&ps->mu); /* lock */ - - gpr_tls_set(&g_current_thread_worker, 0); - } else { - gpr_tls_set(&g_current_thread_pollset, (intptr_t)ps); - } - end_worker(exec_ctx, ps, &worker, worker_hdl); - - gpr_tls_set(&g_current_thread_pollset, 0); - GPR_TIMER_END("pollset_work", 0); - return error; -} - -static grpc_error *pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker *specific_worker) { - GPR_TIMER_BEGIN("pollset_kick", 0); - GRPC_STATS_INC_POLLSET_KICK(exec_ctx); - grpc_error *ret_err = GRPC_ERROR_NONE; - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_strvec log; - gpr_strvec_init(&log); - char *tmp; - gpr_asprintf( - &tmp, "PS:%p KICK:%p curps=%p curworker=%p root=%p", pollset, - specific_worker, (void *)gpr_tls_get(&g_current_thread_pollset), - (void *)gpr_tls_get(&g_current_thread_worker), pollset->root_worker); - gpr_strvec_add(&log, tmp); - if (pollset->root_worker != NULL) { - gpr_asprintf(&tmp, " {kick_state=%s next=%p {kick_state=%s}}", - kick_state_string(pollset->root_worker->state), - pollset->root_worker->next, - kick_state_string(pollset->root_worker->next->state)); - gpr_strvec_add(&log, tmp); - } - if (specific_worker != NULL) { - gpr_asprintf(&tmp, " worker_kick_state=%s", - kick_state_string(specific_worker->state)); - gpr_strvec_add(&log, tmp); - } - tmp = gpr_strvec_flatten(&log, NULL); - gpr_strvec_destroy(&log); - gpr_log(GPR_ERROR, "%s", tmp); - gpr_free(tmp); - } - - if (specific_worker == NULL) { - if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)pollset) { - grpc_pollset_worker *root_worker = pollset->root_worker; - if (root_worker == NULL) { - GRPC_STATS_INC_POLLSET_KICKED_WITHOUT_POLLER(exec_ctx); - pollset->kicked_without_poller = true; - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. kicked_without_poller"); - } - goto done; - } - grpc_pollset_worker *next_worker = root_worker->next; - if (root_worker->state == KICKED) { - GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. already kicked %p", root_worker); - } - SET_KICK_STATE(root_worker, KICKED); - goto done; - } else if (next_worker->state == KICKED) { - GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. already kicked %p", next_worker); - } - SET_KICK_STATE(next_worker, KICKED); - goto done; - } else if (root_worker == - next_worker && // only try and wake up a poller if - // there is no next worker - root_worker == (grpc_pollset_worker *)gpr_atm_no_barrier_load( - &g_active_poller)) { - GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. kicked %p", root_worker); - } - SET_KICK_STATE(root_worker, KICKED); - ret_err = grpc_wakeup_fd_wakeup(&global_wakeup_fd); - goto done; - } else if (next_worker->state == UNKICKED) { - GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. kicked %p", next_worker); - } - GPR_ASSERT(next_worker->initialized_cv); - SET_KICK_STATE(next_worker, KICKED); - gpr_cv_signal(&next_worker->cv); - goto done; - } else if (next_worker->state == DESIGNATED_POLLER) { - if (root_worker->state != DESIGNATED_POLLER) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log( - GPR_ERROR, - " .. kicked root non-poller %p (initialized_cv=%d) (poller=%p)", - root_worker, root_worker->initialized_cv, next_worker); - } - SET_KICK_STATE(root_worker, KICKED); - if (root_worker->initialized_cv) { - GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); - gpr_cv_signal(&root_worker->cv); - } - goto done; - } else { - GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. non-root poller %p (root=%p)", next_worker, - root_worker); - } - SET_KICK_STATE(next_worker, KICKED); - ret_err = grpc_wakeup_fd_wakeup(&global_wakeup_fd); - goto done; - } - } else { - GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx); - GPR_ASSERT(next_worker->state == KICKED); - SET_KICK_STATE(next_worker, KICKED); - goto done; - } - } else { - GRPC_STATS_INC_POLLSET_KICK_OWN_THREAD(exec_ctx); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. kicked while waking up"); - } - goto done; - } - - GPR_UNREACHABLE_CODE(goto done); - } - - if (specific_worker->state == KICKED) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. specific worker already kicked"); - } - goto done; - } else if (gpr_tls_get(&g_current_thread_worker) == - (intptr_t)specific_worker) { - GRPC_STATS_INC_POLLSET_KICK_OWN_THREAD(exec_ctx); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. mark %p kicked", specific_worker); - } - SET_KICK_STATE(specific_worker, KICKED); - goto done; - } else if (specific_worker == - (grpc_pollset_worker *)gpr_atm_no_barrier_load(&g_active_poller)) { - GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. kick active poller"); - } - SET_KICK_STATE(specific_worker, KICKED); - ret_err = grpc_wakeup_fd_wakeup(&global_wakeup_fd); - goto done; - } else if (specific_worker->initialized_cv) { - GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. kick waiting worker"); - } - SET_KICK_STATE(specific_worker, KICKED); - gpr_cv_signal(&specific_worker->cv); - goto done; - } else { - GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, " .. kick non-waiting worker"); - } - SET_KICK_STATE(specific_worker, KICKED); - goto done; - } -done: - GPR_TIMER_END("pollset_kick", 0); - return ret_err; -} - -static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_fd *fd) {} - -/******************************************************************************* - * Pollset-set Definitions - */ - -static grpc_pollset_set *pollset_set_create(void) { - return (grpc_pollset_set *)((intptr_t)0xdeafbeef); -} - -static void pollset_set_destroy(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pss) {} - -static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, - grpc_fd *fd) {} - -static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, - grpc_fd *fd) {} - -static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pss, grpc_pollset *ps) {} - -static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pss, grpc_pollset *ps) {} - -static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) {} - -static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) {} - -/******************************************************************************* - * Event engine binding - */ - -static void shutdown_engine(void) { - fd_global_shutdown(); - pollset_global_shutdown(); - epoll_set_shutdown(); -} - -static const grpc_event_engine_vtable vtable = { - sizeof(grpc_pollset), - - fd_create, - fd_wrapped_fd, - fd_orphan, - fd_shutdown, - fd_notify_on_read, - fd_notify_on_write, - fd_is_shutdown, - fd_get_read_notifier_pollset, - - pollset_init, - pollset_shutdown, - pollset_destroy, - pollset_work, - pollset_kick, - pollset_add_fd, - - pollset_set_create, - pollset_set_destroy, - pollset_set_add_pollset, - pollset_set_del_pollset, - pollset_set_add_pollset_set, - pollset_set_del_pollset_set, - pollset_set_add_fd, - pollset_set_del_fd, - - shutdown_engine, -}; - -/* It is possible that GLIBC has epoll but the underlying kernel doesn't. - * Create epoll_fd (epoll_set_init() takes care of that) to make sure epoll - * support is available */ -const grpc_event_engine_vtable *grpc_init_epoll1_linux(bool explicit_request) { - if (!grpc_has_wakeup_fd()) { - return NULL; - } - - if (!epoll_set_init()) { - return NULL; - } - - fd_global_init(); - - if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) { - fd_global_shutdown(); - epoll_set_shutdown(); - return NULL; - } - - return &vtable; -} - -#else /* defined(GRPC_LINUX_EPOLL) */ -#if defined(GRPC_POSIX_SOCKET) -#include "src/core/lib/iomgr/ev_posix.h" -/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return - * NULL */ -const grpc_event_engine_vtable *grpc_init_epoll1_linux(bool explicit_request) { - return NULL; -} -#endif /* defined(GRPC_POSIX_SOCKET) */ -#endif /* !defined(GRPC_LINUX_EPOLL) */ diff --git a/src/core/lib/iomgr/ev_epoll1_linux.cc b/src/core/lib/iomgr/ev_epoll1_linux.cc new file mode 100644 index 0000000000..3ac12ab56f --- /dev/null +++ b/src/core/lib/iomgr/ev_epoll1_linux.cc @@ -0,0 +1,1267 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +/* This polling engine is only relevant on linux kernels supporting epoll() */ +#ifdef GRPC_LINUX_EPOLL + +#include "src/core/lib/iomgr/ev_epoll1_linux.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "src/core/lib/debug/stats.h" +#include "src/core/lib/iomgr/ev_posix.h" +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/lockfree_event.h" +#include "src/core/lib/iomgr/wakeup_fd_posix.h" +#include "src/core/lib/profiling/timers.h" +#include "src/core/lib/support/block_annotate.h" +#include "src/core/lib/support/string.h" + +static grpc_wakeup_fd global_wakeup_fd; + +/******************************************************************************* + * Singleton epoll set related fields + */ + +#define MAX_EPOLL_EVENTS 100 +#define MAX_EPOLL_EVENTS_HANDLED_PER_ITERATION 1 + +/* NOTE ON SYNCHRONIZATION: + * - Fields in this struct are only modified by the designated poller. Hence + * there is no need for any locks to protect the struct. + * - num_events and cursor fields have to be of atomic type to provide memory + * visibility guarantees only. i.e In case of multiple pollers, the designated + * polling thread keeps changing; the thread that wrote these values may be + * different from the thread reading the values + */ +typedef struct epoll_set { + int epfd; + + /* The epoll_events after the last call to epoll_wait() */ + struct epoll_event events[MAX_EPOLL_EVENTS]; + + /* The number of epoll_events after the last call to epoll_wait() */ + gpr_atm num_events; + + /* Index of the first event in epoll_events that has to be processed. This + * field is only valid if num_events > 0 */ + gpr_atm cursor; +} epoll_set; + +/* The global singleton epoll set */ +static epoll_set g_epoll_set; + +/* Must be called *only* once */ +static bool epoll_set_init() { + g_epoll_set.epfd = epoll_create1(EPOLL_CLOEXEC); + if (g_epoll_set.epfd < 0) { + gpr_log(GPR_ERROR, "epoll unavailable"); + return false; + } + + gpr_log(GPR_INFO, "grpc epoll fd: %d", g_epoll_set.epfd); + gpr_atm_no_barrier_store(&g_epoll_set.num_events, 0); + gpr_atm_no_barrier_store(&g_epoll_set.cursor, 0); + return true; +} + +/* epoll_set_init() MUST be called before calling this. */ +static void epoll_set_shutdown() { + if (g_epoll_set.epfd >= 0) { + close(g_epoll_set.epfd); + g_epoll_set.epfd = -1; + } +} + +/******************************************************************************* + * Fd Declarations + */ + +struct grpc_fd { + int fd; + + gpr_atm read_closure; + gpr_atm write_closure; + + struct grpc_fd *freelist_next; + + /* The pollset that last noticed that the fd is readable. The actual type + * stored in this is (grpc_pollset *) */ + gpr_atm read_notifier_pollset; + + grpc_iomgr_object iomgr_object; +}; + +static void fd_global_init(void); +static void fd_global_shutdown(void); + +/******************************************************************************* + * Pollset Declarations + */ + +typedef enum { UNKICKED, KICKED, DESIGNATED_POLLER } kick_state; + +static const char *kick_state_string(kick_state st) { + switch (st) { + case UNKICKED: + return "UNKICKED"; + case KICKED: + return "KICKED"; + case DESIGNATED_POLLER: + return "DESIGNATED_POLLER"; + } + GPR_UNREACHABLE_CODE(return "UNKNOWN"); +} + +struct grpc_pollset_worker { + kick_state state; + int kick_state_mutator; // which line of code last changed kick state + bool initialized_cv; + grpc_pollset_worker *next; + grpc_pollset_worker *prev; + gpr_cv cv; + grpc_closure_list schedule_on_end_work; +}; + +#define SET_KICK_STATE(worker, kick_state) \ + do { \ + (worker)->state = (kick_state); \ + (worker)->kick_state_mutator = __LINE__; \ + } while (false) + +#define MAX_NEIGHBORHOODS 1024 + +typedef struct pollset_neighborhood { + gpr_mu mu; + grpc_pollset *active_root; + char pad[GPR_CACHELINE_SIZE]; +} pollset_neighborhood; + +struct grpc_pollset { + gpr_mu mu; + pollset_neighborhood *neighborhood; + bool reassigning_neighborhood; + grpc_pollset_worker *root_worker; + bool kicked_without_poller; + + /* Set to true if the pollset is observed to have no workers available to + poll */ + bool seen_inactive; + bool shutting_down; /* Is the pollset shutting down ? */ + grpc_closure *shutdown_closure; /* Called after after shutdown is complete */ + + /* Number of workers who are *about-to* attach themselves to the pollset + * worker list */ + int begin_refs; + + grpc_pollset *next; + grpc_pollset *prev; +}; + +/******************************************************************************* + * Pollset-set Declarations + */ + +struct grpc_pollset_set { + char unused; +}; + +/******************************************************************************* + * Common helpers + */ + +static bool append_error(grpc_error **composite, grpc_error *error, + const char *desc) { + if (error == GRPC_ERROR_NONE) return true; + if (*composite == GRPC_ERROR_NONE) { + *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc); + } + *composite = grpc_error_add_child(*composite, error); + return false; +} + +/******************************************************************************* + * Fd Definitions + */ + +/* We need to keep a freelist not because of any concerns of malloc performance + * but instead so that implementations with multiple threads in (for example) + * epoll_wait deal with the race between pollset removal and incoming poll + * notifications. + * + * The problem is that the poller ultimately holds a reference to this + * object, so it is very difficult to know when is safe to free it, at least + * without some expensive synchronization. + * + * If we keep the object freelisted, in the worst case losing this race just + * becomes a spurious read notification on a reused fd. + */ + +/* The alarm system needs to be able to wakeup 'some poller' sometimes + * (specifically when a new alarm needs to be triggered earlier than the next + * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a + * case occurs. */ + +static grpc_fd *fd_freelist = NULL; +static gpr_mu fd_freelist_mu; + +static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); } + +static void fd_global_shutdown(void) { + gpr_mu_lock(&fd_freelist_mu); + gpr_mu_unlock(&fd_freelist_mu); + while (fd_freelist != NULL) { + grpc_fd *fd = fd_freelist; + fd_freelist = fd_freelist->freelist_next; + gpr_free(fd); + } + gpr_mu_destroy(&fd_freelist_mu); +} + +static grpc_fd *fd_create(int fd, const char *name) { + grpc_fd *new_fd = NULL; + + gpr_mu_lock(&fd_freelist_mu); + if (fd_freelist != NULL) { + new_fd = fd_freelist; + fd_freelist = fd_freelist->freelist_next; + } + gpr_mu_unlock(&fd_freelist_mu); + + if (new_fd == NULL) { + new_fd = (grpc_fd *)gpr_malloc(sizeof(grpc_fd)); + } + + new_fd->fd = fd; + grpc_lfev_init(&new_fd->read_closure); + grpc_lfev_init(&new_fd->write_closure); + gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL); + + new_fd->freelist_next = NULL; + + char *fd_name; + gpr_asprintf(&fd_name, "%s fd=%d", name, fd); + grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name); +#ifndef NDEBUG + if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { + gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, new_fd, fd_name); + } +#endif + gpr_free(fd_name); + + struct epoll_event ev; + ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); + ev.data.ptr = new_fd; + if (epoll_ctl(g_epoll_set.epfd, EPOLL_CTL_ADD, fd, &ev) != 0) { + gpr_log(GPR_ERROR, "epoll_ctl failed: %s", strerror(errno)); + } + + return new_fd; +} + +static int fd_wrapped_fd(grpc_fd *fd) { return fd->fd; } + +/* if 'releasing_fd' is true, it means that we are going to detach the internal + * fd from grpc_fd structure (i.e which means we should not be calling + * shutdown() syscall on that fd) */ +static void fd_shutdown_internal(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_error *why, bool releasing_fd) { + if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure, + GRPC_ERROR_REF(why))) { + if (!releasing_fd) { + shutdown(fd->fd, SHUT_RDWR); + } + grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why)); + } + GRPC_ERROR_UNREF(why); +} + +/* Might be called multiple times */ +static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) { + fd_shutdown_internal(exec_ctx, fd, why, false); +} + +static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *on_done, int *release_fd, + bool already_closed, const char *reason) { + grpc_error *error = GRPC_ERROR_NONE; + bool is_release_fd = (release_fd != NULL); + + if (!grpc_lfev_is_shutdown(&fd->read_closure)) { + fd_shutdown_internal(exec_ctx, fd, + GRPC_ERROR_CREATE_FROM_COPIED_STRING(reason), + is_release_fd); + } + + /* If release_fd is not NULL, we should be relinquishing control of the file + descriptor fd->fd (but we still own the grpc_fd structure). */ + if (is_release_fd) { + *release_fd = fd->fd; + } else if (!already_closed) { + close(fd->fd); + } + + GRPC_CLOSURE_SCHED(exec_ctx, on_done, GRPC_ERROR_REF(error)); + + grpc_iomgr_unregister_object(&fd->iomgr_object); + grpc_lfev_destroy(&fd->read_closure); + grpc_lfev_destroy(&fd->write_closure); + + gpr_mu_lock(&fd_freelist_mu); + fd->freelist_next = fd_freelist; + fd_freelist = fd; + gpr_mu_unlock(&fd_freelist_mu); +} + +static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, + grpc_fd *fd) { + gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset); + return (grpc_pollset *)notifier; +} + +static bool fd_is_shutdown(grpc_fd *fd) { + return grpc_lfev_is_shutdown(&fd->read_closure); +} + +static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure, "read"); +} + +static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure, "write"); +} + +static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_pollset *notifier) { + grpc_lfev_set_ready(exec_ctx, &fd->read_closure, "read"); + /* Use release store to match with acquire load in fd_get_read_notifier */ + gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier); +} + +static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + grpc_lfev_set_ready(exec_ctx, &fd->write_closure, "write"); +} + +/******************************************************************************* + * Pollset Definitions + */ + +GPR_TLS_DECL(g_current_thread_pollset); +GPR_TLS_DECL(g_current_thread_worker); + +/* The designated poller */ +static gpr_atm g_active_poller; + +static pollset_neighborhood *g_neighborhoods; +static size_t g_num_neighborhoods; + +/* Return true if first in list */ +static bool worker_insert(grpc_pollset *pollset, grpc_pollset_worker *worker) { + if (pollset->root_worker == NULL) { + pollset->root_worker = worker; + worker->next = worker->prev = worker; + return true; + } else { + worker->next = pollset->root_worker; + worker->prev = worker->next->prev; + worker->next->prev = worker; + worker->prev->next = worker; + return false; + } +} + +/* Return true if last in list */ +typedef enum { EMPTIED, NEW_ROOT, REMOVED } worker_remove_result; + +static worker_remove_result worker_remove(grpc_pollset *pollset, + grpc_pollset_worker *worker) { + if (worker == pollset->root_worker) { + if (worker == worker->next) { + pollset->root_worker = NULL; + return EMPTIED; + } else { + pollset->root_worker = worker->next; + worker->prev->next = worker->next; + worker->next->prev = worker->prev; + return NEW_ROOT; + } + } else { + worker->prev->next = worker->next; + worker->next->prev = worker->prev; + return REMOVED; + } +} + +static size_t choose_neighborhood(void) { + return (size_t)gpr_cpu_current_cpu() % g_num_neighborhoods; +} + +static grpc_error *pollset_global_init(void) { + gpr_tls_init(&g_current_thread_pollset); + gpr_tls_init(&g_current_thread_worker); + gpr_atm_no_barrier_store(&g_active_poller, 0); + global_wakeup_fd.read_fd = -1; + grpc_error *err = grpc_wakeup_fd_init(&global_wakeup_fd); + if (err != GRPC_ERROR_NONE) return err; + struct epoll_event ev; + ev.events = (uint32_t)(EPOLLIN | EPOLLET); + ev.data.ptr = &global_wakeup_fd; + if (epoll_ctl(g_epoll_set.epfd, EPOLL_CTL_ADD, global_wakeup_fd.read_fd, + &ev) != 0) { + return GRPC_OS_ERROR(errno, "epoll_ctl"); + } + g_num_neighborhoods = GPR_CLAMP(gpr_cpu_num_cores(), 1, MAX_NEIGHBORHOODS); + g_neighborhoods = (pollset_neighborhood *)gpr_zalloc( + sizeof(*g_neighborhoods) * g_num_neighborhoods); + for (size_t i = 0; i < g_num_neighborhoods; i++) { + gpr_mu_init(&g_neighborhoods[i].mu); + } + return GRPC_ERROR_NONE; +} + +static void pollset_global_shutdown(void) { + gpr_tls_destroy(&g_current_thread_pollset); + gpr_tls_destroy(&g_current_thread_worker); + if (global_wakeup_fd.read_fd != -1) grpc_wakeup_fd_destroy(&global_wakeup_fd); + for (size_t i = 0; i < g_num_neighborhoods; i++) { + gpr_mu_destroy(&g_neighborhoods[i].mu); + } + gpr_free(g_neighborhoods); +} + +static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { + gpr_mu_init(&pollset->mu); + *mu = &pollset->mu; + pollset->neighborhood = &g_neighborhoods[choose_neighborhood()]; + pollset->reassigning_neighborhood = false; + pollset->root_worker = NULL; + pollset->kicked_without_poller = false; + pollset->seen_inactive = true; + pollset->shutting_down = false; + pollset->shutdown_closure = NULL; + pollset->begin_refs = 0; + pollset->next = pollset->prev = NULL; +} + +static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { + gpr_mu_lock(&pollset->mu); + if (!pollset->seen_inactive) { + pollset_neighborhood *neighborhood = pollset->neighborhood; + gpr_mu_unlock(&pollset->mu); + retry_lock_neighborhood: + gpr_mu_lock(&neighborhood->mu); + gpr_mu_lock(&pollset->mu); + if (!pollset->seen_inactive) { + if (pollset->neighborhood != neighborhood) { + gpr_mu_unlock(&neighborhood->mu); + neighborhood = pollset->neighborhood; + gpr_mu_unlock(&pollset->mu); + goto retry_lock_neighborhood; + } + pollset->prev->next = pollset->next; + pollset->next->prev = pollset->prev; + if (pollset == pollset->neighborhood->active_root) { + pollset->neighborhood->active_root = + pollset->next == pollset ? NULL : pollset->next; + } + } + gpr_mu_unlock(&pollset->neighborhood->mu); + } + gpr_mu_unlock(&pollset->mu); + gpr_mu_destroy(&pollset->mu); +} + +static grpc_error *pollset_kick_all(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset) { + GPR_TIMER_BEGIN("pollset_kick_all", 0); + grpc_error *error = GRPC_ERROR_NONE; + if (pollset->root_worker != NULL) { + grpc_pollset_worker *worker = pollset->root_worker; + do { + GRPC_STATS_INC_POLLSET_KICK(exec_ctx); + switch (worker->state) { + case KICKED: + GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx); + break; + case UNKICKED: + SET_KICK_STATE(worker, KICKED); + if (worker->initialized_cv) { + GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); + gpr_cv_signal(&worker->cv); + } + break; + case DESIGNATED_POLLER: + GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx); + SET_KICK_STATE(worker, KICKED); + append_error(&error, grpc_wakeup_fd_wakeup(&global_wakeup_fd), + "pollset_kick_all"); + break; + } + + worker = worker->next; + } while (worker != pollset->root_worker); + } + // TODO: sreek. Check if we need to set 'kicked_without_poller' to true here + // in the else case + GPR_TIMER_END("pollset_kick_all", 0); + return error; +} + +static void pollset_maybe_finish_shutdown(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset) { + if (pollset->shutdown_closure != NULL && pollset->root_worker == NULL && + pollset->begin_refs == 0) { + GPR_TIMER_MARK("pollset_finish_shutdown", 0); + GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_closure, GRPC_ERROR_NONE); + pollset->shutdown_closure = NULL; + } +} + +static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_closure *closure) { + GPR_TIMER_BEGIN("pollset_shutdown", 0); + GPR_ASSERT(pollset->shutdown_closure == NULL); + GPR_ASSERT(!pollset->shutting_down); + pollset->shutdown_closure = closure; + pollset->shutting_down = true; + GRPC_LOG_IF_ERROR("pollset_shutdown", pollset_kick_all(exec_ctx, pollset)); + pollset_maybe_finish_shutdown(exec_ctx, pollset); + GPR_TIMER_END("pollset_shutdown", 0); +} + +static int poll_deadline_to_millis_timeout(gpr_timespec deadline, + gpr_timespec now) { + gpr_timespec timeout; + if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { + return -1; + } + + if (gpr_time_cmp(deadline, now) <= 0) { + return 0; + } + + static const gpr_timespec round_up = { + 0, /* tv_sec */ + GPR_NS_PER_MS - 1, /* tv_nsec */ + GPR_TIMESPAN /* clock_type */ + }; + timeout = gpr_time_sub(deadline, now); + int millis = gpr_time_to_millis(gpr_time_add(timeout, round_up)); + return millis >= 1 ? millis : 1; +} + +/* Process the epoll events found by do_epoll_wait() function. + - g_epoll_set.cursor points to the index of the first event to be processed + - This function then processes up-to MAX_EPOLL_EVENTS_PER_ITERATION and + updates the g_epoll_set.cursor + + NOTE ON SYNCRHONIZATION: Similar to do_epoll_wait(), this function is only + called by g_active_poller thread. So there is no need for synchronization + when accessing fields in g_epoll_set */ +static grpc_error *process_epoll_events(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset) { + static const char *err_desc = "process_events"; + grpc_error *error = GRPC_ERROR_NONE; + + GPR_TIMER_BEGIN("process_epoll_events", 0); + long num_events = gpr_atm_acq_load(&g_epoll_set.num_events); + long cursor = gpr_atm_acq_load(&g_epoll_set.cursor); + for (int idx = 0; + (idx < MAX_EPOLL_EVENTS_HANDLED_PER_ITERATION) && cursor != num_events; + idx++) { + long c = cursor++; + struct epoll_event *ev = &g_epoll_set.events[c]; + void *data_ptr = ev->data.ptr; + + if (data_ptr == &global_wakeup_fd) { + append_error(&error, grpc_wakeup_fd_consume_wakeup(&global_wakeup_fd), + err_desc); + } else { + grpc_fd *fd = (grpc_fd *)(data_ptr); + bool cancel = (ev->events & (EPOLLERR | EPOLLHUP)) != 0; + bool read_ev = (ev->events & (EPOLLIN | EPOLLPRI)) != 0; + bool write_ev = (ev->events & EPOLLOUT) != 0; + + if (read_ev || cancel) { + fd_become_readable(exec_ctx, fd, pollset); + } + + if (write_ev || cancel) { + fd_become_writable(exec_ctx, fd); + } + } + } + gpr_atm_rel_store(&g_epoll_set.cursor, cursor); + GPR_TIMER_END("process_epoll_events", 0); + return error; +} + +/* Do epoll_wait and store the events in g_epoll_set.events field. This does not + "process" any of the events yet; that is done in process_epoll_events(). + *See process_epoll_events() function for more details. + + NOTE ON SYNCHRONIZATION: At any point of time, only the g_active_poller + (i.e the designated poller thread) will be calling this function. So there is + no need for any synchronization when accesing fields in g_epoll_set */ +static grpc_error *do_epoll_wait(grpc_exec_ctx *exec_ctx, grpc_pollset *ps, + gpr_timespec now, gpr_timespec deadline) { + GPR_TIMER_BEGIN("do_epoll_wait", 0); + + int r; + int timeout = poll_deadline_to_millis_timeout(deadline, now); + if (timeout != 0) { + GRPC_SCHEDULING_START_BLOCKING_REGION; + } + do { + GRPC_STATS_INC_SYSCALL_POLL(exec_ctx); + r = epoll_wait(g_epoll_set.epfd, g_epoll_set.events, MAX_EPOLL_EVENTS, + timeout); + } while (r < 0 && errno == EINTR); + if (timeout != 0) { + GRPC_SCHEDULING_END_BLOCKING_REGION; + } + + if (r < 0) return GRPC_OS_ERROR(errno, "epoll_wait"); + + GRPC_STATS_INC_POLL_EVENTS_RETURNED(exec_ctx, r); + + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "ps: %p poll got %d events", ps, r); + } + + gpr_atm_rel_store(&g_epoll_set.num_events, r); + gpr_atm_rel_store(&g_epoll_set.cursor, 0); + + GPR_TIMER_END("do_epoll_wait", 0); + return GRPC_ERROR_NONE; +} + +static bool begin_worker(grpc_pollset *pollset, grpc_pollset_worker *worker, + grpc_pollset_worker **worker_hdl, gpr_timespec *now, + gpr_timespec deadline) { + GPR_TIMER_BEGIN("begin_worker", 0); + if (worker_hdl != NULL) *worker_hdl = worker; + worker->initialized_cv = false; + SET_KICK_STATE(worker, UNKICKED); + worker->schedule_on_end_work = (grpc_closure_list)GRPC_CLOSURE_LIST_INIT; + pollset->begin_refs++; + + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, "PS:%p BEGIN_STARTS:%p", pollset, worker); + } + + if (pollset->seen_inactive) { + // pollset has been observed to be inactive, we need to move back to the + // active list + bool is_reassigning = false; + if (!pollset->reassigning_neighborhood) { + is_reassigning = true; + pollset->reassigning_neighborhood = true; + pollset->neighborhood = &g_neighborhoods[choose_neighborhood()]; + } + pollset_neighborhood *neighborhood = pollset->neighborhood; + gpr_mu_unlock(&pollset->mu); + // pollset unlocked: state may change (even worker->kick_state) + retry_lock_neighborhood: + gpr_mu_lock(&neighborhood->mu); + gpr_mu_lock(&pollset->mu); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, "PS:%p BEGIN_REORG:%p kick_state=%s is_reassigning=%d", + pollset, worker, kick_state_string(worker->state), + is_reassigning); + } + if (pollset->seen_inactive) { + if (neighborhood != pollset->neighborhood) { + gpr_mu_unlock(&neighborhood->mu); + neighborhood = pollset->neighborhood; + gpr_mu_unlock(&pollset->mu); + goto retry_lock_neighborhood; + } + + /* In the brief time we released the pollset locks above, the worker MAY + have been kicked. In this case, the worker should get out of this + pollset ASAP and hence this should neither add the pollset to + neighborhood nor mark the pollset as active. + + On a side note, the only way a worker's kick state could have changed + at this point is if it were "kicked specifically". Since the worker has + not added itself to the pollset yet (by calling worker_insert()), it is + not visible in the "kick any" path yet */ + if (worker->state == UNKICKED) { + pollset->seen_inactive = false; + if (neighborhood->active_root == NULL) { + neighborhood->active_root = pollset->next = pollset->prev = pollset; + /* Make this the designated poller if there isn't one already */ + if (worker->state == UNKICKED && + gpr_atm_no_barrier_cas(&g_active_poller, 0, (gpr_atm)worker)) { + SET_KICK_STATE(worker, DESIGNATED_POLLER); + } + } else { + pollset->next = neighborhood->active_root; + pollset->prev = pollset->next->prev; + pollset->next->prev = pollset->prev->next = pollset; + } + } + } + if (is_reassigning) { + GPR_ASSERT(pollset->reassigning_neighborhood); + pollset->reassigning_neighborhood = false; + } + gpr_mu_unlock(&neighborhood->mu); + } + + worker_insert(pollset, worker); + pollset->begin_refs--; + if (worker->state == UNKICKED && !pollset->kicked_without_poller) { + GPR_ASSERT(gpr_atm_no_barrier_load(&g_active_poller) != (gpr_atm)worker); + worker->initialized_cv = true; + gpr_cv_init(&worker->cv); + while (worker->state == UNKICKED && !pollset->shutting_down) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, "PS:%p BEGIN_WAIT:%p kick_state=%s shutdown=%d", + pollset, worker, kick_state_string(worker->state), + pollset->shutting_down); + } + + if (gpr_cv_wait(&worker->cv, &pollset->mu, deadline) && + worker->state == UNKICKED) { + /* If gpr_cv_wait returns true (i.e a timeout), pretend that the worker + received a kick */ + SET_KICK_STATE(worker, KICKED); + } + } + *now = gpr_now(now->clock_type); + } + + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, + "PS:%p BEGIN_DONE:%p kick_state=%s shutdown=%d " + "kicked_without_poller: %d", + pollset, worker, kick_state_string(worker->state), + pollset->shutting_down, pollset->kicked_without_poller); + } + + /* We release pollset lock in this function at a couple of places: + * 1. Briefly when assigning pollset to a neighborhood + * 2. When doing gpr_cv_wait() + * It is possible that 'kicked_without_poller' was set to true during (1) and + * 'shutting_down' is set to true during (1) or (2). If either of them is + * true, this worker cannot do polling */ + /* TODO(sreek): Perhaps there is a better way to handle kicked_without_poller + * case; especially when the worker is the DESIGNATED_POLLER */ + + if (pollset->kicked_without_poller) { + pollset->kicked_without_poller = false; + GPR_TIMER_END("begin_worker", 0); + return false; + } + + GPR_TIMER_END("begin_worker", 0); + return worker->state == DESIGNATED_POLLER && !pollset->shutting_down; +} + +static bool check_neighborhood_for_available_poller( + grpc_exec_ctx *exec_ctx, pollset_neighborhood *neighborhood) { + GPR_TIMER_BEGIN("check_neighborhood_for_available_poller", 0); + bool found_worker = false; + do { + grpc_pollset *inspect = neighborhood->active_root; + if (inspect == NULL) { + break; + } + gpr_mu_lock(&inspect->mu); + GPR_ASSERT(!inspect->seen_inactive); + grpc_pollset_worker *inspect_worker = inspect->root_worker; + if (inspect_worker != NULL) { + do { + switch (inspect_worker->state) { + case UNKICKED: + if (gpr_atm_no_barrier_cas(&g_active_poller, 0, + (gpr_atm)inspect_worker)) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, " .. choose next poller to be %p", + inspect_worker); + } + SET_KICK_STATE(inspect_worker, DESIGNATED_POLLER); + if (inspect_worker->initialized_cv) { + GPR_TIMER_MARK("signal worker", 0); + GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); + gpr_cv_signal(&inspect_worker->cv); + } + } else { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, " .. beaten to choose next poller"); + } + } + // even if we didn't win the cas, there's a worker, we can stop + found_worker = true; + break; + case KICKED: + break; + case DESIGNATED_POLLER: + found_worker = true; // ok, so someone else found the worker, but + // we'll accept that + break; + } + inspect_worker = inspect_worker->next; + } while (!found_worker && inspect_worker != inspect->root_worker); + } + if (!found_worker) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, " .. mark pollset %p inactive", inspect); + } + inspect->seen_inactive = true; + if (inspect == neighborhood->active_root) { + neighborhood->active_root = + inspect->next == inspect ? NULL : inspect->next; + } + inspect->next->prev = inspect->prev; + inspect->prev->next = inspect->next; + inspect->next = inspect->prev = NULL; + } + gpr_mu_unlock(&inspect->mu); + } while (!found_worker); + GPR_TIMER_END("check_neighborhood_for_available_poller", 0); + return found_worker; +} + +static void end_worker(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker *worker, + grpc_pollset_worker **worker_hdl) { + GPR_TIMER_BEGIN("end_worker", 0); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p END_WORKER:%p", pollset, worker); + } + if (worker_hdl != NULL) *worker_hdl = NULL; + /* Make sure we appear kicked */ + SET_KICK_STATE(worker, KICKED); + grpc_closure_list_move(&worker->schedule_on_end_work, + &exec_ctx->closure_list); + if (gpr_atm_no_barrier_load(&g_active_poller) == (gpr_atm)worker) { + if (worker->next != worker && worker->next->state == UNKICKED) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, " .. choose next poller to be peer %p", worker); + } + GPR_ASSERT(worker->next->initialized_cv); + gpr_atm_no_barrier_store(&g_active_poller, (gpr_atm)worker->next); + SET_KICK_STATE(worker->next, DESIGNATED_POLLER); + GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); + gpr_cv_signal(&worker->next->cv); + if (grpc_exec_ctx_has_work(exec_ctx)) { + gpr_mu_unlock(&pollset->mu); + grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&pollset->mu); + } + } else { + gpr_atm_no_barrier_store(&g_active_poller, 0); + size_t poller_neighborhood_idx = + (size_t)(pollset->neighborhood - g_neighborhoods); + gpr_mu_unlock(&pollset->mu); + bool found_worker = false; + bool scan_state[MAX_NEIGHBORHOODS]; + for (size_t i = 0; !found_worker && i < g_num_neighborhoods; i++) { + pollset_neighborhood *neighborhood = + &g_neighborhoods[(poller_neighborhood_idx + i) % + g_num_neighborhoods]; + if (gpr_mu_trylock(&neighborhood->mu)) { + found_worker = + check_neighborhood_for_available_poller(exec_ctx, neighborhood); + gpr_mu_unlock(&neighborhood->mu); + scan_state[i] = true; + } else { + scan_state[i] = false; + } + } + for (size_t i = 0; !found_worker && i < g_num_neighborhoods; i++) { + if (scan_state[i]) continue; + pollset_neighborhood *neighborhood = + &g_neighborhoods[(poller_neighborhood_idx + i) % + g_num_neighborhoods]; + gpr_mu_lock(&neighborhood->mu); + found_worker = + check_neighborhood_for_available_poller(exec_ctx, neighborhood); + gpr_mu_unlock(&neighborhood->mu); + } + grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&pollset->mu); + } + } else if (grpc_exec_ctx_has_work(exec_ctx)) { + gpr_mu_unlock(&pollset->mu); + grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&pollset->mu); + } + if (worker->initialized_cv) { + gpr_cv_destroy(&worker->cv); + } + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, " .. remove worker"); + } + if (EMPTIED == worker_remove(pollset, worker)) { + pollset_maybe_finish_shutdown(exec_ctx, pollset); + } + GPR_ASSERT(gpr_atm_no_barrier_load(&g_active_poller) != (gpr_atm)worker); + GPR_TIMER_END("end_worker", 0); +} + +/* pollset->po.mu lock must be held by the caller before calling this. + The function pollset_work() may temporarily release the lock (pollset->po.mu) + during the course of its execution but it will always re-acquire the lock and + ensure that it is held by the time the function returns */ +static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *ps, + grpc_pollset_worker **worker_hdl, + gpr_timespec now, gpr_timespec deadline) { + grpc_pollset_worker worker; + grpc_error *error = GRPC_ERROR_NONE; + static const char *err_desc = "pollset_work"; + GPR_TIMER_BEGIN("pollset_work", 0); + if (ps->kicked_without_poller) { + ps->kicked_without_poller = false; + GPR_TIMER_END("pollset_work", 0); + return GRPC_ERROR_NONE; + } + + if (begin_worker(ps, &worker, worker_hdl, &now, deadline)) { + gpr_tls_set(&g_current_thread_pollset, (intptr_t)ps); + gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); + GPR_ASSERT(!ps->shutting_down); + GPR_ASSERT(!ps->seen_inactive); + + gpr_mu_unlock(&ps->mu); /* unlock */ + /* This is the designated polling thread at this point and should ideally do + polling. However, if there are unprocessed events left from a previous + call to do_epoll_wait(), skip calling epoll_wait() in this iteration and + process the pending epoll events. + + The reason for decoupling do_epoll_wait and process_epoll_events is to + better distrubute the work (i.e handling epoll events) across multiple + threads + + process_epoll_events() returns very quickly: It just queues the work on + exec_ctx but does not execute it (the actual exectution or more + accurately grpc_exec_ctx_flush() happens in end_worker() AFTER selecting + a designated poller). So we are not waiting long periods without a + designated poller */ + if (gpr_atm_acq_load(&g_epoll_set.cursor) == + gpr_atm_acq_load(&g_epoll_set.num_events)) { + append_error(&error, do_epoll_wait(exec_ctx, ps, now, deadline), + err_desc); + } + append_error(&error, process_epoll_events(exec_ctx, ps), err_desc); + + gpr_mu_lock(&ps->mu); /* lock */ + + gpr_tls_set(&g_current_thread_worker, 0); + } else { + gpr_tls_set(&g_current_thread_pollset, (intptr_t)ps); + } + end_worker(exec_ctx, ps, &worker, worker_hdl); + + gpr_tls_set(&g_current_thread_pollset, 0); + GPR_TIMER_END("pollset_work", 0); + return error; +} + +static grpc_error *pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker *specific_worker) { + GPR_TIMER_BEGIN("pollset_kick", 0); + GRPC_STATS_INC_POLLSET_KICK(exec_ctx); + grpc_error *ret_err = GRPC_ERROR_NONE; + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_strvec log; + gpr_strvec_init(&log); + char *tmp; + gpr_asprintf( + &tmp, "PS:%p KICK:%p curps=%p curworker=%p root=%p", pollset, + specific_worker, (void *)gpr_tls_get(&g_current_thread_pollset), + (void *)gpr_tls_get(&g_current_thread_worker), pollset->root_worker); + gpr_strvec_add(&log, tmp); + if (pollset->root_worker != NULL) { + gpr_asprintf(&tmp, " {kick_state=%s next=%p {kick_state=%s}}", + kick_state_string(pollset->root_worker->state), + pollset->root_worker->next, + kick_state_string(pollset->root_worker->next->state)); + gpr_strvec_add(&log, tmp); + } + if (specific_worker != NULL) { + gpr_asprintf(&tmp, " worker_kick_state=%s", + kick_state_string(specific_worker->state)); + gpr_strvec_add(&log, tmp); + } + tmp = gpr_strvec_flatten(&log, NULL); + gpr_strvec_destroy(&log); + gpr_log(GPR_ERROR, "%s", tmp); + gpr_free(tmp); + } + + if (specific_worker == NULL) { + if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)pollset) { + grpc_pollset_worker *root_worker = pollset->root_worker; + if (root_worker == NULL) { + GRPC_STATS_INC_POLLSET_KICKED_WITHOUT_POLLER(exec_ctx); + pollset->kicked_without_poller = true; + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. kicked_without_poller"); + } + goto done; + } + grpc_pollset_worker *next_worker = root_worker->next; + if (root_worker->state == KICKED) { + GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. already kicked %p", root_worker); + } + SET_KICK_STATE(root_worker, KICKED); + goto done; + } else if (next_worker->state == KICKED) { + GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. already kicked %p", next_worker); + } + SET_KICK_STATE(next_worker, KICKED); + goto done; + } else if (root_worker == + next_worker && // only try and wake up a poller if + // there is no next worker + root_worker == (grpc_pollset_worker *)gpr_atm_no_barrier_load( + &g_active_poller)) { + GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. kicked %p", root_worker); + } + SET_KICK_STATE(root_worker, KICKED); + ret_err = grpc_wakeup_fd_wakeup(&global_wakeup_fd); + goto done; + } else if (next_worker->state == UNKICKED) { + GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. kicked %p", next_worker); + } + GPR_ASSERT(next_worker->initialized_cv); + SET_KICK_STATE(next_worker, KICKED); + gpr_cv_signal(&next_worker->cv); + goto done; + } else if (next_worker->state == DESIGNATED_POLLER) { + if (root_worker->state != DESIGNATED_POLLER) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log( + GPR_ERROR, + " .. kicked root non-poller %p (initialized_cv=%d) (poller=%p)", + root_worker, root_worker->initialized_cv, next_worker); + } + SET_KICK_STATE(root_worker, KICKED); + if (root_worker->initialized_cv) { + GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); + gpr_cv_signal(&root_worker->cv); + } + goto done; + } else { + GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. non-root poller %p (root=%p)", next_worker, + root_worker); + } + SET_KICK_STATE(next_worker, KICKED); + ret_err = grpc_wakeup_fd_wakeup(&global_wakeup_fd); + goto done; + } + } else { + GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx); + GPR_ASSERT(next_worker->state == KICKED); + SET_KICK_STATE(next_worker, KICKED); + goto done; + } + } else { + GRPC_STATS_INC_POLLSET_KICK_OWN_THREAD(exec_ctx); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. kicked while waking up"); + } + goto done; + } + + GPR_UNREACHABLE_CODE(goto done); + } + + if (specific_worker->state == KICKED) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. specific worker already kicked"); + } + goto done; + } else if (gpr_tls_get(&g_current_thread_worker) == + (intptr_t)specific_worker) { + GRPC_STATS_INC_POLLSET_KICK_OWN_THREAD(exec_ctx); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. mark %p kicked", specific_worker); + } + SET_KICK_STATE(specific_worker, KICKED); + goto done; + } else if (specific_worker == + (grpc_pollset_worker *)gpr_atm_no_barrier_load(&g_active_poller)) { + GRPC_STATS_INC_POLLSET_KICK_WAKEUP_FD(exec_ctx); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. kick active poller"); + } + SET_KICK_STATE(specific_worker, KICKED); + ret_err = grpc_wakeup_fd_wakeup(&global_wakeup_fd); + goto done; + } else if (specific_worker->initialized_cv) { + GRPC_STATS_INC_POLLSET_KICK_WAKEUP_CV(exec_ctx); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. kick waiting worker"); + } + SET_KICK_STATE(specific_worker, KICKED); + gpr_cv_signal(&specific_worker->cv); + goto done; + } else { + GRPC_STATS_INC_POLLSET_KICKED_AGAIN(exec_ctx); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, " .. kick non-waiting worker"); + } + SET_KICK_STATE(specific_worker, KICKED); + goto done; + } +done: + GPR_TIMER_END("pollset_kick", 0); + return ret_err; +} + +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_fd *fd) {} + +/******************************************************************************* + * Pollset-set Definitions + */ + +static grpc_pollset_set *pollset_set_create(void) { + return (grpc_pollset_set *)((intptr_t)0xdeafbeef); +} + +static void pollset_set_destroy(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pss) {} + +static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, + grpc_fd *fd) {} + +static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, + grpc_fd *fd) {} + +static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pss, grpc_pollset *ps) {} + +static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pss, grpc_pollset *ps) {} + +static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) {} + +static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) {} + +/******************************************************************************* + * Event engine binding + */ + +static void shutdown_engine(void) { + fd_global_shutdown(); + pollset_global_shutdown(); + epoll_set_shutdown(); +} + +static const grpc_event_engine_vtable vtable = { + sizeof(grpc_pollset), + + fd_create, + fd_wrapped_fd, + fd_orphan, + fd_shutdown, + fd_notify_on_read, + fd_notify_on_write, + fd_is_shutdown, + fd_get_read_notifier_pollset, + + pollset_init, + pollset_shutdown, + pollset_destroy, + pollset_work, + pollset_kick, + pollset_add_fd, + + pollset_set_create, + pollset_set_destroy, + pollset_set_add_pollset, + pollset_set_del_pollset, + pollset_set_add_pollset_set, + pollset_set_del_pollset_set, + pollset_set_add_fd, + pollset_set_del_fd, + + shutdown_engine, +}; + +/* It is possible that GLIBC has epoll but the underlying kernel doesn't. + * Create epoll_fd (epoll_set_init() takes care of that) to make sure epoll + * support is available */ +const grpc_event_engine_vtable *grpc_init_epoll1_linux(bool explicit_request) { + if (!grpc_has_wakeup_fd()) { + return NULL; + } + + if (!epoll_set_init()) { + return NULL; + } + + fd_global_init(); + + if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) { + fd_global_shutdown(); + epoll_set_shutdown(); + return NULL; + } + + return &vtable; +} + +#else /* defined(GRPC_LINUX_EPOLL) */ +#if defined(GRPC_POSIX_SOCKET) +#include "src/core/lib/iomgr/ev_posix.h" +/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return + * NULL */ +const grpc_event_engine_vtable *grpc_init_epoll1_linux(bool explicit_request) { + return NULL; +} +#endif /* defined(GRPC_POSIX_SOCKET) */ +#endif /* !defined(GRPC_LINUX_EPOLL) */ diff --git a/src/core/lib/iomgr/ev_epollex_linux.c b/src/core/lib/iomgr/ev_epollex_linux.c deleted file mode 100644 index 8eb4de44d9..0000000000 --- a/src/core/lib/iomgr/ev_epollex_linux.c +++ /dev/null @@ -1,1461 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -/* This polling engine is only relevant on linux kernels supporting epoll() */ -#ifdef GRPC_LINUX_EPOLL - -#include "src/core/lib/iomgr/ev_epollex_linux.h" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "src/core/lib/debug/stats.h" -#include "src/core/lib/iomgr/ev_posix.h" -#include "src/core/lib/iomgr/iomgr_internal.h" -#include "src/core/lib/iomgr/is_epollexclusive_available.h" -#include "src/core/lib/iomgr/lockfree_event.h" -#include "src/core/lib/iomgr/sys_epoll_wrapper.h" -#include "src/core/lib/iomgr/timer.h" -#include "src/core/lib/iomgr/wakeup_fd_posix.h" -#include "src/core/lib/profiling/timers.h" -#include "src/core/lib/support/block_annotate.h" -#include "src/core/lib/support/spinlock.h" - -/******************************************************************************* - * Polling object - */ - -typedef enum { - PO_POLLING_GROUP, - PO_POLLSET_SET, - PO_POLLSET, - PO_FD, /* ordering is important: we always want to lock pollsets before fds: - this guarantees that using an fd as a pollable is safe */ - PO_EMPTY_POLLABLE, - PO_COUNT -} polling_obj_type; - -typedef struct polling_obj polling_obj; -typedef struct polling_group polling_group; - -struct polling_obj { - gpr_mu mu; - polling_obj_type type; - polling_group *group; - struct polling_obj *next; - struct polling_obj *prev; -}; - -struct polling_group { - polling_obj po; - gpr_refcount refs; -}; - -static void po_init(polling_obj *po, polling_obj_type type); -static void po_destroy(polling_obj *po); -static void po_join(grpc_exec_ctx *exec_ctx, polling_obj *a, polling_obj *b); -static int po_cmp(polling_obj *a, polling_obj *b); - -static void pg_create(grpc_exec_ctx *exec_ctx, polling_obj **initial_po, - size_t initial_po_count); -static polling_group *pg_ref(polling_group *pg); -static void pg_unref(polling_group *pg); -static void pg_merge(grpc_exec_ctx *exec_ctx, polling_group *a, - polling_group *b); -static void pg_join(grpc_exec_ctx *exec_ctx, polling_group *pg, - polling_obj *po); - -/******************************************************************************* - * pollable Declarations - */ - -typedef struct pollable { - polling_obj po; - int epfd; - grpc_wakeup_fd wakeup; - grpc_pollset_worker *root_worker; -} pollable; - -static const char *polling_obj_type_string(polling_obj_type t) { - switch (t) { - case PO_POLLING_GROUP: - return "polling_group"; - case PO_POLLSET_SET: - return "pollset_set"; - case PO_POLLSET: - return "pollset"; - case PO_FD: - return "fd"; - case PO_EMPTY_POLLABLE: - return "empty_pollable"; - case PO_COUNT: - return ""; - } - return ""; -} - -static char *pollable_desc(pollable *p) { - char *out; - gpr_asprintf(&out, "type=%s group=%p epfd=%d wakeup=%d", - polling_obj_type_string(p->po.type), p->po.group, p->epfd, - p->wakeup.read_fd); - return out; -} - -static pollable g_empty_pollable; - -static void pollable_init(pollable *p, polling_obj_type type); -static void pollable_destroy(pollable *p); -/* ensure that p->epfd, p->wakeup are initialized; p->po.mu must be held */ -static grpc_error *pollable_materialize(pollable *p); - -/******************************************************************************* - * Fd Declarations - */ - -struct grpc_fd { - pollable pollable_obj; - int fd; - /* refst format: - bit 0 : 1=Active / 0=Orphaned - bits 1-n : refcount - Ref/Unref by two to avoid altering the orphaned bit */ - gpr_atm refst; - - /* The fd is either closed or we relinquished control of it. In either - cases, this indicates that the 'fd' on this structure is no longer - valid */ - gpr_mu orphaned_mu; - bool orphaned; - - gpr_atm read_closure; - gpr_atm write_closure; - - struct grpc_fd *freelist_next; - grpc_closure *on_done_closure; - - /* The pollset that last noticed that the fd is readable. The actual type - * stored in this is (grpc_pollset *) */ - gpr_atm read_notifier_pollset; - - grpc_iomgr_object iomgr_object; -}; - -static void fd_global_init(void); -static void fd_global_shutdown(void); - -/******************************************************************************* - * Pollset Declarations - */ - -typedef struct pollset_worker_link { - grpc_pollset_worker *next; - grpc_pollset_worker *prev; -} pollset_worker_link; - -typedef enum { - PWL_POLLSET, - PWL_POLLABLE, - POLLSET_WORKER_LINK_COUNT -} pollset_worker_links; - -struct grpc_pollset_worker { - bool kicked; - bool initialized_cv; - pollset_worker_link links[POLLSET_WORKER_LINK_COUNT]; - gpr_cv cv; - grpc_pollset *pollset; - pollable *pollable_obj; -}; - -#define MAX_EPOLL_EVENTS 100 -#define MAX_EPOLL_EVENTS_HANDLED_EACH_POLL_CALL 5 - -struct grpc_pollset { - pollable pollable_obj; - pollable *current_pollable_obj; - int kick_alls_pending; - bool kicked_without_poller; - grpc_closure *shutdown_closure; - grpc_pollset_worker *root_worker; - - int event_cursor; - int event_count; - struct epoll_event events[MAX_EPOLL_EVENTS]; -}; - -/******************************************************************************* - * Pollset-set Declarations - */ -struct grpc_pollset_set { - polling_obj po; -}; - -/******************************************************************************* - * Common helpers - */ - -static bool append_error(grpc_error **composite, grpc_error *error, - const char *desc) { - if (error == GRPC_ERROR_NONE) return true; - if (*composite == GRPC_ERROR_NONE) { - *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc); - } - *composite = grpc_error_add_child(*composite, error); - return false; -} - -/******************************************************************************* - * Fd Definitions - */ - -/* We need to keep a freelist not because of any concerns of malloc performance - * but instead so that implementations with multiple threads in (for example) - * epoll_wait deal with the race between pollset removal and incoming poll - * notifications. - * - * The problem is that the poller ultimately holds a reference to this - * object, so it is very difficult to know when is safe to free it, at least - * without some expensive synchronization. - * - * If we keep the object freelisted, in the worst case losing this race just - * becomes a spurious read notification on a reused fd. - */ - -/* The alarm system needs to be able to wakeup 'some poller' sometimes - * (specifically when a new alarm needs to be triggered earlier than the next - * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a - * case occurs. */ - -static grpc_fd *fd_freelist = NULL; -static gpr_mu fd_freelist_mu; - -#ifndef NDEBUG -#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__) -#define UNREF_BY(ec, fd, n, reason) \ - unref_by(ec, fd, n, reason, __FILE__, __LINE__) -static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file, - int line) { - if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { - gpr_log(GPR_DEBUG, - "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", - fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), - gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line); - } -#else -#define REF_BY(fd, n, reason) ref_by(fd, n) -#define UNREF_BY(ec, fd, n, reason) unref_by(ec, fd, n) -static void ref_by(grpc_fd *fd, int n) { -#endif - GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0); -} - -static void fd_destroy(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { - grpc_fd *fd = (grpc_fd *)arg; - /* Add the fd to the freelist */ - grpc_iomgr_unregister_object(&fd->iomgr_object); - pollable_destroy(&fd->pollable_obj); - gpr_mu_destroy(&fd->orphaned_mu); - gpr_mu_lock(&fd_freelist_mu); - fd->freelist_next = fd_freelist; - fd_freelist = fd; - - grpc_lfev_destroy(&fd->read_closure); - grpc_lfev_destroy(&fd->write_closure); - - gpr_mu_unlock(&fd_freelist_mu); -} - -#ifndef NDEBUG -static void unref_by(grpc_exec_ctx *exec_ctx, grpc_fd *fd, int n, - const char *reason, const char *file, int line) { - if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { - gpr_log(GPR_DEBUG, - "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", - fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), - gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line); - } -#else -static void unref_by(grpc_exec_ctx *exec_ctx, grpc_fd *fd, int n) { -#endif - gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n); - if (old == n) { - GRPC_CLOSURE_SCHED(exec_ctx, GRPC_CLOSURE_CREATE(fd_destroy, fd, - grpc_schedule_on_exec_ctx), - GRPC_ERROR_NONE); - } else { - GPR_ASSERT(old > n); - } -} - -static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); } - -static void fd_global_shutdown(void) { - gpr_mu_lock(&fd_freelist_mu); - gpr_mu_unlock(&fd_freelist_mu); - while (fd_freelist != NULL) { - grpc_fd *fd = fd_freelist; - fd_freelist = fd_freelist->freelist_next; - gpr_free(fd); - } - gpr_mu_destroy(&fd_freelist_mu); -} - -static grpc_fd *fd_create(int fd, const char *name) { - grpc_fd *new_fd = NULL; - - gpr_mu_lock(&fd_freelist_mu); - if (fd_freelist != NULL) { - new_fd = fd_freelist; - fd_freelist = fd_freelist->freelist_next; - } - gpr_mu_unlock(&fd_freelist_mu); - - if (new_fd == NULL) { - new_fd = (grpc_fd *)gpr_malloc(sizeof(grpc_fd)); - } - - pollable_init(&new_fd->pollable_obj, PO_FD); - - gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1); - new_fd->fd = fd; - gpr_mu_init(&new_fd->orphaned_mu); - new_fd->orphaned = false; - grpc_lfev_init(&new_fd->read_closure); - grpc_lfev_init(&new_fd->write_closure); - gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL); - - new_fd->freelist_next = NULL; - new_fd->on_done_closure = NULL; - - char *fd_name; - gpr_asprintf(&fd_name, "%s fd=%d", name, fd); - grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name); -#ifndef NDEBUG - if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { - gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, new_fd, fd_name); - } -#endif - gpr_free(fd_name); - return new_fd; -} - -static int fd_wrapped_fd(grpc_fd *fd) { - int ret_fd = -1; - gpr_mu_lock(&fd->orphaned_mu); - if (!fd->orphaned) { - ret_fd = fd->fd; - } - gpr_mu_unlock(&fd->orphaned_mu); - - return ret_fd; -} - -static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *on_done, int *release_fd, - bool already_closed, const char *reason) { - bool is_fd_closed = already_closed; - grpc_error *error = GRPC_ERROR_NONE; - - gpr_mu_lock(&fd->pollable_obj.po.mu); - gpr_mu_lock(&fd->orphaned_mu); - fd->on_done_closure = on_done; - - /* If release_fd is not NULL, we should be relinquishing control of the file - descriptor fd->fd (but we still own the grpc_fd structure). */ - if (release_fd != NULL) { - *release_fd = fd->fd; - } else if (!is_fd_closed) { - close(fd->fd); - is_fd_closed = true; - } - - fd->orphaned = true; - - if (!is_fd_closed) { - gpr_log(GPR_DEBUG, "TODO: handle fd removal?"); - } - - /* Remove the active status but keep referenced. We want this grpc_fd struct - to be alive (and not added to freelist) until the end of this function */ - REF_BY(fd, 1, reason); - - GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error)); - - gpr_mu_unlock(&fd->orphaned_mu); - gpr_mu_unlock(&fd->pollable_obj.po.mu); - UNREF_BY(exec_ctx, fd, 2, reason); /* Drop the reference */ - GRPC_LOG_IF_ERROR("fd_orphan", GRPC_ERROR_REF(error)); - GRPC_ERROR_UNREF(error); -} - -static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, - grpc_fd *fd) { - gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset); - return (grpc_pollset *)notifier; -} - -static bool fd_is_shutdown(grpc_fd *fd) { - return grpc_lfev_is_shutdown(&fd->read_closure); -} - -/* Might be called multiple times */ -static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) { - if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure, - GRPC_ERROR_REF(why))) { - shutdown(fd->fd, SHUT_RDWR); - grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why)); - } - GRPC_ERROR_UNREF(why); -} - -static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure, "read"); -} - -static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure, "write"); -} - -/******************************************************************************* - * Pollable Definitions - */ - -static void pollable_init(pollable *p, polling_obj_type type) { - po_init(&p->po, type); - p->root_worker = NULL; - p->epfd = -1; -} - -static void pollable_destroy(pollable *p) { - po_destroy(&p->po); - if (p->epfd != -1) { - close(p->epfd); - grpc_wakeup_fd_destroy(&p->wakeup); - } -} - -/* ensure that p->epfd, p->wakeup are initialized; p->po.mu must be held */ -static grpc_error *pollable_materialize(pollable *p) { - if (p->epfd == -1) { - int new_epfd = epoll_create1(EPOLL_CLOEXEC); - if (new_epfd < 0) { - return GRPC_OS_ERROR(errno, "epoll_create1"); - } - grpc_error *err = grpc_wakeup_fd_init(&p->wakeup); - if (err != GRPC_ERROR_NONE) { - close(new_epfd); - return err; - } - struct epoll_event ev; - ev.events = (uint32_t)(EPOLLIN | EPOLLET); - ev.data.ptr = (void *)(1 | (intptr_t)&p->wakeup); - if (epoll_ctl(new_epfd, EPOLL_CTL_ADD, p->wakeup.read_fd, &ev) != 0) { - err = GRPC_OS_ERROR(errno, "epoll_ctl"); - close(new_epfd); - grpc_wakeup_fd_destroy(&p->wakeup); - return err; - } - - p->epfd = new_epfd; - } - return GRPC_ERROR_NONE; -} - -/* pollable must be materialized */ -static grpc_error *pollable_add_fd(pollable *p, grpc_fd *fd) { - grpc_error *error = GRPC_ERROR_NONE; - static const char *err_desc = "pollable_add_fd"; - const int epfd = p->epfd; - GPR_ASSERT(epfd != -1); - - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "add fd %p (%d) to pollable %p", fd, fd->fd, p); - } - - gpr_mu_lock(&fd->orphaned_mu); - if (fd->orphaned) { - gpr_mu_unlock(&fd->orphaned_mu); - return GRPC_ERROR_NONE; - } - struct epoll_event ev_fd; - ev_fd.events = (uint32_t)(EPOLLET | EPOLLIN | EPOLLOUT | EPOLLEXCLUSIVE); - ev_fd.data.ptr = fd; - if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd->fd, &ev_fd) != 0) { - switch (errno) { - case EEXIST: - break; - default: - append_error(&error, GRPC_OS_ERROR(errno, "epoll_ctl"), err_desc); - } - } - gpr_mu_unlock(&fd->orphaned_mu); - - return error; -} - -/******************************************************************************* - * Pollset Definitions - */ - -GPR_TLS_DECL(g_current_thread_pollset); -GPR_TLS_DECL(g_current_thread_worker); - -/* Global state management */ -static grpc_error *pollset_global_init(void) { - gpr_tls_init(&g_current_thread_pollset); - gpr_tls_init(&g_current_thread_worker); - pollable_init(&g_empty_pollable, PO_EMPTY_POLLABLE); - return GRPC_ERROR_NONE; -} - -static void pollset_global_shutdown(void) { - pollable_destroy(&g_empty_pollable); - gpr_tls_destroy(&g_current_thread_pollset); - gpr_tls_destroy(&g_current_thread_worker); -} - -static void pollset_maybe_finish_shutdown(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset) { - if (pollset->shutdown_closure != NULL && pollset->root_worker == NULL && - pollset->kick_alls_pending == 0) { - GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_closure, GRPC_ERROR_NONE); - pollset->shutdown_closure = NULL; - } -} - -static void do_kick_all(grpc_exec_ctx *exec_ctx, void *arg, - grpc_error *error_unused) { - grpc_error *error = GRPC_ERROR_NONE; - grpc_pollset *pollset = (grpc_pollset *)arg; - gpr_mu_lock(&pollset->pollable_obj.po.mu); - if (pollset->root_worker != NULL) { - grpc_pollset_worker *worker = pollset->root_worker; - do { - GRPC_STATS_INC_POLLSET_KICK(exec_ctx); - if (worker->pollable_obj != &pollset->pollable_obj) { - gpr_mu_lock(&worker->pollable_obj->po.mu); - } - if (worker->initialized_cv && worker != pollset->root_worker) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p kickall_via_cv %p (pollable %p vs %p)", - pollset, worker, &pollset->pollable_obj, - worker->pollable_obj); - } - worker->kicked = true; - gpr_cv_signal(&worker->cv); - } else { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p kickall_via_wakeup %p (pollable %p vs %p)", - pollset, worker, &pollset->pollable_obj, - worker->pollable_obj); - } - append_error(&error, - grpc_wakeup_fd_wakeup(&worker->pollable_obj->wakeup), - "pollset_shutdown"); - } - if (worker->pollable_obj != &pollset->pollable_obj) { - gpr_mu_unlock(&worker->pollable_obj->po.mu); - } - - worker = worker->links[PWL_POLLSET].next; - } while (worker != pollset->root_worker); - } - pollset->kick_alls_pending--; - pollset_maybe_finish_shutdown(exec_ctx, pollset); - gpr_mu_unlock(&pollset->pollable_obj.po.mu); - GRPC_LOG_IF_ERROR("kick_all", error); -} - -static void pollset_kick_all(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { - pollset->kick_alls_pending++; - GRPC_CLOSURE_SCHED(exec_ctx, GRPC_CLOSURE_CREATE(do_kick_all, pollset, - grpc_schedule_on_exec_ctx), - GRPC_ERROR_NONE); -} - -static grpc_error *pollset_kick_inner(grpc_pollset *pollset, pollable *p, - grpc_pollset_worker *specific_worker) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, - "PS:%p kick %p tls_pollset=%p tls_worker=%p " - "root_worker=(pollset:%p pollable:%p)", - p, specific_worker, (void *)gpr_tls_get(&g_current_thread_pollset), - (void *)gpr_tls_get(&g_current_thread_worker), pollset->root_worker, - p->root_worker); - } - if (specific_worker == NULL) { - if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)pollset) { - if (pollset->root_worker == NULL) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p kicked_any_without_poller", p); - } - pollset->kicked_without_poller = true; - return GRPC_ERROR_NONE; - } else { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p kicked_any_via_wakeup_fd", p); - } - grpc_error *err = pollable_materialize(p); - if (err != GRPC_ERROR_NONE) return err; - return grpc_wakeup_fd_wakeup(&p->wakeup); - } - } else { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p kicked_any_but_awake", p); - } - return GRPC_ERROR_NONE; - } - } else if (specific_worker->kicked) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p kicked_specific_but_already_kicked", p); - } - return GRPC_ERROR_NONE; - } else if (gpr_tls_get(&g_current_thread_worker) == - (intptr_t)specific_worker) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p kicked_specific_but_awake", p); - } - specific_worker->kicked = true; - return GRPC_ERROR_NONE; - } else if (specific_worker == p->root_worker) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p kicked_specific_via_wakeup_fd", p); - } - grpc_error *err = pollable_materialize(p); - if (err != GRPC_ERROR_NONE) return err; - specific_worker->kicked = true; - return grpc_wakeup_fd_wakeup(&p->wakeup); - } else { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p kicked_specific_via_cv", p); - } - specific_worker->kicked = true; - gpr_cv_signal(&specific_worker->cv); - return GRPC_ERROR_NONE; - } -} - -/* p->po.mu must be held before calling this function */ -static grpc_error *pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker *specific_worker) { - pollable *p = pollset->current_pollable_obj; - GRPC_STATS_INC_POLLSET_KICK(exec_ctx); - if (p != &pollset->pollable_obj) { - gpr_mu_lock(&p->po.mu); - } - grpc_error *error = pollset_kick_inner(pollset, p, specific_worker); - if (p != &pollset->pollable_obj) { - gpr_mu_unlock(&p->po.mu); - } - return error; -} - -static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { - pollable_init(&pollset->pollable_obj, PO_POLLSET); - pollset->current_pollable_obj = &g_empty_pollable; - pollset->kicked_without_poller = false; - pollset->shutdown_closure = NULL; - pollset->root_worker = NULL; - *mu = &pollset->pollable_obj.po.mu; -} - -/* Convert a timespec to milliseconds: - - Very small or negative poll times are clamped to zero to do a non-blocking - poll (which becomes spin polling) - - Other small values are rounded up to one millisecond - - Longer than a millisecond polls are rounded up to the next nearest - millisecond to avoid spinning - - Infinite timeouts are converted to -1 */ -static int poll_deadline_to_millis_timeout(gpr_timespec deadline, - gpr_timespec now) { - gpr_timespec timeout; - if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { - return -1; - } - - if (gpr_time_cmp(deadline, now) <= 0) { - return 0; - } - - static const gpr_timespec round_up = { - 0, /* tv_sec */ - GPR_NS_PER_MS - 1, /* tv_nsec */ - GPR_TIMESPAN /* clock_type */ - }; - timeout = gpr_time_sub(deadline, now); - int millis = gpr_time_to_millis(gpr_time_add(timeout, round_up)); - return millis >= 1 ? millis : 1; -} - -static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_pollset *notifier) { - grpc_lfev_set_ready(exec_ctx, &fd->read_closure, "read"); - - /* Note, it is possible that fd_become_readable might be called twice with - different 'notifier's when an fd becomes readable and it is in two epoll - sets (This can happen briefly during polling island merges). In such cases - it does not really matter which notifer is set as the read_notifier_pollset - (They would both point to the same polling island anyway) */ - /* Use release store to match with acquire load in fd_get_read_notifier */ - gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier); -} - -static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - grpc_lfev_set_ready(exec_ctx, &fd->write_closure, "write"); -} - -static grpc_error *fd_become_pollable_locked(grpc_fd *fd) { - grpc_error *error = GRPC_ERROR_NONE; - static const char *err_desc = "fd_become_pollable"; - if (append_error(&error, pollable_materialize(&fd->pollable_obj), err_desc)) { - append_error(&error, pollable_add_fd(&fd->pollable_obj, fd), err_desc); - } - return error; -} - -/* pollset->po.mu lock must be held by the caller before calling this */ -static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_closure *closure) { - GPR_ASSERT(pollset->shutdown_closure == NULL); - pollset->shutdown_closure = closure; - pollset_kick_all(exec_ctx, pollset); - pollset_maybe_finish_shutdown(exec_ctx, pollset); -} - -static bool pollset_is_pollable_fd(grpc_pollset *pollset, pollable *p) { - return p != &g_empty_pollable && p != &pollset->pollable_obj; -} - -static grpc_error *pollset_process_events(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset, bool drain) { - static const char *err_desc = "pollset_process_events"; - grpc_error *error = GRPC_ERROR_NONE; - for (int i = 0; (drain || i < MAX_EPOLL_EVENTS_HANDLED_EACH_POLL_CALL) && - pollset->event_cursor != pollset->event_count; - i++) { - int n = pollset->event_cursor++; - struct epoll_event *ev = &pollset->events[n]; - void *data_ptr = ev->data.ptr; - if (1 & (intptr_t)data_ptr) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p got pollset_wakeup %p", pollset, data_ptr); - } - append_error(&error, - grpc_wakeup_fd_consume_wakeup( - (grpc_wakeup_fd *)((~(intptr_t)1) & (intptr_t)data_ptr)), - err_desc); - } else { - grpc_fd *fd = (grpc_fd *)data_ptr; - bool cancel = (ev->events & (EPOLLERR | EPOLLHUP)) != 0; - bool read_ev = (ev->events & (EPOLLIN | EPOLLPRI)) != 0; - bool write_ev = (ev->events & EPOLLOUT) != 0; - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, - "PS:%p got fd %p: cancel=%d read=%d " - "write=%d", - pollset, fd, cancel, read_ev, write_ev); - } - if (read_ev || cancel) { - fd_become_readable(exec_ctx, fd, pollset); - } - if (write_ev || cancel) { - fd_become_writable(exec_ctx, fd); - } - } - } - - return error; -} - -/* pollset_shutdown is guaranteed to be called before pollset_destroy. */ -static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { - pollable_destroy(&pollset->pollable_obj); - if (pollset_is_pollable_fd(pollset, pollset->current_pollable_obj)) { - UNREF_BY(exec_ctx, (grpc_fd *)pollset->current_pollable_obj, 2, - "pollset_pollable"); - } - GRPC_LOG_IF_ERROR("pollset_process_events", - pollset_process_events(exec_ctx, pollset, true)); -} - -static grpc_error *pollset_epoll(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - pollable *p, gpr_timespec now, - gpr_timespec deadline) { - int timeout = poll_deadline_to_millis_timeout(deadline, now); - - if (GRPC_TRACER_ON(grpc_polling_trace)) { - char *desc = pollable_desc(p); - gpr_log(GPR_DEBUG, "PS:%p poll %p[%s] for %dms", pollset, p, desc, timeout); - gpr_free(desc); - } - - if (timeout != 0) { - GRPC_SCHEDULING_START_BLOCKING_REGION; - } - int r; - do { - GRPC_STATS_INC_SYSCALL_POLL(exec_ctx); - r = epoll_wait(p->epfd, pollset->events, MAX_EPOLL_EVENTS, timeout); - } while (r < 0 && errno == EINTR); - if (timeout != 0) { - GRPC_SCHEDULING_END_BLOCKING_REGION; - } - - if (r < 0) return GRPC_OS_ERROR(errno, "epoll_wait"); - - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p poll %p got %d events", pollset, p, r); - } - - pollset->event_cursor = 0; - pollset->event_count = r; - - return GRPC_ERROR_NONE; -} - -/* Return true if first in list */ -static bool worker_insert(grpc_pollset_worker **root, pollset_worker_links link, - grpc_pollset_worker *worker) { - if (*root == NULL) { - *root = worker; - worker->links[link].next = worker->links[link].prev = worker; - return true; - } else { - worker->links[link].next = *root; - worker->links[link].prev = worker->links[link].next->links[link].prev; - worker->links[link].next->links[link].prev = worker; - worker->links[link].prev->links[link].next = worker; - return false; - } -} - -/* Return true if last in list */ -typedef enum { EMPTIED, NEW_ROOT, REMOVED } worker_remove_result; - -static worker_remove_result worker_remove(grpc_pollset_worker **root, - pollset_worker_links link, - grpc_pollset_worker *worker) { - if (worker == *root) { - if (worker == worker->links[link].next) { - *root = NULL; - return EMPTIED; - } else { - *root = worker->links[link].next; - worker->links[link].prev->links[link].next = worker->links[link].next; - worker->links[link].next->links[link].prev = worker->links[link].prev; - return NEW_ROOT; - } - } else { - worker->links[link].prev->links[link].next = worker->links[link].next; - worker->links[link].next->links[link].prev = worker->links[link].prev; - return REMOVED; - } -} - -/* Return true if this thread should poll */ -static bool begin_worker(grpc_pollset *pollset, grpc_pollset_worker *worker, - grpc_pollset_worker **worker_hdl, gpr_timespec *now, - gpr_timespec deadline) { - bool do_poll = true; - if (worker_hdl != NULL) *worker_hdl = worker; - worker->initialized_cv = false; - worker->kicked = false; - worker->pollset = pollset; - worker->pollable_obj = pollset->current_pollable_obj; - - if (pollset_is_pollable_fd(pollset, worker->pollable_obj)) { - REF_BY((grpc_fd *)worker->pollable_obj, 2, "one_poll"); - } - - worker_insert(&pollset->root_worker, PWL_POLLSET, worker); - if (!worker_insert(&worker->pollable_obj->root_worker, PWL_POLLABLE, - worker)) { - worker->initialized_cv = true; - gpr_cv_init(&worker->cv); - if (worker->pollable_obj != &pollset->pollable_obj) { - gpr_mu_unlock(&pollset->pollable_obj.po.mu); - } - if (GRPC_TRACER_ON(grpc_polling_trace) && - worker->pollable_obj->root_worker != worker) { - gpr_log(GPR_DEBUG, "PS:%p wait %p w=%p for %dms", pollset, - worker->pollable_obj, worker, - poll_deadline_to_millis_timeout(deadline, *now)); - } - while (do_poll && worker->pollable_obj->root_worker != worker) { - if (gpr_cv_wait(&worker->cv, &worker->pollable_obj->po.mu, deadline)) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p timeout_wait %p w=%p", pollset, - worker->pollable_obj, worker); - } - do_poll = false; - } else if (worker->kicked) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p wakeup %p w=%p", pollset, - worker->pollable_obj, worker); - } - do_poll = false; - } else if (GRPC_TRACER_ON(grpc_polling_trace) && - worker->pollable_obj->root_worker != worker) { - gpr_log(GPR_DEBUG, "PS:%p spurious_wakeup %p w=%p", pollset, - worker->pollable_obj, worker); - } - } - if (worker->pollable_obj != &pollset->pollable_obj) { - gpr_mu_unlock(&worker->pollable_obj->po.mu); - gpr_mu_lock(&pollset->pollable_obj.po.mu); - gpr_mu_lock(&worker->pollable_obj->po.mu); - } - *now = gpr_now(now->clock_type); - } - - return do_poll && pollset->shutdown_closure == NULL && - pollset->current_pollable_obj == worker->pollable_obj; -} - -static void end_worker(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker *worker, - grpc_pollset_worker **worker_hdl) { - if (NEW_ROOT == - worker_remove(&worker->pollable_obj->root_worker, PWL_POLLABLE, worker)) { - gpr_cv_signal(&worker->pollable_obj->root_worker->cv); - } - if (worker->initialized_cv) { - gpr_cv_destroy(&worker->cv); - } - if (pollset_is_pollable_fd(pollset, worker->pollable_obj)) { - UNREF_BY(exec_ctx, (grpc_fd *)worker->pollable_obj, 2, "one_poll"); - } - if (EMPTIED == worker_remove(&pollset->root_worker, PWL_POLLSET, worker)) { - pollset_maybe_finish_shutdown(exec_ctx, pollset); - } -} - -/* pollset->po.mu lock must be held by the caller before calling this. - The function pollset_work() may temporarily release the lock (pollset->po.mu) - during the course of its execution but it will always re-acquire the lock and - ensure that it is held by the time the function returns */ -static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker **worker_hdl, - gpr_timespec now, gpr_timespec deadline) { - grpc_pollset_worker worker; - if (0 && GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p work hdl=%p worker=%p now=%" PRId64 - ".%09d deadline=%" PRId64 ".%09d kwp=%d root_worker=%p", - pollset, worker_hdl, &worker, now.tv_sec, now.tv_nsec, - deadline.tv_sec, deadline.tv_nsec, pollset->kicked_without_poller, - pollset->root_worker); - } - grpc_error *error = GRPC_ERROR_NONE; - static const char *err_desc = "pollset_work"; - if (pollset->kicked_without_poller) { - pollset->kicked_without_poller = false; - return GRPC_ERROR_NONE; - } - if (pollset->current_pollable_obj != &pollset->pollable_obj) { - gpr_mu_lock(&pollset->current_pollable_obj->po.mu); - } - if (begin_worker(pollset, &worker, worker_hdl, &now, deadline)) { - gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset); - gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); - GPR_ASSERT(!pollset->shutdown_closure); - append_error(&error, pollable_materialize(worker.pollable_obj), err_desc); - if (worker.pollable_obj != &pollset->pollable_obj) { - gpr_mu_unlock(&worker.pollable_obj->po.mu); - } - gpr_mu_unlock(&pollset->pollable_obj.po.mu); - if (pollset->event_cursor == pollset->event_count) { - append_error(&error, pollset_epoll(exec_ctx, pollset, worker.pollable_obj, - now, deadline), - err_desc); - } - append_error(&error, pollset_process_events(exec_ctx, pollset, false), - err_desc); - gpr_mu_lock(&pollset->pollable_obj.po.mu); - if (worker.pollable_obj != &pollset->pollable_obj) { - gpr_mu_lock(&worker.pollable_obj->po.mu); - } - gpr_tls_set(&g_current_thread_pollset, 0); - gpr_tls_set(&g_current_thread_worker, 0); - pollset_maybe_finish_shutdown(exec_ctx, pollset); - } - end_worker(exec_ctx, pollset, &worker, worker_hdl); - if (worker.pollable_obj != &pollset->pollable_obj) { - gpr_mu_unlock(&worker.pollable_obj->po.mu); - } - if (grpc_exec_ctx_has_work(exec_ctx)) { - gpr_mu_unlock(&pollset->pollable_obj.po.mu); - grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&pollset->pollable_obj.po.mu); - } - return error; -} - -static void unref_fd_no_longer_poller(grpc_exec_ctx *exec_ctx, void *arg, - grpc_error *error) { - grpc_fd *fd = (grpc_fd *)arg; - UNREF_BY(exec_ctx, fd, 2, "pollset_pollable"); -} - -/* expects pollsets locked, flag whether fd is locked or not */ -static grpc_error *pollset_add_fd_locked(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset, grpc_fd *fd, - bool fd_locked) { - static const char *err_desc = "pollset_add_fd"; - grpc_error *error = GRPC_ERROR_NONE; - if (pollset->current_pollable_obj == &g_empty_pollable) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, - "PS:%p add fd %p; transition pollable from empty to fd", pollset, - fd); - } - /* empty pollable --> single fd pollable */ - pollset_kick_all(exec_ctx, pollset); - pollset->current_pollable_obj = &fd->pollable_obj; - if (!fd_locked) gpr_mu_lock(&fd->pollable_obj.po.mu); - append_error(&error, fd_become_pollable_locked(fd), err_desc); - if (!fd_locked) gpr_mu_unlock(&fd->pollable_obj.po.mu); - REF_BY(fd, 2, "pollset_pollable"); - } else if (pollset->current_pollable_obj == &pollset->pollable_obj) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "PS:%p add fd %p; already multipolling", pollset, fd); - } - append_error(&error, pollable_add_fd(pollset->current_pollable_obj, fd), - err_desc); - } else if (pollset->current_pollable_obj != &fd->pollable_obj) { - grpc_fd *had_fd = (grpc_fd *)pollset->current_pollable_obj; - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, - "PS:%p add fd %p; transition pollable from fd %p to multipoller", - pollset, fd, had_fd); - } - /* Introduce a spurious completion. - If we do not, then it may be that the fd-specific epoll set consumed - a completion without being polled, leading to a missed edge going up. */ - grpc_lfev_set_ready(exec_ctx, &had_fd->read_closure, "read"); - grpc_lfev_set_ready(exec_ctx, &had_fd->write_closure, "write"); - pollset_kick_all(exec_ctx, pollset); - pollset->current_pollable_obj = &pollset->pollable_obj; - if (append_error(&error, pollable_materialize(&pollset->pollable_obj), - err_desc)) { - pollable_add_fd(&pollset->pollable_obj, had_fd); - pollable_add_fd(&pollset->pollable_obj, fd); - } - GRPC_CLOSURE_SCHED(exec_ctx, - GRPC_CLOSURE_CREATE(unref_fd_no_longer_poller, had_fd, - grpc_schedule_on_exec_ctx), - GRPC_ERROR_NONE); - } - return error; -} - -static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_fd *fd) { - gpr_mu_lock(&pollset->pollable_obj.po.mu); - grpc_error *error = pollset_add_fd_locked(exec_ctx, pollset, fd, false); - gpr_mu_unlock(&pollset->pollable_obj.po.mu); - GRPC_LOG_IF_ERROR("pollset_add_fd", error); -} - -/******************************************************************************* - * Pollset-set Definitions - */ - -static grpc_pollset_set *pollset_set_create(void) { - grpc_pollset_set *pss = (grpc_pollset_set *)gpr_zalloc(sizeof(*pss)); - po_init(&pss->po, PO_POLLSET_SET); - return pss; -} - -static void pollset_set_destroy(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pss) { - po_destroy(&pss->po); - gpr_free(pss); -} - -static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, - grpc_fd *fd) { - po_join(exec_ctx, &pss->po, &fd->pollable_obj.po); -} - -static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, - grpc_fd *fd) {} - -static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pss, grpc_pollset *ps) { - po_join(exec_ctx, &pss->po, &ps->pollable_obj.po); -} - -static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pss, grpc_pollset *ps) {} - -static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) { - po_join(exec_ctx, &bag->po, &item->po); -} - -static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) {} - -static void po_init(polling_obj *po, polling_obj_type type) { - gpr_mu_init(&po->mu); - po->type = type; - po->group = NULL; - po->next = po; - po->prev = po; -} - -static polling_group *pg_lock_latest(polling_group *pg) { - /* assumes pg unlocked; consumes ref, returns ref */ - gpr_mu_lock(&pg->po.mu); - while (pg->po.group != NULL) { - polling_group *new_pg = pg_ref(pg->po.group); - gpr_mu_unlock(&pg->po.mu); - pg_unref(pg); - pg = new_pg; - gpr_mu_lock(&pg->po.mu); - } - return pg; -} - -static void po_destroy(polling_obj *po) { - if (po->group != NULL) { - polling_group *pg = pg_lock_latest(po->group); - po->prev->next = po->next; - po->next->prev = po->prev; - gpr_mu_unlock(&pg->po.mu); - pg_unref(pg); - } - gpr_mu_destroy(&po->mu); -} - -static polling_group *pg_ref(polling_group *pg) { - gpr_ref(&pg->refs); - return pg; -} - -static void pg_unref(polling_group *pg) { - if (gpr_unref(&pg->refs)) { - po_destroy(&pg->po); - gpr_free(pg); - } -} - -static int po_cmp(polling_obj *a, polling_obj *b) { - if (a == b) return 0; - if (a->type < b->type) return -1; - if (a->type > b->type) return 1; - if (a < b) return -1; - assert(a > b); - return 1; -} - -static void po_join(grpc_exec_ctx *exec_ctx, polling_obj *a, polling_obj *b) { - switch (po_cmp(a, b)) { - case 0: - return; - case 1: - GPR_SWAP(polling_obj *, a, b); - /* fall through */ - case -1: - gpr_mu_lock(&a->mu); - gpr_mu_lock(&b->mu); - - if (a->group == NULL) { - if (b->group == NULL) { - polling_obj *initial_po[] = {a, b}; - pg_create(exec_ctx, initial_po, GPR_ARRAY_SIZE(initial_po)); - gpr_mu_unlock(&a->mu); - gpr_mu_unlock(&b->mu); - } else { - polling_group *b_group = pg_ref(b->group); - gpr_mu_unlock(&b->mu); - gpr_mu_unlock(&a->mu); - pg_join(exec_ctx, b_group, a); - } - } else if (b->group == NULL) { - polling_group *a_group = pg_ref(a->group); - gpr_mu_unlock(&a->mu); - gpr_mu_unlock(&b->mu); - pg_join(exec_ctx, a_group, b); - } else if (a->group == b->group) { - /* nothing to do */ - gpr_mu_unlock(&a->mu); - gpr_mu_unlock(&b->mu); - } else { - polling_group *a_group = pg_ref(a->group); - polling_group *b_group = pg_ref(b->group); - gpr_mu_unlock(&a->mu); - gpr_mu_unlock(&b->mu); - pg_merge(exec_ctx, a_group, b_group); - } - } -} - -static void pg_notify(grpc_exec_ctx *exec_ctx, polling_obj *a, polling_obj *b) { - if (a->type == PO_FD && b->type == PO_POLLSET) { - pollset_add_fd_locked(exec_ctx, (grpc_pollset *)b, (grpc_fd *)a, true); - } else if (a->type == PO_POLLSET && b->type == PO_FD) { - pollset_add_fd_locked(exec_ctx, (grpc_pollset *)a, (grpc_fd *)b, true); - } -} - -static void pg_broadcast(grpc_exec_ctx *exec_ctx, polling_group *from, - polling_group *to) { - for (polling_obj *a = from->po.next; a != &from->po; a = a->next) { - for (polling_obj *b = to->po.next; b != &to->po; b = b->next) { - if (po_cmp(a, b) < 0) { - gpr_mu_lock(&a->mu); - gpr_mu_lock(&b->mu); - } else { - GPR_ASSERT(po_cmp(a, b) != 0); - gpr_mu_lock(&b->mu); - gpr_mu_lock(&a->mu); - } - pg_notify(exec_ctx, a, b); - gpr_mu_unlock(&a->mu); - gpr_mu_unlock(&b->mu); - } - } -} - -static void pg_create(grpc_exec_ctx *exec_ctx, polling_obj **initial_po, - size_t initial_po_count) { - /* assumes all polling objects in initial_po are locked */ - polling_group *pg = (polling_group *)gpr_malloc(sizeof(*pg)); - po_init(&pg->po, PO_POLLING_GROUP); - gpr_ref_init(&pg->refs, (int)initial_po_count); - for (size_t i = 0; i < initial_po_count; i++) { - GPR_ASSERT(initial_po[i]->group == NULL); - initial_po[i]->group = pg; - } - for (size_t i = 1; i < initial_po_count; i++) { - initial_po[i]->prev = initial_po[i - 1]; - } - for (size_t i = 0; i < initial_po_count - 1; i++) { - initial_po[i]->next = initial_po[i + 1]; - } - initial_po[0]->prev = &pg->po; - initial_po[initial_po_count - 1]->next = &pg->po; - pg->po.next = initial_po[0]; - pg->po.prev = initial_po[initial_po_count - 1]; - for (size_t i = 1; i < initial_po_count; i++) { - for (size_t j = 0; j < i; j++) { - pg_notify(exec_ctx, initial_po[i], initial_po[j]); - } - } -} - -static void pg_join(grpc_exec_ctx *exec_ctx, polling_group *pg, - polling_obj *po) { - /* assumes neither pg nor po are locked; consumes one ref to pg */ - pg = pg_lock_latest(pg); - /* pg locked */ - for (polling_obj *existing = pg->po.next /* skip pg - it's just a stub */; - existing != &pg->po; existing = existing->next) { - if (po_cmp(po, existing) < 0) { - gpr_mu_lock(&po->mu); - gpr_mu_lock(&existing->mu); - } else { - GPR_ASSERT(po_cmp(po, existing) != 0); - gpr_mu_lock(&existing->mu); - gpr_mu_lock(&po->mu); - } - /* pg, po, existing locked */ - if (po->group != NULL) { - gpr_mu_unlock(&pg->po.mu); - polling_group *po_group = pg_ref(po->group); - gpr_mu_unlock(&po->mu); - gpr_mu_unlock(&existing->mu); - pg_merge(exec_ctx, pg, po_group); - /* early exit: polling obj picked up a group during joining: we needed - to do a full merge */ - return; - } - pg_notify(exec_ctx, po, existing); - gpr_mu_unlock(&po->mu); - gpr_mu_unlock(&existing->mu); - } - gpr_mu_lock(&po->mu); - if (po->group != NULL) { - gpr_mu_unlock(&pg->po.mu); - polling_group *po_group = pg_ref(po->group); - gpr_mu_unlock(&po->mu); - pg_merge(exec_ctx, pg, po_group); - /* early exit: polling obj picked up a group during joining: we needed - to do a full merge */ - return; - } - po->group = pg; - po->next = &pg->po; - po->prev = pg->po.prev; - po->prev->next = po->next->prev = po; - gpr_mu_unlock(&pg->po.mu); - gpr_mu_unlock(&po->mu); -} - -static void pg_merge(grpc_exec_ctx *exec_ctx, polling_group *a, - polling_group *b) { - for (;;) { - if (a == b) { - pg_unref(a); - pg_unref(b); - return; - } - if (a > b) GPR_SWAP(polling_group *, a, b); - gpr_mu_lock(&a->po.mu); - gpr_mu_lock(&b->po.mu); - if (a->po.group != NULL) { - polling_group *m2 = pg_ref(a->po.group); - gpr_mu_unlock(&a->po.mu); - gpr_mu_unlock(&b->po.mu); - pg_unref(a); - a = m2; - } else if (b->po.group != NULL) { - polling_group *m2 = pg_ref(b->po.group); - gpr_mu_unlock(&a->po.mu); - gpr_mu_unlock(&b->po.mu); - pg_unref(b); - b = m2; - } else { - break; - } - } - polling_group **unref = NULL; - size_t unref_count = 0; - size_t unref_cap = 0; - b->po.group = a; - pg_broadcast(exec_ctx, a, b); - pg_broadcast(exec_ctx, b, a); - while (b->po.next != &b->po) { - polling_obj *po = b->po.next; - gpr_mu_lock(&po->mu); - if (unref_count == unref_cap) { - unref_cap = GPR_MAX(8, 3 * unref_cap / 2); - unref = (polling_group **)gpr_realloc(unref, unref_cap * sizeof(*unref)); - } - unref[unref_count++] = po->group; - po->group = pg_ref(a); - // unlink from b - po->prev->next = po->next; - po->next->prev = po->prev; - // link to a - po->next = &a->po; - po->prev = a->po.prev; - po->next->prev = po->prev->next = po; - gpr_mu_unlock(&po->mu); - } - gpr_mu_unlock(&a->po.mu); - gpr_mu_unlock(&b->po.mu); - for (size_t i = 0; i < unref_count; i++) { - pg_unref(unref[i]); - } - gpr_free(unref); - pg_unref(b); -} - -/******************************************************************************* - * Event engine binding - */ - -static void shutdown_engine(void) { - fd_global_shutdown(); - pollset_global_shutdown(); -} - -static const grpc_event_engine_vtable vtable = { - sizeof(grpc_pollset), - - fd_create, - fd_wrapped_fd, - fd_orphan, - fd_shutdown, - fd_notify_on_read, - fd_notify_on_write, - fd_is_shutdown, - fd_get_read_notifier_pollset, - - pollset_init, - pollset_shutdown, - pollset_destroy, - pollset_work, - pollset_kick, - pollset_add_fd, - - pollset_set_create, - pollset_set_destroy, - pollset_set_add_pollset, - pollset_set_del_pollset, - pollset_set_add_pollset_set, - pollset_set_del_pollset_set, - pollset_set_add_fd, - pollset_set_del_fd, - - shutdown_engine, -}; - -const grpc_event_engine_vtable *grpc_init_epollex_linux( - bool explicitly_requested) { - if (!grpc_has_wakeup_fd()) { - return NULL; - } - - if (!grpc_is_epollexclusive_available()) { - return NULL; - } - - fd_global_init(); - - if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) { - pollset_global_shutdown(); - fd_global_shutdown(); - return NULL; - } - - return &vtable; -} - -#else /* defined(GRPC_LINUX_EPOLL) */ -#if defined(GRPC_POSIX_SOCKET) -#include "src/core/lib/iomgr/ev_posix.h" -/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return - * NULL */ -const grpc_event_engine_vtable *grpc_init_epollex_linux( - bool explicitly_requested) { - return NULL; -} -#endif /* defined(GRPC_POSIX_SOCKET) */ - -#endif /* !defined(GRPC_LINUX_EPOLL) */ diff --git a/src/core/lib/iomgr/ev_epollex_linux.cc b/src/core/lib/iomgr/ev_epollex_linux.cc new file mode 100644 index 0000000000..8eb4de44d9 --- /dev/null +++ b/src/core/lib/iomgr/ev_epollex_linux.cc @@ -0,0 +1,1461 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +/* This polling engine is only relevant on linux kernels supporting epoll() */ +#ifdef GRPC_LINUX_EPOLL + +#include "src/core/lib/iomgr/ev_epollex_linux.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "src/core/lib/debug/stats.h" +#include "src/core/lib/iomgr/ev_posix.h" +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/is_epollexclusive_available.h" +#include "src/core/lib/iomgr/lockfree_event.h" +#include "src/core/lib/iomgr/sys_epoll_wrapper.h" +#include "src/core/lib/iomgr/timer.h" +#include "src/core/lib/iomgr/wakeup_fd_posix.h" +#include "src/core/lib/profiling/timers.h" +#include "src/core/lib/support/block_annotate.h" +#include "src/core/lib/support/spinlock.h" + +/******************************************************************************* + * Polling object + */ + +typedef enum { + PO_POLLING_GROUP, + PO_POLLSET_SET, + PO_POLLSET, + PO_FD, /* ordering is important: we always want to lock pollsets before fds: + this guarantees that using an fd as a pollable is safe */ + PO_EMPTY_POLLABLE, + PO_COUNT +} polling_obj_type; + +typedef struct polling_obj polling_obj; +typedef struct polling_group polling_group; + +struct polling_obj { + gpr_mu mu; + polling_obj_type type; + polling_group *group; + struct polling_obj *next; + struct polling_obj *prev; +}; + +struct polling_group { + polling_obj po; + gpr_refcount refs; +}; + +static void po_init(polling_obj *po, polling_obj_type type); +static void po_destroy(polling_obj *po); +static void po_join(grpc_exec_ctx *exec_ctx, polling_obj *a, polling_obj *b); +static int po_cmp(polling_obj *a, polling_obj *b); + +static void pg_create(grpc_exec_ctx *exec_ctx, polling_obj **initial_po, + size_t initial_po_count); +static polling_group *pg_ref(polling_group *pg); +static void pg_unref(polling_group *pg); +static void pg_merge(grpc_exec_ctx *exec_ctx, polling_group *a, + polling_group *b); +static void pg_join(grpc_exec_ctx *exec_ctx, polling_group *pg, + polling_obj *po); + +/******************************************************************************* + * pollable Declarations + */ + +typedef struct pollable { + polling_obj po; + int epfd; + grpc_wakeup_fd wakeup; + grpc_pollset_worker *root_worker; +} pollable; + +static const char *polling_obj_type_string(polling_obj_type t) { + switch (t) { + case PO_POLLING_GROUP: + return "polling_group"; + case PO_POLLSET_SET: + return "pollset_set"; + case PO_POLLSET: + return "pollset"; + case PO_FD: + return "fd"; + case PO_EMPTY_POLLABLE: + return "empty_pollable"; + case PO_COUNT: + return ""; + } + return ""; +} + +static char *pollable_desc(pollable *p) { + char *out; + gpr_asprintf(&out, "type=%s group=%p epfd=%d wakeup=%d", + polling_obj_type_string(p->po.type), p->po.group, p->epfd, + p->wakeup.read_fd); + return out; +} + +static pollable g_empty_pollable; + +static void pollable_init(pollable *p, polling_obj_type type); +static void pollable_destroy(pollable *p); +/* ensure that p->epfd, p->wakeup are initialized; p->po.mu must be held */ +static grpc_error *pollable_materialize(pollable *p); + +/******************************************************************************* + * Fd Declarations + */ + +struct grpc_fd { + pollable pollable_obj; + int fd; + /* refst format: + bit 0 : 1=Active / 0=Orphaned + bits 1-n : refcount + Ref/Unref by two to avoid altering the orphaned bit */ + gpr_atm refst; + + /* The fd is either closed or we relinquished control of it. In either + cases, this indicates that the 'fd' on this structure is no longer + valid */ + gpr_mu orphaned_mu; + bool orphaned; + + gpr_atm read_closure; + gpr_atm write_closure; + + struct grpc_fd *freelist_next; + grpc_closure *on_done_closure; + + /* The pollset that last noticed that the fd is readable. The actual type + * stored in this is (grpc_pollset *) */ + gpr_atm read_notifier_pollset; + + grpc_iomgr_object iomgr_object; +}; + +static void fd_global_init(void); +static void fd_global_shutdown(void); + +/******************************************************************************* + * Pollset Declarations + */ + +typedef struct pollset_worker_link { + grpc_pollset_worker *next; + grpc_pollset_worker *prev; +} pollset_worker_link; + +typedef enum { + PWL_POLLSET, + PWL_POLLABLE, + POLLSET_WORKER_LINK_COUNT +} pollset_worker_links; + +struct grpc_pollset_worker { + bool kicked; + bool initialized_cv; + pollset_worker_link links[POLLSET_WORKER_LINK_COUNT]; + gpr_cv cv; + grpc_pollset *pollset; + pollable *pollable_obj; +}; + +#define MAX_EPOLL_EVENTS 100 +#define MAX_EPOLL_EVENTS_HANDLED_EACH_POLL_CALL 5 + +struct grpc_pollset { + pollable pollable_obj; + pollable *current_pollable_obj; + int kick_alls_pending; + bool kicked_without_poller; + grpc_closure *shutdown_closure; + grpc_pollset_worker *root_worker; + + int event_cursor; + int event_count; + struct epoll_event events[MAX_EPOLL_EVENTS]; +}; + +/******************************************************************************* + * Pollset-set Declarations + */ +struct grpc_pollset_set { + polling_obj po; +}; + +/******************************************************************************* + * Common helpers + */ + +static bool append_error(grpc_error **composite, grpc_error *error, + const char *desc) { + if (error == GRPC_ERROR_NONE) return true; + if (*composite == GRPC_ERROR_NONE) { + *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc); + } + *composite = grpc_error_add_child(*composite, error); + return false; +} + +/******************************************************************************* + * Fd Definitions + */ + +/* We need to keep a freelist not because of any concerns of malloc performance + * but instead so that implementations with multiple threads in (for example) + * epoll_wait deal with the race between pollset removal and incoming poll + * notifications. + * + * The problem is that the poller ultimately holds a reference to this + * object, so it is very difficult to know when is safe to free it, at least + * without some expensive synchronization. + * + * If we keep the object freelisted, in the worst case losing this race just + * becomes a spurious read notification on a reused fd. + */ + +/* The alarm system needs to be able to wakeup 'some poller' sometimes + * (specifically when a new alarm needs to be triggered earlier than the next + * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a + * case occurs. */ + +static grpc_fd *fd_freelist = NULL; +static gpr_mu fd_freelist_mu; + +#ifndef NDEBUG +#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__) +#define UNREF_BY(ec, fd, n, reason) \ + unref_by(ec, fd, n, reason, __FILE__, __LINE__) +static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file, + int line) { + if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { + gpr_log(GPR_DEBUG, + "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", + fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), + gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line); + } +#else +#define REF_BY(fd, n, reason) ref_by(fd, n) +#define UNREF_BY(ec, fd, n, reason) unref_by(ec, fd, n) +static void ref_by(grpc_fd *fd, int n) { +#endif + GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0); +} + +static void fd_destroy(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { + grpc_fd *fd = (grpc_fd *)arg; + /* Add the fd to the freelist */ + grpc_iomgr_unregister_object(&fd->iomgr_object); + pollable_destroy(&fd->pollable_obj); + gpr_mu_destroy(&fd->orphaned_mu); + gpr_mu_lock(&fd_freelist_mu); + fd->freelist_next = fd_freelist; + fd_freelist = fd; + + grpc_lfev_destroy(&fd->read_closure); + grpc_lfev_destroy(&fd->write_closure); + + gpr_mu_unlock(&fd_freelist_mu); +} + +#ifndef NDEBUG +static void unref_by(grpc_exec_ctx *exec_ctx, grpc_fd *fd, int n, + const char *reason, const char *file, int line) { + if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { + gpr_log(GPR_DEBUG, + "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", + fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), + gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line); + } +#else +static void unref_by(grpc_exec_ctx *exec_ctx, grpc_fd *fd, int n) { +#endif + gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n); + if (old == n) { + GRPC_CLOSURE_SCHED(exec_ctx, GRPC_CLOSURE_CREATE(fd_destroy, fd, + grpc_schedule_on_exec_ctx), + GRPC_ERROR_NONE); + } else { + GPR_ASSERT(old > n); + } +} + +static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); } + +static void fd_global_shutdown(void) { + gpr_mu_lock(&fd_freelist_mu); + gpr_mu_unlock(&fd_freelist_mu); + while (fd_freelist != NULL) { + grpc_fd *fd = fd_freelist; + fd_freelist = fd_freelist->freelist_next; + gpr_free(fd); + } + gpr_mu_destroy(&fd_freelist_mu); +} + +static grpc_fd *fd_create(int fd, const char *name) { + grpc_fd *new_fd = NULL; + + gpr_mu_lock(&fd_freelist_mu); + if (fd_freelist != NULL) { + new_fd = fd_freelist; + fd_freelist = fd_freelist->freelist_next; + } + gpr_mu_unlock(&fd_freelist_mu); + + if (new_fd == NULL) { + new_fd = (grpc_fd *)gpr_malloc(sizeof(grpc_fd)); + } + + pollable_init(&new_fd->pollable_obj, PO_FD); + + gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1); + new_fd->fd = fd; + gpr_mu_init(&new_fd->orphaned_mu); + new_fd->orphaned = false; + grpc_lfev_init(&new_fd->read_closure); + grpc_lfev_init(&new_fd->write_closure); + gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL); + + new_fd->freelist_next = NULL; + new_fd->on_done_closure = NULL; + + char *fd_name; + gpr_asprintf(&fd_name, "%s fd=%d", name, fd); + grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name); +#ifndef NDEBUG + if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { + gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, new_fd, fd_name); + } +#endif + gpr_free(fd_name); + return new_fd; +} + +static int fd_wrapped_fd(grpc_fd *fd) { + int ret_fd = -1; + gpr_mu_lock(&fd->orphaned_mu); + if (!fd->orphaned) { + ret_fd = fd->fd; + } + gpr_mu_unlock(&fd->orphaned_mu); + + return ret_fd; +} + +static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *on_done, int *release_fd, + bool already_closed, const char *reason) { + bool is_fd_closed = already_closed; + grpc_error *error = GRPC_ERROR_NONE; + + gpr_mu_lock(&fd->pollable_obj.po.mu); + gpr_mu_lock(&fd->orphaned_mu); + fd->on_done_closure = on_done; + + /* If release_fd is not NULL, we should be relinquishing control of the file + descriptor fd->fd (but we still own the grpc_fd structure). */ + if (release_fd != NULL) { + *release_fd = fd->fd; + } else if (!is_fd_closed) { + close(fd->fd); + is_fd_closed = true; + } + + fd->orphaned = true; + + if (!is_fd_closed) { + gpr_log(GPR_DEBUG, "TODO: handle fd removal?"); + } + + /* Remove the active status but keep referenced. We want this grpc_fd struct + to be alive (and not added to freelist) until the end of this function */ + REF_BY(fd, 1, reason); + + GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error)); + + gpr_mu_unlock(&fd->orphaned_mu); + gpr_mu_unlock(&fd->pollable_obj.po.mu); + UNREF_BY(exec_ctx, fd, 2, reason); /* Drop the reference */ + GRPC_LOG_IF_ERROR("fd_orphan", GRPC_ERROR_REF(error)); + GRPC_ERROR_UNREF(error); +} + +static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, + grpc_fd *fd) { + gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset); + return (grpc_pollset *)notifier; +} + +static bool fd_is_shutdown(grpc_fd *fd) { + return grpc_lfev_is_shutdown(&fd->read_closure); +} + +/* Might be called multiple times */ +static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) { + if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure, + GRPC_ERROR_REF(why))) { + shutdown(fd->fd, SHUT_RDWR); + grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why)); + } + GRPC_ERROR_UNREF(why); +} + +static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure, "read"); +} + +static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure, "write"); +} + +/******************************************************************************* + * Pollable Definitions + */ + +static void pollable_init(pollable *p, polling_obj_type type) { + po_init(&p->po, type); + p->root_worker = NULL; + p->epfd = -1; +} + +static void pollable_destroy(pollable *p) { + po_destroy(&p->po); + if (p->epfd != -1) { + close(p->epfd); + grpc_wakeup_fd_destroy(&p->wakeup); + } +} + +/* ensure that p->epfd, p->wakeup are initialized; p->po.mu must be held */ +static grpc_error *pollable_materialize(pollable *p) { + if (p->epfd == -1) { + int new_epfd = epoll_create1(EPOLL_CLOEXEC); + if (new_epfd < 0) { + return GRPC_OS_ERROR(errno, "epoll_create1"); + } + grpc_error *err = grpc_wakeup_fd_init(&p->wakeup); + if (err != GRPC_ERROR_NONE) { + close(new_epfd); + return err; + } + struct epoll_event ev; + ev.events = (uint32_t)(EPOLLIN | EPOLLET); + ev.data.ptr = (void *)(1 | (intptr_t)&p->wakeup); + if (epoll_ctl(new_epfd, EPOLL_CTL_ADD, p->wakeup.read_fd, &ev) != 0) { + err = GRPC_OS_ERROR(errno, "epoll_ctl"); + close(new_epfd); + grpc_wakeup_fd_destroy(&p->wakeup); + return err; + } + + p->epfd = new_epfd; + } + return GRPC_ERROR_NONE; +} + +/* pollable must be materialized */ +static grpc_error *pollable_add_fd(pollable *p, grpc_fd *fd) { + grpc_error *error = GRPC_ERROR_NONE; + static const char *err_desc = "pollable_add_fd"; + const int epfd = p->epfd; + GPR_ASSERT(epfd != -1); + + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "add fd %p (%d) to pollable %p", fd, fd->fd, p); + } + + gpr_mu_lock(&fd->orphaned_mu); + if (fd->orphaned) { + gpr_mu_unlock(&fd->orphaned_mu); + return GRPC_ERROR_NONE; + } + struct epoll_event ev_fd; + ev_fd.events = (uint32_t)(EPOLLET | EPOLLIN | EPOLLOUT | EPOLLEXCLUSIVE); + ev_fd.data.ptr = fd; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd->fd, &ev_fd) != 0) { + switch (errno) { + case EEXIST: + break; + default: + append_error(&error, GRPC_OS_ERROR(errno, "epoll_ctl"), err_desc); + } + } + gpr_mu_unlock(&fd->orphaned_mu); + + return error; +} + +/******************************************************************************* + * Pollset Definitions + */ + +GPR_TLS_DECL(g_current_thread_pollset); +GPR_TLS_DECL(g_current_thread_worker); + +/* Global state management */ +static grpc_error *pollset_global_init(void) { + gpr_tls_init(&g_current_thread_pollset); + gpr_tls_init(&g_current_thread_worker); + pollable_init(&g_empty_pollable, PO_EMPTY_POLLABLE); + return GRPC_ERROR_NONE; +} + +static void pollset_global_shutdown(void) { + pollable_destroy(&g_empty_pollable); + gpr_tls_destroy(&g_current_thread_pollset); + gpr_tls_destroy(&g_current_thread_worker); +} + +static void pollset_maybe_finish_shutdown(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset) { + if (pollset->shutdown_closure != NULL && pollset->root_worker == NULL && + pollset->kick_alls_pending == 0) { + GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_closure, GRPC_ERROR_NONE); + pollset->shutdown_closure = NULL; + } +} + +static void do_kick_all(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error_unused) { + grpc_error *error = GRPC_ERROR_NONE; + grpc_pollset *pollset = (grpc_pollset *)arg; + gpr_mu_lock(&pollset->pollable_obj.po.mu); + if (pollset->root_worker != NULL) { + grpc_pollset_worker *worker = pollset->root_worker; + do { + GRPC_STATS_INC_POLLSET_KICK(exec_ctx); + if (worker->pollable_obj != &pollset->pollable_obj) { + gpr_mu_lock(&worker->pollable_obj->po.mu); + } + if (worker->initialized_cv && worker != pollset->root_worker) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p kickall_via_cv %p (pollable %p vs %p)", + pollset, worker, &pollset->pollable_obj, + worker->pollable_obj); + } + worker->kicked = true; + gpr_cv_signal(&worker->cv); + } else { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p kickall_via_wakeup %p (pollable %p vs %p)", + pollset, worker, &pollset->pollable_obj, + worker->pollable_obj); + } + append_error(&error, + grpc_wakeup_fd_wakeup(&worker->pollable_obj->wakeup), + "pollset_shutdown"); + } + if (worker->pollable_obj != &pollset->pollable_obj) { + gpr_mu_unlock(&worker->pollable_obj->po.mu); + } + + worker = worker->links[PWL_POLLSET].next; + } while (worker != pollset->root_worker); + } + pollset->kick_alls_pending--; + pollset_maybe_finish_shutdown(exec_ctx, pollset); + gpr_mu_unlock(&pollset->pollable_obj.po.mu); + GRPC_LOG_IF_ERROR("kick_all", error); +} + +static void pollset_kick_all(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { + pollset->kick_alls_pending++; + GRPC_CLOSURE_SCHED(exec_ctx, GRPC_CLOSURE_CREATE(do_kick_all, pollset, + grpc_schedule_on_exec_ctx), + GRPC_ERROR_NONE); +} + +static grpc_error *pollset_kick_inner(grpc_pollset *pollset, pollable *p, + grpc_pollset_worker *specific_worker) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, + "PS:%p kick %p tls_pollset=%p tls_worker=%p " + "root_worker=(pollset:%p pollable:%p)", + p, specific_worker, (void *)gpr_tls_get(&g_current_thread_pollset), + (void *)gpr_tls_get(&g_current_thread_worker), pollset->root_worker, + p->root_worker); + } + if (specific_worker == NULL) { + if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)pollset) { + if (pollset->root_worker == NULL) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p kicked_any_without_poller", p); + } + pollset->kicked_without_poller = true; + return GRPC_ERROR_NONE; + } else { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p kicked_any_via_wakeup_fd", p); + } + grpc_error *err = pollable_materialize(p); + if (err != GRPC_ERROR_NONE) return err; + return grpc_wakeup_fd_wakeup(&p->wakeup); + } + } else { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p kicked_any_but_awake", p); + } + return GRPC_ERROR_NONE; + } + } else if (specific_worker->kicked) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p kicked_specific_but_already_kicked", p); + } + return GRPC_ERROR_NONE; + } else if (gpr_tls_get(&g_current_thread_worker) == + (intptr_t)specific_worker) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p kicked_specific_but_awake", p); + } + specific_worker->kicked = true; + return GRPC_ERROR_NONE; + } else if (specific_worker == p->root_worker) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p kicked_specific_via_wakeup_fd", p); + } + grpc_error *err = pollable_materialize(p); + if (err != GRPC_ERROR_NONE) return err; + specific_worker->kicked = true; + return grpc_wakeup_fd_wakeup(&p->wakeup); + } else { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p kicked_specific_via_cv", p); + } + specific_worker->kicked = true; + gpr_cv_signal(&specific_worker->cv); + return GRPC_ERROR_NONE; + } +} + +/* p->po.mu must be held before calling this function */ +static grpc_error *pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker *specific_worker) { + pollable *p = pollset->current_pollable_obj; + GRPC_STATS_INC_POLLSET_KICK(exec_ctx); + if (p != &pollset->pollable_obj) { + gpr_mu_lock(&p->po.mu); + } + grpc_error *error = pollset_kick_inner(pollset, p, specific_worker); + if (p != &pollset->pollable_obj) { + gpr_mu_unlock(&p->po.mu); + } + return error; +} + +static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { + pollable_init(&pollset->pollable_obj, PO_POLLSET); + pollset->current_pollable_obj = &g_empty_pollable; + pollset->kicked_without_poller = false; + pollset->shutdown_closure = NULL; + pollset->root_worker = NULL; + *mu = &pollset->pollable_obj.po.mu; +} + +/* Convert a timespec to milliseconds: + - Very small or negative poll times are clamped to zero to do a non-blocking + poll (which becomes spin polling) + - Other small values are rounded up to one millisecond + - Longer than a millisecond polls are rounded up to the next nearest + millisecond to avoid spinning + - Infinite timeouts are converted to -1 */ +static int poll_deadline_to_millis_timeout(gpr_timespec deadline, + gpr_timespec now) { + gpr_timespec timeout; + if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { + return -1; + } + + if (gpr_time_cmp(deadline, now) <= 0) { + return 0; + } + + static const gpr_timespec round_up = { + 0, /* tv_sec */ + GPR_NS_PER_MS - 1, /* tv_nsec */ + GPR_TIMESPAN /* clock_type */ + }; + timeout = gpr_time_sub(deadline, now); + int millis = gpr_time_to_millis(gpr_time_add(timeout, round_up)); + return millis >= 1 ? millis : 1; +} + +static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_pollset *notifier) { + grpc_lfev_set_ready(exec_ctx, &fd->read_closure, "read"); + + /* Note, it is possible that fd_become_readable might be called twice with + different 'notifier's when an fd becomes readable and it is in two epoll + sets (This can happen briefly during polling island merges). In such cases + it does not really matter which notifer is set as the read_notifier_pollset + (They would both point to the same polling island anyway) */ + /* Use release store to match with acquire load in fd_get_read_notifier */ + gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier); +} + +static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + grpc_lfev_set_ready(exec_ctx, &fd->write_closure, "write"); +} + +static grpc_error *fd_become_pollable_locked(grpc_fd *fd) { + grpc_error *error = GRPC_ERROR_NONE; + static const char *err_desc = "fd_become_pollable"; + if (append_error(&error, pollable_materialize(&fd->pollable_obj), err_desc)) { + append_error(&error, pollable_add_fd(&fd->pollable_obj, fd), err_desc); + } + return error; +} + +/* pollset->po.mu lock must be held by the caller before calling this */ +static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_closure *closure) { + GPR_ASSERT(pollset->shutdown_closure == NULL); + pollset->shutdown_closure = closure; + pollset_kick_all(exec_ctx, pollset); + pollset_maybe_finish_shutdown(exec_ctx, pollset); +} + +static bool pollset_is_pollable_fd(grpc_pollset *pollset, pollable *p) { + return p != &g_empty_pollable && p != &pollset->pollable_obj; +} + +static grpc_error *pollset_process_events(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, bool drain) { + static const char *err_desc = "pollset_process_events"; + grpc_error *error = GRPC_ERROR_NONE; + for (int i = 0; (drain || i < MAX_EPOLL_EVENTS_HANDLED_EACH_POLL_CALL) && + pollset->event_cursor != pollset->event_count; + i++) { + int n = pollset->event_cursor++; + struct epoll_event *ev = &pollset->events[n]; + void *data_ptr = ev->data.ptr; + if (1 & (intptr_t)data_ptr) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p got pollset_wakeup %p", pollset, data_ptr); + } + append_error(&error, + grpc_wakeup_fd_consume_wakeup( + (grpc_wakeup_fd *)((~(intptr_t)1) & (intptr_t)data_ptr)), + err_desc); + } else { + grpc_fd *fd = (grpc_fd *)data_ptr; + bool cancel = (ev->events & (EPOLLERR | EPOLLHUP)) != 0; + bool read_ev = (ev->events & (EPOLLIN | EPOLLPRI)) != 0; + bool write_ev = (ev->events & EPOLLOUT) != 0; + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, + "PS:%p got fd %p: cancel=%d read=%d " + "write=%d", + pollset, fd, cancel, read_ev, write_ev); + } + if (read_ev || cancel) { + fd_become_readable(exec_ctx, fd, pollset); + } + if (write_ev || cancel) { + fd_become_writable(exec_ctx, fd); + } + } + } + + return error; +} + +/* pollset_shutdown is guaranteed to be called before pollset_destroy. */ +static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { + pollable_destroy(&pollset->pollable_obj); + if (pollset_is_pollable_fd(pollset, pollset->current_pollable_obj)) { + UNREF_BY(exec_ctx, (grpc_fd *)pollset->current_pollable_obj, 2, + "pollset_pollable"); + } + GRPC_LOG_IF_ERROR("pollset_process_events", + pollset_process_events(exec_ctx, pollset, true)); +} + +static grpc_error *pollset_epoll(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + pollable *p, gpr_timespec now, + gpr_timespec deadline) { + int timeout = poll_deadline_to_millis_timeout(deadline, now); + + if (GRPC_TRACER_ON(grpc_polling_trace)) { + char *desc = pollable_desc(p); + gpr_log(GPR_DEBUG, "PS:%p poll %p[%s] for %dms", pollset, p, desc, timeout); + gpr_free(desc); + } + + if (timeout != 0) { + GRPC_SCHEDULING_START_BLOCKING_REGION; + } + int r; + do { + GRPC_STATS_INC_SYSCALL_POLL(exec_ctx); + r = epoll_wait(p->epfd, pollset->events, MAX_EPOLL_EVENTS, timeout); + } while (r < 0 && errno == EINTR); + if (timeout != 0) { + GRPC_SCHEDULING_END_BLOCKING_REGION; + } + + if (r < 0) return GRPC_OS_ERROR(errno, "epoll_wait"); + + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p poll %p got %d events", pollset, p, r); + } + + pollset->event_cursor = 0; + pollset->event_count = r; + + return GRPC_ERROR_NONE; +} + +/* Return true if first in list */ +static bool worker_insert(grpc_pollset_worker **root, pollset_worker_links link, + grpc_pollset_worker *worker) { + if (*root == NULL) { + *root = worker; + worker->links[link].next = worker->links[link].prev = worker; + return true; + } else { + worker->links[link].next = *root; + worker->links[link].prev = worker->links[link].next->links[link].prev; + worker->links[link].next->links[link].prev = worker; + worker->links[link].prev->links[link].next = worker; + return false; + } +} + +/* Return true if last in list */ +typedef enum { EMPTIED, NEW_ROOT, REMOVED } worker_remove_result; + +static worker_remove_result worker_remove(grpc_pollset_worker **root, + pollset_worker_links link, + grpc_pollset_worker *worker) { + if (worker == *root) { + if (worker == worker->links[link].next) { + *root = NULL; + return EMPTIED; + } else { + *root = worker->links[link].next; + worker->links[link].prev->links[link].next = worker->links[link].next; + worker->links[link].next->links[link].prev = worker->links[link].prev; + return NEW_ROOT; + } + } else { + worker->links[link].prev->links[link].next = worker->links[link].next; + worker->links[link].next->links[link].prev = worker->links[link].prev; + return REMOVED; + } +} + +/* Return true if this thread should poll */ +static bool begin_worker(grpc_pollset *pollset, grpc_pollset_worker *worker, + grpc_pollset_worker **worker_hdl, gpr_timespec *now, + gpr_timespec deadline) { + bool do_poll = true; + if (worker_hdl != NULL) *worker_hdl = worker; + worker->initialized_cv = false; + worker->kicked = false; + worker->pollset = pollset; + worker->pollable_obj = pollset->current_pollable_obj; + + if (pollset_is_pollable_fd(pollset, worker->pollable_obj)) { + REF_BY((grpc_fd *)worker->pollable_obj, 2, "one_poll"); + } + + worker_insert(&pollset->root_worker, PWL_POLLSET, worker); + if (!worker_insert(&worker->pollable_obj->root_worker, PWL_POLLABLE, + worker)) { + worker->initialized_cv = true; + gpr_cv_init(&worker->cv); + if (worker->pollable_obj != &pollset->pollable_obj) { + gpr_mu_unlock(&pollset->pollable_obj.po.mu); + } + if (GRPC_TRACER_ON(grpc_polling_trace) && + worker->pollable_obj->root_worker != worker) { + gpr_log(GPR_DEBUG, "PS:%p wait %p w=%p for %dms", pollset, + worker->pollable_obj, worker, + poll_deadline_to_millis_timeout(deadline, *now)); + } + while (do_poll && worker->pollable_obj->root_worker != worker) { + if (gpr_cv_wait(&worker->cv, &worker->pollable_obj->po.mu, deadline)) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p timeout_wait %p w=%p", pollset, + worker->pollable_obj, worker); + } + do_poll = false; + } else if (worker->kicked) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p wakeup %p w=%p", pollset, + worker->pollable_obj, worker); + } + do_poll = false; + } else if (GRPC_TRACER_ON(grpc_polling_trace) && + worker->pollable_obj->root_worker != worker) { + gpr_log(GPR_DEBUG, "PS:%p spurious_wakeup %p w=%p", pollset, + worker->pollable_obj, worker); + } + } + if (worker->pollable_obj != &pollset->pollable_obj) { + gpr_mu_unlock(&worker->pollable_obj->po.mu); + gpr_mu_lock(&pollset->pollable_obj.po.mu); + gpr_mu_lock(&worker->pollable_obj->po.mu); + } + *now = gpr_now(now->clock_type); + } + + return do_poll && pollset->shutdown_closure == NULL && + pollset->current_pollable_obj == worker->pollable_obj; +} + +static void end_worker(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker *worker, + grpc_pollset_worker **worker_hdl) { + if (NEW_ROOT == + worker_remove(&worker->pollable_obj->root_worker, PWL_POLLABLE, worker)) { + gpr_cv_signal(&worker->pollable_obj->root_worker->cv); + } + if (worker->initialized_cv) { + gpr_cv_destroy(&worker->cv); + } + if (pollset_is_pollable_fd(pollset, worker->pollable_obj)) { + UNREF_BY(exec_ctx, (grpc_fd *)worker->pollable_obj, 2, "one_poll"); + } + if (EMPTIED == worker_remove(&pollset->root_worker, PWL_POLLSET, worker)) { + pollset_maybe_finish_shutdown(exec_ctx, pollset); + } +} + +/* pollset->po.mu lock must be held by the caller before calling this. + The function pollset_work() may temporarily release the lock (pollset->po.mu) + during the course of its execution but it will always re-acquire the lock and + ensure that it is held by the time the function returns */ +static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker **worker_hdl, + gpr_timespec now, gpr_timespec deadline) { + grpc_pollset_worker worker; + if (0 && GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p work hdl=%p worker=%p now=%" PRId64 + ".%09d deadline=%" PRId64 ".%09d kwp=%d root_worker=%p", + pollset, worker_hdl, &worker, now.tv_sec, now.tv_nsec, + deadline.tv_sec, deadline.tv_nsec, pollset->kicked_without_poller, + pollset->root_worker); + } + grpc_error *error = GRPC_ERROR_NONE; + static const char *err_desc = "pollset_work"; + if (pollset->kicked_without_poller) { + pollset->kicked_without_poller = false; + return GRPC_ERROR_NONE; + } + if (pollset->current_pollable_obj != &pollset->pollable_obj) { + gpr_mu_lock(&pollset->current_pollable_obj->po.mu); + } + if (begin_worker(pollset, &worker, worker_hdl, &now, deadline)) { + gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset); + gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); + GPR_ASSERT(!pollset->shutdown_closure); + append_error(&error, pollable_materialize(worker.pollable_obj), err_desc); + if (worker.pollable_obj != &pollset->pollable_obj) { + gpr_mu_unlock(&worker.pollable_obj->po.mu); + } + gpr_mu_unlock(&pollset->pollable_obj.po.mu); + if (pollset->event_cursor == pollset->event_count) { + append_error(&error, pollset_epoll(exec_ctx, pollset, worker.pollable_obj, + now, deadline), + err_desc); + } + append_error(&error, pollset_process_events(exec_ctx, pollset, false), + err_desc); + gpr_mu_lock(&pollset->pollable_obj.po.mu); + if (worker.pollable_obj != &pollset->pollable_obj) { + gpr_mu_lock(&worker.pollable_obj->po.mu); + } + gpr_tls_set(&g_current_thread_pollset, 0); + gpr_tls_set(&g_current_thread_worker, 0); + pollset_maybe_finish_shutdown(exec_ctx, pollset); + } + end_worker(exec_ctx, pollset, &worker, worker_hdl); + if (worker.pollable_obj != &pollset->pollable_obj) { + gpr_mu_unlock(&worker.pollable_obj->po.mu); + } + if (grpc_exec_ctx_has_work(exec_ctx)) { + gpr_mu_unlock(&pollset->pollable_obj.po.mu); + grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&pollset->pollable_obj.po.mu); + } + return error; +} + +static void unref_fd_no_longer_poller(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error) { + grpc_fd *fd = (grpc_fd *)arg; + UNREF_BY(exec_ctx, fd, 2, "pollset_pollable"); +} + +/* expects pollsets locked, flag whether fd is locked or not */ +static grpc_error *pollset_add_fd_locked(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, grpc_fd *fd, + bool fd_locked) { + static const char *err_desc = "pollset_add_fd"; + grpc_error *error = GRPC_ERROR_NONE; + if (pollset->current_pollable_obj == &g_empty_pollable) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, + "PS:%p add fd %p; transition pollable from empty to fd", pollset, + fd); + } + /* empty pollable --> single fd pollable */ + pollset_kick_all(exec_ctx, pollset); + pollset->current_pollable_obj = &fd->pollable_obj; + if (!fd_locked) gpr_mu_lock(&fd->pollable_obj.po.mu); + append_error(&error, fd_become_pollable_locked(fd), err_desc); + if (!fd_locked) gpr_mu_unlock(&fd->pollable_obj.po.mu); + REF_BY(fd, 2, "pollset_pollable"); + } else if (pollset->current_pollable_obj == &pollset->pollable_obj) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "PS:%p add fd %p; already multipolling", pollset, fd); + } + append_error(&error, pollable_add_fd(pollset->current_pollable_obj, fd), + err_desc); + } else if (pollset->current_pollable_obj != &fd->pollable_obj) { + grpc_fd *had_fd = (grpc_fd *)pollset->current_pollable_obj; + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, + "PS:%p add fd %p; transition pollable from fd %p to multipoller", + pollset, fd, had_fd); + } + /* Introduce a spurious completion. + If we do not, then it may be that the fd-specific epoll set consumed + a completion without being polled, leading to a missed edge going up. */ + grpc_lfev_set_ready(exec_ctx, &had_fd->read_closure, "read"); + grpc_lfev_set_ready(exec_ctx, &had_fd->write_closure, "write"); + pollset_kick_all(exec_ctx, pollset); + pollset->current_pollable_obj = &pollset->pollable_obj; + if (append_error(&error, pollable_materialize(&pollset->pollable_obj), + err_desc)) { + pollable_add_fd(&pollset->pollable_obj, had_fd); + pollable_add_fd(&pollset->pollable_obj, fd); + } + GRPC_CLOSURE_SCHED(exec_ctx, + GRPC_CLOSURE_CREATE(unref_fd_no_longer_poller, had_fd, + grpc_schedule_on_exec_ctx), + GRPC_ERROR_NONE); + } + return error; +} + +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_fd *fd) { + gpr_mu_lock(&pollset->pollable_obj.po.mu); + grpc_error *error = pollset_add_fd_locked(exec_ctx, pollset, fd, false); + gpr_mu_unlock(&pollset->pollable_obj.po.mu); + GRPC_LOG_IF_ERROR("pollset_add_fd", error); +} + +/******************************************************************************* + * Pollset-set Definitions + */ + +static grpc_pollset_set *pollset_set_create(void) { + grpc_pollset_set *pss = (grpc_pollset_set *)gpr_zalloc(sizeof(*pss)); + po_init(&pss->po, PO_POLLSET_SET); + return pss; +} + +static void pollset_set_destroy(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pss) { + po_destroy(&pss->po); + gpr_free(pss); +} + +static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, + grpc_fd *fd) { + po_join(exec_ctx, &pss->po, &fd->pollable_obj.po); +} + +static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, + grpc_fd *fd) {} + +static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pss, grpc_pollset *ps) { + po_join(exec_ctx, &pss->po, &ps->pollable_obj.po); +} + +static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pss, grpc_pollset *ps) {} + +static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) { + po_join(exec_ctx, &bag->po, &item->po); +} + +static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) {} + +static void po_init(polling_obj *po, polling_obj_type type) { + gpr_mu_init(&po->mu); + po->type = type; + po->group = NULL; + po->next = po; + po->prev = po; +} + +static polling_group *pg_lock_latest(polling_group *pg) { + /* assumes pg unlocked; consumes ref, returns ref */ + gpr_mu_lock(&pg->po.mu); + while (pg->po.group != NULL) { + polling_group *new_pg = pg_ref(pg->po.group); + gpr_mu_unlock(&pg->po.mu); + pg_unref(pg); + pg = new_pg; + gpr_mu_lock(&pg->po.mu); + } + return pg; +} + +static void po_destroy(polling_obj *po) { + if (po->group != NULL) { + polling_group *pg = pg_lock_latest(po->group); + po->prev->next = po->next; + po->next->prev = po->prev; + gpr_mu_unlock(&pg->po.mu); + pg_unref(pg); + } + gpr_mu_destroy(&po->mu); +} + +static polling_group *pg_ref(polling_group *pg) { + gpr_ref(&pg->refs); + return pg; +} + +static void pg_unref(polling_group *pg) { + if (gpr_unref(&pg->refs)) { + po_destroy(&pg->po); + gpr_free(pg); + } +} + +static int po_cmp(polling_obj *a, polling_obj *b) { + if (a == b) return 0; + if (a->type < b->type) return -1; + if (a->type > b->type) return 1; + if (a < b) return -1; + assert(a > b); + return 1; +} + +static void po_join(grpc_exec_ctx *exec_ctx, polling_obj *a, polling_obj *b) { + switch (po_cmp(a, b)) { + case 0: + return; + case 1: + GPR_SWAP(polling_obj *, a, b); + /* fall through */ + case -1: + gpr_mu_lock(&a->mu); + gpr_mu_lock(&b->mu); + + if (a->group == NULL) { + if (b->group == NULL) { + polling_obj *initial_po[] = {a, b}; + pg_create(exec_ctx, initial_po, GPR_ARRAY_SIZE(initial_po)); + gpr_mu_unlock(&a->mu); + gpr_mu_unlock(&b->mu); + } else { + polling_group *b_group = pg_ref(b->group); + gpr_mu_unlock(&b->mu); + gpr_mu_unlock(&a->mu); + pg_join(exec_ctx, b_group, a); + } + } else if (b->group == NULL) { + polling_group *a_group = pg_ref(a->group); + gpr_mu_unlock(&a->mu); + gpr_mu_unlock(&b->mu); + pg_join(exec_ctx, a_group, b); + } else if (a->group == b->group) { + /* nothing to do */ + gpr_mu_unlock(&a->mu); + gpr_mu_unlock(&b->mu); + } else { + polling_group *a_group = pg_ref(a->group); + polling_group *b_group = pg_ref(b->group); + gpr_mu_unlock(&a->mu); + gpr_mu_unlock(&b->mu); + pg_merge(exec_ctx, a_group, b_group); + } + } +} + +static void pg_notify(grpc_exec_ctx *exec_ctx, polling_obj *a, polling_obj *b) { + if (a->type == PO_FD && b->type == PO_POLLSET) { + pollset_add_fd_locked(exec_ctx, (grpc_pollset *)b, (grpc_fd *)a, true); + } else if (a->type == PO_POLLSET && b->type == PO_FD) { + pollset_add_fd_locked(exec_ctx, (grpc_pollset *)a, (grpc_fd *)b, true); + } +} + +static void pg_broadcast(grpc_exec_ctx *exec_ctx, polling_group *from, + polling_group *to) { + for (polling_obj *a = from->po.next; a != &from->po; a = a->next) { + for (polling_obj *b = to->po.next; b != &to->po; b = b->next) { + if (po_cmp(a, b) < 0) { + gpr_mu_lock(&a->mu); + gpr_mu_lock(&b->mu); + } else { + GPR_ASSERT(po_cmp(a, b) != 0); + gpr_mu_lock(&b->mu); + gpr_mu_lock(&a->mu); + } + pg_notify(exec_ctx, a, b); + gpr_mu_unlock(&a->mu); + gpr_mu_unlock(&b->mu); + } + } +} + +static void pg_create(grpc_exec_ctx *exec_ctx, polling_obj **initial_po, + size_t initial_po_count) { + /* assumes all polling objects in initial_po are locked */ + polling_group *pg = (polling_group *)gpr_malloc(sizeof(*pg)); + po_init(&pg->po, PO_POLLING_GROUP); + gpr_ref_init(&pg->refs, (int)initial_po_count); + for (size_t i = 0; i < initial_po_count; i++) { + GPR_ASSERT(initial_po[i]->group == NULL); + initial_po[i]->group = pg; + } + for (size_t i = 1; i < initial_po_count; i++) { + initial_po[i]->prev = initial_po[i - 1]; + } + for (size_t i = 0; i < initial_po_count - 1; i++) { + initial_po[i]->next = initial_po[i + 1]; + } + initial_po[0]->prev = &pg->po; + initial_po[initial_po_count - 1]->next = &pg->po; + pg->po.next = initial_po[0]; + pg->po.prev = initial_po[initial_po_count - 1]; + for (size_t i = 1; i < initial_po_count; i++) { + for (size_t j = 0; j < i; j++) { + pg_notify(exec_ctx, initial_po[i], initial_po[j]); + } + } +} + +static void pg_join(grpc_exec_ctx *exec_ctx, polling_group *pg, + polling_obj *po) { + /* assumes neither pg nor po are locked; consumes one ref to pg */ + pg = pg_lock_latest(pg); + /* pg locked */ + for (polling_obj *existing = pg->po.next /* skip pg - it's just a stub */; + existing != &pg->po; existing = existing->next) { + if (po_cmp(po, existing) < 0) { + gpr_mu_lock(&po->mu); + gpr_mu_lock(&existing->mu); + } else { + GPR_ASSERT(po_cmp(po, existing) != 0); + gpr_mu_lock(&existing->mu); + gpr_mu_lock(&po->mu); + } + /* pg, po, existing locked */ + if (po->group != NULL) { + gpr_mu_unlock(&pg->po.mu); + polling_group *po_group = pg_ref(po->group); + gpr_mu_unlock(&po->mu); + gpr_mu_unlock(&existing->mu); + pg_merge(exec_ctx, pg, po_group); + /* early exit: polling obj picked up a group during joining: we needed + to do a full merge */ + return; + } + pg_notify(exec_ctx, po, existing); + gpr_mu_unlock(&po->mu); + gpr_mu_unlock(&existing->mu); + } + gpr_mu_lock(&po->mu); + if (po->group != NULL) { + gpr_mu_unlock(&pg->po.mu); + polling_group *po_group = pg_ref(po->group); + gpr_mu_unlock(&po->mu); + pg_merge(exec_ctx, pg, po_group); + /* early exit: polling obj picked up a group during joining: we needed + to do a full merge */ + return; + } + po->group = pg; + po->next = &pg->po; + po->prev = pg->po.prev; + po->prev->next = po->next->prev = po; + gpr_mu_unlock(&pg->po.mu); + gpr_mu_unlock(&po->mu); +} + +static void pg_merge(grpc_exec_ctx *exec_ctx, polling_group *a, + polling_group *b) { + for (;;) { + if (a == b) { + pg_unref(a); + pg_unref(b); + return; + } + if (a > b) GPR_SWAP(polling_group *, a, b); + gpr_mu_lock(&a->po.mu); + gpr_mu_lock(&b->po.mu); + if (a->po.group != NULL) { + polling_group *m2 = pg_ref(a->po.group); + gpr_mu_unlock(&a->po.mu); + gpr_mu_unlock(&b->po.mu); + pg_unref(a); + a = m2; + } else if (b->po.group != NULL) { + polling_group *m2 = pg_ref(b->po.group); + gpr_mu_unlock(&a->po.mu); + gpr_mu_unlock(&b->po.mu); + pg_unref(b); + b = m2; + } else { + break; + } + } + polling_group **unref = NULL; + size_t unref_count = 0; + size_t unref_cap = 0; + b->po.group = a; + pg_broadcast(exec_ctx, a, b); + pg_broadcast(exec_ctx, b, a); + while (b->po.next != &b->po) { + polling_obj *po = b->po.next; + gpr_mu_lock(&po->mu); + if (unref_count == unref_cap) { + unref_cap = GPR_MAX(8, 3 * unref_cap / 2); + unref = (polling_group **)gpr_realloc(unref, unref_cap * sizeof(*unref)); + } + unref[unref_count++] = po->group; + po->group = pg_ref(a); + // unlink from b + po->prev->next = po->next; + po->next->prev = po->prev; + // link to a + po->next = &a->po; + po->prev = a->po.prev; + po->next->prev = po->prev->next = po; + gpr_mu_unlock(&po->mu); + } + gpr_mu_unlock(&a->po.mu); + gpr_mu_unlock(&b->po.mu); + for (size_t i = 0; i < unref_count; i++) { + pg_unref(unref[i]); + } + gpr_free(unref); + pg_unref(b); +} + +/******************************************************************************* + * Event engine binding + */ + +static void shutdown_engine(void) { + fd_global_shutdown(); + pollset_global_shutdown(); +} + +static const grpc_event_engine_vtable vtable = { + sizeof(grpc_pollset), + + fd_create, + fd_wrapped_fd, + fd_orphan, + fd_shutdown, + fd_notify_on_read, + fd_notify_on_write, + fd_is_shutdown, + fd_get_read_notifier_pollset, + + pollset_init, + pollset_shutdown, + pollset_destroy, + pollset_work, + pollset_kick, + pollset_add_fd, + + pollset_set_create, + pollset_set_destroy, + pollset_set_add_pollset, + pollset_set_del_pollset, + pollset_set_add_pollset_set, + pollset_set_del_pollset_set, + pollset_set_add_fd, + pollset_set_del_fd, + + shutdown_engine, +}; + +const grpc_event_engine_vtable *grpc_init_epollex_linux( + bool explicitly_requested) { + if (!grpc_has_wakeup_fd()) { + return NULL; + } + + if (!grpc_is_epollexclusive_available()) { + return NULL; + } + + fd_global_init(); + + if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) { + pollset_global_shutdown(); + fd_global_shutdown(); + return NULL; + } + + return &vtable; +} + +#else /* defined(GRPC_LINUX_EPOLL) */ +#if defined(GRPC_POSIX_SOCKET) +#include "src/core/lib/iomgr/ev_posix.h" +/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return + * NULL */ +const grpc_event_engine_vtable *grpc_init_epollex_linux( + bool explicitly_requested) { + return NULL; +} +#endif /* defined(GRPC_POSIX_SOCKET) */ + +#endif /* !defined(GRPC_LINUX_EPOLL) */ diff --git a/src/core/lib/iomgr/ev_epollsig_linux.c b/src/core/lib/iomgr/ev_epollsig_linux.c deleted file mode 100644 index 4d8bdf1401..0000000000 --- a/src/core/lib/iomgr/ev_epollsig_linux.c +++ /dev/null @@ -1,1769 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -/* This polling engine is only relevant on linux kernels supporting epoll() */ -#ifdef GRPC_LINUX_EPOLL - -#include "src/core/lib/iomgr/ev_epollsig_linux.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "src/core/lib/debug/stats.h" -#include "src/core/lib/iomgr/ev_posix.h" -#include "src/core/lib/iomgr/iomgr_internal.h" -#include "src/core/lib/iomgr/lockfree_event.h" -#include "src/core/lib/iomgr/timer.h" -#include "src/core/lib/iomgr/wakeup_fd_posix.h" -#include "src/core/lib/profiling/timers.h" -#include "src/core/lib/support/block_annotate.h" - -#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1) - -#define GRPC_POLLING_TRACE(...) \ - if (GRPC_TRACER_ON(grpc_polling_trace)) { \ - gpr_log(GPR_INFO, __VA_ARGS__); \ - } - -static int grpc_wakeup_signal = -1; -static bool is_grpc_wakeup_signal_initialized = false; - -/* Implements the function defined in grpc_posix.h. This function might be - * called before even calling grpc_init() to set either a different signal to - * use. If signum == -1, then the use of signals is disabled */ -void grpc_use_signal(int signum) { - grpc_wakeup_signal = signum; - is_grpc_wakeup_signal_initialized = true; - - if (grpc_wakeup_signal < 0) { - gpr_log(GPR_INFO, - "Use of signals is disabled. Epoll engine will not be used"); - } else { - gpr_log(GPR_INFO, "epoll engine will be using signal: %d", - grpc_wakeup_signal); - } -} - -struct polling_island; - -typedef enum { - POLL_OBJ_FD, - POLL_OBJ_POLLSET, - POLL_OBJ_POLLSET_SET -} poll_obj_type; - -typedef struct poll_obj { -#ifndef NDEBUG - poll_obj_type obj_type; -#endif - gpr_mu mu; - struct polling_island *pi; -} poll_obj; - -const char *poll_obj_string(poll_obj_type po_type) { - switch (po_type) { - case POLL_OBJ_FD: - return "fd"; - case POLL_OBJ_POLLSET: - return "pollset"; - case POLL_OBJ_POLLSET_SET: - return "pollset_set"; - } - - GPR_UNREACHABLE_CODE(return "UNKNOWN"); -} - -/******************************************************************************* - * Fd Declarations - */ - -#define FD_FROM_PO(po) ((grpc_fd *)(po)) - -struct grpc_fd { - poll_obj po; - - int fd; - /* refst format: - bit 0 : 1=Active / 0=Orphaned - bits 1-n : refcount - Ref/Unref by two to avoid altering the orphaned bit */ - gpr_atm refst; - - /* The fd is either closed or we relinquished control of it. In either - cases, this indicates that the 'fd' on this structure is no longer - valid */ - bool orphaned; - - gpr_atm read_closure; - gpr_atm write_closure; - - struct grpc_fd *freelist_next; - grpc_closure *on_done_closure; - - /* The pollset that last noticed that the fd is readable. The actual type - * stored in this is (grpc_pollset *) */ - gpr_atm read_notifier_pollset; - - grpc_iomgr_object iomgr_object; -}; - -/* Reference counting for fds */ -#ifndef NDEBUG -static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line); -static void fd_unref(grpc_fd *fd, const char *reason, const char *file, - int line); -#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__) -#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__) -#else -static void fd_ref(grpc_fd *fd); -static void fd_unref(grpc_fd *fd); -#define GRPC_FD_REF(fd, reason) fd_ref(fd) -#define GRPC_FD_UNREF(fd, reason) fd_unref(fd) -#endif - -static void fd_global_init(void); -static void fd_global_shutdown(void); - -/******************************************************************************* - * Polling island Declarations - */ - -#ifndef NDEBUG - -#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__) -#define PI_UNREF(exec_ctx, p, r) \ - pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__) - -#else - -#define PI_ADD_REF(p, r) pi_add_ref((p)) -#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p)) - -#endif - -/* This is also used as grpc_workqueue (by directly casing it) */ -typedef struct polling_island { - gpr_mu mu; - /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement - the refcount. - Once the ref count becomes zero, this structure is destroyed which means - we should ensure that there is never a scenario where a PI_ADD_REF() is - racing with a PI_UNREF() that just made the ref_count zero. */ - gpr_atm ref_count; - - /* Pointer to the polling_island this merged into. - * merged_to value is only set once in polling_island's lifetime (and that too - * only if the island is merged with another island). Because of this, we can - * use gpr_atm type here so that we can do atomic access on this and reduce - * lock contention on 'mu' mutex. - * - * Note that if this field is not NULL (i.e not 0), all the remaining fields - * (except mu and ref_count) are invalid and must be ignored. */ - gpr_atm merged_to; - - /* Number of threads currently polling on this island */ - gpr_atm poller_count; - - /* The fd of the underlying epoll set */ - int epoll_fd; - - /* The file descriptors in the epoll set */ - size_t fd_cnt; - size_t fd_capacity; - grpc_fd **fds; -} polling_island; - -/******************************************************************************* - * Pollset Declarations - */ -struct grpc_pollset_worker { - /* Thread id of this worker */ - pthread_t pt_id; - - /* Used to prevent a worker from getting kicked multiple times */ - gpr_atm is_kicked; - struct grpc_pollset_worker *next; - struct grpc_pollset_worker *prev; -}; - -struct grpc_pollset { - poll_obj po; - - grpc_pollset_worker root_worker; - bool kicked_without_pollers; - - bool shutting_down; /* Is the pollset shutting down ? */ - bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */ - grpc_closure *shutdown_done; /* Called after after shutdown is complete */ -}; - -/******************************************************************************* - * Pollset-set Declarations - */ -struct grpc_pollset_set { - poll_obj po; -}; - -/******************************************************************************* - * Common helpers - */ - -static bool append_error(grpc_error **composite, grpc_error *error, - const char *desc) { - if (error == GRPC_ERROR_NONE) return true; - if (*composite == GRPC_ERROR_NONE) { - *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc); - } - *composite = grpc_error_add_child(*composite, error); - return false; -} - -/******************************************************************************* - * Polling island Definitions - */ - -/* The wakeup fd that is used to wake up all threads in a Polling island. This - is useful in the polling island merge operation where we need to wakeup all - the threads currently polling the smaller polling island (so that they can - start polling the new/merged polling island) - - NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the - threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */ -static grpc_wakeup_fd polling_island_wakeup_fd; - -/* The polling island being polled right now. - See comments in workqueue_maybe_wakeup for why this is tracked. */ -static __thread polling_island *g_current_thread_polling_island; - -/* Forward declaration */ -static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi); - -#ifdef GRPC_TSAN -/* Currently TSAN may incorrectly flag data races between epoll_ctl and - epoll_wait for any grpc_fd structs that are added to the epoll set via - epoll_ctl and are returned (within a very short window) via epoll_wait(). - - To work-around this race, we establish a happens-before relation between - the code just-before epoll_ctl() and the code after epoll_wait() by using - this atomic */ -gpr_atm g_epoll_sync; -#endif /* defined(GRPC_TSAN) */ - -static void pi_add_ref(polling_island *pi); -static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi); - -#ifndef NDEBUG -static void pi_add_ref_dbg(polling_island *pi, const char *reason, - const char *file, int line) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count); - gpr_log(GPR_DEBUG, "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR - " (%s) - (%s, %d)", - pi, old_cnt, old_cnt + 1, reason, file, line); - } - pi_add_ref(pi); -} - -static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi, - const char *reason, const char *file, int line) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count); - gpr_log(GPR_DEBUG, "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR - " (%s) - (%s, %d)", - pi, old_cnt, (old_cnt - 1), reason, file, line); - } - pi_unref(exec_ctx, pi); -} -#endif - -static void pi_add_ref(polling_island *pi) { - gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1); -} - -static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) { - /* If ref count went to zero, delete the polling island. - Note that this deletion not be done under a lock. Once the ref count goes - to zero, we are guaranteed that no one else holds a reference to the - polling island (and that there is no racing pi_add_ref() call either). - - Also, if we are deleting the polling island and the merged_to field is - non-empty, we should remove a ref to the merged_to polling island - */ - if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) { - polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); - polling_island_delete(exec_ctx, pi); - if (next != NULL) { - PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */ - } - } -} - -/* The caller is expected to hold pi->mu lock before calling this function */ -static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, - size_t fd_count, bool add_fd_refs, - grpc_error **error) { - int err; - size_t i; - struct epoll_event ev; - char *err_msg; - const char *err_desc = "polling_island_add_fds"; - -#ifdef GRPC_TSAN - /* See the definition of g_epoll_sync for more context */ - gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0); -#endif /* defined(GRPC_TSAN) */ - - for (i = 0; i < fd_count; i++) { - ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); - ev.data.ptr = fds[i]; - err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev); - - if (err < 0) { - if (errno != EEXIST) { - gpr_asprintf( - &err_msg, - "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)", - pi->epoll_fd, fds[i]->fd, errno, strerror(errno)); - append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); - gpr_free(err_msg); - } - - continue; - } - - if (pi->fd_cnt == pi->fd_capacity) { - pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2); - pi->fds = - (grpc_fd **)gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity); - } - - pi->fds[pi->fd_cnt++] = fds[i]; - if (add_fd_refs) { - GRPC_FD_REF(fds[i], "polling_island"); - } - } -} - -/* The caller is expected to hold pi->mu before calling this */ -static void polling_island_add_wakeup_fd_locked(polling_island *pi, - grpc_wakeup_fd *wakeup_fd, - grpc_error **error) { - struct epoll_event ev; - int err; - char *err_msg; - const char *err_desc = "polling_island_add_wakeup_fd"; - - ev.events = (uint32_t)(EPOLLIN | EPOLLET); - ev.data.ptr = wakeup_fd; - err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, - GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev); - if (err < 0 && errno != EEXIST) { - gpr_asprintf(&err_msg, - "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with " - "error: %d (%s)", - pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno, - strerror(errno)); - append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); - gpr_free(err_msg); - } -} - -/* The caller is expected to hold pi->mu lock before calling this function */ -static void polling_island_remove_all_fds_locked(polling_island *pi, - bool remove_fd_refs, - grpc_error **error) { - int err; - size_t i; - char *err_msg; - const char *err_desc = "polling_island_remove_fds"; - - for (i = 0; i < pi->fd_cnt; i++) { - err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL); - if (err < 0 && errno != ENOENT) { - gpr_asprintf(&err_msg, - "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with " - "error: %d (%s)", - pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno)); - append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); - gpr_free(err_msg); - } - - if (remove_fd_refs) { - GRPC_FD_UNREF(pi->fds[i], "polling_island"); - } - } - - pi->fd_cnt = 0; -} - -/* The caller is expected to hold pi->mu lock before calling this function */ -static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd, - bool is_fd_closed, - grpc_error **error) { - int err; - size_t i; - char *err_msg; - const char *err_desc = "polling_island_remove_fd"; - - /* If fd is already closed, then it would have been automatically been removed - from the epoll set */ - if (!is_fd_closed) { - err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL); - if (err < 0 && errno != ENOENT) { - gpr_asprintf( - &err_msg, - "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)", - pi->epoll_fd, fd->fd, errno, strerror(errno)); - append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); - gpr_free(err_msg); - } - } - - for (i = 0; i < pi->fd_cnt; i++) { - if (pi->fds[i] == fd) { - pi->fds[i] = pi->fds[--pi->fd_cnt]; - GRPC_FD_UNREF(fd, "polling_island"); - break; - } - } -} - -/* Might return NULL in case of an error */ -static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx, - grpc_fd *initial_fd, - grpc_error **error) { - polling_island *pi = NULL; - const char *err_desc = "polling_island_create"; - - *error = GRPC_ERROR_NONE; - - pi = (polling_island *)gpr_malloc(sizeof(*pi)); - gpr_mu_init(&pi->mu); - pi->fd_cnt = 0; - pi->fd_capacity = 0; - pi->fds = NULL; - pi->epoll_fd = -1; - - gpr_atm_rel_store(&pi->ref_count, 0); - gpr_atm_rel_store(&pi->poller_count, 0); - gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL); - - pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); - - if (pi->epoll_fd < 0) { - append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc); - goto done; - } - - if (initial_fd != NULL) { - polling_island_add_fds_locked(pi, &initial_fd, 1, true, error); - } - -done: - if (*error != GRPC_ERROR_NONE) { - polling_island_delete(exec_ctx, pi); - pi = NULL; - } - return pi; -} - -static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) { - GPR_ASSERT(pi->fd_cnt == 0); - - if (pi->epoll_fd >= 0) { - close(pi->epoll_fd); - } - gpr_mu_destroy(&pi->mu); - gpr_free(pi->fds); - gpr_free(pi); -} - -/* Attempts to gets the last polling island in the linked list (liked by the - * 'merged_to' field). Since this does not lock the polling island, there are no - * guarantees that the island returned is the last island */ -static polling_island *polling_island_maybe_get_latest(polling_island *pi) { - polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); - while (next != NULL) { - pi = next; - next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); - } - - return pi; -} - -/* Gets the lock on the *latest* polling island i.e the last polling island in - the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the - returned polling island's mu. - Usage: To lock/unlock polling island "pi", do the following: - polling_island *pi_latest = polling_island_lock(pi); - ... - ... critical section .. - ... - gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */ -static polling_island *polling_island_lock(polling_island *pi) { - polling_island *next = NULL; - - while (true) { - next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); - if (next == NULL) { - /* Looks like 'pi' is the last node in the linked list but unless we check - this by holding the pi->mu lock, we cannot be sure (i.e without the - pi->mu lock, we don't prevent island merges). - To be absolutely sure, check once more by holding the pi->mu lock */ - gpr_mu_lock(&pi->mu); - next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); - if (next == NULL) { - /* pi is infact the last node and we have the pi->mu lock. we're done */ - break; - } - - /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu - * isn't the lock we are interested in. Continue traversing the list */ - gpr_mu_unlock(&pi->mu); - } - - pi = next; - } - - return pi; -} - -/* Gets the lock on the *latest* polling islands in the linked lists pointed by - *p and *q (and also updates *p and *q to point to the latest polling islands) - - This function is needed because calling the following block of code to obtain - locks on polling islands (*p and *q) is prone to deadlocks. - { - polling_island_lock(*p, true); - polling_island_lock(*q, true); - } - - Usage/example: - polling_island *p1; - polling_island *p2; - .. - polling_island_lock_pair(&p1, &p2); - .. - .. Critical section with both p1 and p2 locked - .. - // Release locks: Always call polling_island_unlock_pair() to release locks - polling_island_unlock_pair(p1, p2); -*/ -static void polling_island_lock_pair(polling_island **p, polling_island **q) { - polling_island *pi_1 = *p; - polling_island *pi_2 = *q; - polling_island *next_1 = NULL; - polling_island *next_2 = NULL; - - /* The algorithm is simple: - - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and - keep updating pi_1 and pi_2) - - Then obtain locks on the islands by following a lock order rule of - locking polling_island with lower address first - Special case: Before obtaining the locks, check if pi_1 and pi_2 are - pointing to the same island. If that is the case, we can just call - polling_island_lock() - - After obtaining both the locks, double check that the polling islands - are still the last polling islands in their respective linked lists - (this is because there might have been polling island merges before - we got the lock) - - If the polling islands are the last islands, we are done. If not, - release the locks and continue the process from the first step */ - while (true) { - next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to); - while (next_1 != NULL) { - pi_1 = next_1; - next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to); - } - - next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to); - while (next_2 != NULL) { - pi_2 = next_2; - next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to); - } - - if (pi_1 == pi_2) { - pi_1 = pi_2 = polling_island_lock(pi_1); - break; - } - - if (pi_1 < pi_2) { - gpr_mu_lock(&pi_1->mu); - gpr_mu_lock(&pi_2->mu); - } else { - gpr_mu_lock(&pi_2->mu); - gpr_mu_lock(&pi_1->mu); - } - - next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to); - next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to); - if (next_1 == NULL && next_2 == NULL) { - break; - } - - gpr_mu_unlock(&pi_1->mu); - gpr_mu_unlock(&pi_2->mu); - } - - *p = pi_1; - *q = pi_2; -} - -static void polling_island_unlock_pair(polling_island *p, polling_island *q) { - if (p == q) { - gpr_mu_unlock(&p->mu); - } else { - gpr_mu_unlock(&p->mu); - gpr_mu_unlock(&q->mu); - } -} - -static polling_island *polling_island_merge(polling_island *p, - polling_island *q, - grpc_error **error) { - /* Get locks on both the polling islands */ - polling_island_lock_pair(&p, &q); - - if (p != q) { - /* Make sure that p points to the polling island with fewer fds than q */ - if (p->fd_cnt > q->fd_cnt) { - GPR_SWAP(polling_island *, p, q); - } - - /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q - Note that the refcounts on the fds being moved will not change here. - This is why the last param in the following two functions is 'false') */ - polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error); - polling_island_remove_all_fds_locked(p, false, error); - - /* Wakeup all the pollers (if any) on p so that they pickup this change */ - polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error); - - /* Add the 'merged_to' link from p --> q */ - gpr_atm_rel_store(&p->merged_to, (gpr_atm)q); - PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */ - } - /* else if p == q, nothing needs to be done */ - - polling_island_unlock_pair(p, q); - - /* Return the merged polling island (Note that no merge would have happened - if p == q which is ok) */ - return q; -} - -static grpc_error *polling_island_global_init() { - grpc_error *error = GRPC_ERROR_NONE; - - error = grpc_wakeup_fd_init(&polling_island_wakeup_fd); - if (error == GRPC_ERROR_NONE) { - error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd); - } - - return error; -} - -static void polling_island_global_shutdown() { - grpc_wakeup_fd_destroy(&polling_island_wakeup_fd); -} - -/******************************************************************************* - * Fd Definitions - */ - -/* We need to keep a freelist not because of any concerns of malloc performance - * but instead so that implementations with multiple threads in (for example) - * epoll_wait deal with the race between pollset removal and incoming poll - * notifications. - * - * The problem is that the poller ultimately holds a reference to this - * object, so it is very difficult to know when is safe to free it, at least - * without some expensive synchronization. - * - * If we keep the object freelisted, in the worst case losing this race just - * becomes a spurious read notification on a reused fd. - */ - -/* The alarm system needs to be able to wakeup 'some poller' sometimes - * (specifically when a new alarm needs to be triggered earlier than the next - * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a - * case occurs. */ - -static grpc_fd *fd_freelist = NULL; -static gpr_mu fd_freelist_mu; - -#ifndef NDEBUG -#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__) -#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__) -static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file, - int line) { - if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { - gpr_log(GPR_DEBUG, - "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", - fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), - gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line); - } -#else -#define REF_BY(fd, n, reason) ref_by(fd, n) -#define UNREF_BY(fd, n, reason) unref_by(fd, n) -static void ref_by(grpc_fd *fd, int n) { -#endif - GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0); -} - -#ifndef NDEBUG -static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file, - int line) { - if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { - gpr_log(GPR_DEBUG, - "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", - fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), - gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line); - } -#else -static void unref_by(grpc_fd *fd, int n) { -#endif - gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n); - if (old == n) { - /* Add the fd to the freelist */ - gpr_mu_lock(&fd_freelist_mu); - fd->freelist_next = fd_freelist; - fd_freelist = fd; - grpc_iomgr_unregister_object(&fd->iomgr_object); - - grpc_lfev_destroy(&fd->read_closure); - grpc_lfev_destroy(&fd->write_closure); - - gpr_mu_unlock(&fd_freelist_mu); - } else { - GPR_ASSERT(old > n); - } -} - -/* Increment refcount by two to avoid changing the orphan bit */ -#ifndef NDEBUG -static void fd_ref(grpc_fd *fd, const char *reason, const char *file, - int line) { - ref_by(fd, 2, reason, file, line); -} - -static void fd_unref(grpc_fd *fd, const char *reason, const char *file, - int line) { - unref_by(fd, 2, reason, file, line); -} -#else -static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); } -static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); } -#endif - -static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); } - -static void fd_global_shutdown(void) { - gpr_mu_lock(&fd_freelist_mu); - gpr_mu_unlock(&fd_freelist_mu); - while (fd_freelist != NULL) { - grpc_fd *fd = fd_freelist; - fd_freelist = fd_freelist->freelist_next; - gpr_mu_destroy(&fd->po.mu); - gpr_free(fd); - } - gpr_mu_destroy(&fd_freelist_mu); -} - -static grpc_fd *fd_create(int fd, const char *name) { - grpc_fd *new_fd = NULL; - - gpr_mu_lock(&fd_freelist_mu); - if (fd_freelist != NULL) { - new_fd = fd_freelist; - fd_freelist = fd_freelist->freelist_next; - } - gpr_mu_unlock(&fd_freelist_mu); - - if (new_fd == NULL) { - new_fd = (grpc_fd *)gpr_malloc(sizeof(grpc_fd)); - gpr_mu_init(&new_fd->po.mu); - } - - /* Note: It is not really needed to get the new_fd->po.mu lock here. If this - * is a newly created fd (or an fd we got from the freelist), no one else - * would be holding a lock to it anyway. */ - gpr_mu_lock(&new_fd->po.mu); - new_fd->po.pi = NULL; -#ifndef NDEBUG - new_fd->po.obj_type = POLL_OBJ_FD; -#endif - - gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1); - new_fd->fd = fd; - new_fd->orphaned = false; - grpc_lfev_init(&new_fd->read_closure); - grpc_lfev_init(&new_fd->write_closure); - gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL); - - new_fd->freelist_next = NULL; - new_fd->on_done_closure = NULL; - - gpr_mu_unlock(&new_fd->po.mu); - - char *fd_name; - gpr_asprintf(&fd_name, "%s fd=%d", name, fd); - grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name); - gpr_free(fd_name); - return new_fd; -} - -static int fd_wrapped_fd(grpc_fd *fd) { - int ret_fd = -1; - gpr_mu_lock(&fd->po.mu); - if (!fd->orphaned) { - ret_fd = fd->fd; - } - gpr_mu_unlock(&fd->po.mu); - - return ret_fd; -} - -static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *on_done, int *release_fd, - bool already_closed, const char *reason) { - grpc_error *error = GRPC_ERROR_NONE; - polling_island *unref_pi = NULL; - - gpr_mu_lock(&fd->po.mu); - fd->on_done_closure = on_done; - - /* Remove the active status but keep referenced. We want this grpc_fd struct - to be alive (and not added to freelist) until the end of this function */ - REF_BY(fd, 1, reason); - - /* Remove the fd from the polling island: - - Get a lock on the latest polling island (i.e the last island in the - linked list pointed by fd->po.pi). This is the island that - would actually contain the fd - - Remove the fd from the latest polling island - - Unlock the latest polling island - - Set fd->po.pi to NULL (but remove the ref on the polling island - before doing this.) */ - if (fd->po.pi != NULL) { - polling_island *pi_latest = polling_island_lock(fd->po.pi); - polling_island_remove_fd_locked(pi_latest, fd, already_closed, &error); - gpr_mu_unlock(&pi_latest->mu); - - unref_pi = fd->po.pi; - fd->po.pi = NULL; - } - - /* If release_fd is not NULL, we should be relinquishing control of the file - descriptor fd->fd (but we still own the grpc_fd structure). */ - if (release_fd != NULL) { - *release_fd = fd->fd; - } else { - close(fd->fd); - } - - fd->orphaned = true; - - GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error)); - - gpr_mu_unlock(&fd->po.mu); - UNREF_BY(fd, 2, reason); /* Drop the reference */ - if (unref_pi != NULL) { - /* Unref stale polling island here, outside the fd lock above. - The polling island owns a workqueue which owns an fd, and unreffing - inside the lock can cause an eventual lock loop that makes TSAN very - unhappy. */ - PI_UNREF(exec_ctx, unref_pi, "fd_orphan"); - } - if (error != GRPC_ERROR_NONE) { - const char *msg = grpc_error_string(error); - gpr_log(GPR_DEBUG, "fd_orphan: %s", msg); - } - GRPC_ERROR_UNREF(error); -} - -static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, - grpc_fd *fd) { - gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset); - return (grpc_pollset *)notifier; -} - -static bool fd_is_shutdown(grpc_fd *fd) { - return grpc_lfev_is_shutdown(&fd->read_closure); -} - -/* Might be called multiple times */ -static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) { - if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure, - GRPC_ERROR_REF(why))) { - shutdown(fd->fd, SHUT_RDWR); - grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why)); - } - GRPC_ERROR_UNREF(why); -} - -static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure, "read"); -} - -static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure, "write"); -} - -/******************************************************************************* - * Pollset Definitions - */ -GPR_TLS_DECL(g_current_thread_pollset); -GPR_TLS_DECL(g_current_thread_worker); -static __thread bool g_initialized_sigmask; -static __thread sigset_t g_orig_sigmask; - -static void sig_handler(int sig_num) { -#ifdef GRPC_EPOLL_DEBUG - gpr_log(GPR_INFO, "Received signal %d", sig_num); -#endif -} - -static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); } - -/* Global state management */ -static grpc_error *pollset_global_init(void) { - gpr_tls_init(&g_current_thread_pollset); - gpr_tls_init(&g_current_thread_worker); - poller_kick_init(); - return GRPC_ERROR_NONE; -} - -static void pollset_global_shutdown(void) { - gpr_tls_destroy(&g_current_thread_pollset); - gpr_tls_destroy(&g_current_thread_worker); -} - -static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) { - grpc_error *err = GRPC_ERROR_NONE; - - /* Kick the worker only if it was not already kicked */ - if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) { - GRPC_POLLING_TRACE( - "pollset_worker_kick: Kicking worker: %p (thread id: %ld)", - (void *)worker, (long int)worker->pt_id); - int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal); - if (err_num != 0) { - err = GRPC_OS_ERROR(err_num, "pthread_kill"); - } - } - return err; -} - -/* Return 1 if the pollset has active threads in pollset_work (pollset must - * be locked) */ -static int pollset_has_workers(grpc_pollset *p) { - return p->root_worker.next != &p->root_worker; -} - -static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) { - worker->prev->next = worker->next; - worker->next->prev = worker->prev; -} - -static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) { - if (pollset_has_workers(p)) { - grpc_pollset_worker *w = p->root_worker.next; - remove_worker(p, w); - return w; - } else { - return NULL; - } -} - -static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) { - worker->next = &p->root_worker; - worker->prev = worker->next->prev; - worker->prev->next = worker->next->prev = worker; -} - -static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) { - worker->prev = &p->root_worker; - worker->next = worker->prev->next; - worker->prev->next = worker->next->prev = worker; -} - -/* p->mu must be held before calling this function */ -static grpc_error *pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *p, - grpc_pollset_worker *specific_worker) { - GPR_TIMER_BEGIN("pollset_kick", 0); - grpc_error *error = GRPC_ERROR_NONE; - GRPC_STATS_INC_POLLSET_KICK(exec_ctx); - const char *err_desc = "Kick Failure"; - grpc_pollset_worker *worker = specific_worker; - if (worker != NULL) { - if (worker == GRPC_POLLSET_KICK_BROADCAST) { - if (pollset_has_workers(p)) { - GPR_TIMER_BEGIN("pollset_kick.broadcast", 0); - for (worker = p->root_worker.next; worker != &p->root_worker; - worker = worker->next) { - if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) { - append_error(&error, pollset_worker_kick(worker), err_desc); - } - } - GPR_TIMER_END("pollset_kick.broadcast", 0); - } else { - p->kicked_without_pollers = true; - } - } else { - GPR_TIMER_MARK("kicked_specifically", 0); - if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) { - append_error(&error, pollset_worker_kick(worker), err_desc); - } - } - } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) { - /* Since worker == NULL, it means that we can kick "any" worker on this - pollset 'p'. If 'p' happens to be the same pollset this thread is - currently polling (i.e in pollset_work() function), then there is no need - to kick any other worker since the current thread can just absorb the - kick. This is the reason why we enter this case only when - g_current_thread_pollset is != p */ - - GPR_TIMER_MARK("kick_anonymous", 0); - worker = pop_front_worker(p); - if (worker != NULL) { - GPR_TIMER_MARK("finally_kick", 0); - push_back_worker(p, worker); - append_error(&error, pollset_worker_kick(worker), err_desc); - } else { - GPR_TIMER_MARK("kicked_no_pollers", 0); - p->kicked_without_pollers = true; - } - } - - GPR_TIMER_END("pollset_kick", 0); - GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error)); - return error; -} - -static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { - gpr_mu_init(&pollset->po.mu); - *mu = &pollset->po.mu; - pollset->po.pi = NULL; -#ifndef NDEBUG - pollset->po.obj_type = POLL_OBJ_POLLSET; -#endif - - pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker; - pollset->kicked_without_pollers = false; - - pollset->shutting_down = false; - pollset->finish_shutdown_called = false; - pollset->shutdown_done = NULL; -} - -/* Convert a timespec to milliseconds: - - Very small or negative poll times are clamped to zero to do a non-blocking - poll (which becomes spin polling) - - Other small values are rounded up to one millisecond - - Longer than a millisecond polls are rounded up to the next nearest - millisecond to avoid spinning - - Infinite timeouts are converted to -1 */ -static int poll_deadline_to_millis_timeout(gpr_timespec deadline, - gpr_timespec now) { - gpr_timespec timeout; - static const int64_t max_spin_polling_us = 10; - if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { - return -1; - } - - if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros( - max_spin_polling_us, - GPR_TIMESPAN))) <= 0) { - return 0; - } - timeout = gpr_time_sub(deadline, now); - int millis = gpr_time_to_millis(gpr_time_add( - timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); - return millis >= 1 ? millis : 1; -} - -static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_pollset *notifier) { - grpc_lfev_set_ready(exec_ctx, &fd->read_closure, "read"); - - /* Note, it is possible that fd_become_readable might be called twice with - different 'notifier's when an fd becomes readable and it is in two epoll - sets (This can happen briefly during polling island merges). In such cases - it does not really matter which notifer is set as the read_notifier_pollset - (They would both point to the same polling island anyway) */ - /* Use release store to match with acquire load in fd_get_read_notifier */ - gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier); -} - -static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - grpc_lfev_set_ready(exec_ctx, &fd->write_closure, "write"); -} - -static void pollset_release_polling_island(grpc_exec_ctx *exec_ctx, - grpc_pollset *ps, - const char *reason) { - if (ps->po.pi != NULL) { - PI_UNREF(exec_ctx, ps->po.pi, reason); - } - ps->po.pi = NULL; -} - -static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset) { - /* The pollset cannot have any workers if we are at this stage */ - GPR_ASSERT(!pollset_has_workers(pollset)); - - pollset->finish_shutdown_called = true; - - /* Release the ref and set pollset->po.pi to NULL */ - pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown"); - GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE); -} - -/* pollset->po.mu lock must be held by the caller before calling this */ -static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_closure *closure) { - GPR_TIMER_BEGIN("pollset_shutdown", 0); - GPR_ASSERT(!pollset->shutting_down); - pollset->shutting_down = true; - pollset->shutdown_done = closure; - pollset_kick(exec_ctx, pollset, GRPC_POLLSET_KICK_BROADCAST); - - /* If the pollset has any workers, we cannot call finish_shutdown_locked() - because it would release the underlying polling island. In such a case, we - let the last worker call finish_shutdown_locked() from pollset_work() */ - if (!pollset_has_workers(pollset)) { - GPR_ASSERT(!pollset->finish_shutdown_called); - GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0); - finish_shutdown_locked(exec_ctx, pollset); - } - GPR_TIMER_END("pollset_shutdown", 0); -} - -/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other - * than destroying the mutexes, there is nothing special that needs to be done - * here */ -static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { - GPR_ASSERT(!pollset_has_workers(pollset)); - gpr_mu_destroy(&pollset->po.mu); -} - -#define GRPC_EPOLL_MAX_EVENTS 100 -/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */ -static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, - grpc_pollset *pollset, - grpc_pollset_worker *worker, int timeout_ms, - sigset_t *sig_mask, grpc_error **error) { - struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; - int epoll_fd = -1; - int ep_rv; - polling_island *pi = NULL; - char *err_msg; - const char *err_desc = "pollset_work_and_unlock"; - GPR_TIMER_BEGIN("pollset_work_and_unlock", 0); - - /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the - latest polling island pointed by pollset->po.pi - - Since epoll_fd is immutable, we can read it without obtaining the polling - island lock. There is however a possibility that the polling island (from - which we got the epoll_fd) got merged with another island while we are - in this function. This is still okay because in such a case, we will wakeup - right-away from epoll_wait() and pick up the latest polling_island the next - this function (i.e pollset_work_and_unlock()) is called */ - - if (pollset->po.pi == NULL) { - pollset->po.pi = polling_island_create(exec_ctx, NULL, error); - if (pollset->po.pi == NULL) { - GPR_TIMER_END("pollset_work_and_unlock", 0); - return; /* Fatal error. We cannot continue */ - } - - PI_ADD_REF(pollset->po.pi, "ps"); - GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p", - (void *)pollset, (void *)pollset->po.pi); - } - - pi = polling_island_maybe_get_latest(pollset->po.pi); - epoll_fd = pi->epoll_fd; - - /* Update the pollset->po.pi since the island being pointed by - pollset->po.pi maybe older than the one pointed by pi) */ - if (pollset->po.pi != pi) { - /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the - polling island to be deleted */ - PI_ADD_REF(pi, "ps"); - PI_UNREF(exec_ctx, pollset->po.pi, "ps"); - pollset->po.pi = pi; - } - - /* Add an extra ref so that the island does not get destroyed (which means - the epoll_fd won't be closed) while we are are doing an epoll_wait() on the - epoll_fd */ - PI_ADD_REF(pi, "ps_work"); - gpr_mu_unlock(&pollset->po.mu); - - gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1); - g_current_thread_polling_island = pi; - - GRPC_SCHEDULING_START_BLOCKING_REGION; - GRPC_STATS_INC_SYSCALL_POLL(exec_ctx); - ep_rv = - epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask); - GRPC_SCHEDULING_END_BLOCKING_REGION; - if (ep_rv < 0) { - if (errno != EINTR) { - gpr_asprintf(&err_msg, - "epoll_wait() epoll fd: %d failed with error: %d (%s)", - epoll_fd, errno, strerror(errno)); - append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); - } else { - /* We were interrupted. Save an interation by doing a zero timeout - epoll_wait to see if there are any other events of interest */ - GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick", - (void *)pollset, (void *)worker); - ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); - } - } - -#ifdef GRPC_TSAN - /* See the definition of g_poll_sync for more details */ - gpr_atm_acq_load(&g_epoll_sync); -#endif /* defined(GRPC_TSAN) */ - - for (int i = 0; i < ep_rv; ++i) { - void *data_ptr = ep_ev[i].data.ptr; - if (data_ptr == &polling_island_wakeup_fd) { - GRPC_POLLING_TRACE( - "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: " - "%d) got merged", - (void *)pollset, (void *)worker, epoll_fd); - /* This means that our polling island is merged with a different - island. We do not have to do anything here since the subsequent call - to the function pollset_work_and_unlock() will pick up the correct - epoll_fd */ - } else { - grpc_fd *fd = (grpc_fd *)data_ptr; - int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); - int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); - int write_ev = ep_ev[i].events & EPOLLOUT; - if (read_ev || cancel) { - fd_become_readable(exec_ctx, fd, pollset); - } - if (write_ev || cancel) { - fd_become_writable(exec_ctx, fd); - } - } - } - - g_current_thread_polling_island = NULL; - gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1); - - GPR_ASSERT(pi != NULL); - - /* Before leaving, release the extra ref we added to the polling island. It - is important to use "pi" here (i.e our old copy of pollset->po.pi - that we got before releasing the polling island lock). This is because - pollset->po.pi pointer might get udpated in other parts of the - code when there is an island merge while we are doing epoll_wait() above */ - PI_UNREF(exec_ctx, pi, "ps_work"); - - GPR_TIMER_END("pollset_work_and_unlock", 0); -} - -/* pollset->po.mu lock must be held by the caller before calling this. - The function pollset_work() may temporarily release the lock (pollset->po.mu) - during the course of its execution but it will always re-acquire the lock and - ensure that it is held by the time the function returns */ -static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker **worker_hdl, - gpr_timespec now, gpr_timespec deadline) { - GPR_TIMER_BEGIN("pollset_work", 0); - grpc_error *error = GRPC_ERROR_NONE; - int timeout_ms = poll_deadline_to_millis_timeout(deadline, now); - - sigset_t new_mask; - - grpc_pollset_worker worker; - worker.next = worker.prev = NULL; - worker.pt_id = pthread_self(); - gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0); - - if (worker_hdl) *worker_hdl = &worker; - - gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset); - gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); - - if (pollset->kicked_without_pollers) { - /* If the pollset was kicked without pollers, pretend that the current - worker got the kick and skip polling. A kick indicates that there is some - work that needs attention like an event on the completion queue or an - alarm */ - GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); - pollset->kicked_without_pollers = 0; - } else if (!pollset->shutting_down) { - /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up - (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the - worker that there is some pending work that needs immediate attention - (like an event on the completion queue, or a polling island merge that - results in a new epoll-fd to wait on) and that the worker should not - spend time waiting in epoll_pwait(). - - A worker can be kicked anytime from the point it is added to the pollset - via push_front_worker() (or push_back_worker()) to the point it is - removed via remove_worker(). - If the worker is kicked before/during it calls epoll_pwait(), it should - immediately exit from epoll_wait(). If the worker is kicked after it - returns from epoll_wait(), then nothing really needs to be done. - - To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all - times *except* when it is in epoll_pwait(). This way, the worker never - misses acting on a kick */ - - if (!g_initialized_sigmask) { - sigemptyset(&new_mask); - sigaddset(&new_mask, grpc_wakeup_signal); - pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask); - sigdelset(&g_orig_sigmask, grpc_wakeup_signal); - g_initialized_sigmask = true; - /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'. - This is the mask used at all times *except during - epoll_wait()*" - g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and - this is the mask to use *during epoll_wait()* - - The new_mask is set on the worker before it is added to the pollset - (i.e before it can be kicked) */ - } - - push_front_worker(pollset, &worker); /* Add worker to pollset */ - - pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms, - &g_orig_sigmask, &error); - grpc_exec_ctx_flush(exec_ctx); - - gpr_mu_lock(&pollset->po.mu); - - /* Note: There is no need to reset worker.is_kicked to 0 since we are no - longer going to use this worker */ - remove_worker(pollset, &worker); - } - - /* If we are the last worker on the pollset (i.e pollset_has_workers() is - false at this point) and the pollset is shutting down, we may have to - finish the shutdown process by calling finish_shutdown_locked(). - See pollset_shutdown() for more details. - - Note: Continuing to access pollset here is safe; it is the caller's - responsibility to not destroy a pollset when it has outstanding calls to - pollset_work() */ - if (pollset->shutting_down && !pollset_has_workers(pollset) && - !pollset->finish_shutdown_called) { - GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0); - finish_shutdown_locked(exec_ctx, pollset); - - gpr_mu_unlock(&pollset->po.mu); - grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&pollset->po.mu); - } - - if (worker_hdl) *worker_hdl = NULL; - - gpr_tls_set(&g_current_thread_pollset, (intptr_t)0); - gpr_tls_set(&g_current_thread_worker, (intptr_t)0); - - GPR_TIMER_END("pollset_work", 0); - - GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error)); - return error; -} - -static void add_poll_object(grpc_exec_ctx *exec_ctx, poll_obj *bag, - poll_obj_type bag_type, poll_obj *item, - poll_obj_type item_type) { - GPR_TIMER_BEGIN("add_poll_object", 0); - -#ifndef NDEBUG - GPR_ASSERT(item->obj_type == item_type); - GPR_ASSERT(bag->obj_type == bag_type); -#endif - - grpc_error *error = GRPC_ERROR_NONE; - polling_island *pi_new = NULL; - - gpr_mu_lock(&bag->mu); - gpr_mu_lock(&item->mu); - -retry: - /* - * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing - * 2) If item->pi and bag->pi are both NULL, create a new polling island (with - * a refcount of 2) and point item->pi and bag->pi to the new island - * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to - * the other's non-NULL pi - * 4) Finally if item->pi and bag-pi are non-NULL and not-equal, merge the - * polling islands and update item->pi and bag->pi to point to the new - * island - */ - - /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already - * orphaned */ - if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) { - gpr_mu_unlock(&item->mu); - gpr_mu_unlock(&bag->mu); - return; - } - - if (item->pi == bag->pi) { - pi_new = item->pi; - if (pi_new == NULL) { - /* GPR_ASSERT(item->pi == bag->pi == NULL) */ - - /* If we are adding an fd to a bag (i.e pollset or pollset_set), then - * we need to do some extra work to make TSAN happy */ - if (item_type == POLL_OBJ_FD) { - /* Unlock before creating a new polling island: the polling island will - create a workqueue which creates a file descriptor, and holding an fd - lock here can eventually cause a loop to appear to TSAN (making it - unhappy). We don't think it's a real loop (there's an epoch point - where that loop possibility disappears), but the advantages of - keeping TSAN happy outweigh any performance advantage we might have - by keeping the lock held. */ - gpr_mu_unlock(&item->mu); - pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error); - gpr_mu_lock(&item->mu); - - /* Need to reverify any assumptions made between the initial lock and - getting to this branch: if they've changed, we need to throw away our - work and figure things out again. */ - if (item->pi != NULL) { - GRPC_POLLING_TRACE( - "add_poll_object: Raced creating new polling island. pi_new: %p " - "(fd: %d, %s: %p)", - (void *)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type), - (void *)bag); - /* No need to lock 'pi_new' here since this is a new polling island - and no one has a reference to it yet */ - polling_island_remove_all_fds_locked(pi_new, true, &error); - - /* Ref and unref so that the polling island gets deleted during unref - */ - PI_ADD_REF(pi_new, "dance_of_destruction"); - PI_UNREF(exec_ctx, pi_new, "dance_of_destruction"); - goto retry; - } - } else { - pi_new = polling_island_create(exec_ctx, NULL, &error); - } - - GRPC_POLLING_TRACE( - "add_poll_object: Created new polling island. pi_new: %p (%s: %p, " - "%s: %p)", - (void *)pi_new, poll_obj_string(item_type), (void *)item, - poll_obj_string(bag_type), (void *)bag); - } else { - GRPC_POLLING_TRACE( - "add_poll_object: Same polling island. pi: %p (%s, %s)", - (void *)pi_new, poll_obj_string(item_type), - poll_obj_string(bag_type)); - } - } else if (item->pi == NULL) { - /* GPR_ASSERT(bag->pi != NULL) */ - /* Make pi_new point to latest pi*/ - pi_new = polling_island_lock(bag->pi); - - if (item_type == POLL_OBJ_FD) { - grpc_fd *fd = FD_FROM_PO(item); - polling_island_add_fds_locked(pi_new, &fd, 1, true, &error); - } - - gpr_mu_unlock(&pi_new->mu); - GRPC_POLLING_TRACE( - "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, " - "bag(%s): %p)", - (void *)pi_new, poll_obj_string(item_type), (void *)item, - poll_obj_string(bag_type), (void *)bag); - } else if (bag->pi == NULL) { - /* GPR_ASSERT(item->pi != NULL) */ - /* Make pi_new to point to latest pi */ - pi_new = polling_island_lock(item->pi); - gpr_mu_unlock(&pi_new->mu); - GRPC_POLLING_TRACE( - "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, " - "bag(%s): %p)", - (void *)pi_new, poll_obj_string(item_type), (void *)item, - poll_obj_string(bag_type), (void *)bag); - } else { - pi_new = polling_island_merge(item->pi, bag->pi, &error); - GRPC_POLLING_TRACE( - "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, " - "bag(%s): %p)", - (void *)pi_new, poll_obj_string(item_type), (void *)item, - poll_obj_string(bag_type), (void *)bag); - } - - /* At this point, pi_new is the polling island that both item->pi and bag->pi - MUST be pointing to */ - - if (item->pi != pi_new) { - PI_ADD_REF(pi_new, poll_obj_string(item_type)); - if (item->pi != NULL) { - PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type)); - } - item->pi = pi_new; - } - - if (bag->pi != pi_new) { - PI_ADD_REF(pi_new, poll_obj_string(bag_type)); - if (bag->pi != NULL) { - PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type)); - } - bag->pi = pi_new; - } - - gpr_mu_unlock(&item->mu); - gpr_mu_unlock(&bag->mu); - - GRPC_LOG_IF_ERROR("add_poll_object", error); - GPR_TIMER_END("add_poll_object", 0); -} - -static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_fd *fd) { - add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po, - POLL_OBJ_FD); -} - -/******************************************************************************* - * Pollset-set Definitions - */ - -static grpc_pollset_set *pollset_set_create(void) { - grpc_pollset_set *pss = (grpc_pollset_set *)gpr_malloc(sizeof(*pss)); - gpr_mu_init(&pss->po.mu); - pss->po.pi = NULL; -#ifndef NDEBUG - pss->po.obj_type = POLL_OBJ_POLLSET_SET; -#endif - return pss; -} - -static void pollset_set_destroy(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pss) { - gpr_mu_destroy(&pss->po.mu); - - if (pss->po.pi != NULL) { - PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy"); - } - - gpr_free(pss); -} - -static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, - grpc_fd *fd) { - add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po, - POLL_OBJ_FD); -} - -static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, - grpc_fd *fd) { - /* Nothing to do */ -} - -static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pss, grpc_pollset *ps) { - add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po, - POLL_OBJ_POLLSET); -} - -static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pss, grpc_pollset *ps) { - /* Nothing to do */ -} - -static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) { - add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po, - POLL_OBJ_POLLSET_SET); -} - -static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) { - /* Nothing to do */ -} - -/* Test helper functions - * */ -void *grpc_fd_get_polling_island(grpc_fd *fd) { - polling_island *pi; - - gpr_mu_lock(&fd->po.mu); - pi = fd->po.pi; - gpr_mu_unlock(&fd->po.mu); - - return pi; -} - -void *grpc_pollset_get_polling_island(grpc_pollset *ps) { - polling_island *pi; - - gpr_mu_lock(&ps->po.mu); - pi = ps->po.pi; - gpr_mu_unlock(&ps->po.mu); - - return pi; -} - -bool grpc_are_polling_islands_equal(void *p, void *q) { - polling_island *p1 = (polling_island *)p; - polling_island *p2 = (polling_island *)q; - - /* Note: polling_island_lock_pair() may change p1 and p2 to point to the - latest polling islands in their respective linked lists */ - polling_island_lock_pair(&p1, &p2); - polling_island_unlock_pair(p1, p2); - - return p1 == p2; -} - -/******************************************************************************* - * Event engine binding - */ - -static void shutdown_engine(void) { - fd_global_shutdown(); - pollset_global_shutdown(); - polling_island_global_shutdown(); -} - -static const grpc_event_engine_vtable vtable = { - sizeof(grpc_pollset), - - fd_create, - fd_wrapped_fd, - fd_orphan, - fd_shutdown, - fd_notify_on_read, - fd_notify_on_write, - fd_is_shutdown, - fd_get_read_notifier_pollset, - - pollset_init, - pollset_shutdown, - pollset_destroy, - pollset_work, - pollset_kick, - pollset_add_fd, - - pollset_set_create, - pollset_set_destroy, - pollset_set_add_pollset, - pollset_set_del_pollset, - pollset_set_add_pollset_set, - pollset_set_del_pollset_set, - pollset_set_add_fd, - pollset_set_del_fd, - - shutdown_engine, -}; - -/* It is possible that GLIBC has epoll but the underlying kernel doesn't. - * Create a dummy epoll_fd to make sure epoll support is available */ -static bool is_epoll_available() { - int fd = epoll_create1(EPOLL_CLOEXEC); - if (fd < 0) { - gpr_log( - GPR_ERROR, - "epoll_create1 failed with error: %d. Not using epoll polling engine", - fd); - return false; - } - close(fd); - return true; -} - -const grpc_event_engine_vtable *grpc_init_epollsig_linux( - bool explicit_request) { - /* If use of signals is disabled, we cannot use epoll engine*/ - if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) { - return NULL; - } - - if (!grpc_has_wakeup_fd()) { - return NULL; - } - - if (!is_epoll_available()) { - return NULL; - } - - if (!is_grpc_wakeup_signal_initialized) { - if (explicit_request) { - grpc_use_signal(SIGRTMIN + 6); - } else { - return NULL; - } - } - - fd_global_init(); - - if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) { - return NULL; - } - - if (!GRPC_LOG_IF_ERROR("polling_island_global_init", - polling_island_global_init())) { - return NULL; - } - - return &vtable; -} - -#else /* defined(GRPC_LINUX_EPOLL) */ -#if defined(GRPC_POSIX_SOCKET) -#include "src/core/lib/iomgr/ev_posix.h" -/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return - * NULL */ -const grpc_event_engine_vtable *grpc_init_epollsig_linux( - bool explicit_request) { - return NULL; -} -#endif /* defined(GRPC_POSIX_SOCKET) */ - -void grpc_use_signal(int signum) {} -#endif /* !defined(GRPC_LINUX_EPOLL) */ diff --git a/src/core/lib/iomgr/ev_epollsig_linux.cc b/src/core/lib/iomgr/ev_epollsig_linux.cc new file mode 100644 index 0000000000..4d8bdf1401 --- /dev/null +++ b/src/core/lib/iomgr/ev_epollsig_linux.cc @@ -0,0 +1,1769 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +/* This polling engine is only relevant on linux kernels supporting epoll() */ +#ifdef GRPC_LINUX_EPOLL + +#include "src/core/lib/iomgr/ev_epollsig_linux.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "src/core/lib/debug/stats.h" +#include "src/core/lib/iomgr/ev_posix.h" +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/lockfree_event.h" +#include "src/core/lib/iomgr/timer.h" +#include "src/core/lib/iomgr/wakeup_fd_posix.h" +#include "src/core/lib/profiling/timers.h" +#include "src/core/lib/support/block_annotate.h" + +#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1) + +#define GRPC_POLLING_TRACE(...) \ + if (GRPC_TRACER_ON(grpc_polling_trace)) { \ + gpr_log(GPR_INFO, __VA_ARGS__); \ + } + +static int grpc_wakeup_signal = -1; +static bool is_grpc_wakeup_signal_initialized = false; + +/* Implements the function defined in grpc_posix.h. This function might be + * called before even calling grpc_init() to set either a different signal to + * use. If signum == -1, then the use of signals is disabled */ +void grpc_use_signal(int signum) { + grpc_wakeup_signal = signum; + is_grpc_wakeup_signal_initialized = true; + + if (grpc_wakeup_signal < 0) { + gpr_log(GPR_INFO, + "Use of signals is disabled. Epoll engine will not be used"); + } else { + gpr_log(GPR_INFO, "epoll engine will be using signal: %d", + grpc_wakeup_signal); + } +} + +struct polling_island; + +typedef enum { + POLL_OBJ_FD, + POLL_OBJ_POLLSET, + POLL_OBJ_POLLSET_SET +} poll_obj_type; + +typedef struct poll_obj { +#ifndef NDEBUG + poll_obj_type obj_type; +#endif + gpr_mu mu; + struct polling_island *pi; +} poll_obj; + +const char *poll_obj_string(poll_obj_type po_type) { + switch (po_type) { + case POLL_OBJ_FD: + return "fd"; + case POLL_OBJ_POLLSET: + return "pollset"; + case POLL_OBJ_POLLSET_SET: + return "pollset_set"; + } + + GPR_UNREACHABLE_CODE(return "UNKNOWN"); +} + +/******************************************************************************* + * Fd Declarations + */ + +#define FD_FROM_PO(po) ((grpc_fd *)(po)) + +struct grpc_fd { + poll_obj po; + + int fd; + /* refst format: + bit 0 : 1=Active / 0=Orphaned + bits 1-n : refcount + Ref/Unref by two to avoid altering the orphaned bit */ + gpr_atm refst; + + /* The fd is either closed or we relinquished control of it. In either + cases, this indicates that the 'fd' on this structure is no longer + valid */ + bool orphaned; + + gpr_atm read_closure; + gpr_atm write_closure; + + struct grpc_fd *freelist_next; + grpc_closure *on_done_closure; + + /* The pollset that last noticed that the fd is readable. The actual type + * stored in this is (grpc_pollset *) */ + gpr_atm read_notifier_pollset; + + grpc_iomgr_object iomgr_object; +}; + +/* Reference counting for fds */ +#ifndef NDEBUG +static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line); +static void fd_unref(grpc_fd *fd, const char *reason, const char *file, + int line); +#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__) +#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__) +#else +static void fd_ref(grpc_fd *fd); +static void fd_unref(grpc_fd *fd); +#define GRPC_FD_REF(fd, reason) fd_ref(fd) +#define GRPC_FD_UNREF(fd, reason) fd_unref(fd) +#endif + +static void fd_global_init(void); +static void fd_global_shutdown(void); + +/******************************************************************************* + * Polling island Declarations + */ + +#ifndef NDEBUG + +#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__) +#define PI_UNREF(exec_ctx, p, r) \ + pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__) + +#else + +#define PI_ADD_REF(p, r) pi_add_ref((p)) +#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p)) + +#endif + +/* This is also used as grpc_workqueue (by directly casing it) */ +typedef struct polling_island { + gpr_mu mu; + /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement + the refcount. + Once the ref count becomes zero, this structure is destroyed which means + we should ensure that there is never a scenario where a PI_ADD_REF() is + racing with a PI_UNREF() that just made the ref_count zero. */ + gpr_atm ref_count; + + /* Pointer to the polling_island this merged into. + * merged_to value is only set once in polling_island's lifetime (and that too + * only if the island is merged with another island). Because of this, we can + * use gpr_atm type here so that we can do atomic access on this and reduce + * lock contention on 'mu' mutex. + * + * Note that if this field is not NULL (i.e not 0), all the remaining fields + * (except mu and ref_count) are invalid and must be ignored. */ + gpr_atm merged_to; + + /* Number of threads currently polling on this island */ + gpr_atm poller_count; + + /* The fd of the underlying epoll set */ + int epoll_fd; + + /* The file descriptors in the epoll set */ + size_t fd_cnt; + size_t fd_capacity; + grpc_fd **fds; +} polling_island; + +/******************************************************************************* + * Pollset Declarations + */ +struct grpc_pollset_worker { + /* Thread id of this worker */ + pthread_t pt_id; + + /* Used to prevent a worker from getting kicked multiple times */ + gpr_atm is_kicked; + struct grpc_pollset_worker *next; + struct grpc_pollset_worker *prev; +}; + +struct grpc_pollset { + poll_obj po; + + grpc_pollset_worker root_worker; + bool kicked_without_pollers; + + bool shutting_down; /* Is the pollset shutting down ? */ + bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */ + grpc_closure *shutdown_done; /* Called after after shutdown is complete */ +}; + +/******************************************************************************* + * Pollset-set Declarations + */ +struct grpc_pollset_set { + poll_obj po; +}; + +/******************************************************************************* + * Common helpers + */ + +static bool append_error(grpc_error **composite, grpc_error *error, + const char *desc) { + if (error == GRPC_ERROR_NONE) return true; + if (*composite == GRPC_ERROR_NONE) { + *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc); + } + *composite = grpc_error_add_child(*composite, error); + return false; +} + +/******************************************************************************* + * Polling island Definitions + */ + +/* The wakeup fd that is used to wake up all threads in a Polling island. This + is useful in the polling island merge operation where we need to wakeup all + the threads currently polling the smaller polling island (so that they can + start polling the new/merged polling island) + + NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the + threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */ +static grpc_wakeup_fd polling_island_wakeup_fd; + +/* The polling island being polled right now. + See comments in workqueue_maybe_wakeup for why this is tracked. */ +static __thread polling_island *g_current_thread_polling_island; + +/* Forward declaration */ +static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi); + +#ifdef GRPC_TSAN +/* Currently TSAN may incorrectly flag data races between epoll_ctl and + epoll_wait for any grpc_fd structs that are added to the epoll set via + epoll_ctl and are returned (within a very short window) via epoll_wait(). + + To work-around this race, we establish a happens-before relation between + the code just-before epoll_ctl() and the code after epoll_wait() by using + this atomic */ +gpr_atm g_epoll_sync; +#endif /* defined(GRPC_TSAN) */ + +static void pi_add_ref(polling_island *pi); +static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi); + +#ifndef NDEBUG +static void pi_add_ref_dbg(polling_island *pi, const char *reason, + const char *file, int line) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count); + gpr_log(GPR_DEBUG, "Add ref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR + " (%s) - (%s, %d)", + pi, old_cnt, old_cnt + 1, reason, file, line); + } + pi_add_ref(pi); +} + +static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi, + const char *reason, const char *file, int line) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_atm old_cnt = gpr_atm_acq_load(&pi->ref_count); + gpr_log(GPR_DEBUG, "Unref pi: %p, old:%" PRIdPTR " -> new:%" PRIdPTR + " (%s) - (%s, %d)", + pi, old_cnt, (old_cnt - 1), reason, file, line); + } + pi_unref(exec_ctx, pi); +} +#endif + +static void pi_add_ref(polling_island *pi) { + gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1); +} + +static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) { + /* If ref count went to zero, delete the polling island. + Note that this deletion not be done under a lock. Once the ref count goes + to zero, we are guaranteed that no one else holds a reference to the + polling island (and that there is no racing pi_add_ref() call either). + + Also, if we are deleting the polling island and the merged_to field is + non-empty, we should remove a ref to the merged_to polling island + */ + if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) { + polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); + polling_island_delete(exec_ctx, pi); + if (next != NULL) { + PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */ + } + } +} + +/* The caller is expected to hold pi->mu lock before calling this function */ +static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds, + size_t fd_count, bool add_fd_refs, + grpc_error **error) { + int err; + size_t i; + struct epoll_event ev; + char *err_msg; + const char *err_desc = "polling_island_add_fds"; + +#ifdef GRPC_TSAN + /* See the definition of g_epoll_sync for more context */ + gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0); +#endif /* defined(GRPC_TSAN) */ + + for (i = 0; i < fd_count; i++) { + ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); + ev.data.ptr = fds[i]; + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev); + + if (err < 0) { + if (errno != EEXIST) { + gpr_asprintf( + &err_msg, + "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)", + pi->epoll_fd, fds[i]->fd, errno, strerror(errno)); + append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); + gpr_free(err_msg); + } + + continue; + } + + if (pi->fd_cnt == pi->fd_capacity) { + pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2); + pi->fds = + (grpc_fd **)gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity); + } + + pi->fds[pi->fd_cnt++] = fds[i]; + if (add_fd_refs) { + GRPC_FD_REF(fds[i], "polling_island"); + } + } +} + +/* The caller is expected to hold pi->mu before calling this */ +static void polling_island_add_wakeup_fd_locked(polling_island *pi, + grpc_wakeup_fd *wakeup_fd, + grpc_error **error) { + struct epoll_event ev; + int err; + char *err_msg; + const char *err_desc = "polling_island_add_wakeup_fd"; + + ev.events = (uint32_t)(EPOLLIN | EPOLLET); + ev.data.ptr = wakeup_fd; + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, + GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev); + if (err < 0 && errno != EEXIST) { + gpr_asprintf(&err_msg, + "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with " + "error: %d (%s)", + pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno, + strerror(errno)); + append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); + gpr_free(err_msg); + } +} + +/* The caller is expected to hold pi->mu lock before calling this function */ +static void polling_island_remove_all_fds_locked(polling_island *pi, + bool remove_fd_refs, + grpc_error **error) { + int err; + size_t i; + char *err_msg; + const char *err_desc = "polling_island_remove_fds"; + + for (i = 0; i < pi->fd_cnt; i++) { + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL); + if (err < 0 && errno != ENOENT) { + gpr_asprintf(&err_msg, + "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with " + "error: %d (%s)", + pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno)); + append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); + gpr_free(err_msg); + } + + if (remove_fd_refs) { + GRPC_FD_UNREF(pi->fds[i], "polling_island"); + } + } + + pi->fd_cnt = 0; +} + +/* The caller is expected to hold pi->mu lock before calling this function */ +static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd, + bool is_fd_closed, + grpc_error **error) { + int err; + size_t i; + char *err_msg; + const char *err_desc = "polling_island_remove_fd"; + + /* If fd is already closed, then it would have been automatically been removed + from the epoll set */ + if (!is_fd_closed) { + err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL); + if (err < 0 && errno != ENOENT) { + gpr_asprintf( + &err_msg, + "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)", + pi->epoll_fd, fd->fd, errno, strerror(errno)); + append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); + gpr_free(err_msg); + } + } + + for (i = 0; i < pi->fd_cnt; i++) { + if (pi->fds[i] == fd) { + pi->fds[i] = pi->fds[--pi->fd_cnt]; + GRPC_FD_UNREF(fd, "polling_island"); + break; + } + } +} + +/* Might return NULL in case of an error */ +static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx, + grpc_fd *initial_fd, + grpc_error **error) { + polling_island *pi = NULL; + const char *err_desc = "polling_island_create"; + + *error = GRPC_ERROR_NONE; + + pi = (polling_island *)gpr_malloc(sizeof(*pi)); + gpr_mu_init(&pi->mu); + pi->fd_cnt = 0; + pi->fd_capacity = 0; + pi->fds = NULL; + pi->epoll_fd = -1; + + gpr_atm_rel_store(&pi->ref_count, 0); + gpr_atm_rel_store(&pi->poller_count, 0); + gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL); + + pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + + if (pi->epoll_fd < 0) { + append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc); + goto done; + } + + if (initial_fd != NULL) { + polling_island_add_fds_locked(pi, &initial_fd, 1, true, error); + } + +done: + if (*error != GRPC_ERROR_NONE) { + polling_island_delete(exec_ctx, pi); + pi = NULL; + } + return pi; +} + +static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) { + GPR_ASSERT(pi->fd_cnt == 0); + + if (pi->epoll_fd >= 0) { + close(pi->epoll_fd); + } + gpr_mu_destroy(&pi->mu); + gpr_free(pi->fds); + gpr_free(pi); +} + +/* Attempts to gets the last polling island in the linked list (liked by the + * 'merged_to' field). Since this does not lock the polling island, there are no + * guarantees that the island returned is the last island */ +static polling_island *polling_island_maybe_get_latest(polling_island *pi) { + polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); + while (next != NULL) { + pi = next; + next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); + } + + return pi; +} + +/* Gets the lock on the *latest* polling island i.e the last polling island in + the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the + returned polling island's mu. + Usage: To lock/unlock polling island "pi", do the following: + polling_island *pi_latest = polling_island_lock(pi); + ... + ... critical section .. + ... + gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */ +static polling_island *polling_island_lock(polling_island *pi) { + polling_island *next = NULL; + + while (true) { + next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); + if (next == NULL) { + /* Looks like 'pi' is the last node in the linked list but unless we check + this by holding the pi->mu lock, we cannot be sure (i.e without the + pi->mu lock, we don't prevent island merges). + To be absolutely sure, check once more by holding the pi->mu lock */ + gpr_mu_lock(&pi->mu); + next = (polling_island *)gpr_atm_acq_load(&pi->merged_to); + if (next == NULL) { + /* pi is infact the last node and we have the pi->mu lock. we're done */ + break; + } + + /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu + * isn't the lock we are interested in. Continue traversing the list */ + gpr_mu_unlock(&pi->mu); + } + + pi = next; + } + + return pi; +} + +/* Gets the lock on the *latest* polling islands in the linked lists pointed by + *p and *q (and also updates *p and *q to point to the latest polling islands) + + This function is needed because calling the following block of code to obtain + locks on polling islands (*p and *q) is prone to deadlocks. + { + polling_island_lock(*p, true); + polling_island_lock(*q, true); + } + + Usage/example: + polling_island *p1; + polling_island *p2; + .. + polling_island_lock_pair(&p1, &p2); + .. + .. Critical section with both p1 and p2 locked + .. + // Release locks: Always call polling_island_unlock_pair() to release locks + polling_island_unlock_pair(p1, p2); +*/ +static void polling_island_lock_pair(polling_island **p, polling_island **q) { + polling_island *pi_1 = *p; + polling_island *pi_2 = *q; + polling_island *next_1 = NULL; + polling_island *next_2 = NULL; + + /* The algorithm is simple: + - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and + keep updating pi_1 and pi_2) + - Then obtain locks on the islands by following a lock order rule of + locking polling_island with lower address first + Special case: Before obtaining the locks, check if pi_1 and pi_2 are + pointing to the same island. If that is the case, we can just call + polling_island_lock() + - After obtaining both the locks, double check that the polling islands + are still the last polling islands in their respective linked lists + (this is because there might have been polling island merges before + we got the lock) + - If the polling islands are the last islands, we are done. If not, + release the locks and continue the process from the first step */ + while (true) { + next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to); + while (next_1 != NULL) { + pi_1 = next_1; + next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to); + } + + next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to); + while (next_2 != NULL) { + pi_2 = next_2; + next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to); + } + + if (pi_1 == pi_2) { + pi_1 = pi_2 = polling_island_lock(pi_1); + break; + } + + if (pi_1 < pi_2) { + gpr_mu_lock(&pi_1->mu); + gpr_mu_lock(&pi_2->mu); + } else { + gpr_mu_lock(&pi_2->mu); + gpr_mu_lock(&pi_1->mu); + } + + next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to); + next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to); + if (next_1 == NULL && next_2 == NULL) { + break; + } + + gpr_mu_unlock(&pi_1->mu); + gpr_mu_unlock(&pi_2->mu); + } + + *p = pi_1; + *q = pi_2; +} + +static void polling_island_unlock_pair(polling_island *p, polling_island *q) { + if (p == q) { + gpr_mu_unlock(&p->mu); + } else { + gpr_mu_unlock(&p->mu); + gpr_mu_unlock(&q->mu); + } +} + +static polling_island *polling_island_merge(polling_island *p, + polling_island *q, + grpc_error **error) { + /* Get locks on both the polling islands */ + polling_island_lock_pair(&p, &q); + + if (p != q) { + /* Make sure that p points to the polling island with fewer fds than q */ + if (p->fd_cnt > q->fd_cnt) { + GPR_SWAP(polling_island *, p, q); + } + + /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q + Note that the refcounts on the fds being moved will not change here. + This is why the last param in the following two functions is 'false') */ + polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error); + polling_island_remove_all_fds_locked(p, false, error); + + /* Wakeup all the pollers (if any) on p so that they pickup this change */ + polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error); + + /* Add the 'merged_to' link from p --> q */ + gpr_atm_rel_store(&p->merged_to, (gpr_atm)q); + PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */ + } + /* else if p == q, nothing needs to be done */ + + polling_island_unlock_pair(p, q); + + /* Return the merged polling island (Note that no merge would have happened + if p == q which is ok) */ + return q; +} + +static grpc_error *polling_island_global_init() { + grpc_error *error = GRPC_ERROR_NONE; + + error = grpc_wakeup_fd_init(&polling_island_wakeup_fd); + if (error == GRPC_ERROR_NONE) { + error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd); + } + + return error; +} + +static void polling_island_global_shutdown() { + grpc_wakeup_fd_destroy(&polling_island_wakeup_fd); +} + +/******************************************************************************* + * Fd Definitions + */ + +/* We need to keep a freelist not because of any concerns of malloc performance + * but instead so that implementations with multiple threads in (for example) + * epoll_wait deal with the race between pollset removal and incoming poll + * notifications. + * + * The problem is that the poller ultimately holds a reference to this + * object, so it is very difficult to know when is safe to free it, at least + * without some expensive synchronization. + * + * If we keep the object freelisted, in the worst case losing this race just + * becomes a spurious read notification on a reused fd. + */ + +/* The alarm system needs to be able to wakeup 'some poller' sometimes + * (specifically when a new alarm needs to be triggered earlier than the next + * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a + * case occurs. */ + +static grpc_fd *fd_freelist = NULL; +static gpr_mu fd_freelist_mu; + +#ifndef NDEBUG +#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__) +#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__) +static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file, + int line) { + if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { + gpr_log(GPR_DEBUG, + "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", + fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), + gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line); + } +#else +#define REF_BY(fd, n, reason) ref_by(fd, n) +#define UNREF_BY(fd, n, reason) unref_by(fd, n) +static void ref_by(grpc_fd *fd, int n) { +#endif + GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0); +} + +#ifndef NDEBUG +static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file, + int line) { + if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { + gpr_log(GPR_DEBUG, + "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", + fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), + gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line); + } +#else +static void unref_by(grpc_fd *fd, int n) { +#endif + gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n); + if (old == n) { + /* Add the fd to the freelist */ + gpr_mu_lock(&fd_freelist_mu); + fd->freelist_next = fd_freelist; + fd_freelist = fd; + grpc_iomgr_unregister_object(&fd->iomgr_object); + + grpc_lfev_destroy(&fd->read_closure); + grpc_lfev_destroy(&fd->write_closure); + + gpr_mu_unlock(&fd_freelist_mu); + } else { + GPR_ASSERT(old > n); + } +} + +/* Increment refcount by two to avoid changing the orphan bit */ +#ifndef NDEBUG +static void fd_ref(grpc_fd *fd, const char *reason, const char *file, + int line) { + ref_by(fd, 2, reason, file, line); +} + +static void fd_unref(grpc_fd *fd, const char *reason, const char *file, + int line) { + unref_by(fd, 2, reason, file, line); +} +#else +static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); } +static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); } +#endif + +static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); } + +static void fd_global_shutdown(void) { + gpr_mu_lock(&fd_freelist_mu); + gpr_mu_unlock(&fd_freelist_mu); + while (fd_freelist != NULL) { + grpc_fd *fd = fd_freelist; + fd_freelist = fd_freelist->freelist_next; + gpr_mu_destroy(&fd->po.mu); + gpr_free(fd); + } + gpr_mu_destroy(&fd_freelist_mu); +} + +static grpc_fd *fd_create(int fd, const char *name) { + grpc_fd *new_fd = NULL; + + gpr_mu_lock(&fd_freelist_mu); + if (fd_freelist != NULL) { + new_fd = fd_freelist; + fd_freelist = fd_freelist->freelist_next; + } + gpr_mu_unlock(&fd_freelist_mu); + + if (new_fd == NULL) { + new_fd = (grpc_fd *)gpr_malloc(sizeof(grpc_fd)); + gpr_mu_init(&new_fd->po.mu); + } + + /* Note: It is not really needed to get the new_fd->po.mu lock here. If this + * is a newly created fd (or an fd we got from the freelist), no one else + * would be holding a lock to it anyway. */ + gpr_mu_lock(&new_fd->po.mu); + new_fd->po.pi = NULL; +#ifndef NDEBUG + new_fd->po.obj_type = POLL_OBJ_FD; +#endif + + gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1); + new_fd->fd = fd; + new_fd->orphaned = false; + grpc_lfev_init(&new_fd->read_closure); + grpc_lfev_init(&new_fd->write_closure); + gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL); + + new_fd->freelist_next = NULL; + new_fd->on_done_closure = NULL; + + gpr_mu_unlock(&new_fd->po.mu); + + char *fd_name; + gpr_asprintf(&fd_name, "%s fd=%d", name, fd); + grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name); + gpr_free(fd_name); + return new_fd; +} + +static int fd_wrapped_fd(grpc_fd *fd) { + int ret_fd = -1; + gpr_mu_lock(&fd->po.mu); + if (!fd->orphaned) { + ret_fd = fd->fd; + } + gpr_mu_unlock(&fd->po.mu); + + return ret_fd; +} + +static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *on_done, int *release_fd, + bool already_closed, const char *reason) { + grpc_error *error = GRPC_ERROR_NONE; + polling_island *unref_pi = NULL; + + gpr_mu_lock(&fd->po.mu); + fd->on_done_closure = on_done; + + /* Remove the active status but keep referenced. We want this grpc_fd struct + to be alive (and not added to freelist) until the end of this function */ + REF_BY(fd, 1, reason); + + /* Remove the fd from the polling island: + - Get a lock on the latest polling island (i.e the last island in the + linked list pointed by fd->po.pi). This is the island that + would actually contain the fd + - Remove the fd from the latest polling island + - Unlock the latest polling island + - Set fd->po.pi to NULL (but remove the ref on the polling island + before doing this.) */ + if (fd->po.pi != NULL) { + polling_island *pi_latest = polling_island_lock(fd->po.pi); + polling_island_remove_fd_locked(pi_latest, fd, already_closed, &error); + gpr_mu_unlock(&pi_latest->mu); + + unref_pi = fd->po.pi; + fd->po.pi = NULL; + } + + /* If release_fd is not NULL, we should be relinquishing control of the file + descriptor fd->fd (but we still own the grpc_fd structure). */ + if (release_fd != NULL) { + *release_fd = fd->fd; + } else { + close(fd->fd); + } + + fd->orphaned = true; + + GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error)); + + gpr_mu_unlock(&fd->po.mu); + UNREF_BY(fd, 2, reason); /* Drop the reference */ + if (unref_pi != NULL) { + /* Unref stale polling island here, outside the fd lock above. + The polling island owns a workqueue which owns an fd, and unreffing + inside the lock can cause an eventual lock loop that makes TSAN very + unhappy. */ + PI_UNREF(exec_ctx, unref_pi, "fd_orphan"); + } + if (error != GRPC_ERROR_NONE) { + const char *msg = grpc_error_string(error); + gpr_log(GPR_DEBUG, "fd_orphan: %s", msg); + } + GRPC_ERROR_UNREF(error); +} + +static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, + grpc_fd *fd) { + gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset); + return (grpc_pollset *)notifier; +} + +static bool fd_is_shutdown(grpc_fd *fd) { + return grpc_lfev_is_shutdown(&fd->read_closure); +} + +/* Might be called multiple times */ +static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) { + if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure, + GRPC_ERROR_REF(why))) { + shutdown(fd->fd, SHUT_RDWR); + grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why)); + } + GRPC_ERROR_UNREF(why); +} + +static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure, "read"); +} + +static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure, "write"); +} + +/******************************************************************************* + * Pollset Definitions + */ +GPR_TLS_DECL(g_current_thread_pollset); +GPR_TLS_DECL(g_current_thread_worker); +static __thread bool g_initialized_sigmask; +static __thread sigset_t g_orig_sigmask; + +static void sig_handler(int sig_num) { +#ifdef GRPC_EPOLL_DEBUG + gpr_log(GPR_INFO, "Received signal %d", sig_num); +#endif +} + +static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); } + +/* Global state management */ +static grpc_error *pollset_global_init(void) { + gpr_tls_init(&g_current_thread_pollset); + gpr_tls_init(&g_current_thread_worker); + poller_kick_init(); + return GRPC_ERROR_NONE; +} + +static void pollset_global_shutdown(void) { + gpr_tls_destroy(&g_current_thread_pollset); + gpr_tls_destroy(&g_current_thread_worker); +} + +static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) { + grpc_error *err = GRPC_ERROR_NONE; + + /* Kick the worker only if it was not already kicked */ + if (gpr_atm_no_barrier_cas(&worker->is_kicked, (gpr_atm)0, (gpr_atm)1)) { + GRPC_POLLING_TRACE( + "pollset_worker_kick: Kicking worker: %p (thread id: %ld)", + (void *)worker, (long int)worker->pt_id); + int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal); + if (err_num != 0) { + err = GRPC_OS_ERROR(err_num, "pthread_kill"); + } + } + return err; +} + +/* Return 1 if the pollset has active threads in pollset_work (pollset must + * be locked) */ +static int pollset_has_workers(grpc_pollset *p) { + return p->root_worker.next != &p->root_worker; +} + +static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->prev->next = worker->next; + worker->next->prev = worker->prev; +} + +static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) { + if (pollset_has_workers(p)) { + grpc_pollset_worker *w = p->root_worker.next; + remove_worker(p, w); + return w; + } else { + return NULL; + } +} + +static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->next = &p->root_worker; + worker->prev = worker->next->prev; + worker->prev->next = worker->next->prev = worker; +} + +static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->prev = &p->root_worker; + worker->next = worker->prev->next; + worker->prev->next = worker->next->prev = worker; +} + +/* p->mu must be held before calling this function */ +static grpc_error *pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *p, + grpc_pollset_worker *specific_worker) { + GPR_TIMER_BEGIN("pollset_kick", 0); + grpc_error *error = GRPC_ERROR_NONE; + GRPC_STATS_INC_POLLSET_KICK(exec_ctx); + const char *err_desc = "Kick Failure"; + grpc_pollset_worker *worker = specific_worker; + if (worker != NULL) { + if (worker == GRPC_POLLSET_KICK_BROADCAST) { + if (pollset_has_workers(p)) { + GPR_TIMER_BEGIN("pollset_kick.broadcast", 0); + for (worker = p->root_worker.next; worker != &p->root_worker; + worker = worker->next) { + if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) { + append_error(&error, pollset_worker_kick(worker), err_desc); + } + } + GPR_TIMER_END("pollset_kick.broadcast", 0); + } else { + p->kicked_without_pollers = true; + } + } else { + GPR_TIMER_MARK("kicked_specifically", 0); + if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) { + append_error(&error, pollset_worker_kick(worker), err_desc); + } + } + } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) { + /* Since worker == NULL, it means that we can kick "any" worker on this + pollset 'p'. If 'p' happens to be the same pollset this thread is + currently polling (i.e in pollset_work() function), then there is no need + to kick any other worker since the current thread can just absorb the + kick. This is the reason why we enter this case only when + g_current_thread_pollset is != p */ + + GPR_TIMER_MARK("kick_anonymous", 0); + worker = pop_front_worker(p); + if (worker != NULL) { + GPR_TIMER_MARK("finally_kick", 0); + push_back_worker(p, worker); + append_error(&error, pollset_worker_kick(worker), err_desc); + } else { + GPR_TIMER_MARK("kicked_no_pollers", 0); + p->kicked_without_pollers = true; + } + } + + GPR_TIMER_END("pollset_kick", 0); + GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error)); + return error; +} + +static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { + gpr_mu_init(&pollset->po.mu); + *mu = &pollset->po.mu; + pollset->po.pi = NULL; +#ifndef NDEBUG + pollset->po.obj_type = POLL_OBJ_POLLSET; +#endif + + pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker; + pollset->kicked_without_pollers = false; + + pollset->shutting_down = false; + pollset->finish_shutdown_called = false; + pollset->shutdown_done = NULL; +} + +/* Convert a timespec to milliseconds: + - Very small or negative poll times are clamped to zero to do a non-blocking + poll (which becomes spin polling) + - Other small values are rounded up to one millisecond + - Longer than a millisecond polls are rounded up to the next nearest + millisecond to avoid spinning + - Infinite timeouts are converted to -1 */ +static int poll_deadline_to_millis_timeout(gpr_timespec deadline, + gpr_timespec now) { + gpr_timespec timeout; + static const int64_t max_spin_polling_us = 10; + if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { + return -1; + } + + if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros( + max_spin_polling_us, + GPR_TIMESPAN))) <= 0) { + return 0; + } + timeout = gpr_time_sub(deadline, now); + int millis = gpr_time_to_millis(gpr_time_add( + timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); + return millis >= 1 ? millis : 1; +} + +static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_pollset *notifier) { + grpc_lfev_set_ready(exec_ctx, &fd->read_closure, "read"); + + /* Note, it is possible that fd_become_readable might be called twice with + different 'notifier's when an fd becomes readable and it is in two epoll + sets (This can happen briefly during polling island merges). In such cases + it does not really matter which notifer is set as the read_notifier_pollset + (They would both point to the same polling island anyway) */ + /* Use release store to match with acquire load in fd_get_read_notifier */ + gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier); +} + +static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + grpc_lfev_set_ready(exec_ctx, &fd->write_closure, "write"); +} + +static void pollset_release_polling_island(grpc_exec_ctx *exec_ctx, + grpc_pollset *ps, + const char *reason) { + if (ps->po.pi != NULL) { + PI_UNREF(exec_ctx, ps->po.pi, reason); + } + ps->po.pi = NULL; +} + +static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset) { + /* The pollset cannot have any workers if we are at this stage */ + GPR_ASSERT(!pollset_has_workers(pollset)); + + pollset->finish_shutdown_called = true; + + /* Release the ref and set pollset->po.pi to NULL */ + pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown"); + GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE); +} + +/* pollset->po.mu lock must be held by the caller before calling this */ +static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_closure *closure) { + GPR_TIMER_BEGIN("pollset_shutdown", 0); + GPR_ASSERT(!pollset->shutting_down); + pollset->shutting_down = true; + pollset->shutdown_done = closure; + pollset_kick(exec_ctx, pollset, GRPC_POLLSET_KICK_BROADCAST); + + /* If the pollset has any workers, we cannot call finish_shutdown_locked() + because it would release the underlying polling island. In such a case, we + let the last worker call finish_shutdown_locked() from pollset_work() */ + if (!pollset_has_workers(pollset)) { + GPR_ASSERT(!pollset->finish_shutdown_called); + GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0); + finish_shutdown_locked(exec_ctx, pollset); + } + GPR_TIMER_END("pollset_shutdown", 0); +} + +/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other + * than destroying the mutexes, there is nothing special that needs to be done + * here */ +static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { + GPR_ASSERT(!pollset_has_workers(pollset)); + gpr_mu_destroy(&pollset->po.mu); +} + +#define GRPC_EPOLL_MAX_EVENTS 100 +/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */ +static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx, + grpc_pollset *pollset, + grpc_pollset_worker *worker, int timeout_ms, + sigset_t *sig_mask, grpc_error **error) { + struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS]; + int epoll_fd = -1; + int ep_rv; + polling_island *pi = NULL; + char *err_msg; + const char *err_desc = "pollset_work_and_unlock"; + GPR_TIMER_BEGIN("pollset_work_and_unlock", 0); + + /* We need to get the epoll_fd to wait on. The epoll_fd is in inside the + latest polling island pointed by pollset->po.pi + + Since epoll_fd is immutable, we can read it without obtaining the polling + island lock. There is however a possibility that the polling island (from + which we got the epoll_fd) got merged with another island while we are + in this function. This is still okay because in such a case, we will wakeup + right-away from epoll_wait() and pick up the latest polling_island the next + this function (i.e pollset_work_and_unlock()) is called */ + + if (pollset->po.pi == NULL) { + pollset->po.pi = polling_island_create(exec_ctx, NULL, error); + if (pollset->po.pi == NULL) { + GPR_TIMER_END("pollset_work_and_unlock", 0); + return; /* Fatal error. We cannot continue */ + } + + PI_ADD_REF(pollset->po.pi, "ps"); + GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p", + (void *)pollset, (void *)pollset->po.pi); + } + + pi = polling_island_maybe_get_latest(pollset->po.pi); + epoll_fd = pi->epoll_fd; + + /* Update the pollset->po.pi since the island being pointed by + pollset->po.pi maybe older than the one pointed by pi) */ + if (pollset->po.pi != pi) { + /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the + polling island to be deleted */ + PI_ADD_REF(pi, "ps"); + PI_UNREF(exec_ctx, pollset->po.pi, "ps"); + pollset->po.pi = pi; + } + + /* Add an extra ref so that the island does not get destroyed (which means + the epoll_fd won't be closed) while we are are doing an epoll_wait() on the + epoll_fd */ + PI_ADD_REF(pi, "ps_work"); + gpr_mu_unlock(&pollset->po.mu); + + gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1); + g_current_thread_polling_island = pi; + + GRPC_SCHEDULING_START_BLOCKING_REGION; + GRPC_STATS_INC_SYSCALL_POLL(exec_ctx); + ep_rv = + epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask); + GRPC_SCHEDULING_END_BLOCKING_REGION; + if (ep_rv < 0) { + if (errno != EINTR) { + gpr_asprintf(&err_msg, + "epoll_wait() epoll fd: %d failed with error: %d (%s)", + epoll_fd, errno, strerror(errno)); + append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc); + } else { + /* We were interrupted. Save an interation by doing a zero timeout + epoll_wait to see if there are any other events of interest */ + GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick", + (void *)pollset, (void *)worker); + ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0); + } + } + +#ifdef GRPC_TSAN + /* See the definition of g_poll_sync for more details */ + gpr_atm_acq_load(&g_epoll_sync); +#endif /* defined(GRPC_TSAN) */ + + for (int i = 0; i < ep_rv; ++i) { + void *data_ptr = ep_ev[i].data.ptr; + if (data_ptr == &polling_island_wakeup_fd) { + GRPC_POLLING_TRACE( + "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: " + "%d) got merged", + (void *)pollset, (void *)worker, epoll_fd); + /* This means that our polling island is merged with a different + island. We do not have to do anything here since the subsequent call + to the function pollset_work_and_unlock() will pick up the correct + epoll_fd */ + } else { + grpc_fd *fd = (grpc_fd *)data_ptr; + int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP); + int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI); + int write_ev = ep_ev[i].events & EPOLLOUT; + if (read_ev || cancel) { + fd_become_readable(exec_ctx, fd, pollset); + } + if (write_ev || cancel) { + fd_become_writable(exec_ctx, fd); + } + } + } + + g_current_thread_polling_island = NULL; + gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1); + + GPR_ASSERT(pi != NULL); + + /* Before leaving, release the extra ref we added to the polling island. It + is important to use "pi" here (i.e our old copy of pollset->po.pi + that we got before releasing the polling island lock). This is because + pollset->po.pi pointer might get udpated in other parts of the + code when there is an island merge while we are doing epoll_wait() above */ + PI_UNREF(exec_ctx, pi, "ps_work"); + + GPR_TIMER_END("pollset_work_and_unlock", 0); +} + +/* pollset->po.mu lock must be held by the caller before calling this. + The function pollset_work() may temporarily release the lock (pollset->po.mu) + during the course of its execution but it will always re-acquire the lock and + ensure that it is held by the time the function returns */ +static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker **worker_hdl, + gpr_timespec now, gpr_timespec deadline) { + GPR_TIMER_BEGIN("pollset_work", 0); + grpc_error *error = GRPC_ERROR_NONE; + int timeout_ms = poll_deadline_to_millis_timeout(deadline, now); + + sigset_t new_mask; + + grpc_pollset_worker worker; + worker.next = worker.prev = NULL; + worker.pt_id = pthread_self(); + gpr_atm_no_barrier_store(&worker.is_kicked, (gpr_atm)0); + + if (worker_hdl) *worker_hdl = &worker; + + gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset); + gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); + + if (pollset->kicked_without_pollers) { + /* If the pollset was kicked without pollers, pretend that the current + worker got the kick and skip polling. A kick indicates that there is some + work that needs attention like an event on the completion queue or an + alarm */ + GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); + pollset->kicked_without_pollers = 0; + } else if (!pollset->shutting_down) { + /* We use the posix-signal with number 'grpc_wakeup_signal' for waking up + (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the + worker that there is some pending work that needs immediate attention + (like an event on the completion queue, or a polling island merge that + results in a new epoll-fd to wait on) and that the worker should not + spend time waiting in epoll_pwait(). + + A worker can be kicked anytime from the point it is added to the pollset + via push_front_worker() (or push_back_worker()) to the point it is + removed via remove_worker(). + If the worker is kicked before/during it calls epoll_pwait(), it should + immediately exit from epoll_wait(). If the worker is kicked after it + returns from epoll_wait(), then nothing really needs to be done. + + To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all + times *except* when it is in epoll_pwait(). This way, the worker never + misses acting on a kick */ + + if (!g_initialized_sigmask) { + sigemptyset(&new_mask); + sigaddset(&new_mask, grpc_wakeup_signal); + pthread_sigmask(SIG_BLOCK, &new_mask, &g_orig_sigmask); + sigdelset(&g_orig_sigmask, grpc_wakeup_signal); + g_initialized_sigmask = true; + /* new_mask: The new thread mask which blocks 'grpc_wakeup_signal'. + This is the mask used at all times *except during + epoll_wait()*" + g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and + this is the mask to use *during epoll_wait()* + + The new_mask is set on the worker before it is added to the pollset + (i.e before it can be kicked) */ + } + + push_front_worker(pollset, &worker); /* Add worker to pollset */ + + pollset_work_and_unlock(exec_ctx, pollset, &worker, timeout_ms, + &g_orig_sigmask, &error); + grpc_exec_ctx_flush(exec_ctx); + + gpr_mu_lock(&pollset->po.mu); + + /* Note: There is no need to reset worker.is_kicked to 0 since we are no + longer going to use this worker */ + remove_worker(pollset, &worker); + } + + /* If we are the last worker on the pollset (i.e pollset_has_workers() is + false at this point) and the pollset is shutting down, we may have to + finish the shutdown process by calling finish_shutdown_locked(). + See pollset_shutdown() for more details. + + Note: Continuing to access pollset here is safe; it is the caller's + responsibility to not destroy a pollset when it has outstanding calls to + pollset_work() */ + if (pollset->shutting_down && !pollset_has_workers(pollset) && + !pollset->finish_shutdown_called) { + GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0); + finish_shutdown_locked(exec_ctx, pollset); + + gpr_mu_unlock(&pollset->po.mu); + grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&pollset->po.mu); + } + + if (worker_hdl) *worker_hdl = NULL; + + gpr_tls_set(&g_current_thread_pollset, (intptr_t)0); + gpr_tls_set(&g_current_thread_worker, (intptr_t)0); + + GPR_TIMER_END("pollset_work", 0); + + GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error)); + return error; +} + +static void add_poll_object(grpc_exec_ctx *exec_ctx, poll_obj *bag, + poll_obj_type bag_type, poll_obj *item, + poll_obj_type item_type) { + GPR_TIMER_BEGIN("add_poll_object", 0); + +#ifndef NDEBUG + GPR_ASSERT(item->obj_type == item_type); + GPR_ASSERT(bag->obj_type == bag_type); +#endif + + grpc_error *error = GRPC_ERROR_NONE; + polling_island *pi_new = NULL; + + gpr_mu_lock(&bag->mu); + gpr_mu_lock(&item->mu); + +retry: + /* + * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing + * 2) If item->pi and bag->pi are both NULL, create a new polling island (with + * a refcount of 2) and point item->pi and bag->pi to the new island + * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to + * the other's non-NULL pi + * 4) Finally if item->pi and bag-pi are non-NULL and not-equal, merge the + * polling islands and update item->pi and bag->pi to point to the new + * island + */ + + /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already + * orphaned */ + if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) { + gpr_mu_unlock(&item->mu); + gpr_mu_unlock(&bag->mu); + return; + } + + if (item->pi == bag->pi) { + pi_new = item->pi; + if (pi_new == NULL) { + /* GPR_ASSERT(item->pi == bag->pi == NULL) */ + + /* If we are adding an fd to a bag (i.e pollset or pollset_set), then + * we need to do some extra work to make TSAN happy */ + if (item_type == POLL_OBJ_FD) { + /* Unlock before creating a new polling island: the polling island will + create a workqueue which creates a file descriptor, and holding an fd + lock here can eventually cause a loop to appear to TSAN (making it + unhappy). We don't think it's a real loop (there's an epoch point + where that loop possibility disappears), but the advantages of + keeping TSAN happy outweigh any performance advantage we might have + by keeping the lock held. */ + gpr_mu_unlock(&item->mu); + pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error); + gpr_mu_lock(&item->mu); + + /* Need to reverify any assumptions made between the initial lock and + getting to this branch: if they've changed, we need to throw away our + work and figure things out again. */ + if (item->pi != NULL) { + GRPC_POLLING_TRACE( + "add_poll_object: Raced creating new polling island. pi_new: %p " + "(fd: %d, %s: %p)", + (void *)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type), + (void *)bag); + /* No need to lock 'pi_new' here since this is a new polling island + and no one has a reference to it yet */ + polling_island_remove_all_fds_locked(pi_new, true, &error); + + /* Ref and unref so that the polling island gets deleted during unref + */ + PI_ADD_REF(pi_new, "dance_of_destruction"); + PI_UNREF(exec_ctx, pi_new, "dance_of_destruction"); + goto retry; + } + } else { + pi_new = polling_island_create(exec_ctx, NULL, &error); + } + + GRPC_POLLING_TRACE( + "add_poll_object: Created new polling island. pi_new: %p (%s: %p, " + "%s: %p)", + (void *)pi_new, poll_obj_string(item_type), (void *)item, + poll_obj_string(bag_type), (void *)bag); + } else { + GRPC_POLLING_TRACE( + "add_poll_object: Same polling island. pi: %p (%s, %s)", + (void *)pi_new, poll_obj_string(item_type), + poll_obj_string(bag_type)); + } + } else if (item->pi == NULL) { + /* GPR_ASSERT(bag->pi != NULL) */ + /* Make pi_new point to latest pi*/ + pi_new = polling_island_lock(bag->pi); + + if (item_type == POLL_OBJ_FD) { + grpc_fd *fd = FD_FROM_PO(item); + polling_island_add_fds_locked(pi_new, &fd, 1, true, &error); + } + + gpr_mu_unlock(&pi_new->mu); + GRPC_POLLING_TRACE( + "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, " + "bag(%s): %p)", + (void *)pi_new, poll_obj_string(item_type), (void *)item, + poll_obj_string(bag_type), (void *)bag); + } else if (bag->pi == NULL) { + /* GPR_ASSERT(item->pi != NULL) */ + /* Make pi_new to point to latest pi */ + pi_new = polling_island_lock(item->pi); + gpr_mu_unlock(&pi_new->mu); + GRPC_POLLING_TRACE( + "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, " + "bag(%s): %p)", + (void *)pi_new, poll_obj_string(item_type), (void *)item, + poll_obj_string(bag_type), (void *)bag); + } else { + pi_new = polling_island_merge(item->pi, bag->pi, &error); + GRPC_POLLING_TRACE( + "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, " + "bag(%s): %p)", + (void *)pi_new, poll_obj_string(item_type), (void *)item, + poll_obj_string(bag_type), (void *)bag); + } + + /* At this point, pi_new is the polling island that both item->pi and bag->pi + MUST be pointing to */ + + if (item->pi != pi_new) { + PI_ADD_REF(pi_new, poll_obj_string(item_type)); + if (item->pi != NULL) { + PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type)); + } + item->pi = pi_new; + } + + if (bag->pi != pi_new) { + PI_ADD_REF(pi_new, poll_obj_string(bag_type)); + if (bag->pi != NULL) { + PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type)); + } + bag->pi = pi_new; + } + + gpr_mu_unlock(&item->mu); + gpr_mu_unlock(&bag->mu); + + GRPC_LOG_IF_ERROR("add_poll_object", error); + GPR_TIMER_END("add_poll_object", 0); +} + +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_fd *fd) { + add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po, + POLL_OBJ_FD); +} + +/******************************************************************************* + * Pollset-set Definitions + */ + +static grpc_pollset_set *pollset_set_create(void) { + grpc_pollset_set *pss = (grpc_pollset_set *)gpr_malloc(sizeof(*pss)); + gpr_mu_init(&pss->po.mu); + pss->po.pi = NULL; +#ifndef NDEBUG + pss->po.obj_type = POLL_OBJ_POLLSET_SET; +#endif + return pss; +} + +static void pollset_set_destroy(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pss) { + gpr_mu_destroy(&pss->po.mu); + + if (pss->po.pi != NULL) { + PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy"); + } + + gpr_free(pss); +} + +static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, + grpc_fd *fd) { + add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po, + POLL_OBJ_FD); +} + +static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss, + grpc_fd *fd) { + /* Nothing to do */ +} + +static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pss, grpc_pollset *ps) { + add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po, + POLL_OBJ_POLLSET); +} + +static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pss, grpc_pollset *ps) { + /* Nothing to do */ +} + +static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) { + add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po, + POLL_OBJ_POLLSET_SET); +} + +static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) { + /* Nothing to do */ +} + +/* Test helper functions + * */ +void *grpc_fd_get_polling_island(grpc_fd *fd) { + polling_island *pi; + + gpr_mu_lock(&fd->po.mu); + pi = fd->po.pi; + gpr_mu_unlock(&fd->po.mu); + + return pi; +} + +void *grpc_pollset_get_polling_island(grpc_pollset *ps) { + polling_island *pi; + + gpr_mu_lock(&ps->po.mu); + pi = ps->po.pi; + gpr_mu_unlock(&ps->po.mu); + + return pi; +} + +bool grpc_are_polling_islands_equal(void *p, void *q) { + polling_island *p1 = (polling_island *)p; + polling_island *p2 = (polling_island *)q; + + /* Note: polling_island_lock_pair() may change p1 and p2 to point to the + latest polling islands in their respective linked lists */ + polling_island_lock_pair(&p1, &p2); + polling_island_unlock_pair(p1, p2); + + return p1 == p2; +} + +/******************************************************************************* + * Event engine binding + */ + +static void shutdown_engine(void) { + fd_global_shutdown(); + pollset_global_shutdown(); + polling_island_global_shutdown(); +} + +static const grpc_event_engine_vtable vtable = { + sizeof(grpc_pollset), + + fd_create, + fd_wrapped_fd, + fd_orphan, + fd_shutdown, + fd_notify_on_read, + fd_notify_on_write, + fd_is_shutdown, + fd_get_read_notifier_pollset, + + pollset_init, + pollset_shutdown, + pollset_destroy, + pollset_work, + pollset_kick, + pollset_add_fd, + + pollset_set_create, + pollset_set_destroy, + pollset_set_add_pollset, + pollset_set_del_pollset, + pollset_set_add_pollset_set, + pollset_set_del_pollset_set, + pollset_set_add_fd, + pollset_set_del_fd, + + shutdown_engine, +}; + +/* It is possible that GLIBC has epoll but the underlying kernel doesn't. + * Create a dummy epoll_fd to make sure epoll support is available */ +static bool is_epoll_available() { + int fd = epoll_create1(EPOLL_CLOEXEC); + if (fd < 0) { + gpr_log( + GPR_ERROR, + "epoll_create1 failed with error: %d. Not using epoll polling engine", + fd); + return false; + } + close(fd); + return true; +} + +const grpc_event_engine_vtable *grpc_init_epollsig_linux( + bool explicit_request) { + /* If use of signals is disabled, we cannot use epoll engine*/ + if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) { + return NULL; + } + + if (!grpc_has_wakeup_fd()) { + return NULL; + } + + if (!is_epoll_available()) { + return NULL; + } + + if (!is_grpc_wakeup_signal_initialized) { + if (explicit_request) { + grpc_use_signal(SIGRTMIN + 6); + } else { + return NULL; + } + } + + fd_global_init(); + + if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) { + return NULL; + } + + if (!GRPC_LOG_IF_ERROR("polling_island_global_init", + polling_island_global_init())) { + return NULL; + } + + return &vtable; +} + +#else /* defined(GRPC_LINUX_EPOLL) */ +#if defined(GRPC_POSIX_SOCKET) +#include "src/core/lib/iomgr/ev_posix.h" +/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return + * NULL */ +const grpc_event_engine_vtable *grpc_init_epollsig_linux( + bool explicit_request) { + return NULL; +} +#endif /* defined(GRPC_POSIX_SOCKET) */ + +void grpc_use_signal(int signum) {} +#endif /* !defined(GRPC_LINUX_EPOLL) */ diff --git a/src/core/lib/iomgr/ev_poll_posix.c b/src/core/lib/iomgr/ev_poll_posix.c deleted file mode 100644 index e170702dca..0000000000 --- a/src/core/lib/iomgr/ev_poll_posix.c +++ /dev/null @@ -1,1746 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/iomgr/ev_poll_posix.h" - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "src/core/lib/debug/stats.h" -#include "src/core/lib/iomgr/iomgr_internal.h" -#include "src/core/lib/iomgr/timer.h" -#include "src/core/lib/iomgr/wakeup_fd_cv.h" -#include "src/core/lib/iomgr/wakeup_fd_posix.h" -#include "src/core/lib/profiling/timers.h" -#include "src/core/lib/support/block_annotate.h" -#include "src/core/lib/support/murmur_hash.h" - -#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1) - -/******************************************************************************* - * FD declarations - */ - -typedef struct grpc_fd_watcher { - struct grpc_fd_watcher *next; - struct grpc_fd_watcher *prev; - grpc_pollset *pollset; - grpc_pollset_worker *worker; - grpc_fd *fd; -} grpc_fd_watcher; - -struct grpc_fd { - int fd; - /* refst format: - bit0: 1=active/0=orphaned - bit1-n: refcount - meaning that mostly we ref by two to avoid altering the orphaned bit, - and just unref by 1 when we're ready to flag the object as orphaned */ - gpr_atm refst; - - gpr_mu mu; - int shutdown; - int closed; - int released; - grpc_error *shutdown_error; - - /* The watcher list. - - The following watcher related fields are protected by watcher_mu. - - An fd_watcher is an ephemeral object created when an fd wants to - begin polling, and destroyed after the poll. - - It denotes the fd's interest in whether to read poll or write poll - or both or neither on this fd. - - If a watcher is asked to poll for reads or writes, the read_watcher - or write_watcher fields are set respectively. A watcher may be asked - to poll for both, in which case both fields will be set. - - read_watcher and write_watcher may be NULL if no watcher has been - asked to poll for reads or writes. - - If an fd_watcher is not asked to poll for reads or writes, it's added - to a linked list of inactive watchers, rooted at inactive_watcher_root. - If at a later time there becomes need of a poller to poll, one of - the inactive pollers may be kicked out of their poll loops to take - that responsibility. */ - grpc_fd_watcher inactive_watcher_root; - grpc_fd_watcher *read_watcher; - grpc_fd_watcher *write_watcher; - - grpc_closure *read_closure; - grpc_closure *write_closure; - - grpc_closure *on_done_closure; - - grpc_iomgr_object iomgr_object; - - /* The pollset that last noticed and notified that the fd is readable */ - grpc_pollset *read_notifier_pollset; -}; - -/* Begin polling on an fd. - Registers that the given pollset is interested in this fd - so that if read - or writability interest changes, the pollset can be kicked to pick up that - new interest. - Return value is: - (fd_needs_read? read_mask : 0) | (fd_needs_write? write_mask : 0) - i.e. a combination of read_mask and write_mask determined by the fd's current - interest in said events. - Polling strategies that do not need to alter their behavior depending on the - fd's current interest (such as epoll) do not need to call this function. - MUST NOT be called with a pollset lock taken */ -static uint32_t fd_begin_poll(grpc_fd *fd, grpc_pollset *pollset, - grpc_pollset_worker *worker, uint32_t read_mask, - uint32_t write_mask, grpc_fd_watcher *rec); -/* Complete polling previously started with fd_begin_poll - MUST NOT be called with a pollset lock taken - if got_read or got_write are 1, also does the become_{readable,writable} as - appropriate. */ -static void fd_end_poll(grpc_exec_ctx *exec_ctx, grpc_fd_watcher *rec, - int got_read, int got_write, - grpc_pollset *read_notifier_pollset); - -/* Return 1 if this fd is orphaned, 0 otherwise */ -static bool fd_is_orphaned(grpc_fd *fd); - -#ifndef NDEBUG -static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line); -static void fd_unref(grpc_fd *fd, const char *reason, const char *file, - int line); -#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__) -#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__) -#else -static void fd_ref(grpc_fd *fd); -static void fd_unref(grpc_fd *fd); -#define GRPC_FD_REF(fd, reason) fd_ref(fd) -#define GRPC_FD_UNREF(fd, reason) fd_unref(fd) -#endif - -#define CLOSURE_NOT_READY ((grpc_closure *)0) -#define CLOSURE_READY ((grpc_closure *)1) - -/******************************************************************************* - * pollset declarations - */ - -typedef struct grpc_cached_wakeup_fd { - grpc_wakeup_fd fd; - struct grpc_cached_wakeup_fd *next; -} grpc_cached_wakeup_fd; - -struct grpc_pollset_worker { - grpc_cached_wakeup_fd *wakeup_fd; - int reevaluate_polling_on_wakeup; - int kicked_specifically; - struct grpc_pollset_worker *next; - struct grpc_pollset_worker *prev; -}; - -struct grpc_pollset { - gpr_mu mu; - grpc_pollset_worker root_worker; - int shutting_down; - int called_shutdown; - int kicked_without_pollers; - grpc_closure *shutdown_done; - grpc_closure_list idle_jobs; - int pollset_set_count; - /* all polled fds */ - size_t fd_count; - size_t fd_capacity; - grpc_fd **fds; - /* Local cache of eventfds for workers */ - grpc_cached_wakeup_fd *local_wakeup_cache; -}; - -/* Add an fd to a pollset */ -static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - struct grpc_fd *fd); - -static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, grpc_fd *fd); - -/* Convert a timespec to milliseconds: - - very small or negative poll times are clamped to zero to do a - non-blocking poll (which becomes spin polling) - - other small values are rounded up to one millisecond - - longer than a millisecond polls are rounded up to the next nearest - millisecond to avoid spinning - - infinite timeouts are converted to -1 */ -static int poll_deadline_to_millis_timeout(gpr_timespec deadline, - gpr_timespec now); - -/* Allow kick to wakeup the currently polling worker */ -#define GRPC_POLLSET_CAN_KICK_SELF 1 -/* Force the wakee to repoll when awoken */ -#define GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP 2 -/* As per pollset_kick, with an extended set of flags (defined above) - -- mostly for fd_posix's use. */ -static grpc_error *pollset_kick_ext(grpc_exec_ctx *exec_ctx, grpc_pollset *p, - grpc_pollset_worker *specific_worker, - uint32_t flags) GRPC_MUST_USE_RESULT; - -/* Return 1 if the pollset has active threads in pollset_work (pollset must - * be locked) */ -static bool pollset_has_workers(grpc_pollset *pollset); - -/******************************************************************************* - * pollset_set definitions - */ - -struct grpc_pollset_set { - gpr_mu mu; - - size_t pollset_count; - size_t pollset_capacity; - grpc_pollset **pollsets; - - size_t pollset_set_count; - size_t pollset_set_capacity; - struct grpc_pollset_set **pollset_sets; - - size_t fd_count; - size_t fd_capacity; - grpc_fd **fds; -}; - -/******************************************************************************* - * condition variable polling definitions - */ - -#define POLLCV_THREAD_GRACE_MS 1000 -#define CV_POLL_PERIOD_MS 1000 -#define CV_DEFAULT_TABLE_SIZE 16 - -typedef struct poll_result { - gpr_refcount refcount; - cv_node *watchers; - int watchcount; - struct pollfd *fds; - nfds_t nfds; - int retval; - int err; - int completed; -} poll_result; - -typedef struct poll_args { - gpr_cv trigger; - int trigger_set; - struct pollfd *fds; - nfds_t nfds; - poll_result *result; - struct poll_args *next; - struct poll_args *prev; -} poll_args; - -// This is a 2-tiered cache, we mantain a hash table -// of active poll calls, so we can wait on the result -// of that call. We also maintain a freelist of inactive -// poll threads. -typedef struct poll_hash_table { - poll_args *free_pollers; - poll_args **active_pollers; - unsigned int size; - unsigned int count; -} poll_hash_table; - -poll_hash_table poll_cache; -cv_fd_table g_cvfds; - -/******************************************************************************* - * fd_posix.c - */ - -#ifndef NDEBUG -#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__) -#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__) -static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file, - int line) { - if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { - gpr_log(GPR_DEBUG, - "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", - fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), - gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line); - } -#else -#define REF_BY(fd, n, reason) ref_by(fd, n) -#define UNREF_BY(fd, n, reason) unref_by(fd, n) -static void ref_by(grpc_fd *fd, int n) { -#endif - GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0); -} - -#ifndef NDEBUG -static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file, - int line) { - if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { - gpr_log(GPR_DEBUG, - "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", - fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), - gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line); - } -#else -static void unref_by(grpc_fd *fd, int n) { -#endif - gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n); - if (old == n) { - gpr_mu_destroy(&fd->mu); - grpc_iomgr_unregister_object(&fd->iomgr_object); - if (fd->shutdown) GRPC_ERROR_UNREF(fd->shutdown_error); - gpr_free(fd); - } else { - GPR_ASSERT(old > n); - } -} - -static grpc_fd *fd_create(int fd, const char *name) { - grpc_fd *r = (grpc_fd *)gpr_malloc(sizeof(*r)); - gpr_mu_init(&r->mu); - gpr_atm_rel_store(&r->refst, 1); - r->shutdown = 0; - r->read_closure = CLOSURE_NOT_READY; - r->write_closure = CLOSURE_NOT_READY; - r->fd = fd; - r->inactive_watcher_root.next = r->inactive_watcher_root.prev = - &r->inactive_watcher_root; - r->read_watcher = r->write_watcher = NULL; - r->on_done_closure = NULL; - r->closed = 0; - r->released = 0; - r->read_notifier_pollset = NULL; - - char *name2; - gpr_asprintf(&name2, "%s fd=%d", name, fd); - grpc_iomgr_register_object(&r->iomgr_object, name2); - gpr_free(name2); - return r; -} - -static bool fd_is_orphaned(grpc_fd *fd) { - return (gpr_atm_acq_load(&fd->refst) & 1) == 0; -} - -/* Return the read-notifier pollset */ -static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, - grpc_fd *fd) { - grpc_pollset *notifier = NULL; - - gpr_mu_lock(&fd->mu); - notifier = fd->read_notifier_pollset; - gpr_mu_unlock(&fd->mu); - - return notifier; -} - -static grpc_error *pollset_kick_locked(grpc_exec_ctx *exec_ctx, - grpc_fd_watcher *watcher) { - gpr_mu_lock(&watcher->pollset->mu); - GPR_ASSERT(watcher->worker); - grpc_error *err = - pollset_kick_ext(exec_ctx, watcher->pollset, watcher->worker, - GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP); - gpr_mu_unlock(&watcher->pollset->mu); - return err; -} - -static void maybe_wake_one_watcher_locked(grpc_exec_ctx *exec_ctx, - grpc_fd *fd) { - if (fd->inactive_watcher_root.next != &fd->inactive_watcher_root) { - pollset_kick_locked(exec_ctx, fd->inactive_watcher_root.next); - } else if (fd->read_watcher) { - pollset_kick_locked(exec_ctx, fd->read_watcher); - } else if (fd->write_watcher) { - pollset_kick_locked(exec_ctx, fd->write_watcher); - } -} - -static void wake_all_watchers_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - grpc_fd_watcher *watcher; - for (watcher = fd->inactive_watcher_root.next; - watcher != &fd->inactive_watcher_root; watcher = watcher->next) { - pollset_kick_locked(exec_ctx, watcher); - } - if (fd->read_watcher) { - pollset_kick_locked(exec_ctx, fd->read_watcher); - } - if (fd->write_watcher && fd->write_watcher != fd->read_watcher) { - pollset_kick_locked(exec_ctx, fd->write_watcher); - } -} - -static int has_watchers(grpc_fd *fd) { - return fd->read_watcher != NULL || fd->write_watcher != NULL || - fd->inactive_watcher_root.next != &fd->inactive_watcher_root; -} - -static void close_fd_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { - fd->closed = 1; - if (!fd->released) { - close(fd->fd); - } - GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_NONE); -} - -static int fd_wrapped_fd(grpc_fd *fd) { - if (fd->released || fd->closed) { - return -1; - } else { - return fd->fd; - } -} - -static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *on_done, int *release_fd, - bool already_closed, const char *reason) { - fd->on_done_closure = on_done; - fd->released = release_fd != NULL; - if (release_fd != NULL) { - *release_fd = fd->fd; - fd->released = true; - } else if (already_closed) { - fd->released = true; - } - gpr_mu_lock(&fd->mu); - REF_BY(fd, 1, reason); /* remove active status, but keep referenced */ - if (!has_watchers(fd)) { - close_fd_locked(exec_ctx, fd); - } else { - wake_all_watchers_locked(exec_ctx, fd); - } - gpr_mu_unlock(&fd->mu); - UNREF_BY(fd, 2, reason); /* drop the reference */ -} - -/* increment refcount by two to avoid changing the orphan bit */ -#ifndef NDEBUG -static void fd_ref(grpc_fd *fd, const char *reason, const char *file, - int line) { - ref_by(fd, 2, reason, file, line); -} - -static void fd_unref(grpc_fd *fd, const char *reason, const char *file, - int line) { - unref_by(fd, 2, reason, file, line); -} -#else -static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); } - -static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); } -#endif - -static grpc_error *fd_shutdown_error(grpc_fd *fd) { - if (!fd->shutdown) { - return GRPC_ERROR_NONE; - } else { - return GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "FD shutdown", &fd->shutdown_error, 1); - } -} - -static void notify_on_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure **st, grpc_closure *closure) { - if (fd->shutdown) { - GRPC_CLOSURE_SCHED(exec_ctx, closure, - GRPC_ERROR_CREATE_FROM_STATIC_STRING("FD shutdown")); - } else if (*st == CLOSURE_NOT_READY) { - /* not ready ==> switch to a waiting state by setting the closure */ - *st = closure; - } else if (*st == CLOSURE_READY) { - /* already ready ==> queue the closure to run immediately */ - *st = CLOSURE_NOT_READY; - GRPC_CLOSURE_SCHED(exec_ctx, closure, fd_shutdown_error(fd)); - maybe_wake_one_watcher_locked(exec_ctx, fd); - } else { - /* upcallptr was set to a different closure. This is an error! */ - gpr_log(GPR_ERROR, - "User called a notify_on function with a previous callback still " - "pending"); - abort(); - } -} - -/* returns 1 if state becomes not ready */ -static int set_ready_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure **st) { - if (*st == CLOSURE_READY) { - /* duplicate ready ==> ignore */ - return 0; - } else if (*st == CLOSURE_NOT_READY) { - /* not ready, and not waiting ==> flag ready */ - *st = CLOSURE_READY; - return 0; - } else { - /* waiting ==> queue closure */ - GRPC_CLOSURE_SCHED(exec_ctx, *st, fd_shutdown_error(fd)); - *st = CLOSURE_NOT_READY; - return 1; - } -} - -static void set_read_notifier_pollset_locked( - grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_pollset *read_notifier_pollset) { - fd->read_notifier_pollset = read_notifier_pollset; -} - -static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) { - gpr_mu_lock(&fd->mu); - /* only shutdown once */ - if (!fd->shutdown) { - fd->shutdown = 1; - fd->shutdown_error = why; - /* signal read/write closed to OS so that future operations fail */ - shutdown(fd->fd, SHUT_RDWR); - set_ready_locked(exec_ctx, fd, &fd->read_closure); - set_ready_locked(exec_ctx, fd, &fd->write_closure); - } else { - GRPC_ERROR_UNREF(why); - } - gpr_mu_unlock(&fd->mu); -} - -static bool fd_is_shutdown(grpc_fd *fd) { - gpr_mu_lock(&fd->mu); - bool r = fd->shutdown; - gpr_mu_unlock(&fd->mu); - return r; -} - -static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - gpr_mu_lock(&fd->mu); - notify_on_locked(exec_ctx, fd, &fd->read_closure, closure); - gpr_mu_unlock(&fd->mu); -} - -static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - gpr_mu_lock(&fd->mu); - notify_on_locked(exec_ctx, fd, &fd->write_closure, closure); - gpr_mu_unlock(&fd->mu); -} - -static uint32_t fd_begin_poll(grpc_fd *fd, grpc_pollset *pollset, - grpc_pollset_worker *worker, uint32_t read_mask, - uint32_t write_mask, grpc_fd_watcher *watcher) { - uint32_t mask = 0; - grpc_closure *cur; - int requested; - /* keep track of pollers that have requested our events, in case they change - */ - GRPC_FD_REF(fd, "poll"); - - gpr_mu_lock(&fd->mu); - - /* if we are shutdown, then don't add to the watcher set */ - if (fd->shutdown) { - watcher->fd = NULL; - watcher->pollset = NULL; - watcher->worker = NULL; - gpr_mu_unlock(&fd->mu); - GRPC_FD_UNREF(fd, "poll"); - return 0; - } - - /* if there is nobody polling for read, but we need to, then start doing so */ - cur = fd->read_closure; - requested = cur != CLOSURE_READY; - if (read_mask && fd->read_watcher == NULL && requested) { - fd->read_watcher = watcher; - mask |= read_mask; - } - /* if there is nobody polling for write, but we need to, then start doing so - */ - cur = fd->write_closure; - requested = cur != CLOSURE_READY; - if (write_mask && fd->write_watcher == NULL && requested) { - fd->write_watcher = watcher; - mask |= write_mask; - } - /* if not polling, remember this watcher in case we need someone to later */ - if (mask == 0 && worker != NULL) { - watcher->next = &fd->inactive_watcher_root; - watcher->prev = watcher->next->prev; - watcher->next->prev = watcher->prev->next = watcher; - } - watcher->pollset = pollset; - watcher->worker = worker; - watcher->fd = fd; - gpr_mu_unlock(&fd->mu); - - return mask; -} - -static void fd_end_poll(grpc_exec_ctx *exec_ctx, grpc_fd_watcher *watcher, - int got_read, int got_write, - grpc_pollset *read_notifier_pollset) { - int was_polling = 0; - int kick = 0; - grpc_fd *fd = watcher->fd; - - if (fd == NULL) { - return; - } - - gpr_mu_lock(&fd->mu); - - if (watcher == fd->read_watcher) { - /* remove read watcher, kick if we still need a read */ - was_polling = 1; - if (!got_read) { - kick = 1; - } - fd->read_watcher = NULL; - } - if (watcher == fd->write_watcher) { - /* remove write watcher, kick if we still need a write */ - was_polling = 1; - if (!got_write) { - kick = 1; - } - fd->write_watcher = NULL; - } - if (!was_polling && watcher->worker != NULL) { - /* remove from inactive list */ - watcher->next->prev = watcher->prev; - watcher->prev->next = watcher->next; - } - if (got_read) { - if (set_ready_locked(exec_ctx, fd, &fd->read_closure)) { - kick = 1; - } - if (read_notifier_pollset != NULL) { - set_read_notifier_pollset_locked(exec_ctx, fd, read_notifier_pollset); - } - } - if (got_write) { - if (set_ready_locked(exec_ctx, fd, &fd->write_closure)) { - kick = 1; - } - } - if (kick) { - maybe_wake_one_watcher_locked(exec_ctx, fd); - } - if (fd_is_orphaned(fd) && !has_watchers(fd) && !fd->closed) { - close_fd_locked(exec_ctx, fd); - } - gpr_mu_unlock(&fd->mu); - - GRPC_FD_UNREF(fd, "poll"); -} - -/******************************************************************************* - * pollset_posix.c - */ - -GPR_TLS_DECL(g_current_thread_poller); -GPR_TLS_DECL(g_current_thread_worker); - -static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) { - worker->prev->next = worker->next; - worker->next->prev = worker->prev; -} - -static bool pollset_has_workers(grpc_pollset *p) { - return p->root_worker.next != &p->root_worker; -} - -static bool pollset_in_pollset_sets(grpc_pollset *p) { - return p->pollset_set_count; -} - -static bool pollset_has_observers(grpc_pollset *p) { - return pollset_has_workers(p) || pollset_in_pollset_sets(p); -} - -static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) { - if (pollset_has_workers(p)) { - grpc_pollset_worker *w = p->root_worker.next; - remove_worker(p, w); - return w; - } else { - return NULL; - } -} - -static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) { - worker->next = &p->root_worker; - worker->prev = worker->next->prev; - worker->prev->next = worker->next->prev = worker; -} - -static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) { - worker->prev = &p->root_worker; - worker->next = worker->prev->next; - worker->prev->next = worker->next->prev = worker; -} - -static void kick_append_error(grpc_error **composite, grpc_error *error) { - if (error == GRPC_ERROR_NONE) return; - if (*composite == GRPC_ERROR_NONE) { - *composite = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Kick Failure"); - } - *composite = grpc_error_add_child(*composite, error); -} - -static grpc_error *pollset_kick_ext(grpc_exec_ctx *exec_ctx, grpc_pollset *p, - grpc_pollset_worker *specific_worker, - uint32_t flags) { - GPR_TIMER_BEGIN("pollset_kick_ext", 0); - grpc_error *error = GRPC_ERROR_NONE; - GRPC_STATS_INC_POLLSET_KICK(exec_ctx); - - /* pollset->mu already held */ - if (specific_worker != NULL) { - if (specific_worker == GRPC_POLLSET_KICK_BROADCAST) { - GPR_TIMER_BEGIN("pollset_kick_ext.broadcast", 0); - GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); - for (specific_worker = p->root_worker.next; - specific_worker != &p->root_worker; - specific_worker = specific_worker->next) { - kick_append_error( - &error, grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd)); - } - p->kicked_without_pollers = true; - GPR_TIMER_END("pollset_kick_ext.broadcast", 0); - } else if (gpr_tls_get(&g_current_thread_worker) != - (intptr_t)specific_worker) { - GPR_TIMER_MARK("different_thread_worker", 0); - if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { - specific_worker->reevaluate_polling_on_wakeup = true; - } - specific_worker->kicked_specifically = true; - kick_append_error(&error, - grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd)); - } else if ((flags & GRPC_POLLSET_CAN_KICK_SELF) != 0) { - GPR_TIMER_MARK("kick_yoself", 0); - if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { - specific_worker->reevaluate_polling_on_wakeup = true; - } - specific_worker->kicked_specifically = true; - kick_append_error(&error, - grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd)); - } - } else if (gpr_tls_get(&g_current_thread_poller) != (intptr_t)p) { - GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); - GPR_TIMER_MARK("kick_anonymous", 0); - specific_worker = pop_front_worker(p); - if (specific_worker != NULL) { - if (gpr_tls_get(&g_current_thread_worker) == (intptr_t)specific_worker) { - GPR_TIMER_MARK("kick_anonymous_not_self", 0); - push_back_worker(p, specific_worker); - specific_worker = pop_front_worker(p); - if ((flags & GRPC_POLLSET_CAN_KICK_SELF) == 0 && - gpr_tls_get(&g_current_thread_worker) == - (intptr_t)specific_worker) { - push_back_worker(p, specific_worker); - specific_worker = NULL; - } - } - if (specific_worker != NULL) { - GPR_TIMER_MARK("finally_kick", 0); - push_back_worker(p, specific_worker); - kick_append_error( - &error, grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd)); - } - } else { - GPR_TIMER_MARK("kicked_no_pollers", 0); - p->kicked_without_pollers = true; - } - } - - GPR_TIMER_END("pollset_kick_ext", 0); - GRPC_LOG_IF_ERROR("pollset_kick_ext", GRPC_ERROR_REF(error)); - return error; -} - -static grpc_error *pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *p, - grpc_pollset_worker *specific_worker) { - return pollset_kick_ext(exec_ctx, p, specific_worker, 0); -} - -/* global state management */ - -static grpc_error *pollset_global_init(void) { - gpr_tls_init(&g_current_thread_poller); - gpr_tls_init(&g_current_thread_worker); - return GRPC_ERROR_NONE; -} - -static void pollset_global_shutdown(void) { - gpr_tls_destroy(&g_current_thread_poller); - gpr_tls_destroy(&g_current_thread_worker); -} - -/* main interface */ - -static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { - gpr_mu_init(&pollset->mu); - *mu = &pollset->mu; - pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker; - pollset->shutting_down = 0; - pollset->called_shutdown = 0; - pollset->kicked_without_pollers = 0; - pollset->idle_jobs.head = pollset->idle_jobs.tail = NULL; - pollset->local_wakeup_cache = NULL; - pollset->kicked_without_pollers = 0; - pollset->fd_count = 0; - pollset->fd_capacity = 0; - pollset->fds = NULL; - pollset->pollset_set_count = 0; -} - -static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { - GPR_ASSERT(!pollset_has_workers(pollset)); - GPR_ASSERT(pollset->idle_jobs.head == pollset->idle_jobs.tail); - while (pollset->local_wakeup_cache) { - grpc_cached_wakeup_fd *next = pollset->local_wakeup_cache->next; - grpc_wakeup_fd_destroy(&pollset->local_wakeup_cache->fd); - gpr_free(pollset->local_wakeup_cache); - pollset->local_wakeup_cache = next; - } - gpr_free(pollset->fds); - gpr_mu_destroy(&pollset->mu); -} - -static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_fd *fd) { - gpr_mu_lock(&pollset->mu); - size_t i; - /* TODO(ctiller): this is O(num_fds^2); maybe switch to a hash set here */ - for (i = 0; i < pollset->fd_count; i++) { - if (pollset->fds[i] == fd) goto exit; - } - if (pollset->fd_count == pollset->fd_capacity) { - pollset->fd_capacity = - GPR_MAX(pollset->fd_capacity + 8, pollset->fd_count * 3 / 2); - pollset->fds = (grpc_fd **)gpr_realloc( - pollset->fds, sizeof(grpc_fd *) * pollset->fd_capacity); - } - pollset->fds[pollset->fd_count++] = fd; - GRPC_FD_REF(fd, "multipoller"); - pollset_kick(exec_ctx, pollset, NULL); -exit: - gpr_mu_unlock(&pollset->mu); -} - -static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { - GPR_ASSERT(grpc_closure_list_empty(pollset->idle_jobs)); - size_t i; - for (i = 0; i < pollset->fd_count; i++) { - GRPC_FD_UNREF(pollset->fds[i], "multipoller"); - } - pollset->fd_count = 0; - GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE); -} - -static void work_combine_error(grpc_error **composite, grpc_error *error) { - if (error == GRPC_ERROR_NONE) return; - if (*composite == GRPC_ERROR_NONE) { - *composite = GRPC_ERROR_CREATE_FROM_STATIC_STRING("pollset_work"); - } - *composite = grpc_error_add_child(*composite, error); -} - -static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker **worker_hdl, - gpr_timespec now, gpr_timespec deadline) { - grpc_pollset_worker worker; - if (worker_hdl) *worker_hdl = &worker; - grpc_error *error = GRPC_ERROR_NONE; - - /* Avoid malloc for small number of elements. */ - enum { inline_elements = 96 }; - struct pollfd pollfd_space[inline_elements]; - struct grpc_fd_watcher watcher_space[inline_elements]; - - /* pollset->mu already held */ - int added_worker = 0; - int locked = 1; - int queued_work = 0; - int keep_polling = 0; - GPR_TIMER_BEGIN("pollset_work", 0); - /* this must happen before we (potentially) drop pollset->mu */ - worker.next = worker.prev = NULL; - worker.reevaluate_polling_on_wakeup = 0; - if (pollset->local_wakeup_cache != NULL) { - worker.wakeup_fd = pollset->local_wakeup_cache; - pollset->local_wakeup_cache = worker.wakeup_fd->next; - } else { - worker.wakeup_fd = - (grpc_cached_wakeup_fd *)gpr_malloc(sizeof(*worker.wakeup_fd)); - error = grpc_wakeup_fd_init(&worker.wakeup_fd->fd); - if (error != GRPC_ERROR_NONE) { - GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error)); - return error; - } - } - worker.kicked_specifically = 0; - /* If there's work waiting for the pollset to be idle, and the - pollset is idle, then do that work */ - if (!pollset_has_workers(pollset) && - !grpc_closure_list_empty(pollset->idle_jobs)) { - GPR_TIMER_MARK("pollset_work.idle_jobs", 0); - GRPC_CLOSURE_LIST_SCHED(exec_ctx, &pollset->idle_jobs); - goto done; - } - /* If we're shutting down then we don't execute any extended work */ - if (pollset->shutting_down) { - GPR_TIMER_MARK("pollset_work.shutting_down", 0); - goto done; - } - /* Start polling, and keep doing so while we're being asked to - re-evaluate our pollers (this allows poll() based pollers to - ensure they don't miss wakeups) */ - keep_polling = 1; - gpr_tls_set(&g_current_thread_poller, (intptr_t)pollset); - while (keep_polling) { - keep_polling = 0; - if (!pollset->kicked_without_pollers) { - if (!added_worker) { - push_front_worker(pollset, &worker); - added_worker = 1; - gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); - } - GPR_TIMER_BEGIN("maybe_work_and_unlock", 0); -#define POLLOUT_CHECK (POLLOUT | POLLHUP | POLLERR) -#define POLLIN_CHECK (POLLIN | POLLHUP | POLLERR) - - int timeout; - int r; - size_t i, fd_count; - nfds_t pfd_count; - grpc_fd_watcher *watchers; - struct pollfd *pfds; - - timeout = poll_deadline_to_millis_timeout(deadline, now); - - if (pollset->fd_count + 2 <= inline_elements) { - pfds = pollfd_space; - watchers = watcher_space; - } else { - /* Allocate one buffer to hold both pfds and watchers arrays */ - const size_t pfd_size = sizeof(*pfds) * (pollset->fd_count + 2); - const size_t watch_size = sizeof(*watchers) * (pollset->fd_count + 2); - void *buf = gpr_malloc(pfd_size + watch_size); - pfds = (struct pollfd *)buf; - watchers = (grpc_fd_watcher *)(void *)((char *)buf + pfd_size); - } - - fd_count = 0; - pfd_count = 1; - pfds[0].fd = GRPC_WAKEUP_FD_GET_READ_FD(&worker.wakeup_fd->fd); - pfds[0].events = POLLIN; - pfds[0].revents = 0; - for (i = 0; i < pollset->fd_count; i++) { - if (fd_is_orphaned(pollset->fds[i])) { - GRPC_FD_UNREF(pollset->fds[i], "multipoller"); - } else { - pollset->fds[fd_count++] = pollset->fds[i]; - watchers[pfd_count].fd = pollset->fds[i]; - GRPC_FD_REF(watchers[pfd_count].fd, "multipoller_start"); - pfds[pfd_count].fd = pollset->fds[i]->fd; - pfds[pfd_count].revents = 0; - pfd_count++; - } - } - pollset->fd_count = fd_count; - gpr_mu_unlock(&pollset->mu); - - for (i = 1; i < pfd_count; i++) { - grpc_fd *fd = watchers[i].fd; - pfds[i].events = (short)fd_begin_poll(fd, pollset, &worker, POLLIN, - POLLOUT, &watchers[i]); - GRPC_FD_UNREF(fd, "multipoller_start"); - } - - /* TODO(vpai): Consider first doing a 0 timeout poll here to avoid - even going into the blocking annotation if possible */ - GRPC_SCHEDULING_START_BLOCKING_REGION; - GRPC_STATS_INC_SYSCALL_POLL(exec_ctx); - r = grpc_poll_function(pfds, pfd_count, timeout); - GRPC_SCHEDULING_END_BLOCKING_REGION; - - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "%p poll=%d", pollset, r); - } - - if (r < 0) { - if (errno != EINTR) { - work_combine_error(&error, GRPC_OS_ERROR(errno, "poll")); - } - - for (i = 1; i < pfd_count; i++) { - if (watchers[i].fd == NULL) { - fd_end_poll(exec_ctx, &watchers[i], 0, 0, NULL); - } else { - // Wake up all the file descriptors, if we have an invalid one - // we can identify it on the next pollset_work() - fd_end_poll(exec_ctx, &watchers[i], 1, 1, pollset); - } - } - } else if (r == 0) { - for (i = 1; i < pfd_count; i++) { - fd_end_poll(exec_ctx, &watchers[i], 0, 0, NULL); - } - } else { - if (pfds[0].revents & POLLIN_CHECK) { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "%p: got_wakeup", pollset); - } - work_combine_error( - &error, grpc_wakeup_fd_consume_wakeup(&worker.wakeup_fd->fd)); - } - for (i = 1; i < pfd_count; i++) { - if (watchers[i].fd == NULL) { - fd_end_poll(exec_ctx, &watchers[i], 0, 0, NULL); - } else { - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_DEBUG, "%p got_event: %d r:%d w:%d [%d]", pollset, - pfds[i].fd, (pfds[i].revents & POLLIN_CHECK) != 0, - (pfds[i].revents & POLLOUT_CHECK) != 0, pfds[i].revents); - } - fd_end_poll(exec_ctx, &watchers[i], pfds[i].revents & POLLIN_CHECK, - pfds[i].revents & POLLOUT_CHECK, pollset); - } - } - } - - if (pfds != pollfd_space) { - /* pfds and watchers are in the same memory block pointed to by pfds */ - gpr_free(pfds); - } - - GPR_TIMER_END("maybe_work_and_unlock", 0); - locked = 0; - } else { - GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); - pollset->kicked_without_pollers = 0; - } - /* Finished execution - start cleaning up. - Note that we may arrive here from outside the enclosing while() loop. - In that case we won't loop though as we haven't added worker to the - worker list, which means nobody could ask us to re-evaluate polling). */ - done: - if (!locked) { - queued_work |= grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&pollset->mu); - locked = 1; - } - /* If we're forced to re-evaluate polling (via pollset_kick with - GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) then we land here and force - a loop */ - if (worker.reevaluate_polling_on_wakeup && error == GRPC_ERROR_NONE) { - worker.reevaluate_polling_on_wakeup = 0; - pollset->kicked_without_pollers = 0; - if (queued_work || worker.kicked_specifically) { - /* If there's queued work on the list, then set the deadline to be - immediate so we get back out of the polling loop quickly */ - deadline = gpr_inf_past(GPR_CLOCK_MONOTONIC); - } - keep_polling = 1; - } - if (keep_polling) { - now = gpr_now(now.clock_type); - } - } - gpr_tls_set(&g_current_thread_poller, 0); - if (added_worker) { - remove_worker(pollset, &worker); - gpr_tls_set(&g_current_thread_worker, 0); - } - /* release wakeup fd to the local pool */ - worker.wakeup_fd->next = pollset->local_wakeup_cache; - pollset->local_wakeup_cache = worker.wakeup_fd; - /* check shutdown conditions */ - if (pollset->shutting_down) { - if (pollset_has_workers(pollset)) { - pollset_kick(exec_ctx, pollset, NULL); - } else if (!pollset->called_shutdown && !pollset_has_observers(pollset)) { - pollset->called_shutdown = 1; - gpr_mu_unlock(&pollset->mu); - finish_shutdown(exec_ctx, pollset); - grpc_exec_ctx_flush(exec_ctx); - /* Continuing to access pollset here is safe -- it is the caller's - * responsibility to not destroy when it has outstanding calls to - * pollset_work. - * TODO(dklempner): Can we refactor the shutdown logic to avoid this? */ - gpr_mu_lock(&pollset->mu); - } else if (!grpc_closure_list_empty(pollset->idle_jobs)) { - GRPC_CLOSURE_LIST_SCHED(exec_ctx, &pollset->idle_jobs); - gpr_mu_unlock(&pollset->mu); - grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&pollset->mu); - } - } - if (worker_hdl) *worker_hdl = NULL; - GPR_TIMER_END("pollset_work", 0); - GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error)); - return error; -} - -static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_closure *closure) { - GPR_ASSERT(!pollset->shutting_down); - pollset->shutting_down = 1; - pollset->shutdown_done = closure; - pollset_kick(exec_ctx, pollset, GRPC_POLLSET_KICK_BROADCAST); - if (!pollset_has_workers(pollset)) { - GRPC_CLOSURE_LIST_SCHED(exec_ctx, &pollset->idle_jobs); - } - if (!pollset->called_shutdown && !pollset_has_observers(pollset)) { - pollset->called_shutdown = 1; - finish_shutdown(exec_ctx, pollset); - } -} - -static int poll_deadline_to_millis_timeout(gpr_timespec deadline, - gpr_timespec now) { - gpr_timespec timeout; - static const int64_t max_spin_polling_us = 10; - if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { - return -1; - } - if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros( - max_spin_polling_us, - GPR_TIMESPAN))) <= 0) { - return 0; - } - timeout = gpr_time_sub(deadline, now); - return gpr_time_to_millis(gpr_time_add( - timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); -} - -/******************************************************************************* - * pollset_set_posix.c - */ - -static grpc_pollset_set *pollset_set_create(void) { - grpc_pollset_set *pollset_set = - (grpc_pollset_set *)gpr_zalloc(sizeof(*pollset_set)); - gpr_mu_init(&pollset_set->mu); - return pollset_set; -} - -static void pollset_set_destroy(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set) { - size_t i; - gpr_mu_destroy(&pollset_set->mu); - for (i = 0; i < pollset_set->fd_count; i++) { - GRPC_FD_UNREF(pollset_set->fds[i], "pollset_set"); - } - for (i = 0; i < pollset_set->pollset_count; i++) { - grpc_pollset *pollset = pollset_set->pollsets[i]; - gpr_mu_lock(&pollset->mu); - pollset->pollset_set_count--; - /* check shutdown */ - if (pollset->shutting_down && !pollset->called_shutdown && - !pollset_has_observers(pollset)) { - pollset->called_shutdown = 1; - gpr_mu_unlock(&pollset->mu); - finish_shutdown(exec_ctx, pollset); - } else { - gpr_mu_unlock(&pollset->mu); - } - } - gpr_free(pollset_set->pollsets); - gpr_free(pollset_set->pollset_sets); - gpr_free(pollset_set->fds); - gpr_free(pollset_set); -} - -static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, - grpc_pollset *pollset) { - size_t i, j; - gpr_mu_lock(&pollset->mu); - pollset->pollset_set_count++; - gpr_mu_unlock(&pollset->mu); - gpr_mu_lock(&pollset_set->mu); - if (pollset_set->pollset_count == pollset_set->pollset_capacity) { - pollset_set->pollset_capacity = - GPR_MAX(8, 2 * pollset_set->pollset_capacity); - pollset_set->pollsets = (grpc_pollset **)gpr_realloc( - pollset_set->pollsets, - pollset_set->pollset_capacity * sizeof(*pollset_set->pollsets)); - } - pollset_set->pollsets[pollset_set->pollset_count++] = pollset; - for (i = 0, j = 0; i < pollset_set->fd_count; i++) { - if (fd_is_orphaned(pollset_set->fds[i])) { - GRPC_FD_UNREF(pollset_set->fds[i], "pollset_set"); - } else { - pollset_add_fd(exec_ctx, pollset, pollset_set->fds[i]); - pollset_set->fds[j++] = pollset_set->fds[i]; - } - } - pollset_set->fd_count = j; - gpr_mu_unlock(&pollset_set->mu); -} - -static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, - grpc_pollset *pollset) { - size_t i; - gpr_mu_lock(&pollset_set->mu); - for (i = 0; i < pollset_set->pollset_count; i++) { - if (pollset_set->pollsets[i] == pollset) { - pollset_set->pollset_count--; - GPR_SWAP(grpc_pollset *, pollset_set->pollsets[i], - pollset_set->pollsets[pollset_set->pollset_count]); - break; - } - } - gpr_mu_unlock(&pollset_set->mu); - gpr_mu_lock(&pollset->mu); - pollset->pollset_set_count--; - /* check shutdown */ - if (pollset->shutting_down && !pollset->called_shutdown && - !pollset_has_observers(pollset)) { - pollset->called_shutdown = 1; - gpr_mu_unlock(&pollset->mu); - finish_shutdown(exec_ctx, pollset); - } else { - gpr_mu_unlock(&pollset->mu); - } -} - -static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) { - size_t i, j; - gpr_mu_lock(&bag->mu); - if (bag->pollset_set_count == bag->pollset_set_capacity) { - bag->pollset_set_capacity = GPR_MAX(8, 2 * bag->pollset_set_capacity); - bag->pollset_sets = (grpc_pollset_set **)gpr_realloc( - bag->pollset_sets, - bag->pollset_set_capacity * sizeof(*bag->pollset_sets)); - } - bag->pollset_sets[bag->pollset_set_count++] = item; - for (i = 0, j = 0; i < bag->fd_count; i++) { - if (fd_is_orphaned(bag->fds[i])) { - GRPC_FD_UNREF(bag->fds[i], "pollset_set"); - } else { - pollset_set_add_fd(exec_ctx, item, bag->fds[i]); - bag->fds[j++] = bag->fds[i]; - } - } - bag->fd_count = j; - gpr_mu_unlock(&bag->mu); -} - -static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) { - size_t i; - gpr_mu_lock(&bag->mu); - for (i = 0; i < bag->pollset_set_count; i++) { - if (bag->pollset_sets[i] == item) { - bag->pollset_set_count--; - GPR_SWAP(grpc_pollset_set *, bag->pollset_sets[i], - bag->pollset_sets[bag->pollset_set_count]); - break; - } - } - gpr_mu_unlock(&bag->mu); -} - -static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, grpc_fd *fd) { - size_t i; - gpr_mu_lock(&pollset_set->mu); - if (pollset_set->fd_count == pollset_set->fd_capacity) { - pollset_set->fd_capacity = GPR_MAX(8, 2 * pollset_set->fd_capacity); - pollset_set->fds = (grpc_fd **)gpr_realloc( - pollset_set->fds, pollset_set->fd_capacity * sizeof(*pollset_set->fds)); - } - GRPC_FD_REF(fd, "pollset_set"); - pollset_set->fds[pollset_set->fd_count++] = fd; - for (i = 0; i < pollset_set->pollset_count; i++) { - pollset_add_fd(exec_ctx, pollset_set->pollsets[i], fd); - } - for (i = 0; i < pollset_set->pollset_set_count; i++) { - pollset_set_add_fd(exec_ctx, pollset_set->pollset_sets[i], fd); - } - gpr_mu_unlock(&pollset_set->mu); -} - -static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, grpc_fd *fd) { - size_t i; - gpr_mu_lock(&pollset_set->mu); - for (i = 0; i < pollset_set->fd_count; i++) { - if (pollset_set->fds[i] == fd) { - pollset_set->fd_count--; - GPR_SWAP(grpc_fd *, pollset_set->fds[i], - pollset_set->fds[pollset_set->fd_count]); - GRPC_FD_UNREF(fd, "pollset_set"); - break; - } - } - for (i = 0; i < pollset_set->pollset_set_count; i++) { - pollset_set_del_fd(exec_ctx, pollset_set->pollset_sets[i], fd); - } - gpr_mu_unlock(&pollset_set->mu); -} - -/******************************************************************************* - * Condition Variable polling extensions - */ - -static void run_poll(void *args); -static void cache_poller_locked(poll_args *args); - -static void cache_insert_locked(poll_args *args) { - uint32_t key = gpr_murmur_hash3(args->fds, args->nfds * sizeof(struct pollfd), - 0xDEADBEEF); - key = key % poll_cache.size; - if (poll_cache.active_pollers[key]) { - poll_cache.active_pollers[key]->prev = args; - } - args->next = poll_cache.active_pollers[key]; - args->prev = NULL; - poll_cache.active_pollers[key] = args; - poll_cache.count++; -} - -static void init_result(poll_args *pargs) { - pargs->result = (poll_result *)gpr_malloc(sizeof(poll_result)); - gpr_ref_init(&pargs->result->refcount, 1); - pargs->result->watchers = NULL; - pargs->result->watchcount = 0; - pargs->result->fds = - (struct pollfd *)gpr_malloc(sizeof(struct pollfd) * pargs->nfds); - memcpy(pargs->result->fds, pargs->fds, sizeof(struct pollfd) * pargs->nfds); - pargs->result->nfds = pargs->nfds; - pargs->result->retval = 0; - pargs->result->err = 0; - pargs->result->completed = 0; -} - -// Creates a poll_args object for a given arguments to poll(). -// This object may return a poll_args in the cache. -static poll_args *get_poller_locked(struct pollfd *fds, nfds_t count) { - uint32_t key = - gpr_murmur_hash3(fds, count * sizeof(struct pollfd), 0xDEADBEEF); - key = key % poll_cache.size; - poll_args *curr = poll_cache.active_pollers[key]; - while (curr) { - if (curr->nfds == count && - memcmp(curr->fds, fds, count * sizeof(struct pollfd)) == 0) { - gpr_free(fds); - return curr; - } - curr = curr->next; - } - - if (poll_cache.free_pollers) { - poll_args *pargs = poll_cache.free_pollers; - poll_cache.free_pollers = pargs->next; - if (poll_cache.free_pollers) { - poll_cache.free_pollers->prev = NULL; - } - pargs->fds = fds; - pargs->nfds = count; - pargs->next = NULL; - pargs->prev = NULL; - init_result(pargs); - cache_poller_locked(pargs); - return pargs; - } - - poll_args *pargs = (poll_args *)gpr_malloc(sizeof(struct poll_args)); - gpr_cv_init(&pargs->trigger); - pargs->fds = fds; - pargs->nfds = count; - pargs->next = NULL; - pargs->prev = NULL; - pargs->trigger_set = 0; - init_result(pargs); - cache_poller_locked(pargs); - gpr_thd_id t_id; - gpr_thd_options opt = gpr_thd_options_default(); - gpr_ref(&g_cvfds.pollcount); - gpr_thd_options_set_detached(&opt); - GPR_ASSERT(gpr_thd_new(&t_id, &run_poll, pargs, &opt)); - return pargs; -} - -static void cache_delete_locked(poll_args *args) { - if (!args->prev) { - uint32_t key = gpr_murmur_hash3( - args->fds, args->nfds * sizeof(struct pollfd), 0xDEADBEEF); - key = key % poll_cache.size; - GPR_ASSERT(poll_cache.active_pollers[key] == args); - poll_cache.active_pollers[key] = args->next; - } else { - args->prev->next = args->next; - } - - if (args->next) { - args->next->prev = args->prev; - } - - poll_cache.count--; - if (poll_cache.free_pollers) { - poll_cache.free_pollers->prev = args; - } - args->prev = NULL; - args->next = poll_cache.free_pollers; - gpr_free(args->fds); - poll_cache.free_pollers = args; -} - -static void cache_poller_locked(poll_args *args) { - if (poll_cache.count + 1 > poll_cache.size / 2) { - poll_args **old_active_pollers = poll_cache.active_pollers; - poll_cache.size = poll_cache.size * 2; - poll_cache.count = 0; - poll_cache.active_pollers = - (poll_args **)gpr_malloc(sizeof(void *) * poll_cache.size); - for (unsigned int i = 0; i < poll_cache.size; i++) { - poll_cache.active_pollers[i] = NULL; - } - for (unsigned int i = 0; i < poll_cache.size / 2; i++) { - poll_args *curr = old_active_pollers[i]; - poll_args *next = NULL; - while (curr) { - next = curr->next; - cache_insert_locked(curr); - curr = next; - } - } - gpr_free(old_active_pollers); - } - - cache_insert_locked(args); -} - -static void cache_destroy_locked(poll_args *args) { - if (args->next) { - args->next->prev = args->prev; - } - - if (args->prev) { - args->prev->next = args->next; - } else { - poll_cache.free_pollers = args->next; - } - - gpr_free(args); -} - -static void decref_poll_result(poll_result *res) { - if (gpr_unref(&res->refcount)) { - GPR_ASSERT(!res->watchers); - gpr_free(res->fds); - gpr_free(res); - } -} - -void remove_cvn(cv_node **head, cv_node *target) { - if (target->next) { - target->next->prev = target->prev; - } - - if (target->prev) { - target->prev->next = target->next; - } else { - *head = target->next; - } -} - -gpr_timespec thread_grace; - -// Poll in a background thread -static void run_poll(void *args) { - poll_args *pargs = (poll_args *)args; - while (1) { - poll_result *result = pargs->result; - int retval = g_cvfds.poll(result->fds, result->nfds, CV_POLL_PERIOD_MS); - gpr_mu_lock(&g_cvfds.mu); - if (retval != 0) { - result->completed = 1; - result->retval = retval; - result->err = errno; - cv_node *watcher = result->watchers; - while (watcher) { - gpr_cv_signal(watcher->cv); - watcher = watcher->next; - } - } - if (result->watchcount == 0 || result->completed) { - cache_delete_locked(pargs); - decref_poll_result(result); - // Leave this polling thread alive for a grace period to do another poll() - // op - gpr_timespec deadline = gpr_now(GPR_CLOCK_REALTIME); - deadline = gpr_time_add(deadline, thread_grace); - pargs->trigger_set = 0; - gpr_cv_wait(&pargs->trigger, &g_cvfds.mu, deadline); - if (!pargs->trigger_set) { - cache_destroy_locked(pargs); - break; - } - } - gpr_mu_unlock(&g_cvfds.mu); - } - - // We still have the lock here - if (gpr_unref(&g_cvfds.pollcount)) { - gpr_cv_signal(&g_cvfds.shutdown_cv); - } - gpr_mu_unlock(&g_cvfds.mu); -} - -// This function overrides poll() to handle condition variable wakeup fds -static int cvfd_poll(struct pollfd *fds, nfds_t nfds, int timeout) { - unsigned int i; - int res, idx; - cv_node *pollcv; - int skip_poll = 0; - nfds_t nsockfds = 0; - poll_result *result = NULL; - gpr_mu_lock(&g_cvfds.mu); - pollcv = (cv_node *)gpr_malloc(sizeof(cv_node)); - pollcv->next = NULL; - gpr_cv pollcv_cv; - gpr_cv_init(&pollcv_cv); - pollcv->cv = &pollcv_cv; - cv_node *fd_cvs = (cv_node *)gpr_malloc(nfds * sizeof(cv_node)); - - for (i = 0; i < nfds; i++) { - fds[i].revents = 0; - if (fds[i].fd < 0 && (fds[i].events & POLLIN)) { - idx = GRPC_FD_TO_IDX(fds[i].fd); - fd_cvs[i].cv = &pollcv_cv; - fd_cvs[i].prev = NULL; - fd_cvs[i].next = g_cvfds.cvfds[idx].cvs; - if (g_cvfds.cvfds[idx].cvs) { - g_cvfds.cvfds[idx].cvs->prev = &(fd_cvs[i]); - } - g_cvfds.cvfds[idx].cvs = &(fd_cvs[i]); - // Don't bother polling if a wakeup fd is ready - if (g_cvfds.cvfds[idx].is_set) { - skip_poll = 1; - } - } else if (fds[i].fd >= 0) { - nsockfds++; - } - } - - gpr_timespec deadline = gpr_now(GPR_CLOCK_REALTIME); - if (timeout < 0) { - deadline = gpr_inf_future(GPR_CLOCK_REALTIME); - } else { - deadline = - gpr_time_add(deadline, gpr_time_from_millis(timeout, GPR_TIMESPAN)); - } - - res = 0; - if (!skip_poll && nsockfds > 0) { - struct pollfd *pollfds = - (struct pollfd *)gpr_malloc(sizeof(struct pollfd) * nsockfds); - idx = 0; - for (i = 0; i < nfds; i++) { - if (fds[i].fd >= 0) { - pollfds[idx].fd = fds[i].fd; - pollfds[idx].events = fds[i].events; - pollfds[idx].revents = 0; - idx++; - } - } - poll_args *pargs = get_poller_locked(pollfds, nsockfds); - result = pargs->result; - pollcv->next = result->watchers; - pollcv->prev = NULL; - if (result->watchers) { - result->watchers->prev = pollcv; - } - result->watchers = pollcv; - result->watchcount++; - gpr_ref(&result->refcount); - - pargs->trigger_set = 1; - gpr_cv_signal(&pargs->trigger); - gpr_cv_wait(&pollcv_cv, &g_cvfds.mu, deadline); - res = result->retval; - errno = result->err; - result->watchcount--; - remove_cvn(&result->watchers, pollcv); - } else if (!skip_poll) { - gpr_cv_wait(&pollcv_cv, &g_cvfds.mu, deadline); - } - - idx = 0; - for (i = 0; i < nfds; i++) { - if (fds[i].fd < 0 && (fds[i].events & POLLIN)) { - remove_cvn(&g_cvfds.cvfds[GRPC_FD_TO_IDX(fds[i].fd)].cvs, &(fd_cvs[i])); - if (g_cvfds.cvfds[GRPC_FD_TO_IDX(fds[i].fd)].is_set) { - fds[i].revents = POLLIN; - if (res >= 0) res++; - } - } else if (!skip_poll && fds[i].fd >= 0 && result->completed) { - fds[i].revents = result->fds[idx].revents; - idx++; - } - } - - gpr_free(fd_cvs); - gpr_free(pollcv); - if (result) { - decref_poll_result(result); - } - - gpr_mu_unlock(&g_cvfds.mu); - - return res; -} - -static void global_cv_fd_table_init() { - gpr_mu_init(&g_cvfds.mu); - gpr_mu_lock(&g_cvfds.mu); - gpr_cv_init(&g_cvfds.shutdown_cv); - gpr_ref_init(&g_cvfds.pollcount, 1); - g_cvfds.size = CV_DEFAULT_TABLE_SIZE; - g_cvfds.cvfds = - (fd_node *)gpr_malloc(sizeof(fd_node) * CV_DEFAULT_TABLE_SIZE); - g_cvfds.free_fds = NULL; - thread_grace = gpr_time_from_millis(POLLCV_THREAD_GRACE_MS, GPR_TIMESPAN); - for (int i = 0; i < CV_DEFAULT_TABLE_SIZE; i++) { - g_cvfds.cvfds[i].is_set = 0; - g_cvfds.cvfds[i].cvs = NULL; - g_cvfds.cvfds[i].next_free = g_cvfds.free_fds; - g_cvfds.free_fds = &g_cvfds.cvfds[i]; - } - // Override the poll function with one that supports cvfds - g_cvfds.poll = grpc_poll_function; - grpc_poll_function = &cvfd_poll; - - // Initialize the cache - poll_cache.size = 32; - poll_cache.count = 0; - poll_cache.free_pollers = NULL; - poll_cache.active_pollers = (poll_args **)gpr_malloc(sizeof(void *) * 32); - for (unsigned int i = 0; i < poll_cache.size; i++) { - poll_cache.active_pollers[i] = NULL; - } - - gpr_mu_unlock(&g_cvfds.mu); -} - -static void global_cv_fd_table_shutdown() { - gpr_mu_lock(&g_cvfds.mu); - // Attempt to wait for all abandoned poll() threads to terminate - // Not doing so will result in reported memory leaks - if (!gpr_unref(&g_cvfds.pollcount)) { - int res = gpr_cv_wait(&g_cvfds.shutdown_cv, &g_cvfds.mu, - gpr_time_add(gpr_now(GPR_CLOCK_REALTIME), - gpr_time_from_seconds(3, GPR_TIMESPAN))); - GPR_ASSERT(res == 0); - } - gpr_cv_destroy(&g_cvfds.shutdown_cv); - grpc_poll_function = g_cvfds.poll; - gpr_free(g_cvfds.cvfds); - - gpr_free(poll_cache.active_pollers); - - gpr_mu_unlock(&g_cvfds.mu); - gpr_mu_destroy(&g_cvfds.mu); -} - -/******************************************************************************* - * event engine binding - */ - -static void shutdown_engine(void) { - pollset_global_shutdown(); - if (grpc_cv_wakeup_fds_enabled()) { - global_cv_fd_table_shutdown(); - } -} - -static const grpc_event_engine_vtable vtable = { - sizeof(grpc_pollset), - - fd_create, - fd_wrapped_fd, - fd_orphan, - fd_shutdown, - fd_notify_on_read, - fd_notify_on_write, - fd_is_shutdown, - fd_get_read_notifier_pollset, - - pollset_init, - pollset_shutdown, - pollset_destroy, - pollset_work, - pollset_kick, - pollset_add_fd, - - pollset_set_create, - pollset_set_destroy, - pollset_set_add_pollset, - pollset_set_del_pollset, - pollset_set_add_pollset_set, - pollset_set_del_pollset_set, - pollset_set_add_fd, - pollset_set_del_fd, - - shutdown_engine, -}; - -const grpc_event_engine_vtable *grpc_init_poll_posix(bool explicit_request) { - if (!grpc_has_wakeup_fd()) { - return NULL; - } - if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) { - return NULL; - } - return &vtable; -} - -const grpc_event_engine_vtable *grpc_init_poll_cv_posix(bool explicit_request) { - global_cv_fd_table_init(); - grpc_enable_cv_wakeup_fds(1); - if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) { - global_cv_fd_table_shutdown(); - grpc_enable_cv_wakeup_fds(0); - return NULL; - } - return &vtable; -} - -#endif diff --git a/src/core/lib/iomgr/ev_poll_posix.cc b/src/core/lib/iomgr/ev_poll_posix.cc new file mode 100644 index 0000000000..e170702dca --- /dev/null +++ b/src/core/lib/iomgr/ev_poll_posix.cc @@ -0,0 +1,1746 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/iomgr/ev_poll_posix.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "src/core/lib/debug/stats.h" +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/timer.h" +#include "src/core/lib/iomgr/wakeup_fd_cv.h" +#include "src/core/lib/iomgr/wakeup_fd_posix.h" +#include "src/core/lib/profiling/timers.h" +#include "src/core/lib/support/block_annotate.h" +#include "src/core/lib/support/murmur_hash.h" + +#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1) + +/******************************************************************************* + * FD declarations + */ + +typedef struct grpc_fd_watcher { + struct grpc_fd_watcher *next; + struct grpc_fd_watcher *prev; + grpc_pollset *pollset; + grpc_pollset_worker *worker; + grpc_fd *fd; +} grpc_fd_watcher; + +struct grpc_fd { + int fd; + /* refst format: + bit0: 1=active/0=orphaned + bit1-n: refcount + meaning that mostly we ref by two to avoid altering the orphaned bit, + and just unref by 1 when we're ready to flag the object as orphaned */ + gpr_atm refst; + + gpr_mu mu; + int shutdown; + int closed; + int released; + grpc_error *shutdown_error; + + /* The watcher list. + + The following watcher related fields are protected by watcher_mu. + + An fd_watcher is an ephemeral object created when an fd wants to + begin polling, and destroyed after the poll. + + It denotes the fd's interest in whether to read poll or write poll + or both or neither on this fd. + + If a watcher is asked to poll for reads or writes, the read_watcher + or write_watcher fields are set respectively. A watcher may be asked + to poll for both, in which case both fields will be set. + + read_watcher and write_watcher may be NULL if no watcher has been + asked to poll for reads or writes. + + If an fd_watcher is not asked to poll for reads or writes, it's added + to a linked list of inactive watchers, rooted at inactive_watcher_root. + If at a later time there becomes need of a poller to poll, one of + the inactive pollers may be kicked out of their poll loops to take + that responsibility. */ + grpc_fd_watcher inactive_watcher_root; + grpc_fd_watcher *read_watcher; + grpc_fd_watcher *write_watcher; + + grpc_closure *read_closure; + grpc_closure *write_closure; + + grpc_closure *on_done_closure; + + grpc_iomgr_object iomgr_object; + + /* The pollset that last noticed and notified that the fd is readable */ + grpc_pollset *read_notifier_pollset; +}; + +/* Begin polling on an fd. + Registers that the given pollset is interested in this fd - so that if read + or writability interest changes, the pollset can be kicked to pick up that + new interest. + Return value is: + (fd_needs_read? read_mask : 0) | (fd_needs_write? write_mask : 0) + i.e. a combination of read_mask and write_mask determined by the fd's current + interest in said events. + Polling strategies that do not need to alter their behavior depending on the + fd's current interest (such as epoll) do not need to call this function. + MUST NOT be called with a pollset lock taken */ +static uint32_t fd_begin_poll(grpc_fd *fd, grpc_pollset *pollset, + grpc_pollset_worker *worker, uint32_t read_mask, + uint32_t write_mask, grpc_fd_watcher *rec); +/* Complete polling previously started with fd_begin_poll + MUST NOT be called with a pollset lock taken + if got_read or got_write are 1, also does the become_{readable,writable} as + appropriate. */ +static void fd_end_poll(grpc_exec_ctx *exec_ctx, grpc_fd_watcher *rec, + int got_read, int got_write, + grpc_pollset *read_notifier_pollset); + +/* Return 1 if this fd is orphaned, 0 otherwise */ +static bool fd_is_orphaned(grpc_fd *fd); + +#ifndef NDEBUG +static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line); +static void fd_unref(grpc_fd *fd, const char *reason, const char *file, + int line); +#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__) +#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__) +#else +static void fd_ref(grpc_fd *fd); +static void fd_unref(grpc_fd *fd); +#define GRPC_FD_REF(fd, reason) fd_ref(fd) +#define GRPC_FD_UNREF(fd, reason) fd_unref(fd) +#endif + +#define CLOSURE_NOT_READY ((grpc_closure *)0) +#define CLOSURE_READY ((grpc_closure *)1) + +/******************************************************************************* + * pollset declarations + */ + +typedef struct grpc_cached_wakeup_fd { + grpc_wakeup_fd fd; + struct grpc_cached_wakeup_fd *next; +} grpc_cached_wakeup_fd; + +struct grpc_pollset_worker { + grpc_cached_wakeup_fd *wakeup_fd; + int reevaluate_polling_on_wakeup; + int kicked_specifically; + struct grpc_pollset_worker *next; + struct grpc_pollset_worker *prev; +}; + +struct grpc_pollset { + gpr_mu mu; + grpc_pollset_worker root_worker; + int shutting_down; + int called_shutdown; + int kicked_without_pollers; + grpc_closure *shutdown_done; + grpc_closure_list idle_jobs; + int pollset_set_count; + /* all polled fds */ + size_t fd_count; + size_t fd_capacity; + grpc_fd **fds; + /* Local cache of eventfds for workers */ + grpc_cached_wakeup_fd *local_wakeup_cache; +}; + +/* Add an fd to a pollset */ +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + struct grpc_fd *fd); + +static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd); + +/* Convert a timespec to milliseconds: + - very small or negative poll times are clamped to zero to do a + non-blocking poll (which becomes spin polling) + - other small values are rounded up to one millisecond + - longer than a millisecond polls are rounded up to the next nearest + millisecond to avoid spinning + - infinite timeouts are converted to -1 */ +static int poll_deadline_to_millis_timeout(gpr_timespec deadline, + gpr_timespec now); + +/* Allow kick to wakeup the currently polling worker */ +#define GRPC_POLLSET_CAN_KICK_SELF 1 +/* Force the wakee to repoll when awoken */ +#define GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP 2 +/* As per pollset_kick, with an extended set of flags (defined above) + -- mostly for fd_posix's use. */ +static grpc_error *pollset_kick_ext(grpc_exec_ctx *exec_ctx, grpc_pollset *p, + grpc_pollset_worker *specific_worker, + uint32_t flags) GRPC_MUST_USE_RESULT; + +/* Return 1 if the pollset has active threads in pollset_work (pollset must + * be locked) */ +static bool pollset_has_workers(grpc_pollset *pollset); + +/******************************************************************************* + * pollset_set definitions + */ + +struct grpc_pollset_set { + gpr_mu mu; + + size_t pollset_count; + size_t pollset_capacity; + grpc_pollset **pollsets; + + size_t pollset_set_count; + size_t pollset_set_capacity; + struct grpc_pollset_set **pollset_sets; + + size_t fd_count; + size_t fd_capacity; + grpc_fd **fds; +}; + +/******************************************************************************* + * condition variable polling definitions + */ + +#define POLLCV_THREAD_GRACE_MS 1000 +#define CV_POLL_PERIOD_MS 1000 +#define CV_DEFAULT_TABLE_SIZE 16 + +typedef struct poll_result { + gpr_refcount refcount; + cv_node *watchers; + int watchcount; + struct pollfd *fds; + nfds_t nfds; + int retval; + int err; + int completed; +} poll_result; + +typedef struct poll_args { + gpr_cv trigger; + int trigger_set; + struct pollfd *fds; + nfds_t nfds; + poll_result *result; + struct poll_args *next; + struct poll_args *prev; +} poll_args; + +// This is a 2-tiered cache, we mantain a hash table +// of active poll calls, so we can wait on the result +// of that call. We also maintain a freelist of inactive +// poll threads. +typedef struct poll_hash_table { + poll_args *free_pollers; + poll_args **active_pollers; + unsigned int size; + unsigned int count; +} poll_hash_table; + +poll_hash_table poll_cache; +cv_fd_table g_cvfds; + +/******************************************************************************* + * fd_posix.c + */ + +#ifndef NDEBUG +#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__) +#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__) +static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file, + int line) { + if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { + gpr_log(GPR_DEBUG, + "FD %d %p ref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", + fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), + gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line); + } +#else +#define REF_BY(fd, n, reason) ref_by(fd, n) +#define UNREF_BY(fd, n, reason) unref_by(fd, n) +static void ref_by(grpc_fd *fd, int n) { +#endif + GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0); +} + +#ifndef NDEBUG +static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file, + int line) { + if (GRPC_TRACER_ON(grpc_trace_fd_refcount)) { + gpr_log(GPR_DEBUG, + "FD %d %p unref %d %" PRIdPTR " -> %" PRIdPTR " [%s; %s:%d]", + fd->fd, fd, n, gpr_atm_no_barrier_load(&fd->refst), + gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line); + } +#else +static void unref_by(grpc_fd *fd, int n) { +#endif + gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n); + if (old == n) { + gpr_mu_destroy(&fd->mu); + grpc_iomgr_unregister_object(&fd->iomgr_object); + if (fd->shutdown) GRPC_ERROR_UNREF(fd->shutdown_error); + gpr_free(fd); + } else { + GPR_ASSERT(old > n); + } +} + +static grpc_fd *fd_create(int fd, const char *name) { + grpc_fd *r = (grpc_fd *)gpr_malloc(sizeof(*r)); + gpr_mu_init(&r->mu); + gpr_atm_rel_store(&r->refst, 1); + r->shutdown = 0; + r->read_closure = CLOSURE_NOT_READY; + r->write_closure = CLOSURE_NOT_READY; + r->fd = fd; + r->inactive_watcher_root.next = r->inactive_watcher_root.prev = + &r->inactive_watcher_root; + r->read_watcher = r->write_watcher = NULL; + r->on_done_closure = NULL; + r->closed = 0; + r->released = 0; + r->read_notifier_pollset = NULL; + + char *name2; + gpr_asprintf(&name2, "%s fd=%d", name, fd); + grpc_iomgr_register_object(&r->iomgr_object, name2); + gpr_free(name2); + return r; +} + +static bool fd_is_orphaned(grpc_fd *fd) { + return (gpr_atm_acq_load(&fd->refst) & 1) == 0; +} + +/* Return the read-notifier pollset */ +static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx, + grpc_fd *fd) { + grpc_pollset *notifier = NULL; + + gpr_mu_lock(&fd->mu); + notifier = fd->read_notifier_pollset; + gpr_mu_unlock(&fd->mu); + + return notifier; +} + +static grpc_error *pollset_kick_locked(grpc_exec_ctx *exec_ctx, + grpc_fd_watcher *watcher) { + gpr_mu_lock(&watcher->pollset->mu); + GPR_ASSERT(watcher->worker); + grpc_error *err = + pollset_kick_ext(exec_ctx, watcher->pollset, watcher->worker, + GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP); + gpr_mu_unlock(&watcher->pollset->mu); + return err; +} + +static void maybe_wake_one_watcher_locked(grpc_exec_ctx *exec_ctx, + grpc_fd *fd) { + if (fd->inactive_watcher_root.next != &fd->inactive_watcher_root) { + pollset_kick_locked(exec_ctx, fd->inactive_watcher_root.next); + } else if (fd->read_watcher) { + pollset_kick_locked(exec_ctx, fd->read_watcher); + } else if (fd->write_watcher) { + pollset_kick_locked(exec_ctx, fd->write_watcher); + } +} + +static void wake_all_watchers_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + grpc_fd_watcher *watcher; + for (watcher = fd->inactive_watcher_root.next; + watcher != &fd->inactive_watcher_root; watcher = watcher->next) { + pollset_kick_locked(exec_ctx, watcher); + } + if (fd->read_watcher) { + pollset_kick_locked(exec_ctx, fd->read_watcher); + } + if (fd->write_watcher && fd->write_watcher != fd->read_watcher) { + pollset_kick_locked(exec_ctx, fd->write_watcher); + } +} + +static int has_watchers(grpc_fd *fd) { + return fd->read_watcher != NULL || fd->write_watcher != NULL || + fd->inactive_watcher_root.next != &fd->inactive_watcher_root; +} + +static void close_fd_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd) { + fd->closed = 1; + if (!fd->released) { + close(fd->fd); + } + GRPC_CLOSURE_SCHED(exec_ctx, fd->on_done_closure, GRPC_ERROR_NONE); +} + +static int fd_wrapped_fd(grpc_fd *fd) { + if (fd->released || fd->closed) { + return -1; + } else { + return fd->fd; + } +} + +static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *on_done, int *release_fd, + bool already_closed, const char *reason) { + fd->on_done_closure = on_done; + fd->released = release_fd != NULL; + if (release_fd != NULL) { + *release_fd = fd->fd; + fd->released = true; + } else if (already_closed) { + fd->released = true; + } + gpr_mu_lock(&fd->mu); + REF_BY(fd, 1, reason); /* remove active status, but keep referenced */ + if (!has_watchers(fd)) { + close_fd_locked(exec_ctx, fd); + } else { + wake_all_watchers_locked(exec_ctx, fd); + } + gpr_mu_unlock(&fd->mu); + UNREF_BY(fd, 2, reason); /* drop the reference */ +} + +/* increment refcount by two to avoid changing the orphan bit */ +#ifndef NDEBUG +static void fd_ref(grpc_fd *fd, const char *reason, const char *file, + int line) { + ref_by(fd, 2, reason, file, line); +} + +static void fd_unref(grpc_fd *fd, const char *reason, const char *file, + int line) { + unref_by(fd, 2, reason, file, line); +} +#else +static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); } + +static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); } +#endif + +static grpc_error *fd_shutdown_error(grpc_fd *fd) { + if (!fd->shutdown) { + return GRPC_ERROR_NONE; + } else { + return GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "FD shutdown", &fd->shutdown_error, 1); + } +} + +static void notify_on_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure **st, grpc_closure *closure) { + if (fd->shutdown) { + GRPC_CLOSURE_SCHED(exec_ctx, closure, + GRPC_ERROR_CREATE_FROM_STATIC_STRING("FD shutdown")); + } else if (*st == CLOSURE_NOT_READY) { + /* not ready ==> switch to a waiting state by setting the closure */ + *st = closure; + } else if (*st == CLOSURE_READY) { + /* already ready ==> queue the closure to run immediately */ + *st = CLOSURE_NOT_READY; + GRPC_CLOSURE_SCHED(exec_ctx, closure, fd_shutdown_error(fd)); + maybe_wake_one_watcher_locked(exec_ctx, fd); + } else { + /* upcallptr was set to a different closure. This is an error! */ + gpr_log(GPR_ERROR, + "User called a notify_on function with a previous callback still " + "pending"); + abort(); + } +} + +/* returns 1 if state becomes not ready */ +static int set_ready_locked(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure **st) { + if (*st == CLOSURE_READY) { + /* duplicate ready ==> ignore */ + return 0; + } else if (*st == CLOSURE_NOT_READY) { + /* not ready, and not waiting ==> flag ready */ + *st = CLOSURE_READY; + return 0; + } else { + /* waiting ==> queue closure */ + GRPC_CLOSURE_SCHED(exec_ctx, *st, fd_shutdown_error(fd)); + *st = CLOSURE_NOT_READY; + return 1; + } +} + +static void set_read_notifier_pollset_locked( + grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_pollset *read_notifier_pollset) { + fd->read_notifier_pollset = read_notifier_pollset; +} + +static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) { + gpr_mu_lock(&fd->mu); + /* only shutdown once */ + if (!fd->shutdown) { + fd->shutdown = 1; + fd->shutdown_error = why; + /* signal read/write closed to OS so that future operations fail */ + shutdown(fd->fd, SHUT_RDWR); + set_ready_locked(exec_ctx, fd, &fd->read_closure); + set_ready_locked(exec_ctx, fd, &fd->write_closure); + } else { + GRPC_ERROR_UNREF(why); + } + gpr_mu_unlock(&fd->mu); +} + +static bool fd_is_shutdown(grpc_fd *fd) { + gpr_mu_lock(&fd->mu); + bool r = fd->shutdown; + gpr_mu_unlock(&fd->mu); + return r; +} + +static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + gpr_mu_lock(&fd->mu); + notify_on_locked(exec_ctx, fd, &fd->read_closure, closure); + gpr_mu_unlock(&fd->mu); +} + +static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + gpr_mu_lock(&fd->mu); + notify_on_locked(exec_ctx, fd, &fd->write_closure, closure); + gpr_mu_unlock(&fd->mu); +} + +static uint32_t fd_begin_poll(grpc_fd *fd, grpc_pollset *pollset, + grpc_pollset_worker *worker, uint32_t read_mask, + uint32_t write_mask, grpc_fd_watcher *watcher) { + uint32_t mask = 0; + grpc_closure *cur; + int requested; + /* keep track of pollers that have requested our events, in case they change + */ + GRPC_FD_REF(fd, "poll"); + + gpr_mu_lock(&fd->mu); + + /* if we are shutdown, then don't add to the watcher set */ + if (fd->shutdown) { + watcher->fd = NULL; + watcher->pollset = NULL; + watcher->worker = NULL; + gpr_mu_unlock(&fd->mu); + GRPC_FD_UNREF(fd, "poll"); + return 0; + } + + /* if there is nobody polling for read, but we need to, then start doing so */ + cur = fd->read_closure; + requested = cur != CLOSURE_READY; + if (read_mask && fd->read_watcher == NULL && requested) { + fd->read_watcher = watcher; + mask |= read_mask; + } + /* if there is nobody polling for write, but we need to, then start doing so + */ + cur = fd->write_closure; + requested = cur != CLOSURE_READY; + if (write_mask && fd->write_watcher == NULL && requested) { + fd->write_watcher = watcher; + mask |= write_mask; + } + /* if not polling, remember this watcher in case we need someone to later */ + if (mask == 0 && worker != NULL) { + watcher->next = &fd->inactive_watcher_root; + watcher->prev = watcher->next->prev; + watcher->next->prev = watcher->prev->next = watcher; + } + watcher->pollset = pollset; + watcher->worker = worker; + watcher->fd = fd; + gpr_mu_unlock(&fd->mu); + + return mask; +} + +static void fd_end_poll(grpc_exec_ctx *exec_ctx, grpc_fd_watcher *watcher, + int got_read, int got_write, + grpc_pollset *read_notifier_pollset) { + int was_polling = 0; + int kick = 0; + grpc_fd *fd = watcher->fd; + + if (fd == NULL) { + return; + } + + gpr_mu_lock(&fd->mu); + + if (watcher == fd->read_watcher) { + /* remove read watcher, kick if we still need a read */ + was_polling = 1; + if (!got_read) { + kick = 1; + } + fd->read_watcher = NULL; + } + if (watcher == fd->write_watcher) { + /* remove write watcher, kick if we still need a write */ + was_polling = 1; + if (!got_write) { + kick = 1; + } + fd->write_watcher = NULL; + } + if (!was_polling && watcher->worker != NULL) { + /* remove from inactive list */ + watcher->next->prev = watcher->prev; + watcher->prev->next = watcher->next; + } + if (got_read) { + if (set_ready_locked(exec_ctx, fd, &fd->read_closure)) { + kick = 1; + } + if (read_notifier_pollset != NULL) { + set_read_notifier_pollset_locked(exec_ctx, fd, read_notifier_pollset); + } + } + if (got_write) { + if (set_ready_locked(exec_ctx, fd, &fd->write_closure)) { + kick = 1; + } + } + if (kick) { + maybe_wake_one_watcher_locked(exec_ctx, fd); + } + if (fd_is_orphaned(fd) && !has_watchers(fd) && !fd->closed) { + close_fd_locked(exec_ctx, fd); + } + gpr_mu_unlock(&fd->mu); + + GRPC_FD_UNREF(fd, "poll"); +} + +/******************************************************************************* + * pollset_posix.c + */ + +GPR_TLS_DECL(g_current_thread_poller); +GPR_TLS_DECL(g_current_thread_worker); + +static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->prev->next = worker->next; + worker->next->prev = worker->prev; +} + +static bool pollset_has_workers(grpc_pollset *p) { + return p->root_worker.next != &p->root_worker; +} + +static bool pollset_in_pollset_sets(grpc_pollset *p) { + return p->pollset_set_count; +} + +static bool pollset_has_observers(grpc_pollset *p) { + return pollset_has_workers(p) || pollset_in_pollset_sets(p); +} + +static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) { + if (pollset_has_workers(p)) { + grpc_pollset_worker *w = p->root_worker.next; + remove_worker(p, w); + return w; + } else { + return NULL; + } +} + +static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->next = &p->root_worker; + worker->prev = worker->next->prev; + worker->prev->next = worker->next->prev = worker; +} + +static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) { + worker->prev = &p->root_worker; + worker->next = worker->prev->next; + worker->prev->next = worker->next->prev = worker; +} + +static void kick_append_error(grpc_error **composite, grpc_error *error) { + if (error == GRPC_ERROR_NONE) return; + if (*composite == GRPC_ERROR_NONE) { + *composite = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Kick Failure"); + } + *composite = grpc_error_add_child(*composite, error); +} + +static grpc_error *pollset_kick_ext(grpc_exec_ctx *exec_ctx, grpc_pollset *p, + grpc_pollset_worker *specific_worker, + uint32_t flags) { + GPR_TIMER_BEGIN("pollset_kick_ext", 0); + grpc_error *error = GRPC_ERROR_NONE; + GRPC_STATS_INC_POLLSET_KICK(exec_ctx); + + /* pollset->mu already held */ + if (specific_worker != NULL) { + if (specific_worker == GRPC_POLLSET_KICK_BROADCAST) { + GPR_TIMER_BEGIN("pollset_kick_ext.broadcast", 0); + GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); + for (specific_worker = p->root_worker.next; + specific_worker != &p->root_worker; + specific_worker = specific_worker->next) { + kick_append_error( + &error, grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd)); + } + p->kicked_without_pollers = true; + GPR_TIMER_END("pollset_kick_ext.broadcast", 0); + } else if (gpr_tls_get(&g_current_thread_worker) != + (intptr_t)specific_worker) { + GPR_TIMER_MARK("different_thread_worker", 0); + if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { + specific_worker->reevaluate_polling_on_wakeup = true; + } + specific_worker->kicked_specifically = true; + kick_append_error(&error, + grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd)); + } else if ((flags & GRPC_POLLSET_CAN_KICK_SELF) != 0) { + GPR_TIMER_MARK("kick_yoself", 0); + if ((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) != 0) { + specific_worker->reevaluate_polling_on_wakeup = true; + } + specific_worker->kicked_specifically = true; + kick_append_error(&error, + grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd)); + } + } else if (gpr_tls_get(&g_current_thread_poller) != (intptr_t)p) { + GPR_ASSERT((flags & GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) == 0); + GPR_TIMER_MARK("kick_anonymous", 0); + specific_worker = pop_front_worker(p); + if (specific_worker != NULL) { + if (gpr_tls_get(&g_current_thread_worker) == (intptr_t)specific_worker) { + GPR_TIMER_MARK("kick_anonymous_not_self", 0); + push_back_worker(p, specific_worker); + specific_worker = pop_front_worker(p); + if ((flags & GRPC_POLLSET_CAN_KICK_SELF) == 0 && + gpr_tls_get(&g_current_thread_worker) == + (intptr_t)specific_worker) { + push_back_worker(p, specific_worker); + specific_worker = NULL; + } + } + if (specific_worker != NULL) { + GPR_TIMER_MARK("finally_kick", 0); + push_back_worker(p, specific_worker); + kick_append_error( + &error, grpc_wakeup_fd_wakeup(&specific_worker->wakeup_fd->fd)); + } + } else { + GPR_TIMER_MARK("kicked_no_pollers", 0); + p->kicked_without_pollers = true; + } + } + + GPR_TIMER_END("pollset_kick_ext", 0); + GRPC_LOG_IF_ERROR("pollset_kick_ext", GRPC_ERROR_REF(error)); + return error; +} + +static grpc_error *pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *p, + grpc_pollset_worker *specific_worker) { + return pollset_kick_ext(exec_ctx, p, specific_worker, 0); +} + +/* global state management */ + +static grpc_error *pollset_global_init(void) { + gpr_tls_init(&g_current_thread_poller); + gpr_tls_init(&g_current_thread_worker); + return GRPC_ERROR_NONE; +} + +static void pollset_global_shutdown(void) { + gpr_tls_destroy(&g_current_thread_poller); + gpr_tls_destroy(&g_current_thread_worker); +} + +/* main interface */ + +static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) { + gpr_mu_init(&pollset->mu); + *mu = &pollset->mu; + pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker; + pollset->shutting_down = 0; + pollset->called_shutdown = 0; + pollset->kicked_without_pollers = 0; + pollset->idle_jobs.head = pollset->idle_jobs.tail = NULL; + pollset->local_wakeup_cache = NULL; + pollset->kicked_without_pollers = 0; + pollset->fd_count = 0; + pollset->fd_capacity = 0; + pollset->fds = NULL; + pollset->pollset_set_count = 0; +} + +static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { + GPR_ASSERT(!pollset_has_workers(pollset)); + GPR_ASSERT(pollset->idle_jobs.head == pollset->idle_jobs.tail); + while (pollset->local_wakeup_cache) { + grpc_cached_wakeup_fd *next = pollset->local_wakeup_cache->next; + grpc_wakeup_fd_destroy(&pollset->local_wakeup_cache->fd); + gpr_free(pollset->local_wakeup_cache); + pollset->local_wakeup_cache = next; + } + gpr_free(pollset->fds); + gpr_mu_destroy(&pollset->mu); +} + +static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_fd *fd) { + gpr_mu_lock(&pollset->mu); + size_t i; + /* TODO(ctiller): this is O(num_fds^2); maybe switch to a hash set here */ + for (i = 0; i < pollset->fd_count; i++) { + if (pollset->fds[i] == fd) goto exit; + } + if (pollset->fd_count == pollset->fd_capacity) { + pollset->fd_capacity = + GPR_MAX(pollset->fd_capacity + 8, pollset->fd_count * 3 / 2); + pollset->fds = (grpc_fd **)gpr_realloc( + pollset->fds, sizeof(grpc_fd *) * pollset->fd_capacity); + } + pollset->fds[pollset->fd_count++] = fd; + GRPC_FD_REF(fd, "multipoller"); + pollset_kick(exec_ctx, pollset, NULL); +exit: + gpr_mu_unlock(&pollset->mu); +} + +static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { + GPR_ASSERT(grpc_closure_list_empty(pollset->idle_jobs)); + size_t i; + for (i = 0; i < pollset->fd_count; i++) { + GRPC_FD_UNREF(pollset->fds[i], "multipoller"); + } + pollset->fd_count = 0; + GRPC_CLOSURE_SCHED(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE); +} + +static void work_combine_error(grpc_error **composite, grpc_error *error) { + if (error == GRPC_ERROR_NONE) return; + if (*composite == GRPC_ERROR_NONE) { + *composite = GRPC_ERROR_CREATE_FROM_STATIC_STRING("pollset_work"); + } + *composite = grpc_error_add_child(*composite, error); +} + +static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker **worker_hdl, + gpr_timespec now, gpr_timespec deadline) { + grpc_pollset_worker worker; + if (worker_hdl) *worker_hdl = &worker; + grpc_error *error = GRPC_ERROR_NONE; + + /* Avoid malloc for small number of elements. */ + enum { inline_elements = 96 }; + struct pollfd pollfd_space[inline_elements]; + struct grpc_fd_watcher watcher_space[inline_elements]; + + /* pollset->mu already held */ + int added_worker = 0; + int locked = 1; + int queued_work = 0; + int keep_polling = 0; + GPR_TIMER_BEGIN("pollset_work", 0); + /* this must happen before we (potentially) drop pollset->mu */ + worker.next = worker.prev = NULL; + worker.reevaluate_polling_on_wakeup = 0; + if (pollset->local_wakeup_cache != NULL) { + worker.wakeup_fd = pollset->local_wakeup_cache; + pollset->local_wakeup_cache = worker.wakeup_fd->next; + } else { + worker.wakeup_fd = + (grpc_cached_wakeup_fd *)gpr_malloc(sizeof(*worker.wakeup_fd)); + error = grpc_wakeup_fd_init(&worker.wakeup_fd->fd); + if (error != GRPC_ERROR_NONE) { + GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error)); + return error; + } + } + worker.kicked_specifically = 0; + /* If there's work waiting for the pollset to be idle, and the + pollset is idle, then do that work */ + if (!pollset_has_workers(pollset) && + !grpc_closure_list_empty(pollset->idle_jobs)) { + GPR_TIMER_MARK("pollset_work.idle_jobs", 0); + GRPC_CLOSURE_LIST_SCHED(exec_ctx, &pollset->idle_jobs); + goto done; + } + /* If we're shutting down then we don't execute any extended work */ + if (pollset->shutting_down) { + GPR_TIMER_MARK("pollset_work.shutting_down", 0); + goto done; + } + /* Start polling, and keep doing so while we're being asked to + re-evaluate our pollers (this allows poll() based pollers to + ensure they don't miss wakeups) */ + keep_polling = 1; + gpr_tls_set(&g_current_thread_poller, (intptr_t)pollset); + while (keep_polling) { + keep_polling = 0; + if (!pollset->kicked_without_pollers) { + if (!added_worker) { + push_front_worker(pollset, &worker); + added_worker = 1; + gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker); + } + GPR_TIMER_BEGIN("maybe_work_and_unlock", 0); +#define POLLOUT_CHECK (POLLOUT | POLLHUP | POLLERR) +#define POLLIN_CHECK (POLLIN | POLLHUP | POLLERR) + + int timeout; + int r; + size_t i, fd_count; + nfds_t pfd_count; + grpc_fd_watcher *watchers; + struct pollfd *pfds; + + timeout = poll_deadline_to_millis_timeout(deadline, now); + + if (pollset->fd_count + 2 <= inline_elements) { + pfds = pollfd_space; + watchers = watcher_space; + } else { + /* Allocate one buffer to hold both pfds and watchers arrays */ + const size_t pfd_size = sizeof(*pfds) * (pollset->fd_count + 2); + const size_t watch_size = sizeof(*watchers) * (pollset->fd_count + 2); + void *buf = gpr_malloc(pfd_size + watch_size); + pfds = (struct pollfd *)buf; + watchers = (grpc_fd_watcher *)(void *)((char *)buf + pfd_size); + } + + fd_count = 0; + pfd_count = 1; + pfds[0].fd = GRPC_WAKEUP_FD_GET_READ_FD(&worker.wakeup_fd->fd); + pfds[0].events = POLLIN; + pfds[0].revents = 0; + for (i = 0; i < pollset->fd_count; i++) { + if (fd_is_orphaned(pollset->fds[i])) { + GRPC_FD_UNREF(pollset->fds[i], "multipoller"); + } else { + pollset->fds[fd_count++] = pollset->fds[i]; + watchers[pfd_count].fd = pollset->fds[i]; + GRPC_FD_REF(watchers[pfd_count].fd, "multipoller_start"); + pfds[pfd_count].fd = pollset->fds[i]->fd; + pfds[pfd_count].revents = 0; + pfd_count++; + } + } + pollset->fd_count = fd_count; + gpr_mu_unlock(&pollset->mu); + + for (i = 1; i < pfd_count; i++) { + grpc_fd *fd = watchers[i].fd; + pfds[i].events = (short)fd_begin_poll(fd, pollset, &worker, POLLIN, + POLLOUT, &watchers[i]); + GRPC_FD_UNREF(fd, "multipoller_start"); + } + + /* TODO(vpai): Consider first doing a 0 timeout poll here to avoid + even going into the blocking annotation if possible */ + GRPC_SCHEDULING_START_BLOCKING_REGION; + GRPC_STATS_INC_SYSCALL_POLL(exec_ctx); + r = grpc_poll_function(pfds, pfd_count, timeout); + GRPC_SCHEDULING_END_BLOCKING_REGION; + + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "%p poll=%d", pollset, r); + } + + if (r < 0) { + if (errno != EINTR) { + work_combine_error(&error, GRPC_OS_ERROR(errno, "poll")); + } + + for (i = 1; i < pfd_count; i++) { + if (watchers[i].fd == NULL) { + fd_end_poll(exec_ctx, &watchers[i], 0, 0, NULL); + } else { + // Wake up all the file descriptors, if we have an invalid one + // we can identify it on the next pollset_work() + fd_end_poll(exec_ctx, &watchers[i], 1, 1, pollset); + } + } + } else if (r == 0) { + for (i = 1; i < pfd_count; i++) { + fd_end_poll(exec_ctx, &watchers[i], 0, 0, NULL); + } + } else { + if (pfds[0].revents & POLLIN_CHECK) { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "%p: got_wakeup", pollset); + } + work_combine_error( + &error, grpc_wakeup_fd_consume_wakeup(&worker.wakeup_fd->fd)); + } + for (i = 1; i < pfd_count; i++) { + if (watchers[i].fd == NULL) { + fd_end_poll(exec_ctx, &watchers[i], 0, 0, NULL); + } else { + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_DEBUG, "%p got_event: %d r:%d w:%d [%d]", pollset, + pfds[i].fd, (pfds[i].revents & POLLIN_CHECK) != 0, + (pfds[i].revents & POLLOUT_CHECK) != 0, pfds[i].revents); + } + fd_end_poll(exec_ctx, &watchers[i], pfds[i].revents & POLLIN_CHECK, + pfds[i].revents & POLLOUT_CHECK, pollset); + } + } + } + + if (pfds != pollfd_space) { + /* pfds and watchers are in the same memory block pointed to by pfds */ + gpr_free(pfds); + } + + GPR_TIMER_END("maybe_work_and_unlock", 0); + locked = 0; + } else { + GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0); + pollset->kicked_without_pollers = 0; + } + /* Finished execution - start cleaning up. + Note that we may arrive here from outside the enclosing while() loop. + In that case we won't loop though as we haven't added worker to the + worker list, which means nobody could ask us to re-evaluate polling). */ + done: + if (!locked) { + queued_work |= grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&pollset->mu); + locked = 1; + } + /* If we're forced to re-evaluate polling (via pollset_kick with + GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) then we land here and force + a loop */ + if (worker.reevaluate_polling_on_wakeup && error == GRPC_ERROR_NONE) { + worker.reevaluate_polling_on_wakeup = 0; + pollset->kicked_without_pollers = 0; + if (queued_work || worker.kicked_specifically) { + /* If there's queued work on the list, then set the deadline to be + immediate so we get back out of the polling loop quickly */ + deadline = gpr_inf_past(GPR_CLOCK_MONOTONIC); + } + keep_polling = 1; + } + if (keep_polling) { + now = gpr_now(now.clock_type); + } + } + gpr_tls_set(&g_current_thread_poller, 0); + if (added_worker) { + remove_worker(pollset, &worker); + gpr_tls_set(&g_current_thread_worker, 0); + } + /* release wakeup fd to the local pool */ + worker.wakeup_fd->next = pollset->local_wakeup_cache; + pollset->local_wakeup_cache = worker.wakeup_fd; + /* check shutdown conditions */ + if (pollset->shutting_down) { + if (pollset_has_workers(pollset)) { + pollset_kick(exec_ctx, pollset, NULL); + } else if (!pollset->called_shutdown && !pollset_has_observers(pollset)) { + pollset->called_shutdown = 1; + gpr_mu_unlock(&pollset->mu); + finish_shutdown(exec_ctx, pollset); + grpc_exec_ctx_flush(exec_ctx); + /* Continuing to access pollset here is safe -- it is the caller's + * responsibility to not destroy when it has outstanding calls to + * pollset_work. + * TODO(dklempner): Can we refactor the shutdown logic to avoid this? */ + gpr_mu_lock(&pollset->mu); + } else if (!grpc_closure_list_empty(pollset->idle_jobs)) { + GRPC_CLOSURE_LIST_SCHED(exec_ctx, &pollset->idle_jobs); + gpr_mu_unlock(&pollset->mu); + grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&pollset->mu); + } + } + if (worker_hdl) *worker_hdl = NULL; + GPR_TIMER_END("pollset_work", 0); + GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error)); + return error; +} + +static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_closure *closure) { + GPR_ASSERT(!pollset->shutting_down); + pollset->shutting_down = 1; + pollset->shutdown_done = closure; + pollset_kick(exec_ctx, pollset, GRPC_POLLSET_KICK_BROADCAST); + if (!pollset_has_workers(pollset)) { + GRPC_CLOSURE_LIST_SCHED(exec_ctx, &pollset->idle_jobs); + } + if (!pollset->called_shutdown && !pollset_has_observers(pollset)) { + pollset->called_shutdown = 1; + finish_shutdown(exec_ctx, pollset); + } +} + +static int poll_deadline_to_millis_timeout(gpr_timespec deadline, + gpr_timespec now) { + gpr_timespec timeout; + static const int64_t max_spin_polling_us = 10; + if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { + return -1; + } + if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros( + max_spin_polling_us, + GPR_TIMESPAN))) <= 0) { + return 0; + } + timeout = gpr_time_sub(deadline, now); + return gpr_time_to_millis(gpr_time_add( + timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); +} + +/******************************************************************************* + * pollset_set_posix.c + */ + +static grpc_pollset_set *pollset_set_create(void) { + grpc_pollset_set *pollset_set = + (grpc_pollset_set *)gpr_zalloc(sizeof(*pollset_set)); + gpr_mu_init(&pollset_set->mu); + return pollset_set; +} + +static void pollset_set_destroy(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set) { + size_t i; + gpr_mu_destroy(&pollset_set->mu); + for (i = 0; i < pollset_set->fd_count; i++) { + GRPC_FD_UNREF(pollset_set->fds[i], "pollset_set"); + } + for (i = 0; i < pollset_set->pollset_count; i++) { + grpc_pollset *pollset = pollset_set->pollsets[i]; + gpr_mu_lock(&pollset->mu); + pollset->pollset_set_count--; + /* check shutdown */ + if (pollset->shutting_down && !pollset->called_shutdown && + !pollset_has_observers(pollset)) { + pollset->called_shutdown = 1; + gpr_mu_unlock(&pollset->mu); + finish_shutdown(exec_ctx, pollset); + } else { + gpr_mu_unlock(&pollset->mu); + } + } + gpr_free(pollset_set->pollsets); + gpr_free(pollset_set->pollset_sets); + gpr_free(pollset_set->fds); + gpr_free(pollset_set); +} + +static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, + grpc_pollset *pollset) { + size_t i, j; + gpr_mu_lock(&pollset->mu); + pollset->pollset_set_count++; + gpr_mu_unlock(&pollset->mu); + gpr_mu_lock(&pollset_set->mu); + if (pollset_set->pollset_count == pollset_set->pollset_capacity) { + pollset_set->pollset_capacity = + GPR_MAX(8, 2 * pollset_set->pollset_capacity); + pollset_set->pollsets = (grpc_pollset **)gpr_realloc( + pollset_set->pollsets, + pollset_set->pollset_capacity * sizeof(*pollset_set->pollsets)); + } + pollset_set->pollsets[pollset_set->pollset_count++] = pollset; + for (i = 0, j = 0; i < pollset_set->fd_count; i++) { + if (fd_is_orphaned(pollset_set->fds[i])) { + GRPC_FD_UNREF(pollset_set->fds[i], "pollset_set"); + } else { + pollset_add_fd(exec_ctx, pollset, pollset_set->fds[i]); + pollset_set->fds[j++] = pollset_set->fds[i]; + } + } + pollset_set->fd_count = j; + gpr_mu_unlock(&pollset_set->mu); +} + +static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, + grpc_pollset *pollset) { + size_t i; + gpr_mu_lock(&pollset_set->mu); + for (i = 0; i < pollset_set->pollset_count; i++) { + if (pollset_set->pollsets[i] == pollset) { + pollset_set->pollset_count--; + GPR_SWAP(grpc_pollset *, pollset_set->pollsets[i], + pollset_set->pollsets[pollset_set->pollset_count]); + break; + } + } + gpr_mu_unlock(&pollset_set->mu); + gpr_mu_lock(&pollset->mu); + pollset->pollset_set_count--; + /* check shutdown */ + if (pollset->shutting_down && !pollset->called_shutdown && + !pollset_has_observers(pollset)) { + pollset->called_shutdown = 1; + gpr_mu_unlock(&pollset->mu); + finish_shutdown(exec_ctx, pollset); + } else { + gpr_mu_unlock(&pollset->mu); + } +} + +static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) { + size_t i, j; + gpr_mu_lock(&bag->mu); + if (bag->pollset_set_count == bag->pollset_set_capacity) { + bag->pollset_set_capacity = GPR_MAX(8, 2 * bag->pollset_set_capacity); + bag->pollset_sets = (grpc_pollset_set **)gpr_realloc( + bag->pollset_sets, + bag->pollset_set_capacity * sizeof(*bag->pollset_sets)); + } + bag->pollset_sets[bag->pollset_set_count++] = item; + for (i = 0, j = 0; i < bag->fd_count; i++) { + if (fd_is_orphaned(bag->fds[i])) { + GRPC_FD_UNREF(bag->fds[i], "pollset_set"); + } else { + pollset_set_add_fd(exec_ctx, item, bag->fds[i]); + bag->fds[j++] = bag->fds[i]; + } + } + bag->fd_count = j; + gpr_mu_unlock(&bag->mu); +} + +static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) { + size_t i; + gpr_mu_lock(&bag->mu); + for (i = 0; i < bag->pollset_set_count; i++) { + if (bag->pollset_sets[i] == item) { + bag->pollset_set_count--; + GPR_SWAP(grpc_pollset_set *, bag->pollset_sets[i], + bag->pollset_sets[bag->pollset_set_count]); + break; + } + } + gpr_mu_unlock(&bag->mu); +} + +static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd) { + size_t i; + gpr_mu_lock(&pollset_set->mu); + if (pollset_set->fd_count == pollset_set->fd_capacity) { + pollset_set->fd_capacity = GPR_MAX(8, 2 * pollset_set->fd_capacity); + pollset_set->fds = (grpc_fd **)gpr_realloc( + pollset_set->fds, pollset_set->fd_capacity * sizeof(*pollset_set->fds)); + } + GRPC_FD_REF(fd, "pollset_set"); + pollset_set->fds[pollset_set->fd_count++] = fd; + for (i = 0; i < pollset_set->pollset_count; i++) { + pollset_add_fd(exec_ctx, pollset_set->pollsets[i], fd); + } + for (i = 0; i < pollset_set->pollset_set_count; i++) { + pollset_set_add_fd(exec_ctx, pollset_set->pollset_sets[i], fd); + } + gpr_mu_unlock(&pollset_set->mu); +} + +static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd) { + size_t i; + gpr_mu_lock(&pollset_set->mu); + for (i = 0; i < pollset_set->fd_count; i++) { + if (pollset_set->fds[i] == fd) { + pollset_set->fd_count--; + GPR_SWAP(grpc_fd *, pollset_set->fds[i], + pollset_set->fds[pollset_set->fd_count]); + GRPC_FD_UNREF(fd, "pollset_set"); + break; + } + } + for (i = 0; i < pollset_set->pollset_set_count; i++) { + pollset_set_del_fd(exec_ctx, pollset_set->pollset_sets[i], fd); + } + gpr_mu_unlock(&pollset_set->mu); +} + +/******************************************************************************* + * Condition Variable polling extensions + */ + +static void run_poll(void *args); +static void cache_poller_locked(poll_args *args); + +static void cache_insert_locked(poll_args *args) { + uint32_t key = gpr_murmur_hash3(args->fds, args->nfds * sizeof(struct pollfd), + 0xDEADBEEF); + key = key % poll_cache.size; + if (poll_cache.active_pollers[key]) { + poll_cache.active_pollers[key]->prev = args; + } + args->next = poll_cache.active_pollers[key]; + args->prev = NULL; + poll_cache.active_pollers[key] = args; + poll_cache.count++; +} + +static void init_result(poll_args *pargs) { + pargs->result = (poll_result *)gpr_malloc(sizeof(poll_result)); + gpr_ref_init(&pargs->result->refcount, 1); + pargs->result->watchers = NULL; + pargs->result->watchcount = 0; + pargs->result->fds = + (struct pollfd *)gpr_malloc(sizeof(struct pollfd) * pargs->nfds); + memcpy(pargs->result->fds, pargs->fds, sizeof(struct pollfd) * pargs->nfds); + pargs->result->nfds = pargs->nfds; + pargs->result->retval = 0; + pargs->result->err = 0; + pargs->result->completed = 0; +} + +// Creates a poll_args object for a given arguments to poll(). +// This object may return a poll_args in the cache. +static poll_args *get_poller_locked(struct pollfd *fds, nfds_t count) { + uint32_t key = + gpr_murmur_hash3(fds, count * sizeof(struct pollfd), 0xDEADBEEF); + key = key % poll_cache.size; + poll_args *curr = poll_cache.active_pollers[key]; + while (curr) { + if (curr->nfds == count && + memcmp(curr->fds, fds, count * sizeof(struct pollfd)) == 0) { + gpr_free(fds); + return curr; + } + curr = curr->next; + } + + if (poll_cache.free_pollers) { + poll_args *pargs = poll_cache.free_pollers; + poll_cache.free_pollers = pargs->next; + if (poll_cache.free_pollers) { + poll_cache.free_pollers->prev = NULL; + } + pargs->fds = fds; + pargs->nfds = count; + pargs->next = NULL; + pargs->prev = NULL; + init_result(pargs); + cache_poller_locked(pargs); + return pargs; + } + + poll_args *pargs = (poll_args *)gpr_malloc(sizeof(struct poll_args)); + gpr_cv_init(&pargs->trigger); + pargs->fds = fds; + pargs->nfds = count; + pargs->next = NULL; + pargs->prev = NULL; + pargs->trigger_set = 0; + init_result(pargs); + cache_poller_locked(pargs); + gpr_thd_id t_id; + gpr_thd_options opt = gpr_thd_options_default(); + gpr_ref(&g_cvfds.pollcount); + gpr_thd_options_set_detached(&opt); + GPR_ASSERT(gpr_thd_new(&t_id, &run_poll, pargs, &opt)); + return pargs; +} + +static void cache_delete_locked(poll_args *args) { + if (!args->prev) { + uint32_t key = gpr_murmur_hash3( + args->fds, args->nfds * sizeof(struct pollfd), 0xDEADBEEF); + key = key % poll_cache.size; + GPR_ASSERT(poll_cache.active_pollers[key] == args); + poll_cache.active_pollers[key] = args->next; + } else { + args->prev->next = args->next; + } + + if (args->next) { + args->next->prev = args->prev; + } + + poll_cache.count--; + if (poll_cache.free_pollers) { + poll_cache.free_pollers->prev = args; + } + args->prev = NULL; + args->next = poll_cache.free_pollers; + gpr_free(args->fds); + poll_cache.free_pollers = args; +} + +static void cache_poller_locked(poll_args *args) { + if (poll_cache.count + 1 > poll_cache.size / 2) { + poll_args **old_active_pollers = poll_cache.active_pollers; + poll_cache.size = poll_cache.size * 2; + poll_cache.count = 0; + poll_cache.active_pollers = + (poll_args **)gpr_malloc(sizeof(void *) * poll_cache.size); + for (unsigned int i = 0; i < poll_cache.size; i++) { + poll_cache.active_pollers[i] = NULL; + } + for (unsigned int i = 0; i < poll_cache.size / 2; i++) { + poll_args *curr = old_active_pollers[i]; + poll_args *next = NULL; + while (curr) { + next = curr->next; + cache_insert_locked(curr); + curr = next; + } + } + gpr_free(old_active_pollers); + } + + cache_insert_locked(args); +} + +static void cache_destroy_locked(poll_args *args) { + if (args->next) { + args->next->prev = args->prev; + } + + if (args->prev) { + args->prev->next = args->next; + } else { + poll_cache.free_pollers = args->next; + } + + gpr_free(args); +} + +static void decref_poll_result(poll_result *res) { + if (gpr_unref(&res->refcount)) { + GPR_ASSERT(!res->watchers); + gpr_free(res->fds); + gpr_free(res); + } +} + +void remove_cvn(cv_node **head, cv_node *target) { + if (target->next) { + target->next->prev = target->prev; + } + + if (target->prev) { + target->prev->next = target->next; + } else { + *head = target->next; + } +} + +gpr_timespec thread_grace; + +// Poll in a background thread +static void run_poll(void *args) { + poll_args *pargs = (poll_args *)args; + while (1) { + poll_result *result = pargs->result; + int retval = g_cvfds.poll(result->fds, result->nfds, CV_POLL_PERIOD_MS); + gpr_mu_lock(&g_cvfds.mu); + if (retval != 0) { + result->completed = 1; + result->retval = retval; + result->err = errno; + cv_node *watcher = result->watchers; + while (watcher) { + gpr_cv_signal(watcher->cv); + watcher = watcher->next; + } + } + if (result->watchcount == 0 || result->completed) { + cache_delete_locked(pargs); + decref_poll_result(result); + // Leave this polling thread alive for a grace period to do another poll() + // op + gpr_timespec deadline = gpr_now(GPR_CLOCK_REALTIME); + deadline = gpr_time_add(deadline, thread_grace); + pargs->trigger_set = 0; + gpr_cv_wait(&pargs->trigger, &g_cvfds.mu, deadline); + if (!pargs->trigger_set) { + cache_destroy_locked(pargs); + break; + } + } + gpr_mu_unlock(&g_cvfds.mu); + } + + // We still have the lock here + if (gpr_unref(&g_cvfds.pollcount)) { + gpr_cv_signal(&g_cvfds.shutdown_cv); + } + gpr_mu_unlock(&g_cvfds.mu); +} + +// This function overrides poll() to handle condition variable wakeup fds +static int cvfd_poll(struct pollfd *fds, nfds_t nfds, int timeout) { + unsigned int i; + int res, idx; + cv_node *pollcv; + int skip_poll = 0; + nfds_t nsockfds = 0; + poll_result *result = NULL; + gpr_mu_lock(&g_cvfds.mu); + pollcv = (cv_node *)gpr_malloc(sizeof(cv_node)); + pollcv->next = NULL; + gpr_cv pollcv_cv; + gpr_cv_init(&pollcv_cv); + pollcv->cv = &pollcv_cv; + cv_node *fd_cvs = (cv_node *)gpr_malloc(nfds * sizeof(cv_node)); + + for (i = 0; i < nfds; i++) { + fds[i].revents = 0; + if (fds[i].fd < 0 && (fds[i].events & POLLIN)) { + idx = GRPC_FD_TO_IDX(fds[i].fd); + fd_cvs[i].cv = &pollcv_cv; + fd_cvs[i].prev = NULL; + fd_cvs[i].next = g_cvfds.cvfds[idx].cvs; + if (g_cvfds.cvfds[idx].cvs) { + g_cvfds.cvfds[idx].cvs->prev = &(fd_cvs[i]); + } + g_cvfds.cvfds[idx].cvs = &(fd_cvs[i]); + // Don't bother polling if a wakeup fd is ready + if (g_cvfds.cvfds[idx].is_set) { + skip_poll = 1; + } + } else if (fds[i].fd >= 0) { + nsockfds++; + } + } + + gpr_timespec deadline = gpr_now(GPR_CLOCK_REALTIME); + if (timeout < 0) { + deadline = gpr_inf_future(GPR_CLOCK_REALTIME); + } else { + deadline = + gpr_time_add(deadline, gpr_time_from_millis(timeout, GPR_TIMESPAN)); + } + + res = 0; + if (!skip_poll && nsockfds > 0) { + struct pollfd *pollfds = + (struct pollfd *)gpr_malloc(sizeof(struct pollfd) * nsockfds); + idx = 0; + for (i = 0; i < nfds; i++) { + if (fds[i].fd >= 0) { + pollfds[idx].fd = fds[i].fd; + pollfds[idx].events = fds[i].events; + pollfds[idx].revents = 0; + idx++; + } + } + poll_args *pargs = get_poller_locked(pollfds, nsockfds); + result = pargs->result; + pollcv->next = result->watchers; + pollcv->prev = NULL; + if (result->watchers) { + result->watchers->prev = pollcv; + } + result->watchers = pollcv; + result->watchcount++; + gpr_ref(&result->refcount); + + pargs->trigger_set = 1; + gpr_cv_signal(&pargs->trigger); + gpr_cv_wait(&pollcv_cv, &g_cvfds.mu, deadline); + res = result->retval; + errno = result->err; + result->watchcount--; + remove_cvn(&result->watchers, pollcv); + } else if (!skip_poll) { + gpr_cv_wait(&pollcv_cv, &g_cvfds.mu, deadline); + } + + idx = 0; + for (i = 0; i < nfds; i++) { + if (fds[i].fd < 0 && (fds[i].events & POLLIN)) { + remove_cvn(&g_cvfds.cvfds[GRPC_FD_TO_IDX(fds[i].fd)].cvs, &(fd_cvs[i])); + if (g_cvfds.cvfds[GRPC_FD_TO_IDX(fds[i].fd)].is_set) { + fds[i].revents = POLLIN; + if (res >= 0) res++; + } + } else if (!skip_poll && fds[i].fd >= 0 && result->completed) { + fds[i].revents = result->fds[idx].revents; + idx++; + } + } + + gpr_free(fd_cvs); + gpr_free(pollcv); + if (result) { + decref_poll_result(result); + } + + gpr_mu_unlock(&g_cvfds.mu); + + return res; +} + +static void global_cv_fd_table_init() { + gpr_mu_init(&g_cvfds.mu); + gpr_mu_lock(&g_cvfds.mu); + gpr_cv_init(&g_cvfds.shutdown_cv); + gpr_ref_init(&g_cvfds.pollcount, 1); + g_cvfds.size = CV_DEFAULT_TABLE_SIZE; + g_cvfds.cvfds = + (fd_node *)gpr_malloc(sizeof(fd_node) * CV_DEFAULT_TABLE_SIZE); + g_cvfds.free_fds = NULL; + thread_grace = gpr_time_from_millis(POLLCV_THREAD_GRACE_MS, GPR_TIMESPAN); + for (int i = 0; i < CV_DEFAULT_TABLE_SIZE; i++) { + g_cvfds.cvfds[i].is_set = 0; + g_cvfds.cvfds[i].cvs = NULL; + g_cvfds.cvfds[i].next_free = g_cvfds.free_fds; + g_cvfds.free_fds = &g_cvfds.cvfds[i]; + } + // Override the poll function with one that supports cvfds + g_cvfds.poll = grpc_poll_function; + grpc_poll_function = &cvfd_poll; + + // Initialize the cache + poll_cache.size = 32; + poll_cache.count = 0; + poll_cache.free_pollers = NULL; + poll_cache.active_pollers = (poll_args **)gpr_malloc(sizeof(void *) * 32); + for (unsigned int i = 0; i < poll_cache.size; i++) { + poll_cache.active_pollers[i] = NULL; + } + + gpr_mu_unlock(&g_cvfds.mu); +} + +static void global_cv_fd_table_shutdown() { + gpr_mu_lock(&g_cvfds.mu); + // Attempt to wait for all abandoned poll() threads to terminate + // Not doing so will result in reported memory leaks + if (!gpr_unref(&g_cvfds.pollcount)) { + int res = gpr_cv_wait(&g_cvfds.shutdown_cv, &g_cvfds.mu, + gpr_time_add(gpr_now(GPR_CLOCK_REALTIME), + gpr_time_from_seconds(3, GPR_TIMESPAN))); + GPR_ASSERT(res == 0); + } + gpr_cv_destroy(&g_cvfds.shutdown_cv); + grpc_poll_function = g_cvfds.poll; + gpr_free(g_cvfds.cvfds); + + gpr_free(poll_cache.active_pollers); + + gpr_mu_unlock(&g_cvfds.mu); + gpr_mu_destroy(&g_cvfds.mu); +} + +/******************************************************************************* + * event engine binding + */ + +static void shutdown_engine(void) { + pollset_global_shutdown(); + if (grpc_cv_wakeup_fds_enabled()) { + global_cv_fd_table_shutdown(); + } +} + +static const grpc_event_engine_vtable vtable = { + sizeof(grpc_pollset), + + fd_create, + fd_wrapped_fd, + fd_orphan, + fd_shutdown, + fd_notify_on_read, + fd_notify_on_write, + fd_is_shutdown, + fd_get_read_notifier_pollset, + + pollset_init, + pollset_shutdown, + pollset_destroy, + pollset_work, + pollset_kick, + pollset_add_fd, + + pollset_set_create, + pollset_set_destroy, + pollset_set_add_pollset, + pollset_set_del_pollset, + pollset_set_add_pollset_set, + pollset_set_del_pollset_set, + pollset_set_add_fd, + pollset_set_del_fd, + + shutdown_engine, +}; + +const grpc_event_engine_vtable *grpc_init_poll_posix(bool explicit_request) { + if (!grpc_has_wakeup_fd()) { + return NULL; + } + if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) { + return NULL; + } + return &vtable; +} + +const grpc_event_engine_vtable *grpc_init_poll_cv_posix(bool explicit_request) { + global_cv_fd_table_init(); + grpc_enable_cv_wakeup_fds(1); + if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) { + global_cv_fd_table_shutdown(); + grpc_enable_cv_wakeup_fds(0); + return NULL; + } + return &vtable; +} + +#endif diff --git a/src/core/lib/iomgr/ev_posix.c b/src/core/lib/iomgr/ev_posix.c deleted file mode 100644 index 4d3ae2228e..0000000000 --- a/src/core/lib/iomgr/ev_posix.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/iomgr/ev_posix.h" - -#include - -#include -#include -#include -#include - -#include "src/core/lib/debug/trace.h" -#include "src/core/lib/iomgr/ev_epoll1_linux.h" -#include "src/core/lib/iomgr/ev_epollex_linux.h" -#include "src/core/lib/iomgr/ev_epollsig_linux.h" -#include "src/core/lib/iomgr/ev_poll_posix.h" -#include "src/core/lib/support/env.h" - -grpc_tracer_flag grpc_polling_trace = - GRPC_TRACER_INITIALIZER(false, "polling"); /* Disabled by default */ - -#ifndef NDEBUG -grpc_tracer_flag grpc_trace_fd_refcount = - GRPC_TRACER_INITIALIZER(false, "fd_refcount"); -#endif - -/** Default poll() function - a pointer so that it can be overridden by some - * tests */ -grpc_poll_function_type grpc_poll_function = poll; - -grpc_wakeup_fd grpc_global_wakeup_fd; - -static const grpc_event_engine_vtable *g_event_engine; -static const char *g_poll_strategy_name = NULL; - -typedef const grpc_event_engine_vtable *(*event_engine_factory_fn)( - bool explicit_request); - -typedef struct { - const char *name; - event_engine_factory_fn factory; -} event_engine_factory; - -static const event_engine_factory g_factories[] = { - {"epoll1", grpc_init_epoll1_linux}, - {"epollsig", grpc_init_epollsig_linux}, - {"poll", grpc_init_poll_posix}, - {"poll-cv", grpc_init_poll_cv_posix}, - {"epollex", grpc_init_epollex_linux}, -}; - -static void add(const char *beg, const char *end, char ***ss, size_t *ns) { - size_t n = *ns; - size_t np = n + 1; - char *s; - size_t len; - GPR_ASSERT(end >= beg); - len = (size_t)(end - beg); - s = (char *)gpr_malloc(len + 1); - memcpy(s, beg, len); - s[len] = 0; - *ss = (char **)gpr_realloc(*ss, sizeof(char **) * np); - (*ss)[n] = s; - *ns = np; -} - -static void split(const char *s, char ***ss, size_t *ns) { - const char *c = strchr(s, ','); - if (c == NULL) { - add(s, s + strlen(s), ss, ns); - } else { - add(s, c, ss, ns); - split(c + 1, ss, ns); - } -} - -static bool is(const char *want, const char *have) { - return 0 == strcmp(want, "all") || 0 == strcmp(want, have); -} - -static void try_engine(const char *engine) { - for (size_t i = 0; i < GPR_ARRAY_SIZE(g_factories); i++) { - if (is(engine, g_factories[i].name)) { - if ((g_event_engine = g_factories[i].factory( - 0 == strcmp(engine, g_factories[i].name)))) { - g_poll_strategy_name = g_factories[i].name; - gpr_log(GPR_DEBUG, "Using polling engine: %s", g_factories[i].name); - return; - } - } - } -} - -/* This should be used for testing purposes ONLY */ -void grpc_set_event_engine_test_only( - const grpc_event_engine_vtable *ev_engine) { - g_event_engine = ev_engine; -} - -const grpc_event_engine_vtable *grpc_get_event_engine_test_only() { - return g_event_engine; -} - -/* Call this only after calling grpc_event_engine_init() */ -const char *grpc_get_poll_strategy_name() { return g_poll_strategy_name; } - -void grpc_event_engine_init(void) { - grpc_register_tracer(&grpc_polling_trace); - - char *s = gpr_getenv("GRPC_POLL_STRATEGY"); - if (s == NULL) { - s = gpr_strdup("all"); - } - - char **strings = NULL; - size_t nstrings = 0; - split(s, &strings, &nstrings); - - for (size_t i = 0; g_event_engine == NULL && i < nstrings; i++) { - try_engine(strings[i]); - } - - for (size_t i = 0; i < nstrings; i++) { - gpr_free(strings[i]); - } - gpr_free(strings); - gpr_free(s); - - if (g_event_engine == NULL) { - gpr_log(GPR_ERROR, "No event engine could be initialized"); - abort(); - } -} - -void grpc_event_engine_shutdown(void) { - g_event_engine->shutdown_engine(); - g_event_engine = NULL; -} - -grpc_fd *grpc_fd_create(int fd, const char *name) { - return g_event_engine->fd_create(fd, name); -} - -int grpc_fd_wrapped_fd(grpc_fd *fd) { - return g_event_engine->fd_wrapped_fd(fd); -} - -void grpc_fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure *on_done, - int *release_fd, bool already_closed, const char *reason) { - g_event_engine->fd_orphan(exec_ctx, fd, on_done, release_fd, already_closed, - reason); -} - -void grpc_fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) { - g_event_engine->fd_shutdown(exec_ctx, fd, why); -} - -bool grpc_fd_is_shutdown(grpc_fd *fd) { - return g_event_engine->fd_is_shutdown(fd); -} - -void grpc_fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - g_event_engine->fd_notify_on_read(exec_ctx, fd, closure); -} - -void grpc_fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, - grpc_closure *closure) { - g_event_engine->fd_notify_on_write(exec_ctx, fd, closure); -} - -size_t grpc_pollset_size(void) { return g_event_engine->pollset_size; } - -void grpc_pollset_init(grpc_pollset *pollset, gpr_mu **mu) { - g_event_engine->pollset_init(pollset, mu); -} - -void grpc_pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_closure *closure) { - g_event_engine->pollset_shutdown(exec_ctx, pollset, closure); -} - -void grpc_pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { - g_event_engine->pollset_destroy(exec_ctx, pollset); -} - -grpc_error *grpc_pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker **worker, gpr_timespec now, - gpr_timespec deadline) { - return g_event_engine->pollset_work(exec_ctx, pollset, worker, now, deadline); -} - -grpc_error *grpc_pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker *specific_worker) { - return g_event_engine->pollset_kick(exec_ctx, pollset, specific_worker); -} - -void grpc_pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - struct grpc_fd *fd) { - g_event_engine->pollset_add_fd(exec_ctx, pollset, fd); -} - -grpc_pollset_set *grpc_pollset_set_create(void) { - return g_event_engine->pollset_set_create(); -} - -void grpc_pollset_set_destroy(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set) { - g_event_engine->pollset_set_destroy(exec_ctx, pollset_set); -} - -void grpc_pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, - grpc_pollset *pollset) { - g_event_engine->pollset_set_add_pollset(exec_ctx, pollset_set, pollset); -} - -void grpc_pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, - grpc_pollset *pollset) { - g_event_engine->pollset_set_del_pollset(exec_ctx, pollset_set, pollset); -} - -void grpc_pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) { - g_event_engine->pollset_set_add_pollset_set(exec_ctx, bag, item); -} - -void grpc_pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *bag, - grpc_pollset_set *item) { - g_event_engine->pollset_set_del_pollset_set(exec_ctx, bag, item); -} - -void grpc_pollset_set_add_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, grpc_fd *fd) { - g_event_engine->pollset_set_add_fd(exec_ctx, pollset_set, fd); -} - -void grpc_pollset_set_del_fd(grpc_exec_ctx *exec_ctx, - grpc_pollset_set *pollset_set, grpc_fd *fd) { - g_event_engine->pollset_set_del_fd(exec_ctx, pollset_set, fd); -} - -#endif // GRPC_POSIX_SOCKET diff --git a/src/core/lib/iomgr/ev_posix.cc b/src/core/lib/iomgr/ev_posix.cc new file mode 100644 index 0000000000..4d3ae2228e --- /dev/null +++ b/src/core/lib/iomgr/ev_posix.cc @@ -0,0 +1,266 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/iomgr/ev_posix.h" + +#include + +#include +#include +#include +#include + +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/iomgr/ev_epoll1_linux.h" +#include "src/core/lib/iomgr/ev_epollex_linux.h" +#include "src/core/lib/iomgr/ev_epollsig_linux.h" +#include "src/core/lib/iomgr/ev_poll_posix.h" +#include "src/core/lib/support/env.h" + +grpc_tracer_flag grpc_polling_trace = + GRPC_TRACER_INITIALIZER(false, "polling"); /* Disabled by default */ + +#ifndef NDEBUG +grpc_tracer_flag grpc_trace_fd_refcount = + GRPC_TRACER_INITIALIZER(false, "fd_refcount"); +#endif + +/** Default poll() function - a pointer so that it can be overridden by some + * tests */ +grpc_poll_function_type grpc_poll_function = poll; + +grpc_wakeup_fd grpc_global_wakeup_fd; + +static const grpc_event_engine_vtable *g_event_engine; +static const char *g_poll_strategy_name = NULL; + +typedef const grpc_event_engine_vtable *(*event_engine_factory_fn)( + bool explicit_request); + +typedef struct { + const char *name; + event_engine_factory_fn factory; +} event_engine_factory; + +static const event_engine_factory g_factories[] = { + {"epoll1", grpc_init_epoll1_linux}, + {"epollsig", grpc_init_epollsig_linux}, + {"poll", grpc_init_poll_posix}, + {"poll-cv", grpc_init_poll_cv_posix}, + {"epollex", grpc_init_epollex_linux}, +}; + +static void add(const char *beg, const char *end, char ***ss, size_t *ns) { + size_t n = *ns; + size_t np = n + 1; + char *s; + size_t len; + GPR_ASSERT(end >= beg); + len = (size_t)(end - beg); + s = (char *)gpr_malloc(len + 1); + memcpy(s, beg, len); + s[len] = 0; + *ss = (char **)gpr_realloc(*ss, sizeof(char **) * np); + (*ss)[n] = s; + *ns = np; +} + +static void split(const char *s, char ***ss, size_t *ns) { + const char *c = strchr(s, ','); + if (c == NULL) { + add(s, s + strlen(s), ss, ns); + } else { + add(s, c, ss, ns); + split(c + 1, ss, ns); + } +} + +static bool is(const char *want, const char *have) { + return 0 == strcmp(want, "all") || 0 == strcmp(want, have); +} + +static void try_engine(const char *engine) { + for (size_t i = 0; i < GPR_ARRAY_SIZE(g_factories); i++) { + if (is(engine, g_factories[i].name)) { + if ((g_event_engine = g_factories[i].factory( + 0 == strcmp(engine, g_factories[i].name)))) { + g_poll_strategy_name = g_factories[i].name; + gpr_log(GPR_DEBUG, "Using polling engine: %s", g_factories[i].name); + return; + } + } + } +} + +/* This should be used for testing purposes ONLY */ +void grpc_set_event_engine_test_only( + const grpc_event_engine_vtable *ev_engine) { + g_event_engine = ev_engine; +} + +const grpc_event_engine_vtable *grpc_get_event_engine_test_only() { + return g_event_engine; +} + +/* Call this only after calling grpc_event_engine_init() */ +const char *grpc_get_poll_strategy_name() { return g_poll_strategy_name; } + +void grpc_event_engine_init(void) { + grpc_register_tracer(&grpc_polling_trace); + + char *s = gpr_getenv("GRPC_POLL_STRATEGY"); + if (s == NULL) { + s = gpr_strdup("all"); + } + + char **strings = NULL; + size_t nstrings = 0; + split(s, &strings, &nstrings); + + for (size_t i = 0; g_event_engine == NULL && i < nstrings; i++) { + try_engine(strings[i]); + } + + for (size_t i = 0; i < nstrings; i++) { + gpr_free(strings[i]); + } + gpr_free(strings); + gpr_free(s); + + if (g_event_engine == NULL) { + gpr_log(GPR_ERROR, "No event engine could be initialized"); + abort(); + } +} + +void grpc_event_engine_shutdown(void) { + g_event_engine->shutdown_engine(); + g_event_engine = NULL; +} + +grpc_fd *grpc_fd_create(int fd, const char *name) { + return g_event_engine->fd_create(fd, name); +} + +int grpc_fd_wrapped_fd(grpc_fd *fd) { + return g_event_engine->fd_wrapped_fd(fd); +} + +void grpc_fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_closure *on_done, + int *release_fd, bool already_closed, const char *reason) { + g_event_engine->fd_orphan(exec_ctx, fd, on_done, release_fd, already_closed, + reason); +} + +void grpc_fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) { + g_event_engine->fd_shutdown(exec_ctx, fd, why); +} + +bool grpc_fd_is_shutdown(grpc_fd *fd) { + return g_event_engine->fd_is_shutdown(fd); +} + +void grpc_fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + g_event_engine->fd_notify_on_read(exec_ctx, fd, closure); +} + +void grpc_fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd, + grpc_closure *closure) { + g_event_engine->fd_notify_on_write(exec_ctx, fd, closure); +} + +size_t grpc_pollset_size(void) { return g_event_engine->pollset_size; } + +void grpc_pollset_init(grpc_pollset *pollset, gpr_mu **mu) { + g_event_engine->pollset_init(pollset, mu); +} + +void grpc_pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_closure *closure) { + g_event_engine->pollset_shutdown(exec_ctx, pollset, closure); +} + +void grpc_pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { + g_event_engine->pollset_destroy(exec_ctx, pollset); +} + +grpc_error *grpc_pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker **worker, gpr_timespec now, + gpr_timespec deadline) { + return g_event_engine->pollset_work(exec_ctx, pollset, worker, now, deadline); +} + +grpc_error *grpc_pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker *specific_worker) { + return g_event_engine->pollset_kick(exec_ctx, pollset, specific_worker); +} + +void grpc_pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + struct grpc_fd *fd) { + g_event_engine->pollset_add_fd(exec_ctx, pollset, fd); +} + +grpc_pollset_set *grpc_pollset_set_create(void) { + return g_event_engine->pollset_set_create(); +} + +void grpc_pollset_set_destroy(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set) { + g_event_engine->pollset_set_destroy(exec_ctx, pollset_set); +} + +void grpc_pollset_set_add_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, + grpc_pollset *pollset) { + g_event_engine->pollset_set_add_pollset(exec_ctx, pollset_set, pollset); +} + +void grpc_pollset_set_del_pollset(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, + grpc_pollset *pollset) { + g_event_engine->pollset_set_del_pollset(exec_ctx, pollset_set, pollset); +} + +void grpc_pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) { + g_event_engine->pollset_set_add_pollset_set(exec_ctx, bag, item); +} + +void grpc_pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *bag, + grpc_pollset_set *item) { + g_event_engine->pollset_set_del_pollset_set(exec_ctx, bag, item); +} + +void grpc_pollset_set_add_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd) { + g_event_engine->pollset_set_add_fd(exec_ctx, pollset_set, fd); +} + +void grpc_pollset_set_del_fd(grpc_exec_ctx *exec_ctx, + grpc_pollset_set *pollset_set, grpc_fd *fd) { + g_event_engine->pollset_set_del_fd(exec_ctx, pollset_set, fd); +} + +#endif // GRPC_POSIX_SOCKET diff --git a/src/core/lib/iomgr/ev_windows.c b/src/core/lib/iomgr/ev_windows.c deleted file mode 100644 index c24dfaeaf7..0000000000 --- a/src/core/lib/iomgr/ev_windows.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_WINSOCK_SOCKET - -#include "src/core/lib/debug/trace.h" - -grpc_tracer_flag grpc_polling_trace = - GRPC_TRACER_INITIALIZER(false, "polling"); /* Disabled by default */ - -#endif // GRPC_WINSOCK_SOCKET diff --git a/src/core/lib/iomgr/ev_windows.cc b/src/core/lib/iomgr/ev_windows.cc new file mode 100644 index 0000000000..c24dfaeaf7 --- /dev/null +++ b/src/core/lib/iomgr/ev_windows.cc @@ -0,0 +1,28 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_WINSOCK_SOCKET + +#include "src/core/lib/debug/trace.h" + +grpc_tracer_flag grpc_polling_trace = + GRPC_TRACER_INITIALIZER(false, "polling"); /* Disabled by default */ + +#endif // GRPC_WINSOCK_SOCKET diff --git a/src/core/lib/iomgr/exec_ctx.c b/src/core/lib/iomgr/exec_ctx.c deleted file mode 100644 index 41c69add17..0000000000 --- a/src/core/lib/iomgr/exec_ctx.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/exec_ctx.h" - -#include -#include -#include - -#include "src/core/lib/iomgr/combiner.h" -#include "src/core/lib/profiling/timers.h" - -bool grpc_exec_ctx_ready_to_finish(grpc_exec_ctx *exec_ctx) { - if ((exec_ctx->flags & GRPC_EXEC_CTX_FLAG_IS_FINISHED) == 0) { - if (exec_ctx->check_ready_to_finish(exec_ctx, - exec_ctx->check_ready_to_finish_arg)) { - exec_ctx->flags |= GRPC_EXEC_CTX_FLAG_IS_FINISHED; - return true; - } - return false; - } else { - return true; - } -} - -bool grpc_never_ready_to_finish(grpc_exec_ctx *exec_ctx, void *arg_ignored) { - return false; -} - -bool grpc_always_ready_to_finish(grpc_exec_ctx *exec_ctx, void *arg_ignored) { - return true; -} - -bool grpc_exec_ctx_has_work(grpc_exec_ctx *exec_ctx) { - return exec_ctx->active_combiner != NULL || - !grpc_closure_list_empty(exec_ctx->closure_list); -} - -void grpc_exec_ctx_finish(grpc_exec_ctx *exec_ctx) { - exec_ctx->flags |= GRPC_EXEC_CTX_FLAG_IS_FINISHED; - grpc_exec_ctx_flush(exec_ctx); -} - -static void exec_ctx_run(grpc_exec_ctx *exec_ctx, grpc_closure *closure, - grpc_error *error) { -#ifndef NDEBUG - closure->scheduled = false; - if (GRPC_TRACER_ON(grpc_trace_closure)) { - gpr_log(GPR_DEBUG, "running closure %p: created [%s:%d]: %s [%s:%d]", - closure, closure->file_created, closure->line_created, - closure->run ? "run" : "scheduled", closure->file_initiated, - closure->line_initiated); - } -#endif - closure->cb(exec_ctx, closure->cb_arg, error); -#ifndef NDEBUG - if (GRPC_TRACER_ON(grpc_trace_closure)) { - gpr_log(GPR_DEBUG, "closure %p finished", closure); - } -#endif - GRPC_ERROR_UNREF(error); -} - -bool grpc_exec_ctx_flush(grpc_exec_ctx *exec_ctx) { - bool did_something = 0; - GPR_TIMER_BEGIN("grpc_exec_ctx_flush", 0); - for (;;) { - if (!grpc_closure_list_empty(exec_ctx->closure_list)) { - grpc_closure *c = exec_ctx->closure_list.head; - exec_ctx->closure_list.head = exec_ctx->closure_list.tail = NULL; - while (c != NULL) { - grpc_closure *next = c->next_data.next; - grpc_error *error = c->error_data.error; - did_something = true; - exec_ctx_run(exec_ctx, c, error); - c = next; - } - } else if (!grpc_combiner_continue_exec_ctx(exec_ctx)) { - break; - } - } - GPR_ASSERT(exec_ctx->active_combiner == NULL); - GPR_TIMER_END("grpc_exec_ctx_flush", 0); - return did_something; -} - -static void exec_ctx_sched(grpc_exec_ctx *exec_ctx, grpc_closure *closure, - grpc_error *error) { - grpc_closure_list_append(&exec_ctx->closure_list, closure, error); -} - -void grpc_exec_ctx_global_init(void) {} -void grpc_exec_ctx_global_shutdown(void) {} - -static const grpc_closure_scheduler_vtable exec_ctx_scheduler_vtable = { - exec_ctx_run, exec_ctx_sched, "exec_ctx"}; -static grpc_closure_scheduler exec_ctx_scheduler = {&exec_ctx_scheduler_vtable}; -grpc_closure_scheduler *grpc_schedule_on_exec_ctx = &exec_ctx_scheduler; diff --git a/src/core/lib/iomgr/exec_ctx.cc b/src/core/lib/iomgr/exec_ctx.cc new file mode 100644 index 0000000000..41c69add17 --- /dev/null +++ b/src/core/lib/iomgr/exec_ctx.cc @@ -0,0 +1,113 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/exec_ctx.h" + +#include +#include +#include + +#include "src/core/lib/iomgr/combiner.h" +#include "src/core/lib/profiling/timers.h" + +bool grpc_exec_ctx_ready_to_finish(grpc_exec_ctx *exec_ctx) { + if ((exec_ctx->flags & GRPC_EXEC_CTX_FLAG_IS_FINISHED) == 0) { + if (exec_ctx->check_ready_to_finish(exec_ctx, + exec_ctx->check_ready_to_finish_arg)) { + exec_ctx->flags |= GRPC_EXEC_CTX_FLAG_IS_FINISHED; + return true; + } + return false; + } else { + return true; + } +} + +bool grpc_never_ready_to_finish(grpc_exec_ctx *exec_ctx, void *arg_ignored) { + return false; +} + +bool grpc_always_ready_to_finish(grpc_exec_ctx *exec_ctx, void *arg_ignored) { + return true; +} + +bool grpc_exec_ctx_has_work(grpc_exec_ctx *exec_ctx) { + return exec_ctx->active_combiner != NULL || + !grpc_closure_list_empty(exec_ctx->closure_list); +} + +void grpc_exec_ctx_finish(grpc_exec_ctx *exec_ctx) { + exec_ctx->flags |= GRPC_EXEC_CTX_FLAG_IS_FINISHED; + grpc_exec_ctx_flush(exec_ctx); +} + +static void exec_ctx_run(grpc_exec_ctx *exec_ctx, grpc_closure *closure, + grpc_error *error) { +#ifndef NDEBUG + closure->scheduled = false; + if (GRPC_TRACER_ON(grpc_trace_closure)) { + gpr_log(GPR_DEBUG, "running closure %p: created [%s:%d]: %s [%s:%d]", + closure, closure->file_created, closure->line_created, + closure->run ? "run" : "scheduled", closure->file_initiated, + closure->line_initiated); + } +#endif + closure->cb(exec_ctx, closure->cb_arg, error); +#ifndef NDEBUG + if (GRPC_TRACER_ON(grpc_trace_closure)) { + gpr_log(GPR_DEBUG, "closure %p finished", closure); + } +#endif + GRPC_ERROR_UNREF(error); +} + +bool grpc_exec_ctx_flush(grpc_exec_ctx *exec_ctx) { + bool did_something = 0; + GPR_TIMER_BEGIN("grpc_exec_ctx_flush", 0); + for (;;) { + if (!grpc_closure_list_empty(exec_ctx->closure_list)) { + grpc_closure *c = exec_ctx->closure_list.head; + exec_ctx->closure_list.head = exec_ctx->closure_list.tail = NULL; + while (c != NULL) { + grpc_closure *next = c->next_data.next; + grpc_error *error = c->error_data.error; + did_something = true; + exec_ctx_run(exec_ctx, c, error); + c = next; + } + } else if (!grpc_combiner_continue_exec_ctx(exec_ctx)) { + break; + } + } + GPR_ASSERT(exec_ctx->active_combiner == NULL); + GPR_TIMER_END("grpc_exec_ctx_flush", 0); + return did_something; +} + +static void exec_ctx_sched(grpc_exec_ctx *exec_ctx, grpc_closure *closure, + grpc_error *error) { + grpc_closure_list_append(&exec_ctx->closure_list, closure, error); +} + +void grpc_exec_ctx_global_init(void) {} +void grpc_exec_ctx_global_shutdown(void) {} + +static const grpc_closure_scheduler_vtable exec_ctx_scheduler_vtable = { + exec_ctx_run, exec_ctx_sched, "exec_ctx"}; +static grpc_closure_scheduler exec_ctx_scheduler = {&exec_ctx_scheduler_vtable}; +grpc_closure_scheduler *grpc_schedule_on_exec_ctx = &exec_ctx_scheduler; diff --git a/src/core/lib/iomgr/executor.c b/src/core/lib/iomgr/executor.c deleted file mode 100644 index 892385d7d7..0000000000 --- a/src/core/lib/iomgr/executor.c +++ /dev/null @@ -1,301 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/executor.h" - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "src/core/lib/debug/stats.h" -#include "src/core/lib/iomgr/exec_ctx.h" -#include "src/core/lib/support/spinlock.h" - -#define MAX_DEPTH 2 - -typedef struct { - gpr_mu mu; - gpr_cv cv; - grpc_closure_list elems; - size_t depth; - bool shutdown; - bool queued_long_job; - gpr_thd_id id; -} thread_state; - -static thread_state *g_thread_state; -static size_t g_max_threads; -static gpr_atm g_cur_threads; -static gpr_spinlock g_adding_thread_lock = GPR_SPINLOCK_STATIC_INITIALIZER; - -GPR_TLS_DECL(g_this_thread_state); - -static grpc_tracer_flag executor_trace = - GRPC_TRACER_INITIALIZER(false, "executor"); - -static void executor_thread(void *arg); - -static size_t run_closures(grpc_exec_ctx *exec_ctx, grpc_closure_list list) { - size_t n = 0; - - grpc_closure *c = list.head; - while (c != NULL) { - grpc_closure *next = c->next_data.next; - grpc_error *error = c->error_data.error; - if (GRPC_TRACER_ON(executor_trace)) { -#ifndef NDEBUG - gpr_log(GPR_DEBUG, "EXECUTOR: run %p [created by %s:%d]", c, - c->file_created, c->line_created); -#else - gpr_log(GPR_DEBUG, "EXECUTOR: run %p", c); -#endif - } -#ifndef NDEBUG - c->scheduled = false; -#endif - c->cb(exec_ctx, c->cb_arg, error); - GRPC_ERROR_UNREF(error); - c = next; - n++; - grpc_exec_ctx_flush(exec_ctx); - } - - return n; -} - -bool grpc_executor_is_threaded() { - return gpr_atm_no_barrier_load(&g_cur_threads) > 0; -} - -void grpc_executor_set_threading(grpc_exec_ctx *exec_ctx, bool threading) { - gpr_atm cur_threads = gpr_atm_no_barrier_load(&g_cur_threads); - if (threading) { - if (cur_threads > 0) return; - g_max_threads = GPR_MAX(1, 2 * gpr_cpu_num_cores()); - gpr_atm_no_barrier_store(&g_cur_threads, 1); - gpr_tls_init(&g_this_thread_state); - g_thread_state = - (thread_state *)gpr_zalloc(sizeof(thread_state) * g_max_threads); - for (size_t i = 0; i < g_max_threads; i++) { - gpr_mu_init(&g_thread_state[i].mu); - gpr_cv_init(&g_thread_state[i].cv); - g_thread_state[i].elems = (grpc_closure_list)GRPC_CLOSURE_LIST_INIT; - } - - gpr_thd_options opt = gpr_thd_options_default(); - gpr_thd_options_set_joinable(&opt); - gpr_thd_new(&g_thread_state[0].id, executor_thread, &g_thread_state[0], - &opt); - } else { - if (cur_threads == 0) return; - for (size_t i = 0; i < g_max_threads; i++) { - gpr_mu_lock(&g_thread_state[i].mu); - g_thread_state[i].shutdown = true; - gpr_cv_signal(&g_thread_state[i].cv); - gpr_mu_unlock(&g_thread_state[i].mu); - } - /* ensure no thread is adding a new thread... once this is past, then - no thread will try to add a new one either (since shutdown is true) */ - gpr_spinlock_lock(&g_adding_thread_lock); - gpr_spinlock_unlock(&g_adding_thread_lock); - for (gpr_atm i = 0; i < g_cur_threads; i++) { - gpr_thd_join(g_thread_state[i].id); - } - gpr_atm_no_barrier_store(&g_cur_threads, 0); - for (size_t i = 0; i < g_max_threads; i++) { - gpr_mu_destroy(&g_thread_state[i].mu); - gpr_cv_destroy(&g_thread_state[i].cv); - run_closures(exec_ctx, g_thread_state[i].elems); - } - gpr_free(g_thread_state); - gpr_tls_destroy(&g_this_thread_state); - } -} - -void grpc_executor_init(grpc_exec_ctx *exec_ctx) { - grpc_register_tracer(&executor_trace); - gpr_atm_no_barrier_store(&g_cur_threads, 0); - grpc_executor_set_threading(exec_ctx, true); -} - -void grpc_executor_shutdown(grpc_exec_ctx *exec_ctx) { - grpc_executor_set_threading(exec_ctx, false); -} - -static void executor_thread(void *arg) { - thread_state *ts = (thread_state *)arg; - gpr_tls_set(&g_this_thread_state, (intptr_t)ts); - - grpc_exec_ctx exec_ctx = - GRPC_EXEC_CTX_INITIALIZER(0, grpc_never_ready_to_finish, NULL); - - size_t subtract_depth = 0; - for (;;) { - if (GRPC_TRACER_ON(executor_trace)) { - gpr_log(GPR_DEBUG, "EXECUTOR[%d]: step (sub_depth=%" PRIdPTR ")", - (int)(ts - g_thread_state), subtract_depth); - } - gpr_mu_lock(&ts->mu); - ts->depth -= subtract_depth; - while (grpc_closure_list_empty(ts->elems) && !ts->shutdown) { - ts->queued_long_job = false; - gpr_cv_wait(&ts->cv, &ts->mu, gpr_inf_future(GPR_CLOCK_REALTIME)); - } - if (ts->shutdown) { - if (GRPC_TRACER_ON(executor_trace)) { - gpr_log(GPR_DEBUG, "EXECUTOR[%d]: shutdown", - (int)(ts - g_thread_state)); - } - gpr_mu_unlock(&ts->mu); - break; - } - GRPC_STATS_INC_EXECUTOR_QUEUE_DRAINED(&exec_ctx); - grpc_closure_list exec = ts->elems; - ts->elems = (grpc_closure_list)GRPC_CLOSURE_LIST_INIT; - gpr_mu_unlock(&ts->mu); - if (GRPC_TRACER_ON(executor_trace)) { - gpr_log(GPR_DEBUG, "EXECUTOR[%d]: execute", (int)(ts - g_thread_state)); - } - - subtract_depth = run_closures(&exec_ctx, exec); - } - grpc_exec_ctx_finish(&exec_ctx); -} - -static void executor_push(grpc_exec_ctx *exec_ctx, grpc_closure *closure, - grpc_error *error, bool is_short) { - bool retry_push; - if (is_short) { - GRPC_STATS_INC_EXECUTOR_SCHEDULED_SHORT_ITEMS(exec_ctx); - } else { - GRPC_STATS_INC_EXECUTOR_SCHEDULED_LONG_ITEMS(exec_ctx); - } - do { - retry_push = false; - size_t cur_thread_count = (size_t)gpr_atm_no_barrier_load(&g_cur_threads); - if (cur_thread_count == 0) { - if (GRPC_TRACER_ON(executor_trace)) { -#ifndef NDEBUG - gpr_log(GPR_DEBUG, "EXECUTOR: schedule %p (created %s:%d) inline", - closure, closure->file_created, closure->line_created); -#else - gpr_log(GPR_DEBUG, "EXECUTOR: schedule %p inline", closure); -#endif - } - grpc_closure_list_append(&exec_ctx->closure_list, closure, error); - return; - } - thread_state *ts = (thread_state *)gpr_tls_get(&g_this_thread_state); - if (ts == NULL) { - ts = &g_thread_state[GPR_HASH_POINTER(exec_ctx, cur_thread_count)]; - } else { - GRPC_STATS_INC_EXECUTOR_SCHEDULED_TO_SELF(exec_ctx); - } - thread_state *orig_ts = ts; - - bool try_new_thread; - for (;;) { - if (GRPC_TRACER_ON(executor_trace)) { -#ifndef NDEBUG - gpr_log( - GPR_DEBUG, - "EXECUTOR: try to schedule %p (%s) (created %s:%d) to thread %d", - closure, is_short ? "short" : "long", closure->file_created, - closure->line_created, (int)(ts - g_thread_state)); -#else - gpr_log(GPR_DEBUG, "EXECUTOR: try to schedule %p (%s) to thread %d", - closure, is_short ? "short" : "long", - (int)(ts - g_thread_state)); -#endif - } - gpr_mu_lock(&ts->mu); - if (ts->queued_long_job) { - // if there's a long job queued, we never queue anything else to this - // queue (since long jobs can take 'infinite' time and we need to - // guarantee no starvation) - // ... spin through queues and try again - gpr_mu_unlock(&ts->mu); - size_t idx = (size_t)(ts - g_thread_state); - ts = &g_thread_state[(idx + 1) % cur_thread_count]; - if (ts == orig_ts) { - retry_push = true; - try_new_thread = true; - break; - } - continue; - } - if (grpc_closure_list_empty(ts->elems)) { - GRPC_STATS_INC_EXECUTOR_WAKEUP_INITIATED(exec_ctx); - gpr_cv_signal(&ts->cv); - } - grpc_closure_list_append(&ts->elems, closure, error); - ts->depth++; - try_new_thread = ts->depth > MAX_DEPTH && - cur_thread_count < g_max_threads && !ts->shutdown; - if (!is_short) ts->queued_long_job = true; - gpr_mu_unlock(&ts->mu); - break; - } - if (try_new_thread && gpr_spinlock_trylock(&g_adding_thread_lock)) { - cur_thread_count = (size_t)gpr_atm_no_barrier_load(&g_cur_threads); - if (cur_thread_count < g_max_threads) { - gpr_atm_no_barrier_store(&g_cur_threads, cur_thread_count + 1); - - gpr_thd_options opt = gpr_thd_options_default(); - gpr_thd_options_set_joinable(&opt); - gpr_thd_new(&g_thread_state[cur_thread_count].id, executor_thread, - &g_thread_state[cur_thread_count], &opt); - } - gpr_spinlock_unlock(&g_adding_thread_lock); - } - if (retry_push) { - GRPC_STATS_INC_EXECUTOR_PUSH_RETRIES(exec_ctx); - } - } while (retry_push); -} - -static void executor_push_short(grpc_exec_ctx *exec_ctx, grpc_closure *closure, - grpc_error *error) { - executor_push(exec_ctx, closure, error, true); -} - -static void executor_push_long(grpc_exec_ctx *exec_ctx, grpc_closure *closure, - grpc_error *error) { - executor_push(exec_ctx, closure, error, false); -} - -static const grpc_closure_scheduler_vtable executor_vtable_short = { - executor_push_short, executor_push_short, "executor"}; -static grpc_closure_scheduler executor_scheduler_short = { - &executor_vtable_short}; - -static const grpc_closure_scheduler_vtable executor_vtable_long = { - executor_push_long, executor_push_long, "executor"}; -static grpc_closure_scheduler executor_scheduler_long = {&executor_vtable_long}; - -grpc_closure_scheduler *grpc_executor_scheduler( - grpc_executor_job_length length) { - return length == GRPC_EXECUTOR_SHORT ? &executor_scheduler_short - : &executor_scheduler_long; -} diff --git a/src/core/lib/iomgr/executor.cc b/src/core/lib/iomgr/executor.cc new file mode 100644 index 0000000000..892385d7d7 --- /dev/null +++ b/src/core/lib/iomgr/executor.cc @@ -0,0 +1,301 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/executor.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "src/core/lib/debug/stats.h" +#include "src/core/lib/iomgr/exec_ctx.h" +#include "src/core/lib/support/spinlock.h" + +#define MAX_DEPTH 2 + +typedef struct { + gpr_mu mu; + gpr_cv cv; + grpc_closure_list elems; + size_t depth; + bool shutdown; + bool queued_long_job; + gpr_thd_id id; +} thread_state; + +static thread_state *g_thread_state; +static size_t g_max_threads; +static gpr_atm g_cur_threads; +static gpr_spinlock g_adding_thread_lock = GPR_SPINLOCK_STATIC_INITIALIZER; + +GPR_TLS_DECL(g_this_thread_state); + +static grpc_tracer_flag executor_trace = + GRPC_TRACER_INITIALIZER(false, "executor"); + +static void executor_thread(void *arg); + +static size_t run_closures(grpc_exec_ctx *exec_ctx, grpc_closure_list list) { + size_t n = 0; + + grpc_closure *c = list.head; + while (c != NULL) { + grpc_closure *next = c->next_data.next; + grpc_error *error = c->error_data.error; + if (GRPC_TRACER_ON(executor_trace)) { +#ifndef NDEBUG + gpr_log(GPR_DEBUG, "EXECUTOR: run %p [created by %s:%d]", c, + c->file_created, c->line_created); +#else + gpr_log(GPR_DEBUG, "EXECUTOR: run %p", c); +#endif + } +#ifndef NDEBUG + c->scheduled = false; +#endif + c->cb(exec_ctx, c->cb_arg, error); + GRPC_ERROR_UNREF(error); + c = next; + n++; + grpc_exec_ctx_flush(exec_ctx); + } + + return n; +} + +bool grpc_executor_is_threaded() { + return gpr_atm_no_barrier_load(&g_cur_threads) > 0; +} + +void grpc_executor_set_threading(grpc_exec_ctx *exec_ctx, bool threading) { + gpr_atm cur_threads = gpr_atm_no_barrier_load(&g_cur_threads); + if (threading) { + if (cur_threads > 0) return; + g_max_threads = GPR_MAX(1, 2 * gpr_cpu_num_cores()); + gpr_atm_no_barrier_store(&g_cur_threads, 1); + gpr_tls_init(&g_this_thread_state); + g_thread_state = + (thread_state *)gpr_zalloc(sizeof(thread_state) * g_max_threads); + for (size_t i = 0; i < g_max_threads; i++) { + gpr_mu_init(&g_thread_state[i].mu); + gpr_cv_init(&g_thread_state[i].cv); + g_thread_state[i].elems = (grpc_closure_list)GRPC_CLOSURE_LIST_INIT; + } + + gpr_thd_options opt = gpr_thd_options_default(); + gpr_thd_options_set_joinable(&opt); + gpr_thd_new(&g_thread_state[0].id, executor_thread, &g_thread_state[0], + &opt); + } else { + if (cur_threads == 0) return; + for (size_t i = 0; i < g_max_threads; i++) { + gpr_mu_lock(&g_thread_state[i].mu); + g_thread_state[i].shutdown = true; + gpr_cv_signal(&g_thread_state[i].cv); + gpr_mu_unlock(&g_thread_state[i].mu); + } + /* ensure no thread is adding a new thread... once this is past, then + no thread will try to add a new one either (since shutdown is true) */ + gpr_spinlock_lock(&g_adding_thread_lock); + gpr_spinlock_unlock(&g_adding_thread_lock); + for (gpr_atm i = 0; i < g_cur_threads; i++) { + gpr_thd_join(g_thread_state[i].id); + } + gpr_atm_no_barrier_store(&g_cur_threads, 0); + for (size_t i = 0; i < g_max_threads; i++) { + gpr_mu_destroy(&g_thread_state[i].mu); + gpr_cv_destroy(&g_thread_state[i].cv); + run_closures(exec_ctx, g_thread_state[i].elems); + } + gpr_free(g_thread_state); + gpr_tls_destroy(&g_this_thread_state); + } +} + +void grpc_executor_init(grpc_exec_ctx *exec_ctx) { + grpc_register_tracer(&executor_trace); + gpr_atm_no_barrier_store(&g_cur_threads, 0); + grpc_executor_set_threading(exec_ctx, true); +} + +void grpc_executor_shutdown(grpc_exec_ctx *exec_ctx) { + grpc_executor_set_threading(exec_ctx, false); +} + +static void executor_thread(void *arg) { + thread_state *ts = (thread_state *)arg; + gpr_tls_set(&g_this_thread_state, (intptr_t)ts); + + grpc_exec_ctx exec_ctx = + GRPC_EXEC_CTX_INITIALIZER(0, grpc_never_ready_to_finish, NULL); + + size_t subtract_depth = 0; + for (;;) { + if (GRPC_TRACER_ON(executor_trace)) { + gpr_log(GPR_DEBUG, "EXECUTOR[%d]: step (sub_depth=%" PRIdPTR ")", + (int)(ts - g_thread_state), subtract_depth); + } + gpr_mu_lock(&ts->mu); + ts->depth -= subtract_depth; + while (grpc_closure_list_empty(ts->elems) && !ts->shutdown) { + ts->queued_long_job = false; + gpr_cv_wait(&ts->cv, &ts->mu, gpr_inf_future(GPR_CLOCK_REALTIME)); + } + if (ts->shutdown) { + if (GRPC_TRACER_ON(executor_trace)) { + gpr_log(GPR_DEBUG, "EXECUTOR[%d]: shutdown", + (int)(ts - g_thread_state)); + } + gpr_mu_unlock(&ts->mu); + break; + } + GRPC_STATS_INC_EXECUTOR_QUEUE_DRAINED(&exec_ctx); + grpc_closure_list exec = ts->elems; + ts->elems = (grpc_closure_list)GRPC_CLOSURE_LIST_INIT; + gpr_mu_unlock(&ts->mu); + if (GRPC_TRACER_ON(executor_trace)) { + gpr_log(GPR_DEBUG, "EXECUTOR[%d]: execute", (int)(ts - g_thread_state)); + } + + subtract_depth = run_closures(&exec_ctx, exec); + } + grpc_exec_ctx_finish(&exec_ctx); +} + +static void executor_push(grpc_exec_ctx *exec_ctx, grpc_closure *closure, + grpc_error *error, bool is_short) { + bool retry_push; + if (is_short) { + GRPC_STATS_INC_EXECUTOR_SCHEDULED_SHORT_ITEMS(exec_ctx); + } else { + GRPC_STATS_INC_EXECUTOR_SCHEDULED_LONG_ITEMS(exec_ctx); + } + do { + retry_push = false; + size_t cur_thread_count = (size_t)gpr_atm_no_barrier_load(&g_cur_threads); + if (cur_thread_count == 0) { + if (GRPC_TRACER_ON(executor_trace)) { +#ifndef NDEBUG + gpr_log(GPR_DEBUG, "EXECUTOR: schedule %p (created %s:%d) inline", + closure, closure->file_created, closure->line_created); +#else + gpr_log(GPR_DEBUG, "EXECUTOR: schedule %p inline", closure); +#endif + } + grpc_closure_list_append(&exec_ctx->closure_list, closure, error); + return; + } + thread_state *ts = (thread_state *)gpr_tls_get(&g_this_thread_state); + if (ts == NULL) { + ts = &g_thread_state[GPR_HASH_POINTER(exec_ctx, cur_thread_count)]; + } else { + GRPC_STATS_INC_EXECUTOR_SCHEDULED_TO_SELF(exec_ctx); + } + thread_state *orig_ts = ts; + + bool try_new_thread; + for (;;) { + if (GRPC_TRACER_ON(executor_trace)) { +#ifndef NDEBUG + gpr_log( + GPR_DEBUG, + "EXECUTOR: try to schedule %p (%s) (created %s:%d) to thread %d", + closure, is_short ? "short" : "long", closure->file_created, + closure->line_created, (int)(ts - g_thread_state)); +#else + gpr_log(GPR_DEBUG, "EXECUTOR: try to schedule %p (%s) to thread %d", + closure, is_short ? "short" : "long", + (int)(ts - g_thread_state)); +#endif + } + gpr_mu_lock(&ts->mu); + if (ts->queued_long_job) { + // if there's a long job queued, we never queue anything else to this + // queue (since long jobs can take 'infinite' time and we need to + // guarantee no starvation) + // ... spin through queues and try again + gpr_mu_unlock(&ts->mu); + size_t idx = (size_t)(ts - g_thread_state); + ts = &g_thread_state[(idx + 1) % cur_thread_count]; + if (ts == orig_ts) { + retry_push = true; + try_new_thread = true; + break; + } + continue; + } + if (grpc_closure_list_empty(ts->elems)) { + GRPC_STATS_INC_EXECUTOR_WAKEUP_INITIATED(exec_ctx); + gpr_cv_signal(&ts->cv); + } + grpc_closure_list_append(&ts->elems, closure, error); + ts->depth++; + try_new_thread = ts->depth > MAX_DEPTH && + cur_thread_count < g_max_threads && !ts->shutdown; + if (!is_short) ts->queued_long_job = true; + gpr_mu_unlock(&ts->mu); + break; + } + if (try_new_thread && gpr_spinlock_trylock(&g_adding_thread_lock)) { + cur_thread_count = (size_t)gpr_atm_no_barrier_load(&g_cur_threads); + if (cur_thread_count < g_max_threads) { + gpr_atm_no_barrier_store(&g_cur_threads, cur_thread_count + 1); + + gpr_thd_options opt = gpr_thd_options_default(); + gpr_thd_options_set_joinable(&opt); + gpr_thd_new(&g_thread_state[cur_thread_count].id, executor_thread, + &g_thread_state[cur_thread_count], &opt); + } + gpr_spinlock_unlock(&g_adding_thread_lock); + } + if (retry_push) { + GRPC_STATS_INC_EXECUTOR_PUSH_RETRIES(exec_ctx); + } + } while (retry_push); +} + +static void executor_push_short(grpc_exec_ctx *exec_ctx, grpc_closure *closure, + grpc_error *error) { + executor_push(exec_ctx, closure, error, true); +} + +static void executor_push_long(grpc_exec_ctx *exec_ctx, grpc_closure *closure, + grpc_error *error) { + executor_push(exec_ctx, closure, error, false); +} + +static const grpc_closure_scheduler_vtable executor_vtable_short = { + executor_push_short, executor_push_short, "executor"}; +static grpc_closure_scheduler executor_scheduler_short = { + &executor_vtable_short}; + +static const grpc_closure_scheduler_vtable executor_vtable_long = { + executor_push_long, executor_push_long, "executor"}; +static grpc_closure_scheduler executor_scheduler_long = {&executor_vtable_long}; + +grpc_closure_scheduler *grpc_executor_scheduler( + grpc_executor_job_length length) { + return length == GRPC_EXECUTOR_SHORT ? &executor_scheduler_short + : &executor_scheduler_long; +} diff --git a/src/core/lib/iomgr/gethostname_fallback.c b/src/core/lib/iomgr/gethostname_fallback.c deleted file mode 100644 index 6229461568..0000000000 --- a/src/core/lib/iomgr/gethostname_fallback.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_GETHOSTNAME_FALLBACK - -#include - -char *grpc_gethostname() { return NULL; } - -#endif // GRPC_GETHOSTNAME_FALLBACK diff --git a/src/core/lib/iomgr/gethostname_fallback.cc b/src/core/lib/iomgr/gethostname_fallback.cc new file mode 100644 index 0000000000..6229461568 --- /dev/null +++ b/src/core/lib/iomgr/gethostname_fallback.cc @@ -0,0 +1,27 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_GETHOSTNAME_FALLBACK + +#include + +char *grpc_gethostname() { return NULL; } + +#endif // GRPC_GETHOSTNAME_FALLBACK diff --git a/src/core/lib/iomgr/gethostname_host_name_max.c b/src/core/lib/iomgr/gethostname_host_name_max.c deleted file mode 100644 index 4d0511412e..0000000000 --- a/src/core/lib/iomgr/gethostname_host_name_max.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_HOST_NAME_MAX - -#include -#include - -#include - -char *grpc_gethostname() { - char *hostname = (char *)gpr_malloc(HOST_NAME_MAX); - if (gethostname(hostname, HOST_NAME_MAX) != 0) { - gpr_free(hostname); - return NULL; - } - return hostname; -} - -#endif // GRPC_POSIX_HOST_NAME_MAX diff --git a/src/core/lib/iomgr/gethostname_host_name_max.cc b/src/core/lib/iomgr/gethostname_host_name_max.cc new file mode 100644 index 0000000000..4d0511412e --- /dev/null +++ b/src/core/lib/iomgr/gethostname_host_name_max.cc @@ -0,0 +1,37 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_HOST_NAME_MAX + +#include +#include + +#include + +char *grpc_gethostname() { + char *hostname = (char *)gpr_malloc(HOST_NAME_MAX); + if (gethostname(hostname, HOST_NAME_MAX) != 0) { + gpr_free(hostname); + return NULL; + } + return hostname; +} + +#endif // GRPC_POSIX_HOST_NAME_MAX diff --git a/src/core/lib/iomgr/gethostname_sysconf.c b/src/core/lib/iomgr/gethostname_sysconf.c deleted file mode 100644 index 51bac5d69d..0000000000 --- a/src/core/lib/iomgr/gethostname_sysconf.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SYSCONF - -#include - -#include - -char *grpc_gethostname() { - size_t host_name_max = (size_t)sysconf(_SC_HOST_NAME_MAX); - char *hostname = (char *)gpr_malloc(host_name_max); - if (gethostname(hostname, host_name_max) != 0) { - gpr_free(hostname); - return NULL; - } - return hostname; -} - -#endif // GRPC_POSIX_SYSCONF diff --git a/src/core/lib/iomgr/gethostname_sysconf.cc b/src/core/lib/iomgr/gethostname_sysconf.cc new file mode 100644 index 0000000000..51bac5d69d --- /dev/null +++ b/src/core/lib/iomgr/gethostname_sysconf.cc @@ -0,0 +1,37 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SYSCONF + +#include + +#include + +char *grpc_gethostname() { + size_t host_name_max = (size_t)sysconf(_SC_HOST_NAME_MAX); + char *hostname = (char *)gpr_malloc(host_name_max); + if (gethostname(hostname, host_name_max) != 0) { + gpr_free(hostname); + return NULL; + } + return hostname; +} + +#endif // GRPC_POSIX_SYSCONF diff --git a/src/core/lib/iomgr/iocp_windows.c b/src/core/lib/iomgr/iocp_windows.c deleted file mode 100644 index c082179c0b..0000000000 --- a/src/core/lib/iomgr/iocp_windows.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_WINSOCK_SOCKET - -#include - -#include -#include -#include -#include - -#include "src/core/lib/debug/stats.h" -#include "src/core/lib/iomgr/iocp_windows.h" -#include "src/core/lib/iomgr/iomgr_internal.h" -#include "src/core/lib/iomgr/socket_windows.h" -#include "src/core/lib/iomgr/timer.h" - -static ULONG g_iocp_kick_token; -static OVERLAPPED g_iocp_custom_overlap; - -static gpr_atm g_custom_events = 0; - -static HANDLE g_iocp; - -static DWORD deadline_to_millis_timeout(gpr_timespec deadline, - gpr_timespec now) { - gpr_timespec timeout; - static const int64_t max_spin_polling_us = 10; - if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { - return INFINITE; - } - if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros( - max_spin_polling_us, - GPR_TIMESPAN))) <= 0) { - return 0; - } - timeout = gpr_time_sub(deadline, now); - return (DWORD)gpr_time_to_millis(gpr_time_add( - timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); -} - -grpc_iocp_work_status grpc_iocp_work(grpc_exec_ctx *exec_ctx, - gpr_timespec deadline) { - BOOL success; - DWORD bytes = 0; - DWORD flags = 0; - ULONG_PTR completion_key; - LPOVERLAPPED overlapped; - grpc_winsocket *socket; - grpc_winsocket_callback_info *info; - GRPC_STATS_INC_SYSCALL_POLL(exec_ctx); - success = GetQueuedCompletionStatus( - g_iocp, &bytes, &completion_key, &overlapped, - deadline_to_millis_timeout(deadline, gpr_now(deadline.clock_type))); - if (success == 0 && overlapped == NULL) { - return GRPC_IOCP_WORK_TIMEOUT; - } - GPR_ASSERT(completion_key && overlapped); - if (overlapped == &g_iocp_custom_overlap) { - gpr_atm_full_fetch_add(&g_custom_events, -1); - if (completion_key == (ULONG_PTR)&g_iocp_kick_token) { - /* We were awoken from a kick. */ - return GRPC_IOCP_WORK_KICK; - } - gpr_log(GPR_ERROR, "Unknown custom completion key."); - abort(); - } - - socket = (grpc_winsocket *)completion_key; - if (overlapped == &socket->write_info.overlapped) { - info = &socket->write_info; - } else if (overlapped == &socket->read_info.overlapped) { - info = &socket->read_info; - } else { - abort(); - } - success = WSAGetOverlappedResult(socket->socket, &info->overlapped, &bytes, - FALSE, &flags); - info->bytes_transfered = bytes; - info->wsa_error = success ? 0 : WSAGetLastError(); - GPR_ASSERT(overlapped == &info->overlapped); - grpc_socket_become_ready(exec_ctx, socket, info); - return GRPC_IOCP_WORK_WORK; -} - -void grpc_iocp_init(void) { - g_iocp = - CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, (ULONG_PTR)NULL, 0); - GPR_ASSERT(g_iocp); -} - -void grpc_iocp_kick(void) { - BOOL success; - - gpr_atm_full_fetch_add(&g_custom_events, 1); - success = PostQueuedCompletionStatus(g_iocp, 0, (ULONG_PTR)&g_iocp_kick_token, - &g_iocp_custom_overlap); - GPR_ASSERT(success); -} - -void grpc_iocp_flush(void) { - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - grpc_iocp_work_status work_status; - - do { - work_status = grpc_iocp_work(&exec_ctx, gpr_inf_past(GPR_CLOCK_MONOTONIC)); - } while (work_status == GRPC_IOCP_WORK_KICK || - grpc_exec_ctx_flush(&exec_ctx)); -} - -void grpc_iocp_shutdown(void) { - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - while (gpr_atm_acq_load(&g_custom_events)) { - grpc_iocp_work(&exec_ctx, gpr_inf_future(GPR_CLOCK_MONOTONIC)); - grpc_exec_ctx_flush(&exec_ctx); - } - grpc_exec_ctx_finish(&exec_ctx); - GPR_ASSERT(CloseHandle(g_iocp)); -} - -void grpc_iocp_add_socket(grpc_winsocket *socket) { - HANDLE ret; - if (socket->added_to_iocp) return; - ret = CreateIoCompletionPort((HANDLE)socket->socket, g_iocp, - (uintptr_t)socket, 0); - if (!ret) { - char *utf8_message = gpr_format_message(WSAGetLastError()); - gpr_log(GPR_ERROR, "Unable to add socket to iocp: %s", utf8_message); - gpr_free(utf8_message); - __debugbreak(); - abort(); - } - socket->added_to_iocp = 1; - GPR_ASSERT(ret == g_iocp); -} - -#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/iocp_windows.cc b/src/core/lib/iomgr/iocp_windows.cc new file mode 100644 index 0000000000..c082179c0b --- /dev/null +++ b/src/core/lib/iomgr/iocp_windows.cc @@ -0,0 +1,155 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_WINSOCK_SOCKET + +#include + +#include +#include +#include +#include + +#include "src/core/lib/debug/stats.h" +#include "src/core/lib/iomgr/iocp_windows.h" +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/socket_windows.h" +#include "src/core/lib/iomgr/timer.h" + +static ULONG g_iocp_kick_token; +static OVERLAPPED g_iocp_custom_overlap; + +static gpr_atm g_custom_events = 0; + +static HANDLE g_iocp; + +static DWORD deadline_to_millis_timeout(gpr_timespec deadline, + gpr_timespec now) { + gpr_timespec timeout; + static const int64_t max_spin_polling_us = 10; + if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) { + return INFINITE; + } + if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros( + max_spin_polling_us, + GPR_TIMESPAN))) <= 0) { + return 0; + } + timeout = gpr_time_sub(deadline, now); + return (DWORD)gpr_time_to_millis(gpr_time_add( + timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN))); +} + +grpc_iocp_work_status grpc_iocp_work(grpc_exec_ctx *exec_ctx, + gpr_timespec deadline) { + BOOL success; + DWORD bytes = 0; + DWORD flags = 0; + ULONG_PTR completion_key; + LPOVERLAPPED overlapped; + grpc_winsocket *socket; + grpc_winsocket_callback_info *info; + GRPC_STATS_INC_SYSCALL_POLL(exec_ctx); + success = GetQueuedCompletionStatus( + g_iocp, &bytes, &completion_key, &overlapped, + deadline_to_millis_timeout(deadline, gpr_now(deadline.clock_type))); + if (success == 0 && overlapped == NULL) { + return GRPC_IOCP_WORK_TIMEOUT; + } + GPR_ASSERT(completion_key && overlapped); + if (overlapped == &g_iocp_custom_overlap) { + gpr_atm_full_fetch_add(&g_custom_events, -1); + if (completion_key == (ULONG_PTR)&g_iocp_kick_token) { + /* We were awoken from a kick. */ + return GRPC_IOCP_WORK_KICK; + } + gpr_log(GPR_ERROR, "Unknown custom completion key."); + abort(); + } + + socket = (grpc_winsocket *)completion_key; + if (overlapped == &socket->write_info.overlapped) { + info = &socket->write_info; + } else if (overlapped == &socket->read_info.overlapped) { + info = &socket->read_info; + } else { + abort(); + } + success = WSAGetOverlappedResult(socket->socket, &info->overlapped, &bytes, + FALSE, &flags); + info->bytes_transfered = bytes; + info->wsa_error = success ? 0 : WSAGetLastError(); + GPR_ASSERT(overlapped == &info->overlapped); + grpc_socket_become_ready(exec_ctx, socket, info); + return GRPC_IOCP_WORK_WORK; +} + +void grpc_iocp_init(void) { + g_iocp = + CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, (ULONG_PTR)NULL, 0); + GPR_ASSERT(g_iocp); +} + +void grpc_iocp_kick(void) { + BOOL success; + + gpr_atm_full_fetch_add(&g_custom_events, 1); + success = PostQueuedCompletionStatus(g_iocp, 0, (ULONG_PTR)&g_iocp_kick_token, + &g_iocp_custom_overlap); + GPR_ASSERT(success); +} + +void grpc_iocp_flush(void) { + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + grpc_iocp_work_status work_status; + + do { + work_status = grpc_iocp_work(&exec_ctx, gpr_inf_past(GPR_CLOCK_MONOTONIC)); + } while (work_status == GRPC_IOCP_WORK_KICK || + grpc_exec_ctx_flush(&exec_ctx)); +} + +void grpc_iocp_shutdown(void) { + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + while (gpr_atm_acq_load(&g_custom_events)) { + grpc_iocp_work(&exec_ctx, gpr_inf_future(GPR_CLOCK_MONOTONIC)); + grpc_exec_ctx_flush(&exec_ctx); + } + grpc_exec_ctx_finish(&exec_ctx); + GPR_ASSERT(CloseHandle(g_iocp)); +} + +void grpc_iocp_add_socket(grpc_winsocket *socket) { + HANDLE ret; + if (socket->added_to_iocp) return; + ret = CreateIoCompletionPort((HANDLE)socket->socket, g_iocp, + (uintptr_t)socket, 0); + if (!ret) { + char *utf8_message = gpr_format_message(WSAGetLastError()); + gpr_log(GPR_ERROR, "Unable to add socket to iocp: %s", utf8_message); + gpr_free(utf8_message); + __debugbreak(); + abort(); + } + socket->added_to_iocp = 1; + GPR_ASSERT(ret == g_iocp); +} + +#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/iomgr.c b/src/core/lib/iomgr/iomgr.c deleted file mode 100644 index f63f190155..0000000000 --- a/src/core/lib/iomgr/iomgr.c +++ /dev/null @@ -1,170 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/iomgr.h" - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "src/core/lib/iomgr/exec_ctx.h" -#include "src/core/lib/iomgr/executor.h" -#include "src/core/lib/iomgr/iomgr_internal.h" -#include "src/core/lib/iomgr/network_status_tracker.h" -#include "src/core/lib/iomgr/timer.h" -#include "src/core/lib/iomgr/timer_manager.h" -#include "src/core/lib/support/env.h" -#include "src/core/lib/support/string.h" - -static gpr_mu g_mu; -static gpr_cv g_rcv; -static int g_shutdown; -static grpc_iomgr_object g_root_object; - -void grpc_iomgr_init(grpc_exec_ctx *exec_ctx) { - g_shutdown = 0; - gpr_mu_init(&g_mu); - gpr_cv_init(&g_rcv); - grpc_exec_ctx_global_init(); - grpc_executor_init(exec_ctx); - grpc_timer_list_init(gpr_now(GPR_CLOCK_MONOTONIC)); - g_root_object.next = g_root_object.prev = &g_root_object; - g_root_object.name = (char *)"root"; - grpc_network_status_init(); - grpc_iomgr_platform_init(); -} - -void grpc_iomgr_start(grpc_exec_ctx *exec_ctx) { grpc_timer_manager_init(); } - -static size_t count_objects(void) { - grpc_iomgr_object *obj; - size_t n = 0; - for (obj = g_root_object.next; obj != &g_root_object; obj = obj->next) { - n++; - } - return n; -} - -static void dump_objects(const char *kind) { - grpc_iomgr_object *obj; - for (obj = g_root_object.next; obj != &g_root_object; obj = obj->next) { - gpr_log(GPR_DEBUG, "%s OBJECT: %s %p", kind, obj->name, obj); - } -} - -void grpc_iomgr_shutdown(grpc_exec_ctx *exec_ctx) { - gpr_timespec shutdown_deadline = gpr_time_add( - gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_seconds(10, GPR_TIMESPAN)); - gpr_timespec last_warning_time = gpr_now(GPR_CLOCK_REALTIME); - - grpc_timer_manager_shutdown(); - grpc_iomgr_platform_flush(); - grpc_executor_shutdown(exec_ctx); - - gpr_mu_lock(&g_mu); - g_shutdown = 1; - while (g_root_object.next != &g_root_object) { - if (gpr_time_cmp( - gpr_time_sub(gpr_now(GPR_CLOCK_REALTIME), last_warning_time), - gpr_time_from_seconds(1, GPR_TIMESPAN)) >= 0) { - if (g_root_object.next != &g_root_object) { - gpr_log(GPR_DEBUG, - "Waiting for %" PRIuPTR " iomgr objects to be destroyed", - count_objects()); - } - last_warning_time = gpr_now(GPR_CLOCK_REALTIME); - } - if (grpc_timer_check(exec_ctx, gpr_inf_future(GPR_CLOCK_MONOTONIC), NULL) == - GRPC_TIMERS_FIRED) { - gpr_mu_unlock(&g_mu); - grpc_exec_ctx_flush(exec_ctx); - grpc_iomgr_platform_flush(); - gpr_mu_lock(&g_mu); - continue; - } - if (g_root_object.next != &g_root_object) { - if (grpc_iomgr_abort_on_leaks()) { - gpr_log(GPR_DEBUG, "Failed to free %" PRIuPTR - " iomgr objects before shutdown deadline: " - "memory leaks are likely", - count_objects()); - dump_objects("LEAKED"); - abort(); - } - gpr_timespec short_deadline = gpr_time_add( - gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_millis(100, GPR_TIMESPAN)); - if (gpr_cv_wait(&g_rcv, &g_mu, short_deadline)) { - if (gpr_time_cmp(gpr_now(GPR_CLOCK_REALTIME), shutdown_deadline) > 0) { - if (g_root_object.next != &g_root_object) { - gpr_log(GPR_DEBUG, "Failed to free %" PRIuPTR - " iomgr objects before shutdown deadline: " - "memory leaks are likely", - count_objects()); - dump_objects("LEAKED"); - } - break; - } - } - } - } - gpr_mu_unlock(&g_mu); - - grpc_timer_list_shutdown(exec_ctx); - grpc_exec_ctx_flush(exec_ctx); - - /* ensure all threads have left g_mu */ - gpr_mu_lock(&g_mu); - gpr_mu_unlock(&g_mu); - - grpc_iomgr_platform_shutdown(); - grpc_exec_ctx_global_shutdown(); - grpc_network_status_shutdown(); - gpr_mu_destroy(&g_mu); - gpr_cv_destroy(&g_rcv); -} - -void grpc_iomgr_register_object(grpc_iomgr_object *obj, const char *name) { - obj->name = gpr_strdup(name); - gpr_mu_lock(&g_mu); - obj->next = &g_root_object; - obj->prev = g_root_object.prev; - obj->next->prev = obj->prev->next = obj; - gpr_mu_unlock(&g_mu); -} - -void grpc_iomgr_unregister_object(grpc_iomgr_object *obj) { - gpr_mu_lock(&g_mu); - obj->next->prev = obj->prev; - obj->prev->next = obj->next; - gpr_cv_signal(&g_rcv); - gpr_mu_unlock(&g_mu); - gpr_free(obj->name); -} - -bool grpc_iomgr_abort_on_leaks(void) { - char *env = gpr_getenv("GRPC_ABORT_ON_LEAKS"); - bool should_we = gpr_is_true(env); - gpr_free(env); - return should_we; -} diff --git a/src/core/lib/iomgr/iomgr.cc b/src/core/lib/iomgr/iomgr.cc new file mode 100644 index 0000000000..f63f190155 --- /dev/null +++ b/src/core/lib/iomgr/iomgr.cc @@ -0,0 +1,170 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/iomgr.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "src/core/lib/iomgr/exec_ctx.h" +#include "src/core/lib/iomgr/executor.h" +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/network_status_tracker.h" +#include "src/core/lib/iomgr/timer.h" +#include "src/core/lib/iomgr/timer_manager.h" +#include "src/core/lib/support/env.h" +#include "src/core/lib/support/string.h" + +static gpr_mu g_mu; +static gpr_cv g_rcv; +static int g_shutdown; +static grpc_iomgr_object g_root_object; + +void grpc_iomgr_init(grpc_exec_ctx *exec_ctx) { + g_shutdown = 0; + gpr_mu_init(&g_mu); + gpr_cv_init(&g_rcv); + grpc_exec_ctx_global_init(); + grpc_executor_init(exec_ctx); + grpc_timer_list_init(gpr_now(GPR_CLOCK_MONOTONIC)); + g_root_object.next = g_root_object.prev = &g_root_object; + g_root_object.name = (char *)"root"; + grpc_network_status_init(); + grpc_iomgr_platform_init(); +} + +void grpc_iomgr_start(grpc_exec_ctx *exec_ctx) { grpc_timer_manager_init(); } + +static size_t count_objects(void) { + grpc_iomgr_object *obj; + size_t n = 0; + for (obj = g_root_object.next; obj != &g_root_object; obj = obj->next) { + n++; + } + return n; +} + +static void dump_objects(const char *kind) { + grpc_iomgr_object *obj; + for (obj = g_root_object.next; obj != &g_root_object; obj = obj->next) { + gpr_log(GPR_DEBUG, "%s OBJECT: %s %p", kind, obj->name, obj); + } +} + +void grpc_iomgr_shutdown(grpc_exec_ctx *exec_ctx) { + gpr_timespec shutdown_deadline = gpr_time_add( + gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_seconds(10, GPR_TIMESPAN)); + gpr_timespec last_warning_time = gpr_now(GPR_CLOCK_REALTIME); + + grpc_timer_manager_shutdown(); + grpc_iomgr_platform_flush(); + grpc_executor_shutdown(exec_ctx); + + gpr_mu_lock(&g_mu); + g_shutdown = 1; + while (g_root_object.next != &g_root_object) { + if (gpr_time_cmp( + gpr_time_sub(gpr_now(GPR_CLOCK_REALTIME), last_warning_time), + gpr_time_from_seconds(1, GPR_TIMESPAN)) >= 0) { + if (g_root_object.next != &g_root_object) { + gpr_log(GPR_DEBUG, + "Waiting for %" PRIuPTR " iomgr objects to be destroyed", + count_objects()); + } + last_warning_time = gpr_now(GPR_CLOCK_REALTIME); + } + if (grpc_timer_check(exec_ctx, gpr_inf_future(GPR_CLOCK_MONOTONIC), NULL) == + GRPC_TIMERS_FIRED) { + gpr_mu_unlock(&g_mu); + grpc_exec_ctx_flush(exec_ctx); + grpc_iomgr_platform_flush(); + gpr_mu_lock(&g_mu); + continue; + } + if (g_root_object.next != &g_root_object) { + if (grpc_iomgr_abort_on_leaks()) { + gpr_log(GPR_DEBUG, "Failed to free %" PRIuPTR + " iomgr objects before shutdown deadline: " + "memory leaks are likely", + count_objects()); + dump_objects("LEAKED"); + abort(); + } + gpr_timespec short_deadline = gpr_time_add( + gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_millis(100, GPR_TIMESPAN)); + if (gpr_cv_wait(&g_rcv, &g_mu, short_deadline)) { + if (gpr_time_cmp(gpr_now(GPR_CLOCK_REALTIME), shutdown_deadline) > 0) { + if (g_root_object.next != &g_root_object) { + gpr_log(GPR_DEBUG, "Failed to free %" PRIuPTR + " iomgr objects before shutdown deadline: " + "memory leaks are likely", + count_objects()); + dump_objects("LEAKED"); + } + break; + } + } + } + } + gpr_mu_unlock(&g_mu); + + grpc_timer_list_shutdown(exec_ctx); + grpc_exec_ctx_flush(exec_ctx); + + /* ensure all threads have left g_mu */ + gpr_mu_lock(&g_mu); + gpr_mu_unlock(&g_mu); + + grpc_iomgr_platform_shutdown(); + grpc_exec_ctx_global_shutdown(); + grpc_network_status_shutdown(); + gpr_mu_destroy(&g_mu); + gpr_cv_destroy(&g_rcv); +} + +void grpc_iomgr_register_object(grpc_iomgr_object *obj, const char *name) { + obj->name = gpr_strdup(name); + gpr_mu_lock(&g_mu); + obj->next = &g_root_object; + obj->prev = g_root_object.prev; + obj->next->prev = obj->prev->next = obj; + gpr_mu_unlock(&g_mu); +} + +void grpc_iomgr_unregister_object(grpc_iomgr_object *obj) { + gpr_mu_lock(&g_mu); + obj->next->prev = obj->prev; + obj->prev->next = obj->next; + gpr_cv_signal(&g_rcv); + gpr_mu_unlock(&g_mu); + gpr_free(obj->name); +} + +bool grpc_iomgr_abort_on_leaks(void) { + char *env = gpr_getenv("GRPC_ABORT_ON_LEAKS"); + bool should_we = gpr_is_true(env); + gpr_free(env); + return should_we; +} diff --git a/src/core/lib/iomgr/iomgr_posix.c b/src/core/lib/iomgr/iomgr_posix.c deleted file mode 100644 index f5875a247e..0000000000 --- a/src/core/lib/iomgr/iomgr_posix.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/debug/trace.h" -#include "src/core/lib/iomgr/ev_posix.h" -#include "src/core/lib/iomgr/iomgr_posix.h" -#include "src/core/lib/iomgr/tcp_posix.h" - -void grpc_iomgr_platform_init(void) { - grpc_wakeup_fd_global_init(); - grpc_event_engine_init(); - grpc_register_tracer(&grpc_tcp_trace); -} - -void grpc_iomgr_platform_flush(void) {} - -void grpc_iomgr_platform_shutdown(void) { - grpc_event_engine_shutdown(); - grpc_wakeup_fd_global_destroy(); -} - -#endif /* GRPC_POSIX_SOCKET */ diff --git a/src/core/lib/iomgr/iomgr_posix.cc b/src/core/lib/iomgr/iomgr_posix.cc new file mode 100644 index 0000000000..f5875a247e --- /dev/null +++ b/src/core/lib/iomgr/iomgr_posix.cc @@ -0,0 +1,41 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/iomgr/ev_posix.h" +#include "src/core/lib/iomgr/iomgr_posix.h" +#include "src/core/lib/iomgr/tcp_posix.h" + +void grpc_iomgr_platform_init(void) { + grpc_wakeup_fd_global_init(); + grpc_event_engine_init(); + grpc_register_tracer(&grpc_tcp_trace); +} + +void grpc_iomgr_platform_flush(void) {} + +void grpc_iomgr_platform_shutdown(void) { + grpc_event_engine_shutdown(); + grpc_wakeup_fd_global_destroy(); +} + +#endif /* GRPC_POSIX_SOCKET */ diff --git a/src/core/lib/iomgr/iomgr_uv.c b/src/core/lib/iomgr/iomgr_uv.c deleted file mode 100644 index df5d23af3b..0000000000 --- a/src/core/lib/iomgr/iomgr_uv.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_UV - -#include "src/core/lib/debug/trace.h" -#include "src/core/lib/iomgr/executor.h" -#include "src/core/lib/iomgr/iomgr_uv.h" -#include "src/core/lib/iomgr/pollset_uv.h" -#include "src/core/lib/iomgr/tcp_uv.h" - -gpr_thd_id g_init_thread; - -void grpc_iomgr_platform_init(void) { - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - grpc_pollset_global_init(); - grpc_register_tracer(&grpc_tcp_trace); - grpc_executor_set_threading(&exec_ctx, false); - g_init_thread = gpr_thd_currentid(); - grpc_exec_ctx_finish(&exec_ctx); -} -void grpc_iomgr_platform_flush(void) {} -void grpc_iomgr_platform_shutdown(void) { grpc_pollset_global_shutdown(); } - -#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/iomgr_uv.cc b/src/core/lib/iomgr/iomgr_uv.cc new file mode 100644 index 0000000000..df5d23af3b --- /dev/null +++ b/src/core/lib/iomgr/iomgr_uv.cc @@ -0,0 +1,42 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_UV + +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/iomgr/executor.h" +#include "src/core/lib/iomgr/iomgr_uv.h" +#include "src/core/lib/iomgr/pollset_uv.h" +#include "src/core/lib/iomgr/tcp_uv.h" + +gpr_thd_id g_init_thread; + +void grpc_iomgr_platform_init(void) { + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + grpc_pollset_global_init(); + grpc_register_tracer(&grpc_tcp_trace); + grpc_executor_set_threading(&exec_ctx, false); + g_init_thread = gpr_thd_currentid(); + grpc_exec_ctx_finish(&exec_ctx); +} +void grpc_iomgr_platform_flush(void) {} +void grpc_iomgr_platform_shutdown(void) { grpc_pollset_global_shutdown(); } + +#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/iomgr_windows.c b/src/core/lib/iomgr/iomgr_windows.c deleted file mode 100644 index 630370166d..0000000000 --- a/src/core/lib/iomgr/iomgr_windows.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_WINSOCK_SOCKET - -#include "src/core/lib/iomgr/sockaddr_windows.h" - -#include - -#include "src/core/lib/iomgr/iocp_windows.h" -#include "src/core/lib/iomgr/iomgr.h" -#include "src/core/lib/iomgr/pollset_windows.h" -#include "src/core/lib/iomgr/socket_windows.h" - -/* Windows' io manager is going to be fully designed using IO completion - ports. All of what we're doing here is basically make sure that - Windows sockets are initialized in and out. */ - -static void winsock_init(void) { - WSADATA wsaData; - int status = WSAStartup(MAKEWORD(2, 0), &wsaData); - GPR_ASSERT(status == 0); -} - -static void winsock_shutdown(void) { - int status = WSACleanup(); - GPR_ASSERT(status == 0); -} - -void grpc_iomgr_platform_init(void) { - winsock_init(); - grpc_iocp_init(); - grpc_pollset_global_init(); -} - -void grpc_iomgr_platform_flush(void) { grpc_iocp_flush(); } - -void grpc_iomgr_platform_shutdown(void) { - grpc_pollset_global_shutdown(); - grpc_iocp_shutdown(); - winsock_shutdown(); -} - -#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/iomgr_windows.cc b/src/core/lib/iomgr/iomgr_windows.cc new file mode 100644 index 0000000000..630370166d --- /dev/null +++ b/src/core/lib/iomgr/iomgr_windows.cc @@ -0,0 +1,61 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_WINSOCK_SOCKET + +#include "src/core/lib/iomgr/sockaddr_windows.h" + +#include + +#include "src/core/lib/iomgr/iocp_windows.h" +#include "src/core/lib/iomgr/iomgr.h" +#include "src/core/lib/iomgr/pollset_windows.h" +#include "src/core/lib/iomgr/socket_windows.h" + +/* Windows' io manager is going to be fully designed using IO completion + ports. All of what we're doing here is basically make sure that + Windows sockets are initialized in and out. */ + +static void winsock_init(void) { + WSADATA wsaData; + int status = WSAStartup(MAKEWORD(2, 0), &wsaData); + GPR_ASSERT(status == 0); +} + +static void winsock_shutdown(void) { + int status = WSACleanup(); + GPR_ASSERT(status == 0); +} + +void grpc_iomgr_platform_init(void) { + winsock_init(); + grpc_iocp_init(); + grpc_pollset_global_init(); +} + +void grpc_iomgr_platform_flush(void) { grpc_iocp_flush(); } + +void grpc_iomgr_platform_shutdown(void) { + grpc_pollset_global_shutdown(); + grpc_iocp_shutdown(); + winsock_shutdown(); +} + +#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/is_epollexclusive_available.c b/src/core/lib/iomgr/is_epollexclusive_available.c deleted file mode 100644 index d08844c0df..0000000000 --- a/src/core/lib/iomgr/is_epollexclusive_available.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#include "src/core/lib/iomgr/is_epollexclusive_available.h" - -#ifdef GRPC_LINUX_EPOLL - -#include - -#include -#include -#include - -#include "src/core/lib/iomgr/sys_epoll_wrapper.h" - -/* This polling engine is only relevant on linux kernels supporting epoll() */ -bool grpc_is_epollexclusive_available(void) { - static bool logged_why_not = false; - - int fd = epoll_create1(EPOLL_CLOEXEC); - if (fd < 0) { - if (!logged_why_not) { - gpr_log(GPR_ERROR, - "epoll_create1 failed with error: %d. Not using epollex polling " - "engine.", - fd); - logged_why_not = true; - } - return false; - } - int evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (evfd < 0) { - if (!logged_why_not) { - gpr_log(GPR_ERROR, - "eventfd failed with error: %d. Not using epollex polling " - "engine.", - fd); - logged_why_not = true; - } - close(fd); - return false; - } - struct epoll_event ev; - /* choose events that should cause an error on - EPOLLEXCLUSIVE enabled kernels - specifically the combination of - EPOLLONESHOT and EPOLLEXCLUSIVE */ - ev.events = (uint32_t)(EPOLLET | EPOLLIN | EPOLLEXCLUSIVE | EPOLLONESHOT); - ev.data.ptr = NULL; - if (epoll_ctl(fd, EPOLL_CTL_ADD, evfd, &ev) != 0) { - if (errno != EINVAL) { - if (!logged_why_not) { - gpr_log( - GPR_ERROR, - "epoll_ctl with EPOLLEXCLUSIVE | EPOLLONESHOT failed with error: " - "%d. Not using epollex polling engine.", - errno); - logged_why_not = true; - } - close(fd); - close(evfd); - return false; - } - } else { - if (!logged_why_not) { - gpr_log(GPR_ERROR, - "epoll_ctl with EPOLLEXCLUSIVE | EPOLLONESHOT succeeded. This is " - "evidence of no EPOLLEXCLUSIVE support. Not using " - "epollex polling engine."); - logged_why_not = true; - } - close(fd); - close(evfd); - return false; - } - close(evfd); - close(fd); - return true; -} - -#else - -bool grpc_is_epollexclusive_available(void) { return false; } - -#endif diff --git a/src/core/lib/iomgr/is_epollexclusive_available.cc b/src/core/lib/iomgr/is_epollexclusive_available.cc new file mode 100644 index 0000000000..d08844c0df --- /dev/null +++ b/src/core/lib/iomgr/is_epollexclusive_available.cc @@ -0,0 +1,101 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#include "src/core/lib/iomgr/is_epollexclusive_available.h" + +#ifdef GRPC_LINUX_EPOLL + +#include + +#include +#include +#include + +#include "src/core/lib/iomgr/sys_epoll_wrapper.h" + +/* This polling engine is only relevant on linux kernels supporting epoll() */ +bool grpc_is_epollexclusive_available(void) { + static bool logged_why_not = false; + + int fd = epoll_create1(EPOLL_CLOEXEC); + if (fd < 0) { + if (!logged_why_not) { + gpr_log(GPR_ERROR, + "epoll_create1 failed with error: %d. Not using epollex polling " + "engine.", + fd); + logged_why_not = true; + } + return false; + } + int evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (evfd < 0) { + if (!logged_why_not) { + gpr_log(GPR_ERROR, + "eventfd failed with error: %d. Not using epollex polling " + "engine.", + fd); + logged_why_not = true; + } + close(fd); + return false; + } + struct epoll_event ev; + /* choose events that should cause an error on + EPOLLEXCLUSIVE enabled kernels - specifically the combination of + EPOLLONESHOT and EPOLLEXCLUSIVE */ + ev.events = (uint32_t)(EPOLLET | EPOLLIN | EPOLLEXCLUSIVE | EPOLLONESHOT); + ev.data.ptr = NULL; + if (epoll_ctl(fd, EPOLL_CTL_ADD, evfd, &ev) != 0) { + if (errno != EINVAL) { + if (!logged_why_not) { + gpr_log( + GPR_ERROR, + "epoll_ctl with EPOLLEXCLUSIVE | EPOLLONESHOT failed with error: " + "%d. Not using epollex polling engine.", + errno); + logged_why_not = true; + } + close(fd); + close(evfd); + return false; + } + } else { + if (!logged_why_not) { + gpr_log(GPR_ERROR, + "epoll_ctl with EPOLLEXCLUSIVE | EPOLLONESHOT succeeded. This is " + "evidence of no EPOLLEXCLUSIVE support. Not using " + "epollex polling engine."); + logged_why_not = true; + } + close(fd); + close(evfd); + return false; + } + close(evfd); + close(fd); + return true; +} + +#else + +bool grpc_is_epollexclusive_available(void) { return false; } + +#endif diff --git a/src/core/lib/iomgr/load_file.c b/src/core/lib/iomgr/load_file.c deleted file mode 100644 index 0b4d41ea4b..0000000000 --- a/src/core/lib/iomgr/load_file.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/load_file.h" - -#include -#include - -#include -#include -#include - -#include "src/core/lib/support/block_annotate.h" -#include "src/core/lib/support/string.h" - -grpc_error *grpc_load_file(const char *filename, int add_null_terminator, - grpc_slice *output) { - unsigned char *contents = NULL; - size_t contents_size = 0; - grpc_slice result = grpc_empty_slice(); - FILE *file; - size_t bytes_read = 0; - grpc_error *error = GRPC_ERROR_NONE; - - GRPC_SCHEDULING_START_BLOCKING_REGION; - file = fopen(filename, "rb"); - if (file == NULL) { - error = GRPC_OS_ERROR(errno, "fopen"); - goto end; - } - fseek(file, 0, SEEK_END); - /* Converting to size_t on the assumption that it will not fail */ - contents_size = (size_t)ftell(file); - fseek(file, 0, SEEK_SET); - contents = (unsigned char *)gpr_malloc(contents_size + - (add_null_terminator ? 1 : 0)); - bytes_read = fread(contents, 1, contents_size, file); - if (bytes_read < contents_size) { - error = GRPC_OS_ERROR(errno, "fread"); - GPR_ASSERT(ferror(file)); - goto end; - } - if (add_null_terminator) { - contents[contents_size++] = 0; - } - result = grpc_slice_new(contents, contents_size, gpr_free); - -end: - *output = result; - if (file != NULL) fclose(file); - if (error != GRPC_ERROR_NONE) { - grpc_error *error_out = - grpc_error_set_str(GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "Failed to load file", &error, 1), - GRPC_ERROR_STR_FILENAME, - grpc_slice_from_copied_string( - filename)); // TODO(ncteisen), always static? - GRPC_ERROR_UNREF(error); - error = error_out; - } - GRPC_SCHEDULING_END_BLOCKING_REGION; - return error; -} diff --git a/src/core/lib/iomgr/load_file.cc b/src/core/lib/iomgr/load_file.cc new file mode 100644 index 0000000000..0b4d41ea4b --- /dev/null +++ b/src/core/lib/iomgr/load_file.cc @@ -0,0 +1,78 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/load_file.h" + +#include +#include + +#include +#include +#include + +#include "src/core/lib/support/block_annotate.h" +#include "src/core/lib/support/string.h" + +grpc_error *grpc_load_file(const char *filename, int add_null_terminator, + grpc_slice *output) { + unsigned char *contents = NULL; + size_t contents_size = 0; + grpc_slice result = grpc_empty_slice(); + FILE *file; + size_t bytes_read = 0; + grpc_error *error = GRPC_ERROR_NONE; + + GRPC_SCHEDULING_START_BLOCKING_REGION; + file = fopen(filename, "rb"); + if (file == NULL) { + error = GRPC_OS_ERROR(errno, "fopen"); + goto end; + } + fseek(file, 0, SEEK_END); + /* Converting to size_t on the assumption that it will not fail */ + contents_size = (size_t)ftell(file); + fseek(file, 0, SEEK_SET); + contents = (unsigned char *)gpr_malloc(contents_size + + (add_null_terminator ? 1 : 0)); + bytes_read = fread(contents, 1, contents_size, file); + if (bytes_read < contents_size) { + error = GRPC_OS_ERROR(errno, "fread"); + GPR_ASSERT(ferror(file)); + goto end; + } + if (add_null_terminator) { + contents[contents_size++] = 0; + } + result = grpc_slice_new(contents, contents_size, gpr_free); + +end: + *output = result; + if (file != NULL) fclose(file); + if (error != GRPC_ERROR_NONE) { + grpc_error *error_out = + grpc_error_set_str(GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "Failed to load file", &error, 1), + GRPC_ERROR_STR_FILENAME, + grpc_slice_from_copied_string( + filename)); // TODO(ncteisen), always static? + GRPC_ERROR_UNREF(error); + error = error_out; + } + GRPC_SCHEDULING_END_BLOCKING_REGION; + return error; +} diff --git a/src/core/lib/iomgr/lockfree_event.c b/src/core/lib/iomgr/lockfree_event.c deleted file mode 100644 index f967b22ba9..0000000000 --- a/src/core/lib/iomgr/lockfree_event.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/lockfree_event.h" - -#include - -#include "src/core/lib/debug/trace.h" - -extern grpc_tracer_flag grpc_polling_trace; - -/* 'state' holds the to call when the fd is readable or writable respectively. - It can contain one of the following values: - CLOSURE_READY : The fd has an I/O event of interest but there is no - closure yet to execute - - CLOSURE_NOT_READY : The fd has no I/O event of interest - - closure ptr : The closure to be executed when the fd has an I/O - event of interest - - shutdown_error | FD_SHUTDOWN_BIT : - 'shutdown_error' field ORed with FD_SHUTDOWN_BIT. - This indicates that the fd is shutdown. Since all - memory allocations are word-aligned, the lower two - bits of the shutdown_error pointer are always 0. So - it is safe to OR these with FD_SHUTDOWN_BIT - - Valid state transitions: - - <-----3------ CLOSURE_NOT_READY ----1----> CLOSURE_READY - | | ^ | ^ | | - | | | | | | | - | +--------------4----------+ 6 +---------2---------------+ | - | | | - | v | - +-----5-------> [shutdown_error | FD_SHUTDOWN_BIT] <----7---------+ - - For 1, 4 : See grpc_lfev_set_ready() function - For 2, 3 : See grpc_lfev_notify_on() function - For 5,6,7: See grpc_lfev_set_shutdown() function */ - -#define CLOSURE_NOT_READY ((gpr_atm)0) -#define CLOSURE_READY ((gpr_atm)2) - -#define FD_SHUTDOWN_BIT ((gpr_atm)1) - -void grpc_lfev_init(gpr_atm *state) { - gpr_atm_no_barrier_store(state, CLOSURE_NOT_READY); -} - -void grpc_lfev_destroy(gpr_atm *state) { - gpr_atm curr = gpr_atm_no_barrier_load(state); - if (curr & FD_SHUTDOWN_BIT) { - GRPC_ERROR_UNREF((grpc_error *)(curr & ~FD_SHUTDOWN_BIT)); - } else { - GPR_ASSERT(curr == CLOSURE_NOT_READY || curr == CLOSURE_READY); - } -} - -bool grpc_lfev_is_shutdown(gpr_atm *state) { - gpr_atm curr = gpr_atm_no_barrier_load(state); - return (curr & FD_SHUTDOWN_BIT) != 0; -} - -void grpc_lfev_notify_on(grpc_exec_ctx *exec_ctx, gpr_atm *state, - grpc_closure *closure, const char *variable) { - while (true) { - gpr_atm curr = gpr_atm_no_barrier_load(state); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, "lfev_notify_on[%s]: %p curr=%p closure=%p", variable, - state, (void *)curr, closure); - } - switch (curr) { - case CLOSURE_NOT_READY: { - /* CLOSURE_NOT_READY -> . - - We're guaranteed by API that there's an acquire barrier before here, - so there's no need to double-dip and this can be a release-only. - - The release itself pairs with the acquire half of a set_ready full - barrier. */ - if (gpr_atm_rel_cas(state, CLOSURE_NOT_READY, (gpr_atm)closure)) { - return; /* Successful. Return */ - } - - break; /* retry */ - } - - case CLOSURE_READY: { - /* Change the state to CLOSURE_NOT_READY. Schedule the closure if - successful. If not, the state most likely transitioned to shutdown. - We should retry. - - This can be a no-barrier cas since the state is being transitioned to - CLOSURE_NOT_READY; set_ready and set_shutdown do not schedule any - closure when transitioning out of CLOSURE_NO_READY state (i.e there - is no other code that needs to 'happen-after' this) */ - if (gpr_atm_no_barrier_cas(state, CLOSURE_READY, CLOSURE_NOT_READY)) { - GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); - return; /* Successful. Return */ - } - - break; /* retry */ - } - - default: { - /* 'curr' is either a closure or the fd is shutdown(in which case 'curr' - contains a pointer to the shutdown-error). If the fd is shutdown, - schedule the closure with the shutdown error */ - if ((curr & FD_SHUTDOWN_BIT) > 0) { - grpc_error *shutdown_err = (grpc_error *)(curr & ~FD_SHUTDOWN_BIT); - GRPC_CLOSURE_SCHED(exec_ctx, closure, - GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "FD Shutdown", &shutdown_err, 1)); - return; - } - - /* There is already a closure!. This indicates a bug in the code */ - gpr_log(GPR_ERROR, - "notify_on called with a previous callback still pending"); - abort(); - } - } - } - - GPR_UNREACHABLE_CODE(return ); -} - -bool grpc_lfev_set_shutdown(grpc_exec_ctx *exec_ctx, gpr_atm *state, - grpc_error *shutdown_err) { - gpr_atm new_state = (gpr_atm)shutdown_err | FD_SHUTDOWN_BIT; - - while (true) { - gpr_atm curr = gpr_atm_no_barrier_load(state); - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, "lfev_set_shutdown: %p curr=%p err=%s", state, - (void *)curr, grpc_error_string(shutdown_err)); - } - switch (curr) { - case CLOSURE_READY: - case CLOSURE_NOT_READY: - /* Need a full barrier here so that the initial load in notify_on - doesn't need a barrier */ - if (gpr_atm_full_cas(state, curr, new_state)) { - return true; /* early out */ - } - break; /* retry */ - - default: { - /* 'curr' is either a closure or the fd is already shutdown */ - - /* If fd is already shutdown, we are done */ - if ((curr & FD_SHUTDOWN_BIT) > 0) { - GRPC_ERROR_UNREF(shutdown_err); - return false; - } - - /* Fd is not shutdown. Schedule the closure and move the state to - shutdown state. - Needs an acquire to pair with setting the closure (and get a - happens-after on that edge), and a release to pair with anything - loading the shutdown state. */ - if (gpr_atm_full_cas(state, curr, new_state)) { - GRPC_CLOSURE_SCHED(exec_ctx, (grpc_closure *)curr, - GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "FD Shutdown", &shutdown_err, 1)); - return true; - } - - /* 'curr' was a closure but now changed to a different state. We will - have to retry */ - break; - } - } - } - - GPR_UNREACHABLE_CODE(return false); -} - -void grpc_lfev_set_ready(grpc_exec_ctx *exec_ctx, gpr_atm *state, - const char *variable) { - while (true) { - gpr_atm curr = gpr_atm_no_barrier_load(state); - - if (GRPC_TRACER_ON(grpc_polling_trace)) { - gpr_log(GPR_ERROR, "lfev_set_ready[%s]: %p curr=%p", variable, state, - (void *)curr); - } - - switch (curr) { - case CLOSURE_READY: { - /* Already ready. We are done here */ - return; - } - - case CLOSURE_NOT_READY: { - /* No barrier required as we're transitioning to a state that does not - involve a closure */ - if (gpr_atm_no_barrier_cas(state, CLOSURE_NOT_READY, CLOSURE_READY)) { - return; /* early out */ - } - break; /* retry */ - } - - default: { - /* 'curr' is either a closure or the fd is shutdown */ - if ((curr & FD_SHUTDOWN_BIT) > 0) { - /* The fd is shutdown. Do nothing */ - return; - } - /* Full cas: acquire pairs with this cas' release in the event of a - spurious set_ready; release pairs with this or the acquire in - notify_on (or set_shutdown) */ - else if (gpr_atm_full_cas(state, curr, CLOSURE_NOT_READY)) { - GRPC_CLOSURE_SCHED(exec_ctx, (grpc_closure *)curr, GRPC_ERROR_NONE); - return; - } - /* else the state changed again (only possible by either a racing - set_ready or set_shutdown functions. In both these cases, the closure - would have been scheduled for execution. So we are done here */ - return; - } - } - } -} diff --git a/src/core/lib/iomgr/lockfree_event.cc b/src/core/lib/iomgr/lockfree_event.cc new file mode 100644 index 0000000000..f967b22ba9 --- /dev/null +++ b/src/core/lib/iomgr/lockfree_event.cc @@ -0,0 +1,241 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/lockfree_event.h" + +#include + +#include "src/core/lib/debug/trace.h" + +extern grpc_tracer_flag grpc_polling_trace; + +/* 'state' holds the to call when the fd is readable or writable respectively. + It can contain one of the following values: + CLOSURE_READY : The fd has an I/O event of interest but there is no + closure yet to execute + + CLOSURE_NOT_READY : The fd has no I/O event of interest + + closure ptr : The closure to be executed when the fd has an I/O + event of interest + + shutdown_error | FD_SHUTDOWN_BIT : + 'shutdown_error' field ORed with FD_SHUTDOWN_BIT. + This indicates that the fd is shutdown. Since all + memory allocations are word-aligned, the lower two + bits of the shutdown_error pointer are always 0. So + it is safe to OR these with FD_SHUTDOWN_BIT + + Valid state transitions: + + <-----3------ CLOSURE_NOT_READY ----1----> CLOSURE_READY + | | ^ | ^ | | + | | | | | | | + | +--------------4----------+ 6 +---------2---------------+ | + | | | + | v | + +-----5-------> [shutdown_error | FD_SHUTDOWN_BIT] <----7---------+ + + For 1, 4 : See grpc_lfev_set_ready() function + For 2, 3 : See grpc_lfev_notify_on() function + For 5,6,7: See grpc_lfev_set_shutdown() function */ + +#define CLOSURE_NOT_READY ((gpr_atm)0) +#define CLOSURE_READY ((gpr_atm)2) + +#define FD_SHUTDOWN_BIT ((gpr_atm)1) + +void grpc_lfev_init(gpr_atm *state) { + gpr_atm_no_barrier_store(state, CLOSURE_NOT_READY); +} + +void grpc_lfev_destroy(gpr_atm *state) { + gpr_atm curr = gpr_atm_no_barrier_load(state); + if (curr & FD_SHUTDOWN_BIT) { + GRPC_ERROR_UNREF((grpc_error *)(curr & ~FD_SHUTDOWN_BIT)); + } else { + GPR_ASSERT(curr == CLOSURE_NOT_READY || curr == CLOSURE_READY); + } +} + +bool grpc_lfev_is_shutdown(gpr_atm *state) { + gpr_atm curr = gpr_atm_no_barrier_load(state); + return (curr & FD_SHUTDOWN_BIT) != 0; +} + +void grpc_lfev_notify_on(grpc_exec_ctx *exec_ctx, gpr_atm *state, + grpc_closure *closure, const char *variable) { + while (true) { + gpr_atm curr = gpr_atm_no_barrier_load(state); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, "lfev_notify_on[%s]: %p curr=%p closure=%p", variable, + state, (void *)curr, closure); + } + switch (curr) { + case CLOSURE_NOT_READY: { + /* CLOSURE_NOT_READY -> . + + We're guaranteed by API that there's an acquire barrier before here, + so there's no need to double-dip and this can be a release-only. + + The release itself pairs with the acquire half of a set_ready full + barrier. */ + if (gpr_atm_rel_cas(state, CLOSURE_NOT_READY, (gpr_atm)closure)) { + return; /* Successful. Return */ + } + + break; /* retry */ + } + + case CLOSURE_READY: { + /* Change the state to CLOSURE_NOT_READY. Schedule the closure if + successful. If not, the state most likely transitioned to shutdown. + We should retry. + + This can be a no-barrier cas since the state is being transitioned to + CLOSURE_NOT_READY; set_ready and set_shutdown do not schedule any + closure when transitioning out of CLOSURE_NO_READY state (i.e there + is no other code that needs to 'happen-after' this) */ + if (gpr_atm_no_barrier_cas(state, CLOSURE_READY, CLOSURE_NOT_READY)) { + GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); + return; /* Successful. Return */ + } + + break; /* retry */ + } + + default: { + /* 'curr' is either a closure or the fd is shutdown(in which case 'curr' + contains a pointer to the shutdown-error). If the fd is shutdown, + schedule the closure with the shutdown error */ + if ((curr & FD_SHUTDOWN_BIT) > 0) { + grpc_error *shutdown_err = (grpc_error *)(curr & ~FD_SHUTDOWN_BIT); + GRPC_CLOSURE_SCHED(exec_ctx, closure, + GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "FD Shutdown", &shutdown_err, 1)); + return; + } + + /* There is already a closure!. This indicates a bug in the code */ + gpr_log(GPR_ERROR, + "notify_on called with a previous callback still pending"); + abort(); + } + } + } + + GPR_UNREACHABLE_CODE(return ); +} + +bool grpc_lfev_set_shutdown(grpc_exec_ctx *exec_ctx, gpr_atm *state, + grpc_error *shutdown_err) { + gpr_atm new_state = (gpr_atm)shutdown_err | FD_SHUTDOWN_BIT; + + while (true) { + gpr_atm curr = gpr_atm_no_barrier_load(state); + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, "lfev_set_shutdown: %p curr=%p err=%s", state, + (void *)curr, grpc_error_string(shutdown_err)); + } + switch (curr) { + case CLOSURE_READY: + case CLOSURE_NOT_READY: + /* Need a full barrier here so that the initial load in notify_on + doesn't need a barrier */ + if (gpr_atm_full_cas(state, curr, new_state)) { + return true; /* early out */ + } + break; /* retry */ + + default: { + /* 'curr' is either a closure or the fd is already shutdown */ + + /* If fd is already shutdown, we are done */ + if ((curr & FD_SHUTDOWN_BIT) > 0) { + GRPC_ERROR_UNREF(shutdown_err); + return false; + } + + /* Fd is not shutdown. Schedule the closure and move the state to + shutdown state. + Needs an acquire to pair with setting the closure (and get a + happens-after on that edge), and a release to pair with anything + loading the shutdown state. */ + if (gpr_atm_full_cas(state, curr, new_state)) { + GRPC_CLOSURE_SCHED(exec_ctx, (grpc_closure *)curr, + GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "FD Shutdown", &shutdown_err, 1)); + return true; + } + + /* 'curr' was a closure but now changed to a different state. We will + have to retry */ + break; + } + } + } + + GPR_UNREACHABLE_CODE(return false); +} + +void grpc_lfev_set_ready(grpc_exec_ctx *exec_ctx, gpr_atm *state, + const char *variable) { + while (true) { + gpr_atm curr = gpr_atm_no_barrier_load(state); + + if (GRPC_TRACER_ON(grpc_polling_trace)) { + gpr_log(GPR_ERROR, "lfev_set_ready[%s]: %p curr=%p", variable, state, + (void *)curr); + } + + switch (curr) { + case CLOSURE_READY: { + /* Already ready. We are done here */ + return; + } + + case CLOSURE_NOT_READY: { + /* No barrier required as we're transitioning to a state that does not + involve a closure */ + if (gpr_atm_no_barrier_cas(state, CLOSURE_NOT_READY, CLOSURE_READY)) { + return; /* early out */ + } + break; /* retry */ + } + + default: { + /* 'curr' is either a closure or the fd is shutdown */ + if ((curr & FD_SHUTDOWN_BIT) > 0) { + /* The fd is shutdown. Do nothing */ + return; + } + /* Full cas: acquire pairs with this cas' release in the event of a + spurious set_ready; release pairs with this or the acquire in + notify_on (or set_shutdown) */ + else if (gpr_atm_full_cas(state, curr, CLOSURE_NOT_READY)) { + GRPC_CLOSURE_SCHED(exec_ctx, (grpc_closure *)curr, GRPC_ERROR_NONE); + return; + } + /* else the state changed again (only possible by either a racing + set_ready or set_shutdown functions. In both these cases, the closure + would have been scheduled for execution. So we are done here */ + return; + } + } + } +} diff --git a/src/core/lib/iomgr/network_status_tracker.c b/src/core/lib/iomgr/network_status_tracker.c deleted file mode 100644 index 57a7faa9f1..0000000000 --- a/src/core/lib/iomgr/network_status_tracker.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/network_status_tracker.h" -#include "src/core/lib/iomgr/endpoint.h" - -void grpc_network_status_shutdown(void) {} - -void grpc_network_status_init(void) { - // TODO(makarandd): Install callback with OS to monitor network status. -} - -void grpc_destroy_network_status_monitor() {} - -void grpc_network_status_register_endpoint(grpc_endpoint *ep) { (void)ep; } - -void grpc_network_status_unregister_endpoint(grpc_endpoint *ep) { (void)ep; } - -void grpc_network_status_shutdown_all_endpoints() {} diff --git a/src/core/lib/iomgr/network_status_tracker.cc b/src/core/lib/iomgr/network_status_tracker.cc new file mode 100644 index 0000000000..57a7faa9f1 --- /dev/null +++ b/src/core/lib/iomgr/network_status_tracker.cc @@ -0,0 +1,34 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/network_status_tracker.h" +#include "src/core/lib/iomgr/endpoint.h" + +void grpc_network_status_shutdown(void) {} + +void grpc_network_status_init(void) { + // TODO(makarandd): Install callback with OS to monitor network status. +} + +void grpc_destroy_network_status_monitor() {} + +void grpc_network_status_register_endpoint(grpc_endpoint *ep) { (void)ep; } + +void grpc_network_status_unregister_endpoint(grpc_endpoint *ep) { (void)ep; } + +void grpc_network_status_shutdown_all_endpoints() {} diff --git a/src/core/lib/iomgr/polling_entity.c b/src/core/lib/iomgr/polling_entity.c deleted file mode 100644 index 8591a5518e..0000000000 --- a/src/core/lib/iomgr/polling_entity.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include -#include - -#include "src/core/lib/iomgr/polling_entity.h" - -grpc_polling_entity grpc_polling_entity_create_from_pollset_set( - grpc_pollset_set *pollset_set) { - grpc_polling_entity pollent; - pollent.pollent.pollset_set = pollset_set; - pollent.tag = GRPC_POLLS_POLLSET_SET; - return pollent; -} - -grpc_polling_entity grpc_polling_entity_create_from_pollset( - grpc_pollset *pollset) { - grpc_polling_entity pollent; - pollent.pollent.pollset = pollset; - pollent.tag = GRPC_POLLS_POLLSET; - return pollent; -} - -grpc_pollset *grpc_polling_entity_pollset(grpc_polling_entity *pollent) { - if (pollent->tag == GRPC_POLLS_POLLSET) { - return pollent->pollent.pollset; - } - return NULL; -} - -grpc_pollset_set *grpc_polling_entity_pollset_set( - grpc_polling_entity *pollent) { - if (pollent->tag == GRPC_POLLS_POLLSET_SET) { - return pollent->pollent.pollset_set; - } - return NULL; -} - -bool grpc_polling_entity_is_empty(const grpc_polling_entity *pollent) { - return pollent->tag == GRPC_POLLS_NONE; -} - -void grpc_polling_entity_add_to_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_polling_entity *pollent, - grpc_pollset_set *pss_dst) { - if (pollent->tag == GRPC_POLLS_POLLSET) { - GPR_ASSERT(pollent->pollent.pollset != NULL); - grpc_pollset_set_add_pollset(exec_ctx, pss_dst, pollent->pollent.pollset); - } else if (pollent->tag == GRPC_POLLS_POLLSET_SET) { - GPR_ASSERT(pollent->pollent.pollset_set != NULL); - grpc_pollset_set_add_pollset_set(exec_ctx, pss_dst, - pollent->pollent.pollset_set); - } else { - gpr_log(GPR_ERROR, "Invalid grpc_polling_entity tag '%d'", pollent->tag); - abort(); - } -} - -void grpc_polling_entity_del_from_pollset_set(grpc_exec_ctx *exec_ctx, - grpc_polling_entity *pollent, - grpc_pollset_set *pss_dst) { - if (pollent->tag == GRPC_POLLS_POLLSET) { - GPR_ASSERT(pollent->pollent.pollset != NULL); - grpc_pollset_set_del_pollset(exec_ctx, pss_dst, pollent->pollent.pollset); - } else if (pollent->tag == GRPC_POLLS_POLLSET_SET) { - GPR_ASSERT(pollent->pollent.pollset_set != NULL); - grpc_pollset_set_del_pollset_set(exec_ctx, pss_dst, - pollent->pollent.pollset_set); - } else { - gpr_log(GPR_ERROR, "Invalid grpc_polling_entity tag '%d'", pollent->tag); - abort(); - } -} diff --git a/src/core/lib/iomgr/polling_entity.cc b/src/core/lib/iomgr/polling_entity.cc new file mode 100644 index 0000000000..8591a5518e --- /dev/null +++ b/src/core/lib/iomgr/polling_entity.cc @@ -0,0 +1,89 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include + +#include "src/core/lib/iomgr/polling_entity.h" + +grpc_polling_entity grpc_polling_entity_create_from_pollset_set( + grpc_pollset_set *pollset_set) { + grpc_polling_entity pollent; + pollent.pollent.pollset_set = pollset_set; + pollent.tag = GRPC_POLLS_POLLSET_SET; + return pollent; +} + +grpc_polling_entity grpc_polling_entity_create_from_pollset( + grpc_pollset *pollset) { + grpc_polling_entity pollent; + pollent.pollent.pollset = pollset; + pollent.tag = GRPC_POLLS_POLLSET; + return pollent; +} + +grpc_pollset *grpc_polling_entity_pollset(grpc_polling_entity *pollent) { + if (pollent->tag == GRPC_POLLS_POLLSET) { + return pollent->pollent.pollset; + } + return NULL; +} + +grpc_pollset_set *grpc_polling_entity_pollset_set( + grpc_polling_entity *pollent) { + if (pollent->tag == GRPC_POLLS_POLLSET_SET) { + return pollent->pollent.pollset_set; + } + return NULL; +} + +bool grpc_polling_entity_is_empty(const grpc_polling_entity *pollent) { + return pollent->tag == GRPC_POLLS_NONE; +} + +void grpc_polling_entity_add_to_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_polling_entity *pollent, + grpc_pollset_set *pss_dst) { + if (pollent->tag == GRPC_POLLS_POLLSET) { + GPR_ASSERT(pollent->pollent.pollset != NULL); + grpc_pollset_set_add_pollset(exec_ctx, pss_dst, pollent->pollent.pollset); + } else if (pollent->tag == GRPC_POLLS_POLLSET_SET) { + GPR_ASSERT(pollent->pollent.pollset_set != NULL); + grpc_pollset_set_add_pollset_set(exec_ctx, pss_dst, + pollent->pollent.pollset_set); + } else { + gpr_log(GPR_ERROR, "Invalid grpc_polling_entity tag '%d'", pollent->tag); + abort(); + } +} + +void grpc_polling_entity_del_from_pollset_set(grpc_exec_ctx *exec_ctx, + grpc_polling_entity *pollent, + grpc_pollset_set *pss_dst) { + if (pollent->tag == GRPC_POLLS_POLLSET) { + GPR_ASSERT(pollent->pollent.pollset != NULL); + grpc_pollset_set_del_pollset(exec_ctx, pss_dst, pollent->pollent.pollset); + } else if (pollent->tag == GRPC_POLLS_POLLSET_SET) { + GPR_ASSERT(pollent->pollent.pollset_set != NULL); + grpc_pollset_set_del_pollset_set(exec_ctx, pss_dst, + pollent->pollent.pollset_set); + } else { + gpr_log(GPR_ERROR, "Invalid grpc_polling_entity tag '%d'", pollent->tag); + abort(); + } +} diff --git a/src/core/lib/iomgr/pollset_set_uv.c b/src/core/lib/iomgr/pollset_set_uv.c deleted file mode 100644 index 90186edbb7..0000000000 --- a/src/core/lib/iomgr/pollset_set_uv.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_UV - -#include "src/core/lib/iomgr/pollset_set.h" - -grpc_pollset_set* grpc_pollset_set_create(void) { - return (grpc_pollset_set*)((intptr_t)0xdeafbeef); -} - -void grpc_pollset_set_destroy(grpc_exec_ctx* exec_ctx, - grpc_pollset_set* pollset_set) {} - -void grpc_pollset_set_add_pollset(grpc_exec_ctx* exec_ctx, - grpc_pollset_set* pollset_set, - grpc_pollset* pollset) {} - -void grpc_pollset_set_del_pollset(grpc_exec_ctx* exec_ctx, - grpc_pollset_set* pollset_set, - grpc_pollset* pollset) {} - -void grpc_pollset_set_add_pollset_set(grpc_exec_ctx* exec_ctx, - grpc_pollset_set* bag, - grpc_pollset_set* item) {} - -void grpc_pollset_set_del_pollset_set(grpc_exec_ctx* exec_ctx, - grpc_pollset_set* bag, - grpc_pollset_set* item) {} - -#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/pollset_set_uv.cc b/src/core/lib/iomgr/pollset_set_uv.cc new file mode 100644 index 0000000000..90186edbb7 --- /dev/null +++ b/src/core/lib/iomgr/pollset_set_uv.cc @@ -0,0 +1,48 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_UV + +#include "src/core/lib/iomgr/pollset_set.h" + +grpc_pollset_set* grpc_pollset_set_create(void) { + return (grpc_pollset_set*)((intptr_t)0xdeafbeef); +} + +void grpc_pollset_set_destroy(grpc_exec_ctx* exec_ctx, + grpc_pollset_set* pollset_set) {} + +void grpc_pollset_set_add_pollset(grpc_exec_ctx* exec_ctx, + grpc_pollset_set* pollset_set, + grpc_pollset* pollset) {} + +void grpc_pollset_set_del_pollset(grpc_exec_ctx* exec_ctx, + grpc_pollset_set* pollset_set, + grpc_pollset* pollset) {} + +void grpc_pollset_set_add_pollset_set(grpc_exec_ctx* exec_ctx, + grpc_pollset_set* bag, + grpc_pollset_set* item) {} + +void grpc_pollset_set_del_pollset_set(grpc_exec_ctx* exec_ctx, + grpc_pollset_set* bag, + grpc_pollset_set* item) {} + +#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/pollset_set_windows.c b/src/core/lib/iomgr/pollset_set_windows.c deleted file mode 100644 index 2105a47ad4..0000000000 --- a/src/core/lib/iomgr/pollset_set_windows.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_WINSOCK_SOCKET - -#include "src/core/lib/iomgr/pollset_set_windows.h" - -grpc_pollset_set* grpc_pollset_set_create(void) { - return (grpc_pollset_set*)((intptr_t)0xdeafbeef); -} - -void grpc_pollset_set_destroy(grpc_exec_ctx* exec_ctx, - grpc_pollset_set* pollset_set) {} - -void grpc_pollset_set_add_pollset(grpc_exec_ctx* exec_ctx, - grpc_pollset_set* pollset_set, - grpc_pollset* pollset) {} - -void grpc_pollset_set_del_pollset(grpc_exec_ctx* exec_ctx, - grpc_pollset_set* pollset_set, - grpc_pollset* pollset) {} - -void grpc_pollset_set_add_pollset_set(grpc_exec_ctx* exec_ctx, - grpc_pollset_set* bag, - grpc_pollset_set* item) {} - -void grpc_pollset_set_del_pollset_set(grpc_exec_ctx* exec_ctx, - grpc_pollset_set* bag, - grpc_pollset_set* item) {} - -#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/pollset_set_windows.cc b/src/core/lib/iomgr/pollset_set_windows.cc new file mode 100644 index 0000000000..2105a47ad4 --- /dev/null +++ b/src/core/lib/iomgr/pollset_set_windows.cc @@ -0,0 +1,49 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_WINSOCK_SOCKET + +#include "src/core/lib/iomgr/pollset_set_windows.h" + +grpc_pollset_set* grpc_pollset_set_create(void) { + return (grpc_pollset_set*)((intptr_t)0xdeafbeef); +} + +void grpc_pollset_set_destroy(grpc_exec_ctx* exec_ctx, + grpc_pollset_set* pollset_set) {} + +void grpc_pollset_set_add_pollset(grpc_exec_ctx* exec_ctx, + grpc_pollset_set* pollset_set, + grpc_pollset* pollset) {} + +void grpc_pollset_set_del_pollset(grpc_exec_ctx* exec_ctx, + grpc_pollset_set* pollset_set, + grpc_pollset* pollset) {} + +void grpc_pollset_set_add_pollset_set(grpc_exec_ctx* exec_ctx, + grpc_pollset_set* bag, + grpc_pollset_set* item) {} + +void grpc_pollset_set_del_pollset_set(grpc_exec_ctx* exec_ctx, + grpc_pollset_set* bag, + grpc_pollset_set* item) {} + +#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/pollset_uv.c b/src/core/lib/iomgr/pollset_uv.c deleted file mode 100644 index 2651325e25..0000000000 --- a/src/core/lib/iomgr/pollset_uv.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_UV - -#include - -#include - -#include -#include -#include - -#include "src/core/lib/iomgr/iomgr_uv.h" -#include "src/core/lib/iomgr/pollset.h" -#include "src/core/lib/iomgr/pollset_uv.h" - -#include "src/core/lib/debug/trace.h" - -#ifndef NDEBUG -grpc_tracer_flag grpc_trace_fd_refcount = - GRPC_TRACER_INITIALIZER(false, "fd_refcount"); -#endif - -struct grpc_pollset { - uv_timer_t timer; - int shutting_down; -}; - -/* Indicates that grpc_pollset_work should run an iteration of the UV loop - before running callbacks. This defaults to 1, and should be disabled if - grpc_pollset_work will be called within the callstack of uv_run */ -int grpc_pollset_work_run_loop; - -gpr_mu grpc_polling_mu; - -/* This is used solely to kick the uv loop, by setting a callback to be run - immediately in the next loop iteration. - Note: In the future, if there is a bug that involves missing wakeups in the - future, try adding a uv_async_t to kick the loop differently */ -uv_timer_t *dummy_uv_handle; - -size_t grpc_pollset_size() { return sizeof(grpc_pollset); } - -void dummy_timer_cb(uv_timer_t *handle) {} - -void dummy_handle_close_cb(uv_handle_t *handle) { gpr_free(handle); } - -void grpc_pollset_global_init(void) { - gpr_mu_init(&grpc_polling_mu); - dummy_uv_handle = gpr_malloc(sizeof(uv_timer_t)); - uv_timer_init(uv_default_loop(), dummy_uv_handle); - grpc_pollset_work_run_loop = 1; -} - -void grpc_pollset_global_shutdown(void) { - GRPC_UV_ASSERT_SAME_THREAD(); - gpr_mu_destroy(&grpc_polling_mu); - uv_close((uv_handle_t *)dummy_uv_handle, dummy_handle_close_cb); -} - -static void timer_run_cb(uv_timer_t *timer) {} - -static void timer_close_cb(uv_handle_t *handle) { handle->data = (void *)1; } - -void grpc_pollset_init(grpc_pollset *pollset, gpr_mu **mu) { - GRPC_UV_ASSERT_SAME_THREAD(); - *mu = &grpc_polling_mu; - uv_timer_init(uv_default_loop(), &pollset->timer); - pollset->shutting_down = 0; -} - -void grpc_pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_closure *closure) { - GPR_ASSERT(!pollset->shutting_down); - GRPC_UV_ASSERT_SAME_THREAD(); - pollset->shutting_down = 1; - if (grpc_pollset_work_run_loop) { - // Drain any pending UV callbacks without blocking - uv_run(uv_default_loop(), UV_RUN_NOWAIT); - } else { - // kick the loop once - uv_timer_start(dummy_uv_handle, dummy_timer_cb, 0, 0); - } - GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); -} - -void grpc_pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { - GRPC_UV_ASSERT_SAME_THREAD(); - uv_close((uv_handle_t *)&pollset->timer, timer_close_cb); - // timer.data is a boolean indicating that the timer has finished closing - pollset->timer.data = (void *)0; - if (grpc_pollset_work_run_loop) { - while (!pollset->timer.data) { - uv_run(uv_default_loop(), UV_RUN_NOWAIT); - } - } -} - -grpc_error *grpc_pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker **worker_hdl, - gpr_timespec now, gpr_timespec deadline) { - uint64_t timeout; - GRPC_UV_ASSERT_SAME_THREAD(); - gpr_mu_unlock(&grpc_polling_mu); - if (grpc_pollset_work_run_loop) { - if (gpr_time_cmp(deadline, now) >= 0) { - timeout = (uint64_t)gpr_time_to_millis(gpr_time_sub(deadline, now)); - } else { - timeout = 0; - } - /* We special-case timeout=0 so that we don't bother with the timer when - the loop won't block anyway */ - if (timeout > 0) { - uv_timer_start(&pollset->timer, timer_run_cb, timeout, 0); - /* Run until there is some I/O activity or the timer triggers. It doesn't - matter which happens */ - uv_run(uv_default_loop(), UV_RUN_ONCE); - uv_timer_stop(&pollset->timer); - } else { - uv_run(uv_default_loop(), UV_RUN_NOWAIT); - } - } - if (!grpc_closure_list_empty(exec_ctx->closure_list)) { - grpc_exec_ctx_flush(exec_ctx); - } - gpr_mu_lock(&grpc_polling_mu); - return GRPC_ERROR_NONE; -} - -grpc_error *grpc_pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker *specific_worker) { - GRPC_UV_ASSERT_SAME_THREAD(); - uv_timer_start(dummy_uv_handle, dummy_timer_cb, 0, 0); - return GRPC_ERROR_NONE; -} - -#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/pollset_uv.cc b/src/core/lib/iomgr/pollset_uv.cc new file mode 100644 index 0000000000..2651325e25 --- /dev/null +++ b/src/core/lib/iomgr/pollset_uv.cc @@ -0,0 +1,155 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_UV + +#include + +#include + +#include +#include +#include + +#include "src/core/lib/iomgr/iomgr_uv.h" +#include "src/core/lib/iomgr/pollset.h" +#include "src/core/lib/iomgr/pollset_uv.h" + +#include "src/core/lib/debug/trace.h" + +#ifndef NDEBUG +grpc_tracer_flag grpc_trace_fd_refcount = + GRPC_TRACER_INITIALIZER(false, "fd_refcount"); +#endif + +struct grpc_pollset { + uv_timer_t timer; + int shutting_down; +}; + +/* Indicates that grpc_pollset_work should run an iteration of the UV loop + before running callbacks. This defaults to 1, and should be disabled if + grpc_pollset_work will be called within the callstack of uv_run */ +int grpc_pollset_work_run_loop; + +gpr_mu grpc_polling_mu; + +/* This is used solely to kick the uv loop, by setting a callback to be run + immediately in the next loop iteration. + Note: In the future, if there is a bug that involves missing wakeups in the + future, try adding a uv_async_t to kick the loop differently */ +uv_timer_t *dummy_uv_handle; + +size_t grpc_pollset_size() { return sizeof(grpc_pollset); } + +void dummy_timer_cb(uv_timer_t *handle) {} + +void dummy_handle_close_cb(uv_handle_t *handle) { gpr_free(handle); } + +void grpc_pollset_global_init(void) { + gpr_mu_init(&grpc_polling_mu); + dummy_uv_handle = gpr_malloc(sizeof(uv_timer_t)); + uv_timer_init(uv_default_loop(), dummy_uv_handle); + grpc_pollset_work_run_loop = 1; +} + +void grpc_pollset_global_shutdown(void) { + GRPC_UV_ASSERT_SAME_THREAD(); + gpr_mu_destroy(&grpc_polling_mu); + uv_close((uv_handle_t *)dummy_uv_handle, dummy_handle_close_cb); +} + +static void timer_run_cb(uv_timer_t *timer) {} + +static void timer_close_cb(uv_handle_t *handle) { handle->data = (void *)1; } + +void grpc_pollset_init(grpc_pollset *pollset, gpr_mu **mu) { + GRPC_UV_ASSERT_SAME_THREAD(); + *mu = &grpc_polling_mu; + uv_timer_init(uv_default_loop(), &pollset->timer); + pollset->shutting_down = 0; +} + +void grpc_pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_closure *closure) { + GPR_ASSERT(!pollset->shutting_down); + GRPC_UV_ASSERT_SAME_THREAD(); + pollset->shutting_down = 1; + if (grpc_pollset_work_run_loop) { + // Drain any pending UV callbacks without blocking + uv_run(uv_default_loop(), UV_RUN_NOWAIT); + } else { + // kick the loop once + uv_timer_start(dummy_uv_handle, dummy_timer_cb, 0, 0); + } + GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); +} + +void grpc_pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) { + GRPC_UV_ASSERT_SAME_THREAD(); + uv_close((uv_handle_t *)&pollset->timer, timer_close_cb); + // timer.data is a boolean indicating that the timer has finished closing + pollset->timer.data = (void *)0; + if (grpc_pollset_work_run_loop) { + while (!pollset->timer.data) { + uv_run(uv_default_loop(), UV_RUN_NOWAIT); + } + } +} + +grpc_error *grpc_pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker **worker_hdl, + gpr_timespec now, gpr_timespec deadline) { + uint64_t timeout; + GRPC_UV_ASSERT_SAME_THREAD(); + gpr_mu_unlock(&grpc_polling_mu); + if (grpc_pollset_work_run_loop) { + if (gpr_time_cmp(deadline, now) >= 0) { + timeout = (uint64_t)gpr_time_to_millis(gpr_time_sub(deadline, now)); + } else { + timeout = 0; + } + /* We special-case timeout=0 so that we don't bother with the timer when + the loop won't block anyway */ + if (timeout > 0) { + uv_timer_start(&pollset->timer, timer_run_cb, timeout, 0); + /* Run until there is some I/O activity or the timer triggers. It doesn't + matter which happens */ + uv_run(uv_default_loop(), UV_RUN_ONCE); + uv_timer_stop(&pollset->timer); + } else { + uv_run(uv_default_loop(), UV_RUN_NOWAIT); + } + } + if (!grpc_closure_list_empty(exec_ctx->closure_list)) { + grpc_exec_ctx_flush(exec_ctx); + } + gpr_mu_lock(&grpc_polling_mu); + return GRPC_ERROR_NONE; +} + +grpc_error *grpc_pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker *specific_worker) { + GRPC_UV_ASSERT_SAME_THREAD(); + uv_timer_start(dummy_uv_handle, dummy_timer_cb, 0, 0); + return GRPC_ERROR_NONE; +} + +#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/pollset_windows.c b/src/core/lib/iomgr/pollset_windows.c deleted file mode 100644 index eb295d3eeb..0000000000 --- a/src/core/lib/iomgr/pollset_windows.c +++ /dev/null @@ -1,222 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_WINSOCK_SOCKET - -#include -#include - -#include "src/core/lib/iomgr/iocp_windows.h" -#include "src/core/lib/iomgr/iomgr_internal.h" -#include "src/core/lib/iomgr/pollset.h" -#include "src/core/lib/iomgr/pollset_windows.h" - -#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1) - -#ifndef NDEBUG -grpc_tracer_flag grpc_trace_fd_refcount = - GRPC_TRACER_INITIALIZER(false, "fd_refcount"); -#endif - -gpr_mu grpc_polling_mu; -static grpc_pollset_worker *g_active_poller; -static grpc_pollset_worker g_global_root_worker; - -void grpc_pollset_global_init(void) { - gpr_mu_init(&grpc_polling_mu); - g_active_poller = NULL; - g_global_root_worker.links[GRPC_POLLSET_WORKER_LINK_GLOBAL].next = - g_global_root_worker.links[GRPC_POLLSET_WORKER_LINK_GLOBAL].prev = - &g_global_root_worker; -} - -void grpc_pollset_global_shutdown(void) { gpr_mu_destroy(&grpc_polling_mu); } - -static void remove_worker(grpc_pollset_worker *worker, - grpc_pollset_worker_link_type type) { - worker->links[type].prev->links[type].next = worker->links[type].next; - worker->links[type].next->links[type].prev = worker->links[type].prev; - worker->links[type].next = worker->links[type].prev = worker; -} - -static int has_workers(grpc_pollset_worker *root, - grpc_pollset_worker_link_type type) { - return root->links[type].next != root; -} - -static grpc_pollset_worker *pop_front_worker( - grpc_pollset_worker *root, grpc_pollset_worker_link_type type) { - if (has_workers(root, type)) { - grpc_pollset_worker *w = root->links[type].next; - remove_worker(w, type); - return w; - } else { - return NULL; - } -} - -static void push_front_worker(grpc_pollset_worker *root, - grpc_pollset_worker_link_type type, - grpc_pollset_worker *worker) { - worker->links[type].prev = root; - worker->links[type].next = worker->links[type].prev->links[type].next; - worker->links[type].prev->links[type].next = - worker->links[type].next->links[type].prev = worker; -} - -size_t grpc_pollset_size(void) { return sizeof(grpc_pollset); } - -/* There isn't really any such thing as a pollset under Windows, due to the - nature of the IO completion ports. We're still going to provide a minimal - set of features for the sake of the rest of grpc. But grpc_pollset_work - won't actually do any polling, and return as quickly as possible. */ - -void grpc_pollset_init(grpc_pollset *pollset, gpr_mu **mu) { - *mu = &grpc_polling_mu; - pollset->root_worker.links[GRPC_POLLSET_WORKER_LINK_POLLSET].next = - pollset->root_worker.links[GRPC_POLLSET_WORKER_LINK_POLLSET].prev = - &pollset->root_worker; -} - -void grpc_pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_closure *closure) { - pollset->shutting_down = 1; - grpc_pollset_kick(exec_ctx, pollset, GRPC_POLLSET_KICK_BROADCAST); - if (!pollset->is_iocp_worker) { - GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); - } else { - pollset->on_shutdown = closure; - } -} - -void grpc_pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {} - -grpc_error *grpc_pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, - grpc_pollset_worker **worker_hdl, - gpr_timespec now, gpr_timespec deadline) { - grpc_pollset_worker worker; - if (worker_hdl) *worker_hdl = &worker; - - int added_worker = 0; - worker.links[GRPC_POLLSET_WORKER_LINK_POLLSET].next = - worker.links[GRPC_POLLSET_WORKER_LINK_POLLSET].prev = - worker.links[GRPC_POLLSET_WORKER_LINK_GLOBAL].next = - worker.links[GRPC_POLLSET_WORKER_LINK_GLOBAL].prev = NULL; - worker.kicked = 0; - worker.pollset = pollset; - gpr_cv_init(&worker.cv); - if (!pollset->kicked_without_pollers && !pollset->shutting_down) { - if (g_active_poller == NULL) { - grpc_pollset_worker *next_worker; - /* become poller */ - pollset->is_iocp_worker = 1; - g_active_poller = &worker; - gpr_mu_unlock(&grpc_polling_mu); - grpc_iocp_work(exec_ctx, deadline); - grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&grpc_polling_mu); - pollset->is_iocp_worker = 0; - g_active_poller = NULL; - /* try to get a worker from this pollsets worker list */ - next_worker = pop_front_worker(&pollset->root_worker, - GRPC_POLLSET_WORKER_LINK_POLLSET); - if (next_worker == NULL) { - /* try to get a worker from the global list */ - next_worker = pop_front_worker(&g_global_root_worker, - GRPC_POLLSET_WORKER_LINK_GLOBAL); - } - if (next_worker != NULL) { - next_worker->kicked = 1; - gpr_cv_signal(&next_worker->cv); - } - - if (pollset->shutting_down && pollset->on_shutdown != NULL) { - GRPC_CLOSURE_SCHED(exec_ctx, pollset->on_shutdown, GRPC_ERROR_NONE); - pollset->on_shutdown = NULL; - } - goto done; - } - push_front_worker(&g_global_root_worker, GRPC_POLLSET_WORKER_LINK_GLOBAL, - &worker); - push_front_worker(&pollset->root_worker, GRPC_POLLSET_WORKER_LINK_POLLSET, - &worker); - added_worker = 1; - while (!worker.kicked) { - if (gpr_cv_wait(&worker.cv, &grpc_polling_mu, deadline)) { - break; - } - } - } else { - pollset->kicked_without_pollers = 0; - } -done: - if (!grpc_closure_list_empty(exec_ctx->closure_list)) { - gpr_mu_unlock(&grpc_polling_mu); - grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&grpc_polling_mu); - } - if (added_worker) { - remove_worker(&worker, GRPC_POLLSET_WORKER_LINK_GLOBAL); - remove_worker(&worker, GRPC_POLLSET_WORKER_LINK_POLLSET); - } - gpr_cv_destroy(&worker.cv); - if (worker_hdl) *worker_hdl = NULL; - return GRPC_ERROR_NONE; -} - -grpc_error *grpc_pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *p, - grpc_pollset_worker *specific_worker) { - if (specific_worker != NULL) { - if (specific_worker == GRPC_POLLSET_KICK_BROADCAST) { - for (specific_worker = - p->root_worker.links[GRPC_POLLSET_WORKER_LINK_POLLSET].next; - specific_worker != &p->root_worker; - specific_worker = - specific_worker->links[GRPC_POLLSET_WORKER_LINK_POLLSET].next) { - specific_worker->kicked = 1; - gpr_cv_signal(&specific_worker->cv); - } - p->kicked_without_pollers = 1; - if (p->is_iocp_worker) { - grpc_iocp_kick(); - } - } else { - if (p->is_iocp_worker && g_active_poller == specific_worker) { - grpc_iocp_kick(); - } else { - specific_worker->kicked = 1; - gpr_cv_signal(&specific_worker->cv); - } - } - } else { - specific_worker = - pop_front_worker(&p->root_worker, GRPC_POLLSET_WORKER_LINK_POLLSET); - if (specific_worker != NULL) { - grpc_pollset_kick(exec_ctx, p, specific_worker); - } else if (p->is_iocp_worker) { - grpc_iocp_kick(); - } else { - p->kicked_without_pollers = 1; - } - } - return GRPC_ERROR_NONE; -} - -#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/pollset_windows.cc b/src/core/lib/iomgr/pollset_windows.cc new file mode 100644 index 0000000000..eb295d3eeb --- /dev/null +++ b/src/core/lib/iomgr/pollset_windows.cc @@ -0,0 +1,222 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_WINSOCK_SOCKET + +#include +#include + +#include "src/core/lib/iomgr/iocp_windows.h" +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/pollset.h" +#include "src/core/lib/iomgr/pollset_windows.h" + +#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1) + +#ifndef NDEBUG +grpc_tracer_flag grpc_trace_fd_refcount = + GRPC_TRACER_INITIALIZER(false, "fd_refcount"); +#endif + +gpr_mu grpc_polling_mu; +static grpc_pollset_worker *g_active_poller; +static grpc_pollset_worker g_global_root_worker; + +void grpc_pollset_global_init(void) { + gpr_mu_init(&grpc_polling_mu); + g_active_poller = NULL; + g_global_root_worker.links[GRPC_POLLSET_WORKER_LINK_GLOBAL].next = + g_global_root_worker.links[GRPC_POLLSET_WORKER_LINK_GLOBAL].prev = + &g_global_root_worker; +} + +void grpc_pollset_global_shutdown(void) { gpr_mu_destroy(&grpc_polling_mu); } + +static void remove_worker(grpc_pollset_worker *worker, + grpc_pollset_worker_link_type type) { + worker->links[type].prev->links[type].next = worker->links[type].next; + worker->links[type].next->links[type].prev = worker->links[type].prev; + worker->links[type].next = worker->links[type].prev = worker; +} + +static int has_workers(grpc_pollset_worker *root, + grpc_pollset_worker_link_type type) { + return root->links[type].next != root; +} + +static grpc_pollset_worker *pop_front_worker( + grpc_pollset_worker *root, grpc_pollset_worker_link_type type) { + if (has_workers(root, type)) { + grpc_pollset_worker *w = root->links[type].next; + remove_worker(w, type); + return w; + } else { + return NULL; + } +} + +static void push_front_worker(grpc_pollset_worker *root, + grpc_pollset_worker_link_type type, + grpc_pollset_worker *worker) { + worker->links[type].prev = root; + worker->links[type].next = worker->links[type].prev->links[type].next; + worker->links[type].prev->links[type].next = + worker->links[type].next->links[type].prev = worker; +} + +size_t grpc_pollset_size(void) { return sizeof(grpc_pollset); } + +/* There isn't really any such thing as a pollset under Windows, due to the + nature of the IO completion ports. We're still going to provide a minimal + set of features for the sake of the rest of grpc. But grpc_pollset_work + won't actually do any polling, and return as quickly as possible. */ + +void grpc_pollset_init(grpc_pollset *pollset, gpr_mu **mu) { + *mu = &grpc_polling_mu; + pollset->root_worker.links[GRPC_POLLSET_WORKER_LINK_POLLSET].next = + pollset->root_worker.links[GRPC_POLLSET_WORKER_LINK_POLLSET].prev = + &pollset->root_worker; +} + +void grpc_pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_closure *closure) { + pollset->shutting_down = 1; + grpc_pollset_kick(exec_ctx, pollset, GRPC_POLLSET_KICK_BROADCAST); + if (!pollset->is_iocp_worker) { + GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); + } else { + pollset->on_shutdown = closure; + } +} + +void grpc_pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {} + +grpc_error *grpc_pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset, + grpc_pollset_worker **worker_hdl, + gpr_timespec now, gpr_timespec deadline) { + grpc_pollset_worker worker; + if (worker_hdl) *worker_hdl = &worker; + + int added_worker = 0; + worker.links[GRPC_POLLSET_WORKER_LINK_POLLSET].next = + worker.links[GRPC_POLLSET_WORKER_LINK_POLLSET].prev = + worker.links[GRPC_POLLSET_WORKER_LINK_GLOBAL].next = + worker.links[GRPC_POLLSET_WORKER_LINK_GLOBAL].prev = NULL; + worker.kicked = 0; + worker.pollset = pollset; + gpr_cv_init(&worker.cv); + if (!pollset->kicked_without_pollers && !pollset->shutting_down) { + if (g_active_poller == NULL) { + grpc_pollset_worker *next_worker; + /* become poller */ + pollset->is_iocp_worker = 1; + g_active_poller = &worker; + gpr_mu_unlock(&grpc_polling_mu); + grpc_iocp_work(exec_ctx, deadline); + grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&grpc_polling_mu); + pollset->is_iocp_worker = 0; + g_active_poller = NULL; + /* try to get a worker from this pollsets worker list */ + next_worker = pop_front_worker(&pollset->root_worker, + GRPC_POLLSET_WORKER_LINK_POLLSET); + if (next_worker == NULL) { + /* try to get a worker from the global list */ + next_worker = pop_front_worker(&g_global_root_worker, + GRPC_POLLSET_WORKER_LINK_GLOBAL); + } + if (next_worker != NULL) { + next_worker->kicked = 1; + gpr_cv_signal(&next_worker->cv); + } + + if (pollset->shutting_down && pollset->on_shutdown != NULL) { + GRPC_CLOSURE_SCHED(exec_ctx, pollset->on_shutdown, GRPC_ERROR_NONE); + pollset->on_shutdown = NULL; + } + goto done; + } + push_front_worker(&g_global_root_worker, GRPC_POLLSET_WORKER_LINK_GLOBAL, + &worker); + push_front_worker(&pollset->root_worker, GRPC_POLLSET_WORKER_LINK_POLLSET, + &worker); + added_worker = 1; + while (!worker.kicked) { + if (gpr_cv_wait(&worker.cv, &grpc_polling_mu, deadline)) { + break; + } + } + } else { + pollset->kicked_without_pollers = 0; + } +done: + if (!grpc_closure_list_empty(exec_ctx->closure_list)) { + gpr_mu_unlock(&grpc_polling_mu); + grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&grpc_polling_mu); + } + if (added_worker) { + remove_worker(&worker, GRPC_POLLSET_WORKER_LINK_GLOBAL); + remove_worker(&worker, GRPC_POLLSET_WORKER_LINK_POLLSET); + } + gpr_cv_destroy(&worker.cv); + if (worker_hdl) *worker_hdl = NULL; + return GRPC_ERROR_NONE; +} + +grpc_error *grpc_pollset_kick(grpc_exec_ctx *exec_ctx, grpc_pollset *p, + grpc_pollset_worker *specific_worker) { + if (specific_worker != NULL) { + if (specific_worker == GRPC_POLLSET_KICK_BROADCAST) { + for (specific_worker = + p->root_worker.links[GRPC_POLLSET_WORKER_LINK_POLLSET].next; + specific_worker != &p->root_worker; + specific_worker = + specific_worker->links[GRPC_POLLSET_WORKER_LINK_POLLSET].next) { + specific_worker->kicked = 1; + gpr_cv_signal(&specific_worker->cv); + } + p->kicked_without_pollers = 1; + if (p->is_iocp_worker) { + grpc_iocp_kick(); + } + } else { + if (p->is_iocp_worker && g_active_poller == specific_worker) { + grpc_iocp_kick(); + } else { + specific_worker->kicked = 1; + gpr_cv_signal(&specific_worker->cv); + } + } + } else { + specific_worker = + pop_front_worker(&p->root_worker, GRPC_POLLSET_WORKER_LINK_POLLSET); + if (specific_worker != NULL) { + grpc_pollset_kick(exec_ctx, p, specific_worker); + } else if (p->is_iocp_worker) { + grpc_iocp_kick(); + } else { + p->kicked_without_pollers = 1; + } + } + return GRPC_ERROR_NONE; +} + +#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/resolve_address_posix.c b/src/core/lib/iomgr/resolve_address_posix.c deleted file mode 100644 index 60cfeebd47..0000000000 --- a/src/core/lib/iomgr/resolve_address_posix.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/iomgr/sockaddr.h" - -#include "src/core/lib/iomgr/resolve_address.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include "src/core/lib/iomgr/executor.h" -#include "src/core/lib/iomgr/iomgr_internal.h" -#include "src/core/lib/iomgr/unix_sockets_posix.h" -#include "src/core/lib/support/block_annotate.h" -#include "src/core/lib/support/string.h" - -static grpc_error *blocking_resolve_address_impl( - const char *name, const char *default_port, - grpc_resolved_addresses **addresses) { - struct addrinfo hints; - struct addrinfo *result = NULL, *resp; - char *host; - char *port; - int s; - size_t i; - grpc_error *err; - - if (name[0] == 'u' && name[1] == 'n' && name[2] == 'i' && name[3] == 'x' && - name[4] == ':' && name[5] != 0) { - return grpc_resolve_unix_domain_address(name + 5, addresses); - } - - /* parse name, splitting it into host and port parts */ - gpr_split_host_port(name, &host, &port); - if (host == NULL) { - err = grpc_error_set_str( - GRPC_ERROR_CREATE_FROM_STATIC_STRING("unparseable host:port"), - GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(name)); - goto done; - } - if (port == NULL) { - if (default_port == NULL) { - err = grpc_error_set_str( - GRPC_ERROR_CREATE_FROM_STATIC_STRING("no port in name"), - GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(name)); - goto done; - } - port = gpr_strdup(default_port); - } - - /* Call getaddrinfo */ - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_UNSPEC; /* ipv4 or ipv6 */ - hints.ai_socktype = SOCK_STREAM; /* stream socket */ - hints.ai_flags = AI_PASSIVE; /* for wildcard IP address */ - - GRPC_SCHEDULING_START_BLOCKING_REGION; - s = getaddrinfo(host, port, &hints, &result); - GRPC_SCHEDULING_END_BLOCKING_REGION; - - if (s != 0) { - /* Retry if well-known service name is recognized */ - const char *svc[][2] = {{"http", "80"}, {"https", "443"}}; - for (i = 0; i < GPR_ARRAY_SIZE(svc); i++) { - if (strcmp(port, svc[i][0]) == 0) { - GRPC_SCHEDULING_START_BLOCKING_REGION; - s = getaddrinfo(host, svc[i][1], &hints, &result); - GRPC_SCHEDULING_END_BLOCKING_REGION; - break; - } - } - } - - if (s != 0) { - err = grpc_error_set_str( - grpc_error_set_str( - grpc_error_set_str( - grpc_error_set_int( - GRPC_ERROR_CREATE_FROM_STATIC_STRING("OS Error"), - GRPC_ERROR_INT_ERRNO, s), - GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string(gai_strerror(s))), - GRPC_ERROR_STR_SYSCALL, - grpc_slice_from_static_string("getaddrinfo")), - GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(name)); - goto done; - } - - /* Success path: set addrs non-NULL, fill it in */ - *addresses = - (grpc_resolved_addresses *)gpr_malloc(sizeof(grpc_resolved_addresses)); - (*addresses)->naddrs = 0; - for (resp = result; resp != NULL; resp = resp->ai_next) { - (*addresses)->naddrs++; - } - (*addresses)->addrs = (grpc_resolved_address *)gpr_malloc( - sizeof(grpc_resolved_address) * (*addresses)->naddrs); - i = 0; - for (resp = result; resp != NULL; resp = resp->ai_next) { - memcpy(&(*addresses)->addrs[i].addr, resp->ai_addr, resp->ai_addrlen); - (*addresses)->addrs[i].len = resp->ai_addrlen; - i++; - } - err = GRPC_ERROR_NONE; - -done: - gpr_free(host); - gpr_free(port); - if (result) { - freeaddrinfo(result); - } - return err; -} - -grpc_error *(*grpc_blocking_resolve_address)( - const char *name, const char *default_port, - grpc_resolved_addresses **addresses) = blocking_resolve_address_impl; - -typedef struct { - char *name; - char *default_port; - grpc_closure *on_done; - grpc_resolved_addresses **addrs_out; - grpc_closure request_closure; - void *arg; -} request; - -/* Callback to be passed to grpc_executor to asynch-ify - * grpc_blocking_resolve_address */ -static void do_request_thread(grpc_exec_ctx *exec_ctx, void *rp, - grpc_error *error) { - request *r = (request *)rp; - GRPC_CLOSURE_SCHED( - exec_ctx, r->on_done, - grpc_blocking_resolve_address(r->name, r->default_port, r->addrs_out)); - gpr_free(r->name); - gpr_free(r->default_port); - gpr_free(r); -} - -void grpc_resolved_addresses_destroy(grpc_resolved_addresses *addrs) { - if (addrs != NULL) { - gpr_free(addrs->addrs); - } - gpr_free(addrs); -} - -static void resolve_address_impl(grpc_exec_ctx *exec_ctx, const char *name, - const char *default_port, - grpc_pollset_set *interested_parties, - grpc_closure *on_done, - grpc_resolved_addresses **addrs) { - request *r = (request *)gpr_malloc(sizeof(request)); - GRPC_CLOSURE_INIT(&r->request_closure, do_request_thread, r, - grpc_executor_scheduler(GRPC_EXECUTOR_SHORT)); - r->name = gpr_strdup(name); - r->default_port = gpr_strdup(default_port); - r->on_done = on_done; - r->addrs_out = addrs; - GRPC_CLOSURE_SCHED(exec_ctx, &r->request_closure, GRPC_ERROR_NONE); -} - -void (*grpc_resolve_address)( - grpc_exec_ctx *exec_ctx, const char *name, const char *default_port, - grpc_pollset_set *interested_parties, grpc_closure *on_done, - grpc_resolved_addresses **addrs) = resolve_address_impl; - -#endif diff --git a/src/core/lib/iomgr/resolve_address_posix.cc b/src/core/lib/iomgr/resolve_address_posix.cc new file mode 100644 index 0000000000..60cfeebd47 --- /dev/null +++ b/src/core/lib/iomgr/resolve_address_posix.cc @@ -0,0 +1,193 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/iomgr/sockaddr.h" + +#include "src/core/lib/iomgr/resolve_address.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include "src/core/lib/iomgr/executor.h" +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/unix_sockets_posix.h" +#include "src/core/lib/support/block_annotate.h" +#include "src/core/lib/support/string.h" + +static grpc_error *blocking_resolve_address_impl( + const char *name, const char *default_port, + grpc_resolved_addresses **addresses) { + struct addrinfo hints; + struct addrinfo *result = NULL, *resp; + char *host; + char *port; + int s; + size_t i; + grpc_error *err; + + if (name[0] == 'u' && name[1] == 'n' && name[2] == 'i' && name[3] == 'x' && + name[4] == ':' && name[5] != 0) { + return grpc_resolve_unix_domain_address(name + 5, addresses); + } + + /* parse name, splitting it into host and port parts */ + gpr_split_host_port(name, &host, &port); + if (host == NULL) { + err = grpc_error_set_str( + GRPC_ERROR_CREATE_FROM_STATIC_STRING("unparseable host:port"), + GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(name)); + goto done; + } + if (port == NULL) { + if (default_port == NULL) { + err = grpc_error_set_str( + GRPC_ERROR_CREATE_FROM_STATIC_STRING("no port in name"), + GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(name)); + goto done; + } + port = gpr_strdup(default_port); + } + + /* Call getaddrinfo */ + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; /* ipv4 or ipv6 */ + hints.ai_socktype = SOCK_STREAM; /* stream socket */ + hints.ai_flags = AI_PASSIVE; /* for wildcard IP address */ + + GRPC_SCHEDULING_START_BLOCKING_REGION; + s = getaddrinfo(host, port, &hints, &result); + GRPC_SCHEDULING_END_BLOCKING_REGION; + + if (s != 0) { + /* Retry if well-known service name is recognized */ + const char *svc[][2] = {{"http", "80"}, {"https", "443"}}; + for (i = 0; i < GPR_ARRAY_SIZE(svc); i++) { + if (strcmp(port, svc[i][0]) == 0) { + GRPC_SCHEDULING_START_BLOCKING_REGION; + s = getaddrinfo(host, svc[i][1], &hints, &result); + GRPC_SCHEDULING_END_BLOCKING_REGION; + break; + } + } + } + + if (s != 0) { + err = grpc_error_set_str( + grpc_error_set_str( + grpc_error_set_str( + grpc_error_set_int( + GRPC_ERROR_CREATE_FROM_STATIC_STRING("OS Error"), + GRPC_ERROR_INT_ERRNO, s), + GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string(gai_strerror(s))), + GRPC_ERROR_STR_SYSCALL, + grpc_slice_from_static_string("getaddrinfo")), + GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(name)); + goto done; + } + + /* Success path: set addrs non-NULL, fill it in */ + *addresses = + (grpc_resolved_addresses *)gpr_malloc(sizeof(grpc_resolved_addresses)); + (*addresses)->naddrs = 0; + for (resp = result; resp != NULL; resp = resp->ai_next) { + (*addresses)->naddrs++; + } + (*addresses)->addrs = (grpc_resolved_address *)gpr_malloc( + sizeof(grpc_resolved_address) * (*addresses)->naddrs); + i = 0; + for (resp = result; resp != NULL; resp = resp->ai_next) { + memcpy(&(*addresses)->addrs[i].addr, resp->ai_addr, resp->ai_addrlen); + (*addresses)->addrs[i].len = resp->ai_addrlen; + i++; + } + err = GRPC_ERROR_NONE; + +done: + gpr_free(host); + gpr_free(port); + if (result) { + freeaddrinfo(result); + } + return err; +} + +grpc_error *(*grpc_blocking_resolve_address)( + const char *name, const char *default_port, + grpc_resolved_addresses **addresses) = blocking_resolve_address_impl; + +typedef struct { + char *name; + char *default_port; + grpc_closure *on_done; + grpc_resolved_addresses **addrs_out; + grpc_closure request_closure; + void *arg; +} request; + +/* Callback to be passed to grpc_executor to asynch-ify + * grpc_blocking_resolve_address */ +static void do_request_thread(grpc_exec_ctx *exec_ctx, void *rp, + grpc_error *error) { + request *r = (request *)rp; + GRPC_CLOSURE_SCHED( + exec_ctx, r->on_done, + grpc_blocking_resolve_address(r->name, r->default_port, r->addrs_out)); + gpr_free(r->name); + gpr_free(r->default_port); + gpr_free(r); +} + +void grpc_resolved_addresses_destroy(grpc_resolved_addresses *addrs) { + if (addrs != NULL) { + gpr_free(addrs->addrs); + } + gpr_free(addrs); +} + +static void resolve_address_impl(grpc_exec_ctx *exec_ctx, const char *name, + const char *default_port, + grpc_pollset_set *interested_parties, + grpc_closure *on_done, + grpc_resolved_addresses **addrs) { + request *r = (request *)gpr_malloc(sizeof(request)); + GRPC_CLOSURE_INIT(&r->request_closure, do_request_thread, r, + grpc_executor_scheduler(GRPC_EXECUTOR_SHORT)); + r->name = gpr_strdup(name); + r->default_port = gpr_strdup(default_port); + r->on_done = on_done; + r->addrs_out = addrs; + GRPC_CLOSURE_SCHED(exec_ctx, &r->request_closure, GRPC_ERROR_NONE); +} + +void (*grpc_resolve_address)( + grpc_exec_ctx *exec_ctx, const char *name, const char *default_port, + grpc_pollset_set *interested_parties, grpc_closure *on_done, + grpc_resolved_addresses **addrs) = resolve_address_impl; + +#endif diff --git a/src/core/lib/iomgr/resolve_address_uv.c b/src/core/lib/iomgr/resolve_address_uv.c deleted file mode 100644 index 2d438e8b48..0000000000 --- a/src/core/lib/iomgr/resolve_address_uv.c +++ /dev/null @@ -1,280 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" -#ifdef GRPC_UV - -#include - -#include -#include -#include -#include -#include - -#include "src/core/lib/iomgr/closure.h" -#include "src/core/lib/iomgr/error.h" -#include "src/core/lib/iomgr/exec_ctx.h" -#include "src/core/lib/iomgr/iomgr_uv.h" -#include "src/core/lib/iomgr/resolve_address.h" -#include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" - -#include - -typedef struct request { - grpc_closure *on_done; - grpc_resolved_addresses **addresses; - struct addrinfo *hints; - char *host; - char *port; -} request; - -static int retry_named_port_failure(int status, request *r, - uv_getaddrinfo_cb getaddrinfo_cb) { - if (status != 0) { - // This loop is copied from resolve_address_posix.c - char *svc[][2] = {{"http", "80"}, {"https", "443"}}; - for (size_t i = 0; i < GPR_ARRAY_SIZE(svc); i++) { - if (strcmp(r->port, svc[i][0]) == 0) { - int retry_status; - uv_getaddrinfo_t *req = gpr_malloc(sizeof(uv_getaddrinfo_t)); - req->data = r; - r->port = gpr_strdup(svc[i][1]); - retry_status = uv_getaddrinfo(uv_default_loop(), req, getaddrinfo_cb, - r->host, r->port, r->hints); - if (retry_status < 0 || getaddrinfo_cb == NULL) { - // The callback will not be called - gpr_free(req); - } - return retry_status; - } - } - } - /* If this function calls uv_getaddrinfo, it will return that function's - return value. That function only returns numbers <=0, so we can safely - return 1 to indicate that we never retried */ - return 1; -} - -static grpc_error *handle_addrinfo_result(int status, struct addrinfo *result, - grpc_resolved_addresses **addresses) { - struct addrinfo *resp; - size_t i; - if (status != 0) { - grpc_error *error; - *addresses = NULL; - error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("getaddrinfo failed"); - error = - grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string(uv_strerror(status))); - return error; - } - (*addresses) = gpr_malloc(sizeof(grpc_resolved_addresses)); - (*addresses)->naddrs = 0; - for (resp = result; resp != NULL; resp = resp->ai_next) { - (*addresses)->naddrs++; - } - (*addresses)->addrs = - gpr_malloc(sizeof(grpc_resolved_address) * (*addresses)->naddrs); - i = 0; - for (resp = result; resp != NULL; resp = resp->ai_next) { - memcpy(&(*addresses)->addrs[i].addr, resp->ai_addr, resp->ai_addrlen); - (*addresses)->addrs[i].len = resp->ai_addrlen; - i++; - } - - { - for (i = 0; i < (*addresses)->naddrs; i++) { - char *buf; - grpc_sockaddr_to_string(&buf, &(*addresses)->addrs[i], 0); - gpr_free(buf); - } - } - return GRPC_ERROR_NONE; -} - -static void getaddrinfo_callback(uv_getaddrinfo_t *req, int status, - struct addrinfo *res) { - request *r = (request *)req->data; - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - grpc_error *error; - int retry_status; - char *port = r->port; - - gpr_free(req); - retry_status = retry_named_port_failure(status, r, getaddrinfo_callback); - if (retry_status == 0) { - /* The request is being retried. It is using its own port string, so we free - * the original one */ - gpr_free(port); - return; - } - /* Either no retry was attempted, or the retry failed. Either way, the - original error probably has more interesting information */ - error = handle_addrinfo_result(status, res, r->addresses); - GRPC_CLOSURE_SCHED(&exec_ctx, r->on_done, error); - grpc_exec_ctx_finish(&exec_ctx); - gpr_free(r->hints); - gpr_free(r->host); - gpr_free(r->port); - gpr_free(r); - uv_freeaddrinfo(res); -} - -static grpc_error *try_split_host_port(const char *name, - const char *default_port, char **host, - char **port) { - /* parse name, splitting it into host and port parts */ - grpc_error *error; - gpr_split_host_port(name, host, port); - if (*host == NULL) { - char *msg; - gpr_asprintf(&msg, "unparseable host:port: '%s'", name); - error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(msg); - gpr_free(msg); - return error; - } - if (*port == NULL) { - // TODO(murgatroid99): add tests for this case - if (default_port == NULL) { - char *msg; - gpr_asprintf(&msg, "no port in name '%s'", name); - error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(msg); - gpr_free(msg); - return error; - } - *port = gpr_strdup(default_port); - } - return GRPC_ERROR_NONE; -} - -static grpc_error *blocking_resolve_address_impl( - const char *name, const char *default_port, - grpc_resolved_addresses **addresses) { - char *host; - char *port; - struct addrinfo hints; - uv_getaddrinfo_t req; - int s; - grpc_error *err; - int retry_status; - - GRPC_UV_ASSERT_SAME_THREAD(); - - req.addrinfo = NULL; - - err = try_split_host_port(name, default_port, &host, &port); - if (err != GRPC_ERROR_NONE) { - goto done; - } - - /* Call getaddrinfo */ - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_UNSPEC; /* ipv4 or ipv6 */ - hints.ai_socktype = SOCK_STREAM; /* stream socket */ - hints.ai_flags = AI_PASSIVE; /* for wildcard IP address */ - - s = uv_getaddrinfo(uv_default_loop(), &req, NULL, host, port, &hints); - request r = { - .addresses = addresses, .hints = &hints, .host = host, .port = port}; - retry_status = retry_named_port_failure(s, &r, NULL); - if (retry_status <= 0) { - s = retry_status; - } - err = handle_addrinfo_result(s, req.addrinfo, addresses); - -done: - gpr_free(host); - gpr_free(port); - if (req.addrinfo) { - uv_freeaddrinfo(req.addrinfo); - } - return err; -} - -grpc_error *(*grpc_blocking_resolve_address)( - const char *name, const char *default_port, - grpc_resolved_addresses **addresses) = blocking_resolve_address_impl; - -void grpc_resolved_addresses_destroy(grpc_resolved_addresses *addrs) { - if (addrs != NULL) { - gpr_free(addrs->addrs); - } - gpr_free(addrs); -} - -static void resolve_address_impl(grpc_exec_ctx *exec_ctx, const char *name, - const char *default_port, - grpc_pollset_set *interested_parties, - grpc_closure *on_done, - grpc_resolved_addresses **addrs) { - uv_getaddrinfo_t *req = NULL; - request *r = NULL; - struct addrinfo *hints = NULL; - char *host = NULL; - char *port = NULL; - grpc_error *err; - int s; - GRPC_UV_ASSERT_SAME_THREAD(); - err = try_split_host_port(name, default_port, &host, &port); - if (err != GRPC_ERROR_NONE) { - GRPC_CLOSURE_SCHED(exec_ctx, on_done, err); - gpr_free(host); - gpr_free(port); - return; - } - r = gpr_malloc(sizeof(request)); - r->on_done = on_done; - r->addresses = addrs; - r->host = host; - r->port = port; - req = gpr_malloc(sizeof(uv_getaddrinfo_t)); - req->data = r; - - /* Call getaddrinfo */ - hints = gpr_malloc(sizeof(struct addrinfo)); - memset(hints, 0, sizeof(struct addrinfo)); - hints->ai_family = AF_UNSPEC; /* ipv4 or ipv6 */ - hints->ai_socktype = SOCK_STREAM; /* stream socket */ - hints->ai_flags = AI_PASSIVE; /* for wildcard IP address */ - r->hints = hints; - - s = uv_getaddrinfo(uv_default_loop(), req, getaddrinfo_callback, host, port, - hints); - - if (s != 0) { - *addrs = NULL; - err = GRPC_ERROR_CREATE_FROM_STATIC_STRING("getaddrinfo failed"); - err = grpc_error_set_str(err, GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string(uv_strerror(s))); - GRPC_CLOSURE_SCHED(exec_ctx, on_done, err); - gpr_free(r); - gpr_free(req); - gpr_free(hints); - gpr_free(host); - gpr_free(port); - } -} - -void (*grpc_resolve_address)( - grpc_exec_ctx *exec_ctx, const char *name, const char *default_port, - grpc_pollset_set *interested_parties, grpc_closure *on_done, - grpc_resolved_addresses **addrs) = resolve_address_impl; - -#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/resolve_address_uv.cc b/src/core/lib/iomgr/resolve_address_uv.cc new file mode 100644 index 0000000000..2d438e8b48 --- /dev/null +++ b/src/core/lib/iomgr/resolve_address_uv.cc @@ -0,0 +1,280 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" +#ifdef GRPC_UV + +#include + +#include +#include +#include +#include +#include + +#include "src/core/lib/iomgr/closure.h" +#include "src/core/lib/iomgr/error.h" +#include "src/core/lib/iomgr/exec_ctx.h" +#include "src/core/lib/iomgr/iomgr_uv.h" +#include "src/core/lib/iomgr/resolve_address.h" +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" + +#include + +typedef struct request { + grpc_closure *on_done; + grpc_resolved_addresses **addresses; + struct addrinfo *hints; + char *host; + char *port; +} request; + +static int retry_named_port_failure(int status, request *r, + uv_getaddrinfo_cb getaddrinfo_cb) { + if (status != 0) { + // This loop is copied from resolve_address_posix.c + char *svc[][2] = {{"http", "80"}, {"https", "443"}}; + for (size_t i = 0; i < GPR_ARRAY_SIZE(svc); i++) { + if (strcmp(r->port, svc[i][0]) == 0) { + int retry_status; + uv_getaddrinfo_t *req = gpr_malloc(sizeof(uv_getaddrinfo_t)); + req->data = r; + r->port = gpr_strdup(svc[i][1]); + retry_status = uv_getaddrinfo(uv_default_loop(), req, getaddrinfo_cb, + r->host, r->port, r->hints); + if (retry_status < 0 || getaddrinfo_cb == NULL) { + // The callback will not be called + gpr_free(req); + } + return retry_status; + } + } + } + /* If this function calls uv_getaddrinfo, it will return that function's + return value. That function only returns numbers <=0, so we can safely + return 1 to indicate that we never retried */ + return 1; +} + +static grpc_error *handle_addrinfo_result(int status, struct addrinfo *result, + grpc_resolved_addresses **addresses) { + struct addrinfo *resp; + size_t i; + if (status != 0) { + grpc_error *error; + *addresses = NULL; + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("getaddrinfo failed"); + error = + grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string(uv_strerror(status))); + return error; + } + (*addresses) = gpr_malloc(sizeof(grpc_resolved_addresses)); + (*addresses)->naddrs = 0; + for (resp = result; resp != NULL; resp = resp->ai_next) { + (*addresses)->naddrs++; + } + (*addresses)->addrs = + gpr_malloc(sizeof(grpc_resolved_address) * (*addresses)->naddrs); + i = 0; + for (resp = result; resp != NULL; resp = resp->ai_next) { + memcpy(&(*addresses)->addrs[i].addr, resp->ai_addr, resp->ai_addrlen); + (*addresses)->addrs[i].len = resp->ai_addrlen; + i++; + } + + { + for (i = 0; i < (*addresses)->naddrs; i++) { + char *buf; + grpc_sockaddr_to_string(&buf, &(*addresses)->addrs[i], 0); + gpr_free(buf); + } + } + return GRPC_ERROR_NONE; +} + +static void getaddrinfo_callback(uv_getaddrinfo_t *req, int status, + struct addrinfo *res) { + request *r = (request *)req->data; + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + grpc_error *error; + int retry_status; + char *port = r->port; + + gpr_free(req); + retry_status = retry_named_port_failure(status, r, getaddrinfo_callback); + if (retry_status == 0) { + /* The request is being retried. It is using its own port string, so we free + * the original one */ + gpr_free(port); + return; + } + /* Either no retry was attempted, or the retry failed. Either way, the + original error probably has more interesting information */ + error = handle_addrinfo_result(status, res, r->addresses); + GRPC_CLOSURE_SCHED(&exec_ctx, r->on_done, error); + grpc_exec_ctx_finish(&exec_ctx); + gpr_free(r->hints); + gpr_free(r->host); + gpr_free(r->port); + gpr_free(r); + uv_freeaddrinfo(res); +} + +static grpc_error *try_split_host_port(const char *name, + const char *default_port, char **host, + char **port) { + /* parse name, splitting it into host and port parts */ + grpc_error *error; + gpr_split_host_port(name, host, port); + if (*host == NULL) { + char *msg; + gpr_asprintf(&msg, "unparseable host:port: '%s'", name); + error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(msg); + gpr_free(msg); + return error; + } + if (*port == NULL) { + // TODO(murgatroid99): add tests for this case + if (default_port == NULL) { + char *msg; + gpr_asprintf(&msg, "no port in name '%s'", name); + error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(msg); + gpr_free(msg); + return error; + } + *port = gpr_strdup(default_port); + } + return GRPC_ERROR_NONE; +} + +static grpc_error *blocking_resolve_address_impl( + const char *name, const char *default_port, + grpc_resolved_addresses **addresses) { + char *host; + char *port; + struct addrinfo hints; + uv_getaddrinfo_t req; + int s; + grpc_error *err; + int retry_status; + + GRPC_UV_ASSERT_SAME_THREAD(); + + req.addrinfo = NULL; + + err = try_split_host_port(name, default_port, &host, &port); + if (err != GRPC_ERROR_NONE) { + goto done; + } + + /* Call getaddrinfo */ + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; /* ipv4 or ipv6 */ + hints.ai_socktype = SOCK_STREAM; /* stream socket */ + hints.ai_flags = AI_PASSIVE; /* for wildcard IP address */ + + s = uv_getaddrinfo(uv_default_loop(), &req, NULL, host, port, &hints); + request r = { + .addresses = addresses, .hints = &hints, .host = host, .port = port}; + retry_status = retry_named_port_failure(s, &r, NULL); + if (retry_status <= 0) { + s = retry_status; + } + err = handle_addrinfo_result(s, req.addrinfo, addresses); + +done: + gpr_free(host); + gpr_free(port); + if (req.addrinfo) { + uv_freeaddrinfo(req.addrinfo); + } + return err; +} + +grpc_error *(*grpc_blocking_resolve_address)( + const char *name, const char *default_port, + grpc_resolved_addresses **addresses) = blocking_resolve_address_impl; + +void grpc_resolved_addresses_destroy(grpc_resolved_addresses *addrs) { + if (addrs != NULL) { + gpr_free(addrs->addrs); + } + gpr_free(addrs); +} + +static void resolve_address_impl(grpc_exec_ctx *exec_ctx, const char *name, + const char *default_port, + grpc_pollset_set *interested_parties, + grpc_closure *on_done, + grpc_resolved_addresses **addrs) { + uv_getaddrinfo_t *req = NULL; + request *r = NULL; + struct addrinfo *hints = NULL; + char *host = NULL; + char *port = NULL; + grpc_error *err; + int s; + GRPC_UV_ASSERT_SAME_THREAD(); + err = try_split_host_port(name, default_port, &host, &port); + if (err != GRPC_ERROR_NONE) { + GRPC_CLOSURE_SCHED(exec_ctx, on_done, err); + gpr_free(host); + gpr_free(port); + return; + } + r = gpr_malloc(sizeof(request)); + r->on_done = on_done; + r->addresses = addrs; + r->host = host; + r->port = port; + req = gpr_malloc(sizeof(uv_getaddrinfo_t)); + req->data = r; + + /* Call getaddrinfo */ + hints = gpr_malloc(sizeof(struct addrinfo)); + memset(hints, 0, sizeof(struct addrinfo)); + hints->ai_family = AF_UNSPEC; /* ipv4 or ipv6 */ + hints->ai_socktype = SOCK_STREAM; /* stream socket */ + hints->ai_flags = AI_PASSIVE; /* for wildcard IP address */ + r->hints = hints; + + s = uv_getaddrinfo(uv_default_loop(), req, getaddrinfo_callback, host, port, + hints); + + if (s != 0) { + *addrs = NULL; + err = GRPC_ERROR_CREATE_FROM_STATIC_STRING("getaddrinfo failed"); + err = grpc_error_set_str(err, GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string(uv_strerror(s))); + GRPC_CLOSURE_SCHED(exec_ctx, on_done, err); + gpr_free(r); + gpr_free(req); + gpr_free(hints); + gpr_free(host); + gpr_free(port); + } +} + +void (*grpc_resolve_address)( + grpc_exec_ctx *exec_ctx, const char *name, const char *default_port, + grpc_pollset_set *interested_parties, grpc_closure *on_done, + grpc_resolved_addresses **addrs) = resolve_address_impl; + +#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/resolve_address_windows.c b/src/core/lib/iomgr/resolve_address_windows.c deleted file mode 100644 index 0cb0029f4e..0000000000 --- a/src/core/lib/iomgr/resolve_address_windows.c +++ /dev/null @@ -1,175 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" -#ifdef GRPC_WINSOCK_SOCKET - -#include "src/core/lib/iomgr/sockaddr.h" - -#include "src/core/lib/iomgr/resolve_address.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include "src/core/lib/iomgr/executor.h" -#include "src/core/lib/iomgr/iomgr_internal.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/support/block_annotate.h" -#include "src/core/lib/support/string.h" - -typedef struct { - char *name; - char *default_port; - grpc_closure request_closure; - grpc_closure *on_done; - grpc_resolved_addresses **addresses; -} request; - -static grpc_error *blocking_resolve_address_impl( - const char *name, const char *default_port, - grpc_resolved_addresses **addresses) { - struct addrinfo hints; - struct addrinfo *result = NULL, *resp; - char *host; - char *port; - int s; - size_t i; - grpc_error *error = GRPC_ERROR_NONE; - - /* parse name, splitting it into host and port parts */ - gpr_split_host_port(name, &host, &port); - if (host == NULL) { - char *msg; - gpr_asprintf(&msg, "unparseable host:port: '%s'", name); - error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(msg); - gpr_free(msg); - goto done; - } - if (port == NULL) { - if (default_port == NULL) { - char *msg; - gpr_asprintf(&msg, "no port in name '%s'", name); - error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(msg); - gpr_free(msg); - goto done; - } - port = gpr_strdup(default_port); - } - - /* Call getaddrinfo */ - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_UNSPEC; /* ipv4 or ipv6 */ - hints.ai_socktype = SOCK_STREAM; /* stream socket */ - hints.ai_flags = AI_PASSIVE; /* for wildcard IP address */ - - GRPC_SCHEDULING_START_BLOCKING_REGION; - s = getaddrinfo(host, port, &hints, &result); - GRPC_SCHEDULING_END_BLOCKING_REGION; - if (s != 0) { - error = GRPC_WSA_ERROR(WSAGetLastError(), "getaddrinfo"); - goto done; - } - - /* Success path: set addrs non-NULL, fill it in */ - (*addresses) = gpr_malloc(sizeof(grpc_resolved_addresses)); - (*addresses)->naddrs = 0; - for (resp = result; resp != NULL; resp = resp->ai_next) { - (*addresses)->naddrs++; - } - (*addresses)->addrs = - gpr_malloc(sizeof(grpc_resolved_address) * (*addresses)->naddrs); - i = 0; - for (resp = result; resp != NULL; resp = resp->ai_next) { - memcpy(&(*addresses)->addrs[i].addr, resp->ai_addr, resp->ai_addrlen); - (*addresses)->addrs[i].len = resp->ai_addrlen; - i++; - } - - { - for (i = 0; i < (*addresses)->naddrs; i++) { - char *buf; - grpc_sockaddr_to_string(&buf, &(*addresses)->addrs[i], 0); - gpr_free(buf); - } - } - -done: - gpr_free(host); - gpr_free(port); - if (result) { - freeaddrinfo(result); - } - return error; -} - -grpc_error *(*grpc_blocking_resolve_address)( - const char *name, const char *default_port, - grpc_resolved_addresses **addresses) = blocking_resolve_address_impl; - -/* Callback to be passed to grpc_executor to asynch-ify - * grpc_blocking_resolve_address */ -static void do_request_thread(grpc_exec_ctx *exec_ctx, void *rp, - grpc_error *error) { - request *r = rp; - if (error == GRPC_ERROR_NONE) { - error = - grpc_blocking_resolve_address(r->name, r->default_port, r->addresses); - } else { - GRPC_ERROR_REF(error); - } - GRPC_CLOSURE_SCHED(exec_ctx, r->on_done, error); - gpr_free(r->name); - gpr_free(r->default_port); - gpr_free(r); -} - -void grpc_resolved_addresses_destroy(grpc_resolved_addresses *addrs) { - if (addrs != NULL) { - gpr_free(addrs->addrs); - } - gpr_free(addrs); -} - -static void resolve_address_impl(grpc_exec_ctx *exec_ctx, const char *name, - const char *default_port, - grpc_pollset_set *interested_parties, - grpc_closure *on_done, - grpc_resolved_addresses **addresses) { - request *r = gpr_malloc(sizeof(request)); - GRPC_CLOSURE_INIT(&r->request_closure, do_request_thread, r, - grpc_executor_scheduler(GRPC_EXECUTOR_SHORT)); - r->name = gpr_strdup(name); - r->default_port = gpr_strdup(default_port); - r->on_done = on_done; - r->addresses = addresses; - GRPC_CLOSURE_SCHED(exec_ctx, &r->request_closure, GRPC_ERROR_NONE); -} - -void (*grpc_resolve_address)( - grpc_exec_ctx *exec_ctx, const char *name, const char *default_port, - grpc_pollset_set *interested_parties, grpc_closure *on_done, - grpc_resolved_addresses **addresses) = resolve_address_impl; - -#endif diff --git a/src/core/lib/iomgr/resolve_address_windows.cc b/src/core/lib/iomgr/resolve_address_windows.cc new file mode 100644 index 0000000000..0cb0029f4e --- /dev/null +++ b/src/core/lib/iomgr/resolve_address_windows.cc @@ -0,0 +1,175 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" +#ifdef GRPC_WINSOCK_SOCKET + +#include "src/core/lib/iomgr/sockaddr.h" + +#include "src/core/lib/iomgr/resolve_address.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include "src/core/lib/iomgr/executor.h" +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/support/block_annotate.h" +#include "src/core/lib/support/string.h" + +typedef struct { + char *name; + char *default_port; + grpc_closure request_closure; + grpc_closure *on_done; + grpc_resolved_addresses **addresses; +} request; + +static grpc_error *blocking_resolve_address_impl( + const char *name, const char *default_port, + grpc_resolved_addresses **addresses) { + struct addrinfo hints; + struct addrinfo *result = NULL, *resp; + char *host; + char *port; + int s; + size_t i; + grpc_error *error = GRPC_ERROR_NONE; + + /* parse name, splitting it into host and port parts */ + gpr_split_host_port(name, &host, &port); + if (host == NULL) { + char *msg; + gpr_asprintf(&msg, "unparseable host:port: '%s'", name); + error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(msg); + gpr_free(msg); + goto done; + } + if (port == NULL) { + if (default_port == NULL) { + char *msg; + gpr_asprintf(&msg, "no port in name '%s'", name); + error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(msg); + gpr_free(msg); + goto done; + } + port = gpr_strdup(default_port); + } + + /* Call getaddrinfo */ + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; /* ipv4 or ipv6 */ + hints.ai_socktype = SOCK_STREAM; /* stream socket */ + hints.ai_flags = AI_PASSIVE; /* for wildcard IP address */ + + GRPC_SCHEDULING_START_BLOCKING_REGION; + s = getaddrinfo(host, port, &hints, &result); + GRPC_SCHEDULING_END_BLOCKING_REGION; + if (s != 0) { + error = GRPC_WSA_ERROR(WSAGetLastError(), "getaddrinfo"); + goto done; + } + + /* Success path: set addrs non-NULL, fill it in */ + (*addresses) = gpr_malloc(sizeof(grpc_resolved_addresses)); + (*addresses)->naddrs = 0; + for (resp = result; resp != NULL; resp = resp->ai_next) { + (*addresses)->naddrs++; + } + (*addresses)->addrs = + gpr_malloc(sizeof(grpc_resolved_address) * (*addresses)->naddrs); + i = 0; + for (resp = result; resp != NULL; resp = resp->ai_next) { + memcpy(&(*addresses)->addrs[i].addr, resp->ai_addr, resp->ai_addrlen); + (*addresses)->addrs[i].len = resp->ai_addrlen; + i++; + } + + { + for (i = 0; i < (*addresses)->naddrs; i++) { + char *buf; + grpc_sockaddr_to_string(&buf, &(*addresses)->addrs[i], 0); + gpr_free(buf); + } + } + +done: + gpr_free(host); + gpr_free(port); + if (result) { + freeaddrinfo(result); + } + return error; +} + +grpc_error *(*grpc_blocking_resolve_address)( + const char *name, const char *default_port, + grpc_resolved_addresses **addresses) = blocking_resolve_address_impl; + +/* Callback to be passed to grpc_executor to asynch-ify + * grpc_blocking_resolve_address */ +static void do_request_thread(grpc_exec_ctx *exec_ctx, void *rp, + grpc_error *error) { + request *r = rp; + if (error == GRPC_ERROR_NONE) { + error = + grpc_blocking_resolve_address(r->name, r->default_port, r->addresses); + } else { + GRPC_ERROR_REF(error); + } + GRPC_CLOSURE_SCHED(exec_ctx, r->on_done, error); + gpr_free(r->name); + gpr_free(r->default_port); + gpr_free(r); +} + +void grpc_resolved_addresses_destroy(grpc_resolved_addresses *addrs) { + if (addrs != NULL) { + gpr_free(addrs->addrs); + } + gpr_free(addrs); +} + +static void resolve_address_impl(grpc_exec_ctx *exec_ctx, const char *name, + const char *default_port, + grpc_pollset_set *interested_parties, + grpc_closure *on_done, + grpc_resolved_addresses **addresses) { + request *r = gpr_malloc(sizeof(request)); + GRPC_CLOSURE_INIT(&r->request_closure, do_request_thread, r, + grpc_executor_scheduler(GRPC_EXECUTOR_SHORT)); + r->name = gpr_strdup(name); + r->default_port = gpr_strdup(default_port); + r->on_done = on_done; + r->addresses = addresses; + GRPC_CLOSURE_SCHED(exec_ctx, &r->request_closure, GRPC_ERROR_NONE); +} + +void (*grpc_resolve_address)( + grpc_exec_ctx *exec_ctx, const char *name, const char *default_port, + grpc_pollset_set *interested_parties, grpc_closure *on_done, + grpc_resolved_addresses **addresses) = resolve_address_impl; + +#endif diff --git a/src/core/lib/iomgr/resource_quota.c b/src/core/lib/iomgr/resource_quota.c deleted file mode 100644 index 4d69986fbc..0000000000 --- a/src/core/lib/iomgr/resource_quota.c +++ /dev/null @@ -1,868 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/resource_quota.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "src/core/lib/iomgr/combiner.h" - -grpc_tracer_flag grpc_resource_quota_trace = - GRPC_TRACER_INITIALIZER(false, "resource_quota"); - -#define MEMORY_USAGE_ESTIMATION_MAX 65536 - -/* Internal linked list pointers for a resource user */ -typedef struct { - grpc_resource_user *next; - grpc_resource_user *prev; -} grpc_resource_user_link; - -/* Resource users are kept in (potentially) several intrusive linked lists - at once. These are the list names. */ -typedef enum { - /* Resource users that are waiting for an allocation */ - GRPC_RULIST_AWAITING_ALLOCATION, - /* Resource users that have free memory available for internal reclamation */ - GRPC_RULIST_NON_EMPTY_FREE_POOL, - /* Resource users that have published a benign reclamation is available */ - GRPC_RULIST_RECLAIMER_BENIGN, - /* Resource users that have published a destructive reclamation is - available */ - GRPC_RULIST_RECLAIMER_DESTRUCTIVE, - /* Number of lists: must be last */ - GRPC_RULIST_COUNT -} grpc_rulist; - -struct grpc_resource_user { - /* The quota this resource user consumes from */ - grpc_resource_quota *resource_quota; - - /* Closure to schedule an allocation under the resource quota combiner lock */ - grpc_closure allocate_closure; - /* Closure to publish a non empty free pool under the resource quota combiner - lock */ - grpc_closure add_to_free_pool_closure; - - /* one ref for each ref call (released by grpc_resource_user_unref), and one - ref for each byte allocated (released by grpc_resource_user_free) */ - gpr_atm refs; - /* is this resource user unlocked? starts at 0, increases for each shutdown - call */ - gpr_atm shutdown; - - gpr_mu mu; - /* The amount of memory (in bytes) this user has cached for its own use: to - avoid quota contention, each resource user can keep some memory in - addition to what it is immediately using (e.g., for caching), and the quota - can pull it back under memory pressure. - This value can become negative if more memory has been requested than - existed in the free pool, at which point the quota is consulted to bring - this value non-negative (asynchronously). */ - int64_t free_pool; - /* A list of closures to call once free_pool becomes non-negative - ie when - all outstanding allocations have been granted. */ - grpc_closure_list on_allocated; - /* True if we are currently trying to allocate from the quota, false if not */ - bool allocating; - /* True if we are currently trying to add ourselves to the non-free quota - list, false otherwise */ - bool added_to_free_pool; - - /* Reclaimers: index 0 is the benign reclaimer, 1 is the destructive reclaimer - */ - grpc_closure *reclaimers[2]; - /* Reclaimers just posted: once we're in the combiner lock, we'll move them - to the array above */ - grpc_closure *new_reclaimers[2]; - /* Trampoline closures to finish reclamation and re-enter the quota combiner - lock */ - grpc_closure post_reclaimer_closure[2]; - - /* Closure to execute under the quota combiner to de-register and shutdown the - resource user */ - grpc_closure destroy_closure; - - /* Links in the various grpc_rulist lists */ - grpc_resource_user_link links[GRPC_RULIST_COUNT]; - - /* The name of this resource user, for debugging/tracing */ - char *name; -}; - -struct grpc_resource_quota { - /* refcount */ - gpr_refcount refs; - - /* estimate of current memory usage - scaled to the range [0..RESOURCE_USAGE_ESTIMATION_MAX] */ - gpr_atm memory_usage_estimation; - - /* Master combiner lock: all activity on a quota executes under this combiner - * (so no mutex is needed for this data structure) */ - grpc_combiner *combiner; - /* Size of the resource quota */ - int64_t size; - /* Amount of free memory in the resource quota */ - int64_t free_pool; - - gpr_atm last_size; - - /* Has rq_step been scheduled to occur? */ - bool step_scheduled; - /* Are we currently reclaiming memory */ - bool reclaiming; - /* Closure around rq_step */ - grpc_closure rq_step_closure; - /* Closure around rq_reclamation_done */ - grpc_closure rq_reclamation_done_closure; - - /* This is only really usable for debugging: it's always a stale pointer, but - a stale pointer that might just be fresh enough to guide us to where the - reclamation system is stuck */ - grpc_closure *debug_only_last_initiated_reclaimer; - grpc_resource_user *debug_only_last_reclaimer_resource_user; - - /* Roots of all resource user lists */ - grpc_resource_user *roots[GRPC_RULIST_COUNT]; - - char *name; -}; - -/******************************************************************************* - * list management - */ - -static void rulist_add_head(grpc_resource_user *resource_user, - grpc_rulist list) { - grpc_resource_quota *resource_quota = resource_user->resource_quota; - grpc_resource_user **root = &resource_quota->roots[list]; - if (*root == NULL) { - *root = resource_user; - resource_user->links[list].next = resource_user->links[list].prev = - resource_user; - } else { - resource_user->links[list].next = *root; - resource_user->links[list].prev = (*root)->links[list].prev; - resource_user->links[list].next->links[list].prev = - resource_user->links[list].prev->links[list].next = resource_user; - *root = resource_user; - } -} - -static void rulist_add_tail(grpc_resource_user *resource_user, - grpc_rulist list) { - grpc_resource_quota *resource_quota = resource_user->resource_quota; - grpc_resource_user **root = &resource_quota->roots[list]; - if (*root == NULL) { - *root = resource_user; - resource_user->links[list].next = resource_user->links[list].prev = - resource_user; - } else { - resource_user->links[list].next = (*root)->links[list].next; - resource_user->links[list].prev = *root; - resource_user->links[list].next->links[list].prev = - resource_user->links[list].prev->links[list].next = resource_user; - } -} - -static bool rulist_empty(grpc_resource_quota *resource_quota, - grpc_rulist list) { - return resource_quota->roots[list] == NULL; -} - -static grpc_resource_user *rulist_pop_head(grpc_resource_quota *resource_quota, - grpc_rulist list) { - grpc_resource_user **root = &resource_quota->roots[list]; - grpc_resource_user *resource_user = *root; - if (resource_user == NULL) { - return NULL; - } - if (resource_user->links[list].next == resource_user) { - *root = NULL; - } else { - resource_user->links[list].next->links[list].prev = - resource_user->links[list].prev; - resource_user->links[list].prev->links[list].next = - resource_user->links[list].next; - *root = resource_user->links[list].next; - } - resource_user->links[list].next = resource_user->links[list].prev = NULL; - return resource_user; -} - -static void rulist_remove(grpc_resource_user *resource_user, grpc_rulist list) { - if (resource_user->links[list].next == NULL) return; - grpc_resource_quota *resource_quota = resource_user->resource_quota; - if (resource_quota->roots[list] == resource_user) { - resource_quota->roots[list] = resource_user->links[list].next; - if (resource_quota->roots[list] == resource_user) { - resource_quota->roots[list] = NULL; - } - } - resource_user->links[list].next->links[list].prev = - resource_user->links[list].prev; - resource_user->links[list].prev->links[list].next = - resource_user->links[list].next; - resource_user->links[list].next = resource_user->links[list].prev = NULL; -} - -/******************************************************************************* - * resource quota state machine - */ - -static bool rq_alloc(grpc_exec_ctx *exec_ctx, - grpc_resource_quota *resource_quota); -static bool rq_reclaim_from_per_user_free_pool( - grpc_exec_ctx *exec_ctx, grpc_resource_quota *resource_quota); -static bool rq_reclaim(grpc_exec_ctx *exec_ctx, - grpc_resource_quota *resource_quota, bool destructive); - -static void rq_step(grpc_exec_ctx *exec_ctx, void *rq, grpc_error *error) { - grpc_resource_quota *resource_quota = (grpc_resource_quota *)rq; - resource_quota->step_scheduled = false; - do { - if (rq_alloc(exec_ctx, resource_quota)) goto done; - } while (rq_reclaim_from_per_user_free_pool(exec_ctx, resource_quota)); - - if (!rq_reclaim(exec_ctx, resource_quota, false)) { - rq_reclaim(exec_ctx, resource_quota, true); - } - -done: - grpc_resource_quota_unref_internal(exec_ctx, resource_quota); -} - -static void rq_step_sched(grpc_exec_ctx *exec_ctx, - grpc_resource_quota *resource_quota) { - if (resource_quota->step_scheduled) return; - resource_quota->step_scheduled = true; - grpc_resource_quota_ref_internal(resource_quota); - GRPC_CLOSURE_SCHED(exec_ctx, &resource_quota->rq_step_closure, - GRPC_ERROR_NONE); -} - -/* update the atomically available resource estimate - use no barriers since - timeliness of delivery really doesn't matter much */ -static void rq_update_estimate(grpc_resource_quota *resource_quota) { - gpr_atm memory_usage_estimation = MEMORY_USAGE_ESTIMATION_MAX; - if (resource_quota->size != 0) { - memory_usage_estimation = - GPR_CLAMP((gpr_atm)((1.0 - - ((double)resource_quota->free_pool) / - ((double)resource_quota->size)) * - MEMORY_USAGE_ESTIMATION_MAX), - 0, MEMORY_USAGE_ESTIMATION_MAX); - } - gpr_atm_no_barrier_store(&resource_quota->memory_usage_estimation, - memory_usage_estimation); -} - -/* returns true if all allocations are completed */ -static bool rq_alloc(grpc_exec_ctx *exec_ctx, - grpc_resource_quota *resource_quota) { - grpc_resource_user *resource_user; - while ((resource_user = rulist_pop_head(resource_quota, - GRPC_RULIST_AWAITING_ALLOCATION))) { - gpr_mu_lock(&resource_user->mu); - if (resource_user->free_pool < 0 && - -resource_user->free_pool <= resource_quota->free_pool) { - int64_t amt = -resource_user->free_pool; - resource_user->free_pool = 0; - resource_quota->free_pool -= amt; - rq_update_estimate(resource_quota); - if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { - gpr_log(GPR_DEBUG, "RQ %s %s: grant alloc %" PRId64 - " bytes; rq_free_pool -> %" PRId64, - resource_quota->name, resource_user->name, amt, - resource_quota->free_pool); - } - } else if (GRPC_TRACER_ON(grpc_resource_quota_trace) && - resource_user->free_pool >= 0) { - gpr_log(GPR_DEBUG, "RQ %s %s: discard already satisfied alloc request", - resource_quota->name, resource_user->name); - } - if (resource_user->free_pool >= 0) { - resource_user->allocating = false; - GRPC_CLOSURE_LIST_SCHED(exec_ctx, &resource_user->on_allocated); - gpr_mu_unlock(&resource_user->mu); - } else { - rulist_add_head(resource_user, GRPC_RULIST_AWAITING_ALLOCATION); - gpr_mu_unlock(&resource_user->mu); - return false; - } - } - return true; -} - -/* returns true if any memory could be reclaimed from buffers */ -static bool rq_reclaim_from_per_user_free_pool( - grpc_exec_ctx *exec_ctx, grpc_resource_quota *resource_quota) { - grpc_resource_user *resource_user; - while ((resource_user = rulist_pop_head(resource_quota, - GRPC_RULIST_NON_EMPTY_FREE_POOL))) { - gpr_mu_lock(&resource_user->mu); - if (resource_user->free_pool > 0) { - int64_t amt = resource_user->free_pool; - resource_user->free_pool = 0; - resource_quota->free_pool += amt; - rq_update_estimate(resource_quota); - if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { - gpr_log(GPR_DEBUG, "RQ %s %s: reclaim_from_per_user_free_pool %" PRId64 - " bytes; rq_free_pool -> %" PRId64, - resource_quota->name, resource_user->name, amt, - resource_quota->free_pool); - } - gpr_mu_unlock(&resource_user->mu); - return true; - } else { - gpr_mu_unlock(&resource_user->mu); - } - } - return false; -} - -/* returns true if reclamation is proceeding */ -static bool rq_reclaim(grpc_exec_ctx *exec_ctx, - grpc_resource_quota *resource_quota, bool destructive) { - if (resource_quota->reclaiming) return true; - grpc_rulist list = destructive ? GRPC_RULIST_RECLAIMER_DESTRUCTIVE - : GRPC_RULIST_RECLAIMER_BENIGN; - grpc_resource_user *resource_user = rulist_pop_head(resource_quota, list); - if (resource_user == NULL) return false; - if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { - gpr_log(GPR_DEBUG, "RQ %s %s: initiate %s reclamation", - resource_quota->name, resource_user->name, - destructive ? "destructive" : "benign"); - } - resource_quota->reclaiming = true; - grpc_resource_quota_ref_internal(resource_quota); - grpc_closure *c = resource_user->reclaimers[destructive]; - GPR_ASSERT(c); - resource_quota->debug_only_last_reclaimer_resource_user = resource_user; - resource_quota->debug_only_last_initiated_reclaimer = c; - resource_user->reclaimers[destructive] = NULL; - GRPC_CLOSURE_RUN(exec_ctx, c, GRPC_ERROR_NONE); - return true; -} - -/******************************************************************************* - * ru_slice: a slice implementation that is backed by a grpc_resource_user - */ - -typedef struct { - grpc_slice_refcount base; - gpr_refcount refs; - grpc_resource_user *resource_user; - size_t size; -} ru_slice_refcount; - -static void ru_slice_ref(void *p) { - ru_slice_refcount *rc = (ru_slice_refcount *)p; - gpr_ref(&rc->refs); -} - -static void ru_slice_unref(grpc_exec_ctx *exec_ctx, void *p) { - ru_slice_refcount *rc = (ru_slice_refcount *)p; - if (gpr_unref(&rc->refs)) { - grpc_resource_user_free(exec_ctx, rc->resource_user, rc->size); - gpr_free(rc); - } -} - -static const grpc_slice_refcount_vtable ru_slice_vtable = { - ru_slice_ref, ru_slice_unref, grpc_slice_default_eq_impl, - grpc_slice_default_hash_impl}; - -static grpc_slice ru_slice_create(grpc_resource_user *resource_user, - size_t size) { - ru_slice_refcount *rc = - (ru_slice_refcount *)gpr_malloc(sizeof(ru_slice_refcount) + size); - rc->base.vtable = &ru_slice_vtable; - rc->base.sub_refcount = &rc->base; - gpr_ref_init(&rc->refs, 1); - rc->resource_user = resource_user; - rc->size = size; - grpc_slice slice; - slice.refcount = &rc->base; - slice.data.refcounted.bytes = (uint8_t *)(rc + 1); - slice.data.refcounted.length = size; - return slice; -} - -/******************************************************************************* - * grpc_resource_quota internal implementation: resource user manipulation under - * the combiner - */ - -static void ru_allocate(grpc_exec_ctx *exec_ctx, void *ru, grpc_error *error) { - grpc_resource_user *resource_user = (grpc_resource_user *)ru; - if (rulist_empty(resource_user->resource_quota, - GRPC_RULIST_AWAITING_ALLOCATION)) { - rq_step_sched(exec_ctx, resource_user->resource_quota); - } - rulist_add_tail(resource_user, GRPC_RULIST_AWAITING_ALLOCATION); -} - -static void ru_add_to_free_pool(grpc_exec_ctx *exec_ctx, void *ru, - grpc_error *error) { - grpc_resource_user *resource_user = (grpc_resource_user *)ru; - if (!rulist_empty(resource_user->resource_quota, - GRPC_RULIST_AWAITING_ALLOCATION) && - rulist_empty(resource_user->resource_quota, - GRPC_RULIST_NON_EMPTY_FREE_POOL)) { - rq_step_sched(exec_ctx, resource_user->resource_quota); - } - rulist_add_tail(resource_user, GRPC_RULIST_NON_EMPTY_FREE_POOL); -} - -static bool ru_post_reclaimer(grpc_exec_ctx *exec_ctx, - grpc_resource_user *resource_user, - bool destructive) { - grpc_closure *closure = resource_user->new_reclaimers[destructive]; - GPR_ASSERT(closure != NULL); - resource_user->new_reclaimers[destructive] = NULL; - GPR_ASSERT(resource_user->reclaimers[destructive] == NULL); - if (gpr_atm_acq_load(&resource_user->shutdown) > 0) { - GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_CANCELLED); - return false; - } - resource_user->reclaimers[destructive] = closure; - return true; -} - -static void ru_post_benign_reclaimer(grpc_exec_ctx *exec_ctx, void *ru, - grpc_error *error) { - grpc_resource_user *resource_user = (grpc_resource_user *)ru; - if (!ru_post_reclaimer(exec_ctx, resource_user, false)) return; - if (!rulist_empty(resource_user->resource_quota, - GRPC_RULIST_AWAITING_ALLOCATION) && - rulist_empty(resource_user->resource_quota, - GRPC_RULIST_NON_EMPTY_FREE_POOL) && - rulist_empty(resource_user->resource_quota, - GRPC_RULIST_RECLAIMER_BENIGN)) { - rq_step_sched(exec_ctx, resource_user->resource_quota); - } - rulist_add_tail(resource_user, GRPC_RULIST_RECLAIMER_BENIGN); -} - -static void ru_post_destructive_reclaimer(grpc_exec_ctx *exec_ctx, void *ru, - grpc_error *error) { - grpc_resource_user *resource_user = (grpc_resource_user *)ru; - if (!ru_post_reclaimer(exec_ctx, resource_user, true)) return; - if (!rulist_empty(resource_user->resource_quota, - GRPC_RULIST_AWAITING_ALLOCATION) && - rulist_empty(resource_user->resource_quota, - GRPC_RULIST_NON_EMPTY_FREE_POOL) && - rulist_empty(resource_user->resource_quota, - GRPC_RULIST_RECLAIMER_BENIGN) && - rulist_empty(resource_user->resource_quota, - GRPC_RULIST_RECLAIMER_DESTRUCTIVE)) { - rq_step_sched(exec_ctx, resource_user->resource_quota); - } - rulist_add_tail(resource_user, GRPC_RULIST_RECLAIMER_DESTRUCTIVE); -} - -static void ru_shutdown(grpc_exec_ctx *exec_ctx, void *ru, grpc_error *error) { - grpc_resource_user *resource_user = (grpc_resource_user *)ru; - GRPC_CLOSURE_SCHED(exec_ctx, resource_user->reclaimers[0], - GRPC_ERROR_CANCELLED); - GRPC_CLOSURE_SCHED(exec_ctx, resource_user->reclaimers[1], - GRPC_ERROR_CANCELLED); - resource_user->reclaimers[0] = NULL; - resource_user->reclaimers[1] = NULL; - rulist_remove(resource_user, GRPC_RULIST_RECLAIMER_BENIGN); - rulist_remove(resource_user, GRPC_RULIST_RECLAIMER_DESTRUCTIVE); -} - -static void ru_destroy(grpc_exec_ctx *exec_ctx, void *ru, grpc_error *error) { - grpc_resource_user *resource_user = (grpc_resource_user *)ru; - GPR_ASSERT(gpr_atm_no_barrier_load(&resource_user->refs) == 0); - for (int i = 0; i < GRPC_RULIST_COUNT; i++) { - rulist_remove(resource_user, (grpc_rulist)i); - } - GRPC_CLOSURE_SCHED(exec_ctx, resource_user->reclaimers[0], - GRPC_ERROR_CANCELLED); - GRPC_CLOSURE_SCHED(exec_ctx, resource_user->reclaimers[1], - GRPC_ERROR_CANCELLED); - if (resource_user->free_pool != 0) { - resource_user->resource_quota->free_pool += resource_user->free_pool; - rq_step_sched(exec_ctx, resource_user->resource_quota); - } - grpc_resource_quota_unref_internal(exec_ctx, resource_user->resource_quota); - gpr_mu_destroy(&resource_user->mu); - gpr_free(resource_user->name); - gpr_free(resource_user); -} - -static void ru_allocated_slices(grpc_exec_ctx *exec_ctx, void *arg, - grpc_error *error) { - grpc_resource_user_slice_allocator *slice_allocator = - (grpc_resource_user_slice_allocator *)arg; - if (error == GRPC_ERROR_NONE) { - for (size_t i = 0; i < slice_allocator->count; i++) { - grpc_slice_buffer_add_indexed( - slice_allocator->dest, ru_slice_create(slice_allocator->resource_user, - slice_allocator->length)); - } - } - GRPC_CLOSURE_RUN(exec_ctx, &slice_allocator->on_done, GRPC_ERROR_REF(error)); -} - -/******************************************************************************* - * grpc_resource_quota internal implementation: quota manipulation under the - * combiner - */ - -typedef struct { - int64_t size; - grpc_resource_quota *resource_quota; - grpc_closure closure; -} rq_resize_args; - -static void rq_resize(grpc_exec_ctx *exec_ctx, void *args, grpc_error *error) { - rq_resize_args *a = (rq_resize_args *)args; - int64_t delta = a->size - a->resource_quota->size; - a->resource_quota->size += delta; - a->resource_quota->free_pool += delta; - rq_update_estimate(a->resource_quota); - rq_step_sched(exec_ctx, a->resource_quota); - grpc_resource_quota_unref_internal(exec_ctx, a->resource_quota); - gpr_free(a); -} - -static void rq_reclamation_done(grpc_exec_ctx *exec_ctx, void *rq, - grpc_error *error) { - grpc_resource_quota *resource_quota = (grpc_resource_quota *)rq; - resource_quota->reclaiming = false; - rq_step_sched(exec_ctx, resource_quota); - grpc_resource_quota_unref_internal(exec_ctx, resource_quota); -} - -/******************************************************************************* - * grpc_resource_quota api - */ - -/* Public API */ -grpc_resource_quota *grpc_resource_quota_create(const char *name) { - grpc_resource_quota *resource_quota = - (grpc_resource_quota *)gpr_malloc(sizeof(*resource_quota)); - gpr_ref_init(&resource_quota->refs, 1); - resource_quota->combiner = grpc_combiner_create(); - resource_quota->free_pool = INT64_MAX; - resource_quota->size = INT64_MAX; - gpr_atm_no_barrier_store(&resource_quota->last_size, GPR_ATM_MAX); - resource_quota->step_scheduled = false; - resource_quota->reclaiming = false; - gpr_atm_no_barrier_store(&resource_quota->memory_usage_estimation, 0); - if (name != NULL) { - resource_quota->name = gpr_strdup(name); - } else { - gpr_asprintf(&resource_quota->name, "anonymous_pool_%" PRIxPTR, - (intptr_t)resource_quota); - } - GRPC_CLOSURE_INIT(&resource_quota->rq_step_closure, rq_step, resource_quota, - grpc_combiner_finally_scheduler(resource_quota->combiner)); - GRPC_CLOSURE_INIT(&resource_quota->rq_reclamation_done_closure, - rq_reclamation_done, resource_quota, - grpc_combiner_scheduler(resource_quota->combiner)); - for (int i = 0; i < GRPC_RULIST_COUNT; i++) { - resource_quota->roots[i] = NULL; - } - return resource_quota; -} - -void grpc_resource_quota_unref_internal(grpc_exec_ctx *exec_ctx, - grpc_resource_quota *resource_quota) { - if (gpr_unref(&resource_quota->refs)) { - GRPC_COMBINER_UNREF(exec_ctx, resource_quota->combiner, "resource_quota"); - gpr_free(resource_quota->name); - gpr_free(resource_quota); - } -} - -/* Public API */ -void grpc_resource_quota_unref(grpc_resource_quota *resource_quota) { - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - grpc_resource_quota_unref_internal(&exec_ctx, resource_quota); - grpc_exec_ctx_finish(&exec_ctx); -} - -grpc_resource_quota *grpc_resource_quota_ref_internal( - grpc_resource_quota *resource_quota) { - gpr_ref(&resource_quota->refs); - return resource_quota; -} - -/* Public API */ -void grpc_resource_quota_ref(grpc_resource_quota *resource_quota) { - grpc_resource_quota_ref_internal(resource_quota); -} - -double grpc_resource_quota_get_memory_pressure( - grpc_resource_quota *resource_quota) { - return ((double)(gpr_atm_no_barrier_load( - &resource_quota->memory_usage_estimation))) / - ((double)MEMORY_USAGE_ESTIMATION_MAX); -} - -/* Public API */ -void grpc_resource_quota_resize(grpc_resource_quota *resource_quota, - size_t size) { - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - rq_resize_args *a = (rq_resize_args *)gpr_malloc(sizeof(*a)); - a->resource_quota = grpc_resource_quota_ref_internal(resource_quota); - a->size = (int64_t)size; - gpr_atm_no_barrier_store(&resource_quota->last_size, - (gpr_atm)GPR_MIN((size_t)GPR_ATM_MAX, size)); - GRPC_CLOSURE_INIT(&a->closure, rq_resize, a, grpc_schedule_on_exec_ctx); - GRPC_CLOSURE_SCHED(&exec_ctx, &a->closure, GRPC_ERROR_NONE); - grpc_exec_ctx_finish(&exec_ctx); -} - -size_t grpc_resource_quota_peek_size(grpc_resource_quota *resource_quota) { - return (size_t)gpr_atm_no_barrier_load(&resource_quota->last_size); -} - -/******************************************************************************* - * grpc_resource_user channel args api - */ - -grpc_resource_quota *grpc_resource_quota_from_channel_args( - const grpc_channel_args *channel_args) { - for (size_t i = 0; i < channel_args->num_args; i++) { - if (0 == strcmp(channel_args->args[i].key, GRPC_ARG_RESOURCE_QUOTA)) { - if (channel_args->args[i].type == GRPC_ARG_POINTER) { - return grpc_resource_quota_ref_internal( - (grpc_resource_quota *)channel_args->args[i].value.pointer.p); - } else { - gpr_log(GPR_DEBUG, GRPC_ARG_RESOURCE_QUOTA " should be a pointer"); - } - } - } - return grpc_resource_quota_create(NULL); -} - -static void *rq_copy(void *rq) { - grpc_resource_quota_ref((grpc_resource_quota *)rq); - return rq; -} - -static void rq_destroy(grpc_exec_ctx *exec_ctx, void *rq) { - grpc_resource_quota_unref_internal(exec_ctx, (grpc_resource_quota *)rq); -} - -static int rq_cmp(void *a, void *b) { return GPR_ICMP(a, b); } - -const grpc_arg_pointer_vtable *grpc_resource_quota_arg_vtable(void) { - static const grpc_arg_pointer_vtable vtable = {rq_copy, rq_destroy, rq_cmp}; - return &vtable; -} - -/******************************************************************************* - * grpc_resource_user api - */ - -grpc_resource_user *grpc_resource_user_create( - grpc_resource_quota *resource_quota, const char *name) { - grpc_resource_user *resource_user = - (grpc_resource_user *)gpr_malloc(sizeof(*resource_user)); - resource_user->resource_quota = - grpc_resource_quota_ref_internal(resource_quota); - GRPC_CLOSURE_INIT(&resource_user->allocate_closure, &ru_allocate, - resource_user, - grpc_combiner_scheduler(resource_quota->combiner)); - GRPC_CLOSURE_INIT(&resource_user->add_to_free_pool_closure, - &ru_add_to_free_pool, resource_user, - grpc_combiner_scheduler(resource_quota->combiner)); - GRPC_CLOSURE_INIT(&resource_user->post_reclaimer_closure[0], - &ru_post_benign_reclaimer, resource_user, - grpc_combiner_scheduler(resource_quota->combiner)); - GRPC_CLOSURE_INIT(&resource_user->post_reclaimer_closure[1], - &ru_post_destructive_reclaimer, resource_user, - grpc_combiner_scheduler(resource_quota->combiner)); - GRPC_CLOSURE_INIT(&resource_user->destroy_closure, &ru_destroy, resource_user, - grpc_combiner_scheduler(resource_quota->combiner)); - gpr_mu_init(&resource_user->mu); - gpr_atm_rel_store(&resource_user->refs, 1); - gpr_atm_rel_store(&resource_user->shutdown, 0); - resource_user->free_pool = 0; - grpc_closure_list_init(&resource_user->on_allocated); - resource_user->allocating = false; - resource_user->added_to_free_pool = false; - resource_user->reclaimers[0] = NULL; - resource_user->reclaimers[1] = NULL; - resource_user->new_reclaimers[0] = NULL; - resource_user->new_reclaimers[1] = NULL; - for (int i = 0; i < GRPC_RULIST_COUNT; i++) { - resource_user->links[i].next = resource_user->links[i].prev = NULL; - } - if (name != NULL) { - resource_user->name = gpr_strdup(name); - } else { - gpr_asprintf(&resource_user->name, "anonymous_resource_user_%" PRIxPTR, - (intptr_t)resource_user); - } - return resource_user; -} - -grpc_resource_quota *grpc_resource_user_quota( - grpc_resource_user *resource_user) { - return resource_user->resource_quota; -} - -static void ru_ref_by(grpc_resource_user *resource_user, gpr_atm amount) { - GPR_ASSERT(amount > 0); - GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&resource_user->refs, amount) != 0); -} - -static void ru_unref_by(grpc_exec_ctx *exec_ctx, - grpc_resource_user *resource_user, gpr_atm amount) { - GPR_ASSERT(amount > 0); - gpr_atm old = gpr_atm_full_fetch_add(&resource_user->refs, -amount); - GPR_ASSERT(old >= amount); - if (old == amount) { - GRPC_CLOSURE_SCHED(exec_ctx, &resource_user->destroy_closure, - GRPC_ERROR_NONE); - } -} - -void grpc_resource_user_ref(grpc_resource_user *resource_user) { - ru_ref_by(resource_user, 1); -} - -void grpc_resource_user_unref(grpc_exec_ctx *exec_ctx, - grpc_resource_user *resource_user) { - ru_unref_by(exec_ctx, resource_user, 1); -} - -void grpc_resource_user_shutdown(grpc_exec_ctx *exec_ctx, - grpc_resource_user *resource_user) { - if (gpr_atm_full_fetch_add(&resource_user->shutdown, 1) == 0) { - GRPC_CLOSURE_SCHED( - exec_ctx, - GRPC_CLOSURE_CREATE( - ru_shutdown, resource_user, - grpc_combiner_scheduler(resource_user->resource_quota->combiner)), - GRPC_ERROR_NONE); - } -} - -void grpc_resource_user_alloc(grpc_exec_ctx *exec_ctx, - grpc_resource_user *resource_user, size_t size, - grpc_closure *optional_on_done) { - gpr_mu_lock(&resource_user->mu); - ru_ref_by(resource_user, (gpr_atm)size); - resource_user->free_pool -= (int64_t)size; - if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { - gpr_log(GPR_DEBUG, "RQ %s %s: alloc %" PRIdPTR "; free_pool -> %" PRId64, - resource_user->resource_quota->name, resource_user->name, size, - resource_user->free_pool); - } - if (resource_user->free_pool < 0) { - grpc_closure_list_append(&resource_user->on_allocated, optional_on_done, - GRPC_ERROR_NONE); - if (!resource_user->allocating) { - resource_user->allocating = true; - GRPC_CLOSURE_SCHED(exec_ctx, &resource_user->allocate_closure, - GRPC_ERROR_NONE); - } - } else { - GRPC_CLOSURE_SCHED(exec_ctx, optional_on_done, GRPC_ERROR_NONE); - } - gpr_mu_unlock(&resource_user->mu); -} - -void grpc_resource_user_free(grpc_exec_ctx *exec_ctx, - grpc_resource_user *resource_user, size_t size) { - gpr_mu_lock(&resource_user->mu); - bool was_zero_or_negative = resource_user->free_pool <= 0; - resource_user->free_pool += (int64_t)size; - if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { - gpr_log(GPR_DEBUG, "RQ %s %s: free %" PRIdPTR "; free_pool -> %" PRId64, - resource_user->resource_quota->name, resource_user->name, size, - resource_user->free_pool); - } - bool is_bigger_than_zero = resource_user->free_pool > 0; - if (is_bigger_than_zero && was_zero_or_negative && - !resource_user->added_to_free_pool) { - resource_user->added_to_free_pool = true; - GRPC_CLOSURE_SCHED(exec_ctx, &resource_user->add_to_free_pool_closure, - GRPC_ERROR_NONE); - } - gpr_mu_unlock(&resource_user->mu); - ru_unref_by(exec_ctx, resource_user, (gpr_atm)size); -} - -void grpc_resource_user_post_reclaimer(grpc_exec_ctx *exec_ctx, - grpc_resource_user *resource_user, - bool destructive, - grpc_closure *closure) { - GPR_ASSERT(resource_user->new_reclaimers[destructive] == NULL); - resource_user->new_reclaimers[destructive] = closure; - GRPC_CLOSURE_SCHED(exec_ctx, - &resource_user->post_reclaimer_closure[destructive], - GRPC_ERROR_NONE); -} - -void grpc_resource_user_finish_reclamation(grpc_exec_ctx *exec_ctx, - grpc_resource_user *resource_user) { - if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { - gpr_log(GPR_DEBUG, "RQ %s %s: reclamation complete", - resource_user->resource_quota->name, resource_user->name); - } - GRPC_CLOSURE_SCHED( - exec_ctx, &resource_user->resource_quota->rq_reclamation_done_closure, - GRPC_ERROR_NONE); -} - -void grpc_resource_user_slice_allocator_init( - grpc_resource_user_slice_allocator *slice_allocator, - grpc_resource_user *resource_user, grpc_iomgr_cb_func cb, void *p) { - GRPC_CLOSURE_INIT(&slice_allocator->on_allocated, ru_allocated_slices, - slice_allocator, grpc_schedule_on_exec_ctx); - GRPC_CLOSURE_INIT(&slice_allocator->on_done, cb, p, - grpc_schedule_on_exec_ctx); - slice_allocator->resource_user = resource_user; -} - -void grpc_resource_user_alloc_slices( - grpc_exec_ctx *exec_ctx, - grpc_resource_user_slice_allocator *slice_allocator, size_t length, - size_t count, grpc_slice_buffer *dest) { - slice_allocator->length = length; - slice_allocator->count = count; - slice_allocator->dest = dest; - grpc_resource_user_alloc(exec_ctx, slice_allocator->resource_user, - count * length, &slice_allocator->on_allocated); -} - -grpc_slice grpc_resource_user_slice_malloc(grpc_exec_ctx *exec_ctx, - grpc_resource_user *resource_user, - size_t size) { - grpc_resource_user_alloc(exec_ctx, resource_user, size, NULL); - return ru_slice_create(resource_user, size); -} diff --git a/src/core/lib/iomgr/resource_quota.cc b/src/core/lib/iomgr/resource_quota.cc new file mode 100644 index 0000000000..4d69986fbc --- /dev/null +++ b/src/core/lib/iomgr/resource_quota.cc @@ -0,0 +1,868 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/resource_quota.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "src/core/lib/iomgr/combiner.h" + +grpc_tracer_flag grpc_resource_quota_trace = + GRPC_TRACER_INITIALIZER(false, "resource_quota"); + +#define MEMORY_USAGE_ESTIMATION_MAX 65536 + +/* Internal linked list pointers for a resource user */ +typedef struct { + grpc_resource_user *next; + grpc_resource_user *prev; +} grpc_resource_user_link; + +/* Resource users are kept in (potentially) several intrusive linked lists + at once. These are the list names. */ +typedef enum { + /* Resource users that are waiting for an allocation */ + GRPC_RULIST_AWAITING_ALLOCATION, + /* Resource users that have free memory available for internal reclamation */ + GRPC_RULIST_NON_EMPTY_FREE_POOL, + /* Resource users that have published a benign reclamation is available */ + GRPC_RULIST_RECLAIMER_BENIGN, + /* Resource users that have published a destructive reclamation is + available */ + GRPC_RULIST_RECLAIMER_DESTRUCTIVE, + /* Number of lists: must be last */ + GRPC_RULIST_COUNT +} grpc_rulist; + +struct grpc_resource_user { + /* The quota this resource user consumes from */ + grpc_resource_quota *resource_quota; + + /* Closure to schedule an allocation under the resource quota combiner lock */ + grpc_closure allocate_closure; + /* Closure to publish a non empty free pool under the resource quota combiner + lock */ + grpc_closure add_to_free_pool_closure; + + /* one ref for each ref call (released by grpc_resource_user_unref), and one + ref for each byte allocated (released by grpc_resource_user_free) */ + gpr_atm refs; + /* is this resource user unlocked? starts at 0, increases for each shutdown + call */ + gpr_atm shutdown; + + gpr_mu mu; + /* The amount of memory (in bytes) this user has cached for its own use: to + avoid quota contention, each resource user can keep some memory in + addition to what it is immediately using (e.g., for caching), and the quota + can pull it back under memory pressure. + This value can become negative if more memory has been requested than + existed in the free pool, at which point the quota is consulted to bring + this value non-negative (asynchronously). */ + int64_t free_pool; + /* A list of closures to call once free_pool becomes non-negative - ie when + all outstanding allocations have been granted. */ + grpc_closure_list on_allocated; + /* True if we are currently trying to allocate from the quota, false if not */ + bool allocating; + /* True if we are currently trying to add ourselves to the non-free quota + list, false otherwise */ + bool added_to_free_pool; + + /* Reclaimers: index 0 is the benign reclaimer, 1 is the destructive reclaimer + */ + grpc_closure *reclaimers[2]; + /* Reclaimers just posted: once we're in the combiner lock, we'll move them + to the array above */ + grpc_closure *new_reclaimers[2]; + /* Trampoline closures to finish reclamation and re-enter the quota combiner + lock */ + grpc_closure post_reclaimer_closure[2]; + + /* Closure to execute under the quota combiner to de-register and shutdown the + resource user */ + grpc_closure destroy_closure; + + /* Links in the various grpc_rulist lists */ + grpc_resource_user_link links[GRPC_RULIST_COUNT]; + + /* The name of this resource user, for debugging/tracing */ + char *name; +}; + +struct grpc_resource_quota { + /* refcount */ + gpr_refcount refs; + + /* estimate of current memory usage + scaled to the range [0..RESOURCE_USAGE_ESTIMATION_MAX] */ + gpr_atm memory_usage_estimation; + + /* Master combiner lock: all activity on a quota executes under this combiner + * (so no mutex is needed for this data structure) */ + grpc_combiner *combiner; + /* Size of the resource quota */ + int64_t size; + /* Amount of free memory in the resource quota */ + int64_t free_pool; + + gpr_atm last_size; + + /* Has rq_step been scheduled to occur? */ + bool step_scheduled; + /* Are we currently reclaiming memory */ + bool reclaiming; + /* Closure around rq_step */ + grpc_closure rq_step_closure; + /* Closure around rq_reclamation_done */ + grpc_closure rq_reclamation_done_closure; + + /* This is only really usable for debugging: it's always a stale pointer, but + a stale pointer that might just be fresh enough to guide us to where the + reclamation system is stuck */ + grpc_closure *debug_only_last_initiated_reclaimer; + grpc_resource_user *debug_only_last_reclaimer_resource_user; + + /* Roots of all resource user lists */ + grpc_resource_user *roots[GRPC_RULIST_COUNT]; + + char *name; +}; + +/******************************************************************************* + * list management + */ + +static void rulist_add_head(grpc_resource_user *resource_user, + grpc_rulist list) { + grpc_resource_quota *resource_quota = resource_user->resource_quota; + grpc_resource_user **root = &resource_quota->roots[list]; + if (*root == NULL) { + *root = resource_user; + resource_user->links[list].next = resource_user->links[list].prev = + resource_user; + } else { + resource_user->links[list].next = *root; + resource_user->links[list].prev = (*root)->links[list].prev; + resource_user->links[list].next->links[list].prev = + resource_user->links[list].prev->links[list].next = resource_user; + *root = resource_user; + } +} + +static void rulist_add_tail(grpc_resource_user *resource_user, + grpc_rulist list) { + grpc_resource_quota *resource_quota = resource_user->resource_quota; + grpc_resource_user **root = &resource_quota->roots[list]; + if (*root == NULL) { + *root = resource_user; + resource_user->links[list].next = resource_user->links[list].prev = + resource_user; + } else { + resource_user->links[list].next = (*root)->links[list].next; + resource_user->links[list].prev = *root; + resource_user->links[list].next->links[list].prev = + resource_user->links[list].prev->links[list].next = resource_user; + } +} + +static bool rulist_empty(grpc_resource_quota *resource_quota, + grpc_rulist list) { + return resource_quota->roots[list] == NULL; +} + +static grpc_resource_user *rulist_pop_head(grpc_resource_quota *resource_quota, + grpc_rulist list) { + grpc_resource_user **root = &resource_quota->roots[list]; + grpc_resource_user *resource_user = *root; + if (resource_user == NULL) { + return NULL; + } + if (resource_user->links[list].next == resource_user) { + *root = NULL; + } else { + resource_user->links[list].next->links[list].prev = + resource_user->links[list].prev; + resource_user->links[list].prev->links[list].next = + resource_user->links[list].next; + *root = resource_user->links[list].next; + } + resource_user->links[list].next = resource_user->links[list].prev = NULL; + return resource_user; +} + +static void rulist_remove(grpc_resource_user *resource_user, grpc_rulist list) { + if (resource_user->links[list].next == NULL) return; + grpc_resource_quota *resource_quota = resource_user->resource_quota; + if (resource_quota->roots[list] == resource_user) { + resource_quota->roots[list] = resource_user->links[list].next; + if (resource_quota->roots[list] == resource_user) { + resource_quota->roots[list] = NULL; + } + } + resource_user->links[list].next->links[list].prev = + resource_user->links[list].prev; + resource_user->links[list].prev->links[list].next = + resource_user->links[list].next; + resource_user->links[list].next = resource_user->links[list].prev = NULL; +} + +/******************************************************************************* + * resource quota state machine + */ + +static bool rq_alloc(grpc_exec_ctx *exec_ctx, + grpc_resource_quota *resource_quota); +static bool rq_reclaim_from_per_user_free_pool( + grpc_exec_ctx *exec_ctx, grpc_resource_quota *resource_quota); +static bool rq_reclaim(grpc_exec_ctx *exec_ctx, + grpc_resource_quota *resource_quota, bool destructive); + +static void rq_step(grpc_exec_ctx *exec_ctx, void *rq, grpc_error *error) { + grpc_resource_quota *resource_quota = (grpc_resource_quota *)rq; + resource_quota->step_scheduled = false; + do { + if (rq_alloc(exec_ctx, resource_quota)) goto done; + } while (rq_reclaim_from_per_user_free_pool(exec_ctx, resource_quota)); + + if (!rq_reclaim(exec_ctx, resource_quota, false)) { + rq_reclaim(exec_ctx, resource_quota, true); + } + +done: + grpc_resource_quota_unref_internal(exec_ctx, resource_quota); +} + +static void rq_step_sched(grpc_exec_ctx *exec_ctx, + grpc_resource_quota *resource_quota) { + if (resource_quota->step_scheduled) return; + resource_quota->step_scheduled = true; + grpc_resource_quota_ref_internal(resource_quota); + GRPC_CLOSURE_SCHED(exec_ctx, &resource_quota->rq_step_closure, + GRPC_ERROR_NONE); +} + +/* update the atomically available resource estimate - use no barriers since + timeliness of delivery really doesn't matter much */ +static void rq_update_estimate(grpc_resource_quota *resource_quota) { + gpr_atm memory_usage_estimation = MEMORY_USAGE_ESTIMATION_MAX; + if (resource_quota->size != 0) { + memory_usage_estimation = + GPR_CLAMP((gpr_atm)((1.0 - + ((double)resource_quota->free_pool) / + ((double)resource_quota->size)) * + MEMORY_USAGE_ESTIMATION_MAX), + 0, MEMORY_USAGE_ESTIMATION_MAX); + } + gpr_atm_no_barrier_store(&resource_quota->memory_usage_estimation, + memory_usage_estimation); +} + +/* returns true if all allocations are completed */ +static bool rq_alloc(grpc_exec_ctx *exec_ctx, + grpc_resource_quota *resource_quota) { + grpc_resource_user *resource_user; + while ((resource_user = rulist_pop_head(resource_quota, + GRPC_RULIST_AWAITING_ALLOCATION))) { + gpr_mu_lock(&resource_user->mu); + if (resource_user->free_pool < 0 && + -resource_user->free_pool <= resource_quota->free_pool) { + int64_t amt = -resource_user->free_pool; + resource_user->free_pool = 0; + resource_quota->free_pool -= amt; + rq_update_estimate(resource_quota); + if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { + gpr_log(GPR_DEBUG, "RQ %s %s: grant alloc %" PRId64 + " bytes; rq_free_pool -> %" PRId64, + resource_quota->name, resource_user->name, amt, + resource_quota->free_pool); + } + } else if (GRPC_TRACER_ON(grpc_resource_quota_trace) && + resource_user->free_pool >= 0) { + gpr_log(GPR_DEBUG, "RQ %s %s: discard already satisfied alloc request", + resource_quota->name, resource_user->name); + } + if (resource_user->free_pool >= 0) { + resource_user->allocating = false; + GRPC_CLOSURE_LIST_SCHED(exec_ctx, &resource_user->on_allocated); + gpr_mu_unlock(&resource_user->mu); + } else { + rulist_add_head(resource_user, GRPC_RULIST_AWAITING_ALLOCATION); + gpr_mu_unlock(&resource_user->mu); + return false; + } + } + return true; +} + +/* returns true if any memory could be reclaimed from buffers */ +static bool rq_reclaim_from_per_user_free_pool( + grpc_exec_ctx *exec_ctx, grpc_resource_quota *resource_quota) { + grpc_resource_user *resource_user; + while ((resource_user = rulist_pop_head(resource_quota, + GRPC_RULIST_NON_EMPTY_FREE_POOL))) { + gpr_mu_lock(&resource_user->mu); + if (resource_user->free_pool > 0) { + int64_t amt = resource_user->free_pool; + resource_user->free_pool = 0; + resource_quota->free_pool += amt; + rq_update_estimate(resource_quota); + if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { + gpr_log(GPR_DEBUG, "RQ %s %s: reclaim_from_per_user_free_pool %" PRId64 + " bytes; rq_free_pool -> %" PRId64, + resource_quota->name, resource_user->name, amt, + resource_quota->free_pool); + } + gpr_mu_unlock(&resource_user->mu); + return true; + } else { + gpr_mu_unlock(&resource_user->mu); + } + } + return false; +} + +/* returns true if reclamation is proceeding */ +static bool rq_reclaim(grpc_exec_ctx *exec_ctx, + grpc_resource_quota *resource_quota, bool destructive) { + if (resource_quota->reclaiming) return true; + grpc_rulist list = destructive ? GRPC_RULIST_RECLAIMER_DESTRUCTIVE + : GRPC_RULIST_RECLAIMER_BENIGN; + grpc_resource_user *resource_user = rulist_pop_head(resource_quota, list); + if (resource_user == NULL) return false; + if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { + gpr_log(GPR_DEBUG, "RQ %s %s: initiate %s reclamation", + resource_quota->name, resource_user->name, + destructive ? "destructive" : "benign"); + } + resource_quota->reclaiming = true; + grpc_resource_quota_ref_internal(resource_quota); + grpc_closure *c = resource_user->reclaimers[destructive]; + GPR_ASSERT(c); + resource_quota->debug_only_last_reclaimer_resource_user = resource_user; + resource_quota->debug_only_last_initiated_reclaimer = c; + resource_user->reclaimers[destructive] = NULL; + GRPC_CLOSURE_RUN(exec_ctx, c, GRPC_ERROR_NONE); + return true; +} + +/******************************************************************************* + * ru_slice: a slice implementation that is backed by a grpc_resource_user + */ + +typedef struct { + grpc_slice_refcount base; + gpr_refcount refs; + grpc_resource_user *resource_user; + size_t size; +} ru_slice_refcount; + +static void ru_slice_ref(void *p) { + ru_slice_refcount *rc = (ru_slice_refcount *)p; + gpr_ref(&rc->refs); +} + +static void ru_slice_unref(grpc_exec_ctx *exec_ctx, void *p) { + ru_slice_refcount *rc = (ru_slice_refcount *)p; + if (gpr_unref(&rc->refs)) { + grpc_resource_user_free(exec_ctx, rc->resource_user, rc->size); + gpr_free(rc); + } +} + +static const grpc_slice_refcount_vtable ru_slice_vtable = { + ru_slice_ref, ru_slice_unref, grpc_slice_default_eq_impl, + grpc_slice_default_hash_impl}; + +static grpc_slice ru_slice_create(grpc_resource_user *resource_user, + size_t size) { + ru_slice_refcount *rc = + (ru_slice_refcount *)gpr_malloc(sizeof(ru_slice_refcount) + size); + rc->base.vtable = &ru_slice_vtable; + rc->base.sub_refcount = &rc->base; + gpr_ref_init(&rc->refs, 1); + rc->resource_user = resource_user; + rc->size = size; + grpc_slice slice; + slice.refcount = &rc->base; + slice.data.refcounted.bytes = (uint8_t *)(rc + 1); + slice.data.refcounted.length = size; + return slice; +} + +/******************************************************************************* + * grpc_resource_quota internal implementation: resource user manipulation under + * the combiner + */ + +static void ru_allocate(grpc_exec_ctx *exec_ctx, void *ru, grpc_error *error) { + grpc_resource_user *resource_user = (grpc_resource_user *)ru; + if (rulist_empty(resource_user->resource_quota, + GRPC_RULIST_AWAITING_ALLOCATION)) { + rq_step_sched(exec_ctx, resource_user->resource_quota); + } + rulist_add_tail(resource_user, GRPC_RULIST_AWAITING_ALLOCATION); +} + +static void ru_add_to_free_pool(grpc_exec_ctx *exec_ctx, void *ru, + grpc_error *error) { + grpc_resource_user *resource_user = (grpc_resource_user *)ru; + if (!rulist_empty(resource_user->resource_quota, + GRPC_RULIST_AWAITING_ALLOCATION) && + rulist_empty(resource_user->resource_quota, + GRPC_RULIST_NON_EMPTY_FREE_POOL)) { + rq_step_sched(exec_ctx, resource_user->resource_quota); + } + rulist_add_tail(resource_user, GRPC_RULIST_NON_EMPTY_FREE_POOL); +} + +static bool ru_post_reclaimer(grpc_exec_ctx *exec_ctx, + grpc_resource_user *resource_user, + bool destructive) { + grpc_closure *closure = resource_user->new_reclaimers[destructive]; + GPR_ASSERT(closure != NULL); + resource_user->new_reclaimers[destructive] = NULL; + GPR_ASSERT(resource_user->reclaimers[destructive] == NULL); + if (gpr_atm_acq_load(&resource_user->shutdown) > 0) { + GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_CANCELLED); + return false; + } + resource_user->reclaimers[destructive] = closure; + return true; +} + +static void ru_post_benign_reclaimer(grpc_exec_ctx *exec_ctx, void *ru, + grpc_error *error) { + grpc_resource_user *resource_user = (grpc_resource_user *)ru; + if (!ru_post_reclaimer(exec_ctx, resource_user, false)) return; + if (!rulist_empty(resource_user->resource_quota, + GRPC_RULIST_AWAITING_ALLOCATION) && + rulist_empty(resource_user->resource_quota, + GRPC_RULIST_NON_EMPTY_FREE_POOL) && + rulist_empty(resource_user->resource_quota, + GRPC_RULIST_RECLAIMER_BENIGN)) { + rq_step_sched(exec_ctx, resource_user->resource_quota); + } + rulist_add_tail(resource_user, GRPC_RULIST_RECLAIMER_BENIGN); +} + +static void ru_post_destructive_reclaimer(grpc_exec_ctx *exec_ctx, void *ru, + grpc_error *error) { + grpc_resource_user *resource_user = (grpc_resource_user *)ru; + if (!ru_post_reclaimer(exec_ctx, resource_user, true)) return; + if (!rulist_empty(resource_user->resource_quota, + GRPC_RULIST_AWAITING_ALLOCATION) && + rulist_empty(resource_user->resource_quota, + GRPC_RULIST_NON_EMPTY_FREE_POOL) && + rulist_empty(resource_user->resource_quota, + GRPC_RULIST_RECLAIMER_BENIGN) && + rulist_empty(resource_user->resource_quota, + GRPC_RULIST_RECLAIMER_DESTRUCTIVE)) { + rq_step_sched(exec_ctx, resource_user->resource_quota); + } + rulist_add_tail(resource_user, GRPC_RULIST_RECLAIMER_DESTRUCTIVE); +} + +static void ru_shutdown(grpc_exec_ctx *exec_ctx, void *ru, grpc_error *error) { + grpc_resource_user *resource_user = (grpc_resource_user *)ru; + GRPC_CLOSURE_SCHED(exec_ctx, resource_user->reclaimers[0], + GRPC_ERROR_CANCELLED); + GRPC_CLOSURE_SCHED(exec_ctx, resource_user->reclaimers[1], + GRPC_ERROR_CANCELLED); + resource_user->reclaimers[0] = NULL; + resource_user->reclaimers[1] = NULL; + rulist_remove(resource_user, GRPC_RULIST_RECLAIMER_BENIGN); + rulist_remove(resource_user, GRPC_RULIST_RECLAIMER_DESTRUCTIVE); +} + +static void ru_destroy(grpc_exec_ctx *exec_ctx, void *ru, grpc_error *error) { + grpc_resource_user *resource_user = (grpc_resource_user *)ru; + GPR_ASSERT(gpr_atm_no_barrier_load(&resource_user->refs) == 0); + for (int i = 0; i < GRPC_RULIST_COUNT; i++) { + rulist_remove(resource_user, (grpc_rulist)i); + } + GRPC_CLOSURE_SCHED(exec_ctx, resource_user->reclaimers[0], + GRPC_ERROR_CANCELLED); + GRPC_CLOSURE_SCHED(exec_ctx, resource_user->reclaimers[1], + GRPC_ERROR_CANCELLED); + if (resource_user->free_pool != 0) { + resource_user->resource_quota->free_pool += resource_user->free_pool; + rq_step_sched(exec_ctx, resource_user->resource_quota); + } + grpc_resource_quota_unref_internal(exec_ctx, resource_user->resource_quota); + gpr_mu_destroy(&resource_user->mu); + gpr_free(resource_user->name); + gpr_free(resource_user); +} + +static void ru_allocated_slices(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error) { + grpc_resource_user_slice_allocator *slice_allocator = + (grpc_resource_user_slice_allocator *)arg; + if (error == GRPC_ERROR_NONE) { + for (size_t i = 0; i < slice_allocator->count; i++) { + grpc_slice_buffer_add_indexed( + slice_allocator->dest, ru_slice_create(slice_allocator->resource_user, + slice_allocator->length)); + } + } + GRPC_CLOSURE_RUN(exec_ctx, &slice_allocator->on_done, GRPC_ERROR_REF(error)); +} + +/******************************************************************************* + * grpc_resource_quota internal implementation: quota manipulation under the + * combiner + */ + +typedef struct { + int64_t size; + grpc_resource_quota *resource_quota; + grpc_closure closure; +} rq_resize_args; + +static void rq_resize(grpc_exec_ctx *exec_ctx, void *args, grpc_error *error) { + rq_resize_args *a = (rq_resize_args *)args; + int64_t delta = a->size - a->resource_quota->size; + a->resource_quota->size += delta; + a->resource_quota->free_pool += delta; + rq_update_estimate(a->resource_quota); + rq_step_sched(exec_ctx, a->resource_quota); + grpc_resource_quota_unref_internal(exec_ctx, a->resource_quota); + gpr_free(a); +} + +static void rq_reclamation_done(grpc_exec_ctx *exec_ctx, void *rq, + grpc_error *error) { + grpc_resource_quota *resource_quota = (grpc_resource_quota *)rq; + resource_quota->reclaiming = false; + rq_step_sched(exec_ctx, resource_quota); + grpc_resource_quota_unref_internal(exec_ctx, resource_quota); +} + +/******************************************************************************* + * grpc_resource_quota api + */ + +/* Public API */ +grpc_resource_quota *grpc_resource_quota_create(const char *name) { + grpc_resource_quota *resource_quota = + (grpc_resource_quota *)gpr_malloc(sizeof(*resource_quota)); + gpr_ref_init(&resource_quota->refs, 1); + resource_quota->combiner = grpc_combiner_create(); + resource_quota->free_pool = INT64_MAX; + resource_quota->size = INT64_MAX; + gpr_atm_no_barrier_store(&resource_quota->last_size, GPR_ATM_MAX); + resource_quota->step_scheduled = false; + resource_quota->reclaiming = false; + gpr_atm_no_barrier_store(&resource_quota->memory_usage_estimation, 0); + if (name != NULL) { + resource_quota->name = gpr_strdup(name); + } else { + gpr_asprintf(&resource_quota->name, "anonymous_pool_%" PRIxPTR, + (intptr_t)resource_quota); + } + GRPC_CLOSURE_INIT(&resource_quota->rq_step_closure, rq_step, resource_quota, + grpc_combiner_finally_scheduler(resource_quota->combiner)); + GRPC_CLOSURE_INIT(&resource_quota->rq_reclamation_done_closure, + rq_reclamation_done, resource_quota, + grpc_combiner_scheduler(resource_quota->combiner)); + for (int i = 0; i < GRPC_RULIST_COUNT; i++) { + resource_quota->roots[i] = NULL; + } + return resource_quota; +} + +void grpc_resource_quota_unref_internal(grpc_exec_ctx *exec_ctx, + grpc_resource_quota *resource_quota) { + if (gpr_unref(&resource_quota->refs)) { + GRPC_COMBINER_UNREF(exec_ctx, resource_quota->combiner, "resource_quota"); + gpr_free(resource_quota->name); + gpr_free(resource_quota); + } +} + +/* Public API */ +void grpc_resource_quota_unref(grpc_resource_quota *resource_quota) { + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + grpc_resource_quota_unref_internal(&exec_ctx, resource_quota); + grpc_exec_ctx_finish(&exec_ctx); +} + +grpc_resource_quota *grpc_resource_quota_ref_internal( + grpc_resource_quota *resource_quota) { + gpr_ref(&resource_quota->refs); + return resource_quota; +} + +/* Public API */ +void grpc_resource_quota_ref(grpc_resource_quota *resource_quota) { + grpc_resource_quota_ref_internal(resource_quota); +} + +double grpc_resource_quota_get_memory_pressure( + grpc_resource_quota *resource_quota) { + return ((double)(gpr_atm_no_barrier_load( + &resource_quota->memory_usage_estimation))) / + ((double)MEMORY_USAGE_ESTIMATION_MAX); +} + +/* Public API */ +void grpc_resource_quota_resize(grpc_resource_quota *resource_quota, + size_t size) { + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + rq_resize_args *a = (rq_resize_args *)gpr_malloc(sizeof(*a)); + a->resource_quota = grpc_resource_quota_ref_internal(resource_quota); + a->size = (int64_t)size; + gpr_atm_no_barrier_store(&resource_quota->last_size, + (gpr_atm)GPR_MIN((size_t)GPR_ATM_MAX, size)); + GRPC_CLOSURE_INIT(&a->closure, rq_resize, a, grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_SCHED(&exec_ctx, &a->closure, GRPC_ERROR_NONE); + grpc_exec_ctx_finish(&exec_ctx); +} + +size_t grpc_resource_quota_peek_size(grpc_resource_quota *resource_quota) { + return (size_t)gpr_atm_no_barrier_load(&resource_quota->last_size); +} + +/******************************************************************************* + * grpc_resource_user channel args api + */ + +grpc_resource_quota *grpc_resource_quota_from_channel_args( + const grpc_channel_args *channel_args) { + for (size_t i = 0; i < channel_args->num_args; i++) { + if (0 == strcmp(channel_args->args[i].key, GRPC_ARG_RESOURCE_QUOTA)) { + if (channel_args->args[i].type == GRPC_ARG_POINTER) { + return grpc_resource_quota_ref_internal( + (grpc_resource_quota *)channel_args->args[i].value.pointer.p); + } else { + gpr_log(GPR_DEBUG, GRPC_ARG_RESOURCE_QUOTA " should be a pointer"); + } + } + } + return grpc_resource_quota_create(NULL); +} + +static void *rq_copy(void *rq) { + grpc_resource_quota_ref((grpc_resource_quota *)rq); + return rq; +} + +static void rq_destroy(grpc_exec_ctx *exec_ctx, void *rq) { + grpc_resource_quota_unref_internal(exec_ctx, (grpc_resource_quota *)rq); +} + +static int rq_cmp(void *a, void *b) { return GPR_ICMP(a, b); } + +const grpc_arg_pointer_vtable *grpc_resource_quota_arg_vtable(void) { + static const grpc_arg_pointer_vtable vtable = {rq_copy, rq_destroy, rq_cmp}; + return &vtable; +} + +/******************************************************************************* + * grpc_resource_user api + */ + +grpc_resource_user *grpc_resource_user_create( + grpc_resource_quota *resource_quota, const char *name) { + grpc_resource_user *resource_user = + (grpc_resource_user *)gpr_malloc(sizeof(*resource_user)); + resource_user->resource_quota = + grpc_resource_quota_ref_internal(resource_quota); + GRPC_CLOSURE_INIT(&resource_user->allocate_closure, &ru_allocate, + resource_user, + grpc_combiner_scheduler(resource_quota->combiner)); + GRPC_CLOSURE_INIT(&resource_user->add_to_free_pool_closure, + &ru_add_to_free_pool, resource_user, + grpc_combiner_scheduler(resource_quota->combiner)); + GRPC_CLOSURE_INIT(&resource_user->post_reclaimer_closure[0], + &ru_post_benign_reclaimer, resource_user, + grpc_combiner_scheduler(resource_quota->combiner)); + GRPC_CLOSURE_INIT(&resource_user->post_reclaimer_closure[1], + &ru_post_destructive_reclaimer, resource_user, + grpc_combiner_scheduler(resource_quota->combiner)); + GRPC_CLOSURE_INIT(&resource_user->destroy_closure, &ru_destroy, resource_user, + grpc_combiner_scheduler(resource_quota->combiner)); + gpr_mu_init(&resource_user->mu); + gpr_atm_rel_store(&resource_user->refs, 1); + gpr_atm_rel_store(&resource_user->shutdown, 0); + resource_user->free_pool = 0; + grpc_closure_list_init(&resource_user->on_allocated); + resource_user->allocating = false; + resource_user->added_to_free_pool = false; + resource_user->reclaimers[0] = NULL; + resource_user->reclaimers[1] = NULL; + resource_user->new_reclaimers[0] = NULL; + resource_user->new_reclaimers[1] = NULL; + for (int i = 0; i < GRPC_RULIST_COUNT; i++) { + resource_user->links[i].next = resource_user->links[i].prev = NULL; + } + if (name != NULL) { + resource_user->name = gpr_strdup(name); + } else { + gpr_asprintf(&resource_user->name, "anonymous_resource_user_%" PRIxPTR, + (intptr_t)resource_user); + } + return resource_user; +} + +grpc_resource_quota *grpc_resource_user_quota( + grpc_resource_user *resource_user) { + return resource_user->resource_quota; +} + +static void ru_ref_by(grpc_resource_user *resource_user, gpr_atm amount) { + GPR_ASSERT(amount > 0); + GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&resource_user->refs, amount) != 0); +} + +static void ru_unref_by(grpc_exec_ctx *exec_ctx, + grpc_resource_user *resource_user, gpr_atm amount) { + GPR_ASSERT(amount > 0); + gpr_atm old = gpr_atm_full_fetch_add(&resource_user->refs, -amount); + GPR_ASSERT(old >= amount); + if (old == amount) { + GRPC_CLOSURE_SCHED(exec_ctx, &resource_user->destroy_closure, + GRPC_ERROR_NONE); + } +} + +void grpc_resource_user_ref(grpc_resource_user *resource_user) { + ru_ref_by(resource_user, 1); +} + +void grpc_resource_user_unref(grpc_exec_ctx *exec_ctx, + grpc_resource_user *resource_user) { + ru_unref_by(exec_ctx, resource_user, 1); +} + +void grpc_resource_user_shutdown(grpc_exec_ctx *exec_ctx, + grpc_resource_user *resource_user) { + if (gpr_atm_full_fetch_add(&resource_user->shutdown, 1) == 0) { + GRPC_CLOSURE_SCHED( + exec_ctx, + GRPC_CLOSURE_CREATE( + ru_shutdown, resource_user, + grpc_combiner_scheduler(resource_user->resource_quota->combiner)), + GRPC_ERROR_NONE); + } +} + +void grpc_resource_user_alloc(grpc_exec_ctx *exec_ctx, + grpc_resource_user *resource_user, size_t size, + grpc_closure *optional_on_done) { + gpr_mu_lock(&resource_user->mu); + ru_ref_by(resource_user, (gpr_atm)size); + resource_user->free_pool -= (int64_t)size; + if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { + gpr_log(GPR_DEBUG, "RQ %s %s: alloc %" PRIdPTR "; free_pool -> %" PRId64, + resource_user->resource_quota->name, resource_user->name, size, + resource_user->free_pool); + } + if (resource_user->free_pool < 0) { + grpc_closure_list_append(&resource_user->on_allocated, optional_on_done, + GRPC_ERROR_NONE); + if (!resource_user->allocating) { + resource_user->allocating = true; + GRPC_CLOSURE_SCHED(exec_ctx, &resource_user->allocate_closure, + GRPC_ERROR_NONE); + } + } else { + GRPC_CLOSURE_SCHED(exec_ctx, optional_on_done, GRPC_ERROR_NONE); + } + gpr_mu_unlock(&resource_user->mu); +} + +void grpc_resource_user_free(grpc_exec_ctx *exec_ctx, + grpc_resource_user *resource_user, size_t size) { + gpr_mu_lock(&resource_user->mu); + bool was_zero_or_negative = resource_user->free_pool <= 0; + resource_user->free_pool += (int64_t)size; + if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { + gpr_log(GPR_DEBUG, "RQ %s %s: free %" PRIdPTR "; free_pool -> %" PRId64, + resource_user->resource_quota->name, resource_user->name, size, + resource_user->free_pool); + } + bool is_bigger_than_zero = resource_user->free_pool > 0; + if (is_bigger_than_zero && was_zero_or_negative && + !resource_user->added_to_free_pool) { + resource_user->added_to_free_pool = true; + GRPC_CLOSURE_SCHED(exec_ctx, &resource_user->add_to_free_pool_closure, + GRPC_ERROR_NONE); + } + gpr_mu_unlock(&resource_user->mu); + ru_unref_by(exec_ctx, resource_user, (gpr_atm)size); +} + +void grpc_resource_user_post_reclaimer(grpc_exec_ctx *exec_ctx, + grpc_resource_user *resource_user, + bool destructive, + grpc_closure *closure) { + GPR_ASSERT(resource_user->new_reclaimers[destructive] == NULL); + resource_user->new_reclaimers[destructive] = closure; + GRPC_CLOSURE_SCHED(exec_ctx, + &resource_user->post_reclaimer_closure[destructive], + GRPC_ERROR_NONE); +} + +void grpc_resource_user_finish_reclamation(grpc_exec_ctx *exec_ctx, + grpc_resource_user *resource_user) { + if (GRPC_TRACER_ON(grpc_resource_quota_trace)) { + gpr_log(GPR_DEBUG, "RQ %s %s: reclamation complete", + resource_user->resource_quota->name, resource_user->name); + } + GRPC_CLOSURE_SCHED( + exec_ctx, &resource_user->resource_quota->rq_reclamation_done_closure, + GRPC_ERROR_NONE); +} + +void grpc_resource_user_slice_allocator_init( + grpc_resource_user_slice_allocator *slice_allocator, + grpc_resource_user *resource_user, grpc_iomgr_cb_func cb, void *p) { + GRPC_CLOSURE_INIT(&slice_allocator->on_allocated, ru_allocated_slices, + slice_allocator, grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_INIT(&slice_allocator->on_done, cb, p, + grpc_schedule_on_exec_ctx); + slice_allocator->resource_user = resource_user; +} + +void grpc_resource_user_alloc_slices( + grpc_exec_ctx *exec_ctx, + grpc_resource_user_slice_allocator *slice_allocator, size_t length, + size_t count, grpc_slice_buffer *dest) { + slice_allocator->length = length; + slice_allocator->count = count; + slice_allocator->dest = dest; + grpc_resource_user_alloc(exec_ctx, slice_allocator->resource_user, + count * length, &slice_allocator->on_allocated); +} + +grpc_slice grpc_resource_user_slice_malloc(grpc_exec_ctx *exec_ctx, + grpc_resource_user *resource_user, + size_t size) { + grpc_resource_user_alloc(exec_ctx, resource_user, size, NULL); + return ru_slice_create(resource_user, size); +} diff --git a/src/core/lib/iomgr/sockaddr_utils.c b/src/core/lib/iomgr/sockaddr_utils.c deleted file mode 100644 index 3f4145d104..0000000000 --- a/src/core/lib/iomgr/sockaddr_utils.c +++ /dev/null @@ -1,262 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/sockaddr_utils.h" - -#include -#include - -#include -#include -#include -#include -#include - -#include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/iomgr/socket_utils.h" -#include "src/core/lib/iomgr/unix_sockets_posix.h" -#include "src/core/lib/support/string.h" - -static const uint8_t kV4MappedPrefix[] = {0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0xff, 0xff}; - -int grpc_sockaddr_is_v4mapped(const grpc_resolved_address *resolved_addr, - grpc_resolved_address *resolved_addr4_out) { - GPR_ASSERT(resolved_addr != resolved_addr4_out); - const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; - struct sockaddr_in *addr4_out = - resolved_addr4_out == NULL - ? NULL - : (struct sockaddr_in *)resolved_addr4_out->addr; - if (addr->sa_family == AF_INET6) { - const struct sockaddr_in6 *addr6 = (const struct sockaddr_in6 *)addr; - if (memcmp(addr6->sin6_addr.s6_addr, kV4MappedPrefix, - sizeof(kV4MappedPrefix)) == 0) { - if (resolved_addr4_out != NULL) { - /* Normalize ::ffff:0.0.0.0/96 to IPv4. */ - memset(resolved_addr4_out, 0, sizeof(*resolved_addr4_out)); - addr4_out->sin_family = AF_INET; - /* s6_addr32 would be nice, but it's non-standard. */ - memcpy(&addr4_out->sin_addr, &addr6->sin6_addr.s6_addr[12], 4); - addr4_out->sin_port = addr6->sin6_port; - resolved_addr4_out->len = sizeof(struct sockaddr_in); - } - return 1; - } - } - return 0; -} - -int grpc_sockaddr_to_v4mapped(const grpc_resolved_address *resolved_addr, - grpc_resolved_address *resolved_addr6_out) { - GPR_ASSERT(resolved_addr != resolved_addr6_out); - const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; - struct sockaddr_in6 *addr6_out = - (struct sockaddr_in6 *)resolved_addr6_out->addr; - if (addr->sa_family == AF_INET) { - const struct sockaddr_in *addr4 = (const struct sockaddr_in *)addr; - memset(resolved_addr6_out, 0, sizeof(*resolved_addr6_out)); - addr6_out->sin6_family = AF_INET6; - memcpy(&addr6_out->sin6_addr.s6_addr[0], kV4MappedPrefix, 12); - memcpy(&addr6_out->sin6_addr.s6_addr[12], &addr4->sin_addr, 4); - addr6_out->sin6_port = addr4->sin_port; - resolved_addr6_out->len = sizeof(struct sockaddr_in6); - return 1; - } - return 0; -} - -int grpc_sockaddr_is_wildcard(const grpc_resolved_address *resolved_addr, - int *port_out) { - const struct sockaddr *addr; - grpc_resolved_address addr4_normalized; - if (grpc_sockaddr_is_v4mapped(resolved_addr, &addr4_normalized)) { - resolved_addr = &addr4_normalized; - } - addr = (const struct sockaddr *)resolved_addr->addr; - if (addr->sa_family == AF_INET) { - /* Check for 0.0.0.0 */ - const struct sockaddr_in *addr4 = (const struct sockaddr_in *)addr; - if (addr4->sin_addr.s_addr != 0) { - return 0; - } - *port_out = ntohs(addr4->sin_port); - return 1; - } else if (addr->sa_family == AF_INET6) { - /* Check for :: */ - const struct sockaddr_in6 *addr6 = (const struct sockaddr_in6 *)addr; - int i; - for (i = 0; i < 16; i++) { - if (addr6->sin6_addr.s6_addr[i] != 0) { - return 0; - } - } - *port_out = ntohs(addr6->sin6_port); - return 1; - } else { - return 0; - } -} - -void grpc_sockaddr_make_wildcards(int port, grpc_resolved_address *wild4_out, - grpc_resolved_address *wild6_out) { - grpc_sockaddr_make_wildcard4(port, wild4_out); - grpc_sockaddr_make_wildcard6(port, wild6_out); -} - -void grpc_sockaddr_make_wildcard4(int port, - grpc_resolved_address *resolved_wild_out) { - struct sockaddr_in *wild_out = (struct sockaddr_in *)resolved_wild_out->addr; - GPR_ASSERT(port >= 0 && port < 65536); - memset(resolved_wild_out, 0, sizeof(*resolved_wild_out)); - wild_out->sin_family = AF_INET; - wild_out->sin_port = htons((uint16_t)port); - resolved_wild_out->len = sizeof(struct sockaddr_in); -} - -void grpc_sockaddr_make_wildcard6(int port, - grpc_resolved_address *resolved_wild_out) { - struct sockaddr_in6 *wild_out = - (struct sockaddr_in6 *)resolved_wild_out->addr; - GPR_ASSERT(port >= 0 && port < 65536); - memset(resolved_wild_out, 0, sizeof(*resolved_wild_out)); - wild_out->sin6_family = AF_INET6; - wild_out->sin6_port = htons((uint16_t)port); - resolved_wild_out->len = sizeof(struct sockaddr_in6); -} - -int grpc_sockaddr_to_string(char **out, - const grpc_resolved_address *resolved_addr, - int normalize) { - const struct sockaddr *addr; - const int save_errno = errno; - grpc_resolved_address addr_normalized; - char ntop_buf[INET6_ADDRSTRLEN]; - const void *ip = NULL; - int port; - uint32_t sin6_scope_id = 0; - int ret; - - *out = NULL; - if (normalize && grpc_sockaddr_is_v4mapped(resolved_addr, &addr_normalized)) { - resolved_addr = &addr_normalized; - } - addr = (const struct sockaddr *)resolved_addr->addr; - if (addr->sa_family == AF_INET) { - const struct sockaddr_in *addr4 = (const struct sockaddr_in *)addr; - ip = &addr4->sin_addr; - port = ntohs(addr4->sin_port); - } else if (addr->sa_family == AF_INET6) { - const struct sockaddr_in6 *addr6 = (const struct sockaddr_in6 *)addr; - ip = &addr6->sin6_addr; - port = ntohs(addr6->sin6_port); - sin6_scope_id = addr6->sin6_scope_id; - } - if (ip != NULL && - grpc_inet_ntop(addr->sa_family, ip, ntop_buf, sizeof(ntop_buf)) != NULL) { - if (sin6_scope_id != 0) { - char *host_with_scope; - /* Enclose sin6_scope_id with the format defined in RFC 6784 section 2. */ - gpr_asprintf(&host_with_scope, "%s%%25%" PRIu32, ntop_buf, sin6_scope_id); - ret = gpr_join_host_port(out, host_with_scope, port); - gpr_free(host_with_scope); - } else { - ret = gpr_join_host_port(out, ntop_buf, port); - } - } else { - ret = gpr_asprintf(out, "(sockaddr family=%d)", addr->sa_family); - } - /* This is probably redundant, but we wouldn't want to log the wrong error. */ - errno = save_errno; - return ret; -} - -char *grpc_sockaddr_to_uri(const grpc_resolved_address *resolved_addr) { - grpc_resolved_address addr_normalized; - if (grpc_sockaddr_is_v4mapped(resolved_addr, &addr_normalized)) { - resolved_addr = &addr_normalized; - } - const char *scheme = grpc_sockaddr_get_uri_scheme(resolved_addr); - if (scheme == NULL || strcmp("unix", scheme) == 0) { - return grpc_sockaddr_to_uri_unix_if_possible(resolved_addr); - } - char *path = NULL; - char *uri_str = NULL; - if (grpc_sockaddr_to_string(&path, resolved_addr, - false /* suppress errors */) && - scheme != NULL) { - gpr_asprintf(&uri_str, "%s:%s", scheme, path); - } - gpr_free(path); - return uri_str != NULL ? uri_str : NULL; -} - -const char *grpc_sockaddr_get_uri_scheme( - const grpc_resolved_address *resolved_addr) { - const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; - switch (addr->sa_family) { - case AF_INET: - return "ipv4"; - case AF_INET6: - return "ipv6"; - case AF_UNIX: - return "unix"; - } - return NULL; -} - -int grpc_sockaddr_get_family(const grpc_resolved_address *resolved_addr) { - const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; - return addr->sa_family; -} - -int grpc_sockaddr_get_port(const grpc_resolved_address *resolved_addr) { - const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; - switch (addr->sa_family) { - case AF_INET: - return ntohs(((struct sockaddr_in *)addr)->sin_port); - case AF_INET6: - return ntohs(((struct sockaddr_in6 *)addr)->sin6_port); - default: - if (grpc_is_unix_socket(resolved_addr)) { - return 1; - } - gpr_log(GPR_ERROR, "Unknown socket family %d in grpc_sockaddr_get_port", - addr->sa_family); - return 0; - } -} - -int grpc_sockaddr_set_port(const grpc_resolved_address *resolved_addr, - int port) { - const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; - switch (addr->sa_family) { - case AF_INET: - GPR_ASSERT(port >= 0 && port < 65536); - ((struct sockaddr_in *)addr)->sin_port = htons((uint16_t)port); - return 1; - case AF_INET6: - GPR_ASSERT(port >= 0 && port < 65536); - ((struct sockaddr_in6 *)addr)->sin6_port = htons((uint16_t)port); - return 1; - default: - gpr_log(GPR_ERROR, "Unknown socket family %d in grpc_sockaddr_set_port", - addr->sa_family); - return 0; - } -} diff --git a/src/core/lib/iomgr/sockaddr_utils.cc b/src/core/lib/iomgr/sockaddr_utils.cc new file mode 100644 index 0000000000..3f4145d104 --- /dev/null +++ b/src/core/lib/iomgr/sockaddr_utils.cc @@ -0,0 +1,262 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/sockaddr_utils.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/socket_utils.h" +#include "src/core/lib/iomgr/unix_sockets_posix.h" +#include "src/core/lib/support/string.h" + +static const uint8_t kV4MappedPrefix[] = {0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0xff, 0xff}; + +int grpc_sockaddr_is_v4mapped(const grpc_resolved_address *resolved_addr, + grpc_resolved_address *resolved_addr4_out) { + GPR_ASSERT(resolved_addr != resolved_addr4_out); + const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; + struct sockaddr_in *addr4_out = + resolved_addr4_out == NULL + ? NULL + : (struct sockaddr_in *)resolved_addr4_out->addr; + if (addr->sa_family == AF_INET6) { + const struct sockaddr_in6 *addr6 = (const struct sockaddr_in6 *)addr; + if (memcmp(addr6->sin6_addr.s6_addr, kV4MappedPrefix, + sizeof(kV4MappedPrefix)) == 0) { + if (resolved_addr4_out != NULL) { + /* Normalize ::ffff:0.0.0.0/96 to IPv4. */ + memset(resolved_addr4_out, 0, sizeof(*resolved_addr4_out)); + addr4_out->sin_family = AF_INET; + /* s6_addr32 would be nice, but it's non-standard. */ + memcpy(&addr4_out->sin_addr, &addr6->sin6_addr.s6_addr[12], 4); + addr4_out->sin_port = addr6->sin6_port; + resolved_addr4_out->len = sizeof(struct sockaddr_in); + } + return 1; + } + } + return 0; +} + +int grpc_sockaddr_to_v4mapped(const grpc_resolved_address *resolved_addr, + grpc_resolved_address *resolved_addr6_out) { + GPR_ASSERT(resolved_addr != resolved_addr6_out); + const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; + struct sockaddr_in6 *addr6_out = + (struct sockaddr_in6 *)resolved_addr6_out->addr; + if (addr->sa_family == AF_INET) { + const struct sockaddr_in *addr4 = (const struct sockaddr_in *)addr; + memset(resolved_addr6_out, 0, sizeof(*resolved_addr6_out)); + addr6_out->sin6_family = AF_INET6; + memcpy(&addr6_out->sin6_addr.s6_addr[0], kV4MappedPrefix, 12); + memcpy(&addr6_out->sin6_addr.s6_addr[12], &addr4->sin_addr, 4); + addr6_out->sin6_port = addr4->sin_port; + resolved_addr6_out->len = sizeof(struct sockaddr_in6); + return 1; + } + return 0; +} + +int grpc_sockaddr_is_wildcard(const grpc_resolved_address *resolved_addr, + int *port_out) { + const struct sockaddr *addr; + grpc_resolved_address addr4_normalized; + if (grpc_sockaddr_is_v4mapped(resolved_addr, &addr4_normalized)) { + resolved_addr = &addr4_normalized; + } + addr = (const struct sockaddr *)resolved_addr->addr; + if (addr->sa_family == AF_INET) { + /* Check for 0.0.0.0 */ + const struct sockaddr_in *addr4 = (const struct sockaddr_in *)addr; + if (addr4->sin_addr.s_addr != 0) { + return 0; + } + *port_out = ntohs(addr4->sin_port); + return 1; + } else if (addr->sa_family == AF_INET6) { + /* Check for :: */ + const struct sockaddr_in6 *addr6 = (const struct sockaddr_in6 *)addr; + int i; + for (i = 0; i < 16; i++) { + if (addr6->sin6_addr.s6_addr[i] != 0) { + return 0; + } + } + *port_out = ntohs(addr6->sin6_port); + return 1; + } else { + return 0; + } +} + +void grpc_sockaddr_make_wildcards(int port, grpc_resolved_address *wild4_out, + grpc_resolved_address *wild6_out) { + grpc_sockaddr_make_wildcard4(port, wild4_out); + grpc_sockaddr_make_wildcard6(port, wild6_out); +} + +void grpc_sockaddr_make_wildcard4(int port, + grpc_resolved_address *resolved_wild_out) { + struct sockaddr_in *wild_out = (struct sockaddr_in *)resolved_wild_out->addr; + GPR_ASSERT(port >= 0 && port < 65536); + memset(resolved_wild_out, 0, sizeof(*resolved_wild_out)); + wild_out->sin_family = AF_INET; + wild_out->sin_port = htons((uint16_t)port); + resolved_wild_out->len = sizeof(struct sockaddr_in); +} + +void grpc_sockaddr_make_wildcard6(int port, + grpc_resolved_address *resolved_wild_out) { + struct sockaddr_in6 *wild_out = + (struct sockaddr_in6 *)resolved_wild_out->addr; + GPR_ASSERT(port >= 0 && port < 65536); + memset(resolved_wild_out, 0, sizeof(*resolved_wild_out)); + wild_out->sin6_family = AF_INET6; + wild_out->sin6_port = htons((uint16_t)port); + resolved_wild_out->len = sizeof(struct sockaddr_in6); +} + +int grpc_sockaddr_to_string(char **out, + const grpc_resolved_address *resolved_addr, + int normalize) { + const struct sockaddr *addr; + const int save_errno = errno; + grpc_resolved_address addr_normalized; + char ntop_buf[INET6_ADDRSTRLEN]; + const void *ip = NULL; + int port; + uint32_t sin6_scope_id = 0; + int ret; + + *out = NULL; + if (normalize && grpc_sockaddr_is_v4mapped(resolved_addr, &addr_normalized)) { + resolved_addr = &addr_normalized; + } + addr = (const struct sockaddr *)resolved_addr->addr; + if (addr->sa_family == AF_INET) { + const struct sockaddr_in *addr4 = (const struct sockaddr_in *)addr; + ip = &addr4->sin_addr; + port = ntohs(addr4->sin_port); + } else if (addr->sa_family == AF_INET6) { + const struct sockaddr_in6 *addr6 = (const struct sockaddr_in6 *)addr; + ip = &addr6->sin6_addr; + port = ntohs(addr6->sin6_port); + sin6_scope_id = addr6->sin6_scope_id; + } + if (ip != NULL && + grpc_inet_ntop(addr->sa_family, ip, ntop_buf, sizeof(ntop_buf)) != NULL) { + if (sin6_scope_id != 0) { + char *host_with_scope; + /* Enclose sin6_scope_id with the format defined in RFC 6784 section 2. */ + gpr_asprintf(&host_with_scope, "%s%%25%" PRIu32, ntop_buf, sin6_scope_id); + ret = gpr_join_host_port(out, host_with_scope, port); + gpr_free(host_with_scope); + } else { + ret = gpr_join_host_port(out, ntop_buf, port); + } + } else { + ret = gpr_asprintf(out, "(sockaddr family=%d)", addr->sa_family); + } + /* This is probably redundant, but we wouldn't want to log the wrong error. */ + errno = save_errno; + return ret; +} + +char *grpc_sockaddr_to_uri(const grpc_resolved_address *resolved_addr) { + grpc_resolved_address addr_normalized; + if (grpc_sockaddr_is_v4mapped(resolved_addr, &addr_normalized)) { + resolved_addr = &addr_normalized; + } + const char *scheme = grpc_sockaddr_get_uri_scheme(resolved_addr); + if (scheme == NULL || strcmp("unix", scheme) == 0) { + return grpc_sockaddr_to_uri_unix_if_possible(resolved_addr); + } + char *path = NULL; + char *uri_str = NULL; + if (grpc_sockaddr_to_string(&path, resolved_addr, + false /* suppress errors */) && + scheme != NULL) { + gpr_asprintf(&uri_str, "%s:%s", scheme, path); + } + gpr_free(path); + return uri_str != NULL ? uri_str : NULL; +} + +const char *grpc_sockaddr_get_uri_scheme( + const grpc_resolved_address *resolved_addr) { + const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; + switch (addr->sa_family) { + case AF_INET: + return "ipv4"; + case AF_INET6: + return "ipv6"; + case AF_UNIX: + return "unix"; + } + return NULL; +} + +int grpc_sockaddr_get_family(const grpc_resolved_address *resolved_addr) { + const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; + return addr->sa_family; +} + +int grpc_sockaddr_get_port(const grpc_resolved_address *resolved_addr) { + const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; + switch (addr->sa_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *)addr)->sin_port); + case AF_INET6: + return ntohs(((struct sockaddr_in6 *)addr)->sin6_port); + default: + if (grpc_is_unix_socket(resolved_addr)) { + return 1; + } + gpr_log(GPR_ERROR, "Unknown socket family %d in grpc_sockaddr_get_port", + addr->sa_family); + return 0; + } +} + +int grpc_sockaddr_set_port(const grpc_resolved_address *resolved_addr, + int port) { + const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; + switch (addr->sa_family) { + case AF_INET: + GPR_ASSERT(port >= 0 && port < 65536); + ((struct sockaddr_in *)addr)->sin_port = htons((uint16_t)port); + return 1; + case AF_INET6: + GPR_ASSERT(port >= 0 && port < 65536); + ((struct sockaddr_in6 *)addr)->sin6_port = htons((uint16_t)port); + return 1; + default: + gpr_log(GPR_ERROR, "Unknown socket family %d in grpc_sockaddr_set_port", + addr->sa_family); + return 0; + } +} diff --git a/src/core/lib/iomgr/socket_factory_posix.c b/src/core/lib/iomgr/socket_factory_posix.c deleted file mode 100644 index 8e907703ae..0000000000 --- a/src/core/lib/iomgr/socket_factory_posix.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/iomgr/socket_factory_posix.h" - -#include -#include -#include - -void grpc_socket_factory_init(grpc_socket_factory *factory, - const grpc_socket_factory_vtable *vtable) { - factory->vtable = vtable; - gpr_ref_init(&factory->refcount, 1); -} - -int grpc_socket_factory_socket(grpc_socket_factory *factory, int domain, - int type, int protocol) { - return factory->vtable->socket(factory, domain, type, protocol); -} - -int grpc_socket_factory_bind(grpc_socket_factory *factory, int sockfd, - const grpc_resolved_address *addr) { - return factory->vtable->bind(factory, sockfd, addr); -} - -int grpc_socket_factory_compare(grpc_socket_factory *a, - grpc_socket_factory *b) { - int c = GPR_ICMP(a, b); - if (c != 0) { - grpc_socket_factory *sma = a; - grpc_socket_factory *smb = b; - c = GPR_ICMP(sma->vtable, smb->vtable); - if (c == 0) { - c = sma->vtable->compare(sma, smb); - } - } - return c; -} - -grpc_socket_factory *grpc_socket_factory_ref(grpc_socket_factory *factory) { - gpr_ref(&factory->refcount); - return factory; -} - -void grpc_socket_factory_unref(grpc_socket_factory *factory) { - if (gpr_unref(&factory->refcount)) { - factory->vtable->destroy(factory); - } -} - -static void *socket_factory_arg_copy(void *p) { - return grpc_socket_factory_ref((grpc_socket_factory *)p); -} - -static void socket_factory_arg_destroy(grpc_exec_ctx *exec_ctx, void *p) { - grpc_socket_factory_unref((grpc_socket_factory *)p); -} - -static int socket_factory_cmp(void *a, void *b) { - return grpc_socket_factory_compare((grpc_socket_factory *)a, - (grpc_socket_factory *)b); -} - -static const grpc_arg_pointer_vtable socket_factory_arg_vtable = { - socket_factory_arg_copy, socket_factory_arg_destroy, socket_factory_cmp}; - -grpc_arg grpc_socket_factory_to_arg(grpc_socket_factory *factory) { - return grpc_channel_arg_pointer_create((char *)GRPC_ARG_SOCKET_FACTORY, - factory, &socket_factory_arg_vtable); -} - -#endif diff --git a/src/core/lib/iomgr/socket_factory_posix.cc b/src/core/lib/iomgr/socket_factory_posix.cc new file mode 100644 index 0000000000..8e907703ae --- /dev/null +++ b/src/core/lib/iomgr/socket_factory_posix.cc @@ -0,0 +1,92 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/iomgr/socket_factory_posix.h" + +#include +#include +#include + +void grpc_socket_factory_init(grpc_socket_factory *factory, + const grpc_socket_factory_vtable *vtable) { + factory->vtable = vtable; + gpr_ref_init(&factory->refcount, 1); +} + +int grpc_socket_factory_socket(grpc_socket_factory *factory, int domain, + int type, int protocol) { + return factory->vtable->socket(factory, domain, type, protocol); +} + +int grpc_socket_factory_bind(grpc_socket_factory *factory, int sockfd, + const grpc_resolved_address *addr) { + return factory->vtable->bind(factory, sockfd, addr); +} + +int grpc_socket_factory_compare(grpc_socket_factory *a, + grpc_socket_factory *b) { + int c = GPR_ICMP(a, b); + if (c != 0) { + grpc_socket_factory *sma = a; + grpc_socket_factory *smb = b; + c = GPR_ICMP(sma->vtable, smb->vtable); + if (c == 0) { + c = sma->vtable->compare(sma, smb); + } + } + return c; +} + +grpc_socket_factory *grpc_socket_factory_ref(grpc_socket_factory *factory) { + gpr_ref(&factory->refcount); + return factory; +} + +void grpc_socket_factory_unref(grpc_socket_factory *factory) { + if (gpr_unref(&factory->refcount)) { + factory->vtable->destroy(factory); + } +} + +static void *socket_factory_arg_copy(void *p) { + return grpc_socket_factory_ref((grpc_socket_factory *)p); +} + +static void socket_factory_arg_destroy(grpc_exec_ctx *exec_ctx, void *p) { + grpc_socket_factory_unref((grpc_socket_factory *)p); +} + +static int socket_factory_cmp(void *a, void *b) { + return grpc_socket_factory_compare((grpc_socket_factory *)a, + (grpc_socket_factory *)b); +} + +static const grpc_arg_pointer_vtable socket_factory_arg_vtable = { + socket_factory_arg_copy, socket_factory_arg_destroy, socket_factory_cmp}; + +grpc_arg grpc_socket_factory_to_arg(grpc_socket_factory *factory) { + return grpc_channel_arg_pointer_create((char *)GRPC_ARG_SOCKET_FACTORY, + factory, &socket_factory_arg_vtable); +} + +#endif diff --git a/src/core/lib/iomgr/socket_mutator.c b/src/core/lib/iomgr/socket_mutator.c deleted file mode 100644 index b0435d5a07..0000000000 --- a/src/core/lib/iomgr/socket_mutator.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/socket_mutator.h" - -#include "src/core/lib/channel/channel_args.h" - -#include -#include -#include - -void grpc_socket_mutator_init(grpc_socket_mutator *mutator, - const grpc_socket_mutator_vtable *vtable) { - mutator->vtable = vtable; - gpr_ref_init(&mutator->refcount, 1); -} - -grpc_socket_mutator *grpc_socket_mutator_ref(grpc_socket_mutator *mutator) { - gpr_ref(&mutator->refcount); - return mutator; -} - -bool grpc_socket_mutator_mutate_fd(grpc_socket_mutator *mutator, int fd) { - return mutator->vtable->mutate_fd(fd, mutator); -} - -int grpc_socket_mutator_compare(grpc_socket_mutator *a, - grpc_socket_mutator *b) { - int c = GPR_ICMP(a, b); - if (c != 0) { - grpc_socket_mutator *sma = a; - grpc_socket_mutator *smb = b; - c = GPR_ICMP(sma->vtable, smb->vtable); - if (c == 0) { - c = sma->vtable->compare(sma, smb); - } - } - return c; -} - -void grpc_socket_mutator_unref(grpc_socket_mutator *mutator) { - if (gpr_unref(&mutator->refcount)) { - mutator->vtable->destory(mutator); - } -} - -static void *socket_mutator_arg_copy(void *p) { - return grpc_socket_mutator_ref((grpc_socket_mutator *)p); -} - -static void socket_mutator_arg_destroy(grpc_exec_ctx *exec_ctx, void *p) { - grpc_socket_mutator_unref((grpc_socket_mutator *)p); -} - -static int socket_mutator_cmp(void *a, void *b) { - return grpc_socket_mutator_compare((grpc_socket_mutator *)a, - (grpc_socket_mutator *)b); -} - -static const grpc_arg_pointer_vtable socket_mutator_arg_vtable = { - socket_mutator_arg_copy, socket_mutator_arg_destroy, socket_mutator_cmp}; - -grpc_arg grpc_socket_mutator_to_arg(grpc_socket_mutator *mutator) { - return grpc_channel_arg_pointer_create((char *)GRPC_ARG_SOCKET_MUTATOR, - mutator, &socket_mutator_arg_vtable); -} diff --git a/src/core/lib/iomgr/socket_mutator.cc b/src/core/lib/iomgr/socket_mutator.cc new file mode 100644 index 0000000000..b0435d5a07 --- /dev/null +++ b/src/core/lib/iomgr/socket_mutator.cc @@ -0,0 +1,81 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/socket_mutator.h" + +#include "src/core/lib/channel/channel_args.h" + +#include +#include +#include + +void grpc_socket_mutator_init(grpc_socket_mutator *mutator, + const grpc_socket_mutator_vtable *vtable) { + mutator->vtable = vtable; + gpr_ref_init(&mutator->refcount, 1); +} + +grpc_socket_mutator *grpc_socket_mutator_ref(grpc_socket_mutator *mutator) { + gpr_ref(&mutator->refcount); + return mutator; +} + +bool grpc_socket_mutator_mutate_fd(grpc_socket_mutator *mutator, int fd) { + return mutator->vtable->mutate_fd(fd, mutator); +} + +int grpc_socket_mutator_compare(grpc_socket_mutator *a, + grpc_socket_mutator *b) { + int c = GPR_ICMP(a, b); + if (c != 0) { + grpc_socket_mutator *sma = a; + grpc_socket_mutator *smb = b; + c = GPR_ICMP(sma->vtable, smb->vtable); + if (c == 0) { + c = sma->vtable->compare(sma, smb); + } + } + return c; +} + +void grpc_socket_mutator_unref(grpc_socket_mutator *mutator) { + if (gpr_unref(&mutator->refcount)) { + mutator->vtable->destory(mutator); + } +} + +static void *socket_mutator_arg_copy(void *p) { + return grpc_socket_mutator_ref((grpc_socket_mutator *)p); +} + +static void socket_mutator_arg_destroy(grpc_exec_ctx *exec_ctx, void *p) { + grpc_socket_mutator_unref((grpc_socket_mutator *)p); +} + +static int socket_mutator_cmp(void *a, void *b) { + return grpc_socket_mutator_compare((grpc_socket_mutator *)a, + (grpc_socket_mutator *)b); +} + +static const grpc_arg_pointer_vtable socket_mutator_arg_vtable = { + socket_mutator_arg_copy, socket_mutator_arg_destroy, socket_mutator_cmp}; + +grpc_arg grpc_socket_mutator_to_arg(grpc_socket_mutator *mutator) { + return grpc_channel_arg_pointer_create((char *)GRPC_ARG_SOCKET_MUTATOR, + mutator, &socket_mutator_arg_vtable); +} diff --git a/src/core/lib/iomgr/socket_utils_common_posix.c b/src/core/lib/iomgr/socket_utils_common_posix.c deleted file mode 100644 index b8e2a0cdfd..0000000000 --- a/src/core/lib/iomgr/socket_utils_common_posix.c +++ /dev/null @@ -1,315 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/iomgr/socket_utils.h" -#include "src/core/lib/iomgr/socket_utils_posix.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/support/string.h" - -/* set a socket to non blocking mode */ -grpc_error *grpc_set_socket_nonblocking(int fd, int non_blocking) { - int oldflags = fcntl(fd, F_GETFL, 0); - if (oldflags < 0) { - return GRPC_OS_ERROR(errno, "fcntl"); - } - - if (non_blocking) { - oldflags |= O_NONBLOCK; - } else { - oldflags &= ~O_NONBLOCK; - } - - if (fcntl(fd, F_SETFL, oldflags) != 0) { - return GRPC_OS_ERROR(errno, "fcntl"); - } - - return GRPC_ERROR_NONE; -} - -grpc_error *grpc_set_socket_no_sigpipe_if_possible(int fd) { -#ifdef GRPC_HAVE_SO_NOSIGPIPE - int val = 1; - int newval; - socklen_t intlen = sizeof(newval); - if (0 != setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof(val))) { - return GRPC_OS_ERROR(errno, "setsockopt(SO_NOSIGPIPE)"); - } - if (0 != getsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &newval, &intlen)) { - return GRPC_OS_ERROR(errno, "getsockopt(SO_NOSIGPIPE)"); - } - if ((newval != 0) != (val != 0)) { - return GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to set SO_NOSIGPIPE"); - } -#endif - return GRPC_ERROR_NONE; -} - -grpc_error *grpc_set_socket_ip_pktinfo_if_possible(int fd) { -#ifdef GRPC_HAVE_IP_PKTINFO - int get_local_ip = 1; - if (0 != setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &get_local_ip, - sizeof(get_local_ip))) { - return GRPC_OS_ERROR(errno, "setsockopt(IP_PKTINFO)"); - } -#endif - return GRPC_ERROR_NONE; -} - -grpc_error *grpc_set_socket_ipv6_recvpktinfo_if_possible(int fd) { -#ifdef GRPC_HAVE_IPV6_RECVPKTINFO - int get_local_ip = 1; - if (0 != setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &get_local_ip, - sizeof(get_local_ip))) { - return GRPC_OS_ERROR(errno, "setsockopt(IPV6_RECVPKTINFO)"); - } -#endif - return GRPC_ERROR_NONE; -} - -grpc_error *grpc_set_socket_sndbuf(int fd, int buffer_size_bytes) { - return 0 == setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buffer_size_bytes, - sizeof(buffer_size_bytes)) - ? GRPC_ERROR_NONE - : GRPC_OS_ERROR(errno, "setsockopt(SO_SNDBUF)"); -} - -grpc_error *grpc_set_socket_rcvbuf(int fd, int buffer_size_bytes) { - return 0 == setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buffer_size_bytes, - sizeof(buffer_size_bytes)) - ? GRPC_ERROR_NONE - : GRPC_OS_ERROR(errno, "setsockopt(SO_RCVBUF)"); -} - -/* set a socket to close on exec */ -grpc_error *grpc_set_socket_cloexec(int fd, int close_on_exec) { - int oldflags = fcntl(fd, F_GETFD, 0); - if (oldflags < 0) { - return GRPC_OS_ERROR(errno, "fcntl"); - } - - if (close_on_exec) { - oldflags |= FD_CLOEXEC; - } else { - oldflags &= ~FD_CLOEXEC; - } - - if (fcntl(fd, F_SETFD, oldflags) != 0) { - return GRPC_OS_ERROR(errno, "fcntl"); - } - - return GRPC_ERROR_NONE; -} - -/* set a socket to reuse old addresses */ -grpc_error *grpc_set_socket_reuse_addr(int fd, int reuse) { - int val = (reuse != 0); - int newval; - socklen_t intlen = sizeof(newval); - if (0 != setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) { - return GRPC_OS_ERROR(errno, "setsockopt(SO_REUSEADDR)"); - } - if (0 != getsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &newval, &intlen)) { - return GRPC_OS_ERROR(errno, "getsockopt(SO_REUSEADDR)"); - } - if ((newval != 0) != val) { - return GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to set SO_REUSEADDR"); - } - - return GRPC_ERROR_NONE; -} - -/* set a socket to reuse old addresses */ -grpc_error *grpc_set_socket_reuse_port(int fd, int reuse) { -#ifndef SO_REUSEPORT - return GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "SO_REUSEPORT unavailable on compiling system"); -#else - int val = (reuse != 0); - int newval; - socklen_t intlen = sizeof(newval); - if (0 != setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val))) { - return GRPC_OS_ERROR(errno, "setsockopt(SO_REUSEPORT)"); - } - if (0 != getsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &newval, &intlen)) { - return GRPC_OS_ERROR(errno, "getsockopt(SO_REUSEPORT)"); - } - if ((newval != 0) != val) { - return GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to set SO_REUSEPORT"); - } - - return GRPC_ERROR_NONE; -#endif -} - -/* disable nagle */ -grpc_error *grpc_set_socket_low_latency(int fd, int low_latency) { - int val = (low_latency != 0); - int newval; - socklen_t intlen = sizeof(newval); - if (0 != setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val))) { - return GRPC_OS_ERROR(errno, "setsockopt(TCP_NODELAY)"); - } - if (0 != getsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &newval, &intlen)) { - return GRPC_OS_ERROR(errno, "getsockopt(TCP_NODELAY)"); - } - if ((newval != 0) != val) { - return GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to set TCP_NODELAY"); - } - return GRPC_ERROR_NONE; -} - -/* set a socket using a grpc_socket_mutator */ -grpc_error *grpc_set_socket_with_mutator(int fd, grpc_socket_mutator *mutator) { - GPR_ASSERT(mutator); - if (!grpc_socket_mutator_mutate_fd(mutator, fd)) { - return GRPC_ERROR_CREATE_FROM_STATIC_STRING("grpc_socket_mutator failed."); - } - return GRPC_ERROR_NONE; -} - -static gpr_once g_probe_ipv6_once = GPR_ONCE_INIT; -static int g_ipv6_loopback_available; - -static void probe_ipv6_once(void) { - int fd = socket(AF_INET6, SOCK_STREAM, 0); - g_ipv6_loopback_available = 0; - if (fd < 0) { - gpr_log(GPR_INFO, "Disabling AF_INET6 sockets because socket() failed."); - } else { - struct sockaddr_in6 addr; - memset(&addr, 0, sizeof(addr)); - addr.sin6_family = AF_INET6; - addr.sin6_addr.s6_addr[15] = 1; /* [::1]:0 */ - if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) == 0) { - g_ipv6_loopback_available = 1; - } else { - gpr_log(GPR_INFO, - "Disabling AF_INET6 sockets because ::1 is not available."); - } - close(fd); - } -} - -int grpc_ipv6_loopback_available(void) { - gpr_once_init(&g_probe_ipv6_once, probe_ipv6_once); - return g_ipv6_loopback_available; -} - -/* This should be 0 in production, but it may be enabled for testing or - debugging purposes, to simulate an environment where IPv6 sockets can't - also speak IPv4. */ -int grpc_forbid_dualstack_sockets_for_testing = 0; - -static int set_socket_dualstack(int fd) { - if (!grpc_forbid_dualstack_sockets_for_testing) { - const int off = 0; - return 0 == setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off)); - } else { - /* Force an IPv6-only socket, for testing purposes. */ - const int on = 1; - setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)); - return 0; - } -} - -static grpc_error *error_for_fd(int fd, const grpc_resolved_address *addr) { - if (fd >= 0) return GRPC_ERROR_NONE; - char *addr_str; - grpc_sockaddr_to_string(&addr_str, addr, 0); - grpc_error *err = grpc_error_set_str(GRPC_OS_ERROR(errno, "socket"), - GRPC_ERROR_STR_TARGET_ADDRESS, - grpc_slice_from_copied_string(addr_str)); - gpr_free(addr_str); - return err; -} - -grpc_error *grpc_create_dualstack_socket( - const grpc_resolved_address *resolved_addr, int type, int protocol, - grpc_dualstack_mode *dsmode, int *newfd) { - return grpc_create_dualstack_socket_using_factory(NULL, resolved_addr, type, - protocol, dsmode, newfd); -} - -static int create_socket(grpc_socket_factory *factory, int domain, int type, - int protocol) { - return (factory != NULL) - ? grpc_socket_factory_socket(factory, domain, type, protocol) - : socket(domain, type, protocol); -} - -grpc_error *grpc_create_dualstack_socket_using_factory( - grpc_socket_factory *factory, const grpc_resolved_address *resolved_addr, - int type, int protocol, grpc_dualstack_mode *dsmode, int *newfd) { - const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; - int family = addr->sa_family; - if (family == AF_INET6) { - if (grpc_ipv6_loopback_available()) { - *newfd = create_socket(factory, family, type, protocol); - } else { - *newfd = -1; - errno = EAFNOSUPPORT; - } - /* Check if we've got a valid dualstack socket. */ - if (*newfd >= 0 && set_socket_dualstack(*newfd)) { - *dsmode = GRPC_DSMODE_DUALSTACK; - return GRPC_ERROR_NONE; - } - /* If this isn't an IPv4 address, then return whatever we've got. */ - if (!grpc_sockaddr_is_v4mapped(resolved_addr, NULL)) { - *dsmode = GRPC_DSMODE_IPV6; - return error_for_fd(*newfd, resolved_addr); - } - /* Fall back to AF_INET. */ - if (*newfd >= 0) { - close(*newfd); - } - family = AF_INET; - } - *dsmode = family == AF_INET ? GRPC_DSMODE_IPV4 : GRPC_DSMODE_NONE; - *newfd = create_socket(factory, family, type, protocol); - return error_for_fd(*newfd, resolved_addr); -} - -const char *grpc_inet_ntop(int af, const void *src, char *dst, size_t size) { - GPR_ASSERT(size <= (socklen_t)-1); - return inet_ntop(af, src, dst, (socklen_t)size); -} - -#endif diff --git a/src/core/lib/iomgr/socket_utils_common_posix.cc b/src/core/lib/iomgr/socket_utils_common_posix.cc new file mode 100644 index 0000000000..b8e2a0cdfd --- /dev/null +++ b/src/core/lib/iomgr/socket_utils_common_posix.cc @@ -0,0 +1,315 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/iomgr/socket_utils.h" +#include "src/core/lib/iomgr/socket_utils_posix.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/support/string.h" + +/* set a socket to non blocking mode */ +grpc_error *grpc_set_socket_nonblocking(int fd, int non_blocking) { + int oldflags = fcntl(fd, F_GETFL, 0); + if (oldflags < 0) { + return GRPC_OS_ERROR(errno, "fcntl"); + } + + if (non_blocking) { + oldflags |= O_NONBLOCK; + } else { + oldflags &= ~O_NONBLOCK; + } + + if (fcntl(fd, F_SETFL, oldflags) != 0) { + return GRPC_OS_ERROR(errno, "fcntl"); + } + + return GRPC_ERROR_NONE; +} + +grpc_error *grpc_set_socket_no_sigpipe_if_possible(int fd) { +#ifdef GRPC_HAVE_SO_NOSIGPIPE + int val = 1; + int newval; + socklen_t intlen = sizeof(newval); + if (0 != setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof(val))) { + return GRPC_OS_ERROR(errno, "setsockopt(SO_NOSIGPIPE)"); + } + if (0 != getsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &newval, &intlen)) { + return GRPC_OS_ERROR(errno, "getsockopt(SO_NOSIGPIPE)"); + } + if ((newval != 0) != (val != 0)) { + return GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to set SO_NOSIGPIPE"); + } +#endif + return GRPC_ERROR_NONE; +} + +grpc_error *grpc_set_socket_ip_pktinfo_if_possible(int fd) { +#ifdef GRPC_HAVE_IP_PKTINFO + int get_local_ip = 1; + if (0 != setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &get_local_ip, + sizeof(get_local_ip))) { + return GRPC_OS_ERROR(errno, "setsockopt(IP_PKTINFO)"); + } +#endif + return GRPC_ERROR_NONE; +} + +grpc_error *grpc_set_socket_ipv6_recvpktinfo_if_possible(int fd) { +#ifdef GRPC_HAVE_IPV6_RECVPKTINFO + int get_local_ip = 1; + if (0 != setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &get_local_ip, + sizeof(get_local_ip))) { + return GRPC_OS_ERROR(errno, "setsockopt(IPV6_RECVPKTINFO)"); + } +#endif + return GRPC_ERROR_NONE; +} + +grpc_error *grpc_set_socket_sndbuf(int fd, int buffer_size_bytes) { + return 0 == setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buffer_size_bytes, + sizeof(buffer_size_bytes)) + ? GRPC_ERROR_NONE + : GRPC_OS_ERROR(errno, "setsockopt(SO_SNDBUF)"); +} + +grpc_error *grpc_set_socket_rcvbuf(int fd, int buffer_size_bytes) { + return 0 == setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buffer_size_bytes, + sizeof(buffer_size_bytes)) + ? GRPC_ERROR_NONE + : GRPC_OS_ERROR(errno, "setsockopt(SO_RCVBUF)"); +} + +/* set a socket to close on exec */ +grpc_error *grpc_set_socket_cloexec(int fd, int close_on_exec) { + int oldflags = fcntl(fd, F_GETFD, 0); + if (oldflags < 0) { + return GRPC_OS_ERROR(errno, "fcntl"); + } + + if (close_on_exec) { + oldflags |= FD_CLOEXEC; + } else { + oldflags &= ~FD_CLOEXEC; + } + + if (fcntl(fd, F_SETFD, oldflags) != 0) { + return GRPC_OS_ERROR(errno, "fcntl"); + } + + return GRPC_ERROR_NONE; +} + +/* set a socket to reuse old addresses */ +grpc_error *grpc_set_socket_reuse_addr(int fd, int reuse) { + int val = (reuse != 0); + int newval; + socklen_t intlen = sizeof(newval); + if (0 != setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) { + return GRPC_OS_ERROR(errno, "setsockopt(SO_REUSEADDR)"); + } + if (0 != getsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &newval, &intlen)) { + return GRPC_OS_ERROR(errno, "getsockopt(SO_REUSEADDR)"); + } + if ((newval != 0) != val) { + return GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to set SO_REUSEADDR"); + } + + return GRPC_ERROR_NONE; +} + +/* set a socket to reuse old addresses */ +grpc_error *grpc_set_socket_reuse_port(int fd, int reuse) { +#ifndef SO_REUSEPORT + return GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "SO_REUSEPORT unavailable on compiling system"); +#else + int val = (reuse != 0); + int newval; + socklen_t intlen = sizeof(newval); + if (0 != setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val))) { + return GRPC_OS_ERROR(errno, "setsockopt(SO_REUSEPORT)"); + } + if (0 != getsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &newval, &intlen)) { + return GRPC_OS_ERROR(errno, "getsockopt(SO_REUSEPORT)"); + } + if ((newval != 0) != val) { + return GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to set SO_REUSEPORT"); + } + + return GRPC_ERROR_NONE; +#endif +} + +/* disable nagle */ +grpc_error *grpc_set_socket_low_latency(int fd, int low_latency) { + int val = (low_latency != 0); + int newval; + socklen_t intlen = sizeof(newval); + if (0 != setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val))) { + return GRPC_OS_ERROR(errno, "setsockopt(TCP_NODELAY)"); + } + if (0 != getsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &newval, &intlen)) { + return GRPC_OS_ERROR(errno, "getsockopt(TCP_NODELAY)"); + } + if ((newval != 0) != val) { + return GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to set TCP_NODELAY"); + } + return GRPC_ERROR_NONE; +} + +/* set a socket using a grpc_socket_mutator */ +grpc_error *grpc_set_socket_with_mutator(int fd, grpc_socket_mutator *mutator) { + GPR_ASSERT(mutator); + if (!grpc_socket_mutator_mutate_fd(mutator, fd)) { + return GRPC_ERROR_CREATE_FROM_STATIC_STRING("grpc_socket_mutator failed."); + } + return GRPC_ERROR_NONE; +} + +static gpr_once g_probe_ipv6_once = GPR_ONCE_INIT; +static int g_ipv6_loopback_available; + +static void probe_ipv6_once(void) { + int fd = socket(AF_INET6, SOCK_STREAM, 0); + g_ipv6_loopback_available = 0; + if (fd < 0) { + gpr_log(GPR_INFO, "Disabling AF_INET6 sockets because socket() failed."); + } else { + struct sockaddr_in6 addr; + memset(&addr, 0, sizeof(addr)); + addr.sin6_family = AF_INET6; + addr.sin6_addr.s6_addr[15] = 1; /* [::1]:0 */ + if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) == 0) { + g_ipv6_loopback_available = 1; + } else { + gpr_log(GPR_INFO, + "Disabling AF_INET6 sockets because ::1 is not available."); + } + close(fd); + } +} + +int grpc_ipv6_loopback_available(void) { + gpr_once_init(&g_probe_ipv6_once, probe_ipv6_once); + return g_ipv6_loopback_available; +} + +/* This should be 0 in production, but it may be enabled for testing or + debugging purposes, to simulate an environment where IPv6 sockets can't + also speak IPv4. */ +int grpc_forbid_dualstack_sockets_for_testing = 0; + +static int set_socket_dualstack(int fd) { + if (!grpc_forbid_dualstack_sockets_for_testing) { + const int off = 0; + return 0 == setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off)); + } else { + /* Force an IPv6-only socket, for testing purposes. */ + const int on = 1; + setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)); + return 0; + } +} + +static grpc_error *error_for_fd(int fd, const grpc_resolved_address *addr) { + if (fd >= 0) return GRPC_ERROR_NONE; + char *addr_str; + grpc_sockaddr_to_string(&addr_str, addr, 0); + grpc_error *err = grpc_error_set_str(GRPC_OS_ERROR(errno, "socket"), + GRPC_ERROR_STR_TARGET_ADDRESS, + grpc_slice_from_copied_string(addr_str)); + gpr_free(addr_str); + return err; +} + +grpc_error *grpc_create_dualstack_socket( + const grpc_resolved_address *resolved_addr, int type, int protocol, + grpc_dualstack_mode *dsmode, int *newfd) { + return grpc_create_dualstack_socket_using_factory(NULL, resolved_addr, type, + protocol, dsmode, newfd); +} + +static int create_socket(grpc_socket_factory *factory, int domain, int type, + int protocol) { + return (factory != NULL) + ? grpc_socket_factory_socket(factory, domain, type, protocol) + : socket(domain, type, protocol); +} + +grpc_error *grpc_create_dualstack_socket_using_factory( + grpc_socket_factory *factory, const grpc_resolved_address *resolved_addr, + int type, int protocol, grpc_dualstack_mode *dsmode, int *newfd) { + const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; + int family = addr->sa_family; + if (family == AF_INET6) { + if (grpc_ipv6_loopback_available()) { + *newfd = create_socket(factory, family, type, protocol); + } else { + *newfd = -1; + errno = EAFNOSUPPORT; + } + /* Check if we've got a valid dualstack socket. */ + if (*newfd >= 0 && set_socket_dualstack(*newfd)) { + *dsmode = GRPC_DSMODE_DUALSTACK; + return GRPC_ERROR_NONE; + } + /* If this isn't an IPv4 address, then return whatever we've got. */ + if (!grpc_sockaddr_is_v4mapped(resolved_addr, NULL)) { + *dsmode = GRPC_DSMODE_IPV6; + return error_for_fd(*newfd, resolved_addr); + } + /* Fall back to AF_INET. */ + if (*newfd >= 0) { + close(*newfd); + } + family = AF_INET; + } + *dsmode = family == AF_INET ? GRPC_DSMODE_IPV4 : GRPC_DSMODE_NONE; + *newfd = create_socket(factory, family, type, protocol); + return error_for_fd(*newfd, resolved_addr); +} + +const char *grpc_inet_ntop(int af, const void *src, char *dst, size_t size) { + GPR_ASSERT(size <= (socklen_t)-1); + return inet_ntop(af, src, dst, (socklen_t)size); +} + +#endif diff --git a/src/core/lib/iomgr/socket_utils_linux.c b/src/core/lib/iomgr/socket_utils_linux.c deleted file mode 100644 index e7b094d216..0000000000 --- a/src/core/lib/iomgr/socket_utils_linux.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_LINUX_SOCKETUTILS - -#include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/iomgr/socket_utils_posix.h" - -#include - -#include -#include - -int grpc_accept4(int sockfd, grpc_resolved_address *resolved_addr, int nonblock, - int cloexec) { - int flags = 0; - GPR_ASSERT(sizeof(socklen_t) <= sizeof(size_t)); - GPR_ASSERT(resolved_addr->len <= (socklen_t)-1); - flags |= nonblock ? SOCK_NONBLOCK : 0; - flags |= cloexec ? SOCK_CLOEXEC : 0; - return accept4(sockfd, (struct sockaddr *)resolved_addr->addr, - (socklen_t *)&resolved_addr->len, flags); -} - -#endif diff --git a/src/core/lib/iomgr/socket_utils_linux.cc b/src/core/lib/iomgr/socket_utils_linux.cc new file mode 100644 index 0000000000..e7b094d216 --- /dev/null +++ b/src/core/lib/iomgr/socket_utils_linux.cc @@ -0,0 +1,42 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_LINUX_SOCKETUTILS + +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/socket_utils_posix.h" + +#include + +#include +#include + +int grpc_accept4(int sockfd, grpc_resolved_address *resolved_addr, int nonblock, + int cloexec) { + int flags = 0; + GPR_ASSERT(sizeof(socklen_t) <= sizeof(size_t)); + GPR_ASSERT(resolved_addr->len <= (socklen_t)-1); + flags |= nonblock ? SOCK_NONBLOCK : 0; + flags |= cloexec ? SOCK_CLOEXEC : 0; + return accept4(sockfd, (struct sockaddr *)resolved_addr->addr, + (socklen_t *)&resolved_addr->len, flags); +} + +#endif diff --git a/src/core/lib/iomgr/socket_utils_posix.c b/src/core/lib/iomgr/socket_utils_posix.c deleted file mode 100644 index dfd1ffd1e3..0000000000 --- a/src/core/lib/iomgr/socket_utils_posix.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKETUTILS - -#include "src/core/lib/iomgr/socket_utils_posix.h" - -#include -#include -#include - -#include -#include "src/core/lib/iomgr/sockaddr.h" - -int grpc_accept4(int sockfd, grpc_resolved_address *resolved_addr, int nonblock, - int cloexec) { - int fd, flags; - GPR_ASSERT(sizeof(socklen_t) <= sizeof(size_t)); - GPR_ASSERT(resolved_addr->len <= (socklen_t)-1); - fd = accept(sockfd, (struct sockaddr *)resolved_addr->addr, - (socklen_t *)&resolved_addr->len); - if (fd >= 0) { - if (nonblock) { - flags = fcntl(fd, F_GETFL, 0); - if (flags < 0) goto close_and_error; - if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) != 0) goto close_and_error; - } - if (cloexec) { - flags = fcntl(fd, F_GETFD, 0); - if (flags < 0) goto close_and_error; - if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) != 0) goto close_and_error; - } - } - return fd; - -close_and_error: - close(fd); - return -1; -} - -#endif /* GRPC_POSIX_SOCKETUTILS */ diff --git a/src/core/lib/iomgr/socket_utils_posix.cc b/src/core/lib/iomgr/socket_utils_posix.cc new file mode 100644 index 0000000000..dfd1ffd1e3 --- /dev/null +++ b/src/core/lib/iomgr/socket_utils_posix.cc @@ -0,0 +1,58 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKETUTILS + +#include "src/core/lib/iomgr/socket_utils_posix.h" + +#include +#include +#include + +#include +#include "src/core/lib/iomgr/sockaddr.h" + +int grpc_accept4(int sockfd, grpc_resolved_address *resolved_addr, int nonblock, + int cloexec) { + int fd, flags; + GPR_ASSERT(sizeof(socklen_t) <= sizeof(size_t)); + GPR_ASSERT(resolved_addr->len <= (socklen_t)-1); + fd = accept(sockfd, (struct sockaddr *)resolved_addr->addr, + (socklen_t *)&resolved_addr->len); + if (fd >= 0) { + if (nonblock) { + flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) goto close_and_error; + if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) != 0) goto close_and_error; + } + if (cloexec) { + flags = fcntl(fd, F_GETFD, 0); + if (flags < 0) goto close_and_error; + if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) != 0) goto close_and_error; + } + } + return fd; + +close_and_error: + close(fd); + return -1; +} + +#endif /* GRPC_POSIX_SOCKETUTILS */ diff --git a/src/core/lib/iomgr/socket_utils_uv.c b/src/core/lib/iomgr/socket_utils_uv.c deleted file mode 100644 index 0f7de4dfad..0000000000 --- a/src/core/lib/iomgr/socket_utils_uv.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_UV - -#include - -#include "src/core/lib/iomgr/socket_utils.h" - -#include - -const char *grpc_inet_ntop(int af, const void *src, char *dst, size_t size) { - uv_inet_ntop(af, src, dst, size); - return dst; -} - -#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/socket_utils_uv.cc b/src/core/lib/iomgr/socket_utils_uv.cc new file mode 100644 index 0000000000..0f7de4dfad --- /dev/null +++ b/src/core/lib/iomgr/socket_utils_uv.cc @@ -0,0 +1,34 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_UV + +#include + +#include "src/core/lib/iomgr/socket_utils.h" + +#include + +const char *grpc_inet_ntop(int af, const void *src, char *dst, size_t size) { + uv_inet_ntop(af, src, dst, size); + return dst; +} + +#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/socket_utils_windows.c b/src/core/lib/iomgr/socket_utils_windows.c deleted file mode 100644 index 6e85e4b61f..0000000000 --- a/src/core/lib/iomgr/socket_utils_windows.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_WINDOWS_SOCKETUTILS - -#include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/iomgr/socket_utils.h" - -#include - -const char *grpc_inet_ntop(int af, const void *src, char *dst, size_t size) { - /* Windows InetNtopA wants a mutable ip pointer */ - return InetNtopA(af, (void *)src, dst, size); -} - -#endif /* GRPC_WINDOWS_SOCKETUTILS */ diff --git a/src/core/lib/iomgr/socket_utils_windows.cc b/src/core/lib/iomgr/socket_utils_windows.cc new file mode 100644 index 0000000000..6e85e4b61f --- /dev/null +++ b/src/core/lib/iomgr/socket_utils_windows.cc @@ -0,0 +1,33 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_WINDOWS_SOCKETUTILS + +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/socket_utils.h" + +#include + +const char *grpc_inet_ntop(int af, const void *src, char *dst, size_t size) { + /* Windows InetNtopA wants a mutable ip pointer */ + return InetNtopA(af, (void *)src, dst, size); +} + +#endif /* GRPC_WINDOWS_SOCKETUTILS */ diff --git a/src/core/lib/iomgr/socket_windows.c b/src/core/lib/iomgr/socket_windows.c deleted file mode 100644 index a0d731b942..0000000000 --- a/src/core/lib/iomgr/socket_windows.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_WINSOCK_SOCKET - -#include - -// must be included after winsock2.h -#include - -#include -#include -#include -#include - -#include "src/core/lib/iomgr/iocp_windows.h" -#include "src/core/lib/iomgr/iomgr_internal.h" -#include "src/core/lib/iomgr/pollset.h" -#include "src/core/lib/iomgr/pollset_windows.h" -#include "src/core/lib/iomgr/socket_windows.h" - -grpc_winsocket *grpc_winsocket_create(SOCKET socket, const char *name) { - char *final_name; - grpc_winsocket *r = gpr_malloc(sizeof(grpc_winsocket)); - memset(r, 0, sizeof(grpc_winsocket)); - r->socket = socket; - gpr_mu_init(&r->state_mu); - gpr_asprintf(&final_name, "%s:socket=0x%p", name, r); - grpc_iomgr_register_object(&r->iomgr_object, final_name); - gpr_free(final_name); - grpc_iocp_add_socket(r); - return r; -} - -/* Schedule a shutdown of the socket operations. Will call the pending - operations to abort them. We need to do that this way because of the - various callsites of that function, which happens to be in various - mutex hold states, and that'd be unsafe to call them directly. */ -void grpc_winsocket_shutdown(grpc_winsocket *winsocket) { - /* Grab the function pointer for DisconnectEx for that specific socket. - It may change depending on the interface. */ - int status; - GUID guid = WSAID_DISCONNECTEX; - LPFN_DISCONNECTEX DisconnectEx; - DWORD ioctl_num_bytes; - - gpr_mu_lock(&winsocket->state_mu); - if (winsocket->shutdown_called) { - gpr_mu_unlock(&winsocket->state_mu); - return; - } - winsocket->shutdown_called = true; - gpr_mu_unlock(&winsocket->state_mu); - - status = WSAIoctl(winsocket->socket, SIO_GET_EXTENSION_FUNCTION_POINTER, - &guid, sizeof(guid), &DisconnectEx, sizeof(DisconnectEx), - &ioctl_num_bytes, NULL, NULL); - - if (status == 0) { - DisconnectEx(winsocket->socket, NULL, 0, 0); - } else { - char *utf8_message = gpr_format_message(WSAGetLastError()); - gpr_log(GPR_INFO, "Unable to retrieve DisconnectEx pointer : %s", - utf8_message); - gpr_free(utf8_message); - } - closesocket(winsocket->socket); -} - -static void destroy(grpc_winsocket *winsocket) { - grpc_iomgr_unregister_object(&winsocket->iomgr_object); - gpr_mu_destroy(&winsocket->state_mu); - gpr_free(winsocket); -} - -static bool check_destroyable(grpc_winsocket *winsocket) { - return winsocket->destroy_called == true && - winsocket->write_info.closure == NULL && - winsocket->read_info.closure == NULL; -} - -void grpc_winsocket_destroy(grpc_winsocket *winsocket) { - gpr_mu_lock(&winsocket->state_mu); - GPR_ASSERT(!winsocket->destroy_called); - winsocket->destroy_called = true; - bool should_destroy = check_destroyable(winsocket); - gpr_mu_unlock(&winsocket->state_mu); - if (should_destroy) destroy(winsocket); -} - -/* Calling notify_on_read or write means either of two things: --) The IOCP already completed in the background, and we need to call -the callback now. --) The IOCP hasn't completed yet, and we're queuing it for later. */ -static void socket_notify_on_iocp(grpc_exec_ctx *exec_ctx, - grpc_winsocket *socket, grpc_closure *closure, - grpc_winsocket_callback_info *info) { - GPR_ASSERT(info->closure == NULL); - gpr_mu_lock(&socket->state_mu); - if (info->has_pending_iocp) { - info->has_pending_iocp = 0; - GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); - } else { - info->closure = closure; - } - gpr_mu_unlock(&socket->state_mu); -} - -void grpc_socket_notify_on_write(grpc_exec_ctx *exec_ctx, - grpc_winsocket *socket, - grpc_closure *closure) { - socket_notify_on_iocp(exec_ctx, socket, closure, &socket->write_info); -} - -void grpc_socket_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_winsocket *socket, - grpc_closure *closure) { - socket_notify_on_iocp(exec_ctx, socket, closure, &socket->read_info); -} - -void grpc_socket_become_ready(grpc_exec_ctx *exec_ctx, grpc_winsocket *socket, - grpc_winsocket_callback_info *info) { - GPR_ASSERT(!info->has_pending_iocp); - gpr_mu_lock(&socket->state_mu); - if (info->closure) { - GRPC_CLOSURE_SCHED(exec_ctx, info->closure, GRPC_ERROR_NONE); - info->closure = NULL; - } else { - info->has_pending_iocp = 1; - } - bool should_destroy = check_destroyable(socket); - gpr_mu_unlock(&socket->state_mu); - if (should_destroy) destroy(socket); -} - -#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/socket_windows.cc b/src/core/lib/iomgr/socket_windows.cc new file mode 100644 index 0000000000..a0d731b942 --- /dev/null +++ b/src/core/lib/iomgr/socket_windows.cc @@ -0,0 +1,152 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_WINSOCK_SOCKET + +#include + +// must be included after winsock2.h +#include + +#include +#include +#include +#include + +#include "src/core/lib/iomgr/iocp_windows.h" +#include "src/core/lib/iomgr/iomgr_internal.h" +#include "src/core/lib/iomgr/pollset.h" +#include "src/core/lib/iomgr/pollset_windows.h" +#include "src/core/lib/iomgr/socket_windows.h" + +grpc_winsocket *grpc_winsocket_create(SOCKET socket, const char *name) { + char *final_name; + grpc_winsocket *r = gpr_malloc(sizeof(grpc_winsocket)); + memset(r, 0, sizeof(grpc_winsocket)); + r->socket = socket; + gpr_mu_init(&r->state_mu); + gpr_asprintf(&final_name, "%s:socket=0x%p", name, r); + grpc_iomgr_register_object(&r->iomgr_object, final_name); + gpr_free(final_name); + grpc_iocp_add_socket(r); + return r; +} + +/* Schedule a shutdown of the socket operations. Will call the pending + operations to abort them. We need to do that this way because of the + various callsites of that function, which happens to be in various + mutex hold states, and that'd be unsafe to call them directly. */ +void grpc_winsocket_shutdown(grpc_winsocket *winsocket) { + /* Grab the function pointer for DisconnectEx for that specific socket. + It may change depending on the interface. */ + int status; + GUID guid = WSAID_DISCONNECTEX; + LPFN_DISCONNECTEX DisconnectEx; + DWORD ioctl_num_bytes; + + gpr_mu_lock(&winsocket->state_mu); + if (winsocket->shutdown_called) { + gpr_mu_unlock(&winsocket->state_mu); + return; + } + winsocket->shutdown_called = true; + gpr_mu_unlock(&winsocket->state_mu); + + status = WSAIoctl(winsocket->socket, SIO_GET_EXTENSION_FUNCTION_POINTER, + &guid, sizeof(guid), &DisconnectEx, sizeof(DisconnectEx), + &ioctl_num_bytes, NULL, NULL); + + if (status == 0) { + DisconnectEx(winsocket->socket, NULL, 0, 0); + } else { + char *utf8_message = gpr_format_message(WSAGetLastError()); + gpr_log(GPR_INFO, "Unable to retrieve DisconnectEx pointer : %s", + utf8_message); + gpr_free(utf8_message); + } + closesocket(winsocket->socket); +} + +static void destroy(grpc_winsocket *winsocket) { + grpc_iomgr_unregister_object(&winsocket->iomgr_object); + gpr_mu_destroy(&winsocket->state_mu); + gpr_free(winsocket); +} + +static bool check_destroyable(grpc_winsocket *winsocket) { + return winsocket->destroy_called == true && + winsocket->write_info.closure == NULL && + winsocket->read_info.closure == NULL; +} + +void grpc_winsocket_destroy(grpc_winsocket *winsocket) { + gpr_mu_lock(&winsocket->state_mu); + GPR_ASSERT(!winsocket->destroy_called); + winsocket->destroy_called = true; + bool should_destroy = check_destroyable(winsocket); + gpr_mu_unlock(&winsocket->state_mu); + if (should_destroy) destroy(winsocket); +} + +/* Calling notify_on_read or write means either of two things: +-) The IOCP already completed in the background, and we need to call +the callback now. +-) The IOCP hasn't completed yet, and we're queuing it for later. */ +static void socket_notify_on_iocp(grpc_exec_ctx *exec_ctx, + grpc_winsocket *socket, grpc_closure *closure, + grpc_winsocket_callback_info *info) { + GPR_ASSERT(info->closure == NULL); + gpr_mu_lock(&socket->state_mu); + if (info->has_pending_iocp) { + info->has_pending_iocp = 0; + GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); + } else { + info->closure = closure; + } + gpr_mu_unlock(&socket->state_mu); +} + +void grpc_socket_notify_on_write(grpc_exec_ctx *exec_ctx, + grpc_winsocket *socket, + grpc_closure *closure) { + socket_notify_on_iocp(exec_ctx, socket, closure, &socket->write_info); +} + +void grpc_socket_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_winsocket *socket, + grpc_closure *closure) { + socket_notify_on_iocp(exec_ctx, socket, closure, &socket->read_info); +} + +void grpc_socket_become_ready(grpc_exec_ctx *exec_ctx, grpc_winsocket *socket, + grpc_winsocket_callback_info *info) { + GPR_ASSERT(!info->has_pending_iocp); + gpr_mu_lock(&socket->state_mu); + if (info->closure) { + GRPC_CLOSURE_SCHED(exec_ctx, info->closure, GRPC_ERROR_NONE); + info->closure = NULL; + } else { + info->has_pending_iocp = 1; + } + bool should_destroy = check_destroyable(socket); + gpr_mu_unlock(&socket->state_mu); + if (should_destroy) destroy(socket); +} + +#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/tcp_client_posix.c b/src/core/lib/iomgr/tcp_client_posix.c deleted file mode 100644 index 39dbb506e2..0000000000 --- a/src/core/lib/iomgr/tcp_client_posix.c +++ /dev/null @@ -1,356 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/iomgr/tcp_client_posix.h" - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/iomgr/ev_posix.h" -#include "src/core/lib/iomgr/iomgr_posix.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/iomgr/socket_mutator.h" -#include "src/core/lib/iomgr/socket_utils_posix.h" -#include "src/core/lib/iomgr/tcp_posix.h" -#include "src/core/lib/iomgr/timer.h" -#include "src/core/lib/iomgr/unix_sockets_posix.h" -#include "src/core/lib/support/string.h" - -extern grpc_tracer_flag grpc_tcp_trace; - -typedef struct { - gpr_mu mu; - grpc_fd *fd; - gpr_timespec deadline; - grpc_timer alarm; - grpc_closure on_alarm; - int refs; - grpc_closure write_closure; - grpc_pollset_set *interested_parties; - char *addr_str; - grpc_endpoint **ep; - grpc_closure *closure; - grpc_channel_args *channel_args; -} async_connect; - -static grpc_error *prepare_socket(const grpc_resolved_address *addr, int fd, - const grpc_channel_args *channel_args) { - grpc_error *err = GRPC_ERROR_NONE; - - GPR_ASSERT(fd >= 0); - - err = grpc_set_socket_nonblocking(fd, 1); - if (err != GRPC_ERROR_NONE) goto error; - err = grpc_set_socket_cloexec(fd, 1); - if (err != GRPC_ERROR_NONE) goto error; - if (!grpc_is_unix_socket(addr)) { - err = grpc_set_socket_low_latency(fd, 1); - if (err != GRPC_ERROR_NONE) goto error; - } - err = grpc_set_socket_no_sigpipe_if_possible(fd); - if (err != GRPC_ERROR_NONE) goto error; - if (channel_args) { - for (size_t i = 0; i < channel_args->num_args; i++) { - if (0 == strcmp(channel_args->args[i].key, GRPC_ARG_SOCKET_MUTATOR)) { - GPR_ASSERT(channel_args->args[i].type == GRPC_ARG_POINTER); - grpc_socket_mutator *mutator = - (grpc_socket_mutator *)channel_args->args[i].value.pointer.p; - err = grpc_set_socket_with_mutator(fd, mutator); - if (err != GRPC_ERROR_NONE) goto error; - } - } - } - goto done; - -error: - if (fd >= 0) { - close(fd); - } -done: - return err; -} - -static void tc_on_alarm(grpc_exec_ctx *exec_ctx, void *acp, grpc_error *error) { - int done; - async_connect *ac = (async_connect *)acp; - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - const char *str = grpc_error_string(error); - gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: on_alarm: error=%s", ac->addr_str, - str); - } - gpr_mu_lock(&ac->mu); - if (ac->fd != NULL) { - grpc_fd_shutdown(exec_ctx, ac->fd, GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "connect() timed out")); - } - done = (--ac->refs == 0); - gpr_mu_unlock(&ac->mu); - if (done) { - gpr_mu_destroy(&ac->mu); - gpr_free(ac->addr_str); - grpc_channel_args_destroy(exec_ctx, ac->channel_args); - gpr_free(ac); - } -} - -grpc_endpoint *grpc_tcp_client_create_from_fd( - grpc_exec_ctx *exec_ctx, grpc_fd *fd, const grpc_channel_args *channel_args, - const char *addr_str) { - return grpc_tcp_create(exec_ctx, fd, channel_args, addr_str); -} - -static void on_writable(grpc_exec_ctx *exec_ctx, void *acp, grpc_error *error) { - async_connect *ac = (async_connect *)acp; - int so_error = 0; - socklen_t so_error_size; - int err; - int done; - grpc_endpoint **ep = ac->ep; - grpc_closure *closure = ac->closure; - grpc_fd *fd; - - GRPC_ERROR_REF(error); - - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - const char *str = grpc_error_string(error); - gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: on_writable: error=%s", - ac->addr_str, str); - } - - gpr_mu_lock(&ac->mu); - GPR_ASSERT(ac->fd); - fd = ac->fd; - ac->fd = NULL; - gpr_mu_unlock(&ac->mu); - - grpc_timer_cancel(exec_ctx, &ac->alarm); - - gpr_mu_lock(&ac->mu); - if (error != GRPC_ERROR_NONE) { - error = - grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string("Timeout occurred")); - goto finish; - } - - do { - so_error_size = sizeof(so_error); - err = getsockopt(grpc_fd_wrapped_fd(fd), SOL_SOCKET, SO_ERROR, &so_error, - &so_error_size); - } while (err < 0 && errno == EINTR); - if (err < 0) { - error = GRPC_OS_ERROR(errno, "getsockopt"); - goto finish; - } - - switch (so_error) { - case 0: - grpc_pollset_set_del_fd(exec_ctx, ac->interested_parties, fd); - *ep = grpc_tcp_client_create_from_fd(exec_ctx, fd, ac->channel_args, - ac->addr_str); - fd = NULL; - break; - case ENOBUFS: - /* We will get one of these errors if we have run out of - memory in the kernel for the data structures allocated - when you connect a socket. If this happens it is very - likely that if we wait a little bit then try again the - connection will work (since other programs or this - program will close their network connections and free up - memory). This does _not_ indicate that there is anything - wrong with the server we are connecting to, this is a - local problem. - - If you are looking at this code, then chances are that - your program or another program on the same computer - opened too many network connections. The "easy" fix: - don't do that! */ - gpr_log(GPR_ERROR, "kernel out of buffers"); - gpr_mu_unlock(&ac->mu); - grpc_fd_notify_on_write(exec_ctx, fd, &ac->write_closure); - return; - case ECONNREFUSED: - /* This error shouldn't happen for anything other than connect(). */ - error = GRPC_OS_ERROR(so_error, "connect"); - break; - default: - /* We don't really know which syscall triggered the problem here, - so punt by reporting getsockopt(). */ - error = GRPC_OS_ERROR(so_error, "getsockopt(SO_ERROR)"); - break; - } - -finish: - if (fd != NULL) { - grpc_pollset_set_del_fd(exec_ctx, ac->interested_parties, fd); - grpc_fd_orphan(exec_ctx, fd, NULL, NULL, false /* already_closed */, - "tcp_client_orphan"); - fd = NULL; - } - done = (--ac->refs == 0); - gpr_mu_unlock(&ac->mu); - if (error != GRPC_ERROR_NONE) { - char *error_descr; - grpc_slice str; - bool ret = grpc_error_get_str(error, GRPC_ERROR_STR_DESCRIPTION, &str); - GPR_ASSERT(ret); - char *desc = grpc_slice_to_c_string(str); - gpr_asprintf(&error_descr, "Failed to connect to remote host: %s", desc); - error = grpc_error_set_str(error, GRPC_ERROR_STR_DESCRIPTION, - grpc_slice_from_copied_string(error_descr)); - gpr_free(error_descr); - gpr_free(desc); - error = grpc_error_set_str(error, GRPC_ERROR_STR_TARGET_ADDRESS, - grpc_slice_from_copied_string(ac->addr_str)); - } - if (done) { - gpr_mu_destroy(&ac->mu); - gpr_free(ac->addr_str); - grpc_channel_args_destroy(exec_ctx, ac->channel_args); - gpr_free(ac); - } - GRPC_CLOSURE_SCHED(exec_ctx, closure, error); -} - -static void tcp_client_connect_impl(grpc_exec_ctx *exec_ctx, - grpc_closure *closure, grpc_endpoint **ep, - grpc_pollset_set *interested_parties, - const grpc_channel_args *channel_args, - const grpc_resolved_address *addr, - gpr_timespec deadline) { - int fd; - grpc_dualstack_mode dsmode; - int err; - async_connect *ac; - grpc_resolved_address addr6_v4mapped; - grpc_resolved_address addr4_copy; - grpc_fd *fdobj; - char *name; - char *addr_str; - grpc_error *error; - - *ep = NULL; - - /* Use dualstack sockets where available. */ - if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { - addr = &addr6_v4mapped; - } - - error = grpc_create_dualstack_socket(addr, SOCK_STREAM, 0, &dsmode, &fd); - if (error != GRPC_ERROR_NONE) { - GRPC_CLOSURE_SCHED(exec_ctx, closure, error); - return; - } - if (dsmode == GRPC_DSMODE_IPV4) { - /* If we got an AF_INET socket, map the address back to IPv4. */ - GPR_ASSERT(grpc_sockaddr_is_v4mapped(addr, &addr4_copy)); - addr = &addr4_copy; - } - if ((error = prepare_socket(addr, fd, channel_args)) != GRPC_ERROR_NONE) { - GRPC_CLOSURE_SCHED(exec_ctx, closure, error); - return; - } - - do { - GPR_ASSERT(addr->len < ~(socklen_t)0); - err = - connect(fd, (const struct sockaddr *)addr->addr, (socklen_t)addr->len); - } while (err < 0 && errno == EINTR); - - addr_str = grpc_sockaddr_to_uri(addr); - gpr_asprintf(&name, "tcp-client:%s", addr_str); - - fdobj = grpc_fd_create(fd, name); - - if (err >= 0) { - *ep = - grpc_tcp_client_create_from_fd(exec_ctx, fdobj, channel_args, addr_str); - GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); - goto done; - } - - if (errno != EWOULDBLOCK && errno != EINPROGRESS) { - grpc_fd_orphan(exec_ctx, fdobj, NULL, NULL, false /* already_closed */, - "tcp_client_connect_error"); - GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_OS_ERROR(errno, "connect")); - goto done; - } - - grpc_pollset_set_add_fd(exec_ctx, interested_parties, fdobj); - - ac = (async_connect *)gpr_malloc(sizeof(async_connect)); - ac->closure = closure; - ac->ep = ep; - ac->fd = fdobj; - ac->interested_parties = interested_parties; - ac->addr_str = addr_str; - addr_str = NULL; - gpr_mu_init(&ac->mu); - ac->refs = 2; - GRPC_CLOSURE_INIT(&ac->write_closure, on_writable, ac, - grpc_schedule_on_exec_ctx); - ac->channel_args = grpc_channel_args_copy(channel_args); - - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: asynchronously connecting fd %p", - ac->addr_str, fdobj); - } - - gpr_mu_lock(&ac->mu); - GRPC_CLOSURE_INIT(&ac->on_alarm, tc_on_alarm, ac, grpc_schedule_on_exec_ctx); - grpc_timer_init(exec_ctx, &ac->alarm, - gpr_convert_clock_type(deadline, GPR_CLOCK_MONOTONIC), - &ac->on_alarm, gpr_now(GPR_CLOCK_MONOTONIC)); - grpc_fd_notify_on_write(exec_ctx, ac->fd, &ac->write_closure); - gpr_mu_unlock(&ac->mu); - -done: - gpr_free(name); - gpr_free(addr_str); -} - -// overridden by api_fuzzer.c -void (*grpc_tcp_client_connect_impl)( - grpc_exec_ctx *exec_ctx, grpc_closure *closure, grpc_endpoint **ep, - grpc_pollset_set *interested_parties, const grpc_channel_args *channel_args, - const grpc_resolved_address *addr, - gpr_timespec deadline) = tcp_client_connect_impl; - -void grpc_tcp_client_connect(grpc_exec_ctx *exec_ctx, grpc_closure *closure, - grpc_endpoint **ep, - grpc_pollset_set *interested_parties, - const grpc_channel_args *channel_args, - const grpc_resolved_address *addr, - gpr_timespec deadline) { - grpc_tcp_client_connect_impl(exec_ctx, closure, ep, interested_parties, - channel_args, addr, deadline); -} - -#endif diff --git a/src/core/lib/iomgr/tcp_client_posix.cc b/src/core/lib/iomgr/tcp_client_posix.cc new file mode 100644 index 0000000000..39dbb506e2 --- /dev/null +++ b/src/core/lib/iomgr/tcp_client_posix.cc @@ -0,0 +1,356 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/iomgr/tcp_client_posix.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/iomgr/ev_posix.h" +#include "src/core/lib/iomgr/iomgr_posix.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/iomgr/socket_mutator.h" +#include "src/core/lib/iomgr/socket_utils_posix.h" +#include "src/core/lib/iomgr/tcp_posix.h" +#include "src/core/lib/iomgr/timer.h" +#include "src/core/lib/iomgr/unix_sockets_posix.h" +#include "src/core/lib/support/string.h" + +extern grpc_tracer_flag grpc_tcp_trace; + +typedef struct { + gpr_mu mu; + grpc_fd *fd; + gpr_timespec deadline; + grpc_timer alarm; + grpc_closure on_alarm; + int refs; + grpc_closure write_closure; + grpc_pollset_set *interested_parties; + char *addr_str; + grpc_endpoint **ep; + grpc_closure *closure; + grpc_channel_args *channel_args; +} async_connect; + +static grpc_error *prepare_socket(const grpc_resolved_address *addr, int fd, + const grpc_channel_args *channel_args) { + grpc_error *err = GRPC_ERROR_NONE; + + GPR_ASSERT(fd >= 0); + + err = grpc_set_socket_nonblocking(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; + err = grpc_set_socket_cloexec(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; + if (!grpc_is_unix_socket(addr)) { + err = grpc_set_socket_low_latency(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; + } + err = grpc_set_socket_no_sigpipe_if_possible(fd); + if (err != GRPC_ERROR_NONE) goto error; + if (channel_args) { + for (size_t i = 0; i < channel_args->num_args; i++) { + if (0 == strcmp(channel_args->args[i].key, GRPC_ARG_SOCKET_MUTATOR)) { + GPR_ASSERT(channel_args->args[i].type == GRPC_ARG_POINTER); + grpc_socket_mutator *mutator = + (grpc_socket_mutator *)channel_args->args[i].value.pointer.p; + err = grpc_set_socket_with_mutator(fd, mutator); + if (err != GRPC_ERROR_NONE) goto error; + } + } + } + goto done; + +error: + if (fd >= 0) { + close(fd); + } +done: + return err; +} + +static void tc_on_alarm(grpc_exec_ctx *exec_ctx, void *acp, grpc_error *error) { + int done; + async_connect *ac = (async_connect *)acp; + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + const char *str = grpc_error_string(error); + gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: on_alarm: error=%s", ac->addr_str, + str); + } + gpr_mu_lock(&ac->mu); + if (ac->fd != NULL) { + grpc_fd_shutdown(exec_ctx, ac->fd, GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "connect() timed out")); + } + done = (--ac->refs == 0); + gpr_mu_unlock(&ac->mu); + if (done) { + gpr_mu_destroy(&ac->mu); + gpr_free(ac->addr_str); + grpc_channel_args_destroy(exec_ctx, ac->channel_args); + gpr_free(ac); + } +} + +grpc_endpoint *grpc_tcp_client_create_from_fd( + grpc_exec_ctx *exec_ctx, grpc_fd *fd, const grpc_channel_args *channel_args, + const char *addr_str) { + return grpc_tcp_create(exec_ctx, fd, channel_args, addr_str); +} + +static void on_writable(grpc_exec_ctx *exec_ctx, void *acp, grpc_error *error) { + async_connect *ac = (async_connect *)acp; + int so_error = 0; + socklen_t so_error_size; + int err; + int done; + grpc_endpoint **ep = ac->ep; + grpc_closure *closure = ac->closure; + grpc_fd *fd; + + GRPC_ERROR_REF(error); + + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + const char *str = grpc_error_string(error); + gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: on_writable: error=%s", + ac->addr_str, str); + } + + gpr_mu_lock(&ac->mu); + GPR_ASSERT(ac->fd); + fd = ac->fd; + ac->fd = NULL; + gpr_mu_unlock(&ac->mu); + + grpc_timer_cancel(exec_ctx, &ac->alarm); + + gpr_mu_lock(&ac->mu); + if (error != GRPC_ERROR_NONE) { + error = + grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string("Timeout occurred")); + goto finish; + } + + do { + so_error_size = sizeof(so_error); + err = getsockopt(grpc_fd_wrapped_fd(fd), SOL_SOCKET, SO_ERROR, &so_error, + &so_error_size); + } while (err < 0 && errno == EINTR); + if (err < 0) { + error = GRPC_OS_ERROR(errno, "getsockopt"); + goto finish; + } + + switch (so_error) { + case 0: + grpc_pollset_set_del_fd(exec_ctx, ac->interested_parties, fd); + *ep = grpc_tcp_client_create_from_fd(exec_ctx, fd, ac->channel_args, + ac->addr_str); + fd = NULL; + break; + case ENOBUFS: + /* We will get one of these errors if we have run out of + memory in the kernel for the data structures allocated + when you connect a socket. If this happens it is very + likely that if we wait a little bit then try again the + connection will work (since other programs or this + program will close their network connections and free up + memory). This does _not_ indicate that there is anything + wrong with the server we are connecting to, this is a + local problem. + + If you are looking at this code, then chances are that + your program or another program on the same computer + opened too many network connections. The "easy" fix: + don't do that! */ + gpr_log(GPR_ERROR, "kernel out of buffers"); + gpr_mu_unlock(&ac->mu); + grpc_fd_notify_on_write(exec_ctx, fd, &ac->write_closure); + return; + case ECONNREFUSED: + /* This error shouldn't happen for anything other than connect(). */ + error = GRPC_OS_ERROR(so_error, "connect"); + break; + default: + /* We don't really know which syscall triggered the problem here, + so punt by reporting getsockopt(). */ + error = GRPC_OS_ERROR(so_error, "getsockopt(SO_ERROR)"); + break; + } + +finish: + if (fd != NULL) { + grpc_pollset_set_del_fd(exec_ctx, ac->interested_parties, fd); + grpc_fd_orphan(exec_ctx, fd, NULL, NULL, false /* already_closed */, + "tcp_client_orphan"); + fd = NULL; + } + done = (--ac->refs == 0); + gpr_mu_unlock(&ac->mu); + if (error != GRPC_ERROR_NONE) { + char *error_descr; + grpc_slice str; + bool ret = grpc_error_get_str(error, GRPC_ERROR_STR_DESCRIPTION, &str); + GPR_ASSERT(ret); + char *desc = grpc_slice_to_c_string(str); + gpr_asprintf(&error_descr, "Failed to connect to remote host: %s", desc); + error = grpc_error_set_str(error, GRPC_ERROR_STR_DESCRIPTION, + grpc_slice_from_copied_string(error_descr)); + gpr_free(error_descr); + gpr_free(desc); + error = grpc_error_set_str(error, GRPC_ERROR_STR_TARGET_ADDRESS, + grpc_slice_from_copied_string(ac->addr_str)); + } + if (done) { + gpr_mu_destroy(&ac->mu); + gpr_free(ac->addr_str); + grpc_channel_args_destroy(exec_ctx, ac->channel_args); + gpr_free(ac); + } + GRPC_CLOSURE_SCHED(exec_ctx, closure, error); +} + +static void tcp_client_connect_impl(grpc_exec_ctx *exec_ctx, + grpc_closure *closure, grpc_endpoint **ep, + grpc_pollset_set *interested_parties, + const grpc_channel_args *channel_args, + const grpc_resolved_address *addr, + gpr_timespec deadline) { + int fd; + grpc_dualstack_mode dsmode; + int err; + async_connect *ac; + grpc_resolved_address addr6_v4mapped; + grpc_resolved_address addr4_copy; + grpc_fd *fdobj; + char *name; + char *addr_str; + grpc_error *error; + + *ep = NULL; + + /* Use dualstack sockets where available. */ + if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { + addr = &addr6_v4mapped; + } + + error = grpc_create_dualstack_socket(addr, SOCK_STREAM, 0, &dsmode, &fd); + if (error != GRPC_ERROR_NONE) { + GRPC_CLOSURE_SCHED(exec_ctx, closure, error); + return; + } + if (dsmode == GRPC_DSMODE_IPV4) { + /* If we got an AF_INET socket, map the address back to IPv4. */ + GPR_ASSERT(grpc_sockaddr_is_v4mapped(addr, &addr4_copy)); + addr = &addr4_copy; + } + if ((error = prepare_socket(addr, fd, channel_args)) != GRPC_ERROR_NONE) { + GRPC_CLOSURE_SCHED(exec_ctx, closure, error); + return; + } + + do { + GPR_ASSERT(addr->len < ~(socklen_t)0); + err = + connect(fd, (const struct sockaddr *)addr->addr, (socklen_t)addr->len); + } while (err < 0 && errno == EINTR); + + addr_str = grpc_sockaddr_to_uri(addr); + gpr_asprintf(&name, "tcp-client:%s", addr_str); + + fdobj = grpc_fd_create(fd, name); + + if (err >= 0) { + *ep = + grpc_tcp_client_create_from_fd(exec_ctx, fdobj, channel_args, addr_str); + GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_ERROR_NONE); + goto done; + } + + if (errno != EWOULDBLOCK && errno != EINPROGRESS) { + grpc_fd_orphan(exec_ctx, fdobj, NULL, NULL, false /* already_closed */, + "tcp_client_connect_error"); + GRPC_CLOSURE_SCHED(exec_ctx, closure, GRPC_OS_ERROR(errno, "connect")); + goto done; + } + + grpc_pollset_set_add_fd(exec_ctx, interested_parties, fdobj); + + ac = (async_connect *)gpr_malloc(sizeof(async_connect)); + ac->closure = closure; + ac->ep = ep; + ac->fd = fdobj; + ac->interested_parties = interested_parties; + ac->addr_str = addr_str; + addr_str = NULL; + gpr_mu_init(&ac->mu); + ac->refs = 2; + GRPC_CLOSURE_INIT(&ac->write_closure, on_writable, ac, + grpc_schedule_on_exec_ctx); + ac->channel_args = grpc_channel_args_copy(channel_args); + + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: asynchronously connecting fd %p", + ac->addr_str, fdobj); + } + + gpr_mu_lock(&ac->mu); + GRPC_CLOSURE_INIT(&ac->on_alarm, tc_on_alarm, ac, grpc_schedule_on_exec_ctx); + grpc_timer_init(exec_ctx, &ac->alarm, + gpr_convert_clock_type(deadline, GPR_CLOCK_MONOTONIC), + &ac->on_alarm, gpr_now(GPR_CLOCK_MONOTONIC)); + grpc_fd_notify_on_write(exec_ctx, ac->fd, &ac->write_closure); + gpr_mu_unlock(&ac->mu); + +done: + gpr_free(name); + gpr_free(addr_str); +} + +// overridden by api_fuzzer.c +void (*grpc_tcp_client_connect_impl)( + grpc_exec_ctx *exec_ctx, grpc_closure *closure, grpc_endpoint **ep, + grpc_pollset_set *interested_parties, const grpc_channel_args *channel_args, + const grpc_resolved_address *addr, + gpr_timespec deadline) = tcp_client_connect_impl; + +void grpc_tcp_client_connect(grpc_exec_ctx *exec_ctx, grpc_closure *closure, + grpc_endpoint **ep, + grpc_pollset_set *interested_parties, + const grpc_channel_args *channel_args, + const grpc_resolved_address *addr, + gpr_timespec deadline) { + grpc_tcp_client_connect_impl(exec_ctx, closure, ep, interested_parties, + channel_args, addr, deadline); +} + +#endif diff --git a/src/core/lib/iomgr/tcp_client_uv.c b/src/core/lib/iomgr/tcp_client_uv.c deleted file mode 100644 index 786c456b73..0000000000 --- a/src/core/lib/iomgr/tcp_client_uv.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_UV - -#include - -#include -#include - -#include "src/core/lib/iomgr/error.h" -#include "src/core/lib/iomgr/iomgr_uv.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/iomgr/tcp_client.h" -#include "src/core/lib/iomgr/tcp_uv.h" -#include "src/core/lib/iomgr/timer.h" - -extern grpc_tracer_flag grpc_tcp_trace; - -typedef struct grpc_uv_tcp_connect { - uv_connect_t connect_req; - grpc_timer alarm; - grpc_closure on_alarm; - uv_tcp_t *tcp_handle; - grpc_closure *closure; - grpc_endpoint **endpoint; - int refs; - char *addr_name; - grpc_resource_quota *resource_quota; -} grpc_uv_tcp_connect; - -static void uv_tcp_connect_cleanup(grpc_exec_ctx *exec_ctx, - grpc_uv_tcp_connect *connect) { - grpc_resource_quota_unref_internal(exec_ctx, connect->resource_quota); - gpr_free(connect->addr_name); - gpr_free(connect); -} - -static void tcp_close_callback(uv_handle_t *handle) { gpr_free(handle); } - -static void uv_tc_on_alarm(grpc_exec_ctx *exec_ctx, void *acp, - grpc_error *error) { - int done; - grpc_uv_tcp_connect *connect = acp; - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - const char *str = grpc_error_string(error); - gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: on_alarm: error=%s", - connect->addr_name, str); - } - if (error == GRPC_ERROR_NONE) { - /* error == NONE implies that the timer ran out, and wasn't cancelled. If - it was cancelled, then the handler that cancelled it also should close - the handle, if applicable */ - uv_close((uv_handle_t *)connect->tcp_handle, tcp_close_callback); - } - done = (--connect->refs == 0); - if (done) { - uv_tcp_connect_cleanup(exec_ctx, connect); - } -} - -static void uv_tc_on_connect(uv_connect_t *req, int status) { - grpc_uv_tcp_connect *connect = req->data; - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - grpc_error *error = GRPC_ERROR_NONE; - int done; - grpc_closure *closure = connect->closure; - grpc_timer_cancel(&exec_ctx, &connect->alarm); - if (status == 0) { - *connect->endpoint = grpc_tcp_create( - connect->tcp_handle, connect->resource_quota, connect->addr_name); - } else { - error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "Failed to connect to remote host"); - error = grpc_error_set_int(error, GRPC_ERROR_INT_ERRNO, -status); - error = - grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string(uv_strerror(status))); - if (status == UV_ECANCELED) { - error = - grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string("Timeout occurred")); - // This should only happen if the handle is already closed - } else { - error = grpc_error_set_str( - error, GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string(uv_strerror(status))); - uv_close((uv_handle_t *)connect->tcp_handle, tcp_close_callback); - } - } - done = (--connect->refs == 0); - if (done) { - grpc_exec_ctx_flush(&exec_ctx); - uv_tcp_connect_cleanup(&exec_ctx, connect); - } - GRPC_CLOSURE_SCHED(&exec_ctx, closure, error); - grpc_exec_ctx_finish(&exec_ctx); -} - -static void tcp_client_connect_impl(grpc_exec_ctx *exec_ctx, - grpc_closure *closure, grpc_endpoint **ep, - grpc_pollset_set *interested_parties, - const grpc_channel_args *channel_args, - const grpc_resolved_address *resolved_addr, - gpr_timespec deadline) { - grpc_uv_tcp_connect *connect; - grpc_resource_quota *resource_quota = grpc_resource_quota_create(NULL); - (void)channel_args; - (void)interested_parties; - - GRPC_UV_ASSERT_SAME_THREAD(); - - if (channel_args != NULL) { - for (size_t i = 0; i < channel_args->num_args; i++) { - if (0 == strcmp(channel_args->args[i].key, GRPC_ARG_RESOURCE_QUOTA)) { - grpc_resource_quota_unref_internal(exec_ctx, resource_quota); - resource_quota = grpc_resource_quota_ref_internal( - channel_args->args[i].value.pointer.p); - } - } - } - - connect = gpr_zalloc(sizeof(grpc_uv_tcp_connect)); - connect->closure = closure; - connect->endpoint = ep; - connect->tcp_handle = gpr_malloc(sizeof(uv_tcp_t)); - connect->addr_name = grpc_sockaddr_to_uri(resolved_addr); - connect->resource_quota = resource_quota; - uv_tcp_init(uv_default_loop(), connect->tcp_handle); - connect->connect_req.data = connect; - connect->refs = 1; - - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: asynchronously connecting", - connect->addr_name); - } - - // TODO(murgatroid99): figure out what the return value here means - uv_tcp_connect(&connect->connect_req, connect->tcp_handle, - (const struct sockaddr *)resolved_addr->addr, - uv_tc_on_connect); - GRPC_CLOSURE_INIT(&connect->on_alarm, uv_tc_on_alarm, connect, - grpc_schedule_on_exec_ctx); - grpc_timer_init(exec_ctx, &connect->alarm, - gpr_convert_clock_type(deadline, GPR_CLOCK_MONOTONIC), - &connect->on_alarm, gpr_now(GPR_CLOCK_MONOTONIC)); -} - -// overridden by api_fuzzer.c -void (*grpc_tcp_client_connect_impl)( - grpc_exec_ctx *exec_ctx, grpc_closure *closure, grpc_endpoint **ep, - grpc_pollset_set *interested_parties, const grpc_channel_args *channel_args, - const grpc_resolved_address *addr, - gpr_timespec deadline) = tcp_client_connect_impl; - -void grpc_tcp_client_connect(grpc_exec_ctx *exec_ctx, grpc_closure *closure, - grpc_endpoint **ep, - grpc_pollset_set *interested_parties, - const grpc_channel_args *channel_args, - const grpc_resolved_address *addr, - gpr_timespec deadline) { - grpc_tcp_client_connect_impl(exec_ctx, closure, ep, interested_parties, - channel_args, addr, deadline); -} - -#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/tcp_client_uv.cc b/src/core/lib/iomgr/tcp_client_uv.cc new file mode 100644 index 0000000000..786c456b73 --- /dev/null +++ b/src/core/lib/iomgr/tcp_client_uv.cc @@ -0,0 +1,183 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_UV + +#include + +#include +#include + +#include "src/core/lib/iomgr/error.h" +#include "src/core/lib/iomgr/iomgr_uv.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/iomgr/tcp_client.h" +#include "src/core/lib/iomgr/tcp_uv.h" +#include "src/core/lib/iomgr/timer.h" + +extern grpc_tracer_flag grpc_tcp_trace; + +typedef struct grpc_uv_tcp_connect { + uv_connect_t connect_req; + grpc_timer alarm; + grpc_closure on_alarm; + uv_tcp_t *tcp_handle; + grpc_closure *closure; + grpc_endpoint **endpoint; + int refs; + char *addr_name; + grpc_resource_quota *resource_quota; +} grpc_uv_tcp_connect; + +static void uv_tcp_connect_cleanup(grpc_exec_ctx *exec_ctx, + grpc_uv_tcp_connect *connect) { + grpc_resource_quota_unref_internal(exec_ctx, connect->resource_quota); + gpr_free(connect->addr_name); + gpr_free(connect); +} + +static void tcp_close_callback(uv_handle_t *handle) { gpr_free(handle); } + +static void uv_tc_on_alarm(grpc_exec_ctx *exec_ctx, void *acp, + grpc_error *error) { + int done; + grpc_uv_tcp_connect *connect = acp; + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + const char *str = grpc_error_string(error); + gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: on_alarm: error=%s", + connect->addr_name, str); + } + if (error == GRPC_ERROR_NONE) { + /* error == NONE implies that the timer ran out, and wasn't cancelled. If + it was cancelled, then the handler that cancelled it also should close + the handle, if applicable */ + uv_close((uv_handle_t *)connect->tcp_handle, tcp_close_callback); + } + done = (--connect->refs == 0); + if (done) { + uv_tcp_connect_cleanup(exec_ctx, connect); + } +} + +static void uv_tc_on_connect(uv_connect_t *req, int status) { + grpc_uv_tcp_connect *connect = req->data; + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + grpc_error *error = GRPC_ERROR_NONE; + int done; + grpc_closure *closure = connect->closure; + grpc_timer_cancel(&exec_ctx, &connect->alarm); + if (status == 0) { + *connect->endpoint = grpc_tcp_create( + connect->tcp_handle, connect->resource_quota, connect->addr_name); + } else { + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "Failed to connect to remote host"); + error = grpc_error_set_int(error, GRPC_ERROR_INT_ERRNO, -status); + error = + grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string(uv_strerror(status))); + if (status == UV_ECANCELED) { + error = + grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string("Timeout occurred")); + // This should only happen if the handle is already closed + } else { + error = grpc_error_set_str( + error, GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string(uv_strerror(status))); + uv_close((uv_handle_t *)connect->tcp_handle, tcp_close_callback); + } + } + done = (--connect->refs == 0); + if (done) { + grpc_exec_ctx_flush(&exec_ctx); + uv_tcp_connect_cleanup(&exec_ctx, connect); + } + GRPC_CLOSURE_SCHED(&exec_ctx, closure, error); + grpc_exec_ctx_finish(&exec_ctx); +} + +static void tcp_client_connect_impl(grpc_exec_ctx *exec_ctx, + grpc_closure *closure, grpc_endpoint **ep, + grpc_pollset_set *interested_parties, + const grpc_channel_args *channel_args, + const grpc_resolved_address *resolved_addr, + gpr_timespec deadline) { + grpc_uv_tcp_connect *connect; + grpc_resource_quota *resource_quota = grpc_resource_quota_create(NULL); + (void)channel_args; + (void)interested_parties; + + GRPC_UV_ASSERT_SAME_THREAD(); + + if (channel_args != NULL) { + for (size_t i = 0; i < channel_args->num_args; i++) { + if (0 == strcmp(channel_args->args[i].key, GRPC_ARG_RESOURCE_QUOTA)) { + grpc_resource_quota_unref_internal(exec_ctx, resource_quota); + resource_quota = grpc_resource_quota_ref_internal( + channel_args->args[i].value.pointer.p); + } + } + } + + connect = gpr_zalloc(sizeof(grpc_uv_tcp_connect)); + connect->closure = closure; + connect->endpoint = ep; + connect->tcp_handle = gpr_malloc(sizeof(uv_tcp_t)); + connect->addr_name = grpc_sockaddr_to_uri(resolved_addr); + connect->resource_quota = resource_quota; + uv_tcp_init(uv_default_loop(), connect->tcp_handle); + connect->connect_req.data = connect; + connect->refs = 1; + + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "CLIENT_CONNECT: %s: asynchronously connecting", + connect->addr_name); + } + + // TODO(murgatroid99): figure out what the return value here means + uv_tcp_connect(&connect->connect_req, connect->tcp_handle, + (const struct sockaddr *)resolved_addr->addr, + uv_tc_on_connect); + GRPC_CLOSURE_INIT(&connect->on_alarm, uv_tc_on_alarm, connect, + grpc_schedule_on_exec_ctx); + grpc_timer_init(exec_ctx, &connect->alarm, + gpr_convert_clock_type(deadline, GPR_CLOCK_MONOTONIC), + &connect->on_alarm, gpr_now(GPR_CLOCK_MONOTONIC)); +} + +// overridden by api_fuzzer.c +void (*grpc_tcp_client_connect_impl)( + grpc_exec_ctx *exec_ctx, grpc_closure *closure, grpc_endpoint **ep, + grpc_pollset_set *interested_parties, const grpc_channel_args *channel_args, + const grpc_resolved_address *addr, + gpr_timespec deadline) = tcp_client_connect_impl; + +void grpc_tcp_client_connect(grpc_exec_ctx *exec_ctx, grpc_closure *closure, + grpc_endpoint **ep, + grpc_pollset_set *interested_parties, + const grpc_channel_args *channel_args, + const grpc_resolved_address *addr, + gpr_timespec deadline) { + grpc_tcp_client_connect_impl(exec_ctx, closure, ep, interested_parties, + channel_args, addr, deadline); +} + +#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/tcp_client_windows.c b/src/core/lib/iomgr/tcp_client_windows.c deleted file mode 100644 index fc62105cc9..0000000000 --- a/src/core/lib/iomgr/tcp_client_windows.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_WINSOCK_SOCKET - -#include "src/core/lib/iomgr/sockaddr_windows.h" - -#include -#include -#include -#include -#include - -#include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/iomgr/iocp_windows.h" -#include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/iomgr/socket_windows.h" -#include "src/core/lib/iomgr/tcp_client.h" -#include "src/core/lib/iomgr/tcp_windows.h" -#include "src/core/lib/iomgr/timer.h" - -typedef struct { - grpc_closure *on_done; - gpr_mu mu; - grpc_winsocket *socket; - gpr_timespec deadline; - grpc_timer alarm; - grpc_closure on_alarm; - char *addr_name; - int refs; - grpc_closure on_connect; - grpc_endpoint **endpoint; - grpc_channel_args *channel_args; -} async_connect; - -static void async_connect_unlock_and_cleanup(grpc_exec_ctx *exec_ctx, - async_connect *ac, - grpc_winsocket *socket) { - int done = (--ac->refs == 0); - gpr_mu_unlock(&ac->mu); - if (done) { - grpc_channel_args_destroy(exec_ctx, ac->channel_args); - gpr_mu_destroy(&ac->mu); - gpr_free(ac->addr_name); - gpr_free(ac); - } - if (socket != NULL) grpc_winsocket_destroy(socket); -} - -static void on_alarm(grpc_exec_ctx *exec_ctx, void *acp, grpc_error *error) { - async_connect *ac = acp; - gpr_mu_lock(&ac->mu); - grpc_winsocket *socket = ac->socket; - ac->socket = NULL; - if (socket != NULL) { - grpc_winsocket_shutdown(socket); - } - async_connect_unlock_and_cleanup(exec_ctx, ac, socket); -} - -static void on_connect(grpc_exec_ctx *exec_ctx, void *acp, grpc_error *error) { - async_connect *ac = acp; - grpc_endpoint **ep = ac->endpoint; - GPR_ASSERT(*ep == NULL); - grpc_closure *on_done = ac->on_done; - - GRPC_ERROR_REF(error); - - gpr_mu_lock(&ac->mu); - grpc_winsocket *socket = ac->socket; - ac->socket = NULL; - gpr_mu_unlock(&ac->mu); - - grpc_timer_cancel(exec_ctx, &ac->alarm); - - gpr_mu_lock(&ac->mu); - - if (error == GRPC_ERROR_NONE) { - if (socket != NULL) { - DWORD transfered_bytes = 0; - DWORD flags; - BOOL wsa_success = - WSAGetOverlappedResult(socket->socket, &socket->write_info.overlapped, - &transfered_bytes, FALSE, &flags); - GPR_ASSERT(transfered_bytes == 0); - if (!wsa_success) { - error = GRPC_WSA_ERROR(WSAGetLastError(), "ConnectEx"); - } else { - *ep = - grpc_tcp_create(exec_ctx, socket, ac->channel_args, ac->addr_name); - socket = NULL; - } - } else { - error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("socket is null"); - } - } - - async_connect_unlock_and_cleanup(exec_ctx, ac, socket); - /* If the connection was aborted, the callback was already called when - the deadline was met. */ - GRPC_CLOSURE_SCHED(exec_ctx, on_done, error); -} - -/* Tries to issue one async connection, then schedules both an IOCP - notification request for the connection, and one timeout alert. */ -static void tcp_client_connect_impl( - grpc_exec_ctx *exec_ctx, grpc_closure *on_done, grpc_endpoint **endpoint, - grpc_pollset_set *interested_parties, const grpc_channel_args *channel_args, - const grpc_resolved_address *addr, gpr_timespec deadline) { - SOCKET sock = INVALID_SOCKET; - BOOL success; - int status; - grpc_resolved_address addr6_v4mapped; - grpc_resolved_address local_address; - async_connect *ac; - grpc_winsocket *socket = NULL; - LPFN_CONNECTEX ConnectEx; - GUID guid = WSAID_CONNECTEX; - DWORD ioctl_num_bytes; - grpc_winsocket_callback_info *info; - grpc_error *error = GRPC_ERROR_NONE; - - *endpoint = NULL; - - /* Use dualstack sockets where available. */ - if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { - addr = &addr6_v4mapped; - } - - sock = WSASocket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, NULL, 0, - WSA_FLAG_OVERLAPPED); - if (sock == INVALID_SOCKET) { - error = GRPC_WSA_ERROR(WSAGetLastError(), "WSASocket"); - goto failure; - } - - error = grpc_tcp_prepare_socket(sock); - if (error != GRPC_ERROR_NONE) { - goto failure; - } - - /* Grab the function pointer for ConnectEx for that specific socket. - It may change depending on the interface. */ - status = - WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, &guid, sizeof(guid), - &ConnectEx, sizeof(ConnectEx), &ioctl_num_bytes, NULL, NULL); - - if (status != 0) { - error = GRPC_WSA_ERROR(WSAGetLastError(), - "WSAIoctl(SIO_GET_EXTENSION_FUNCTION_POINTER)"); - goto failure; - } - - grpc_sockaddr_make_wildcard6(0, &local_address); - - status = bind(sock, (struct sockaddr *)&local_address.addr, - (int)local_address.len); - if (status != 0) { - error = GRPC_WSA_ERROR(WSAGetLastError(), "bind"); - goto failure; - } - - socket = grpc_winsocket_create(sock, "client"); - info = &socket->write_info; - success = ConnectEx(sock, (struct sockaddr *)&addr->addr, (int)addr->len, - NULL, 0, NULL, &info->overlapped); - - /* It wouldn't be unusual to get a success immediately. But we'll still get - an IOCP notification, so let's ignore it. */ - if (!success) { - int last_error = WSAGetLastError(); - if (last_error != ERROR_IO_PENDING) { - error = GRPC_WSA_ERROR(last_error, "ConnectEx"); - goto failure; - } - } - - ac = gpr_malloc(sizeof(async_connect)); - ac->on_done = on_done; - ac->socket = socket; - gpr_mu_init(&ac->mu); - ac->refs = 2; - ac->addr_name = grpc_sockaddr_to_uri(addr); - ac->endpoint = endpoint; - ac->channel_args = grpc_channel_args_copy(channel_args); - GRPC_CLOSURE_INIT(&ac->on_connect, on_connect, ac, grpc_schedule_on_exec_ctx); - - GRPC_CLOSURE_INIT(&ac->on_alarm, on_alarm, ac, grpc_schedule_on_exec_ctx); - grpc_timer_init(exec_ctx, &ac->alarm, deadline, &ac->on_alarm, - gpr_now(GPR_CLOCK_MONOTONIC)); - grpc_socket_notify_on_write(exec_ctx, socket, &ac->on_connect); - return; - -failure: - GPR_ASSERT(error != GRPC_ERROR_NONE); - char *target_uri = grpc_sockaddr_to_uri(addr); - grpc_error *final_error = grpc_error_set_str( - GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING("Failed to connect", - &error, 1), - GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(target_uri)); - GRPC_ERROR_UNREF(error); - if (socket != NULL) { - grpc_winsocket_destroy(socket); - } else if (sock != INVALID_SOCKET) { - closesocket(sock); - } - GRPC_CLOSURE_SCHED(exec_ctx, on_done, final_error); -} - -// overridden by api_fuzzer.c -void (*grpc_tcp_client_connect_impl)( - grpc_exec_ctx *exec_ctx, grpc_closure *closure, grpc_endpoint **ep, - grpc_pollset_set *interested_parties, const grpc_channel_args *channel_args, - const grpc_resolved_address *addr, - gpr_timespec deadline) = tcp_client_connect_impl; - -void grpc_tcp_client_connect(grpc_exec_ctx *exec_ctx, grpc_closure *closure, - grpc_endpoint **ep, - grpc_pollset_set *interested_parties, - const grpc_channel_args *channel_args, - const grpc_resolved_address *addr, - gpr_timespec deadline) { - grpc_tcp_client_connect_impl(exec_ctx, closure, ep, interested_parties, - channel_args, addr, deadline); -} - -#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/tcp_client_windows.cc b/src/core/lib/iomgr/tcp_client_windows.cc new file mode 100644 index 0000000000..fc62105cc9 --- /dev/null +++ b/src/core/lib/iomgr/tcp_client_windows.cc @@ -0,0 +1,245 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_WINSOCK_SOCKET + +#include "src/core/lib/iomgr/sockaddr_windows.h" + +#include +#include +#include +#include +#include + +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/iomgr/iocp_windows.h" +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/iomgr/socket_windows.h" +#include "src/core/lib/iomgr/tcp_client.h" +#include "src/core/lib/iomgr/tcp_windows.h" +#include "src/core/lib/iomgr/timer.h" + +typedef struct { + grpc_closure *on_done; + gpr_mu mu; + grpc_winsocket *socket; + gpr_timespec deadline; + grpc_timer alarm; + grpc_closure on_alarm; + char *addr_name; + int refs; + grpc_closure on_connect; + grpc_endpoint **endpoint; + grpc_channel_args *channel_args; +} async_connect; + +static void async_connect_unlock_and_cleanup(grpc_exec_ctx *exec_ctx, + async_connect *ac, + grpc_winsocket *socket) { + int done = (--ac->refs == 0); + gpr_mu_unlock(&ac->mu); + if (done) { + grpc_channel_args_destroy(exec_ctx, ac->channel_args); + gpr_mu_destroy(&ac->mu); + gpr_free(ac->addr_name); + gpr_free(ac); + } + if (socket != NULL) grpc_winsocket_destroy(socket); +} + +static void on_alarm(grpc_exec_ctx *exec_ctx, void *acp, grpc_error *error) { + async_connect *ac = acp; + gpr_mu_lock(&ac->mu); + grpc_winsocket *socket = ac->socket; + ac->socket = NULL; + if (socket != NULL) { + grpc_winsocket_shutdown(socket); + } + async_connect_unlock_and_cleanup(exec_ctx, ac, socket); +} + +static void on_connect(grpc_exec_ctx *exec_ctx, void *acp, grpc_error *error) { + async_connect *ac = acp; + grpc_endpoint **ep = ac->endpoint; + GPR_ASSERT(*ep == NULL); + grpc_closure *on_done = ac->on_done; + + GRPC_ERROR_REF(error); + + gpr_mu_lock(&ac->mu); + grpc_winsocket *socket = ac->socket; + ac->socket = NULL; + gpr_mu_unlock(&ac->mu); + + grpc_timer_cancel(exec_ctx, &ac->alarm); + + gpr_mu_lock(&ac->mu); + + if (error == GRPC_ERROR_NONE) { + if (socket != NULL) { + DWORD transfered_bytes = 0; + DWORD flags; + BOOL wsa_success = + WSAGetOverlappedResult(socket->socket, &socket->write_info.overlapped, + &transfered_bytes, FALSE, &flags); + GPR_ASSERT(transfered_bytes == 0); + if (!wsa_success) { + error = GRPC_WSA_ERROR(WSAGetLastError(), "ConnectEx"); + } else { + *ep = + grpc_tcp_create(exec_ctx, socket, ac->channel_args, ac->addr_name); + socket = NULL; + } + } else { + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("socket is null"); + } + } + + async_connect_unlock_and_cleanup(exec_ctx, ac, socket); + /* If the connection was aborted, the callback was already called when + the deadline was met. */ + GRPC_CLOSURE_SCHED(exec_ctx, on_done, error); +} + +/* Tries to issue one async connection, then schedules both an IOCP + notification request for the connection, and one timeout alert. */ +static void tcp_client_connect_impl( + grpc_exec_ctx *exec_ctx, grpc_closure *on_done, grpc_endpoint **endpoint, + grpc_pollset_set *interested_parties, const grpc_channel_args *channel_args, + const grpc_resolved_address *addr, gpr_timespec deadline) { + SOCKET sock = INVALID_SOCKET; + BOOL success; + int status; + grpc_resolved_address addr6_v4mapped; + grpc_resolved_address local_address; + async_connect *ac; + grpc_winsocket *socket = NULL; + LPFN_CONNECTEX ConnectEx; + GUID guid = WSAID_CONNECTEX; + DWORD ioctl_num_bytes; + grpc_winsocket_callback_info *info; + grpc_error *error = GRPC_ERROR_NONE; + + *endpoint = NULL; + + /* Use dualstack sockets where available. */ + if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { + addr = &addr6_v4mapped; + } + + sock = WSASocket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, NULL, 0, + WSA_FLAG_OVERLAPPED); + if (sock == INVALID_SOCKET) { + error = GRPC_WSA_ERROR(WSAGetLastError(), "WSASocket"); + goto failure; + } + + error = grpc_tcp_prepare_socket(sock); + if (error != GRPC_ERROR_NONE) { + goto failure; + } + + /* Grab the function pointer for ConnectEx for that specific socket. + It may change depending on the interface. */ + status = + WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, &guid, sizeof(guid), + &ConnectEx, sizeof(ConnectEx), &ioctl_num_bytes, NULL, NULL); + + if (status != 0) { + error = GRPC_WSA_ERROR(WSAGetLastError(), + "WSAIoctl(SIO_GET_EXTENSION_FUNCTION_POINTER)"); + goto failure; + } + + grpc_sockaddr_make_wildcard6(0, &local_address); + + status = bind(sock, (struct sockaddr *)&local_address.addr, + (int)local_address.len); + if (status != 0) { + error = GRPC_WSA_ERROR(WSAGetLastError(), "bind"); + goto failure; + } + + socket = grpc_winsocket_create(sock, "client"); + info = &socket->write_info; + success = ConnectEx(sock, (struct sockaddr *)&addr->addr, (int)addr->len, + NULL, 0, NULL, &info->overlapped); + + /* It wouldn't be unusual to get a success immediately. But we'll still get + an IOCP notification, so let's ignore it. */ + if (!success) { + int last_error = WSAGetLastError(); + if (last_error != ERROR_IO_PENDING) { + error = GRPC_WSA_ERROR(last_error, "ConnectEx"); + goto failure; + } + } + + ac = gpr_malloc(sizeof(async_connect)); + ac->on_done = on_done; + ac->socket = socket; + gpr_mu_init(&ac->mu); + ac->refs = 2; + ac->addr_name = grpc_sockaddr_to_uri(addr); + ac->endpoint = endpoint; + ac->channel_args = grpc_channel_args_copy(channel_args); + GRPC_CLOSURE_INIT(&ac->on_connect, on_connect, ac, grpc_schedule_on_exec_ctx); + + GRPC_CLOSURE_INIT(&ac->on_alarm, on_alarm, ac, grpc_schedule_on_exec_ctx); + grpc_timer_init(exec_ctx, &ac->alarm, deadline, &ac->on_alarm, + gpr_now(GPR_CLOCK_MONOTONIC)); + grpc_socket_notify_on_write(exec_ctx, socket, &ac->on_connect); + return; + +failure: + GPR_ASSERT(error != GRPC_ERROR_NONE); + char *target_uri = grpc_sockaddr_to_uri(addr); + grpc_error *final_error = grpc_error_set_str( + GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING("Failed to connect", + &error, 1), + GRPC_ERROR_STR_TARGET_ADDRESS, grpc_slice_from_copied_string(target_uri)); + GRPC_ERROR_UNREF(error); + if (socket != NULL) { + grpc_winsocket_destroy(socket); + } else if (sock != INVALID_SOCKET) { + closesocket(sock); + } + GRPC_CLOSURE_SCHED(exec_ctx, on_done, final_error); +} + +// overridden by api_fuzzer.c +void (*grpc_tcp_client_connect_impl)( + grpc_exec_ctx *exec_ctx, grpc_closure *closure, grpc_endpoint **ep, + grpc_pollset_set *interested_parties, const grpc_channel_args *channel_args, + const grpc_resolved_address *addr, + gpr_timespec deadline) = tcp_client_connect_impl; + +void grpc_tcp_client_connect(grpc_exec_ctx *exec_ctx, grpc_closure *closure, + grpc_endpoint **ep, + grpc_pollset_set *interested_parties, + const grpc_channel_args *channel_args, + const grpc_resolved_address *addr, + gpr_timespec deadline) { + grpc_tcp_client_connect_impl(exec_ctx, closure, ep, interested_parties, + channel_args, addr, deadline); +} + +#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/tcp_posix.c b/src/core/lib/iomgr/tcp_posix.c deleted file mode 100644 index 7e271294fd..0000000000 --- a/src/core/lib/iomgr/tcp_posix.c +++ /dev/null @@ -1,819 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/iomgr/network_status_tracker.h" -#include "src/core/lib/iomgr/tcp_posix.h" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/debug/stats.h" -#include "src/core/lib/debug/trace.h" -#include "src/core/lib/iomgr/ev_posix.h" -#include "src/core/lib/iomgr/executor.h" -#include "src/core/lib/profiling/timers.h" -#include "src/core/lib/slice/slice_internal.h" -#include "src/core/lib/slice/slice_string_helpers.h" -#include "src/core/lib/support/string.h" - -#ifdef GRPC_HAVE_MSG_NOSIGNAL -#define SENDMSG_FLAGS MSG_NOSIGNAL -#else -#define SENDMSG_FLAGS 0 -#endif - -#ifdef GRPC_MSG_IOVLEN_TYPE -typedef GRPC_MSG_IOVLEN_TYPE msg_iovlen_type; -#else -typedef size_t msg_iovlen_type; -#endif - -grpc_tracer_flag grpc_tcp_trace = GRPC_TRACER_INITIALIZER(false, "tcp"); - -typedef struct { - grpc_endpoint base; - grpc_fd *em_fd; - int fd; - bool finished_edge; - double target_length; - double bytes_read_this_round; - gpr_refcount refcount; - gpr_atm shutdown_count; - - int min_read_chunk_size; - int max_read_chunk_size; - - /* garbage after the last read */ - grpc_slice_buffer last_read_buffer; - - grpc_slice_buffer *incoming_buffer; - grpc_slice_buffer *outgoing_buffer; - /** slice within outgoing_buffer to write next */ - size_t outgoing_slice_idx; - /** byte within outgoing_buffer->slices[outgoing_slice_idx] to write next */ - size_t outgoing_byte_idx; - - grpc_closure *read_cb; - grpc_closure *write_cb; - grpc_closure *release_fd_cb; - int *release_fd; - - grpc_closure read_done_closure; - grpc_closure write_done_closure; - - char *peer_string; - - grpc_resource_user *resource_user; - grpc_resource_user_slice_allocator slice_allocator; -} grpc_tcp; - -typedef struct backup_poller { - gpr_mu *pollset_mu; - grpc_closure run_poller; -} backup_poller; - -#define BACKUP_POLLER_POLLSET(b) ((grpc_pollset *)((b) + 1)) - -static gpr_atm g_uncovered_notifications_pending; -static gpr_atm g_backup_poller; /* backup_poller* */ - -static void tcp_handle_read(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, - grpc_error *error); -static void tcp_handle_write(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, - grpc_error *error); -static void tcp_drop_uncovered_then_handle_write(grpc_exec_ctx *exec_ctx, - void *arg /* grpc_tcp */, - grpc_error *error); - -static void done_poller(grpc_exec_ctx *exec_ctx, void *bp, - grpc_error *error_ignored) { - backup_poller *p = (backup_poller *)bp; - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p destroy", p); - } - grpc_pollset_destroy(exec_ctx, BACKUP_POLLER_POLLSET(p)); - gpr_free(p); -} - -static void run_poller(grpc_exec_ctx *exec_ctx, void *bp, - grpc_error *error_ignored) { - backup_poller *p = (backup_poller *)bp; - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p run", p); - } - gpr_mu_lock(p->pollset_mu); - gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC); - gpr_timespec deadline = - gpr_time_add(now, gpr_time_from_seconds(10, GPR_TIMESPAN)); - GRPC_STATS_INC_TCP_BACKUP_POLLER_POLLS(exec_ctx); - GRPC_LOG_IF_ERROR("backup_poller:pollset_work", - grpc_pollset_work(exec_ctx, BACKUP_POLLER_POLLSET(p), NULL, - now, deadline)); - gpr_mu_unlock(p->pollset_mu); - /* last "uncovered" notification is the ref that keeps us polling, if we get - * there try a cas to release it */ - if (gpr_atm_no_barrier_load(&g_uncovered_notifications_pending) == 1 && - gpr_atm_full_cas(&g_uncovered_notifications_pending, 1, 0)) { - gpr_mu_lock(p->pollset_mu); - bool cas_ok = gpr_atm_full_cas(&g_backup_poller, (gpr_atm)p, 0); - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p done cas_ok=%d", p, cas_ok); - } - gpr_mu_unlock(p->pollset_mu); - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p shutdown", p); - } - grpc_pollset_shutdown(exec_ctx, BACKUP_POLLER_POLLSET(p), - GRPC_CLOSURE_INIT(&p->run_poller, done_poller, p, - grpc_schedule_on_exec_ctx)); - } else { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p reschedule", p); - } - GRPC_CLOSURE_SCHED(exec_ctx, &p->run_poller, GRPC_ERROR_NONE); - } -} - -static void drop_uncovered(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - backup_poller *p = (backup_poller *)gpr_atm_acq_load(&g_backup_poller); - gpr_atm old_count = - gpr_atm_no_barrier_fetch_add(&g_uncovered_notifications_pending, -1); - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p uncover cnt %d->%d", p, (int)old_count, - (int)old_count - 1); - } - GPR_ASSERT(old_count != 1); -} - -static void cover_self(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - backup_poller *p; - gpr_atm old_count = - gpr_atm_no_barrier_fetch_add(&g_uncovered_notifications_pending, 2); - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "BACKUP_POLLER: cover cnt %d->%d", (int)old_count, - 2 + (int)old_count); - } - if (old_count == 0) { - GRPC_STATS_INC_TCP_BACKUP_POLLERS_CREATED(exec_ctx); - p = (backup_poller *)gpr_malloc(sizeof(*p) + grpc_pollset_size()); - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p create", p); - } - grpc_pollset_init(BACKUP_POLLER_POLLSET(p), &p->pollset_mu); - gpr_atm_rel_store(&g_backup_poller, (gpr_atm)p); - GRPC_CLOSURE_SCHED( - exec_ctx, - GRPC_CLOSURE_INIT(&p->run_poller, run_poller, p, - grpc_executor_scheduler(GRPC_EXECUTOR_LONG)), - GRPC_ERROR_NONE); - } else { - while ((p = (backup_poller *)gpr_atm_acq_load(&g_backup_poller)) == NULL) { - // spin waiting for backup poller - } - } - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p add %p", p, tcp); - } - grpc_pollset_add_fd(exec_ctx, BACKUP_POLLER_POLLSET(p), tcp->em_fd); - if (old_count != 0) { - drop_uncovered(exec_ctx, tcp); - } -} - -static void notify_on_read(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "TCP:%p notify_on_read", tcp); - } - GRPC_CLOSURE_INIT(&tcp->read_done_closure, tcp_handle_read, tcp, - grpc_schedule_on_exec_ctx); - grpc_fd_notify_on_read(exec_ctx, tcp->em_fd, &tcp->read_done_closure); -} - -static void notify_on_write(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "TCP:%p notify_on_write", tcp); - } - cover_self(exec_ctx, tcp); - GRPC_CLOSURE_INIT(&tcp->write_done_closure, - tcp_drop_uncovered_then_handle_write, tcp, - grpc_schedule_on_exec_ctx); - grpc_fd_notify_on_write(exec_ctx, tcp->em_fd, &tcp->write_done_closure); -} - -static void tcp_drop_uncovered_then_handle_write(grpc_exec_ctx *exec_ctx, - void *arg, grpc_error *error) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "TCP:%p got_write: %s", arg, grpc_error_string(error)); - } - drop_uncovered(exec_ctx, (grpc_tcp *)arg); - tcp_handle_write(exec_ctx, arg, error); -} - -static void add_to_estimate(grpc_tcp *tcp, size_t bytes) { - tcp->bytes_read_this_round += (double)bytes; -} - -static void finish_estimate(grpc_tcp *tcp) { - /* If we read >80% of the target buffer in one read loop, increase the size - of the target buffer to either the amount read, or twice its previous - value */ - if (tcp->bytes_read_this_round > tcp->target_length * 0.8) { - tcp->target_length = - GPR_MAX(2 * tcp->target_length, tcp->bytes_read_this_round); - } else { - tcp->target_length = - 0.99 * tcp->target_length + 0.01 * tcp->bytes_read_this_round; - } - tcp->bytes_read_this_round = 0; -} - -static size_t get_target_read_size(grpc_tcp *tcp) { - grpc_resource_quota *rq = grpc_resource_user_quota(tcp->resource_user); - double pressure = grpc_resource_quota_get_memory_pressure(rq); - double target = - tcp->target_length * (pressure > 0.8 ? (1.0 - pressure) / 0.2 : 1.0); - size_t sz = (((size_t)GPR_CLAMP(target, tcp->min_read_chunk_size, - tcp->max_read_chunk_size)) + - 255) & - ~(size_t)255; - /* don't use more than 1/16th of the overall resource quota for a single read - * alloc */ - size_t rqmax = grpc_resource_quota_peek_size(rq); - if (sz > rqmax / 16 && rqmax > 1024) { - sz = rqmax / 16; - } - return sz; -} - -static grpc_error *tcp_annotate_error(grpc_error *src_error, grpc_tcp *tcp) { - return grpc_error_set_str( - grpc_error_set_int(src_error, GRPC_ERROR_INT_FD, tcp->fd), - GRPC_ERROR_STR_TARGET_ADDRESS, - grpc_slice_from_copied_string(tcp->peer_string)); -} - -static void tcp_handle_read(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, - grpc_error *error); -static void tcp_handle_write(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, - grpc_error *error); - -static void tcp_shutdown(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_error *why) { - grpc_tcp *tcp = (grpc_tcp *)ep; - grpc_fd_shutdown(exec_ctx, tcp->em_fd, why); - grpc_resource_user_shutdown(exec_ctx, tcp->resource_user); -} - -static void tcp_free(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - grpc_fd_orphan(exec_ctx, tcp->em_fd, tcp->release_fd_cb, tcp->release_fd, - false /* already_closed */, "tcp_unref_orphan"); - grpc_slice_buffer_destroy_internal(exec_ctx, &tcp->last_read_buffer); - grpc_resource_user_unref(exec_ctx, tcp->resource_user); - gpr_free(tcp->peer_string); - gpr_free(tcp); -} - -#ifndef NDEBUG -#define TCP_UNREF(cl, tcp, reason) \ - tcp_unref((cl), (tcp), (reason), __FILE__, __LINE__) -#define TCP_REF(tcp, reason) tcp_ref((tcp), (reason), __FILE__, __LINE__) -static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp, - const char *reason, const char *file, int line) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); - gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, - "TCP unref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, - val - 1); - } - if (gpr_unref(&tcp->refcount)) { - tcp_free(exec_ctx, tcp); - } -} - -static void tcp_ref(grpc_tcp *tcp, const char *reason, const char *file, - int line) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); - gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, - "TCP ref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, - val + 1); - } - gpr_ref(&tcp->refcount); -} -#else -#define TCP_UNREF(cl, tcp, reason) tcp_unref((cl), (tcp)) -#define TCP_REF(tcp, reason) tcp_ref((tcp)) -static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - if (gpr_unref(&tcp->refcount)) { - tcp_free(exec_ctx, tcp); - } -} - -static void tcp_ref(grpc_tcp *tcp) { gpr_ref(&tcp->refcount); } -#endif - -static void tcp_destroy(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep) { - grpc_network_status_unregister_endpoint(ep); - grpc_tcp *tcp = (grpc_tcp *)ep; - grpc_slice_buffer_reset_and_unref_internal(exec_ctx, &tcp->last_read_buffer); - TCP_UNREF(exec_ctx, tcp, "destroy"); -} - -static void call_read_cb(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp, - grpc_error *error) { - grpc_closure *cb = tcp->read_cb; - - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "TCP:%p call_cb %p %p:%p", tcp, cb, cb->cb, cb->cb_arg); - size_t i; - const char *str = grpc_error_string(error); - gpr_log(GPR_DEBUG, "read: error=%s", str); - - for (i = 0; i < tcp->incoming_buffer->count; i++) { - char *dump = grpc_dump_slice(tcp->incoming_buffer->slices[i], - GPR_DUMP_HEX | GPR_DUMP_ASCII); - gpr_log(GPR_DEBUG, "READ %p (peer=%s): %s", tcp, tcp->peer_string, dump); - gpr_free(dump); - } - } - - tcp->read_cb = NULL; - tcp->incoming_buffer = NULL; - GRPC_CLOSURE_RUN(exec_ctx, cb, error); -} - -#define MAX_READ_IOVEC 4 -static void tcp_do_read(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - struct msghdr msg; - struct iovec iov[MAX_READ_IOVEC]; - ssize_t read_bytes; - size_t i; - - GPR_ASSERT(!tcp->finished_edge); - GPR_ASSERT(tcp->incoming_buffer->count <= MAX_READ_IOVEC); - GPR_TIMER_BEGIN("tcp_continue_read", 0); - - for (i = 0; i < tcp->incoming_buffer->count; i++) { - iov[i].iov_base = GRPC_SLICE_START_PTR(tcp->incoming_buffer->slices[i]); - iov[i].iov_len = GRPC_SLICE_LENGTH(tcp->incoming_buffer->slices[i]); - } - - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_iov = iov; - msg.msg_iovlen = (msg_iovlen_type)tcp->incoming_buffer->count; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - GRPC_STATS_INC_TCP_READ_OFFER(exec_ctx, tcp->incoming_buffer->length); - GRPC_STATS_INC_TCP_READ_OFFER_IOV_SIZE(exec_ctx, tcp->incoming_buffer->count); - - GPR_TIMER_BEGIN("recvmsg", 0); - do { - GRPC_STATS_INC_SYSCALL_READ(exec_ctx); - read_bytes = recvmsg(tcp->fd, &msg, 0); - } while (read_bytes < 0 && errno == EINTR); - GPR_TIMER_END("recvmsg", read_bytes >= 0); - - if (read_bytes < 0) { - /* NB: After calling call_read_cb a parallel call of the read handler may - * be running. */ - if (errno == EAGAIN) { - finish_estimate(tcp); - /* We've consumed the edge, request a new one */ - notify_on_read(exec_ctx, tcp); - } else { - grpc_slice_buffer_reset_and_unref_internal(exec_ctx, - tcp->incoming_buffer); - call_read_cb(exec_ctx, tcp, - tcp_annotate_error(GRPC_OS_ERROR(errno, "recvmsg"), tcp)); - TCP_UNREF(exec_ctx, tcp, "read"); - } - } else if (read_bytes == 0) { - /* 0 read size ==> end of stream */ - grpc_slice_buffer_reset_and_unref_internal(exec_ctx, tcp->incoming_buffer); - call_read_cb( - exec_ctx, tcp, - tcp_annotate_error( - GRPC_ERROR_CREATE_FROM_STATIC_STRING("Socket closed"), tcp)); - TCP_UNREF(exec_ctx, tcp, "read"); - } else { - GRPC_STATS_INC_TCP_READ_SIZE(exec_ctx, read_bytes); - add_to_estimate(tcp, (size_t)read_bytes); - GPR_ASSERT((size_t)read_bytes <= tcp->incoming_buffer->length); - if ((size_t)read_bytes < tcp->incoming_buffer->length) { - grpc_slice_buffer_trim_end( - tcp->incoming_buffer, - tcp->incoming_buffer->length - (size_t)read_bytes, - &tcp->last_read_buffer); - } - GPR_ASSERT((size_t)read_bytes == tcp->incoming_buffer->length); - call_read_cb(exec_ctx, tcp, GRPC_ERROR_NONE); - TCP_UNREF(exec_ctx, tcp, "read"); - } - - GPR_TIMER_END("tcp_continue_read", 0); -} - -static void tcp_read_allocation_done(grpc_exec_ctx *exec_ctx, void *tcpp, - grpc_error *error) { - grpc_tcp *tcp = (grpc_tcp *)tcpp; - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "TCP:%p read_allocation_done: %s", tcp, - grpc_error_string(error)); - } - if (error != GRPC_ERROR_NONE) { - grpc_slice_buffer_reset_and_unref_internal(exec_ctx, tcp->incoming_buffer); - grpc_slice_buffer_reset_and_unref_internal(exec_ctx, - &tcp->last_read_buffer); - call_read_cb(exec_ctx, tcp, GRPC_ERROR_REF(error)); - TCP_UNREF(exec_ctx, tcp, "read"); - } else { - tcp_do_read(exec_ctx, tcp); - } -} - -static void tcp_continue_read(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - size_t target_read_size = get_target_read_size(tcp); - if (tcp->incoming_buffer->length < target_read_size && - tcp->incoming_buffer->count < MAX_READ_IOVEC) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "TCP:%p alloc_slices", tcp); - } - grpc_resource_user_alloc_slices(exec_ctx, &tcp->slice_allocator, - target_read_size, 1, tcp->incoming_buffer); - } else { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "TCP:%p do_read", tcp); - } - tcp_do_read(exec_ctx, tcp); - } -} - -static void tcp_handle_read(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, - grpc_error *error) { - grpc_tcp *tcp = (grpc_tcp *)arg; - GPR_ASSERT(!tcp->finished_edge); - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "TCP:%p got_read: %s", tcp, grpc_error_string(error)); - } - - if (error != GRPC_ERROR_NONE) { - grpc_slice_buffer_reset_and_unref_internal(exec_ctx, tcp->incoming_buffer); - grpc_slice_buffer_reset_and_unref_internal(exec_ctx, - &tcp->last_read_buffer); - call_read_cb(exec_ctx, tcp, GRPC_ERROR_REF(error)); - TCP_UNREF(exec_ctx, tcp, "read"); - } else { - tcp_continue_read(exec_ctx, tcp); - } -} - -static void tcp_read(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_slice_buffer *incoming_buffer, grpc_closure *cb) { - grpc_tcp *tcp = (grpc_tcp *)ep; - GPR_ASSERT(tcp->read_cb == NULL); - tcp->read_cb = cb; - tcp->incoming_buffer = incoming_buffer; - grpc_slice_buffer_reset_and_unref_internal(exec_ctx, incoming_buffer); - grpc_slice_buffer_swap(incoming_buffer, &tcp->last_read_buffer); - TCP_REF(tcp, "read"); - if (tcp->finished_edge) { - tcp->finished_edge = false; - notify_on_read(exec_ctx, tcp); - } else { - GRPC_CLOSURE_SCHED(exec_ctx, &tcp->read_done_closure, GRPC_ERROR_NONE); - } -} - -/* returns true if done, false if pending; if returning true, *error is set */ -#define MAX_WRITE_IOVEC 1000 -static bool tcp_flush(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp, - grpc_error **error) { - struct msghdr msg; - struct iovec iov[MAX_WRITE_IOVEC]; - msg_iovlen_type iov_size; - ssize_t sent_length; - size_t sending_length; - size_t trailing; - size_t unwind_slice_idx; - size_t unwind_byte_idx; - - for (;;) { - sending_length = 0; - unwind_slice_idx = tcp->outgoing_slice_idx; - unwind_byte_idx = tcp->outgoing_byte_idx; - for (iov_size = 0; tcp->outgoing_slice_idx != tcp->outgoing_buffer->count && - iov_size != MAX_WRITE_IOVEC; - iov_size++) { - iov[iov_size].iov_base = - GRPC_SLICE_START_PTR( - tcp->outgoing_buffer->slices[tcp->outgoing_slice_idx]) + - tcp->outgoing_byte_idx; - iov[iov_size].iov_len = - GRPC_SLICE_LENGTH( - tcp->outgoing_buffer->slices[tcp->outgoing_slice_idx]) - - tcp->outgoing_byte_idx; - sending_length += iov[iov_size].iov_len; - tcp->outgoing_slice_idx++; - tcp->outgoing_byte_idx = 0; - } - GPR_ASSERT(iov_size > 0); - - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_iov = iov; - msg.msg_iovlen = iov_size; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - GRPC_STATS_INC_TCP_WRITE_SIZE(exec_ctx, sending_length); - GRPC_STATS_INC_TCP_WRITE_IOV_SIZE(exec_ctx, iov_size); - - GPR_TIMER_BEGIN("sendmsg", 1); - do { - /* TODO(klempner): Cork if this is a partial write */ - GRPC_STATS_INC_SYSCALL_WRITE(exec_ctx); - sent_length = sendmsg(tcp->fd, &msg, SENDMSG_FLAGS); - } while (sent_length < 0 && errno == EINTR); - GPR_TIMER_END("sendmsg", 0); - - if (sent_length < 0) { - if (errno == EAGAIN) { - tcp->outgoing_slice_idx = unwind_slice_idx; - tcp->outgoing_byte_idx = unwind_byte_idx; - return false; - } else if (errno == EPIPE) { - *error = grpc_error_set_int(GRPC_OS_ERROR(errno, "sendmsg"), - GRPC_ERROR_INT_GRPC_STATUS, - GRPC_STATUS_UNAVAILABLE); - return true; - } else { - *error = tcp_annotate_error(GRPC_OS_ERROR(errno, "sendmsg"), tcp); - return true; - } - } - - GPR_ASSERT(tcp->outgoing_byte_idx == 0); - trailing = sending_length - (size_t)sent_length; - while (trailing > 0) { - size_t slice_length; - - tcp->outgoing_slice_idx--; - slice_length = GRPC_SLICE_LENGTH( - tcp->outgoing_buffer->slices[tcp->outgoing_slice_idx]); - if (slice_length > trailing) { - tcp->outgoing_byte_idx = slice_length - trailing; - break; - } else { - trailing -= slice_length; - } - } - - if (tcp->outgoing_slice_idx == tcp->outgoing_buffer->count) { - *error = GRPC_ERROR_NONE; - return true; - } - }; -} - -static void tcp_handle_write(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, - grpc_error *error) { - grpc_tcp *tcp = (grpc_tcp *)arg; - grpc_closure *cb; - - if (error != GRPC_ERROR_NONE) { - cb = tcp->write_cb; - tcp->write_cb = NULL; - cb->cb(exec_ctx, cb->cb_arg, error); - TCP_UNREF(exec_ctx, tcp, "write"); - return; - } - - if (!tcp_flush(exec_ctx, tcp, &error)) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "write: delayed"); - } - notify_on_write(exec_ctx, tcp); - } else { - cb = tcp->write_cb; - tcp->write_cb = NULL; - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - const char *str = grpc_error_string(error); - gpr_log(GPR_DEBUG, "write: %s", str); - } - - GRPC_CLOSURE_RUN(exec_ctx, cb, error); - TCP_UNREF(exec_ctx, tcp, "write"); - } -} - -static void tcp_write(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_slice_buffer *buf, grpc_closure *cb) { - grpc_tcp *tcp = (grpc_tcp *)ep; - grpc_error *error = GRPC_ERROR_NONE; - - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - size_t i; - - for (i = 0; i < buf->count; i++) { - char *data = - grpc_dump_slice(buf->slices[i], GPR_DUMP_HEX | GPR_DUMP_ASCII); - gpr_log(GPR_DEBUG, "WRITE %p (peer=%s): %s", tcp, tcp->peer_string, data); - gpr_free(data); - } - } - - GPR_TIMER_BEGIN("tcp_write", 0); - GPR_ASSERT(tcp->write_cb == NULL); - - if (buf->length == 0) { - GPR_TIMER_END("tcp_write", 0); - GRPC_CLOSURE_SCHED( - exec_ctx, cb, - grpc_fd_is_shutdown(tcp->em_fd) - ? tcp_annotate_error(GRPC_ERROR_CREATE_FROM_STATIC_STRING("EOF"), - tcp) - : GRPC_ERROR_NONE); - return; - } - tcp->outgoing_buffer = buf; - tcp->outgoing_slice_idx = 0; - tcp->outgoing_byte_idx = 0; - - if (!tcp_flush(exec_ctx, tcp, &error)) { - TCP_REF(tcp, "write"); - tcp->write_cb = cb; - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "write: delayed"); - } - notify_on_write(exec_ctx, tcp); - } else { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - const char *str = grpc_error_string(error); - gpr_log(GPR_DEBUG, "write: %s", str); - } - GRPC_CLOSURE_SCHED(exec_ctx, cb, error); - } - - GPR_TIMER_END("tcp_write", 0); -} - -static void tcp_add_to_pollset(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_pollset *pollset) { - grpc_tcp *tcp = (grpc_tcp *)ep; - grpc_pollset_add_fd(exec_ctx, pollset, tcp->em_fd); -} - -static void tcp_add_to_pollset_set(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_pollset_set *pollset_set) { - grpc_tcp *tcp = (grpc_tcp *)ep; - grpc_pollset_set_add_fd(exec_ctx, pollset_set, tcp->em_fd); -} - -static char *tcp_get_peer(grpc_endpoint *ep) { - grpc_tcp *tcp = (grpc_tcp *)ep; - return gpr_strdup(tcp->peer_string); -} - -static int tcp_get_fd(grpc_endpoint *ep) { - grpc_tcp *tcp = (grpc_tcp *)ep; - return tcp->fd; -} - -static grpc_resource_user *tcp_get_resource_user(grpc_endpoint *ep) { - grpc_tcp *tcp = (grpc_tcp *)ep; - return tcp->resource_user; -} - -static const grpc_endpoint_vtable vtable = { - tcp_read, tcp_write, tcp_add_to_pollset, tcp_add_to_pollset_set, - tcp_shutdown, tcp_destroy, tcp_get_resource_user, tcp_get_peer, - tcp_get_fd}; - -#define MAX_CHUNK_SIZE 32 * 1024 * 1024 - -grpc_endpoint *grpc_tcp_create(grpc_exec_ctx *exec_ctx, grpc_fd *em_fd, - const grpc_channel_args *channel_args, - const char *peer_string) { - int tcp_read_chunk_size = GRPC_TCP_DEFAULT_READ_SLICE_SIZE; - int tcp_max_read_chunk_size = 4 * 1024 * 1024; - int tcp_min_read_chunk_size = 256; - grpc_resource_quota *resource_quota = grpc_resource_quota_create(NULL); - if (channel_args != NULL) { - for (size_t i = 0; i < channel_args->num_args; i++) { - if (0 == - strcmp(channel_args->args[i].key, GRPC_ARG_TCP_READ_CHUNK_SIZE)) { - grpc_integer_options options = {(int)tcp_read_chunk_size, 1, - MAX_CHUNK_SIZE}; - tcp_read_chunk_size = - grpc_channel_arg_get_integer(&channel_args->args[i], options); - } else if (0 == strcmp(channel_args->args[i].key, - GRPC_ARG_TCP_MIN_READ_CHUNK_SIZE)) { - grpc_integer_options options = {(int)tcp_read_chunk_size, 1, - MAX_CHUNK_SIZE}; - tcp_min_read_chunk_size = - grpc_channel_arg_get_integer(&channel_args->args[i], options); - } else if (0 == strcmp(channel_args->args[i].key, - GRPC_ARG_TCP_MAX_READ_CHUNK_SIZE)) { - grpc_integer_options options = {(int)tcp_read_chunk_size, 1, - MAX_CHUNK_SIZE}; - tcp_max_read_chunk_size = - grpc_channel_arg_get_integer(&channel_args->args[i], options); - } else if (0 == - strcmp(channel_args->args[i].key, GRPC_ARG_RESOURCE_QUOTA)) { - grpc_resource_quota_unref_internal(exec_ctx, resource_quota); - resource_quota = grpc_resource_quota_ref_internal( - (grpc_resource_quota *)channel_args->args[i].value.pointer.p); - } - } - } - - if (tcp_min_read_chunk_size > tcp_max_read_chunk_size) { - tcp_min_read_chunk_size = tcp_max_read_chunk_size; - } - tcp_read_chunk_size = GPR_CLAMP(tcp_read_chunk_size, tcp_min_read_chunk_size, - tcp_max_read_chunk_size); - - grpc_tcp *tcp = (grpc_tcp *)gpr_malloc(sizeof(grpc_tcp)); - tcp->base.vtable = &vtable; - tcp->peer_string = gpr_strdup(peer_string); - tcp->fd = grpc_fd_wrapped_fd(em_fd); - tcp->read_cb = NULL; - tcp->write_cb = NULL; - tcp->release_fd_cb = NULL; - tcp->release_fd = NULL; - tcp->incoming_buffer = NULL; - tcp->target_length = (double)tcp_read_chunk_size; - tcp->min_read_chunk_size = tcp_min_read_chunk_size; - tcp->max_read_chunk_size = tcp_max_read_chunk_size; - tcp->bytes_read_this_round = 0; - tcp->finished_edge = true; - /* paired with unref in grpc_tcp_destroy */ - gpr_ref_init(&tcp->refcount, 1); - gpr_atm_no_barrier_store(&tcp->shutdown_count, 0); - tcp->em_fd = em_fd; - grpc_slice_buffer_init(&tcp->last_read_buffer); - tcp->resource_user = grpc_resource_user_create(resource_quota, peer_string); - grpc_resource_user_slice_allocator_init( - &tcp->slice_allocator, tcp->resource_user, tcp_read_allocation_done, tcp); - /* Tell network status tracker about new endpoint */ - grpc_network_status_register_endpoint(&tcp->base); - grpc_resource_quota_unref_internal(exec_ctx, resource_quota); - - return &tcp->base; -} - -int grpc_tcp_fd(grpc_endpoint *ep) { - grpc_tcp *tcp = (grpc_tcp *)ep; - GPR_ASSERT(ep->vtable == &vtable); - return grpc_fd_wrapped_fd(tcp->em_fd); -} - -void grpc_tcp_destroy_and_release_fd(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - int *fd, grpc_closure *done) { - grpc_network_status_unregister_endpoint(ep); - grpc_tcp *tcp = (grpc_tcp *)ep; - GPR_ASSERT(ep->vtable == &vtable); - tcp->release_fd = fd; - tcp->release_fd_cb = done; - grpc_slice_buffer_reset_and_unref_internal(exec_ctx, &tcp->last_read_buffer); - TCP_UNREF(exec_ctx, tcp, "destroy"); -} - -#endif diff --git a/src/core/lib/iomgr/tcp_posix.cc b/src/core/lib/iomgr/tcp_posix.cc new file mode 100644 index 0000000000..7e271294fd --- /dev/null +++ b/src/core/lib/iomgr/tcp_posix.cc @@ -0,0 +1,819 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/iomgr/network_status_tracker.h" +#include "src/core/lib/iomgr/tcp_posix.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/debug/stats.h" +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/iomgr/ev_posix.h" +#include "src/core/lib/iomgr/executor.h" +#include "src/core/lib/profiling/timers.h" +#include "src/core/lib/slice/slice_internal.h" +#include "src/core/lib/slice/slice_string_helpers.h" +#include "src/core/lib/support/string.h" + +#ifdef GRPC_HAVE_MSG_NOSIGNAL +#define SENDMSG_FLAGS MSG_NOSIGNAL +#else +#define SENDMSG_FLAGS 0 +#endif + +#ifdef GRPC_MSG_IOVLEN_TYPE +typedef GRPC_MSG_IOVLEN_TYPE msg_iovlen_type; +#else +typedef size_t msg_iovlen_type; +#endif + +grpc_tracer_flag grpc_tcp_trace = GRPC_TRACER_INITIALIZER(false, "tcp"); + +typedef struct { + grpc_endpoint base; + grpc_fd *em_fd; + int fd; + bool finished_edge; + double target_length; + double bytes_read_this_round; + gpr_refcount refcount; + gpr_atm shutdown_count; + + int min_read_chunk_size; + int max_read_chunk_size; + + /* garbage after the last read */ + grpc_slice_buffer last_read_buffer; + + grpc_slice_buffer *incoming_buffer; + grpc_slice_buffer *outgoing_buffer; + /** slice within outgoing_buffer to write next */ + size_t outgoing_slice_idx; + /** byte within outgoing_buffer->slices[outgoing_slice_idx] to write next */ + size_t outgoing_byte_idx; + + grpc_closure *read_cb; + grpc_closure *write_cb; + grpc_closure *release_fd_cb; + int *release_fd; + + grpc_closure read_done_closure; + grpc_closure write_done_closure; + + char *peer_string; + + grpc_resource_user *resource_user; + grpc_resource_user_slice_allocator slice_allocator; +} grpc_tcp; + +typedef struct backup_poller { + gpr_mu *pollset_mu; + grpc_closure run_poller; +} backup_poller; + +#define BACKUP_POLLER_POLLSET(b) ((grpc_pollset *)((b) + 1)) + +static gpr_atm g_uncovered_notifications_pending; +static gpr_atm g_backup_poller; /* backup_poller* */ + +static void tcp_handle_read(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, + grpc_error *error); +static void tcp_handle_write(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, + grpc_error *error); +static void tcp_drop_uncovered_then_handle_write(grpc_exec_ctx *exec_ctx, + void *arg /* grpc_tcp */, + grpc_error *error); + +static void done_poller(grpc_exec_ctx *exec_ctx, void *bp, + grpc_error *error_ignored) { + backup_poller *p = (backup_poller *)bp; + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p destroy", p); + } + grpc_pollset_destroy(exec_ctx, BACKUP_POLLER_POLLSET(p)); + gpr_free(p); +} + +static void run_poller(grpc_exec_ctx *exec_ctx, void *bp, + grpc_error *error_ignored) { + backup_poller *p = (backup_poller *)bp; + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p run", p); + } + gpr_mu_lock(p->pollset_mu); + gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC); + gpr_timespec deadline = + gpr_time_add(now, gpr_time_from_seconds(10, GPR_TIMESPAN)); + GRPC_STATS_INC_TCP_BACKUP_POLLER_POLLS(exec_ctx); + GRPC_LOG_IF_ERROR("backup_poller:pollset_work", + grpc_pollset_work(exec_ctx, BACKUP_POLLER_POLLSET(p), NULL, + now, deadline)); + gpr_mu_unlock(p->pollset_mu); + /* last "uncovered" notification is the ref that keeps us polling, if we get + * there try a cas to release it */ + if (gpr_atm_no_barrier_load(&g_uncovered_notifications_pending) == 1 && + gpr_atm_full_cas(&g_uncovered_notifications_pending, 1, 0)) { + gpr_mu_lock(p->pollset_mu); + bool cas_ok = gpr_atm_full_cas(&g_backup_poller, (gpr_atm)p, 0); + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p done cas_ok=%d", p, cas_ok); + } + gpr_mu_unlock(p->pollset_mu); + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p shutdown", p); + } + grpc_pollset_shutdown(exec_ctx, BACKUP_POLLER_POLLSET(p), + GRPC_CLOSURE_INIT(&p->run_poller, done_poller, p, + grpc_schedule_on_exec_ctx)); + } else { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p reschedule", p); + } + GRPC_CLOSURE_SCHED(exec_ctx, &p->run_poller, GRPC_ERROR_NONE); + } +} + +static void drop_uncovered(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + backup_poller *p = (backup_poller *)gpr_atm_acq_load(&g_backup_poller); + gpr_atm old_count = + gpr_atm_no_barrier_fetch_add(&g_uncovered_notifications_pending, -1); + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p uncover cnt %d->%d", p, (int)old_count, + (int)old_count - 1); + } + GPR_ASSERT(old_count != 1); +} + +static void cover_self(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + backup_poller *p; + gpr_atm old_count = + gpr_atm_no_barrier_fetch_add(&g_uncovered_notifications_pending, 2); + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "BACKUP_POLLER: cover cnt %d->%d", (int)old_count, + 2 + (int)old_count); + } + if (old_count == 0) { + GRPC_STATS_INC_TCP_BACKUP_POLLERS_CREATED(exec_ctx); + p = (backup_poller *)gpr_malloc(sizeof(*p) + grpc_pollset_size()); + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p create", p); + } + grpc_pollset_init(BACKUP_POLLER_POLLSET(p), &p->pollset_mu); + gpr_atm_rel_store(&g_backup_poller, (gpr_atm)p); + GRPC_CLOSURE_SCHED( + exec_ctx, + GRPC_CLOSURE_INIT(&p->run_poller, run_poller, p, + grpc_executor_scheduler(GRPC_EXECUTOR_LONG)), + GRPC_ERROR_NONE); + } else { + while ((p = (backup_poller *)gpr_atm_acq_load(&g_backup_poller)) == NULL) { + // spin waiting for backup poller + } + } + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "BACKUP_POLLER:%p add %p", p, tcp); + } + grpc_pollset_add_fd(exec_ctx, BACKUP_POLLER_POLLSET(p), tcp->em_fd); + if (old_count != 0) { + drop_uncovered(exec_ctx, tcp); + } +} + +static void notify_on_read(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "TCP:%p notify_on_read", tcp); + } + GRPC_CLOSURE_INIT(&tcp->read_done_closure, tcp_handle_read, tcp, + grpc_schedule_on_exec_ctx); + grpc_fd_notify_on_read(exec_ctx, tcp->em_fd, &tcp->read_done_closure); +} + +static void notify_on_write(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "TCP:%p notify_on_write", tcp); + } + cover_self(exec_ctx, tcp); + GRPC_CLOSURE_INIT(&tcp->write_done_closure, + tcp_drop_uncovered_then_handle_write, tcp, + grpc_schedule_on_exec_ctx); + grpc_fd_notify_on_write(exec_ctx, tcp->em_fd, &tcp->write_done_closure); +} + +static void tcp_drop_uncovered_then_handle_write(grpc_exec_ctx *exec_ctx, + void *arg, grpc_error *error) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "TCP:%p got_write: %s", arg, grpc_error_string(error)); + } + drop_uncovered(exec_ctx, (grpc_tcp *)arg); + tcp_handle_write(exec_ctx, arg, error); +} + +static void add_to_estimate(grpc_tcp *tcp, size_t bytes) { + tcp->bytes_read_this_round += (double)bytes; +} + +static void finish_estimate(grpc_tcp *tcp) { + /* If we read >80% of the target buffer in one read loop, increase the size + of the target buffer to either the amount read, or twice its previous + value */ + if (tcp->bytes_read_this_round > tcp->target_length * 0.8) { + tcp->target_length = + GPR_MAX(2 * tcp->target_length, tcp->bytes_read_this_round); + } else { + tcp->target_length = + 0.99 * tcp->target_length + 0.01 * tcp->bytes_read_this_round; + } + tcp->bytes_read_this_round = 0; +} + +static size_t get_target_read_size(grpc_tcp *tcp) { + grpc_resource_quota *rq = grpc_resource_user_quota(tcp->resource_user); + double pressure = grpc_resource_quota_get_memory_pressure(rq); + double target = + tcp->target_length * (pressure > 0.8 ? (1.0 - pressure) / 0.2 : 1.0); + size_t sz = (((size_t)GPR_CLAMP(target, tcp->min_read_chunk_size, + tcp->max_read_chunk_size)) + + 255) & + ~(size_t)255; + /* don't use more than 1/16th of the overall resource quota for a single read + * alloc */ + size_t rqmax = grpc_resource_quota_peek_size(rq); + if (sz > rqmax / 16 && rqmax > 1024) { + sz = rqmax / 16; + } + return sz; +} + +static grpc_error *tcp_annotate_error(grpc_error *src_error, grpc_tcp *tcp) { + return grpc_error_set_str( + grpc_error_set_int(src_error, GRPC_ERROR_INT_FD, tcp->fd), + GRPC_ERROR_STR_TARGET_ADDRESS, + grpc_slice_from_copied_string(tcp->peer_string)); +} + +static void tcp_handle_read(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, + grpc_error *error); +static void tcp_handle_write(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, + grpc_error *error); + +static void tcp_shutdown(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_error *why) { + grpc_tcp *tcp = (grpc_tcp *)ep; + grpc_fd_shutdown(exec_ctx, tcp->em_fd, why); + grpc_resource_user_shutdown(exec_ctx, tcp->resource_user); +} + +static void tcp_free(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + grpc_fd_orphan(exec_ctx, tcp->em_fd, tcp->release_fd_cb, tcp->release_fd, + false /* already_closed */, "tcp_unref_orphan"); + grpc_slice_buffer_destroy_internal(exec_ctx, &tcp->last_read_buffer); + grpc_resource_user_unref(exec_ctx, tcp->resource_user); + gpr_free(tcp->peer_string); + gpr_free(tcp); +} + +#ifndef NDEBUG +#define TCP_UNREF(cl, tcp, reason) \ + tcp_unref((cl), (tcp), (reason), __FILE__, __LINE__) +#define TCP_REF(tcp, reason) tcp_ref((tcp), (reason), __FILE__, __LINE__) +static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp, + const char *reason, const char *file, int line) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); + gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, + "TCP unref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, + val - 1); + } + if (gpr_unref(&tcp->refcount)) { + tcp_free(exec_ctx, tcp); + } +} + +static void tcp_ref(grpc_tcp *tcp, const char *reason, const char *file, + int line) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); + gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, + "TCP ref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, + val + 1); + } + gpr_ref(&tcp->refcount); +} +#else +#define TCP_UNREF(cl, tcp, reason) tcp_unref((cl), (tcp)) +#define TCP_REF(tcp, reason) tcp_ref((tcp)) +static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + if (gpr_unref(&tcp->refcount)) { + tcp_free(exec_ctx, tcp); + } +} + +static void tcp_ref(grpc_tcp *tcp) { gpr_ref(&tcp->refcount); } +#endif + +static void tcp_destroy(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep) { + grpc_network_status_unregister_endpoint(ep); + grpc_tcp *tcp = (grpc_tcp *)ep; + grpc_slice_buffer_reset_and_unref_internal(exec_ctx, &tcp->last_read_buffer); + TCP_UNREF(exec_ctx, tcp, "destroy"); +} + +static void call_read_cb(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp, + grpc_error *error) { + grpc_closure *cb = tcp->read_cb; + + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "TCP:%p call_cb %p %p:%p", tcp, cb, cb->cb, cb->cb_arg); + size_t i; + const char *str = grpc_error_string(error); + gpr_log(GPR_DEBUG, "read: error=%s", str); + + for (i = 0; i < tcp->incoming_buffer->count; i++) { + char *dump = grpc_dump_slice(tcp->incoming_buffer->slices[i], + GPR_DUMP_HEX | GPR_DUMP_ASCII); + gpr_log(GPR_DEBUG, "READ %p (peer=%s): %s", tcp, tcp->peer_string, dump); + gpr_free(dump); + } + } + + tcp->read_cb = NULL; + tcp->incoming_buffer = NULL; + GRPC_CLOSURE_RUN(exec_ctx, cb, error); +} + +#define MAX_READ_IOVEC 4 +static void tcp_do_read(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + struct msghdr msg; + struct iovec iov[MAX_READ_IOVEC]; + ssize_t read_bytes; + size_t i; + + GPR_ASSERT(!tcp->finished_edge); + GPR_ASSERT(tcp->incoming_buffer->count <= MAX_READ_IOVEC); + GPR_TIMER_BEGIN("tcp_continue_read", 0); + + for (i = 0; i < tcp->incoming_buffer->count; i++) { + iov[i].iov_base = GRPC_SLICE_START_PTR(tcp->incoming_buffer->slices[i]); + iov[i].iov_len = GRPC_SLICE_LENGTH(tcp->incoming_buffer->slices[i]); + } + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = (msg_iovlen_type)tcp->incoming_buffer->count; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + GRPC_STATS_INC_TCP_READ_OFFER(exec_ctx, tcp->incoming_buffer->length); + GRPC_STATS_INC_TCP_READ_OFFER_IOV_SIZE(exec_ctx, tcp->incoming_buffer->count); + + GPR_TIMER_BEGIN("recvmsg", 0); + do { + GRPC_STATS_INC_SYSCALL_READ(exec_ctx); + read_bytes = recvmsg(tcp->fd, &msg, 0); + } while (read_bytes < 0 && errno == EINTR); + GPR_TIMER_END("recvmsg", read_bytes >= 0); + + if (read_bytes < 0) { + /* NB: After calling call_read_cb a parallel call of the read handler may + * be running. */ + if (errno == EAGAIN) { + finish_estimate(tcp); + /* We've consumed the edge, request a new one */ + notify_on_read(exec_ctx, tcp); + } else { + grpc_slice_buffer_reset_and_unref_internal(exec_ctx, + tcp->incoming_buffer); + call_read_cb(exec_ctx, tcp, + tcp_annotate_error(GRPC_OS_ERROR(errno, "recvmsg"), tcp)); + TCP_UNREF(exec_ctx, tcp, "read"); + } + } else if (read_bytes == 0) { + /* 0 read size ==> end of stream */ + grpc_slice_buffer_reset_and_unref_internal(exec_ctx, tcp->incoming_buffer); + call_read_cb( + exec_ctx, tcp, + tcp_annotate_error( + GRPC_ERROR_CREATE_FROM_STATIC_STRING("Socket closed"), tcp)); + TCP_UNREF(exec_ctx, tcp, "read"); + } else { + GRPC_STATS_INC_TCP_READ_SIZE(exec_ctx, read_bytes); + add_to_estimate(tcp, (size_t)read_bytes); + GPR_ASSERT((size_t)read_bytes <= tcp->incoming_buffer->length); + if ((size_t)read_bytes < tcp->incoming_buffer->length) { + grpc_slice_buffer_trim_end( + tcp->incoming_buffer, + tcp->incoming_buffer->length - (size_t)read_bytes, + &tcp->last_read_buffer); + } + GPR_ASSERT((size_t)read_bytes == tcp->incoming_buffer->length); + call_read_cb(exec_ctx, tcp, GRPC_ERROR_NONE); + TCP_UNREF(exec_ctx, tcp, "read"); + } + + GPR_TIMER_END("tcp_continue_read", 0); +} + +static void tcp_read_allocation_done(grpc_exec_ctx *exec_ctx, void *tcpp, + grpc_error *error) { + grpc_tcp *tcp = (grpc_tcp *)tcpp; + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "TCP:%p read_allocation_done: %s", tcp, + grpc_error_string(error)); + } + if (error != GRPC_ERROR_NONE) { + grpc_slice_buffer_reset_and_unref_internal(exec_ctx, tcp->incoming_buffer); + grpc_slice_buffer_reset_and_unref_internal(exec_ctx, + &tcp->last_read_buffer); + call_read_cb(exec_ctx, tcp, GRPC_ERROR_REF(error)); + TCP_UNREF(exec_ctx, tcp, "read"); + } else { + tcp_do_read(exec_ctx, tcp); + } +} + +static void tcp_continue_read(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + size_t target_read_size = get_target_read_size(tcp); + if (tcp->incoming_buffer->length < target_read_size && + tcp->incoming_buffer->count < MAX_READ_IOVEC) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "TCP:%p alloc_slices", tcp); + } + grpc_resource_user_alloc_slices(exec_ctx, &tcp->slice_allocator, + target_read_size, 1, tcp->incoming_buffer); + } else { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "TCP:%p do_read", tcp); + } + tcp_do_read(exec_ctx, tcp); + } +} + +static void tcp_handle_read(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, + grpc_error *error) { + grpc_tcp *tcp = (grpc_tcp *)arg; + GPR_ASSERT(!tcp->finished_edge); + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "TCP:%p got_read: %s", tcp, grpc_error_string(error)); + } + + if (error != GRPC_ERROR_NONE) { + grpc_slice_buffer_reset_and_unref_internal(exec_ctx, tcp->incoming_buffer); + grpc_slice_buffer_reset_and_unref_internal(exec_ctx, + &tcp->last_read_buffer); + call_read_cb(exec_ctx, tcp, GRPC_ERROR_REF(error)); + TCP_UNREF(exec_ctx, tcp, "read"); + } else { + tcp_continue_read(exec_ctx, tcp); + } +} + +static void tcp_read(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_slice_buffer *incoming_buffer, grpc_closure *cb) { + grpc_tcp *tcp = (grpc_tcp *)ep; + GPR_ASSERT(tcp->read_cb == NULL); + tcp->read_cb = cb; + tcp->incoming_buffer = incoming_buffer; + grpc_slice_buffer_reset_and_unref_internal(exec_ctx, incoming_buffer); + grpc_slice_buffer_swap(incoming_buffer, &tcp->last_read_buffer); + TCP_REF(tcp, "read"); + if (tcp->finished_edge) { + tcp->finished_edge = false; + notify_on_read(exec_ctx, tcp); + } else { + GRPC_CLOSURE_SCHED(exec_ctx, &tcp->read_done_closure, GRPC_ERROR_NONE); + } +} + +/* returns true if done, false if pending; if returning true, *error is set */ +#define MAX_WRITE_IOVEC 1000 +static bool tcp_flush(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp, + grpc_error **error) { + struct msghdr msg; + struct iovec iov[MAX_WRITE_IOVEC]; + msg_iovlen_type iov_size; + ssize_t sent_length; + size_t sending_length; + size_t trailing; + size_t unwind_slice_idx; + size_t unwind_byte_idx; + + for (;;) { + sending_length = 0; + unwind_slice_idx = tcp->outgoing_slice_idx; + unwind_byte_idx = tcp->outgoing_byte_idx; + for (iov_size = 0; tcp->outgoing_slice_idx != tcp->outgoing_buffer->count && + iov_size != MAX_WRITE_IOVEC; + iov_size++) { + iov[iov_size].iov_base = + GRPC_SLICE_START_PTR( + tcp->outgoing_buffer->slices[tcp->outgoing_slice_idx]) + + tcp->outgoing_byte_idx; + iov[iov_size].iov_len = + GRPC_SLICE_LENGTH( + tcp->outgoing_buffer->slices[tcp->outgoing_slice_idx]) - + tcp->outgoing_byte_idx; + sending_length += iov[iov_size].iov_len; + tcp->outgoing_slice_idx++; + tcp->outgoing_byte_idx = 0; + } + GPR_ASSERT(iov_size > 0); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = iov_size; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + GRPC_STATS_INC_TCP_WRITE_SIZE(exec_ctx, sending_length); + GRPC_STATS_INC_TCP_WRITE_IOV_SIZE(exec_ctx, iov_size); + + GPR_TIMER_BEGIN("sendmsg", 1); + do { + /* TODO(klempner): Cork if this is a partial write */ + GRPC_STATS_INC_SYSCALL_WRITE(exec_ctx); + sent_length = sendmsg(tcp->fd, &msg, SENDMSG_FLAGS); + } while (sent_length < 0 && errno == EINTR); + GPR_TIMER_END("sendmsg", 0); + + if (sent_length < 0) { + if (errno == EAGAIN) { + tcp->outgoing_slice_idx = unwind_slice_idx; + tcp->outgoing_byte_idx = unwind_byte_idx; + return false; + } else if (errno == EPIPE) { + *error = grpc_error_set_int(GRPC_OS_ERROR(errno, "sendmsg"), + GRPC_ERROR_INT_GRPC_STATUS, + GRPC_STATUS_UNAVAILABLE); + return true; + } else { + *error = tcp_annotate_error(GRPC_OS_ERROR(errno, "sendmsg"), tcp); + return true; + } + } + + GPR_ASSERT(tcp->outgoing_byte_idx == 0); + trailing = sending_length - (size_t)sent_length; + while (trailing > 0) { + size_t slice_length; + + tcp->outgoing_slice_idx--; + slice_length = GRPC_SLICE_LENGTH( + tcp->outgoing_buffer->slices[tcp->outgoing_slice_idx]); + if (slice_length > trailing) { + tcp->outgoing_byte_idx = slice_length - trailing; + break; + } else { + trailing -= slice_length; + } + } + + if (tcp->outgoing_slice_idx == tcp->outgoing_buffer->count) { + *error = GRPC_ERROR_NONE; + return true; + } + }; +} + +static void tcp_handle_write(grpc_exec_ctx *exec_ctx, void *arg /* grpc_tcp */, + grpc_error *error) { + grpc_tcp *tcp = (grpc_tcp *)arg; + grpc_closure *cb; + + if (error != GRPC_ERROR_NONE) { + cb = tcp->write_cb; + tcp->write_cb = NULL; + cb->cb(exec_ctx, cb->cb_arg, error); + TCP_UNREF(exec_ctx, tcp, "write"); + return; + } + + if (!tcp_flush(exec_ctx, tcp, &error)) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "write: delayed"); + } + notify_on_write(exec_ctx, tcp); + } else { + cb = tcp->write_cb; + tcp->write_cb = NULL; + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + const char *str = grpc_error_string(error); + gpr_log(GPR_DEBUG, "write: %s", str); + } + + GRPC_CLOSURE_RUN(exec_ctx, cb, error); + TCP_UNREF(exec_ctx, tcp, "write"); + } +} + +static void tcp_write(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_slice_buffer *buf, grpc_closure *cb) { + grpc_tcp *tcp = (grpc_tcp *)ep; + grpc_error *error = GRPC_ERROR_NONE; + + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + size_t i; + + for (i = 0; i < buf->count; i++) { + char *data = + grpc_dump_slice(buf->slices[i], GPR_DUMP_HEX | GPR_DUMP_ASCII); + gpr_log(GPR_DEBUG, "WRITE %p (peer=%s): %s", tcp, tcp->peer_string, data); + gpr_free(data); + } + } + + GPR_TIMER_BEGIN("tcp_write", 0); + GPR_ASSERT(tcp->write_cb == NULL); + + if (buf->length == 0) { + GPR_TIMER_END("tcp_write", 0); + GRPC_CLOSURE_SCHED( + exec_ctx, cb, + grpc_fd_is_shutdown(tcp->em_fd) + ? tcp_annotate_error(GRPC_ERROR_CREATE_FROM_STATIC_STRING("EOF"), + tcp) + : GRPC_ERROR_NONE); + return; + } + tcp->outgoing_buffer = buf; + tcp->outgoing_slice_idx = 0; + tcp->outgoing_byte_idx = 0; + + if (!tcp_flush(exec_ctx, tcp, &error)) { + TCP_REF(tcp, "write"); + tcp->write_cb = cb; + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "write: delayed"); + } + notify_on_write(exec_ctx, tcp); + } else { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + const char *str = grpc_error_string(error); + gpr_log(GPR_DEBUG, "write: %s", str); + } + GRPC_CLOSURE_SCHED(exec_ctx, cb, error); + } + + GPR_TIMER_END("tcp_write", 0); +} + +static void tcp_add_to_pollset(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_pollset *pollset) { + grpc_tcp *tcp = (grpc_tcp *)ep; + grpc_pollset_add_fd(exec_ctx, pollset, tcp->em_fd); +} + +static void tcp_add_to_pollset_set(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_pollset_set *pollset_set) { + grpc_tcp *tcp = (grpc_tcp *)ep; + grpc_pollset_set_add_fd(exec_ctx, pollset_set, tcp->em_fd); +} + +static char *tcp_get_peer(grpc_endpoint *ep) { + grpc_tcp *tcp = (grpc_tcp *)ep; + return gpr_strdup(tcp->peer_string); +} + +static int tcp_get_fd(grpc_endpoint *ep) { + grpc_tcp *tcp = (grpc_tcp *)ep; + return tcp->fd; +} + +static grpc_resource_user *tcp_get_resource_user(grpc_endpoint *ep) { + grpc_tcp *tcp = (grpc_tcp *)ep; + return tcp->resource_user; +} + +static const grpc_endpoint_vtable vtable = { + tcp_read, tcp_write, tcp_add_to_pollset, tcp_add_to_pollset_set, + tcp_shutdown, tcp_destroy, tcp_get_resource_user, tcp_get_peer, + tcp_get_fd}; + +#define MAX_CHUNK_SIZE 32 * 1024 * 1024 + +grpc_endpoint *grpc_tcp_create(grpc_exec_ctx *exec_ctx, grpc_fd *em_fd, + const grpc_channel_args *channel_args, + const char *peer_string) { + int tcp_read_chunk_size = GRPC_TCP_DEFAULT_READ_SLICE_SIZE; + int tcp_max_read_chunk_size = 4 * 1024 * 1024; + int tcp_min_read_chunk_size = 256; + grpc_resource_quota *resource_quota = grpc_resource_quota_create(NULL); + if (channel_args != NULL) { + for (size_t i = 0; i < channel_args->num_args; i++) { + if (0 == + strcmp(channel_args->args[i].key, GRPC_ARG_TCP_READ_CHUNK_SIZE)) { + grpc_integer_options options = {(int)tcp_read_chunk_size, 1, + MAX_CHUNK_SIZE}; + tcp_read_chunk_size = + grpc_channel_arg_get_integer(&channel_args->args[i], options); + } else if (0 == strcmp(channel_args->args[i].key, + GRPC_ARG_TCP_MIN_READ_CHUNK_SIZE)) { + grpc_integer_options options = {(int)tcp_read_chunk_size, 1, + MAX_CHUNK_SIZE}; + tcp_min_read_chunk_size = + grpc_channel_arg_get_integer(&channel_args->args[i], options); + } else if (0 == strcmp(channel_args->args[i].key, + GRPC_ARG_TCP_MAX_READ_CHUNK_SIZE)) { + grpc_integer_options options = {(int)tcp_read_chunk_size, 1, + MAX_CHUNK_SIZE}; + tcp_max_read_chunk_size = + grpc_channel_arg_get_integer(&channel_args->args[i], options); + } else if (0 == + strcmp(channel_args->args[i].key, GRPC_ARG_RESOURCE_QUOTA)) { + grpc_resource_quota_unref_internal(exec_ctx, resource_quota); + resource_quota = grpc_resource_quota_ref_internal( + (grpc_resource_quota *)channel_args->args[i].value.pointer.p); + } + } + } + + if (tcp_min_read_chunk_size > tcp_max_read_chunk_size) { + tcp_min_read_chunk_size = tcp_max_read_chunk_size; + } + tcp_read_chunk_size = GPR_CLAMP(tcp_read_chunk_size, tcp_min_read_chunk_size, + tcp_max_read_chunk_size); + + grpc_tcp *tcp = (grpc_tcp *)gpr_malloc(sizeof(grpc_tcp)); + tcp->base.vtable = &vtable; + tcp->peer_string = gpr_strdup(peer_string); + tcp->fd = grpc_fd_wrapped_fd(em_fd); + tcp->read_cb = NULL; + tcp->write_cb = NULL; + tcp->release_fd_cb = NULL; + tcp->release_fd = NULL; + tcp->incoming_buffer = NULL; + tcp->target_length = (double)tcp_read_chunk_size; + tcp->min_read_chunk_size = tcp_min_read_chunk_size; + tcp->max_read_chunk_size = tcp_max_read_chunk_size; + tcp->bytes_read_this_round = 0; + tcp->finished_edge = true; + /* paired with unref in grpc_tcp_destroy */ + gpr_ref_init(&tcp->refcount, 1); + gpr_atm_no_barrier_store(&tcp->shutdown_count, 0); + tcp->em_fd = em_fd; + grpc_slice_buffer_init(&tcp->last_read_buffer); + tcp->resource_user = grpc_resource_user_create(resource_quota, peer_string); + grpc_resource_user_slice_allocator_init( + &tcp->slice_allocator, tcp->resource_user, tcp_read_allocation_done, tcp); + /* Tell network status tracker about new endpoint */ + grpc_network_status_register_endpoint(&tcp->base); + grpc_resource_quota_unref_internal(exec_ctx, resource_quota); + + return &tcp->base; +} + +int grpc_tcp_fd(grpc_endpoint *ep) { + grpc_tcp *tcp = (grpc_tcp *)ep; + GPR_ASSERT(ep->vtable == &vtable); + return grpc_fd_wrapped_fd(tcp->em_fd); +} + +void grpc_tcp_destroy_and_release_fd(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + int *fd, grpc_closure *done) { + grpc_network_status_unregister_endpoint(ep); + grpc_tcp *tcp = (grpc_tcp *)ep; + GPR_ASSERT(ep->vtable == &vtable); + tcp->release_fd = fd; + tcp->release_fd_cb = done; + grpc_slice_buffer_reset_and_unref_internal(exec_ctx, &tcp->last_read_buffer); + TCP_UNREF(exec_ctx, tcp, "destroy"); +} + +#endif diff --git a/src/core/lib/iomgr/tcp_server_posix.c b/src/core/lib/iomgr/tcp_server_posix.c deleted file mode 100644 index 06612d639c..0000000000 --- a/src/core/lib/iomgr/tcp_server_posix.c +++ /dev/null @@ -1,565 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -/* FIXME: "posix" files shouldn't be depending on _GNU_SOURCE */ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/iomgr/tcp_server.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/iomgr/resolve_address.h" -#include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/iomgr/socket_utils_posix.h" -#include "src/core/lib/iomgr/tcp_posix.h" -#include "src/core/lib/iomgr/tcp_server_utils_posix.h" -#include "src/core/lib/iomgr/unix_sockets_posix.h" -#include "src/core/lib/support/string.h" - -static gpr_once check_init = GPR_ONCE_INIT; -static bool has_so_reuseport = false; - -static void init(void) { -#ifndef GPR_MANYLINUX1 - int s = socket(AF_INET, SOCK_STREAM, 0); - if (s >= 0) { - has_so_reuseport = GRPC_LOG_IF_ERROR("check for SO_REUSEPORT", - grpc_set_socket_reuse_port(s, 1)); - close(s); - } -#endif -} - -grpc_error *grpc_tcp_server_create(grpc_exec_ctx *exec_ctx, - grpc_closure *shutdown_complete, - const grpc_channel_args *args, - grpc_tcp_server **server) { - gpr_once_init(&check_init, init); - - grpc_tcp_server *s = (grpc_tcp_server *)gpr_zalloc(sizeof(grpc_tcp_server)); - s->so_reuseport = has_so_reuseport; - s->expand_wildcard_addrs = false; - for (size_t i = 0; i < (args == NULL ? 0 : args->num_args); i++) { - if (0 == strcmp(GRPC_ARG_ALLOW_REUSEPORT, args->args[i].key)) { - if (args->args[i].type == GRPC_ARG_INTEGER) { - s->so_reuseport = - has_so_reuseport && (args->args[i].value.integer != 0); - } else { - gpr_free(s); - return GRPC_ERROR_CREATE_FROM_STATIC_STRING(GRPC_ARG_ALLOW_REUSEPORT - " must be an integer"); - } - } else if (0 == strcmp(GRPC_ARG_EXPAND_WILDCARD_ADDRS, args->args[i].key)) { - if (args->args[i].type == GRPC_ARG_INTEGER) { - s->expand_wildcard_addrs = (args->args[i].value.integer != 0); - } else { - gpr_free(s); - return GRPC_ERROR_CREATE_FROM_STATIC_STRING( - GRPC_ARG_EXPAND_WILDCARD_ADDRS " must be an integer"); - } - } - } - gpr_ref_init(&s->refs, 1); - gpr_mu_init(&s->mu); - s->active_ports = 0; - s->destroyed_ports = 0; - s->shutdown = false; - s->shutdown_starting.head = NULL; - s->shutdown_starting.tail = NULL; - s->shutdown_complete = shutdown_complete; - s->on_accept_cb = NULL; - s->on_accept_cb_arg = NULL; - s->head = NULL; - s->tail = NULL; - s->nports = 0; - s->channel_args = grpc_channel_args_copy(args); - gpr_atm_no_barrier_store(&s->next_pollset_to_assign, 0); - *server = s; - return GRPC_ERROR_NONE; -} - -static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { - gpr_mu_lock(&s->mu); - GPR_ASSERT(s->shutdown); - gpr_mu_unlock(&s->mu); - if (s->shutdown_complete != NULL) { - GRPC_CLOSURE_SCHED(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE); - } - - gpr_mu_destroy(&s->mu); - - while (s->head) { - grpc_tcp_listener *sp = s->head; - s->head = sp->next; - gpr_free(sp); - } - grpc_channel_args_destroy(exec_ctx, s->channel_args); - - gpr_free(s); -} - -static void destroyed_port(grpc_exec_ctx *exec_ctx, void *server, - grpc_error *error) { - grpc_tcp_server *s = (grpc_tcp_server *)server; - gpr_mu_lock(&s->mu); - s->destroyed_ports++; - if (s->destroyed_ports == s->nports) { - gpr_mu_unlock(&s->mu); - finish_shutdown(exec_ctx, s); - } else { - GPR_ASSERT(s->destroyed_ports < s->nports); - gpr_mu_unlock(&s->mu); - } -} - -/* called when all listening endpoints have been shutdown, so no further - events will be received on them - at this point it's safe to destroy - things */ -static void deactivated_all_ports(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { - /* delete ALL the things */ - gpr_mu_lock(&s->mu); - - GPR_ASSERT(s->shutdown); - - if (s->head) { - grpc_tcp_listener *sp; - for (sp = s->head; sp; sp = sp->next) { - grpc_unlink_if_unix_domain_socket(&sp->addr); - GRPC_CLOSURE_INIT(&sp->destroyed_closure, destroyed_port, s, - grpc_schedule_on_exec_ctx); - grpc_fd_orphan(exec_ctx, sp->emfd, &sp->destroyed_closure, NULL, - false /* already_closed */, "tcp_listener_shutdown"); - } - gpr_mu_unlock(&s->mu); - } else { - gpr_mu_unlock(&s->mu); - finish_shutdown(exec_ctx, s); - } -} - -static void tcp_server_destroy(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { - gpr_mu_lock(&s->mu); - - GPR_ASSERT(!s->shutdown); - s->shutdown = true; - - /* shutdown all fd's */ - if (s->active_ports) { - grpc_tcp_listener *sp; - for (sp = s->head; sp; sp = sp->next) { - grpc_fd_shutdown(exec_ctx, sp->emfd, GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "Server destroyed")); - } - gpr_mu_unlock(&s->mu); - } else { - gpr_mu_unlock(&s->mu); - deactivated_all_ports(exec_ctx, s); - } -} - -/* event manager callback when reads are ready */ -static void on_read(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *err) { - grpc_tcp_listener *sp = (grpc_tcp_listener *)arg; - grpc_pollset *read_notifier_pollset; - if (err != GRPC_ERROR_NONE) { - goto error; - } - - read_notifier_pollset = - sp->server->pollsets[(size_t)gpr_atm_no_barrier_fetch_add( - &sp->server->next_pollset_to_assign, 1) % - sp->server->pollset_count]; - - /* loop until accept4 returns EAGAIN, and then re-arm notification */ - for (;;) { - grpc_resolved_address addr; - char *addr_str; - char *name; - addr.len = sizeof(struct sockaddr_storage); - /* Note: If we ever decide to return this address to the user, remember to - strip off the ::ffff:0.0.0.0/96 prefix first. */ - int fd = grpc_accept4(sp->fd, &addr, 1, 1); - if (fd < 0) { - switch (errno) { - case EINTR: - continue; - case EAGAIN: - grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); - return; - default: - gpr_mu_lock(&sp->server->mu); - if (!sp->server->shutdown_listeners) { - gpr_log(GPR_ERROR, "Failed accept4: %s", strerror(errno)); - } else { - /* if we have shutdown listeners, accept4 could fail, and we - needn't notify users */ - } - gpr_mu_unlock(&sp->server->mu); - goto error; - } - } - - grpc_set_socket_no_sigpipe_if_possible(fd); - - addr_str = grpc_sockaddr_to_uri(&addr); - gpr_asprintf(&name, "tcp-server-connection:%s", addr_str); - - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "SERVER_CONNECT: incoming connection: %s", addr_str); - } - - grpc_fd *fdobj = grpc_fd_create(fd, name); - - grpc_pollset_add_fd(exec_ctx, read_notifier_pollset, fdobj); - - // Create acceptor. - grpc_tcp_server_acceptor *acceptor = - (grpc_tcp_server_acceptor *)gpr_malloc(sizeof(*acceptor)); - acceptor->from_server = sp->server; - acceptor->port_index = sp->port_index; - acceptor->fd_index = sp->fd_index; - - sp->server->on_accept_cb( - exec_ctx, sp->server->on_accept_cb_arg, - grpc_tcp_create(exec_ctx, fdobj, sp->server->channel_args, addr_str), - read_notifier_pollset, acceptor); - - gpr_free(name); - gpr_free(addr_str); - } - - GPR_UNREACHABLE_CODE(return ); - -error: - gpr_mu_lock(&sp->server->mu); - if (0 == --sp->server->active_ports && sp->server->shutdown) { - gpr_mu_unlock(&sp->server->mu); - deactivated_all_ports(exec_ctx, sp->server); - } else { - gpr_mu_unlock(&sp->server->mu); - } -} - -/* Treat :: or 0.0.0.0 as a family-agnostic wildcard. */ -static grpc_error *add_wildcard_addrs_to_server(grpc_tcp_server *s, - unsigned port_index, - int requested_port, - int *out_port) { - grpc_resolved_address wild4; - grpc_resolved_address wild6; - unsigned fd_index = 0; - grpc_dualstack_mode dsmode; - grpc_tcp_listener *sp = NULL; - grpc_tcp_listener *sp2 = NULL; - grpc_error *v6_err = GRPC_ERROR_NONE; - grpc_error *v4_err = GRPC_ERROR_NONE; - *out_port = -1; - - if (grpc_tcp_server_have_ifaddrs() && s->expand_wildcard_addrs) { - return grpc_tcp_server_add_all_local_addrs(s, port_index, requested_port, - out_port); - } - - grpc_sockaddr_make_wildcards(requested_port, &wild4, &wild6); - /* Try listening on IPv6 first. */ - if ((v6_err = grpc_tcp_server_add_addr(s, &wild6, port_index, fd_index, - &dsmode, &sp)) == GRPC_ERROR_NONE) { - ++fd_index; - requested_port = *out_port = sp->port; - if (dsmode == GRPC_DSMODE_DUALSTACK || dsmode == GRPC_DSMODE_IPV4) { - return GRPC_ERROR_NONE; - } - } - /* If we got a v6-only socket or nothing, try adding 0.0.0.0. */ - grpc_sockaddr_set_port(&wild4, requested_port); - if ((v4_err = grpc_tcp_server_add_addr(s, &wild4, port_index, fd_index, - &dsmode, &sp2)) == GRPC_ERROR_NONE) { - *out_port = sp2->port; - if (sp != NULL) { - sp2->is_sibling = 1; - sp->sibling = sp2; - } - } - if (*out_port > 0) { - if (v6_err != GRPC_ERROR_NONE) { - gpr_log(GPR_INFO, - "Failed to add :: listener, " - "the environment may not support IPv6: %s", - grpc_error_string(v6_err)); - GRPC_ERROR_UNREF(v6_err); - } - if (v4_err != GRPC_ERROR_NONE) { - gpr_log(GPR_INFO, - "Failed to add 0.0.0.0 listener, " - "the environment may not support IPv4: %s", - grpc_error_string(v4_err)); - GRPC_ERROR_UNREF(v4_err); - } - return GRPC_ERROR_NONE; - } else { - grpc_error *root_err = GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "Failed to add any wildcard listeners"); - GPR_ASSERT(v6_err != GRPC_ERROR_NONE && v4_err != GRPC_ERROR_NONE); - root_err = grpc_error_add_child(root_err, v6_err); - root_err = grpc_error_add_child(root_err, v4_err); - return root_err; - } -} - -static grpc_error *clone_port(grpc_tcp_listener *listener, unsigned count) { - grpc_tcp_listener *sp = NULL; - char *addr_str; - char *name; - grpc_error *err; - - for (grpc_tcp_listener *l = listener->next; l && l->is_sibling; l = l->next) { - l->fd_index += count; - } - - for (unsigned i = 0; i < count; i++) { - int fd = -1; - int port = -1; - grpc_dualstack_mode dsmode; - err = grpc_create_dualstack_socket(&listener->addr, SOCK_STREAM, 0, &dsmode, - &fd); - if (err != GRPC_ERROR_NONE) return err; - err = grpc_tcp_server_prepare_socket(fd, &listener->addr, true, &port); - if (err != GRPC_ERROR_NONE) return err; - listener->server->nports++; - grpc_sockaddr_to_string(&addr_str, &listener->addr, 1); - gpr_asprintf(&name, "tcp-server-listener:%s/clone-%d", addr_str, i); - sp = (grpc_tcp_listener *)gpr_malloc(sizeof(grpc_tcp_listener)); - sp->next = listener->next; - listener->next = sp; - /* sp (the new listener) is a sibling of 'listener' (the original - listener). */ - sp->is_sibling = 1; - sp->sibling = listener->sibling; - listener->sibling = sp; - sp->server = listener->server; - sp->fd = fd; - sp->emfd = grpc_fd_create(fd, name); - memcpy(&sp->addr, &listener->addr, sizeof(grpc_resolved_address)); - sp->port = port; - sp->port_index = listener->port_index; - sp->fd_index = listener->fd_index + count - i; - GPR_ASSERT(sp->emfd); - while (listener->server->tail->next != NULL) { - listener->server->tail = listener->server->tail->next; - } - gpr_free(addr_str); - gpr_free(name); - } - - return GRPC_ERROR_NONE; -} - -grpc_error *grpc_tcp_server_add_port(grpc_tcp_server *s, - const grpc_resolved_address *addr, - int *out_port) { - grpc_tcp_listener *sp; - grpc_resolved_address sockname_temp; - grpc_resolved_address addr6_v4mapped; - int requested_port = grpc_sockaddr_get_port(addr); - unsigned port_index = 0; - grpc_dualstack_mode dsmode; - grpc_error *err; - *out_port = -1; - if (s->tail != NULL) { - port_index = s->tail->port_index + 1; - } - grpc_unlink_if_unix_domain_socket(addr); - - /* Check if this is a wildcard port, and if so, try to keep the port the same - as some previously created listener. */ - if (requested_port == 0) { - for (sp = s->head; sp; sp = sp->next) { - sockname_temp.len = sizeof(struct sockaddr_storage); - if (0 == getsockname(sp->fd, (struct sockaddr *)&sockname_temp.addr, - (socklen_t *)&sockname_temp.len)) { - int used_port = grpc_sockaddr_get_port(&sockname_temp); - if (used_port > 0) { - memcpy(&sockname_temp, addr, sizeof(grpc_resolved_address)); - grpc_sockaddr_set_port(&sockname_temp, used_port); - requested_port = used_port; - addr = &sockname_temp; - break; - } - } - } - } - if (grpc_sockaddr_is_wildcard(addr, &requested_port)) { - return add_wildcard_addrs_to_server(s, port_index, requested_port, - out_port); - } - if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { - addr = &addr6_v4mapped; - } - if ((err = grpc_tcp_server_add_addr(s, addr, port_index, 0, &dsmode, &sp)) == - GRPC_ERROR_NONE) { - *out_port = sp->port; - } - return err; -} - -/* Return listener at port_index or NULL. Should only be called with s->mu - locked. */ -static grpc_tcp_listener *get_port_index(grpc_tcp_server *s, - unsigned port_index) { - unsigned num_ports = 0; - grpc_tcp_listener *sp; - for (sp = s->head; sp; sp = sp->next) { - if (!sp->is_sibling) { - if (++num_ports > port_index) { - return sp; - } - } - } - return NULL; -} - -unsigned grpc_tcp_server_port_fd_count(grpc_tcp_server *s, - unsigned port_index) { - unsigned num_fds = 0; - gpr_mu_lock(&s->mu); - grpc_tcp_listener *sp = get_port_index(s, port_index); - for (; sp; sp = sp->sibling) { - ++num_fds; - } - gpr_mu_unlock(&s->mu); - return num_fds; -} - -int grpc_tcp_server_port_fd(grpc_tcp_server *s, unsigned port_index, - unsigned fd_index) { - gpr_mu_lock(&s->mu); - grpc_tcp_listener *sp = get_port_index(s, port_index); - for (; sp; sp = sp->sibling, --fd_index) { - if (fd_index == 0) { - gpr_mu_unlock(&s->mu); - return sp->fd; - } - } - gpr_mu_unlock(&s->mu); - return -1; -} - -void grpc_tcp_server_start(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s, - grpc_pollset **pollsets, size_t pollset_count, - grpc_tcp_server_cb on_accept_cb, - void *on_accept_cb_arg) { - size_t i; - grpc_tcp_listener *sp; - GPR_ASSERT(on_accept_cb); - gpr_mu_lock(&s->mu); - GPR_ASSERT(!s->on_accept_cb); - GPR_ASSERT(s->active_ports == 0); - s->on_accept_cb = on_accept_cb; - s->on_accept_cb_arg = on_accept_cb_arg; - s->pollsets = pollsets; - s->pollset_count = pollset_count; - sp = s->head; - while (sp != NULL) { - if (s->so_reuseport && !grpc_is_unix_socket(&sp->addr) && - pollset_count > 1) { - GPR_ASSERT(GRPC_LOG_IF_ERROR( - "clone_port", clone_port(sp, (unsigned)(pollset_count - 1)))); - for (i = 0; i < pollset_count; i++) { - grpc_pollset_add_fd(exec_ctx, pollsets[i], sp->emfd); - GRPC_CLOSURE_INIT(&sp->read_closure, on_read, sp, - grpc_schedule_on_exec_ctx); - grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); - s->active_ports++; - sp = sp->next; - } - } else { - for (i = 0; i < pollset_count; i++) { - grpc_pollset_add_fd(exec_ctx, pollsets[i], sp->emfd); - } - GRPC_CLOSURE_INIT(&sp->read_closure, on_read, sp, - grpc_schedule_on_exec_ctx); - grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); - s->active_ports++; - sp = sp->next; - } - } - gpr_mu_unlock(&s->mu); -} - -grpc_tcp_server *grpc_tcp_server_ref(grpc_tcp_server *s) { - gpr_ref_non_zero(&s->refs); - return s; -} - -void grpc_tcp_server_shutdown_starting_add(grpc_tcp_server *s, - grpc_closure *shutdown_starting) { - gpr_mu_lock(&s->mu); - grpc_closure_list_append(&s->shutdown_starting, shutdown_starting, - GRPC_ERROR_NONE); - gpr_mu_unlock(&s->mu); -} - -void grpc_tcp_server_unref(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { - if (gpr_unref(&s->refs)) { - grpc_tcp_server_shutdown_listeners(exec_ctx, s); - gpr_mu_lock(&s->mu); - GRPC_CLOSURE_LIST_SCHED(exec_ctx, &s->shutdown_starting); - gpr_mu_unlock(&s->mu); - tcp_server_destroy(exec_ctx, s); - } -} - -void grpc_tcp_server_shutdown_listeners(grpc_exec_ctx *exec_ctx, - grpc_tcp_server *s) { - gpr_mu_lock(&s->mu); - s->shutdown_listeners = true; - /* shutdown all fd's */ - if (s->active_ports) { - grpc_tcp_listener *sp; - for (sp = s->head; sp; sp = sp->next) { - grpc_fd_shutdown(exec_ctx, sp->emfd, - GRPC_ERROR_CREATE_FROM_STATIC_STRING("Server shutdown")); - } - } - gpr_mu_unlock(&s->mu); -} - -#endif diff --git a/src/core/lib/iomgr/tcp_server_posix.cc b/src/core/lib/iomgr/tcp_server_posix.cc new file mode 100644 index 0000000000..06612d639c --- /dev/null +++ b/src/core/lib/iomgr/tcp_server_posix.cc @@ -0,0 +1,565 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* FIXME: "posix" files shouldn't be depending on _GNU_SOURCE */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/iomgr/tcp_server.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/iomgr/resolve_address.h" +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/iomgr/socket_utils_posix.h" +#include "src/core/lib/iomgr/tcp_posix.h" +#include "src/core/lib/iomgr/tcp_server_utils_posix.h" +#include "src/core/lib/iomgr/unix_sockets_posix.h" +#include "src/core/lib/support/string.h" + +static gpr_once check_init = GPR_ONCE_INIT; +static bool has_so_reuseport = false; + +static void init(void) { +#ifndef GPR_MANYLINUX1 + int s = socket(AF_INET, SOCK_STREAM, 0); + if (s >= 0) { + has_so_reuseport = GRPC_LOG_IF_ERROR("check for SO_REUSEPORT", + grpc_set_socket_reuse_port(s, 1)); + close(s); + } +#endif +} + +grpc_error *grpc_tcp_server_create(grpc_exec_ctx *exec_ctx, + grpc_closure *shutdown_complete, + const grpc_channel_args *args, + grpc_tcp_server **server) { + gpr_once_init(&check_init, init); + + grpc_tcp_server *s = (grpc_tcp_server *)gpr_zalloc(sizeof(grpc_tcp_server)); + s->so_reuseport = has_so_reuseport; + s->expand_wildcard_addrs = false; + for (size_t i = 0; i < (args == NULL ? 0 : args->num_args); i++) { + if (0 == strcmp(GRPC_ARG_ALLOW_REUSEPORT, args->args[i].key)) { + if (args->args[i].type == GRPC_ARG_INTEGER) { + s->so_reuseport = + has_so_reuseport && (args->args[i].value.integer != 0); + } else { + gpr_free(s); + return GRPC_ERROR_CREATE_FROM_STATIC_STRING(GRPC_ARG_ALLOW_REUSEPORT + " must be an integer"); + } + } else if (0 == strcmp(GRPC_ARG_EXPAND_WILDCARD_ADDRS, args->args[i].key)) { + if (args->args[i].type == GRPC_ARG_INTEGER) { + s->expand_wildcard_addrs = (args->args[i].value.integer != 0); + } else { + gpr_free(s); + return GRPC_ERROR_CREATE_FROM_STATIC_STRING( + GRPC_ARG_EXPAND_WILDCARD_ADDRS " must be an integer"); + } + } + } + gpr_ref_init(&s->refs, 1); + gpr_mu_init(&s->mu); + s->active_ports = 0; + s->destroyed_ports = 0; + s->shutdown = false; + s->shutdown_starting.head = NULL; + s->shutdown_starting.tail = NULL; + s->shutdown_complete = shutdown_complete; + s->on_accept_cb = NULL; + s->on_accept_cb_arg = NULL; + s->head = NULL; + s->tail = NULL; + s->nports = 0; + s->channel_args = grpc_channel_args_copy(args); + gpr_atm_no_barrier_store(&s->next_pollset_to_assign, 0); + *server = s; + return GRPC_ERROR_NONE; +} + +static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { + gpr_mu_lock(&s->mu); + GPR_ASSERT(s->shutdown); + gpr_mu_unlock(&s->mu); + if (s->shutdown_complete != NULL) { + GRPC_CLOSURE_SCHED(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE); + } + + gpr_mu_destroy(&s->mu); + + while (s->head) { + grpc_tcp_listener *sp = s->head; + s->head = sp->next; + gpr_free(sp); + } + grpc_channel_args_destroy(exec_ctx, s->channel_args); + + gpr_free(s); +} + +static void destroyed_port(grpc_exec_ctx *exec_ctx, void *server, + grpc_error *error) { + grpc_tcp_server *s = (grpc_tcp_server *)server; + gpr_mu_lock(&s->mu); + s->destroyed_ports++; + if (s->destroyed_ports == s->nports) { + gpr_mu_unlock(&s->mu); + finish_shutdown(exec_ctx, s); + } else { + GPR_ASSERT(s->destroyed_ports < s->nports); + gpr_mu_unlock(&s->mu); + } +} + +/* called when all listening endpoints have been shutdown, so no further + events will be received on them - at this point it's safe to destroy + things */ +static void deactivated_all_ports(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { + /* delete ALL the things */ + gpr_mu_lock(&s->mu); + + GPR_ASSERT(s->shutdown); + + if (s->head) { + grpc_tcp_listener *sp; + for (sp = s->head; sp; sp = sp->next) { + grpc_unlink_if_unix_domain_socket(&sp->addr); + GRPC_CLOSURE_INIT(&sp->destroyed_closure, destroyed_port, s, + grpc_schedule_on_exec_ctx); + grpc_fd_orphan(exec_ctx, sp->emfd, &sp->destroyed_closure, NULL, + false /* already_closed */, "tcp_listener_shutdown"); + } + gpr_mu_unlock(&s->mu); + } else { + gpr_mu_unlock(&s->mu); + finish_shutdown(exec_ctx, s); + } +} + +static void tcp_server_destroy(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { + gpr_mu_lock(&s->mu); + + GPR_ASSERT(!s->shutdown); + s->shutdown = true; + + /* shutdown all fd's */ + if (s->active_ports) { + grpc_tcp_listener *sp; + for (sp = s->head; sp; sp = sp->next) { + grpc_fd_shutdown(exec_ctx, sp->emfd, GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "Server destroyed")); + } + gpr_mu_unlock(&s->mu); + } else { + gpr_mu_unlock(&s->mu); + deactivated_all_ports(exec_ctx, s); + } +} + +/* event manager callback when reads are ready */ +static void on_read(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *err) { + grpc_tcp_listener *sp = (grpc_tcp_listener *)arg; + grpc_pollset *read_notifier_pollset; + if (err != GRPC_ERROR_NONE) { + goto error; + } + + read_notifier_pollset = + sp->server->pollsets[(size_t)gpr_atm_no_barrier_fetch_add( + &sp->server->next_pollset_to_assign, 1) % + sp->server->pollset_count]; + + /* loop until accept4 returns EAGAIN, and then re-arm notification */ + for (;;) { + grpc_resolved_address addr; + char *addr_str; + char *name; + addr.len = sizeof(struct sockaddr_storage); + /* Note: If we ever decide to return this address to the user, remember to + strip off the ::ffff:0.0.0.0/96 prefix first. */ + int fd = grpc_accept4(sp->fd, &addr, 1, 1); + if (fd < 0) { + switch (errno) { + case EINTR: + continue; + case EAGAIN: + grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); + return; + default: + gpr_mu_lock(&sp->server->mu); + if (!sp->server->shutdown_listeners) { + gpr_log(GPR_ERROR, "Failed accept4: %s", strerror(errno)); + } else { + /* if we have shutdown listeners, accept4 could fail, and we + needn't notify users */ + } + gpr_mu_unlock(&sp->server->mu); + goto error; + } + } + + grpc_set_socket_no_sigpipe_if_possible(fd); + + addr_str = grpc_sockaddr_to_uri(&addr); + gpr_asprintf(&name, "tcp-server-connection:%s", addr_str); + + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "SERVER_CONNECT: incoming connection: %s", addr_str); + } + + grpc_fd *fdobj = grpc_fd_create(fd, name); + + grpc_pollset_add_fd(exec_ctx, read_notifier_pollset, fdobj); + + // Create acceptor. + grpc_tcp_server_acceptor *acceptor = + (grpc_tcp_server_acceptor *)gpr_malloc(sizeof(*acceptor)); + acceptor->from_server = sp->server; + acceptor->port_index = sp->port_index; + acceptor->fd_index = sp->fd_index; + + sp->server->on_accept_cb( + exec_ctx, sp->server->on_accept_cb_arg, + grpc_tcp_create(exec_ctx, fdobj, sp->server->channel_args, addr_str), + read_notifier_pollset, acceptor); + + gpr_free(name); + gpr_free(addr_str); + } + + GPR_UNREACHABLE_CODE(return ); + +error: + gpr_mu_lock(&sp->server->mu); + if (0 == --sp->server->active_ports && sp->server->shutdown) { + gpr_mu_unlock(&sp->server->mu); + deactivated_all_ports(exec_ctx, sp->server); + } else { + gpr_mu_unlock(&sp->server->mu); + } +} + +/* Treat :: or 0.0.0.0 as a family-agnostic wildcard. */ +static grpc_error *add_wildcard_addrs_to_server(grpc_tcp_server *s, + unsigned port_index, + int requested_port, + int *out_port) { + grpc_resolved_address wild4; + grpc_resolved_address wild6; + unsigned fd_index = 0; + grpc_dualstack_mode dsmode; + grpc_tcp_listener *sp = NULL; + grpc_tcp_listener *sp2 = NULL; + grpc_error *v6_err = GRPC_ERROR_NONE; + grpc_error *v4_err = GRPC_ERROR_NONE; + *out_port = -1; + + if (grpc_tcp_server_have_ifaddrs() && s->expand_wildcard_addrs) { + return grpc_tcp_server_add_all_local_addrs(s, port_index, requested_port, + out_port); + } + + grpc_sockaddr_make_wildcards(requested_port, &wild4, &wild6); + /* Try listening on IPv6 first. */ + if ((v6_err = grpc_tcp_server_add_addr(s, &wild6, port_index, fd_index, + &dsmode, &sp)) == GRPC_ERROR_NONE) { + ++fd_index; + requested_port = *out_port = sp->port; + if (dsmode == GRPC_DSMODE_DUALSTACK || dsmode == GRPC_DSMODE_IPV4) { + return GRPC_ERROR_NONE; + } + } + /* If we got a v6-only socket or nothing, try adding 0.0.0.0. */ + grpc_sockaddr_set_port(&wild4, requested_port); + if ((v4_err = grpc_tcp_server_add_addr(s, &wild4, port_index, fd_index, + &dsmode, &sp2)) == GRPC_ERROR_NONE) { + *out_port = sp2->port; + if (sp != NULL) { + sp2->is_sibling = 1; + sp->sibling = sp2; + } + } + if (*out_port > 0) { + if (v6_err != GRPC_ERROR_NONE) { + gpr_log(GPR_INFO, + "Failed to add :: listener, " + "the environment may not support IPv6: %s", + grpc_error_string(v6_err)); + GRPC_ERROR_UNREF(v6_err); + } + if (v4_err != GRPC_ERROR_NONE) { + gpr_log(GPR_INFO, + "Failed to add 0.0.0.0 listener, " + "the environment may not support IPv4: %s", + grpc_error_string(v4_err)); + GRPC_ERROR_UNREF(v4_err); + } + return GRPC_ERROR_NONE; + } else { + grpc_error *root_err = GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "Failed to add any wildcard listeners"); + GPR_ASSERT(v6_err != GRPC_ERROR_NONE && v4_err != GRPC_ERROR_NONE); + root_err = grpc_error_add_child(root_err, v6_err); + root_err = grpc_error_add_child(root_err, v4_err); + return root_err; + } +} + +static grpc_error *clone_port(grpc_tcp_listener *listener, unsigned count) { + grpc_tcp_listener *sp = NULL; + char *addr_str; + char *name; + grpc_error *err; + + for (grpc_tcp_listener *l = listener->next; l && l->is_sibling; l = l->next) { + l->fd_index += count; + } + + for (unsigned i = 0; i < count; i++) { + int fd = -1; + int port = -1; + grpc_dualstack_mode dsmode; + err = grpc_create_dualstack_socket(&listener->addr, SOCK_STREAM, 0, &dsmode, + &fd); + if (err != GRPC_ERROR_NONE) return err; + err = grpc_tcp_server_prepare_socket(fd, &listener->addr, true, &port); + if (err != GRPC_ERROR_NONE) return err; + listener->server->nports++; + grpc_sockaddr_to_string(&addr_str, &listener->addr, 1); + gpr_asprintf(&name, "tcp-server-listener:%s/clone-%d", addr_str, i); + sp = (grpc_tcp_listener *)gpr_malloc(sizeof(grpc_tcp_listener)); + sp->next = listener->next; + listener->next = sp; + /* sp (the new listener) is a sibling of 'listener' (the original + listener). */ + sp->is_sibling = 1; + sp->sibling = listener->sibling; + listener->sibling = sp; + sp->server = listener->server; + sp->fd = fd; + sp->emfd = grpc_fd_create(fd, name); + memcpy(&sp->addr, &listener->addr, sizeof(grpc_resolved_address)); + sp->port = port; + sp->port_index = listener->port_index; + sp->fd_index = listener->fd_index + count - i; + GPR_ASSERT(sp->emfd); + while (listener->server->tail->next != NULL) { + listener->server->tail = listener->server->tail->next; + } + gpr_free(addr_str); + gpr_free(name); + } + + return GRPC_ERROR_NONE; +} + +grpc_error *grpc_tcp_server_add_port(grpc_tcp_server *s, + const grpc_resolved_address *addr, + int *out_port) { + grpc_tcp_listener *sp; + grpc_resolved_address sockname_temp; + grpc_resolved_address addr6_v4mapped; + int requested_port = grpc_sockaddr_get_port(addr); + unsigned port_index = 0; + grpc_dualstack_mode dsmode; + grpc_error *err; + *out_port = -1; + if (s->tail != NULL) { + port_index = s->tail->port_index + 1; + } + grpc_unlink_if_unix_domain_socket(addr); + + /* Check if this is a wildcard port, and if so, try to keep the port the same + as some previously created listener. */ + if (requested_port == 0) { + for (sp = s->head; sp; sp = sp->next) { + sockname_temp.len = sizeof(struct sockaddr_storage); + if (0 == getsockname(sp->fd, (struct sockaddr *)&sockname_temp.addr, + (socklen_t *)&sockname_temp.len)) { + int used_port = grpc_sockaddr_get_port(&sockname_temp); + if (used_port > 0) { + memcpy(&sockname_temp, addr, sizeof(grpc_resolved_address)); + grpc_sockaddr_set_port(&sockname_temp, used_port); + requested_port = used_port; + addr = &sockname_temp; + break; + } + } + } + } + if (grpc_sockaddr_is_wildcard(addr, &requested_port)) { + return add_wildcard_addrs_to_server(s, port_index, requested_port, + out_port); + } + if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { + addr = &addr6_v4mapped; + } + if ((err = grpc_tcp_server_add_addr(s, addr, port_index, 0, &dsmode, &sp)) == + GRPC_ERROR_NONE) { + *out_port = sp->port; + } + return err; +} + +/* Return listener at port_index or NULL. Should only be called with s->mu + locked. */ +static grpc_tcp_listener *get_port_index(grpc_tcp_server *s, + unsigned port_index) { + unsigned num_ports = 0; + grpc_tcp_listener *sp; + for (sp = s->head; sp; sp = sp->next) { + if (!sp->is_sibling) { + if (++num_ports > port_index) { + return sp; + } + } + } + return NULL; +} + +unsigned grpc_tcp_server_port_fd_count(grpc_tcp_server *s, + unsigned port_index) { + unsigned num_fds = 0; + gpr_mu_lock(&s->mu); + grpc_tcp_listener *sp = get_port_index(s, port_index); + for (; sp; sp = sp->sibling) { + ++num_fds; + } + gpr_mu_unlock(&s->mu); + return num_fds; +} + +int grpc_tcp_server_port_fd(grpc_tcp_server *s, unsigned port_index, + unsigned fd_index) { + gpr_mu_lock(&s->mu); + grpc_tcp_listener *sp = get_port_index(s, port_index); + for (; sp; sp = sp->sibling, --fd_index) { + if (fd_index == 0) { + gpr_mu_unlock(&s->mu); + return sp->fd; + } + } + gpr_mu_unlock(&s->mu); + return -1; +} + +void grpc_tcp_server_start(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s, + grpc_pollset **pollsets, size_t pollset_count, + grpc_tcp_server_cb on_accept_cb, + void *on_accept_cb_arg) { + size_t i; + grpc_tcp_listener *sp; + GPR_ASSERT(on_accept_cb); + gpr_mu_lock(&s->mu); + GPR_ASSERT(!s->on_accept_cb); + GPR_ASSERT(s->active_ports == 0); + s->on_accept_cb = on_accept_cb; + s->on_accept_cb_arg = on_accept_cb_arg; + s->pollsets = pollsets; + s->pollset_count = pollset_count; + sp = s->head; + while (sp != NULL) { + if (s->so_reuseport && !grpc_is_unix_socket(&sp->addr) && + pollset_count > 1) { + GPR_ASSERT(GRPC_LOG_IF_ERROR( + "clone_port", clone_port(sp, (unsigned)(pollset_count - 1)))); + for (i = 0; i < pollset_count; i++) { + grpc_pollset_add_fd(exec_ctx, pollsets[i], sp->emfd); + GRPC_CLOSURE_INIT(&sp->read_closure, on_read, sp, + grpc_schedule_on_exec_ctx); + grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); + s->active_ports++; + sp = sp->next; + } + } else { + for (i = 0; i < pollset_count; i++) { + grpc_pollset_add_fd(exec_ctx, pollsets[i], sp->emfd); + } + GRPC_CLOSURE_INIT(&sp->read_closure, on_read, sp, + grpc_schedule_on_exec_ctx); + grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); + s->active_ports++; + sp = sp->next; + } + } + gpr_mu_unlock(&s->mu); +} + +grpc_tcp_server *grpc_tcp_server_ref(grpc_tcp_server *s) { + gpr_ref_non_zero(&s->refs); + return s; +} + +void grpc_tcp_server_shutdown_starting_add(grpc_tcp_server *s, + grpc_closure *shutdown_starting) { + gpr_mu_lock(&s->mu); + grpc_closure_list_append(&s->shutdown_starting, shutdown_starting, + GRPC_ERROR_NONE); + gpr_mu_unlock(&s->mu); +} + +void grpc_tcp_server_unref(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { + if (gpr_unref(&s->refs)) { + grpc_tcp_server_shutdown_listeners(exec_ctx, s); + gpr_mu_lock(&s->mu); + GRPC_CLOSURE_LIST_SCHED(exec_ctx, &s->shutdown_starting); + gpr_mu_unlock(&s->mu); + tcp_server_destroy(exec_ctx, s); + } +} + +void grpc_tcp_server_shutdown_listeners(grpc_exec_ctx *exec_ctx, + grpc_tcp_server *s) { + gpr_mu_lock(&s->mu); + s->shutdown_listeners = true; + /* shutdown all fd's */ + if (s->active_ports) { + grpc_tcp_listener *sp; + for (sp = s->head; sp; sp = sp->next) { + grpc_fd_shutdown(exec_ctx, sp->emfd, + GRPC_ERROR_CREATE_FROM_STATIC_STRING("Server shutdown")); + } + } + gpr_mu_unlock(&s->mu); +} + +#endif diff --git a/src/core/lib/iomgr/tcp_server_utils_posix_common.c b/src/core/lib/iomgr/tcp_server_utils_posix_common.c deleted file mode 100644 index a828bee074..0000000000 --- a/src/core/lib/iomgr/tcp_server_utils_posix_common.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/iomgr/tcp_server_utils_posix.h" - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "src/core/lib/iomgr/error.h" -#include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/iomgr/unix_sockets_posix.h" - -#define MIN_SAFE_ACCEPT_QUEUE_SIZE 100 - -static gpr_once s_init_max_accept_queue_size = GPR_ONCE_INIT; -static int s_max_accept_queue_size; - -/* get max listen queue size on linux */ -static void init_max_accept_queue_size(void) { - int n = SOMAXCONN; - char buf[64]; - FILE *fp = fopen("/proc/sys/net/core/somaxconn", "r"); - if (fp == NULL) { - /* 2.4 kernel. */ - s_max_accept_queue_size = SOMAXCONN; - return; - } - if (fgets(buf, sizeof buf, fp)) { - char *end; - long i = strtol(buf, &end, 10); - if (i > 0 && i <= INT_MAX && end && *end == 0) { - n = (int)i; - } - } - fclose(fp); - s_max_accept_queue_size = n; - - if (s_max_accept_queue_size < MIN_SAFE_ACCEPT_QUEUE_SIZE) { - gpr_log(GPR_INFO, - "Suspiciously small accept queue (%d) will probably lead to " - "connection drops", - s_max_accept_queue_size); - } -} - -static int get_max_accept_queue_size(void) { - gpr_once_init(&s_init_max_accept_queue_size, init_max_accept_queue_size); - return s_max_accept_queue_size; -} - -static grpc_error *add_socket_to_server(grpc_tcp_server *s, int fd, - const grpc_resolved_address *addr, - unsigned port_index, unsigned fd_index, - grpc_tcp_listener **listener) { - grpc_tcp_listener *sp = NULL; - int port = -1; - char *addr_str; - char *name; - - grpc_error *err = - grpc_tcp_server_prepare_socket(fd, addr, s->so_reuseport, &port); - if (err == GRPC_ERROR_NONE) { - GPR_ASSERT(port > 0); - grpc_sockaddr_to_string(&addr_str, addr, 1); - gpr_asprintf(&name, "tcp-server-listener:%s", addr_str); - gpr_mu_lock(&s->mu); - s->nports++; - GPR_ASSERT(!s->on_accept_cb && "must add ports before starting server"); - sp = (grpc_tcp_listener *)gpr_malloc(sizeof(grpc_tcp_listener)); - sp->next = NULL; - if (s->head == NULL) { - s->head = sp; - } else { - s->tail->next = sp; - } - s->tail = sp; - sp->server = s; - sp->fd = fd; - sp->emfd = grpc_fd_create(fd, name); - memcpy(&sp->addr, addr, sizeof(grpc_resolved_address)); - sp->port = port; - sp->port_index = port_index; - sp->fd_index = fd_index; - sp->is_sibling = 0; - sp->sibling = NULL; - GPR_ASSERT(sp->emfd); - gpr_mu_unlock(&s->mu); - gpr_free(addr_str); - gpr_free(name); - } - - *listener = sp; - return err; -} - -/* If successful, add a listener to s for addr, set *dsmode for the socket, and - return the *listener. */ -grpc_error *grpc_tcp_server_add_addr(grpc_tcp_server *s, - const grpc_resolved_address *addr, - unsigned port_index, unsigned fd_index, - grpc_dualstack_mode *dsmode, - grpc_tcp_listener **listener) { - grpc_resolved_address addr4_copy; - int fd; - grpc_error *err = - grpc_create_dualstack_socket(addr, SOCK_STREAM, 0, dsmode, &fd); - if (err != GRPC_ERROR_NONE) { - return err; - } - if (*dsmode == GRPC_DSMODE_IPV4 && - grpc_sockaddr_is_v4mapped(addr, &addr4_copy)) { - addr = &addr4_copy; - } - return add_socket_to_server(s, fd, addr, port_index, fd_index, listener); -} - -/* Prepare a recently-created socket for listening. */ -grpc_error *grpc_tcp_server_prepare_socket(int fd, - const grpc_resolved_address *addr, - bool so_reuseport, int *port) { - grpc_resolved_address sockname_temp; - grpc_error *err = GRPC_ERROR_NONE; - - GPR_ASSERT(fd >= 0); - - if (so_reuseport && !grpc_is_unix_socket(addr)) { - err = grpc_set_socket_reuse_port(fd, 1); - if (err != GRPC_ERROR_NONE) goto error; - } - - err = grpc_set_socket_nonblocking(fd, 1); - if (err != GRPC_ERROR_NONE) goto error; - err = grpc_set_socket_cloexec(fd, 1); - if (err != GRPC_ERROR_NONE) goto error; - if (!grpc_is_unix_socket(addr)) { - err = grpc_set_socket_low_latency(fd, 1); - if (err != GRPC_ERROR_NONE) goto error; - err = grpc_set_socket_reuse_addr(fd, 1); - if (err != GRPC_ERROR_NONE) goto error; - } - err = grpc_set_socket_no_sigpipe_if_possible(fd); - if (err != GRPC_ERROR_NONE) goto error; - - GPR_ASSERT(addr->len < ~(socklen_t)0); - if (bind(fd, (struct sockaddr *)addr->addr, (socklen_t)addr->len) < 0) { - err = GRPC_OS_ERROR(errno, "bind"); - goto error; - } - - if (listen(fd, get_max_accept_queue_size()) < 0) { - err = GRPC_OS_ERROR(errno, "listen"); - goto error; - } - - sockname_temp.len = sizeof(struct sockaddr_storage); - - if (getsockname(fd, (struct sockaddr *)sockname_temp.addr, - (socklen_t *)&sockname_temp.len) < 0) { - err = GRPC_OS_ERROR(errno, "getsockname"); - goto error; - } - - *port = grpc_sockaddr_get_port(&sockname_temp); - return GRPC_ERROR_NONE; - -error: - GPR_ASSERT(err != GRPC_ERROR_NONE); - if (fd >= 0) { - close(fd); - } - grpc_error *ret = - grpc_error_set_int(GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "Unable to configure socket", &err, 1), - GRPC_ERROR_INT_FD, fd); - GRPC_ERROR_UNREF(err); - return ret; -} - -#endif /* GRPC_POSIX_SOCKET */ diff --git a/src/core/lib/iomgr/tcp_server_utils_posix_common.cc b/src/core/lib/iomgr/tcp_server_utils_posix_common.cc new file mode 100644 index 0000000000..a828bee074 --- /dev/null +++ b/src/core/lib/iomgr/tcp_server_utils_posix_common.cc @@ -0,0 +1,206 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/iomgr/tcp_server_utils_posix.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "src/core/lib/iomgr/error.h" +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/iomgr/unix_sockets_posix.h" + +#define MIN_SAFE_ACCEPT_QUEUE_SIZE 100 + +static gpr_once s_init_max_accept_queue_size = GPR_ONCE_INIT; +static int s_max_accept_queue_size; + +/* get max listen queue size on linux */ +static void init_max_accept_queue_size(void) { + int n = SOMAXCONN; + char buf[64]; + FILE *fp = fopen("/proc/sys/net/core/somaxconn", "r"); + if (fp == NULL) { + /* 2.4 kernel. */ + s_max_accept_queue_size = SOMAXCONN; + return; + } + if (fgets(buf, sizeof buf, fp)) { + char *end; + long i = strtol(buf, &end, 10); + if (i > 0 && i <= INT_MAX && end && *end == 0) { + n = (int)i; + } + } + fclose(fp); + s_max_accept_queue_size = n; + + if (s_max_accept_queue_size < MIN_SAFE_ACCEPT_QUEUE_SIZE) { + gpr_log(GPR_INFO, + "Suspiciously small accept queue (%d) will probably lead to " + "connection drops", + s_max_accept_queue_size); + } +} + +static int get_max_accept_queue_size(void) { + gpr_once_init(&s_init_max_accept_queue_size, init_max_accept_queue_size); + return s_max_accept_queue_size; +} + +static grpc_error *add_socket_to_server(grpc_tcp_server *s, int fd, + const grpc_resolved_address *addr, + unsigned port_index, unsigned fd_index, + grpc_tcp_listener **listener) { + grpc_tcp_listener *sp = NULL; + int port = -1; + char *addr_str; + char *name; + + grpc_error *err = + grpc_tcp_server_prepare_socket(fd, addr, s->so_reuseport, &port); + if (err == GRPC_ERROR_NONE) { + GPR_ASSERT(port > 0); + grpc_sockaddr_to_string(&addr_str, addr, 1); + gpr_asprintf(&name, "tcp-server-listener:%s", addr_str); + gpr_mu_lock(&s->mu); + s->nports++; + GPR_ASSERT(!s->on_accept_cb && "must add ports before starting server"); + sp = (grpc_tcp_listener *)gpr_malloc(sizeof(grpc_tcp_listener)); + sp->next = NULL; + if (s->head == NULL) { + s->head = sp; + } else { + s->tail->next = sp; + } + s->tail = sp; + sp->server = s; + sp->fd = fd; + sp->emfd = grpc_fd_create(fd, name); + memcpy(&sp->addr, addr, sizeof(grpc_resolved_address)); + sp->port = port; + sp->port_index = port_index; + sp->fd_index = fd_index; + sp->is_sibling = 0; + sp->sibling = NULL; + GPR_ASSERT(sp->emfd); + gpr_mu_unlock(&s->mu); + gpr_free(addr_str); + gpr_free(name); + } + + *listener = sp; + return err; +} + +/* If successful, add a listener to s for addr, set *dsmode for the socket, and + return the *listener. */ +grpc_error *grpc_tcp_server_add_addr(grpc_tcp_server *s, + const grpc_resolved_address *addr, + unsigned port_index, unsigned fd_index, + grpc_dualstack_mode *dsmode, + grpc_tcp_listener **listener) { + grpc_resolved_address addr4_copy; + int fd; + grpc_error *err = + grpc_create_dualstack_socket(addr, SOCK_STREAM, 0, dsmode, &fd); + if (err != GRPC_ERROR_NONE) { + return err; + } + if (*dsmode == GRPC_DSMODE_IPV4 && + grpc_sockaddr_is_v4mapped(addr, &addr4_copy)) { + addr = &addr4_copy; + } + return add_socket_to_server(s, fd, addr, port_index, fd_index, listener); +} + +/* Prepare a recently-created socket for listening. */ +grpc_error *grpc_tcp_server_prepare_socket(int fd, + const grpc_resolved_address *addr, + bool so_reuseport, int *port) { + grpc_resolved_address sockname_temp; + grpc_error *err = GRPC_ERROR_NONE; + + GPR_ASSERT(fd >= 0); + + if (so_reuseport && !grpc_is_unix_socket(addr)) { + err = grpc_set_socket_reuse_port(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; + } + + err = grpc_set_socket_nonblocking(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; + err = grpc_set_socket_cloexec(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; + if (!grpc_is_unix_socket(addr)) { + err = grpc_set_socket_low_latency(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; + err = grpc_set_socket_reuse_addr(fd, 1); + if (err != GRPC_ERROR_NONE) goto error; + } + err = grpc_set_socket_no_sigpipe_if_possible(fd); + if (err != GRPC_ERROR_NONE) goto error; + + GPR_ASSERT(addr->len < ~(socklen_t)0); + if (bind(fd, (struct sockaddr *)addr->addr, (socklen_t)addr->len) < 0) { + err = GRPC_OS_ERROR(errno, "bind"); + goto error; + } + + if (listen(fd, get_max_accept_queue_size()) < 0) { + err = GRPC_OS_ERROR(errno, "listen"); + goto error; + } + + sockname_temp.len = sizeof(struct sockaddr_storage); + + if (getsockname(fd, (struct sockaddr *)sockname_temp.addr, + (socklen_t *)&sockname_temp.len) < 0) { + err = GRPC_OS_ERROR(errno, "getsockname"); + goto error; + } + + *port = grpc_sockaddr_get_port(&sockname_temp); + return GRPC_ERROR_NONE; + +error: + GPR_ASSERT(err != GRPC_ERROR_NONE); + if (fd >= 0) { + close(fd); + } + grpc_error *ret = + grpc_error_set_int(GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "Unable to configure socket", &err, 1), + GRPC_ERROR_INT_FD, fd); + GRPC_ERROR_UNREF(err); + return ret; +} + +#endif /* GRPC_POSIX_SOCKET */ diff --git a/src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.c b/src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.c deleted file mode 100644 index a243b69f35..0000000000 --- a/src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_HAVE_IFADDRS - -#include "src/core/lib/iomgr/tcp_server_utils_posix.h" - -#include -#include -#include -#include - -#include -#include -#include - -#include "src/core/lib/iomgr/error.h" -#include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" - -/* Return the listener in s with address addr or NULL. */ -static grpc_tcp_listener *find_listener_with_addr(grpc_tcp_server *s, - grpc_resolved_address *addr) { - grpc_tcp_listener *l; - gpr_mu_lock(&s->mu); - for (l = s->head; l != NULL; l = l->next) { - if (l->addr.len != addr->len) { - continue; - } - if (memcmp(l->addr.addr, addr->addr, addr->len) == 0) { - break; - } - } - gpr_mu_unlock(&s->mu); - return l; -} - -/* Bind to "::" to get a port number not used by any address. */ -static grpc_error *get_unused_port(int *port) { - grpc_resolved_address wild; - grpc_sockaddr_make_wildcard6(0, &wild); - grpc_dualstack_mode dsmode; - int fd; - grpc_error *err = - grpc_create_dualstack_socket(&wild, SOCK_STREAM, 0, &dsmode, &fd); - if (err != GRPC_ERROR_NONE) { - return err; - } - if (dsmode == GRPC_DSMODE_IPV4) { - grpc_sockaddr_make_wildcard4(0, &wild); - } - if (bind(fd, (const struct sockaddr *)wild.addr, (socklen_t)wild.len) != 0) { - err = GRPC_OS_ERROR(errno, "bind"); - close(fd); - return err; - } - if (getsockname(fd, (struct sockaddr *)wild.addr, (socklen_t *)&wild.len) != - 0) { - err = GRPC_OS_ERROR(errno, "getsockname"); - close(fd); - return err; - } - close(fd); - *port = grpc_sockaddr_get_port(&wild); - return *port <= 0 ? GRPC_ERROR_CREATE_FROM_STATIC_STRING("Bad port") - : GRPC_ERROR_NONE; -} - -grpc_error *grpc_tcp_server_add_all_local_addrs(grpc_tcp_server *s, - unsigned port_index, - int requested_port, - int *out_port) { - struct ifaddrs *ifa = NULL; - struct ifaddrs *ifa_it; - unsigned fd_index = 0; - grpc_tcp_listener *sp = NULL; - grpc_error *err = GRPC_ERROR_NONE; - if (requested_port == 0) { - /* Note: There could be a race where some local addrs can listen on the - selected port and some can't. The sane way to handle this would be to - retry by recreating the whole grpc_tcp_server. Backing out individual - listeners and orphaning the FDs looks like too much trouble. */ - if ((err = get_unused_port(&requested_port)) != GRPC_ERROR_NONE) { - return err; - } else if (requested_port <= 0) { - return GRPC_ERROR_CREATE_FROM_STATIC_STRING("Bad get_unused_port()"); - } - gpr_log(GPR_DEBUG, "Picked unused port %d", requested_port); - } - if (getifaddrs(&ifa) != 0 || ifa == NULL) { - return GRPC_OS_ERROR(errno, "getifaddrs"); - } - for (ifa_it = ifa; ifa_it != NULL; ifa_it = ifa_it->ifa_next) { - grpc_resolved_address addr; - char *addr_str = NULL; - grpc_dualstack_mode dsmode; - grpc_tcp_listener *new_sp = NULL; - const char *ifa_name = (ifa_it->ifa_name ? ifa_it->ifa_name : ""); - if (ifa_it->ifa_addr == NULL) { - continue; - } else if (ifa_it->ifa_addr->sa_family == AF_INET) { - addr.len = sizeof(struct sockaddr_in); - } else if (ifa_it->ifa_addr->sa_family == AF_INET6) { - addr.len = sizeof(struct sockaddr_in6); - } else { - continue; - } - memcpy(addr.addr, ifa_it->ifa_addr, addr.len); - if (!grpc_sockaddr_set_port(&addr, requested_port)) { - /* Should never happen, because we check sa_family above. */ - err = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to set port"); - break; - } - if (grpc_sockaddr_to_string(&addr_str, &addr, 0) < 0) { - addr_str = gpr_strdup(""); - } - gpr_log(GPR_DEBUG, - "Adding local addr from interface %s flags 0x%x to server: %s", - ifa_name, ifa_it->ifa_flags, addr_str); - /* We could have multiple interfaces with the same address (e.g., bonding), - so look for duplicates. */ - if (find_listener_with_addr(s, &addr) != NULL) { - gpr_log(GPR_DEBUG, "Skipping duplicate addr %s on interface %s", addr_str, - ifa_name); - gpr_free(addr_str); - continue; - } - if ((err = grpc_tcp_server_add_addr(s, &addr, port_index, fd_index, &dsmode, - &new_sp)) != GRPC_ERROR_NONE) { - char *err_str = NULL; - grpc_error *root_err; - if (gpr_asprintf(&err_str, "Failed to add listener: %s", addr_str) < 0) { - err_str = gpr_strdup("Failed to add listener"); - } - root_err = GRPC_ERROR_CREATE_FROM_COPIED_STRING(err_str); - gpr_free(err_str); - gpr_free(addr_str); - err = grpc_error_add_child(root_err, err); - break; - } else { - GPR_ASSERT(requested_port == new_sp->port); - ++fd_index; - if (sp != NULL) { - new_sp->is_sibling = 1; - sp->sibling = new_sp; - } - sp = new_sp; - } - gpr_free(addr_str); - } - freeifaddrs(ifa); - if (err != GRPC_ERROR_NONE) { - return err; - } else if (sp == NULL) { - return GRPC_ERROR_CREATE_FROM_STATIC_STRING("No local addresses"); - } else { - *out_port = sp->port; - return GRPC_ERROR_NONE; - } -} - -bool grpc_tcp_server_have_ifaddrs(void) { return true; } - -#endif /* GRPC_HAVE_IFADDRS */ diff --git a/src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.cc b/src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.cc new file mode 100644 index 0000000000..a243b69f35 --- /dev/null +++ b/src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.cc @@ -0,0 +1,181 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_HAVE_IFADDRS + +#include "src/core/lib/iomgr/tcp_server_utils_posix.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include "src/core/lib/iomgr/error.h" +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" + +/* Return the listener in s with address addr or NULL. */ +static grpc_tcp_listener *find_listener_with_addr(grpc_tcp_server *s, + grpc_resolved_address *addr) { + grpc_tcp_listener *l; + gpr_mu_lock(&s->mu); + for (l = s->head; l != NULL; l = l->next) { + if (l->addr.len != addr->len) { + continue; + } + if (memcmp(l->addr.addr, addr->addr, addr->len) == 0) { + break; + } + } + gpr_mu_unlock(&s->mu); + return l; +} + +/* Bind to "::" to get a port number not used by any address. */ +static grpc_error *get_unused_port(int *port) { + grpc_resolved_address wild; + grpc_sockaddr_make_wildcard6(0, &wild); + grpc_dualstack_mode dsmode; + int fd; + grpc_error *err = + grpc_create_dualstack_socket(&wild, SOCK_STREAM, 0, &dsmode, &fd); + if (err != GRPC_ERROR_NONE) { + return err; + } + if (dsmode == GRPC_DSMODE_IPV4) { + grpc_sockaddr_make_wildcard4(0, &wild); + } + if (bind(fd, (const struct sockaddr *)wild.addr, (socklen_t)wild.len) != 0) { + err = GRPC_OS_ERROR(errno, "bind"); + close(fd); + return err; + } + if (getsockname(fd, (struct sockaddr *)wild.addr, (socklen_t *)&wild.len) != + 0) { + err = GRPC_OS_ERROR(errno, "getsockname"); + close(fd); + return err; + } + close(fd); + *port = grpc_sockaddr_get_port(&wild); + return *port <= 0 ? GRPC_ERROR_CREATE_FROM_STATIC_STRING("Bad port") + : GRPC_ERROR_NONE; +} + +grpc_error *grpc_tcp_server_add_all_local_addrs(grpc_tcp_server *s, + unsigned port_index, + int requested_port, + int *out_port) { + struct ifaddrs *ifa = NULL; + struct ifaddrs *ifa_it; + unsigned fd_index = 0; + grpc_tcp_listener *sp = NULL; + grpc_error *err = GRPC_ERROR_NONE; + if (requested_port == 0) { + /* Note: There could be a race where some local addrs can listen on the + selected port and some can't. The sane way to handle this would be to + retry by recreating the whole grpc_tcp_server. Backing out individual + listeners and orphaning the FDs looks like too much trouble. */ + if ((err = get_unused_port(&requested_port)) != GRPC_ERROR_NONE) { + return err; + } else if (requested_port <= 0) { + return GRPC_ERROR_CREATE_FROM_STATIC_STRING("Bad get_unused_port()"); + } + gpr_log(GPR_DEBUG, "Picked unused port %d", requested_port); + } + if (getifaddrs(&ifa) != 0 || ifa == NULL) { + return GRPC_OS_ERROR(errno, "getifaddrs"); + } + for (ifa_it = ifa; ifa_it != NULL; ifa_it = ifa_it->ifa_next) { + grpc_resolved_address addr; + char *addr_str = NULL; + grpc_dualstack_mode dsmode; + grpc_tcp_listener *new_sp = NULL; + const char *ifa_name = (ifa_it->ifa_name ? ifa_it->ifa_name : ""); + if (ifa_it->ifa_addr == NULL) { + continue; + } else if (ifa_it->ifa_addr->sa_family == AF_INET) { + addr.len = sizeof(struct sockaddr_in); + } else if (ifa_it->ifa_addr->sa_family == AF_INET6) { + addr.len = sizeof(struct sockaddr_in6); + } else { + continue; + } + memcpy(addr.addr, ifa_it->ifa_addr, addr.len); + if (!grpc_sockaddr_set_port(&addr, requested_port)) { + /* Should never happen, because we check sa_family above. */ + err = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to set port"); + break; + } + if (grpc_sockaddr_to_string(&addr_str, &addr, 0) < 0) { + addr_str = gpr_strdup(""); + } + gpr_log(GPR_DEBUG, + "Adding local addr from interface %s flags 0x%x to server: %s", + ifa_name, ifa_it->ifa_flags, addr_str); + /* We could have multiple interfaces with the same address (e.g., bonding), + so look for duplicates. */ + if (find_listener_with_addr(s, &addr) != NULL) { + gpr_log(GPR_DEBUG, "Skipping duplicate addr %s on interface %s", addr_str, + ifa_name); + gpr_free(addr_str); + continue; + } + if ((err = grpc_tcp_server_add_addr(s, &addr, port_index, fd_index, &dsmode, + &new_sp)) != GRPC_ERROR_NONE) { + char *err_str = NULL; + grpc_error *root_err; + if (gpr_asprintf(&err_str, "Failed to add listener: %s", addr_str) < 0) { + err_str = gpr_strdup("Failed to add listener"); + } + root_err = GRPC_ERROR_CREATE_FROM_COPIED_STRING(err_str); + gpr_free(err_str); + gpr_free(addr_str); + err = grpc_error_add_child(root_err, err); + break; + } else { + GPR_ASSERT(requested_port == new_sp->port); + ++fd_index; + if (sp != NULL) { + new_sp->is_sibling = 1; + sp->sibling = new_sp; + } + sp = new_sp; + } + gpr_free(addr_str); + } + freeifaddrs(ifa); + if (err != GRPC_ERROR_NONE) { + return err; + } else if (sp == NULL) { + return GRPC_ERROR_CREATE_FROM_STATIC_STRING("No local addresses"); + } else { + *out_port = sp->port; + return GRPC_ERROR_NONE; + } +} + +bool grpc_tcp_server_have_ifaddrs(void) { return true; } + +#endif /* GRPC_HAVE_IFADDRS */ diff --git a/src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.c b/src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.c deleted file mode 100644 index 34eab20d6a..0000000000 --- a/src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#if defined(GRPC_POSIX_SOCKET) && !defined(GRPC_HAVE_IFADDRS) - -#include "src/core/lib/iomgr/tcp_server_utils_posix.h" - -grpc_error *grpc_tcp_server_add_all_local_addrs(grpc_tcp_server *s, - unsigned port_index, - int requested_port, - int *out_port) { - return GRPC_ERROR_CREATE_FROM_STATIC_STRING("no ifaddrs available"); -} - -bool grpc_tcp_server_have_ifaddrs(void) { return false; } - -#endif /* defined(GRPC_POSIX_SOCKET) && !defined(GRPC_HAVE_IFADDRS) */ diff --git a/src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.cc b/src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.cc new file mode 100644 index 0000000000..34eab20d6a --- /dev/null +++ b/src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.cc @@ -0,0 +1,34 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#if defined(GRPC_POSIX_SOCKET) && !defined(GRPC_HAVE_IFADDRS) + +#include "src/core/lib/iomgr/tcp_server_utils_posix.h" + +grpc_error *grpc_tcp_server_add_all_local_addrs(grpc_tcp_server *s, + unsigned port_index, + int requested_port, + int *out_port) { + return GRPC_ERROR_CREATE_FROM_STATIC_STRING("no ifaddrs available"); +} + +bool grpc_tcp_server_have_ifaddrs(void) { return false; } + +#endif /* defined(GRPC_POSIX_SOCKET) && !defined(GRPC_HAVE_IFADDRS) */ diff --git a/src/core/lib/iomgr/tcp_server_uv.c b/src/core/lib/iomgr/tcp_server_uv.c deleted file mode 100644 index 3b9332321f..0000000000 --- a/src/core/lib/iomgr/tcp_server_uv.c +++ /dev/null @@ -1,454 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_UV - -#include -#include - -#include -#include - -#include "src/core/lib/iomgr/error.h" -#include "src/core/lib/iomgr/exec_ctx.h" -#include "src/core/lib/iomgr/iomgr_uv.h" -#include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/iomgr/tcp_server.h" -#include "src/core/lib/iomgr/tcp_uv.h" - -/* one listening port */ -typedef struct grpc_tcp_listener grpc_tcp_listener; -struct grpc_tcp_listener { - uv_tcp_t *handle; - grpc_tcp_server *server; - unsigned port_index; - int port; - /* linked list */ - struct grpc_tcp_listener *next; - - bool closed; - - bool has_pending_connection; -}; - -struct grpc_tcp_server { - gpr_refcount refs; - - /* Called whenever accept() succeeds on a server port. */ - grpc_tcp_server_cb on_accept_cb; - void *on_accept_cb_arg; - - int open_ports; - - /* linked list of server ports */ - grpc_tcp_listener *head; - grpc_tcp_listener *tail; - - /* List of closures passed to shutdown_starting_add(). */ - grpc_closure_list shutdown_starting; - - /* shutdown callback */ - grpc_closure *shutdown_complete; - - bool shutdown; - - grpc_resource_quota *resource_quota; -}; - -grpc_error *grpc_tcp_server_create(grpc_exec_ctx *exec_ctx, - grpc_closure *shutdown_complete, - const grpc_channel_args *args, - grpc_tcp_server **server) { - grpc_tcp_server *s = gpr_malloc(sizeof(grpc_tcp_server)); - s->resource_quota = grpc_resource_quota_create(NULL); - for (size_t i = 0; i < (args == NULL ? 0 : args->num_args); i++) { - if (0 == strcmp(GRPC_ARG_RESOURCE_QUOTA, args->args[i].key)) { - if (args->args[i].type == GRPC_ARG_POINTER) { - grpc_resource_quota_unref_internal(exec_ctx, s->resource_quota); - s->resource_quota = - grpc_resource_quota_ref_internal(args->args[i].value.pointer.p); - } else { - grpc_resource_quota_unref_internal(exec_ctx, s->resource_quota); - gpr_free(s); - return GRPC_ERROR_CREATE_FROM_STATIC_STRING( - GRPC_ARG_RESOURCE_QUOTA " must be a pointer to a buffer pool"); - } - } - } - gpr_ref_init(&s->refs, 1); - s->on_accept_cb = NULL; - s->on_accept_cb_arg = NULL; - s->open_ports = 0; - s->head = NULL; - s->tail = NULL; - s->shutdown_starting.head = NULL; - s->shutdown_starting.tail = NULL; - s->shutdown_complete = shutdown_complete; - s->shutdown = false; - *server = s; - return GRPC_ERROR_NONE; -} - -grpc_tcp_server *grpc_tcp_server_ref(grpc_tcp_server *s) { - GRPC_UV_ASSERT_SAME_THREAD(); - gpr_ref(&s->refs); - return s; -} - -void grpc_tcp_server_shutdown_starting_add(grpc_tcp_server *s, - grpc_closure *shutdown_starting) { - grpc_closure_list_append(&s->shutdown_starting, shutdown_starting, - GRPC_ERROR_NONE); -} - -static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { - GPR_ASSERT(s->shutdown); - if (s->shutdown_complete != NULL) { - GRPC_CLOSURE_SCHED(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE); - } - - while (s->head) { - grpc_tcp_listener *sp = s->head; - s->head = sp->next; - sp->next = NULL; - gpr_free(sp->handle); - gpr_free(sp); - } - grpc_resource_quota_unref_internal(exec_ctx, s->resource_quota); - gpr_free(s); -} - -static void handle_close_callback(uv_handle_t *handle) { - grpc_tcp_listener *sp = (grpc_tcp_listener *)handle->data; - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - sp->server->open_ports--; - if (sp->server->open_ports == 0 && sp->server->shutdown) { - finish_shutdown(&exec_ctx, sp->server); - } - grpc_exec_ctx_finish(&exec_ctx); -} - -static void close_listener(grpc_tcp_listener *sp) { - if (!sp->closed) { - sp->closed = true; - uv_close((uv_handle_t *)sp->handle, handle_close_callback); - } -} - -static void tcp_server_destroy(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { - int immediately_done = 0; - grpc_tcp_listener *sp; - - GPR_ASSERT(!s->shutdown); - s->shutdown = true; - - if (s->open_ports == 0) { - immediately_done = 1; - } - for (sp = s->head; sp; sp = sp->next) { - close_listener(sp); - } - - if (immediately_done) { - finish_shutdown(exec_ctx, s); - } -} - -void grpc_tcp_server_unref(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { - GRPC_UV_ASSERT_SAME_THREAD(); - if (gpr_unref(&s->refs)) { - /* Complete shutdown_starting work before destroying. */ - grpc_exec_ctx local_exec_ctx = GRPC_EXEC_CTX_INIT; - GRPC_CLOSURE_LIST_SCHED(&local_exec_ctx, &s->shutdown_starting); - if (exec_ctx == NULL) { - grpc_exec_ctx_flush(&local_exec_ctx); - tcp_server_destroy(&local_exec_ctx, s); - grpc_exec_ctx_finish(&local_exec_ctx); - } else { - grpc_exec_ctx_finish(&local_exec_ctx); - tcp_server_destroy(exec_ctx, s); - } - } -} - -static void finish_accept(grpc_exec_ctx *exec_ctx, grpc_tcp_listener *sp) { - grpc_tcp_server_acceptor *acceptor = gpr_malloc(sizeof(*acceptor)); - uv_tcp_t *client; - grpc_endpoint *ep = NULL; - grpc_resolved_address peer_name; - char *peer_name_string; - int err; - uv_tcp_t *server = sp->handle; - - client = gpr_malloc(sizeof(uv_tcp_t)); - uv_tcp_init(uv_default_loop(), client); - // UV documentation says this is guaranteed to succeed - uv_accept((uv_stream_t *)server, (uv_stream_t *)client); - peer_name_string = NULL; - memset(&peer_name, 0, sizeof(grpc_resolved_address)); - peer_name.len = sizeof(struct sockaddr_storage); - err = uv_tcp_getpeername(client, (struct sockaddr *)&peer_name.addr, - (int *)&peer_name.len); - if (err == 0) { - peer_name_string = grpc_sockaddr_to_uri(&peer_name); - } else { - gpr_log(GPR_INFO, "uv_tcp_getpeername error: %s", uv_strerror(err)); - } - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - if (peer_name_string) { - gpr_log(GPR_DEBUG, "SERVER_CONNECT: %p accepted connection: %s", - sp->server, peer_name_string); - } else { - gpr_log(GPR_DEBUG, "SERVER_CONNECT: %p accepted connection", sp->server); - } - } - ep = grpc_tcp_create(client, sp->server->resource_quota, peer_name_string); - acceptor->from_server = sp->server; - acceptor->port_index = sp->port_index; - acceptor->fd_index = 0; - sp->server->on_accept_cb(exec_ctx, sp->server->on_accept_cb_arg, ep, NULL, - acceptor); - gpr_free(peer_name_string); -} - -static void on_connect(uv_stream_t *server, int status) { - grpc_tcp_listener *sp = (grpc_tcp_listener *)server->data; - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - - if (status < 0) { - switch (status) { - case UV_EINTR: - case UV_EAGAIN: - return; - default: - close_listener(sp); - return; - } - } - - GPR_ASSERT(!sp->has_pending_connection); - - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "SERVER_CONNECT: %p incoming connection", sp->server); - } - - // Create acceptor. - if (sp->server->on_accept_cb) { - finish_accept(&exec_ctx, sp); - } else { - sp->has_pending_connection = true; - } - grpc_exec_ctx_finish(&exec_ctx); -} - -static grpc_error *add_socket_to_server(grpc_tcp_server *s, uv_tcp_t *handle, - const grpc_resolved_address *addr, - unsigned port_index, - grpc_tcp_listener **listener) { - grpc_tcp_listener *sp = NULL; - int port = -1; - int status; - grpc_error *error; - grpc_resolved_address sockname_temp; - - // The last argument to uv_tcp_bind is flags - status = uv_tcp_bind(handle, (struct sockaddr *)addr->addr, 0); - if (status != 0) { - error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to bind to port"); - error = - grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string(uv_strerror(status))); - return error; - } - - status = uv_listen((uv_stream_t *)handle, SOMAXCONN, on_connect); - if (status != 0) { - error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to listen to port"); - error = - grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string(uv_strerror(status))); - return error; - } - - sockname_temp.len = (int)sizeof(struct sockaddr_storage); - status = uv_tcp_getsockname(handle, (struct sockaddr *)&sockname_temp.addr, - (int *)&sockname_temp.len); - if (status != 0) { - error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("getsockname failed"); - error = - grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string(uv_strerror(status))); - return error; - } - - port = grpc_sockaddr_get_port(&sockname_temp); - - GPR_ASSERT(port >= 0); - GPR_ASSERT(!s->on_accept_cb && "must add ports before starting server"); - sp = gpr_zalloc(sizeof(grpc_tcp_listener)); - sp->next = NULL; - if (s->head == NULL) { - s->head = sp; - } else { - s->tail->next = sp; - } - s->tail = sp; - sp->server = s; - sp->handle = handle; - sp->port = port; - sp->port_index = port_index; - sp->closed = false; - handle->data = sp; - s->open_ports++; - GPR_ASSERT(sp->handle); - *listener = sp; - - return GRPC_ERROR_NONE; -} - -grpc_error *grpc_tcp_server_add_port(grpc_tcp_server *s, - const grpc_resolved_address *addr, - int *port) { - // This function is mostly copied from tcp_server_windows.c - grpc_tcp_listener *sp = NULL; - uv_tcp_t *handle; - grpc_resolved_address addr6_v4mapped; - grpc_resolved_address wildcard; - grpc_resolved_address *allocated_addr = NULL; - grpc_resolved_address sockname_temp; - unsigned port_index = 0; - int status; - grpc_error *error = GRPC_ERROR_NONE; - int family; - - GRPC_UV_ASSERT_SAME_THREAD(); - - if (s->tail != NULL) { - port_index = s->tail->port_index + 1; - } - - /* Check if this is a wildcard port, and if so, try to keep the port the same - as some previously created listener. */ - if (grpc_sockaddr_get_port(addr) == 0) { - for (sp = s->head; sp; sp = sp->next) { - sockname_temp.len = sizeof(struct sockaddr_storage); - if (0 == uv_tcp_getsockname(sp->handle, - (struct sockaddr *)&sockname_temp.addr, - (int *)&sockname_temp.len)) { - *port = grpc_sockaddr_get_port(&sockname_temp); - if (*port > 0) { - allocated_addr = gpr_malloc(sizeof(grpc_resolved_address)); - memcpy(allocated_addr, addr, sizeof(grpc_resolved_address)); - grpc_sockaddr_set_port(allocated_addr, *port); - addr = allocated_addr; - break; - } - } - } - } - - if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { - addr = &addr6_v4mapped; - } - - /* Treat :: or 0.0.0.0 as a family-agnostic wildcard. */ - if (grpc_sockaddr_is_wildcard(addr, port)) { - grpc_sockaddr_make_wildcard6(*port, &wildcard); - - addr = &wildcard; - } - - handle = gpr_malloc(sizeof(uv_tcp_t)); - - family = grpc_sockaddr_get_family(addr); - status = uv_tcp_init_ex(uv_default_loop(), handle, (unsigned int)family); -#if defined(GPR_LINUX) && defined(SO_REUSEPORT) - if (family == AF_INET || family == AF_INET6) { - int fd; - uv_fileno((uv_handle_t *)handle, &fd); - int enable = 1; - setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &enable, sizeof(enable)); - } -#endif /* GPR_LINUX && SO_REUSEPORT */ - - if (status == 0) { - error = add_socket_to_server(s, handle, addr, port_index, &sp); - } else { - error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "Failed to initialize UV tcp handle"); - error = - grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string(uv_strerror(status))); - } - - gpr_free(allocated_addr); - - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - char *port_string; - grpc_sockaddr_to_string(&port_string, addr, 0); - const char *str = grpc_error_string(error); - if (port_string) { - gpr_log(GPR_DEBUG, "SERVER %p add_port %s error=%s", s, port_string, str); - gpr_free(port_string); - } else { - gpr_log(GPR_DEBUG, "SERVER %p add_port error=%s", s, str); - } - } - - if (error != GRPC_ERROR_NONE) { - grpc_error *error_out = GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "Failed to add port to server", &error, 1); - GRPC_ERROR_UNREF(error); - error = error_out; - *port = -1; - } else { - GPR_ASSERT(sp != NULL); - *port = sp->port; - } - return error; -} - -void grpc_tcp_server_start(grpc_exec_ctx *exec_ctx, grpc_tcp_server *server, - grpc_pollset **pollsets, size_t pollset_count, - grpc_tcp_server_cb on_accept_cb, void *cb_arg) { - grpc_tcp_listener *sp; - (void)pollsets; - (void)pollset_count; - GRPC_UV_ASSERT_SAME_THREAD(); - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "SERVER_START %p", server); - } - GPR_ASSERT(on_accept_cb); - GPR_ASSERT(!server->on_accept_cb); - server->on_accept_cb = on_accept_cb; - server->on_accept_cb_arg = cb_arg; - for (sp = server->head; sp; sp = sp->next) { - if (sp->has_pending_connection) { - finish_accept(exec_ctx, sp); - sp->has_pending_connection = false; - } - } -} - -void grpc_tcp_server_shutdown_listeners(grpc_exec_ctx *exec_ctx, - grpc_tcp_server *s) {} - -#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/tcp_server_uv.cc b/src/core/lib/iomgr/tcp_server_uv.cc new file mode 100644 index 0000000000..3b9332321f --- /dev/null +++ b/src/core/lib/iomgr/tcp_server_uv.cc @@ -0,0 +1,454 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_UV + +#include +#include + +#include +#include + +#include "src/core/lib/iomgr/error.h" +#include "src/core/lib/iomgr/exec_ctx.h" +#include "src/core/lib/iomgr/iomgr_uv.h" +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/iomgr/tcp_server.h" +#include "src/core/lib/iomgr/tcp_uv.h" + +/* one listening port */ +typedef struct grpc_tcp_listener grpc_tcp_listener; +struct grpc_tcp_listener { + uv_tcp_t *handle; + grpc_tcp_server *server; + unsigned port_index; + int port; + /* linked list */ + struct grpc_tcp_listener *next; + + bool closed; + + bool has_pending_connection; +}; + +struct grpc_tcp_server { + gpr_refcount refs; + + /* Called whenever accept() succeeds on a server port. */ + grpc_tcp_server_cb on_accept_cb; + void *on_accept_cb_arg; + + int open_ports; + + /* linked list of server ports */ + grpc_tcp_listener *head; + grpc_tcp_listener *tail; + + /* List of closures passed to shutdown_starting_add(). */ + grpc_closure_list shutdown_starting; + + /* shutdown callback */ + grpc_closure *shutdown_complete; + + bool shutdown; + + grpc_resource_quota *resource_quota; +}; + +grpc_error *grpc_tcp_server_create(grpc_exec_ctx *exec_ctx, + grpc_closure *shutdown_complete, + const grpc_channel_args *args, + grpc_tcp_server **server) { + grpc_tcp_server *s = gpr_malloc(sizeof(grpc_tcp_server)); + s->resource_quota = grpc_resource_quota_create(NULL); + for (size_t i = 0; i < (args == NULL ? 0 : args->num_args); i++) { + if (0 == strcmp(GRPC_ARG_RESOURCE_QUOTA, args->args[i].key)) { + if (args->args[i].type == GRPC_ARG_POINTER) { + grpc_resource_quota_unref_internal(exec_ctx, s->resource_quota); + s->resource_quota = + grpc_resource_quota_ref_internal(args->args[i].value.pointer.p); + } else { + grpc_resource_quota_unref_internal(exec_ctx, s->resource_quota); + gpr_free(s); + return GRPC_ERROR_CREATE_FROM_STATIC_STRING( + GRPC_ARG_RESOURCE_QUOTA " must be a pointer to a buffer pool"); + } + } + } + gpr_ref_init(&s->refs, 1); + s->on_accept_cb = NULL; + s->on_accept_cb_arg = NULL; + s->open_ports = 0; + s->head = NULL; + s->tail = NULL; + s->shutdown_starting.head = NULL; + s->shutdown_starting.tail = NULL; + s->shutdown_complete = shutdown_complete; + s->shutdown = false; + *server = s; + return GRPC_ERROR_NONE; +} + +grpc_tcp_server *grpc_tcp_server_ref(grpc_tcp_server *s) { + GRPC_UV_ASSERT_SAME_THREAD(); + gpr_ref(&s->refs); + return s; +} + +void grpc_tcp_server_shutdown_starting_add(grpc_tcp_server *s, + grpc_closure *shutdown_starting) { + grpc_closure_list_append(&s->shutdown_starting, shutdown_starting, + GRPC_ERROR_NONE); +} + +static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { + GPR_ASSERT(s->shutdown); + if (s->shutdown_complete != NULL) { + GRPC_CLOSURE_SCHED(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE); + } + + while (s->head) { + grpc_tcp_listener *sp = s->head; + s->head = sp->next; + sp->next = NULL; + gpr_free(sp->handle); + gpr_free(sp); + } + grpc_resource_quota_unref_internal(exec_ctx, s->resource_quota); + gpr_free(s); +} + +static void handle_close_callback(uv_handle_t *handle) { + grpc_tcp_listener *sp = (grpc_tcp_listener *)handle->data; + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + sp->server->open_ports--; + if (sp->server->open_ports == 0 && sp->server->shutdown) { + finish_shutdown(&exec_ctx, sp->server); + } + grpc_exec_ctx_finish(&exec_ctx); +} + +static void close_listener(grpc_tcp_listener *sp) { + if (!sp->closed) { + sp->closed = true; + uv_close((uv_handle_t *)sp->handle, handle_close_callback); + } +} + +static void tcp_server_destroy(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { + int immediately_done = 0; + grpc_tcp_listener *sp; + + GPR_ASSERT(!s->shutdown); + s->shutdown = true; + + if (s->open_ports == 0) { + immediately_done = 1; + } + for (sp = s->head; sp; sp = sp->next) { + close_listener(sp); + } + + if (immediately_done) { + finish_shutdown(exec_ctx, s); + } +} + +void grpc_tcp_server_unref(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { + GRPC_UV_ASSERT_SAME_THREAD(); + if (gpr_unref(&s->refs)) { + /* Complete shutdown_starting work before destroying. */ + grpc_exec_ctx local_exec_ctx = GRPC_EXEC_CTX_INIT; + GRPC_CLOSURE_LIST_SCHED(&local_exec_ctx, &s->shutdown_starting); + if (exec_ctx == NULL) { + grpc_exec_ctx_flush(&local_exec_ctx); + tcp_server_destroy(&local_exec_ctx, s); + grpc_exec_ctx_finish(&local_exec_ctx); + } else { + grpc_exec_ctx_finish(&local_exec_ctx); + tcp_server_destroy(exec_ctx, s); + } + } +} + +static void finish_accept(grpc_exec_ctx *exec_ctx, grpc_tcp_listener *sp) { + grpc_tcp_server_acceptor *acceptor = gpr_malloc(sizeof(*acceptor)); + uv_tcp_t *client; + grpc_endpoint *ep = NULL; + grpc_resolved_address peer_name; + char *peer_name_string; + int err; + uv_tcp_t *server = sp->handle; + + client = gpr_malloc(sizeof(uv_tcp_t)); + uv_tcp_init(uv_default_loop(), client); + // UV documentation says this is guaranteed to succeed + uv_accept((uv_stream_t *)server, (uv_stream_t *)client); + peer_name_string = NULL; + memset(&peer_name, 0, sizeof(grpc_resolved_address)); + peer_name.len = sizeof(struct sockaddr_storage); + err = uv_tcp_getpeername(client, (struct sockaddr *)&peer_name.addr, + (int *)&peer_name.len); + if (err == 0) { + peer_name_string = grpc_sockaddr_to_uri(&peer_name); + } else { + gpr_log(GPR_INFO, "uv_tcp_getpeername error: %s", uv_strerror(err)); + } + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + if (peer_name_string) { + gpr_log(GPR_DEBUG, "SERVER_CONNECT: %p accepted connection: %s", + sp->server, peer_name_string); + } else { + gpr_log(GPR_DEBUG, "SERVER_CONNECT: %p accepted connection", sp->server); + } + } + ep = grpc_tcp_create(client, sp->server->resource_quota, peer_name_string); + acceptor->from_server = sp->server; + acceptor->port_index = sp->port_index; + acceptor->fd_index = 0; + sp->server->on_accept_cb(exec_ctx, sp->server->on_accept_cb_arg, ep, NULL, + acceptor); + gpr_free(peer_name_string); +} + +static void on_connect(uv_stream_t *server, int status) { + grpc_tcp_listener *sp = (grpc_tcp_listener *)server->data; + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + + if (status < 0) { + switch (status) { + case UV_EINTR: + case UV_EAGAIN: + return; + default: + close_listener(sp); + return; + } + } + + GPR_ASSERT(!sp->has_pending_connection); + + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "SERVER_CONNECT: %p incoming connection", sp->server); + } + + // Create acceptor. + if (sp->server->on_accept_cb) { + finish_accept(&exec_ctx, sp); + } else { + sp->has_pending_connection = true; + } + grpc_exec_ctx_finish(&exec_ctx); +} + +static grpc_error *add_socket_to_server(grpc_tcp_server *s, uv_tcp_t *handle, + const grpc_resolved_address *addr, + unsigned port_index, + grpc_tcp_listener **listener) { + grpc_tcp_listener *sp = NULL; + int port = -1; + int status; + grpc_error *error; + grpc_resolved_address sockname_temp; + + // The last argument to uv_tcp_bind is flags + status = uv_tcp_bind(handle, (struct sockaddr *)addr->addr, 0); + if (status != 0) { + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to bind to port"); + error = + grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string(uv_strerror(status))); + return error; + } + + status = uv_listen((uv_stream_t *)handle, SOMAXCONN, on_connect); + if (status != 0) { + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Failed to listen to port"); + error = + grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string(uv_strerror(status))); + return error; + } + + sockname_temp.len = (int)sizeof(struct sockaddr_storage); + status = uv_tcp_getsockname(handle, (struct sockaddr *)&sockname_temp.addr, + (int *)&sockname_temp.len); + if (status != 0) { + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("getsockname failed"); + error = + grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string(uv_strerror(status))); + return error; + } + + port = grpc_sockaddr_get_port(&sockname_temp); + + GPR_ASSERT(port >= 0); + GPR_ASSERT(!s->on_accept_cb && "must add ports before starting server"); + sp = gpr_zalloc(sizeof(grpc_tcp_listener)); + sp->next = NULL; + if (s->head == NULL) { + s->head = sp; + } else { + s->tail->next = sp; + } + s->tail = sp; + sp->server = s; + sp->handle = handle; + sp->port = port; + sp->port_index = port_index; + sp->closed = false; + handle->data = sp; + s->open_ports++; + GPR_ASSERT(sp->handle); + *listener = sp; + + return GRPC_ERROR_NONE; +} + +grpc_error *grpc_tcp_server_add_port(grpc_tcp_server *s, + const grpc_resolved_address *addr, + int *port) { + // This function is mostly copied from tcp_server_windows.c + grpc_tcp_listener *sp = NULL; + uv_tcp_t *handle; + grpc_resolved_address addr6_v4mapped; + grpc_resolved_address wildcard; + grpc_resolved_address *allocated_addr = NULL; + grpc_resolved_address sockname_temp; + unsigned port_index = 0; + int status; + grpc_error *error = GRPC_ERROR_NONE; + int family; + + GRPC_UV_ASSERT_SAME_THREAD(); + + if (s->tail != NULL) { + port_index = s->tail->port_index + 1; + } + + /* Check if this is a wildcard port, and if so, try to keep the port the same + as some previously created listener. */ + if (grpc_sockaddr_get_port(addr) == 0) { + for (sp = s->head; sp; sp = sp->next) { + sockname_temp.len = sizeof(struct sockaddr_storage); + if (0 == uv_tcp_getsockname(sp->handle, + (struct sockaddr *)&sockname_temp.addr, + (int *)&sockname_temp.len)) { + *port = grpc_sockaddr_get_port(&sockname_temp); + if (*port > 0) { + allocated_addr = gpr_malloc(sizeof(grpc_resolved_address)); + memcpy(allocated_addr, addr, sizeof(grpc_resolved_address)); + grpc_sockaddr_set_port(allocated_addr, *port); + addr = allocated_addr; + break; + } + } + } + } + + if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { + addr = &addr6_v4mapped; + } + + /* Treat :: or 0.0.0.0 as a family-agnostic wildcard. */ + if (grpc_sockaddr_is_wildcard(addr, port)) { + grpc_sockaddr_make_wildcard6(*port, &wildcard); + + addr = &wildcard; + } + + handle = gpr_malloc(sizeof(uv_tcp_t)); + + family = grpc_sockaddr_get_family(addr); + status = uv_tcp_init_ex(uv_default_loop(), handle, (unsigned int)family); +#if defined(GPR_LINUX) && defined(SO_REUSEPORT) + if (family == AF_INET || family == AF_INET6) { + int fd; + uv_fileno((uv_handle_t *)handle, &fd); + int enable = 1; + setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &enable, sizeof(enable)); + } +#endif /* GPR_LINUX && SO_REUSEPORT */ + + if (status == 0) { + error = add_socket_to_server(s, handle, addr, port_index, &sp); + } else { + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "Failed to initialize UV tcp handle"); + error = + grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string(uv_strerror(status))); + } + + gpr_free(allocated_addr); + + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + char *port_string; + grpc_sockaddr_to_string(&port_string, addr, 0); + const char *str = grpc_error_string(error); + if (port_string) { + gpr_log(GPR_DEBUG, "SERVER %p add_port %s error=%s", s, port_string, str); + gpr_free(port_string); + } else { + gpr_log(GPR_DEBUG, "SERVER %p add_port error=%s", s, str); + } + } + + if (error != GRPC_ERROR_NONE) { + grpc_error *error_out = GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "Failed to add port to server", &error, 1); + GRPC_ERROR_UNREF(error); + error = error_out; + *port = -1; + } else { + GPR_ASSERT(sp != NULL); + *port = sp->port; + } + return error; +} + +void grpc_tcp_server_start(grpc_exec_ctx *exec_ctx, grpc_tcp_server *server, + grpc_pollset **pollsets, size_t pollset_count, + grpc_tcp_server_cb on_accept_cb, void *cb_arg) { + grpc_tcp_listener *sp; + (void)pollsets; + (void)pollset_count; + GRPC_UV_ASSERT_SAME_THREAD(); + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "SERVER_START %p", server); + } + GPR_ASSERT(on_accept_cb); + GPR_ASSERT(!server->on_accept_cb); + server->on_accept_cb = on_accept_cb; + server->on_accept_cb_arg = cb_arg; + for (sp = server->head; sp; sp = sp->next) { + if (sp->has_pending_connection) { + finish_accept(exec_ctx, sp); + sp->has_pending_connection = false; + } + } +} + +void grpc_tcp_server_shutdown_listeners(grpc_exec_ctx *exec_ctx, + grpc_tcp_server *s) {} + +#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/tcp_server_windows.c b/src/core/lib/iomgr/tcp_server_windows.c deleted file mode 100644 index 0162afc1ad..0000000000 --- a/src/core/lib/iomgr/tcp_server_windows.c +++ /dev/null @@ -1,543 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_WINSOCK_SOCKET - -#include "src/core/lib/iomgr/sockaddr.h" - -#include - -#include -#include -#include -#include -#include -#include - -#include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/iomgr/iocp_windows.h" -#include "src/core/lib/iomgr/pollset_windows.h" -#include "src/core/lib/iomgr/resolve_address.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/iomgr/socket_windows.h" -#include "src/core/lib/iomgr/tcp_server.h" -#include "src/core/lib/iomgr/tcp_windows.h" - -#define MIN_SAFE_ACCEPT_QUEUE_SIZE 100 - -/* one listening port */ -typedef struct grpc_tcp_listener grpc_tcp_listener; -struct grpc_tcp_listener { - /* This seemingly magic number comes from AcceptEx's documentation. each - address buffer needs to have at least 16 more bytes at their end. */ - uint8_t addresses[(sizeof(struct sockaddr_in6) + 16) * 2]; - /* This will hold the socket for the next accept. */ - SOCKET new_socket; - /* The listener winsocket. */ - grpc_winsocket *socket; - /* The actual TCP port number. */ - int port; - unsigned port_index; - grpc_tcp_server *server; - /* The cached AcceptEx for that port. */ - LPFN_ACCEPTEX AcceptEx; - int shutting_down; - int outstanding_calls; - /* closure for socket notification of accept being ready */ - grpc_closure on_accept; - /* linked list */ - struct grpc_tcp_listener *next; -}; - -/* the overall server */ -struct grpc_tcp_server { - gpr_refcount refs; - /* Called whenever accept() succeeds on a server port. */ - grpc_tcp_server_cb on_accept_cb; - void *on_accept_cb_arg; - - gpr_mu mu; - - /* active port count: how many ports are actually still listening */ - int active_ports; - - /* linked list of server ports */ - grpc_tcp_listener *head; - grpc_tcp_listener *tail; - - /* List of closures passed to shutdown_starting_add(). */ - grpc_closure_list shutdown_starting; - - /* shutdown callback */ - grpc_closure *shutdown_complete; - - grpc_channel_args *channel_args; -}; - -/* Public function. Allocates the proper data structures to hold a - grpc_tcp_server. */ -grpc_error *grpc_tcp_server_create(grpc_exec_ctx *exec_ctx, - grpc_closure *shutdown_complete, - const grpc_channel_args *args, - grpc_tcp_server **server) { - grpc_tcp_server *s = gpr_malloc(sizeof(grpc_tcp_server)); - s->channel_args = grpc_channel_args_copy(args); - gpr_ref_init(&s->refs, 1); - gpr_mu_init(&s->mu); - s->active_ports = 0; - s->on_accept_cb = NULL; - s->on_accept_cb_arg = NULL; - s->head = NULL; - s->tail = NULL; - s->shutdown_starting.head = NULL; - s->shutdown_starting.tail = NULL; - s->shutdown_complete = shutdown_complete; - *server = s; - return GRPC_ERROR_NONE; -} - -static void destroy_server(grpc_exec_ctx *exec_ctx, void *arg, - grpc_error *error) { - grpc_tcp_server *s = arg; - - /* Now that the accepts have been aborted, we can destroy the sockets. - The IOCP won't get notified on these, so we can flag them as already - closed by the system. */ - while (s->head) { - grpc_tcp_listener *sp = s->head; - s->head = sp->next; - sp->next = NULL; - grpc_winsocket_destroy(sp->socket); - gpr_free(sp); - } - grpc_channel_args_destroy(exec_ctx, s->channel_args); - gpr_free(s); -} - -static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, - grpc_tcp_server *s) { - if (s->shutdown_complete != NULL) { - GRPC_CLOSURE_SCHED(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE); - } - - GRPC_CLOSURE_SCHED(exec_ctx, GRPC_CLOSURE_CREATE(destroy_server, s, - grpc_schedule_on_exec_ctx), - GRPC_ERROR_NONE); -} - -grpc_tcp_server *grpc_tcp_server_ref(grpc_tcp_server *s) { - gpr_ref_non_zero(&s->refs); - return s; -} - -void grpc_tcp_server_shutdown_starting_add(grpc_tcp_server *s, - grpc_closure *shutdown_starting) { - gpr_mu_lock(&s->mu); - grpc_closure_list_append(&s->shutdown_starting, shutdown_starting, - GRPC_ERROR_NONE); - gpr_mu_unlock(&s->mu); -} - -static void tcp_server_destroy(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { - grpc_tcp_listener *sp; - gpr_mu_lock(&s->mu); - - /* First, shutdown all fd's. This will queue abortion calls for all - of the pending accepts due to the normal operation mechanism. */ - if (s->active_ports == 0) { - finish_shutdown_locked(exec_ctx, s); - } else { - for (sp = s->head; sp; sp = sp->next) { - sp->shutting_down = 1; - grpc_winsocket_shutdown(sp->socket); - } - } - gpr_mu_unlock(&s->mu); -} - -void grpc_tcp_server_unref(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { - if (gpr_unref(&s->refs)) { - grpc_tcp_server_shutdown_listeners(exec_ctx, s); - gpr_mu_lock(&s->mu); - GRPC_CLOSURE_LIST_SCHED(exec_ctx, &s->shutdown_starting); - gpr_mu_unlock(&s->mu); - tcp_server_destroy(exec_ctx, s); - } -} - -/* Prepare (bind) a recently-created socket for listening. */ -static grpc_error *prepare_socket(SOCKET sock, - const grpc_resolved_address *addr, - int *port) { - grpc_resolved_address sockname_temp; - grpc_error *error = GRPC_ERROR_NONE; - - error = grpc_tcp_prepare_socket(sock); - if (error != GRPC_ERROR_NONE) { - goto failure; - } - - if (bind(sock, (const struct sockaddr *)addr->addr, (int)addr->len) == - SOCKET_ERROR) { - error = GRPC_WSA_ERROR(WSAGetLastError(), "bind"); - goto failure; - } - - if (listen(sock, SOMAXCONN) == SOCKET_ERROR) { - error = GRPC_WSA_ERROR(WSAGetLastError(), "listen"); - goto failure; - } - - int sockname_temp_len = sizeof(struct sockaddr_storage); - if (getsockname(sock, (struct sockaddr *)sockname_temp.addr, - &sockname_temp_len) == SOCKET_ERROR) { - error = GRPC_WSA_ERROR(WSAGetLastError(), "getsockname"); - goto failure; - } - sockname_temp.len = (size_t)sockname_temp_len; - - *port = grpc_sockaddr_get_port(&sockname_temp); - return GRPC_ERROR_NONE; - -failure: - GPR_ASSERT(error != GRPC_ERROR_NONE); - char *tgtaddr = grpc_sockaddr_to_uri(addr); - grpc_error_set_int( - grpc_error_set_str(GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "Failed to prepare server socket", &error, 1), - GRPC_ERROR_STR_TARGET_ADDRESS, - grpc_slice_from_copied_string(tgtaddr)), - GRPC_ERROR_INT_FD, (intptr_t)sock); - gpr_free(tgtaddr); - GRPC_ERROR_UNREF(error); - if (sock != INVALID_SOCKET) closesocket(sock); - return error; -} - -static void decrement_active_ports_and_notify_locked(grpc_exec_ctx *exec_ctx, - grpc_tcp_listener *sp) { - sp->shutting_down = 0; - GPR_ASSERT(sp->server->active_ports > 0); - if (0 == --sp->server->active_ports) { - finish_shutdown_locked(exec_ctx, sp->server); - } -} - -/* In order to do an async accept, we need to create a socket first which - will be the one assigned to the new incoming connection. */ -static grpc_error *start_accept_locked(grpc_exec_ctx *exec_ctx, - grpc_tcp_listener *port) { - SOCKET sock = INVALID_SOCKET; - BOOL success; - DWORD addrlen = sizeof(struct sockaddr_in6) + 16; - DWORD bytes_received = 0; - grpc_error *error = GRPC_ERROR_NONE; - - if (port->shutting_down) { - return GRPC_ERROR_NONE; - } - - sock = WSASocket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, NULL, 0, - WSA_FLAG_OVERLAPPED); - if (sock == INVALID_SOCKET) { - error = GRPC_WSA_ERROR(WSAGetLastError(), "WSASocket"); - goto failure; - } - - error = grpc_tcp_prepare_socket(sock); - if (error != GRPC_ERROR_NONE) goto failure; - - /* Start the "accept" asynchronously. */ - success = port->AcceptEx(port->socket->socket, sock, port->addresses, 0, - addrlen, addrlen, &bytes_received, - &port->socket->read_info.overlapped); - - /* It is possible to get an accept immediately without delay. However, we - will still get an IOCP notification for it. So let's just ignore it. */ - if (!success) { - int last_error = WSAGetLastError(); - if (last_error != ERROR_IO_PENDING) { - error = GRPC_WSA_ERROR(last_error, "AcceptEx"); - goto failure; - } - } - - /* We're ready to do the accept. Calling grpc_socket_notify_on_read may - immediately process an accept that happened in the meantime. */ - port->new_socket = sock; - grpc_socket_notify_on_read(exec_ctx, port->socket, &port->on_accept); - port->outstanding_calls++; - return error; - -failure: - GPR_ASSERT(error != GRPC_ERROR_NONE); - if (sock != INVALID_SOCKET) closesocket(sock); - return error; -} - -/* Event manager callback when reads are ready. */ -static void on_accept(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { - grpc_tcp_listener *sp = arg; - SOCKET sock = sp->new_socket; - grpc_winsocket_callback_info *info = &sp->socket->read_info; - grpc_endpoint *ep = NULL; - grpc_resolved_address peer_name; - char *peer_name_string; - char *fd_name; - DWORD transfered_bytes; - DWORD flags; - BOOL wsa_success; - int err; - - gpr_mu_lock(&sp->server->mu); - - peer_name.len = sizeof(struct sockaddr_storage); - - /* The general mechanism for shutting down is to queue abortion calls. While - this is necessary in the read/write case, it's useless for the accept - case. We only need to adjust the pending callback count */ - if (error != GRPC_ERROR_NONE) { - const char *msg = grpc_error_string(error); - gpr_log(GPR_INFO, "Skipping on_accept due to error: %s", msg); - - gpr_mu_unlock(&sp->server->mu); - return; - } - - /* The IOCP notified us of a completed operation. Let's grab the results, - and act accordingly. */ - transfered_bytes = 0; - wsa_success = WSAGetOverlappedResult(sock, &info->overlapped, - &transfered_bytes, FALSE, &flags); - if (!wsa_success) { - if (!sp->shutting_down) { - char *utf8_message = gpr_format_message(WSAGetLastError()); - gpr_log(GPR_ERROR, "on_accept error: %s", utf8_message); - gpr_free(utf8_message); - } - closesocket(sock); - } else { - if (!sp->shutting_down) { - peer_name_string = NULL; - err = setsockopt(sock, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT, - (char *)&sp->socket->socket, sizeof(sp->socket->socket)); - if (err) { - char *utf8_message = gpr_format_message(WSAGetLastError()); - gpr_log(GPR_ERROR, "setsockopt error: %s", utf8_message); - gpr_free(utf8_message); - } - int peer_name_len = (int)peer_name.len; - err = - getpeername(sock, (struct sockaddr *)peer_name.addr, &peer_name_len); - peer_name.len = (size_t)peer_name_len; - if (!err) { - peer_name_string = grpc_sockaddr_to_uri(&peer_name); - } else { - char *utf8_message = gpr_format_message(WSAGetLastError()); - gpr_log(GPR_ERROR, "getpeername error: %s", utf8_message); - gpr_free(utf8_message); - } - gpr_asprintf(&fd_name, "tcp_server:%s", peer_name_string); - ep = grpc_tcp_create(exec_ctx, grpc_winsocket_create(sock, fd_name), - sp->server->channel_args, peer_name_string); - gpr_free(fd_name); - gpr_free(peer_name_string); - } else { - closesocket(sock); - } - } - - /* The only time we should call our callback, is where we successfully - managed to accept a connection, and created an endpoint. */ - if (ep) { - // Create acceptor. - grpc_tcp_server_acceptor *acceptor = gpr_malloc(sizeof(*acceptor)); - acceptor->from_server = sp->server; - acceptor->port_index = sp->port_index; - acceptor->fd_index = 0; - sp->server->on_accept_cb(exec_ctx, sp->server->on_accept_cb_arg, ep, NULL, - acceptor); - } - /* As we were notified from the IOCP of one and exactly one accept, - the former socked we created has now either been destroy or assigned - to the new connection. We need to create a new one for the next - connection. */ - GPR_ASSERT( - GRPC_LOG_IF_ERROR("start_accept", start_accept_locked(exec_ctx, sp))); - if (0 == --sp->outstanding_calls) { - decrement_active_ports_and_notify_locked(exec_ctx, sp); - } - gpr_mu_unlock(&sp->server->mu); -} - -static grpc_error *add_socket_to_server(grpc_tcp_server *s, SOCKET sock, - const grpc_resolved_address *addr, - unsigned port_index, - grpc_tcp_listener **listener) { - grpc_tcp_listener *sp = NULL; - int port = -1; - int status; - GUID guid = WSAID_ACCEPTEX; - DWORD ioctl_num_bytes; - LPFN_ACCEPTEX AcceptEx; - grpc_error *error = GRPC_ERROR_NONE; - - /* We need to grab the AcceptEx pointer for that port, as it may be - interface-dependent. We'll cache it to avoid doing that again. */ - status = - WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, &guid, sizeof(guid), - &AcceptEx, sizeof(AcceptEx), &ioctl_num_bytes, NULL, NULL); - - if (status != 0) { - char *utf8_message = gpr_format_message(WSAGetLastError()); - gpr_log(GPR_ERROR, "on_connect error: %s", utf8_message); - gpr_free(utf8_message); - closesocket(sock); - return NULL; - } - - error = prepare_socket(sock, addr, &port); - if (error != GRPC_ERROR_NONE) { - return error; - } - - GPR_ASSERT(port >= 0); - gpr_mu_lock(&s->mu); - GPR_ASSERT(!s->on_accept_cb && "must add ports before starting server"); - sp = gpr_malloc(sizeof(grpc_tcp_listener)); - sp->next = NULL; - if (s->head == NULL) { - s->head = sp; - } else { - s->tail->next = sp; - } - s->tail = sp; - sp->server = s; - sp->socket = grpc_winsocket_create(sock, "listener"); - sp->shutting_down = 0; - sp->outstanding_calls = 0; - sp->AcceptEx = AcceptEx; - sp->new_socket = INVALID_SOCKET; - sp->port = port; - sp->port_index = port_index; - GRPC_CLOSURE_INIT(&sp->on_accept, on_accept, sp, grpc_schedule_on_exec_ctx); - GPR_ASSERT(sp->socket); - gpr_mu_unlock(&s->mu); - *listener = sp; - - return GRPC_ERROR_NONE; -} - -grpc_error *grpc_tcp_server_add_port(grpc_tcp_server *s, - const grpc_resolved_address *addr, - int *port) { - grpc_tcp_listener *sp = NULL; - SOCKET sock; - grpc_resolved_address addr6_v4mapped; - grpc_resolved_address wildcard; - grpc_resolved_address *allocated_addr = NULL; - grpc_resolved_address sockname_temp; - unsigned port_index = 0; - grpc_error *error = GRPC_ERROR_NONE; - - if (s->tail != NULL) { - port_index = s->tail->port_index + 1; - } - - /* Check if this is a wildcard port, and if so, try to keep the port the same - as some previously created listener. */ - if (grpc_sockaddr_get_port(addr) == 0) { - for (sp = s->head; sp; sp = sp->next) { - int sockname_temp_len = sizeof(struct sockaddr_storage); - if (0 == getsockname(sp->socket->socket, - (struct sockaddr *)sockname_temp.addr, - &sockname_temp_len)) { - sockname_temp.len = (size_t)sockname_temp_len; - *port = grpc_sockaddr_get_port(&sockname_temp); - if (*port > 0) { - allocated_addr = gpr_malloc(sizeof(grpc_resolved_address)); - memcpy(allocated_addr, addr, sizeof(grpc_resolved_address)); - grpc_sockaddr_set_port(allocated_addr, *port); - addr = allocated_addr; - break; - } - } - } - } - - if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { - addr = &addr6_v4mapped; - } - - /* Treat :: or 0.0.0.0 as a family-agnostic wildcard. */ - if (grpc_sockaddr_is_wildcard(addr, port)) { - grpc_sockaddr_make_wildcard6(*port, &wildcard); - - addr = &wildcard; - } - - sock = WSASocket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, NULL, 0, - WSA_FLAG_OVERLAPPED); - if (sock == INVALID_SOCKET) { - error = GRPC_WSA_ERROR(WSAGetLastError(), "WSASocket"); - goto done; - } - - error = add_socket_to_server(s, sock, addr, port_index, &sp); - -done: - gpr_free(allocated_addr); - - if (error != GRPC_ERROR_NONE) { - grpc_error *error_out = GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "Failed to add port to server", &error, 1); - GRPC_ERROR_UNREF(error); - error = error_out; - *port = -1; - } else { - GPR_ASSERT(sp != NULL); - *port = sp->port; - } - return error; -} - -void grpc_tcp_server_start(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s, - grpc_pollset **pollset, size_t pollset_count, - grpc_tcp_server_cb on_accept_cb, - void *on_accept_cb_arg) { - grpc_tcp_listener *sp; - GPR_ASSERT(on_accept_cb); - gpr_mu_lock(&s->mu); - GPR_ASSERT(!s->on_accept_cb); - GPR_ASSERT(s->active_ports == 0); - s->on_accept_cb = on_accept_cb; - s->on_accept_cb_arg = on_accept_cb_arg; - for (sp = s->head; sp; sp = sp->next) { - GPR_ASSERT( - GRPC_LOG_IF_ERROR("start_accept", start_accept_locked(exec_ctx, sp))); - s->active_ports++; - } - gpr_mu_unlock(&s->mu); -} - -void grpc_tcp_server_shutdown_listeners(grpc_exec_ctx *exec_ctx, - grpc_tcp_server *s) {} - -#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/tcp_server_windows.cc b/src/core/lib/iomgr/tcp_server_windows.cc new file mode 100644 index 0000000000..0162afc1ad --- /dev/null +++ b/src/core/lib/iomgr/tcp_server_windows.cc @@ -0,0 +1,543 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_WINSOCK_SOCKET + +#include "src/core/lib/iomgr/sockaddr.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/iomgr/iocp_windows.h" +#include "src/core/lib/iomgr/pollset_windows.h" +#include "src/core/lib/iomgr/resolve_address.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/iomgr/socket_windows.h" +#include "src/core/lib/iomgr/tcp_server.h" +#include "src/core/lib/iomgr/tcp_windows.h" + +#define MIN_SAFE_ACCEPT_QUEUE_SIZE 100 + +/* one listening port */ +typedef struct grpc_tcp_listener grpc_tcp_listener; +struct grpc_tcp_listener { + /* This seemingly magic number comes from AcceptEx's documentation. each + address buffer needs to have at least 16 more bytes at their end. */ + uint8_t addresses[(sizeof(struct sockaddr_in6) + 16) * 2]; + /* This will hold the socket for the next accept. */ + SOCKET new_socket; + /* The listener winsocket. */ + grpc_winsocket *socket; + /* The actual TCP port number. */ + int port; + unsigned port_index; + grpc_tcp_server *server; + /* The cached AcceptEx for that port. */ + LPFN_ACCEPTEX AcceptEx; + int shutting_down; + int outstanding_calls; + /* closure for socket notification of accept being ready */ + grpc_closure on_accept; + /* linked list */ + struct grpc_tcp_listener *next; +}; + +/* the overall server */ +struct grpc_tcp_server { + gpr_refcount refs; + /* Called whenever accept() succeeds on a server port. */ + grpc_tcp_server_cb on_accept_cb; + void *on_accept_cb_arg; + + gpr_mu mu; + + /* active port count: how many ports are actually still listening */ + int active_ports; + + /* linked list of server ports */ + grpc_tcp_listener *head; + grpc_tcp_listener *tail; + + /* List of closures passed to shutdown_starting_add(). */ + grpc_closure_list shutdown_starting; + + /* shutdown callback */ + grpc_closure *shutdown_complete; + + grpc_channel_args *channel_args; +}; + +/* Public function. Allocates the proper data structures to hold a + grpc_tcp_server. */ +grpc_error *grpc_tcp_server_create(grpc_exec_ctx *exec_ctx, + grpc_closure *shutdown_complete, + const grpc_channel_args *args, + grpc_tcp_server **server) { + grpc_tcp_server *s = gpr_malloc(sizeof(grpc_tcp_server)); + s->channel_args = grpc_channel_args_copy(args); + gpr_ref_init(&s->refs, 1); + gpr_mu_init(&s->mu); + s->active_ports = 0; + s->on_accept_cb = NULL; + s->on_accept_cb_arg = NULL; + s->head = NULL; + s->tail = NULL; + s->shutdown_starting.head = NULL; + s->shutdown_starting.tail = NULL; + s->shutdown_complete = shutdown_complete; + *server = s; + return GRPC_ERROR_NONE; +} + +static void destroy_server(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error) { + grpc_tcp_server *s = arg; + + /* Now that the accepts have been aborted, we can destroy the sockets. + The IOCP won't get notified on these, so we can flag them as already + closed by the system. */ + while (s->head) { + grpc_tcp_listener *sp = s->head; + s->head = sp->next; + sp->next = NULL; + grpc_winsocket_destroy(sp->socket); + gpr_free(sp); + } + grpc_channel_args_destroy(exec_ctx, s->channel_args); + gpr_free(s); +} + +static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx, + grpc_tcp_server *s) { + if (s->shutdown_complete != NULL) { + GRPC_CLOSURE_SCHED(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE); + } + + GRPC_CLOSURE_SCHED(exec_ctx, GRPC_CLOSURE_CREATE(destroy_server, s, + grpc_schedule_on_exec_ctx), + GRPC_ERROR_NONE); +} + +grpc_tcp_server *grpc_tcp_server_ref(grpc_tcp_server *s) { + gpr_ref_non_zero(&s->refs); + return s; +} + +void grpc_tcp_server_shutdown_starting_add(grpc_tcp_server *s, + grpc_closure *shutdown_starting) { + gpr_mu_lock(&s->mu); + grpc_closure_list_append(&s->shutdown_starting, shutdown_starting, + GRPC_ERROR_NONE); + gpr_mu_unlock(&s->mu); +} + +static void tcp_server_destroy(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { + grpc_tcp_listener *sp; + gpr_mu_lock(&s->mu); + + /* First, shutdown all fd's. This will queue abortion calls for all + of the pending accepts due to the normal operation mechanism. */ + if (s->active_ports == 0) { + finish_shutdown_locked(exec_ctx, s); + } else { + for (sp = s->head; sp; sp = sp->next) { + sp->shutting_down = 1; + grpc_winsocket_shutdown(sp->socket); + } + } + gpr_mu_unlock(&s->mu); +} + +void grpc_tcp_server_unref(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s) { + if (gpr_unref(&s->refs)) { + grpc_tcp_server_shutdown_listeners(exec_ctx, s); + gpr_mu_lock(&s->mu); + GRPC_CLOSURE_LIST_SCHED(exec_ctx, &s->shutdown_starting); + gpr_mu_unlock(&s->mu); + tcp_server_destroy(exec_ctx, s); + } +} + +/* Prepare (bind) a recently-created socket for listening. */ +static grpc_error *prepare_socket(SOCKET sock, + const grpc_resolved_address *addr, + int *port) { + grpc_resolved_address sockname_temp; + grpc_error *error = GRPC_ERROR_NONE; + + error = grpc_tcp_prepare_socket(sock); + if (error != GRPC_ERROR_NONE) { + goto failure; + } + + if (bind(sock, (const struct sockaddr *)addr->addr, (int)addr->len) == + SOCKET_ERROR) { + error = GRPC_WSA_ERROR(WSAGetLastError(), "bind"); + goto failure; + } + + if (listen(sock, SOMAXCONN) == SOCKET_ERROR) { + error = GRPC_WSA_ERROR(WSAGetLastError(), "listen"); + goto failure; + } + + int sockname_temp_len = sizeof(struct sockaddr_storage); + if (getsockname(sock, (struct sockaddr *)sockname_temp.addr, + &sockname_temp_len) == SOCKET_ERROR) { + error = GRPC_WSA_ERROR(WSAGetLastError(), "getsockname"); + goto failure; + } + sockname_temp.len = (size_t)sockname_temp_len; + + *port = grpc_sockaddr_get_port(&sockname_temp); + return GRPC_ERROR_NONE; + +failure: + GPR_ASSERT(error != GRPC_ERROR_NONE); + char *tgtaddr = grpc_sockaddr_to_uri(addr); + grpc_error_set_int( + grpc_error_set_str(GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "Failed to prepare server socket", &error, 1), + GRPC_ERROR_STR_TARGET_ADDRESS, + grpc_slice_from_copied_string(tgtaddr)), + GRPC_ERROR_INT_FD, (intptr_t)sock); + gpr_free(tgtaddr); + GRPC_ERROR_UNREF(error); + if (sock != INVALID_SOCKET) closesocket(sock); + return error; +} + +static void decrement_active_ports_and_notify_locked(grpc_exec_ctx *exec_ctx, + grpc_tcp_listener *sp) { + sp->shutting_down = 0; + GPR_ASSERT(sp->server->active_ports > 0); + if (0 == --sp->server->active_ports) { + finish_shutdown_locked(exec_ctx, sp->server); + } +} + +/* In order to do an async accept, we need to create a socket first which + will be the one assigned to the new incoming connection. */ +static grpc_error *start_accept_locked(grpc_exec_ctx *exec_ctx, + grpc_tcp_listener *port) { + SOCKET sock = INVALID_SOCKET; + BOOL success; + DWORD addrlen = sizeof(struct sockaddr_in6) + 16; + DWORD bytes_received = 0; + grpc_error *error = GRPC_ERROR_NONE; + + if (port->shutting_down) { + return GRPC_ERROR_NONE; + } + + sock = WSASocket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, NULL, 0, + WSA_FLAG_OVERLAPPED); + if (sock == INVALID_SOCKET) { + error = GRPC_WSA_ERROR(WSAGetLastError(), "WSASocket"); + goto failure; + } + + error = grpc_tcp_prepare_socket(sock); + if (error != GRPC_ERROR_NONE) goto failure; + + /* Start the "accept" asynchronously. */ + success = port->AcceptEx(port->socket->socket, sock, port->addresses, 0, + addrlen, addrlen, &bytes_received, + &port->socket->read_info.overlapped); + + /* It is possible to get an accept immediately without delay. However, we + will still get an IOCP notification for it. So let's just ignore it. */ + if (!success) { + int last_error = WSAGetLastError(); + if (last_error != ERROR_IO_PENDING) { + error = GRPC_WSA_ERROR(last_error, "AcceptEx"); + goto failure; + } + } + + /* We're ready to do the accept. Calling grpc_socket_notify_on_read may + immediately process an accept that happened in the meantime. */ + port->new_socket = sock; + grpc_socket_notify_on_read(exec_ctx, port->socket, &port->on_accept); + port->outstanding_calls++; + return error; + +failure: + GPR_ASSERT(error != GRPC_ERROR_NONE); + if (sock != INVALID_SOCKET) closesocket(sock); + return error; +} + +/* Event manager callback when reads are ready. */ +static void on_accept(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { + grpc_tcp_listener *sp = arg; + SOCKET sock = sp->new_socket; + grpc_winsocket_callback_info *info = &sp->socket->read_info; + grpc_endpoint *ep = NULL; + grpc_resolved_address peer_name; + char *peer_name_string; + char *fd_name; + DWORD transfered_bytes; + DWORD flags; + BOOL wsa_success; + int err; + + gpr_mu_lock(&sp->server->mu); + + peer_name.len = sizeof(struct sockaddr_storage); + + /* The general mechanism for shutting down is to queue abortion calls. While + this is necessary in the read/write case, it's useless for the accept + case. We only need to adjust the pending callback count */ + if (error != GRPC_ERROR_NONE) { + const char *msg = grpc_error_string(error); + gpr_log(GPR_INFO, "Skipping on_accept due to error: %s", msg); + + gpr_mu_unlock(&sp->server->mu); + return; + } + + /* The IOCP notified us of a completed operation. Let's grab the results, + and act accordingly. */ + transfered_bytes = 0; + wsa_success = WSAGetOverlappedResult(sock, &info->overlapped, + &transfered_bytes, FALSE, &flags); + if (!wsa_success) { + if (!sp->shutting_down) { + char *utf8_message = gpr_format_message(WSAGetLastError()); + gpr_log(GPR_ERROR, "on_accept error: %s", utf8_message); + gpr_free(utf8_message); + } + closesocket(sock); + } else { + if (!sp->shutting_down) { + peer_name_string = NULL; + err = setsockopt(sock, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT, + (char *)&sp->socket->socket, sizeof(sp->socket->socket)); + if (err) { + char *utf8_message = gpr_format_message(WSAGetLastError()); + gpr_log(GPR_ERROR, "setsockopt error: %s", utf8_message); + gpr_free(utf8_message); + } + int peer_name_len = (int)peer_name.len; + err = + getpeername(sock, (struct sockaddr *)peer_name.addr, &peer_name_len); + peer_name.len = (size_t)peer_name_len; + if (!err) { + peer_name_string = grpc_sockaddr_to_uri(&peer_name); + } else { + char *utf8_message = gpr_format_message(WSAGetLastError()); + gpr_log(GPR_ERROR, "getpeername error: %s", utf8_message); + gpr_free(utf8_message); + } + gpr_asprintf(&fd_name, "tcp_server:%s", peer_name_string); + ep = grpc_tcp_create(exec_ctx, grpc_winsocket_create(sock, fd_name), + sp->server->channel_args, peer_name_string); + gpr_free(fd_name); + gpr_free(peer_name_string); + } else { + closesocket(sock); + } + } + + /* The only time we should call our callback, is where we successfully + managed to accept a connection, and created an endpoint. */ + if (ep) { + // Create acceptor. + grpc_tcp_server_acceptor *acceptor = gpr_malloc(sizeof(*acceptor)); + acceptor->from_server = sp->server; + acceptor->port_index = sp->port_index; + acceptor->fd_index = 0; + sp->server->on_accept_cb(exec_ctx, sp->server->on_accept_cb_arg, ep, NULL, + acceptor); + } + /* As we were notified from the IOCP of one and exactly one accept, + the former socked we created has now either been destroy or assigned + to the new connection. We need to create a new one for the next + connection. */ + GPR_ASSERT( + GRPC_LOG_IF_ERROR("start_accept", start_accept_locked(exec_ctx, sp))); + if (0 == --sp->outstanding_calls) { + decrement_active_ports_and_notify_locked(exec_ctx, sp); + } + gpr_mu_unlock(&sp->server->mu); +} + +static grpc_error *add_socket_to_server(grpc_tcp_server *s, SOCKET sock, + const grpc_resolved_address *addr, + unsigned port_index, + grpc_tcp_listener **listener) { + grpc_tcp_listener *sp = NULL; + int port = -1; + int status; + GUID guid = WSAID_ACCEPTEX; + DWORD ioctl_num_bytes; + LPFN_ACCEPTEX AcceptEx; + grpc_error *error = GRPC_ERROR_NONE; + + /* We need to grab the AcceptEx pointer for that port, as it may be + interface-dependent. We'll cache it to avoid doing that again. */ + status = + WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, &guid, sizeof(guid), + &AcceptEx, sizeof(AcceptEx), &ioctl_num_bytes, NULL, NULL); + + if (status != 0) { + char *utf8_message = gpr_format_message(WSAGetLastError()); + gpr_log(GPR_ERROR, "on_connect error: %s", utf8_message); + gpr_free(utf8_message); + closesocket(sock); + return NULL; + } + + error = prepare_socket(sock, addr, &port); + if (error != GRPC_ERROR_NONE) { + return error; + } + + GPR_ASSERT(port >= 0); + gpr_mu_lock(&s->mu); + GPR_ASSERT(!s->on_accept_cb && "must add ports before starting server"); + sp = gpr_malloc(sizeof(grpc_tcp_listener)); + sp->next = NULL; + if (s->head == NULL) { + s->head = sp; + } else { + s->tail->next = sp; + } + s->tail = sp; + sp->server = s; + sp->socket = grpc_winsocket_create(sock, "listener"); + sp->shutting_down = 0; + sp->outstanding_calls = 0; + sp->AcceptEx = AcceptEx; + sp->new_socket = INVALID_SOCKET; + sp->port = port; + sp->port_index = port_index; + GRPC_CLOSURE_INIT(&sp->on_accept, on_accept, sp, grpc_schedule_on_exec_ctx); + GPR_ASSERT(sp->socket); + gpr_mu_unlock(&s->mu); + *listener = sp; + + return GRPC_ERROR_NONE; +} + +grpc_error *grpc_tcp_server_add_port(grpc_tcp_server *s, + const grpc_resolved_address *addr, + int *port) { + grpc_tcp_listener *sp = NULL; + SOCKET sock; + grpc_resolved_address addr6_v4mapped; + grpc_resolved_address wildcard; + grpc_resolved_address *allocated_addr = NULL; + grpc_resolved_address sockname_temp; + unsigned port_index = 0; + grpc_error *error = GRPC_ERROR_NONE; + + if (s->tail != NULL) { + port_index = s->tail->port_index + 1; + } + + /* Check if this is a wildcard port, and if so, try to keep the port the same + as some previously created listener. */ + if (grpc_sockaddr_get_port(addr) == 0) { + for (sp = s->head; sp; sp = sp->next) { + int sockname_temp_len = sizeof(struct sockaddr_storage); + if (0 == getsockname(sp->socket->socket, + (struct sockaddr *)sockname_temp.addr, + &sockname_temp_len)) { + sockname_temp.len = (size_t)sockname_temp_len; + *port = grpc_sockaddr_get_port(&sockname_temp); + if (*port > 0) { + allocated_addr = gpr_malloc(sizeof(grpc_resolved_address)); + memcpy(allocated_addr, addr, sizeof(grpc_resolved_address)); + grpc_sockaddr_set_port(allocated_addr, *port); + addr = allocated_addr; + break; + } + } + } + } + + if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { + addr = &addr6_v4mapped; + } + + /* Treat :: or 0.0.0.0 as a family-agnostic wildcard. */ + if (grpc_sockaddr_is_wildcard(addr, port)) { + grpc_sockaddr_make_wildcard6(*port, &wildcard); + + addr = &wildcard; + } + + sock = WSASocket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, NULL, 0, + WSA_FLAG_OVERLAPPED); + if (sock == INVALID_SOCKET) { + error = GRPC_WSA_ERROR(WSAGetLastError(), "WSASocket"); + goto done; + } + + error = add_socket_to_server(s, sock, addr, port_index, &sp); + +done: + gpr_free(allocated_addr); + + if (error != GRPC_ERROR_NONE) { + grpc_error *error_out = GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "Failed to add port to server", &error, 1); + GRPC_ERROR_UNREF(error); + error = error_out; + *port = -1; + } else { + GPR_ASSERT(sp != NULL); + *port = sp->port; + } + return error; +} + +void grpc_tcp_server_start(grpc_exec_ctx *exec_ctx, grpc_tcp_server *s, + grpc_pollset **pollset, size_t pollset_count, + grpc_tcp_server_cb on_accept_cb, + void *on_accept_cb_arg) { + grpc_tcp_listener *sp; + GPR_ASSERT(on_accept_cb); + gpr_mu_lock(&s->mu); + GPR_ASSERT(!s->on_accept_cb); + GPR_ASSERT(s->active_ports == 0); + s->on_accept_cb = on_accept_cb; + s->on_accept_cb_arg = on_accept_cb_arg; + for (sp = s->head; sp; sp = sp->next) { + GPR_ASSERT( + GRPC_LOG_IF_ERROR("start_accept", start_accept_locked(exec_ctx, sp))); + s->active_ports++; + } + gpr_mu_unlock(&s->mu); +} + +void grpc_tcp_server_shutdown_listeners(grpc_exec_ctx *exec_ctx, + grpc_tcp_server *s) {} + +#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/tcp_uv.c b/src/core/lib/iomgr/tcp_uv.c deleted file mode 100644 index a05c19b4ac..0000000000 --- a/src/core/lib/iomgr/tcp_uv.c +++ /dev/null @@ -1,381 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_UV - -#include -#include - -#include - -#include -#include -#include - -#include "src/core/lib/iomgr/error.h" -#include "src/core/lib/iomgr/iomgr_uv.h" -#include "src/core/lib/iomgr/network_status_tracker.h" -#include "src/core/lib/iomgr/resource_quota.h" -#include "src/core/lib/iomgr/tcp_uv.h" -#include "src/core/lib/slice/slice_internal.h" -#include "src/core/lib/slice/slice_string_helpers.h" -#include "src/core/lib/support/string.h" - -grpc_tracer_flag grpc_tcp_trace = GRPC_TRACER_INITIALIZER(false, "tcp"); - -typedef struct { - grpc_endpoint base; - gpr_refcount refcount; - - uv_write_t write_req; - uv_shutdown_t shutdown_req; - - uv_tcp_t *handle; - - grpc_closure *read_cb; - grpc_closure *write_cb; - - grpc_slice read_slice; - grpc_slice_buffer *read_slices; - grpc_slice_buffer *write_slices; - uv_buf_t *write_buffers; - - grpc_resource_user *resource_user; - - bool shutting_down; - - char *peer_string; - grpc_pollset *pollset; -} grpc_tcp; - -static void tcp_free(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - grpc_slice_unref_internal(exec_ctx, tcp->read_slice); - grpc_resource_user_unref(exec_ctx, tcp->resource_user); - gpr_free(tcp->handle); - gpr_free(tcp->peer_string); - gpr_free(tcp); -} - -#ifndef NDEBUG -#define TCP_UNREF(exec_ctx, tcp, reason) \ - tcp_unref((exec_ctx), (tcp), (reason), __FILE__, __LINE__) -#define TCP_REF(tcp, reason) tcp_ref((tcp), (reason), __FILE__, __LINE__) -static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp, - const char *reason, const char *file, int line) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); - gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, - "TCP unref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, - val - 1); - } - if (gpr_unref(&tcp->refcount)) { - tcp_free(exec_ctx, tcp); - } -} - -static void tcp_ref(grpc_tcp *tcp, const char *reason, const char *file, - int line) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); - gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, - "TCP ref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, - val + 1); - } - gpr_ref(&tcp->refcount); -} -#else -#define TCP_UNREF(exec_ctx, tcp, reason) tcp_unref((exec_ctx), (tcp)) -#define TCP_REF(tcp, reason) tcp_ref((tcp)) -static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - if (gpr_unref(&tcp->refcount)) { - tcp_free(exec_ctx, tcp); - } -} - -static void tcp_ref(grpc_tcp *tcp) { gpr_ref(&tcp->refcount); } -#endif - -static void uv_close_callback(uv_handle_t *handle) { - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - grpc_tcp *tcp = handle->data; - TCP_UNREF(&exec_ctx, tcp, "destroy"); - grpc_exec_ctx_finish(&exec_ctx); -} - -static grpc_slice alloc_read_slice(grpc_exec_ctx *exec_ctx, - grpc_resource_user *resource_user) { - return grpc_resource_user_slice_malloc(exec_ctx, resource_user, - GRPC_TCP_DEFAULT_READ_SLICE_SIZE); -} - -static void alloc_uv_buf(uv_handle_t *handle, size_t suggested_size, - uv_buf_t *buf) { - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - grpc_tcp *tcp = handle->data; - (void)suggested_size; - buf->base = (char *)GRPC_SLICE_START_PTR(tcp->read_slice); - buf->len = GRPC_SLICE_LENGTH(tcp->read_slice); - grpc_exec_ctx_finish(&exec_ctx); -} - -static void read_callback(uv_stream_t *stream, ssize_t nread, - const uv_buf_t *buf) { - grpc_slice sub; - grpc_error *error; - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - grpc_tcp *tcp = stream->data; - grpc_closure *cb = tcp->read_cb; - if (nread == 0) { - // Nothing happened. Wait for the next callback - return; - } - TCP_UNREF(&exec_ctx, tcp, "read"); - tcp->read_cb = NULL; - // TODO(murgatroid99): figure out what the return value here means - uv_read_stop(stream); - if (nread == UV_EOF) { - error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("EOF"); - } else if (nread > 0) { - // Successful read - sub = grpc_slice_sub_no_ref(tcp->read_slice, 0, (size_t)nread); - grpc_slice_buffer_add(tcp->read_slices, sub); - tcp->read_slice = alloc_read_slice(&exec_ctx, tcp->resource_user); - error = GRPC_ERROR_NONE; - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - size_t i; - const char *str = grpc_error_string(error); - gpr_log(GPR_DEBUG, "read: error=%s", str); - - for (i = 0; i < tcp->read_slices->count; i++) { - char *dump = grpc_dump_slice(tcp->read_slices->slices[i], - GPR_DUMP_HEX | GPR_DUMP_ASCII); - gpr_log(GPR_DEBUG, "READ %p (peer=%s): %s", tcp, tcp->peer_string, - dump); - gpr_free(dump); - } - } - } else { - // nread < 0: Error - error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("TCP Read failed"); - } - GRPC_CLOSURE_SCHED(&exec_ctx, cb, error); - grpc_exec_ctx_finish(&exec_ctx); -} - -static void uv_endpoint_read(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_slice_buffer *read_slices, grpc_closure *cb) { - grpc_tcp *tcp = (grpc_tcp *)ep; - int status; - grpc_error *error = GRPC_ERROR_NONE; - GRPC_UV_ASSERT_SAME_THREAD(); - GPR_ASSERT(tcp->read_cb == NULL); - tcp->read_cb = cb; - tcp->read_slices = read_slices; - grpc_slice_buffer_reset_and_unref_internal(exec_ctx, read_slices); - TCP_REF(tcp, "read"); - // TODO(murgatroid99): figure out what the return value here means - status = - uv_read_start((uv_stream_t *)tcp->handle, alloc_uv_buf, read_callback); - if (status != 0) { - error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("TCP Read failed at start"); - error = - grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, - grpc_slice_from_static_string(uv_strerror(status))); - GRPC_CLOSURE_SCHED(exec_ctx, cb, error); - } - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - const char *str = grpc_error_string(error); - gpr_log(GPR_DEBUG, "Initiating read on %p: error=%s", tcp, str); - } -} - -static void write_callback(uv_write_t *req, int status) { - grpc_tcp *tcp = req->data; - grpc_error *error; - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - grpc_closure *cb = tcp->write_cb; - tcp->write_cb = NULL; - TCP_UNREF(&exec_ctx, tcp, "write"); - if (status == 0) { - error = GRPC_ERROR_NONE; - } else { - error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("TCP Write failed"); - } - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - const char *str = grpc_error_string(error); - gpr_log(GPR_DEBUG, "write complete on %p: error=%s", tcp, str); - } - gpr_free(tcp->write_buffers); - grpc_resource_user_free(&exec_ctx, tcp->resource_user, - sizeof(uv_buf_t) * tcp->write_slices->count); - GRPC_CLOSURE_SCHED(&exec_ctx, cb, error); - grpc_exec_ctx_finish(&exec_ctx); -} - -static void uv_endpoint_write(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_slice_buffer *write_slices, - grpc_closure *cb) { - grpc_tcp *tcp = (grpc_tcp *)ep; - uv_buf_t *buffers; - unsigned int buffer_count; - unsigned int i; - grpc_slice *slice; - uv_write_t *write_req; - GRPC_UV_ASSERT_SAME_THREAD(); - - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - size_t j; - - for (j = 0; j < write_slices->count; j++) { - char *data = grpc_dump_slice(write_slices->slices[j], - GPR_DUMP_HEX | GPR_DUMP_ASCII); - gpr_log(GPR_DEBUG, "WRITE %p (peer=%s): %s", tcp, tcp->peer_string, data); - gpr_free(data); - } - } - - if (tcp->shutting_down) { - GRPC_CLOSURE_SCHED(exec_ctx, cb, GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "TCP socket is shutting down")); - return; - } - - GPR_ASSERT(tcp->write_cb == NULL); - tcp->write_slices = write_slices; - GPR_ASSERT(tcp->write_slices->count <= UINT_MAX); - if (tcp->write_slices->count == 0) { - // No slices means we don't have to do anything, - // and libuv doesn't like empty writes - GRPC_CLOSURE_SCHED(exec_ctx, cb, GRPC_ERROR_NONE); - return; - } - - tcp->write_cb = cb; - buffer_count = (unsigned int)tcp->write_slices->count; - buffers = gpr_malloc(sizeof(uv_buf_t) * buffer_count); - grpc_resource_user_alloc(exec_ctx, tcp->resource_user, - sizeof(uv_buf_t) * buffer_count, NULL); - for (i = 0; i < buffer_count; i++) { - slice = &tcp->write_slices->slices[i]; - buffers[i].base = (char *)GRPC_SLICE_START_PTR(*slice); - buffers[i].len = GRPC_SLICE_LENGTH(*slice); - } - tcp->write_buffers = buffers; - write_req = &tcp->write_req; - write_req->data = tcp; - TCP_REF(tcp, "write"); - // TODO(murgatroid99): figure out what the return value here means - uv_write(write_req, (uv_stream_t *)tcp->handle, buffers, buffer_count, - write_callback); -} - -static void uv_add_to_pollset(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_pollset *pollset) { - // No-op. We're ignoring pollsets currently - (void)exec_ctx; - (void)ep; - (void)pollset; - grpc_tcp *tcp = (grpc_tcp *)ep; - tcp->pollset = pollset; -} - -static void uv_add_to_pollset_set(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_pollset_set *pollset) { - // No-op. We're ignoring pollsets currently - (void)exec_ctx; - (void)ep; - (void)pollset; -} - -static void shutdown_callback(uv_shutdown_t *req, int status) {} - -static void uv_endpoint_shutdown(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_error *why) { - grpc_tcp *tcp = (grpc_tcp *)ep; - if (!tcp->shutting_down) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - const char *str = grpc_error_string(why); - gpr_log(GPR_DEBUG, "TCP %p shutdown why=%s", tcp->handle, str); - } - tcp->shutting_down = true; - uv_shutdown_t *req = &tcp->shutdown_req; - uv_shutdown(req, (uv_stream_t *)tcp->handle, shutdown_callback); - grpc_resource_user_shutdown(exec_ctx, tcp->resource_user); - } - GRPC_ERROR_UNREF(why); -} - -static void uv_destroy(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep) { - grpc_network_status_unregister_endpoint(ep); - grpc_tcp *tcp = (grpc_tcp *)ep; - uv_close((uv_handle_t *)tcp->handle, uv_close_callback); -} - -static char *uv_get_peer(grpc_endpoint *ep) { - grpc_tcp *tcp = (grpc_tcp *)ep; - return gpr_strdup(tcp->peer_string); -} - -static grpc_resource_user *uv_get_resource_user(grpc_endpoint *ep) { - grpc_tcp *tcp = (grpc_tcp *)ep; - return tcp->resource_user; -} - -static int uv_get_fd(grpc_endpoint *ep) { return -1; } - -static grpc_endpoint_vtable vtable = { - uv_endpoint_read, uv_endpoint_write, uv_add_to_pollset, - uv_add_to_pollset_set, uv_endpoint_shutdown, uv_destroy, - uv_get_resource_user, uv_get_peer, uv_get_fd}; - -grpc_endpoint *grpc_tcp_create(uv_tcp_t *handle, - grpc_resource_quota *resource_quota, - char *peer_string) { - grpc_tcp *tcp = (grpc_tcp *)gpr_malloc(sizeof(grpc_tcp)); - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_log(GPR_DEBUG, "Creating TCP endpoint %p", tcp); - } - - /* Disable Nagle's Algorithm */ - uv_tcp_nodelay(handle, 1); - - memset(tcp, 0, sizeof(grpc_tcp)); - tcp->base.vtable = &vtable; - tcp->handle = handle; - handle->data = tcp; - gpr_ref_init(&tcp->refcount, 1); - tcp->peer_string = gpr_strdup(peer_string); - tcp->shutting_down = false; - tcp->resource_user = grpc_resource_user_create(resource_quota, peer_string); - tcp->read_slice = alloc_read_slice(&exec_ctx, tcp->resource_user); - /* Tell network status tracking code about the new endpoint */ - grpc_network_status_register_endpoint(&tcp->base); - -#ifndef GRPC_UV_TCP_HOLD_LOOP - uv_unref((uv_handle_t *)handle); -#endif - - grpc_exec_ctx_finish(&exec_ctx); - return &tcp->base; -} - -#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/tcp_uv.cc b/src/core/lib/iomgr/tcp_uv.cc new file mode 100644 index 0000000000..a05c19b4ac --- /dev/null +++ b/src/core/lib/iomgr/tcp_uv.cc @@ -0,0 +1,381 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_UV + +#include +#include + +#include + +#include +#include +#include + +#include "src/core/lib/iomgr/error.h" +#include "src/core/lib/iomgr/iomgr_uv.h" +#include "src/core/lib/iomgr/network_status_tracker.h" +#include "src/core/lib/iomgr/resource_quota.h" +#include "src/core/lib/iomgr/tcp_uv.h" +#include "src/core/lib/slice/slice_internal.h" +#include "src/core/lib/slice/slice_string_helpers.h" +#include "src/core/lib/support/string.h" + +grpc_tracer_flag grpc_tcp_trace = GRPC_TRACER_INITIALIZER(false, "tcp"); + +typedef struct { + grpc_endpoint base; + gpr_refcount refcount; + + uv_write_t write_req; + uv_shutdown_t shutdown_req; + + uv_tcp_t *handle; + + grpc_closure *read_cb; + grpc_closure *write_cb; + + grpc_slice read_slice; + grpc_slice_buffer *read_slices; + grpc_slice_buffer *write_slices; + uv_buf_t *write_buffers; + + grpc_resource_user *resource_user; + + bool shutting_down; + + char *peer_string; + grpc_pollset *pollset; +} grpc_tcp; + +static void tcp_free(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + grpc_slice_unref_internal(exec_ctx, tcp->read_slice); + grpc_resource_user_unref(exec_ctx, tcp->resource_user); + gpr_free(tcp->handle); + gpr_free(tcp->peer_string); + gpr_free(tcp); +} + +#ifndef NDEBUG +#define TCP_UNREF(exec_ctx, tcp, reason) \ + tcp_unref((exec_ctx), (tcp), (reason), __FILE__, __LINE__) +#define TCP_REF(tcp, reason) tcp_ref((tcp), (reason), __FILE__, __LINE__) +static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp, + const char *reason, const char *file, int line) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); + gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, + "TCP unref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, + val - 1); + } + if (gpr_unref(&tcp->refcount)) { + tcp_free(exec_ctx, tcp); + } +} + +static void tcp_ref(grpc_tcp *tcp, const char *reason, const char *file, + int line) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); + gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, + "TCP ref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, + val + 1); + } + gpr_ref(&tcp->refcount); +} +#else +#define TCP_UNREF(exec_ctx, tcp, reason) tcp_unref((exec_ctx), (tcp)) +#define TCP_REF(tcp, reason) tcp_ref((tcp)) +static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + if (gpr_unref(&tcp->refcount)) { + tcp_free(exec_ctx, tcp); + } +} + +static void tcp_ref(grpc_tcp *tcp) { gpr_ref(&tcp->refcount); } +#endif + +static void uv_close_callback(uv_handle_t *handle) { + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + grpc_tcp *tcp = handle->data; + TCP_UNREF(&exec_ctx, tcp, "destroy"); + grpc_exec_ctx_finish(&exec_ctx); +} + +static grpc_slice alloc_read_slice(grpc_exec_ctx *exec_ctx, + grpc_resource_user *resource_user) { + return grpc_resource_user_slice_malloc(exec_ctx, resource_user, + GRPC_TCP_DEFAULT_READ_SLICE_SIZE); +} + +static void alloc_uv_buf(uv_handle_t *handle, size_t suggested_size, + uv_buf_t *buf) { + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + grpc_tcp *tcp = handle->data; + (void)suggested_size; + buf->base = (char *)GRPC_SLICE_START_PTR(tcp->read_slice); + buf->len = GRPC_SLICE_LENGTH(tcp->read_slice); + grpc_exec_ctx_finish(&exec_ctx); +} + +static void read_callback(uv_stream_t *stream, ssize_t nread, + const uv_buf_t *buf) { + grpc_slice sub; + grpc_error *error; + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + grpc_tcp *tcp = stream->data; + grpc_closure *cb = tcp->read_cb; + if (nread == 0) { + // Nothing happened. Wait for the next callback + return; + } + TCP_UNREF(&exec_ctx, tcp, "read"); + tcp->read_cb = NULL; + // TODO(murgatroid99): figure out what the return value here means + uv_read_stop(stream); + if (nread == UV_EOF) { + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("EOF"); + } else if (nread > 0) { + // Successful read + sub = grpc_slice_sub_no_ref(tcp->read_slice, 0, (size_t)nread); + grpc_slice_buffer_add(tcp->read_slices, sub); + tcp->read_slice = alloc_read_slice(&exec_ctx, tcp->resource_user); + error = GRPC_ERROR_NONE; + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + size_t i; + const char *str = grpc_error_string(error); + gpr_log(GPR_DEBUG, "read: error=%s", str); + + for (i = 0; i < tcp->read_slices->count; i++) { + char *dump = grpc_dump_slice(tcp->read_slices->slices[i], + GPR_DUMP_HEX | GPR_DUMP_ASCII); + gpr_log(GPR_DEBUG, "READ %p (peer=%s): %s", tcp, tcp->peer_string, + dump); + gpr_free(dump); + } + } + } else { + // nread < 0: Error + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("TCP Read failed"); + } + GRPC_CLOSURE_SCHED(&exec_ctx, cb, error); + grpc_exec_ctx_finish(&exec_ctx); +} + +static void uv_endpoint_read(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_slice_buffer *read_slices, grpc_closure *cb) { + grpc_tcp *tcp = (grpc_tcp *)ep; + int status; + grpc_error *error = GRPC_ERROR_NONE; + GRPC_UV_ASSERT_SAME_THREAD(); + GPR_ASSERT(tcp->read_cb == NULL); + tcp->read_cb = cb; + tcp->read_slices = read_slices; + grpc_slice_buffer_reset_and_unref_internal(exec_ctx, read_slices); + TCP_REF(tcp, "read"); + // TODO(murgatroid99): figure out what the return value here means + status = + uv_read_start((uv_stream_t *)tcp->handle, alloc_uv_buf, read_callback); + if (status != 0) { + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("TCP Read failed at start"); + error = + grpc_error_set_str(error, GRPC_ERROR_STR_OS_ERROR, + grpc_slice_from_static_string(uv_strerror(status))); + GRPC_CLOSURE_SCHED(exec_ctx, cb, error); + } + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + const char *str = grpc_error_string(error); + gpr_log(GPR_DEBUG, "Initiating read on %p: error=%s", tcp, str); + } +} + +static void write_callback(uv_write_t *req, int status) { + grpc_tcp *tcp = req->data; + grpc_error *error; + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + grpc_closure *cb = tcp->write_cb; + tcp->write_cb = NULL; + TCP_UNREF(&exec_ctx, tcp, "write"); + if (status == 0) { + error = GRPC_ERROR_NONE; + } else { + error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("TCP Write failed"); + } + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + const char *str = grpc_error_string(error); + gpr_log(GPR_DEBUG, "write complete on %p: error=%s", tcp, str); + } + gpr_free(tcp->write_buffers); + grpc_resource_user_free(&exec_ctx, tcp->resource_user, + sizeof(uv_buf_t) * tcp->write_slices->count); + GRPC_CLOSURE_SCHED(&exec_ctx, cb, error); + grpc_exec_ctx_finish(&exec_ctx); +} + +static void uv_endpoint_write(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_slice_buffer *write_slices, + grpc_closure *cb) { + grpc_tcp *tcp = (grpc_tcp *)ep; + uv_buf_t *buffers; + unsigned int buffer_count; + unsigned int i; + grpc_slice *slice; + uv_write_t *write_req; + GRPC_UV_ASSERT_SAME_THREAD(); + + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + size_t j; + + for (j = 0; j < write_slices->count; j++) { + char *data = grpc_dump_slice(write_slices->slices[j], + GPR_DUMP_HEX | GPR_DUMP_ASCII); + gpr_log(GPR_DEBUG, "WRITE %p (peer=%s): %s", tcp, tcp->peer_string, data); + gpr_free(data); + } + } + + if (tcp->shutting_down) { + GRPC_CLOSURE_SCHED(exec_ctx, cb, GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "TCP socket is shutting down")); + return; + } + + GPR_ASSERT(tcp->write_cb == NULL); + tcp->write_slices = write_slices; + GPR_ASSERT(tcp->write_slices->count <= UINT_MAX); + if (tcp->write_slices->count == 0) { + // No slices means we don't have to do anything, + // and libuv doesn't like empty writes + GRPC_CLOSURE_SCHED(exec_ctx, cb, GRPC_ERROR_NONE); + return; + } + + tcp->write_cb = cb; + buffer_count = (unsigned int)tcp->write_slices->count; + buffers = gpr_malloc(sizeof(uv_buf_t) * buffer_count); + grpc_resource_user_alloc(exec_ctx, tcp->resource_user, + sizeof(uv_buf_t) * buffer_count, NULL); + for (i = 0; i < buffer_count; i++) { + slice = &tcp->write_slices->slices[i]; + buffers[i].base = (char *)GRPC_SLICE_START_PTR(*slice); + buffers[i].len = GRPC_SLICE_LENGTH(*slice); + } + tcp->write_buffers = buffers; + write_req = &tcp->write_req; + write_req->data = tcp; + TCP_REF(tcp, "write"); + // TODO(murgatroid99): figure out what the return value here means + uv_write(write_req, (uv_stream_t *)tcp->handle, buffers, buffer_count, + write_callback); +} + +static void uv_add_to_pollset(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_pollset *pollset) { + // No-op. We're ignoring pollsets currently + (void)exec_ctx; + (void)ep; + (void)pollset; + grpc_tcp *tcp = (grpc_tcp *)ep; + tcp->pollset = pollset; +} + +static void uv_add_to_pollset_set(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_pollset_set *pollset) { + // No-op. We're ignoring pollsets currently + (void)exec_ctx; + (void)ep; + (void)pollset; +} + +static void shutdown_callback(uv_shutdown_t *req, int status) {} + +static void uv_endpoint_shutdown(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_error *why) { + grpc_tcp *tcp = (grpc_tcp *)ep; + if (!tcp->shutting_down) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + const char *str = grpc_error_string(why); + gpr_log(GPR_DEBUG, "TCP %p shutdown why=%s", tcp->handle, str); + } + tcp->shutting_down = true; + uv_shutdown_t *req = &tcp->shutdown_req; + uv_shutdown(req, (uv_stream_t *)tcp->handle, shutdown_callback); + grpc_resource_user_shutdown(exec_ctx, tcp->resource_user); + } + GRPC_ERROR_UNREF(why); +} + +static void uv_destroy(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep) { + grpc_network_status_unregister_endpoint(ep); + grpc_tcp *tcp = (grpc_tcp *)ep; + uv_close((uv_handle_t *)tcp->handle, uv_close_callback); +} + +static char *uv_get_peer(grpc_endpoint *ep) { + grpc_tcp *tcp = (grpc_tcp *)ep; + return gpr_strdup(tcp->peer_string); +} + +static grpc_resource_user *uv_get_resource_user(grpc_endpoint *ep) { + grpc_tcp *tcp = (grpc_tcp *)ep; + return tcp->resource_user; +} + +static int uv_get_fd(grpc_endpoint *ep) { return -1; } + +static grpc_endpoint_vtable vtable = { + uv_endpoint_read, uv_endpoint_write, uv_add_to_pollset, + uv_add_to_pollset_set, uv_endpoint_shutdown, uv_destroy, + uv_get_resource_user, uv_get_peer, uv_get_fd}; + +grpc_endpoint *grpc_tcp_create(uv_tcp_t *handle, + grpc_resource_quota *resource_quota, + char *peer_string) { + grpc_tcp *tcp = (grpc_tcp *)gpr_malloc(sizeof(grpc_tcp)); + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_log(GPR_DEBUG, "Creating TCP endpoint %p", tcp); + } + + /* Disable Nagle's Algorithm */ + uv_tcp_nodelay(handle, 1); + + memset(tcp, 0, sizeof(grpc_tcp)); + tcp->base.vtable = &vtable; + tcp->handle = handle; + handle->data = tcp; + gpr_ref_init(&tcp->refcount, 1); + tcp->peer_string = gpr_strdup(peer_string); + tcp->shutting_down = false; + tcp->resource_user = grpc_resource_user_create(resource_quota, peer_string); + tcp->read_slice = alloc_read_slice(&exec_ctx, tcp->resource_user); + /* Tell network status tracking code about the new endpoint */ + grpc_network_status_register_endpoint(&tcp->base); + +#ifndef GRPC_UV_TCP_HOLD_LOOP + uv_unref((uv_handle_t *)handle); +#endif + + grpc_exec_ctx_finish(&exec_ctx); + return &tcp->base; +} + +#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/tcp_windows.c b/src/core/lib/iomgr/tcp_windows.c deleted file mode 100644 index 2cbb97403b..0000000000 --- a/src/core/lib/iomgr/tcp_windows.c +++ /dev/null @@ -1,448 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_WINSOCK_SOCKET - -#include - -#include "src/core/lib/iomgr/network_status_tracker.h" -#include "src/core/lib/iomgr/sockaddr_windows.h" - -#include -#include -#include -#include -#include -#include - -#include "src/core/lib/iomgr/iocp_windows.h" -#include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/iomgr/socket_windows.h" -#include "src/core/lib/iomgr/tcp_client.h" -#include "src/core/lib/iomgr/timer.h" -#include "src/core/lib/slice/slice_internal.h" - -#if defined(__MSYS__) && defined(GPR_ARCH_64) -/* Nasty workaround for nasty bug when using the 64 bits msys compiler - in conjunction with Microsoft Windows headers. */ -#define GRPC_FIONBIO _IOW('f', 126, uint32_t) -#else -#define GRPC_FIONBIO FIONBIO -#endif - -grpc_tracer_flag grpc_tcp_trace = GRPC_TRACER_INITIALIZER(false, "tcp"); - -static grpc_error *set_non_block(SOCKET sock) { - int status; - uint32_t param = 1; - DWORD ret; - status = WSAIoctl(sock, GRPC_FIONBIO, ¶m, sizeof(param), NULL, 0, &ret, - NULL, NULL); - return status == 0 - ? GRPC_ERROR_NONE - : GRPC_WSA_ERROR(WSAGetLastError(), "WSAIoctl(GRPC_FIONBIO)"); -} - -static grpc_error *set_dualstack(SOCKET sock) { - int status; - unsigned long param = 0; - status = setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char *)¶m, - sizeof(param)); - return status == 0 - ? GRPC_ERROR_NONE - : GRPC_WSA_ERROR(WSAGetLastError(), "setsockopt(IPV6_V6ONLY)"); -} - -grpc_error *grpc_tcp_prepare_socket(SOCKET sock) { - grpc_error *err; - err = set_non_block(sock); - if (err != GRPC_ERROR_NONE) return err; - err = set_dualstack(sock); - if (err != GRPC_ERROR_NONE) return err; - return GRPC_ERROR_NONE; -} - -typedef struct grpc_tcp { - /* This is our C++ class derivation emulation. */ - grpc_endpoint base; - /* The one socket this endpoint is using. */ - grpc_winsocket *socket; - /* Refcounting how many operations are in progress. */ - gpr_refcount refcount; - - grpc_closure on_read; - grpc_closure on_write; - - grpc_closure *read_cb; - grpc_closure *write_cb; - grpc_slice read_slice; - grpc_slice_buffer *write_slices; - grpc_slice_buffer *read_slices; - - grpc_resource_user *resource_user; - - /* The IO Completion Port runs from another thread. We need some mechanism - to protect ourselves when requesting a shutdown. */ - gpr_mu mu; - int shutting_down; - grpc_error *shutdown_error; - - char *peer_string; -} grpc_tcp; - -static void tcp_free(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - grpc_winsocket_destroy(tcp->socket); - gpr_mu_destroy(&tcp->mu); - gpr_free(tcp->peer_string); - grpc_resource_user_unref(exec_ctx, tcp->resource_user); - if (tcp->shutting_down) GRPC_ERROR_UNREF(tcp->shutdown_error); - gpr_free(tcp); -} - -#ifndef NDEBUG -#define TCP_UNREF(exec_ctx, tcp, reason) \ - tcp_unref((exec_ctx), (tcp), (reason), __FILE__, __LINE__) -#define TCP_REF(tcp, reason) tcp_ref((tcp), (reason), __FILE__, __LINE__) -static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp, - const char *reason, const char *file, int line) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); - gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, - "TCP unref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, - val - 1); - } - if (gpr_unref(&tcp->refcount)) { - tcp_free(exec_ctx, tcp); - } -} - -static void tcp_ref(grpc_tcp *tcp, const char *reason, const char *file, - int line) { - if (GRPC_TRACER_ON(grpc_tcp_trace)) { - gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); - gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, - "TCP ref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, - val + 1); - } - gpr_ref(&tcp->refcount); -} -#else -#define TCP_UNREF(exec_ctx, tcp, reason) tcp_unref((exec_ctx), (tcp)) -#define TCP_REF(tcp, reason) tcp_ref((tcp)) -static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { - if (gpr_unref(&tcp->refcount)) { - tcp_free(exec_ctx, tcp); - } -} - -static void tcp_ref(grpc_tcp *tcp) { gpr_ref(&tcp->refcount); } -#endif - -/* Asynchronous callback from the IOCP, or the background thread. */ -static void on_read(grpc_exec_ctx *exec_ctx, void *tcpp, grpc_error *error) { - grpc_tcp *tcp = tcpp; - grpc_closure *cb = tcp->read_cb; - grpc_winsocket *socket = tcp->socket; - grpc_slice sub; - grpc_winsocket_callback_info *info = &socket->read_info; - - GRPC_ERROR_REF(error); - - if (error == GRPC_ERROR_NONE) { - if (info->wsa_error != 0 && !tcp->shutting_down) { - char *utf8_message = gpr_format_message(info->wsa_error); - error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(utf8_message); - gpr_free(utf8_message); - grpc_slice_unref_internal(exec_ctx, tcp->read_slice); - } else { - if (info->bytes_transfered != 0 && !tcp->shutting_down) { - sub = grpc_slice_sub_no_ref(tcp->read_slice, 0, info->bytes_transfered); - grpc_slice_buffer_add(tcp->read_slices, sub); - } else { - grpc_slice_unref_internal(exec_ctx, tcp->read_slice); - error = tcp->shutting_down - ? GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "TCP stream shutting down", &tcp->shutdown_error, 1) - : GRPC_ERROR_CREATE_FROM_STATIC_STRING("End of TCP stream"); - } - } - } - - tcp->read_cb = NULL; - TCP_UNREF(exec_ctx, tcp, "read"); - GRPC_CLOSURE_SCHED(exec_ctx, cb, error); -} - -static void win_read(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_slice_buffer *read_slices, grpc_closure *cb) { - grpc_tcp *tcp = (grpc_tcp *)ep; - grpc_winsocket *handle = tcp->socket; - grpc_winsocket_callback_info *info = &handle->read_info; - int status; - DWORD bytes_read = 0; - DWORD flags = 0; - WSABUF buffer; - - if (tcp->shutting_down) { - GRPC_CLOSURE_SCHED( - exec_ctx, cb, - GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "TCP socket is shutting down", &tcp->shutdown_error, 1)); - return; - } - - tcp->read_cb = cb; - tcp->read_slices = read_slices; - grpc_slice_buffer_reset_and_unref_internal(exec_ctx, read_slices); - - tcp->read_slice = GRPC_SLICE_MALLOC(8192); - - buffer.len = (ULONG)GRPC_SLICE_LENGTH( - tcp->read_slice); // we know slice size fits in 32bit. - buffer.buf = (char *)GRPC_SLICE_START_PTR(tcp->read_slice); - - TCP_REF(tcp, "read"); - - /* First let's try a synchronous, non-blocking read. */ - status = - WSARecv(tcp->socket->socket, &buffer, 1, &bytes_read, &flags, NULL, NULL); - info->wsa_error = status == 0 ? 0 : WSAGetLastError(); - - /* Did we get data immediately ? Yay. */ - if (info->wsa_error != WSAEWOULDBLOCK) { - info->bytes_transfered = bytes_read; - GRPC_CLOSURE_SCHED(exec_ctx, &tcp->on_read, GRPC_ERROR_NONE); - return; - } - - /* Otherwise, let's retry, by queuing a read. */ - memset(&tcp->socket->read_info.overlapped, 0, sizeof(OVERLAPPED)); - status = WSARecv(tcp->socket->socket, &buffer, 1, &bytes_read, &flags, - &info->overlapped, NULL); - - if (status != 0) { - int wsa_error = WSAGetLastError(); - if (wsa_error != WSA_IO_PENDING) { - info->wsa_error = wsa_error; - GRPC_CLOSURE_SCHED(exec_ctx, &tcp->on_read, - GRPC_WSA_ERROR(info->wsa_error, "WSARecv")); - return; - } - } - - grpc_socket_notify_on_read(exec_ctx, tcp->socket, &tcp->on_read); -} - -/* Asynchronous callback from the IOCP, or the background thread. */ -static void on_write(grpc_exec_ctx *exec_ctx, void *tcpp, grpc_error *error) { - grpc_tcp *tcp = (grpc_tcp *)tcpp; - grpc_winsocket *handle = tcp->socket; - grpc_winsocket_callback_info *info = &handle->write_info; - grpc_closure *cb; - - GRPC_ERROR_REF(error); - - gpr_mu_lock(&tcp->mu); - cb = tcp->write_cb; - tcp->write_cb = NULL; - gpr_mu_unlock(&tcp->mu); - - if (error == GRPC_ERROR_NONE) { - if (info->wsa_error != 0) { - error = GRPC_WSA_ERROR(info->wsa_error, "WSASend"); - } else { - GPR_ASSERT(info->bytes_transfered == tcp->write_slices->length); - } - } - - TCP_UNREF(exec_ctx, tcp, "write"); - GRPC_CLOSURE_SCHED(exec_ctx, cb, error); -} - -/* Initiates a write. */ -static void win_write(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_slice_buffer *slices, grpc_closure *cb) { - grpc_tcp *tcp = (grpc_tcp *)ep; - grpc_winsocket *socket = tcp->socket; - grpc_winsocket_callback_info *info = &socket->write_info; - unsigned i; - DWORD bytes_sent; - int status; - WSABUF local_buffers[16]; - WSABUF *allocated = NULL; - WSABUF *buffers = local_buffers; - size_t len; - - if (tcp->shutting_down) { - GRPC_CLOSURE_SCHED( - exec_ctx, cb, - GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "TCP socket is shutting down", &tcp->shutdown_error, 1)); - return; - } - - tcp->write_cb = cb; - tcp->write_slices = slices; - GPR_ASSERT(tcp->write_slices->count <= UINT_MAX); - if (tcp->write_slices->count > GPR_ARRAY_SIZE(local_buffers)) { - buffers = (WSABUF *)gpr_malloc(sizeof(WSABUF) * tcp->write_slices->count); - allocated = buffers; - } - - for (i = 0; i < tcp->write_slices->count; i++) { - len = GRPC_SLICE_LENGTH(tcp->write_slices->slices[i]); - GPR_ASSERT(len <= ULONG_MAX); - buffers[i].len = (ULONG)len; - buffers[i].buf = (char *)GRPC_SLICE_START_PTR(tcp->write_slices->slices[i]); - } - - /* First, let's try a synchronous, non-blocking write. */ - status = WSASend(socket->socket, buffers, (DWORD)tcp->write_slices->count, - &bytes_sent, 0, NULL, NULL); - info->wsa_error = status == 0 ? 0 : WSAGetLastError(); - - /* We would kind of expect to get a WSAEWOULDBLOCK here, especially on a busy - connection that has its send queue filled up. But if we don't, then we can - avoid doing an async write operation at all. */ - if (info->wsa_error != WSAEWOULDBLOCK) { - grpc_error *error = status == 0 - ? GRPC_ERROR_NONE - : GRPC_WSA_ERROR(info->wsa_error, "WSASend"); - GRPC_CLOSURE_SCHED(exec_ctx, cb, error); - if (allocated) gpr_free(allocated); - return; - } - - TCP_REF(tcp, "write"); - - /* If we got a WSAEWOULDBLOCK earlier, then we need to re-do the same - operation, this time asynchronously. */ - memset(&socket->write_info.overlapped, 0, sizeof(OVERLAPPED)); - status = WSASend(socket->socket, buffers, (DWORD)tcp->write_slices->count, - &bytes_sent, 0, &socket->write_info.overlapped, NULL); - if (allocated) gpr_free(allocated); - - if (status != 0) { - int wsa_error = WSAGetLastError(); - if (wsa_error != WSA_IO_PENDING) { - TCP_UNREF(exec_ctx, tcp, "write"); - GRPC_CLOSURE_SCHED(exec_ctx, cb, GRPC_WSA_ERROR(wsa_error, "WSASend")); - return; - } - } - - /* As all is now setup, we can now ask for the IOCP notification. It may - trigger the callback immediately however, but no matter. */ - grpc_socket_notify_on_write(exec_ctx, socket, &tcp->on_write); -} - -static void win_add_to_pollset(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_pollset *ps) { - grpc_tcp *tcp; - (void)ps; - tcp = (grpc_tcp *)ep; - grpc_iocp_add_socket(tcp->socket); -} - -static void win_add_to_pollset_set(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_pollset_set *pss) { - grpc_tcp *tcp; - (void)pss; - tcp = (grpc_tcp *)ep; - grpc_iocp_add_socket(tcp->socket); -} - -/* Initiates a shutdown of the TCP endpoint. This will queue abort callbacks - for the potential read and write operations. It is up to the caller to - guarantee this isn't called in parallel to a read or write request, so - we're not going to protect against these. However the IO Completion Port - callback will happen from another thread, so we need to protect against - concurrent access of the data structure in that regard. */ -static void win_shutdown(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, - grpc_error *why) { - grpc_tcp *tcp = (grpc_tcp *)ep; - gpr_mu_lock(&tcp->mu); - /* At that point, what may happen is that we're already inside the IOCP - callback. See the comments in on_read and on_write. */ - if (!tcp->shutting_down) { - tcp->shutting_down = 1; - tcp->shutdown_error = why; - } else { - GRPC_ERROR_UNREF(why); - } - grpc_winsocket_shutdown(tcp->socket); - gpr_mu_unlock(&tcp->mu); - grpc_resource_user_shutdown(exec_ctx, tcp->resource_user); -} - -static void win_destroy(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep) { - grpc_network_status_unregister_endpoint(ep); - grpc_tcp *tcp = (grpc_tcp *)ep; - TCP_UNREF(exec_ctx, tcp, "destroy"); -} - -static char *win_get_peer(grpc_endpoint *ep) { - grpc_tcp *tcp = (grpc_tcp *)ep; - return gpr_strdup(tcp->peer_string); -} - -static grpc_resource_user *win_get_resource_user(grpc_endpoint *ep) { - grpc_tcp *tcp = (grpc_tcp *)ep; - return tcp->resource_user; -} - -static int win_get_fd(grpc_endpoint *ep) { return -1; } - -static grpc_endpoint_vtable vtable = { - win_read, win_write, win_add_to_pollset, win_add_to_pollset_set, - win_shutdown, win_destroy, win_get_resource_user, win_get_peer, - win_get_fd}; - -grpc_endpoint *grpc_tcp_create(grpc_exec_ctx *exec_ctx, grpc_winsocket *socket, - grpc_channel_args *channel_args, - char *peer_string) { - grpc_resource_quota *resource_quota = grpc_resource_quota_create(NULL); - if (channel_args != NULL) { - for (size_t i = 0; i < channel_args->num_args; i++) { - if (0 == strcmp(channel_args->args[i].key, GRPC_ARG_RESOURCE_QUOTA)) { - grpc_resource_quota_unref_internal(exec_ctx, resource_quota); - resource_quota = grpc_resource_quota_ref_internal( - channel_args->args[i].value.pointer.p); - } - } - } - grpc_tcp *tcp = (grpc_tcp *)gpr_malloc(sizeof(grpc_tcp)); - memset(tcp, 0, sizeof(grpc_tcp)); - tcp->base.vtable = &vtable; - tcp->socket = socket; - gpr_mu_init(&tcp->mu); - gpr_ref_init(&tcp->refcount, 1); - GRPC_CLOSURE_INIT(&tcp->on_read, on_read, tcp, grpc_schedule_on_exec_ctx); - GRPC_CLOSURE_INIT(&tcp->on_write, on_write, tcp, grpc_schedule_on_exec_ctx); - tcp->peer_string = gpr_strdup(peer_string); - tcp->resource_user = grpc_resource_user_create(resource_quota, peer_string); - /* Tell network status tracking code about the new endpoint */ - grpc_network_status_register_endpoint(&tcp->base); - - return &tcp->base; -} - -#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/tcp_windows.cc b/src/core/lib/iomgr/tcp_windows.cc new file mode 100644 index 0000000000..2cbb97403b --- /dev/null +++ b/src/core/lib/iomgr/tcp_windows.cc @@ -0,0 +1,448 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_WINSOCK_SOCKET + +#include + +#include "src/core/lib/iomgr/network_status_tracker.h" +#include "src/core/lib/iomgr/sockaddr_windows.h" + +#include +#include +#include +#include +#include +#include + +#include "src/core/lib/iomgr/iocp_windows.h" +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/iomgr/socket_windows.h" +#include "src/core/lib/iomgr/tcp_client.h" +#include "src/core/lib/iomgr/timer.h" +#include "src/core/lib/slice/slice_internal.h" + +#if defined(__MSYS__) && defined(GPR_ARCH_64) +/* Nasty workaround for nasty bug when using the 64 bits msys compiler + in conjunction with Microsoft Windows headers. */ +#define GRPC_FIONBIO _IOW('f', 126, uint32_t) +#else +#define GRPC_FIONBIO FIONBIO +#endif + +grpc_tracer_flag grpc_tcp_trace = GRPC_TRACER_INITIALIZER(false, "tcp"); + +static grpc_error *set_non_block(SOCKET sock) { + int status; + uint32_t param = 1; + DWORD ret; + status = WSAIoctl(sock, GRPC_FIONBIO, ¶m, sizeof(param), NULL, 0, &ret, + NULL, NULL); + return status == 0 + ? GRPC_ERROR_NONE + : GRPC_WSA_ERROR(WSAGetLastError(), "WSAIoctl(GRPC_FIONBIO)"); +} + +static grpc_error *set_dualstack(SOCKET sock) { + int status; + unsigned long param = 0; + status = setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char *)¶m, + sizeof(param)); + return status == 0 + ? GRPC_ERROR_NONE + : GRPC_WSA_ERROR(WSAGetLastError(), "setsockopt(IPV6_V6ONLY)"); +} + +grpc_error *grpc_tcp_prepare_socket(SOCKET sock) { + grpc_error *err; + err = set_non_block(sock); + if (err != GRPC_ERROR_NONE) return err; + err = set_dualstack(sock); + if (err != GRPC_ERROR_NONE) return err; + return GRPC_ERROR_NONE; +} + +typedef struct grpc_tcp { + /* This is our C++ class derivation emulation. */ + grpc_endpoint base; + /* The one socket this endpoint is using. */ + grpc_winsocket *socket; + /* Refcounting how many operations are in progress. */ + gpr_refcount refcount; + + grpc_closure on_read; + grpc_closure on_write; + + grpc_closure *read_cb; + grpc_closure *write_cb; + grpc_slice read_slice; + grpc_slice_buffer *write_slices; + grpc_slice_buffer *read_slices; + + grpc_resource_user *resource_user; + + /* The IO Completion Port runs from another thread. We need some mechanism + to protect ourselves when requesting a shutdown. */ + gpr_mu mu; + int shutting_down; + grpc_error *shutdown_error; + + char *peer_string; +} grpc_tcp; + +static void tcp_free(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + grpc_winsocket_destroy(tcp->socket); + gpr_mu_destroy(&tcp->mu); + gpr_free(tcp->peer_string); + grpc_resource_user_unref(exec_ctx, tcp->resource_user); + if (tcp->shutting_down) GRPC_ERROR_UNREF(tcp->shutdown_error); + gpr_free(tcp); +} + +#ifndef NDEBUG +#define TCP_UNREF(exec_ctx, tcp, reason) \ + tcp_unref((exec_ctx), (tcp), (reason), __FILE__, __LINE__) +#define TCP_REF(tcp, reason) tcp_ref((tcp), (reason), __FILE__, __LINE__) +static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp, + const char *reason, const char *file, int line) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); + gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, + "TCP unref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, + val - 1); + } + if (gpr_unref(&tcp->refcount)) { + tcp_free(exec_ctx, tcp); + } +} + +static void tcp_ref(grpc_tcp *tcp, const char *reason, const char *file, + int line) { + if (GRPC_TRACER_ON(grpc_tcp_trace)) { + gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count); + gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG, + "TCP ref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val, + val + 1); + } + gpr_ref(&tcp->refcount); +} +#else +#define TCP_UNREF(exec_ctx, tcp, reason) tcp_unref((exec_ctx), (tcp)) +#define TCP_REF(tcp, reason) tcp_ref((tcp)) +static void tcp_unref(grpc_exec_ctx *exec_ctx, grpc_tcp *tcp) { + if (gpr_unref(&tcp->refcount)) { + tcp_free(exec_ctx, tcp); + } +} + +static void tcp_ref(grpc_tcp *tcp) { gpr_ref(&tcp->refcount); } +#endif + +/* Asynchronous callback from the IOCP, or the background thread. */ +static void on_read(grpc_exec_ctx *exec_ctx, void *tcpp, grpc_error *error) { + grpc_tcp *tcp = tcpp; + grpc_closure *cb = tcp->read_cb; + grpc_winsocket *socket = tcp->socket; + grpc_slice sub; + grpc_winsocket_callback_info *info = &socket->read_info; + + GRPC_ERROR_REF(error); + + if (error == GRPC_ERROR_NONE) { + if (info->wsa_error != 0 && !tcp->shutting_down) { + char *utf8_message = gpr_format_message(info->wsa_error); + error = GRPC_ERROR_CREATE_FROM_COPIED_STRING(utf8_message); + gpr_free(utf8_message); + grpc_slice_unref_internal(exec_ctx, tcp->read_slice); + } else { + if (info->bytes_transfered != 0 && !tcp->shutting_down) { + sub = grpc_slice_sub_no_ref(tcp->read_slice, 0, info->bytes_transfered); + grpc_slice_buffer_add(tcp->read_slices, sub); + } else { + grpc_slice_unref_internal(exec_ctx, tcp->read_slice); + error = tcp->shutting_down + ? GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "TCP stream shutting down", &tcp->shutdown_error, 1) + : GRPC_ERROR_CREATE_FROM_STATIC_STRING("End of TCP stream"); + } + } + } + + tcp->read_cb = NULL; + TCP_UNREF(exec_ctx, tcp, "read"); + GRPC_CLOSURE_SCHED(exec_ctx, cb, error); +} + +static void win_read(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_slice_buffer *read_slices, grpc_closure *cb) { + grpc_tcp *tcp = (grpc_tcp *)ep; + grpc_winsocket *handle = tcp->socket; + grpc_winsocket_callback_info *info = &handle->read_info; + int status; + DWORD bytes_read = 0; + DWORD flags = 0; + WSABUF buffer; + + if (tcp->shutting_down) { + GRPC_CLOSURE_SCHED( + exec_ctx, cb, + GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "TCP socket is shutting down", &tcp->shutdown_error, 1)); + return; + } + + tcp->read_cb = cb; + tcp->read_slices = read_slices; + grpc_slice_buffer_reset_and_unref_internal(exec_ctx, read_slices); + + tcp->read_slice = GRPC_SLICE_MALLOC(8192); + + buffer.len = (ULONG)GRPC_SLICE_LENGTH( + tcp->read_slice); // we know slice size fits in 32bit. + buffer.buf = (char *)GRPC_SLICE_START_PTR(tcp->read_slice); + + TCP_REF(tcp, "read"); + + /* First let's try a synchronous, non-blocking read. */ + status = + WSARecv(tcp->socket->socket, &buffer, 1, &bytes_read, &flags, NULL, NULL); + info->wsa_error = status == 0 ? 0 : WSAGetLastError(); + + /* Did we get data immediately ? Yay. */ + if (info->wsa_error != WSAEWOULDBLOCK) { + info->bytes_transfered = bytes_read; + GRPC_CLOSURE_SCHED(exec_ctx, &tcp->on_read, GRPC_ERROR_NONE); + return; + } + + /* Otherwise, let's retry, by queuing a read. */ + memset(&tcp->socket->read_info.overlapped, 0, sizeof(OVERLAPPED)); + status = WSARecv(tcp->socket->socket, &buffer, 1, &bytes_read, &flags, + &info->overlapped, NULL); + + if (status != 0) { + int wsa_error = WSAGetLastError(); + if (wsa_error != WSA_IO_PENDING) { + info->wsa_error = wsa_error; + GRPC_CLOSURE_SCHED(exec_ctx, &tcp->on_read, + GRPC_WSA_ERROR(info->wsa_error, "WSARecv")); + return; + } + } + + grpc_socket_notify_on_read(exec_ctx, tcp->socket, &tcp->on_read); +} + +/* Asynchronous callback from the IOCP, or the background thread. */ +static void on_write(grpc_exec_ctx *exec_ctx, void *tcpp, grpc_error *error) { + grpc_tcp *tcp = (grpc_tcp *)tcpp; + grpc_winsocket *handle = tcp->socket; + grpc_winsocket_callback_info *info = &handle->write_info; + grpc_closure *cb; + + GRPC_ERROR_REF(error); + + gpr_mu_lock(&tcp->mu); + cb = tcp->write_cb; + tcp->write_cb = NULL; + gpr_mu_unlock(&tcp->mu); + + if (error == GRPC_ERROR_NONE) { + if (info->wsa_error != 0) { + error = GRPC_WSA_ERROR(info->wsa_error, "WSASend"); + } else { + GPR_ASSERT(info->bytes_transfered == tcp->write_slices->length); + } + } + + TCP_UNREF(exec_ctx, tcp, "write"); + GRPC_CLOSURE_SCHED(exec_ctx, cb, error); +} + +/* Initiates a write. */ +static void win_write(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_slice_buffer *slices, grpc_closure *cb) { + grpc_tcp *tcp = (grpc_tcp *)ep; + grpc_winsocket *socket = tcp->socket; + grpc_winsocket_callback_info *info = &socket->write_info; + unsigned i; + DWORD bytes_sent; + int status; + WSABUF local_buffers[16]; + WSABUF *allocated = NULL; + WSABUF *buffers = local_buffers; + size_t len; + + if (tcp->shutting_down) { + GRPC_CLOSURE_SCHED( + exec_ctx, cb, + GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( + "TCP socket is shutting down", &tcp->shutdown_error, 1)); + return; + } + + tcp->write_cb = cb; + tcp->write_slices = slices; + GPR_ASSERT(tcp->write_slices->count <= UINT_MAX); + if (tcp->write_slices->count > GPR_ARRAY_SIZE(local_buffers)) { + buffers = (WSABUF *)gpr_malloc(sizeof(WSABUF) * tcp->write_slices->count); + allocated = buffers; + } + + for (i = 0; i < tcp->write_slices->count; i++) { + len = GRPC_SLICE_LENGTH(tcp->write_slices->slices[i]); + GPR_ASSERT(len <= ULONG_MAX); + buffers[i].len = (ULONG)len; + buffers[i].buf = (char *)GRPC_SLICE_START_PTR(tcp->write_slices->slices[i]); + } + + /* First, let's try a synchronous, non-blocking write. */ + status = WSASend(socket->socket, buffers, (DWORD)tcp->write_slices->count, + &bytes_sent, 0, NULL, NULL); + info->wsa_error = status == 0 ? 0 : WSAGetLastError(); + + /* We would kind of expect to get a WSAEWOULDBLOCK here, especially on a busy + connection that has its send queue filled up. But if we don't, then we can + avoid doing an async write operation at all. */ + if (info->wsa_error != WSAEWOULDBLOCK) { + grpc_error *error = status == 0 + ? GRPC_ERROR_NONE + : GRPC_WSA_ERROR(info->wsa_error, "WSASend"); + GRPC_CLOSURE_SCHED(exec_ctx, cb, error); + if (allocated) gpr_free(allocated); + return; + } + + TCP_REF(tcp, "write"); + + /* If we got a WSAEWOULDBLOCK earlier, then we need to re-do the same + operation, this time asynchronously. */ + memset(&socket->write_info.overlapped, 0, sizeof(OVERLAPPED)); + status = WSASend(socket->socket, buffers, (DWORD)tcp->write_slices->count, + &bytes_sent, 0, &socket->write_info.overlapped, NULL); + if (allocated) gpr_free(allocated); + + if (status != 0) { + int wsa_error = WSAGetLastError(); + if (wsa_error != WSA_IO_PENDING) { + TCP_UNREF(exec_ctx, tcp, "write"); + GRPC_CLOSURE_SCHED(exec_ctx, cb, GRPC_WSA_ERROR(wsa_error, "WSASend")); + return; + } + } + + /* As all is now setup, we can now ask for the IOCP notification. It may + trigger the callback immediately however, but no matter. */ + grpc_socket_notify_on_write(exec_ctx, socket, &tcp->on_write); +} + +static void win_add_to_pollset(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_pollset *ps) { + grpc_tcp *tcp; + (void)ps; + tcp = (grpc_tcp *)ep; + grpc_iocp_add_socket(tcp->socket); +} + +static void win_add_to_pollset_set(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_pollset_set *pss) { + grpc_tcp *tcp; + (void)pss; + tcp = (grpc_tcp *)ep; + grpc_iocp_add_socket(tcp->socket); +} + +/* Initiates a shutdown of the TCP endpoint. This will queue abort callbacks + for the potential read and write operations. It is up to the caller to + guarantee this isn't called in parallel to a read or write request, so + we're not going to protect against these. However the IO Completion Port + callback will happen from another thread, so we need to protect against + concurrent access of the data structure in that regard. */ +static void win_shutdown(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep, + grpc_error *why) { + grpc_tcp *tcp = (grpc_tcp *)ep; + gpr_mu_lock(&tcp->mu); + /* At that point, what may happen is that we're already inside the IOCP + callback. See the comments in on_read and on_write. */ + if (!tcp->shutting_down) { + tcp->shutting_down = 1; + tcp->shutdown_error = why; + } else { + GRPC_ERROR_UNREF(why); + } + grpc_winsocket_shutdown(tcp->socket); + gpr_mu_unlock(&tcp->mu); + grpc_resource_user_shutdown(exec_ctx, tcp->resource_user); +} + +static void win_destroy(grpc_exec_ctx *exec_ctx, grpc_endpoint *ep) { + grpc_network_status_unregister_endpoint(ep); + grpc_tcp *tcp = (grpc_tcp *)ep; + TCP_UNREF(exec_ctx, tcp, "destroy"); +} + +static char *win_get_peer(grpc_endpoint *ep) { + grpc_tcp *tcp = (grpc_tcp *)ep; + return gpr_strdup(tcp->peer_string); +} + +static grpc_resource_user *win_get_resource_user(grpc_endpoint *ep) { + grpc_tcp *tcp = (grpc_tcp *)ep; + return tcp->resource_user; +} + +static int win_get_fd(grpc_endpoint *ep) { return -1; } + +static grpc_endpoint_vtable vtable = { + win_read, win_write, win_add_to_pollset, win_add_to_pollset_set, + win_shutdown, win_destroy, win_get_resource_user, win_get_peer, + win_get_fd}; + +grpc_endpoint *grpc_tcp_create(grpc_exec_ctx *exec_ctx, grpc_winsocket *socket, + grpc_channel_args *channel_args, + char *peer_string) { + grpc_resource_quota *resource_quota = grpc_resource_quota_create(NULL); + if (channel_args != NULL) { + for (size_t i = 0; i < channel_args->num_args; i++) { + if (0 == strcmp(channel_args->args[i].key, GRPC_ARG_RESOURCE_QUOTA)) { + grpc_resource_quota_unref_internal(exec_ctx, resource_quota); + resource_quota = grpc_resource_quota_ref_internal( + channel_args->args[i].value.pointer.p); + } + } + } + grpc_tcp *tcp = (grpc_tcp *)gpr_malloc(sizeof(grpc_tcp)); + memset(tcp, 0, sizeof(grpc_tcp)); + tcp->base.vtable = &vtable; + tcp->socket = socket; + gpr_mu_init(&tcp->mu); + gpr_ref_init(&tcp->refcount, 1); + GRPC_CLOSURE_INIT(&tcp->on_read, on_read, tcp, grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_INIT(&tcp->on_write, on_write, tcp, grpc_schedule_on_exec_ctx); + tcp->peer_string = gpr_strdup(peer_string); + tcp->resource_user = grpc_resource_user_create(resource_quota, peer_string); + /* Tell network status tracking code about the new endpoint */ + grpc_network_status_register_endpoint(&tcp->base); + + return &tcp->base; +} + +#endif /* GRPC_WINSOCK_SOCKET */ diff --git a/src/core/lib/iomgr/time_averaged_stats.c b/src/core/lib/iomgr/time_averaged_stats.c deleted file mode 100644 index 3bddec04dd..0000000000 --- a/src/core/lib/iomgr/time_averaged_stats.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/time_averaged_stats.h" - -void grpc_time_averaged_stats_init(grpc_time_averaged_stats* stats, - double init_avg, double regress_weight, - double persistence_factor) { - stats->init_avg = init_avg; - stats->regress_weight = regress_weight; - stats->persistence_factor = persistence_factor; - stats->batch_total_value = 0; - stats->batch_num_samples = 0; - stats->aggregate_total_weight = 0; - stats->aggregate_weighted_avg = init_avg; -} - -void grpc_time_averaged_stats_add_sample(grpc_time_averaged_stats* stats, - double value) { - stats->batch_total_value += value; - ++stats->batch_num_samples; -} - -double grpc_time_averaged_stats_update_average( - grpc_time_averaged_stats* stats) { - /* Start with the current batch: */ - double weighted_sum = stats->batch_total_value; - double total_weight = stats->batch_num_samples; - if (stats->regress_weight > 0) { - /* Add in the regression towards init_avg_: */ - weighted_sum += stats->regress_weight * stats->init_avg; - total_weight += stats->regress_weight; - } - if (stats->persistence_factor > 0) { - /* Add in the persistence: */ - const double prev_sample_weight = - stats->persistence_factor * stats->aggregate_total_weight; - weighted_sum += prev_sample_weight * stats->aggregate_weighted_avg; - total_weight += prev_sample_weight; - } - stats->aggregate_weighted_avg = - (total_weight > 0) ? (weighted_sum / total_weight) : stats->init_avg; - stats->aggregate_total_weight = total_weight; - stats->batch_num_samples = 0; - stats->batch_total_value = 0; - return stats->aggregate_weighted_avg; -} diff --git a/src/core/lib/iomgr/time_averaged_stats.cc b/src/core/lib/iomgr/time_averaged_stats.cc new file mode 100644 index 0000000000..3bddec04dd --- /dev/null +++ b/src/core/lib/iomgr/time_averaged_stats.cc @@ -0,0 +1,62 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/time_averaged_stats.h" + +void grpc_time_averaged_stats_init(grpc_time_averaged_stats* stats, + double init_avg, double regress_weight, + double persistence_factor) { + stats->init_avg = init_avg; + stats->regress_weight = regress_weight; + stats->persistence_factor = persistence_factor; + stats->batch_total_value = 0; + stats->batch_num_samples = 0; + stats->aggregate_total_weight = 0; + stats->aggregate_weighted_avg = init_avg; +} + +void grpc_time_averaged_stats_add_sample(grpc_time_averaged_stats* stats, + double value) { + stats->batch_total_value += value; + ++stats->batch_num_samples; +} + +double grpc_time_averaged_stats_update_average( + grpc_time_averaged_stats* stats) { + /* Start with the current batch: */ + double weighted_sum = stats->batch_total_value; + double total_weight = stats->batch_num_samples; + if (stats->regress_weight > 0) { + /* Add in the regression towards init_avg_: */ + weighted_sum += stats->regress_weight * stats->init_avg; + total_weight += stats->regress_weight; + } + if (stats->persistence_factor > 0) { + /* Add in the persistence: */ + const double prev_sample_weight = + stats->persistence_factor * stats->aggregate_total_weight; + weighted_sum += prev_sample_weight * stats->aggregate_weighted_avg; + total_weight += prev_sample_weight; + } + stats->aggregate_weighted_avg = + (total_weight > 0) ? (weighted_sum / total_weight) : stats->init_avg; + stats->aggregate_total_weight = total_weight; + stats->batch_num_samples = 0; + stats->batch_total_value = 0; + return stats->aggregate_weighted_avg; +} diff --git a/src/core/lib/iomgr/timer_generic.c b/src/core/lib/iomgr/timer_generic.c deleted file mode 100644 index 2472cf26be..0000000000 --- a/src/core/lib/iomgr/timer_generic.c +++ /dev/null @@ -1,705 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_TIMER_USE_GENERIC - -#include "src/core/lib/iomgr/timer.h" - -#include -#include -#include -#include -#include -#include -#include "src/core/lib/debug/trace.h" -#include "src/core/lib/iomgr/time_averaged_stats.h" -#include "src/core/lib/iomgr/timer_heap.h" -#include "src/core/lib/support/spinlock.h" - -#define INVALID_HEAP_INDEX 0xffffffffu - -#define LOG2_NUM_SHARDS 5 -#define NUM_SHARDS (1 << LOG2_NUM_SHARDS) -#define ADD_DEADLINE_SCALE 0.33 -#define MIN_QUEUE_WINDOW_DURATION 0.01 -#define MAX_QUEUE_WINDOW_DURATION 1 - -grpc_tracer_flag grpc_timer_trace = GRPC_TRACER_INITIALIZER(false, "timer"); -grpc_tracer_flag grpc_timer_check_trace = - GRPC_TRACER_INITIALIZER(false, "timer_check"); - -/* A "timer shard". Contains a 'heap' and a 'list' of timers. All timers with - * deadlines earlier than 'queue_deadline" cap are maintained in the heap and - * others are maintained in the list (unordered). This helps to keep the number - * of elements in the heap low. - * - * The 'queue_deadline_cap' gets recomputed periodically based on the timer - * stats maintained in 'stats' and the relevant timers are then moved from the - * 'list' to 'heap' - */ -typedef struct { - gpr_mu mu; - grpc_time_averaged_stats stats; - /* All and only timers with deadlines <= this will be in the heap. */ - gpr_atm queue_deadline_cap; - /* The deadline of the next timer due in this shard */ - gpr_atm min_deadline; - /* Index of this timer_shard in the g_shard_queue */ - uint32_t shard_queue_index; - /* This holds all timers with deadlines < queue_deadline_cap. Timers in this - list have the top bit of their deadline set to 0. */ - grpc_timer_heap heap; - /* This holds timers whose deadline is >= queue_deadline_cap. */ - grpc_timer list; -} timer_shard; - -/* Array of timer shards. Whenever a timer (grpc_timer *) is added, its address - * is hashed to select the timer shard to add the timer to */ -static timer_shard g_shards[NUM_SHARDS]; - -/* Maintains a sorted list of timer shards (sorted by their min_deadline, i.e - * the deadline of the next timer in each shard). - * Access to this is protected by g_shared_mutables.mu */ -static timer_shard *g_shard_queue[NUM_SHARDS]; - -#ifndef NDEBUG - -/* == Hash table for duplicate timer detection == */ - -#define NUM_HASH_BUCKETS 1009 /* Prime number close to 1000 */ - -static gpr_mu g_hash_mu[NUM_HASH_BUCKETS]; /* One mutex per bucket */ -static grpc_timer *g_timer_ht[NUM_HASH_BUCKETS] = {NULL}; - -static void init_timer_ht() { - for (int i = 0; i < NUM_HASH_BUCKETS; i++) { - gpr_mu_init(&g_hash_mu[i]); - } -} - -static bool is_in_ht(grpc_timer *t) { - size_t i = GPR_HASH_POINTER(t, NUM_HASH_BUCKETS); - - gpr_mu_lock(&g_hash_mu[i]); - grpc_timer *p = g_timer_ht[i]; - while (p != NULL && p != t) { - p = p->hash_table_next; - } - gpr_mu_unlock(&g_hash_mu[i]); - - return (p == t); -} - -static void add_to_ht(grpc_timer *t) { - GPR_ASSERT(!t->hash_table_next); - size_t i = GPR_HASH_POINTER(t, NUM_HASH_BUCKETS); - - gpr_mu_lock(&g_hash_mu[i]); - grpc_timer *p = g_timer_ht[i]; - while (p != NULL && p != t) { - p = p->hash_table_next; - } - - if (p == t) { - grpc_closure *c = t->closure; - gpr_log(GPR_ERROR, - "** Duplicate timer (%p) being added. Closure: (%p), created at: " - "(%s:%d), scheduled at: (%s:%d) **", - t, c, c->file_created, c->line_created, c->file_initiated, - c->line_initiated); - abort(); - } - - /* Timer not present in the bucket. Insert at head of the list */ - t->hash_table_next = g_timer_ht[i]; - g_timer_ht[i] = t; - gpr_mu_unlock(&g_hash_mu[i]); -} - -static void remove_from_ht(grpc_timer *t) { - size_t i = GPR_HASH_POINTER(t, NUM_HASH_BUCKETS); - bool removed = false; - - gpr_mu_lock(&g_hash_mu[i]); - if (g_timer_ht[i] == t) { - g_timer_ht[i] = g_timer_ht[i]->hash_table_next; - removed = true; - } else if (g_timer_ht[i] != NULL) { - grpc_timer *p = g_timer_ht[i]; - while (p->hash_table_next != NULL && p->hash_table_next != t) { - p = p->hash_table_next; - } - - if (p->hash_table_next == t) { - p->hash_table_next = t->hash_table_next; - removed = true; - } - } - gpr_mu_unlock(&g_hash_mu[i]); - - if (!removed) { - grpc_closure *c = t->closure; - gpr_log(GPR_ERROR, - "** Removing timer (%p) that is not added to hash table. Closure " - "(%p), created at: (%s:%d), scheduled at: (%s:%d) **", - t, c, c->file_created, c->line_created, c->file_initiated, - c->line_initiated); - abort(); - } - - t->hash_table_next = NULL; -} - -/* If a timer is added to a timer shard (either heap or a list), it cannot - * be pending. A timer is added to hash table only-if it is added to the - * timer shard. - * Therefore, if timer->pending is false, it cannot be in hash table */ -static void validate_non_pending_timer(grpc_timer *t) { - if (!t->pending && is_in_ht(t)) { - grpc_closure *c = t->closure; - gpr_log(GPR_ERROR, - "** gpr_timer_cancel() called on a non-pending timer (%p) which " - "is in the hash table. Closure: (%p), created at: (%s:%d), " - "scheduled at: (%s:%d) **", - t, c, c->file_created, c->line_created, c->file_initiated, - c->line_initiated); - abort(); - } -} - -#define INIT_TIMER_HASH_TABLE() init_timer_ht() -#define ADD_TO_HASH_TABLE(t) add_to_ht((t)) -#define REMOVE_FROM_HASH_TABLE(t) remove_from_ht((t)) -#define VALIDATE_NON_PENDING_TIMER(t) validate_non_pending_timer((t)) - -#else - -#define INIT_TIMER_HASH_TABLE() -#define ADD_TO_HASH_TABLE(t) -#define REMOVE_FROM_HASH_TABLE(t) -#define VALIDATE_NON_PENDING_TIMER(t) - -#endif - -/* Thread local variable that stores the deadline of the next timer the thread - * has last-seen. This is an optimization to prevent the thread from checking - * shared_mutables.min_timer (which requires acquiring shared_mutables.mu lock, - * an expensive operation) */ -GPR_TLS_DECL(g_last_seen_min_timer); - -struct shared_mutables { - /* The deadline of the next timer due across all timer shards */ - gpr_atm min_timer; - /* Allow only one run_some_expired_timers at once */ - gpr_spinlock checker_mu; - bool initialized; - /* Protects g_shard_queue (and the shared_mutables struct itself) */ - gpr_mu mu; -} GPR_ALIGN_STRUCT(GPR_CACHELINE_SIZE); - -static struct shared_mutables g_shared_mutables; - -static gpr_clock_type g_clock_type; -static gpr_timespec g_start_time; - -static gpr_atm saturating_add(gpr_atm a, gpr_atm b) { - if (a > GPR_ATM_MAX - b) { - return GPR_ATM_MAX; - } - return a + b; -} - -static grpc_timer_check_result run_some_expired_timers(grpc_exec_ctx *exec_ctx, - gpr_atm now, - gpr_atm *next, - grpc_error *error); - -static gpr_timespec dbl_to_ts(double d) { - gpr_timespec ts; - ts.tv_sec = (int64_t)d; - ts.tv_nsec = (int32_t)(1e9 * (d - (double)ts.tv_sec)); - ts.clock_type = GPR_TIMESPAN; - return ts; -} - -static gpr_atm timespec_to_atm_round_up(gpr_timespec ts) { - ts = gpr_time_sub(ts, g_start_time); - double x = GPR_MS_PER_SEC * (double)ts.tv_sec + - (double)ts.tv_nsec / GPR_NS_PER_MS + - (double)(GPR_NS_PER_SEC - 1) / (double)GPR_NS_PER_SEC; - if (x < 0) return 0; - if (x > GPR_ATM_MAX) return GPR_ATM_MAX; - return (gpr_atm)x; -} - -static gpr_atm timespec_to_atm_round_down(gpr_timespec ts) { - ts = gpr_time_sub(ts, g_start_time); - double x = - GPR_MS_PER_SEC * (double)ts.tv_sec + (double)ts.tv_nsec / GPR_NS_PER_MS; - if (x < 0) return 0; - if (x > GPR_ATM_MAX) return GPR_ATM_MAX; - return (gpr_atm)x; -} - -static gpr_timespec atm_to_timespec(gpr_atm x) { - return gpr_time_add(g_start_time, dbl_to_ts((double)x / 1000.0)); -} - -static gpr_atm compute_min_deadline(timer_shard *shard) { - return grpc_timer_heap_is_empty(&shard->heap) - ? saturating_add(shard->queue_deadline_cap, 1) - : grpc_timer_heap_top(&shard->heap)->deadline; -} - -void grpc_timer_list_init(gpr_timespec now) { - uint32_t i; - - g_shared_mutables.initialized = true; - g_shared_mutables.checker_mu = GPR_SPINLOCK_INITIALIZER; - gpr_mu_init(&g_shared_mutables.mu); - g_clock_type = now.clock_type; - g_start_time = now; - g_shared_mutables.min_timer = timespec_to_atm_round_down(now); - gpr_tls_init(&g_last_seen_min_timer); - gpr_tls_set(&g_last_seen_min_timer, 0); - grpc_register_tracer(&grpc_timer_trace); - grpc_register_tracer(&grpc_timer_check_trace); - - for (i = 0; i < NUM_SHARDS; i++) { - timer_shard *shard = &g_shards[i]; - gpr_mu_init(&shard->mu); - grpc_time_averaged_stats_init(&shard->stats, 1.0 / ADD_DEADLINE_SCALE, 0.1, - 0.5); - shard->queue_deadline_cap = g_shared_mutables.min_timer; - shard->shard_queue_index = i; - grpc_timer_heap_init(&shard->heap); - shard->list.next = shard->list.prev = &shard->list; - shard->min_deadline = compute_min_deadline(shard); - g_shard_queue[i] = shard; - } - - INIT_TIMER_HASH_TABLE(); -} - -void grpc_timer_list_shutdown(grpc_exec_ctx *exec_ctx) { - int i; - run_some_expired_timers( - exec_ctx, GPR_ATM_MAX, NULL, - GRPC_ERROR_CREATE_FROM_STATIC_STRING("Timer list shutdown")); - for (i = 0; i < NUM_SHARDS; i++) { - timer_shard *shard = &g_shards[i]; - gpr_mu_destroy(&shard->mu); - grpc_timer_heap_destroy(&shard->heap); - } - gpr_mu_destroy(&g_shared_mutables.mu); - gpr_tls_destroy(&g_last_seen_min_timer); - g_shared_mutables.initialized = false; -} - -static double ts_to_dbl(gpr_timespec ts) { - return (double)ts.tv_sec + 1e-9 * ts.tv_nsec; -} - -/* returns true if the first element in the list */ -static void list_join(grpc_timer *head, grpc_timer *timer) { - timer->next = head; - timer->prev = head->prev; - timer->next->prev = timer->prev->next = timer; -} - -static void list_remove(grpc_timer *timer) { - timer->next->prev = timer->prev; - timer->prev->next = timer->next; -} - -static void swap_adjacent_shards_in_queue(uint32_t first_shard_queue_index) { - timer_shard *temp; - temp = g_shard_queue[first_shard_queue_index]; - g_shard_queue[first_shard_queue_index] = - g_shard_queue[first_shard_queue_index + 1]; - g_shard_queue[first_shard_queue_index + 1] = temp; - g_shard_queue[first_shard_queue_index]->shard_queue_index = - first_shard_queue_index; - g_shard_queue[first_shard_queue_index + 1]->shard_queue_index = - first_shard_queue_index + 1; -} - -static void note_deadline_change(timer_shard *shard) { - while (shard->shard_queue_index > 0 && - shard->min_deadline < - g_shard_queue[shard->shard_queue_index - 1]->min_deadline) { - swap_adjacent_shards_in_queue(shard->shard_queue_index - 1); - } - while (shard->shard_queue_index < NUM_SHARDS - 1 && - shard->min_deadline > - g_shard_queue[shard->shard_queue_index + 1]->min_deadline) { - swap_adjacent_shards_in_queue(shard->shard_queue_index); - } -} - -void grpc_timer_init_unset(grpc_timer *timer) { timer->pending = false; } - -void grpc_timer_init(grpc_exec_ctx *exec_ctx, grpc_timer *timer, - gpr_timespec deadline, grpc_closure *closure, - gpr_timespec now) { - int is_first_timer = 0; - timer_shard *shard = &g_shards[GPR_HASH_POINTER(timer, NUM_SHARDS)]; - GPR_ASSERT(deadline.clock_type == g_clock_type); - GPR_ASSERT(now.clock_type == g_clock_type); - timer->closure = closure; - gpr_atm deadline_atm = timer->deadline = timespec_to_atm_round_up(deadline); - -#ifndef NDEBUG - timer->hash_table_next = NULL; -#endif - - if (GRPC_TRACER_ON(grpc_timer_trace)) { - gpr_log(GPR_DEBUG, "TIMER %p: SET %" PRId64 ".%09d [%" PRIdPTR - "] now %" PRId64 ".%09d [%" PRIdPTR "] call %p[%p]", - timer, deadline.tv_sec, deadline.tv_nsec, deadline_atm, now.tv_sec, - now.tv_nsec, timespec_to_atm_round_down(now), closure, closure->cb); - } - - if (!g_shared_mutables.initialized) { - timer->pending = false; - GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, - GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "Attempt to create timer before initialization")); - return; - } - - gpr_mu_lock(&shard->mu); - timer->pending = true; - if (gpr_time_cmp(deadline, now) <= 0) { - timer->pending = false; - GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, GRPC_ERROR_NONE); - gpr_mu_unlock(&shard->mu); - /* early out */ - return; - } - - grpc_time_averaged_stats_add_sample(&shard->stats, - ts_to_dbl(gpr_time_sub(deadline, now))); - - ADD_TO_HASH_TABLE(timer); - - if (deadline_atm < shard->queue_deadline_cap) { - is_first_timer = grpc_timer_heap_add(&shard->heap, timer); - } else { - timer->heap_index = INVALID_HEAP_INDEX; - list_join(&shard->list, timer); - } - if (GRPC_TRACER_ON(grpc_timer_trace)) { - gpr_log(GPR_DEBUG, " .. add to shard %d with queue_deadline_cap=%" PRIdPTR - " => is_first_timer=%s", - (int)(shard - g_shards), shard->queue_deadline_cap, - is_first_timer ? "true" : "false"); - } - gpr_mu_unlock(&shard->mu); - - /* Deadline may have decreased, we need to adjust the master queue. Note - that there is a potential racy unlocked region here. There could be a - reordering of multiple grpc_timer_init calls, at this point, but the < test - below should ensure that we err on the side of caution. There could - also be a race with grpc_timer_check, which might beat us to the lock. In - that case, it is possible that the timer that we added will have already - run by the time we hold the lock, but that too is a safe error. - Finally, it's possible that the grpc_timer_check that intervened failed to - trigger the new timer because the min_deadline hadn't yet been reduced. - In that case, the timer will simply have to wait for the next - grpc_timer_check. */ - if (is_first_timer) { - gpr_mu_lock(&g_shared_mutables.mu); - if (GRPC_TRACER_ON(grpc_timer_trace)) { - gpr_log(GPR_DEBUG, " .. old shard min_deadline=%" PRIdPTR, - shard->min_deadline); - } - if (deadline_atm < shard->min_deadline) { - gpr_atm old_min_deadline = g_shard_queue[0]->min_deadline; - shard->min_deadline = deadline_atm; - note_deadline_change(shard); - if (shard->shard_queue_index == 0 && deadline_atm < old_min_deadline) { - gpr_atm_no_barrier_store(&g_shared_mutables.min_timer, deadline_atm); - grpc_kick_poller(); - } - } - gpr_mu_unlock(&g_shared_mutables.mu); - } -} - -void grpc_timer_consume_kick(void) { - /* force re-evaluation of last seeen min */ - gpr_tls_set(&g_last_seen_min_timer, 0); -} - -void grpc_timer_cancel(grpc_exec_ctx *exec_ctx, grpc_timer *timer) { - if (!g_shared_mutables.initialized) { - /* must have already been cancelled, also the shard mutex is invalid */ - return; - } - - timer_shard *shard = &g_shards[GPR_HASH_POINTER(timer, NUM_SHARDS)]; - gpr_mu_lock(&shard->mu); - if (GRPC_TRACER_ON(grpc_timer_trace)) { - gpr_log(GPR_DEBUG, "TIMER %p: CANCEL pending=%s", timer, - timer->pending ? "true" : "false"); - } - - if (timer->pending) { - REMOVE_FROM_HASH_TABLE(timer); - - GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, GRPC_ERROR_CANCELLED); - timer->pending = false; - if (timer->heap_index == INVALID_HEAP_INDEX) { - list_remove(timer); - } else { - grpc_timer_heap_remove(&shard->heap, timer); - } - } else { - VALIDATE_NON_PENDING_TIMER(timer); - } - gpr_mu_unlock(&shard->mu); -} - -/* Rebalances the timer shard by computing a new 'queue_deadline_cap' and moving - all relevant timers in shard->list (i.e timers with deadlines earlier than - 'queue_deadline_cap') into into shard->heap. - Returns 'true' if shard->heap has atleast ONE element - REQUIRES: shard->mu locked */ -static int refill_heap(timer_shard *shard, gpr_atm now) { - /* Compute the new queue window width and bound by the limits: */ - double computed_deadline_delta = - grpc_time_averaged_stats_update_average(&shard->stats) * - ADD_DEADLINE_SCALE; - double deadline_delta = - GPR_CLAMP(computed_deadline_delta, MIN_QUEUE_WINDOW_DURATION, - MAX_QUEUE_WINDOW_DURATION); - grpc_timer *timer, *next; - - /* Compute the new cap and put all timers under it into the queue: */ - shard->queue_deadline_cap = - saturating_add(GPR_MAX(now, shard->queue_deadline_cap), - (gpr_atm)(deadline_delta * 1000.0)); - - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, " .. shard[%d]->queue_deadline_cap --> %" PRIdPTR, - (int)(shard - g_shards), shard->queue_deadline_cap); - } - for (timer = shard->list.next; timer != &shard->list; timer = next) { - next = timer->next; - - if (timer->deadline < shard->queue_deadline_cap) { - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, " .. add timer with deadline %" PRIdPTR " to heap", - timer->deadline); - } - list_remove(timer); - grpc_timer_heap_add(&shard->heap, timer); - } - } - return !grpc_timer_heap_is_empty(&shard->heap); -} - -/* This pops the next non-cancelled timer with deadline <= now from the - queue, or returns NULL if there isn't one. - REQUIRES: shard->mu locked */ -static grpc_timer *pop_one(timer_shard *shard, gpr_atm now) { - grpc_timer *timer; - for (;;) { - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, " .. shard[%d]: heap_empty=%s", - (int)(shard - g_shards), - grpc_timer_heap_is_empty(&shard->heap) ? "true" : "false"); - } - if (grpc_timer_heap_is_empty(&shard->heap)) { - if (now < shard->queue_deadline_cap) return NULL; - if (!refill_heap(shard, now)) return NULL; - } - timer = grpc_timer_heap_top(&shard->heap); - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, - " .. check top timer deadline=%" PRIdPTR " now=%" PRIdPTR, - timer->deadline, now); - } - if (timer->deadline > now) return NULL; - if (GRPC_TRACER_ON(grpc_timer_trace)) { - gpr_log(GPR_DEBUG, "TIMER %p: FIRE %" PRIdPTR "ms late", timer, - now - timer->deadline); - } - timer->pending = false; - grpc_timer_heap_pop(&shard->heap); - return timer; - } -} - -/* REQUIRES: shard->mu unlocked */ -static size_t pop_timers(grpc_exec_ctx *exec_ctx, timer_shard *shard, - gpr_atm now, gpr_atm *new_min_deadline, - grpc_error *error) { - size_t n = 0; - grpc_timer *timer; - gpr_mu_lock(&shard->mu); - while ((timer = pop_one(shard, now))) { - REMOVE_FROM_HASH_TABLE(timer); - GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, GRPC_ERROR_REF(error)); - n++; - } - *new_min_deadline = compute_min_deadline(shard); - gpr_mu_unlock(&shard->mu); - return n; -} - -static grpc_timer_check_result run_some_expired_timers(grpc_exec_ctx *exec_ctx, - gpr_atm now, - gpr_atm *next, - grpc_error *error) { - grpc_timer_check_result result = GRPC_TIMERS_NOT_CHECKED; - - gpr_atm min_timer = gpr_atm_no_barrier_load(&g_shared_mutables.min_timer); - gpr_tls_set(&g_last_seen_min_timer, min_timer); - if (now < min_timer) { - if (next != NULL) *next = GPR_MIN(*next, min_timer); - return GRPC_TIMERS_CHECKED_AND_EMPTY; - } - - if (gpr_spinlock_trylock(&g_shared_mutables.checker_mu)) { - gpr_mu_lock(&g_shared_mutables.mu); - result = GRPC_TIMERS_CHECKED_AND_EMPTY; - - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, " .. shard[%d]->min_deadline = %" PRIdPTR, - (int)(g_shard_queue[0] - g_shards), - g_shard_queue[0]->min_deadline); - } - - while (g_shard_queue[0]->min_deadline < now || - (now != GPR_ATM_MAX && g_shard_queue[0]->min_deadline == now)) { - gpr_atm new_min_deadline; - - /* For efficiency, we pop as many available timers as we can from the - shard. This may violate perfect timer deadline ordering, but that - shouldn't be a big deal because we don't make ordering guarantees. */ - if (pop_timers(exec_ctx, g_shard_queue[0], now, &new_min_deadline, - error) > 0) { - result = GRPC_TIMERS_FIRED; - } - - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, - " .. result --> %d" - ", shard[%d]->min_deadline %" PRIdPTR " --> %" PRIdPTR - ", now=%" PRIdPTR, - result, (int)(g_shard_queue[0] - g_shards), - g_shard_queue[0]->min_deadline, new_min_deadline, now); - } - - /* An grpc_timer_init() on the shard could intervene here, adding a new - timer that is earlier than new_min_deadline. However, - grpc_timer_init() will block on the master_lock before it can call - set_min_deadline, so this one will complete first and then the Addtimer - will reduce the min_deadline (perhaps unnecessarily). */ - g_shard_queue[0]->min_deadline = new_min_deadline; - note_deadline_change(g_shard_queue[0]); - } - - if (next) { - *next = GPR_MIN(*next, g_shard_queue[0]->min_deadline); - } - - gpr_atm_no_barrier_store(&g_shared_mutables.min_timer, - g_shard_queue[0]->min_deadline); - gpr_mu_unlock(&g_shared_mutables.mu); - gpr_spinlock_unlock(&g_shared_mutables.checker_mu); - } - - GRPC_ERROR_UNREF(error); - - return result; -} - -grpc_timer_check_result grpc_timer_check(grpc_exec_ctx *exec_ctx, - gpr_timespec now, gpr_timespec *next) { - // prelude - GPR_ASSERT(now.clock_type == g_clock_type); - gpr_atm now_atm = timespec_to_atm_round_down(now); - - /* fetch from a thread-local first: this avoids contention on a globally - mutable cacheline in the common case */ - gpr_atm min_timer = gpr_tls_get(&g_last_seen_min_timer); - if (now_atm < min_timer) { - if (next != NULL) { - *next = - atm_to_timespec(GPR_MIN(timespec_to_atm_round_up(*next), min_timer)); - } - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, - "TIMER CHECK SKIP: now_atm=%" PRIdPTR " min_timer=%" PRIdPTR, - now_atm, min_timer); - } - return GRPC_TIMERS_CHECKED_AND_EMPTY; - } - - grpc_error *shutdown_error = - gpr_time_cmp(now, gpr_inf_future(now.clock_type)) != 0 - ? GRPC_ERROR_NONE - : GRPC_ERROR_CREATE_FROM_STATIC_STRING("Shutting down timer system"); - - // tracing - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - char *next_str; - if (next == NULL) { - next_str = gpr_strdup("NULL"); - } else { - gpr_asprintf(&next_str, "%" PRId64 ".%09d [%" PRIdPTR "]", next->tv_sec, - next->tv_nsec, timespec_to_atm_round_down(*next)); - } - gpr_log(GPR_DEBUG, "TIMER CHECK BEGIN: now=%" PRId64 ".%09d [%" PRIdPTR - "] next=%s tls_min=%" PRIdPTR " glob_min=%" PRIdPTR, - now.tv_sec, now.tv_nsec, now_atm, next_str, - gpr_tls_get(&g_last_seen_min_timer), - gpr_atm_no_barrier_load(&g_shared_mutables.min_timer)); - gpr_free(next_str); - } - // actual code - grpc_timer_check_result r; - gpr_atm next_atm; - if (next == NULL) { - r = run_some_expired_timers(exec_ctx, now_atm, NULL, shutdown_error); - } else { - next_atm = timespec_to_atm_round_down(*next); - r = run_some_expired_timers(exec_ctx, now_atm, &next_atm, shutdown_error); - *next = atm_to_timespec(next_atm); - } - // tracing - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - char *next_str; - if (next == NULL) { - next_str = gpr_strdup("NULL"); - } else { - gpr_asprintf(&next_str, "%" PRId64 ".%09d [%" PRIdPTR "]", next->tv_sec, - next->tv_nsec, next_atm); - } - gpr_log(GPR_DEBUG, "TIMER CHECK END: r=%d; next=%s", r, next_str); - gpr_free(next_str); - } - return r; -} - -#endif /* GRPC_TIMER_USE_GENERIC */ diff --git a/src/core/lib/iomgr/timer_generic.cc b/src/core/lib/iomgr/timer_generic.cc new file mode 100644 index 0000000000..2472cf26be --- /dev/null +++ b/src/core/lib/iomgr/timer_generic.cc @@ -0,0 +1,705 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_TIMER_USE_GENERIC + +#include "src/core/lib/iomgr/timer.h" + +#include +#include +#include +#include +#include +#include +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/iomgr/time_averaged_stats.h" +#include "src/core/lib/iomgr/timer_heap.h" +#include "src/core/lib/support/spinlock.h" + +#define INVALID_HEAP_INDEX 0xffffffffu + +#define LOG2_NUM_SHARDS 5 +#define NUM_SHARDS (1 << LOG2_NUM_SHARDS) +#define ADD_DEADLINE_SCALE 0.33 +#define MIN_QUEUE_WINDOW_DURATION 0.01 +#define MAX_QUEUE_WINDOW_DURATION 1 + +grpc_tracer_flag grpc_timer_trace = GRPC_TRACER_INITIALIZER(false, "timer"); +grpc_tracer_flag grpc_timer_check_trace = + GRPC_TRACER_INITIALIZER(false, "timer_check"); + +/* A "timer shard". Contains a 'heap' and a 'list' of timers. All timers with + * deadlines earlier than 'queue_deadline" cap are maintained in the heap and + * others are maintained in the list (unordered). This helps to keep the number + * of elements in the heap low. + * + * The 'queue_deadline_cap' gets recomputed periodically based on the timer + * stats maintained in 'stats' and the relevant timers are then moved from the + * 'list' to 'heap' + */ +typedef struct { + gpr_mu mu; + grpc_time_averaged_stats stats; + /* All and only timers with deadlines <= this will be in the heap. */ + gpr_atm queue_deadline_cap; + /* The deadline of the next timer due in this shard */ + gpr_atm min_deadline; + /* Index of this timer_shard in the g_shard_queue */ + uint32_t shard_queue_index; + /* This holds all timers with deadlines < queue_deadline_cap. Timers in this + list have the top bit of their deadline set to 0. */ + grpc_timer_heap heap; + /* This holds timers whose deadline is >= queue_deadline_cap. */ + grpc_timer list; +} timer_shard; + +/* Array of timer shards. Whenever a timer (grpc_timer *) is added, its address + * is hashed to select the timer shard to add the timer to */ +static timer_shard g_shards[NUM_SHARDS]; + +/* Maintains a sorted list of timer shards (sorted by their min_deadline, i.e + * the deadline of the next timer in each shard). + * Access to this is protected by g_shared_mutables.mu */ +static timer_shard *g_shard_queue[NUM_SHARDS]; + +#ifndef NDEBUG + +/* == Hash table for duplicate timer detection == */ + +#define NUM_HASH_BUCKETS 1009 /* Prime number close to 1000 */ + +static gpr_mu g_hash_mu[NUM_HASH_BUCKETS]; /* One mutex per bucket */ +static grpc_timer *g_timer_ht[NUM_HASH_BUCKETS] = {NULL}; + +static void init_timer_ht() { + for (int i = 0; i < NUM_HASH_BUCKETS; i++) { + gpr_mu_init(&g_hash_mu[i]); + } +} + +static bool is_in_ht(grpc_timer *t) { + size_t i = GPR_HASH_POINTER(t, NUM_HASH_BUCKETS); + + gpr_mu_lock(&g_hash_mu[i]); + grpc_timer *p = g_timer_ht[i]; + while (p != NULL && p != t) { + p = p->hash_table_next; + } + gpr_mu_unlock(&g_hash_mu[i]); + + return (p == t); +} + +static void add_to_ht(grpc_timer *t) { + GPR_ASSERT(!t->hash_table_next); + size_t i = GPR_HASH_POINTER(t, NUM_HASH_BUCKETS); + + gpr_mu_lock(&g_hash_mu[i]); + grpc_timer *p = g_timer_ht[i]; + while (p != NULL && p != t) { + p = p->hash_table_next; + } + + if (p == t) { + grpc_closure *c = t->closure; + gpr_log(GPR_ERROR, + "** Duplicate timer (%p) being added. Closure: (%p), created at: " + "(%s:%d), scheduled at: (%s:%d) **", + t, c, c->file_created, c->line_created, c->file_initiated, + c->line_initiated); + abort(); + } + + /* Timer not present in the bucket. Insert at head of the list */ + t->hash_table_next = g_timer_ht[i]; + g_timer_ht[i] = t; + gpr_mu_unlock(&g_hash_mu[i]); +} + +static void remove_from_ht(grpc_timer *t) { + size_t i = GPR_HASH_POINTER(t, NUM_HASH_BUCKETS); + bool removed = false; + + gpr_mu_lock(&g_hash_mu[i]); + if (g_timer_ht[i] == t) { + g_timer_ht[i] = g_timer_ht[i]->hash_table_next; + removed = true; + } else if (g_timer_ht[i] != NULL) { + grpc_timer *p = g_timer_ht[i]; + while (p->hash_table_next != NULL && p->hash_table_next != t) { + p = p->hash_table_next; + } + + if (p->hash_table_next == t) { + p->hash_table_next = t->hash_table_next; + removed = true; + } + } + gpr_mu_unlock(&g_hash_mu[i]); + + if (!removed) { + grpc_closure *c = t->closure; + gpr_log(GPR_ERROR, + "** Removing timer (%p) that is not added to hash table. Closure " + "(%p), created at: (%s:%d), scheduled at: (%s:%d) **", + t, c, c->file_created, c->line_created, c->file_initiated, + c->line_initiated); + abort(); + } + + t->hash_table_next = NULL; +} + +/* If a timer is added to a timer shard (either heap or a list), it cannot + * be pending. A timer is added to hash table only-if it is added to the + * timer shard. + * Therefore, if timer->pending is false, it cannot be in hash table */ +static void validate_non_pending_timer(grpc_timer *t) { + if (!t->pending && is_in_ht(t)) { + grpc_closure *c = t->closure; + gpr_log(GPR_ERROR, + "** gpr_timer_cancel() called on a non-pending timer (%p) which " + "is in the hash table. Closure: (%p), created at: (%s:%d), " + "scheduled at: (%s:%d) **", + t, c, c->file_created, c->line_created, c->file_initiated, + c->line_initiated); + abort(); + } +} + +#define INIT_TIMER_HASH_TABLE() init_timer_ht() +#define ADD_TO_HASH_TABLE(t) add_to_ht((t)) +#define REMOVE_FROM_HASH_TABLE(t) remove_from_ht((t)) +#define VALIDATE_NON_PENDING_TIMER(t) validate_non_pending_timer((t)) + +#else + +#define INIT_TIMER_HASH_TABLE() +#define ADD_TO_HASH_TABLE(t) +#define REMOVE_FROM_HASH_TABLE(t) +#define VALIDATE_NON_PENDING_TIMER(t) + +#endif + +/* Thread local variable that stores the deadline of the next timer the thread + * has last-seen. This is an optimization to prevent the thread from checking + * shared_mutables.min_timer (which requires acquiring shared_mutables.mu lock, + * an expensive operation) */ +GPR_TLS_DECL(g_last_seen_min_timer); + +struct shared_mutables { + /* The deadline of the next timer due across all timer shards */ + gpr_atm min_timer; + /* Allow only one run_some_expired_timers at once */ + gpr_spinlock checker_mu; + bool initialized; + /* Protects g_shard_queue (and the shared_mutables struct itself) */ + gpr_mu mu; +} GPR_ALIGN_STRUCT(GPR_CACHELINE_SIZE); + +static struct shared_mutables g_shared_mutables; + +static gpr_clock_type g_clock_type; +static gpr_timespec g_start_time; + +static gpr_atm saturating_add(gpr_atm a, gpr_atm b) { + if (a > GPR_ATM_MAX - b) { + return GPR_ATM_MAX; + } + return a + b; +} + +static grpc_timer_check_result run_some_expired_timers(grpc_exec_ctx *exec_ctx, + gpr_atm now, + gpr_atm *next, + grpc_error *error); + +static gpr_timespec dbl_to_ts(double d) { + gpr_timespec ts; + ts.tv_sec = (int64_t)d; + ts.tv_nsec = (int32_t)(1e9 * (d - (double)ts.tv_sec)); + ts.clock_type = GPR_TIMESPAN; + return ts; +} + +static gpr_atm timespec_to_atm_round_up(gpr_timespec ts) { + ts = gpr_time_sub(ts, g_start_time); + double x = GPR_MS_PER_SEC * (double)ts.tv_sec + + (double)ts.tv_nsec / GPR_NS_PER_MS + + (double)(GPR_NS_PER_SEC - 1) / (double)GPR_NS_PER_SEC; + if (x < 0) return 0; + if (x > GPR_ATM_MAX) return GPR_ATM_MAX; + return (gpr_atm)x; +} + +static gpr_atm timespec_to_atm_round_down(gpr_timespec ts) { + ts = gpr_time_sub(ts, g_start_time); + double x = + GPR_MS_PER_SEC * (double)ts.tv_sec + (double)ts.tv_nsec / GPR_NS_PER_MS; + if (x < 0) return 0; + if (x > GPR_ATM_MAX) return GPR_ATM_MAX; + return (gpr_atm)x; +} + +static gpr_timespec atm_to_timespec(gpr_atm x) { + return gpr_time_add(g_start_time, dbl_to_ts((double)x / 1000.0)); +} + +static gpr_atm compute_min_deadline(timer_shard *shard) { + return grpc_timer_heap_is_empty(&shard->heap) + ? saturating_add(shard->queue_deadline_cap, 1) + : grpc_timer_heap_top(&shard->heap)->deadline; +} + +void grpc_timer_list_init(gpr_timespec now) { + uint32_t i; + + g_shared_mutables.initialized = true; + g_shared_mutables.checker_mu = GPR_SPINLOCK_INITIALIZER; + gpr_mu_init(&g_shared_mutables.mu); + g_clock_type = now.clock_type; + g_start_time = now; + g_shared_mutables.min_timer = timespec_to_atm_round_down(now); + gpr_tls_init(&g_last_seen_min_timer); + gpr_tls_set(&g_last_seen_min_timer, 0); + grpc_register_tracer(&grpc_timer_trace); + grpc_register_tracer(&grpc_timer_check_trace); + + for (i = 0; i < NUM_SHARDS; i++) { + timer_shard *shard = &g_shards[i]; + gpr_mu_init(&shard->mu); + grpc_time_averaged_stats_init(&shard->stats, 1.0 / ADD_DEADLINE_SCALE, 0.1, + 0.5); + shard->queue_deadline_cap = g_shared_mutables.min_timer; + shard->shard_queue_index = i; + grpc_timer_heap_init(&shard->heap); + shard->list.next = shard->list.prev = &shard->list; + shard->min_deadline = compute_min_deadline(shard); + g_shard_queue[i] = shard; + } + + INIT_TIMER_HASH_TABLE(); +} + +void grpc_timer_list_shutdown(grpc_exec_ctx *exec_ctx) { + int i; + run_some_expired_timers( + exec_ctx, GPR_ATM_MAX, NULL, + GRPC_ERROR_CREATE_FROM_STATIC_STRING("Timer list shutdown")); + for (i = 0; i < NUM_SHARDS; i++) { + timer_shard *shard = &g_shards[i]; + gpr_mu_destroy(&shard->mu); + grpc_timer_heap_destroy(&shard->heap); + } + gpr_mu_destroy(&g_shared_mutables.mu); + gpr_tls_destroy(&g_last_seen_min_timer); + g_shared_mutables.initialized = false; +} + +static double ts_to_dbl(gpr_timespec ts) { + return (double)ts.tv_sec + 1e-9 * ts.tv_nsec; +} + +/* returns true if the first element in the list */ +static void list_join(grpc_timer *head, grpc_timer *timer) { + timer->next = head; + timer->prev = head->prev; + timer->next->prev = timer->prev->next = timer; +} + +static void list_remove(grpc_timer *timer) { + timer->next->prev = timer->prev; + timer->prev->next = timer->next; +} + +static void swap_adjacent_shards_in_queue(uint32_t first_shard_queue_index) { + timer_shard *temp; + temp = g_shard_queue[first_shard_queue_index]; + g_shard_queue[first_shard_queue_index] = + g_shard_queue[first_shard_queue_index + 1]; + g_shard_queue[first_shard_queue_index + 1] = temp; + g_shard_queue[first_shard_queue_index]->shard_queue_index = + first_shard_queue_index; + g_shard_queue[first_shard_queue_index + 1]->shard_queue_index = + first_shard_queue_index + 1; +} + +static void note_deadline_change(timer_shard *shard) { + while (shard->shard_queue_index > 0 && + shard->min_deadline < + g_shard_queue[shard->shard_queue_index - 1]->min_deadline) { + swap_adjacent_shards_in_queue(shard->shard_queue_index - 1); + } + while (shard->shard_queue_index < NUM_SHARDS - 1 && + shard->min_deadline > + g_shard_queue[shard->shard_queue_index + 1]->min_deadline) { + swap_adjacent_shards_in_queue(shard->shard_queue_index); + } +} + +void grpc_timer_init_unset(grpc_timer *timer) { timer->pending = false; } + +void grpc_timer_init(grpc_exec_ctx *exec_ctx, grpc_timer *timer, + gpr_timespec deadline, grpc_closure *closure, + gpr_timespec now) { + int is_first_timer = 0; + timer_shard *shard = &g_shards[GPR_HASH_POINTER(timer, NUM_SHARDS)]; + GPR_ASSERT(deadline.clock_type == g_clock_type); + GPR_ASSERT(now.clock_type == g_clock_type); + timer->closure = closure; + gpr_atm deadline_atm = timer->deadline = timespec_to_atm_round_up(deadline); + +#ifndef NDEBUG + timer->hash_table_next = NULL; +#endif + + if (GRPC_TRACER_ON(grpc_timer_trace)) { + gpr_log(GPR_DEBUG, "TIMER %p: SET %" PRId64 ".%09d [%" PRIdPTR + "] now %" PRId64 ".%09d [%" PRIdPTR "] call %p[%p]", + timer, deadline.tv_sec, deadline.tv_nsec, deadline_atm, now.tv_sec, + now.tv_nsec, timespec_to_atm_round_down(now), closure, closure->cb); + } + + if (!g_shared_mutables.initialized) { + timer->pending = false; + GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, + GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "Attempt to create timer before initialization")); + return; + } + + gpr_mu_lock(&shard->mu); + timer->pending = true; + if (gpr_time_cmp(deadline, now) <= 0) { + timer->pending = false; + GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, GRPC_ERROR_NONE); + gpr_mu_unlock(&shard->mu); + /* early out */ + return; + } + + grpc_time_averaged_stats_add_sample(&shard->stats, + ts_to_dbl(gpr_time_sub(deadline, now))); + + ADD_TO_HASH_TABLE(timer); + + if (deadline_atm < shard->queue_deadline_cap) { + is_first_timer = grpc_timer_heap_add(&shard->heap, timer); + } else { + timer->heap_index = INVALID_HEAP_INDEX; + list_join(&shard->list, timer); + } + if (GRPC_TRACER_ON(grpc_timer_trace)) { + gpr_log(GPR_DEBUG, " .. add to shard %d with queue_deadline_cap=%" PRIdPTR + " => is_first_timer=%s", + (int)(shard - g_shards), shard->queue_deadline_cap, + is_first_timer ? "true" : "false"); + } + gpr_mu_unlock(&shard->mu); + + /* Deadline may have decreased, we need to adjust the master queue. Note + that there is a potential racy unlocked region here. There could be a + reordering of multiple grpc_timer_init calls, at this point, but the < test + below should ensure that we err on the side of caution. There could + also be a race with grpc_timer_check, which might beat us to the lock. In + that case, it is possible that the timer that we added will have already + run by the time we hold the lock, but that too is a safe error. + Finally, it's possible that the grpc_timer_check that intervened failed to + trigger the new timer because the min_deadline hadn't yet been reduced. + In that case, the timer will simply have to wait for the next + grpc_timer_check. */ + if (is_first_timer) { + gpr_mu_lock(&g_shared_mutables.mu); + if (GRPC_TRACER_ON(grpc_timer_trace)) { + gpr_log(GPR_DEBUG, " .. old shard min_deadline=%" PRIdPTR, + shard->min_deadline); + } + if (deadline_atm < shard->min_deadline) { + gpr_atm old_min_deadline = g_shard_queue[0]->min_deadline; + shard->min_deadline = deadline_atm; + note_deadline_change(shard); + if (shard->shard_queue_index == 0 && deadline_atm < old_min_deadline) { + gpr_atm_no_barrier_store(&g_shared_mutables.min_timer, deadline_atm); + grpc_kick_poller(); + } + } + gpr_mu_unlock(&g_shared_mutables.mu); + } +} + +void grpc_timer_consume_kick(void) { + /* force re-evaluation of last seeen min */ + gpr_tls_set(&g_last_seen_min_timer, 0); +} + +void grpc_timer_cancel(grpc_exec_ctx *exec_ctx, grpc_timer *timer) { + if (!g_shared_mutables.initialized) { + /* must have already been cancelled, also the shard mutex is invalid */ + return; + } + + timer_shard *shard = &g_shards[GPR_HASH_POINTER(timer, NUM_SHARDS)]; + gpr_mu_lock(&shard->mu); + if (GRPC_TRACER_ON(grpc_timer_trace)) { + gpr_log(GPR_DEBUG, "TIMER %p: CANCEL pending=%s", timer, + timer->pending ? "true" : "false"); + } + + if (timer->pending) { + REMOVE_FROM_HASH_TABLE(timer); + + GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, GRPC_ERROR_CANCELLED); + timer->pending = false; + if (timer->heap_index == INVALID_HEAP_INDEX) { + list_remove(timer); + } else { + grpc_timer_heap_remove(&shard->heap, timer); + } + } else { + VALIDATE_NON_PENDING_TIMER(timer); + } + gpr_mu_unlock(&shard->mu); +} + +/* Rebalances the timer shard by computing a new 'queue_deadline_cap' and moving + all relevant timers in shard->list (i.e timers with deadlines earlier than + 'queue_deadline_cap') into into shard->heap. + Returns 'true' if shard->heap has atleast ONE element + REQUIRES: shard->mu locked */ +static int refill_heap(timer_shard *shard, gpr_atm now) { + /* Compute the new queue window width and bound by the limits: */ + double computed_deadline_delta = + grpc_time_averaged_stats_update_average(&shard->stats) * + ADD_DEADLINE_SCALE; + double deadline_delta = + GPR_CLAMP(computed_deadline_delta, MIN_QUEUE_WINDOW_DURATION, + MAX_QUEUE_WINDOW_DURATION); + grpc_timer *timer, *next; + + /* Compute the new cap and put all timers under it into the queue: */ + shard->queue_deadline_cap = + saturating_add(GPR_MAX(now, shard->queue_deadline_cap), + (gpr_atm)(deadline_delta * 1000.0)); + + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, " .. shard[%d]->queue_deadline_cap --> %" PRIdPTR, + (int)(shard - g_shards), shard->queue_deadline_cap); + } + for (timer = shard->list.next; timer != &shard->list; timer = next) { + next = timer->next; + + if (timer->deadline < shard->queue_deadline_cap) { + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, " .. add timer with deadline %" PRIdPTR " to heap", + timer->deadline); + } + list_remove(timer); + grpc_timer_heap_add(&shard->heap, timer); + } + } + return !grpc_timer_heap_is_empty(&shard->heap); +} + +/* This pops the next non-cancelled timer with deadline <= now from the + queue, or returns NULL if there isn't one. + REQUIRES: shard->mu locked */ +static grpc_timer *pop_one(timer_shard *shard, gpr_atm now) { + grpc_timer *timer; + for (;;) { + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, " .. shard[%d]: heap_empty=%s", + (int)(shard - g_shards), + grpc_timer_heap_is_empty(&shard->heap) ? "true" : "false"); + } + if (grpc_timer_heap_is_empty(&shard->heap)) { + if (now < shard->queue_deadline_cap) return NULL; + if (!refill_heap(shard, now)) return NULL; + } + timer = grpc_timer_heap_top(&shard->heap); + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, + " .. check top timer deadline=%" PRIdPTR " now=%" PRIdPTR, + timer->deadline, now); + } + if (timer->deadline > now) return NULL; + if (GRPC_TRACER_ON(grpc_timer_trace)) { + gpr_log(GPR_DEBUG, "TIMER %p: FIRE %" PRIdPTR "ms late", timer, + now - timer->deadline); + } + timer->pending = false; + grpc_timer_heap_pop(&shard->heap); + return timer; + } +} + +/* REQUIRES: shard->mu unlocked */ +static size_t pop_timers(grpc_exec_ctx *exec_ctx, timer_shard *shard, + gpr_atm now, gpr_atm *new_min_deadline, + grpc_error *error) { + size_t n = 0; + grpc_timer *timer; + gpr_mu_lock(&shard->mu); + while ((timer = pop_one(shard, now))) { + REMOVE_FROM_HASH_TABLE(timer); + GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, GRPC_ERROR_REF(error)); + n++; + } + *new_min_deadline = compute_min_deadline(shard); + gpr_mu_unlock(&shard->mu); + return n; +} + +static grpc_timer_check_result run_some_expired_timers(grpc_exec_ctx *exec_ctx, + gpr_atm now, + gpr_atm *next, + grpc_error *error) { + grpc_timer_check_result result = GRPC_TIMERS_NOT_CHECKED; + + gpr_atm min_timer = gpr_atm_no_barrier_load(&g_shared_mutables.min_timer); + gpr_tls_set(&g_last_seen_min_timer, min_timer); + if (now < min_timer) { + if (next != NULL) *next = GPR_MIN(*next, min_timer); + return GRPC_TIMERS_CHECKED_AND_EMPTY; + } + + if (gpr_spinlock_trylock(&g_shared_mutables.checker_mu)) { + gpr_mu_lock(&g_shared_mutables.mu); + result = GRPC_TIMERS_CHECKED_AND_EMPTY; + + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, " .. shard[%d]->min_deadline = %" PRIdPTR, + (int)(g_shard_queue[0] - g_shards), + g_shard_queue[0]->min_deadline); + } + + while (g_shard_queue[0]->min_deadline < now || + (now != GPR_ATM_MAX && g_shard_queue[0]->min_deadline == now)) { + gpr_atm new_min_deadline; + + /* For efficiency, we pop as many available timers as we can from the + shard. This may violate perfect timer deadline ordering, but that + shouldn't be a big deal because we don't make ordering guarantees. */ + if (pop_timers(exec_ctx, g_shard_queue[0], now, &new_min_deadline, + error) > 0) { + result = GRPC_TIMERS_FIRED; + } + + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, + " .. result --> %d" + ", shard[%d]->min_deadline %" PRIdPTR " --> %" PRIdPTR + ", now=%" PRIdPTR, + result, (int)(g_shard_queue[0] - g_shards), + g_shard_queue[0]->min_deadline, new_min_deadline, now); + } + + /* An grpc_timer_init() on the shard could intervene here, adding a new + timer that is earlier than new_min_deadline. However, + grpc_timer_init() will block on the master_lock before it can call + set_min_deadline, so this one will complete first and then the Addtimer + will reduce the min_deadline (perhaps unnecessarily). */ + g_shard_queue[0]->min_deadline = new_min_deadline; + note_deadline_change(g_shard_queue[0]); + } + + if (next) { + *next = GPR_MIN(*next, g_shard_queue[0]->min_deadline); + } + + gpr_atm_no_barrier_store(&g_shared_mutables.min_timer, + g_shard_queue[0]->min_deadline); + gpr_mu_unlock(&g_shared_mutables.mu); + gpr_spinlock_unlock(&g_shared_mutables.checker_mu); + } + + GRPC_ERROR_UNREF(error); + + return result; +} + +grpc_timer_check_result grpc_timer_check(grpc_exec_ctx *exec_ctx, + gpr_timespec now, gpr_timespec *next) { + // prelude + GPR_ASSERT(now.clock_type == g_clock_type); + gpr_atm now_atm = timespec_to_atm_round_down(now); + + /* fetch from a thread-local first: this avoids contention on a globally + mutable cacheline in the common case */ + gpr_atm min_timer = gpr_tls_get(&g_last_seen_min_timer); + if (now_atm < min_timer) { + if (next != NULL) { + *next = + atm_to_timespec(GPR_MIN(timespec_to_atm_round_up(*next), min_timer)); + } + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, + "TIMER CHECK SKIP: now_atm=%" PRIdPTR " min_timer=%" PRIdPTR, + now_atm, min_timer); + } + return GRPC_TIMERS_CHECKED_AND_EMPTY; + } + + grpc_error *shutdown_error = + gpr_time_cmp(now, gpr_inf_future(now.clock_type)) != 0 + ? GRPC_ERROR_NONE + : GRPC_ERROR_CREATE_FROM_STATIC_STRING("Shutting down timer system"); + + // tracing + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + char *next_str; + if (next == NULL) { + next_str = gpr_strdup("NULL"); + } else { + gpr_asprintf(&next_str, "%" PRId64 ".%09d [%" PRIdPTR "]", next->tv_sec, + next->tv_nsec, timespec_to_atm_round_down(*next)); + } + gpr_log(GPR_DEBUG, "TIMER CHECK BEGIN: now=%" PRId64 ".%09d [%" PRIdPTR + "] next=%s tls_min=%" PRIdPTR " glob_min=%" PRIdPTR, + now.tv_sec, now.tv_nsec, now_atm, next_str, + gpr_tls_get(&g_last_seen_min_timer), + gpr_atm_no_barrier_load(&g_shared_mutables.min_timer)); + gpr_free(next_str); + } + // actual code + grpc_timer_check_result r; + gpr_atm next_atm; + if (next == NULL) { + r = run_some_expired_timers(exec_ctx, now_atm, NULL, shutdown_error); + } else { + next_atm = timespec_to_atm_round_down(*next); + r = run_some_expired_timers(exec_ctx, now_atm, &next_atm, shutdown_error); + *next = atm_to_timespec(next_atm); + } + // tracing + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + char *next_str; + if (next == NULL) { + next_str = gpr_strdup("NULL"); + } else { + gpr_asprintf(&next_str, "%" PRId64 ".%09d [%" PRIdPTR "]", next->tv_sec, + next->tv_nsec, next_atm); + } + gpr_log(GPR_DEBUG, "TIMER CHECK END: r=%d; next=%s", r, next_str); + gpr_free(next_str); + } + return r; +} + +#endif /* GRPC_TIMER_USE_GENERIC */ diff --git a/src/core/lib/iomgr/timer_heap.c b/src/core/lib/iomgr/timer_heap.c deleted file mode 100644 index 2648d5da5d..0000000000 --- a/src/core/lib/iomgr/timer_heap.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_TIMER_USE_GENERIC - -#include "src/core/lib/iomgr/timer_heap.h" - -#include - -#include -#include - -/* Adjusts a heap so as to move a hole at position i closer to the root, - until a suitable position is found for element t. Then, copies t into that - position. This functor is called each time immediately after modifying a - value in the underlying container, with the offset of the modified element as - its argument. */ -static void adjust_upwards(grpc_timer **first, uint32_t i, grpc_timer *t) { - while (i > 0) { - uint32_t parent = (uint32_t)(((int)i - 1) / 2); - if (first[parent]->deadline <= t->deadline) break; - first[i] = first[parent]; - first[i]->heap_index = i; - i = parent; - } - first[i] = t; - t->heap_index = i; -} - -/* Adjusts a heap so as to move a hole at position i farther away from the root, - until a suitable position is found for element t. Then, copies t into that - position. */ -static void adjust_downwards(grpc_timer **first, uint32_t i, uint32_t length, - grpc_timer *t) { - for (;;) { - uint32_t left_child = 1u + 2u * i; - if (left_child >= length) break; - uint32_t right_child = left_child + 1; - uint32_t next_i = - right_child < length && - first[left_child]->deadline > first[right_child]->deadline - ? right_child - : left_child; - if (t->deadline <= first[next_i]->deadline) break; - first[i] = first[next_i]; - first[i]->heap_index = i; - i = next_i; - } - first[i] = t; - t->heap_index = i; -} - -#define SHRINK_MIN_ELEMS 8 -#define SHRINK_FULLNESS_FACTOR 2 - -static void maybe_shrink(grpc_timer_heap *heap) { - if (heap->timer_count >= 8 && - heap->timer_count <= heap->timer_capacity / SHRINK_FULLNESS_FACTOR / 2) { - heap->timer_capacity = heap->timer_count * SHRINK_FULLNESS_FACTOR; - heap->timers = (grpc_timer **)gpr_realloc( - heap->timers, heap->timer_capacity * sizeof(grpc_timer *)); - } -} - -static void note_changed_priority(grpc_timer_heap *heap, grpc_timer *timer) { - uint32_t i = timer->heap_index; - uint32_t parent = (uint32_t)(((int)i - 1) / 2); - if (heap->timers[parent]->deadline > timer->deadline) { - adjust_upwards(heap->timers, i, timer); - } else { - adjust_downwards(heap->timers, i, heap->timer_count, timer); - } -} - -void grpc_timer_heap_init(grpc_timer_heap *heap) { - memset(heap, 0, sizeof(*heap)); -} - -void grpc_timer_heap_destroy(grpc_timer_heap *heap) { gpr_free(heap->timers); } - -int grpc_timer_heap_add(grpc_timer_heap *heap, grpc_timer *timer) { - if (heap->timer_count == heap->timer_capacity) { - heap->timer_capacity = - GPR_MAX(heap->timer_capacity + 1, heap->timer_capacity * 3 / 2); - heap->timers = (grpc_timer **)gpr_realloc( - heap->timers, heap->timer_capacity * sizeof(grpc_timer *)); - } - timer->heap_index = heap->timer_count; - adjust_upwards(heap->timers, heap->timer_count, timer); - heap->timer_count++; - return timer->heap_index == 0; -} - -void grpc_timer_heap_remove(grpc_timer_heap *heap, grpc_timer *timer) { - uint32_t i = timer->heap_index; - if (i == heap->timer_count - 1) { - heap->timer_count--; - maybe_shrink(heap); - return; - } - heap->timers[i] = heap->timers[heap->timer_count - 1]; - heap->timers[i]->heap_index = i; - heap->timer_count--; - maybe_shrink(heap); - note_changed_priority(heap, heap->timers[i]); -} - -int grpc_timer_heap_is_empty(grpc_timer_heap *heap) { - return heap->timer_count == 0; -} - -grpc_timer *grpc_timer_heap_top(grpc_timer_heap *heap) { - return heap->timers[0]; -} - -void grpc_timer_heap_pop(grpc_timer_heap *heap) { - grpc_timer_heap_remove(heap, grpc_timer_heap_top(heap)); -} - -#endif /* GRPC_TIMER_USE_GENERIC */ diff --git a/src/core/lib/iomgr/timer_heap.cc b/src/core/lib/iomgr/timer_heap.cc new file mode 100644 index 0000000000..2648d5da5d --- /dev/null +++ b/src/core/lib/iomgr/timer_heap.cc @@ -0,0 +1,137 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_TIMER_USE_GENERIC + +#include "src/core/lib/iomgr/timer_heap.h" + +#include + +#include +#include + +/* Adjusts a heap so as to move a hole at position i closer to the root, + until a suitable position is found for element t. Then, copies t into that + position. This functor is called each time immediately after modifying a + value in the underlying container, with the offset of the modified element as + its argument. */ +static void adjust_upwards(grpc_timer **first, uint32_t i, grpc_timer *t) { + while (i > 0) { + uint32_t parent = (uint32_t)(((int)i - 1) / 2); + if (first[parent]->deadline <= t->deadline) break; + first[i] = first[parent]; + first[i]->heap_index = i; + i = parent; + } + first[i] = t; + t->heap_index = i; +} + +/* Adjusts a heap so as to move a hole at position i farther away from the root, + until a suitable position is found for element t. Then, copies t into that + position. */ +static void adjust_downwards(grpc_timer **first, uint32_t i, uint32_t length, + grpc_timer *t) { + for (;;) { + uint32_t left_child = 1u + 2u * i; + if (left_child >= length) break; + uint32_t right_child = left_child + 1; + uint32_t next_i = + right_child < length && + first[left_child]->deadline > first[right_child]->deadline + ? right_child + : left_child; + if (t->deadline <= first[next_i]->deadline) break; + first[i] = first[next_i]; + first[i]->heap_index = i; + i = next_i; + } + first[i] = t; + t->heap_index = i; +} + +#define SHRINK_MIN_ELEMS 8 +#define SHRINK_FULLNESS_FACTOR 2 + +static void maybe_shrink(grpc_timer_heap *heap) { + if (heap->timer_count >= 8 && + heap->timer_count <= heap->timer_capacity / SHRINK_FULLNESS_FACTOR / 2) { + heap->timer_capacity = heap->timer_count * SHRINK_FULLNESS_FACTOR; + heap->timers = (grpc_timer **)gpr_realloc( + heap->timers, heap->timer_capacity * sizeof(grpc_timer *)); + } +} + +static void note_changed_priority(grpc_timer_heap *heap, grpc_timer *timer) { + uint32_t i = timer->heap_index; + uint32_t parent = (uint32_t)(((int)i - 1) / 2); + if (heap->timers[parent]->deadline > timer->deadline) { + adjust_upwards(heap->timers, i, timer); + } else { + adjust_downwards(heap->timers, i, heap->timer_count, timer); + } +} + +void grpc_timer_heap_init(grpc_timer_heap *heap) { + memset(heap, 0, sizeof(*heap)); +} + +void grpc_timer_heap_destroy(grpc_timer_heap *heap) { gpr_free(heap->timers); } + +int grpc_timer_heap_add(grpc_timer_heap *heap, grpc_timer *timer) { + if (heap->timer_count == heap->timer_capacity) { + heap->timer_capacity = + GPR_MAX(heap->timer_capacity + 1, heap->timer_capacity * 3 / 2); + heap->timers = (grpc_timer **)gpr_realloc( + heap->timers, heap->timer_capacity * sizeof(grpc_timer *)); + } + timer->heap_index = heap->timer_count; + adjust_upwards(heap->timers, heap->timer_count, timer); + heap->timer_count++; + return timer->heap_index == 0; +} + +void grpc_timer_heap_remove(grpc_timer_heap *heap, grpc_timer *timer) { + uint32_t i = timer->heap_index; + if (i == heap->timer_count - 1) { + heap->timer_count--; + maybe_shrink(heap); + return; + } + heap->timers[i] = heap->timers[heap->timer_count - 1]; + heap->timers[i]->heap_index = i; + heap->timer_count--; + maybe_shrink(heap); + note_changed_priority(heap, heap->timers[i]); +} + +int grpc_timer_heap_is_empty(grpc_timer_heap *heap) { + return heap->timer_count == 0; +} + +grpc_timer *grpc_timer_heap_top(grpc_timer_heap *heap) { + return heap->timers[0]; +} + +void grpc_timer_heap_pop(grpc_timer_heap *heap) { + grpc_timer_heap_remove(heap, grpc_timer_heap_top(heap)); +} + +#endif /* GRPC_TIMER_USE_GENERIC */ diff --git a/src/core/lib/iomgr/timer_manager.c b/src/core/lib/iomgr/timer_manager.c deleted file mode 100644 index 04ca44563d..0000000000 --- a/src/core/lib/iomgr/timer_manager.c +++ /dev/null @@ -1,354 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/timer_manager.h" - -#include -#include -#include - -#include "src/core/lib/debug/trace.h" -#include "src/core/lib/iomgr/timer.h" - -typedef struct completed_thread { - gpr_thd_id t; - struct completed_thread *next; -} completed_thread; - -extern grpc_tracer_flag grpc_timer_check_trace; - -// global mutex -static gpr_mu g_mu; -// are we multi-threaded -static bool g_threaded; -// cv to wait until a thread is needed -static gpr_cv g_cv_wait; -// cv for notification when threading ends -static gpr_cv g_cv_shutdown; -// number of threads in the system -static int g_thread_count; -// number of threads sitting around waiting -static int g_waiter_count; -// linked list of threads that have completed (and need joining) -static completed_thread *g_completed_threads; -// was the manager kicked by the timer system -static bool g_kicked; -// is there a thread waiting until the next timer should fire? -static bool g_has_timed_waiter; -// the deadline of the current timed waiter thread (only relevant if -// g_has_timed_waiter is true) -static gpr_timespec g_timed_waiter_deadline; -// generation counter to track which thread is waiting for the next timer -static uint64_t g_timed_waiter_generation; - -static void timer_thread(void *completed_thread_ptr); - -static void gc_completed_threads(void) { - if (g_completed_threads != NULL) { - completed_thread *to_gc = g_completed_threads; - g_completed_threads = NULL; - gpr_mu_unlock(&g_mu); - while (to_gc != NULL) { - gpr_thd_join(to_gc->t); - completed_thread *next = to_gc->next; - gpr_free(to_gc); - to_gc = next; - } - gpr_mu_lock(&g_mu); - } -} - -static void start_timer_thread_and_unlock(void) { - GPR_ASSERT(g_threaded); - ++g_waiter_count; - ++g_thread_count; - gpr_mu_unlock(&g_mu); - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, "Spawn timer thread"); - } - gpr_thd_options opt = gpr_thd_options_default(); - gpr_thd_options_set_joinable(&opt); - completed_thread *ct = (completed_thread *)gpr_malloc(sizeof(*ct)); - // The call to gpr_thd_new() has to be under the same lock used by - // gc_completed_threads(), particularly due to ct->t, which is written here - // (internally by gpr_thd_new) and read there. Otherwise it's possible for ct - // to leak through g_completed_threads and be freed in gc_completed_threads() - // before "&ct->t" is written to, causing a use-after-free. - gpr_mu_lock(&g_mu); - gpr_thd_new(&ct->t, timer_thread, ct, &opt); - gpr_mu_unlock(&g_mu); -} - -void grpc_timer_manager_tick() { - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - gpr_timespec next = gpr_inf_future(GPR_CLOCK_MONOTONIC); - gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC); - grpc_timer_check(&exec_ctx, now, &next); - grpc_exec_ctx_finish(&exec_ctx); -} - -static void run_some_timers(grpc_exec_ctx *exec_ctx) { - // if there's something to execute... - gpr_mu_lock(&g_mu); - // remove a waiter from the pool, and start another thread if necessary - --g_waiter_count; - if (g_waiter_count == 0 && g_threaded) { - start_timer_thread_and_unlock(); - } else { - // if there's no thread waiting with a timeout, kick an existing - // waiter so that the next deadline is not missed - if (!g_has_timed_waiter) { - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, "kick untimed waiter"); - } - gpr_cv_signal(&g_cv_wait); - } - gpr_mu_unlock(&g_mu); - } - // without our lock, flush the exec_ctx - grpc_exec_ctx_flush(exec_ctx); - gpr_mu_lock(&g_mu); - // garbage collect any threads hanging out that are dead - gc_completed_threads(); - // get ready to wait again - ++g_waiter_count; - gpr_mu_unlock(&g_mu); -} - -// wait until 'next' (or forever if there is already a timed waiter in the pool) -// returns true if the thread should continue executing (false if it should -// shutdown) -static bool wait_until(gpr_timespec next) { - const gpr_timespec inf_future = gpr_inf_future(GPR_CLOCK_MONOTONIC); - gpr_mu_lock(&g_mu); - // if we're not threaded anymore, leave - if (!g_threaded) { - gpr_mu_unlock(&g_mu); - return false; - } - - // If g_kicked is true at this point, it means there was a kick from the timer - // system that the timer-manager threads here missed. We cannot trust 'next' - // here any longer (since there might be an earlier deadline). So if g_kicked - // is true at this point, we should quickly exit this and get the next - // deadline from the timer system - - if (!g_kicked) { - // if there's no timed waiter, we should become one: that waiter waits - // only until the next timer should expire. All other timers wait forever - // - // 'g_timed_waiter_generation' is a global generation counter. The idea here - // is that the thread becoming a timed-waiter increments and stores this - // global counter locally in 'my_timed_waiter_generation' before going to - // sleep. After waking up, if my_timed_waiter_generation == - // g_timed_waiter_generation, it can be sure that it was the timed_waiter - // thread (and that no other thread took over while this was asleep) - // - // Initialize my_timed_waiter_generation to some value that is NOT equal to - // g_timed_waiter_generation - uint64_t my_timed_waiter_generation = g_timed_waiter_generation - 1; - - /* If there's no timed waiter, we should become one: that waiter waits only - until the next timer should expire. All other timer threads wait forever - unless their 'next' is earlier than the current timed-waiter's deadline - (in which case the thread with earlier 'next' takes over as the new timed - waiter) */ - if (gpr_time_cmp(next, inf_future) != 0) { - if (!g_has_timed_waiter || - (gpr_time_cmp(next, g_timed_waiter_deadline) < 0)) { - my_timed_waiter_generation = ++g_timed_waiter_generation; - g_has_timed_waiter = true; - g_timed_waiter_deadline = next; - - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_timespec wait_time = - gpr_time_sub(next, gpr_now(GPR_CLOCK_MONOTONIC)); - gpr_log(GPR_DEBUG, "sleep for a %" PRId64 ".%09d seconds", - wait_time.tv_sec, wait_time.tv_nsec); - } - } else { // g_timed_waiter == true && next >= g_timed_waiter_deadline - next = inf_future; - } - } - - if (GRPC_TRACER_ON(grpc_timer_check_trace) && - gpr_time_cmp(next, inf_future) == 0) { - gpr_log(GPR_DEBUG, "sleep until kicked"); - } - - gpr_cv_wait(&g_cv_wait, &g_mu, next); - - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, "wait ended: was_timed:%d kicked:%d", - my_timed_waiter_generation == g_timed_waiter_generation, - g_kicked); - } - // if this was the timed waiter, then we need to check timers, and flag - // that there's now no timed waiter... we'll look for a replacement if - // there's work to do after checking timers (code above) - if (my_timed_waiter_generation == g_timed_waiter_generation) { - g_has_timed_waiter = false; - g_timed_waiter_deadline = gpr_inf_future(GPR_CLOCK_MONOTONIC); - } - } - - // if this was a kick from the timer system, consume it (and don't stop - // this thread yet) - if (g_kicked) { - grpc_timer_consume_kick(); - g_kicked = false; - } - - gpr_mu_unlock(&g_mu); - return true; -} - -static void timer_main_loop(grpc_exec_ctx *exec_ctx) { - const gpr_timespec inf_future = gpr_inf_future(GPR_CLOCK_MONOTONIC); - for (;;) { - gpr_timespec next = inf_future; - gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC); - // check timer state, updates next to the next time to run a check - switch (grpc_timer_check(exec_ctx, now, &next)) { - case GRPC_TIMERS_FIRED: - run_some_timers(exec_ctx); - break; - case GRPC_TIMERS_NOT_CHECKED: - /* This case only happens under contention, meaning more than one timer - manager thread checked timers concurrently. - - If that happens, we're guaranteed that some other thread has just - checked timers, and this will avalanche into some other thread seeing - empty timers and doing a timed sleep. - - Consequently, we can just sleep forever here and be happy at some - saved wakeup cycles. */ - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, "timers not checked: expect another thread to"); - } - next = inf_future; - /* fall through */ - case GRPC_TIMERS_CHECKED_AND_EMPTY: - if (!wait_until(next)) { - return; - } - break; - } - } -} - -static void timer_thread_cleanup(completed_thread *ct) { - gpr_mu_lock(&g_mu); - // terminate the thread: drop the waiter count, thread count, and let whomever - // stopped the threading stuff know that we're done - --g_waiter_count; - --g_thread_count; - if (0 == g_thread_count) { - gpr_cv_signal(&g_cv_shutdown); - } - ct->next = g_completed_threads; - g_completed_threads = ct; - gpr_mu_unlock(&g_mu); - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, "End timer thread"); - } -} - -static void timer_thread(void *completed_thread_ptr) { - // this threads exec_ctx: we try to run things through to completion here - // since it's easy to spin up new threads - grpc_exec_ctx exec_ctx = - GRPC_EXEC_CTX_INITIALIZER(0, grpc_never_ready_to_finish, NULL); - timer_main_loop(&exec_ctx); - grpc_exec_ctx_finish(&exec_ctx); - timer_thread_cleanup((completed_thread *)completed_thread_ptr); -} - -static void start_threads(void) { - gpr_mu_lock(&g_mu); - if (!g_threaded) { - g_threaded = true; - start_timer_thread_and_unlock(); - } else { - g_threaded = false; - gpr_mu_unlock(&g_mu); - } -} - -void grpc_timer_manager_init(void) { - gpr_mu_init(&g_mu); - gpr_cv_init(&g_cv_wait); - gpr_cv_init(&g_cv_shutdown); - g_threaded = false; - g_thread_count = 0; - g_waiter_count = 0; - g_completed_threads = NULL; - - g_has_timed_waiter = false; - g_timed_waiter_deadline = gpr_inf_future(GPR_CLOCK_MONOTONIC); - - start_threads(); -} - -static void stop_threads(void) { - gpr_mu_lock(&g_mu); - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, "stop timer threads: threaded=%d", g_threaded); - } - if (g_threaded) { - g_threaded = false; - gpr_cv_broadcast(&g_cv_wait); - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, "num timer threads: %d", g_thread_count); - } - while (g_thread_count > 0) { - gpr_cv_wait(&g_cv_shutdown, &g_mu, gpr_inf_future(GPR_CLOCK_REALTIME)); - if (GRPC_TRACER_ON(grpc_timer_check_trace)) { - gpr_log(GPR_DEBUG, "num timer threads: %d", g_thread_count); - } - gc_completed_threads(); - } - } - gpr_mu_unlock(&g_mu); -} - -void grpc_timer_manager_shutdown(void) { - stop_threads(); - - gpr_mu_destroy(&g_mu); - gpr_cv_destroy(&g_cv_wait); - gpr_cv_destroy(&g_cv_shutdown); -} - -void grpc_timer_manager_set_threading(bool threaded) { - if (threaded) { - start_threads(); - } else { - stop_threads(); - } -} - -void grpc_kick_poller(void) { - gpr_mu_lock(&g_mu); - g_kicked = true; - g_has_timed_waiter = false; - g_timed_waiter_deadline = gpr_inf_future(GPR_CLOCK_MONOTONIC); - ++g_timed_waiter_generation; - gpr_cv_signal(&g_cv_wait); - gpr_mu_unlock(&g_mu); -} diff --git a/src/core/lib/iomgr/timer_manager.cc b/src/core/lib/iomgr/timer_manager.cc new file mode 100644 index 0000000000..04ca44563d --- /dev/null +++ b/src/core/lib/iomgr/timer_manager.cc @@ -0,0 +1,354 @@ +/* + * + * Copyright 2017 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/timer_manager.h" + +#include +#include +#include + +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/iomgr/timer.h" + +typedef struct completed_thread { + gpr_thd_id t; + struct completed_thread *next; +} completed_thread; + +extern grpc_tracer_flag grpc_timer_check_trace; + +// global mutex +static gpr_mu g_mu; +// are we multi-threaded +static bool g_threaded; +// cv to wait until a thread is needed +static gpr_cv g_cv_wait; +// cv for notification when threading ends +static gpr_cv g_cv_shutdown; +// number of threads in the system +static int g_thread_count; +// number of threads sitting around waiting +static int g_waiter_count; +// linked list of threads that have completed (and need joining) +static completed_thread *g_completed_threads; +// was the manager kicked by the timer system +static bool g_kicked; +// is there a thread waiting until the next timer should fire? +static bool g_has_timed_waiter; +// the deadline of the current timed waiter thread (only relevant if +// g_has_timed_waiter is true) +static gpr_timespec g_timed_waiter_deadline; +// generation counter to track which thread is waiting for the next timer +static uint64_t g_timed_waiter_generation; + +static void timer_thread(void *completed_thread_ptr); + +static void gc_completed_threads(void) { + if (g_completed_threads != NULL) { + completed_thread *to_gc = g_completed_threads; + g_completed_threads = NULL; + gpr_mu_unlock(&g_mu); + while (to_gc != NULL) { + gpr_thd_join(to_gc->t); + completed_thread *next = to_gc->next; + gpr_free(to_gc); + to_gc = next; + } + gpr_mu_lock(&g_mu); + } +} + +static void start_timer_thread_and_unlock(void) { + GPR_ASSERT(g_threaded); + ++g_waiter_count; + ++g_thread_count; + gpr_mu_unlock(&g_mu); + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, "Spawn timer thread"); + } + gpr_thd_options opt = gpr_thd_options_default(); + gpr_thd_options_set_joinable(&opt); + completed_thread *ct = (completed_thread *)gpr_malloc(sizeof(*ct)); + // The call to gpr_thd_new() has to be under the same lock used by + // gc_completed_threads(), particularly due to ct->t, which is written here + // (internally by gpr_thd_new) and read there. Otherwise it's possible for ct + // to leak through g_completed_threads and be freed in gc_completed_threads() + // before "&ct->t" is written to, causing a use-after-free. + gpr_mu_lock(&g_mu); + gpr_thd_new(&ct->t, timer_thread, ct, &opt); + gpr_mu_unlock(&g_mu); +} + +void grpc_timer_manager_tick() { + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + gpr_timespec next = gpr_inf_future(GPR_CLOCK_MONOTONIC); + gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC); + grpc_timer_check(&exec_ctx, now, &next); + grpc_exec_ctx_finish(&exec_ctx); +} + +static void run_some_timers(grpc_exec_ctx *exec_ctx) { + // if there's something to execute... + gpr_mu_lock(&g_mu); + // remove a waiter from the pool, and start another thread if necessary + --g_waiter_count; + if (g_waiter_count == 0 && g_threaded) { + start_timer_thread_and_unlock(); + } else { + // if there's no thread waiting with a timeout, kick an existing + // waiter so that the next deadline is not missed + if (!g_has_timed_waiter) { + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, "kick untimed waiter"); + } + gpr_cv_signal(&g_cv_wait); + } + gpr_mu_unlock(&g_mu); + } + // without our lock, flush the exec_ctx + grpc_exec_ctx_flush(exec_ctx); + gpr_mu_lock(&g_mu); + // garbage collect any threads hanging out that are dead + gc_completed_threads(); + // get ready to wait again + ++g_waiter_count; + gpr_mu_unlock(&g_mu); +} + +// wait until 'next' (or forever if there is already a timed waiter in the pool) +// returns true if the thread should continue executing (false if it should +// shutdown) +static bool wait_until(gpr_timespec next) { + const gpr_timespec inf_future = gpr_inf_future(GPR_CLOCK_MONOTONIC); + gpr_mu_lock(&g_mu); + // if we're not threaded anymore, leave + if (!g_threaded) { + gpr_mu_unlock(&g_mu); + return false; + } + + // If g_kicked is true at this point, it means there was a kick from the timer + // system that the timer-manager threads here missed. We cannot trust 'next' + // here any longer (since there might be an earlier deadline). So if g_kicked + // is true at this point, we should quickly exit this and get the next + // deadline from the timer system + + if (!g_kicked) { + // if there's no timed waiter, we should become one: that waiter waits + // only until the next timer should expire. All other timers wait forever + // + // 'g_timed_waiter_generation' is a global generation counter. The idea here + // is that the thread becoming a timed-waiter increments and stores this + // global counter locally in 'my_timed_waiter_generation' before going to + // sleep. After waking up, if my_timed_waiter_generation == + // g_timed_waiter_generation, it can be sure that it was the timed_waiter + // thread (and that no other thread took over while this was asleep) + // + // Initialize my_timed_waiter_generation to some value that is NOT equal to + // g_timed_waiter_generation + uint64_t my_timed_waiter_generation = g_timed_waiter_generation - 1; + + /* If there's no timed waiter, we should become one: that waiter waits only + until the next timer should expire. All other timer threads wait forever + unless their 'next' is earlier than the current timed-waiter's deadline + (in which case the thread with earlier 'next' takes over as the new timed + waiter) */ + if (gpr_time_cmp(next, inf_future) != 0) { + if (!g_has_timed_waiter || + (gpr_time_cmp(next, g_timed_waiter_deadline) < 0)) { + my_timed_waiter_generation = ++g_timed_waiter_generation; + g_has_timed_waiter = true; + g_timed_waiter_deadline = next; + + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_timespec wait_time = + gpr_time_sub(next, gpr_now(GPR_CLOCK_MONOTONIC)); + gpr_log(GPR_DEBUG, "sleep for a %" PRId64 ".%09d seconds", + wait_time.tv_sec, wait_time.tv_nsec); + } + } else { // g_timed_waiter == true && next >= g_timed_waiter_deadline + next = inf_future; + } + } + + if (GRPC_TRACER_ON(grpc_timer_check_trace) && + gpr_time_cmp(next, inf_future) == 0) { + gpr_log(GPR_DEBUG, "sleep until kicked"); + } + + gpr_cv_wait(&g_cv_wait, &g_mu, next); + + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, "wait ended: was_timed:%d kicked:%d", + my_timed_waiter_generation == g_timed_waiter_generation, + g_kicked); + } + // if this was the timed waiter, then we need to check timers, and flag + // that there's now no timed waiter... we'll look for a replacement if + // there's work to do after checking timers (code above) + if (my_timed_waiter_generation == g_timed_waiter_generation) { + g_has_timed_waiter = false; + g_timed_waiter_deadline = gpr_inf_future(GPR_CLOCK_MONOTONIC); + } + } + + // if this was a kick from the timer system, consume it (and don't stop + // this thread yet) + if (g_kicked) { + grpc_timer_consume_kick(); + g_kicked = false; + } + + gpr_mu_unlock(&g_mu); + return true; +} + +static void timer_main_loop(grpc_exec_ctx *exec_ctx) { + const gpr_timespec inf_future = gpr_inf_future(GPR_CLOCK_MONOTONIC); + for (;;) { + gpr_timespec next = inf_future; + gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC); + // check timer state, updates next to the next time to run a check + switch (grpc_timer_check(exec_ctx, now, &next)) { + case GRPC_TIMERS_FIRED: + run_some_timers(exec_ctx); + break; + case GRPC_TIMERS_NOT_CHECKED: + /* This case only happens under contention, meaning more than one timer + manager thread checked timers concurrently. + + If that happens, we're guaranteed that some other thread has just + checked timers, and this will avalanche into some other thread seeing + empty timers and doing a timed sleep. + + Consequently, we can just sleep forever here and be happy at some + saved wakeup cycles. */ + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, "timers not checked: expect another thread to"); + } + next = inf_future; + /* fall through */ + case GRPC_TIMERS_CHECKED_AND_EMPTY: + if (!wait_until(next)) { + return; + } + break; + } + } +} + +static void timer_thread_cleanup(completed_thread *ct) { + gpr_mu_lock(&g_mu); + // terminate the thread: drop the waiter count, thread count, and let whomever + // stopped the threading stuff know that we're done + --g_waiter_count; + --g_thread_count; + if (0 == g_thread_count) { + gpr_cv_signal(&g_cv_shutdown); + } + ct->next = g_completed_threads; + g_completed_threads = ct; + gpr_mu_unlock(&g_mu); + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, "End timer thread"); + } +} + +static void timer_thread(void *completed_thread_ptr) { + // this threads exec_ctx: we try to run things through to completion here + // since it's easy to spin up new threads + grpc_exec_ctx exec_ctx = + GRPC_EXEC_CTX_INITIALIZER(0, grpc_never_ready_to_finish, NULL); + timer_main_loop(&exec_ctx); + grpc_exec_ctx_finish(&exec_ctx); + timer_thread_cleanup((completed_thread *)completed_thread_ptr); +} + +static void start_threads(void) { + gpr_mu_lock(&g_mu); + if (!g_threaded) { + g_threaded = true; + start_timer_thread_and_unlock(); + } else { + g_threaded = false; + gpr_mu_unlock(&g_mu); + } +} + +void grpc_timer_manager_init(void) { + gpr_mu_init(&g_mu); + gpr_cv_init(&g_cv_wait); + gpr_cv_init(&g_cv_shutdown); + g_threaded = false; + g_thread_count = 0; + g_waiter_count = 0; + g_completed_threads = NULL; + + g_has_timed_waiter = false; + g_timed_waiter_deadline = gpr_inf_future(GPR_CLOCK_MONOTONIC); + + start_threads(); +} + +static void stop_threads(void) { + gpr_mu_lock(&g_mu); + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, "stop timer threads: threaded=%d", g_threaded); + } + if (g_threaded) { + g_threaded = false; + gpr_cv_broadcast(&g_cv_wait); + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, "num timer threads: %d", g_thread_count); + } + while (g_thread_count > 0) { + gpr_cv_wait(&g_cv_shutdown, &g_mu, gpr_inf_future(GPR_CLOCK_REALTIME)); + if (GRPC_TRACER_ON(grpc_timer_check_trace)) { + gpr_log(GPR_DEBUG, "num timer threads: %d", g_thread_count); + } + gc_completed_threads(); + } + } + gpr_mu_unlock(&g_mu); +} + +void grpc_timer_manager_shutdown(void) { + stop_threads(); + + gpr_mu_destroy(&g_mu); + gpr_cv_destroy(&g_cv_wait); + gpr_cv_destroy(&g_cv_shutdown); +} + +void grpc_timer_manager_set_threading(bool threaded) { + if (threaded) { + start_threads(); + } else { + stop_threads(); + } +} + +void grpc_kick_poller(void) { + gpr_mu_lock(&g_mu); + g_kicked = true; + g_has_timed_waiter = false; + g_timed_waiter_deadline = gpr_inf_future(GPR_CLOCK_MONOTONIC); + ++g_timed_waiter_generation; + gpr_cv_signal(&g_cv_wait); + gpr_mu_unlock(&g_mu); +} diff --git a/src/core/lib/iomgr/timer_uv.c b/src/core/lib/iomgr/timer_uv.c deleted file mode 100644 index adced41f53..0000000000 --- a/src/core/lib/iomgr/timer_uv.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#if GRPC_UV - -#include -#include - -#include "src/core/lib/debug/trace.h" -#include "src/core/lib/iomgr/iomgr_uv.h" -#include "src/core/lib/iomgr/timer.h" - -#include - -grpc_tracer_flag grpc_timer_trace = GRPC_TRACER_INITIALIZER(false, "timer"); -grpc_tracer_flag grpc_timer_check_trace = - GRPC_TRACER_INITIALIZER(false, "timer_check"); - -static void timer_close_callback(uv_handle_t *handle) { gpr_free(handle); } - -static void stop_uv_timer(uv_timer_t *handle) { - uv_timer_stop(handle); - uv_unref((uv_handle_t *)handle); - uv_close((uv_handle_t *)handle, timer_close_callback); -} - -void run_expired_timer(uv_timer_t *handle) { - grpc_timer *timer = (grpc_timer *)handle->data; - grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; - GRPC_UV_ASSERT_SAME_THREAD(); - GPR_ASSERT(timer->pending); - timer->pending = 0; - GRPC_CLOSURE_SCHED(&exec_ctx, timer->closure, GRPC_ERROR_NONE); - stop_uv_timer(handle); - grpc_exec_ctx_finish(&exec_ctx); -} - -void grpc_timer_init(grpc_exec_ctx *exec_ctx, grpc_timer *timer, - gpr_timespec deadline, grpc_closure *closure, - gpr_timespec now) { - uint64_t timeout; - uv_timer_t *uv_timer; - GRPC_UV_ASSERT_SAME_THREAD(); - timer->closure = closure; - if (gpr_time_cmp(deadline, now) <= 0) { - timer->pending = 0; - GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, GRPC_ERROR_NONE); - return; - } - timer->pending = 1; - timeout = (uint64_t)gpr_time_to_millis(gpr_time_sub(deadline, now)); - uv_timer = gpr_malloc(sizeof(uv_timer_t)); - uv_timer_init(uv_default_loop(), uv_timer); - uv_timer->data = timer; - timer->uv_timer = uv_timer; - uv_timer_start(uv_timer, run_expired_timer, timeout, 0); - /* We assume that gRPC timers are only used alongside other active gRPC - objects, and that there will therefore always be something else keeping - the uv loop alive whenever there is a timer */ - uv_unref((uv_handle_t *)uv_timer); -} - -void grpc_timer_init_unset(grpc_timer *timer) { timer->pending = 0; } - -void grpc_timer_cancel(grpc_exec_ctx *exec_ctx, grpc_timer *timer) { - GRPC_UV_ASSERT_SAME_THREAD(); - if (timer->pending) { - timer->pending = 0; - GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, GRPC_ERROR_CANCELLED); - stop_uv_timer((uv_timer_t *)timer->uv_timer); - } -} - -grpc_timer_check_result grpc_timer_check(grpc_exec_ctx *exec_ctx, - gpr_timespec now, gpr_timespec *next) { - return GRPC_TIMERS_NOT_CHECKED; -} - -void grpc_timer_list_init(gpr_timespec now) {} -void grpc_timer_list_shutdown(grpc_exec_ctx *exec_ctx) {} - -void grpc_timer_consume_kick(void) {} - -#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/timer_uv.cc b/src/core/lib/iomgr/timer_uv.cc new file mode 100644 index 0000000000..adced41f53 --- /dev/null +++ b/src/core/lib/iomgr/timer_uv.cc @@ -0,0 +1,101 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#if GRPC_UV + +#include +#include + +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/iomgr/iomgr_uv.h" +#include "src/core/lib/iomgr/timer.h" + +#include + +grpc_tracer_flag grpc_timer_trace = GRPC_TRACER_INITIALIZER(false, "timer"); +grpc_tracer_flag grpc_timer_check_trace = + GRPC_TRACER_INITIALIZER(false, "timer_check"); + +static void timer_close_callback(uv_handle_t *handle) { gpr_free(handle); } + +static void stop_uv_timer(uv_timer_t *handle) { + uv_timer_stop(handle); + uv_unref((uv_handle_t *)handle); + uv_close((uv_handle_t *)handle, timer_close_callback); +} + +void run_expired_timer(uv_timer_t *handle) { + grpc_timer *timer = (grpc_timer *)handle->data; + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + GRPC_UV_ASSERT_SAME_THREAD(); + GPR_ASSERT(timer->pending); + timer->pending = 0; + GRPC_CLOSURE_SCHED(&exec_ctx, timer->closure, GRPC_ERROR_NONE); + stop_uv_timer(handle); + grpc_exec_ctx_finish(&exec_ctx); +} + +void grpc_timer_init(grpc_exec_ctx *exec_ctx, grpc_timer *timer, + gpr_timespec deadline, grpc_closure *closure, + gpr_timespec now) { + uint64_t timeout; + uv_timer_t *uv_timer; + GRPC_UV_ASSERT_SAME_THREAD(); + timer->closure = closure; + if (gpr_time_cmp(deadline, now) <= 0) { + timer->pending = 0; + GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, GRPC_ERROR_NONE); + return; + } + timer->pending = 1; + timeout = (uint64_t)gpr_time_to_millis(gpr_time_sub(deadline, now)); + uv_timer = gpr_malloc(sizeof(uv_timer_t)); + uv_timer_init(uv_default_loop(), uv_timer); + uv_timer->data = timer; + timer->uv_timer = uv_timer; + uv_timer_start(uv_timer, run_expired_timer, timeout, 0); + /* We assume that gRPC timers are only used alongside other active gRPC + objects, and that there will therefore always be something else keeping + the uv loop alive whenever there is a timer */ + uv_unref((uv_handle_t *)uv_timer); +} + +void grpc_timer_init_unset(grpc_timer *timer) { timer->pending = 0; } + +void grpc_timer_cancel(grpc_exec_ctx *exec_ctx, grpc_timer *timer) { + GRPC_UV_ASSERT_SAME_THREAD(); + if (timer->pending) { + timer->pending = 0; + GRPC_CLOSURE_SCHED(exec_ctx, timer->closure, GRPC_ERROR_CANCELLED); + stop_uv_timer((uv_timer_t *)timer->uv_timer); + } +} + +grpc_timer_check_result grpc_timer_check(grpc_exec_ctx *exec_ctx, + gpr_timespec now, gpr_timespec *next) { + return GRPC_TIMERS_NOT_CHECKED; +} + +void grpc_timer_list_init(gpr_timespec now) {} +void grpc_timer_list_shutdown(grpc_exec_ctx *exec_ctx) {} + +void grpc_timer_consume_kick(void) {} + +#endif /* GRPC_UV */ diff --git a/src/core/lib/iomgr/udp_server.c b/src/core/lib/iomgr/udp_server.c deleted file mode 100644 index 00b2e68bb5..0000000000 --- a/src/core/lib/iomgr/udp_server.c +++ /dev/null @@ -1,549 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -/* FIXME: "posix" files shouldn't be depending on _GNU_SOURCE */ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_SOCKET - -#include "src/core/lib/iomgr/udp_server.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/iomgr/error.h" -#include "src/core/lib/iomgr/ev_posix.h" -#include "src/core/lib/iomgr/resolve_address.h" -#include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/iomgr/sockaddr_utils.h" -#include "src/core/lib/iomgr/socket_factory_posix.h" -#include "src/core/lib/iomgr/socket_utils_posix.h" -#include "src/core/lib/iomgr/unix_sockets_posix.h" -#include "src/core/lib/support/string.h" - -/* one listening port */ -typedef struct grpc_udp_listener grpc_udp_listener; -struct grpc_udp_listener { - int fd; - grpc_fd *emfd; - grpc_udp_server *server; - grpc_resolved_address addr; - grpc_closure read_closure; - grpc_closure write_closure; - // To be called when corresponding QuicGrpcServer closes all active - // connections. - grpc_closure orphan_fd_closure; - grpc_closure destroyed_closure; - grpc_udp_server_read_cb read_cb; - grpc_udp_server_write_cb write_cb; - grpc_udp_server_orphan_cb orphan_cb; - // True if orphan_cb is trigered. - bool orphan_notified; - - struct grpc_udp_listener *next; -}; - -struct shutdown_fd_args { - grpc_fd *fd; - gpr_mu *server_mu; -}; - -/* the overall server */ -struct grpc_udp_server { - gpr_mu mu; - - /* factory to use for creating and binding sockets, or NULL */ - grpc_socket_factory *socket_factory; - - /* active port count: how many ports are actually still listening */ - size_t active_ports; - /* destroyed port count: how many ports are completely destroyed */ - size_t destroyed_ports; - - /* is this server shutting down? (boolean) */ - int shutdown; - - /* linked list of server ports */ - grpc_udp_listener *head; - grpc_udp_listener *tail; - unsigned nports; - - /* shutdown callback */ - grpc_closure *shutdown_complete; - - /* all pollsets interested in new connections */ - grpc_pollset **pollsets; - /* number of pollsets in the pollsets array */ - size_t pollset_count; - /* opaque object to pass to callbacks */ - void *user_data; -}; - -static grpc_socket_factory *get_socket_factory(const grpc_channel_args *args) { - if (args) { - const grpc_arg *arg = grpc_channel_args_find(args, GRPC_ARG_SOCKET_FACTORY); - if (arg) { - GPR_ASSERT(arg->type == GRPC_ARG_POINTER); - return (grpc_socket_factory *)arg->value.pointer.p; - } - } - return NULL; -} - -grpc_udp_server *grpc_udp_server_create(const grpc_channel_args *args) { - grpc_udp_server *s = (grpc_udp_server *)gpr_malloc(sizeof(grpc_udp_server)); - gpr_mu_init(&s->mu); - s->socket_factory = get_socket_factory(args); - if (s->socket_factory) { - grpc_socket_factory_ref(s->socket_factory); - } - s->active_ports = 0; - s->destroyed_ports = 0; - s->shutdown = 0; - s->head = NULL; - s->tail = NULL; - s->nports = 0; - - return s; -} - -static void shutdown_fd(grpc_exec_ctx *exec_ctx, void *args, - grpc_error *error) { - struct shutdown_fd_args *shutdown_args = (struct shutdown_fd_args *)args; - gpr_mu_lock(shutdown_args->server_mu); - grpc_fd_shutdown(exec_ctx, shutdown_args->fd, GRPC_ERROR_REF(error)); - gpr_mu_unlock(shutdown_args->server_mu); - gpr_free(shutdown_args); -} - -static void dummy_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { - // No-op. -} - -static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_udp_server *s) { - if (s->shutdown_complete != NULL) { - GRPC_CLOSURE_SCHED(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE); - } - - gpr_mu_destroy(&s->mu); - - while (s->head) { - grpc_udp_listener *sp = s->head; - s->head = sp->next; - gpr_free(sp); - } - - if (s->socket_factory) { - grpc_socket_factory_unref(s->socket_factory); - } - - gpr_free(s); -} - -static void destroyed_port(grpc_exec_ctx *exec_ctx, void *server, - grpc_error *error) { - grpc_udp_server *s = (grpc_udp_server *)server; - gpr_mu_lock(&s->mu); - s->destroyed_ports++; - if (s->destroyed_ports == s->nports) { - gpr_mu_unlock(&s->mu); - finish_shutdown(exec_ctx, s); - } else { - gpr_mu_unlock(&s->mu); - } -} - -/* called when all listening endpoints have been shutdown, so no further - events will be received on them - at this point it's safe to destroy - things */ -static void deactivated_all_ports(grpc_exec_ctx *exec_ctx, grpc_udp_server *s) { - /* delete ALL the things */ - gpr_mu_lock(&s->mu); - - GPR_ASSERT(s->shutdown); - - if (s->head) { - grpc_udp_listener *sp; - for (sp = s->head; sp; sp = sp->next) { - grpc_unlink_if_unix_domain_socket(&sp->addr); - - GRPC_CLOSURE_INIT(&sp->destroyed_closure, destroyed_port, s, - grpc_schedule_on_exec_ctx); - if (!sp->orphan_notified) { - /* Call the orphan_cb to signal that the FD is about to be closed and - * should no longer be used. Because at this point, all listening ports - * have been shutdown already, no need to shutdown again.*/ - GRPC_CLOSURE_INIT(&sp->orphan_fd_closure, dummy_cb, sp->emfd, - grpc_schedule_on_exec_ctx); - GPR_ASSERT(sp->orphan_cb); - sp->orphan_cb(exec_ctx, sp->emfd, &sp->orphan_fd_closure, - sp->server->user_data); - } - grpc_fd_orphan(exec_ctx, sp->emfd, &sp->destroyed_closure, NULL, - false /* already_closed */, "udp_listener_shutdown"); - } - gpr_mu_unlock(&s->mu); - } else { - gpr_mu_unlock(&s->mu); - finish_shutdown(exec_ctx, s); - } -} - -void grpc_udp_server_destroy(grpc_exec_ctx *exec_ctx, grpc_udp_server *s, - grpc_closure *on_done) { - grpc_udp_listener *sp; - gpr_mu_lock(&s->mu); - - GPR_ASSERT(!s->shutdown); - s->shutdown = 1; - - s->shutdown_complete = on_done; - - /* shutdown all fd's */ - if (s->active_ports) { - for (sp = s->head; sp; sp = sp->next) { - GPR_ASSERT(sp->orphan_cb); - struct shutdown_fd_args *args = - (struct shutdown_fd_args *)gpr_malloc(sizeof(*args)); - args->fd = sp->emfd; - args->server_mu = &s->mu; - GRPC_CLOSURE_INIT(&sp->orphan_fd_closure, shutdown_fd, args, - grpc_schedule_on_exec_ctx); - sp->orphan_cb(exec_ctx, sp->emfd, &sp->orphan_fd_closure, - sp->server->user_data); - sp->orphan_notified = true; - } - gpr_mu_unlock(&s->mu); - } else { - gpr_mu_unlock(&s->mu); - deactivated_all_ports(exec_ctx, s); - } -} - -static int bind_socket(grpc_socket_factory *socket_factory, int sockfd, - const grpc_resolved_address *addr) { - return (socket_factory != NULL) - ? grpc_socket_factory_bind(socket_factory, sockfd, addr) - : bind(sockfd, (struct sockaddr *)addr->addr, - (socklen_t)addr->len); -} - -/* Prepare a recently-created socket for listening. */ -static int prepare_socket(grpc_socket_factory *socket_factory, int fd, - const grpc_resolved_address *addr) { - grpc_resolved_address sockname_temp; - struct sockaddr *addr_ptr = (struct sockaddr *)addr->addr; - /* Set send/receive socket buffers to 1 MB */ - int buffer_size_bytes = 1024 * 1024; - - if (fd < 0) { - goto error; - } - - if (grpc_set_socket_nonblocking(fd, 1) != GRPC_ERROR_NONE) { - gpr_log(GPR_ERROR, "Unable to set nonblocking %d: %s", fd, strerror(errno)); - goto error; - } - if (grpc_set_socket_cloexec(fd, 1) != GRPC_ERROR_NONE) { - gpr_log(GPR_ERROR, "Unable to set cloexec %d: %s", fd, strerror(errno)); - goto error; - } - - if (grpc_set_socket_ip_pktinfo_if_possible(fd) != GRPC_ERROR_NONE) { - gpr_log(GPR_ERROR, "Unable to set ip_pktinfo."); - goto error; - } else if (addr_ptr->sa_family == AF_INET6) { - if (grpc_set_socket_ipv6_recvpktinfo_if_possible(fd) != GRPC_ERROR_NONE) { - gpr_log(GPR_ERROR, "Unable to set ipv6_recvpktinfo."); - goto error; - } - } - - GPR_ASSERT(addr->len < ~(socklen_t)0); - if (bind_socket(socket_factory, fd, addr) < 0) { - char *addr_str; - grpc_sockaddr_to_string(&addr_str, addr, 0); - gpr_log(GPR_ERROR, "bind addr=%s: %s", addr_str, strerror(errno)); - gpr_free(addr_str); - goto error; - } - - sockname_temp.len = sizeof(struct sockaddr_storage); - - if (getsockname(fd, (struct sockaddr *)sockname_temp.addr, - (socklen_t *)&sockname_temp.len) < 0) { - goto error; - } - - if (grpc_set_socket_sndbuf(fd, buffer_size_bytes) != GRPC_ERROR_NONE) { - gpr_log(GPR_ERROR, "Failed to set send buffer size to %d bytes", - buffer_size_bytes); - goto error; - } - - if (grpc_set_socket_rcvbuf(fd, buffer_size_bytes) != GRPC_ERROR_NONE) { - gpr_log(GPR_ERROR, "Failed to set receive buffer size to %d bytes", - buffer_size_bytes); - goto error; - } - - return grpc_sockaddr_get_port(&sockname_temp); - -error: - if (fd >= 0) { - close(fd); - } - return -1; -} - -/* event manager callback when reads are ready */ -static void on_read(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { - grpc_udp_listener *sp = (grpc_udp_listener *)arg; - - gpr_mu_lock(&sp->server->mu); - if (error != GRPC_ERROR_NONE) { - if (0 == --sp->server->active_ports && sp->server->shutdown) { - gpr_mu_unlock(&sp->server->mu); - deactivated_all_ports(exec_ctx, sp->server); - } else { - gpr_mu_unlock(&sp->server->mu); - } - return; - } - - /* Tell the registered callback that data is available to read. */ - GPR_ASSERT(sp->read_cb); - sp->read_cb(exec_ctx, sp->emfd, sp->server->user_data); - - /* Re-arm the notification event so we get another chance to read. */ - grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); - gpr_mu_unlock(&sp->server->mu); -} - -static void on_write(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { - grpc_udp_listener *sp = (grpc_udp_listener *)arg; - - gpr_mu_lock(&(sp->server->mu)); - if (error != GRPC_ERROR_NONE) { - if (0 == --sp->server->active_ports && sp->server->shutdown) { - gpr_mu_unlock(&sp->server->mu); - deactivated_all_ports(exec_ctx, sp->server); - } else { - gpr_mu_unlock(&sp->server->mu); - } - return; - } - - /* Tell the registered callback that the socket is writeable. */ - GPR_ASSERT(sp->write_cb); - sp->write_cb(exec_ctx, sp->emfd, sp->server->user_data); - - /* Re-arm the notification event so we get another chance to write. */ - grpc_fd_notify_on_write(exec_ctx, sp->emfd, &sp->write_closure); - gpr_mu_unlock(&sp->server->mu); -} - -static int add_socket_to_server(grpc_udp_server *s, int fd, - const grpc_resolved_address *addr, - grpc_udp_server_read_cb read_cb, - grpc_udp_server_write_cb write_cb, - grpc_udp_server_orphan_cb orphan_cb) { - grpc_udp_listener *sp; - int port; - char *addr_str; - char *name; - - port = prepare_socket(s->socket_factory, fd, addr); - if (port >= 0) { - grpc_sockaddr_to_string(&addr_str, addr, 1); - gpr_asprintf(&name, "udp-server-listener:%s", addr_str); - gpr_free(addr_str); - gpr_mu_lock(&s->mu); - s->nports++; - sp = (grpc_udp_listener *)gpr_malloc(sizeof(grpc_udp_listener)); - sp->next = NULL; - if (s->head == NULL) { - s->head = sp; - } else { - s->tail->next = sp; - } - s->tail = sp; - sp->server = s; - sp->fd = fd; - sp->emfd = grpc_fd_create(fd, name); - memcpy(&sp->addr, addr, sizeof(grpc_resolved_address)); - sp->read_cb = read_cb; - sp->write_cb = write_cb; - sp->orphan_cb = orphan_cb; - sp->orphan_notified = false; - GPR_ASSERT(sp->emfd); - gpr_mu_unlock(&s->mu); - gpr_free(name); - } - - return port; -} - -int grpc_udp_server_add_port(grpc_udp_server *s, - const grpc_resolved_address *addr, - grpc_udp_server_read_cb read_cb, - grpc_udp_server_write_cb write_cb, - grpc_udp_server_orphan_cb orphan_cb) { - grpc_udp_listener *sp; - int allocated_port1 = -1; - int allocated_port2 = -1; - int fd; - grpc_dualstack_mode dsmode; - grpc_resolved_address addr6_v4mapped; - grpc_resolved_address wild4; - grpc_resolved_address wild6; - grpc_resolved_address addr4_copy; - grpc_resolved_address *allocated_addr = NULL; - grpc_resolved_address sockname_temp; - int port; - - /* Check if this is a wildcard port, and if so, try to keep the port the same - as some previously created listener. */ - if (grpc_sockaddr_get_port(addr) == 0) { - for (sp = s->head; sp; sp = sp->next) { - sockname_temp.len = sizeof(struct sockaddr_storage); - if (0 == getsockname(sp->fd, (struct sockaddr *)sockname_temp.addr, - (socklen_t *)&sockname_temp.len)) { - port = grpc_sockaddr_get_port(&sockname_temp); - if (port > 0) { - allocated_addr = (grpc_resolved_address *)gpr_malloc( - sizeof(grpc_resolved_address)); - memcpy(allocated_addr, addr, sizeof(grpc_resolved_address)); - grpc_sockaddr_set_port(allocated_addr, port); - addr = allocated_addr; - break; - } - } - } - } - - if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { - addr = &addr6_v4mapped; - } - - /* Treat :: or 0.0.0.0 as a family-agnostic wildcard. */ - if (grpc_sockaddr_is_wildcard(addr, &port)) { - grpc_sockaddr_make_wildcards(port, &wild4, &wild6); - - /* Try listening on IPv6 first. */ - addr = &wild6; - // TODO(rjshade): Test and propagate the returned grpc_error*: - GRPC_ERROR_UNREF(grpc_create_dualstack_socket_using_factory( - s->socket_factory, addr, SOCK_DGRAM, IPPROTO_UDP, &dsmode, &fd)); - allocated_port1 = - add_socket_to_server(s, fd, addr, read_cb, write_cb, orphan_cb); - if (fd >= 0 && dsmode == GRPC_DSMODE_DUALSTACK) { - goto done; - } - - /* If we didn't get a dualstack socket, also listen on 0.0.0.0. */ - if (port == 0 && allocated_port1 > 0) { - grpc_sockaddr_set_port(&wild4, allocated_port1); - } - addr = &wild4; - } - - // TODO(rjshade): Test and propagate the returned grpc_error*: - GRPC_ERROR_UNREF(grpc_create_dualstack_socket_using_factory( - s->socket_factory, addr, SOCK_DGRAM, IPPROTO_UDP, &dsmode, &fd)); - if (fd < 0) { - gpr_log(GPR_ERROR, "Unable to create socket: %s", strerror(errno)); - } - if (dsmode == GRPC_DSMODE_IPV4 && - grpc_sockaddr_is_v4mapped(addr, &addr4_copy)) { - addr = &addr4_copy; - } - allocated_port2 = - add_socket_to_server(s, fd, addr, read_cb, write_cb, orphan_cb); - -done: - gpr_free(allocated_addr); - return allocated_port1 >= 0 ? allocated_port1 : allocated_port2; -} - -int grpc_udp_server_get_fd(grpc_udp_server *s, unsigned port_index) { - grpc_udp_listener *sp; - if (port_index >= s->nports) { - return -1; - } - - for (sp = s->head; sp && port_index != 0; sp = sp->next) { - --port_index; - } - return sp->fd; -} - -void grpc_udp_server_start(grpc_exec_ctx *exec_ctx, grpc_udp_server *s, - grpc_pollset **pollsets, size_t pollset_count, - void *user_data) { - size_t i; - gpr_mu_lock(&s->mu); - grpc_udp_listener *sp; - GPR_ASSERT(s->active_ports == 0); - s->pollsets = pollsets; - s->user_data = user_data; - - sp = s->head; - while (sp != NULL) { - for (i = 0; i < pollset_count; i++) { - grpc_pollset_add_fd(exec_ctx, pollsets[i], sp->emfd); - } - GRPC_CLOSURE_INIT(&sp->read_closure, on_read, sp, - grpc_schedule_on_exec_ctx); - grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); - - GRPC_CLOSURE_INIT(&sp->write_closure, on_write, sp, - grpc_schedule_on_exec_ctx); - grpc_fd_notify_on_write(exec_ctx, sp->emfd, &sp->write_closure); - - /* Registered for both read and write callbacks: increment active_ports - * twice to account for this, and delay free-ing of memory until both - * on_read and on_write have fired. */ - s->active_ports += 2; - - sp = sp->next; - } - - gpr_mu_unlock(&s->mu); -} - -#endif diff --git a/src/core/lib/iomgr/udp_server.cc b/src/core/lib/iomgr/udp_server.cc new file mode 100644 index 0000000000..00b2e68bb5 --- /dev/null +++ b/src/core/lib/iomgr/udp_server.cc @@ -0,0 +1,549 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* FIXME: "posix" files shouldn't be depending on _GNU_SOURCE */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET + +#include "src/core/lib/iomgr/udp_server.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/iomgr/error.h" +#include "src/core/lib/iomgr/ev_posix.h" +#include "src/core/lib/iomgr/resolve_address.h" +#include "src/core/lib/iomgr/sockaddr.h" +#include "src/core/lib/iomgr/sockaddr_utils.h" +#include "src/core/lib/iomgr/socket_factory_posix.h" +#include "src/core/lib/iomgr/socket_utils_posix.h" +#include "src/core/lib/iomgr/unix_sockets_posix.h" +#include "src/core/lib/support/string.h" + +/* one listening port */ +typedef struct grpc_udp_listener grpc_udp_listener; +struct grpc_udp_listener { + int fd; + grpc_fd *emfd; + grpc_udp_server *server; + grpc_resolved_address addr; + grpc_closure read_closure; + grpc_closure write_closure; + // To be called when corresponding QuicGrpcServer closes all active + // connections. + grpc_closure orphan_fd_closure; + grpc_closure destroyed_closure; + grpc_udp_server_read_cb read_cb; + grpc_udp_server_write_cb write_cb; + grpc_udp_server_orphan_cb orphan_cb; + // True if orphan_cb is trigered. + bool orphan_notified; + + struct grpc_udp_listener *next; +}; + +struct shutdown_fd_args { + grpc_fd *fd; + gpr_mu *server_mu; +}; + +/* the overall server */ +struct grpc_udp_server { + gpr_mu mu; + + /* factory to use for creating and binding sockets, or NULL */ + grpc_socket_factory *socket_factory; + + /* active port count: how many ports are actually still listening */ + size_t active_ports; + /* destroyed port count: how many ports are completely destroyed */ + size_t destroyed_ports; + + /* is this server shutting down? (boolean) */ + int shutdown; + + /* linked list of server ports */ + grpc_udp_listener *head; + grpc_udp_listener *tail; + unsigned nports; + + /* shutdown callback */ + grpc_closure *shutdown_complete; + + /* all pollsets interested in new connections */ + grpc_pollset **pollsets; + /* number of pollsets in the pollsets array */ + size_t pollset_count; + /* opaque object to pass to callbacks */ + void *user_data; +}; + +static grpc_socket_factory *get_socket_factory(const grpc_channel_args *args) { + if (args) { + const grpc_arg *arg = grpc_channel_args_find(args, GRPC_ARG_SOCKET_FACTORY); + if (arg) { + GPR_ASSERT(arg->type == GRPC_ARG_POINTER); + return (grpc_socket_factory *)arg->value.pointer.p; + } + } + return NULL; +} + +grpc_udp_server *grpc_udp_server_create(const grpc_channel_args *args) { + grpc_udp_server *s = (grpc_udp_server *)gpr_malloc(sizeof(grpc_udp_server)); + gpr_mu_init(&s->mu); + s->socket_factory = get_socket_factory(args); + if (s->socket_factory) { + grpc_socket_factory_ref(s->socket_factory); + } + s->active_ports = 0; + s->destroyed_ports = 0; + s->shutdown = 0; + s->head = NULL; + s->tail = NULL; + s->nports = 0; + + return s; +} + +static void shutdown_fd(grpc_exec_ctx *exec_ctx, void *args, + grpc_error *error) { + struct shutdown_fd_args *shutdown_args = (struct shutdown_fd_args *)args; + gpr_mu_lock(shutdown_args->server_mu); + grpc_fd_shutdown(exec_ctx, shutdown_args->fd, GRPC_ERROR_REF(error)); + gpr_mu_unlock(shutdown_args->server_mu); + gpr_free(shutdown_args); +} + +static void dummy_cb(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { + // No-op. +} + +static void finish_shutdown(grpc_exec_ctx *exec_ctx, grpc_udp_server *s) { + if (s->shutdown_complete != NULL) { + GRPC_CLOSURE_SCHED(exec_ctx, s->shutdown_complete, GRPC_ERROR_NONE); + } + + gpr_mu_destroy(&s->mu); + + while (s->head) { + grpc_udp_listener *sp = s->head; + s->head = sp->next; + gpr_free(sp); + } + + if (s->socket_factory) { + grpc_socket_factory_unref(s->socket_factory); + } + + gpr_free(s); +} + +static void destroyed_port(grpc_exec_ctx *exec_ctx, void *server, + grpc_error *error) { + grpc_udp_server *s = (grpc_udp_server *)server; + gpr_mu_lock(&s->mu); + s->destroyed_ports++; + if (s->destroyed_ports == s->nports) { + gpr_mu_unlock(&s->mu); + finish_shutdown(exec_ctx, s); + } else { + gpr_mu_unlock(&s->mu); + } +} + +/* called when all listening endpoints have been shutdown, so no further + events will be received on them - at this point it's safe to destroy + things */ +static void deactivated_all_ports(grpc_exec_ctx *exec_ctx, grpc_udp_server *s) { + /* delete ALL the things */ + gpr_mu_lock(&s->mu); + + GPR_ASSERT(s->shutdown); + + if (s->head) { + grpc_udp_listener *sp; + for (sp = s->head; sp; sp = sp->next) { + grpc_unlink_if_unix_domain_socket(&sp->addr); + + GRPC_CLOSURE_INIT(&sp->destroyed_closure, destroyed_port, s, + grpc_schedule_on_exec_ctx); + if (!sp->orphan_notified) { + /* Call the orphan_cb to signal that the FD is about to be closed and + * should no longer be used. Because at this point, all listening ports + * have been shutdown already, no need to shutdown again.*/ + GRPC_CLOSURE_INIT(&sp->orphan_fd_closure, dummy_cb, sp->emfd, + grpc_schedule_on_exec_ctx); + GPR_ASSERT(sp->orphan_cb); + sp->orphan_cb(exec_ctx, sp->emfd, &sp->orphan_fd_closure, + sp->server->user_data); + } + grpc_fd_orphan(exec_ctx, sp->emfd, &sp->destroyed_closure, NULL, + false /* already_closed */, "udp_listener_shutdown"); + } + gpr_mu_unlock(&s->mu); + } else { + gpr_mu_unlock(&s->mu); + finish_shutdown(exec_ctx, s); + } +} + +void grpc_udp_server_destroy(grpc_exec_ctx *exec_ctx, grpc_udp_server *s, + grpc_closure *on_done) { + grpc_udp_listener *sp; + gpr_mu_lock(&s->mu); + + GPR_ASSERT(!s->shutdown); + s->shutdown = 1; + + s->shutdown_complete = on_done; + + /* shutdown all fd's */ + if (s->active_ports) { + for (sp = s->head; sp; sp = sp->next) { + GPR_ASSERT(sp->orphan_cb); + struct shutdown_fd_args *args = + (struct shutdown_fd_args *)gpr_malloc(sizeof(*args)); + args->fd = sp->emfd; + args->server_mu = &s->mu; + GRPC_CLOSURE_INIT(&sp->orphan_fd_closure, shutdown_fd, args, + grpc_schedule_on_exec_ctx); + sp->orphan_cb(exec_ctx, sp->emfd, &sp->orphan_fd_closure, + sp->server->user_data); + sp->orphan_notified = true; + } + gpr_mu_unlock(&s->mu); + } else { + gpr_mu_unlock(&s->mu); + deactivated_all_ports(exec_ctx, s); + } +} + +static int bind_socket(grpc_socket_factory *socket_factory, int sockfd, + const grpc_resolved_address *addr) { + return (socket_factory != NULL) + ? grpc_socket_factory_bind(socket_factory, sockfd, addr) + : bind(sockfd, (struct sockaddr *)addr->addr, + (socklen_t)addr->len); +} + +/* Prepare a recently-created socket for listening. */ +static int prepare_socket(grpc_socket_factory *socket_factory, int fd, + const grpc_resolved_address *addr) { + grpc_resolved_address sockname_temp; + struct sockaddr *addr_ptr = (struct sockaddr *)addr->addr; + /* Set send/receive socket buffers to 1 MB */ + int buffer_size_bytes = 1024 * 1024; + + if (fd < 0) { + goto error; + } + + if (grpc_set_socket_nonblocking(fd, 1) != GRPC_ERROR_NONE) { + gpr_log(GPR_ERROR, "Unable to set nonblocking %d: %s", fd, strerror(errno)); + goto error; + } + if (grpc_set_socket_cloexec(fd, 1) != GRPC_ERROR_NONE) { + gpr_log(GPR_ERROR, "Unable to set cloexec %d: %s", fd, strerror(errno)); + goto error; + } + + if (grpc_set_socket_ip_pktinfo_if_possible(fd) != GRPC_ERROR_NONE) { + gpr_log(GPR_ERROR, "Unable to set ip_pktinfo."); + goto error; + } else if (addr_ptr->sa_family == AF_INET6) { + if (grpc_set_socket_ipv6_recvpktinfo_if_possible(fd) != GRPC_ERROR_NONE) { + gpr_log(GPR_ERROR, "Unable to set ipv6_recvpktinfo."); + goto error; + } + } + + GPR_ASSERT(addr->len < ~(socklen_t)0); + if (bind_socket(socket_factory, fd, addr) < 0) { + char *addr_str; + grpc_sockaddr_to_string(&addr_str, addr, 0); + gpr_log(GPR_ERROR, "bind addr=%s: %s", addr_str, strerror(errno)); + gpr_free(addr_str); + goto error; + } + + sockname_temp.len = sizeof(struct sockaddr_storage); + + if (getsockname(fd, (struct sockaddr *)sockname_temp.addr, + (socklen_t *)&sockname_temp.len) < 0) { + goto error; + } + + if (grpc_set_socket_sndbuf(fd, buffer_size_bytes) != GRPC_ERROR_NONE) { + gpr_log(GPR_ERROR, "Failed to set send buffer size to %d bytes", + buffer_size_bytes); + goto error; + } + + if (grpc_set_socket_rcvbuf(fd, buffer_size_bytes) != GRPC_ERROR_NONE) { + gpr_log(GPR_ERROR, "Failed to set receive buffer size to %d bytes", + buffer_size_bytes); + goto error; + } + + return grpc_sockaddr_get_port(&sockname_temp); + +error: + if (fd >= 0) { + close(fd); + } + return -1; +} + +/* event manager callback when reads are ready */ +static void on_read(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { + grpc_udp_listener *sp = (grpc_udp_listener *)arg; + + gpr_mu_lock(&sp->server->mu); + if (error != GRPC_ERROR_NONE) { + if (0 == --sp->server->active_ports && sp->server->shutdown) { + gpr_mu_unlock(&sp->server->mu); + deactivated_all_ports(exec_ctx, sp->server); + } else { + gpr_mu_unlock(&sp->server->mu); + } + return; + } + + /* Tell the registered callback that data is available to read. */ + GPR_ASSERT(sp->read_cb); + sp->read_cb(exec_ctx, sp->emfd, sp->server->user_data); + + /* Re-arm the notification event so we get another chance to read. */ + grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); + gpr_mu_unlock(&sp->server->mu); +} + +static void on_write(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { + grpc_udp_listener *sp = (grpc_udp_listener *)arg; + + gpr_mu_lock(&(sp->server->mu)); + if (error != GRPC_ERROR_NONE) { + if (0 == --sp->server->active_ports && sp->server->shutdown) { + gpr_mu_unlock(&sp->server->mu); + deactivated_all_ports(exec_ctx, sp->server); + } else { + gpr_mu_unlock(&sp->server->mu); + } + return; + } + + /* Tell the registered callback that the socket is writeable. */ + GPR_ASSERT(sp->write_cb); + sp->write_cb(exec_ctx, sp->emfd, sp->server->user_data); + + /* Re-arm the notification event so we get another chance to write. */ + grpc_fd_notify_on_write(exec_ctx, sp->emfd, &sp->write_closure); + gpr_mu_unlock(&sp->server->mu); +} + +static int add_socket_to_server(grpc_udp_server *s, int fd, + const grpc_resolved_address *addr, + grpc_udp_server_read_cb read_cb, + grpc_udp_server_write_cb write_cb, + grpc_udp_server_orphan_cb orphan_cb) { + grpc_udp_listener *sp; + int port; + char *addr_str; + char *name; + + port = prepare_socket(s->socket_factory, fd, addr); + if (port >= 0) { + grpc_sockaddr_to_string(&addr_str, addr, 1); + gpr_asprintf(&name, "udp-server-listener:%s", addr_str); + gpr_free(addr_str); + gpr_mu_lock(&s->mu); + s->nports++; + sp = (grpc_udp_listener *)gpr_malloc(sizeof(grpc_udp_listener)); + sp->next = NULL; + if (s->head == NULL) { + s->head = sp; + } else { + s->tail->next = sp; + } + s->tail = sp; + sp->server = s; + sp->fd = fd; + sp->emfd = grpc_fd_create(fd, name); + memcpy(&sp->addr, addr, sizeof(grpc_resolved_address)); + sp->read_cb = read_cb; + sp->write_cb = write_cb; + sp->orphan_cb = orphan_cb; + sp->orphan_notified = false; + GPR_ASSERT(sp->emfd); + gpr_mu_unlock(&s->mu); + gpr_free(name); + } + + return port; +} + +int grpc_udp_server_add_port(grpc_udp_server *s, + const grpc_resolved_address *addr, + grpc_udp_server_read_cb read_cb, + grpc_udp_server_write_cb write_cb, + grpc_udp_server_orphan_cb orphan_cb) { + grpc_udp_listener *sp; + int allocated_port1 = -1; + int allocated_port2 = -1; + int fd; + grpc_dualstack_mode dsmode; + grpc_resolved_address addr6_v4mapped; + grpc_resolved_address wild4; + grpc_resolved_address wild6; + grpc_resolved_address addr4_copy; + grpc_resolved_address *allocated_addr = NULL; + grpc_resolved_address sockname_temp; + int port; + + /* Check if this is a wildcard port, and if so, try to keep the port the same + as some previously created listener. */ + if (grpc_sockaddr_get_port(addr) == 0) { + for (sp = s->head; sp; sp = sp->next) { + sockname_temp.len = sizeof(struct sockaddr_storage); + if (0 == getsockname(sp->fd, (struct sockaddr *)sockname_temp.addr, + (socklen_t *)&sockname_temp.len)) { + port = grpc_sockaddr_get_port(&sockname_temp); + if (port > 0) { + allocated_addr = (grpc_resolved_address *)gpr_malloc( + sizeof(grpc_resolved_address)); + memcpy(allocated_addr, addr, sizeof(grpc_resolved_address)); + grpc_sockaddr_set_port(allocated_addr, port); + addr = allocated_addr; + break; + } + } + } + } + + if (grpc_sockaddr_to_v4mapped(addr, &addr6_v4mapped)) { + addr = &addr6_v4mapped; + } + + /* Treat :: or 0.0.0.0 as a family-agnostic wildcard. */ + if (grpc_sockaddr_is_wildcard(addr, &port)) { + grpc_sockaddr_make_wildcards(port, &wild4, &wild6); + + /* Try listening on IPv6 first. */ + addr = &wild6; + // TODO(rjshade): Test and propagate the returned grpc_error*: + GRPC_ERROR_UNREF(grpc_create_dualstack_socket_using_factory( + s->socket_factory, addr, SOCK_DGRAM, IPPROTO_UDP, &dsmode, &fd)); + allocated_port1 = + add_socket_to_server(s, fd, addr, read_cb, write_cb, orphan_cb); + if (fd >= 0 && dsmode == GRPC_DSMODE_DUALSTACK) { + goto done; + } + + /* If we didn't get a dualstack socket, also listen on 0.0.0.0. */ + if (port == 0 && allocated_port1 > 0) { + grpc_sockaddr_set_port(&wild4, allocated_port1); + } + addr = &wild4; + } + + // TODO(rjshade): Test and propagate the returned grpc_error*: + GRPC_ERROR_UNREF(grpc_create_dualstack_socket_using_factory( + s->socket_factory, addr, SOCK_DGRAM, IPPROTO_UDP, &dsmode, &fd)); + if (fd < 0) { + gpr_log(GPR_ERROR, "Unable to create socket: %s", strerror(errno)); + } + if (dsmode == GRPC_DSMODE_IPV4 && + grpc_sockaddr_is_v4mapped(addr, &addr4_copy)) { + addr = &addr4_copy; + } + allocated_port2 = + add_socket_to_server(s, fd, addr, read_cb, write_cb, orphan_cb); + +done: + gpr_free(allocated_addr); + return allocated_port1 >= 0 ? allocated_port1 : allocated_port2; +} + +int grpc_udp_server_get_fd(grpc_udp_server *s, unsigned port_index) { + grpc_udp_listener *sp; + if (port_index >= s->nports) { + return -1; + } + + for (sp = s->head; sp && port_index != 0; sp = sp->next) { + --port_index; + } + return sp->fd; +} + +void grpc_udp_server_start(grpc_exec_ctx *exec_ctx, grpc_udp_server *s, + grpc_pollset **pollsets, size_t pollset_count, + void *user_data) { + size_t i; + gpr_mu_lock(&s->mu); + grpc_udp_listener *sp; + GPR_ASSERT(s->active_ports == 0); + s->pollsets = pollsets; + s->user_data = user_data; + + sp = s->head; + while (sp != NULL) { + for (i = 0; i < pollset_count; i++) { + grpc_pollset_add_fd(exec_ctx, pollsets[i], sp->emfd); + } + GRPC_CLOSURE_INIT(&sp->read_closure, on_read, sp, + grpc_schedule_on_exec_ctx); + grpc_fd_notify_on_read(exec_ctx, sp->emfd, &sp->read_closure); + + GRPC_CLOSURE_INIT(&sp->write_closure, on_write, sp, + grpc_schedule_on_exec_ctx); + grpc_fd_notify_on_write(exec_ctx, sp->emfd, &sp->write_closure); + + /* Registered for both read and write callbacks: increment active_ports + * twice to account for this, and delay free-ing of memory until both + * on_read and on_write have fired. */ + s->active_ports += 2; + + sp = sp->next; + } + + gpr_mu_unlock(&s->mu); +} + +#endif diff --git a/src/core/lib/iomgr/unix_sockets_posix.c b/src/core/lib/iomgr/unix_sockets_posix.c deleted file mode 100644 index 35f898f13a..0000000000 --- a/src/core/lib/iomgr/unix_sockets_posix.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_HAVE_UNIX_SOCKET - -#include "src/core/lib/iomgr/sockaddr.h" - -#include -#include -#include -#include - -#include "src/core/lib/iomgr/unix_sockets_posix.h" - -#include -#include -#include - -void grpc_create_socketpair_if_unix(int sv[2]) { - GPR_ASSERT(socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == 0); -} - -grpc_error *grpc_resolve_unix_domain_address(const char *name, - grpc_resolved_addresses **addrs) { - struct sockaddr_un *un; - if (strlen(name) > GPR_ARRAY_SIZE(((struct sockaddr_un *)0)->sun_path) - 1) { - char *err_msg; - grpc_error *err; - gpr_asprintf(&err_msg, - "Path name should not have more than %" PRIuPTR " characters.", - GPR_ARRAY_SIZE(un->sun_path) - 1); - err = GRPC_ERROR_CREATE_FROM_COPIED_STRING(err_msg); - gpr_free(err_msg); - return err; - } - *addrs = - (grpc_resolved_addresses *)gpr_malloc(sizeof(grpc_resolved_addresses)); - (*addrs)->naddrs = 1; - (*addrs)->addrs = - (grpc_resolved_address *)gpr_malloc(sizeof(grpc_resolved_address)); - un = (struct sockaddr_un *)(*addrs)->addrs->addr; - un->sun_family = AF_UNIX; - strcpy(un->sun_path, name); - (*addrs)->addrs->len = strlen(un->sun_path) + sizeof(un->sun_family) + 1; - return GRPC_ERROR_NONE; -} - -int grpc_is_unix_socket(const grpc_resolved_address *resolved_addr) { - const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; - return addr->sa_family == AF_UNIX; -} - -void grpc_unlink_if_unix_domain_socket( - const grpc_resolved_address *resolved_addr) { - const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; - if (addr->sa_family != AF_UNIX) { - return; - } - struct sockaddr_un *un = (struct sockaddr_un *)resolved_addr->addr; - struct stat st; - - if (stat(un->sun_path, &st) == 0 && (st.st_mode & S_IFMT) == S_IFSOCK) { - unlink(un->sun_path); - } -} - -char *grpc_sockaddr_to_uri_unix_if_possible( - const grpc_resolved_address *resolved_addr) { - const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; - if (addr->sa_family != AF_UNIX) { - return NULL; - } - - char *result; - gpr_asprintf(&result, "unix:%s", ((struct sockaddr_un *)addr)->sun_path); - return result; -} - -#endif diff --git a/src/core/lib/iomgr/unix_sockets_posix.cc b/src/core/lib/iomgr/unix_sockets_posix.cc new file mode 100644 index 0000000000..35f898f13a --- /dev/null +++ b/src/core/lib/iomgr/unix_sockets_posix.cc @@ -0,0 +1,95 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_HAVE_UNIX_SOCKET + +#include "src/core/lib/iomgr/sockaddr.h" + +#include +#include +#include +#include + +#include "src/core/lib/iomgr/unix_sockets_posix.h" + +#include +#include +#include + +void grpc_create_socketpair_if_unix(int sv[2]) { + GPR_ASSERT(socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == 0); +} + +grpc_error *grpc_resolve_unix_domain_address(const char *name, + grpc_resolved_addresses **addrs) { + struct sockaddr_un *un; + if (strlen(name) > GPR_ARRAY_SIZE(((struct sockaddr_un *)0)->sun_path) - 1) { + char *err_msg; + grpc_error *err; + gpr_asprintf(&err_msg, + "Path name should not have more than %" PRIuPTR " characters.", + GPR_ARRAY_SIZE(un->sun_path) - 1); + err = GRPC_ERROR_CREATE_FROM_COPIED_STRING(err_msg); + gpr_free(err_msg); + return err; + } + *addrs = + (grpc_resolved_addresses *)gpr_malloc(sizeof(grpc_resolved_addresses)); + (*addrs)->naddrs = 1; + (*addrs)->addrs = + (grpc_resolved_address *)gpr_malloc(sizeof(grpc_resolved_address)); + un = (struct sockaddr_un *)(*addrs)->addrs->addr; + un->sun_family = AF_UNIX; + strcpy(un->sun_path, name); + (*addrs)->addrs->len = strlen(un->sun_path) + sizeof(un->sun_family) + 1; + return GRPC_ERROR_NONE; +} + +int grpc_is_unix_socket(const grpc_resolved_address *resolved_addr) { + const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; + return addr->sa_family == AF_UNIX; +} + +void grpc_unlink_if_unix_domain_socket( + const grpc_resolved_address *resolved_addr) { + const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; + if (addr->sa_family != AF_UNIX) { + return; + } + struct sockaddr_un *un = (struct sockaddr_un *)resolved_addr->addr; + struct stat st; + + if (stat(un->sun_path, &st) == 0 && (st.st_mode & S_IFMT) == S_IFSOCK) { + unlink(un->sun_path); + } +} + +char *grpc_sockaddr_to_uri_unix_if_possible( + const grpc_resolved_address *resolved_addr) { + const struct sockaddr *addr = (const struct sockaddr *)resolved_addr->addr; + if (addr->sa_family != AF_UNIX) { + return NULL; + } + + char *result; + gpr_asprintf(&result, "unix:%s", ((struct sockaddr_un *)addr)->sun_path); + return result; +} + +#endif diff --git a/src/core/lib/iomgr/unix_sockets_posix_noop.c b/src/core/lib/iomgr/unix_sockets_posix_noop.c deleted file mode 100644 index e46b1c003d..0000000000 --- a/src/core/lib/iomgr/unix_sockets_posix_noop.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/unix_sockets_posix.h" - -#ifndef GRPC_HAVE_UNIX_SOCKET - -#include - -void grpc_create_socketpair_if_unix(int sv[2]) { - // TODO: Either implement this for the non-Unix socket case or make - // sure that it is never called in any such case. Until then, leave an - // assertion to notify if this gets called inadvertently - GPR_ASSERT(0); -} - -grpc_error *grpc_resolve_unix_domain_address( - const char *name, grpc_resolved_addresses **addresses) { - *addresses = NULL; - return GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "Unix domain sockets are not supported on Windows"); -} - -int grpc_is_unix_socket(const grpc_resolved_address *addr) { return false; } - -void grpc_unlink_if_unix_domain_socket(const grpc_resolved_address *addr) {} - -char *grpc_sockaddr_to_uri_unix_if_possible(const grpc_resolved_address *addr) { - return NULL; -} - -#endif diff --git a/src/core/lib/iomgr/unix_sockets_posix_noop.cc b/src/core/lib/iomgr/unix_sockets_posix_noop.cc new file mode 100644 index 0000000000..e46b1c003d --- /dev/null +++ b/src/core/lib/iomgr/unix_sockets_posix_noop.cc @@ -0,0 +1,47 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/unix_sockets_posix.h" + +#ifndef GRPC_HAVE_UNIX_SOCKET + +#include + +void grpc_create_socketpair_if_unix(int sv[2]) { + // TODO: Either implement this for the non-Unix socket case or make + // sure that it is never called in any such case. Until then, leave an + // assertion to notify if this gets called inadvertently + GPR_ASSERT(0); +} + +grpc_error *grpc_resolve_unix_domain_address( + const char *name, grpc_resolved_addresses **addresses) { + *addresses = NULL; + return GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "Unix domain sockets are not supported on Windows"); +} + +int grpc_is_unix_socket(const grpc_resolved_address *addr) { return false; } + +void grpc_unlink_if_unix_domain_socket(const grpc_resolved_address *addr) {} + +char *grpc_sockaddr_to_uri_unix_if_possible(const grpc_resolved_address *addr) { + return NULL; +} + +#endif diff --git a/src/core/lib/iomgr/wakeup_fd_cv.c b/src/core/lib/iomgr/wakeup_fd_cv.c deleted file mode 100644 index 268e0175dd..0000000000 --- a/src/core/lib/iomgr/wakeup_fd_cv.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_WAKEUP_FD - -#include "src/core/lib/iomgr/wakeup_fd_cv.h" - -#include -#include - -#include -#include -#include -#include -#include -#include - -#define MAX_TABLE_RESIZE 256 - -extern cv_fd_table g_cvfds; - -static grpc_error* cv_fd_init(grpc_wakeup_fd* fd_info) { - unsigned int i, newsize; - int idx; - gpr_mu_lock(&g_cvfds.mu); - if (!g_cvfds.free_fds) { - newsize = GPR_MIN(g_cvfds.size * 2, g_cvfds.size + MAX_TABLE_RESIZE); - g_cvfds.cvfds = - (fd_node*)gpr_realloc(g_cvfds.cvfds, sizeof(fd_node) * newsize); - for (i = g_cvfds.size; i < newsize; i++) { - g_cvfds.cvfds[i].is_set = 0; - g_cvfds.cvfds[i].cvs = NULL; - g_cvfds.cvfds[i].next_free = g_cvfds.free_fds; - g_cvfds.free_fds = &g_cvfds.cvfds[i]; - } - g_cvfds.size = newsize; - } - - idx = (int)(g_cvfds.free_fds - g_cvfds.cvfds); - g_cvfds.free_fds = g_cvfds.free_fds->next_free; - g_cvfds.cvfds[idx].cvs = NULL; - g_cvfds.cvfds[idx].is_set = 0; - fd_info->read_fd = GRPC_IDX_TO_FD(idx); - fd_info->write_fd = -1; - gpr_mu_unlock(&g_cvfds.mu); - return GRPC_ERROR_NONE; -} - -static grpc_error* cv_fd_wakeup(grpc_wakeup_fd* fd_info) { - cv_node* cvn; - gpr_mu_lock(&g_cvfds.mu); - g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)].is_set = 1; - cvn = g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)].cvs; - while (cvn) { - gpr_cv_signal(cvn->cv); - cvn = cvn->next; - } - gpr_mu_unlock(&g_cvfds.mu); - return GRPC_ERROR_NONE; -} - -static grpc_error* cv_fd_consume(grpc_wakeup_fd* fd_info) { - gpr_mu_lock(&g_cvfds.mu); - g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)].is_set = 0; - gpr_mu_unlock(&g_cvfds.mu); - return GRPC_ERROR_NONE; -} - -static void cv_fd_destroy(grpc_wakeup_fd* fd_info) { - if (fd_info->read_fd == 0) { - return; - } - gpr_mu_lock(&g_cvfds.mu); - // Assert that there are no active pollers - GPR_ASSERT(!g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)].cvs); - g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)].next_free = g_cvfds.free_fds; - g_cvfds.free_fds = &g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)]; - gpr_mu_unlock(&g_cvfds.mu); -} - -static int cv_check_availability(void) { return 1; } - -const grpc_wakeup_fd_vtable grpc_cv_wakeup_fd_vtable = { - cv_fd_init, cv_fd_consume, cv_fd_wakeup, cv_fd_destroy, - cv_check_availability}; - -#endif /* GRPC_POSIX_WAKUP_FD */ diff --git a/src/core/lib/iomgr/wakeup_fd_cv.cc b/src/core/lib/iomgr/wakeup_fd_cv.cc new file mode 100644 index 0000000000..268e0175dd --- /dev/null +++ b/src/core/lib/iomgr/wakeup_fd_cv.cc @@ -0,0 +1,104 @@ +/* + * + * Copyright 2016 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_WAKEUP_FD + +#include "src/core/lib/iomgr/wakeup_fd_cv.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#define MAX_TABLE_RESIZE 256 + +extern cv_fd_table g_cvfds; + +static grpc_error* cv_fd_init(grpc_wakeup_fd* fd_info) { + unsigned int i, newsize; + int idx; + gpr_mu_lock(&g_cvfds.mu); + if (!g_cvfds.free_fds) { + newsize = GPR_MIN(g_cvfds.size * 2, g_cvfds.size + MAX_TABLE_RESIZE); + g_cvfds.cvfds = + (fd_node*)gpr_realloc(g_cvfds.cvfds, sizeof(fd_node) * newsize); + for (i = g_cvfds.size; i < newsize; i++) { + g_cvfds.cvfds[i].is_set = 0; + g_cvfds.cvfds[i].cvs = NULL; + g_cvfds.cvfds[i].next_free = g_cvfds.free_fds; + g_cvfds.free_fds = &g_cvfds.cvfds[i]; + } + g_cvfds.size = newsize; + } + + idx = (int)(g_cvfds.free_fds - g_cvfds.cvfds); + g_cvfds.free_fds = g_cvfds.free_fds->next_free; + g_cvfds.cvfds[idx].cvs = NULL; + g_cvfds.cvfds[idx].is_set = 0; + fd_info->read_fd = GRPC_IDX_TO_FD(idx); + fd_info->write_fd = -1; + gpr_mu_unlock(&g_cvfds.mu); + return GRPC_ERROR_NONE; +} + +static grpc_error* cv_fd_wakeup(grpc_wakeup_fd* fd_info) { + cv_node* cvn; + gpr_mu_lock(&g_cvfds.mu); + g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)].is_set = 1; + cvn = g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)].cvs; + while (cvn) { + gpr_cv_signal(cvn->cv); + cvn = cvn->next; + } + gpr_mu_unlock(&g_cvfds.mu); + return GRPC_ERROR_NONE; +} + +static grpc_error* cv_fd_consume(grpc_wakeup_fd* fd_info) { + gpr_mu_lock(&g_cvfds.mu); + g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)].is_set = 0; + gpr_mu_unlock(&g_cvfds.mu); + return GRPC_ERROR_NONE; +} + +static void cv_fd_destroy(grpc_wakeup_fd* fd_info) { + if (fd_info->read_fd == 0) { + return; + } + gpr_mu_lock(&g_cvfds.mu); + // Assert that there are no active pollers + GPR_ASSERT(!g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)].cvs); + g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)].next_free = g_cvfds.free_fds; + g_cvfds.free_fds = &g_cvfds.cvfds[GRPC_FD_TO_IDX(fd_info->read_fd)]; + gpr_mu_unlock(&g_cvfds.mu); +} + +static int cv_check_availability(void) { return 1; } + +const grpc_wakeup_fd_vtable grpc_cv_wakeup_fd_vtable = { + cv_fd_init, cv_fd_consume, cv_fd_wakeup, cv_fd_destroy, + cv_check_availability}; + +#endif /* GRPC_POSIX_WAKUP_FD */ diff --git a/src/core/lib/iomgr/wakeup_fd_eventfd.c b/src/core/lib/iomgr/wakeup_fd_eventfd.c deleted file mode 100644 index 81cb7ee280..0000000000 --- a/src/core/lib/iomgr/wakeup_fd_eventfd.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_LINUX_EVENTFD - -#include -#include -#include - -#include - -#include "src/core/lib/iomgr/wakeup_fd_posix.h" -#include "src/core/lib/profiling/timers.h" - -static grpc_error* eventfd_create(grpc_wakeup_fd* fd_info) { - int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (efd < 0) { - return GRPC_OS_ERROR(errno, "eventfd"); - } - fd_info->read_fd = efd; - fd_info->write_fd = -1; - return GRPC_ERROR_NONE; -} - -static grpc_error* eventfd_consume(grpc_wakeup_fd* fd_info) { - eventfd_t value; - int err; - do { - err = eventfd_read(fd_info->read_fd, &value); - } while (err < 0 && errno == EINTR); - if (err < 0 && errno != EAGAIN) { - return GRPC_OS_ERROR(errno, "eventfd_read"); - } - return GRPC_ERROR_NONE; -} - -static grpc_error* eventfd_wakeup(grpc_wakeup_fd* fd_info) { - int err; - GPR_TIMER_BEGIN("eventfd_wakeup", 0); - do { - err = eventfd_write(fd_info->read_fd, 1); - } while (err < 0 && errno == EINTR); - if (err < 0) { - return GRPC_OS_ERROR(errno, "eventfd_write"); - } - GPR_TIMER_END("eventfd_wakeup", 0); - return GRPC_ERROR_NONE; -} - -static void eventfd_destroy(grpc_wakeup_fd* fd_info) { - if (fd_info->read_fd != 0) close(fd_info->read_fd); -} - -static int eventfd_check_availability(void) { - const int efd = eventfd(0, 0); - const int is_available = efd >= 0; - if (is_available) close(efd); - return is_available; -} - -const grpc_wakeup_fd_vtable grpc_specialized_wakeup_fd_vtable = { - eventfd_create, eventfd_consume, eventfd_wakeup, eventfd_destroy, - eventfd_check_availability}; - -#endif /* GRPC_LINUX_EVENTFD */ diff --git a/src/core/lib/iomgr/wakeup_fd_eventfd.cc b/src/core/lib/iomgr/wakeup_fd_eventfd.cc new file mode 100644 index 0000000000..81cb7ee280 --- /dev/null +++ b/src/core/lib/iomgr/wakeup_fd_eventfd.cc @@ -0,0 +1,82 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_LINUX_EVENTFD + +#include +#include +#include + +#include + +#include "src/core/lib/iomgr/wakeup_fd_posix.h" +#include "src/core/lib/profiling/timers.h" + +static grpc_error* eventfd_create(grpc_wakeup_fd* fd_info) { + int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (efd < 0) { + return GRPC_OS_ERROR(errno, "eventfd"); + } + fd_info->read_fd = efd; + fd_info->write_fd = -1; + return GRPC_ERROR_NONE; +} + +static grpc_error* eventfd_consume(grpc_wakeup_fd* fd_info) { + eventfd_t value; + int err; + do { + err = eventfd_read(fd_info->read_fd, &value); + } while (err < 0 && errno == EINTR); + if (err < 0 && errno != EAGAIN) { + return GRPC_OS_ERROR(errno, "eventfd_read"); + } + return GRPC_ERROR_NONE; +} + +static grpc_error* eventfd_wakeup(grpc_wakeup_fd* fd_info) { + int err; + GPR_TIMER_BEGIN("eventfd_wakeup", 0); + do { + err = eventfd_write(fd_info->read_fd, 1); + } while (err < 0 && errno == EINTR); + if (err < 0) { + return GRPC_OS_ERROR(errno, "eventfd_write"); + } + GPR_TIMER_END("eventfd_wakeup", 0); + return GRPC_ERROR_NONE; +} + +static void eventfd_destroy(grpc_wakeup_fd* fd_info) { + if (fd_info->read_fd != 0) close(fd_info->read_fd); +} + +static int eventfd_check_availability(void) { + const int efd = eventfd(0, 0); + const int is_available = efd >= 0; + if (is_available) close(efd); + return is_available; +} + +const grpc_wakeup_fd_vtable grpc_specialized_wakeup_fd_vtable = { + eventfd_create, eventfd_consume, eventfd_wakeup, eventfd_destroy, + eventfd_check_availability}; + +#endif /* GRPC_LINUX_EVENTFD */ diff --git a/src/core/lib/iomgr/wakeup_fd_nospecial.c b/src/core/lib/iomgr/wakeup_fd_nospecial.c deleted file mode 100644 index 4c20b8c1b7..0000000000 --- a/src/core/lib/iomgr/wakeup_fd_nospecial.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -/* - * This is a dummy file to provide an invalid specialized_wakeup_fd_vtable on - * systems without anything better than pipe. - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_NO_SPECIAL_WAKEUP_FD - -#include -#include "src/core/lib/iomgr/wakeup_fd_posix.h" - -static int check_availability_invalid(void) { return 0; } - -const grpc_wakeup_fd_vtable grpc_specialized_wakeup_fd_vtable = { - NULL, NULL, NULL, NULL, check_availability_invalid}; - -#endif /* GRPC_POSIX_NO_SPECIAL_WAKEUP_FD */ diff --git a/src/core/lib/iomgr/wakeup_fd_nospecial.cc b/src/core/lib/iomgr/wakeup_fd_nospecial.cc new file mode 100644 index 0000000000..4c20b8c1b7 --- /dev/null +++ b/src/core/lib/iomgr/wakeup_fd_nospecial.cc @@ -0,0 +1,36 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * This is a dummy file to provide an invalid specialized_wakeup_fd_vtable on + * systems without anything better than pipe. + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_NO_SPECIAL_WAKEUP_FD + +#include +#include "src/core/lib/iomgr/wakeup_fd_posix.h" + +static int check_availability_invalid(void) { return 0; } + +const grpc_wakeup_fd_vtable grpc_specialized_wakeup_fd_vtable = { + NULL, NULL, NULL, NULL, check_availability_invalid}; + +#endif /* GRPC_POSIX_NO_SPECIAL_WAKEUP_FD */ diff --git a/src/core/lib/iomgr/wakeup_fd_pipe.c b/src/core/lib/iomgr/wakeup_fd_pipe.c deleted file mode 100644 index 05d69dc9cc..0000000000 --- a/src/core/lib/iomgr/wakeup_fd_pipe.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_WAKEUP_FD - -#include "src/core/lib/iomgr/wakeup_fd_pipe.h" -#include "src/core/lib/iomgr/wakeup_fd_posix.h" - -#include -#include -#include - -#include - -#include "src/core/lib/iomgr/socket_utils_posix.h" - -static grpc_error* pipe_init(grpc_wakeup_fd* fd_info) { - int pipefd[2]; - int r = pipe(pipefd); - if (0 != r) { - gpr_log(GPR_ERROR, "pipe creation failed (%d): %s", errno, strerror(errno)); - return GRPC_OS_ERROR(errno, "pipe"); - } - grpc_error* err; - err = grpc_set_socket_nonblocking(pipefd[0], 1); - if (err != GRPC_ERROR_NONE) return err; - err = grpc_set_socket_nonblocking(pipefd[1], 1); - if (err != GRPC_ERROR_NONE) return err; - fd_info->read_fd = pipefd[0]; - fd_info->write_fd = pipefd[1]; - return GRPC_ERROR_NONE; -} - -static grpc_error* pipe_consume(grpc_wakeup_fd* fd_info) { - char buf[128]; - ssize_t r; - - for (;;) { - r = read(fd_info->read_fd, buf, sizeof(buf)); - if (r > 0) continue; - if (r == 0) return GRPC_ERROR_NONE; - switch (errno) { - case EAGAIN: - return GRPC_ERROR_NONE; - case EINTR: - continue; - default: - return GRPC_OS_ERROR(errno, "read"); - } - } -} - -static grpc_error* pipe_wakeup(grpc_wakeup_fd* fd_info) { - char c = 0; - while (write(fd_info->write_fd, &c, 1) != 1 && errno == EINTR) - ; - return GRPC_ERROR_NONE; -} - -static void pipe_destroy(grpc_wakeup_fd* fd_info) { - if (fd_info->read_fd != 0) close(fd_info->read_fd); - if (fd_info->write_fd != 0) close(fd_info->write_fd); -} - -static int pipe_check_availability(void) { - grpc_wakeup_fd fd; - fd.read_fd = fd.write_fd = -1; - - if (pipe_init(&fd) == GRPC_ERROR_NONE) { - pipe_destroy(&fd); - return 1; - } else { - return 0; - } -} - -const grpc_wakeup_fd_vtable grpc_pipe_wakeup_fd_vtable = { - pipe_init, pipe_consume, pipe_wakeup, pipe_destroy, - pipe_check_availability}; - -#endif /* GPR_POSIX_WAKUP_FD */ diff --git a/src/core/lib/iomgr/wakeup_fd_pipe.cc b/src/core/lib/iomgr/wakeup_fd_pipe.cc new file mode 100644 index 0000000000..05d69dc9cc --- /dev/null +++ b/src/core/lib/iomgr/wakeup_fd_pipe.cc @@ -0,0 +1,98 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_WAKEUP_FD + +#include "src/core/lib/iomgr/wakeup_fd_pipe.h" +#include "src/core/lib/iomgr/wakeup_fd_posix.h" + +#include +#include +#include + +#include + +#include "src/core/lib/iomgr/socket_utils_posix.h" + +static grpc_error* pipe_init(grpc_wakeup_fd* fd_info) { + int pipefd[2]; + int r = pipe(pipefd); + if (0 != r) { + gpr_log(GPR_ERROR, "pipe creation failed (%d): %s", errno, strerror(errno)); + return GRPC_OS_ERROR(errno, "pipe"); + } + grpc_error* err; + err = grpc_set_socket_nonblocking(pipefd[0], 1); + if (err != GRPC_ERROR_NONE) return err; + err = grpc_set_socket_nonblocking(pipefd[1], 1); + if (err != GRPC_ERROR_NONE) return err; + fd_info->read_fd = pipefd[0]; + fd_info->write_fd = pipefd[1]; + return GRPC_ERROR_NONE; +} + +static grpc_error* pipe_consume(grpc_wakeup_fd* fd_info) { + char buf[128]; + ssize_t r; + + for (;;) { + r = read(fd_info->read_fd, buf, sizeof(buf)); + if (r > 0) continue; + if (r == 0) return GRPC_ERROR_NONE; + switch (errno) { + case EAGAIN: + return GRPC_ERROR_NONE; + case EINTR: + continue; + default: + return GRPC_OS_ERROR(errno, "read"); + } + } +} + +static grpc_error* pipe_wakeup(grpc_wakeup_fd* fd_info) { + char c = 0; + while (write(fd_info->write_fd, &c, 1) != 1 && errno == EINTR) + ; + return GRPC_ERROR_NONE; +} + +static void pipe_destroy(grpc_wakeup_fd* fd_info) { + if (fd_info->read_fd != 0) close(fd_info->read_fd); + if (fd_info->write_fd != 0) close(fd_info->write_fd); +} + +static int pipe_check_availability(void) { + grpc_wakeup_fd fd; + fd.read_fd = fd.write_fd = -1; + + if (pipe_init(&fd) == GRPC_ERROR_NONE) { + pipe_destroy(&fd); + return 1; + } else { + return 0; + } +} + +const grpc_wakeup_fd_vtable grpc_pipe_wakeup_fd_vtable = { + pipe_init, pipe_consume, pipe_wakeup, pipe_destroy, + pipe_check_availability}; + +#endif /* GPR_POSIX_WAKUP_FD */ diff --git a/src/core/lib/iomgr/wakeup_fd_posix.c b/src/core/lib/iomgr/wakeup_fd_posix.c deleted file mode 100644 index 9af96d314b..0000000000 --- a/src/core/lib/iomgr/wakeup_fd_posix.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include "src/core/lib/iomgr/port.h" - -#ifdef GRPC_POSIX_WAKEUP_FD - -#include -#include "src/core/lib/iomgr/wakeup_fd_cv.h" -#include "src/core/lib/iomgr/wakeup_fd_pipe.h" -#include "src/core/lib/iomgr/wakeup_fd_posix.h" - -static const grpc_wakeup_fd_vtable *wakeup_fd_vtable = NULL; - -int grpc_allow_specialized_wakeup_fd = 1; -int grpc_allow_pipe_wakeup_fd = 1; - -int has_real_wakeup_fd = 1; -int cv_wakeup_fds_enabled = 0; - -void grpc_wakeup_fd_global_init(void) { - if (grpc_allow_specialized_wakeup_fd && - grpc_specialized_wakeup_fd_vtable.check_availability()) { - wakeup_fd_vtable = &grpc_specialized_wakeup_fd_vtable; - } else if (grpc_allow_pipe_wakeup_fd && - grpc_pipe_wakeup_fd_vtable.check_availability()) { - wakeup_fd_vtable = &grpc_pipe_wakeup_fd_vtable; - } else { - has_real_wakeup_fd = 0; - } -} - -void grpc_wakeup_fd_global_destroy(void) { wakeup_fd_vtable = NULL; } - -int grpc_has_wakeup_fd(void) { return has_real_wakeup_fd; } - -int grpc_cv_wakeup_fds_enabled(void) { return cv_wakeup_fds_enabled; } - -void grpc_enable_cv_wakeup_fds(int enable) { cv_wakeup_fds_enabled = enable; } - -grpc_error *grpc_wakeup_fd_init(grpc_wakeup_fd *fd_info) { - if (cv_wakeup_fds_enabled) { - return grpc_cv_wakeup_fd_vtable.init(fd_info); - } - return wakeup_fd_vtable->init(fd_info); -} - -grpc_error *grpc_wakeup_fd_consume_wakeup(grpc_wakeup_fd *fd_info) { - if (cv_wakeup_fds_enabled) { - return grpc_cv_wakeup_fd_vtable.consume(fd_info); - } - return wakeup_fd_vtable->consume(fd_info); -} - -grpc_error *grpc_wakeup_fd_wakeup(grpc_wakeup_fd *fd_info) { - if (cv_wakeup_fds_enabled) { - return grpc_cv_wakeup_fd_vtable.wakeup(fd_info); - } - return wakeup_fd_vtable->wakeup(fd_info); -} - -void grpc_wakeup_fd_destroy(grpc_wakeup_fd *fd_info) { - if (cv_wakeup_fds_enabled) { - grpc_cv_wakeup_fd_vtable.destroy(fd_info); - } else { - wakeup_fd_vtable->destroy(fd_info); - } -} - -#endif /* GRPC_POSIX_WAKEUP_FD */ diff --git a/src/core/lib/iomgr/wakeup_fd_posix.cc b/src/core/lib/iomgr/wakeup_fd_posix.cc new file mode 100644 index 0000000000..9af96d314b --- /dev/null +++ b/src/core/lib/iomgr/wakeup_fd_posix.cc @@ -0,0 +1,85 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_WAKEUP_FD + +#include +#include "src/core/lib/iomgr/wakeup_fd_cv.h" +#include "src/core/lib/iomgr/wakeup_fd_pipe.h" +#include "src/core/lib/iomgr/wakeup_fd_posix.h" + +static const grpc_wakeup_fd_vtable *wakeup_fd_vtable = NULL; + +int grpc_allow_specialized_wakeup_fd = 1; +int grpc_allow_pipe_wakeup_fd = 1; + +int has_real_wakeup_fd = 1; +int cv_wakeup_fds_enabled = 0; + +void grpc_wakeup_fd_global_init(void) { + if (grpc_allow_specialized_wakeup_fd && + grpc_specialized_wakeup_fd_vtable.check_availability()) { + wakeup_fd_vtable = &grpc_specialized_wakeup_fd_vtable; + } else if (grpc_allow_pipe_wakeup_fd && + grpc_pipe_wakeup_fd_vtable.check_availability()) { + wakeup_fd_vtable = &grpc_pipe_wakeup_fd_vtable; + } else { + has_real_wakeup_fd = 0; + } +} + +void grpc_wakeup_fd_global_destroy(void) { wakeup_fd_vtable = NULL; } + +int grpc_has_wakeup_fd(void) { return has_real_wakeup_fd; } + +int grpc_cv_wakeup_fds_enabled(void) { return cv_wakeup_fds_enabled; } + +void grpc_enable_cv_wakeup_fds(int enable) { cv_wakeup_fds_enabled = enable; } + +grpc_error *grpc_wakeup_fd_init(grpc_wakeup_fd *fd_info) { + if (cv_wakeup_fds_enabled) { + return grpc_cv_wakeup_fd_vtable.init(fd_info); + } + return wakeup_fd_vtable->init(fd_info); +} + +grpc_error *grpc_wakeup_fd_consume_wakeup(grpc_wakeup_fd *fd_info) { + if (cv_wakeup_fds_enabled) { + return grpc_cv_wakeup_fd_vtable.consume(fd_info); + } + return wakeup_fd_vtable->consume(fd_info); +} + +grpc_error *grpc_wakeup_fd_wakeup(grpc_wakeup_fd *fd_info) { + if (cv_wakeup_fds_enabled) { + return grpc_cv_wakeup_fd_vtable.wakeup(fd_info); + } + return wakeup_fd_vtable->wakeup(fd_info); +} + +void grpc_wakeup_fd_destroy(grpc_wakeup_fd *fd_info) { + if (cv_wakeup_fds_enabled) { + grpc_cv_wakeup_fd_vtable.destroy(fd_info); + } else { + wakeup_fd_vtable->destroy(fd_info); + } +} + +#endif /* GRPC_POSIX_WAKEUP_FD */ -- cgit v1.2.3