diff options
Diffstat (limited to 'src/core/lib/iomgr')
-rw-r--r-- | src/core/lib/iomgr/buffer_list.cc | 134 | ||||
-rw-r--r-- | src/core/lib/iomgr/buffer_list.h | 96 | ||||
-rw-r--r-- | src/core/lib/iomgr/endpoint.cc | 4 | ||||
-rw-r--r-- | src/core/lib/iomgr/endpoint.h | 8 | ||||
-rw-r--r-- | src/core/lib/iomgr/endpoint_cfstream.cc | 2 | ||||
-rw-r--r-- | src/core/lib/iomgr/endpoint_pair_posix.cc | 4 | ||||
-rw-r--r-- | src/core/lib/iomgr/ev_posix.cc | 9 | ||||
-rw-r--r-- | src/core/lib/iomgr/internal_errqueue.cc | 40 | ||||
-rw-r--r-- | src/core/lib/iomgr/internal_errqueue.h | 62 | ||||
-rw-r--r-- | src/core/lib/iomgr/port.h | 6 | ||||
-rw-r--r-- | src/core/lib/iomgr/tcp_client_posix.cc | 2 | ||||
-rw-r--r-- | src/core/lib/iomgr/tcp_custom.cc | 2 | ||||
-rw-r--r-- | src/core/lib/iomgr/tcp_posix.cc | 311 | ||||
-rw-r--r-- | src/core/lib/iomgr/tcp_posix.h | 3 | ||||
-rw-r--r-- | src/core/lib/iomgr/tcp_server_posix.cc | 4 | ||||
-rw-r--r-- | src/core/lib/iomgr/tcp_server_utils_posix_common.cc | 2 | ||||
-rw-r--r-- | src/core/lib/iomgr/tcp_windows.cc | 2 | ||||
-rw-r--r-- | src/core/lib/iomgr/udp_server.cc | 2 |
18 files changed, 664 insertions, 29 deletions
diff --git a/src/core/lib/iomgr/buffer_list.cc b/src/core/lib/iomgr/buffer_list.cc new file mode 100644 index 0000000000..6ada23db1c --- /dev/null +++ b/src/core/lib/iomgr/buffer_list.cc @@ -0,0 +1,134 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include "src/core/lib/iomgr/buffer_list.h" +#include "src/core/lib/iomgr/port.h" + +#include <grpc/support/log.h> + +#ifdef GRPC_LINUX_ERRQUEUE +#include <time.h> + +#include "src/core/lib/gprpp/memory.h" + +namespace grpc_core { +void TracedBuffer::AddNewEntry(TracedBuffer** head, uint32_t seq_no, + void* arg) { + GPR_DEBUG_ASSERT(head != nullptr); + TracedBuffer* new_elem = New<TracedBuffer>(seq_no, arg); + /* Store the current time as the sendmsg time. */ + new_elem->ts_.sendmsg_time = gpr_now(GPR_CLOCK_REALTIME); + if (*head == nullptr) { + *head = new_elem; + return; + } + /* Append at the end. */ + TracedBuffer* ptr = *head; + while (ptr->next_ != nullptr) { + ptr = ptr->next_; + } + ptr->next_ = new_elem; +} + +namespace { +/** Fills gpr_timespec gts based on values from timespec ts */ +void fill_gpr_from_timestamp(gpr_timespec* gts, const struct timespec* ts) { + gts->tv_sec = ts->tv_sec; + gts->tv_nsec = static_cast<int32_t>(ts->tv_nsec); + gts->clock_type = GPR_CLOCK_REALTIME; +} + +/** The saved callback function that will be invoked when we get all the + * timestamps that we are going to get for a TracedBuffer. */ +void (*timestamps_callback)(void*, grpc_core::Timestamps*, + grpc_error* shutdown_err); +} /* namespace */ + +void TracedBuffer::ProcessTimestamp(TracedBuffer** head, + struct sock_extended_err* serr, + struct scm_timestamping* tss) { + GPR_DEBUG_ASSERT(head != nullptr); + TracedBuffer* elem = *head; + TracedBuffer* next = nullptr; + while (elem != nullptr) { + /* The byte number refers to the sequence number of the last byte which this + * timestamp relates to. */ + if (serr->ee_data >= elem->seq_no_) { + switch (serr->ee_info) { + case SCM_TSTAMP_SCHED: + fill_gpr_from_timestamp(&(elem->ts_.scheduled_time), &(tss->ts[0])); + elem = elem->next_; + break; + case SCM_TSTAMP_SND: + fill_gpr_from_timestamp(&(elem->ts_.sent_time), &(tss->ts[0])); + elem = elem->next_; + break; + case SCM_TSTAMP_ACK: + fill_gpr_from_timestamp(&(elem->ts_.acked_time), &(tss->ts[0])); + /* Got all timestamps. Do the callback and free this TracedBuffer. + * The thing below can be passed by value if we don't want the + * restriction on the lifetime. */ + timestamps_callback(elem->arg_, &(elem->ts_), GRPC_ERROR_NONE); + next = elem->next_; + Delete<TracedBuffer>(elem); + *head = elem = next; + break; + default: + abort(); + } + } else { + break; + } + } +} + +void TracedBuffer::Shutdown(TracedBuffer** head, grpc_error* shutdown_err) { + GPR_DEBUG_ASSERT(head != nullptr); + TracedBuffer* elem = *head; + while (elem != nullptr) { + if (timestamps_callback) { + timestamps_callback(elem->arg_, &(elem->ts_), shutdown_err); + } + auto* next = elem->next_; + Delete<TracedBuffer>(elem); + elem = next; + } + *head = nullptr; + GRPC_ERROR_UNREF(shutdown_err); +} + +void grpc_tcp_set_write_timestamps_callback(void (*fn)(void*, + grpc_core::Timestamps*, + grpc_error* error)) { + timestamps_callback = fn; +} +} /* namespace grpc_core */ + +#else /* GRPC_LINUX_ERRQUEUE */ + +namespace grpc_core { +void grpc_tcp_set_write_timestamps_callback(void (*fn)(void*, + grpc_core::Timestamps*, + grpc_error* error)) { + gpr_log(GPR_DEBUG, "Timestamps callback is not enabled for this platform"); +} +} /* namespace grpc_core */ + +#endif /* GRPC_LINUX_ERRQUEUE */ diff --git a/src/core/lib/iomgr/buffer_list.h b/src/core/lib/iomgr/buffer_list.h new file mode 100644 index 0000000000..cbbf50a657 --- /dev/null +++ b/src/core/lib/iomgr/buffer_list.h @@ -0,0 +1,96 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef GRPC_CORE_LIB_IOMGR_BUFFER_LIST_H +#define GRPC_CORE_LIB_IOMGR_BUFFER_LIST_H + +#include <grpc/support/port_platform.h> + +#include "src/core/lib/iomgr/port.h" + +#include <grpc/support/time.h> + +#include "src/core/lib/gprpp/memory.h" +#include "src/core/lib/iomgr/error.h" +#include "src/core/lib/iomgr/internal_errqueue.h" + +namespace grpc_core { +struct Timestamps { + /* TODO(yashykt): This would also need to store OPTSTAT once support is added + */ + gpr_timespec sendmsg_time; + gpr_timespec scheduled_time; + gpr_timespec sent_time; + gpr_timespec acked_time; +}; + +/** TracedBuffer is a class to keep track of timestamps for a specific buffer in + * the TCP layer. We are only tracking timestamps for Linux kernels and hence + * this class would only be used by Linux platforms. For all other platforms, + * TracedBuffer would be an empty class. + * + * The timestamps collected are according to grpc_core::Timestamps declared + * above. + * + * A TracedBuffer list is kept track of using the head element of the list. If + * the head element of the list is nullptr, then the list is empty. + */ +#ifdef GRPC_LINUX_ERRQUEUE +class TracedBuffer { + public: + /** Add a new entry in the TracedBuffer list pointed to by head. Also saves + * sendmsg_time with the current timestamp. */ + static void AddNewEntry(grpc_core::TracedBuffer** head, uint32_t seq_no, + void* arg); + + /** Processes a received timestamp based on sock_extended_err and + * scm_timestamping structures. It will invoke the timestamps callback if the + * timestamp type is SCM_TSTAMP_ACK. */ + static void ProcessTimestamp(grpc_core::TracedBuffer** head, + struct sock_extended_err* serr, + struct scm_timestamping* tss); + + /** Cleans the list by calling the callback for each traced buffer in the list + * with timestamps that it has. */ + static void Shutdown(grpc_core::TracedBuffer** head, + grpc_error* shutdown_err); + + private: + GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_NEW + + TracedBuffer(int seq_no, void* arg) + : seq_no_(seq_no), arg_(arg), next_(nullptr) {} + + uint32_t seq_no_; /* The sequence number for the last byte in the buffer */ + void* arg_; /* The arg to pass to timestamps_callback */ + grpc_core::Timestamps ts_; /* The timestamps corresponding to this buffer */ + grpc_core::TracedBuffer* next_; /* The next TracedBuffer in the list */ +}; +#else /* GRPC_LINUX_ERRQUEUE */ +class TracedBuffer {}; +#endif /* GRPC_LINUX_ERRQUEUE */ + +/** Sets the callback function to call when timestamps for a write are + * collected. The callback does not own a reference to error. */ +void grpc_tcp_set_write_timestamps_callback(void (*fn)(void*, + grpc_core::Timestamps*, + grpc_error* error)); + +}; /* namespace grpc_core */ + +#endif /* GRPC_CORE_LIB_IOMGR_BUFFER_LIST_H */ diff --git a/src/core/lib/iomgr/endpoint.cc b/src/core/lib/iomgr/endpoint.cc index 92e7930111..44fb47e19d 100644 --- a/src/core/lib/iomgr/endpoint.cc +++ b/src/core/lib/iomgr/endpoint.cc @@ -28,8 +28,8 @@ void grpc_endpoint_read(grpc_endpoint* ep, grpc_slice_buffer* slices, } void grpc_endpoint_write(grpc_endpoint* ep, grpc_slice_buffer* slices, - grpc_closure* cb) { - ep->vtable->write(ep, slices, cb); + grpc_closure* cb, void* arg) { + ep->vtable->write(ep, slices, cb, arg); } void grpc_endpoint_add_to_pollset(grpc_endpoint* ep, grpc_pollset* pollset) { diff --git a/src/core/lib/iomgr/endpoint.h b/src/core/lib/iomgr/endpoint.h index 15db1649fa..1f590a80ca 100644 --- a/src/core/lib/iomgr/endpoint.h +++ b/src/core/lib/iomgr/endpoint.h @@ -33,10 +33,12 @@ typedef struct grpc_endpoint grpc_endpoint; typedef struct grpc_endpoint_vtable grpc_endpoint_vtable; +class Timestamps; struct grpc_endpoint_vtable { void (*read)(grpc_endpoint* ep, grpc_slice_buffer* slices, grpc_closure* cb); - void (*write)(grpc_endpoint* ep, grpc_slice_buffer* slices, grpc_closure* cb); + void (*write)(grpc_endpoint* ep, grpc_slice_buffer* slices, grpc_closure* cb, + void* arg); void (*add_to_pollset)(grpc_endpoint* ep, grpc_pollset* pollset); void (*add_to_pollset_set)(grpc_endpoint* ep, grpc_pollset_set* pollset); void (*delete_from_pollset_set)(grpc_endpoint* ep, grpc_pollset_set* pollset); @@ -70,9 +72,11 @@ int grpc_endpoint_get_fd(grpc_endpoint* ep); \a slices may be mutated at will by the endpoint until cb is called. No guarantee is made to the content of slices after a write EXCEPT that it is a valid slice buffer. + \a arg is platform specific. It is currently only used by TCP on linux + platforms as an argument that would be forwarded to the timestamps callback. */ void grpc_endpoint_write(grpc_endpoint* ep, grpc_slice_buffer* slices, - grpc_closure* cb); + grpc_closure* cb, void* arg); /* Causes any pending and future read/write callbacks to run immediately with success==0 */ diff --git a/src/core/lib/iomgr/endpoint_cfstream.cc b/src/core/lib/iomgr/endpoint_cfstream.cc index c3bc0cc8fd..df2cf508c8 100644 --- a/src/core/lib/iomgr/endpoint_cfstream.cc +++ b/src/core/lib/iomgr/endpoint_cfstream.cc @@ -268,7 +268,7 @@ static void CFStreamRead(grpc_endpoint* ep, grpc_slice_buffer* slices, } static void CFStreamWrite(grpc_endpoint* ep, grpc_slice_buffer* slices, - grpc_closure* cb) { + grpc_closure* cb, void* arg) { CFStreamEndpoint* ep_impl = reinterpret_cast<CFStreamEndpoint*>(ep); if (grpc_tcp_trace.enabled()) { gpr_log(GPR_DEBUG, "CFStream endpoint:%p write (%p, %p) length:%zu", diff --git a/src/core/lib/iomgr/endpoint_pair_posix.cc b/src/core/lib/iomgr/endpoint_pair_posix.cc index 5c5c246f99..3afbfd7254 100644 --- a/src/core/lib/iomgr/endpoint_pair_posix.cc +++ b/src/core/lib/iomgr/endpoint_pair_posix.cc @@ -59,11 +59,11 @@ grpc_endpoint_pair grpc_iomgr_create_endpoint_pair(const char* name, grpc_core::ExecCtx exec_ctx; gpr_asprintf(&final_name, "%s:client", name); - p.client = grpc_tcp_create(grpc_fd_create(sv[1], final_name, false), args, + p.client = grpc_tcp_create(grpc_fd_create(sv[1], final_name, true), args, "socketpair-server"); gpr_free(final_name); gpr_asprintf(&final_name, "%s:server", name); - p.server = grpc_tcp_create(grpc_fd_create(sv[0], final_name, false), args, + p.server = grpc_tcp_create(grpc_fd_create(sv[0], final_name, true), args, "socketpair-client"); gpr_free(final_name); diff --git a/src/core/lib/iomgr/ev_posix.cc b/src/core/lib/iomgr/ev_posix.cc index 0205363d5c..d4377e2d50 100644 --- a/src/core/lib/iomgr/ev_posix.cc +++ b/src/core/lib/iomgr/ev_posix.cc @@ -237,14 +237,19 @@ void grpc_event_engine_shutdown(void) { } bool grpc_event_engine_can_track_errors(void) { +/* Only track errors if platform supports errqueue. */ +#ifdef GRPC_LINUX_ERRQUEUE return g_event_engine->can_track_err; +#else + return false; +#endif /* GRPC_LINUX_ERRQUEUE */ } grpc_fd* grpc_fd_create(int fd, const char* name, bool track_err) { GRPC_POLLING_API_TRACE("fd_create(%d, %s, %d)", fd, name, track_err); GRPC_FD_TRACE("fd_create(%d, %s, %d)", fd, name, track_err); - GPR_DEBUG_ASSERT(!track_err || g_event_engine->can_track_err); - return g_event_engine->fd_create(fd, name, track_err); + return g_event_engine->fd_create(fd, name, + track_err && g_event_engine->can_track_err); } int grpc_fd_wrapped_fd(grpc_fd* fd) { diff --git a/src/core/lib/iomgr/internal_errqueue.cc b/src/core/lib/iomgr/internal_errqueue.cc new file mode 100644 index 0000000000..8823737e49 --- /dev/null +++ b/src/core/lib/iomgr/internal_errqueue.cc @@ -0,0 +1,40 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include "src/core/lib/iomgr/port.h" + +#include "src/core/lib/iomgr/internal_errqueue.h" + +#ifdef GRPC_POSIX_SOCKET_TCP + +#ifdef GPR_LINUX +#include <linux/version.h> +#endif /* GPR_LINUX */ + +bool kernel_supports_errqueue() { +#ifdef LINUX_VERSION_CODE +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) + return true; +#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(4, 0, 0) */ +#endif /* LINUX_VERSION_CODE */ + return false; +} + +#endif /* GRPC_POSIX_SOCKET_TCP */ diff --git a/src/core/lib/iomgr/internal_errqueue.h b/src/core/lib/iomgr/internal_errqueue.h new file mode 100644 index 0000000000..fc11be9a6d --- /dev/null +++ b/src/core/lib/iomgr/internal_errqueue.h @@ -0,0 +1,62 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* This file contains constants defined in <linux/errqueue.h> and + * <linux/net_tstamp.h> so as to allow collecting network timestamps in the + * kernel. This file allows tcp_posix.cc to compile on platforms that do not + * have <linux/errqueue.h> and <linux/net_tstamp.h>. + */ + +#ifndef GRPC_CORE_LIB_IOMGR_INTERNAL_ERRQUEUE_H +#define GRPC_CORE_LIB_IOMGR_INTERNAL_ERRQUEUE_H + +#include <grpc/support/port_platform.h> + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET_TCP + +#include <sys/types.h> +#include <time.h> + +#ifdef GRPC_LINUX_ERRQUEUE +#include <linux/errqueue.h> +#include <linux/net_tstamp.h> +#include <sys/socket.h> +#endif /* GRPC_LINUX_ERRQUEUE */ + +namespace grpc_core { + +#ifdef GRPC_LINUX_ERRQUEUE +constexpr uint32_t kTimestampingSocketOptions = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID | + SOF_TIMESTAMPING_OPT_TSONLY; +constexpr uint32_t kTimestampingRecordingOptions = + SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK; +#endif /* GRPC_LINUX_ERRQUEUE */ + +/* Returns true if kernel is capable of supporting errqueue and timestamping. + * Currently allowing only linux kernels above 4.0.0 + */ +bool kernel_supports_errqueue(); +} // namespace grpc_core + +#endif /* GRPC_POSIX_SOCKET_TCP */ + +#endif /* GRPC_CORE_LIB_IOMGR_INTERNAL_ERRQUEUE_H */ diff --git a/src/core/lib/iomgr/port.h b/src/core/lib/iomgr/port.h index 066417b93c..a4688fd0ef 100644 --- a/src/core/lib/iomgr/port.h +++ b/src/core/lib/iomgr/port.h @@ -60,6 +60,12 @@ #define GRPC_HAVE_IP_PKTINFO 1 #define GRPC_HAVE_MSG_NOSIGNAL 1 #define GRPC_HAVE_UNIX_SOCKET 1 +#include <linux/version.h> +#ifdef LINUX_VERSION_CODE +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) +#define GRPC_LINUX_ERRQUEUE 1 +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) */ +#endif /* LINUX_VERSION_CODE */ #define GRPC_LINUX_MULTIPOLL_WITH_EPOLL 1 #define GRPC_POSIX_FORK 1 #define GRPC_POSIX_HOST_NAME_MAX 1 diff --git a/src/core/lib/iomgr/tcp_client_posix.cc b/src/core/lib/iomgr/tcp_client_posix.cc index 296ee74311..9c989b7dfe 100644 --- a/src/core/lib/iomgr/tcp_client_posix.cc +++ b/src/core/lib/iomgr/tcp_client_posix.cc @@ -279,7 +279,7 @@ grpc_error* grpc_tcp_client_prepare_fd(const grpc_channel_args* channel_args, } addr_str = grpc_sockaddr_to_uri(mapped_addr); gpr_asprintf(&name, "tcp-client:%s", addr_str); - *fdobj = grpc_fd_create(fd, name, false); + *fdobj = grpc_fd_create(fd, name, true); gpr_free(name); gpr_free(addr_str); return GRPC_ERROR_NONE; diff --git a/src/core/lib/iomgr/tcp_custom.cc b/src/core/lib/iomgr/tcp_custom.cc index 990e8d632b..e02a1898f2 100644 --- a/src/core/lib/iomgr/tcp_custom.cc +++ b/src/core/lib/iomgr/tcp_custom.cc @@ -221,7 +221,7 @@ static void custom_write_callback(grpc_custom_socket* socket, } static void endpoint_write(grpc_endpoint* ep, grpc_slice_buffer* write_slices, - grpc_closure* cb) { + grpc_closure* cb, void* arg) { custom_tcp_endpoint* tcp = (custom_tcp_endpoint*)ep; GRPC_CUSTOM_IOMGR_ASSERT_SAME_THREAD(); diff --git a/src/core/lib/iomgr/tcp_posix.cc b/src/core/lib/iomgr/tcp_posix.cc index b53ffbf01c..1db2790265 100644 --- a/src/core/lib/iomgr/tcp_posix.cc +++ b/src/core/lib/iomgr/tcp_posix.cc @@ -27,7 +27,9 @@ #include <errno.h> #include <limits.h> +#include <netinet/in.h> #include <stdbool.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/socket.h> @@ -46,6 +48,7 @@ #include "src/core/lib/debug/trace.h" #include "src/core/lib/gpr/string.h" #include "src/core/lib/gpr/useful.h" +#include "src/core/lib/iomgr/buffer_list.h" #include "src/core/lib/iomgr/ev_posix.h" #include "src/core/lib/iomgr/executor.h" #include "src/core/lib/profiling/timers.h" @@ -97,17 +100,42 @@ struct grpc_tcp { grpc_closure read_done_closure; grpc_closure write_done_closure; + grpc_closure error_closure; char* peer_string; grpc_resource_user* resource_user; grpc_resource_user_slice_allocator slice_allocator; + + grpc_core::TracedBuffer* tb_head; /* List of traced buffers */ + gpr_mu tb_mu; /* Lock for access to list of traced buffers */ + + /* grpc_endpoint_write takes an argument which if non-null means that the + * transport layer wants the TCP layer to collect timestamps for this write. + * This arg is forwarded to the timestamps callback function when the ACK + * timestamp is received from the kernel. This arg is a (void *) which allows + * users of this API to pass in a pointer to any kind of structure. This + * structure could actually be a tag or any book-keeping object that the user + * can use to distinguish between different traced writes. The only + * requirement from the TCP endpoint layer is that this arg should be non-null + * if the user wants timestamps for the write. */ + void* outgoing_buffer_arg; + /* A counter which starts at 0. It is initialized the first time the socket + * options for collecting timestamps are set, and is incremented with each + * byte sent. */ + int bytes_counter; + bool socket_ts_enabled; /* True if timestamping options are set on the socket + */ + gpr_atm + stop_error_notification; /* Set to 1 if we do not want to be notified on + errors anymore */ }; struct backup_poller { gpr_mu* pollset_mu; grpc_closure run_poller; }; + } // namespace #define BACKUP_POLLER_POLLSET(b) ((grpc_pollset*)((b) + 1)) @@ -302,6 +330,7 @@ static void tcp_free(grpc_tcp* tcp) { grpc_slice_buffer_destroy_internal(&tcp->last_read_buffer); grpc_resource_user_unref(tcp->resource_user); gpr_free(tcp->peer_string); + gpr_mu_destroy(&tcp->tb_mu); gpr_free(tcp); } @@ -347,6 +376,10 @@ static void tcp_destroy(grpc_endpoint* ep) { grpc_network_status_unregister_endpoint(ep); grpc_tcp* tcp = reinterpret_cast<grpc_tcp*>(ep); grpc_slice_buffer_reset_and_unref_internal(&tcp->last_read_buffer); + if (grpc_event_engine_can_track_errors()) { + gpr_atm_no_barrier_store(&tcp->stop_error_notification, true); + grpc_fd_set_error(tcp->em_fd); + } TCP_UNREF(tcp, "destroy"); } @@ -513,6 +546,234 @@ static void tcp_read(grpc_endpoint* ep, grpc_slice_buffer* incoming_buffer, } } +/* A wrapper around sendmsg. It sends \a msg over \a fd and returns the number + * of bytes sent. */ +ssize_t tcp_send(int fd, const struct msghdr* msg) { + GPR_TIMER_SCOPE("sendmsg", 1); + ssize_t sent_length; + do { + /* TODO(klempner): Cork if this is a partial write */ + GRPC_STATS_INC_SYSCALL_WRITE(); + sent_length = sendmsg(fd, msg, SENDMSG_FLAGS); + } while (sent_length < 0 && errno == EINTR); + return sent_length; +} + +/** This is to be called if outgoing_buffer_arg is not null. On linux platforms, + * this will call sendmsg with socket options set to collect timestamps inside + * the kernel. On return, sent_length is set to the return value of the sendmsg + * call. Returns false if setting the socket options failed. This is not + * implemented for non-linux platforms currently, and crashes out. + */ +static bool tcp_write_with_timestamps(grpc_tcp* tcp, struct msghdr* msg, + size_t sending_length, + ssize_t* sent_length, grpc_error** error); + +/** The callback function to be invoked when we get an error on the socket. */ +static void tcp_handle_error(void* arg /* grpc_tcp */, grpc_error* error); + +#ifdef GRPC_LINUX_ERRQUEUE +static bool tcp_write_with_timestamps(grpc_tcp* tcp, struct msghdr* msg, + size_t sending_length, + ssize_t* sent_length, + grpc_error** error) { + if (!tcp->socket_ts_enabled) { + uint32_t opt = grpc_core::kTimestampingSocketOptions; + if (setsockopt(tcp->fd, SOL_SOCKET, SO_TIMESTAMPING, + static_cast<void*>(&opt), sizeof(opt)) != 0) { + *error = tcp_annotate_error(GRPC_OS_ERROR(errno, "setsockopt"), tcp); + grpc_slice_buffer_reset_and_unref_internal(tcp->outgoing_buffer); + if (grpc_tcp_trace.enabled()) { + gpr_log(GPR_ERROR, "Failed to set timestamping options on the socket."); + } + return false; + } + tcp->bytes_counter = -1; + tcp->socket_ts_enabled = true; + } + /* Set control message to indicate that you want timestamps. */ + union { + char cmsg_buf[CMSG_SPACE(sizeof(uint32_t))]; + struct cmsghdr align; + } u; + cmsghdr* cmsg = reinterpret_cast<cmsghdr*>(u.cmsg_buf); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + *reinterpret_cast<int*>(CMSG_DATA(cmsg)) = + grpc_core::kTimestampingRecordingOptions; + msg->msg_control = u.cmsg_buf; + msg->msg_controllen = CMSG_SPACE(sizeof(uint32_t)); + + /* If there was an error on sendmsg the logic in tcp_flush will handle it. */ + ssize_t length = tcp_send(tcp->fd, msg); + *sent_length = length; + /* Only save timestamps if all the bytes were taken by sendmsg. */ + if (sending_length == static_cast<size_t>(length)) { + gpr_mu_lock(&tcp->tb_mu); + grpc_core::TracedBuffer::AddNewEntry( + &tcp->tb_head, static_cast<int>(tcp->bytes_counter + length), + tcp->outgoing_buffer_arg); + gpr_mu_unlock(&tcp->tb_mu); + tcp->outgoing_buffer_arg = nullptr; + } + return true; +} + +/** Reads \a cmsg to derive timestamps from the control messages. If a valid + * timestamp is found, the traced buffer list is updated with this timestamp. + * The caller of this function should be looping on the control messages found + * in \a msg. \a cmsg should point to the control message that the caller wants + * processed. + * On return, a pointer to a control message is returned. On the next iteration, + * CMSG_NXTHDR(msg, ret_val) should be passed as \a cmsg. */ +struct cmsghdr* process_timestamp(grpc_tcp* tcp, msghdr* msg, + struct cmsghdr* cmsg) { + auto next_cmsg = CMSG_NXTHDR(msg, cmsg); + if (next_cmsg == nullptr) { + if (grpc_tcp_trace.enabled()) { + gpr_log(GPR_ERROR, "Received timestamp without extended error"); + } + return cmsg; + } + + if (!(next_cmsg->cmsg_level == SOL_IP || next_cmsg->cmsg_level == SOL_IPV6) || + !(next_cmsg->cmsg_type == IP_RECVERR || + next_cmsg->cmsg_type == IPV6_RECVERR)) { + if (grpc_tcp_trace.enabled()) { + gpr_log(GPR_ERROR, "Unexpected control message"); + } + return cmsg; + } + + auto tss = reinterpret_cast<struct scm_timestamping*>(CMSG_DATA(cmsg)); + auto serr = reinterpret_cast<struct sock_extended_err*>(CMSG_DATA(next_cmsg)); + if (serr->ee_errno != ENOMSG || + serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { + gpr_log(GPR_ERROR, "Unexpected control message"); + return cmsg; + } + /* The error handling can potentially be done on another thread so we need + * to protect the traced buffer list. A lock free list might be better. Using + * a simple mutex for now. */ + gpr_mu_lock(&tcp->tb_mu); + grpc_core::TracedBuffer::ProcessTimestamp(&tcp->tb_head, serr, tss); + gpr_mu_unlock(&tcp->tb_mu); + return next_cmsg; +} + +/** For linux platforms, reads the socket's error queue and processes error + * messages from the queue. Returns true if all the errors processed were + * timestamps. Returns false if any of the errors were not timestamps. For + * non-linux platforms, error processing is not used/enabled currently. + */ +static bool process_errors(grpc_tcp* tcp) { + while (true) { + struct iovec iov; + iov.iov_base = nullptr; + iov.iov_len = 0; + struct msghdr msg; + msg.msg_name = nullptr; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 0; + msg.msg_flags = 0; + + union { + char rbuf[1024 /*CMSG_SPACE(sizeof(scm_timestamping)) + + CMSG_SPACE(sizeof(sock_extended_err) + sizeof(sockaddr_in))*/]; + struct cmsghdr align; + } aligned_buf; + memset(&aligned_buf, 0, sizeof(aligned_buf)); + + msg.msg_control = aligned_buf.rbuf; + msg.msg_controllen = sizeof(aligned_buf.rbuf); + + int r, saved_errno; + do { + r = recvmsg(tcp->fd, &msg, MSG_ERRQUEUE); + saved_errno = errno; + } while (r < 0 && saved_errno == EINTR); + + if (r == -1 && saved_errno == EAGAIN) { + return true; /* No more errors to process */ + } + if (r == -1) { + return false; + } + if (grpc_tcp_trace.enabled()) { + if ((msg.msg_flags & MSG_CTRUNC) == 1) { + gpr_log(GPR_INFO, "Error message was truncated."); + } + } + + if (msg.msg_controllen == 0) { + /* There was no control message found. It was probably spurious. */ + return true; + } + for (auto cmsg = CMSG_FIRSTHDR(&msg); cmsg && cmsg->cmsg_len; + cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != SOL_SOCKET || + cmsg->cmsg_type != SCM_TIMESTAMPING) { + /* Got a control message that is not a timestamp. Don't know how to + * handle this. */ + if (grpc_tcp_trace.enabled()) { + gpr_log(GPR_INFO, + "unknown control message cmsg_level:%d cmsg_type:%d", + cmsg->cmsg_level, cmsg->cmsg_type); + } + return false; + } + process_timestamp(tcp, &msg, cmsg); + } + } +} + +static void tcp_handle_error(void* arg /* grpc_tcp */, grpc_error* error) { + grpc_tcp* tcp = static_cast<grpc_tcp*>(arg); + if (grpc_tcp_trace.enabled()) { + gpr_log(GPR_INFO, "TCP:%p got_error: %s", tcp, grpc_error_string(error)); + } + + if (error != GRPC_ERROR_NONE || + static_cast<bool>(gpr_atm_acq_load(&tcp->stop_error_notification))) { + /* We aren't going to register to hear on error anymore, so it is safe to + * unref. */ + grpc_core::TracedBuffer::Shutdown(&tcp->tb_head, GRPC_ERROR_REF(error)); + TCP_UNREF(tcp, "error-tracking"); + return; + } + + /* We are still interested in collecting timestamps, so let's try reading + * them. */ + if (!process_errors(tcp)) { + /* This was not a timestamps error. This was an actual error. Set the + * read and write closures to be ready. + */ + grpc_fd_set_readable(tcp->em_fd); + grpc_fd_set_writable(tcp->em_fd); + } + GRPC_CLOSURE_INIT(&tcp->error_closure, tcp_handle_error, tcp, + grpc_schedule_on_exec_ctx); + grpc_fd_notify_on_error(tcp->em_fd, &tcp->error_closure); +} + +#else /* GRPC_LINUX_ERRQUEUE */ +static bool tcp_write_with_timestamps(grpc_tcp* tcp, struct msghdr* msg, + size_t sending_length, + ssize_t* sent_length, + grpc_error** error) { + gpr_log(GPR_ERROR, "Write with timestamps not supported for this platform"); + GPR_ASSERT(0); + return false; +} + +static void tcp_handle_error(void* arg /* grpc_tcp */, grpc_error* error) { + gpr_log(GPR_ERROR, "Error handling is not supported for this platform"); + GPR_ASSERT(0); +} +#endif /* GRPC_LINUX_ERRQUEUE */ + /* returns true if done, false if pending; if returning true, *error is set */ #if defined(IOV_MAX) && IOV_MAX < 1000 #define MAX_WRITE_IOVEC IOV_MAX @@ -557,19 +818,20 @@ static bool tcp_flush(grpc_tcp* tcp, grpc_error** error) { msg.msg_namelen = 0; msg.msg_iov = iov; msg.msg_iovlen = iov_size; - msg.msg_control = nullptr; - msg.msg_controllen = 0; msg.msg_flags = 0; + if (tcp->outgoing_buffer_arg != nullptr) { + if (!tcp_write_with_timestamps(tcp, &msg, sending_length, &sent_length, + error)) + return true; /* something went wrong with timestamps */ + } else { + msg.msg_control = nullptr; + msg.msg_controllen = 0; - GRPC_STATS_INC_TCP_WRITE_SIZE(sending_length); - GRPC_STATS_INC_TCP_WRITE_IOV_SIZE(iov_size); + GRPC_STATS_INC_TCP_WRITE_SIZE(sending_length); + GRPC_STATS_INC_TCP_WRITE_IOV_SIZE(iov_size); - GPR_TIMER_SCOPE("sendmsg", 1); - do { - /* TODO(klempner): Cork if this is a partial write */ - GRPC_STATS_INC_SYSCALL_WRITE(); - sent_length = sendmsg(tcp->fd, &msg, SENDMSG_FLAGS); - } while (sent_length < 0 && errno == EINTR); + sent_length = tcp_send(tcp->fd, &msg); + } if (sent_length < 0) { if (errno == EAGAIN) { @@ -593,6 +855,7 @@ static bool tcp_flush(grpc_tcp* tcp, grpc_error** error) { } GPR_ASSERT(tcp->outgoing_byte_idx == 0); + tcp->bytes_counter += sent_length; trailing = sending_length - static_cast<size_t>(sent_length); while (trailing > 0) { size_t slice_length; @@ -607,7 +870,6 @@ static bool tcp_flush(grpc_tcp* tcp, grpc_error** error) { trailing -= slice_length; } } - if (outgoing_slice_idx == tcp->outgoing_buffer->count) { *error = GRPC_ERROR_NONE; grpc_slice_buffer_reset_and_unref_internal(tcp->outgoing_buffer); @@ -640,14 +902,13 @@ static void tcp_handle_write(void* arg /* grpc_tcp */, grpc_error* error) { const char* str = grpc_error_string(error); gpr_log(GPR_INFO, "write: %s", str); } - GRPC_CLOSURE_SCHED(cb, error); TCP_UNREF(tcp, "write"); } } static void tcp_write(grpc_endpoint* ep, grpc_slice_buffer* buf, - grpc_closure* cb) { + grpc_closure* cb, void* arg) { GPR_TIMER_SCOPE("tcp_write", 0); grpc_tcp* tcp = reinterpret_cast<grpc_tcp*>(ep); grpc_error* error = GRPC_ERROR_NONE; @@ -675,6 +936,10 @@ static void tcp_write(grpc_endpoint* ep, grpc_slice_buffer* buf, } tcp->outgoing_buffer = buf; tcp->outgoing_byte_idx = 0; + tcp->outgoing_buffer_arg = arg; + if (arg) { + GPR_ASSERT(grpc_event_engine_can_track_errors()); + } if (!tcp_flush(tcp, &error)) { TCP_REF(tcp, "write"); @@ -792,6 +1057,8 @@ grpc_endpoint* grpc_tcp_create(grpc_fd* em_fd, tcp->bytes_read_this_round = 0; /* Will be set to false by the very first endpoint read function */ tcp->is_first_read = true; + tcp->bytes_counter = -1; + tcp->socket_ts_enabled = false; /* paired with unref in grpc_tcp_destroy */ gpr_ref_init(&tcp->refcount, 1); gpr_atm_no_barrier_store(&tcp->shutdown_count, 0); @@ -803,6 +1070,19 @@ grpc_endpoint* grpc_tcp_create(grpc_fd* em_fd, /* Tell network status tracker about new endpoint */ grpc_network_status_register_endpoint(&tcp->base); grpc_resource_quota_unref_internal(resource_quota); + gpr_mu_init(&tcp->tb_mu); + tcp->tb_head = nullptr; + /* Start being notified on errors if event engine can track errors. */ + if (grpc_event_engine_can_track_errors()) { + /* Grab a ref to tcp so that we can safely access the tcp struct when + * processing errors. We unref when we no longer want to track errors + * separately. */ + TCP_REF(tcp, "error-tracking"); + gpr_atm_rel_store(&tcp->stop_error_notification, 0); + GRPC_CLOSURE_INIT(&tcp->error_closure, tcp_handle_error, tcp, + grpc_schedule_on_exec_ctx); + grpc_fd_notify_on_error(tcp->em_fd, &tcp->error_closure); + } return &tcp->base; } @@ -821,6 +1101,11 @@ void grpc_tcp_destroy_and_release_fd(grpc_endpoint* ep, int* fd, tcp->release_fd = fd; tcp->release_fd_cb = done; grpc_slice_buffer_reset_and_unref_internal(&tcp->last_read_buffer); + if (grpc_event_engine_can_track_errors()) { + /* Stop errors notification. */ + gpr_atm_no_barrier_store(&tcp->stop_error_notification, true); + grpc_fd_set_error(tcp->em_fd); + } TCP_UNREF(tcp, "destroy"); } diff --git a/src/core/lib/iomgr/tcp_posix.h b/src/core/lib/iomgr/tcp_posix.h index af89bd24db..eff825cb92 100644 --- a/src/core/lib/iomgr/tcp_posix.h +++ b/src/core/lib/iomgr/tcp_posix.h @@ -31,7 +31,10 @@ #include <grpc/support/port_platform.h> +#include "src/core/lib/iomgr/port.h" + #include "src/core/lib/debug/trace.h" +#include "src/core/lib/iomgr/buffer_list.h" #include "src/core/lib/iomgr/endpoint.h" #include "src/core/lib/iomgr/ev_posix.h" diff --git a/src/core/lib/iomgr/tcp_server_posix.cc b/src/core/lib/iomgr/tcp_server_posix.cc index 8ddf684fea..824db07fbf 100644 --- a/src/core/lib/iomgr/tcp_server_posix.cc +++ b/src/core/lib/iomgr/tcp_server_posix.cc @@ -226,7 +226,7 @@ static void on_read(void* arg, grpc_error* err) { gpr_log(GPR_INFO, "SERVER_CONNECT: incoming connection: %s", addr_str); } - grpc_fd* fdobj = grpc_fd_create(fd, name, false); + grpc_fd* fdobj = grpc_fd_create(fd, name, true); read_notifier_pollset = sp->server->pollsets[static_cast<size_t>(gpr_atm_no_barrier_fetch_add( @@ -362,7 +362,7 @@ static grpc_error* clone_port(grpc_tcp_listener* listener, unsigned count) { listener->sibling = sp; sp->server = listener->server; sp->fd = fd; - sp->emfd = grpc_fd_create(fd, name, false); + sp->emfd = grpc_fd_create(fd, name, true); memcpy(&sp->addr, &listener->addr, sizeof(grpc_resolved_address)); sp->port = port; sp->port_index = listener->port_index; diff --git a/src/core/lib/iomgr/tcp_server_utils_posix_common.cc b/src/core/lib/iomgr/tcp_server_utils_posix_common.cc index b9f8145572..9595c028ce 100644 --- a/src/core/lib/iomgr/tcp_server_utils_posix_common.cc +++ b/src/core/lib/iomgr/tcp_server_utils_posix_common.cc @@ -105,7 +105,7 @@ static grpc_error* add_socket_to_server(grpc_tcp_server* s, int fd, s->tail = sp; sp->server = s; sp->fd = fd; - sp->emfd = grpc_fd_create(fd, name, false); + sp->emfd = grpc_fd_create(fd, name, true); memcpy(&sp->addr, addr, sizeof(grpc_resolved_address)); sp->port = port; sp->port_index = port_index; diff --git a/src/core/lib/iomgr/tcp_windows.cc b/src/core/lib/iomgr/tcp_windows.cc index b3cb442f18..64c4a56ae9 100644 --- a/src/core/lib/iomgr/tcp_windows.cc +++ b/src/core/lib/iomgr/tcp_windows.cc @@ -296,7 +296,7 @@ static void on_write(void* tcpp, grpc_error* error) { /* Initiates a write. */ static void win_write(grpc_endpoint* ep, grpc_slice_buffer* slices, - grpc_closure* cb) { + grpc_closure* cb, void* arg) { grpc_tcp* tcp = (grpc_tcp*)ep; grpc_winsocket* socket = tcp->socket; grpc_winsocket_callback_info* info = &socket->write_info; diff --git a/src/core/lib/iomgr/udp_server.cc b/src/core/lib/iomgr/udp_server.cc index bdb2d0e764..3dd7cab855 100644 --- a/src/core/lib/iomgr/udp_server.cc +++ b/src/core/lib/iomgr/udp_server.cc @@ -152,7 +152,7 @@ GrpcUdpListener::GrpcUdpListener(grpc_udp_server* server, int fd, grpc_sockaddr_to_string(&addr_str, addr, 1); gpr_asprintf(&name, "udp-server-listener:%s", addr_str); gpr_free(addr_str); - emfd_ = grpc_fd_create(fd, name, false); + emfd_ = grpc_fd_create(fd, name, true); memcpy(&addr_, addr, sizeof(grpc_resolved_address)); GPR_ASSERT(emfd_); gpr_free(name); |