diff options
23 files changed, 719 insertions, 87 deletions
@@ -767,11 +767,20 @@ else LDLIBS_SECURE += $(addprefix -l, $(LIBS_SECURE)) endif +# gpr .pc file +PC_NAME = gpr +PC_DESCRIPTION = gRPC platform support library +PC_CFLAGS = +PC_REQUIRES_PRIVATE = $(PC_REQUIRES_GPR) +PC_LIBS_PRIVATE = $(PC_LIBS_GPR) +PC_LIB = -lgpr +GPR_PC_FILE := $(CORE_PC_TEMPLATE) + # grpc .pc file PC_NAME = gRPC PC_DESCRIPTION = high performance general RPC framework PC_CFLAGS = -PC_REQUIRES_PRIVATE = $(PC_REQUIRES_GRPC) $(PC_REQUIRES_SECURE) +PC_REQUIRES_PRIVATE = gpr $(PC_REQUIRES_GRPC) $(PC_REQUIRES_SECURE) PC_LIBS_PRIVATE = $(PC_LIBS_GRPC) $(PC_LIBS_SECURE) PC_LIB = -lgrpc GRPC_PC_FILE := $(CORE_PC_TEMPLATE) @@ -780,7 +789,7 @@ GRPC_PC_FILE := $(CORE_PC_TEMPLATE) PC_NAME = gRPC unsecure PC_DESCRIPTION = high performance general RPC framework without SSL PC_CFLAGS = -PC_REQUIRES_PRIVATE = $(PC_REQUIRES_GRPC) +PC_REQUIRES_PRIVATE = gpr $(PC_REQUIRES_GRPC) PC_LIBS_PRIVATE = $(PC_LIBS_GRPC) PC_LIB = -lgrpc GRPC_UNSECURE_PC_FILE := $(CORE_PC_TEMPLATE) @@ -1398,9 +1407,9 @@ plugins: $(PROTOC_PLUGINS) privatelibs: privatelibs_c privatelibs_cxx privatelibs_c: $(LIBDIR)/$(CONFIG)/libalts_test_util.a $(LIBDIR)/$(CONFIG)/libcxxabi.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc_test_util_unsecure.a $(LIBDIR)/$(CONFIG)/libreconnect_server.a $(LIBDIR)/$(CONFIG)/libtest_tcp_server.a $(LIBDIR)/$(CONFIG)/libz.a $(LIBDIR)/$(CONFIG)/libares.a $(LIBDIR)/$(CONFIG)/libbad_client_test.a $(LIBDIR)/$(CONFIG)/libbad_ssl_test_server.a $(LIBDIR)/$(CONFIG)/libend2end_tests.a $(LIBDIR)/$(CONFIG)/libend2end_nosec_tests.a -pc_c: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc.pc +pc_c: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc.pc $(LIBDIR)/$(CONFIG)/pkgconfig/gpr.pc -pc_c_unsecure: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc_unsecure.pc +pc_c_unsecure: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc_unsecure.pc $(LIBDIR)/$(CONFIG)/pkgconfig/gpr.pc pc_cxx: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc++.pc @@ -2519,6 +2528,11 @@ cache.mk:: $(E) "[MAKE] Generating $@" $(Q) echo "$(CACHE_MK)" | tr , '\n' >$@ +$(LIBDIR)/$(CONFIG)/pkgconfig/gpr.pc: + $(E) "[MAKE] Generating $@" + $(Q) mkdir -p $(@D) + $(Q) echo "$(GPR_PC_FILE)" | tr , '\n' >$@ + $(LIBDIR)/$(CONFIG)/pkgconfig/grpc.pc: $(E) "[MAKE] Generating $@" $(Q) mkdir -p $(@D) @@ -3129,6 +3143,7 @@ install-grpc-cli: grpc_cli install-pkg-config_c: pc_c pc_c_unsecure $(E) "[INSTALL] Installing C pkg-config files" $(Q) $(INSTALL) -d $(prefix)/lib/pkgconfig + $(Q) $(INSTALL) -m 0644 $(LIBDIR)/$(CONFIG)/pkgconfig/gpr.pc $(prefix)/lib/pkgconfig/gpr.pc $(Q) $(INSTALL) -m 0644 $(LIBDIR)/$(CONFIG)/pkgconfig/grpc.pc $(prefix)/lib/pkgconfig/grpc.pc $(Q) $(INSTALL) -m 0644 $(LIBDIR)/$(CONFIG)/pkgconfig/grpc_unsecure.pc $(prefix)/lib/pkgconfig/grpc_unsecure.pc @@ -70,6 +70,7 @@ EXPORTS grpc_resource_quota_ref grpc_resource_quota_unref grpc_resource_quota_resize + grpc_resource_quota_set_max_threads grpc_resource_quota_arg_vtable grpc_channelz_get_top_channels grpc_channelz_get_channel diff --git a/include/grpc/grpc.h b/include/grpc/grpc.h index 4c3af45100..897b89851a 100644 --- a/include/grpc/grpc.h +++ b/include/grpc/grpc.h @@ -466,6 +466,10 @@ GRPCAPI void grpc_resource_quota_unref(grpc_resource_quota* resource_quota); GRPCAPI void grpc_resource_quota_resize(grpc_resource_quota* resource_quota, size_t new_size); +/** Update the size of the maximum number of threads allowed */ +GRPCAPI void grpc_resource_quota_set_max_threads( + grpc_resource_quota* resource_quota, int new_max_threads); + /** Fetch a vtable for a grpc_channel_arg that points to a grpc_resource_quota */ GRPCAPI const grpc_arg_pointer_vtable* grpc_resource_quota_arg_vtable(void); diff --git a/include/grpcpp/resource_quota.h b/include/grpcpp/resource_quota.h index 554437a40d..50bd1cb849 100644 --- a/include/grpcpp/resource_quota.h +++ b/include/grpcpp/resource_quota.h @@ -26,10 +26,10 @@ struct grpc_resource_quota; namespace grpc { -/// ResourceQuota represents a bound on memory usage by the gRPC library. -/// A ResourceQuota can be attached to a server (via \a ServerBuilder), +/// ResourceQuota represents a bound on memory and thread usage by the gRPC +/// library. A ResourceQuota can be attached to a server (via \a ServerBuilder), /// or a client channel (via \a ChannelArguments). -/// gRPC will attempt to keep memory used by all attached entities +/// gRPC will attempt to keep memory and threads used by all attached entities /// below the ResourceQuota bound. class ResourceQuota final : private GrpcLibraryCodegen { public: @@ -44,6 +44,16 @@ class ResourceQuota final : private GrpcLibraryCodegen { /// No time bound is given for this to occur however. ResourceQuota& Resize(size_t new_size); + /// Set the max number of threads that can be allocated from this + /// ResourceQuota object. + /// + /// If the new_max_threads value is smaller than the current value, no new + /// threads are allocated until the number of active threads fall below + /// new_max_threads. There is no time bound on when this may happen i.e none + /// of the current threads are forcefully destroyed and all threads run their + /// normal course. + ResourceQuota& SetMaxThreads(int new_max_threads); + grpc_resource_quota* c_resource_quota() const { return impl_; } private: diff --git a/include/grpcpp/server.h b/include/grpcpp/server.h index 81c3907f86..189d8bec22 100644 --- a/include/grpcpp/server.h +++ b/include/grpcpp/server.h @@ -120,6 +120,10 @@ class Server : public ServerInterface, private GrpcLibraryCodegen { int AddListeningPort(const grpc::string& addr, ServerCredentials* creds) override; + /// NOTE: This is *NOT* a public API. The server constructors are supposed to + /// be used by \a ServerBuilder class only. The constructor will be made + /// 'private' very soon. + /// /// Server constructors. To be used by \a ServerBuilder only. /// /// \param max_message_size Maximum message length that the channel can @@ -144,7 +148,8 @@ class Server : public ServerInterface, private GrpcLibraryCodegen { Server(int max_message_size, ChannelArguments* args, std::shared_ptr<std::vector<std::unique_ptr<ServerCompletionQueue>>> sync_server_cqs, - int min_pollers, int max_pollers, int sync_cq_timeout_msec); + int min_pollers, int max_pollers, int sync_cq_timeout_msec, + grpc_resource_quota* server_rq = nullptr); /// Start the server. /// diff --git a/src/core/ext/transport/chttp2/transport/chttp2_transport.cc b/src/core/ext/transport/chttp2/transport/chttp2_transport.cc index bc6fa0d0eb..36511fa608 100644 --- a/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +++ b/src/core/ext/transport/chttp2/transport/chttp2_transport.cc @@ -812,6 +812,12 @@ static void set_write_state(grpc_chttp2_transport* t, write_state_name(t->write_state), write_state_name(st), reason)); t->write_state = st; + /* If the state is being reset back to idle, it means a write was just + * finished. Make sure all the run_after_write closures are scheduled. + * + * This is also our chance to close the transport if the transport was marked + * to be closed after all writes finish (for example, if we received a go-away + * from peer while we had some pending writes) */ if (st == GRPC_CHTTP2_WRITE_STATE_IDLE) { GRPC_CLOSURE_LIST_SCHED(&t->run_after_write); if (t->close_transport_on_writes_finished != nullptr) { @@ -899,6 +905,22 @@ void grpc_chttp2_initiate_write(grpc_chttp2_transport* t, grpc_chttp2_initiate_write_reason_string(reason)); t->is_first_write_in_batch = true; GRPC_CHTTP2_REF_TRANSPORT(t, "writing"); + /* Note that the 'write_action_begin_locked' closure is being scheduled + * on the 'finally_scheduler' of t->combiner. This means that + * 'write_action_begin_locked' is called only *after* all the other + * closures (some of which are potentially initiating more writes on the + * transport) are executed on the t->combiner. + * + * The reason for scheduling on finally_scheduler is to make sure we batch + * as many writes as possible. 'write_action_begin_locked' is the function + * that gathers all the relevant bytes (which are at various places in the + * grpc_chttp2_transport structure) and append them to 'outbuf' field in + * grpc_chttp2_transport thereby batching what would have been potentially + * multiple write operations. + * + * Also, 'write_action_begin_locked' only gathers the bytes into outbuf. + * It does not call the endpoint to write the bytes. That is done by the + * 'write_action' (which is scheduled by 'write_action_begin_locked') */ GRPC_CLOSURE_SCHED( GRPC_CLOSURE_INIT(&t->write_action_begin_locked, write_action_begin_locked, t, @@ -1010,6 +1032,8 @@ static void write_action(void* gt, grpc_error* error) { grpc_combiner_scheduler(t->combiner))); } +/* Callback from the grpc_endpoint after bytes have been written by calling + * sendmsg */ static void write_action_end_locked(void* tp, grpc_error* error) { GPR_TIMER_SCOPE("terminate_writing_with_lock", 0); grpc_chttp2_transport* t = static_cast<grpc_chttp2_transport*>(tp); diff --git a/src/core/ext/transport/chttp2/transport/flow_control.cc b/src/core/ext/transport/chttp2/transport/flow_control.cc index 5f3dd98461..53932bcb7f 100644 --- a/src/core/ext/transport/chttp2/transport/flow_control.cc +++ b/src/core/ext/transport/chttp2/transport/flow_control.cc @@ -40,6 +40,7 @@ namespace chttp2 { namespace { static constexpr const int kTracePadding = 30; +static constexpr const uint32_t kMaxWindowUpdateSize = (1u << 31) - 1; static char* fmt_int64_diff_str(int64_t old_val, int64_t new_val) { char* str; @@ -193,7 +194,7 @@ uint32_t TransportFlowControl::MaybeSendUpdate(bool writing_anyway) { if ((writing_anyway || announced_window_ <= target_announced_window / 2) && announced_window_ != target_announced_window) { const uint32_t announce = static_cast<uint32_t> GPR_CLAMP( - target_announced_window - announced_window_, 0, UINT32_MAX); + target_announced_window - announced_window_, 0, kMaxWindowUpdateSize); announced_window_ += announce; return announce; } @@ -267,7 +268,7 @@ uint32_t StreamFlowControl::MaybeSendUpdate() { FlowControlTrace trace("s updt sent", tfc_, this); if (local_window_delta_ > announced_window_delta_) { uint32_t announce = static_cast<uint32_t> GPR_CLAMP( - local_window_delta_ - announced_window_delta_, 0, UINT32_MAX); + local_window_delta_ - announced_window_delta_, 0, kMaxWindowUpdateSize); UpdateAnnouncedWindowDelta(tfc_, announce); return announce; } diff --git a/src/core/lib/iomgr/resource_quota.cc b/src/core/lib/iomgr/resource_quota.cc index 539bc120ce..b6fc7579f7 100644 --- a/src/core/lib/iomgr/resource_quota.cc +++ b/src/core/lib/iomgr/resource_quota.cc @@ -96,6 +96,9 @@ struct grpc_resource_user { list, false otherwise */ bool added_to_free_pool; + /* The number of threads currently allocated to this resource user */ + gpr_atm num_threads_allocated; + /* Reclaimers: index 0 is the benign reclaimer, 1 is the destructive reclaimer */ grpc_closure* reclaimers[2]; @@ -135,12 +138,33 @@ struct grpc_resource_quota { gpr_atm last_size; + /* Mutex to protect max_threads and num_threads_allocated */ + /* Note: We could have used gpr_atm for max_threads and num_threads_allocated + * and avoid having this mutex; but in that case, each invocation of the + * function grpc_resource_user_allocate_threads() would have had to do at + * least two atomic loads (for max_threads and num_threads_allocated) followed + * by a CAS (on num_threads_allocated). + * Moreover, we expect grpc_resource_user_allocate_threads() to be often + * called concurrently thereby increasing the chances of failing the CAS + * operation. This additional complexity is not worth the tiny perf gain we + * may (or may not) have by using atomics */ + gpr_mu thread_count_mu; + + /* Max number of threads allowed */ + int max_threads; + + /* Number of threads currently allocated via this resource_quota object */ + int num_threads_allocated; + /* Has rq_step been scheduled to occur? */ bool step_scheduled; + /* Are we currently reclaiming memory */ bool reclaiming; + /* Closure around rq_step */ grpc_closure rq_step_closure; + /* Closure around rq_reclamation_done */ grpc_closure rq_reclamation_done_closure; @@ -524,6 +548,11 @@ static void ru_shutdown(void* ru, grpc_error* error) { static void ru_destroy(void* ru, grpc_error* error) { grpc_resource_user* resource_user = static_cast<grpc_resource_user*>(ru); GPR_ASSERT(gpr_atm_no_barrier_load(&resource_user->refs) == 0); + // Free all the remaining thread quota + grpc_resource_user_free_threads(resource_user, + static_cast<int>(gpr_atm_no_barrier_load( + &resource_user->num_threads_allocated))); + for (int i = 0; i < GRPC_RULIST_COUNT; i++) { rulist_remove(resource_user, static_cast<grpc_rulist>(i)); } @@ -594,6 +623,9 @@ grpc_resource_quota* grpc_resource_quota_create(const char* name) { resource_quota->free_pool = INT64_MAX; resource_quota->size = INT64_MAX; gpr_atm_no_barrier_store(&resource_quota->last_size, GPR_ATM_MAX); + gpr_mu_init(&resource_quota->thread_count_mu); + resource_quota->max_threads = INT_MAX; + resource_quota->num_threads_allocated = 0; resource_quota->step_scheduled = false; resource_quota->reclaiming = false; gpr_atm_no_barrier_store(&resource_quota->memory_usage_estimation, 0); @@ -616,6 +648,8 @@ grpc_resource_quota* grpc_resource_quota_create(const char* name) { void grpc_resource_quota_unref_internal(grpc_resource_quota* resource_quota) { if (gpr_unref(&resource_quota->refs)) { + // No outstanding thread quota + GPR_ASSERT(resource_quota->num_threads_allocated == 0); GRPC_COMBINER_UNREF(resource_quota->combiner, "resource_quota"); gpr_free(resource_quota->name); gpr_free(resource_quota); @@ -647,6 +681,15 @@ double grpc_resource_quota_get_memory_pressure( } /* Public API */ +void grpc_resource_quota_set_max_threads(grpc_resource_quota* resource_quota, + int new_max_threads) { + GPR_ASSERT(new_max_threads >= 0); + gpr_mu_lock(&resource_quota->thread_count_mu); + resource_quota->max_threads = new_max_threads; + gpr_mu_unlock(&resource_quota->thread_count_mu); +} + +/* Public API */ void grpc_resource_quota_resize(grpc_resource_quota* resource_quota, size_t size) { grpc_core::ExecCtx exec_ctx; @@ -731,6 +774,7 @@ grpc_resource_user* grpc_resource_user_create( grpc_closure_list_init(&resource_user->on_allocated); resource_user->allocating = false; resource_user->added_to_free_pool = false; + gpr_atm_no_barrier_store(&resource_user->num_threads_allocated, 0); resource_user->reclaimers[0] = nullptr; resource_user->reclaimers[1] = nullptr; resource_user->new_reclaimers[0] = nullptr; @@ -785,6 +829,40 @@ void grpc_resource_user_shutdown(grpc_resource_user* resource_user) { } } +bool grpc_resource_user_allocate_threads(grpc_resource_user* resource_user, + int thread_count) { + GPR_ASSERT(thread_count >= 0); + bool is_success = false; + gpr_mu_lock(&resource_user->resource_quota->thread_count_mu); + grpc_resource_quota* rq = resource_user->resource_quota; + if (rq->num_threads_allocated + thread_count <= rq->max_threads) { + rq->num_threads_allocated += thread_count; + gpr_atm_no_barrier_fetch_add(&resource_user->num_threads_allocated, + thread_count); + is_success = true; + } + gpr_mu_unlock(&resource_user->resource_quota->thread_count_mu); + return is_success; +} + +void grpc_resource_user_free_threads(grpc_resource_user* resource_user, + int thread_count) { + GPR_ASSERT(thread_count >= 0); + gpr_mu_lock(&resource_user->resource_quota->thread_count_mu); + grpc_resource_quota* rq = resource_user->resource_quota; + rq->num_threads_allocated -= thread_count; + int old_count = static_cast<int>(gpr_atm_no_barrier_fetch_add( + &resource_user->num_threads_allocated, -thread_count)); + if (old_count < thread_count || rq->num_threads_allocated < 0) { + gpr_log(GPR_ERROR, + "Releasing more threads (%d) than currently allocated (rq threads: " + "%d, ru threads: %d)", + thread_count, rq->num_threads_allocated + thread_count, old_count); + abort(); + } + gpr_mu_unlock(&resource_user->resource_quota->thread_count_mu); +} + void grpc_resource_user_alloc(grpc_resource_user* resource_user, size_t size, grpc_closure* optional_on_done) { gpr_mu_lock(&resource_user->mu); diff --git a/src/core/lib/iomgr/resource_quota.h b/src/core/lib/iomgr/resource_quota.h index 937daf8728..7b0ed7417a 100644 --- a/src/core/lib/iomgr/resource_quota.h +++ b/src/core/lib/iomgr/resource_quota.h @@ -93,6 +93,22 @@ void grpc_resource_user_ref(grpc_resource_user* resource_user); void grpc_resource_user_unref(grpc_resource_user* resource_user); void grpc_resource_user_shutdown(grpc_resource_user* resource_user); +/* Attempts to get quota from the resource_user to create 'thread_count' number + * of threads. Returns true if successful (i.e the caller is now free to create + * 'thread_count' number of threads) or false if quota is not available */ +bool grpc_resource_user_allocate_threads(grpc_resource_user* resource_user, + int thread_count); +/* Releases 'thread_count' worth of quota back to the resource user. The quota + * should have been previously obtained successfully by calling + * grpc_resource_user_allocate_threads(). + * + * Note: There need not be an exact one-to-one correspondence between + * grpc_resource_user_allocate_threads() and grpc_resource_user_free_threads() + * calls. The only requirement is that the number of threads allocated should + * all be eventually released */ +void grpc_resource_user_free_threads(grpc_resource_user* resource_user, + int thread_count); + /* Allocate from the resource user (and its quota). If optional_on_done is NULL, then allocate immediately. This may push the quota over-limit, at which point reclamation will kick in. diff --git a/src/cpp/common/resource_quota_cc.cc b/src/cpp/common/resource_quota_cc.cc index daeb0ba171..276e5f7954 100644 --- a/src/cpp/common/resource_quota_cc.cc +++ b/src/cpp/common/resource_quota_cc.cc @@ -33,4 +33,8 @@ ResourceQuota& ResourceQuota::Resize(size_t new_size) { return *this; } +ResourceQuota& ResourceQuota::SetMaxThreads(int new_max_threads) { + grpc_resource_quota_set_max_threads(impl_, new_max_threads); + return *this; +} } // namespace grpc diff --git a/src/cpp/server/server_builder.cc b/src/cpp/server/server_builder.cc index e0b9b7a62b..8417c45e64 100644 --- a/src/cpp/server/server_builder.cc +++ b/src/cpp/server/server_builder.cc @@ -263,7 +263,7 @@ std::unique_ptr<Server> ServerBuilder::BuildAndStart() { std::unique_ptr<Server> server(new Server( max_receive_message_size_, &args, sync_server_cqs, sync_server_settings_.min_pollers, sync_server_settings_.max_pollers, - sync_server_settings_.cq_timeout_msec)); + sync_server_settings_.cq_timeout_msec, resource_quota_)); if (has_sync_methods) { // This is a Sync server diff --git a/src/cpp/server/server_cc.cc b/src/cpp/server/server_cc.cc index 0d77510e29..d32d6b4904 100644 --- a/src/cpp/server/server_cc.cc +++ b/src/cpp/server/server_cc.cc @@ -47,6 +47,12 @@ namespace grpc { namespace { +// The default value for maximum number of threads that can be created in the +// sync server. This value of 500 is empirically chosen. To increase the max +// number of threads in a sync server, pass a custom ResourceQuota object (with +// the desired number of max-threads set) to the server builder +#define DEFAULT_MAX_SYNC_SERVER_THREADS 500 + class DefaultGlobalCallbacks final : public Server::GlobalCallbacks { public: ~DefaultGlobalCallbacks() override {} @@ -266,9 +272,9 @@ class Server::SyncRequestThreadManager : public ThreadManager { public: SyncRequestThreadManager(Server* server, CompletionQueue* server_cq, std::shared_ptr<GlobalCallbacks> global_callbacks, - int min_pollers, int max_pollers, - int cq_timeout_msec) - : ThreadManager(min_pollers, max_pollers), + grpc_resource_quota* rq, int min_pollers, + int max_pollers, int cq_timeout_msec) + : ThreadManager("SyncServer", rq, min_pollers, max_pollers), server_(server), server_cq_(server_cq), cq_timeout_msec_(cq_timeout_msec), @@ -376,7 +382,8 @@ Server::Server( int max_receive_message_size, ChannelArguments* args, std::shared_ptr<std::vector<std::unique_ptr<ServerCompletionQueue>>> sync_server_cqs, - int min_pollers, int max_pollers, int sync_cq_timeout_msec) + int min_pollers, int max_pollers, int sync_cq_timeout_msec, + grpc_resource_quota* server_rq) : max_receive_message_size_(max_receive_message_size), sync_server_cqs_(std::move(sync_server_cqs)), started_(false), @@ -392,10 +399,22 @@ Server::Server( global_callbacks_->UpdateArguments(args); if (sync_server_cqs_ != nullptr) { + bool default_rq_created = false; + if (server_rq == nullptr) { + server_rq = grpc_resource_quota_create("SyncServer-default-rq"); + grpc_resource_quota_set_max_threads(server_rq, + DEFAULT_MAX_SYNC_SERVER_THREADS); + default_rq_created = true; + } + for (const auto& it : *sync_server_cqs_) { sync_req_mgrs_.emplace_back(new SyncRequestThreadManager( - this, it.get(), global_callbacks_, min_pollers, max_pollers, - sync_cq_timeout_msec)); + this, it.get(), global_callbacks_, server_rq, min_pollers, + max_pollers, sync_cq_timeout_msec)); + } + + if (default_rq_created) { + grpc_resource_quota_unref(server_rq); } } diff --git a/src/cpp/thread_manager/thread_manager.cc b/src/cpp/thread_manager/thread_manager.cc index 02ac56a3fd..fa9eec5f9b 100644 --- a/src/cpp/thread_manager/thread_manager.cc +++ b/src/cpp/thread_manager/thread_manager.cc @@ -22,8 +22,8 @@ #include <mutex> #include <grpc/support/log.h> - #include "src/core/lib/gprpp/thd.h" +#include "src/core/lib/iomgr/exec_ctx.h" namespace grpc { @@ -48,12 +48,17 @@ ThreadManager::WorkerThread::~WorkerThread() { thd_.Join(); } -ThreadManager::ThreadManager(int min_pollers, int max_pollers) +ThreadManager::ThreadManager(const char* name, + grpc_resource_quota* resource_quota, + int min_pollers, int max_pollers) : shutdown_(false), num_pollers_(0), min_pollers_(min_pollers), max_pollers_(max_pollers == -1 ? INT_MAX : max_pollers), - num_threads_(0) {} + num_threads_(0), + max_active_threads_sofar_(0) { + resource_user_ = grpc_resource_user_create(resource_quota, name); +} ThreadManager::~ThreadManager() { { @@ -61,6 +66,8 @@ ThreadManager::~ThreadManager() { GPR_ASSERT(num_threads_ == 0); } + grpc_core::ExecCtx exec_ctx; // grpc_resource_user_unref needs an exec_ctx + grpc_resource_user_unref(resource_user_); CleanupCompletedThreads(); } @@ -81,17 +88,27 @@ bool ThreadManager::IsShutdown() { return shutdown_; } +int ThreadManager::GetMaxActiveThreadsSoFar() { + std::lock_guard<std::mutex> list_lock(list_mu_); + return max_active_threads_sofar_; +} + void ThreadManager::MarkAsCompleted(WorkerThread* thd) { { std::lock_guard<std::mutex> list_lock(list_mu_); completed_threads_.push_back(thd); } - std::lock_guard<std::mutex> lock(mu_); - num_threads_--; - if (num_threads_ == 0) { - shutdown_cv_.notify_one(); + { + std::lock_guard<std::mutex> lock(mu_); + num_threads_--; + if (num_threads_ == 0) { + shutdown_cv_.notify_one(); + } } + + // Give a thread back to the resource quota + grpc_resource_user_free_threads(resource_user_, 1); } void ThreadManager::CleanupCompletedThreads() { @@ -106,14 +123,22 @@ void ThreadManager::CleanupCompletedThreads() { } void ThreadManager::Initialize() { + if (!grpc_resource_user_allocate_threads(resource_user_, min_pollers_)) { + gpr_log(GPR_ERROR, + "No thread quota available to even create the minimum required " + "polling threads (i.e %d). Unable to start the thread manager", + min_pollers_); + abort(); + } + { std::unique_lock<std::mutex> lock(mu_); num_pollers_ = min_pollers_; num_threads_ = min_pollers_; + max_active_threads_sofar_ = min_pollers_; } for (int i = 0; i < min_pollers_; i++) { - // Create a new thread (which ends up calling the MainWorkLoop() function new WorkerThread(this); } } @@ -139,11 +164,15 @@ void ThreadManager::MainWorkLoop() { done = true; break; case WORK_FOUND: - // If we got work and there are now insufficient pollers, start a new - // one - if (!shutdown_ && num_pollers_ < min_pollers_) { + // If we got work and there are now insufficient pollers and there is + // quota available to create a new thread, start a new poller thread + if (!shutdown_ && num_pollers_ < min_pollers_ && + grpc_resource_user_allocate_threads(resource_user_, 1)) { num_pollers_++; num_threads_++; + if (num_threads_ > max_active_threads_sofar_) { + max_active_threads_sofar_ = num_threads_; + } // Drop lock before spawning thread to avoid contention lock.unlock(); new WorkerThread(this); @@ -196,6 +225,8 @@ void ThreadManager::MainWorkLoop() { } }; + // This thread is exiting. Do some cleanup work i.e delete already completed + // worker threads CleanupCompletedThreads(); // If we are here, either ThreadManager is shutting down or it already has diff --git a/src/cpp/thread_manager/thread_manager.h b/src/cpp/thread_manager/thread_manager.h index 5a40f2de47..01043edb31 100644 --- a/src/cpp/thread_manager/thread_manager.h +++ b/src/cpp/thread_manager/thread_manager.h @@ -27,12 +27,14 @@ #include <grpcpp/support/config.h> #include "src/core/lib/gprpp/thd.h" +#include "src/core/lib/iomgr/resource_quota.h" namespace grpc { class ThreadManager { public: - explicit ThreadManager(int min_pollers, int max_pollers); + explicit ThreadManager(const char* name, grpc_resource_quota* resource_quota, + int min_pollers, int max_pollers); virtual ~ThreadManager(); // Initializes and Starts the Rpc Manager threads @@ -84,6 +86,11 @@ class ThreadManager { // all the threads have drained all the outstanding work virtual void Wait(); + // Max number of concurrent threads that were ever active in this thread + // manager so far. This is useful for debugging purposes (and in unit tests) + // to check if resource_quota is properly being enforced. + int GetMaxActiveThreadsSoFar(); + private: // Helper wrapper class around grpc_core::Thread. Takes a ThreadManager object // and starts a new grpc_core::Thread to calls the Run() function. @@ -91,6 +98,24 @@ class ThreadManager { // The Run() function calls ThreadManager::MainWorkLoop() function and once // that completes, it marks the WorkerThread completed by calling // ThreadManager::MarkAsCompleted() + // + // WHY IS THIS NEEDED?: + // When a thread terminates, some other thread *must* call Join() on that + // thread so that the resources are released. Having a WorkerThread wrapper + // will make this easier. Once Run() completes, each thread calls the + // following two functions: + // ThreadManager::CleanupCompletedThreads() + // ThreadManager::MarkAsCompleted() + // + // - MarkAsCompleted() puts the WorkerThread object in the ThreadManger's + // completed_threads_ list + // - CleanupCompletedThreads() calls "Join()" on the threads that are already + // in the completed_threads_ list (since a thread cannot call Join() on + // itself, it calls CleanupCompletedThreads() *before* calling + // MarkAsCompleted()) + // + // TODO(sreek): Consider creating the threads 'detached' so that Join() need + // not be called (and the need for this WorkerThread class is eliminated) class WorkerThread { public: WorkerThread(ThreadManager* thd_mgr); @@ -111,13 +136,21 @@ class ThreadManager { void MarkAsCompleted(WorkerThread* thd); void CleanupCompletedThreads(); - // Protects shutdown_, num_pollers_ and num_threads_ - // TODO: sreek - Change num_pollers and num_threads_ to atomics + // Protects shutdown_, num_pollers_, num_threads_ and + // max_active_threads_sofar_ std::mutex mu_; bool shutdown_; std::condition_variable shutdown_cv_; + // The resource user object to use when requesting quota to create threads + // + // Note: The user of this ThreadManager object must create grpc_resource_quota + // object (that contains the actual max thread quota) and a grpc_resource_user + // object through which quota is requested whenver new threads need to be + // created + grpc_resource_user* resource_user_; + // Number of threads doing polling int num_pollers_; @@ -125,10 +158,15 @@ class ThreadManager { int min_pollers_; int max_pollers_; - // The total number of threads (includes threads includes the threads that are - // currently polling i.e num_pollers_) + // The total number of threads currently active (includes threads includes the + // threads that are currently polling i.e num_pollers_) int num_threads_; + // See GetMaxActiveThreadsSoFar()'s description. + // To be more specific, this variable tracks the max value num_threads_ was + // ever set so far + int max_active_threads_sofar_; + std::mutex list_mu_; std::list<WorkerThread*> completed_threads_; }; diff --git a/src/ruby/ext/grpc/rb_grpc_imports.generated.c b/src/ruby/ext/grpc/rb_grpc_imports.generated.c index 8a2edc41f8..37b97aabd4 100644 --- a/src/ruby/ext/grpc/rb_grpc_imports.generated.c +++ b/src/ruby/ext/grpc/rb_grpc_imports.generated.c @@ -93,6 +93,7 @@ grpc_resource_quota_create_type grpc_resource_quota_create_import; grpc_resource_quota_ref_type grpc_resource_quota_ref_import; grpc_resource_quota_unref_type grpc_resource_quota_unref_import; grpc_resource_quota_resize_type grpc_resource_quota_resize_import; +grpc_resource_quota_set_max_threads_type grpc_resource_quota_set_max_threads_import; grpc_resource_quota_arg_vtable_type grpc_resource_quota_arg_vtable_import; grpc_channelz_get_top_channels_type grpc_channelz_get_top_channels_import; grpc_channelz_get_channel_type grpc_channelz_get_channel_import; @@ -345,6 +346,7 @@ void grpc_rb_load_imports(HMODULE library) { grpc_resource_quota_ref_import = (grpc_resource_quota_ref_type) GetProcAddress(library, "grpc_resource_quota_ref"); grpc_resource_quota_unref_import = (grpc_resource_quota_unref_type) GetProcAddress(library, "grpc_resource_quota_unref"); grpc_resource_quota_resize_import = (grpc_resource_quota_resize_type) GetProcAddress(library, "grpc_resource_quota_resize"); + grpc_resource_quota_set_max_threads_import = (grpc_resource_quota_set_max_threads_type) GetProcAddress(library, "grpc_resource_quota_set_max_threads"); grpc_resource_quota_arg_vtable_import = (grpc_resource_quota_arg_vtable_type) GetProcAddress(library, "grpc_resource_quota_arg_vtable"); grpc_channelz_get_top_channels_import = (grpc_channelz_get_top_channels_type) GetProcAddress(library, "grpc_channelz_get_top_channels"); grpc_channelz_get_channel_import = (grpc_channelz_get_channel_type) GetProcAddress(library, "grpc_channelz_get_channel"); diff --git a/src/ruby/ext/grpc/rb_grpc_imports.generated.h b/src/ruby/ext/grpc/rb_grpc_imports.generated.h index 5a7884cdcd..f7a00046e3 100644 --- a/src/ruby/ext/grpc/rb_grpc_imports.generated.h +++ b/src/ruby/ext/grpc/rb_grpc_imports.generated.h @@ -254,6 +254,9 @@ extern grpc_resource_quota_unref_type grpc_resource_quota_unref_import; typedef void(*grpc_resource_quota_resize_type)(grpc_resource_quota* resource_quota, size_t new_size); extern grpc_resource_quota_resize_type grpc_resource_quota_resize_import; #define grpc_resource_quota_resize grpc_resource_quota_resize_import +typedef void(*grpc_resource_quota_set_max_threads_type)(grpc_resource_quota* resource_quota, int new_max_threads); +extern grpc_resource_quota_set_max_threads_type grpc_resource_quota_set_max_threads_import; +#define grpc_resource_quota_set_max_threads grpc_resource_quota_set_max_threads_import typedef const grpc_arg_pointer_vtable*(*grpc_resource_quota_arg_vtable_type)(void); extern grpc_resource_quota_arg_vtable_type grpc_resource_quota_arg_vtable_import; #define grpc_resource_quota_arg_vtable grpc_resource_quota_arg_vtable_import diff --git a/summerofcode/2018/naresh.md b/summerofcode/2018/naresh.md new file mode 100644 index 0000000000..0d196bd600 --- /dev/null +++ b/summerofcode/2018/naresh.md @@ -0,0 +1,191 @@ +# Project overview + +## Title + +Enable Building of gRPC Python with Bazel + +## Overview + +gRPC Python currently has a constellation of scripts written to build the +project, but it has a lot of limitations in terms of speed and maintainability. +[Bazel](https://bazel.build/) is the open-sourced variant of Google's internal +system, Blaze, which is an ideal replacement for building such projects in a +fast and declarative fashion. But Bazel in itself is still in active +development, especially in terms of Python (amongst a few other languages). + +The project aimed to fill this gap and build gRPC Python with Bazel. + +[Project page](https://summerofcode.withgoogle.com/projects/#6482576244473856) + +[Link to proposal](https://storage.googleapis.com/summerofcode-prod.appspot.com/gsoc/core_project/doc/5316764725411840_1522049732_Naresh_Ramesh_-_GSoC_proposal.pdf) + +## Thoughts and challenges + +### State of Bazel for Python + +Although previously speculated, the project didn't require any contributions +directly to [bazelbuild/bazel](https://github.com/bazelbuild/bazel). The Bazel +rules for Python are currently being separated out into their own repo at +[bazelbuild/rules_python](https://github.com/bazelbuild/rules_python/). + +Bazel is [still very much in active development for +Python](https://groups.google.com/forum/#!topic/bazel-sig-python/iQjV9sfSufw) +though. There's still challenges when it comes to building for Python 2 vs 3. +Using pip packages is still in experimental. Bazel Python support is currently +distributed across these two repositories and is yet to begin migration to one +place (which will be +[bazelbuild/rules_python](https://github.com/bazelbuild/rules_python/)). + +Bazel's roadmap for Python is publicly available [here as a Google +doc](https://docs.google.com/document/d/1A6J3j3y1SQ0HliS86_mZBnB5UeBe7vExWL2Ryd_EONI/edit). + +### Cross collaboration between projects + +Cross contribution surprisingly came up because of building protobuf sources +for Python, which is still not natively supported by Bazel. An existing +repository, [pubref/rules_protobuf](https://github.com/pubref/rules_protobuf), +which was maintained by an independent maintainer (i.e. not a part of Bazel) +helped solve this problem, but had [one major blocking +issue](https://github.com/pubref/rules_protobuf/issues/233) and could not be +resolved at the source. But [a solution to the +issue](https://github.com/pubref/rules_protobuf/pull/196) was proposed by user +dududko, which was not merged because of failing golang tests but worked well +for Python. Hence, a fork of this repo was made and is to be used with gRPC +until the solution can be merged back at the source. + +### Building Cython code + +Building Cython code is still not supported by Bazel, but the team at +[cython/cython](https://github.com/cython/cython) have added support for Bazel +on their side. The way it works is by including Cython as a third-party Bazel +dependency and using custom Bazel rules for building our Cython code using the +binary within the dependency. + +### Packaging Python code using Bazel + +pip and PyPI still remain the de-facto standard for distributing Python +packages. Although Bazel is pretty versatile and is amazing for it's +reproducible and incremental build capabilities, these can only be still used +by the contributors and developers for building and testing the gRPC code. But +there's no way yet to build Python packages for distribution. + +### Building gRPC Python with Bazel on Kokoro (internal CI) + +Integration with the internal CI was one of the areas that highlighted how +simple Bazel can be to use. gRPC was already using a dockerized Bazel setup to +build some of it's core code (but not as the primary build setup). Adding a new +job on the internal CI ended up being as simple as creating a new shell script +to install the required dependencies (which were python-dev and Bazel) and a +new configuration file which pointed to the subdirectiory (src/python) under +which to look for targets and run the tests accordingly. + +### Handling imports in Python code + +When writing Python packages, imports in nested modules are typically made +relative to the package root. But because of the way Bazel works, these paths +wouldn't make sense from the Workspace root. So, the folks at Bazel have added +a nifty `imports` parameter to all the Python rules which lets us specify for +each target, which path to consider as the root. This parameter allows for +relative paths like `imports = ["../",]`. + +### Fetching Python headers for Cython code to use + +Cython code makes use of `Python.h`, which pulls in the Python API for C +extension modules to use, but it's location depending on the Python version and +operating system the code is building on. To make this easier, the folks at +Tensorflow wrote [repository rules for Python +autoconfiguration](https://github.com/tensorflow/tensorflow/tree/e447ae4759317156d31a9421290716f0ffbffcd8/third_party/py). +This has been [adapted with some some +modifications](https://github.com/grpc/grpc/pull/15992) for use in gRPC Python +as well. + +## How to use + +All the Bazel tests for gRPC Python can be run using a single command: + +```bash +bazel test --spawn_strategy=standalone --genrule_strategy=standalone //src/python/... +``` + +If any specific test is to be run, like say `LoggingPoolTest` (which is present +in +`src/python/grpcio_tests/tests/unit/framework/foundation/_logging_pool_test.py`), +the command to run would be: + +```bash +bazel test --spawn_strategy=standalone --genrule_strategy=standalone //src/python/grpcio_tests/tests/unit/framework/foundation:logging_pool_test +``` + +where, `logging_pool_test` is the name of the Bazel target for this test. + +Similarly, to run a particular method, use: + +```bash +bazel test --spawn_strategy=standalone --genrule_strategy=standalone //src/python/grpcio_tests/tests/unit/_rpc_test --test_arg=RPCTest.testUnrecognizedMethod +``` + +## Useful Bazel flags + +- Use `bazel build` with a `-s` flag to see the logs being printed out to + standard output while building. +- Similarly, use `bazel test` with a `--test_output=streamed` to see the the + test logs while testing. Something to know while using this flag is that all + tests will be run locally, without sharding, one at a time. + +## Contributions + +### Related to the project + +- [435c6f8](https://github.com/grpc/grpc/commit/435c6f8d1e53783ec049b3482445813afd8bc514) + Update grpc_gevent cython files to include .pxi +- [74426fd](https://github.com/grpc/grpc/commit/74426fd2164c51d6754732ebe372133c19ba718c) + Add gevent_util.h to grpc_base_c Bazel target +- [b6518af](https://github.com/grpc/grpc/commit/b6518afdd610f0115b42aee1ffc71520c6b0d6b1) + Upgrade Bazel to 0.15.0 +- [ebcf04d](https://github.com/grpc/grpc/commit/ebcf04d075333c42979536c5dd2091d363f67e5a) + Kokoro setup for building gRPC Python with Bazel +- [3af1aaa](https://github.com/grpc/grpc/commit/3af1aaadabf49bc6274711a11f81627c0f351a9a) + Basic setup to build gRPC Python with Bazel +- [11f199e](https://github.com/grpc/grpc/commit/11f199e34dc416a2bd8b56391b242a867bedade4) + Workspace changes to build gRPC Python with Bazel +- [848fd9d](https://github.com/grpc/grpc/commit/848fd9d75f6df10f00e8328ff052c0237b3002ab) + Minimal Bazel BUILD files for grpcio Python + +### Other contibutions + +- [89ce16b](https://github.com/grpc/grpc/commit/89ce16b6daaad4caeb1c9ba670c6c4b62ea1a93c) + Update Dockerfiles for python artifacts to use latest git version +- [32f7c48](https://github.com/grpc/grpc/commit/32f7c48dad71cac7af652bf994ab1dde3ddb0607) + Revert removals from python artifact dockerfiles +- [712eb9f](https://github.com/grpc/grpc/commit/712eb9ff91cde66af94e8381ec01ad512ed6d03c) + Make logging after success in jobset more apparent +- [c6e4372](https://github.com/grpc/grpc/commit/c6e4372f8a93bb0eb996b5f202465785422290f2) + Create README for gRPC Python reflection package +- [2e113ca](https://github.com/grpc/grpc/commit/2e113ca6b2cc31aa8a9687d40ee1bd759381654f) + Update logging in Python to use module-level logger + +### Pending PRs + +- BUILD files for all tests in + [tests.json](https://github.com/ghostwriternr/grpc/blob/70c8a58b2918a5369905e5a203d7ce7897b6207e/src/python/grpcio_tests/tests/tests.json). +- BUILD files for gRPC testing, gRPC health checking, gRPC reflection. +- (Yet to complete) BUILD files for grpcio_tools. One test depends on this. + +## Known issues + +- [grpc/grpc #16336](https://github.com/grpc/grpc/issues/16336) RuntimeError + for `_reconnect_test` Python unit test with Bazel +- Some tests in Bazel pass despite throwing an exception. Example: + `testAbortedStreamStream` in + `src/python/grpcio_tests/tests/unit/_metadata_code_details_test.py`. +- [#14557](https://github.com/grpc/grpc/pull/14557) introduced a minor bug + where the module level loggers don't initialize a default logging handler. +- Sanity test doesn't make sense in the context of Bazel, and thus fails. +- There are some issues with Python2 vs Python3. Specifically, + - On some machines, “cygrpc.so: undefined symbol: _Py_FalseStruct” error + shows up. This is because of incorrect Python version being used to build + Cython. + - Some external packages like enum34 throw errors when used with Python 3 and + some extra packages are currently installed as Python version in current + build scripts. For now, the extra packages are added to a + `requirements.bazel.txt` file in the repository root. diff --git a/templates/Makefile.template b/templates/Makefile.template index 50b81e5f9f..2e3d75d819 100644 --- a/templates/Makefile.template +++ b/templates/Makefile.template @@ -668,11 +668,20 @@ LDLIBS_SECURE += $(addprefix -l, $(LIBS_SECURE)) endif + # gpr .pc file + PC_NAME = gpr + PC_DESCRIPTION = gRPC platform support library + PC_CFLAGS = + PC_REQUIRES_PRIVATE = $(PC_REQUIRES_GPR) + PC_LIBS_PRIVATE = $(PC_LIBS_GPR) + PC_LIB = -lgpr + GPR_PC_FILE := $(CORE_PC_TEMPLATE) + # grpc .pc file PC_NAME = gRPC PC_DESCRIPTION = high performance general RPC framework PC_CFLAGS = - PC_REQUIRES_PRIVATE = $(PC_REQUIRES_GRPC) $(PC_REQUIRES_SECURE) + PC_REQUIRES_PRIVATE = gpr $(PC_REQUIRES_GRPC) $(PC_REQUIRES_SECURE) PC_LIBS_PRIVATE = $(PC_LIBS_GRPC) $(PC_LIBS_SECURE) PC_LIB = -lgrpc GRPC_PC_FILE := $(CORE_PC_TEMPLATE) @@ -681,7 +690,7 @@ PC_NAME = gRPC unsecure PC_DESCRIPTION = high performance general RPC framework without SSL PC_CFLAGS = - PC_REQUIRES_PRIVATE = $(PC_REQUIRES_GRPC) + PC_REQUIRES_PRIVATE = gpr $(PC_REQUIRES_GRPC) PC_LIBS_PRIVATE = $(PC_LIBS_GRPC) PC_LIB = -lgrpc GRPC_UNSECURE_PC_FILE := $(CORE_PC_TEMPLATE) @@ -976,9 +985,9 @@ % endif % endfor - pc_c: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc.pc + pc_c: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc.pc $(LIBDIR)/$(CONFIG)/pkgconfig/gpr.pc - pc_c_unsecure: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc_unsecure.pc + pc_c_unsecure: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc_unsecure.pc $(LIBDIR)/$(CONFIG)/pkgconfig/gpr.pc pc_cxx: $(LIBDIR)/$(CONFIG)/pkgconfig/grpc++.pc @@ -1199,6 +1208,11 @@ $(E) "[MAKE] Generating $@" $(Q) echo "$(CACHE_MK)" | tr , '\n' >$@ + $(LIBDIR)/$(CONFIG)/pkgconfig/gpr.pc: + $(E) "[MAKE] Generating $@" + $(Q) mkdir -p $(@D) + $(Q) echo "$(GPR_PC_FILE)" | tr , '\n' >$@ + $(LIBDIR)/$(CONFIG)/pkgconfig/grpc.pc: $(E) "[MAKE] Generating $@" $(Q) mkdir -p $(@D) @@ -1397,6 +1411,7 @@ install-pkg-config_c: pc_c pc_c_unsecure $(E) "[INSTALL] Installing C pkg-config files" $(Q) $(INSTALL) -d $(prefix)/lib/pkgconfig + $(Q) $(INSTALL) -m 0644 $(LIBDIR)/$(CONFIG)/pkgconfig/gpr.pc $(prefix)/lib/pkgconfig/gpr.pc $(Q) $(INSTALL) -m 0644 $(LIBDIR)/$(CONFIG)/pkgconfig/grpc.pc $(prefix)/lib/pkgconfig/grpc.pc $(Q) $(INSTALL) -m 0644 $(LIBDIR)/$(CONFIG)/pkgconfig/grpc_unsecure.pc $(prefix)/lib/pkgconfig/grpc_unsecure.pc diff --git a/test/core/iomgr/resource_quota_test.cc b/test/core/iomgr/resource_quota_test.cc index 059ff7b5f8..f3b35fed32 100644 --- a/test/core/iomgr/resource_quota_test.cc +++ b/test/core/iomgr/resource_quota_test.cc @@ -798,6 +798,98 @@ static void test_negative_rq_free_pool(void) { } } +// Simple test to check resource quota thread limits +static void test_thread_limit() { + grpc_core::ExecCtx exec_ctx; + + grpc_resource_quota* rq = grpc_resource_quota_create("test_thread_limit"); + grpc_resource_user* ru1 = grpc_resource_user_create(rq, "ru1"); + grpc_resource_user* ru2 = grpc_resource_user_create(rq, "ru2"); + + // Max threads = 100 + grpc_resource_quota_set_max_threads(rq, 100); + + // Request quota for 100 threads (50 for ru1, 50 for ru2) + GPR_ASSERT(grpc_resource_user_allocate_threads(ru1, 10)); + GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 10)); + GPR_ASSERT(grpc_resource_user_allocate_threads(ru1, 40)); + GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 40)); + + // Threads exhausted. Next request must fail + GPR_ASSERT(!grpc_resource_user_allocate_threads(ru2, 20)); + + // Free 20 threads from two different users + grpc_resource_user_free_threads(ru1, 10); + grpc_resource_user_free_threads(ru2, 10); + + // Next request to 20 threads must succeed + GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 20)); + + // No more thread quota again + GPR_ASSERT(!grpc_resource_user_allocate_threads(ru1, 20)); + + // Free 10 more + grpc_resource_user_free_threads(ru1, 10); + + GPR_ASSERT(grpc_resource_user_allocate_threads(ru1, 5)); + GPR_ASSERT( + !grpc_resource_user_allocate_threads(ru2, 10)); // Only 5 available + GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 5)); + + // Teardown (ru1 and ru2 release all the quota back to rq) + grpc_resource_user_unref(ru1); + grpc_resource_user_unref(ru2); + grpc_resource_quota_unref(rq); +} + +// Change max quota in either direction dynamically +static void test_thread_maxquota_change() { + grpc_core::ExecCtx exec_ctx; + + grpc_resource_quota* rq = + grpc_resource_quota_create("test_thread_maxquota_change"); + grpc_resource_user* ru1 = grpc_resource_user_create(rq, "ru1"); + grpc_resource_user* ru2 = grpc_resource_user_create(rq, "ru2"); + + // Max threads = 100 + grpc_resource_quota_set_max_threads(rq, 100); + + // Request quota for 100 threads (50 for ru1, 50 for ru2) + GPR_ASSERT(grpc_resource_user_allocate_threads(ru1, 50)); + GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 50)); + + // Threads exhausted. Next request must fail + GPR_ASSERT(!grpc_resource_user_allocate_threads(ru2, 20)); + + // Increase maxquota and retry + // Max threads = 150; + grpc_resource_quota_set_max_threads(rq, 150); + GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 20)); // ru2=70, ru1=50 + + // Decrease maxquota (Note: Quota already given to ru1 and ru2 is unaffected) + // Max threads = 10; + grpc_resource_quota_set_max_threads(rq, 10); + + // New requests will fail until quota is available + GPR_ASSERT(!grpc_resource_user_allocate_threads(ru1, 10)); + + // Make quota available + grpc_resource_user_free_threads(ru1, 50); // ru1 now has 0 + GPR_ASSERT(!grpc_resource_user_allocate_threads(ru1, 10)); // not enough + + grpc_resource_user_free_threads(ru2, 70); // ru2 now has 0 + + // Now we can get quota up-to 10, the current max + GPR_ASSERT(grpc_resource_user_allocate_threads(ru2, 10)); + // No more thread quota again + GPR_ASSERT(!grpc_resource_user_allocate_threads(ru1, 10)); + + // Teardown (ru1 and ru2 release all the quota back to rq) + grpc_resource_user_unref(ru1); + grpc_resource_user_unref(ru2); + grpc_resource_quota_unref(rq); +} + int main(int argc, char** argv) { grpc_test_init(argc, argv); grpc_init(); @@ -827,6 +919,11 @@ int main(int argc, char** argv) { test_negative_rq_free_pool(); gpr_mu_destroy(&g_mu); gpr_cv_destroy(&g_cv); + + // Resource quota thread related + test_thread_limit(); + test_thread_maxquota_change(); + grpc_shutdown(); return 0; } diff --git a/test/core/surface/public_headers_must_be_c89.c b/test/core/surface/public_headers_must_be_c89.c index 69b3de16c4..42dc95b9bb 100644 --- a/test/core/surface/public_headers_must_be_c89.c +++ b/test/core/surface/public_headers_must_be_c89.c @@ -132,6 +132,7 @@ int main(int argc, char **argv) { printf("%lx", (unsigned long) grpc_resource_quota_ref); printf("%lx", (unsigned long) grpc_resource_quota_unref); printf("%lx", (unsigned long) grpc_resource_quota_resize); + printf("%lx", (unsigned long) grpc_resource_quota_set_max_threads); printf("%lx", (unsigned long) grpc_resource_quota_arg_vtable); printf("%lx", (unsigned long) grpc_channelz_get_top_channels); printf("%lx", (unsigned long) grpc_channelz_get_channel); diff --git a/test/cpp/thread_manager/thread_manager_test.cc b/test/cpp/thread_manager/thread_manager_test.cc index 7a95a9f17d..838f5f72ad 100644 --- a/test/cpp/thread_manager/thread_manager_test.cc +++ b/test/cpp/thread_manager/thread_manager_test.cc @@ -30,30 +30,44 @@ #include "test/cpp/util/test_config.h" namespace grpc { + +struct ThreadManagerTestSettings { + // The min number of pollers that SHOULD be active in ThreadManager + int min_pollers; + // The max number of pollers that could be active in ThreadManager + int max_pollers; + // The sleep duration in PollForWork() function to simulate "polling" + int poll_duration_ms; + // The sleep duration in DoWork() function to simulate "work" + int work_duration_ms; + // Max number of times PollForWork() is called before shutting down + int max_poll_calls; +}; + class ThreadManagerTest final : public grpc::ThreadManager { public: - ThreadManagerTest() - : ThreadManager(kMinPollers, kMaxPollers), + ThreadManagerTest(const char* name, grpc_resource_quota* rq, + const ThreadManagerTestSettings& settings) + : ThreadManager(name, rq, settings.min_pollers, settings.max_pollers), + settings_(settings), num_do_work_(0), num_poll_for_work_(0), num_work_found_(0) {} grpc::ThreadManager::WorkStatus PollForWork(void** tag, bool* ok) override; void DoWork(void* tag, bool ok) override; - void PerformTest(); + + // Get number of times PollForWork() returned WORK_FOUND + int GetNumWorkFound(); + // Get number of times DoWork() was called + int GetNumDoWork(); private: void SleepForMs(int sleep_time_ms); - static const int kMinPollers = 2; - static const int kMaxPollers = 10; - - static const int kPollingTimeoutMsec = 10; - static const int kDoWorkDurationMsec = 1; - - // PollForWork will return SHUTDOWN after these many number of invocations - static const int kMaxNumPollForWork = 50; + ThreadManagerTestSettings settings_; + // Counters gpr_atm num_do_work_; // Number of calls to DoWork gpr_atm num_poll_for_work_; // Number of calls to PollForWork gpr_atm num_work_found_; // Number of times WORK_FOUND was returned @@ -69,54 +83,117 @@ void ThreadManagerTest::SleepForMs(int duration_ms) { grpc::ThreadManager::WorkStatus ThreadManagerTest::PollForWork(void** tag, bool* ok) { int call_num = gpr_atm_no_barrier_fetch_add(&num_poll_for_work_, 1); - - if (call_num >= kMaxNumPollForWork) { + if (call_num >= settings_.max_poll_calls) { Shutdown(); return SHUTDOWN; } - // Simulate "polling for work" by sleeping for sometime - SleepForMs(kPollingTimeoutMsec); - + SleepForMs(settings_.poll_duration_ms); // Simulate "polling" duration *tag = nullptr; *ok = true; - // Return timeout roughly 1 out of every 3 calls + // Return timeout roughly 1 out of every 3 calls just to make the test a bit + // more interesting if (call_num % 3 == 0) { return TIMEOUT; - } else { - gpr_atm_no_barrier_fetch_add(&num_work_found_, 1); - return WORK_FOUND; } + + gpr_atm_no_barrier_fetch_add(&num_work_found_, 1); + return WORK_FOUND; } void ThreadManagerTest::DoWork(void* tag, bool ok) { gpr_atm_no_barrier_fetch_add(&num_do_work_, 1); - SleepForMs(kDoWorkDurationMsec); // Simulate doing work by sleeping + SleepForMs(settings_.work_duration_ms); // Simulate work by sleeping } -void ThreadManagerTest::PerformTest() { - // Initialize() starts the ThreadManager - Initialize(); - - // Wait for all the threads to gracefully terminate - Wait(); +int ThreadManagerTest::GetNumWorkFound() { + return static_cast<int>(gpr_atm_no_barrier_load(&num_work_found_)); +} - // The number of times DoWork() was called is equal to the number of times - // WORK_FOUND was returned - gpr_log(GPR_DEBUG, "DoWork() called %" PRIdPTR " times", - gpr_atm_no_barrier_load(&num_do_work_)); - GPR_ASSERT(gpr_atm_no_barrier_load(&num_do_work_) == - gpr_atm_no_barrier_load(&num_work_found_)); +int ThreadManagerTest::GetNumDoWork() { + return static_cast<int>(gpr_atm_no_barrier_load(&num_do_work_)); } } // namespace grpc +// Test that the number of times DoWork() is called is equal to the number of +// times PollForWork() returned WORK_FOUND +static void TestPollAndWork() { + grpc_resource_quota* rq = grpc_resource_quota_create("Test-poll-and-work"); + grpc::ThreadManagerTestSettings settings = { + 2 /* min_pollers */, 10 /* max_pollers */, 10 /* poll_duration_ms */, + 1 /* work_duration_ms */, 50 /* max_poll_calls */}; + + grpc::ThreadManagerTest test_thread_mgr("TestThreadManager", rq, settings); + grpc_resource_quota_unref(rq); + + test_thread_mgr.Initialize(); // Start the thread manager + test_thread_mgr.Wait(); // Wait for all threads to finish + + // Verify that The number of times DoWork() was called is equal to the number + // of times WORK_FOUND was returned + gpr_log(GPR_DEBUG, "DoWork() called %d times", + test_thread_mgr.GetNumDoWork()); + GPR_ASSERT(test_thread_mgr.GetNumDoWork() == + test_thread_mgr.GetNumWorkFound()); +} + +static void TestThreadQuota() { + const int kMaxNumThreads = 3; + grpc_resource_quota* rq = grpc_resource_quota_create("Test-thread-quota"); + grpc_resource_quota_set_max_threads(rq, kMaxNumThreads); + + // Set work_duration_ms to be much greater than poll_duration_ms. This way, + // the thread manager will be forced to create more 'polling' threads to + // honor the min_pollers guarantee + grpc::ThreadManagerTestSettings settings = { + 1 /* min_pollers */, 1 /* max_pollers */, 1 /* poll_duration_ms */, + 10 /* work_duration_ms */, 50 /* max_poll_calls */}; + + // Create two thread managers (but with same resource quota). This means + // that the max number of active threads across BOTH the thread managers + // cannot be greater than kMaxNumthreads + grpc::ThreadManagerTest test_thread_mgr_1("TestThreadManager-1", rq, + settings); + grpc::ThreadManagerTest test_thread_mgr_2("TestThreadManager-2", rq, + settings); + // It is ok to unref resource quota before starting thread managers. + grpc_resource_quota_unref(rq); + + // Start both thread managers + test_thread_mgr_1.Initialize(); + test_thread_mgr_2.Initialize(); + + // Wait for both to finish + test_thread_mgr_1.Wait(); + test_thread_mgr_2.Wait(); + + // Now verify that the total number of active threads in either thread manager + // never exceeds kMaxNumThreads + // + // NOTE: Actually the total active threads across *both* thread managers at + // any point of time never exceeds kMaxNumThreads but unfortunately there is + // no easy way to verify it (i.e we can't just do (max1 + max2 <= k)) + // Its okay to not test this case here. The resource quota c-core tests + // provide enough coverage to resource quota object with multiple resource + // users + int max1 = test_thread_mgr_1.GetMaxActiveThreadsSoFar(); + int max2 = test_thread_mgr_2.GetMaxActiveThreadsSoFar(); + gpr_log( + GPR_DEBUG, + "MaxActiveThreads in TestThreadManager_1: %d, TestThreadManager_2: %d", + max1, max2); + GPR_ASSERT(max1 <= kMaxNumThreads && max2 <= kMaxNumThreads); +} + int main(int argc, char** argv) { std::srand(std::time(nullptr)); - grpc::testing::InitTest(&argc, &argv, true); - grpc::ThreadManagerTest test_rpc_manager; - test_rpc_manager.PerformTest(); + grpc_init(); + + TestPollAndWork(); + TestThreadQuota(); + grpc_shutdown(); return 0; } diff --git a/tools/run_tests/performance/bq_upload_result.py b/tools/run_tests/performance/bq_upload_result.py index 6702587557..b442f0cf83 100755 --- a/tools/run_tests/performance/bq_upload_result.py +++ b/tools/run_tests/performance/bq_upload_result.py @@ -128,14 +128,16 @@ def _flatten_result_inplace(scenario_result): def _populate_metadata_inplace(scenario_result): """Populates metadata based on environment variables set by Jenkins.""" - # NOTE: Grabbing the Jenkins environment variables will only work if the - # driver is running locally on the same machine where Jenkins has started + # NOTE: Grabbing the Kokoro environment variables will only work if the + # driver is running locally on the same machine where Kokoro has started # the job. For our setup, this is currently the case, so just assume that. - build_number = os.getenv('BUILD_NUMBER') - build_url = os.getenv('BUILD_URL') - job_name = os.getenv('JOB_NAME') - git_commit = os.getenv('GIT_COMMIT') + build_number = os.getenv('KOKORO_BUILD_NUMBER') + build_url = 'https://source.cloud.google.com/results/invocations/%s' % os.getenv( + 'KOKORO_BUILD_ID') + job_name = os.getenv('KOKORO_JOB_NAME') + git_commit = os.getenv('KOKORO_GIT_COMMIT') # actual commit is the actual head of PR that is getting tested + # TODO(jtattermusch): unclear how to obtain on Kokoro git_actual_commit = os.getenv('ghprbActualCommit') utc_timestamp = str(calendar.timegm(time.gmtime())) diff --git a/tools/run_tests/python_utils/upload_test_results.py b/tools/run_tests/python_utils/upload_test_results.py index cbb4c32a2a..9d99703725 100644 --- a/tools/run_tests/python_utils/upload_test_results.py +++ b/tools/run_tests/python_utils/upload_test_results.py @@ -68,15 +68,13 @@ _INTEROP_RESULTS_SCHEMA = [ def _get_build_metadata(test_results): - """Add Jenkins/Kokoro build metadata to test_results based on environment - variables set by Jenkins/Kokoro. + """Add Kokoro build metadata to test_results based on environment + variables set by Kokoro. """ - build_id = os.getenv('BUILD_ID') or os.getenv('KOKORO_BUILD_NUMBER') - build_url = os.getenv('BUILD_URL') - if os.getenv('KOKORO_BUILD_ID'): - build_url = 'https://source.cloud.google.com/results/invocations/%s' % os.getenv( - 'KOKORO_BUILD_ID') - job_name = os.getenv('JOB_BASE_NAME') or os.getenv('KOKORO_JOB_NAME') + build_id = os.getenv('KOKORO_BUILD_NUMBER') + build_url = 'https://source.cloud.google.com/results/invocations/%s' % os.getenv( + 'KOKORO_BUILD_ID') + job_name = os.getenv('KOKORO_JOB_NAME') if build_id: test_results['build_id'] = build_id |