diff options
Diffstat (limited to 'src/cpp/server/load_reporter')
16 files changed, 1901 insertions, 14 deletions
diff --git a/src/cpp/server/load_reporter/constants.h b/src/cpp/server/load_reporter/constants.h new file mode 100644 index 0000000000..00ad794a04 --- /dev/null +++ b/src/cpp/server/load_reporter/constants.h @@ -0,0 +1,81 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef GRPC_SRC_CPP_SERVER_LOAD_REPORTER_UTIL_H +#define GRPC_SRC_CPP_SERVER_LOAD_REPORTER_UTIL_H + +#include <grpc/impl/codegen/port_platform.h> + +namespace grpc { +namespace load_reporter { + +// TODO(juanlishen): Update the version number with the PR number every time +// there is any change to the server load reporter. +constexpr uint32_t kVersion = 15853; + +// TODO(juanlishen): This window size is from the internal spec for the load +// reporter. Need to ask the gRPC LB team whether we should make this and the +// fetching interval configurable. +constexpr uint32_t kFeedbackSampleWindowSeconds = 10; +constexpr uint32_t kFetchAndSampleIntervalSeconds = 1; + +constexpr size_t kLbIdLength = 8; +constexpr size_t kIpv4AddressLength = 8; +constexpr size_t kIpv6AddressLength = 32; + +constexpr char kInvalidLbId[] = "<INVALID_LBID_238dsb234890rb>"; + +// Call statuses. + +constexpr char kCallStatusOk[] = "OK"; +constexpr char kCallStatusServerError[] = "5XX"; +constexpr char kCallStatusClientError[] = "4XX"; + +// Tag keys. + +constexpr char kTagKeyToken[] = "token"; +constexpr char kTagKeyHost[] = "host"; +constexpr char kTagKeyUserId[] = "user_id"; +constexpr char kTagKeyStatus[] = "status"; +constexpr char kTagKeyMetricName[] = "metric_name"; + +// Measure names. + +constexpr char kMeasureStartCount[] = "grpc.io/lb/start_count"; +constexpr char kMeasureEndCount[] = "grpc.io/lb/end_count"; +constexpr char kMeasureEndBytesSent[] = "grpc.io/lb/bytes_sent"; +constexpr char kMeasureEndBytesReceived[] = "grpc.io/lb/bytes_received"; +constexpr char kMeasureEndLatencyMs[] = "grpc.io/lb/latency_ms"; +constexpr char kMeasureOtherCallMetric[] = "grpc.io/lb/other_call_metric"; + +// View names. + +constexpr char kViewStartCount[] = "grpc.io/lb_view/start_count"; +constexpr char kViewEndCount[] = "grpc.io/lb_view/end_count"; +constexpr char kViewEndBytesSent[] = "grpc.io/lb_view/bytes_sent"; +constexpr char kViewEndBytesReceived[] = "grpc.io/lb_view/bytes_received"; +constexpr char kViewEndLatencyMs[] = "grpc.io/lb_view/latency_ms"; +constexpr char kViewOtherCallMetricCount[] = + "grpc.io/lb_view/other_call_metric_count"; +constexpr char kViewOtherCallMetricValue[] = + "grpc.io/lb_view/other_call_metric_value"; + +} // namespace load_reporter +} // namespace grpc + +#endif // GRPC_SRC_CPP_SERVER_LOAD_REPORTER_UTIL_H diff --git a/src/cpp/server/load_reporter/get_cpu_stats.h b/src/cpp/server/load_reporter/get_cpu_stats.h new file mode 100644 index 0000000000..f514b0752f --- /dev/null +++ b/src/cpp/server/load_reporter/get_cpu_stats.h @@ -0,0 +1,36 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef GRPC_SRC_CPP_SERVER_LOAD_REPORTER_GET_CPU_STATS_H +#define GRPC_SRC_CPP_SERVER_LOAD_REPORTER_GET_CPU_STATS_H + +#include <grpc/impl/codegen/port_platform.h> + +#include <utility> + +namespace grpc { +namespace load_reporter { + +// Reads the CPU stats (in a pair of busy and total numbers) from the system. +// The units of the stats should be the same. +std::pair<uint64_t, uint64_t> GetCpuStatsImpl(); + +} // namespace load_reporter +} // namespace grpc + +#endif // GRPC_SRC_CPP_SERVER_LOAD_REPORTER_GET_CPU_STATS_H diff --git a/src/cpp/server/load_reporter/get_cpu_stats_linux.cc b/src/cpp/server/load_reporter/get_cpu_stats_linux.cc new file mode 100644 index 0000000000..9c1fd0cd0b --- /dev/null +++ b/src/cpp/server/load_reporter/get_cpu_stats_linux.cc @@ -0,0 +1,45 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#ifdef GPR_LINUX + +#include <cstdio> + +#include "src/cpp/server/load_reporter/get_cpu_stats.h" + +namespace grpc { +namespace load_reporter { + +std::pair<uint64_t, uint64_t> GetCpuStatsImpl() { + uint64_t busy = 0, total = 0; + FILE* fp; + fp = fopen("/proc/stat", "r"); + uint64_t user, nice, system, idle; + fscanf(fp, "cpu %lu %lu %lu %lu", &user, &nice, &system, &idle); + fclose(fp); + busy = user + nice + system; + total = busy + idle; + return std::make_pair(busy, total); +} + +} // namespace load_reporter +} // namespace grpc + +#endif // GPR_LINUX diff --git a/src/cpp/server/load_reporter/get_cpu_stats_macos.cc b/src/cpp/server/load_reporter/get_cpu_stats_macos.cc new file mode 100644 index 0000000000..dbdde304c2 --- /dev/null +++ b/src/cpp/server/load_reporter/get_cpu_stats_macos.cc @@ -0,0 +1,45 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#ifdef GPR_APPLE + +#include <mach/mach.h> + +#include "src/cpp/server/load_reporter/get_cpu_stats.h" + +namespace grpc { +namespace load_reporter { + +std::pair<uint64_t, uint64_t> GetCpuStatsImpl() { + uint64_t busy = 0, total = 0; + host_cpu_load_info_data_t cpuinfo; + mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT; + if (host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO, + (host_info_t)&cpuinfo, &count) == KERN_SUCCESS) { + for (int i = 0; i < CPU_STATE_MAX; i++) total += cpuinfo.cpu_ticks[i]; + busy = total - cpuinfo.cpu_ticks[CPU_STATE_IDLE]; + } + return std::make_pair(busy, total); +} + +} // namespace load_reporter +} // namespace grpc + +#endif // GPR_APPLE diff --git a/src/cpp/server/load_reporter/get_cpu_stats_unsupported.cc b/src/cpp/server/load_reporter/get_cpu_stats_unsupported.cc new file mode 100644 index 0000000000..80fb8b6da1 --- /dev/null +++ b/src/cpp/server/load_reporter/get_cpu_stats_unsupported.cc @@ -0,0 +1,40 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#if !defined(GPR_LINUX) && !defined(GPR_WINDOWS) && !defined(GPR_APPLE) + +#include <grpc/support/log.h> + +#include "src/cpp/server/load_reporter/get_cpu_stats.h" + +namespace grpc { +namespace load_reporter { + +std::pair<uint64_t, uint64_t> GetCpuStatsImpl() { + uint64_t busy = 0, total = 0; + gpr_log(GPR_ERROR, + "Platforms other than Linux, Windows, and MacOS are not supported."); + return std::make_pair(busy, total); +} + +} // namespace load_reporter +} // namespace grpc + +#endif // !defined(GPR_LINUX) && !defined(GPR_WINDOWS) && !defined(GPR_APPLE) diff --git a/src/cpp/server/load_reporter/get_cpu_stats_windows.cc b/src/cpp/server/load_reporter/get_cpu_stats_windows.cc new file mode 100644 index 0000000000..0a98e848a2 --- /dev/null +++ b/src/cpp/server/load_reporter/get_cpu_stats_windows.cc @@ -0,0 +1,55 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#ifdef GPR_WINDOWS + +#include <windows.h> +#include <cstdint> + +#include "src/cpp/server/load_reporter/get_cpu_stats.h" + +namespace grpc { +namespace load_reporter { + +namespace { + +uint64_t FiletimeToInt(const FILETIME& ft) { + ULARGE_INTEGER i; + i.LowPart = ft.dwLowDateTime; + i.HighPart = ft.dwHighDateTime; + return i.QuadPart; +} + +} // namespace + +std::pair<uint64_t, uint64_t> GetCpuStatsImpl() { + uint64_t busy = 0, total = 0; + FILETIME idle, kernel, user; + if (GetSystemTimes(&idle, &kernel, &user) != 0) { + total = FiletimeToInt(kernel) + FiletimeToInt(user); + busy = total - FiletimeToInt(idle); + } + return std::make_pair(busy, total); +} + +} // namespace load_reporter +} // namespace grpc + +#endif // GPR_WINDOWS diff --git a/src/cpp/server/load_reporter/load_data_store.cc b/src/cpp/server/load_reporter/load_data_store.cc index 70f12c1102..594473f5e7 100644 --- a/src/cpp/server/load_reporter/load_data_store.cc +++ b/src/cpp/server/load_reporter/load_data_store.cc @@ -16,11 +16,15 @@ * */ +#include <grpc/impl/codegen/port_platform.h> + +#include <stdio.h> #include <cstdlib> #include <set> #include <unordered_map> #include <vector> +#include "src/core/lib/iomgr/socket_utils.h" #include "src/cpp/server/load_reporter/load_data_store.h" namespace grpc { @@ -73,6 +77,67 @@ const typename C::value_type* RandomElement(const C& container) { } // namespace +LoadRecordKey::LoadRecordKey(const grpc::string& client_ip_and_token, + grpc::string user_id) + : user_id_(std::move(user_id)) { + GPR_ASSERT(client_ip_and_token.size() >= 2); + int ip_hex_size; + GPR_ASSERT(sscanf(client_ip_and_token.substr(0, 2).c_str(), "%d", + &ip_hex_size) == 1); + GPR_ASSERT(ip_hex_size == 0 || ip_hex_size == kIpv4AddressLength || + ip_hex_size == kIpv6AddressLength); + size_t cur_pos = 2; + client_ip_hex_ = client_ip_and_token.substr(cur_pos, ip_hex_size); + cur_pos += ip_hex_size; + if (client_ip_and_token.size() - cur_pos < kLbIdLength) { + lb_id_ = kInvalidLbId; + lb_tag_ = ""; + } else { + lb_id_ = client_ip_and_token.substr(cur_pos, kLbIdLength); + lb_tag_ = client_ip_and_token.substr(cur_pos + kLbIdLength); + } +} + +grpc::string LoadRecordKey::GetClientIpBytes() const { + if (client_ip_hex_.empty()) { + return ""; + } else if (client_ip_hex_.size() == kIpv4AddressLength) { + uint32_t ip_bytes; + if (sscanf(client_ip_hex_.c_str(), "%x", &ip_bytes) != 1) { + gpr_log(GPR_ERROR, + "Can't parse client IP (%s) from a hex string to an integer.", + client_ip_hex_.c_str()); + return ""; + } + ip_bytes = grpc_htonl(ip_bytes); + return grpc::string(reinterpret_cast<const char*>(&ip_bytes), + sizeof(ip_bytes)); + } else if (client_ip_hex_.size() == kIpv6AddressLength) { + uint32_t ip_bytes[4]; + for (size_t i = 0; i < 4; ++i) { + if (sscanf(client_ip_hex_.substr(i * 8, (i + 1) * 8).c_str(), "%x", + ip_bytes + i) != 1) { + gpr_log( + GPR_ERROR, + "Can't parse client IP part (%s) from a hex string to an integer.", + client_ip_hex_.substr(i * 8, (i + 1) * 8).c_str()); + return ""; + } + ip_bytes[i] = grpc_htonl(ip_bytes[i]); + } + return grpc::string(reinterpret_cast<const char*>(ip_bytes), + sizeof(ip_bytes)); + } else { + GPR_UNREACHABLE_CODE(return ""); + } +} + +LoadRecordValue::LoadRecordValue(grpc::string metric_name, uint64_t num_calls, + double total_metric_value) { + call_metrics_.emplace(std::move(metric_name), + CallMetricValue(num_calls, total_metric_value)); +} + void PerBalancerStore::MergeRow(const LoadRecordKey& key, const LoadRecordValue& value) { // During suspension, the load data received will be dropped. diff --git a/src/cpp/server/load_reporter/load_data_store.h b/src/cpp/server/load_reporter/load_data_store.h index feb8b2fd59..dc08ecf479 100644 --- a/src/cpp/server/load_reporter/load_data_store.h +++ b/src/cpp/server/load_reporter/load_data_store.h @@ -28,12 +28,11 @@ #include <grpc/support/log.h> #include <grpcpp/impl/codegen/config.h> +#include "src/cpp/server/load_reporter/constants.h" + namespace grpc { namespace load_reporter { -constexpr char kInvalidLbId[] = "<INVALID_LBID_238dsb234890rb>"; -constexpr uint8_t kLbIdLen = 8; - // The load data storage is organized in hierarchy. The LoadDataStore is the // top-level data store. In LoadDataStore, for each host we keep a // PerHostStore, in which for each balancer we keep a PerBalancerStore. Each @@ -68,13 +67,16 @@ class CallMetricValue { // The key of a load record. class LoadRecordKey { public: - explicit LoadRecordKey(grpc::string lb_id, grpc::string lb_tag, - grpc::string user_id, grpc::string client_ip_hex) + LoadRecordKey(grpc::string lb_id, grpc::string lb_tag, grpc::string user_id, + grpc::string client_ip_hex) : lb_id_(std::move(lb_id)), lb_tag_(std::move(lb_tag)), user_id_(std::move(user_id)), client_ip_hex_(std::move(client_ip_hex)) {} + // Parses the input client_ip_and_token to set client IP, LB ID, and LB tag. + LoadRecordKey(const grpc::string& client_ip_and_token, grpc::string user_id); + grpc::string ToString() const { return "[lb_id_=" + lb_id_ + ", lb_tag_=" + lb_tag_ + ", user_id_=" + user_id_ + ", client_ip_hex_=" + client_ip_hex_ + @@ -86,6 +88,9 @@ class LoadRecordKey { user_id_ == other.user_id_ && client_ip_hex_ == other.client_ip_hex_; } + // Gets the client IP bytes in network order (i.e., big-endian). + grpc::string GetClientIpBytes() const; + // Getters. const grpc::string& lb_id() const { return lb_id_; } const grpc::string& lb_tag() const { return lb_tag_; } @@ -119,8 +124,8 @@ class LoadRecordKey { class LoadRecordValue { public: explicit LoadRecordValue(uint64_t start_count = 0, uint64_t ok_count = 0, - uint64_t error_count = 0, double bytes_sent = 0, - double bytes_recv = 0, double latency_ms = 0) + uint64_t error_count = 0, uint64_t bytes_sent = 0, + uint64_t bytes_recv = 0, uint64_t latency_ms = 0) : start_count_(start_count), ok_count_(ok_count), error_count_(error_count), @@ -128,6 +133,9 @@ class LoadRecordValue { bytes_recv_(bytes_recv), latency_ms_(latency_ms) {} + LoadRecordValue(grpc::string metric_name, uint64_t num_calls, + double total_metric_value); + void MergeFrom(const LoadRecordValue& other) { start_count_ += other.start_count_; ok_count_ += other.ok_count_; @@ -152,7 +160,8 @@ class LoadRecordValue { ", error_count_=" + grpc::to_string(error_count_) + ", bytes_sent_=" + grpc::to_string(bytes_sent_) + ", bytes_recv_=" + grpc::to_string(bytes_recv_) + - ", latency_ms_=" + grpc::to_string(latency_ms_) + "]"; + ", latency_ms_=" + grpc::to_string(latency_ms_) + ", " + + grpc::to_string(call_metrics_.size()) + " other call metric(s)]"; } bool InsertCallMetric(const grpc::string& metric_name, @@ -164,9 +173,9 @@ class LoadRecordValue { uint64_t start_count() const { return start_count_; } uint64_t ok_count() const { return ok_count_; } uint64_t error_count() const { return error_count_; } - double bytes_sent() const { return bytes_sent_; } - double bytes_recv() const { return bytes_recv_; } - double latency_ms() const { return latency_ms_; } + uint64_t bytes_sent() const { return bytes_sent_; } + uint64_t bytes_recv() const { return bytes_recv_; } + uint64_t latency_ms() const { return latency_ms_; } const std::unordered_map<grpc::string, CallMetricValue>& call_metrics() const { return call_metrics_; @@ -176,9 +185,9 @@ class LoadRecordValue { uint64_t start_count_ = 0; uint64_t ok_count_ = 0; uint64_t error_count_ = 0; - double bytes_sent_ = 0; - double bytes_recv_ = 0; - double latency_ms_ = 0; + uint64_t bytes_sent_ = 0; + uint64_t bytes_recv_ = 0; + uint64_t latency_ms_ = 0; std::unordered_map<grpc::string, CallMetricValue> call_metrics_; }; diff --git a/src/cpp/server/load_reporter/load_reporter.cc b/src/cpp/server/load_reporter/load_reporter.cc new file mode 100644 index 0000000000..464063a13f --- /dev/null +++ b/src/cpp/server/load_reporter/load_reporter.cc @@ -0,0 +1,509 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/impl/codegen/port_platform.h> + +#include <stdint.h> +#include <stdio.h> +#include <chrono> +#include <ctime> +#include <iterator> + +#include "src/cpp/server/load_reporter/constants.h" +#include "src/cpp/server/load_reporter/get_cpu_stats.h" +#include "src/cpp/server/load_reporter/load_reporter.h" + +#include "opencensus/stats/internal/set_aggregation_window.h" + +namespace grpc { +namespace load_reporter { + +CpuStatsProvider::CpuStatsSample CpuStatsProviderDefaultImpl::GetCpuStats() { + return GetCpuStatsImpl(); +} + +CensusViewProvider::CensusViewProvider() + : tag_key_token_(::opencensus::stats::TagKey::Register(kTagKeyToken)), + tag_key_host_(::opencensus::stats::TagKey::Register(kTagKeyHost)), + tag_key_user_id_(::opencensus::stats::TagKey::Register(kTagKeyUserId)), + tag_key_status_(::opencensus::stats::TagKey::Register(kTagKeyStatus)), + tag_key_metric_name_( + ::opencensus::stats::TagKey::Register(kTagKeyMetricName)) { + // One view related to starting a call. + auto vd_start_count = + ::opencensus::stats::ViewDescriptor() + .set_name(kViewStartCount) + .set_measure(kMeasureStartCount) + .set_aggregation(::opencensus::stats::Aggregation::Sum()) + .add_column(tag_key_token_) + .add_column(tag_key_host_) + .add_column(tag_key_user_id_) + .set_description( + "Delta count of calls started broken down by <token, host, " + "user_id>."); + ::opencensus::stats::SetAggregationWindow( + ::opencensus::stats::AggregationWindow::Delta(), &vd_start_count); + view_descriptor_map_.emplace(kViewStartCount, vd_start_count); + // Four views related to ending a call. + // If this view is set as Count of kMeasureEndBytesSent (in hope of saving one + // measure), it's infeasible to prepare fake data for testing. That's because + // the OpenCensus API to make up view data will add the input data as separate + // measurements instead of setting the data values directly. + auto vd_end_count = + ::opencensus::stats::ViewDescriptor() + .set_name(kViewEndCount) + .set_measure(kMeasureEndCount) + .set_aggregation(::opencensus::stats::Aggregation::Sum()) + .add_column(tag_key_token_) + .add_column(tag_key_host_) + .add_column(tag_key_user_id_) + .add_column(tag_key_status_) + .set_description( + "Delta count of calls ended broken down by <token, host, " + "user_id, status>."); + ::opencensus::stats::SetAggregationWindow( + ::opencensus::stats::AggregationWindow::Delta(), &vd_end_count); + view_descriptor_map_.emplace(kViewEndCount, vd_end_count); + auto vd_end_bytes_sent = + ::opencensus::stats::ViewDescriptor() + .set_name(kViewEndBytesSent) + .set_measure(kMeasureEndBytesSent) + .set_aggregation(::opencensus::stats::Aggregation::Sum()) + .add_column(tag_key_token_) + .add_column(tag_key_host_) + .add_column(tag_key_user_id_) + .add_column(tag_key_status_) + .set_description( + "Delta sum of bytes sent broken down by <token, host, user_id, " + "status>."); + ::opencensus::stats::SetAggregationWindow( + ::opencensus::stats::AggregationWindow::Delta(), &vd_end_bytes_sent); + view_descriptor_map_.emplace(kViewEndBytesSent, vd_end_bytes_sent); + auto vd_end_bytes_received = + ::opencensus::stats::ViewDescriptor() + .set_name(kViewEndBytesReceived) + .set_measure(kMeasureEndBytesReceived) + .set_aggregation(::opencensus::stats::Aggregation::Sum()) + .add_column(tag_key_token_) + .add_column(tag_key_host_) + .add_column(tag_key_user_id_) + .add_column(tag_key_status_) + .set_description( + "Delta sum of bytes received broken down by <token, host, " + "user_id, status>."); + ::opencensus::stats::SetAggregationWindow( + ::opencensus::stats::AggregationWindow::Delta(), &vd_end_bytes_received); + view_descriptor_map_.emplace(kViewEndBytesReceived, vd_end_bytes_received); + auto vd_end_latency_ms = + ::opencensus::stats::ViewDescriptor() + .set_name(kViewEndLatencyMs) + .set_measure(kMeasureEndLatencyMs) + .set_aggregation(::opencensus::stats::Aggregation::Sum()) + .add_column(tag_key_token_) + .add_column(tag_key_host_) + .add_column(tag_key_user_id_) + .add_column(tag_key_status_) + .set_description( + "Delta sum of latency in ms broken down by <token, host, " + "user_id, status>."); + ::opencensus::stats::SetAggregationWindow( + ::opencensus::stats::AggregationWindow::Delta(), &vd_end_latency_ms); + view_descriptor_map_.emplace(kViewEndLatencyMs, vd_end_latency_ms); + // Two views related to other call metrics. + auto vd_metric_call_count = + ::opencensus::stats::ViewDescriptor() + .set_name(kViewOtherCallMetricCount) + .set_measure(kMeasureOtherCallMetric) + .set_aggregation(::opencensus::stats::Aggregation::Count()) + .add_column(tag_key_token_) + .add_column(tag_key_host_) + .add_column(tag_key_user_id_) + .add_column(tag_key_metric_name_) + .set_description( + "Delta count of calls broken down by <token, host, user_id, " + "metric_name>."); + ::opencensus::stats::SetAggregationWindow( + ::opencensus::stats::AggregationWindow::Delta(), &vd_metric_call_count); + view_descriptor_map_.emplace(kViewOtherCallMetricCount, vd_metric_call_count); + auto vd_metric_value = + ::opencensus::stats::ViewDescriptor() + .set_name(kViewOtherCallMetricValue) + .set_measure(kMeasureOtherCallMetric) + .set_aggregation(::opencensus::stats::Aggregation::Sum()) + .add_column(tag_key_token_) + .add_column(tag_key_host_) + .add_column(tag_key_user_id_) + .add_column(tag_key_metric_name_) + .set_description( + "Delta sum of call metric value broken down " + "by <token, host, user_id, metric_name>."); + ::opencensus::stats::SetAggregationWindow( + ::opencensus::stats::AggregationWindow::Delta(), &vd_metric_value); + view_descriptor_map_.emplace(kViewOtherCallMetricValue, vd_metric_value); +} + +double CensusViewProvider::GetRelatedViewDataRowDouble( + const ViewDataMap& view_data_map, const char* view_name, + size_t view_name_len, const std::vector<grpc::string>& tag_values) { + auto it_vd = view_data_map.find(grpc::string(view_name, view_name_len)); + GPR_ASSERT(it_vd != view_data_map.end()); + GPR_ASSERT(it_vd->second.type() == + ::opencensus::stats::ViewData::Type::kDouble); + auto it_row = it_vd->second.double_data().find(tag_values); + GPR_ASSERT(it_row != it_vd->second.double_data().end()); + return it_row->second; +} + +uint64_t CensusViewProvider::GetRelatedViewDataRowInt( + const ViewDataMap& view_data_map, const char* view_name, + size_t view_name_len, const std::vector<grpc::string>& tag_values) { + auto it_vd = view_data_map.find(grpc::string(view_name, view_name_len)); + GPR_ASSERT(it_vd != view_data_map.end()); + GPR_ASSERT(it_vd->second.type() == + ::opencensus::stats::ViewData::Type::kInt64); + auto it_row = it_vd->second.int_data().find(tag_values); + GPR_ASSERT(it_row != it_vd->second.int_data().end()); + GPR_ASSERT(it_row->second >= 0); + return it_row->second; +} + +CensusViewProviderDefaultImpl::CensusViewProviderDefaultImpl() { + for (const auto& p : view_descriptor_map()) { + const grpc::string& view_name = p.first; + const ::opencensus::stats::ViewDescriptor& vd = p.second; + // We need to use pair's piecewise ctor here, otherwise the deleted copy + // ctor of View will be called. + view_map_.emplace(std::piecewise_construct, + std::forward_as_tuple(view_name), + std::forward_as_tuple(vd)); + } +} + +CensusViewProvider::ViewDataMap CensusViewProviderDefaultImpl::FetchViewData() { + gpr_log(GPR_DEBUG, "[CVP %p] Starts fetching Census view data.", this); + ViewDataMap view_data_map; + for (auto& p : view_map_) { + const grpc::string& view_name = p.first; + ::opencensus::stats::View& view = p.second; + if (view.IsValid()) { + view_data_map.emplace(view_name, view.GetData()); + gpr_log(GPR_DEBUG, "[CVP %p] Fetched view data (view: %s).", this, + view_name.c_str()); + } else { + gpr_log( + GPR_DEBUG, + "[CVP %p] Can't fetch view data because view is invalid (view: %s).", + this, view_name.c_str()); + } + } + return view_data_map; +} + +grpc::string LoadReporter::GenerateLbId() { + while (true) { + if (next_lb_id_ > UINT32_MAX) { + gpr_log(GPR_ERROR, "[LR %p] The LB ID exceeds the max valid value!", + this); + return ""; + } + int64_t lb_id = next_lb_id_++; + // Overflow should never happen. + GPR_ASSERT(lb_id >= 0); + // Convert to padded hex string for a 32-bit LB ID. E.g, "0000ca5b". + char buf[kLbIdLength + 1]; + snprintf(buf, sizeof(buf), "%08lx", lb_id); + grpc::string lb_id_str(buf, kLbIdLength); + // The client may send requests with LB ID that has never been allocated + // by this load reporter. Those IDs are tracked and will be skipped when + // we generate a new ID. + if (!load_data_store_.IsTrackedUnknownBalancerId(lb_id_str)) { + return lb_id_str; + } + } +} + +::grpc::lb::v1::LoadBalancingFeedback +LoadReporter::GenerateLoadBalancingFeedback() { + std::unique_lock<std::mutex> lock(feedback_mu_); + auto now = std::chrono::system_clock::now(); + // Discard records outside the window until there is only one record + // outside the window, which is used as the base for difference. + while (feedback_records_.size() > 1 && + !IsRecordInWindow(feedback_records_[1], now)) { + feedback_records_.pop_front(); + } + if (feedback_records_.size() < 2) { + return ::grpc::lb::v1::LoadBalancingFeedback::default_instance(); + } + // Find the longest range with valid ends. + auto oldest = feedback_records_.begin(); + auto newest = feedback_records_.end() - 1; + while (std::distance(oldest, newest) > 0 && + (newest->cpu_limit == 0 || oldest->cpu_limit == 0)) { + // A zero limit means that the system info reading was failed, so these + // records can't be used to calculate CPU utilization. + if (newest->cpu_limit == 0) --newest; + if (oldest->cpu_limit == 0) ++oldest; + } + if (std::distance(oldest, newest) < 1 || + oldest->end_time == newest->end_time || + newest->cpu_limit == oldest->cpu_limit) { + return ::grpc::lb::v1::LoadBalancingFeedback::default_instance(); + } + uint64_t rpcs = 0; + uint64_t errors = 0; + for (auto p = newest; p != oldest; --p) { + // Because these two numbers are counters, the oldest record shouldn't be + // included. + rpcs += p->rpcs; + errors += p->errors; + } + double cpu_usage = newest->cpu_usage - oldest->cpu_usage; + double cpu_limit = newest->cpu_limit - oldest->cpu_limit; + std::chrono::duration<double> duration_seconds = + newest->end_time - oldest->end_time; + lock.unlock(); + ::grpc::lb::v1::LoadBalancingFeedback feedback; + feedback.set_server_utilization(static_cast<float>(cpu_usage / cpu_limit)); + feedback.set_calls_per_second( + static_cast<float>(rpcs / duration_seconds.count())); + feedback.set_errors_per_second( + static_cast<float>(errors / duration_seconds.count())); + return feedback; +} + +::google::protobuf::RepeatedPtrField<::grpc::lb::v1::Load> +LoadReporter::GenerateLoads(const grpc::string& hostname, + const grpc::string& lb_id) { + std::lock_guard<std::mutex> lock(store_mu_); + auto assigned_stores = load_data_store_.GetAssignedStores(hostname, lb_id); + GPR_ASSERT(assigned_stores != nullptr); + GPR_ASSERT(!assigned_stores->empty()); + ::google::protobuf::RepeatedPtrField<::grpc::lb::v1::Load> loads; + for (PerBalancerStore* per_balancer_store : *assigned_stores) { + GPR_ASSERT(!per_balancer_store->IsSuspended()); + if (!per_balancer_store->load_record_map().empty()) { + for (const auto& p : per_balancer_store->load_record_map()) { + const auto& key = p.first; + const auto& value = p.second; + auto load = loads.Add(); + load->set_load_balance_tag(key.lb_tag()); + load->set_user_id(key.user_id()); + load->set_client_ip_address(key.GetClientIpBytes()); + load->set_num_calls_started(static_cast<int64_t>(value.start_count())); + load->set_num_calls_finished_without_error( + static_cast<int64_t>(value.ok_count())); + load->set_num_calls_finished_with_error( + static_cast<int64_t>(value.error_count())); + load->set_total_bytes_sent(static_cast<int64_t>(value.bytes_sent())); + load->set_total_bytes_received( + static_cast<int64_t>(value.bytes_recv())); + load->mutable_total_latency()->set_seconds( + static_cast<int64_t>(value.latency_ms() / 1000)); + load->mutable_total_latency()->set_nanos( + (static_cast<int32_t>(value.latency_ms()) % 1000) * 1000000); + for (const auto& p : value.call_metrics()) { + const grpc::string& metric_name = p.first; + const CallMetricValue& metric_value = p.second; + auto call_metric_data = load->add_metric_data(); + call_metric_data->set_metric_name(metric_name); + call_metric_data->set_num_calls_finished_with_metric( + metric_value.num_calls()); + call_metric_data->set_total_metric_value( + metric_value.total_metric_value()); + } + if (per_balancer_store->lb_id() != lb_id) { + // This per-balancer store is an orphan assigned to this receiving + // balancer. + AttachOrphanLoadId(load, *per_balancer_store); + } + } + per_balancer_store->ClearLoadRecordMap(); + } + if (per_balancer_store->IsNumCallsInProgressChangedSinceLastReport()) { + auto load = loads.Add(); + load->set_num_calls_in_progress( + per_balancer_store->GetNumCallsInProgressForReport()); + if (per_balancer_store->lb_id() != lb_id) { + // This per-balancer store is an orphan assigned to this receiving + // balancer. + AttachOrphanLoadId(load, *per_balancer_store); + } + } + } + return loads; +} + +void LoadReporter::AttachOrphanLoadId( + ::grpc::lb::v1::Load* load, const PerBalancerStore& per_balancer_store) { + if (per_balancer_store.lb_id() == kInvalidLbId) { + load->set_load_key_unknown(true); + } else { + // We shouldn't set load_key_unknown to any value in this case because + // load_key_unknown and orphaned_load_identifier are under an oneof struct. + load->mutable_orphaned_load_identifier()->set_load_key( + per_balancer_store.load_key()); + load->mutable_orphaned_load_identifier()->set_load_balancer_id( + per_balancer_store.lb_id()); + } +} + +void LoadReporter::AppendNewFeedbackRecord(uint64_t rpcs, uint64_t errors) { + CpuStatsProvider::CpuStatsSample cpu_stats; + if (cpu_stats_provider_ != nullptr) { + cpu_stats = cpu_stats_provider_->GetCpuStats(); + } else { + // This will make the load balancing feedback generation a no-op. + cpu_stats = {0, 0}; + } + std::unique_lock<std::mutex> lock(feedback_mu_); + feedback_records_.emplace_back(std::chrono::system_clock::now(), rpcs, errors, + cpu_stats.first, cpu_stats.second); +} + +void LoadReporter::ReportStreamCreated(const grpc::string& hostname, + const grpc::string& lb_id, + const grpc::string& load_key) { + std::lock_guard<std::mutex> lock(store_mu_); + load_data_store_.ReportStreamCreated(hostname, lb_id, load_key); + gpr_log(GPR_INFO, + "[LR %p] Report stream created (host: %s, LB ID: %s, load key: %s).", + this, hostname.c_str(), lb_id.c_str(), load_key.c_str()); +} + +void LoadReporter::ReportStreamClosed(const grpc::string& hostname, + const grpc::string& lb_id) { + std::lock_guard<std::mutex> lock(store_mu_); + load_data_store_.ReportStreamClosed(hostname, lb_id); + gpr_log(GPR_INFO, "[LR %p] Report stream closed (host: %s, LB ID: %s).", this, + hostname.c_str(), lb_id.c_str()); +} + +void LoadReporter::ProcessViewDataCallStart( + const CensusViewProvider::ViewDataMap& view_data_map) { + auto it = view_data_map.find(kViewStartCount); + if (it != view_data_map.end()) { + for (const auto& p : it->second.int_data()) { + const std::vector<grpc::string>& tag_values = p.first; + const uint64_t start_count = static_cast<uint64_t>(p.second); + const grpc::string& client_ip_and_token = tag_values[0]; + const grpc::string& host = tag_values[1]; + const grpc::string& user_id = tag_values[2]; + LoadRecordKey key(client_ip_and_token, user_id); + LoadRecordValue value = LoadRecordValue(start_count); + { + std::unique_lock<std::mutex> lock(store_mu_); + load_data_store_.MergeRow(host, key, value); + } + } + } +} + +void LoadReporter::ProcessViewDataCallEnd( + const CensusViewProvider::ViewDataMap& view_data_map) { + uint64_t total_end_count = 0; + uint64_t total_error_count = 0; + auto it = view_data_map.find(kViewEndCount); + if (it != view_data_map.end()) { + for (const auto& p : it->second.int_data()) { + const std::vector<grpc::string>& tag_values = p.first; + const uint64_t end_count = static_cast<uint64_t>(p.second); + const grpc::string& client_ip_and_token = tag_values[0]; + const grpc::string& host = tag_values[1]; + const grpc::string& user_id = tag_values[2]; + const grpc::string& status = tag_values[3]; + // This is due to a bug reported internally of Java server load reporting + // implementation. + // TODO(juanlishen): Check whether this situation happens in OSS C++. + if (client_ip_and_token.size() == 0) { + gpr_log(GPR_DEBUG, + "Skipping processing Opencensus record with empty " + "client_ip_and_token tag."); + continue; + } + LoadRecordKey key(client_ip_and_token, user_id); + const uint64_t bytes_sent = CensusViewProvider::GetRelatedViewDataRowInt( + view_data_map, kViewEndBytesSent, sizeof(kViewEndBytesSent) - 1, + tag_values); + const uint64_t bytes_received = + CensusViewProvider::GetRelatedViewDataRowInt( + view_data_map, kViewEndBytesReceived, + sizeof(kViewEndBytesReceived) - 1, tag_values); + const uint64_t latency_ms = CensusViewProvider::GetRelatedViewDataRowInt( + view_data_map, kViewEndLatencyMs, sizeof(kViewEndLatencyMs) - 1, + tag_values); + uint64_t ok_count = 0; + uint64_t error_count = 0; + total_end_count += end_count; + if (std::strcmp(status.c_str(), kCallStatusOk) == 0) { + ok_count = end_count; + } else { + error_count = end_count; + total_error_count += end_count; + } + LoadRecordValue value = LoadRecordValue( + 0, ok_count, error_count, bytes_sent, bytes_received, latency_ms); + { + std::unique_lock<std::mutex> lock(store_mu_); + load_data_store_.MergeRow(host, key, value); + } + } + } + AppendNewFeedbackRecord(total_end_count, total_error_count); +} + +void LoadReporter::ProcessViewDataOtherCallMetrics( + const CensusViewProvider::ViewDataMap& view_data_map) { + auto it = view_data_map.find(kViewOtherCallMetricCount); + if (it != view_data_map.end()) { + for (const auto& p : it->second.int_data()) { + const std::vector<grpc::string>& tag_values = p.first; + const int64_t num_calls = p.second; + const grpc::string& client_ip_and_token = tag_values[0]; + const grpc::string& host = tag_values[1]; + const grpc::string& user_id = tag_values[2]; + const grpc::string& metric_name = tag_values[3]; + LoadRecordKey key(client_ip_and_token, user_id); + const double total_metric_value = + CensusViewProvider::GetRelatedViewDataRowDouble( + view_data_map, kViewOtherCallMetricValue, + sizeof(kViewOtherCallMetricValue) - 1, tag_values); + LoadRecordValue value = LoadRecordValue( + metric_name, static_cast<uint64_t>(num_calls), total_metric_value); + { + std::unique_lock<std::mutex> lock(store_mu_); + load_data_store_.MergeRow(host, key, value); + } + } + } +} + +void LoadReporter::FetchAndSample() { + gpr_log(GPR_DEBUG, + "[LR %p] Starts fetching Census view data and sampling LB feedback " + "record.", + this); + CensusViewProvider::ViewDataMap view_data_map = + census_view_provider_->FetchViewData(); + ProcessViewDataCallStart(view_data_map); + ProcessViewDataCallEnd(view_data_map); + ProcessViewDataOtherCallMetrics(view_data_map); +} + +} // namespace load_reporter +} // namespace grpc diff --git a/src/cpp/server/load_reporter/load_reporter.h b/src/cpp/server/load_reporter/load_reporter.h new file mode 100644 index 0000000000..b2254d5601 --- /dev/null +++ b/src/cpp/server/load_reporter/load_reporter.h @@ -0,0 +1,228 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_REPORTER_H +#define GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_REPORTER_H + +#include <grpc/support/port_platform.h> + +#include <atomic> +#include <chrono> +#include <deque> +#include <vector> + +#include <grpc/support/log.h> +#include <grpcpp/impl/codegen/config.h> + +#include "src/cpp/server/load_reporter/load_data_store.h" +#include "src/proto/grpc/lb/v1/load_reporter.grpc.pb.h" + +#include "opencensus/stats/stats.h" + +namespace grpc { +namespace load_reporter { + +// The interface to get the Census stats. Abstracted for mocking. +class CensusViewProvider { + public: + // Maps from the view name to the view data. + using ViewDataMap = + std::unordered_map<grpc::string, ::opencensus::stats::ViewData>; + // Maps from the view name to the view descriptor. + using ViewDescriptorMap = + std::unordered_map<grpc::string, ::opencensus::stats::ViewDescriptor>; + + CensusViewProvider(); + virtual ~CensusViewProvider() = default; + + // Fetches the view data accumulated since last fetching, and returns it as a + // map from the view name to the view data. + virtual ViewDataMap FetchViewData() = 0; + + // A helper function that gets a row with the input tag values from the view + // data. Only used when we know that row must exist because we have seen a row + // with the same tag values in a related view data. Several ViewData's are + // considered related if their views are based on the measures that are always + // recorded at the same time. + static double GetRelatedViewDataRowDouble( + const ViewDataMap& view_data_map, const char* view_name, + size_t view_name_len, const std::vector<grpc::string>& tag_values); + static uint64_t GetRelatedViewDataRowInt( + const ViewDataMap& view_data_map, const char* view_name, + size_t view_name_len, const std::vector<grpc::string>& tag_values); + + protected: + const ViewDescriptorMap& view_descriptor_map() const { + return view_descriptor_map_; + } + + private: + ViewDescriptorMap view_descriptor_map_; + // Tag keys. + ::opencensus::stats::TagKey tag_key_token_; + ::opencensus::stats::TagKey tag_key_host_; + ::opencensus::stats::TagKey tag_key_user_id_; + ::opencensus::stats::TagKey tag_key_status_; + ::opencensus::stats::TagKey tag_key_metric_name_; +}; + +// The default implementation fetches the real stats from Census. +class CensusViewProviderDefaultImpl : public CensusViewProvider { + public: + CensusViewProviderDefaultImpl(); + + ViewDataMap FetchViewData() override; + + private: + std::unordered_map<grpc::string, ::opencensus::stats::View> view_map_; +}; + +// The interface to get the CPU stats. Abstracted for mocking. +class CpuStatsProvider { + public: + // The used and total amounts of CPU usage. + using CpuStatsSample = std::pair<uint64_t, uint64_t>; + + virtual ~CpuStatsProvider() = default; + + // Gets the cumulative used CPU and total CPU resource. + virtual CpuStatsSample GetCpuStats() = 0; +}; + +// The default implementation reads CPU jiffies from the system to calculate CPU +// utilization. +class CpuStatsProviderDefaultImpl : public CpuStatsProvider { + public: + CpuStatsSample GetCpuStats() override; +}; + +// Maintains all the load data and load reporting streams. +class LoadReporter { + public: + // TODO(juanlishen): Allow config for providers from users. + LoadReporter(uint32_t feedback_sample_window_seconds, + std::unique_ptr<CensusViewProvider> census_view_provider, + std::unique_ptr<CpuStatsProvider> cpu_stats_provider) + : feedback_sample_window_seconds_(feedback_sample_window_seconds), + census_view_provider_(std::move(census_view_provider)), + cpu_stats_provider_(std::move(cpu_stats_provider)) { + // Append the initial record so that the next real record can have a base. + AppendNewFeedbackRecord(0, 0); + } + + // Fetches the latest data from Census and merge it into the data store. + // Also adds a new sample to the LB feedback sliding window. + // Thread-unsafe. (1). The access to the load data store and feedback records + // has locking. (2). The access to the Census view provider and CPU stats + // provider lacks locking, but we only access these two members in this method + // (in testing, we also access them when setting up expectation). So the + // invocations of this method must be serialized. + void FetchAndSample(); + + // Generates a report for that host and balancer. The report contains + // all the stats data accumulated between the last report (i.e., the last + // consumption) and the last fetch from Census (i.e., the last production). + // Thread-safe. + ::google::protobuf::RepeatedPtrField<::grpc::lb::v1::Load> GenerateLoads( + const grpc::string& hostname, const grpc::string& lb_id); + + // The feedback is calculated from the stats data recorded in the sliding + // window. Outdated records are discarded. + // Thread-safe. + ::grpc::lb::v1::LoadBalancingFeedback GenerateLoadBalancingFeedback(); + + // Wrapper around LoadDataStore::ReportStreamCreated. + // Thread-safe. + void ReportStreamCreated(const grpc::string& hostname, + const grpc::string& lb_id, + const grpc::string& load_key); + + // Wrapper around LoadDataStore::ReportStreamClosed. + // Thread-safe. + void ReportStreamClosed(const grpc::string& hostname, + const grpc::string& lb_id); + + // Generates a unique LB ID of length kLbIdLength. Returns an empty string + // upon failure. Thread-safe. + grpc::string GenerateLbId(); + + // Accessors only for testing. + CensusViewProvider* census_view_provider() { + return census_view_provider_.get(); + } + CpuStatsProvider* cpu_stats_provider() { return cpu_stats_provider_.get(); } + + private: + struct LoadBalancingFeedbackRecord { + std::chrono::system_clock::time_point end_time; + uint64_t rpcs; + uint64_t errors; + uint64_t cpu_usage; + uint64_t cpu_limit; + + LoadBalancingFeedbackRecord( + const std::chrono::system_clock::time_point& end_time, uint64_t rpcs, + uint64_t errors, uint64_t cpu_usage, uint64_t cpu_limit) + : end_time(end_time), + rpcs(rpcs), + errors(errors), + cpu_usage(cpu_usage), + cpu_limit(cpu_limit) {} + }; + + // Finds the view data about starting call from the view_data_map and merges + // the data to the load data store. + void ProcessViewDataCallStart( + const CensusViewProvider::ViewDataMap& view_data_map); + // Finds the view data about ending call from the view_data_map and merges the + // data to the load data store. + void ProcessViewDataCallEnd( + const CensusViewProvider::ViewDataMap& view_data_map); + // Finds the view data about the customized call metrics from the + // view_data_map and merges the data to the load data store. + void ProcessViewDataOtherCallMetrics( + const CensusViewProvider::ViewDataMap& view_data_map); + + bool IsRecordInWindow(const LoadBalancingFeedbackRecord& record, + std::chrono::system_clock::time_point now) { + return record.end_time > now - feedback_sample_window_seconds_; + } + + void AppendNewFeedbackRecord(uint64_t rpcs, uint64_t errors); + + // Extracts an OrphanedLoadIdentifier from the per-balancer store and attaches + // it to the load. + void AttachOrphanLoadId(::grpc::lb::v1::Load* load, + const PerBalancerStore& per_balancer_store); + + std::atomic<int64_t> next_lb_id_{0}; + const std::chrono::seconds feedback_sample_window_seconds_; + std::mutex feedback_mu_; + std::deque<LoadBalancingFeedbackRecord> feedback_records_; + // TODO(juanlishen): Lock in finer grain. Locking the whole store may be + // too expensive. + std::mutex store_mu_; + LoadDataStore load_data_store_; + std::unique_ptr<CensusViewProvider> census_view_provider_; + std::unique_ptr<CpuStatsProvider> cpu_stats_provider_; +}; + +} // namespace load_reporter +} // namespace grpc + +#endif // GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_REPORTER_H diff --git a/src/cpp/server/load_reporter/load_reporter_async_service_impl.cc b/src/cpp/server/load_reporter/load_reporter_async_service_impl.cc new file mode 100644 index 0000000000..d001199b8c --- /dev/null +++ b/src/cpp/server/load_reporter/load_reporter_async_service_impl.cc @@ -0,0 +1,370 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include "src/cpp/server/load_reporter/load_reporter_async_service_impl.h" + +namespace grpc { +namespace load_reporter { + +void LoadReporterAsyncServiceImpl::CallableTag::Run(bool ok) { + GPR_ASSERT(handler_function_ != nullptr); + GPR_ASSERT(handler_ != nullptr); + handler_function_(std::move(handler_), ok); +} + +LoadReporterAsyncServiceImpl::LoadReporterAsyncServiceImpl( + std::unique_ptr<ServerCompletionQueue> cq) + : cq_(std::move(cq)) { + thread_ = std::unique_ptr<::grpc_core::Thread>( + new ::grpc_core::Thread("server_load_reporting", Work, this)); + std::unique_ptr<CpuStatsProvider> cpu_stats_provider = nullptr; +#if defined(GPR_LINUX) || defined(GPR_WINDOWS) || defined(GPR_APPLE) + cpu_stats_provider.reset(new CpuStatsProviderDefaultImpl()); +#endif + load_reporter_ = std::unique_ptr<LoadReporter>(new LoadReporter( + kFeedbackSampleWindowSeconds, + std::unique_ptr<CensusViewProvider>(new CensusViewProviderDefaultImpl()), + std::move(cpu_stats_provider))); +} + +LoadReporterAsyncServiceImpl::~LoadReporterAsyncServiceImpl() { + // We will reach here after the server starts shutting down. + shutdown_ = true; + { + std::unique_lock<std::mutex> lock(cq_shutdown_mu_); + cq_->Shutdown(); + } + if (next_fetch_and_sample_alarm_ != nullptr) + next_fetch_and_sample_alarm_->Cancel(); + thread_->Join(); +} + +void LoadReporterAsyncServiceImpl::ScheduleNextFetchAndSample() { + auto next_fetch_and_sample_time = + gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC), + gpr_time_from_millis(kFetchAndSampleIntervalSeconds * 1000, + GPR_TIMESPAN)); + { + std::unique_lock<std::mutex> lock(cq_shutdown_mu_); + if (shutdown_) return; + // TODO(juanlishen): Improve the Alarm implementation to reuse a single + // instance for multiple events. + next_fetch_and_sample_alarm_.reset(new Alarm); + next_fetch_and_sample_alarm_->Set(cq_.get(), next_fetch_and_sample_time, + this); + } + gpr_log(GPR_DEBUG, "[LRS %p] Next fetch-and-sample scheduled.", this); +} + +void LoadReporterAsyncServiceImpl::FetchAndSample(bool ok) { + if (!ok) { + gpr_log(GPR_INFO, "[LRS %p] Fetch-and-sample is stopped.", this); + return; + } + gpr_log(GPR_DEBUG, "[LRS %p] Starting a fetch-and-sample...", this); + load_reporter_->FetchAndSample(); + ScheduleNextFetchAndSample(); +} + +void LoadReporterAsyncServiceImpl::Work(void* arg) { + LoadReporterAsyncServiceImpl* service = + reinterpret_cast<LoadReporterAsyncServiceImpl*>(arg); + service->FetchAndSample(true /* ok */); + // TODO(juanlishen): This is a workaround to wait for the cq to be ready. Need + // to figure out why cq is not ready after service starts. + gpr_sleep_until(gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC), + gpr_time_from_seconds(1, GPR_TIMESPAN))); + ReportLoadHandler::CreateAndStart(service->cq_.get(), service, + service->load_reporter_.get()); + void* tag; + bool ok; + while (true) { + if (!service->cq_->Next(&tag, &ok)) { + // The completion queue is shutting down. + GPR_ASSERT(service->shutdown_); + break; + } + if (tag == service) { + service->FetchAndSample(ok); + } else { + auto* next_step = static_cast<CallableTag*>(tag); + next_step->Run(ok); + } + } +} + +void LoadReporterAsyncServiceImpl::StartThread() { thread_->Start(); } + +void LoadReporterAsyncServiceImpl::ReportLoadHandler::CreateAndStart( + ServerCompletionQueue* cq, LoadReporterAsyncServiceImpl* service, + LoadReporter* load_reporter) { + std::shared_ptr<ReportLoadHandler> handler = + std::make_shared<ReportLoadHandler>(cq, service, load_reporter); + ReportLoadHandler* p = handler.get(); + { + std::unique_lock<std::mutex> lock(service->cq_shutdown_mu_); + if (service->shutdown_) return; + p->on_done_notified_ = + CallableTag(std::bind(&ReportLoadHandler::OnDoneNotified, p, + std::placeholders::_1, std::placeholders::_2), + handler); + p->next_inbound_ = + CallableTag(std::bind(&ReportLoadHandler::OnRequestDelivered, p, + std::placeholders::_1, std::placeholders::_2), + std::move(handler)); + p->ctx_.AsyncNotifyWhenDone(&p->on_done_notified_); + service->RequestReportLoad(&p->ctx_, &p->stream_, cq, cq, + &p->next_inbound_); + } +} + +LoadReporterAsyncServiceImpl::ReportLoadHandler::ReportLoadHandler( + ServerCompletionQueue* cq, LoadReporterAsyncServiceImpl* service, + LoadReporter* load_reporter) + : cq_(cq), + service_(service), + load_reporter_(load_reporter), + stream_(&ctx_), + call_status_(WAITING_FOR_DELIVERY) {} + +void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnRequestDelivered( + std::shared_ptr<ReportLoadHandler> self, bool ok) { + if (ok) { + call_status_ = DELIVERED; + } else { + // AsyncNotifyWhenDone() needs to be called before the call starts, but the + // tag will not pop out if the call never starts ( + // https://github.com/grpc/grpc/issues/10136). So we need to manually + // release the ownership of the handler in this case. + GPR_ASSERT(on_done_notified_.ReleaseHandler() != nullptr); + } + if (!ok || shutdown_) { + // The value of ok being false means that the server is shutting down. + Shutdown(std::move(self), "OnRequestDelivered"); + return; + } + // Spawn a new handler instance to serve the next new client. Every handler + // instance will deallocate itself when it's done. + CreateAndStart(cq_, service_, load_reporter_); + { + std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_); + if (service_->shutdown_) { + lock.release()->unlock(); + Shutdown(std::move(self), "OnRequestDelivered"); + return; + } + next_inbound_ = + CallableTag(std::bind(&ReportLoadHandler::OnReadDone, this, + std::placeholders::_1, std::placeholders::_2), + std::move(self)); + stream_.Read(&request_, &next_inbound_); + } + // LB ID is unique for each load reporting stream. + lb_id_ = load_reporter_->GenerateLbId(); + gpr_log(GPR_INFO, + "[LRS %p] Call request delivered (lb_id_: %s, handler: %p). " + "Start reading the initial request...", + service_, lb_id_.c_str(), this); +} + +void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnReadDone( + std::shared_ptr<ReportLoadHandler> self, bool ok) { + if (!ok || shutdown_) { + if (!ok && call_status_ < INITIAL_REQUEST_RECEIVED) { + // The client may have half-closed the stream or the stream is broken. + gpr_log(GPR_INFO, + "[LRS %p] Failed reading the initial request from the stream " + "(lb_id_: %s, handler: %p, done_notified: %d, is_cancelled: %d).", + service_, lb_id_.c_str(), this, static_cast<int>(done_notified_), + static_cast<int>(is_cancelled_)); + } + Shutdown(std::move(self), "OnReadDone"); + return; + } + // We only receive one request, which is the initial request. + if (call_status_ < INITIAL_REQUEST_RECEIVED) { + if (!request_.has_initial_request()) { + Shutdown(std::move(self), "OnReadDone+initial_request_not_found"); + } else { + call_status_ = INITIAL_REQUEST_RECEIVED; + const auto& initial_request = request_.initial_request(); + load_balanced_hostname_ = initial_request.load_balanced_hostname(); + load_key_ = initial_request.load_key(); + load_reporter_->ReportStreamCreated(load_balanced_hostname_, lb_id_, + load_key_); + const auto& load_report_interval = initial_request.load_report_interval(); + load_report_interval_ms_ = + static_cast<uint64_t>(load_report_interval.seconds() * 1000 + + load_report_interval.nanos() / 1000); + gpr_log( + GPR_INFO, + "[LRS %p] Initial request received. Start load reporting (load " + "balanced host: %s, interval: %lu ms, lb_id_: %s, handler: %p)...", + service_, load_balanced_hostname_.c_str(), load_report_interval_ms_, + lb_id_.c_str(), this); + SendReport(self, true /* ok */); + // Expect this read to fail. + { + std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_); + if (service_->shutdown_) { + lock.release()->unlock(); + Shutdown(std::move(self), "OnReadDone"); + return; + } + next_inbound_ = + CallableTag(std::bind(&ReportLoadHandler::OnReadDone, this, + std::placeholders::_1, std::placeholders::_2), + std::move(self)); + stream_.Read(&request_, &next_inbound_); + } + } + } else { + // Another request received! This violates the spec. + gpr_log(GPR_ERROR, + "[LRS %p] Another request received (lb_id_: %s, handler: %p).", + service_, lb_id_.c_str(), this); + Shutdown(std::move(self), "OnReadDone+second_request"); + } +} + +void LoadReporterAsyncServiceImpl::ReportLoadHandler::ScheduleNextReport( + std::shared_ptr<ReportLoadHandler> self, bool ok) { + if (!ok || shutdown_) { + Shutdown(std::move(self), "ScheduleNextReport"); + return; + } + auto next_report_time = gpr_time_add( + gpr_now(GPR_CLOCK_MONOTONIC), + gpr_time_from_millis(load_report_interval_ms_, GPR_TIMESPAN)); + { + std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_); + if (service_->shutdown_) { + lock.release()->unlock(); + Shutdown(std::move(self), "ScheduleNextReport"); + return; + } + next_outbound_ = + CallableTag(std::bind(&ReportLoadHandler::SendReport, this, + std::placeholders::_1, std::placeholders::_2), + std::move(self)); + // TODO(juanlishen): Improve the Alarm implementation to reuse a single + // instance for multiple events. + next_report_alarm_.reset(new Alarm); + next_report_alarm_->Set(cq_, next_report_time, &next_outbound_); + } + gpr_log(GPR_DEBUG, + "[LRS %p] Next load report scheduled (lb_id_: %s, handler: %p).", + service_, lb_id_.c_str(), this); +} + +void LoadReporterAsyncServiceImpl::ReportLoadHandler::SendReport( + std::shared_ptr<ReportLoadHandler> self, bool ok) { + if (!ok || shutdown_) { + Shutdown(std::move(self), "SendReport"); + return; + } + ::grpc::lb::v1::LoadReportResponse response; + auto loads = load_reporter_->GenerateLoads(load_balanced_hostname_, lb_id_); + response.mutable_load()->Swap(&loads); + auto feedback = load_reporter_->GenerateLoadBalancingFeedback(); + response.mutable_load_balancing_feedback()->Swap(&feedback); + if (call_status_ < INITIAL_RESPONSE_SENT) { + auto initial_response = response.mutable_initial_response(); + initial_response->set_load_balancer_id(lb_id_); + initial_response->set_implementation_id( + ::grpc::lb::v1::InitialLoadReportResponse::CPP); + initial_response->set_server_version(kVersion); + call_status_ = INITIAL_RESPONSE_SENT; + } + { + std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_); + if (service_->shutdown_) { + lock.release()->unlock(); + Shutdown(std::move(self), "SendReport"); + return; + } + next_outbound_ = + CallableTag(std::bind(&ReportLoadHandler::ScheduleNextReport, this, + std::placeholders::_1, std::placeholders::_2), + std::move(self)); + stream_.Write(response, &next_outbound_); + gpr_log(GPR_INFO, + "[LRS %p] Sending load report (lb_id_: %s, handler: %p, loads " + "count: %d)...", + service_, lb_id_.c_str(), this, response.load().size()); + } +} + +void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnDoneNotified( + std::shared_ptr<ReportLoadHandler> self, bool ok) { + GPR_ASSERT(ok); + done_notified_ = true; + if (ctx_.IsCancelled()) { + is_cancelled_ = true; + } + gpr_log(GPR_INFO, + "[LRS %p] Load reporting call is notified done (handler: %p, " + "is_cancelled: %d).", + service_, this, static_cast<int>(is_cancelled_)); + Shutdown(std::move(self), "OnDoneNotified"); +} + +void LoadReporterAsyncServiceImpl::ReportLoadHandler::Shutdown( + std::shared_ptr<ReportLoadHandler> self, const char* reason) { + if (!shutdown_) { + gpr_log(GPR_INFO, + "[LRS %p] Shutting down the handler (lb_id_: %s, handler: %p, " + "reason: %s).", + service_, lb_id_.c_str(), this, reason); + shutdown_ = true; + if (call_status_ >= INITIAL_REQUEST_RECEIVED) { + load_reporter_->ReportStreamClosed(load_balanced_hostname_, lb_id_); + next_report_alarm_->Cancel(); + } + } + // OnRequestDelivered() may be called after OnDoneNotified(), so we need to + // try to Finish() every time we are in Shutdown(). + if (call_status_ >= DELIVERED && call_status_ < FINISH_CALLED) { + std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_); + if (!service_->shutdown_) { + on_finish_done_ = + CallableTag(std::bind(&ReportLoadHandler::OnFinishDone, this, + std::placeholders::_1, std::placeholders::_2), + std::move(self)); + // TODO(juanlishen): Maybe add a message proto for the client to + // explicitly cancel the stream so that we can return OK status in such + // cases. + stream_.Finish(Status::CANCELLED, &on_finish_done_); + call_status_ = FINISH_CALLED; + } + } +} + +void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnFinishDone( + std::shared_ptr<ReportLoadHandler> self, bool ok) { + if (ok) { + gpr_log(GPR_INFO, + "[LRS %p] Load reporting finished (lb_id_: %s, handler: %p).", + service_, lb_id_.c_str(), this); + } +} + +} // namespace load_reporter +} // namespace grpc diff --git a/src/cpp/server/load_reporter/load_reporter_async_service_impl.h b/src/cpp/server/load_reporter/load_reporter_async_service_impl.h new file mode 100644 index 0000000000..6fc577ff49 --- /dev/null +++ b/src/cpp/server/load_reporter/load_reporter_async_service_impl.h @@ -0,0 +1,194 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef GRPC_SRC_CPP_SERVER_LOAD_REPORTER_ASYNC_SERVICE_IMPL_H +#define GRPC_SRC_CPP_SERVER_LOAD_REPORTER_ASYNC_SERVICE_IMPL_H + +#include <grpc/support/port_platform.h> + +#include <grpc/support/log.h> +#include <grpcpp/alarm.h> +#include <grpcpp/grpcpp.h> + +#include "src/core/lib/gprpp/thd.h" +#include "src/cpp/server/load_reporter/load_reporter.h" + +namespace grpc { +namespace load_reporter { + +// Async load reporting service. It's mainly responsible for controlling the +// procedure of incoming requests. The real business logic is handed off to the +// LoadReporter. There should be at most one instance of this service on a +// server to avoid spreading the load data into multiple places. +class LoadReporterAsyncServiceImpl + : public grpc::lb::v1::LoadReporter::AsyncService { + public: + explicit LoadReporterAsyncServiceImpl( + std::unique_ptr<ServerCompletionQueue> cq); + ~LoadReporterAsyncServiceImpl(); + + // Starts the working thread. + void StartThread(); + + // Not copyable nor movable. + LoadReporterAsyncServiceImpl(const LoadReporterAsyncServiceImpl&) = delete; + LoadReporterAsyncServiceImpl& operator=(const LoadReporterAsyncServiceImpl&) = + delete; + + private: + class ReportLoadHandler; + + // A tag that can be called with a bool argument. It's tailored for + // ReportLoadHandler's use. Before being used, it should be constructed with a + // method of ReportLoadHandler and a shared pointer to the handler. The + // shared pointer will be moved to the invoked function and the function can + // only be invoked once. That makes ref counting of the handler easier, + // because the shared pointer is not bound to the function and can be gone + // once the invoked function returns (if not used any more). + class CallableTag { + public: + using HandlerFunction = + std::function<void(std::shared_ptr<ReportLoadHandler>, bool)>; + + CallableTag() {} + + CallableTag(HandlerFunction func, + std::shared_ptr<ReportLoadHandler> handler) + : handler_function_(std::move(func)), handler_(std::move(handler)) { + GPR_ASSERT(handler_function_ != nullptr); + GPR_ASSERT(handler_ != nullptr); + } + + // Runs the tag. This should be called only once. The handler is no longer + // owned by this tag after this method is invoked. + void Run(bool ok); + + // Releases and returns the shared pointer to the handler. + std::shared_ptr<ReportLoadHandler> ReleaseHandler() { + return std::move(handler_); + } + + private: + HandlerFunction handler_function_ = nullptr; + std::shared_ptr<ReportLoadHandler> handler_; + }; + + // Each handler takes care of one load reporting stream. It contains + // per-stream data and it will access the members of the parent class (i.e., + // LoadReporterAsyncServiceImpl) for service-wide data (e.g., the load data). + class ReportLoadHandler { + public: + // Instantiates a ReportLoadHandler and requests the next load reporting + // call. The handler object will manage its own lifetime, so no action is + // needed from the caller any more regarding that object. + static void CreateAndStart(ServerCompletionQueue* cq, + LoadReporterAsyncServiceImpl* service, + LoadReporter* load_reporter); + + // This ctor is public because we want to use std::make_shared<> in + // CreateAndStart(). This ctor shouldn't be used elsewhere. + ReportLoadHandler(ServerCompletionQueue* cq, + LoadReporterAsyncServiceImpl* service, + LoadReporter* load_reporter); + + private: + // After the handler has a call request delivered, it starts reading the + // initial request. Also, a new handler is spawned so that we can keep + // servicing future calls. + void OnRequestDelivered(std::shared_ptr<ReportLoadHandler> self, bool ok); + + // The first Read() is expected to succeed, after which the handler starts + // sending load reports back to the balancer. The second Read() is + // expected to fail, which happens when the balancer half-closes the + // stream to signal that it's no longer interested in the load reports. For + // the latter case, the handler will then close the stream. + void OnReadDone(std::shared_ptr<ReportLoadHandler> self, bool ok); + + // The report sending operations are sequential as: send report -> send + // done, schedule the next send -> waiting for the alarm to fire -> alarm + // fires, send report -> ... + void SendReport(std::shared_ptr<ReportLoadHandler> self, bool ok); + void ScheduleNextReport(std::shared_ptr<ReportLoadHandler> self, bool ok); + + // Called when Finish() is done. + void OnFinishDone(std::shared_ptr<ReportLoadHandler> self, bool ok); + + // Called when AsyncNotifyWhenDone() notifies us. + void OnDoneNotified(std::shared_ptr<ReportLoadHandler> self, bool ok); + + void Shutdown(std::shared_ptr<ReportLoadHandler> self, const char* reason); + + // The key fields of the stream. + grpc::string lb_id_; + grpc::string load_balanced_hostname_; + grpc::string load_key_; + uint64_t load_report_interval_ms_; + + // The data for RPC communication with the load reportee. + ServerContext ctx_; + ::grpc::lb::v1::LoadReportRequest request_; + + // The members passed down from LoadReporterAsyncServiceImpl. + ServerCompletionQueue* cq_; + LoadReporterAsyncServiceImpl* service_; + LoadReporter* load_reporter_; + ServerAsyncReaderWriter<::grpc::lb::v1::LoadReportResponse, + ::grpc::lb::v1::LoadReportRequest> + stream_; + + // The status of the RPC progress. + enum CallStatus { + WAITING_FOR_DELIVERY, + DELIVERED, + INITIAL_REQUEST_RECEIVED, + INITIAL_RESPONSE_SENT, + FINISH_CALLED + } call_status_; + bool shutdown_{false}; + bool done_notified_{false}; + bool is_cancelled_{false}; + CallableTag on_done_notified_; + CallableTag on_finish_done_; + CallableTag next_inbound_; + CallableTag next_outbound_; + std::unique_ptr<Alarm> next_report_alarm_; + }; + + // Handles the incoming requests and drives the completion queue in a loop. + static void Work(void* arg); + + // Schedules the next data fetching from Census and LB feedback sampling. + void ScheduleNextFetchAndSample(); + + // Fetches data from Census and samples LB feedback. + void FetchAndSample(bool ok); + + std::unique_ptr<ServerCompletionQueue> cq_; + // To synchronize the operations related to shutdown state of cq_, so that we + // don't enqueue new tags into cq_ after it is already shut down. + std::mutex cq_shutdown_mu_; + std::atomic_bool shutdown_{false}; + std::unique_ptr<::grpc_core::Thread> thread_; + std::unique_ptr<LoadReporter> load_reporter_; + std::unique_ptr<Alarm> next_fetch_and_sample_alarm_; +}; + +} // namespace load_reporter +} // namespace grpc + +#endif // GRPC_SRC_CPP_SERVER_LOAD_REPORTER_ASYNC_SERVICE_IMPL_H diff --git a/src/cpp/server/load_reporter/load_reporting_service_server_builder_option.cc b/src/cpp/server/load_reporter/load_reporting_service_server_builder_option.cc new file mode 100644 index 0000000000..81cf6ac562 --- /dev/null +++ b/src/cpp/server/load_reporter/load_reporting_service_server_builder_option.cc @@ -0,0 +1,41 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include <grpcpp/ext/server_load_reporting.h> + +#include "src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.h" + +namespace grpc { +namespace load_reporter { +namespace experimental { + +void LoadReportingServiceServerBuilderOption::UpdateArguments( + ::grpc::ChannelArguments* args) { + args->SetInt(GRPC_ARG_ENABLE_LOAD_REPORTING, true); +} + +void LoadReportingServiceServerBuilderOption::UpdatePlugins( + std::vector<std::unique_ptr<::grpc::ServerBuilderPlugin>>* plugins) { + plugins->emplace_back(new LoadReportingServiceServerBuilderPlugin()); +} + +} // namespace experimental +} // namespace load_reporter +} // namespace grpc diff --git a/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.cc b/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.cc new file mode 100644 index 0000000000..c2c5dd5ed5 --- /dev/null +++ b/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.cc @@ -0,0 +1,60 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include "src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.h" + +#include <grpcpp/impl/server_initializer.h> + +namespace grpc { +namespace load_reporter { + +bool LoadReportingServiceServerBuilderPlugin::has_sync_methods() const { + if (service_ != nullptr) { + return service_->has_synchronous_methods(); + } + return false; +} + +bool LoadReportingServiceServerBuilderPlugin::has_async_methods() const { + if (service_ != nullptr) { + return service_->has_async_methods(); + } + return false; +} + +void LoadReportingServiceServerBuilderPlugin::UpdateServerBuilder( + grpc::ServerBuilder* builder) { + auto cq = builder->AddCompletionQueue(); + service_ = std::make_shared<LoadReporterAsyncServiceImpl>(std::move(cq)); +} + +void LoadReportingServiceServerBuilderPlugin::InitServer( + grpc::ServerInitializer* si) { + si->RegisterService(service_); +} + +void LoadReportingServiceServerBuilderPlugin::Finish( + grpc::ServerInitializer* si) { + service_->StartThread(); + service_.reset(); +} + +} // namespace load_reporter +} // namespace grpc diff --git a/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.h b/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.h new file mode 100644 index 0000000000..1f098591d4 --- /dev/null +++ b/src/cpp/server/load_reporter/load_reporting_service_server_builder_plugin.h @@ -0,0 +1,62 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef GRPC_SRC_CPP_LOAD_REPORTING_SERVICE_SERVER_BUILDER_PLUGIN_H +#define GRPC_SRC_CPP_LOAD_REPORTING_SERVICE_SERVER_BUILDER_PLUGIN_H + +#include <grpc/support/port_platform.h> + +#include <grpcpp/impl/server_builder_plugin.h> + +#include "src/cpp/server/load_reporter/load_reporter_async_service_impl.h" + +namespace grpc { +namespace load_reporter { + +// The plugin that registers and starts load reporting service when starting a +// server. +class LoadReportingServiceServerBuilderPlugin : public ServerBuilderPlugin { + public: + ~LoadReportingServiceServerBuilderPlugin() override = default; + grpc::string name() override { return "load_reporting_service"; } + + // Creates a load reporting service. + void UpdateServerBuilder(grpc::ServerBuilder* builder) override; + + // Registers the load reporter service. + void InitServer(grpc::ServerInitializer* si) override; + + // Starts the load reporter service. + void Finish(grpc::ServerInitializer* si) override; + + void ChangeArguments(const grpc::string& name, void* value) override {} + void UpdateChannelArguments(grpc::ChannelArguments* args) override {} + bool has_sync_methods() const override; + bool has_async_methods() const override; + + private: + std::shared_ptr<LoadReporterAsyncServiceImpl> service_; +}; + +std::unique_ptr<grpc::ServerBuilderPlugin> +CreateLoadReportingServiceServerBuilderPlugin(); + +} // namespace load_reporter +} // namespace grpc + +#endif // GRPC_SRC_CPP_LOAD_REPORTING_SERVICE_SERVER_BUILDER_PLUGIN_H diff --git a/src/cpp/server/load_reporter/util.cc b/src/cpp/server/load_reporter/util.cc new file mode 100644 index 0000000000..89bdf57049 --- /dev/null +++ b/src/cpp/server/load_reporter/util.cc @@ -0,0 +1,47 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/impl/codegen/port_platform.h> + +#include <grpcpp/ext/server_load_reporting.h> + +#include <cmath> + +#include <grpc/support/log.h> + +namespace grpc { +namespace load_reporter { +namespace experimental { + +void AddLoadReportingCost(grpc::ServerContext* ctx, + const grpc::string& cost_name, double cost_value) { + if (std::isnormal(cost_value)) { + grpc::string buf; + buf.resize(sizeof(cost_value) + cost_name.size()); + memcpy(&(*buf.begin()), &cost_value, sizeof(cost_value)); + memcpy(&(*buf.begin()) + sizeof(cost_value), cost_name.data(), + cost_name.size()); + ctx->AddTrailingMetadata(GRPC_LB_COST_MD_KEY, buf); + } else { + gpr_log(GPR_ERROR, "Call metric value is not normal."); + } +} + +} // namespace experimental +} // namespace load_reporter +} // namespace grpc |