diff options
author | Yash Tibrewal <yashkt@google.com> | 2018-09-12 17:10:09 -0700 |
---|---|---|
committer | Yash Tibrewal <yashkt@google.com> | 2018-09-12 17:10:09 -0700 |
commit | 18a6a012f2abe06bb90ecffc3fb9d46eb30e1f86 (patch) | |
tree | c653d07d4d8158ed651fcca6e5e3c36a973934a3 /src/core/lib | |
parent | 86530acca735b6ba49dee12bfc7e80d86d49c0cb (diff) | |
parent | 20345b21bb00c626ef346c26dbb3903c96c21c52 (diff) |
Merge branch 'master' into linuxversion
Diffstat (limited to 'src/core/lib')
39 files changed, 1304 insertions, 535 deletions
diff --git a/src/core/lib/channel/channel_trace.cc b/src/core/lib/channel/channel_trace.cc index b3443310ac..cfb2faba51 100644 --- a/src/core/lib/channel/channel_trace.cc +++ b/src/core/lib/channel/channel_trace.cc @@ -41,16 +41,14 @@ namespace grpc_core { namespace channelz { -ChannelTrace::TraceEvent::TraceEvent( - Severity severity, grpc_slice data, - RefCountedPtr<ChannelNode> referenced_channel, ReferencedType type) +ChannelTrace::TraceEvent::TraceEvent(Severity severity, grpc_slice data, + RefCountedPtr<BaseNode> referenced_entity) : severity_(severity), data_(data), timestamp_(grpc_millis_to_timespec(grpc_core::ExecCtx::Get()->Now(), GPR_CLOCK_REALTIME)), next_(nullptr), - referenced_channel_(std::move(referenced_channel)), - referenced_type_(type) {} + referenced_entity_(std::move(referenced_entity)) {} ChannelTrace::TraceEvent::TraceEvent(Severity severity, grpc_slice data) : severity_(severity), @@ -110,23 +108,13 @@ void ChannelTrace::AddTraceEvent(Severity severity, grpc_slice data) { AddTraceEventHelper(New<TraceEvent>(severity, data)); } -void ChannelTrace::AddTraceEventReferencingChannel( - Severity severity, grpc_slice data, - RefCountedPtr<ChannelNode> referenced_channel) { - if (max_list_size_ == 0) return; // tracing is disabled if max_events == 0 - // create and fill up the new event - AddTraceEventHelper(New<TraceEvent>( - severity, data, std::move(referenced_channel), ReferencedType::Channel)); -} - -void ChannelTrace::AddTraceEventReferencingSubchannel( +void ChannelTrace::AddTraceEventWithReference( Severity severity, grpc_slice data, - RefCountedPtr<ChannelNode> referenced_subchannel) { + RefCountedPtr<BaseNode> referenced_entity) { if (max_list_size_ == 0) return; // tracing is disabled if max_events == 0 // create and fill up the new event - AddTraceEventHelper(New<TraceEvent>(severity, data, - std::move(referenced_subchannel), - ReferencedType::Subchannel)); + AddTraceEventHelper( + New<TraceEvent>(severity, data, std::move(referenced_entity))); } namespace { @@ -157,19 +145,18 @@ void ChannelTrace::TraceEvent::RenderTraceEvent(grpc_json* json) const { json_iterator = grpc_json_create_child(json_iterator, json, "timestamp", gpr_format_timespec(timestamp_), GRPC_JSON_STRING, true); - if (referenced_channel_ != nullptr) { + if (referenced_entity_ != nullptr) { + const bool is_channel = + (referenced_entity_->type() == BaseNode::EntityType::kTopLevelChannel || + referenced_entity_->type() == BaseNode::EntityType::kInternalChannel); char* uuid_str; - gpr_asprintf(&uuid_str, "%" PRIdPTR, referenced_channel_->channel_uuid()); + gpr_asprintf(&uuid_str, "%" PRIdPTR, referenced_entity_->uuid()); grpc_json* child_ref = grpc_json_create_child( - json_iterator, json, - (referenced_type_ == ReferencedType::Channel) ? "channelRef" - : "subchannelRef", + json_iterator, json, is_channel ? "channelRef" : "subchannelRef", nullptr, GRPC_JSON_OBJECT, false); json_iterator = grpc_json_create_child( - nullptr, child_ref, - (referenced_type_ == ReferencedType::Channel) ? "channelId" - : "subchannelId", - uuid_str, GRPC_JSON_STRING, true); + nullptr, child_ref, is_channel ? "channelId" : "subchannelId", uuid_str, + GRPC_JSON_STRING, true); json_iterator = child_ref; } } @@ -178,24 +165,26 @@ grpc_json* ChannelTrace::RenderJson() const { if (!max_list_size_) return nullptr; // tracing is disabled if max_events == 0 grpc_json* json = grpc_json_create(GRPC_JSON_OBJECT); - char* num_events_logged_str; - gpr_asprintf(&num_events_logged_str, "%" PRId64, num_events_logged_); grpc_json* json_iterator = nullptr; - json_iterator = - grpc_json_create_child(json_iterator, json, "numEventsLogged", - num_events_logged_str, GRPC_JSON_STRING, true); + if (num_events_logged_ > 0) { + json_iterator = grpc_json_add_number_string_child( + json, json_iterator, "numEventsLogged", num_events_logged_); + } json_iterator = grpc_json_create_child( json_iterator, json, "creationTimestamp", gpr_format_timespec(time_created_), GRPC_JSON_STRING, true); - grpc_json* events = grpc_json_create_child(json_iterator, json, "events", - nullptr, GRPC_JSON_ARRAY, false); - json_iterator = nullptr; - TraceEvent* it = head_trace_; - while (it != nullptr) { - json_iterator = grpc_json_create_child(json_iterator, events, nullptr, - nullptr, GRPC_JSON_OBJECT, false); - it->RenderTraceEvent(json_iterator); - it = it->next(); + // only add in the event list if it is non-empty. + if (num_events_logged_ > 0) { + grpc_json* events = grpc_json_create_child(json_iterator, json, "events", + nullptr, GRPC_JSON_ARRAY, false); + json_iterator = nullptr; + TraceEvent* it = head_trace_; + while (it != nullptr) { + json_iterator = grpc_json_create_child(json_iterator, events, nullptr, + nullptr, GRPC_JSON_OBJECT, false); + it->RenderTraceEvent(json_iterator); + it = it->next(); + } } return json; } diff --git a/src/core/lib/channel/channel_trace.h b/src/core/lib/channel/channel_trace.h index 596af7402f..94fea20b45 100644 --- a/src/core/lib/channel/channel_trace.h +++ b/src/core/lib/channel/channel_trace.h @@ -30,7 +30,7 @@ namespace grpc_core { namespace channelz { -class ChannelNode; +class BaseNode; // Object used to hold live data for a channel. This data is exposed via the // channelz service: @@ -55,35 +55,28 @@ class ChannelTrace { void AddTraceEvent(Severity severity, grpc_slice data); // Adds a new trace event to the tracing object. This trace event refers to a - // an event on a child of the channel. For example, if this channel has - // created a new subchannel, then it would record that with a TraceEvent - // referencing the new subchannel. + // an event that concerns a different channelz entity. For example, if this + // channel has created a new subchannel, then it would record that with + // a TraceEvent referencing the new subchannel. // // TODO(ncteisen): as this call is used more and more throughout the gRPC // stack, determine if it makes more sense to accept a char* instead of a // slice. - void AddTraceEventReferencingChannel( - Severity severity, grpc_slice data, - RefCountedPtr<ChannelNode> referenced_channel); - void AddTraceEventReferencingSubchannel( - Severity severity, grpc_slice data, - RefCountedPtr<ChannelNode> referenced_subchannel); + void AddTraceEventWithReference(Severity severity, grpc_slice data, + RefCountedPtr<BaseNode> referenced_entity); // Creates and returns the raw grpc_json object, so a parent channelz // object may incorporate the json before rendering. grpc_json* RenderJson() const; private: - // Types of objects that can be references by trace events. - enum class ReferencedType { Channel, Subchannel }; // Private class to encapsulate all the data and bookkeeping needed for a // a trace event. class TraceEvent { public: - // Constructor for a TraceEvent that references a different channel. + // Constructor for a TraceEvent that references a channel. TraceEvent(Severity severity, grpc_slice data, - RefCountedPtr<ChannelNode> referenced_channel, - ReferencedType type); + RefCountedPtr<BaseNode> referenced_entity_); // Constructor for a TraceEvent that does not reverence a different // channel. @@ -105,10 +98,7 @@ class ChannelTrace { gpr_timespec timestamp_; TraceEvent* next_; // the tracer object for the (sub)channel that this trace event refers to. - RefCountedPtr<ChannelNode> referenced_channel_; - // the type that the referenced tracer points to. Unused if this trace - // does not point to any channel or subchannel - ReferencedType referenced_type_; + RefCountedPtr<BaseNode> referenced_entity_; }; // TraceEvent // Internal helper to add and link in a trace event diff --git a/src/core/lib/channel/channelz.cc b/src/core/lib/channel/channelz.cc index 9d6002ed8a..375cf25cc6 100644 --- a/src/core/lib/channel/channelz.cc +++ b/src/core/lib/channel/channelz.cc @@ -41,33 +41,62 @@ namespace grpc_core { namespace channelz { -ChannelNode::ChannelNode(grpc_channel* channel, size_t channel_tracer_max_nodes, - bool is_top_level_channel) - : channel_(channel), - target_(nullptr), - channel_uuid_(-1), - is_top_level_channel_(is_top_level_channel) { - trace_.Init(channel_tracer_max_nodes); - target_ = UniquePtr<char>(grpc_channel_get_target(channel_)); - channel_uuid_ = ChannelzRegistry::RegisterChannelNode(this); +BaseNode::BaseNode(EntityType type) + : type_(type), uuid_(ChannelzRegistry::Register(this)) {} + +BaseNode::~BaseNode() { ChannelzRegistry::Unregister(uuid_); } + +char* BaseNode::RenderJsonString() { + grpc_json* json = RenderJson(); + GPR_ASSERT(json != nullptr); + char* json_str = grpc_json_dump_to_string(json, 0); + grpc_json_destroy(json); + return json_str; +} + +CallCountingHelper::CallCountingHelper() { gpr_atm_no_barrier_store(&last_call_started_millis_, (gpr_atm)ExecCtx::Get()->Now()); } -ChannelNode::~ChannelNode() { - trace_.Destroy(); - ChannelzRegistry::UnregisterChannelNode(channel_uuid_); -} +CallCountingHelper::~CallCountingHelper() {} -void ChannelNode::RecordCallStarted() { +void CallCountingHelper::RecordCallStarted() { gpr_atm_no_barrier_fetch_add(&calls_started_, (gpr_atm)1); gpr_atm_no_barrier_store(&last_call_started_millis_, (gpr_atm)ExecCtx::Get()->Now()); } -void ChannelNode::PopulateConnectivityState(grpc_json* json) {} +void CallCountingHelper::PopulateCallCounts(grpc_json* json) { + grpc_json* json_iterator = nullptr; + if (calls_started_ != 0) { + json_iterator = grpc_json_add_number_string_child( + json, json_iterator, "callsStarted", calls_started_); + } + if (calls_succeeded_ != 0) { + json_iterator = grpc_json_add_number_string_child( + json, json_iterator, "callsSucceeded", calls_succeeded_); + } + if (calls_failed_) { + json_iterator = grpc_json_add_number_string_child( + json, json_iterator, "callsFailed", calls_failed_); + } + gpr_timespec ts = + grpc_millis_to_timespec(last_call_started_millis_, GPR_CLOCK_REALTIME); + json_iterator = + grpc_json_create_child(json_iterator, json, "lastCallStartedTimestamp", + gpr_format_timespec(ts), GRPC_JSON_STRING, true); +} + +ChannelNode::ChannelNode(grpc_channel* channel, size_t channel_tracer_max_nodes, + bool is_top_level_channel) + : BaseNode(is_top_level_channel ? EntityType::kTopLevelChannel + : EntityType::kInternalChannel), + channel_(channel), + target_(UniquePtr<char>(grpc_channel_get_target(channel_))), + trace_(channel_tracer_max_nodes) {} -void ChannelNode::PopulateChildRefs(grpc_json* json) {} +ChannelNode::~ChannelNode() {} grpc_json* ChannelNode::RenderJson() { // We need to track these three json objects to build our object @@ -80,7 +109,7 @@ grpc_json* ChannelNode::RenderJson() { json = json_iterator; json_iterator = nullptr; json_iterator = grpc_json_add_number_string_child(json, json_iterator, - "channelId", channel_uuid_); + "channelId", uuid()); // reset json iterators to top level object json = top_level_json; json_iterator = nullptr; @@ -89,51 +118,28 @@ grpc_json* ChannelNode::RenderJson() { GRPC_JSON_OBJECT, false); json = data; json_iterator = nullptr; + // template method. Child classes may override this to add their specific + // functionality. PopulateConnectivityState(json); + // populate the target. GPR_ASSERT(target_.get() != nullptr); - json_iterator = grpc_json_create_child( - json_iterator, json, "target", target_.get(), GRPC_JSON_STRING, false); + grpc_json_create_child(nullptr, json, "target", target_.get(), + GRPC_JSON_STRING, false); // fill in the channel trace if applicable - grpc_json* trace = trace_->RenderJson(); - if (trace != nullptr) { - // we manually link up and fill the child since it was created for us in - // ChannelTrace::RenderJson - trace->key = "trace"; // this object is named trace in channelz.proto - json_iterator = grpc_json_link_child(json, trace, json_iterator); - } - // reset the parent to be the data object. - json = data; - json_iterator = nullptr; - if (calls_started_ != 0) { - json_iterator = grpc_json_add_number_string_child( - json, json_iterator, "callsStarted", calls_started_); + grpc_json* trace_json = trace_.RenderJson(); + if (trace_json != nullptr) { + trace_json->key = "trace"; // this object is named trace in channelz.proto + grpc_json_link_child(json, trace_json, nullptr); } - if (calls_succeeded_ != 0) { - json_iterator = grpc_json_add_number_string_child( - json, json_iterator, "callsSucceeded", calls_succeeded_); - } - if (calls_failed_) { - json_iterator = grpc_json_add_number_string_child( - json, json_iterator, "callsFailed", calls_failed_); - } - gpr_timespec ts = - grpc_millis_to_timespec(last_call_started_millis_, GPR_CLOCK_REALTIME); - json_iterator = - grpc_json_create_child(json_iterator, json, "lastCallStartedTimestamp", - gpr_format_timespec(ts), GRPC_JSON_STRING, true); + // ask CallCountingHelper to populate trace and call count data. + call_counter_.PopulateCallCounts(json); json = top_level_json; - json_iterator = nullptr; + // template method. Child classes may override this to add their specific + // functionality. PopulateChildRefs(json); return top_level_json; } -char* ChannelNode::RenderJsonString() { - grpc_json* json = RenderJson(); - char* json_str = grpc_json_dump_to_string(json, 0); - grpc_json_destroy(json); - return json_str; -} - RefCountedPtr<ChannelNode> ChannelNode::MakeChannelNode( grpc_channel* channel, size_t channel_tracer_max_nodes, bool is_top_level_channel) { @@ -141,12 +147,41 @@ RefCountedPtr<ChannelNode> ChannelNode::MakeChannelNode( channel, channel_tracer_max_nodes, is_top_level_channel); } -SubchannelNode::SubchannelNode() { - subchannel_uuid_ = ChannelzRegistry::RegisterSubchannelNode(this); -} +ServerNode::ServerNode(size_t channel_tracer_max_nodes) + : BaseNode(EntityType::kServer), trace_(channel_tracer_max_nodes) {} + +ServerNode::~ServerNode() {} -SubchannelNode::~SubchannelNode() { - ChannelzRegistry::UnregisterSubchannelNode(subchannel_uuid_); +grpc_json* ServerNode::RenderJson() { + // We need to track these three json objects to build our object + grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT); + grpc_json* json = top_level_json; + grpc_json* json_iterator = nullptr; + // create and fill the ref child + json_iterator = grpc_json_create_child(json_iterator, json, "ref", nullptr, + GRPC_JSON_OBJECT, false); + json = json_iterator; + json_iterator = nullptr; + json_iterator = grpc_json_add_number_string_child(json, json_iterator, + "serverId", uuid()); + // reset json iterators to top level object + json = top_level_json; + json_iterator = nullptr; + // create and fill the data child. + grpc_json* data = grpc_json_create_child(json_iterator, json, "data", nullptr, + GRPC_JSON_OBJECT, false); + json = data; + json_iterator = nullptr; + // fill in the channel trace if applicable + grpc_json* trace_json = trace_.RenderJson(); + if (trace_json != nullptr) { + trace_json->key = "trace"; // this object is named trace in channelz.proto + grpc_json_link_child(json, trace_json, nullptr); + } + // ask CallCountingHelper to populate trace and call count data. + call_counter_.PopulateCallCounts(json); + json = top_level_json; + return top_level_json; } } // namespace channelz diff --git a/src/core/lib/channel/channelz.h b/src/core/lib/channel/channelz.h index 07eb73d626..9be256147b 100644 --- a/src/core/lib/channel/channelz.h +++ b/src/core/lib/channel/channelz.h @@ -43,14 +43,52 @@ namespace grpc_core { namespace channelz { namespace testing { +class CallCountingHelperPeer; class ChannelNodePeer; -} +} // namespace testing -class ChannelNode : public RefCounted<ChannelNode> { +// base class for all channelz entities +class BaseNode : public RefCounted<BaseNode> { public: - static RefCountedPtr<ChannelNode> MakeChannelNode( - grpc_channel* channel, size_t channel_tracer_max_nodes, - bool is_top_level_channel); + // There are only four high level channelz entities. However, to support + // GetTopChannelsRequest, we split the Channel entity into two different + // types. All children of BaseNode must be one of these types. + enum class EntityType { + kTopLevelChannel, + kInternalChannel, + kSubchannel, + kServer, + kSocket, + }; + + explicit BaseNode(EntityType type); + virtual ~BaseNode(); + + // All children must implement this function. + virtual grpc_json* RenderJson() GRPC_ABSTRACT; + + // Renders the json and returns allocated string that must be freed by the + // caller. + char* RenderJsonString(); + + EntityType type() const { return type_; } + intptr_t uuid() const { return uuid_; } + + private: + const EntityType type_; + const intptr_t uuid_; +}; + +// This class is a helper class for channelz entities that deal with Channels, +// Subchannels, and Servers, since those have similar proto definitions. +// This class has the ability to: +// - track calls_{started,succeeded,failed} +// - track last_call_started_timestamp +// - perform rendering of the above items +class CallCountingHelper { + public: + CallCountingHelper(); + ~CallCountingHelper(); void RecordCallStarted(); void RecordCallFailed() { @@ -60,17 +98,46 @@ class ChannelNode : public RefCounted<ChannelNode> { gpr_atm_no_barrier_fetch_add(&calls_succeeded_, (gpr_atm(1))); } - grpc_json* RenderJson(); - char* RenderJsonString(); + // Common rendering of the call count data and last_call_started_timestamp. + void PopulateCallCounts(grpc_json* json); - // helper for getting and populating connectivity state. It is virtual - // because it allows the client_channel specific code to live in ext/ - // instead of lib/ - virtual void PopulateConnectivityState(grpc_json* json); + private: + // testing peer friend. + friend class testing::CallCountingHelperPeer; - virtual void PopulateChildRefs(grpc_json* json); + gpr_atm calls_started_ = 0; + gpr_atm calls_succeeded_ = 0; + gpr_atm calls_failed_ = 0; + gpr_atm last_call_started_millis_ = 0; +}; - ChannelTrace* trace() { return trace_.get(); } +// Handles channelz bookkeeping for channels +class ChannelNode : public BaseNode { + public: + static RefCountedPtr<ChannelNode> MakeChannelNode( + grpc_channel* channel, size_t channel_tracer_max_nodes, + bool is_top_level_channel); + + ChannelNode(grpc_channel* channel, size_t channel_tracer_max_nodes, + bool is_top_level_channel); + ~ChannelNode() override; + + grpc_json* RenderJson() override; + + // template methods. RenderJSON uses these methods to render its JSON + // representation. These are virtual so that children classes may provide + // their specific mechanism for populating these parts of the channelz + // object. + // + // ChannelNode does not have a notion of connectivity state or child refs, + // so it leaves these implementations blank. + // + // This is utilizing the template method design pattern. + // + // TODO(ncteisen): remove these template methods in favor of manual traversal + // and mutation of the grpc_json object. + virtual void PopulateConnectivityState(grpc_json* json) {} + virtual void PopulateChildRefs(grpc_json* json) {} void MarkChannelDestroyed() { GPR_ASSERT(channel_ != nullptr); @@ -79,47 +146,62 @@ class ChannelNode : public RefCounted<ChannelNode> { bool ChannelIsDestroyed() { return channel_ == nullptr; } - intptr_t channel_uuid() { return channel_uuid_; } - bool is_top_level_channel() { return is_top_level_channel_; } - - protected: - GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_DELETE - GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_NEW - ChannelNode(grpc_channel* channel, size_t channel_tracer_max_nodes, - bool is_top_level_channel); - virtual ~ChannelNode(); + // proxy methods to composed classes. + void AddTraceEvent(ChannelTrace::Severity severity, grpc_slice data) { + trace_.AddTraceEvent(severity, data); + } + void AddTraceEventWithReference(ChannelTrace::Severity severity, + grpc_slice data, + RefCountedPtr<BaseNode> referenced_channel) { + trace_.AddTraceEventWithReference(severity, data, + std::move(referenced_channel)); + } + void RecordCallStarted() { call_counter_.RecordCallStarted(); } + void RecordCallFailed() { call_counter_.RecordCallFailed(); } + void RecordCallSucceeded() { call_counter_.RecordCallSucceeded(); } private: - // testing peer friend. + // to allow the channel trace test to access trace_. friend class testing::ChannelNodePeer; - grpc_channel* channel_ = nullptr; UniquePtr<char> target_; - gpr_atm calls_started_ = 0; - gpr_atm calls_succeeded_ = 0; - gpr_atm calls_failed_ = 0; - gpr_atm last_call_started_millis_ = 0; - intptr_t channel_uuid_; - bool is_top_level_channel_ = true; - ManualConstructor<ChannelTrace> trace_; + CallCountingHelper call_counter_; + ChannelTrace trace_; }; -// Placeholds channelz class for subchannels. All this can do now is track its -// uuid (this information is needed by the parent channelz class). -// TODO(ncteisen): build this out to support the GetSubchannel channelz request. -class SubchannelNode : public RefCounted<SubchannelNode> { +// Handles channelz bookkeeping for servers +class ServerNode : public BaseNode { public: - SubchannelNode(); - virtual ~SubchannelNode(); + explicit ServerNode(size_t channel_tracer_max_nodes); + ~ServerNode() override; - intptr_t subchannel_uuid() { return subchannel_uuid_; } + grpc_json* RenderJson() override; - protected: - GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_DELETE - GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_NEW + // proxy methods to composed classes. + void AddTraceEvent(ChannelTrace::Severity severity, grpc_slice data) { + trace_.AddTraceEvent(severity, data); + } + void AddTraceEventWithReference(ChannelTrace::Severity severity, + grpc_slice data, + RefCountedPtr<BaseNode> referenced_channel) { + trace_.AddTraceEventWithReference(severity, data, + std::move(referenced_channel)); + } + void RecordCallStarted() { call_counter_.RecordCallStarted(); } + void RecordCallFailed() { call_counter_.RecordCallFailed(); } + void RecordCallSucceeded() { call_counter_.RecordCallSucceeded(); } private: - intptr_t subchannel_uuid_; + CallCountingHelper call_counter_; + ChannelTrace trace_; +}; + +// Handles channelz bookkeeping for sockets +// TODO(ncteisen): implement in subsequent PR. +class SocketNode : public BaseNode { + public: + SocketNode() : BaseNode(EntityType::kSocket) {} + ~SocketNode() override {} }; // Creation functions diff --git a/src/core/lib/channel/channelz_registry.cc b/src/core/lib/channel/channelz_registry.cc index f79d2f0c17..adc7b6ba44 100644 --- a/src/core/lib/channel/channelz_registry.cc +++ b/src/core/lib/channel/channelz_registry.cc @@ -53,54 +53,46 @@ ChannelzRegistry::ChannelzRegistry() { gpr_mu_init(&mu_); } ChannelzRegistry::~ChannelzRegistry() { gpr_mu_destroy(&mu_); } -intptr_t ChannelzRegistry::InternalRegisterEntry(const RegistryEntry& entry) { +intptr_t ChannelzRegistry::InternalRegister(BaseNode* node) { MutexLock lock(&mu_); - entities_.push_back(entry); + entities_.push_back(node); intptr_t uuid = entities_.size(); return uuid; } -void ChannelzRegistry::InternalUnregisterEntry(intptr_t uuid, EntityType type) { +void ChannelzRegistry::InternalUnregister(intptr_t uuid) { GPR_ASSERT(uuid >= 1); MutexLock lock(&mu_); GPR_ASSERT(static_cast<size_t>(uuid) <= entities_.size()); - GPR_ASSERT(entities_[uuid - 1].type == type); - entities_[uuid - 1].object = nullptr; - entities_[uuid - 1].type = EntityType::kUnset; + entities_[uuid - 1] = nullptr; } -void* ChannelzRegistry::InternalGetEntry(intptr_t uuid, EntityType type) { +BaseNode* ChannelzRegistry::InternalGet(intptr_t uuid) { MutexLock lock(&mu_); if (uuid < 1 || uuid > static_cast<intptr_t>(entities_.size())) { return nullptr; } - if (entities_[uuid - 1].type == type) { - return entities_[uuid - 1].object; - } else { - return nullptr; - } + return entities_[uuid - 1]; } char* ChannelzRegistry::InternalGetTopChannels(intptr_t start_channel_id) { grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT); grpc_json* json = top_level_json; grpc_json* json_iterator = nullptr; - InlinedVector<ChannelNode*, 10> top_level_channels; + InlinedVector<BaseNode*, 10> top_level_channels; // uuids index into entities one-off (idx 0 is really uuid 1, since 0 is // reserved). However, we want to support requests coming in with // start_channel_id=0, which signifies "give me everything." Hence this // funky looking line below. size_t start_idx = start_channel_id == 0 ? 0 : start_channel_id - 1; for (size_t i = start_idx; i < entities_.size(); ++i) { - if (entities_[i].type == EntityType::kChannelNode) { - ChannelNode* channel_node = - static_cast<ChannelNode*>(entities_[i].object); - if (channel_node->is_top_level_channel()) { - top_level_channels.push_back(channel_node); - } + if (entities_[i] != nullptr && + entities_[i]->type() == + grpc_core::channelz::BaseNode::EntityType::kTopLevelChannel) { + top_level_channels.push_back(entities_[i]); } } - if (top_level_channels.size() > 0) { + if (!top_level_channels.empty()) { // create list of channels grpc_json* array_parent = grpc_json_create_child( nullptr, json, "channel", nullptr, GRPC_JSON_ARRAY, false); @@ -120,6 +112,42 @@ char* ChannelzRegistry::InternalGetTopChannels(intptr_t start_channel_id) { return json_str; } +char* ChannelzRegistry::InternalGetServers(intptr_t start_server_id) { + grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT); + grpc_json* json = top_level_json; + grpc_json* json_iterator = nullptr; + InlinedVector<BaseNode*, 10> servers; + // uuids index into entities one-off (idx 0 is really uuid 1, since 0 is + // reserved). However, we want to support requests coming in with + // start_server_id=0, which signifies "give me everything." + size_t start_idx = start_server_id == 0 ? 0 : start_server_id - 1; + for (size_t i = start_idx; i < entities_.size(); ++i) { + if (entities_[i] != nullptr && + entities_[i]->type() == + grpc_core::channelz::BaseNode::EntityType::kServer) { + servers.push_back(entities_[i]); + } + } + if (!servers.empty()) { + // create list of servers + grpc_json* array_parent = grpc_json_create_child( + nullptr, json, "server", nullptr, GRPC_JSON_ARRAY, false); + for (size_t i = 0; i < servers.size(); ++i) { + grpc_json* server_json = servers[i]->RenderJson(); + json_iterator = + grpc_json_link_child(array_parent, server_json, json_iterator); + } + } + // For now we do not have any pagination rules. In the future we could + // pick a constant for max_channels_sent for a GetServers request. + // Tracking: https://github.com/grpc/grpc/issues/16019. + json_iterator = grpc_json_create_child(nullptr, json, "end", nullptr, + GRPC_JSON_TRUE, false); + char* json_str = grpc_json_dump_to_string(top_level_json, 0); + grpc_json_destroy(top_level_json); + return json_str; +} + } // namespace channelz } // namespace grpc_core @@ -128,10 +156,18 @@ char* grpc_channelz_get_top_channels(intptr_t start_channel_id) { start_channel_id); } +char* grpc_channelz_get_servers(intptr_t start_server_id) { + return grpc_core::channelz::ChannelzRegistry::GetServers(start_server_id); +} + char* grpc_channelz_get_channel(intptr_t channel_id) { - grpc_core::channelz::ChannelNode* channel_node = - grpc_core::channelz::ChannelzRegistry::GetChannelNode(channel_id); - if (channel_node == nullptr) { + grpc_core::channelz::BaseNode* channel_node = + grpc_core::channelz::ChannelzRegistry::Get(channel_id); + if (channel_node == nullptr || + (channel_node->type() != + grpc_core::channelz::BaseNode::EntityType::kTopLevelChannel && + channel_node->type() != + grpc_core::channelz::BaseNode::EntityType::kInternalChannel)) { return nullptr; } grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT); @@ -143,3 +179,21 @@ char* grpc_channelz_get_channel(intptr_t channel_id) { grpc_json_destroy(top_level_json); return json_str; } + +char* grpc_channelz_get_subchannel(intptr_t subchannel_id) { + grpc_core::channelz::BaseNode* subchannel_node = + grpc_core::channelz::ChannelzRegistry::Get(subchannel_id); + if (subchannel_node == nullptr || + subchannel_node->type() != + grpc_core::channelz::BaseNode::EntityType::kSubchannel) { + return nullptr; + } + grpc_json* top_level_json = grpc_json_create(GRPC_JSON_OBJECT); + grpc_json* json = top_level_json; + grpc_json* subchannel_json = subchannel_node->RenderJson(); + subchannel_json->key = "subchannel"; + grpc_json_link_child(json, subchannel_json, nullptr); + char* json_str = grpc_json_dump_to_string(top_level_json, 0); + grpc_json_destroy(top_level_json); + return json_str; +} diff --git a/src/core/lib/channel/channelz_registry.h b/src/core/lib/channel/channelz_registry.h index 5d7c936726..d0d660600d 100644 --- a/src/core/lib/channel/channelz_registry.h +++ b/src/core/lib/channel/channelz_registry.h @@ -40,32 +40,11 @@ class ChannelzRegistry { // To be called in grpc_shutdown(); static void Shutdown(); - // Register/Unregister/Get for ChannelNode - static intptr_t RegisterChannelNode(ChannelNode* channel_node) { - RegistryEntry entry(channel_node, EntityType::kChannelNode); - return Default()->InternalRegisterEntry(entry); - } - static void UnregisterChannelNode(intptr_t uuid) { - Default()->InternalUnregisterEntry(uuid, EntityType::kChannelNode); - } - static ChannelNode* GetChannelNode(intptr_t uuid) { - void* gotten = Default()->InternalGetEntry(uuid, EntityType::kChannelNode); - return gotten == nullptr ? nullptr : static_cast<ChannelNode*>(gotten); - } - - // Register/Unregister/Get for SubchannelNode - static intptr_t RegisterSubchannelNode(SubchannelNode* channel_node) { - RegistryEntry entry(channel_node, EntityType::kSubchannelNode); - return Default()->InternalRegisterEntry(entry); - } - static void UnregisterSubchannelNode(intptr_t uuid) { - Default()->InternalUnregisterEntry(uuid, EntityType::kSubchannelNode); - } - static SubchannelNode* GetSubchannelNode(intptr_t uuid) { - void* gotten = - Default()->InternalGetEntry(uuid, EntityType::kSubchannelNode); - return gotten == nullptr ? nullptr : static_cast<SubchannelNode*>(gotten); + static intptr_t Register(BaseNode* node) { + return Default()->InternalRegister(node); } + static void Unregister(intptr_t uuid) { Default()->InternalUnregister(uuid); } + static BaseNode* Get(intptr_t uuid) { return Default()->InternalGet(uuid); } // Returns the allocated JSON string that represents the proto // GetTopChannelsResponse as per channelz.proto. @@ -73,20 +52,13 @@ class ChannelzRegistry { return Default()->InternalGetTopChannels(start_channel_id); } - private: - enum class EntityType { - kChannelNode, - kSubchannelNode, - kUnset, - }; - - struct RegistryEntry { - RegistryEntry(void* object_in, EntityType type_in) - : object(object_in), type(type_in) {} - void* object; - EntityType type; - }; + // Returns the allocated JSON string that represents the proto + // GetServersResponse as per channelz.proto. + static char* GetServers(intptr_t start_server_id) { + return Default()->InternalGetServers(start_server_id); + } + private: GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_NEW GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_DELETE @@ -97,21 +69,22 @@ class ChannelzRegistry { static ChannelzRegistry* Default(); // globally registers an Entry. Returns its unique uuid - intptr_t InternalRegisterEntry(const RegistryEntry& entry); + intptr_t InternalRegister(BaseNode* node); // globally unregisters the object that is associated to uuid. Also does // sanity check that an object doesn't try to unregister the wrong type. - void InternalUnregisterEntry(intptr_t uuid, EntityType type); + void InternalUnregister(intptr_t uuid); // if object with uuid has previously been registered as the correct type, // returns the void* associated with that uuid. Else returns nullptr. - void* InternalGetEntry(intptr_t uuid, EntityType type); + BaseNode* InternalGet(intptr_t uuid); char* InternalGetTopChannels(intptr_t start_channel_id); + char* InternalGetServers(intptr_t start_server_id); // protects entities_ and uuid_ gpr_mu mu_; - InlinedVector<RegistryEntry, 20> entities_; + InlinedVector<BaseNode*, 20> entities_; }; } // namespace channelz diff --git a/src/core/lib/http/httpcli.cc b/src/core/lib/http/httpcli.cc index 12060074c5..3bd7a2ce59 100644 --- a/src/core/lib/http/httpcli.cc +++ b/src/core/lib/http/httpcli.cc @@ -163,7 +163,7 @@ static void done_write(void* arg, grpc_error* error) { static void start_write(internal_request* req) { grpc_slice_ref_internal(req->request_text); grpc_slice_buffer_add(&req->outgoing, req->request_text); - grpc_endpoint_write(req->ep, &req->outgoing, &req->done_write); + grpc_endpoint_write(req->ep, &req->outgoing, &req->done_write, nullptr); } static void on_handshake_done(void* arg, grpc_endpoint* ep) { diff --git a/src/core/lib/iomgr/buffer_list.cc b/src/core/lib/iomgr/buffer_list.cc new file mode 100644 index 0000000000..6ada23db1c --- /dev/null +++ b/src/core/lib/iomgr/buffer_list.cc @@ -0,0 +1,134 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include "src/core/lib/iomgr/buffer_list.h" +#include "src/core/lib/iomgr/port.h" + +#include <grpc/support/log.h> + +#ifdef GRPC_LINUX_ERRQUEUE +#include <time.h> + +#include "src/core/lib/gprpp/memory.h" + +namespace grpc_core { +void TracedBuffer::AddNewEntry(TracedBuffer** head, uint32_t seq_no, + void* arg) { + GPR_DEBUG_ASSERT(head != nullptr); + TracedBuffer* new_elem = New<TracedBuffer>(seq_no, arg); + /* Store the current time as the sendmsg time. */ + new_elem->ts_.sendmsg_time = gpr_now(GPR_CLOCK_REALTIME); + if (*head == nullptr) { + *head = new_elem; + return; + } + /* Append at the end. */ + TracedBuffer* ptr = *head; + while (ptr->next_ != nullptr) { + ptr = ptr->next_; + } + ptr->next_ = new_elem; +} + +namespace { +/** Fills gpr_timespec gts based on values from timespec ts */ +void fill_gpr_from_timestamp(gpr_timespec* gts, const struct timespec* ts) { + gts->tv_sec = ts->tv_sec; + gts->tv_nsec = static_cast<int32_t>(ts->tv_nsec); + gts->clock_type = GPR_CLOCK_REALTIME; +} + +/** The saved callback function that will be invoked when we get all the + * timestamps that we are going to get for a TracedBuffer. */ +void (*timestamps_callback)(void*, grpc_core::Timestamps*, + grpc_error* shutdown_err); +} /* namespace */ + +void TracedBuffer::ProcessTimestamp(TracedBuffer** head, + struct sock_extended_err* serr, + struct scm_timestamping* tss) { + GPR_DEBUG_ASSERT(head != nullptr); + TracedBuffer* elem = *head; + TracedBuffer* next = nullptr; + while (elem != nullptr) { + /* The byte number refers to the sequence number of the last byte which this + * timestamp relates to. */ + if (serr->ee_data >= elem->seq_no_) { + switch (serr->ee_info) { + case SCM_TSTAMP_SCHED: + fill_gpr_from_timestamp(&(elem->ts_.scheduled_time), &(tss->ts[0])); + elem = elem->next_; + break; + case SCM_TSTAMP_SND: + fill_gpr_from_timestamp(&(elem->ts_.sent_time), &(tss->ts[0])); + elem = elem->next_; + break; + case SCM_TSTAMP_ACK: + fill_gpr_from_timestamp(&(elem->ts_.acked_time), &(tss->ts[0])); + /* Got all timestamps. Do the callback and free this TracedBuffer. + * The thing below can be passed by value if we don't want the + * restriction on the lifetime. */ + timestamps_callback(elem->arg_, &(elem->ts_), GRPC_ERROR_NONE); + next = elem->next_; + Delete<TracedBuffer>(elem); + *head = elem = next; + break; + default: + abort(); + } + } else { + break; + } + } +} + +void TracedBuffer::Shutdown(TracedBuffer** head, grpc_error* shutdown_err) { + GPR_DEBUG_ASSERT(head != nullptr); + TracedBuffer* elem = *head; + while (elem != nullptr) { + if (timestamps_callback) { + timestamps_callback(elem->arg_, &(elem->ts_), shutdown_err); + } + auto* next = elem->next_; + Delete<TracedBuffer>(elem); + elem = next; + } + *head = nullptr; + GRPC_ERROR_UNREF(shutdown_err); +} + +void grpc_tcp_set_write_timestamps_callback(void (*fn)(void*, + grpc_core::Timestamps*, + grpc_error* error)) { + timestamps_callback = fn; +} +} /* namespace grpc_core */ + +#else /* GRPC_LINUX_ERRQUEUE */ + +namespace grpc_core { +void grpc_tcp_set_write_timestamps_callback(void (*fn)(void*, + grpc_core::Timestamps*, + grpc_error* error)) { + gpr_log(GPR_DEBUG, "Timestamps callback is not enabled for this platform"); +} +} /* namespace grpc_core */ + +#endif /* GRPC_LINUX_ERRQUEUE */ diff --git a/src/core/lib/iomgr/buffer_list.h b/src/core/lib/iomgr/buffer_list.h new file mode 100644 index 0000000000..cbbf50a657 --- /dev/null +++ b/src/core/lib/iomgr/buffer_list.h @@ -0,0 +1,96 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef GRPC_CORE_LIB_IOMGR_BUFFER_LIST_H +#define GRPC_CORE_LIB_IOMGR_BUFFER_LIST_H + +#include <grpc/support/port_platform.h> + +#include "src/core/lib/iomgr/port.h" + +#include <grpc/support/time.h> + +#include "src/core/lib/gprpp/memory.h" +#include "src/core/lib/iomgr/error.h" +#include "src/core/lib/iomgr/internal_errqueue.h" + +namespace grpc_core { +struct Timestamps { + /* TODO(yashykt): This would also need to store OPTSTAT once support is added + */ + gpr_timespec sendmsg_time; + gpr_timespec scheduled_time; + gpr_timespec sent_time; + gpr_timespec acked_time; +}; + +/** TracedBuffer is a class to keep track of timestamps for a specific buffer in + * the TCP layer. We are only tracking timestamps for Linux kernels and hence + * this class would only be used by Linux platforms. For all other platforms, + * TracedBuffer would be an empty class. + * + * The timestamps collected are according to grpc_core::Timestamps declared + * above. + * + * A TracedBuffer list is kept track of using the head element of the list. If + * the head element of the list is nullptr, then the list is empty. + */ +#ifdef GRPC_LINUX_ERRQUEUE +class TracedBuffer { + public: + /** Add a new entry in the TracedBuffer list pointed to by head. Also saves + * sendmsg_time with the current timestamp. */ + static void AddNewEntry(grpc_core::TracedBuffer** head, uint32_t seq_no, + void* arg); + + /** Processes a received timestamp based on sock_extended_err and + * scm_timestamping structures. It will invoke the timestamps callback if the + * timestamp type is SCM_TSTAMP_ACK. */ + static void ProcessTimestamp(grpc_core::TracedBuffer** head, + struct sock_extended_err* serr, + struct scm_timestamping* tss); + + /** Cleans the list by calling the callback for each traced buffer in the list + * with timestamps that it has. */ + static void Shutdown(grpc_core::TracedBuffer** head, + grpc_error* shutdown_err); + + private: + GPRC_ALLOW_CLASS_TO_USE_NON_PUBLIC_NEW + + TracedBuffer(int seq_no, void* arg) + : seq_no_(seq_no), arg_(arg), next_(nullptr) {} + + uint32_t seq_no_; /* The sequence number for the last byte in the buffer */ + void* arg_; /* The arg to pass to timestamps_callback */ + grpc_core::Timestamps ts_; /* The timestamps corresponding to this buffer */ + grpc_core::TracedBuffer* next_; /* The next TracedBuffer in the list */ +}; +#else /* GRPC_LINUX_ERRQUEUE */ +class TracedBuffer {}; +#endif /* GRPC_LINUX_ERRQUEUE */ + +/** Sets the callback function to call when timestamps for a write are + * collected. The callback does not own a reference to error. */ +void grpc_tcp_set_write_timestamps_callback(void (*fn)(void*, + grpc_core::Timestamps*, + grpc_error* error)); + +}; /* namespace grpc_core */ + +#endif /* GRPC_CORE_LIB_IOMGR_BUFFER_LIST_H */ diff --git a/src/core/lib/iomgr/endpoint.cc b/src/core/lib/iomgr/endpoint.cc index 92e7930111..44fb47e19d 100644 --- a/src/core/lib/iomgr/endpoint.cc +++ b/src/core/lib/iomgr/endpoint.cc @@ -28,8 +28,8 @@ void grpc_endpoint_read(grpc_endpoint* ep, grpc_slice_buffer* slices, } void grpc_endpoint_write(grpc_endpoint* ep, grpc_slice_buffer* slices, - grpc_closure* cb) { - ep->vtable->write(ep, slices, cb); + grpc_closure* cb, void* arg) { + ep->vtable->write(ep, slices, cb, arg); } void grpc_endpoint_add_to_pollset(grpc_endpoint* ep, grpc_pollset* pollset) { diff --git a/src/core/lib/iomgr/endpoint.h b/src/core/lib/iomgr/endpoint.h index 15db1649fa..1f590a80ca 100644 --- a/src/core/lib/iomgr/endpoint.h +++ b/src/core/lib/iomgr/endpoint.h @@ -33,10 +33,12 @@ typedef struct grpc_endpoint grpc_endpoint; typedef struct grpc_endpoint_vtable grpc_endpoint_vtable; +class Timestamps; struct grpc_endpoint_vtable { void (*read)(grpc_endpoint* ep, grpc_slice_buffer* slices, grpc_closure* cb); - void (*write)(grpc_endpoint* ep, grpc_slice_buffer* slices, grpc_closure* cb); + void (*write)(grpc_endpoint* ep, grpc_slice_buffer* slices, grpc_closure* cb, + void* arg); void (*add_to_pollset)(grpc_endpoint* ep, grpc_pollset* pollset); void (*add_to_pollset_set)(grpc_endpoint* ep, grpc_pollset_set* pollset); void (*delete_from_pollset_set)(grpc_endpoint* ep, grpc_pollset_set* pollset); @@ -70,9 +72,11 @@ int grpc_endpoint_get_fd(grpc_endpoint* ep); \a slices may be mutated at will by the endpoint until cb is called. No guarantee is made to the content of slices after a write EXCEPT that it is a valid slice buffer. + \a arg is platform specific. It is currently only used by TCP on linux + platforms as an argument that would be forwarded to the timestamps callback. */ void grpc_endpoint_write(grpc_endpoint* ep, grpc_slice_buffer* slices, - grpc_closure* cb); + grpc_closure* cb, void* arg); /* Causes any pending and future read/write callbacks to run immediately with success==0 */ diff --git a/src/core/lib/iomgr/endpoint_cfstream.cc b/src/core/lib/iomgr/endpoint_cfstream.cc index c3bc0cc8fd..df2cf508c8 100644 --- a/src/core/lib/iomgr/endpoint_cfstream.cc +++ b/src/core/lib/iomgr/endpoint_cfstream.cc @@ -268,7 +268,7 @@ static void CFStreamRead(grpc_endpoint* ep, grpc_slice_buffer* slices, } static void CFStreamWrite(grpc_endpoint* ep, grpc_slice_buffer* slices, - grpc_closure* cb) { + grpc_closure* cb, void* arg) { CFStreamEndpoint* ep_impl = reinterpret_cast<CFStreamEndpoint*>(ep); if (grpc_tcp_trace.enabled()) { gpr_log(GPR_DEBUG, "CFStream endpoint:%p write (%p, %p) length:%zu", diff --git a/src/core/lib/iomgr/endpoint_pair_posix.cc b/src/core/lib/iomgr/endpoint_pair_posix.cc index 5c5c246f99..3afbfd7254 100644 --- a/src/core/lib/iomgr/endpoint_pair_posix.cc +++ b/src/core/lib/iomgr/endpoint_pair_posix.cc @@ -59,11 +59,11 @@ grpc_endpoint_pair grpc_iomgr_create_endpoint_pair(const char* name, grpc_core::ExecCtx exec_ctx; gpr_asprintf(&final_name, "%s:client", name); - p.client = grpc_tcp_create(grpc_fd_create(sv[1], final_name, false), args, + p.client = grpc_tcp_create(grpc_fd_create(sv[1], final_name, true), args, "socketpair-server"); gpr_free(final_name); gpr_asprintf(&final_name, "%s:server", name); - p.server = grpc_tcp_create(grpc_fd_create(sv[0], final_name, false), args, + p.server = grpc_tcp_create(grpc_fd_create(sv[0], final_name, true), args, "socketpair-client"); gpr_free(final_name); diff --git a/src/core/lib/iomgr/error.cc b/src/core/lib/iomgr/error.cc index 90ed34da11..13bc69ffb6 100644 --- a/src/core/lib/iomgr/error.cc +++ b/src/core/lib/iomgr/error.cc @@ -513,9 +513,24 @@ bool grpc_error_get_str(grpc_error* err, grpc_error_strs which, grpc_error* grpc_error_add_child(grpc_error* src, grpc_error* child) { GPR_TIMER_SCOPE("grpc_error_add_child", 0); - grpc_error* new_err = copy_error_and_unref(src); - internal_add_error(&new_err, child); - return new_err; + if (src != GRPC_ERROR_NONE) { + if (child == GRPC_ERROR_NONE) { + /* \a child is empty. Simply return the ref to \a src */ + return src; + } else if (child != src) { + grpc_error* new_err = copy_error_and_unref(src); + internal_add_error(&new_err, child); + return new_err; + } else { + /* \a src and \a child are the same. Drop one of the references and return + * the other */ + GRPC_ERROR_UNREF(child); + return src; + } + } else { + /* \a src is empty. Simply return the ref to \a child */ + return child; + } } static const char* no_error_string = "\"No Error\""; diff --git a/src/core/lib/iomgr/error.h b/src/core/lib/iomgr/error.h index 27c4d22fd1..49f4029bc2 100644 --- a/src/core/lib/iomgr/error.h +++ b/src/core/lib/iomgr/error.h @@ -185,8 +185,16 @@ bool grpc_error_get_str(grpc_error* error, grpc_error_strs which, /// error occurring. Allows root causing high level errors from lower level /// errors that contributed to them. The src error takes ownership of the /// child error. +/// +/// Edge Conditions - +/// 1) If either of \a src or \a child is GRPC_ERROR_NONE, returns a reference +/// to the other argument. 2) If both \a src and \a child are GRPC_ERROR_NONE, +/// returns GRPC_ERROR_NONE. 3) If \a src and \a child point to the same error, +/// returns a single reference. (Note that, 2 references should have been +/// received to the error in this case.) grpc_error* grpc_error_add_child(grpc_error* src, grpc_error* child) GRPC_MUST_USE_RESULT; + grpc_error* grpc_os_error(const char* file, int line, int err, const char* call_name) GRPC_MUST_USE_RESULT; diff --git a/src/core/lib/iomgr/ev_posix.cc b/src/core/lib/iomgr/ev_posix.cc index 0205363d5c..d4377e2d50 100644 --- a/src/core/lib/iomgr/ev_posix.cc +++ b/src/core/lib/iomgr/ev_posix.cc @@ -237,14 +237,19 @@ void grpc_event_engine_shutdown(void) { } bool grpc_event_engine_can_track_errors(void) { +/* Only track errors if platform supports errqueue. */ +#ifdef GRPC_LINUX_ERRQUEUE return g_event_engine->can_track_err; +#else + return false; +#endif /* GRPC_LINUX_ERRQUEUE */ } grpc_fd* grpc_fd_create(int fd, const char* name, bool track_err) { GRPC_POLLING_API_TRACE("fd_create(%d, %s, %d)", fd, name, track_err); GRPC_FD_TRACE("fd_create(%d, %s, %d)", fd, name, track_err); - GPR_DEBUG_ASSERT(!track_err || g_event_engine->can_track_err); - return g_event_engine->fd_create(fd, name, track_err); + return g_event_engine->fd_create(fd, name, + track_err && g_event_engine->can_track_err); } int grpc_fd_wrapped_fd(grpc_fd* fd) { diff --git a/src/core/lib/iomgr/internal_errqueue.cc b/src/core/lib/iomgr/internal_errqueue.cc new file mode 100644 index 0000000000..99c22e9055 --- /dev/null +++ b/src/core/lib/iomgr/internal_errqueue.cc @@ -0,0 +1,36 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include "src/core/lib/iomgr/port.h" + +#include "src/core/lib/iomgr/internal_errqueue.h" + +#ifdef GRPC_POSIX_SOCKET_TCP + +bool kernel_supports_errqueue() { +#ifdef LINUX_VERSION_CODE +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) + return true; +#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(4, 0, 0) */ +#endif /* LINUX_VERSION_CODE */ + return false; +} + +#endif /* GRPC_POSIX_SOCKET_TCP */ diff --git a/src/core/lib/iomgr/internal_errqueue.h b/src/core/lib/iomgr/internal_errqueue.h new file mode 100644 index 0000000000..9d122808f9 --- /dev/null +++ b/src/core/lib/iomgr/internal_errqueue.h @@ -0,0 +1,83 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* This file contains constants defined in <linux/errqueue.h> and + * <linux/net_tstamp.h> so as to allow collecting network timestamps in the + * kernel. This file allows tcp_posix.cc to compile on platforms that do not + * have <linux/errqueue.h> and <linux/net_tstamp.h>. + */ + +#ifndef GRPC_CORE_LIB_IOMGR_INTERNAL_ERRQUEUE_H +#define GRPC_CORE_LIB_IOMGR_INTERNAL_ERRQUEUE_H + +#include <grpc/support/port_platform.h> + +#include "src/core/lib/iomgr/port.h" + +#ifdef GRPC_POSIX_SOCKET_TCP + +#include <sys/types.h> +#include <time.h> + +#ifdef GRPC_LINUX_ERRQUEUE +#include <linux/errqueue.h> +#include <linux/net_tstamp.h> +#include <sys/socket.h> +#endif /* GRPC_LINUX_ERRQUEUE */ + +namespace grpc_core { + +#ifdef GRPC_LINUX_ERRQUEUE + +/* Redefining scm_timestamping in the same way that <linux/errqueue.h> defines + * it, so that code compiles on systems that don't have it. */ +struct scm_timestamping { + struct timespec ts[3]; +}; +/* Also redefine timestamp types */ +/* The timestamp type for when the driver passed skb to NIC, or HW. */ +constexpr int SCM_TSTAMP_SND = 0; +/* The timestamp type for when data entered the packet scheduler. */ +constexpr int SCM_TSTAMP_SCHED = 1; +/* The timestamp type for when data acknowledged by peer. */ +constexpr int SCM_TSTAMP_ACK = 2; +/* Redefine required constants from <linux/net_tstamp.h> */ +constexpr uint32_t SOF_TIMESTAMPING_TX_SOFTWARE = 1u << 1; +constexpr uint32_t SOF_TIMESTAMPING_SOFTWARE = 1u << 4; +constexpr uint32_t SOF_TIMESTAMPING_OPT_ID = 1u << 7; +constexpr uint32_t SOF_TIMESTAMPING_TX_SCHED = 1u << 8; +constexpr uint32_t SOF_TIMESTAMPING_TX_ACK = 1u << 9; +constexpr uint32_t SOF_TIMESTAMPING_OPT_TSONLY = 1u << 11; + +constexpr uint32_t kTimestampingSocketOptions = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID | + SOF_TIMESTAMPING_OPT_TSONLY; +constexpr uint32_t kTimestampingRecordingOptions = + SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK; +#endif /* GRPC_LINUX_ERRQUEUE */ + +/* Returns true if kernel is capable of supporting errqueue and timestamping. + * Currently allowing only linux kernels above 4.0.0 + */ +bool kernel_supports_errqueue(); +} // namespace grpc_core + +#endif /* GRPC_POSIX_SOCKET_TCP */ + +#endif /* GRPC_CORE_LIB_IOMGR_INTERNAL_ERRQUEUE_H */ diff --git a/src/core/lib/iomgr/port.h b/src/core/lib/iomgr/port.h index 066417b93c..abf96662f5 100644 --- a/src/core/lib/iomgr/port.h +++ b/src/core/lib/iomgr/port.h @@ -60,6 +60,11 @@ #define GRPC_HAVE_IP_PKTINFO 1 #define GRPC_HAVE_MSG_NOSIGNAL 1 #define GRPC_HAVE_UNIX_SOCKET 1 +#ifdef LINUX_VERSION_CODE +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) +#define GRPC_LINUX_ERRQUEUE 1 +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) */ +#endif /* LINUX_VERSION_CODE */ #define GRPC_LINUX_MULTIPOLL_WITH_EPOLL 1 #define GRPC_POSIX_FORK 1 #define GRPC_POSIX_HOST_NAME_MAX 1 diff --git a/src/core/lib/iomgr/tcp_client_posix.cc b/src/core/lib/iomgr/tcp_client_posix.cc index 296ee74311..9c989b7dfe 100644 --- a/src/core/lib/iomgr/tcp_client_posix.cc +++ b/src/core/lib/iomgr/tcp_client_posix.cc @@ -279,7 +279,7 @@ grpc_error* grpc_tcp_client_prepare_fd(const grpc_channel_args* channel_args, } addr_str = grpc_sockaddr_to_uri(mapped_addr); gpr_asprintf(&name, "tcp-client:%s", addr_str); - *fdobj = grpc_fd_create(fd, name, false); + *fdobj = grpc_fd_create(fd, name, true); gpr_free(name); gpr_free(addr_str); return GRPC_ERROR_NONE; diff --git a/src/core/lib/iomgr/tcp_custom.cc b/src/core/lib/iomgr/tcp_custom.cc index 990e8d632b..e02a1898f2 100644 --- a/src/core/lib/iomgr/tcp_custom.cc +++ b/src/core/lib/iomgr/tcp_custom.cc @@ -221,7 +221,7 @@ static void custom_write_callback(grpc_custom_socket* socket, } static void endpoint_write(grpc_endpoint* ep, grpc_slice_buffer* write_slices, - grpc_closure* cb) { + grpc_closure* cb, void* arg) { custom_tcp_endpoint* tcp = (custom_tcp_endpoint*)ep; GRPC_CUSTOM_IOMGR_ASSERT_SAME_THREAD(); diff --git a/src/core/lib/iomgr/tcp_posix.cc b/src/core/lib/iomgr/tcp_posix.cc index b53ffbf01c..ac1e919acb 100644 --- a/src/core/lib/iomgr/tcp_posix.cc +++ b/src/core/lib/iomgr/tcp_posix.cc @@ -27,7 +27,9 @@ #include <errno.h> #include <limits.h> +#include <netinet/in.h> #include <stdbool.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/socket.h> @@ -46,6 +48,7 @@ #include "src/core/lib/debug/trace.h" #include "src/core/lib/gpr/string.h" #include "src/core/lib/gpr/useful.h" +#include "src/core/lib/iomgr/buffer_list.h" #include "src/core/lib/iomgr/ev_posix.h" #include "src/core/lib/iomgr/executor.h" #include "src/core/lib/profiling/timers.h" @@ -97,17 +100,42 @@ struct grpc_tcp { grpc_closure read_done_closure; grpc_closure write_done_closure; + grpc_closure error_closure; char* peer_string; grpc_resource_user* resource_user; grpc_resource_user_slice_allocator slice_allocator; + + grpc_core::TracedBuffer* tb_head; /* List of traced buffers */ + gpr_mu tb_mu; /* Lock for access to list of traced buffers */ + + /* grpc_endpoint_write takes an argument which if non-null means that the + * transport layer wants the TCP layer to collect timestamps for this write. + * This arg is forwarded to the timestamps callback function when the ACK + * timestamp is received from the kernel. This arg is a (void *) which allows + * users of this API to pass in a pointer to any kind of structure. This + * structure could actually be a tag or any book-keeping object that the user + * can use to distinguish between different traced writes. The only + * requirement from the TCP endpoint layer is that this arg should be non-null + * if the user wants timestamps for the write. */ + void* outgoing_buffer_arg; + /* A counter which starts at 0. It is initialized the first time the socket + * options for collecting timestamps are set, and is incremented with each + * byte sent. */ + int bytes_counter; + bool socket_ts_enabled; /* True if timestamping options are set on the socket + */ + gpr_atm + stop_error_notification; /* Set to 1 if we do not want to be notified on + errors anymore */ }; struct backup_poller { gpr_mu* pollset_mu; grpc_closure run_poller; }; + } // namespace #define BACKUP_POLLER_POLLSET(b) ((grpc_pollset*)((b) + 1)) @@ -302,6 +330,7 @@ static void tcp_free(grpc_tcp* tcp) { grpc_slice_buffer_destroy_internal(&tcp->last_read_buffer); grpc_resource_user_unref(tcp->resource_user); gpr_free(tcp->peer_string); + gpr_mu_destroy(&tcp->tb_mu); gpr_free(tcp); } @@ -347,6 +376,10 @@ static void tcp_destroy(grpc_endpoint* ep) { grpc_network_status_unregister_endpoint(ep); grpc_tcp* tcp = reinterpret_cast<grpc_tcp*>(ep); grpc_slice_buffer_reset_and_unref_internal(&tcp->last_read_buffer); + if (grpc_event_engine_can_track_errors()) { + gpr_atm_no_barrier_store(&tcp->stop_error_notification, true); + grpc_fd_set_error(tcp->em_fd); + } TCP_UNREF(tcp, "destroy"); } @@ -513,6 +546,235 @@ static void tcp_read(grpc_endpoint* ep, grpc_slice_buffer* incoming_buffer, } } +/* A wrapper around sendmsg. It sends \a msg over \a fd and returns the number + * of bytes sent. */ +ssize_t tcp_send(int fd, const struct msghdr* msg) { + GPR_TIMER_SCOPE("sendmsg", 1); + ssize_t sent_length; + do { + /* TODO(klempner): Cork if this is a partial write */ + GRPC_STATS_INC_SYSCALL_WRITE(); + sent_length = sendmsg(fd, msg, SENDMSG_FLAGS); + } while (sent_length < 0 && errno == EINTR); + return sent_length; +} + +/** This is to be called if outgoing_buffer_arg is not null. On linux platforms, + * this will call sendmsg with socket options set to collect timestamps inside + * the kernel. On return, sent_length is set to the return value of the sendmsg + * call. Returns false if setting the socket options failed. This is not + * implemented for non-linux platforms currently, and crashes out. + */ +static bool tcp_write_with_timestamps(grpc_tcp* tcp, struct msghdr* msg, + size_t sending_length, + ssize_t* sent_length, grpc_error** error); + +/** The callback function to be invoked when we get an error on the socket. */ +static void tcp_handle_error(void* arg /* grpc_tcp */, grpc_error* error); + +#ifdef GRPC_LINUX_ERRQUEUE +static bool tcp_write_with_timestamps(grpc_tcp* tcp, struct msghdr* msg, + size_t sending_length, + ssize_t* sent_length, + grpc_error** error) { + if (!tcp->socket_ts_enabled) { + uint32_t opt = grpc_core::kTimestampingSocketOptions; + if (setsockopt(tcp->fd, SOL_SOCKET, SO_TIMESTAMPING, + static_cast<void*>(&opt), sizeof(opt)) != 0) { + *error = tcp_annotate_error(GRPC_OS_ERROR(errno, "setsockopt"), tcp); + grpc_slice_buffer_reset_and_unref_internal(tcp->outgoing_buffer); + if (grpc_tcp_trace.enabled()) { + gpr_log(GPR_ERROR, "Failed to set timestamping options on the socket."); + } + return false; + } + tcp->bytes_counter = -1; + tcp->socket_ts_enabled = true; + } + /* Set control message to indicate that you want timestamps. */ + union { + char cmsg_buf[CMSG_SPACE(sizeof(uint32_t))]; + struct cmsghdr align; + } u; + cmsghdr* cmsg = reinterpret_cast<cmsghdr*>(u.cmsg_buf); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + *reinterpret_cast<int*>(CMSG_DATA(cmsg)) = + grpc_core::kTimestampingRecordingOptions; + msg->msg_control = u.cmsg_buf; + msg->msg_controllen = CMSG_SPACE(sizeof(uint32_t)); + + /* If there was an error on sendmsg the logic in tcp_flush will handle it. */ + ssize_t length = tcp_send(tcp->fd, msg); + *sent_length = length; + /* Only save timestamps if all the bytes were taken by sendmsg. */ + if (sending_length == static_cast<size_t>(length)) { + gpr_mu_lock(&tcp->tb_mu); + grpc_core::TracedBuffer::AddNewEntry( + &tcp->tb_head, static_cast<int>(tcp->bytes_counter + length), + tcp->outgoing_buffer_arg); + gpr_mu_unlock(&tcp->tb_mu); + tcp->outgoing_buffer_arg = nullptr; + } + return true; +} + +/** Reads \a cmsg to derive timestamps from the control messages. If a valid + * timestamp is found, the traced buffer list is updated with this timestamp. + * The caller of this function should be looping on the control messages found + * in \a msg. \a cmsg should point to the control message that the caller wants + * processed. + * On return, a pointer to a control message is returned. On the next iteration, + * CMSG_NXTHDR(msg, ret_val) should be passed as \a cmsg. */ +struct cmsghdr* process_timestamp(grpc_tcp* tcp, msghdr* msg, + struct cmsghdr* cmsg) { + auto next_cmsg = CMSG_NXTHDR(msg, cmsg); + if (next_cmsg == nullptr) { + if (grpc_tcp_trace.enabled()) { + gpr_log(GPR_ERROR, "Received timestamp without extended error"); + } + return cmsg; + } + + if (!(next_cmsg->cmsg_level == SOL_IP || next_cmsg->cmsg_level == SOL_IPV6) || + !(next_cmsg->cmsg_type == IP_RECVERR || + next_cmsg->cmsg_type == IPV6_RECVERR)) { + if (grpc_tcp_trace.enabled()) { + gpr_log(GPR_ERROR, "Unexpected control message"); + } + return cmsg; + } + + auto tss = + reinterpret_cast<struct grpc_core::scm_timestamping*>(CMSG_DATA(cmsg)); + auto serr = reinterpret_cast<struct sock_extended_err*>(CMSG_DATA(next_cmsg)); + if (serr->ee_errno != ENOMSG || + serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { + gpr_log(GPR_ERROR, "Unexpected control message"); + return cmsg; + } + /* The error handling can potentially be done on another thread so we need + * to protect the traced buffer list. A lock free list might be better. Using + * a simple mutex for now. */ + gpr_mu_lock(&tcp->tb_mu); + grpc_core::TracedBuffer::ProcessTimestamp(&tcp->tb_head, serr, tss); + gpr_mu_unlock(&tcp->tb_mu); + return next_cmsg; +} + +/** For linux platforms, reads the socket's error queue and processes error + * messages from the queue. Returns true if all the errors processed were + * timestamps. Returns false if any of the errors were not timestamps. For + * non-linux platforms, error processing is not used/enabled currently. + */ +static bool process_errors(grpc_tcp* tcp) { + while (true) { + struct iovec iov; + iov.iov_base = nullptr; + iov.iov_len = 0; + struct msghdr msg; + msg.msg_name = nullptr; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 0; + msg.msg_flags = 0; + + union { + char rbuf[1024 /*CMSG_SPACE(sizeof(scm_timestamping)) + + CMSG_SPACE(sizeof(sock_extended_err) + sizeof(sockaddr_in))*/]; + struct cmsghdr align; + } aligned_buf; + memset(&aligned_buf, 0, sizeof(aligned_buf)); + + msg.msg_control = aligned_buf.rbuf; + msg.msg_controllen = sizeof(aligned_buf.rbuf); + + int r, saved_errno; + do { + r = recvmsg(tcp->fd, &msg, MSG_ERRQUEUE); + saved_errno = errno; + } while (r < 0 && saved_errno == EINTR); + + if (r == -1 && saved_errno == EAGAIN) { + return true; /* No more errors to process */ + } + if (r == -1) { + return false; + } + if (grpc_tcp_trace.enabled()) { + if ((msg.msg_flags & MSG_CTRUNC) == 1) { + gpr_log(GPR_INFO, "Error message was truncated."); + } + } + + if (msg.msg_controllen == 0) { + /* There was no control message found. It was probably spurious. */ + return true; + } + for (auto cmsg = CMSG_FIRSTHDR(&msg); cmsg && cmsg->cmsg_len; + cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != SOL_SOCKET || + cmsg->cmsg_type != SCM_TIMESTAMPING) { + /* Got a control message that is not a timestamp. Don't know how to + * handle this. */ + if (grpc_tcp_trace.enabled()) { + gpr_log(GPR_INFO, + "unknown control message cmsg_level:%d cmsg_type:%d", + cmsg->cmsg_level, cmsg->cmsg_type); + } + return false; + } + process_timestamp(tcp, &msg, cmsg); + } + } +} + +static void tcp_handle_error(void* arg /* grpc_tcp */, grpc_error* error) { + grpc_tcp* tcp = static_cast<grpc_tcp*>(arg); + if (grpc_tcp_trace.enabled()) { + gpr_log(GPR_INFO, "TCP:%p got_error: %s", tcp, grpc_error_string(error)); + } + + if (error != GRPC_ERROR_NONE || + static_cast<bool>(gpr_atm_acq_load(&tcp->stop_error_notification))) { + /* We aren't going to register to hear on error anymore, so it is safe to + * unref. */ + grpc_core::TracedBuffer::Shutdown(&tcp->tb_head, GRPC_ERROR_REF(error)); + TCP_UNREF(tcp, "error-tracking"); + return; + } + + /* We are still interested in collecting timestamps, so let's try reading + * them. */ + if (!process_errors(tcp)) { + /* This was not a timestamps error. This was an actual error. Set the + * read and write closures to be ready. + */ + grpc_fd_set_readable(tcp->em_fd); + grpc_fd_set_writable(tcp->em_fd); + } + GRPC_CLOSURE_INIT(&tcp->error_closure, tcp_handle_error, tcp, + grpc_schedule_on_exec_ctx); + grpc_fd_notify_on_error(tcp->em_fd, &tcp->error_closure); +} + +#else /* GRPC_LINUX_ERRQUEUE */ +static bool tcp_write_with_timestamps(grpc_tcp* tcp, struct msghdr* msg, + size_t sending_length, + ssize_t* sent_length, + grpc_error** error) { + gpr_log(GPR_ERROR, "Write with timestamps not supported for this platform"); + GPR_ASSERT(0); + return false; +} + +static void tcp_handle_error(void* arg /* grpc_tcp */, grpc_error* error) { + gpr_log(GPR_ERROR, "Error handling is not supported for this platform"); + GPR_ASSERT(0); +} +#endif /* GRPC_LINUX_ERRQUEUE */ + /* returns true if done, false if pending; if returning true, *error is set */ #if defined(IOV_MAX) && IOV_MAX < 1000 #define MAX_WRITE_IOVEC IOV_MAX @@ -557,19 +819,20 @@ static bool tcp_flush(grpc_tcp* tcp, grpc_error** error) { msg.msg_namelen = 0; msg.msg_iov = iov; msg.msg_iovlen = iov_size; - msg.msg_control = nullptr; - msg.msg_controllen = 0; msg.msg_flags = 0; + if (tcp->outgoing_buffer_arg != nullptr) { + if (!tcp_write_with_timestamps(tcp, &msg, sending_length, &sent_length, + error)) + return true; /* something went wrong with timestamps */ + } else { + msg.msg_control = nullptr; + msg.msg_controllen = 0; - GRPC_STATS_INC_TCP_WRITE_SIZE(sending_length); - GRPC_STATS_INC_TCP_WRITE_IOV_SIZE(iov_size); + GRPC_STATS_INC_TCP_WRITE_SIZE(sending_length); + GRPC_STATS_INC_TCP_WRITE_IOV_SIZE(iov_size); - GPR_TIMER_SCOPE("sendmsg", 1); - do { - /* TODO(klempner): Cork if this is a partial write */ - GRPC_STATS_INC_SYSCALL_WRITE(); - sent_length = sendmsg(tcp->fd, &msg, SENDMSG_FLAGS); - } while (sent_length < 0 && errno == EINTR); + sent_length = tcp_send(tcp->fd, &msg); + } if (sent_length < 0) { if (errno == EAGAIN) { @@ -593,6 +856,7 @@ static bool tcp_flush(grpc_tcp* tcp, grpc_error** error) { } GPR_ASSERT(tcp->outgoing_byte_idx == 0); + tcp->bytes_counter += sent_length; trailing = sending_length - static_cast<size_t>(sent_length); while (trailing > 0) { size_t slice_length; @@ -607,7 +871,6 @@ static bool tcp_flush(grpc_tcp* tcp, grpc_error** error) { trailing -= slice_length; } } - if (outgoing_slice_idx == tcp->outgoing_buffer->count) { *error = GRPC_ERROR_NONE; grpc_slice_buffer_reset_and_unref_internal(tcp->outgoing_buffer); @@ -640,14 +903,13 @@ static void tcp_handle_write(void* arg /* grpc_tcp */, grpc_error* error) { const char* str = grpc_error_string(error); gpr_log(GPR_INFO, "write: %s", str); } - GRPC_CLOSURE_SCHED(cb, error); TCP_UNREF(tcp, "write"); } } static void tcp_write(grpc_endpoint* ep, grpc_slice_buffer* buf, - grpc_closure* cb) { + grpc_closure* cb, void* arg) { GPR_TIMER_SCOPE("tcp_write", 0); grpc_tcp* tcp = reinterpret_cast<grpc_tcp*>(ep); grpc_error* error = GRPC_ERROR_NONE; @@ -675,6 +937,10 @@ static void tcp_write(grpc_endpoint* ep, grpc_slice_buffer* buf, } tcp->outgoing_buffer = buf; tcp->outgoing_byte_idx = 0; + tcp->outgoing_buffer_arg = arg; + if (arg) { + GPR_ASSERT(grpc_event_engine_can_track_errors()); + } if (!tcp_flush(tcp, &error)) { TCP_REF(tcp, "write"); @@ -792,6 +1058,8 @@ grpc_endpoint* grpc_tcp_create(grpc_fd* em_fd, tcp->bytes_read_this_round = 0; /* Will be set to false by the very first endpoint read function */ tcp->is_first_read = true; + tcp->bytes_counter = -1; + tcp->socket_ts_enabled = false; /* paired with unref in grpc_tcp_destroy */ gpr_ref_init(&tcp->refcount, 1); gpr_atm_no_barrier_store(&tcp->shutdown_count, 0); @@ -803,6 +1071,19 @@ grpc_endpoint* grpc_tcp_create(grpc_fd* em_fd, /* Tell network status tracker about new endpoint */ grpc_network_status_register_endpoint(&tcp->base); grpc_resource_quota_unref_internal(resource_quota); + gpr_mu_init(&tcp->tb_mu); + tcp->tb_head = nullptr; + /* Start being notified on errors if event engine can track errors. */ + if (grpc_event_engine_can_track_errors()) { + /* Grab a ref to tcp so that we can safely access the tcp struct when + * processing errors. We unref when we no longer want to track errors + * separately. */ + TCP_REF(tcp, "error-tracking"); + gpr_atm_rel_store(&tcp->stop_error_notification, 0); + GRPC_CLOSURE_INIT(&tcp->error_closure, tcp_handle_error, tcp, + grpc_schedule_on_exec_ctx); + grpc_fd_notify_on_error(tcp->em_fd, &tcp->error_closure); + } return &tcp->base; } @@ -821,6 +1102,11 @@ void grpc_tcp_destroy_and_release_fd(grpc_endpoint* ep, int* fd, tcp->release_fd = fd; tcp->release_fd_cb = done; grpc_slice_buffer_reset_and_unref_internal(&tcp->last_read_buffer); + if (grpc_event_engine_can_track_errors()) { + /* Stop errors notification. */ + gpr_atm_no_barrier_store(&tcp->stop_error_notification, true); + grpc_fd_set_error(tcp->em_fd); + } TCP_UNREF(tcp, "destroy"); } diff --git a/src/core/lib/iomgr/tcp_posix.h b/src/core/lib/iomgr/tcp_posix.h index af89bd24db..eff825cb92 100644 --- a/src/core/lib/iomgr/tcp_posix.h +++ b/src/core/lib/iomgr/tcp_posix.h @@ -31,7 +31,10 @@ #include <grpc/support/port_platform.h> +#include "src/core/lib/iomgr/port.h" + #include "src/core/lib/debug/trace.h" +#include "src/core/lib/iomgr/buffer_list.h" #include "src/core/lib/iomgr/endpoint.h" #include "src/core/lib/iomgr/ev_posix.h" diff --git a/src/core/lib/iomgr/tcp_server_posix.cc b/src/core/lib/iomgr/tcp_server_posix.cc index 8ddf684fea..824db07fbf 100644 --- a/src/core/lib/iomgr/tcp_server_posix.cc +++ b/src/core/lib/iomgr/tcp_server_posix.cc @@ -226,7 +226,7 @@ static void on_read(void* arg, grpc_error* err) { gpr_log(GPR_INFO, "SERVER_CONNECT: incoming connection: %s", addr_str); } - grpc_fd* fdobj = grpc_fd_create(fd, name, false); + grpc_fd* fdobj = grpc_fd_create(fd, name, true); read_notifier_pollset = sp->server->pollsets[static_cast<size_t>(gpr_atm_no_barrier_fetch_add( @@ -362,7 +362,7 @@ static grpc_error* clone_port(grpc_tcp_listener* listener, unsigned count) { listener->sibling = sp; sp->server = listener->server; sp->fd = fd; - sp->emfd = grpc_fd_create(fd, name, false); + sp->emfd = grpc_fd_create(fd, name, true); memcpy(&sp->addr, &listener->addr, sizeof(grpc_resolved_address)); sp->port = port; sp->port_index = listener->port_index; diff --git a/src/core/lib/iomgr/tcp_server_utils_posix_common.cc b/src/core/lib/iomgr/tcp_server_utils_posix_common.cc index b9f8145572..9595c028ce 100644 --- a/src/core/lib/iomgr/tcp_server_utils_posix_common.cc +++ b/src/core/lib/iomgr/tcp_server_utils_posix_common.cc @@ -105,7 +105,7 @@ static grpc_error* add_socket_to_server(grpc_tcp_server* s, int fd, s->tail = sp; sp->server = s; sp->fd = fd; - sp->emfd = grpc_fd_create(fd, name, false); + sp->emfd = grpc_fd_create(fd, name, true); memcpy(&sp->addr, addr, sizeof(grpc_resolved_address)); sp->port = port; sp->port_index = port_index; diff --git a/src/core/lib/iomgr/tcp_windows.cc b/src/core/lib/iomgr/tcp_windows.cc index b3cb442f18..64c4a56ae9 100644 --- a/src/core/lib/iomgr/tcp_windows.cc +++ b/src/core/lib/iomgr/tcp_windows.cc @@ -296,7 +296,7 @@ static void on_write(void* tcpp, grpc_error* error) { /* Initiates a write. */ static void win_write(grpc_endpoint* ep, grpc_slice_buffer* slices, - grpc_closure* cb) { + grpc_closure* cb, void* arg) { grpc_tcp* tcp = (grpc_tcp*)ep; grpc_winsocket* socket = tcp->socket; grpc_winsocket_callback_info* info = &socket->write_info; diff --git a/src/core/lib/iomgr/timer_generic.cc b/src/core/lib/iomgr/timer_generic.cc index 4294162af7..008d37119a 100644 --- a/src/core/lib/iomgr/timer_generic.cc +++ b/src/core/lib/iomgr/timer_generic.cc @@ -291,7 +291,7 @@ static void timer_list_init() { static void timer_list_shutdown() { size_t i; run_some_expired_timers( - GPR_ATM_MAX, nullptr, + GRPC_MILLIS_INF_FUTURE, nullptr, GRPC_ERROR_CREATE_FROM_STATIC_STRING("Timer list shutdown")); for (i = 0; i < g_num_shards; i++) { timer_shard* shard = &g_shards[i]; @@ -714,9 +714,10 @@ static grpc_timer_check_result timer_check(grpc_millis* next) { #if GPR_ARCH_64 gpr_log(GPR_INFO, "TIMER CHECK BEGIN: now=%" PRId64 " next=%s tls_min=%" PRId64 - " glob_min=%" PRIdPTR, + " glob_min=%" PRId64, now, next_str, min_timer, - gpr_atm_no_barrier_load((gpr_atm*)(&g_shared_mutables.min_timer))); + static_cast<grpc_millis>(gpr_atm_no_barrier_load( + (gpr_atm*)(&g_shared_mutables.min_timer)))); #else gpr_log(GPR_INFO, "TIMER CHECK BEGIN: now=%" PRId64 " next=%s min=%" PRId64, now, next_str, min_timer); diff --git a/src/core/lib/iomgr/udp_server.cc b/src/core/lib/iomgr/udp_server.cc index bdb2d0e764..3dd7cab855 100644 --- a/src/core/lib/iomgr/udp_server.cc +++ b/src/core/lib/iomgr/udp_server.cc @@ -152,7 +152,7 @@ GrpcUdpListener::GrpcUdpListener(grpc_udp_server* server, int fd, grpc_sockaddr_to_string(&addr_str, addr, 1); gpr_asprintf(&name, "udp-server-listener:%s", addr_str); gpr_free(addr_str); - emfd_ = grpc_fd_create(fd, name, false); + emfd_ = grpc_fd_create(fd, name, true); memcpy(&addr_, addr, sizeof(grpc_resolved_address)); GPR_ASSERT(emfd_); gpr_free(name); diff --git a/src/core/lib/security/security_connector/security_connector.cc b/src/core/lib/security/security_connector/security_connector.cc index 04b4c87c71..6246613e7b 100644 --- a/src/core/lib/security/security_connector/security_connector.cc +++ b/src/core/lib/security/security_connector/security_connector.cc @@ -59,8 +59,8 @@ static const char* installed_roots_path = /** Environment variable used as a flag to enable/disable loading system root certificates from the OS trust store. */ -#ifndef GRPC_USE_SYSTEM_SSL_ROOTS_ENV_VAR -#define GRPC_USE_SYSTEM_SSL_ROOTS_ENV_VAR "GRPC_USE_SYSTEM_SSL_ROOTS" +#ifndef GRPC_NOT_USE_SYSTEM_SSL_ROOTS_ENV_VAR +#define GRPC_NOT_USE_SYSTEM_SSL_ROOTS_ENV_VAR "GRPC_NOT_USE_SYSTEM_SSL_ROOTS" #endif #ifndef TSI_OPENSSL_ALPN_SUPPORT @@ -1192,10 +1192,10 @@ const char* DefaultSslRootStore::GetPemRootCerts() { grpc_slice DefaultSslRootStore::ComputePemRootCerts() { grpc_slice result = grpc_empty_slice(); - char* use_system_roots_env_value = - gpr_getenv(GRPC_USE_SYSTEM_SSL_ROOTS_ENV_VAR); - const bool use_system_roots = gpr_is_true(use_system_roots_env_value); - gpr_free(use_system_roots_env_value); + char* not_use_system_roots_env_value = + gpr_getenv(GRPC_NOT_USE_SYSTEM_SSL_ROOTS_ENV_VAR); + const bool not_use_system_roots = gpr_is_true(not_use_system_roots_env_value); + gpr_free(not_use_system_roots_env_value); // First try to load the roots from the environment. char* default_root_certs_path = gpr_getenv(GRPC_DEFAULT_SSL_ROOTS_FILE_PATH_ENV_VAR); @@ -1218,7 +1218,7 @@ grpc_slice DefaultSslRootStore::ComputePemRootCerts() { gpr_free(pem_root_certs); } // Try loading roots from OS trust store if flag is enabled. - if (GRPC_SLICE_IS_EMPTY(result) && use_system_roots) { + if (GRPC_SLICE_IS_EMPTY(result) && !not_use_system_roots) { result = LoadSystemRootCerts(); } // Fallback to roots manually shipped with gRPC. diff --git a/src/core/lib/security/transport/secure_endpoint.cc b/src/core/lib/security/transport/secure_endpoint.cc index 840b2e73bc..f40f969bb7 100644 --- a/src/core/lib/security/transport/secure_endpoint.cc +++ b/src/core/lib/security/transport/secure_endpoint.cc @@ -254,7 +254,7 @@ static void flush_write_staging_buffer(secure_endpoint* ep, uint8_t** cur, } static void endpoint_write(grpc_endpoint* secure_ep, grpc_slice_buffer* slices, - grpc_closure* cb) { + grpc_closure* cb, void* arg) { GPR_TIMER_SCOPE("secure_endpoint.endpoint_write", 0); unsigned i; @@ -342,7 +342,7 @@ static void endpoint_write(grpc_endpoint* secure_ep, grpc_slice_buffer* slices, return; } - grpc_endpoint_write(ep->wrapped_ep, &ep->output_buffer, cb); + grpc_endpoint_write(ep->wrapped_ep, &ep->output_buffer, cb, arg); } static void endpoint_shutdown(grpc_endpoint* secure_ep, grpc_error* why) { diff --git a/src/core/lib/security/transport/security_handshaker.cc b/src/core/lib/security/transport/security_handshaker.cc index aff723ed04..d76d582638 100644 --- a/src/core/lib/security/transport/security_handshaker.cc +++ b/src/core/lib/security/transport/security_handshaker.cc @@ -259,7 +259,7 @@ static grpc_error* on_handshake_next_done_locked( grpc_slice_buffer_reset_and_unref_internal(&h->outgoing); grpc_slice_buffer_add(&h->outgoing, to_send); grpc_endpoint_write(h->args->endpoint, &h->outgoing, - &h->on_handshake_data_sent_to_peer); + &h->on_handshake_data_sent_to_peer, nullptr); } else if (handshaker_result == nullptr) { // There is nothing to send, but need to read from peer. grpc_endpoint_read(h->args->endpoint, h->args->read_buffer, diff --git a/src/core/lib/security/transport/server_auth_filter.cc b/src/core/lib/security/transport/server_auth_filter.cc index 19cbb03b63..552e70130a 100644 --- a/src/core/lib/security/transport/server_auth_filter.cc +++ b/src/core/lib/security/transport/server_auth_filter.cc @@ -41,6 +41,9 @@ struct call_data { grpc_transport_stream_op_batch* recv_initial_metadata_batch; grpc_closure* original_recv_initial_metadata_ready; grpc_closure recv_initial_metadata_ready; + grpc_error* error; + grpc_closure recv_trailing_metadata_ready; + grpc_closure* original_recv_trailing_metadata_ready; grpc_metadata_array md; const grpc_metadata* consumed_md; size_t num_consumed_md; @@ -111,6 +114,7 @@ static void on_md_processing_done_inner(grpc_call_element* elem, batch->payload->recv_initial_metadata.recv_initial_metadata, remove_consumed_md, elem, "Response metadata filtering error"); } + calld->error = GRPC_ERROR_REF(error); GRPC_CLOSURE_SCHED(calld->original_recv_initial_metadata_ready, error); } @@ -184,6 +188,13 @@ static void recv_initial_metadata_ready(void* arg, grpc_error* error) { GRPC_ERROR_REF(error)); } +static void recv_trailing_metadata_ready(void* user_data, grpc_error* err) { + grpc_call_element* elem = static_cast<grpc_call_element*>(user_data); + call_data* calld = static_cast<call_data*>(elem->call_data); + err = grpc_error_add_child(GRPC_ERROR_REF(err), GRPC_ERROR_REF(calld->error)); + GRPC_CLOSURE_RUN(calld->original_recv_trailing_metadata_ready, err); +} + static void auth_start_transport_stream_op_batch( grpc_call_element* elem, grpc_transport_stream_op_batch* batch) { call_data* calld = static_cast<call_data*>(elem->call_data); @@ -195,6 +206,12 @@ static void auth_start_transport_stream_op_batch( batch->payload->recv_initial_metadata.recv_initial_metadata_ready = &calld->recv_initial_metadata_ready; } + if (batch->recv_trailing_metadata) { + calld->original_recv_trailing_metadata_ready = + batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready; + batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready = + &calld->recv_trailing_metadata_ready; + } grpc_call_next_op(elem, batch); } @@ -208,6 +225,9 @@ static grpc_error* init_call_elem(grpc_call_element* elem, GRPC_CLOSURE_INIT(&calld->recv_initial_metadata_ready, recv_initial_metadata_ready, elem, grpc_schedule_on_exec_ctx); + GRPC_CLOSURE_INIT(&calld->recv_trailing_metadata_ready, + recv_trailing_metadata_ready, elem, + grpc_schedule_on_exec_ctx); // Create server security context. Set its auth context from channel // data and save it in the call context. grpc_server_security_context* server_ctx = @@ -227,7 +247,10 @@ static grpc_error* init_call_elem(grpc_call_element* elem, /* Destructor for call_data */ static void destroy_call_elem(grpc_call_element* elem, const grpc_call_final_info* final_info, - grpc_closure* ignored) {} + grpc_closure* ignored) { + call_data* calld = static_cast<call_data*>(elem->call_data); + GRPC_ERROR_UNREF(calld->error); +} /* Constructor for channel_data */ static grpc_error* init_channel_elem(grpc_channel_element* elem, diff --git a/src/core/lib/surface/call.cc b/src/core/lib/surface/call.cc index 2923a86646..11b438f5dc 100644 --- a/src/core/lib/surface/call.cc +++ b/src/core/lib/surface/call.cc @@ -48,6 +48,7 @@ #include "src/core/lib/surface/call_test_only.h" #include "src/core/lib/surface/channel.h" #include "src/core/lib/surface/completion_queue.h" +#include "src/core/lib/surface/server.h" #include "src/core/lib/surface/validate_metadata.h" #include "src/core/lib/transport/error_utils.h" #include "src/core/lib/transport/metadata.h" @@ -71,46 +72,6 @@ // Used to create arena for the first call. #define ESTIMATED_MDELEM_COUNT 16 -/* Status data for a request can come from several sources; this - enumerates them all, and acts as a priority sorting for which - status to return to the application - earlier entries override - later ones */ -typedef enum { - /* Status came from the application layer overriding whatever - the wire says */ - STATUS_FROM_API_OVERRIDE = 0, - /* Status came from 'the wire' - or somewhere below the surface - layer */ - STATUS_FROM_WIRE, - /* Status was created by some internal channel stack operation: must come via - add_batch_error */ - STATUS_FROM_CORE, - /* Status was created by some surface error */ - STATUS_FROM_SURFACE, - /* Status came from the server sending status */ - STATUS_FROM_SERVER_STATUS, - STATUS_SOURCE_COUNT -} status_source; - -typedef struct { - bool is_set; - grpc_error* error; -} received_status; - -static gpr_atm pack_received_status(received_status r) { - return r.is_set ? (1 | (gpr_atm)r.error) : 0; -} - -static received_status unpack_received_status(gpr_atm atm) { - if ((atm & 1) == 0) { - return {false, GRPC_ERROR_NONE}; - } else { - return {true, (grpc_error*)(atm & ~static_cast<gpr_atm>(1))}; - } -} - -#define MAX_ERRORS_PER_BATCH 4 - typedef struct batch_control { grpc_call* call; /* Share memory for cq_completion and notify_tag as they are never needed @@ -135,10 +96,7 @@ typedef struct batch_control { grpc_closure start_batch; grpc_closure finish_batch; gpr_refcount steps_to_complete; - - grpc_error* errors[MAX_ERRORS_PER_BATCH]; - gpr_atm num_errors; - + gpr_atm batch_error; grpc_transport_stream_op_batch op; } batch_control; @@ -201,9 +159,6 @@ struct grpc_call { // A char* indicating the peer name. gpr_atm peer_string; - /* Packed received call statuses from various sources */ - gpr_atm status[STATUS_SOURCE_COUNT]; - /* Call data useful used for reporting. Only valid after the call has * completed */ grpc_call_final_info final_info; @@ -236,6 +191,7 @@ struct grpc_call { grpc_closure receiving_initial_metadata_ready; grpc_closure receiving_trailing_metadata_ready; uint32_t test_only_last_message_flags; + gpr_atm cancelled; grpc_closure release_call; @@ -247,8 +203,11 @@ struct grpc_call { } client; struct { int* cancelled; + // backpointer to owning server if this is a server side call. + grpc_server* server; } server; } final_op; + grpc_error* status_error; /* recv_state can contain one of the following values: RECV_NONE : : no initial metadata and messages received @@ -286,23 +245,15 @@ grpc_core::TraceFlag grpc_compression_trace(false, "compression"); static void execute_batch(grpc_call* call, grpc_transport_stream_op_batch* op, grpc_closure* start_batch_closure); -static void cancel_with_status(grpc_call* c, status_source source, - grpc_status_code status, + +static void cancel_with_status(grpc_call* c, grpc_status_code status, const char* description); -static void cancel_with_error(grpc_call* c, status_source source, - grpc_error* error); +static void cancel_with_error(grpc_call* c, grpc_error* error); static void destroy_call(void* call_stack, grpc_error* error); static void receiving_slice_ready(void* bctlp, grpc_error* error); -static void get_final_status( - grpc_call* call, void (*set_value)(grpc_status_code code, void* user_data), - void* set_value_user_data, grpc_slice* details, const char** error_string); -static void set_status_value_directly(grpc_status_code status, void* dest); -static void set_status_from_error(grpc_call* call, status_source source, - grpc_error* error); +static void set_final_status(grpc_call* call, grpc_error* error); static void process_data_after_md(batch_control* bctl); static void post_batch_completion(batch_control* bctl); -static void add_batch_error(batch_control* bctl, grpc_error* error, - bool has_cancelled); static void add_init_error(grpc_error** composite, grpc_error* new_err) { if (new_err == GRPC_ERROR_NONE) return; @@ -353,6 +304,7 @@ grpc_error* grpc_call_create(const grpc_call_create_args* args, gpr_arena_alloc(arena, GPR_ROUND_UP_TO_ALIGNMENT_SIZE(sizeof(grpc_call)) + channel_stack->call_stack_size)); gpr_ref_init(&call->ext_ref, 1); + gpr_atm_no_barrier_store(&call->cancelled, 0); call->arena = arena; grpc_call_combiner_init(&call->call_combiner); *out_call = call; @@ -362,14 +314,10 @@ grpc_error* grpc_call_create(const grpc_call_create_args* args, /* Always support no compression */ GPR_BITSET(&call->encodings_accepted_by_peer, GRPC_MESSAGE_COMPRESS_NONE); call->is_client = args->server_transport_data == nullptr; - if (call->is_client) { - GRPC_STATS_INC_CLIENT_CALLS_CREATED(); - } else { - GRPC_STATS_INC_SERVER_CALLS_CREATED(); - } call->stream_op_payload.context = call->context; grpc_slice path = grpc_empty_slice(); if (call->is_client) { + GRPC_STATS_INC_CLIENT_CALLS_CREATED(); GPR_ASSERT(args->add_initial_metadata_count < MAX_SEND_EXTRA_METADATA_COUNT); for (i = 0; i < args->add_initial_metadata_count; i++) { @@ -383,6 +331,8 @@ grpc_error* grpc_call_create(const grpc_call_create_args* args, call->send_extra_metadata_count = static_cast<int>(args->add_initial_metadata_count); } else { + GRPC_STATS_INC_SERVER_CALLS_CREATED(); + call->final_op.server.server = args->server; GPR_ASSERT(args->add_initial_metadata_count == 0); call->send_extra_metadata_count = 0; } @@ -464,10 +414,10 @@ grpc_error* grpc_call_create(const grpc_call_create_args* args, gpr_mu_unlock(&pc->child_list_mu); } if (error != GRPC_ERROR_NONE) { - cancel_with_error(call, STATUS_FROM_SURFACE, GRPC_ERROR_REF(error)); + cancel_with_error(call, GRPC_ERROR_REF(error)); } if (immediately_cancel) { - cancel_with_error(call, STATUS_FROM_API_OVERRIDE, GRPC_ERROR_CANCELLED); + cancel_with_error(call, GRPC_ERROR_CANCELLED); } if (args->cq != nullptr) { GPR_ASSERT(args->pollset_set_alternative == nullptr && @@ -486,10 +436,18 @@ grpc_error* grpc_call_create(const grpc_call_create_args* args, &call->pollent); } - grpc_core::channelz::ChannelNode* channelz_channel = - grpc_channel_get_channelz_node(call->channel); - if (channelz_channel != nullptr) { - channelz_channel->RecordCallStarted(); + if (call->is_client) { + grpc_core::channelz::ChannelNode* channelz_channel = + grpc_channel_get_channelz_node(call->channel); + if (channelz_channel != nullptr) { + channelz_channel->RecordCallStarted(); + } + } else { + grpc_core::channelz::ServerNode* channelz_server = + grpc_server_get_channelz_node(call->final_op.server.server); + if (channelz_server != nullptr) { + channelz_server->RecordCallStarted(); + } } grpc_slice_unref_internal(path); @@ -561,16 +519,13 @@ static void destroy_call(void* call, grpc_error* error) { GRPC_CQ_INTERNAL_UNREF(c->cq, "bind"); } - get_final_status(c, set_status_value_directly, &c->final_info.final_status, - nullptr, &(c->final_info.error_string)); + grpc_error_get_status(c->status_error, c->send_deadline, + &c->final_info.final_status, nullptr, nullptr, + &(c->final_info.error_string)); + GRPC_ERROR_UNREF(c->status_error); c->final_info.stats.latency = gpr_time_sub(gpr_now(GPR_CLOCK_MONOTONIC), c->start_time); - for (i = 0; i < STATUS_SOURCE_COUNT; i++) { - GRPC_ERROR_UNREF( - unpack_received_status(gpr_atm_acq_load(&c->status[i])).error); - } - grpc_call_stack_destroy(CALL_STACK_FROM_CALL(c), &c->final_info, GRPC_CLOSURE_INIT(&c->release_call, release_call, c, grpc_schedule_on_exec_ctx)); @@ -608,7 +563,7 @@ void grpc_call_unref(grpc_call* c) { bool cancel = gpr_atm_acq_load(&c->any_ops_sent_atm) != 0 && gpr_atm_acq_load(&c->received_final_op_atm) == 0; if (cancel) { - cancel_with_error(c, STATUS_FROM_API_OVERRIDE, GRPC_ERROR_CANCELLED); + cancel_with_error(c, GRPC_ERROR_CANCELLED); } else { // Unset the call combiner cancellation closure. This has the // effect of scheduling the previously set cancellation closure, if @@ -626,8 +581,7 @@ grpc_call_error grpc_call_cancel(grpc_call* call, void* reserved) { GRPC_API_TRACE("grpc_call_cancel(call=%p, reserved=%p)", 2, (call, reserved)); GPR_ASSERT(!reserved); grpc_core::ExecCtx exec_ctx; - cancel_with_error(call, STATUS_FROM_API_OVERRIDE, GRPC_ERROR_CANCELLED); - + cancel_with_error(call, GRPC_ERROR_CANCELLED); return GRPC_CALL_OK; } @@ -681,8 +635,7 @@ grpc_call_error grpc_call_cancel_with_status(grpc_call* c, "c=%p, status=%d, description=%s, reserved=%p)", 4, (c, (int)status, description, reserved)); GPR_ASSERT(reserved == nullptr); - cancel_with_status(c, STATUS_FROM_API_OVERRIDE, status, description); - + cancel_with_status(c, status, description); return GRPC_CALL_OK; } @@ -702,15 +655,17 @@ static void done_termination(void* arg, grpc_error* error) { gpr_free(state); } -static void cancel_with_error(grpc_call* c, status_source source, - grpc_error* error) { +static void cancel_with_error(grpc_call* c, grpc_error* error) { + if (!gpr_atm_rel_cas(&c->cancelled, 0, 1)) { + GRPC_ERROR_UNREF(error); + return; + } GRPC_CALL_INTERNAL_REF(c, "termination"); // Inform the call combiner of the cancellation, so that it can cancel // any in-flight asynchronous actions that may be holding the call // combiner. This ensures that the cancel_stream batch can be sent // down the filter stack in a timely manner. grpc_call_combiner_cancel(&c->call_combiner, GRPC_ERROR_REF(error)); - set_status_from_error(c, source, GRPC_ERROR_REF(error)); cancel_state* state = static_cast<cancel_state*>(gpr_malloc(sizeof(*state))); state->call = c; GRPC_CLOSURE_INIT(&state->finish_batch, done_termination, state, @@ -733,90 +688,45 @@ static grpc_error* error_from_status(grpc_status_code status, GRPC_ERROR_INT_GRPC_STATUS, status); } -static void cancel_with_status(grpc_call* c, status_source source, - grpc_status_code status, +static void cancel_with_status(grpc_call* c, grpc_status_code status, const char* description) { - cancel_with_error(c, source, error_from_status(status, description)); -} - -/******************************************************************************* - * FINAL STATUS CODE MANIPULATION - */ - -static bool get_final_status_from( - grpc_call* call, grpc_error* error, bool allow_ok_status, - void (*set_value)(grpc_status_code code, void* user_data), - void* set_value_user_data, grpc_slice* details, const char** error_string) { - grpc_status_code code; - grpc_slice slice = grpc_empty_slice(); - grpc_error_get_status(error, call->send_deadline, &code, &slice, nullptr, - error_string); - if (code == GRPC_STATUS_OK && !allow_ok_status) { - return false; - } - - set_value(code, set_value_user_data); - if (details != nullptr) { - *details = grpc_slice_ref_internal(slice); - } - return true; + cancel_with_error(c, error_from_status(status, description)); } -static void get_final_status( - grpc_call* call, void (*set_value)(grpc_status_code code, void* user_data), - void* set_value_user_data, grpc_slice* details, const char** error_string) { - int i; - received_status status[STATUS_SOURCE_COUNT]; - for (i = 0; i < STATUS_SOURCE_COUNT; i++) { - status[i] = unpack_received_status(gpr_atm_acq_load(&call->status[i])); - } +static void set_final_status(grpc_call* call, grpc_error* error) { if (grpc_call_error_trace.enabled()) { - gpr_log(GPR_INFO, "get_final_status %s", call->is_client ? "CLI" : "SVR"); - for (i = 0; i < STATUS_SOURCE_COUNT; i++) { - if (status[i].is_set) { - gpr_log(GPR_INFO, " %d: %s", i, grpc_error_string(status[i].error)); - } - } + gpr_log(GPR_DEBUG, "set_final_status %s", call->is_client ? "CLI" : "SVR"); + gpr_log(GPR_DEBUG, "%s", grpc_error_string(error)); } - /* first search through ignoring "OK" statuses: if something went wrong, - * ensure we report it */ - for (int allow_ok_status = 0; allow_ok_status < 2; allow_ok_status++) { - /* search for the best status we can present: ideally the error we use has a - clearly defined grpc-status, and we'll prefer that. */ - for (i = 0; i < STATUS_SOURCE_COUNT; i++) { - if (status[i].is_set && - grpc_error_has_clear_grpc_status(status[i].error)) { - if (get_final_status_from(call, status[i].error, allow_ok_status != 0, - set_value, set_value_user_data, details, - error_string)) { - return; - } + if (call->is_client) { + grpc_error_get_status(error, call->send_deadline, + call->final_op.client.status, + call->final_op.client.status_details, nullptr, + call->final_op.client.error_string); + // explicitly take a ref + grpc_slice_ref_internal(*call->final_op.client.status_details); + call->status_error = error; + grpc_core::channelz::ChannelNode* channelz_channel = + grpc_channel_get_channelz_node(call->channel); + if (channelz_channel != nullptr) { + if (*call->final_op.client.status != GRPC_STATUS_OK) { + channelz_channel->RecordCallFailed(); + } else { + channelz_channel->RecordCallSucceeded(); } } - /* If no clearly defined status exists, search for 'anything' */ - for (i = 0; i < STATUS_SOURCE_COUNT; i++) { - if (status[i].is_set) { - if (get_final_status_from(call, status[i].error, allow_ok_status != 0, - set_value, set_value_user_data, details, - error_string)) { - return; - } + } else { + *call->final_op.server.cancelled = + error != GRPC_ERROR_NONE || call->status_error != GRPC_ERROR_NONE; + grpc_core::channelz::ServerNode* channelz_server = + grpc_server_get_channelz_node(call->final_op.server.server); + if (channelz_server != nullptr) { + if (*call->final_op.server.cancelled) { + channelz_server->RecordCallFailed(); + } else { + channelz_server->RecordCallSucceeded(); } } - } - /* If nothing exists, set some default */ - if (call->is_client) { - set_value(GRPC_STATUS_UNKNOWN, set_value_user_data); - } else { - set_value(GRPC_STATUS_OK, set_value_user_data); - } -} - -static void set_status_from_error(grpc_call* call, status_source source, - grpc_error* error) { - if (!gpr_atm_rel_cas(&call->status[source], - pack_received_status({false, GRPC_ERROR_NONE}), - pack_received_status({true, error}))) { GRPC_ERROR_UNREF(error); } } @@ -1035,6 +945,7 @@ static grpc_stream_compression_algorithm decode_stream_compression( static void publish_app_metadata(grpc_call* call, grpc_metadata_batch* b, int is_trailing) { if (b->list.count == 0) return; + if (!call->is_client && is_trailing) return; if (is_trailing && call->buffered_metadata[1] == nullptr) return; GPR_TIMER_SCOPE("publish_app_metadata", 0); grpc_metadata_array* dest; @@ -1088,9 +999,12 @@ static void recv_initial_filter(grpc_call* call, grpc_metadata_batch* b) { publish_app_metadata(call, b, false); } -static void recv_trailing_filter(void* args, grpc_metadata_batch* b) { +static void recv_trailing_filter(void* args, grpc_metadata_batch* b, + grpc_error* batch_error) { grpc_call* call = static_cast<grpc_call*>(args); - if (b->idx.named.grpc_status != nullptr) { + if (batch_error != GRPC_ERROR_NONE) { + set_final_status(call, batch_error); + } else if (b->idx.named.grpc_status != nullptr) { grpc_status_code status_code = grpc_get_status_code_from_metadata(b->idx.named.grpc_status->md); grpc_error* error = GRPC_ERROR_NONE; @@ -1108,8 +1022,18 @@ static void recv_trailing_filter(void* args, grpc_metadata_batch* b) { error = grpc_error_set_str(error, GRPC_ERROR_STR_GRPC_MESSAGE, grpc_empty_slice()); } - set_status_from_error(call, STATUS_FROM_WIRE, error); + set_final_status(call, GRPC_ERROR_REF(error)); grpc_metadata_batch_remove(b, b->idx.named.grpc_status); + GRPC_ERROR_UNREF(error); + } else if (!call->is_client) { + set_final_status(call, GRPC_ERROR_NONE); + } else { + gpr_log(GPR_DEBUG, + "Received trailing metadata with no error and no status"); + set_final_status( + call, grpc_error_set_int( + GRPC_ERROR_CREATE_FROM_STATIC_STRING("No status received"), + GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNKNOWN)); } publish_app_metadata(call, b, true); } @@ -1124,14 +1048,6 @@ grpc_call_stack* grpc_call_get_call_stack(grpc_call* call) { * BATCH API IMPLEMENTATION */ -static void set_status_value_directly(grpc_status_code status, void* dest) { - *static_cast<grpc_status_code*>(dest) = status; -} - -static void set_cancelled_value(grpc_status_code status, void* dest) { - *static_cast<int*>(dest) = (status != GRPC_STATUS_OK); -} - static bool are_write_flags_valid(uint32_t flags) { /* check that only bits in GRPC_WRITE_(INTERNAL?)_USED_MASK are set */ const uint32_t allowed_write_positions = @@ -1199,31 +1115,18 @@ static void finish_batch_completion(void* user_data, GRPC_CALL_INTERNAL_UNREF(call, "completion"); } -static grpc_error* consolidate_batch_errors(batch_control* bctl) { - size_t n = static_cast<size_t>(gpr_atm_acq_load(&bctl->num_errors)); - if (n == 0) { - return GRPC_ERROR_NONE; - } else if (n == 1) { - /* Skip creating a composite error in the case that only one error was - logged */ - grpc_error* e = bctl->errors[0]; - bctl->errors[0] = nullptr; - return e; - } else { - grpc_error* error = GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING( - "Call batch failed", bctl->errors, n); - for (size_t i = 0; i < n; i++) { - GRPC_ERROR_UNREF(bctl->errors[i]); - bctl->errors[i] = nullptr; - } - return error; - } +static void reset_batch_errors(batch_control* bctl) { + GRPC_ERROR_UNREF( + reinterpret_cast<grpc_error*>(gpr_atm_acq_load(&bctl->batch_error))); + gpr_atm_rel_store(&bctl->batch_error, + reinterpret_cast<gpr_atm>(GRPC_ERROR_NONE)); } static void post_batch_completion(batch_control* bctl) { grpc_call* next_child_call; grpc_call* call = bctl->call; - grpc_error* error = consolidate_batch_errors(bctl); + grpc_error* error = GRPC_ERROR_REF( + reinterpret_cast<grpc_error*>(gpr_atm_acq_load(&bctl->batch_error))); if (bctl->op.send_initial_metadata) { grpc_metadata_batch_destroy( @@ -1249,8 +1152,7 @@ static void post_batch_completion(batch_control* bctl) { next_child_call = child->child->sibling_next; if (child->cancellation_is_inherited) { GRPC_CALL_INTERNAL_REF(child, "propagate_cancel"); - cancel_with_error(child, STATUS_FROM_API_OVERRIDE, - GRPC_ERROR_CANCELLED); + cancel_with_error(child, GRPC_ERROR_CANCELLED); GRPC_CALL_INTERNAL_UNREF(child, "propagate_cancel"); } child = next_child_call; @@ -1258,24 +1160,6 @@ static void post_batch_completion(batch_control* bctl) { } gpr_mu_unlock(&pc->child_list_mu); } - if (call->is_client) { - get_final_status(call, set_status_value_directly, - call->final_op.client.status, - call->final_op.client.status_details, - call->final_op.client.error_string); - } else { - get_final_status(call, set_cancelled_value, - call->final_op.server.cancelled, nullptr, nullptr); - } - grpc_core::channelz::ChannelNode* channelz_channel = - grpc_channel_get_channelz_node(call->channel); - if (channelz_channel != nullptr) { - if (*call->final_op.client.status != GRPC_STATUS_OK) { - channelz_channel->RecordCallFailed(); - } else { - channelz_channel->RecordCallSucceeded(); - } - } GRPC_ERROR_UNREF(error); error = GRPC_ERROR_NONE; } @@ -1284,9 +1168,10 @@ static void post_batch_completion(batch_control* bctl) { grpc_byte_buffer_destroy(*call->receiving_buffer); *call->receiving_buffer = nullptr; } + reset_batch_errors(bctl); if (bctl->completion_data.notify_tag.is_closure) { - /* unrefs bctl->error */ + /* unrefs error */ bctl->call = nullptr; /* This closure may be meant to be run within some combiner. Since we aren't * running in any combiner here, we need to use GRPC_CLOSURE_SCHED instead @@ -1296,7 +1181,7 @@ static void post_batch_completion(batch_control* bctl) { error); GRPC_CALL_INTERNAL_UNREF(call, "completion"); } else { - /* unrefs bctl->error */ + /* unrefs error */ grpc_cq_end_op(bctl->call->cq, bctl->completion_data.notify_tag.tag, error, finish_batch_completion, bctl, &bctl->completion_data.cq_completion); @@ -1405,8 +1290,12 @@ static void receiving_stream_ready(void* bctlp, grpc_error* error) { grpc_call* call = bctl->call; if (error != GRPC_ERROR_NONE) { call->receiving_stream.reset(); - add_batch_error(bctl, GRPC_ERROR_REF(error), true); - cancel_with_error(call, STATUS_FROM_SURFACE, GRPC_ERROR_REF(error)); + if (reinterpret_cast<grpc_error*>(gpr_atm_acq_load(&bctl->batch_error)) == + GRPC_ERROR_NONE) { + gpr_atm_rel_store(&bctl->batch_error, + reinterpret_cast<gpr_atm>(GRPC_ERROR_REF(error))); + } + cancel_with_error(call, GRPC_ERROR_REF(error)); } /* If recv_state is RECV_NONE, we will save the batch_control * object with rel_cas, and will not use it after the cas. Its corresponding @@ -1442,8 +1331,7 @@ static void validate_filtered_metadata(batch_control* bctl) { call->incoming_stream_compression_algorithm, call->incoming_message_compression_algorithm); gpr_log(GPR_ERROR, "%s", error_msg); - cancel_with_status(call, STATUS_FROM_SURFACE, GRPC_STATUS_INTERNAL, - error_msg); + cancel_with_status(call, GRPC_STATUS_INTERNAL, error_msg); gpr_free(error_msg); } else if ( grpc_compression_algorithm_from_message_stream_compression_algorithm( @@ -1455,8 +1343,7 @@ static void validate_filtered_metadata(batch_control* bctl) { "compression (%d).", call->incoming_stream_compression_algorithm, call->incoming_message_compression_algorithm); - cancel_with_status(call, STATUS_FROM_SURFACE, GRPC_STATUS_INTERNAL, - error_msg); + cancel_with_status(call, GRPC_STATUS_INTERNAL, error_msg); gpr_free(error_msg); } else { char* error_msg = nullptr; @@ -1466,8 +1353,7 @@ static void validate_filtered_metadata(batch_control* bctl) { gpr_asprintf(&error_msg, "Invalid compression algorithm value '%d'.", compression_algorithm); gpr_log(GPR_ERROR, "%s", error_msg); - cancel_with_status(call, STATUS_FROM_SURFACE, GRPC_STATUS_UNIMPLEMENTED, - error_msg); + cancel_with_status(call, GRPC_STATUS_UNIMPLEMENTED, error_msg); } else if (grpc_compression_options_is_algorithm_enabled( &compression_options, compression_algorithm) == 0) { /* check if algorithm is supported by current channel config */ @@ -1476,8 +1362,7 @@ static void validate_filtered_metadata(batch_control* bctl) { gpr_asprintf(&error_msg, "Compression algorithm '%s' is disabled.", algo_name); gpr_log(GPR_ERROR, "%s", error_msg); - cancel_with_status(call, STATUS_FROM_SURFACE, GRPC_STATUS_UNIMPLEMENTED, - error_msg); + cancel_with_status(call, GRPC_STATUS_UNIMPLEMENTED, error_msg); } gpr_free(error_msg); @@ -1495,23 +1380,12 @@ static void validate_filtered_metadata(batch_control* bctl) { } } -static void add_batch_error(batch_control* bctl, grpc_error* error, - bool has_cancelled) { - if (error == GRPC_ERROR_NONE) return; - int idx = static_cast<int>(gpr_atm_full_fetch_add(&bctl->num_errors, 1)); - if (idx == 0 && !has_cancelled) { - cancel_with_error(bctl->call, STATUS_FROM_CORE, GRPC_ERROR_REF(error)); - } - bctl->errors[idx] = error; -} - static void receiving_initial_metadata_ready(void* bctlp, grpc_error* error) { batch_control* bctl = static_cast<batch_control*>(bctlp); grpc_call* call = bctl->call; GRPC_CALL_COMBINER_STOP(&call->call_combiner, "recv_initial_metadata_ready"); - add_batch_error(bctl, GRPC_ERROR_REF(error), false); if (error == GRPC_ERROR_NONE) { grpc_metadata_batch* md = &call->metadata_batch[1 /* is_receiving */][0 /* is_trailing */]; @@ -1524,6 +1398,13 @@ static void receiving_initial_metadata_ready(void* bctlp, grpc_error* error) { if (md->deadline != GRPC_MILLIS_INF_FUTURE && !call->is_client) { call->send_deadline = md->deadline; } + } else { + if (reinterpret_cast<grpc_error*>(gpr_atm_acq_load(&bctl->batch_error)) == + GRPC_ERROR_NONE) { + gpr_atm_rel_store(&bctl->batch_error, + reinterpret_cast<gpr_atm>(GRPC_ERROR_REF(error))); + } + cancel_with_error(call, GRPC_ERROR_REF(error)); } grpc_closure* saved_rsr_closure = nullptr; @@ -1561,10 +1442,9 @@ static void receiving_trailing_metadata_ready(void* bctlp, grpc_error* error) { batch_control* bctl = static_cast<batch_control*>(bctlp); grpc_call* call = bctl->call; GRPC_CALL_COMBINER_STOP(&call->call_combiner, "recv_trailing_metadata_ready"); - add_batch_error(bctl, GRPC_ERROR_REF(error), false); grpc_metadata_batch* md = &call->metadata_batch[1 /* is_receiving */][1 /* is_trailing */]; - recv_trailing_filter(call, md); + recv_trailing_filter(call, md, GRPC_ERROR_REF(error)); finish_batch_step(bctl); } @@ -1572,7 +1452,14 @@ static void finish_batch(void* bctlp, grpc_error* error) { batch_control* bctl = static_cast<batch_control*>(bctlp); grpc_call* call = bctl->call; GRPC_CALL_COMBINER_STOP(&call->call_combiner, "on_complete"); - add_batch_error(bctl, GRPC_ERROR_REF(error), false); + if (reinterpret_cast<grpc_error*>(gpr_atm_acq_load(&bctl->batch_error)) == + GRPC_ERROR_NONE) { + gpr_atm_rel_store(&bctl->batch_error, + reinterpret_cast<gpr_atm>(GRPC_ERROR_REF(error))); + } + if (error != GRPC_ERROR_NONE) { + cancel_with_error(call, GRPC_ERROR_REF(error)); + } finish_batch_step(bctl); } @@ -1774,28 +1661,32 @@ static grpc_call_error call_start_batch(grpc_call* call, const grpc_op* ops, call->send_extra_metadata_count = 1; call->send_extra_metadata[0].md = grpc_channel_get_reffed_status_elem( call->channel, op->data.send_status_from_server.status); - { - grpc_error* override_error = GRPC_ERROR_NONE; - if (op->data.send_status_from_server.status != GRPC_STATUS_OK) { - override_error = - error_from_status(op->data.send_status_from_server.status, - "Returned non-ok status"); - } - if (op->data.send_status_from_server.status_details != nullptr) { - call->send_extra_metadata[1].md = grpc_mdelem_from_slices( - GRPC_MDSTR_GRPC_MESSAGE, - grpc_slice_ref_internal( - *op->data.send_status_from_server.status_details)); - call->send_extra_metadata_count++; + grpc_error* status_error = + op->data.send_status_from_server.status == GRPC_STATUS_OK + ? GRPC_ERROR_NONE + : grpc_error_set_int( + GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "Server returned error"), + GRPC_ERROR_INT_GRPC_STATUS, + static_cast<intptr_t>( + op->data.send_status_from_server.status)); + if (op->data.send_status_from_server.status_details != nullptr) { + call->send_extra_metadata[1].md = grpc_mdelem_from_slices( + GRPC_MDSTR_GRPC_MESSAGE, + grpc_slice_ref_internal( + *op->data.send_status_from_server.status_details)); + call->send_extra_metadata_count++; + if (status_error != GRPC_ERROR_NONE) { char* msg = grpc_slice_to_c_string( GRPC_MDVALUE(call->send_extra_metadata[1].md)); - override_error = - grpc_error_set_str(override_error, GRPC_ERROR_STR_GRPC_MESSAGE, + status_error = + grpc_error_set_str(status_error, GRPC_ERROR_STR_GRPC_MESSAGE, grpc_slice_from_copied_string(msg)); gpr_free(msg); } - set_status_from_error(call, STATUS_FROM_API_OVERRIDE, override_error); } + + call->status_error = status_error; if (!prepare_application_metadata( call, static_cast<int>( diff --git a/src/core/lib/surface/call.h b/src/core/lib/surface/call.h index b3b06059d4..b34260505a 100644 --- a/src/core/lib/surface/call.h +++ b/src/core/lib/surface/call.h @@ -33,6 +33,7 @@ typedef void (*grpc_ioreq_completion_func)(grpc_call* call, int success, typedef struct grpc_call_create_args { grpc_channel* channel; + grpc_server* server; grpc_call* parent; uint32_t propagation_mask; diff --git a/src/core/lib/surface/channel.cc b/src/core/lib/surface/channel.cc index 82635d3c21..054fe105c3 100644 --- a/src/core/lib/surface/channel.cc +++ b/src/core/lib/surface/channel.cc @@ -100,7 +100,6 @@ grpc_channel* grpc_channel_create_with_builder( return channel; } - memset(channel, 0, sizeof(*channel)); channel->target = target; channel->is_client = grpc_channel_stack_type_is_client(channel_stack_type); size_t channel_tracer_max_nodes = 0; // default to off @@ -166,11 +165,12 @@ grpc_channel* grpc_channel_create_with_builder( } grpc_channel_args_destroy(args); - if (channelz_enabled) { - bool is_top_level_channel = channel->is_client && !internal_channel; + // we only need to do the channelz bookkeeping for clients here. The channelz + // bookkeeping for server channels occurs in src/core/lib/surface/server.cc + if (channelz_enabled && channel->is_client) { channel->channelz_channel = channel_node_create_func( - channel, channel_tracer_max_nodes, is_top_level_channel); - channel->channelz_channel->trace()->AddTraceEvent( + channel, channel_tracer_max_nodes, !internal_channel); + channel->channelz_channel->AddTraceEvent( grpc_core::channelz::ChannelTrace::Severity::Info, grpc_slice_from_static_string("Channel created")); } @@ -428,6 +428,9 @@ void grpc_channel_internal_unref(grpc_channel* c REF_ARG) { static void destroy_channel(void* arg, grpc_error* error) { grpc_channel* channel = static_cast<grpc_channel*>(arg); if (channel->channelz_channel != nullptr) { + channel->channelz_channel->AddTraceEvent( + grpc_core::channelz::ChannelTrace::Severity::Info, + grpc_slice_from_static_string("Channel destroyed")); channel->channelz_channel->MarkChannelDestroyed(); channel->channelz_channel.reset(); } diff --git a/src/core/lib/surface/completion_queue.cc b/src/core/lib/surface/completion_queue.cc index 0769d9e4f6..c2cf450e94 100644 --- a/src/core/lib/surface/completion_queue.cc +++ b/src/core/lib/surface/completion_queue.cc @@ -1364,9 +1364,11 @@ static void cq_shutdown_callback(grpc_completion_queue* cq) { } cqd->shutdown_called = true; if (gpr_atm_full_fetch_add(&cqd->pending_events, -1) == 1) { + gpr_mu_unlock(cq->mu); cq_finish_shutdown_callback(cq); + } else { + gpr_mu_unlock(cq->mu); } - gpr_mu_unlock(cq->mu); GRPC_CQ_INTERNAL_UNREF(cq, "shutting_down (callback cq)"); } diff --git a/src/core/lib/surface/server.cc b/src/core/lib/surface/server.cc index cb34def740..5fa58ffdec 100644 --- a/src/core/lib/surface/server.cc +++ b/src/core/lib/surface/server.cc @@ -149,6 +149,9 @@ struct call_data { grpc_closure server_on_recv_initial_metadata; grpc_closure kill_zombie_closure; grpc_closure* on_done_recv_initial_metadata; + grpc_closure recv_trailing_metadata_ready; + grpc_error* error; + grpc_closure* original_recv_trailing_metadata_ready; grpc_closure publish; @@ -219,6 +222,8 @@ struct grpc_server { /** when did we print the last shutdown progress message */ gpr_timespec last_shutdown_message_time; + + grpc_core::RefCountedPtr<grpc_core::channelz::ServerNode> channelz_server; }; #define SERVER_FROM_CALL_ELEM(elem) \ @@ -364,6 +369,7 @@ static void server_ref(grpc_server* server) { static void server_delete(grpc_server* server) { registered_method* rm; size_t i; + server->channelz_server.reset(); grpc_channel_args_destroy(server->channel_args); gpr_mu_destroy(&server->mu_global); gpr_mu_destroy(&server->mu_call); @@ -730,6 +736,14 @@ static void server_on_recv_initial_metadata(void* ptr, grpc_error* error) { GRPC_CLOSURE_RUN(calld->on_done_recv_initial_metadata, error); } +static void server_recv_trailing_metadata_ready(void* user_data, + grpc_error* err) { + grpc_call_element* elem = static_cast<grpc_call_element*>(user_data); + call_data* calld = static_cast<call_data*>(elem->call_data); + err = grpc_error_add_child(GRPC_ERROR_REF(err), GRPC_ERROR_REF(calld->error)); + GRPC_CLOSURE_RUN(calld->original_recv_trailing_metadata_ready, err); +} + static void server_mutate_op(grpc_call_element* elem, grpc_transport_stream_op_batch* op) { call_data* calld = static_cast<call_data*>(elem->call_data); @@ -745,6 +759,12 @@ static void server_mutate_op(grpc_call_element* elem, op->payload->recv_initial_metadata.recv_flags = &calld->recv_initial_metadata_flags; } + if (op->recv_trailing_metadata) { + calld->original_recv_trailing_metadata_ready = + op->payload->recv_trailing_metadata.recv_trailing_metadata_ready; + op->payload->recv_trailing_metadata.recv_trailing_metadata_ready = + &calld->recv_trailing_metadata_ready; + } } static void server_start_transport_stream_op_batch( @@ -779,6 +799,7 @@ static void accept_stream(void* cd, grpc_transport* transport, args.channel = chand->channel; args.server_transport_data = transport_server_data; args.send_deadline = GRPC_MILLIS_INF_FUTURE; + args.server = chand->server; grpc_call* call; grpc_error* error = grpc_call_create(&args, &call); grpc_call_element* elem = @@ -828,7 +849,9 @@ static grpc_error* init_call_elem(grpc_call_element* elem, GRPC_CLOSURE_INIT(&calld->server_on_recv_initial_metadata, server_on_recv_initial_metadata, elem, grpc_schedule_on_exec_ctx); - + GRPC_CLOSURE_INIT(&calld->recv_trailing_metadata_ready, + server_recv_trailing_metadata_ready, elem, + grpc_schedule_on_exec_ctx); server_ref(chand->server); return GRPC_ERROR_NONE; } @@ -840,7 +863,7 @@ static void destroy_call_elem(grpc_call_element* elem, call_data* calld = static_cast<call_data*>(elem->call_data); GPR_ASSERT(calld->state != PENDING); - + GRPC_ERROR_UNREF(calld->error); if (calld->host_set) { grpc_slice_unref_internal(calld->host); } @@ -941,6 +964,7 @@ void grpc_server_register_completion_queue(grpc_server* server, } grpc_server* grpc_server_create(const grpc_channel_args* args, void* reserved) { + grpc_core::ExecCtx exec_ctx; GRPC_API_TRACE("grpc_server_create(%p, %p)", 2, (args, reserved)); grpc_server* server = @@ -957,6 +981,20 @@ grpc_server* grpc_server_create(const grpc_channel_args* args, void* reserved) { server->channel_args = grpc_channel_args_copy(args); + const grpc_arg* arg = grpc_channel_args_find(args, GRPC_ARG_ENABLE_CHANNELZ); + if (grpc_channel_arg_get_bool(arg, false)) { + arg = grpc_channel_args_find(args, + GRPC_ARG_MAX_CHANNEL_TRACE_EVENTS_PER_NODE); + size_t trace_events_per_node = + grpc_channel_arg_get_integer(arg, {0, 0, INT_MAX}); + server->channelz_server = + grpc_core::MakeRefCounted<grpc_core::channelz::ServerNode>( + trace_events_per_node); + server->channelz_server->AddTraceEvent( + grpc_core::channelz::ChannelTrace::Severity::Info, + grpc_slice_from_static_string("Server created")); + } + return server; } @@ -1459,3 +1497,11 @@ int grpc_server_has_open_connections(grpc_server* server) { gpr_mu_unlock(&server->mu_global); return r; } + +grpc_core::channelz::ServerNode* grpc_server_get_channelz_node( + grpc_server* server) { + if (server == nullptr) { + return nullptr; + } + return server->channelz_server.get(); +} diff --git a/src/core/lib/surface/server.h b/src/core/lib/surface/server.h index c617cc223e..0196743ff9 100644 --- a/src/core/lib/surface/server.h +++ b/src/core/lib/surface/server.h @@ -23,6 +23,7 @@ #include <grpc/grpc.h> #include "src/core/lib/channel/channel_stack.h" +#include "src/core/lib/channel/channelz.h" #include "src/core/lib/debug/trace.h" #include "src/core/lib/transport/transport.h" @@ -46,6 +47,9 @@ void grpc_server_setup_transport(grpc_server* server, grpc_transport* transport, grpc_pollset* accepting_pollset, const grpc_channel_args* args); +grpc_core::channelz::ServerNode* grpc_server_get_channelz_node( + grpc_server* server); + const grpc_channel_args* grpc_server_get_channel_args(grpc_server* server); int grpc_server_has_open_connections(grpc_server* server); diff --git a/src/core/lib/surface/version.cc b/src/core/lib/surface/version.cc index e92fe2c5a1..a44f9acdc3 100644 --- a/src/core/lib/surface/version.cc +++ b/src/core/lib/surface/version.cc @@ -25,4 +25,4 @@ const char* grpc_version_string(void) { return "6.0.0-dev"; } -const char* grpc_g_stands_for(void) { return "glider"; } +const char* grpc_g_stands_for(void) { return "gao"; } |