From d2e357b10d087075e582434292ea05766cc9cc53 Mon Sep 17 00:00:00 2001
From: Alexander Polcyn
Date: Fri, 8 Jun 2018 14:19:46 -0700
Subject: Revert "Merge pull request #15685 from grpc/revert-15435-fix_shutdown_closed_socket"

This reverts commit 8b5e45a35b83908d7a5f1dae6f412929f70c90ef, reversing
changes made to 2ea5e1d1dee3d7d7219051723290293901e37fbc.
---
 test/cpp/naming/BUILD                      |  18 +-
 test/cpp/naming/cancel_ares_query_test.cc  | 289 +++++++++++++++++++++++++++++
 test/cpp/naming/gen_build_yaml.py          |  19 ++
 test/cpp/naming/resolver_component_test.cc | 110 ++++++++++-
 4 files changed, 432 insertions(+), 4 deletions(-)
 create mode 100644 test/cpp/naming/cancel_ares_query_test.cc

(limited to 'test/cpp/naming')

diff --git a/test/cpp/naming/BUILD b/test/cpp/naming/BUILD
index fa0b216f8f..2925e8fbcf 100644
--- a/test/cpp/naming/BUILD
+++ b/test/cpp/naming/BUILD
@@ -22,7 +22,7 @@ package(
 
 licenses(["notice"])  # Apache v2
 
-load("//bazel:grpc_build_system.bzl", "grpc_py_binary")
+load("//bazel:grpc_build_system.bzl", "grpc_py_binary", "grpc_cc_test")
 
 load(":generate_resolver_component_tests.bzl", "generate_resolver_component_tests")
 
@@ -35,4 +35,20 @@ grpc_py_binary(
     testonly = True,
 )
 
+grpc_cc_test(
+    name = "cancel_ares_query_test",
+    srcs = ["cancel_ares_query_test.cc"],
+    external_deps = ["gmock"],
+    deps = [
+        "//test/cpp/util:test_util",
+        "//test/core/util:grpc_test_util",
+        "//test/core/util:gpr_test_util",
+        "//:grpc++",
+        "//:grpc",
+        "//:gpr",
+        "//test/cpp/util:test_config",
+        "//test/core/end2end:cq_verifier",
+    ],
+)
+
 generate_resolver_component_tests()
diff --git a/test/cpp/naming/cancel_ares_query_test.cc b/test/cpp/naming/cancel_ares_query_test.cc
new file mode 100644
index 0000000000..11cdc0b774
--- /dev/null
+++ b/test/cpp/naming/cancel_ares_query_test.cc
@@ -0,0 +1,289 @@
+/*
+ *
+ * Copyright 2015 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include "include/grpc/support/string_util.h"
+#include "src/core/ext/filters/client_channel/resolver.h"
+#include "src/core/ext/filters/client_channel/resolver_registry.h"
+#include "src/core/lib/channel/channel_args.h"
+#include "src/core/lib/debug/stats.h"
+#include "src/core/lib/gpr/env.h"
+#include "src/core/lib/gpr/host_port.h"
+#include "src/core/lib/gpr/string.h"
+#include "src/core/lib/gprpp/orphanable.h"
+#include "src/core/lib/gprpp/thd.h"
+#include "src/core/lib/iomgr/combiner.h"
+#include "src/core/lib/iomgr/pollset.h"
+#include "src/core/lib/iomgr/pollset_set.h"
+#include "test/core/end2end/cq_verifier.h"
+#include "test/core/util/cmdline.h"
+#include "test/core/util/port.h"
+#include "test/core/util/test_config.h"
+
+// TODO: pull in different headers when enabling this
+// test on windows. Also set BAD_SOCKET_RETURN_VAL
+// to INVALID_SOCKET on windows.
+#include "src/core/lib/iomgr/sockaddr_posix.h"
+#define BAD_SOCKET_RETURN_VAL -1
+
+namespace {
+
+void* Tag(intptr_t t) { return (void*)t; }
+
+gpr_timespec FiveSecondsFromNow(void) {
+  return grpc_timeout_seconds_to_deadline(5);
+}
+
+void DrainCq(grpc_completion_queue* cq) {
+  grpc_event ev;
+  do {
+    ev = grpc_completion_queue_next(cq, FiveSecondsFromNow(), nullptr);
+  } while (ev.type != GRPC_QUEUE_SHUTDOWN);
+}
+
+void EndTest(grpc_channel* client, grpc_completion_queue* cq) {
+  grpc_channel_destroy(client);
+  grpc_completion_queue_shutdown(cq);
+  DrainCq(cq);
+  grpc_completion_queue_destroy(cq);
+}
+
+class FakeNonResponsiveDNSServer {
+ public:
+  FakeNonResponsiveDNSServer(int port) {
+    socket_ = socket(AF_INET6, SOCK_DGRAM, 0);
+    if (socket_ == BAD_SOCKET_RETURN_VAL) {
+      gpr_log(GPR_DEBUG, "Failed to create UDP ipv6 socket");
+      abort();
+    }
+    sockaddr_in6 addr;
+    memset(&addr, 0, sizeof(addr));
+    addr.sin6_family = AF_INET6;
+    addr.sin6_port = htons(port);
+    ((char*)&addr.sin6_addr)[15] = 1;
+    if (bind(socket_, (const sockaddr*)&addr, sizeof(addr)) != 0) {
+      gpr_log(GPR_DEBUG, "Failed to bind UDP ipv6 socket to [::1]:%d", port);
+      abort();
+    }
+  }
+  ~FakeNonResponsiveDNSServer() { close(socket_); }
+
+ private:
+  int socket_;
+};
+
+struct ArgsStruct {
+  gpr_atm done_atm;
+  gpr_mu* mu;
+  grpc_pollset* pollset;
+  grpc_pollset_set* pollset_set;
+  grpc_combiner* lock;
+  grpc_channel_args* channel_args;
+};
+
+void ArgsInit(ArgsStruct* args) {
+  args->pollset = (grpc_pollset*)gpr_zalloc(grpc_pollset_size());
+  grpc_pollset_init(args->pollset, &args->mu);
+  args->pollset_set = grpc_pollset_set_create();
+  grpc_pollset_set_add_pollset(args->pollset_set, args->pollset);
+  args->lock = grpc_combiner_create();
+  gpr_atm_rel_store(&args->done_atm, 0);
+  args->channel_args = nullptr;
+}
+
+void DoNothing(void* arg, grpc_error* error) {}
+
+void ArgsFinish(ArgsStruct* args) {
+  grpc_pollset_set_del_pollset(args->pollset_set, args->pollset);
+  grpc_pollset_set_destroy(args->pollset_set);
+  grpc_closure DoNothing_cb;
+  GRPC_CLOSURE_INIT(&DoNothing_cb, DoNothing, nullptr,
+                    grpc_schedule_on_exec_ctx);
+  grpc_pollset_shutdown(args->pollset, &DoNothing_cb);
+  // exec_ctx needs to be flushed before calling grpc_pollset_destroy()
+  grpc_channel_args_destroy(args->channel_args);
+  grpc_core::ExecCtx::Get()->Flush();
+  grpc_pollset_destroy(args->pollset);
+  gpr_free(args->pollset);
+  GRPC_COMBINER_UNREF(args->lock, nullptr);
+}
+
+void PollPollsetUntilRequestDone(ArgsStruct* args) {
+  while (true) {
+    bool done = gpr_atm_acq_load(&args->done_atm) != 0;
+    if (done) {
+      break;
+    }
+    grpc_pollset_worker* worker = nullptr;
+    grpc_core::ExecCtx exec_ctx;
+    gpr_mu_lock(args->mu);
+    GRPC_LOG_IF_ERROR(
+        "pollset_work",
+        grpc_pollset_work(args->pollset, &worker,
+                          grpc_timespec_to_millis_round_up(
+                              gpr_inf_future(GPR_CLOCK_REALTIME))));
+    gpr_mu_unlock(args->mu);
+  }
+}
+
+void CheckResolverResultAssertFailureLocked(void* arg, grpc_error* error) {
+  EXPECT_NE(error, GRPC_ERROR_NONE);
+  ArgsStruct* args = static_cast<ArgsStruct*>(arg);
+  gpr_atm_rel_store(&args->done_atm, 1);
+  gpr_mu_lock(args->mu);
+  GRPC_LOG_IF_ERROR("pollset_kick", grpc_pollset_kick(args->pollset, nullptr));
+  gpr_mu_unlock(args->mu);
+}
+
+TEST(CancelDuringAresQuery, TestCancelActiveDNSQuery) {
+  grpc_core::ExecCtx exec_ctx;
+  ArgsStruct args;
+  ArgsInit(&args);
+  int fake_dns_port = grpc_pick_unused_port_or_die();
+  FakeNonResponsiveDNSServer fake_dns_server(fake_dns_port);
+  char* client_target;
+  GPR_ASSERT(gpr_asprintf(
+      &client_target,
+      "dns://[::1]:%d/dont-care-since-wont-be-resolved.test.com:1234",
+      fake_dns_port));
+  // create resolver and resolve
+  grpc_core::OrphanablePtr<grpc_core::Resolver> resolver =
+      grpc_core::ResolverRegistry::CreateResolver(client_target, nullptr,
+                                                  args.pollset_set, args.lock);
+  gpr_free(client_target);
+  grpc_closure on_resolver_result_changed;
+  GRPC_CLOSURE_INIT(&on_resolver_result_changed,
+                    CheckResolverResultAssertFailureLocked, (void*)&args,
+                    grpc_combiner_scheduler(args.lock));
+  resolver->NextLocked(&args.channel_args, &on_resolver_result_changed);
+  // Without resetting and causing resolver shutdown, the
+  // PollPollsetUntilRequestDone call should never finish.
+  resolver.reset();
+  grpc_core::ExecCtx::Get()->Flush();
+  PollPollsetUntilRequestDone(&args);
+  ArgsFinish(&args);
+}
+
+TEST(CancelDuringAresQuery,
+     TestHitDeadlineAndDestroyChannelDuringAresResolutionIsGraceful) {
+  // Start up fake non responsive DNS server
+  int fake_dns_port = grpc_pick_unused_port_or_die();
+  FakeNonResponsiveDNSServer fake_dns_server(fake_dns_port);
+  // Create a call that will try to use the fake DNS server
+  char* client_target = nullptr;
+  GPR_ASSERT(gpr_asprintf(
+      &client_target,
+      "dns://[::1]:%d/dont-care-since-wont-be-resolved.test.com:1234",
+      fake_dns_port));
+  grpc_channel* client =
+      grpc_insecure_channel_create(client_target,
+                                   /* client_args */ nullptr, nullptr);
+  gpr_free(client_target);
+  grpc_completion_queue* cq = grpc_completion_queue_create_for_next(nullptr);
+  cq_verifier* cqv = cq_verifier_create(cq);
+  gpr_timespec deadline = grpc_timeout_milliseconds_to_deadline(10);
+  grpc_call* call = grpc_channel_create_call(
+      client, nullptr, GRPC_PROPAGATE_DEFAULTS, cq,
+      grpc_slice_from_static_string("/foo"), nullptr, deadline, nullptr);
+  GPR_ASSERT(call);
+  grpc_metadata_array initial_metadata_recv;
+  grpc_metadata_array trailing_metadata_recv;
+  grpc_metadata_array request_metadata_recv;
+  grpc_metadata_array_init(&initial_metadata_recv);
+  grpc_metadata_array_init(&trailing_metadata_recv);
+  grpc_metadata_array_init(&request_metadata_recv);
+  grpc_call_details call_details;
+  grpc_call_details_init(&call_details);
+  grpc_status_code status;
+  const char* error_string;
+  grpc_slice details;
+  // Set ops for the client request
+  grpc_op ops_base[6];
+  memset(ops_base, 0, sizeof(ops_base));
+  grpc_op* op = ops_base;
+  op->op = GRPC_OP_SEND_INITIAL_METADATA;
+  op->data.send_initial_metadata.count = 0;
+  op->flags = 0;
+  op->reserved = nullptr;
+  op++;
+  op->op = GRPC_OP_SEND_CLOSE_FROM_CLIENT;
+  op->flags = 0;
+  op->reserved = nullptr;
+  op++;
+  op->op = GRPC_OP_RECV_INITIAL_METADATA;
+  op->data.recv_initial_metadata.recv_initial_metadata =
+      &initial_metadata_recv;
+  op->flags = 0;
+  op->reserved = nullptr;
+  op++;
+  op->op = GRPC_OP_RECV_STATUS_ON_CLIENT;
+  op->data.recv_status_on_client.trailing_metadata = &trailing_metadata_recv;
+  op->data.recv_status_on_client.status = &status;
+  op->data.recv_status_on_client.status_details = &details;
+  op->data.recv_status_on_client.error_string = &error_string;
+  op->flags = 0;
+  op->reserved = nullptr;
+  op++;
+  // Run the call and sanity check it failed as expected
+  grpc_call_error error = grpc_call_start_batch(
+      call, ops_base, static_cast<size_t>(op - ops_base), Tag(1), nullptr);
+  EXPECT_EQ(GRPC_CALL_OK, error);
+  CQ_EXPECT_COMPLETION(cqv, Tag(1), 1);
+  cq_verify(cqv);
+  EXPECT_EQ(status, GRPC_STATUS_DEADLINE_EXCEEDED);
+  // Teardown
+  grpc_slice_unref(details);
+  gpr_free((void*)error_string);
+  grpc_metadata_array_destroy(&initial_metadata_recv);
+  grpc_metadata_array_destroy(&trailing_metadata_recv);
+  grpc_metadata_array_destroy(&request_metadata_recv);
+  grpc_call_details_destroy(&call_details);
+  grpc_call_unref(call);
+  cq_verifier_destroy(cqv);
+  EndTest(client, cq);
+}
+
+}  // namespace
+
+int main(int argc, char** argv) {
+  grpc_test_init(argc, argv);
+  ::testing::InitGoogleTest(&argc, argv);
+  gpr_setenv("GRPC_DNS_RESOLVER", "ares");
+  // Sanity check the time that it takes to run the test
+  // including the teardown time (the teardown
+  // part of the test involves cancelling the DNS query,
+  // which is the main point of interest for this test).
+  gpr_timespec overall_deadline = grpc_timeout_seconds_to_deadline(4);
+  grpc_init();
+  auto result = RUN_ALL_TESTS();
+  grpc_shutdown();
+  if (gpr_time_cmp(gpr_now(GPR_CLOCK_MONOTONIC), overall_deadline) > 0) {
+    gpr_log(GPR_ERROR, "Test took too long");
+    abort();
+  }
+  return result;
+}
diff --git a/test/cpp/naming/gen_build_yaml.py b/test/cpp/naming/gen_build_yaml.py
index 6e63cbe483..eb2c01e7ad 100755
--- a/test/cpp/naming/gen_build_yaml.py
+++ b/test/cpp/naming/gen_build_yaml.py
@@ -120,6 +120,25 @@ def main():
           'grpc++_test_config',
         ],
       } for unsecure_build_config_suffix in ['_unsecure', '']
+    ] + [
+      {
+        'name': 'cancel_ares_query_test',
+        'build': 'test',
+        'language': 'c++',
+        'gtest': True,
+        'run': True,
+        'src': ['test/cpp/naming/cancel_ares_query_test.cc'],
+        'platforms': ['linux', 'posix', 'mac'],
+        'deps': [
+          'grpc++_test_util',
+          'grpc_test_util',
+          'gpr_test_util',
+          'grpc++',
+          'grpc',
+          'gpr',
+          'grpc++_test_config',
+        ],
+      },
     ]
   }
diff --git a/test/cpp/naming/resolver_component_test.cc b/test/cpp/naming/resolver_component_test.cc
index f4be064305..649441d4fa 100644
--- a/test/cpp/naming/resolver_component_test.cc
+++ b/test/cpp/naming/resolver_component_test.cc
@@ -22,10 +22,14 @@
 #include
 #include
 #include
+
 #include
+#include
+#include
 
 #include
 #include
+#include
 #include
 #include "test/cpp/util/subprocess.h"
@@ -48,6 +52,12 @@
 #include "test/core/util/port.h"
 #include "test/core/util/test_config.h"
 
+// TODO: pull in different headers when enabling this
+// test on windows. Also set BAD_SOCKET_RETURN_VAL
+// to INVALID_SOCKET on windows.
+#include "src/core/lib/iomgr/sockaddr_posix.h"
+#define BAD_SOCKET_RETURN_VAL -1
+
 using grpc::SubProcess;
 using std::vector;
 using testing::UnorderedElementsAreArray;
@@ -231,7 +241,73 @@ void CheckLBPolicyResultLocked(grpc_channel_args* channel_args,
   }
 }
 
+void OpenAndCloseSocketsStressLoop(int dummy_port, gpr_event* done_ev) {
+  // The goal of this loop is to catch socket
+  // "use after close" bugs within the c-ares resolver by acting
+  // like some separate thread doing I/O.
+  // Its goal is to try to hit race conditions whereby:
+  //    1) The c-ares resolver closes a socket.
+  //    2) This loop opens a socket with (coincidentally) the same handle.
+  //    3) The c-ares resolver mistakenly uses that same socket without
+  //       realizing that it's closed.
+  //    4) This loop performs an operation on that socket that should
+  //       succeed but instead fails because of what the c-ares
+  //       resolver did in the meantime.
+  sockaddr_in6 addr;
+  memset(&addr, 0, sizeof(addr));
+  addr.sin6_family = AF_INET6;
+  addr.sin6_port = htons(dummy_port);
+  ((char*)&addr.sin6_addr)[15] = 1;
+  for (;;) {
+    if (gpr_event_get(done_ev)) {
+      return;
+    }
+    std::vector<int> sockets;
+    // First open a bunch of sockets, bind and listen.
+    // '50' is an arbitrary number that, experimentally,
+    // has a good chance of catching bugs.
+    for (size_t i = 0; i < 50; i++) {
+      int s = socket(AF_INET6, SOCK_STREAM, 0);
+      int val = 1;
+      setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val));
+      fcntl(s, F_SETFL, O_NONBLOCK);
+      ASSERT_TRUE(s != BAD_SOCKET_RETURN_VAL)
+          << "Failed to create TCP ipv6 socket";
+      gpr_log(GPR_DEBUG, "Opened fd: %d", s);
+      ASSERT_TRUE(bind(s, (const sockaddr*)&addr, sizeof(addr)) == 0)
+          << "Failed to bind socket " + std::to_string(s) +
+                 " to [::1]:" + std::to_string(dummy_port) +
+                 ". errno: " + std::to_string(errno);
+      ASSERT_TRUE(listen(s, 1) == 0)
+          << "Failed to listen on socket " + std::to_string(s) +
+                 ". errno: " + std::to_string(errno);
+      sockets.push_back(s);
+    }
+    // Do a non-blocking accept followed by a close on all of those sockets.
+    // Do this in a separate loop to try to induce a time window to hit races.
+    for (size_t i = 0; i < sockets.size(); i++) {
+      gpr_log(GPR_DEBUG, "non-blocking accept then close on %d", sockets[i]);
+      if (accept(sockets[i], nullptr, nullptr)) {
+        // If e.g. a "shutdown" was called on this fd from another thread,
+        // then this accept call should fail with an unexpected error.
+        ASSERT_TRUE(errno == EAGAIN || errno == EWOULDBLOCK)
+            << "OpenAndCloseSocketsStressLoop accept on socket " +
+                   std::to_string(sockets[i]) +
+                   " failed in "
+                   "an unexpected way. "
+                   "errno: " + std::to_string(errno) +
+                   ". Socket use-after-close bugs are likely.";
+      }
+      ASSERT_TRUE(close(sockets[i]) == 0)
+          << "Failed to close socket: " + std::to_string(sockets[i]) +
+                 ". errno: " + std::to_string(errno);
+    }
+  }
+}
+
 void CheckResolverResultLocked(void* argsp, grpc_error* err) {
+  EXPECT_EQ(err, GRPC_ERROR_NONE);
   ArgsStruct* args = (ArgsStruct*)argsp;
   grpc_channel_args* channel_args = args->channel_args;
   const grpc_arg* channel_arg =
@@ -271,7 +347,17 @@ void CheckResolverResultLocked(void* argsp, grpc_error* err) {
   gpr_mu_unlock(args->mu);
 }
 
-TEST(ResolverComponentTest, TestResolvesRelevantRecords) {
+void CheckResolvedWithoutErrorLocked(void* argsp, grpc_error* err) {
+  EXPECT_EQ(err, GRPC_ERROR_NONE);
+  ArgsStruct* args = (ArgsStruct*)argsp;
+  gpr_atm_rel_store(&args->done_atm, 1);
+  gpr_mu_lock(args->mu);
+  GRPC_LOG_IF_ERROR("pollset_kick", grpc_pollset_kick(args->pollset, nullptr));
+  gpr_mu_unlock(args->mu);
+}
+
+void RunResolvesRelevantRecordsTest(void (*OnDoneLocked)(void* arg,
+                                                         grpc_error* error)) {
   grpc_core::ExecCtx exec_ctx;
   ArgsStruct args;
   ArgsInit(&args);
@@ -289,14 +375,32 @@
                                                   args.pollset_set, args.lock);
   gpr_free(whole_uri);
   grpc_closure on_resolver_result_changed;
-  GRPC_CLOSURE_INIT(&on_resolver_result_changed, CheckResolverResultLocked,
-                    (void*)&args, grpc_combiner_scheduler(args.lock));
+  GRPC_CLOSURE_INIT(&on_resolver_result_changed, OnDoneLocked, (void*)&args,
+                    grpc_combiner_scheduler(args.lock));
   resolver->NextLocked(&args.channel_args, &on_resolver_result_changed);
   grpc_core::ExecCtx::Get()->Flush();
   PollPollsetUntilRequestDone(&args);
   ArgsFinish(&args);
 }
 
+TEST(ResolverComponentTest, TestResolvesRelevantRecords) {
+  RunResolvesRelevantRecordsTest(CheckResolverResultLocked);
+}
+
+TEST(ResolverComponentTest, TestResolvesRelevantRecordsWithConcurrentFdStress) {
+  // Start up background stress thread
+  int dummy_port = grpc_pick_unused_port_or_die();
+  gpr_event done_ev;
+  gpr_event_init(&done_ev);
+  std::thread socket_stress_thread(OpenAndCloseSocketsStressLoop, dummy_port,
+                                   &done_ev);
+  // Run the resolver test
+  RunResolvesRelevantRecordsTest(CheckResolvedWithoutErrorLocked);
+  // Shutdown and join stress thread
+  gpr_event_set(&done_ev, (void*)1);
+  socket_stress_thread.join();
+}
+
 }  // namespace
 
 int main(int argc, char** argv) {
-- 
cgit v1.2.3