From c4f271d1a68366b6fa5ff38ea7d951b6a22af044 Mon Sep 17 00:00:00 2001 From: philwo Date: Mon, 15 May 2017 19:10:21 +0200 Subject: Automated g4 rollback of commit 3e5edafa2a04a71cd3596e929e83222da725f3f9. *** Reason for rollback *** Likely cause for b/38172480 ("blaze now waits for all processes spawned by local tests to terminate") and b/38194553 ("Server terminated abruptly (error code: 14, error message: 'Endpoint read failed'"). I have a fix almost ready, but it consists of many lines of new code - we shouldn't rush that into Bazel's 0.5.0 release. Instead, let's roll this back, do a release using the known good older process-wrapper and then go forward in 0.5.1 with a better and well tested new version of this. *** Original change description *** process-wrapper: Wait for all (grand)children before exiting. This uses Linux's PR_SET_CHILD_SUBREAPER and FreeBSD's PROC_REAP_ACQUIRE features to become an init-like process for all (grand)children spawned by process-wrapper, which allows us to a) kill them reliably and then b) wait for them reliably. Before this change, we only killed the main child, waited for it, then fired off a kill -9 on the process group, without waiting for it. This led to a race condition where Bazel would try to use... 
*** PiperOrigin-RevId: 156068188 --- src/main/tools/BUILD | 31 +--- src/main/tools/linux-sandbox-options.cc | 16 +- src/main/tools/linux-sandbox-pid1.cc | 211 ++++++++++++++++++++++----- src/main/tools/linux-sandbox.cc | 163 ++++++++++----------- src/main/tools/process-tools.c | 151 +++++++++++++++++++ src/main/tools/process-tools.h | 91 ++++++------ src/main/tools/process-wrapper.c | 169 +++++++++++++++++++++ src/main/tools/process-wrapper.cc | 186 ----------------------- src/test/shell/bazel/process-wrapper_test.sh | 2 +- 9 files changed, 641 insertions(+), 379 deletions(-) create mode 100644 src/main/tools/process-tools.c create mode 100644 src/main/tools/process-wrapper.c delete mode 100644 src/main/tools/process-wrapper.cc diff --git a/src/main/tools/BUILD b/src/main/tools/BUILD index 1a38423d7c..3ddad54f9c 100644 --- a/src/main/tools/BUILD +++ b/src/main/tools/BUILD @@ -1,28 +1,20 @@ package(default_visibility = ["//src:__subpackages__"]) -cc_library( - name = "process-tools", - srcs = [ - "process-tools.cc", - "process-tools.h", - ], -) - cc_binary( name = "process-wrapper", srcs = select({ "//src:windows_msvc": ["process-wrapper-windows.cc"], "//conditions:default": [ - "process-wrapper.cc", + "process-tools.c", + "process-tools.h", + "process-wrapper.c", ], }), - linkopts = ["-lm"], - deps = select({ + copts = select({ "//src:windows_msvc": [], - "//conditions:default": [ - ":process-tools", - ], + "//conditions:default": ["-std=c99"], }), + linkopts = ["-lm"], ) cc_binary( @@ -53,17 +45,6 @@ cc_binary( ], }), linkopts = ["-lm"], - deps = select({ - "//src:darwin": [], - "//src:darwin_x86_64": [], - "//src:freebsd": [], - "//src:windows": [], - "//src:windows_msys": [], - "//src:windows_msvc": [], - "//conditions:default": [ - ":process-tools", - ], - }), ) filegroup( diff --git a/src/main/tools/linux-sandbox-options.cc b/src/main/tools/linux-sandbox-options.cc index 095bfb9023..51368247e8 100644 --- a/src/main/tools/linux-sandbox-options.cc +++ 
b/src/main/tools/linux-sandbox-options.cc @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "src/main/tools/linux-sandbox-options.h" +#include "linux-sandbox-options.h" +#include "linux-sandbox-utils.h" #define DIE(args...) \ { \ fprintf(stderr, __FILE__ ":" S__LINE__ ": \"" args); \ fprintf(stderr, "\": "); \ - perror(nullptr); \ + perror(NULL); \ exit(EXIT_FAILURE); \ } @@ -38,8 +39,6 @@ #include #include -#include "src/main/tools/linux-sandbox-utils.h" - using std::ifstream; using std::unique_ptr; using std::vector; @@ -200,7 +199,6 @@ static void ParseCommandLine(unique_ptr> args) { if (optind < static_cast(args->size())) { if (opt.args.empty()) { opt.args.assign(args->begin() + optind, args->end()); - opt.args.push_back(nullptr); } else { Usage(args->front(), "Merging commands not supported."); } @@ -209,8 +207,8 @@ static void ParseCommandLine(unique_ptr> args) { // Expands a single argument, expanding options @filename to read in the content // of the file and add it to the list of processed arguments. -static unique_ptr> ExpandArgument( - unique_ptr> expanded, char *arg) { +unique_ptr> ExpandArgument(unique_ptr> expanded, + char *arg) { if (arg[0] == '@') { const char *filename = arg + 1; // strip off the '@'. ifstream f(filename); @@ -238,7 +236,7 @@ static unique_ptr> ExpandArgument( // Pre-processes an argument list, expanding options @filename to read in the // content of the file and add it to the list of arguments. Stops expanding // arguments once it encounters "--". 
-static unique_ptr> ExpandArguments(const vector &args) { +unique_ptr> ExpandArguments(const vector &args) { unique_ptr> expanded(new vector()); expanded->reserve(args.size()); for (auto arg = args.begin(); arg != args.end(); ++arg) { @@ -262,6 +260,6 @@ void ParseOptions(int argc, char *argv[]) { } if (opt.working_dir.empty()) { - opt.working_dir = getcwd(nullptr, 0); + opt.working_dir = getcwd(NULL, 0); } } diff --git a/src/main/tools/linux-sandbox-pid1.cc b/src/main/tools/linux-sandbox-pid1.cc index 4a366f2a35..0095d7262c 100644 --- a/src/main/tools/linux-sandbox-pid1.cc +++ b/src/main/tools/linux-sandbox-pid1.cc @@ -17,6 +17,21 @@ * mount, UTS, IPC and PID namespace. */ +#include "linux-sandbox-options.h" +#include "linux-sandbox-utils.h" +#include "linux-sandbox.h" + +// Note that we define DIE() here and not in a shared header, because we want to +// use _exit() in the +// pid1 child, but exit() in the parent. +#define DIE(args...) \ + { \ + fprintf(stderr, __FILE__ ":" S__LINE__ ": \"" args); \ + fprintf(stderr, "\": "); \ + perror(NULL); \ + _exit(EXIT_FAILURE); \ + } + #include #include #include @@ -41,11 +56,6 @@ #include -#include "src/main/tools/linux-sandbox-options.h" -#include "src/main/tools/linux-sandbox-utils.h" -#include "src/main/tools/linux-sandbox.h" -#include "src/main/tools/process-tools.h" - static int global_child_pid; static void SetupSelfDestruction(int *sync_pipe) { @@ -74,11 +84,31 @@ static void SetupSelfDestruction(int *sync_pipe) { static void SetupMountNamespace() { // Fully isolate our mount namespace private from outside events, so that // mounts in the outside environment do not affect our sandbox. - if (mount(nullptr, "/", nullptr, MS_REC | MS_PRIVATE, nullptr) < 0) { + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) < 0) { DIE("mount"); } } +static void WriteFile(const std::string &filename, const char *fmt, ...) 
{ + FILE *stream = fopen(filename.c_str(), "w"); + if (stream == NULL) { + DIE("fopen(%s)", filename.c_str()); + } + + va_list ap; + va_start(ap, fmt); + int r = vfprintf(stream, fmt, ap); + va_end(ap); + + if (r < 0) { + DIE("vfprintf"); + } + + if (fclose(stream) != 0) { + DIE("fclose(%s)", filename.c_str()); + } +} + static void SetupUserNamespace() { // Disable needs for CAP_SETGID. struct stat sb; @@ -100,7 +130,7 @@ static void SetupUserNamespace() { } else if (opt.fake_username) { // Change our username to 'nobody'. struct passwd *pwd = getpwnam("nobody"); - if (pwd == nullptr) { + if (pwd == NULL) { DIE("unable to find passwd entry for user nobody") } @@ -130,8 +160,8 @@ static void MountFilesystems() { for (const std::string &tmpfs_dir : opt.tmpfs_dirs) { PRINT_DEBUG("tmpfs: %s", tmpfs_dir.c_str()); if (mount("tmpfs", tmpfs_dir.c_str(), "tmpfs", - MS_NOSUID | MS_NODEV | MS_NOATIME, nullptr) < 0) { - DIE("mount(tmpfs, %s, tmpfs, MS_NOSUID | MS_NODEV | MS_NOATIME, nullptr)", + MS_NOSUID | MS_NODEV | MS_NOATIME, NULL) < 0) { + DIE("mount(tmpfs, %s, tmpfs, MS_NOSUID | MS_NODEV | MS_NOATIME, NULL)", tmpfs_dir.c_str()); } } @@ -140,9 +170,9 @@ static void MountFilesystems() { // do this is by bind-mounting it upon itself. 
PRINT_DEBUG("working dir: %s", opt.working_dir.c_str()); - if (mount(opt.working_dir.c_str(), opt.working_dir.c_str(), nullptr, MS_BIND, - nullptr) < 0) { - DIE("mount(%s, %s, nullptr, MS_BIND, nullptr)", opt.working_dir.c_str(), + if (mount(opt.working_dir.c_str(), opt.working_dir.c_str(), NULL, MS_BIND, + NULL) < 0) { + DIE("mount(%s, %s, NULL, MS_BIND, NULL)", opt.working_dir.c_str(), opt.working_dir.c_str()); } @@ -150,17 +180,16 @@ static void MountFilesystems() { std::string source = opt.bind_mount_sources.at(i); std::string target = opt.bind_mount_targets.at(i); PRINT_DEBUG("bind mount: %s -> %s", source.c_str(), target.c_str()); - if (mount(source.c_str(), target.c_str(), nullptr, MS_BIND, nullptr) < 0) { - DIE("mount(%s, %s, nullptr, MS_BIND, nullptr)", source.c_str(), - target.c_str()); + if (mount(source.c_str(), target.c_str(), NULL, MS_BIND, NULL) < 0) { + DIE("mount(%s, %s, NULL, MS_BIND, NULL)", source.c_str(), target.c_str()); } } for (const std::string &writable_file : opt.writable_files) { PRINT_DEBUG("writable: %s", writable_file.c_str()); - if (mount(writable_file.c_str(), writable_file.c_str(), nullptr, MS_BIND, - nullptr) < 0) { - DIE("mount(%s, %s, nullptr, MS_BIND, nullptr)", writable_file.c_str(), + if (mount(writable_file.c_str(), writable_file.c_str(), NULL, MS_BIND, + NULL) < 0) { + DIE("mount(%s, %s, NULL, MS_BIND, NULL)", writable_file.c_str(), writable_file.c_str()); } } @@ -192,34 +221,34 @@ static bool ShouldBeWritable(const std::string &mnt_dir) { // ShouldBeWritable returns true. static void MakeFilesystemMostlyReadOnly() { FILE *mounts = setmntent("/proc/self/mounts", "r"); - if (mounts == nullptr) { + if (mounts == NULL) { DIE("setmntent"); } struct mntent *ent; - while ((ent = getmntent(mounts)) != nullptr) { + while ((ent = getmntent(mounts)) != NULL) { int mountFlags = MS_BIND | MS_REMOUNT; // MS_REMOUNT does not allow us to change certain flags. This means, we have // to first read them out and then pass them in back again. 
There seems to // be no better way than this (an API for just getting the mount flags of a // mount entry as a bitmask would be great). - if (hasmntopt(ent, "nodev") != nullptr) { + if (hasmntopt(ent, "nodev") != NULL) { mountFlags |= MS_NODEV; } - if (hasmntopt(ent, "noexec") != nullptr) { + if (hasmntopt(ent, "noexec") != NULL) { mountFlags |= MS_NOEXEC; } - if (hasmntopt(ent, "nosuid") != nullptr) { + if (hasmntopt(ent, "nosuid") != NULL) { mountFlags |= MS_NOSUID; } - if (hasmntopt(ent, "noatime") != nullptr) { + if (hasmntopt(ent, "noatime") != NULL) { mountFlags |= MS_NOATIME; } - if (hasmntopt(ent, "nodiratime") != nullptr) { + if (hasmntopt(ent, "nodiratime") != NULL) { mountFlags |= MS_NODIRATIME; } - if (hasmntopt(ent, "relatime") != nullptr) { + if (hasmntopt(ent, "relatime") != NULL) { mountFlags |= MS_RELATIME; } @@ -229,7 +258,7 @@ static void MakeFilesystemMostlyReadOnly() { PRINT_DEBUG("remount %s: %s", (mountFlags & MS_RDONLY) ? "ro" : "rw", ent->mnt_dir); - if (mount(nullptr, ent->mnt_dir, nullptr, mountFlags, nullptr) < 0) { + if (mount(NULL, ent->mnt_dir, NULL, mountFlags, NULL) < 0) { // If we get EACCES or EPERM, this might be a mount-point for which we // don't have read access. Not much we can do about this, but it also // won't do any harm, so let's go on. The same goes for EINVAL or ENOENT, @@ -243,8 +272,7 @@ static void MakeFilesystemMostlyReadOnly() { // should just ignore it. if (errno != EACCES && errno != EPERM && errno != EINVAL && errno != ENOENT && errno != ESTALE) { - DIE("remount(nullptr, %s, nullptr, %d, nullptr)", ent->mnt_dir, - mountFlags); + DIE("remount(NULL, %s, NULL, %d, NULL)", ent->mnt_dir, mountFlags); } } } @@ -255,8 +283,8 @@ static void MakeFilesystemMostlyReadOnly() { static void MountProc() { // Mount a new proc on top of the old one, because the old one still refers to // our parent PID namespace. 
- if (mount("/proc", "/proc", "proc", MS_NODEV | MS_NOEXEC | MS_NOSUID, - nullptr) < 0) { + if (mount("/proc", "/proc", "proc", MS_NODEV | MS_NOEXEC | MS_NOSUID, NULL) < + 0) { DIE("mount"); } } @@ -298,6 +326,57 @@ static void EnterSandbox() { } } +static void InstallSignalHandler(int signum, void (*handler)(int)) { + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = handler; + if (handler == SIG_IGN || handler == SIG_DFL) { + // No point in blocking signals when using the default handler or ignoring + // the signal. + if (sigemptyset(&sa.sa_mask) < 0) { + DIE("sigemptyset"); + } + } else { + // When using a custom handler, block all signals from firing while the + // handler is running. + if (sigfillset(&sa.sa_mask) < 0) { + DIE("sigfillset"); + } + } + // sigaction may fail for certain reserved signals. Ignore failure in this + // case, but report it in debug mode, just in case. + if (sigaction(signum, &sa, NULL) < 0) { + PRINT_DEBUG("sigaction(%d, &sa, NULL) failed", signum); + } +} + +static void IgnoreSignal(int signum) { InstallSignalHandler(signum, SIG_IGN); } + +// Reset the signal mask and restore the default handler for all signals. +static void RestoreSignalHandlersAndMask() { + // Use an empty signal mask for the process (= unblock all signals). + sigset_t empty_set; + if (sigemptyset(&empty_set) < 0) { + DIE("sigemptyset"); + } + if (sigprocmask(SIG_SETMASK, &empty_set, nullptr) < 0) { + DIE("sigprocmask(SIG_SETMASK, , nullptr)"); + } + + // Set the default signal handler for all signals. + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + if (sigemptyset(&sa.sa_mask) < 0) { + DIE("sigemptyset"); + } + sa.sa_handler = SIG_DFL; + for (int i = 1; i < NSIG; ++i) { + // Ignore possible errors, because we might not be allowed to set the + // handler for certain signals, but we still want to try. 
+ sigaction(i, &sa, nullptr); + } +} + static void ForwardSignal(int signum) { PRINT_DEBUG("ForwardSignal(%d)", signum); kill(-global_child_pid, signum); @@ -340,6 +419,71 @@ static void SetupSignalHandlers() { } } +static void SpawnChild() { + global_child_pid = fork(); + + if (global_child_pid < 0) { + DIE("fork()"); + } else if (global_child_pid == 0) { + // Put the child into its own process group. + if (setpgid(0, 0) < 0) { + DIE("setpgid"); + } + + // Try to assign our terminal to the child process. + if (tcsetpgrp(STDIN_FILENO, getpgrp()) < 0 && errno != ENOTTY) { + DIE("tcsetpgrp") + } + + // Unblock all signals, restore default handlers. + RestoreSignalHandlersAndMask(); + + // Force umask to include read and execute for everyone, to make output + // permissions predictable. + umask(022); + + // argv[] passed to execve() must be a null-terminated array. + opt.args.push_back(nullptr); + + if (execvp(opt.args[0], opt.args.data()) < 0) { + DIE("execvp(%s, %p)", opt.args[0], opt.args.data()); + } + } +} + +static void WaitForChild() { + while (1) { + // Check for zombies to be reaped and exit, if our own child exited. + int status; + pid_t killed_pid = waitpid(-1, &status, 0); + PRINT_DEBUG("waitpid returned %d", killed_pid); + + if (killed_pid < 0) { + // Our PID1 process got a signal that interrupted the waitpid() call and + // that was either ignored or forwarded to the child. This is expected & + // fine, just continue waiting. + if (errno == EINTR) { + continue; + } + DIE("waitpid") + } else { + if (killed_pid == global_child_pid) { + // If the child process we spawned earlier terminated, we'll also + // terminate. We can simply _exit() here, because the Linux kernel will + // kindly SIGKILL all remaining processes in our PID namespace once we + // exit.
+ if (WIFSIGNALED(status)) { + PRINT_DEBUG("child died due to signal %d", WTERMSIG(status)); + _exit(128 + WTERMSIG(status)); + } else { + PRINT_DEBUG("child exited with code %d", WEXITSTATUS(status)); + _exit(WEXITSTATUS(status)); + } + } + } + } +} + int Pid1Main(void *sync_pipe_param) { if (getpid() != 1) { DIE("Using PID namespaces, but we are not PID 1"); @@ -357,6 +501,7 @@ int Pid1Main(void *sync_pipe_param) { SetupNetworking(); EnterSandbox(); SetupSignalHandlers(); - global_child_pid = SpawnCommand(opt.args); - return WaitForChild(global_child_pid); + SpawnChild(); + WaitForChild(); + _exit(EXIT_FAILURE); } diff --git a/src/main/tools/linux-sandbox.cc b/src/main/tools/linux-sandbox.cc index 129454a666..1f078026d7 100644 --- a/src/main/tools/linux-sandbox.cc +++ b/src/main/tools/linux-sandbox.cc @@ -37,6 +37,18 @@ * system are invisible. */ +#include "linux-sandbox-options.h" +#include "linux-sandbox-pid1.h" +#include "linux-sandbox-utils.h" + +#define DIE(args...) \ + { \ + fprintf(stderr, __FILE__ ":" S__LINE__ ": \"" args); \ + fprintf(stderr, "\": "); \ + perror(NULL); \ + exit(EXIT_FAILURE); \ + } + #include #include #include @@ -58,26 +70,20 @@ #include #include -#include "src/main/tools/linux-sandbox-options.h" -#include "src/main/tools/linux-sandbox-pid1.h" -#include "src/main/tools/linux-sandbox-utils.h" -#include "src/main/tools/process-tools.h" - int global_outer_uid; int global_outer_gid; -// The PID of our child. -static volatile sig_atomic_t global_child_pid; +static int global_child_pid; // The signal that will be sent to the child when a timeout occurs. static volatile sig_atomic_t global_next_timeout_signal = SIGTERM; -// Whether the child was killed due to a timeout. -static volatile sig_atomic_t global_timeout_occurred; +// The signal that caused us to kill the child (e.g. on timeout). 
+static volatile sig_atomic_t global_signal; static void CloseFds() { DIR *fds = opendir("/proc/self/fd"); - if (fds == nullptr) { + if (fds == NULL) { DIE("opendir"); } @@ -85,7 +91,7 @@ static void CloseFds() { errno = 0; struct dirent *dent = readdir(fds); - if (dent == nullptr) { + if (dent == NULL) { if (errno != 0) { DIE("readdir"); } @@ -112,67 +118,28 @@ static void CloseFds() { } } -static void OnTimeout(int sig) { - global_timeout_occurred = true; - kill(global_child_pid, global_next_timeout_signal); - if (global_next_timeout_signal == SIGTERM && opt.kill_delay_secs > 0) { - global_next_timeout_signal = SIGKILL; - SetTimeout(opt.kill_delay_secs); +static void HandleSignal(int signum, void (*handler)(int)) { + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = handler; + if (sigemptyset(&sa.sa_mask) < 0) { + DIE("sigemptyset"); } -} - -static void ForwardSignal(int signum) { - if (global_child_pid > 0) { - kill(global_child_pid, signum); + if (sigaction(signum, &sa, NULL) < 0) { + DIE("sigaction"); } } -static void SetupSignalHandlers() { - RestoreSignalHandlersAndMask(); - - for (int signum = 1; signum < NSIG; signum++) { - switch (signum) { - // Some signals should indeed kill us and not be forwarded to the child, - // thus we can use the default handler. - case SIGABRT: - case SIGBUS: - case SIGFPE: - case SIGILL: - case SIGSEGV: - case SIGSYS: - case SIGTRAP: - break; - // It's fine to use the default handler for SIGCHLD, because we use - // waitpid() in the main loop to wait for our child to die anyway. - case SIGCHLD: - break; - // One does not simply install a signal handler for these two signals - case SIGKILL: - case SIGSTOP: - break; - // Ignore SIGTTIN and SIGTTOU, as we hand off the terminal to the child in - // SpawnChild() later. - case SIGTTIN: - case SIGTTOU: - IgnoreSignal(signum); - break; - // We need a special signal handler for this if we use a timeout. 
- case SIGALRM: - if (opt.timeout_secs > 0) { - InstallSignalHandler(signum, OnTimeout); - } else { - InstallSignalHandler(signum, ForwardSignal); - } - break; - // All other signals should be forwarded to the child. - default: - InstallSignalHandler(signum, ForwardSignal); - break; - } +static void OnTimeout(int sig) { + global_signal = sig; + kill(global_child_pid, global_next_timeout_signal); + if (global_next_timeout_signal == SIGTERM && opt.kill_delay_secs > 0) { + global_next_timeout_signal = SIGKILL; + alarm(opt.kill_delay_secs); } } -static int SpawnPid1() { +static void SpawnPid1() { const int kStackSize = 1024 * 1024; std::vector child_stack(kStackSize); @@ -193,13 +160,13 @@ static int SpawnPid1() { // We use clone instead of unshare, because unshare sometimes fails with // EINVAL due to a race condition in the Linux kernel (see // https://lkml.org/lkml/2015/7/28/833). - int child_pid = + global_child_pid = clone(Pid1Main, child_stack.data() + kStackSize, clone_flags, sync_pipe); - if (child_pid < 0) { + if (global_child_pid < 0) { DIE("clone"); } - PRINT_DEBUG("linux-sandbox-pid1 has PID %d", child_pid); + PRINT_DEBUG("linux-sandbox-pid1 has PID %d", global_child_pid); // We close the write end of the sync pipe, read a byte and then close the // pipe. This proves to the linux-sandbox-pid1 process that we still existed @@ -215,26 +182,25 @@ static int SpawnPid1() { if (close(sync_pipe[0]) < 0) { DIE("close"); } - - return child_pid; } -static int WaitForPid1(int child_pid) { +static int WaitForPid1() { int err, status; do { - err = waitpid(child_pid, &status, 0); + err = waitpid(global_child_pid, &status, 0); } while (err < 0 && errno == EINTR); if (err < 0) { DIE("waitpid"); } - if (global_timeout_occurred) { + if (global_signal > 0) { // The child exited because we killed it due to receiving a signal // ourselves. Do not trust the exitcode in this case, just calculate it from // the signal. 
- PRINT_DEBUG("child exited due to timeout"); - return 128 + SIGALRM; + PRINT_DEBUG("child exited due to us catching signal: %s", + strsignal(global_signal)); + return 128 + global_signal; } else if (WIFSIGNALED(status)) { PRINT_DEBUG("child exited due to receiving signal: %s", strsignal(WTERMSIG(status))); @@ -245,14 +211,48 @@ static int WaitForPid1(int child_pid) { } } +static void Redirect(const std::string &target_path, int fd) { + if (!target_path.empty() && target_path != "-") { + const int flags = O_WRONLY | O_CREAT | O_TRUNC | O_APPEND; + int fd_out = open(target_path.c_str(), flags, 0666); + if (fd_out < 0) { + DIE("open(%s)", target_path.c_str()); + } + // If we were launched with less than 3 fds (stdin, stdout, stderr) open, + // but redirection is still requested via a command-line flag, something is + // wacky and the following code would not do what we intend to do, so let's + // bail. + if (fd_out < 3) { + DIE("open(%s) returned a handle that is reserved for stdin / stdout / " + "stderr", + target_path.c_str()); + } + if (dup2(fd_out, fd) < 0) { + DIE("dup2()"); + } + if (close(fd_out) < 0) { + DIE("close()"); + } + } +} + int main(int argc, char *argv[]) { - KillMeWhenMyParentDies(SIGKILL); - DropPrivileges(); + // Ask the kernel to kill us with SIGKILL if our parent dies. + if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) { + DIE("prctl"); + } + ParseOptions(argc, argv); Redirect(opt.stdout_path, STDOUT_FILENO); Redirect(opt.stderr_path, STDERR_FILENO); + // This should never be called as a setuid binary, drop privileges just in + // case. We don't need to be root, because we use user namespaces anyway. + if (setuid(getuid()) < 0) { + DIE("setuid"); + } + global_outer_uid = getuid(); global_outer_gid = getgid(); @@ -260,12 +260,11 @@ int main(int argc, char *argv[]) { // file handles from our parent. 
CloseFds(); - SetupSignalHandlers(); - global_child_pid = SpawnPid1(); - + HandleSignal(SIGALRM, OnTimeout); if (opt.timeout_secs > 0) { - SetTimeout(opt.timeout_secs); + alarm(opt.timeout_secs); } - return WaitForPid1(global_child_pid); + SpawnPid1(); + return WaitForPid1(); } diff --git a/src/main/tools/process-tools.c b/src/main/tools/process-tools.c new file mode 100644 index 0000000000..9dddfee5f5 --- /dev/null +++ b/src/main/tools/process-tools.c @@ -0,0 +1,151 @@ +// Copyright 2015 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "process-tools.h" + +int SwitchToEuid() { + int uid = getuid(); + int euid = geteuid(); + if (uid != euid) { + CHECK_CALL(setreuid(euid, euid)); + } + return euid; +} + +int SwitchToEgid() { + int gid = getgid(); + int egid = getegid(); + if (gid != egid) { + CHECK_CALL(setregid(egid, egid)); + } + return egid; +} + +void Redirect(const char *target_path, int fd, const char *name) { + if (target_path != NULL && strcmp(target_path, "-") != 0) { + int fd_out; + const int flags = O_WRONLY | O_CREAT | O_TRUNC | O_APPEND; + CHECK_CALL(fd_out = open(target_path, flags, 0666)); + CHECK_CALL(dup2(fd_out, fd)); + CHECK_CALL(close(fd_out)); + } +} + +void RedirectStdout(const char *stdout_path) { + Redirect(stdout_path, STDOUT_FILENO, "stdout"); +} + +void RedirectStderr(const char *stderr_path) { + Redirect(stderr_path, STDERR_FILENO, "stderr"); +} + +void KillEverything(int pgrp, bool gracefully, double graceful_kill_delay) { + if (gracefully) { + kill(-pgrp, SIGTERM); + + // Round up fractional seconds in this polling implementation. + int kill_delay = (int)(ceil(graceful_kill_delay)); + + // If the process is still alive, give it some time to die gracefully. + while (kill_delay-- > 0 && kill(-pgrp, 0) == 0) { + sleep(1); + } + } + + kill(-pgrp, SIGKILL); +} + +void HandleSignal(int sig, void (*handler)(int)) { + struct sigaction sa = {.sa_handler = handler}; + CHECK_CALL(sigemptyset(&sa.sa_mask)); + CHECK_CALL(sigaction(sig, &sa, NULL)); +} + +void UnHandle(int sig) { + switch (sig) { + case SIGSTOP: + case SIGKILL: + // These signals can't be handled, so they'll always have a valid default + // handler. In fact, even trying to install SIG_DFL again will result in + // EINVAL, so we'll just not do anything for these. 
+ return; + default: + HandleSignal(sig, SIG_DFL); + } +} + +void ClearSignalMask() { + // Use an empty signal mask for the process. + sigset_t empty_sset; + CHECK_CALL(sigemptyset(&empty_sset)); + CHECK_CALL(sigprocmask(SIG_SETMASK, &empty_sset, NULL)); + + // Set the default signal handler for all signals. + for (int i = 1; i < NSIG; ++i) { + if (i == SIGKILL || i == SIGSTOP) { + continue; + } + struct sigaction sa = {.sa_handler = SIG_DFL}; + CHECK_CALL(sigemptyset(&sa.sa_mask)); + // Ignore possible errors, because we might not be allowed to set the + // handler for certain signals, but we still want to try. + sigaction(i, &sa, NULL); + } +} + +void SetTimeout(double timeout_secs) { + if (timeout_secs <= 0) { + return; + } + + double int_val, fraction_val; + fraction_val = modf(timeout_secs, &int_val); + + struct itimerval timer; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = 0; + timer.it_value.tv_sec = (long)int_val, + timer.it_value.tv_usec = (long)(fraction_val * 1e6); + + CHECK_CALL(setitimer(ITIMER_REAL, &timer, NULL)); +} + +int WaitChild(pid_t pid, const char *name) { + int err, status; + + do { + err = waitpid(pid, &status, 0); + } while (err == -1 && errno == EINTR); + + if (err == -1) { + DIE("wait on %s (pid %d) failed\n", name, pid); + } + + return status; +} diff --git a/src/main/tools/process-tools.h b/src/main/tools/process-tools.h index 0244ebb816..a0ba38816d 100644 --- a/src/main/tools/process-tools.h +++ b/src/main/tools/process-tools.h @@ -15,66 +15,71 @@ #ifndef PROCESS_TOOLS_H__ #define PROCESS_TOOLS_H__ -#include -#include +#include +#include +// see +// http://stackoverflow.com/questions/5641427/how-to-make-preprocessor-generate-a-string-for-line-keyword #define S(x) #x #define S_(x) S(x) #define S__LINE__ S_(__LINE__) -#define DIE(...) \ - { \ - fprintf(stderr, __FILE__ ":" S__LINE__ ": \"" __VA_ARGS__); \ - fprintf(stderr, "\": "); \ - perror(nullptr); \ - exit(EXIT_FAILURE); \ +#define DIE(args...) 
\ + { \ + fprintf(stderr, __FILE__ ":" S__LINE__ ": " args); \ + exit(EXIT_FAILURE); \ } -#define PRINT_DEBUG(...) \ - do { \ - if (opt.debug) { \ - fprintf(stderr, __FILE__ ":" S__LINE__ ": " __VA_ARGS__); \ - fprintf(stderr, "\n"); \ - } \ - } while (0) +#define CHECK_CALL(x) \ + if ((x) == -1) { \ + fprintf(stderr, __FILE__ ":" S__LINE__ ": "); \ + perror(#x); \ + exit(EXIT_FAILURE); \ + } -// Set the effective and saved uid / gid to the real uid / gid. -void DropPrivileges(); +#define CHECK_NOT_NULL(x) \ + if (x == NULL) { \ + perror(#x); \ + exit(EXIT_FAILURE); \ + } -// Redirect the open file descriptor fd to the file target_path. Do nothing if -// target_path is '-'. -void Redirect(const std::string &target_path, int fd); +// Switch completely to the effective uid. +// Some programs (notably, bash) ignore the euid and just use the uid. This +// limits the ability for us to use process-wrapper as a setuid binary for +// security/user-isolation. +int SwitchToEuid(); -// Write formatted contents into the file filename. -void WriteFile(const std::string &filename, const char *fmt, ...); +// Switch completely to the effective gid. +int SwitchToEgid(); -// Receive SIGALRM after the given timeout. timeout_secs must be positive. -void SetTimeout(double timeout_secs); +// Redirect stdout to the file stdout_path (but not if stdout_path is "-"). +void RedirectStdout(const char *stdout_path); -// Installs a signal handler for signum and sets all signals to block during -// that signal. -void InstallSignalHandler(int signum, void (*handler)(int)); +// Redirect stderr to the file stderr_path (but not if stderr_path is "-"). +void RedirectStderr(const char *stderr_path); -// Sets the signal handler of signum to SIG_IGN. -void IgnoreSignal(int signum); +// Make sure the process group "pgrp" and all its subprocesses are killed. +// If "gracefully" is true, sends SIGTERM first and after a timeout of +// "graceful_kill_delay" seconds, sends SIGKILL.
+// If not, send SIGKILL immediately. +void KillEverything(int pgrp, bool gracefully, double graceful_kill_delay); -// Reset the signal mask and restore the default handler for all signals. -void RestoreSignalHandlersAndMask(); +// Set up a signal handler for a signal. +void HandleSignal(int sig, void (*handler)(int)); -// Ask the kernel to kill us with signum if our parent dies. -void KillMeWhenMyParentDies(int signum); +// Revert signal handler for a signal to the default. +void UnHandle(int sig); -// This is the magic that makes waiting for all children (even grandchildren) -// work. By becoming a subreaper, all grandchildren that are not waited for by -// our direct child will be reparented to us, which allows us to wait for them. -void BecomeSubreaper(); +// Use an empty signal mask for the process and set all signal handlers to their +// default. +void ClearSignalMask(); -// Forks and execvp's the process specified in args in its own process group. -// Returns the pid of the spawned process. -int SpawnCommand(const std::vector &args); +// Receive SIGALRM after the given timeout. No-op if the timeout is +// non-positive. +void SetTimeout(double timeout_secs); -// Waits for child_pid to exit, then kills all remaining (grand)children, waits -// for them to exit, then returns the exitcode of child_pid. -int WaitForChild(int child_pid); +// Wait for "pid" to exit and return its exit code. +// "name" is used for the error message only. +int WaitChild(pid_t pid, const char *name); #endif // PROCESS_TOOLS_H__ diff --git a/src/main/tools/process-wrapper.c b/src/main/tools/process-wrapper.c new file mode 100644 index 0000000000..07ae840a52 --- /dev/null +++ b/src/main/tools/process-wrapper.c @@ -0,0 +1,169 @@ +// Copyright 2014 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// process-wrapper runs a subprocess with a given timeout (optional), +// redirecting stdout and stderr to given files. Upon exit, whether +// from normal termination or timeout, the subprocess (and any of its children) +// is killed. +// +// The exit status of this program is whatever the child process returned, +// unless process-wrapper receives a signal. i.e., on SIGTERM this program will +// die with raise(SIGTERM) even if the child process handles SIGTERM with +// exit(0). + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "process-tools.h" + +// Not in headers on OSX. +extern char **environ; + +static double global_kill_delay; +static int global_child_pid; +static volatile sig_atomic_t global_signal; + +// Options parsing result. +struct Options { + double timeout_secs; + double kill_delay_secs; + const char *stdout_path; + const char *stderr_path; + char *const *args; +}; + +// Print out a usage error and exit with EXIT_FAILURE. argv is the argument +// vector; only argv[0] (the program name) +// is used in the message. +static void Usage(char *const *argv) { + fprintf(stderr, + "Usage: %s " + " [args] ...\n", + argv[0]); + exit(EXIT_FAILURE); +} + +// Parse the command line flags and return the result in an Options structure +// passed as argument.
+static void ParseCommandLine(int argc, char *const *argv, struct Options *opt) { + if (argc <= 5) { + Usage(argv); + } + + argv++; + if (sscanf(*argv++, "%lf", &opt->timeout_secs) != 1) { + DIE("timeout_secs is not a real number.\n"); + } + if (sscanf(*argv++, "%lf", &opt->kill_delay_secs) != 1) { + DIE("kill_delay_secs is not a real number.\n"); + } + opt->stdout_path = *argv++; + opt->stderr_path = *argv++; + opt->args = argv; +} + +// Called when timeout or signal occurs. +void OnSignal(int sig) { + global_signal = sig; + + // Nothing to do if we received a signal before spawning the child. + if (global_child_pid == -1) { + return; + } + + if (sig == SIGALRM) { + // SIGALRM represents a timeout, so we should give the process a bit of + // time to die gracefully if it needs it. + KillEverything(global_child_pid, true, global_kill_delay); + } else { + // Signals should kill the process quickly, as it's typically blocking + // the return of the prompt after a user hits "Ctrl-C". + KillEverything(global_child_pid, false, global_kill_delay); + } +} + +// Run the command specified by the argv array and kill it after timeout +// seconds. +static void SpawnCommand(char *const *argv, double timeout_secs) { + CHECK_CALL(global_child_pid = fork()); + if (global_child_pid == 0) { + // In child. + CHECK_CALL(setsid()); + ClearSignalMask(); + + // Force umask to include read and execute for everyone, to make + // output permissions predictable. + umask(022); + + // Does not return unless something went wrong. + execvp(argv[0], argv); + err(EXIT_FAILURE, "execvp(\"%s\", ...)", argv[0]); + } else { + // In parent. + + // Set up a signal handler which kills all subprocesses when the given + // signal is triggered. 
+ HandleSignal(SIGALRM, OnSignal); + HandleSignal(SIGTERM, OnSignal); + HandleSignal(SIGINT, OnSignal); + SetTimeout(timeout_secs); + + int status = WaitChild(global_child_pid, argv[0]); + + // The child is done for, but may have grandchildren that we still have to + // kill. + kill(-global_child_pid, SIGKILL); + + if (global_signal > 0) { + // Don't trust the exit code if we got a timeout or signal. + UnHandle(global_signal); + raise(global_signal); + } else if (WIFEXITED(status)) { + exit(WEXITSTATUS(status)); + } else { + int sig = WTERMSIG(status); + UnHandle(sig); + raise(sig); + } + } +} + +int main(int argc, char *argv[]) { + struct Options opt; + memset(&opt, 0, sizeof(opt)); + + ParseCommandLine(argc, argv, &opt); + global_kill_delay = opt.kill_delay_secs; + + SwitchToEuid(); + SwitchToEgid(); + + RedirectStdout(opt.stdout_path); + RedirectStderr(opt.stderr_path); + + SpawnCommand(opt.args, opt.timeout_secs); + + return 0; +} diff --git a/src/main/tools/process-wrapper.cc b/src/main/tools/process-wrapper.cc deleted file mode 100644 index 9d02eae79f..0000000000 --- a/src/main/tools/process-wrapper.cc +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright 2014 The Bazel Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// process-wrapper runs a subprocess with a given timeout (optional), -// redirecting stdout and stderr to given files. 
Upon exit, whether -// from normal termination or timeout, the subprocess (and any of its children) -// is killed. -// -// The exit status of this program is whatever the child process returned, -// unless process-wrapper receives a signal. ie, on SIGTERM this program will -// die with raise(SIGTERM) even if the child process handles SIGTERM with -// exit(0). - -#include "src/main/tools/process-tools.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -using std::vector; - -// Not in headers on OSX. -extern char **environ; - -// The pid of the spawned child process. -static volatile sig_atomic_t global_child_pid; - -// The signal that will be sent to the child when a timeout occurs. -static volatile sig_atomic_t global_next_timeout_signal = SIGTERM; - -// Whether the child was killed due to a timeout. -static volatile sig_atomic_t global_timeout_occurred; - -// Options parsing result. -struct Options { - double timeout_secs; - double kill_delay_secs; - std::string stdout_path; - std::string stderr_path; - bool debug; - vector args; -}; - -static struct Options opt; - -// Print out a usage error and exit with EXIT_FAILURE. -static void Usage(char *program_name) { - fprintf(stderr, - "Usage: %s " - " [args] ...\n", - program_name); - exit(EXIT_FAILURE); -} - -// Parse the command line flags and put the results in the global opt variable. 
-static void ParseCommandLine(vector args) { - if (args.size() <= 5) { - Usage(args.front()); - } - - int optind = 1; - - if (sscanf(args[optind++], "%lf", &opt.timeout_secs) != 1) { - DIE("timeout_secs is not a real number.\n"); - } - if (sscanf(args[optind++], "%lf", &opt.kill_delay_secs) != 1) { - DIE("kill_delay_secs is not a real number.\n"); - } - opt.stdout_path.assign(args[optind++]); - opt.stderr_path.assign(args[optind++]); - opt.args.assign(args.begin() + optind, args.end()); - - // argv[] passed to execve() must be a null-terminated array. - opt.args.push_back(nullptr); -} - -static void OnTimeout(int signum) { - global_timeout_occurred = true; - kill(-global_child_pid, global_next_timeout_signal); - if (global_next_timeout_signal == SIGTERM && opt.kill_delay_secs > 0) { - global_next_timeout_signal = SIGKILL; - SetTimeout(opt.kill_delay_secs); - } -} - -static void ForwardSignal(int signum) { - if (global_child_pid > 0) { - kill(-global_child_pid, signum); - } -} - -static void SetupSignalHandlers() { - RestoreSignalHandlersAndMask(); - - for (int signum = 1; signum < NSIG; signum++) { - switch (signum) { - // Some signals should indeed kill us and not be forwarded to the child, - // thus we can use the default handler. - case SIGABRT: - case SIGBUS: - case SIGFPE: - case SIGILL: - case SIGSEGV: - case SIGSYS: - case SIGTRAP: - break; - // It's fine to use the default handler for SIGCHLD, because we use wait() - // in the main loop to wait for children to die anyway. - case SIGCHLD: - break; - // One does not simply install a signal handler for these two signals - case SIGKILL: - case SIGSTOP: - break; - // Ignore SIGTTIN and SIGTTOU, as we hand off the terminal to the child in - // SpawnChild(). - case SIGTTIN: - case SIGTTOU: - IgnoreSignal(signum); - break; - // We need a special signal handler for this if we use a timeout. 
- case SIGALRM: - if (opt.timeout_secs > 0) { - InstallSignalHandler(signum, OnTimeout); - } else { - InstallSignalHandler(signum, ForwardSignal); - } - break; - // All other signals should be forwarded to the child. - default: - InstallSignalHandler(signum, ForwardSignal); - break; - } - } -} - -int main(int argc, char *argv[]) { - KillMeWhenMyParentDies(SIGTERM); - DropPrivileges(); - - vector args(argv, argv + argc); - ParseCommandLine(args); - - Redirect(opt.stdout_path, STDOUT_FILENO); - Redirect(opt.stderr_path, STDERR_FILENO); - - SetupSignalHandlers(); - BecomeSubreaper(); - global_child_pid = SpawnCommand(opt.args); - - if (opt.timeout_secs > 0) { - SetTimeout(opt.timeout_secs); - } - - int exitcode = WaitForChild(global_child_pid); - if (global_timeout_occurred) { - return 128 + SIGALRM; - } - - return exitcode; -} diff --git a/src/test/shell/bazel/process-wrapper_test.sh b/src/test/shell/bazel/process-wrapper_test.sh index 18903d3bfc..b720ab8dbd 100755 --- a/src/test/shell/bazel/process-wrapper_test.sh +++ b/src/test/shell/bazel/process-wrapper_test.sh @@ -105,7 +105,7 @@ function test_execvp_error_message() { local code=0 $process_wrapper -1 0 $OUT $ERR /bin/notexisting &> $TEST_log || code=$? assert_equals 1 "$code" - assert_contains "\"execvp(/bin/notexisting, [[:alnum:]]\+)\": No such file or directory" "$ERR" + assert_contains "execvp(\"/bin/notexisting\", ...): No such file or directory" "$ERR" } run_suite "process-wrapper" -- cgit v1.2.3