#define _GNU_SOURCE // Copyright 2014 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int global_debug = 0; static int global_cpid; // Returned by fork() #define PRINT_DEBUG(...) do { if (global_debug) {fprintf(stderr, "sandbox.c: " __VA_ARGS__);}} while(0) #define CHECK_CALL(x) if ((x) == -1) { perror(#x); exit(1); } #define CHECK_NOT_NULL(x) if (x == NULL) { perror(#x); exit(1); } #define DIE() do { fprintf(stderr, "Error in %d\n", __LINE__); exit(-1); } while(0); int kChildrenCleanupDelay = 1; void Usage() { fprintf(stderr, "Usage: ./sandbox [-R sandbox-root] [-m mount] -C command arg1\n" "Mandatory arguments:\n" " -C command to run inside sandbox, followed by arguments\n" " -S directory which will become the root of the sandbox\n" "\n" "Optional arguments:\n" " -t absolute path to bazel tools directory\n" " -T timeout after which sandbox will be terminated\n" " -m system directory to mount inside the sandbox\n" " Multiple directories can be specified and each of them will\n" " be mount as readonly\n" " -D if set, debug info will be printed\n"); exit(1); } void PropagateSignals(); void EnableAlarm(); void SetupSlashDev(); static volatile sig_atomic_t global_signal_received = 0; int main(int argc, char *argv[]) { char *include_prefix = NULL; char *sandbox_root = NULL; char *tools = NULL; char **mounts = malloc(argc * sizeof(char*)); char **includes = malloc(argc * sizeof(char*)); int num_mounts = 0; int num_includes = 0; int iArg = 0; int uid = getuid(); int gid = getgid(); int timeout = 0; for (iArg = 1; iArg < argc - 1; iArg++) { if (strlen(argv[iArg]) != 2) { Usage(); } if (argv[iArg][0] != '-') { Usage(); } switch (argv[iArg][1]) { case 'S': if (sandbox_root == NULL) { sandbox_root = argv[++iArg]; } else { fprintf(stderr, "Multiple sandbox roots (-S) specified (expected one).\n"); Usage(); } break; case 'm': mounts[num_mounts++] = argv[++iArg]; break; case 'D': global_debug = 1; break; case 'T': sscanf(argv[iArg], "%d", &timeout); break; case 'N': include_prefix = argv[++iArg]; break; case 'n': includes[num_includes++] = argv[++iArg]; break; case 'C': iArg++; goto parsing_finished; case 't': tools = argv[++iArg]; break; default: fprintf(stderr, "Unrecognized argument: %s\n", argv[iArg]); Usage(); } } parsing_finished: if (iArg == argc) { fprintf(stderr, "No command specified.\n"); Usage(); } if (timeout < 0) { fprintf(stderr, "Invalid timeout (-T) value: %d", timeout); Usage(); } // parsed all arguments, now prepare sandbox PRINT_DEBUG("%s\n", sandbox_root); // create new namespaces in which this process and its children will live CHECK_CALL(unshare(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)); CHECK_CALL(mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)); // mount sandbox and go there CHECK_CALL(mount(sandbox_root, sandbox_root, NULL, MS_BIND | MS_NOSUID, NULL)); CHECK_CALL(chdir(sandbox_root)); SetupSlashDev(); // mount blaze specific directories - tools/ and build-runfiles/ if (tools != NULL) { PRINT_DEBUG("tools: %s\n", tools); CHECK_CALL(mkdir("tools", 0755)); CHECK_CALL(mount(tools, "tools", NULL, MS_BIND | MS_RDONLY, NULL)); } // mounts passed in argv; those are mostly dirs for shared libs for (int i = 0; i < num_mounts; i++) { CHECK_CALL(mount(mounts[i], mounts[i] + 1, NULL, MS_BIND | MS_RDONLY, NULL)); } // c++ compilation // headers go in separate directory if (include_prefix != NULL) { CHECK_CALL(chdir(include_prefix)); for (int i = 0; i < num_includes; i++) { // TODO(bazel-team) sometimes list of -iquote given by bazel contains // invalid (non-existing) entries, ideally we would like not to have them PRINT_DEBUG("include: %s\n", includes[i]); if (mount(includes[i], includes[i] + 1 , NULL, MS_BIND, NULL) > -1) { continue; } if (errno == ENOENT) { continue; } CHECK_CALL(-1); } CHECK_CALL(chdir("..")); } // set group and user mapping from outer namespace to inner: // no changes in the parent, be root in the child int uid_fd, gid_fd; char uid_mapping[64], gid_mapping[64]; sprintf(uid_mapping, "0 %d 1\n", uid); sprintf(gid_mapping, "0 %d 1\n", gid); uid_fd = open("/proc/self/uid_map", O_WRONLY); CHECK_CALL(uid_fd); CHECK_CALL(write(uid_fd, uid_mapping, strlen(uid_mapping))); CHECK_CALL(close(uid_fd)); gid_fd = open("/proc/self/gid_map", O_WRONLY); CHECK_CALL(gid_fd); CHECK_CALL(write(gid_fd, gid_mapping, strlen(gid_mapping))); CHECK_CALL(close(gid_fd)); CHECK_CALL(setresuid(0, 0, 0)); CHECK_CALL(setresgid(0, 0, 0)); CHECK_CALL(mkdir("proc", 0755)); CHECK_CALL(mount("/proc", "proc", NULL, MS_REC | MS_BIND, NULL)); // make sandbox actually hermetic: // move the real root to old_root, then detach it char old_root[16] = "old-root-XXXXXX"; CHECK_NOT_NULL(mkdtemp(old_root)); // pivot_root has no wrapper in libc, so we need syscall() CHECK_CALL(syscall(SYS_pivot_root, ".", old_root)); CHECK_CALL(chroot(".")); CHECK_CALL(umount2(old_root, MNT_DETACH)); CHECK_CALL(rmdir(old_root)); free(mounts); free(includes); for (int i = iArg; i < argc; i += 1) { PRINT_DEBUG("arg: %s\n", argv[i]); } // spawn child and wait until it finishes global_cpid = fork(); if (global_cpid == 0) { CHECK_CALL(setpgid(0, 0)); // if the execvp below fails with "No such file or directory" it means that: // a) the binary is not in the sandbox (which means it wasn't included in // the inputs) // b) the binary uses shared library which is not inside sandbox - you can // check for that by running "ldd ./a.out" (by default directories // starting with /lib* and /usr/lib* should be there) // c) the binary uses elf interpreter which is not inside sandbox - you can // check for that by running "readelf -a a.out | grep interpreter" (the // sandbox code assumes that it is either in /lib*/ or /usr/lib*/) CHECK_CALL(execvp(argv[iArg], argv + iArg)); PRINT_DEBUG("Exec failed near %s:%d\n", __FILE__, __LINE__); exit(1); } else { // PARENT // make sure that all signals propagate to children (mostly useful to kill // entire sandbox) PropagateSignals(); // after given timeout, kill children EnableAlarm(timeout); int status = 0; while (1) { PRINT_DEBUG("Waiting for the child...\n"); pid_t pid = wait(&status); if (global_signal_received) { PRINT_DEBUG("Received signal: %s\n", strsignal(global_signal_received)); CHECK_CALL(killpg(global_cpid, global_signal_received)); // give children some time for cleanup before they terminate sleep(kChildrenCleanupDelay); CHECK_CALL(killpg(global_cpid, SIGKILL)); exit(128 | global_signal_received); } if (errno == EINTR) { continue; } if (pid < 0) { perror("Wait failed:"); exit(1); } if (WIFEXITED(status)) { PRINT_DEBUG("Child exited with status: %d\n", WEXITSTATUS(status)); exit(WEXITSTATUS(status)); } if (WIFSIGNALED(status)) { PRINT_DEBUG("Child terminated by a signal: %d\n", WTERMSIG(status)); exit(WEXITSTATUS(status)); } if (WIFSTOPPED(status)) { PRINT_DEBUG("Child stopped by a signal: %d\n", WSTOPSIG(status)); } } } return 0; } void SignalHandler(int signum, siginfo_t *info, void *uctxt) { global_signal_received = signum; } void PropagateSignals() { // propagate some signals received by the parent to processes in sandbox, so // that it's easier to terminate entire sandbox struct sigaction action = {}; action.sa_flags = SA_SIGINFO; action.sa_sigaction = SignalHandler; // handle all signals that could terminate the process int signals[] = {SIGHUP, SIGINT, SIGKILL, SIGPIPE, SIGALRM, SIGTERM, SIGPOLL, SIGPROF, SIGVTALRM, // signals below produce core dump by default, however at the moment we'll // just terminate SIGQUIT, SIGILL, SIGABRT, SIGFPE, SIGSEGV, SIGBUS, SIGSYS, SIGTRAP, SIGXCPU, SIGXFSZ, -1}; for (int *p = signals; *p != -1; p++) { sigaction(*p, &action, NULL); } } void SetupSlashDev() { CHECK_CALL(mkdir("dev", 0755)); const char *devs[] = { "/dev/null", "/dev/random", "/dev/urandom", "/dev/zero", NULL }; for (int i = 0; devs[i] != NULL; i++) { // open+close to create the file, which will become mount point for actual // device int handle = open(devs[i] + 1, O_CREAT | O_RDONLY, 0644); CHECK_CALL(handle); CHECK_CALL(close(handle)); CHECK_CALL(mount(devs[i], devs[i] + 1, NULL, MS_BIND, NULL)); } } void EnableAlarm(int timeout) { if (timeout <= 0) return; struct itimerval timer = {}; timer.it_value.tv_sec = (long) timeout; CHECK_CALL(setitimer(ITIMER_REAL, &timer, NULL)); }