#define _GNU_SOURCE

// Copyright 2014 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE(review): this header list is reconstructed. Everything except
// <libgen.h>, <limits.h> and <linux/capability.h> is required by symbols this
// file uses; those three are presumed -- confirm against upstream.
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <limits.h>
#include <linux/capability.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

// Non-zero when debug output was requested on the command line (-D).
static int global_debug = 0;

// Print a printf-style message to stderr, prefixed with "sandbox.c: ", but
// only when global_debug is set. The format must be a string literal.
#define PRINT_DEBUG(...)                             \
  do {                                               \
    if (global_debug) {                              \
      fprintf(stderr, "sandbox.c: " __VA_ARGS__);    \
    }                                                \
  } while (0)

// Evaluate x once; if it equals -1, report the failing expression together
// with errno via perror() and exit with status 1. Wrapped in do/while(0) so
// that it behaves as a single statement (safe inside unbraced if/else).
#define CHECK_CALL(x) \
  do {                \
    if ((x) == -1) {  \
      perror(#x);     \
      exit(1);        \
    }                 \
  } while (0)

// Evaluate x once; if it is NULL, report the failing expression together with
// errno via perror() and exit with status 1.
#define CHECK_NOT_NULL(x) \
  do {                    \
    if ((x) == NULL) {    \
      perror(#x);         \
      exit(1);            \
    }                     \
  } while (0)

// Print the current source line to stderr and exit. No trailing semicolon in
// the expansion: the caller supplies it, as with any statement-like macro.
#define DIE()                                     \
  do {                                            \
    fprintf(stderr, "Error in %d\n", __LINE__);   \
    exit(-1);                                     \
  } while (0)

// Seconds the children get for cleanup between the forwarded signal and the
// final SIGKILL.
const int kChildrenCleanupDelay = 1;

// Number of the last signal caught by the signal handler, 0 if none so far.
static volatile sig_atomic_t global_signal_received = 0;

//
// Options parsing result
//
struct Options {
  char **args;           // Command to run (-C / --)
  char *include_prefix;  // Include prefix (-N)
  char *sandbox_root;    // Sandbox root (-S)
  char *tools;           // tools directory (-t)
  char **mounts;         // List of directories to mount (-m)
  char **includes;       // List of include directories (-n)
  int num_mounts;        // size of mounts
  int num_includes;      // size of includes
  int timeout;           // Timeout (-T)
};

// Print out a usage error. argc and argv are the argument counter
// and vector, fmt is a format string for the error message to print.
void Usage(int argc, char **argv, char *fmt, ...);

// Parse the command line flags and return the result in an
// Options structure passed as argument.
void ParseCommandLine(int argc, char **argv, struct Options *opt); // Signal hanlding void PropagateSignals(); void EnableAlarm(); // Sandbox setup void SetupDirectories(struct Options* opt); void SetupSlashDev(); void SetupUserNamespace(int uid, int gid); void ChangeRoot(); // Write the file "filename" using a format string specified by "fmt". // Returns -1 on failure. int WriteFile(const char *filename, const char *fmt, ...); // Run the command specified by the argv array and kill it after // timeout seconds. void SpawnCommand(char **argv, int timeout); int main(int argc, char *argv[]) { struct Options opt = { .args = NULL, .include_prefix = NULL, .sandbox_root = NULL, .tools = NULL, .mounts = calloc(argc, sizeof(char*)), .includes = calloc(argc, sizeof(char*)), .num_mounts = 0, .num_includes = 0, .timeout = 0 }; ParseCommandLine(argc, argv, &opt); int uid = getuid(); int gid = getgid(); // parsed all arguments, now prepare sandbox PRINT_DEBUG("%s\n", opt.sandbox_root); // create new namespaces in which this process and its children will live CHECK_CALL(unshare(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)); CHECK_CALL(mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)); // Create the sandbox directory layout SetupDirectories(&opt); // Set the user namespace (user_namespaces(7)) SetupUserNamespace(uid, gid); // make sandbox actually hermetic: ChangeRoot(); // Finally call the command free(opt.mounts); free(opt.includes); SpawnCommand(opt.args, opt.timeout); return 0; } void SpawnCommand(char **argv, int timeout) { for (int i = 0; argv[i] != NULL; i++) { PRINT_DEBUG("arg: %s\n", argv[i]); } // spawn child and wait until it finishes pid_t cpid = fork(); if (cpid == 0) { CHECK_CALL(setpgid(0, 0)); // if the execvp below fails with "No such file or directory" it means that: // a) the binary is not in the sandbox (which means it wasn't included in // the inputs) // b) the binary uses shared library which is not inside sandbox - you can // check for that 
by running "ldd ./a.out" (by default directories // starting with /lib* and /usr/lib* should be there) // c) the binary uses elf interpreter which is not inside sandbox - you can // check for that by running "readelf -a a.out | grep interpreter" (the // sandbox code assumes that it is either in /lib*/ or /usr/lib*/) CHECK_CALL(execvp(argv[0], argv)); PRINT_DEBUG("Exec failed near %s:%d\n", __FILE__, __LINE__); exit(1); } else { // PARENT // make sure that all signals propagate to children (mostly useful to kill // entire sandbox) PropagateSignals(); // after given timeout, kill children EnableAlarm(timeout); int status = 0; while (1) { PRINT_DEBUG("Waiting for the child...\n"); pid_t pid = wait(&status); if (global_signal_received) { PRINT_DEBUG("Received signal: %s\n", strsignal(global_signal_received)); CHECK_CALL(killpg(cpid, global_signal_received)); // give children some time for cleanup before they terminate sleep(kChildrenCleanupDelay); CHECK_CALL(killpg(cpid, SIGKILL)); exit(128 | global_signal_received); } if (errno == EINTR) { continue; } if (pid < 0) { perror("Wait failed:"); exit(1); } if (WIFEXITED(status)) { PRINT_DEBUG("Child exited with status: %d\n", WEXITSTATUS(status)); exit(WEXITSTATUS(status)); } if (WIFSIGNALED(status)) { PRINT_DEBUG("Child terminated by a signal: %d\n", WTERMSIG(status)); exit(WEXITSTATUS(status)); } if (WIFSTOPPED(status)) { PRINT_DEBUG("Child stopped by a signal: %d\n", WSTOPSIG(status)); } } } } int WriteFile(const char *filename, const char *fmt, ...) 
{ int r; va_list ap; FILE *stream = fopen(filename, "w"); if (stream == NULL) { return -1; } va_start(ap, fmt); r = vfprintf(stream, fmt, ap); va_end(ap); if (r >= 0) { r = fclose(stream); } return r; } // // Signal handling // void SignalHandler(int signum, siginfo_t *info, void *uctxt) { global_signal_received = signum; } void PropagateSignals() { // propagate some signals received by the parent to processes in sandbox, so // that it's easier to terminate entire sandbox struct sigaction action = {}; action.sa_flags = SA_SIGINFO; action.sa_sigaction = SignalHandler; // handle all signals that could terminate the process int signals[] = {SIGHUP, SIGINT, SIGKILL, SIGPIPE, SIGALRM, SIGTERM, SIGPOLL, SIGPROF, SIGVTALRM, // signals below produce core dump by default, however at the moment we'll // just terminate SIGQUIT, SIGILL, SIGABRT, SIGFPE, SIGSEGV, SIGBUS, SIGSYS, SIGTRAP, SIGXCPU, SIGXFSZ, -1}; for (int *p = signals; *p != -1; p++) { sigaction(*p, &action, NULL); } } void EnableAlarm(int timeout) { if (timeout <= 0) return; struct itimerval timer = {}; timer.it_value.tv_sec = (long) timeout; CHECK_CALL(setitimer(ITIMER_REAL, &timer, NULL)); } // // Sandbox setup // void SetupSlashDev() { CHECK_CALL(mkdir("dev", 0755)); const char *devs[] = { "/dev/null", "/dev/random", "/dev/urandom", "/dev/zero", NULL }; for (int i = 0; devs[i] != NULL; i++) { // open+close to create the file, which will become mount point for actual // device int handle = open(devs[i] + 1, O_CREAT | O_RDONLY, 0644); CHECK_CALL(handle); CHECK_CALL(close(handle)); CHECK_CALL(mount(devs[i], devs[i] + 1, NULL, MS_BIND, NULL)); } } void SetupDirectories(struct Options *opt) { // Mount the sandbox and go there. CHECK_CALL(mount(opt->sandbox_root, opt->sandbox_root, NULL, MS_BIND | MS_NOSUID, NULL)); CHECK_CALL(chdir(opt->sandbox_root)); SetupSlashDev(); // Mount blaze specific directories - tools/ and build-runfiles/. 
if (opt->tools != NULL) { PRINT_DEBUG("tools: %s\n", opt->tools); CHECK_CALL(mkdir("tools", 0755)); CHECK_CALL(mount(opt->tools, "tools", NULL, MS_BIND | MS_RDONLY, NULL)); } // Mount directories passed in argv; those are mostly dirs for shared libs. for (int i = 0; i < opt->num_mounts; i++) { CHECK_CALL(mount(opt->mounts[i], opt->mounts[i] + 1, NULL, MS_BIND | MS_RDONLY, NULL)); } // C++ compilation // C++ headers go in a separate directory. if (opt->include_prefix != NULL) { CHECK_CALL(chdir(opt->include_prefix)); for (int i = 0; i < opt->num_includes; i++) { // TODO(bazel-team): sometimes list of -iquote given by bazel contains // invalid (non-existing) entries, ideally we would like not to have them PRINT_DEBUG("include: %s\n", opt->includes[i]); if (mount(opt->includes[i], opt->includes[i] + 1 , NULL, MS_BIND, NULL) > -1) { continue; } if (errno == ENOENT) { continue; } CHECK_CALL(-1); } CHECK_CALL(chdir("..")); } CHECK_CALL(mkdir("proc", 0755)); CHECK_CALL(mount("/proc", "proc", NULL, MS_REC | MS_BIND, NULL)); } void SetupUserNamespace(int uid, int gid) { // Disable needs for CAP_SETGID int r = WriteFile("/proc/self/setgroups", "deny"); if (r < 0 && errno != ENOENT) { // Writing to /proc/self/setgroups might fail on earlier // version of linux because setgroups does not exist, ignore. 
perror("WriteFile(\"/proc/self/setgroups\", \"deny\")"); exit(-1); } // set group and user mapping from outer namespace to inner: // no changes in the parent, be root in the child CHECK_CALL(WriteFile("/proc/self/uid_map", "0 %d 1\n", uid)); CHECK_CALL(WriteFile("/proc/self/gid_map", "0 %d 1\n", gid)); CHECK_CALL(setresuid(0, 0, 0)); CHECK_CALL(setresgid(0, 0, 0)); } void ChangeRoot() { // move the real root to old_root, then detach it char old_root[16] = "old-root-XXXXXX"; CHECK_NOT_NULL(mkdtemp(old_root)); // pivot_root has no wrapper in libc, so we need syscall() CHECK_CALL(syscall(SYS_pivot_root, ".", old_root)); CHECK_CALL(chroot(".")); CHECK_CALL(umount2(old_root, MNT_DETACH)); CHECK_CALL(rmdir(old_root)); } // // Command line parsing // void Usage(int argc, char **argv, char *fmt, ...) { int i; va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fprintf(stderr, "\nUsage: %s [-S sandbox-root] [-m mount] [-C|--] command arg1\n", argv[0]); fprintf(stderr, " provided:"); for (i = 0; i < argc; i++) { fprintf(stderr, " %s", argv[i]); } fprintf(stderr, "\nMandatory arguments:\n" " [-C|--] command to run inside sandbox, followed by arguments\n" " -S directory which will become the root of the sandbox\n" "\n" "Optional arguments:\n" " -t absolute path to bazel tools directory\n" " -T timeout after which sandbox will be terminated\n" " -m system directory to mount inside the sandbox\n" " Multiple directories can be specified and each of them will\n" " be mount as readonly\n" " -D if set, debug info will be printed\n"); exit(1); } void ParseCommandLine(int argc, char **argv, struct Options *opt) { extern char *optarg; extern int optind, optopt; int c; opt->include_prefix = NULL; opt->sandbox_root = NULL; opt->tools = NULL; opt->mounts = malloc(argc * sizeof(char*)); opt->includes = malloc(argc * sizeof(char*)); opt->num_mounts = 0; opt->num_includes = 0; opt->timeout = 0; while ((c = getopt(argc, argv, "+:S:t:T:m:N:n:DC")) != -1) { switch(c) { case 
'S': if (opt->sandbox_root == NULL) { opt->sandbox_root = optarg; } else { Usage(argc, argv, "Multiple sandbox roots (-S) specified (expected one)."); } break; case 'm': opt->mounts[opt->num_mounts++] = optarg; break; case 'D': global_debug = 1; break; case 'T': sscanf(optarg, "%d", &opt->timeout); if (opt->timeout < 0) { Usage(argc, argv, "Invalid timeout (-T) value: %d", opt->timeout); } break; case 'N': opt->include_prefix = optarg; break; case 'n': opt->includes[opt->num_includes++] = optarg; break; case 'C': break; // deprecated, ignore. case 't': opt->tools = optarg; break; case '?': Usage(argc, argv, "Unrecognized argument: -%c (%d)", optopt, optind); break; case ':': Usage(argc, argv, "Flag -%c requires an argument", optopt); break; } } opt->args = argv + optind; if (argc <= optind) { Usage(argc, argv, "No command specified"); } }