about | summary | refs | log | tree | commit | diff | homepage
path: root/src/main/tools/namespace-sandbox.c
diff options
context:
space:
mode:
author: Philipp Wollermann <philwo@google.com> 2015-08-25 12:52:57 +0000
committer: Lukacs Berki <lberki@google.com> 2015-08-26 07:37:05 +0000
commit: 43c4a1a1452603bfe5e6883626c5ac91ea4e8eb6 (patch)
tree: 257c9f0f924b5b2cf96c208cd53ba4ff40259aca /src/main/tools/namespace-sandbox.c
parent: 988bb21407c3abf97100d90cff2b823dd594ef30 (diff)
Execute spawns inside sandboxes to improve hermeticity (spawns can no longer use non-declared inputs) and safety (spawns can no longer affect the host system, e.g. accidentally wipe your home directory). This implementation works on Linux only and uses Linux containers ("namespaces").
The strategy works with all actions that Bazel supports (C++ / Java compilation, genrules, test execution, Skylark-based rules, ...) and in tests, Bazel could successfully bootstrap itself and pass the whole test suite using sandboxed execution. This is not the default behavior yet, but can be activated explicitly by using: bazel build --genrule_strategy=sandboxed --spawn_strategy=sandboxed //my:stuff -- MOS_MIGRATED_REVID=101457297
Diffstat (limited to 'src/main/tools/namespace-sandbox.c')
-rw-r--r-- src/main/tools/namespace-sandbox.c 738
1 files changed, 417 insertions, 321 deletions
diff --git a/src/main/tools/namespace-sandbox.c b/src/main/tools/namespace-sandbox.c
index 5cf6b433be..060356d13f 100644
--- a/src/main/tools/namespace-sandbox.c
+++ b/src/main/tools/namespace-sandbox.c
@@ -1,5 +1,3 @@
-#define _GNU_SOURCE
-
// Copyright 2014 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,404 +12,502 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#define _GNU_SOURCE
+
#include <errno.h>
#include <fcntl.h>
-#include <getopt.h>
-#include <limits.h>
-#include <linux/capability.h>
+#include <libgen.h>
+#include <pwd.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/syscall.h>
-#include <sys/time.h>
#include <sys/types.h>
-#include <sys/wait.h>
#include <unistd.h>
-static int global_debug = 0;
+#include "process-tools.h"
-#define PRINT_DEBUG(...) do { if (global_debug) {fprintf(stderr, "sandbox.c: " __VA_ARGS__);}} while(0)
+#define PRINT_DEBUG(...) \
+ do { \
+ if (global_debug) { \
+ fprintf(stderr, __FILE__ ":" S__LINE__ ": " __VA_ARGS__); \
+ } \
+ } while (0)
-#define CHECK_CALL(x) if ((x) == -1) { perror(#x); exit(1); }
-#define CHECK_NOT_NULL(x) if (x == NULL) { perror(#x); exit(1); }
-#define DIE() do { fprintf(stderr, "Error in %d\n", __LINE__); exit(-1); } while(0);
+static bool global_debug = false;
+static double global_kill_delay;
+static int global_child_pid;
+static volatile sig_atomic_t global_signal;
-const int kChildrenCleanupDelay = 1;
+// The uid and gid of the user and group 'nobody'.
+static const int kNobodyUid = 65534;
+static const int kNobodyGid = 65534;
-static volatile sig_atomic_t global_signal_received = 0;
-
-//
-// Options parsing result
-//
+// Options parsing result.
struct Options {
- char **args; // Command to run (-C / --)
- char *include_prefix; // Include prefix (-N)
- char *sandbox_root; // Sandbox root (-S)
- char *tools; // tools directory (-t)
- char **mounts; // List of directories to mount (-m)
- char **includes; // List of include directories (-n)
- int num_mounts; // size of mounts
- int num_includes; // size of includes
- int timeout; // Timeout (-T)
+ double timeout_secs; // How long to wait before killing the child (-T)
+ double kill_delay_secs; // How long to wait before sending SIGKILL in case of
+ // timeout (-t)
+ const char *stdout_path; // Where to redirect stdout (-l)
+ const char *stderr_path; // Where to redirect stderr (-L)
+ char *const *args; // Command to run (--)
+ const char *sandbox_root; // Sandbox root (-S)
+ const char *working_dir; // Working directory (-W)
+ char **mount_sources; // Map of directories to mount, from
+ char **mount_targets; // sources -> targets (-m)
+ int num_mounts; // How many mounts were specified
};
-// Print out a usage error. argc and argv are the argument counter
-// and vector, fmt is a format string for the error message to print.
-void Usage(int argc, char **argv, char *fmt, ...);
-// Parse the command line flags and return the result in an
-// Options structure passed as argument.
-void ParseCommandLine(int argc, char **argv, struct Options *opt);
-
-// Signal hanlding
-void PropagateSignals();
-void EnableAlarm();
-// Sandbox setup
-void SetupDirectories(struct Options* opt);
-void SetupSlashDev();
-void SetupUserNamespace(int uid, int gid);
-void ChangeRoot();
-// Write the file "filename" using a format string specified by "fmt".
-// Returns -1 on failure.
-int WriteFile(const char *filename, const char *fmt, ...);
-// Run the command specified by the argv array and kill it after
-// timeout seconds.
-void SpawnCommand(char **argv, int timeout);
-
-
-
-int main(int argc, char *argv[]) {
- struct Options opt = {
- .args = NULL,
- .include_prefix = NULL,
- .sandbox_root = NULL,
- .tools = NULL,
- .mounts = calloc(argc, sizeof(char*)),
- .includes = calloc(argc, sizeof(char*)),
- .num_mounts = 0,
- .num_includes = 0,
- .timeout = 0
- };
- ParseCommandLine(argc, argv, &opt);
- int uid = getuid();
- int gid = getgid();
-
- // parsed all arguments, now prepare sandbox
- PRINT_DEBUG("%s\n", opt.sandbox_root);
- // create new namespaces in which this process and its children will live
- CHECK_CALL(unshare(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER));
- CHECK_CALL(mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL));
- // Create the sandbox directory layout
- SetupDirectories(&opt);
- // Set the user namespace (user_namespaces(7))
- SetupUserNamespace(uid, gid);
- // make sandbox actually hermetic:
- ChangeRoot();
+// Print out a usage error and exit. argc and argv are the argument counter
+// and vector; fmt is a printf-style format string for the error message to
+// print.
+static void Usage(int argc, char *const *argv, const char *fmt, ...) {
+ int i;
+ va_list ap;
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
- // Finally call the command
- free(opt.mounts);
- free(opt.includes);
- SpawnCommand(opt.args, opt.timeout);
- return 0;
+ fprintf(stderr,
+ "\nUsage: %s [-S sandbox-root] [-W working-dir] [-M source -m "
+ "target] -- command arg1\n",
+ argv[0]);
+ fprintf(stderr, " provided:");
+ for (i = 0; i < argc; i++) {
+ fprintf(stderr, " %s", argv[i]);
+ }
+ fprintf(stderr,
+ "\nMandatory arguments:\n"
+ " -S directory which will become the root of the sandbox\n"
+ " -- command to run inside sandbox, followed by arguments\n"
+ "\n"
+ "Optional arguments:\n"
+ " -W working directory\n"
+ " -t time to give the child to shutdown cleanly before sending it a "
+ "SIGKILL\n"
+ " -T timeout after which sandbox will be terminated\n"
+ " -t in case timeout occurs, how long to wait before killing the "
+ "child with SIGKILL\n"
+ " -M/-m system directory to mount inside the sandbox\n"
+ " Multiple directories can be specified and each of them will\n"
+ " be mounted readonly. The -M option specifies which directory\n"
+ " to mount, the -m option specifies where to mount it in the\n"
+ " sandbox.\n"
+ " -D if set, debug info will be printed\n"
+ " -l redirect stdout to a file\n"
+ " -L redirect stderr to a file\n");
+ exit(EXIT_FAILURE);
}
-void SpawnCommand(char **argv, int timeout) {
- for (int i = 0; argv[i] != NULL; i++) {
- PRINT_DEBUG("arg: %s\n", argv[i]);
- }
+// Parse the command line flags and return the result in an Options structure
+// passed as argument.
+static void ParseCommandLine(int argc, char *const *argv, struct Options *opt) {
+ extern char *optarg;
+ extern int optind, optopt;
+ int c;
- // spawn child and wait until it finishes
- pid_t cpid = fork();
- if (cpid == 0) {
- CHECK_CALL(setpgid(0, 0));
- // if the execvp below fails with "No such file or directory" it means that:
- // a) the binary is not in the sandbox (which means it wasn't included in
- // the inputs)
- // b) the binary uses shared library which is not inside sandbox - you can
- // check for that by running "ldd ./a.out" (by default directories
- // starting with /lib* and /usr/lib* should be there)
- // c) the binary uses elf interpreter which is not inside sandbox - you can
- // check for that by running "readelf -a a.out | grep interpreter" (the
- // sandbox code assumes that it is either in /lib*/ or /usr/lib*/)
- CHECK_CALL(execvp(argv[0], argv));
- PRINT_DEBUG("Exec failed near %s:%d\n", __FILE__, __LINE__);
- exit(1);
- } else {
- // PARENT
- // make sure that all signals propagate to children (mostly useful to kill
- // entire sandbox)
- PropagateSignals();
- // after given timeout, kill children
- EnableAlarm(timeout);
- int status = 0;
- while (1) {
- PRINT_DEBUG("Waiting for the child...\n");
- pid_t pid = wait(&status);
- if (global_signal_received) {
- PRINT_DEBUG("Received signal: %s\n", strsignal(global_signal_received));
- CHECK_CALL(killpg(cpid, global_signal_received));
- // give children some time for cleanup before they terminate
- sleep(kChildrenCleanupDelay);
- CHECK_CALL(killpg(cpid, SIGKILL));
- exit(128 | global_signal_received);
- }
- if (errno == EINTR) {
- continue;
- }
- if (pid < 0) {
- perror("Wait failed:");
- exit(1);
- }
- if (WIFEXITED(status)) {
- PRINT_DEBUG("Child exited with status: %d\n", WEXITSTATUS(status));
- exit(WEXITSTATUS(status));
- }
- if (WIFSIGNALED(status)) {
- PRINT_DEBUG("Child terminated by a signal: %d\n", WTERMSIG(status));
- exit(WEXITSTATUS(status));
- }
- if (WIFSTOPPED(status)) {
- PRINT_DEBUG("Child stopped by a signal: %d\n", WSTOPSIG(status));
- }
+ while ((c = getopt(argc, argv, ":DS:W:t:T:M:m:l:L:")) != -1) {
+ switch (c) {
+ case 'S':
+ if (opt->sandbox_root == NULL) {
+ opt->sandbox_root = optarg;
+ } else {
+ Usage(argc, argv,
+ "Multiple sandbox roots (-S) specified, expected one.");
+ }
+ break;
+ case 'W':
+ if (opt->working_dir == NULL) {
+ opt->working_dir = optarg;
+ } else {
+ Usage(argc, argv,
+ "Multiple working directories (-W) specified, expected at most "
+ "one.");
+ }
+ break;
+ case 't':
+ if (sscanf(optarg, "%lf", &opt->kill_delay_secs) != 1 ||
+ opt->kill_delay_secs < 0) {
+ Usage(argc, argv, "Invalid kill delay (-t) value: %lf",
+ opt->kill_delay_secs);
+ }
+ break;
+ case 'T':
+ if (sscanf(optarg, "%lf", &opt->timeout_secs) != 1 ||
+ opt->timeout_secs < 0) {
+ Usage(argc, argv, "Invalid timeout (-T) value: %lf",
+ opt->timeout_secs);
+ }
+ break;
+ case 'M':
+ if (opt->mount_sources[opt->num_mounts] != NULL) {
+ Usage(argc, argv, "The -M option must be followed by an -m option.");
+ }
+ opt->mount_sources[opt->num_mounts] = optarg;
+ break;
+ case 'm':
+ if (opt->mount_sources[opt->num_mounts] == NULL) {
+ Usage(argc, argv, "The -m option must be preceded by an -M option.");
+ }
+ if (opt->sandbox_root == NULL) {
+ Usage(argc, argv,
+ "The sandbox root must be set via the -S option before "
+ "specifying an"
+ " -m option.");
+ }
+ if (strstr(optarg, opt->sandbox_root) != optarg) {
+ Usage(argc, argv,
+ "A path passed to the -m option must start with the sandbox "
+ "root.");
+ }
+ opt->mount_targets[opt->num_mounts++] = optarg;
+ break;
+ case 'D':
+ global_debug = true;
+ break;
+ case 'l':
+ if (opt->stdout_path == NULL) {
+ opt->stdout_path = optarg;
+ } else {
+ Usage(argc, argv,
+ "Cannot redirect stdout to more than one destination.");
+ }
+ break;
+ case 'L':
+ if (opt->stderr_path == NULL) {
+ opt->stderr_path = optarg;
+ } else {
+ Usage(argc, argv,
+ "Cannot redirect stderr to more than one destination.");
+ }
+ break;
+ case '?':
+ Usage(argc, argv, "Unrecognized argument: -%c (%d)", optopt, optind);
+ break;
+ case ':':
+ Usage(argc, argv, "Flag -%c requires an argument", optopt);
+ break;
}
}
-}
-int WriteFile(const char *filename, const char *fmt, ...) {
- int r;
- va_list ap;
- FILE *stream = fopen(filename, "w");
- if (stream == NULL) {
- return -1;
+ if (opt->sandbox_root == NULL) {
+ Usage(argc, argv, "Sandbox root (-S) must be specified");
}
- va_start(ap, fmt);
- r = vfprintf(stream, fmt, ap);
- va_end(ap);
- if (r >= 0) {
- r = fclose(stream);
+
+ if (opt->mount_sources[opt->num_mounts] != NULL &&
+ opt->mount_sources[opt->num_mounts] == NULL) {
+ Usage(argc, argv, "An -m option is missing.");
}
- return r;
-}
-//
-// Signal handling
-//
-void SignalHandler(int signum, siginfo_t *info, void *uctxt) {
- global_signal_received = signum;
+ opt->args = argv + optind;
+ if (argc <= optind) {
+ Usage(argc, argv, "No command specified.");
+ }
}
-void PropagateSignals() {
- // propagate some signals received by the parent to processes in sandbox, so
- // that it's easier to terminate entire sandbox
- struct sigaction action = {};
- action.sa_flags = SA_SIGINFO;
- action.sa_sigaction = SignalHandler;
-
- // handle all signals that could terminate the process
- int signals[] = {SIGHUP, SIGINT, SIGKILL, SIGPIPE, SIGALRM, SIGTERM, SIGPOLL,
- SIGPROF, SIGVTALRM,
- // signals below produce core dump by default, however at the moment we'll
- // just terminate
- SIGQUIT, SIGILL, SIGABRT, SIGFPE, SIGSEGV, SIGBUS, SIGSYS, SIGTRAP, SIGXCPU,
- SIGXFSZ, -1};
- for (int *p = signals; *p != -1; p++) {
- sigaction(*p, &action, NULL);
+static void CreateNamespaces() {
+ // This weird workaround is necessary due to unshare sometimes failing with EINVAL due to a race
+ // condition in the Linux kernel (see https://lkml.org/lkml/2015/7/28/833).
+ // An alternative would be to use clone/waitpid instead.
+ int delay = 1;
+ int tries = 0;
+ const int max_tries = 5000000;
+ while (tries++ < max_tries) {
+ if (unshare(CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC) ==
+ 0) {
+ PRINT_DEBUG("unshare succeeded after %d tries\n", tries);
+ return;
+ } else {
+ if (errno != EINVAL) {
+ perror("unshare");
+ exit(EXIT_FAILURE);
+ }
+ }
+ usleep(delay);
+ delay = (delay * 3) / 2;
}
+ fprintf(stderr,
+ "unshare failed with EINVAL even after %d tries, giving up.\n",
+ tries);
+ exit(EXIT_FAILURE);
}
-void EnableAlarm(int timeout) {
- if (timeout <= 0) return;
-
- struct itimerval timer = {};
- timer.it_value.tv_sec = (long) timeout;
- CHECK_CALL(setitimer(ITIMER_REAL, &timer, NULL));
+static void CreateFile(const char *path) {
+ int handle;
+ CHECK_CALL(handle = open(path, O_CREAT | O_WRONLY | O_EXCL, 0666));
+ CHECK_CALL(close(handle));
}
-//
-// Sandbox setup
-//
-void SetupSlashDev() {
+static void SetupDevices() {
CHECK_CALL(mkdir("dev", 0755));
- const char *devs[] = {
- "/dev/null",
- "/dev/random",
- "/dev/urandom",
- "/dev/zero",
- NULL
- };
+ const char *devs[] = {"/dev/null", "/dev/random", "/dev/urandom", "/dev/zero",
+ NULL};
for (int i = 0; devs[i] != NULL; i++) {
- // open+close to create the file, which will become mount point for actual
- // device
- int handle = open(devs[i] + 1, O_CREAT | O_RDONLY, 0644);
- CHECK_CALL(handle);
- CHECK_CALL(close(handle));
+ CreateFile(devs[i] + 1);
CHECK_CALL(mount(devs[i], devs[i] + 1, NULL, MS_BIND, NULL));
}
+
+ CHECK_CALL(symlink("/proc/self/fd", "dev/fd"));
}
-void SetupDirectories(struct Options *opt) {
- // Mount the sandbox and go there.
- CHECK_CALL(mount(opt->sandbox_root, opt->sandbox_root, NULL, MS_BIND | MS_NOSUID, NULL));
- CHECK_CALL(chdir(opt->sandbox_root));
- SetupSlashDev();
- // Mount blaze specific directories - tools/ and build-runfiles/.
- if (opt->tools != NULL) {
- PRINT_DEBUG("tools: %s\n", opt->tools);
- CHECK_CALL(mkdir("tools", 0755));
- CHECK_CALL(mount(opt->tools, "tools", NULL, MS_BIND | MS_RDONLY, NULL));
+// Recursively creates the file or directory specified in "path" and its parent
+// directories.
+static int CreateTarget(const char *path, bool is_directory) {
+ if (path == NULL) {
+ errno = EINVAL;
+ return -1;
}
- // Mount directories passed in argv; those are mostly dirs for shared libs.
- for (int i = 0; i < opt->num_mounts; i++) {
- CHECK_CALL(mount(opt->mounts[i], opt->mounts[i] + 1, NULL, MS_BIND | MS_RDONLY, NULL));
+ struct stat sb;
+ // If the path already exists...
+ if (stat(path, &sb) == 0) {
+ if (is_directory && S_ISDIR(sb.st_mode)) {
+ // and it's a directory and supposed to be a directory, we're done here.
+ return 0;
+ } else if (!is_directory && S_ISREG(sb.st_mode)) {
+ // and it's a regular file and supposed to be one, we're done here.
+ return 0;
+ } else {
+ // otherwise something is really wrong.
+ errno = is_directory ? ENOTDIR : EEXIST;
+ return -1;
+ }
+ } else {
+ // If stat failed because of any error other than "the path does not exist",
+ // this is an error.
+ if (errno != ENOENT) {
+ return -1;
+ }
}
- // C++ compilation
- // C++ headers go in a separate directory.
- if (opt->include_prefix != NULL) {
- CHECK_CALL(chdir(opt->include_prefix));
- for (int i = 0; i < opt->num_includes; i++) {
- // TODO(bazel-team): sometimes list of -iquote given by bazel contains
- // invalid (non-existing) entries, ideally we would like not to have them
- PRINT_DEBUG("include: %s\n", opt->includes[i]);
- if (mount(opt->includes[i], opt->includes[i] + 1 , NULL, MS_BIND, NULL) > -1) {
- continue;
- }
- if (errno == ENOENT) {
- continue;
- }
- CHECK_CALL(-1);
- }
- CHECK_CALL(chdir(".."));
+ // Create the parent directory.
+ CHECK_CALL(CreateTarget(dirname(strdupa(path)), true));
+
+ if (is_directory) {
+ CHECK_CALL(mkdir(path, 0755));
+ } else {
+ CreateFile(path);
}
+ return 0;
+}
+
+static void SetupDirectories(struct Options *opt) {
+ // Mount the sandbox and go there.
+ CHECK_CALL(mount(opt->sandbox_root, opt->sandbox_root, NULL,
+ MS_BIND | MS_NOSUID, NULL));
+ CHECK_CALL(chdir(opt->sandbox_root));
+
+ // Setup /dev.
+ SetupDevices();
+
CHECK_CALL(mkdir("proc", 0755));
CHECK_CALL(mount("/proc", "proc", NULL, MS_REC | MS_BIND, NULL));
+
+ CHECK_CALL(mkdir("tmp", 0755));
+ CHECK_CALL(mount("tmpfs", "tmp", "tmpfs", MS_NOSUID | MS_NODEV,
+ "size=25%,mode=1777"));
+
+ // Make sure the home directory exists and is writable.
+ const char *homedir;
+ if ((homedir = getenv("HOME")) == NULL) {
+ homedir = getpwuid(getuid())->pw_dir;
+ }
+
+ if (homedir[0] != '/') {
+ DIE("Home directory of user nobody must be an absolute path, but is %s", homedir);
+ }
+
+ char *homedir_absolute = malloc(strlen(opt->sandbox_root) + strlen(homedir) + 1);
+ strcpy(homedir_absolute, opt->sandbox_root);
+ strcat(homedir_absolute, homedir);
+
+ CreateTarget(homedir_absolute, true);
+ CHECK_CALL(mount("tmpfs", homedir_absolute, "tmpfs", MS_NOSUID | MS_NODEV,
+ "size=25%,mode=1777"));
+
+ // Mount directories passed in argv
+ for (int i = 0; i < opt->num_mounts; i++) {
+ struct stat sb;
+ stat(opt->mount_sources[i], &sb);
+
+ CHECK_CALL(CreateTarget(opt->mount_targets[i], S_ISDIR(sb.st_mode)));
+
+ PRINT_DEBUG("mount -o rbind,ro %s %s\n", opt->mount_sources[i],
+ opt->mount_targets[i]);
+ CHECK_CALL(mount(opt->mount_sources[i], opt->mount_targets[i], NULL,
+ MS_REC | MS_BIND | MS_RDONLY, NULL));
+ }
}
-void SetupUserNamespace(int uid, int gid) {
+// Write the file "filename" using a format string specified by "fmt". Returns
+// -1 on failure.
+static int WriteFile(const char *filename, const char *fmt, ...) {
+ int r;
+ va_list ap;
+ FILE *stream = fopen(filename, "w");
+ if (stream == NULL) {
+ return -1;
+ }
+ va_start(ap, fmt);
+ r = vfprintf(stream, fmt, ap);
+ va_end(ap);
+ if (r >= 0) {
+ r = fclose(stream);
+ }
+ return r;
+}
+
+static void SetupUserNamespace(int uid, int gid) {
// Disable needs for CAP_SETGID
int r = WriteFile("/proc/self/setgroups", "deny");
if (r < 0 && errno != ENOENT) {
// Writing to /proc/self/setgroups might fail on earlier
// version of linux because setgroups does not exist, ignore.
perror("WriteFile(\"/proc/self/setgroups\", \"deny\")");
- exit(-1);
+ exit(EXIT_FAILURE);
}
- // set group and user mapping from outer namespace to inner:
- // no changes in the parent, be root in the child
- CHECK_CALL(WriteFile("/proc/self/uid_map", "0 %d 1\n", uid));
- CHECK_CALL(WriteFile("/proc/self/gid_map", "0 %d 1\n", gid));
- CHECK_CALL(setresuid(0, 0, 0));
- CHECK_CALL(setresgid(0, 0, 0));
+ // Set group and user mapping from outer namespace to inner:
+ // No changes in the parent, be nobody in the child.
+ //
+ // We can't be root in the child, because some code may assume that running as root grants it
+ // certain capabilities that it doesn't in fact have. It's safer to let the child think that it
+ // is just a normal user.
+ CHECK_CALL(WriteFile("/proc/self/uid_map", "%d %d 1\n", kNobodyUid, uid));
+ CHECK_CALL(WriteFile("/proc/self/gid_map", "%d %d 1\n", kNobodyGid, gid));
+
+ CHECK_CALL(setresuid(kNobodyUid, kNobodyUid, kNobodyUid));
+ CHECK_CALL(setresgid(kNobodyGid, kNobodyGid, kNobodyGid));
}
-void ChangeRoot() {
+static void ChangeRoot(struct Options *opt) {
// move the real root to old_root, then detach it
char old_root[16] = "old-root-XXXXXX";
- CHECK_NOT_NULL(mkdtemp(old_root));
+ if (mkdtemp(old_root) == NULL) {
+ perror("mkdtemp");
+ DIE("mkdtemp returned NULL\n");
+ }
+
// pivot_root has no wrapper in libc, so we need syscall()
CHECK_CALL(syscall(SYS_pivot_root, ".", old_root));
CHECK_CALL(chroot("."));
CHECK_CALL(umount2(old_root, MNT_DETACH));
CHECK_CALL(rmdir(old_root));
+
+ if (opt->working_dir != NULL) {
+ CHECK_CALL(chdir(opt->working_dir));
+ }
}
-//
-// Command line parsing
-//
-void Usage(int argc, char **argv, char *fmt, ...) {
- int i;
- va_list ap;
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
+// Called when timeout or signal occurs.
+void OnSignal(int sig) {
+ global_signal = sig;
- fprintf(stderr,
- "\nUsage: %s [-S sandbox-root] [-m mount] [-C|--] command arg1\n",
- argv[0]);
- fprintf(stderr, " provided:");
- for (i = 0; i < argc; i++) {
- fprintf(stderr, " %s", argv[i]);
+ // Nothing to do if we received a signal before spawning the child.
+ if (global_child_pid == -1) {
+ return;
+ }
+
+ if (sig == SIGALRM) {
+ // SIGALRM represents a timeout, so we should give the process a bit of
+ // time to die gracefully if it needs it.
+ KillEverything(global_child_pid, true, global_kill_delay);
+ } else {
+ // Signals should kill the process quickly, as it's typically blocking
+ // the return of the prompt after a user hits "Ctrl-C".
+ KillEverything(global_child_pid, false, global_kill_delay);
}
- fprintf(stderr,
- "\nMandatory arguments:\n"
- " [-C|--] command to run inside sandbox, followed by arguments\n"
- " -S directory which will become the root of the sandbox\n"
- "\n"
- "Optional arguments:\n"
- " -t absolute path to bazel tools directory\n"
- " -T timeout after which sandbox will be terminated\n"
- " -m system directory to mount inside the sandbox\n"
- " Multiple directories can be specified and each of them will\n"
- " be mount as readonly\n"
- " -D if set, debug info will be printed\n");
- exit(1);
}
-void ParseCommandLine(int argc, char **argv, struct Options *opt) {
- extern char *optarg;
- extern int optind, optopt;
- int c;
+// Run the command specified by the argv array and kill it after timeout
+// seconds.
+static void SpawnCommand(char *const *argv, double timeout_secs) {
+ for (int i = 0; argv[i] != NULL; i++) {
+ PRINT_DEBUG("arg: %s\n", argv[i]);
+ }
- opt->include_prefix = NULL;
- opt->sandbox_root = NULL;
- opt->tools = NULL;
- opt->mounts = malloc(argc * sizeof(char*));
- opt->includes = malloc(argc * sizeof(char*));
- opt->num_mounts = 0;
- opt->num_includes = 0;
- opt->timeout = 0;
-
- while ((c = getopt(argc, argv, "+:S:t:T:m:N:n:DC")) != -1) {
- switch(c) {
- case 'S':
- if (opt->sandbox_root == NULL) {
- opt->sandbox_root = optarg;
- } else {
- Usage(argc, argv,
- "Multiple sandbox roots (-S) specified (expected one).");
- }
- break;
- case 'm':
- opt->mounts[opt->num_mounts++] = optarg;
- break;
- case 'D':
- global_debug = 1;
- break;
- case 'T':
- sscanf(optarg, "%d", &opt->timeout);
- if (opt->timeout < 0) {
- Usage(argc, argv, "Invalid timeout (-T) value: %d", opt->timeout);
- }
- break;
- case 'N':
- opt->include_prefix = optarg;
- break;
- case 'n':
- opt->includes[opt->num_includes++] = optarg;
- break;
- case 'C':
- break; // deprecated, ignore.
- case 't':
- opt->tools = optarg;
- break;
- case '?':
- Usage(argc, argv, "Unrecognized argument: -%c (%d)", optopt, optind);
- break;
- case ':':
- Usage(argc, argv, "Flag -%c requires an argument", optopt);
- break;
+ CHECK_CALL(global_child_pid = fork());
+ if (global_child_pid == 0) {
+ // In child.
+ CHECK_CALL(setsid());
+ ClearSignalMask();
+
+ // Force umask to include read and execute for everyone, to make
+ // output permissions predictable.
+ umask(022);
+
+ // Does not return unless something went wrong.
+ CHECK_CALL(execvp(argv[0], argv));
+ } else {
+ // In parent.
+
+ // Set up a signal handler which kills all subprocesses when the given
+ // signal is triggered.
+ HandleSignal(SIGALRM, OnSignal);
+ HandleSignal(SIGTERM, OnSignal);
+ HandleSignal(SIGINT, OnSignal);
+ SetTimeout(timeout_secs);
+
+ int status = WaitChild(global_child_pid, argv[0]);
+
+ // The child is done for, but may have grandchildren that we still have to
+ // kill.
+ kill(-global_child_pid, SIGKILL);
+
+ if (global_signal > 0) {
+ // Don't trust the exit code if we got a timeout or signal.
+ UnHandle(global_signal);
+ raise(global_signal);
+ } else if (WIFEXITED(status)) {
+ exit(WEXITSTATUS(status));
+ } else {
+ int sig = WTERMSIG(status);
+ UnHandle(sig);
+ raise(sig);
}
}
+}
- opt->args = argv + optind;
- if (argc <= optind) {
- Usage(argc, argv, "No command specified");
- }
+int main(int argc, char *const argv[]) {
+ struct Options opt;
+ memset(&opt, 0, sizeof(opt));
+ opt.mount_sources = calloc(argc, sizeof(char *));
+ opt.mount_targets = calloc(argc, sizeof(char *));
+
+ ParseCommandLine(argc, argv, &opt);
+ global_kill_delay = opt.kill_delay_secs;
+
+ int uid = SwitchToEuid();
+ int gid = SwitchToEgid();
+
+ RedirectStdout(opt.stdout_path);
+ RedirectStderr(opt.stderr_path);
+
+ PRINT_DEBUG("sandbox root is %s\n", opt.sandbox_root);
+ PRINT_DEBUG("working dir is %s\n",
+ (opt.working_dir != NULL) ? opt.working_dir : "/ (default)");
+
+ CreateNamespaces();
+
+ // Make our mount namespace private, so that further mounts do not affect the
+ // outside environment.
+ CHECK_CALL(mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL));
+
+ SetupDirectories(&opt);
+ SetupUserNamespace(uid, gid);
+ ChangeRoot(&opt);
+
+ SpawnCommand(opt.args, opt.timeout_secs);
+
+ free(opt.mount_sources);
+ free(opt.mount_targets);
+
+ return 0;
}