path: root/src/main/tools/namespace-sandbox.c
diff options
Diffstat (limited to 'src/main/tools/namespace-sandbox.c')
1 files changed, 323 insertions, 0 deletions
diff --git a/src/main/tools/namespace-sandbox.c b/src/main/tools/namespace-sandbox.c
new file mode 100644
index 0000000000..08e5fc2c4f
--- /dev/null
+++ b/src/main/tools/namespace-sandbox.c
@@ -0,0 +1,323 @@
+#define _GNU_SOURCE
+// Copyright 2014 Google Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/capability.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+static int global_debug = 0;
+static int global_cpid; // Returned by fork()
+#define PRINT_DEBUG(...) do { if (global_debug) {fprintf(stderr, "sandbox.c: " __VA_ARGS__);}} while(0)
+#define CHECK_CALL(x) if ((x) == -1) { perror(#x); exit(1); }
+#define CHECK_NOT_NULL(x) if (x == NULL) { perror(#x); exit(1); }
+#define DIE() do { fprintf(stderr, "Error in %d\n", __LINE__); exit(-1); } while(0);
+int kChildrenCleanupDelay = 1;
+// Prints the command-line help to stderr and terminates the process with
+// exit status 1. This function never returns.
+void Usage(void) {
+  fprintf(stderr,
+          "Usage: ./sandbox -S sandbox-root [-m mount] -C command arg1\n"
+          "Mandatory arguments:\n"
+          " -C command to run inside sandbox, followed by arguments\n"
+          " -S directory which will become the root of the sandbox\n"
+          "\n"
+          "Optional arguments:\n"
+          " -t absolute path to bazel tools directory\n"
+          " -T timeout (in seconds) after which sandbox will be terminated\n"
+          " -m system directory to mount inside the sandbox\n"
+          " Multiple directories can be specified and each of them will\n"
+          " be mounted as readonly\n"
+          " -N directory inside the sandbox under which include directories\n"
+          " (-n) are mounted\n"
+          " -n include directory to mount under the -N directory\n"
+          " Multiple directories can be specified\n"
+          " -D if set, debug info will be printed\n");
+  exit(1);
+}
+// Forward declarations of helpers defined after main(). Full prototypes are
+// used so that the compiler checks the arguments at every call site.
+void PropagateSignals(void);
+void EnableAlarm(int timeout);
+void SetupSlashDev(void);
+// Number of the last signal seen by SignalHandler (0 while none arrived).
+// volatile sig_atomic_t because it is written from a signal handler.
+static volatile sig_atomic_t global_signal_received = 0;
+// Returns the value that follows option argv[*index] and advances *index to
+// it. Prints a message and exits through Usage() when the value is missing.
+static char *OptionValue(int argc, char *argv[], int *index) {
+  if (*index + 1 >= argc) {
+    fprintf(stderr, "Missing value for option %s\n", argv[*index]);
+    Usage();
+  }
+  *index += 1;
+  return argv[*index];
+}
+
+// Entry point of the sandbox helper.
+//
+// Parses the command line, bind-mounts the requested directories below the
+// sandbox root, maps the calling user to uid/gid 0, pivots into the sandbox
+// root and finally runs the command given after -C in a forked child.
+// The parent forwards terminating signals to the child's process group and
+// exits with the child's exit status, or with 128 | signal number when the
+// sandbox or the child was terminated by a signal.
+int main(int argc, char *argv[]) {
+  char *include_prefix = NULL;
+  char *sandbox_root = NULL;
+  char *tools = NULL;
+  // Every stored entry consumes at least one argv element, so argc slots are
+  // always enough.
+  size_t max_entries = argc > 0 ? (size_t) argc : 1;
+  char **mounts = calloc(max_entries, sizeof(*mounts));
+  CHECK_NOT_NULL(mounts);
+  char **includes = calloc(max_entries, sizeof(*includes));
+  CHECK_NOT_NULL(includes);
+  int num_mounts = 0;
+  int num_includes = 0;
+  int iArg = 0;
+  int uid = getuid();
+  int gid = getgid();
+  int timeout = 0;
+
+  for (iArg = 1; iArg < argc; iArg++) {
+    // every option is exactly a dash followed by one letter
+    if (strlen(argv[iArg]) != 2 || argv[iArg][0] != '-') {
+      Usage();
+    }
+    switch (argv[iArg][1]) {
+      case 'S':
+        if (sandbox_root != NULL) {
+          fprintf(stderr,
+                  "Multiple sandbox roots (-S) specified (expected one).\n");
+          Usage();
+        }
+        sandbox_root = OptionValue(argc, argv, &iArg);
+        break;
+      case 'm':
+        mounts[num_mounts++] = OptionValue(argc, argv, &iArg);
+        break;
+      case 'D':
+        global_debug = 1;
+        break;
+      case 'T': {
+        // the timeout is the argument following -T, not the flag itself
+        const char *value = OptionValue(argc, argv, &iArg);
+        char *end = NULL;
+        errno = 0;
+        long parsed = strtol(value, &end, 10);
+        if (end == value || *end != '\0' || errno == ERANGE ||
+            parsed < 0 || parsed > INT_MAX) {
+          fprintf(stderr, "Invalid timeout (-T) value: %s\n", value);
+          Usage();
+        }
+        timeout = (int) parsed;
+        break;
+      }
+      case 'N':
+        include_prefix = OptionValue(argc, argv, &iArg);
+        break;
+      case 'n':
+        includes[num_includes++] = OptionValue(argc, argv, &iArg);
+        break;
+      case 'C':
+        // everything after -C is the command and its arguments
+        iArg++;
+        goto parsing_finished;
+      case 't':
+        tools = OptionValue(argc, argv, &iArg);
+        break;
+      default:
+        fprintf(stderr, "Unrecognized argument: %s\n", argv[iArg]);
+        Usage();
+    }
+  }
+
+parsing_finished:
+  // iArg now indexes the command; it equals argc when -C was absent or was
+  // the last argument.
+  if (iArg >= argc) {
+    fprintf(stderr, "No command specified.\n");
+    Usage();
+  }
+  if (sandbox_root == NULL) {
+    fprintf(stderr, "No sandbox root (-S) specified.\n");
+    Usage();
+  }
+
+  // parsed all arguments, now prepare sandbox
+  PRINT_DEBUG("%s\n", sandbox_root);
+  // create new namespaces in which this process and its children will live
+  // NOTE(review): no unshare()/clone() call is visible at this point even
+  // though the comment above announces one; confirm that the namespace setup
+  // has not been lost before relying on the mounts and id mappings below.
+  CHECK_CALL(mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL));
+  // mount sandbox and go there
+  CHECK_CALL(mount(sandbox_root, sandbox_root, NULL, MS_BIND | MS_NOSUID, NULL));
+  CHECK_CALL(chdir(sandbox_root));
+  SetupSlashDev();
+
+  // mount blaze specific directories - tools/ and build-runfiles/
+  if (tools != NULL) {
+    PRINT_DEBUG("tools: %s\n", tools);
+    CHECK_CALL(mkdir("tools", 0755));
+    CHECK_CALL(mount(tools, "tools", NULL, MS_BIND | MS_RDONLY, NULL));
+  }
+
+  // mounts passed in argv; those are mostly dirs for shared libs
+  // (skipping the first character turns the path into one relative to the
+  // sandbox root, which is the current directory)
+  for (int i = 0; i < num_mounts; i++) {
+    CHECK_CALL(mount(mounts[i], mounts[i] + 1, NULL, MS_BIND | MS_RDONLY, NULL));
+  }
+
+  // c++ compilation
+  // headers go in separate directory
+  if (include_prefix != NULL) {
+    CHECK_CALL(chdir(include_prefix));
+    for (int i = 0; i < num_includes; i++) {
+      // TODO(bazel-team) sometimes list of -iquote given by bazel contains
+      // invalid (non-existing) entries, ideally we would like not to have them
+      PRINT_DEBUG("include: %s\n", includes[i]);
+      if (mount(includes[i], includes[i] + 1, NULL, MS_BIND, NULL) == -1 &&
+          errno != ENOENT) {
+        // only non-existing entries are tolerated; anything else is fatal
+        perror("mount(include)");
+        exit(1);
+      }
+    }
+    CHECK_CALL(chdir(".."));
+  }
+
+  // set group and user mapping from outer namespace to inner:
+  // no changes in the parent, be root in the child
+  int uid_fd, gid_fd;
+  char uid_mapping[64], gid_mapping[64];
+  snprintf(uid_mapping, sizeof(uid_mapping), "0 %d 1\n", uid);
+  snprintf(gid_mapping, sizeof(gid_mapping), "0 %d 1\n", gid);
+  uid_fd = open("/proc/self/uid_map", O_WRONLY);
+  CHECK_CALL(uid_fd);
+  CHECK_CALL(write(uid_fd, uid_mapping, strlen(uid_mapping)));
+  CHECK_CALL(close(uid_fd));
+  gid_fd = open("/proc/self/gid_map", O_WRONLY);
+  CHECK_CALL(gid_fd);
+  CHECK_CALL(write(gid_fd, gid_mapping, strlen(gid_mapping)));
+  CHECK_CALL(close(gid_fd));
+  CHECK_CALL(setresuid(0, 0, 0));
+  CHECK_CALL(setresgid(0, 0, 0));
+
+  CHECK_CALL(mkdir("proc", 0755));
+  CHECK_CALL(mount("/proc", "proc", NULL, MS_REC | MS_BIND, NULL));
+
+  // make sandbox actually hermetic:
+  // move the real root to old_root, then detach it
+  char old_root[16] = "old-root-XXXXXX";
+  CHECK_NOT_NULL(mkdtemp(old_root));
+  // pivot_root has no wrapper in libc, so we need syscall()
+  CHECK_CALL(syscall(SYS_pivot_root, ".", old_root));
+  CHECK_CALL(chroot("."));
+  CHECK_CALL(umount2(old_root, MNT_DETACH));
+  CHECK_CALL(rmdir(old_root));
+
+  free(mounts);
+  free(includes);
+
+  for (int i = iArg; i < argc; i += 1) {
+    PRINT_DEBUG("arg: %s\n", argv[i]);
+  }
+
+  // spawn child and wait until it finishes
+  global_cpid = fork();
+  CHECK_CALL(global_cpid);
+  if (global_cpid == 0) {
+    // own process group, so that the parent can signal the whole sandbox
+    CHECK_CALL(setpgid(0, 0));
+    // if the execvp below fails with "No such file or directory" it means that:
+    // a) the binary is not in the sandbox (which means it wasn't included in
+    // the inputs)
+    // b) the binary uses shared library which is not inside sandbox - you can
+    // check for that by running "ldd ./a.out" (by default directories
+    // starting with /lib* and /usr/lib* should be there)
+    // c) the binary uses elf interpreter which is not inside sandbox - you can
+    // check for that by running "readelf -a a.out | grep interpreter" (the
+    // sandbox code assumes that it is either in /lib*/ or /usr/lib*/)
+    CHECK_CALL(execvp(argv[iArg], argv + iArg));
+    PRINT_DEBUG("Exec failed near %s:%d\n", __FILE__, __LINE__);
+    exit(1);
+  } else {
+    // make sure that all signals propagate to children (mostly useful to kill
+    // entire sandbox)
+    PropagateSignals();
+    // after given timeout, kill children
+    EnableAlarm(timeout);
+    int status = 0;
+    while (1) {
+      PRINT_DEBUG("Waiting for the child...\n");
+      pid_t pid = wait(&status);
+      int signum = global_signal_received;
+      if (signum) {
+        PRINT_DEBUG("Received signal: %s\n", strsignal(signum));
+        CHECK_CALL(killpg(global_cpid, signum));
+        // give children some time for cleanup before they terminate
+        sleep(kChildrenCleanupDelay);
+        CHECK_CALL(killpg(global_cpid, SIGKILL));
+        exit(128 | signum);
+      }
+      if (pid < 0) {
+        // errno is only meaningful when wait() actually failed
+        if (errno == EINTR) {
+          continue;
+        }
+        perror("Wait failed:");
+        exit(1);
+      }
+      if (WIFEXITED(status)) {
+        PRINT_DEBUG("Child exited with status: %d\n", WEXITSTATUS(status));
+        exit(WEXITSTATUS(status));
+      }
+      if (WIFSIGNALED(status)) {
+        PRINT_DEBUG("Child terminated by a signal: %d\n", WTERMSIG(status));
+        // WEXITSTATUS is not meaningful for a signalled child; report the
+        // signal the same way as for signals received by the sandbox itself
+        exit(128 | WTERMSIG(status));
+      }
+      if (WIFSTOPPED(status)) {
+        PRINT_DEBUG("Child stopped by a signal: %d\n", WSTOPSIG(status));
+      }
+    }
+  }
+  return 0;
+}
+// Signal handler installed by PropagateSignals(). It only records the number
+// of the delivered signal in global_signal_received; the wait loop in main()
+// reads that value and does the actual forwarding.
+void SignalHandler(int signum, siginfo_t *info, void *uctxt) {
+  // required by the sa_sigaction signature, but not needed here
+  (void) info;
+  (void) uctxt;
+  global_signal_received = signum;
+}
+void PropagateSignals() {  // Installs SignalHandler for every signal listed in `signals`.
+ // Catch some signals received by the parent so that they can be passed on to
+ // the processes in the sandbox, making it easier to terminate the entire sandbox.
+ struct sigaction action = {};
+ action.sa_flags = SA_SIGINFO;  // selects the three-argument sa_sigaction handler form
+ action.sa_sigaction = SignalHandler;
+ // handle all signals that could terminate the process
+ // signals below produce core dump by default, however at the moment we'll
+ // just terminate. NOTE(review): the opening of the `signals` array is not visible here - confirm its contents.
+ SIGXFSZ, -1};
+ for (int *p = signals; *p != -1; p++) {  // the list is terminated by the -1 sentinel
+ sigaction(*p, &action, NULL);  // return value is not checked
+ }
+// Creates a "dev" directory relative to the current working directory and
+// bind-mounts a fixed set of host device nodes into it.
+// Every failing call is reported by CHECK_CALL, which exits the process.
+void SetupSlashDev() {
+  CHECK_CALL(mkdir("dev", 0755));
+  static const char *const devs[] = {
+    "/dev/null",
+    "/dev/random",
+    "/dev/urandom",
+    "/dev/zero",
+  };
+  // Iterate by element count: the table carries no NULL terminator, so a
+  // sentinel-based loop would read past its end.
+  const size_t num_devs = sizeof(devs) / sizeof(devs[0]);
+  for (size_t i = 0; i < num_devs; i++) {
+    // dropping the leading '/' yields the path relative to the current
+    // directory
+    const char *mount_point = devs[i] + 1;
+    // open+close to create the file, which will become mount point for actual
+    // device
+    int handle = open(mount_point, O_CREAT | O_RDONLY, 0644);
+    CHECK_CALL(handle);
+    CHECK_CALL(close(handle));
+    CHECK_CALL(mount(devs[i], mount_point, NULL, MS_BIND, NULL));
+  }
+}
+void EnableAlarm(int timeout) {  // Arms an ITIMER_REAL timer that expires after `timeout` seconds.
+ if (timeout <= 0) return;  // non-positive timeout: no timer is set
+ struct itimerval timer = {};  // it_interval is left at zero
+ timer.it_value.tv_sec = (long) timeout;
+ CHECK_CALL(setitimer(ITIMER_REAL, &timer, NULL));  // exits the process if setitimer fails