aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar ruperts <ruperts@google.com>2017-11-28 21:17:48 -0800
committerGravatar Copybara-Service <copybara-piper@google.com>2017-11-28 21:19:26 -0800
commitb394da457e9e1f717581b00926e811f6c9e018f3 (patch)
treede17a49a8222a84ca26a477a0a1f42300004471f
parent85d69f2397d7b54fa8b8c69b7bc9ec6d36cb1d49 (diff)
Make process-wrapper output execution statistics for executed commands.
For example, it now outputs resource usage statistics like the amount of user time and system time used. RELNOTES: None PiperOrigin-RevId: 177263221
-rw-r--r--src/main/protobuf/BUILD13
-rw-r--r--src/main/protobuf/execution_statistics.proto47
-rw-r--r--src/main/tools/BUILD5
-rw-r--r--src/main/tools/process-tools.cc68
-rw-r--r--src/main/tools/process-tools.h9
-rw-r--r--src/main/tools/process-wrapper-legacy.cc11
-rw-r--r--src/main/tools/process-wrapper-options.cc14
-rw-r--r--src/main/tools/process-wrapper-options.h2
-rw-r--r--src/test/shell/integration/BUILD16
-rwxr-xr-xsrc/test/shell/integration/process-wrapper_test.sh81
-rw-r--r--src/test/shell/integration/spend_cpu_time.cc117
11 files changed, 373 insertions, 10 deletions
diff --git a/src/main/protobuf/BUILD b/src/main/protobuf/BUILD
index 89afb792ee..d48de4388e 100644
--- a/src/main/protobuf/BUILD
+++ b/src/main/protobuf/BUILD
@@ -5,6 +5,13 @@ load("//tools/build_rules:utilities.bzl", "java_library_srcs")
load("//third_party/protobuf/3.4.0:protobuf.bzl", "py_proto_library")
load("//third_party/grpc:build_defs.bzl", "java_grpc_library")
+exports_files(
+ ["execution_statitics.proto"],
+ visibility = [
+ "//src/test/shell/integration:process_wrapper_test",
+ ],
+)
+
FILES = [
"action_cache",
"android_deploy_info",
@@ -15,6 +22,7 @@ FILES = [
"crosstool_config",
"deps",
"desugar_deps",
+ "execution_statistics",
"extra_actions_base",
"invocation_policy",
"java_compilation",
@@ -114,6 +122,11 @@ java_library_srcs(
deps = ["//third_party/pprof:profile_java_proto"],
)
+cc_proto_library(
+ name = "execution_statistics_cc_proto",
+ deps = [":execution_statistics_proto"],
+)
+
filegroup(
name = "srcs",
srcs = glob(["**"]),
diff --git a/src/main/protobuf/execution_statistics.proto b/src/main/protobuf/execution_statistics.proto
new file mode 100644
index 0000000000..86bfe35b03
--- /dev/null
+++ b/src/main/protobuf/execution_statistics.proto
@@ -0,0 +1,47 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package tools.protos;
+
+option java_package = "com.google.devtools.build.lib.shell";
+option java_outer_classname = "Protos";
+
+// Verbatim representation of the rusage structure returned by getrusage(2).
+// For further details on all these cryptic names, see that manual page.
+message ResourceUsage {
+ int64 utime_sec = 1; // user CPU time used, seconds part
+ int64 utime_usec = 2; // user CPU time used, microseconds part
+ int64 stime_sec = 3; // system CPU time used, seconds part
+ int64 stime_usec = 4; // system CPU time used, microseconds part
+ int64 maxrss = 5; // maximum resident set size
+ int64 ixrss = 6; // integral shared memory size
+ int64 idrss = 7; // integral unshared data size
+ int64 isrss = 8; // integral unshared stack size
+ int64 minflt = 9; // page reclaims (soft page faults)
+ int64 majflt = 10; // page faults (hard page faults)
+ int64 nswap = 11; // swaps
+ int64 inblock = 12; // block input operations
+ int64 oublock = 13; // block output operations
+ int64 msgsnd = 14; // IPC messages sent
+ int64 msgrcv = 15; // IPC messages received
+ int64 nsignals = 16; // signals received
+ int64 nvcsw = 17; // voluntary context switches
+ int64 nivcsw = 18; // involuntary context switches
+}
+
+message ExecutionStatistics {
+ ResourceUsage resource_usage = 1;
+}
diff --git a/src/main/tools/BUILD b/src/main/tools/BUILD
index dc788a4c54..9319b0a0d3 100644
--- a/src/main/tools/BUILD
+++ b/src/main/tools/BUILD
@@ -10,7 +10,10 @@ cc_library(
name = "process-tools",
srcs = ["process-tools.cc"],
hdrs = ["process-tools.h"],
- deps = [":logging"],
+ deps = [
+ ":logging",
+ "//src/main/protobuf:execution_statistics_cc_proto",
+ ],
)
cc_binary(
diff --git a/src/main/tools/process-tools.cc b/src/main/tools/process-tools.cc
index 7dd623fa76..45aa6b7346 100644
--- a/src/main/tools/process-tools.cc
+++ b/src/main/tools/process-tools.cc
@@ -21,12 +21,16 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <memory>
+
+#include "src/main/protobuf/execution_statistics.pb.h"
#include "src/main/tools/logging.h"
int SwitchToEuid() {
@@ -178,8 +182,70 @@ int WaitChild(pid_t pid) {
} while (err == -1 && errno == EINTR);
if (err == -1) {
- DIE("wait");
+ DIE("waitpid");
}
return status;
}
+
+int WaitChildWithRusage(pid_t pid, struct rusage *rusage) {
+ int err, status;
+
+ do {
+ err = wait4(pid, &status, 0, rusage);
+ } while (err == -1 && errno == EINTR);
+
+ if (err == -1) {
+ DIE("wait4");
+ }
+
+ return status;
+}
+
+static std::unique_ptr<tools::protos::ExecutionStatistics>
+CreateExecutionStatisticsProto(struct rusage *rusage) {
+ std::unique_ptr<tools::protos::ExecutionStatistics> execution_statistics(
+ new tools::protos::ExecutionStatistics);
+
+ tools::protos::ResourceUsage *resource_usage =
+ execution_statistics->mutable_resource_usage();
+
+ resource_usage->set_utime_sec(rusage->ru_utime.tv_sec);
+ resource_usage->set_utime_usec(rusage->ru_utime.tv_usec);
+ resource_usage->set_stime_sec(rusage->ru_stime.tv_sec);
+ resource_usage->set_stime_usec(rusage->ru_stime.tv_usec);
+ resource_usage->set_maxrss(rusage->ru_maxrss);
+ resource_usage->set_ixrss(rusage->ru_ixrss);
+ resource_usage->set_idrss(rusage->ru_idrss);
+ resource_usage->set_isrss(rusage->ru_isrss);
+ resource_usage->set_minflt(rusage->ru_minflt);
+ resource_usage->set_majflt(rusage->ru_majflt);
+ resource_usage->set_nswap(rusage->ru_nswap);
+ resource_usage->set_inblock(rusage->ru_inblock);
+ resource_usage->set_oublock(rusage->ru_oublock);
+ resource_usage->set_msgsnd(rusage->ru_msgsnd);
+ resource_usage->set_msgrcv(rusage->ru_msgrcv);
+ resource_usage->set_nsignals(rusage->ru_nsignals);
+ resource_usage->set_nvcsw(rusage->ru_nvcsw);
+ resource_usage->set_nivcsw(rusage->ru_nivcsw);
+
+ return execution_statistics;
+}
+
+// Write execution statistics (e.g. resource usage) to a file.
+void WriteStatsToFile(struct rusage *rusage, const std::string &stats_path) {
+ const int flags = O_WRONLY | O_CREAT | O_TRUNC | O_APPEND;
+ int fd_out = open(stats_path.c_str(), flags, 0666);
+ if (fd_out < 0) {
+ DIE("open(%s)", stats_path.c_str());
+ }
+
+ std::unique_ptr<tools::protos::ExecutionStatistics> execution_statistics =
+ CreateExecutionStatisticsProto(rusage);
+
+ if (!execution_statistics->SerializeToFileDescriptor(fd_out)) {
+ DIE("could not write resource usage to file: %s", stats_path.c_str());
+ }
+
+ close(fd_out);
+}
diff --git a/src/main/tools/process-tools.h b/src/main/tools/process-tools.h
index 8f122f6b35..6f6f4946f9 100644
--- a/src/main/tools/process-tools.h
+++ b/src/main/tools/process-tools.h
@@ -55,7 +55,14 @@ void ClearSignalMask();
void SetTimeout(double timeout_secs);
// Wait for "pid" to exit and return its exit code.
-// "name" is used for the error message only.
int WaitChild(pid_t pid);
+// Wait for "pid" to exit and return its exit code.
+// Resource usage is returned in "rusage" regardless of the exit status of the
+// child process.
+int WaitChildWithRusage(pid_t pid, struct rusage *rusage);
+
+// Write execution statistics to a file.
+void WriteStatsToFile(struct rusage *rusage, const std::string &stats_path);
+
#endif // PROCESS_TOOLS_H__
diff --git a/src/main/tools/process-wrapper-legacy.cc b/src/main/tools/process-wrapper-legacy.cc
index ac7b3ec5be..03fdec5448 100644
--- a/src/main/tools/process-wrapper-legacy.cc
+++ b/src/main/tools/process-wrapper-legacy.cc
@@ -17,7 +17,9 @@
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
+#include <sys/resource.h>
#include <sys/stat.h>
+#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
@@ -68,7 +70,14 @@ void LegacyProcessWrapper::WaitForChild() {
SetTimeout(opt.timeout_secs);
}
- int status = WaitChild(child_pid);
+ int status;
+ if (!opt.stats_path.empty()) {
+ struct rusage child_rusage;
+ status = WaitChildWithRusage(child_pid, &child_rusage);
+ WriteStatsToFile(&child_rusage, opt.stats_path);
+ } else {
+ status = WaitChild(child_pid);
+ }
// The child is done for, but may have grandchildren that we still have to
// kill.
diff --git a/src/main/tools/process-wrapper-options.cc b/src/main/tools/process-wrapper-options.cc
index 94b66d1536..82384113c9 100644
--- a/src/main/tools/process-wrapper-options.cc
+++ b/src/main/tools/process-wrapper-options.cc
@@ -45,6 +45,7 @@ static void Usage(char *program_name, const char *fmt, ...) {
"before killing the child with SIGKILL\n"
" -o/--stdout <file> redirect stdout to a file\n"
" -e/--stderr <file> redirect stderr to a file\n"
+ " -s/--stats <file> if set, write stats in protobuf format to a file\n"
" -d/--debug if set, debug info will be printed\n"
" -- command to run inside sandbox, followed by arguments\n");
exit(EXIT_FAILURE);
@@ -58,14 +59,15 @@ static void ParseCommandLine(const std::vector<char *> &args) {
{"kill_delay", required_argument, 0, 'k'},
{"stdout", required_argument, 0, 'o'},
{"stderr", required_argument, 0, 'e'},
+ {"stats", required_argument, 0, 's'},
{"debug", no_argument, 0, 'd'},
{0, 0, 0, 0}};
extern char *optarg;
extern int optind, optopt;
int c;
- while ((c = getopt_long(args.size(), args.data(), "+:t:k:o:e:d", long_options,
- nullptr)) != -1) {
+ while ((c = getopt_long(args.size(), args.data(), "+:t:k:o:e:s:d",
+ long_options, nullptr)) != -1) {
switch (c) {
case 't':
if (sscanf(optarg, "%lf", &opt.timeout_secs) != 1) {
@@ -93,6 +95,14 @@ static void ParseCommandLine(const std::vector<char *> &args) {
"Cannot redirect stderr (-e) to more than one destination.");
}
break;
+ case 's':
+ if (opt.stats_path.empty()) {
+ opt.stats_path.assign(optarg);
+ } else {
+ Usage(args.front(),
+ "Cannot write stats (-s) to more than one destination.");
+ }
+ break;
case 'd':
opt.debug = true;
break;
diff --git a/src/main/tools/process-wrapper-options.h b/src/main/tools/process-wrapper-options.h
index cb173027e2..e8156a801e 100644
--- a/src/main/tools/process-wrapper-options.h
+++ b/src/main/tools/process-wrapper-options.h
@@ -30,6 +30,8 @@ struct Options {
std::string stderr_path;
// Whether to print debugging messages (-d)
bool debug;
+ // Where to write stats, in protobuf format (-s)
+ std::string stats_path;
// Command to run (--)
std::vector<char *> args;
};
diff --git a/src/test/shell/integration/BUILD b/src/test/shell/integration/BUILD
index a04bb2c6b3..1d9b55d76c 100644
--- a/src/test/shell/integration/BUILD
+++ b/src/test/shell/integration/BUILD
@@ -288,9 +288,23 @@ sh_test(
name = "process_wrapper_test",
size = "medium",
srcs = ["process-wrapper_test.sh"],
- data = [":test-deps"],
+ data = [
+ ":spend_cpu_time",
+ ":test-deps",
+ "//src/main/protobuf:execution_statistics.proto",
+ ],
)
+cc_binary(
+ name = "spend_cpu_time",
+ testonly = 1,
+ srcs = ["spend_cpu_time.cc"],
+ malloc = "//base:system_malloc",
+)
+
+########################################################################
+# Test suites.
+
test_suite(
name = "all_tests",
visibility = ["//visibility:public"],
diff --git a/src/test/shell/integration/process-wrapper_test.sh b/src/test/shell/integration/process-wrapper_test.sh
index bff6099e5d..e6aeda064e 100755
--- a/src/test/shell/integration/process-wrapper_test.sh
+++ b/src/test/shell/integration/process-wrapper_test.sh
@@ -24,10 +24,19 @@ CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${CURRENT_DIR}/../integration_test_setup.sh" \
|| { echo "integration_test_setup.sh not found!" >&2; exit 1; }
+enable_errexit
+
+readonly STATS_PROTO_PATH="${CURRENT_DIR}/../../../main/protobuf/execution_statistics.proto"
+readonly STATS_PROTO_DIR="$(cd "$(dirname "${STATS_PROTO_PATH}")" && pwd)"
+
+readonly CPU_TIME_SPENDER="${CURRENT_DIR}/../../../test/shell/integration/spend_cpu_time"
+
readonly OUT_DIR="${TEST_TMPDIR}/out"
readonly OUT="${OUT_DIR}/outfile"
readonly ERR="${OUT_DIR}/errfile"
+readonly EXIT_STATUS_SIGALRM=$((128 + 14))
+
function set_up() {
rm -rf $OUT_DIR
mkdir -p $OUT_DIR
@@ -68,7 +77,7 @@ function test_signal_catcher() {
local code=0
$process_wrapper --timeout=1 --kill_delay=10 --stdout=$OUT --stderr=$ERR /bin/sh -c \
'trap "echo later; exit 0" INT TERM ALRM; sleep 10' &> $TEST_log || code=$?
- assert_equals 142 "$code" # SIGNAL_BASE + SIGALRM = 128 + 14
+ assert_equals "${EXIT_STATUS_SIGALRM}" "$code"
assert_stdout "later"
}
@@ -87,7 +96,7 @@ function test_timeout_grace() {
$process_wrapper --timeout=1 --kill_delay=10 --stdout=$OUT --stderr=$ERR /bin/sh -c \
'trap "echo before; sleep 1; echo after; exit 0" INT TERM ALRM; sleep 10' \
&> $TEST_log || code=$?
- assert_equals 142 "$code" # SIGNAL_BASE + SIGALRM = 128 + 14
+ assert_equals "${EXIT_STATUS_SIGALRM}" "$code"
assert_stdout 'before
after'
}
@@ -101,7 +110,7 @@ function test_timeout_kill() {
$process_wrapper --timeout=1 --kill_delay=2 --stdout=$OUT --stderr=$ERR /bin/sh -c \
'trap "echo before; sleep 10; echo after; exit 0" INT TERM ALRM; sleep 10' \
&> $TEST_log || code=$?
- assert_equals 142 "$code" # SIGNAL_BASE + SIGALRM = 128 + 14
+ assert_equals "${EXIT_STATUS_SIGALRM}" "$code"
assert_stdout "before"
}
@@ -112,4 +121,70 @@ function test_execvp_error_message() {
assert_contains "\"execvp(/bin/notexisting, ...)\": No such file or directory" "$ERR"
}
+function check_execution_time_for_command() {
+ local user_time_low="$1"; shift 1
+ local user_time_high="$1"; shift 1
+ local sys_time_low="$1"; shift 1
+ local sys_time_high="$1"; shift 1
+
+ local stats_out_path="${OUT_DIR}/statsfile"
+ local stats_out_decoded_path="${OUT_DIR}/statsfile.decoded"
+
+ # Wrapped process will be terminated after 100 seconds if not self terminated.
+ local code=0
+ "${process_wrapper}" --timeout=100 --kill_delay=2 --stdout="${OUT}" \
+ --stderr="${ERR}" --stats="${stats_out_path}" \
+ "$@" &> "${TEST_log}" || code="$?"
+ assert_equals 0 "${code}"
+
+ if [ ! -e "${stats_out_path}" ]; then
+ fail "Stats file not found: '${stats_out_path}'"
+ fi
+
+ "${protoc_compiler}" --proto_path="${STATS_PROTO_DIR}" \
+ --decode tools.protos.ExecutionStatistics execution_statistics.proto \
+ < "${stats_out_path}" > "${stats_out_decoded_path}"
+
+ if [ ! -e "${stats_out_decoded_path}" ]; then
+ fail "Decoded stats file not found: '${stats_out_decoded_path}'"
+ fi
+
+ local utime=0
+ if grep -q utime_sec "${stats_out_decoded_path}"; then
+ utime="$(grep utime_sec ${stats_out_decoded_path} | cut -f2 -d':' | \
+ tr -dc '0-9')"
+ fi
+
+ local stime=0
+ if grep -q stime_sec "${stats_out_decoded_path}"; then
+ stime="$(grep stime_sec ${stats_out_decoded_path} | cut -f2 -d':' | \
+ tr -dc '0-9')"
+ fi
+
+ if ! [ ${utime} -ge ${user_time_low} -a ${utime} -le ${user_time_high} ]; then
+ fail "reported utime of '${utime}' is out of expected range"
+ fi
+ if ! [ ${stime} -ge ${sys_time_low} -a ${stime} -le ${sys_time_high} ]; then
+ fail "reported stime of '${stime}' is out of expected range"
+ fi
+}
+
+function test_stats_high_user_time() {
+ # Tested with blaze test --runs_per_test 1000 on November 28, 2017.
+ check_execution_time_for_command 10 11 0 1 \
+ "${CPU_TIME_SPENDER}" 10 0
+}
+
+function test_stats_high_system_time() {
+ # Tested with blaze test --runs_per_test 1000 on November 28, 2017.
+ check_execution_time_for_command 0 1 10 11 \
+ "${CPU_TIME_SPENDER}" 0 10
+}
+
+function test_stats_high_user_time_and_high_system_time() {
+ # Tested with blaze test --runs_per_test 1000 on November 28, 2017.
+ check_execution_time_for_command 10 11 10 11 \
+ "${CPU_TIME_SPENDER}" 10 10
+}
+
run_suite "process-wrapper"
diff --git a/src/test/shell/integration/spend_cpu_time.cc b/src/test/shell/integration/spend_cpu_time.cc
new file mode 100644
index 0000000000..636a2a1692
--- /dev/null
+++ b/src/test/shell/integration/spend_cpu_time.cc
@@ -0,0 +1,117 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <err.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+// Marked as volatile to force the C compiler to calculate it.
+volatile uint64_t volatile_counter;
+
+static void WasteUserTime() {
+ volatile_counter = 0;
+ while (true) {
+ volatile_counter++;
+ if (volatile_counter == 10000000) {
+ break;
+ }
+ }
+}
+
+static void WasteSystemTime() {
+ char current_dir_path[MAXPATHLEN];
+ if (getcwd(current_dir_path, sizeof(current_dir_path)) == NULL) {
+ err(EXIT_FAILURE, "getcwd() failed");
+ }
+
+ volatile_counter = 0;
+ while (true) {
+ // Arbitrary syscall to waste system time.
+ if (chdir(current_dir_path) != 0) {
+ err(EXIT_FAILURE, "chdir() failed");
+ }
+ volatile_counter++;
+ if (volatile_counter == 100000) {
+ break;
+ }
+ }
+}
+
+static void GetResourceUsage(struct rusage *rusage) {
+ if (getrusage(RUSAGE_SELF, rusage) != 0) {
+ err(EXIT_FAILURE, "getrusage() failed");
+ }
+}
+
+static int GetUsedUserTimeSeconds() {
+ struct rusage my_rusage;
+ GetResourceUsage(&my_rusage);
+ return my_rusage.ru_utime.tv_sec;
+}
+
+static int GetUsedSystemTimeSeconds() {
+ struct rusage my_rusage;
+ GetResourceUsage(&my_rusage);
+ return my_rusage.ru_stime.tv_sec;
+}
+
+// This program just wastes (at least) the desired amount of CPU time, by
+// checking its own resource usage (rusage) while running.
+int main(int argc, char **argv) {
+ // Parse command-line arguments.
+ const char *progname = argv[0] ? argv[0] : "spend_cpu_time";
+ if (argc != 3) {
+ fprintf(stderr, "Usage: %s <user_time_seconds> <system_time_seconds>\n",
+ progname);
+ exit(EXIT_FAILURE);
+ }
+
+ int requested_user_time_seconds \
+ = atoi(argv[1]); // NOLINT(runtime/deprecated_fn)
+ int requested_system_time_seconds \
+ = atoi(argv[2]); // NOLINT(runtime/deprecated_fn)
+
+ // Waste system time first, because this also wastes some user time.
+ if (requested_system_time_seconds > 0) {
+ int spent_system_time_seconds = 0;
+ while (spent_system_time_seconds < requested_system_time_seconds) {
+ WasteSystemTime();
+ spent_system_time_seconds = GetUsedSystemTimeSeconds();
+ }
+ }
+
+ // Waste user time if we haven't already wasted enough.
+ if (requested_user_time_seconds > 0) {
+ int spent_user_time_seconds = 0;
+ while (spent_user_time_seconds < requested_user_time_seconds) {
+ WasteUserTime();
+ spent_user_time_seconds = GetUsedUserTimeSeconds();
+ }
+ }
+
+ int spent_user_time_seconds = GetUsedUserTimeSeconds();
+ int spent_system_time_seconds = GetUsedSystemTimeSeconds();
+ printf("Total user time wasted: %d seconds\n", spent_user_time_seconds);
+ printf("Total system time wasted: %d seconds\n", spent_system_time_seconds);
+
+ exit(EXIT_SUCCESS);
+}