From 8cc772ef98604678d99b6a685e412a11a6508ba5 Mon Sep 17 00:00:00 2001 From: Janak Ramakrishnan Date: Wed, 23 Mar 2016 17:26:12 +0000 Subject: Add startup option --experimental_oom_more_eagerly_threshold, with default value 90. When --experimental_oom_more_eagerly is enabled, if after two full GCs the old gen is still >=--experimental_oom_more_eagerly_threshold% full, exit the JVM. -- MOS_MIGRATED_REVID=117943361 --- src/main/cpp/blaze.cc | 3 + src/main/cpp/blaze_startup_options.cc | 1 + src/main/cpp/blaze_startup_options.h | 2 + src/main/cpp/blaze_startup_options_common.cc | 14 +++ .../devtools/build/lib/runtime/BlazeRuntime.java | 1 + .../lib/runtime/BlazeServerStartupOptions.java | 10 ++ .../build/lib/runtime/RetainedHeapLimiter.java | 130 +++++++++++++++++++++ 7 files changed, 161 insertions(+) create mode 100644 src/main/java/com/google/devtools/build/lib/runtime/RetainedHeapLimiter.java (limited to 'src') diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc index 0fa9b75535..ab185c2fbf 100644 --- a/src/main/cpp/blaze.cc +++ b/src/main/cpp/blaze.cc @@ -280,7 +280,10 @@ static vector GetArgumentArray() { } if (globals->options.oom_more_eagerly) { result.push_back("--experimental_oom_more_eagerly"); + result.push_back("--experimental_oom_more_eagerly_threshold=" + + ToString(globals->options.oom_more_eagerly_threshold)); } + if (globals->options.watchfs) { result.push_back("--watchfs"); } diff --git a/src/main/cpp/blaze_startup_options.cc b/src/main/cpp/blaze_startup_options.cc index 7a27abe1ec..2a16cf5e58 100644 --- a/src/main/cpp/blaze_startup_options.cc +++ b/src/main/cpp/blaze_startup_options.cc @@ -52,6 +52,7 @@ BlazeStartupOptions::BlazeStartupOptions(const BlazeStartupOptions &rhs) io_nice_level(rhs.io_nice_level), max_idle_secs(rhs.max_idle_secs), oom_more_eagerly(rhs.oom_more_eagerly), + oom_more_eagerly_threshold(rhs.oom_more_eagerly_threshold), watchfs(rhs.watchfs), allow_configurable_attributes(rhs.allow_configurable_attributes), 
option_sources(rhs.option_sources), diff --git a/src/main/cpp/blaze_startup_options.h b/src/main/cpp/blaze_startup_options.h index 3e984879b8..41629b2f9b 100644 --- a/src/main/cpp/blaze_startup_options.h +++ b/src/main/cpp/blaze_startup_options.h @@ -148,6 +148,8 @@ class BlazeStartupOptions { bool oom_more_eagerly; + int oom_more_eagerly_threshold; + // If true, Blaze will listen to OS-level file change notifications. bool watchfs; diff --git a/src/main/cpp/blaze_startup_options_common.cc b/src/main/cpp/blaze_startup_options_common.cc index 4b0633593b..086f719b07 100644 --- a/src/main/cpp/blaze_startup_options_common.cc +++ b/src/main/cpp/blaze_startup_options_common.cc @@ -49,6 +49,7 @@ void BlazeStartupOptions::Init() { io_nice_level = -1; // 3 hours (but only 5 seconds if used within a test) max_idle_secs = testing ? 5 : (3 * 3600); + oom_more_eagerly_threshold = 90; webstatus_port = 0; oom_more_eagerly = false; watchfs = false; @@ -212,6 +213,19 @@ blaze_exit_code::ExitCode BlazeStartupOptions::ProcessArg( } else if (GetNullaryOption(arg, "--noexperimental_oom_more_eagerly")) { oom_more_eagerly = false; option_sources["experimental_oom_more_eagerly"] = rcfile; + } else if (GetUnaryOption(arg, next_arg, + "--experimental_oom_more_eagerly_threshold") != + NULL) { + if (!blaze_util::safe_strto32(value, &oom_more_eagerly_threshold) || + oom_more_eagerly_threshold < 0) { + blaze_util::StringPrintf(error, + "Invalid argument to " + "--experimental_oom_more_eagerly_threshold: " + "'%s'.", + value); + return blaze_exit_code::BAD_ARGV; + } + option_sources["experimental_oom_more_eagerly_threshold"] = rcfile; } else if (GetNullaryOption(arg, "--watchfs")) { watchfs = true; option_sources["watchfs"] = rcfile; diff --git a/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java b/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java index 5a0e51262f..5b291c039a 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java 
+++ b/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java @@ -1133,6 +1133,7 @@ public final class BlazeRuntime { BlazeServerStartupOptions startupOptions = options.getOptions(BlazeServerStartupOptions.class); if (startupOptions.batch && startupOptions.oomMoreEagerly) { new OomSignalHandler(); + new RetainedHeapLimiter(startupOptions.oomMoreEagerlyThreshold).install(); } PathFragment workspaceDirectory = startupOptions.workspaceDirectory; PathFragment installBase = startupOptions.installBase; diff --git a/src/main/java/com/google/devtools/build/lib/runtime/BlazeServerStartupOptions.java b/src/main/java/com/google/devtools/build/lib/runtime/BlazeServerStartupOptions.java index bd07e166e1..47e3cacaa7 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/BlazeServerStartupOptions.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/BlazeServerStartupOptions.java @@ -177,6 +177,16 @@ public class BlazeServerStartupOptions extends OptionsBase { ) public boolean oomMoreEagerly; + @Option( + name = "experimental_oom_more_eagerly_threshold", + defaultValue = "90", // NOTE: purely decorative! See class docstring. + category = "server startup", + help = + "If --experimental_oom_more_eagerly is set, Blaze will OOM if, after two full GC's, more " + + "than this percentage of the (old gen) heap is still occupied." + ) + public int oomMoreEagerlyThreshold; + @Option(name = "block_for_lock", defaultValue = "true", // NOTE: purely decorative! See class docstring. category = "server startup", diff --git a/src/main/java/com/google/devtools/build/lib/runtime/RetainedHeapLimiter.java b/src/main/java/com/google/devtools/build/lib/runtime/RetainedHeapLimiter.java new file mode 100644 index 0000000000..a98e540737 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/runtime/RetainedHeapLimiter.java @@ -0,0 +1,130 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. 
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.lib.runtime;

import com.google.devtools.build.lib.util.Preconditions;
import com.sun.management.GarbageCollectionNotificationInfo;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryUsage;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Logger;
import javax.management.Notification;
import javax.management.NotificationEmitter;
import javax.management.NotificationListener;
import javax.management.openmbean.CompositeData;

/**
 * Monitor the size of the retained heap and exit promptly if it grows too large. Specifically,
 * check the size of the tenured space after each major GC; if its occupancy exceeds the
 * percentage threshold passed to the constructor, call {@code System.gc()} to trigger a
 * stop-the-world collection; if the space is still above the threshold after that collection,
 * exit with an {@link OutOfMemoryError}.
 *
 * <p>The comparison is strict: occupancy exactly equal to the threshold does not trigger any
 * action.
 */
class RetainedHeapLimiter implements NotificationListener {
  private static final Logger LOG = Logger.getLogger(RetainedHeapLimiter.class.getName());

  /** Minimum interval between two {@code System.gc()} calls issued by this class. */
  private static final long MIN_TIME_BETWEEN_TRIGGERED_GC_MILLISECONDS = 60000;

  /** Set by {@link #install}; guards against registering the listener twice. */
  private boolean installed = false;

  /** Flipped to true by the first notification that decides to crash, so it happens once. */
  private final AtomicBoolean throwingOom = new AtomicBoolean(false);

  /** Wall-clock time (ms) at which this class last returned from a forced {@code System.gc()}. */
  private long lastTriggeredGcInMilliseconds = 0;

  /** Tenured-space occupancy, in percent, above which this class takes action. */
  private final int occupiedHeapPercentageThreshold;

  /**
   * @param occupiedHeapPercentageThreshold percentage of the tenured space that must be exceeded
   *     after a collection before a full GC is forced (and, if that does not help, the runtime is
   *     crashed)
   */
  RetainedHeapLimiter(int occupiedHeapPercentageThreshold) {
    this.occupiedHeapPercentageThreshold = occupiedHeapPercentageThreshold;
  }

  /**
   * Registers this object as a notification listener on every garbage collector that manages a
   * tenured memory pool.
   *
   * @throws IllegalStateException if no collector reports a pool recognised by
   *     {@link #isTenuredSpace}
   */
  void install() {
    Preconditions.checkState(!installed, "RetainedHeapLimiter installed twice");
    installed = true;
    List<GarbageCollectorMXBean> gcbeans = ManagementFactory.getGarbageCollectorMXBeans();
    boolean foundTenured = false;
    // Examine all collectors and register for notifications from those which collect the tenured
    // space. Normally there is one such collector.
    for (GarbageCollectorMXBean gcbean : gcbeans) {
      boolean collectsTenured = false;
      for (String name : gcbean.getMemoryPoolNames()) {
        collectsTenured |= isTenuredSpace(name);
      }
      if (collectsTenured) {
        foundTenured = true;
        NotificationEmitter emitter = (NotificationEmitter) gcbean;
        emitter.addNotificationListener(this, null, null);
      }
    }
    if (!foundTenured) {
      throw new IllegalStateException(
          "Can't find tenured space; update this class for a new collector");
    }
  }

  /**
   * Reacts to a garbage-collection notification: if a tenured pool is above the threshold after
   * the collection, either crashes (when the collection was caused by {@code System.gc()}) or
   * forces a full GC (at most once per {@link #MIN_TIME_BETWEEN_TRIGGERED_GC_MILLISECONDS}).
   * Notifications of any other type are ignored.
   */
  @Override
  public void handleNotification(Notification notification, Object handback) {
    if (!notification
        .getType()
        .equals(GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION)) {
      return;
    }
    GarbageCollectionNotificationInfo info =
        GarbageCollectionNotificationInfo.from((CompositeData) notification.getUserData());
    Map<String, MemoryUsage> spaces = info.getGcInfo().getMemoryUsageAfterGc();
    for (Map.Entry<String, MemoryUsage> entry : spaces.entrySet()) {
      if (!isTenuredSpace(entry.getKey())) {
        continue;
      }
      MemoryUsage space = entry.getValue();
      if (space.getMax() <= 0) {
        // The CMS collector sometimes passes us nonsense stats (max == 0), and
        // MemoryUsage.getMax() is documented to return -1 when the maximum is undefined. No
        // meaningful percentage can be computed in either case.
        continue;
      }

      long percentUsed = 100 * space.getUsed() / space.getMax();
      if (percentUsed <= occupiedHeapPercentageThreshold) {
        continue;
      }

      if (info.getGcCause().equals("System.gc()") && !throwingOom.getAndSet(true)) {
        // Assume we got here from a GC initiated by the other branch.
        String exitMsg =
            String.format(
                "RetainedHeapLimiter forcing exit due to GC thrashing: tenured space "
                    + "%s out of %s (>%s%%) occupied after back-to-back full GCs",
                space.getUsed(),
                space.getMax(),
                occupiedHeapPercentageThreshold);
        System.err.println(exitMsg);
        LOG.info(exitMsg);
        // Exits the runtime.
        BugReport.handleCrash(new OutOfMemoryError(exitMsg));
      } else if (System.currentTimeMillis() - lastTriggeredGcInMilliseconds
          > MIN_TIME_BETWEEN_TRIGGERED_GC_MILLISECONDS) {
        LOG.info(
            "Triggering a full GC with "
                + space.getUsed()
                + " out of "
                + space.getMax()
                + " used");
        // Force a full stop-the-world GC and see if it can get us below the threshold.
        System.gc();
        lastTriggeredGcInMilliseconds = System.currentTimeMillis();
      }
    }
  }

  /** Returns true if {@code name} is one of the tenured (old generation) pool names known here. */
  private static boolean isTenuredSpace(String name) {
    return "CMS Old Gen".equals(name)
        || "G1 Old Gen".equals(name)
        || "PS Old Gen".equals(name)
        || "Tenured Gen".equals(name);
  }
}
// -- cgit v1.2.3