diff options
author | 2018-07-01 21:29:19 -0700 | |
---|---|---|
committer | 2018-07-01 21:30:22 -0700 | |
commit | 4f64b77a3dd8e4ccdc8077051927985f9578a3a5 (patch) | |
tree | 29bd53b6118db8d9f6d135425d10f2842c575e74 /src/main/java/com/google/devtools/build/skyframe | |
parent | b9166943e5dc23f7ce8ef2dce3cb98e5ce26e50b (diff) |
Native Skyframe support for node restarting
Useful for attempting to recover relationships between Skyframe graph
state and external systems, when the evaluation of a Skyframe node has
the side effect of creating that relationship.
Currently, only supported in graph evaluations when reverse dependency
edges are not tracked.
RELNOTES: None.
PiperOrigin-RevId: 202892953
Diffstat (limited to 'src/main/java/com/google/devtools/build/skyframe')
4 files changed, 84 insertions, 28 deletions
diff --git a/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java b/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java index 1a2cab9b4b..c2f8561c32 100644 --- a/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java +++ b/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java @@ -33,12 +33,14 @@ import com.google.devtools.build.skyframe.NodeEntry.DependencyState; import com.google.devtools.build.skyframe.NodeEntry.DirtyState; import com.google.devtools.build.skyframe.ParallelEvaluatorContext.EnqueueParentBehavior; import com.google.devtools.build.skyframe.QueryableGraph.Reason; +import com.google.devtools.build.skyframe.SkyFunction.Restart; import com.google.devtools.build.skyframe.SkyFunctionEnvironment.UndonePreviouslyRequestedDep; import com.google.devtools.build.skyframe.SkyFunctionException.ReifiedSkyFunctionException; import java.time.Duration; import java.util.Collection; import java.util.Iterator; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.ForkJoinPool; import java.util.logging.Logger; @@ -179,8 +181,10 @@ public abstract class AbstractParallelEvaluator { throws InterruptedException { return depGroup.size() == 1 && depGroup.contains(ErrorTransienceValue.KEY) - && !graph.get( - null, Reason.OTHER, ErrorTransienceValue.KEY).getVersion().atMost(entry.getVersion()); + && !graph + .get(null, Reason.OTHER, ErrorTransienceValue.KEY) + .getVersion() + .atMost(entry.getVersion()); } private DirtyOutcome maybeHandleDirtyNode(NodeEntry state) throws InterruptedException { @@ -363,20 +367,21 @@ public abstract class AbstractParallelEvaluator { Set<SkyKey> oldDeps = state.getAllRemainingDirtyDirectDeps(); SkyFunctionEnvironment env; try { - evaluatorContext.getProgressReceiver().stateStarting(skyKey, - NodeState.INITIALIZING_ENVIRONMENT); + evaluatorContext + .getProgressReceiver() + .stateStarting(skyKey, NodeState.INITIALIZING_ENVIRONMENT); env = new SkyFunctionEnvironment( skyKey, state.getTemporaryDirectDeps(), oldDeps, evaluatorContext); } catch (UndonePreviouslyRequestedDep undonePreviouslyRequestedDep) { // If a previously requested dep is no longer done, restart this node from scratch. - maybeEraseNodeToRestartFromScratch( - skyKey, state, SkyFunction.SENTINEL_FOR_RESTART_FROM_SCRATCH); + restart(skyKey, state); evaluatorContext.getVisitor().enqueueEvaluation(skyKey); return; } finally { - evaluatorContext.getProgressReceiver().stateEnding(skyKey, - NodeState.INITIALIZING_ENVIRONMENT, -1); + evaluatorContext + .getProgressReceiver() + .stateEnding(skyKey, NodeState.INITIALIZING_ENVIRONMENT, -1); } SkyFunctionName functionName = skyKey.functionName(); SkyFunction factory = @@ -476,7 +481,7 @@ public abstract class AbstractParallelEvaluator { env.doneBuilding(); } - if (maybeEraseNodeToRestartFromScratch(skyKey, state, value)) { + if (maybeHandleRestart(skyKey, state, value)) { evaluatorContext.getVisitor().enqueueEvaluation(skyKey); return; } @@ -652,15 +657,36 @@ public abstract class AbstractParallelEvaluator { private static final int MAX_REVERSEDEP_DUMP_LENGTH = 1000; } - private boolean maybeEraseNodeToRestartFromScratch( - SkyKey key, NodeEntry entry, SkyValue returnedValue) { - if (!SkyFunction.SENTINEL_FOR_RESTART_FROM_SCRATCH.equals(returnedValue)) { + /** + * If {@code returnedValue} is a {@link Restart} value, then {@code entry} will be reset, and the + * nodes specified by {@code returnedValue.getAdditionalKeysToRestart()} will be marked changed. + * + * @return {@code returnedValue instanceof Restart} + */ + private boolean maybeHandleRestart(SkyKey key, NodeEntry entry, SkyValue returnedValue) + throws InterruptedException { + if (!(returnedValue instanceof Restart)) { return false; } - evaluatorContext - .getGraphInconsistencyReceiver() - .noteInconsistencyAndMaybeThrow(key, /*otherKey=*/ null, Inconsistency.RESET_REQUESTED); - entry.resetForRestartFromScratch(); + restart(key, entry); + + Restart restart = (Restart) returnedValue; + + Map<SkyKey, ? extends NodeEntry> additionalNodesToRestart = + this.evaluatorContext + .getBatchValues(key, Reason.INVALIDATION, restart.getAdditionalKeysToRestart()); + for (Entry<SkyKey, ? extends NodeEntry> restartEntry : additionalNodesToRestart.entrySet()) { + evaluatorContext + .getGraphInconsistencyReceiver() + .noteInconsistencyAndMaybeThrow( + restartEntry.getKey(), + /*otherKey=*/ key, + Inconsistency.CHILD_FORCED_REEVALUATION_BY_PARENT); + // Nodes are marked changed to ensure that they run. (Also, marking dirty-but-not-changed + // would fail if the node has no deps, because dirtying works only when nodes have deps.) + restartEntry.getValue().markDirty(/*isChanged=*/ true); + } + // TODO(mschaller): rdeps of children have to be handled here. If the graph does not keep edges, // nothing has to be done, since there are no reverse deps to keep consistent. If the graph // keeps edges, it's a harder problem. The reverse deps could just be removed, but in the case @@ -670,6 +696,13 @@ public abstract class AbstractParallelEvaluator { return true; } + private void restart(SkyKey key, NodeEntry entry) { + evaluatorContext + .getGraphInconsistencyReceiver() + .noteInconsistencyAndMaybeThrow(key, /*otherKey=*/ null, Inconsistency.RESET_REQUESTED); + entry.resetForRestartFromScratch(); + } + void propagateEvaluatorContextCrashIfAny() { if (!evaluatorContext.getVisitor().getCrashes().isEmpty()) { evaluatorContext diff --git a/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java b/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java index 5d99c7d2ad..c668b26e0e 100644 --- a/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java +++ b/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java @@ -31,6 +31,7 @@ public interface GraphInconsistencyReceiver { enum Inconsistency { RESET_REQUESTED, CHILD_MISSING_FOR_DIRTY_NODE, + CHILD_FORCED_REEVALUATION_BY_PARENT, CHILD_UNDONE_FOR_BUILDING_NODE } diff --git a/src/main/java/com/google/devtools/build/skyframe/NodeEntry.java b/src/main/java/com/google/devtools/build/skyframe/NodeEntry.java index a9f1d78bfd..1e6169496b 100644 --- a/src/main/java/com/google/devtools/build/skyframe/NodeEntry.java +++ b/src/main/java/com/google/devtools/build/skyframe/NodeEntry.java @@ -362,8 +362,8 @@ public interface NodeEntry extends ThinNodeEntry { /** * Erases all stored work during this evaluation from this entry, namely all temporary direct * deps. The entry will be as if it had never evaluated at this version. Called after the {@link - * SkyFunction} for this entry returns {@link SkyFunction#SENTINEL_FOR_RESTART_FROM_SCRATCH}, - * indicating that something went wrong in external state and the evaluation has to be restarted. + * SkyFunction} for this entry returns {@link SkyFunction.Restart}, indicating that something went + * wrong in external state and the evaluation has to be restarted. */ @ThreadSafe void resetForRestartFromScratch(); diff --git a/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java b/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java index 42019886ec..6c34940c10 100644 --- a/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java +++ b/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java @@ -14,6 +14,7 @@ package com.google.devtools.build.skyframe; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.events.ExtendedEventHandler; import com.google.devtools.build.lib.util.GroupedList; @@ -46,8 +47,8 @@ public interface SkyFunction { * another containing that additional context. If it has no such additional context, then it * should allow its dependency's exception to be thrown through it. * - * <p>This method may return {@link #SENTINEL_FOR_RESTART_FROM_SCRATCH} in rare circumstances. See - * its docs. Do not return this value unless you know exactly what you are doing. + * <p>This method may return {@link Restart} in rare circumstances. See its docs. Do not return + * values of this type unless you know exactly what you are doing. * * @throws SkyFunctionException on failure * @throws InterruptedException if interrupted @@ -69,15 +70,36 @@ public interface SkyFunction { String extractTag(SkyKey skyKey); /** - * Sentinel value for {@link #compute} to return, indicating that something went wrong in external - * state and the evaluation has to be restarted from scratch, ignoring any deps that the {@link - * #compute} function may have declared during evaluation at this version (including deps declared - * during previous calls that returned null). Common causes for returning this would be data loss, - * if a dep's data is not actually available externally. In this case, the {@link SkyFunction} - * will likely dirty the unusable dep to force its re-evalution when re-requested by the restarted - * entry's computation. + * Sentinel {@link SkyValue} type for {@link #compute} to return, indicating that something went + * wrong, and that the evaluation returning this value must be restarted, and the nodes associated + * with the specified keys (which should be direct or transitive dependencies of the failed + * evaluation) must also be restarted. + * + * <p>An intended cause for returning this is external data loss; e.g., if a dependency's + * "done-ness" is intended to mean that certain data is available in an external system, but + * during evaluation of a node that depends on that external data, that data has gone missing, and + * reevaluation of the dependency is expected to repair the discrepancy. + * + * <p>Values of this type will <em>never</em> be returned by {@link Environment}'s getValue + * methods or from {@link NodeEntry#getValue()}. + * + * <p>TODO(mschaller): the ability to specify arbitrary additional keys to restart is error-prone. + * It would be safer to require nodes requesting restarts to provide dependency paths, which the + * framework could efficiently verify before restarting. */ - SkyValue SENTINEL_FOR_RESTART_FROM_SCRATCH = new SkyValue() {}; + interface Restart extends SkyValue { + Restart SELF = ImmutableList::of; + + static Restart selfAnd(SkyKey... additionalKeysToRestart) { + return selfAnd(ImmutableList.copyOf(additionalKeysToRestart)); + } + + static Restart selfAnd(ImmutableList<SkyKey> additionalKeysToRestart) { + return () -> additionalKeysToRestart; + } + + ImmutableList<SkyKey> getAdditionalKeysToRestart(); + } /** * The services provided to the {@link SkyFunction#compute} implementation by the Skyframe |