aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools/build/skyframe
diff options
context:
space:
mode:
authorGravatar mschaller <mschaller@google.com>2018-07-01 21:29:19 -0700
committerGravatar Copybara-Service <copybara-piper@google.com>2018-07-01 21:30:22 -0700
commit4f64b77a3dd8e4ccdc8077051927985f9578a3a5 (patch)
tree29bd53b6118db8d9f6d135425d10f2842c575e74 /src/main/java/com/google/devtools/build/skyframe
parentb9166943e5dc23f7ce8ef2dce3cb98e5ce26e50b (diff)
Native Skyframe support for node restarting
Useful for attempting to recover relationships between Skyframe graph state and external systems, when the evaluation of a Skyframe node has the side effect of creating that relationship. Currently, only supported in graph evaluations when reverse dependency edges are not tracked. RELNOTES: None. PiperOrigin-RevId: 202892953
Diffstat (limited to 'src/main/java/com/google/devtools/build/skyframe')
-rw-r--r--src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java65
-rw-r--r--src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java1
-rw-r--r--src/main/java/com/google/devtools/build/skyframe/NodeEntry.java4
-rw-r--r--src/main/java/com/google/devtools/build/skyframe/SkyFunction.java42
4 files changed, 84 insertions, 28 deletions
diff --git a/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java b/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java
index 1a2cab9b4b..c2f8561c32 100644
--- a/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java
+++ b/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java
@@ -33,12 +33,14 @@ import com.google.devtools.build.skyframe.NodeEntry.DependencyState;
import com.google.devtools.build.skyframe.NodeEntry.DirtyState;
import com.google.devtools.build.skyframe.ParallelEvaluatorContext.EnqueueParentBehavior;
import com.google.devtools.build.skyframe.QueryableGraph.Reason;
+import com.google.devtools.build.skyframe.SkyFunction.Restart;
import com.google.devtools.build.skyframe.SkyFunctionEnvironment.UndonePreviouslyRequestedDep;
import com.google.devtools.build.skyframe.SkyFunctionException.ReifiedSkyFunctionException;
import java.time.Duration;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ForkJoinPool;
import java.util.logging.Logger;
@@ -179,8 +181,10 @@ public abstract class AbstractParallelEvaluator {
throws InterruptedException {
return depGroup.size() == 1
&& depGroup.contains(ErrorTransienceValue.KEY)
- && !graph.get(
- null, Reason.OTHER, ErrorTransienceValue.KEY).getVersion().atMost(entry.getVersion());
+ && !graph
+ .get(null, Reason.OTHER, ErrorTransienceValue.KEY)
+ .getVersion()
+ .atMost(entry.getVersion());
}
private DirtyOutcome maybeHandleDirtyNode(NodeEntry state) throws InterruptedException {
@@ -363,20 +367,21 @@ public abstract class AbstractParallelEvaluator {
Set<SkyKey> oldDeps = state.getAllRemainingDirtyDirectDeps();
SkyFunctionEnvironment env;
try {
- evaluatorContext.getProgressReceiver().stateStarting(skyKey,
- NodeState.INITIALIZING_ENVIRONMENT);
+ evaluatorContext
+ .getProgressReceiver()
+ .stateStarting(skyKey, NodeState.INITIALIZING_ENVIRONMENT);
env =
new SkyFunctionEnvironment(
skyKey, state.getTemporaryDirectDeps(), oldDeps, evaluatorContext);
} catch (UndonePreviouslyRequestedDep undonePreviouslyRequestedDep) {
// If a previously requested dep is no longer done, restart this node from scratch.
- maybeEraseNodeToRestartFromScratch(
- skyKey, state, SkyFunction.SENTINEL_FOR_RESTART_FROM_SCRATCH);
+ restart(skyKey, state);
evaluatorContext.getVisitor().enqueueEvaluation(skyKey);
return;
} finally {
- evaluatorContext.getProgressReceiver().stateEnding(skyKey,
- NodeState.INITIALIZING_ENVIRONMENT, -1);
+ evaluatorContext
+ .getProgressReceiver()
+ .stateEnding(skyKey, NodeState.INITIALIZING_ENVIRONMENT, -1);
}
SkyFunctionName functionName = skyKey.functionName();
SkyFunction factory =
@@ -476,7 +481,7 @@ public abstract class AbstractParallelEvaluator {
env.doneBuilding();
}
- if (maybeEraseNodeToRestartFromScratch(skyKey, state, value)) {
+ if (maybeHandleRestart(skyKey, state, value)) {
evaluatorContext.getVisitor().enqueueEvaluation(skyKey);
return;
}
@@ -652,15 +657,36 @@ public abstract class AbstractParallelEvaluator {
private static final int MAX_REVERSEDEP_DUMP_LENGTH = 1000;
}
- private boolean maybeEraseNodeToRestartFromScratch(
- SkyKey key, NodeEntry entry, SkyValue returnedValue) {
- if (!SkyFunction.SENTINEL_FOR_RESTART_FROM_SCRATCH.equals(returnedValue)) {
+ /**
+ * If {@code returnedValue} is a {@link Restart} value, then {@code entry} will be reset, and the
+ * nodes specified by {@code returnedValue.getAdditionalKeysToRestart()} will be marked changed.
+ *
+ * @return {@code returnedValue instanceof Restart}
+ */
+ private boolean maybeHandleRestart(SkyKey key, NodeEntry entry, SkyValue returnedValue)
+ throws InterruptedException {
+ if (!(returnedValue instanceof Restart)) {
return false;
}
- evaluatorContext
- .getGraphInconsistencyReceiver()
- .noteInconsistencyAndMaybeThrow(key, /*otherKey=*/ null, Inconsistency.RESET_REQUESTED);
- entry.resetForRestartFromScratch();
+ restart(key, entry);
+
+ Restart restart = (Restart) returnedValue;
+
+ Map<SkyKey, ? extends NodeEntry> additionalNodesToRestart =
+ this.evaluatorContext
+ .getBatchValues(key, Reason.INVALIDATION, restart.getAdditionalKeysToRestart());
+ for (Entry<SkyKey, ? extends NodeEntry> restartEntry : additionalNodesToRestart.entrySet()) {
+ evaluatorContext
+ .getGraphInconsistencyReceiver()
+ .noteInconsistencyAndMaybeThrow(
+ restartEntry.getKey(),
+ /*otherKey=*/ key,
+ Inconsistency.CHILD_FORCED_REEVALUATION_BY_PARENT);
+ // Nodes are marked changed to ensure that they run. (Also, marking dirty-but-not-changed
+ // would fail if the node has no deps, because dirtying works only when nodes have deps.)
+ restartEntry.getValue().markDirty(/*isChanged=*/ true);
+ }
+
// TODO(mschaller): rdeps of children have to be handled here. If the graph does not keep edges,
// nothing has to be done, since there are no reverse deps to keep consistent. If the graph
// keeps edges, it's a harder problem. The reverse deps could just be removed, but in the case
@@ -670,6 +696,13 @@ public abstract class AbstractParallelEvaluator {
return true;
}
+ private void restart(SkyKey key, NodeEntry entry) {
+ evaluatorContext
+ .getGraphInconsistencyReceiver()
+ .noteInconsistencyAndMaybeThrow(key, /*otherKey=*/ null, Inconsistency.RESET_REQUESTED);
+ entry.resetForRestartFromScratch();
+ }
+
void propagateEvaluatorContextCrashIfAny() {
if (!evaluatorContext.getVisitor().getCrashes().isEmpty()) {
evaluatorContext
diff --git a/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java b/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java
index 5d99c7d2ad..c668b26e0e 100644
--- a/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java
+++ b/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java
@@ -31,6 +31,7 @@ public interface GraphInconsistencyReceiver {
enum Inconsistency {
RESET_REQUESTED,
CHILD_MISSING_FOR_DIRTY_NODE,
+ CHILD_FORCED_REEVALUATION_BY_PARENT,
CHILD_UNDONE_FOR_BUILDING_NODE
}
diff --git a/src/main/java/com/google/devtools/build/skyframe/NodeEntry.java b/src/main/java/com/google/devtools/build/skyframe/NodeEntry.java
index a9f1d78bfd..1e6169496b 100644
--- a/src/main/java/com/google/devtools/build/skyframe/NodeEntry.java
+++ b/src/main/java/com/google/devtools/build/skyframe/NodeEntry.java
@@ -362,8 +362,8 @@ public interface NodeEntry extends ThinNodeEntry {
/**
* Erases all stored work during this evaluation from this entry, namely all temporary direct
* deps. The entry will be as if it had never evaluated at this version. Called after the {@link
- * SkyFunction} for this entry returns {@link SkyFunction#SENTINEL_FOR_RESTART_FROM_SCRATCH},
- * indicating that something went wrong in external state and the evaluation has to be restarted.
+ * SkyFunction} for this entry returns {@link SkyFunction.Restart}, indicating that something went
+ * wrong in external state and the evaluation has to be restarted.
*/
@ThreadSafe
void resetForRestartFromScratch();
diff --git a/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java b/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java
index 42019886ec..6c34940c10 100644
--- a/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java
+++ b/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java
@@ -14,6 +14,7 @@
package com.google.devtools.build.skyframe;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.util.GroupedList;
@@ -46,8 +47,8 @@ public interface SkyFunction {
* another containing that additional context. If it has no such additional context, then it
* should allow its dependency's exception to be thrown through it.
*
- * <p>This method may return {@link #SENTINEL_FOR_RESTART_FROM_SCRATCH} in rare circumstances. See
- * its docs. Do not return this value unless you know exactly what you are doing.
+ * <p>This method may return {@link Restart} in rare circumstances. See its docs. Do not return
+ * values of this type unless you know exactly what you are doing.
*
* @throws SkyFunctionException on failure
* @throws InterruptedException if interrupted
@@ -69,15 +70,36 @@ public interface SkyFunction {
String extractTag(SkyKey skyKey);
/**
- * Sentinel value for {@link #compute} to return, indicating that something went wrong in external
- * state and the evaluation has to be restarted from scratch, ignoring any deps that the {@link
- * #compute} function may have declared during evaluation at this version (including deps declared
- * during previous calls that returned null). Common causes for returning this would be data loss,
- * if a dep's data is not actually available externally. In this case, the {@link SkyFunction}
- * will likely dirty the unusable dep to force its re-evalution when re-requested by the restarted
- * entry's computation.
+ * Sentinel {@link SkyValue} type for {@link #compute} to return, indicating that something went
+ * wrong, and that the evaluation returning this value must be restarted, and the nodes associated
+ * with the specified keys (which should be direct or transitive dependencies of the failed
+ * evaluation) must also be restarted.
+ *
+ * <p>An intended cause for returning this is external data loss; e.g., if a dependency's
+ * "done-ness" is intended to mean that certain data is available in an external system, but
+ * during evaluation of a node that depends on that external data, that data has gone missing, and
+ * reevaluation of the dependency is expected to repair the discrepancy.
+ *
+ * <p>Values of this type will <em>never</em> be returned by {@link Environment}'s getValue
+ * methods or from {@link NodeEntry#getValue()}.
+ *
+ * <p>TODO(mschaller): the ability to specify arbitrary additional keys to restart is error-prone.
+ * It would be safer to require nodes requesting restarts to provide dependency paths, which the
+ * framework could efficiently verify before restarting.
*/
- SkyValue SENTINEL_FOR_RESTART_FROM_SCRATCH = new SkyValue() {};
+ interface Restart extends SkyValue {
+ Restart SELF = ImmutableList::of;
+
+ static Restart selfAnd(SkyKey... additionalKeysToRestart) {
+ return selfAnd(ImmutableList.copyOf(additionalKeysToRestart));
+ }
+
+ static Restart selfAnd(ImmutableList<SkyKey> additionalKeysToRestart) {
+ return () -> additionalKeysToRestart;
+ }
+
+ ImmutableList<SkyKey> getAdditionalKeysToRestart();
+ }
/**
* The services provided to the {@link SkyFunction#compute} implementation by the Skyframe