// RUN: %dafny /compile:0 /dprint:"%t.dprint" /autoTriggers:0 "%s" > "%t" // RUN: %diff "%s.expect" "%t" // Schorr-Waite algorithms, written and verified in Dafny. // Rustan Leino // Original version: 7 November 2008 // Version with proof divided into stages: June 2012. // Copyright (c) 2008-2012 Microsoft. abstract module M0 { // In this module, we declare the Node class, the definition of Reachability, and the specification // and implementation of the Schorr-Waite algorithm. class Node { var children: seq var marked: bool var childrenVisited: nat } datatype Path = Empty | Extend(Path, Node) predicate Reachable(source: Node, sink: Node, S: set) reads S requires null !in S { exists via: Path :: ReachableVia(source, via, sink, S) } predicate ReachableVia(source: Node, p: Path, sink: Node, S: set) reads S requires null !in S decreases p { match p case Empty => source == sink case Extend(prefix, n) => n in S && sink in n.children && ReachableVia(source, prefix, n, S) } method SchorrWaite(root: Node, ghost S: set) requires root in S // S is closed under 'children': requires forall n :: n in S ==> n != null && forall ch :: ch in n.children ==> ch == null || ch in S // the graph starts off with nothing marked and nothing being indicated as currently being visited: requires forall n :: n in S ==> !n.marked && n.childrenVisited == 0 modifies S // nodes reachable from 'root' are marked: ensures root.marked ensures forall n :: n in S && n.marked ==> forall ch :: ch in n.children && ch != null ==> ch.marked // every marked node was reachable from 'root' in the pre-state: ensures forall n :: n in S && n.marked ==> old(Reachable(root, n, S)) // the structure of the graph has not changed: ensures forall n :: n in S ==> n.childrenVisited == old(n.childrenVisited) && n.children == old(n.children) decreases * // leave termination checking for a later refinement { root.marked := true; var t, p := root, null; ghost var stackNodes := []; while true // stackNodes is a sequence of nodes from S: invariant forall n :: n in stackNodes ==> n in S // The current node, t, is not included in stackNodes. Rather, t is just above the top of stackNodes. // We say that the stack stackNodes+[t] are the "active" nodes. invariant t in S && t !in stackNodes // p points to the parent of the current node, that is, the node through which t was encountered in the // depth-first traversal. This amounts to being the top of stackNodes, or null if stackNodes is empty. // Note, it may seem like the variable p is redundant, since it holds a value that can be computed // from stackNodes. But note that stackNodes is a ghost variable and won't be present at run // time, where p is a physical variable that will be present at run time. invariant p == if |stackNodes| == 0 then null else stackNodes[|stackNodes|-1] // The .childrenVisited field is the extra information that the Schorr-Waite algorithm needs. It // is used only for the active nodes, where it keeps track of how many of a node's children have been // processed. For the nodes on stackNodes, .childrenVisited is less than the number of children, so // it is an index of a child. For t, .childrenVisited may be as large as all of the children, which // indicates that the node is ready to be popped off the stack of active nodes. For all other nodes, // .childrenVisited is the original value, which is 0. invariant forall n :: n in stackNodes ==> n.childrenVisited < |n.children| invariant t.childrenVisited <= |t.children| invariant forall n :: n in S && n !in stackNodes && n != t ==> n.childrenVisited == 0 // To represent the stackNodes, the algorithm reverses children pointers. It never changes the number // of children a node has. The only nodes with children pointers different than the original values are // the nodes on stackNodes; moreover, only the index of the currently active child of a node is different. invariant forall n :: n in stackNodes ==> |n.children| == old(|n.children|) && forall j :: 0 <= j < |n.children| ==> j == n.childrenVisited || n.children[j] == old(n.children[j]) invariant forall n :: n in S && n !in stackNodes ==> n.children == old(n.children) // The children pointers that have been changed form a stack. That is, the active child of stackNodes[k] // points to stackNodes[k-1], with the end case pointing to null. invariant 0 < |stackNodes| ==> stackNodes[0].children[stackNodes[0].childrenVisited] == null invariant forall k :: 0 < k < |stackNodes| ==> stackNodes[k].children[stackNodes[k].childrenVisited] == stackNodes[k-1] // We also need to keep track of what the original values of the children pointers had been. Here, we // have that the active child of stackNodes[k] used to be stackNodes[k+1], with the end case pointing // to t. invariant forall k :: 0 <= k < |stackNodes|-1 ==> old(stackNodes[k].children)[stackNodes[k].childrenVisited] == stackNodes[k+1] invariant 0 < |stackNodes| ==> old(stackNodes[|stackNodes|-1].children)[stackNodes[|stackNodes|-1].childrenVisited] == t decreases * // leave termination checking for a later refinement { if t.childrenVisited == |t.children| { // pop t.childrenVisited := 0; if p == null { break; } t, p, p.children := p, p.children[p.childrenVisited], p.children[p.childrenVisited := t]; stackNodes := stackNodes[..|stackNodes| - 1]; t.childrenVisited := t.childrenVisited + 1; } else if t.children[t.childrenVisited] == null || t.children[t.childrenVisited].marked { // just advance to next child t.childrenVisited := t.childrenVisited + 1; } else { // push stackNodes := stackNodes + [t]; t, p, t.children := t.children[t.childrenVisited], t, t.children[t.childrenVisited := p]; t.marked := true; // To prove that this "if" branch maintains the invariant "t !in stackNodes" will require // some more properties about the loop. Therefore, we just assume the property here and // prove it in a separate refinement. assume t !in stackNodes; } } // From the loop invariant, it now follows that all children pointers have been restored, // and so have all .childrenVisited fields. Thus, the last postcondition (the one that says // the structure of the graph has not been changed) has been established. // Eventually, we also need to prove that exactly the right nodes have been marked, // but let's just assume those properties for now and prove them in later refinements: assume root.marked && forall n :: n in S && n.marked ==> forall ch :: ch in n.children && ch != null ==> ch.marked; assume forall n :: n in S && n.marked ==> old(Reachable(root, n, S)); } } abstract module M1 refines M0 { // In this superposition, we start reasoning about the marks. In particular, we prove that the method // marks all reachable nodes. method SchorrWaite... { ...; while ... // The loop keeps marking nodes: The root is always marked. All children of any marked non-active // node are marked. Active nodes are marked, and the first .childrenVisited of every active node // are marked. invariant root.marked invariant forall n :: n in S && n.marked && n !in stackNodes && n != t ==> forall ch :: ch in n.children && ch != null ==> ch.marked invariant forall n :: n in stackNodes || n == t ==> n.marked && forall j :: 0 <= j < n.childrenVisited ==> n.children[j] == null || n.children[j].marked { if ... { // pop } else if ... { // next child } else { // push ...; // With the new loop invariants, we know that all active nodes are marked. Since the new value // of "t" was not marked at the beginning of this iteration, we can now prove that the invariant // "t !in stackNodes" is maintained, so we'll refine the assume from above with an assert. assert ...; } } // The new loop invariants give us a lower bound on which nodes are marked. Hence, we can now // discharge the "everything reachable is marked" postcondition, whose proof we postponed above // by supplying an assume statement. Here, we refine that assume statement into an assert. assert ...; assume ...; } } abstract module M2 refines M1 { // In this superposition, we prove that only reachable nodes are marked. Essentially, we want // to add a loop invariant that says t is reachable from root, because then the loop invariant // that marked nodes are reachable follows. More precisely, we need to say that the node // referenced by t is *in the initial state* reachable from root--because as we change // children pointers during the course of the algorithm, what is reachable at some point in // time may be different from what was reachable initially. // To do our proof, which involves establishing reachability between various nodes, we need // some additional bookkeeping. In particular, we keep track of the path from root to t, // and we associate with every marked node the path to it from root. The former is easily // maintained in a local ghost variable; the latter is most easily represented as a ghost // field in each node (an alternative would be to have a local variable that is a map from // nodes to paths). So, we add a field declaration to the Node class: class Node { ghost var pathFromRoot: Path } method SchorrWaite... { ghost var path := Path.Empty; root.pathFromRoot := path; ...; while ... // There's a subtle complication when we speak of old(ReachableVia(... P ...)) for a path // P. The expression old(...) says to evaluate the expression "..." in the pre-state of // the method. So, old(ReachableVia(...)) says to evaluate ReachableVia(...) in the pre- // state of the method. But in order for the "old(...)" expression to be well-formed, // the subexpressions of the "..." must only refer to values that existed in the pre-state // of the method. This incurs the proof obligation that any objects referenced by the // parameters of ReachableVia(...) must have been allocated in the pre-state of the // method. The proof obligation is easy to establish for root, t, and S (since root and // S were given as in-parameters to the method, and we have "t in S"). But what about // the path argument to ReachableVia? Since datatype values of type Path contain object // references, we need to make sure we can deal with the proof obligation for the path // argument. For this reason, we add invariants that say that "path" and the .pathFromRoot // field of all marked nodes contain values that make sense in the pre-state. invariant !fresh(path) && old(ReachableVia(root, path, t, S)) invariant forall n :: n in S && n.marked ==> var pth := n.pathFromRoot; !fresh(pth) && old(ReachableVia(root, pth, n, S)) invariant forall n :: n in S && n.marked ==> old(Reachable(root, n, S)) { if ... { // pop ...; path := t.pathFromRoot; } else if ... { // advance to next child } else { // push path := Path.Extend(path, t); ...; t.pathFromRoot := path; } } // In M0 above, we placed two assume statements here. In M1, we refined the first of these // into an assert. We repeat that assert here: assert ...; // And now we we refine the second assume into an assert, proving that only reachable nodes // have been marked. assert ...; } } module M3 refines M2 { // In this final superposition, we prove termination of the loop. method SchorrWaite... decreases true // say that the method is now one that is proved to terminate (note, the value // 'true' is arbitrary; we just need to supply _some_ decreases clause so that // the 'decreases *' from above is not inherited) { // The loop variant is a lexicographic triple, consisting of (0) the set of unmarked // nodes, (1) the (length of the) stackNodes sequence, and (2) the number children of // the current node that are still to be investigated. We introduce a ghost variable // to keep track of the set of unmarked nodes. ghost var unmarkedNodes := S - {root}; ...; while ... invariant forall n :: n in S && !n.marked ==> n in unmarkedNodes decreases unmarkedNodes, stackNodes, |t.children| - t.childrenVisited { if ... { // pop } else if ... { // next child } else { // push ...; unmarkedNodes := unmarkedNodes - {t}; } } } }