1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
|
// RUN: %dafny /compile:0 /dprint:"%t.dprint" "%s" > "%t"
// RUN: %diff "%s.expect" "%t"
// Schorr-Waite algorithms, written and verified in Dafny.
// Rustan Leino
// Original version: 7 November 2008
// Version with proof divided into stages: June 2012.
// Copyright (c) 2008-2012 Microsoft.
abstract module M0 {
// In this module, we declare the Node class, the definition of Reachability, and the specification
// and implementation of the Schorr-Waite algorithm.
class Node {
var children: seq<Node>
var marked: bool
var childrenVisited: nat
}
datatype Path = Empty | Extend(Path, Node)
predicate Reachable(source: Node, sink: Node, S: set<Node>)
reads S
requires null !in S
{
exists via: Path :: ReachableVia(source, via, sink, S)
}
predicate ReachableVia(source: Node, p: Path, sink: Node, S: set<Node>)
reads S
requires null !in S
decreases p
{
match p
case Empty => source == sink
case Extend(prefix, n) => n in S && sink in n.children && ReachableVia(source, prefix, n, S)
}
method SchorrWaite(root: Node, ghost S: set<Node>)
requires root in S
// S is closed under 'children':
requires forall n :: n in S ==> n != null &&
forall ch :: ch in n.children ==> ch == null || ch in S
// the graph starts off with nothing marked and nothing being indicated as currently being visited:
requires forall n :: n in S ==> !n.marked && n.childrenVisited == 0
modifies S
// nodes reachable from 'root' are marked:
ensures root.marked
ensures forall n :: n in S && n.marked ==>
forall ch :: ch in n.children && ch != null ==> ch.marked
// every marked node was reachable from 'root' in the pre-state:
ensures forall n :: n in S && n.marked ==> old(Reachable(root, n, S))
// the structure of the graph has not changed:
ensures forall n :: n in S ==>
n.childrenVisited == old(n.childrenVisited) &&
n.children == old(n.children)
decreases * // leave termination checking for a later refinement
{
root.marked := true;
var t, p := root, null;
ghost var stackNodes := [];
while true
// stackNodes is a sequence of nodes from S:
invariant forall n :: n in stackNodes ==> n in S
// The current node, t, is not included in stackNodes. Rather, t is just above the top of stackNodes.
// We say that the stack stackNodes+[t] are the "active" nodes.
invariant t in S && t !in stackNodes
// p points to the parent of the current node, that is, the node through which t was encountered in the
// depth-first traversal. This amounts to being the top of stackNodes, or null if stackNodes is empty.
// Note, it may seem like the variable p is redundant, since it holds a value that can be computed
// from stackNodes. But note that stackNodes is a ghost variable and won't be present at run
// time, where p is a physical variable that will be present at run time.
invariant p == if |stackNodes| == 0 then null else stackNodes[|stackNodes|-1]
// The .childrenVisited field is the extra information that the Schorr-Waite algorithm needs. It
// is used only for the active nodes, where it keeps track of how many of a node's children have been
// processed. For the nodes on stackNodes, .childrenVisited is less than the number of children, so
// it is an index of a child. For t, .childrenVisited may be as large as all of the children, which
// indicates that the node is ready to be popped off the stack of active nodes. For all other nodes,
// .childrenVisited is the original value, which is 0.
invariant forall n :: n in stackNodes ==> n.childrenVisited < |n.children|
invariant t.childrenVisited <= |t.children|
invariant forall n :: n in S && n !in stackNodes && n != t ==> n.childrenVisited == 0
// To represent the stackNodes, the algorithm reverses children pointers. It never changes the number
// of children a node has. The only nodes with children pointers different than the original values are
// the nodes on stackNodes; moreover, only the index of the currently active child of a node is different.
invariant forall n :: n in stackNodes ==>
|n.children| == old(|n.children|) &&
forall j :: 0 <= j < |n.children| ==> j == n.childrenVisited || n.children[j] == old(n.children[j])
invariant forall n :: n in S && n !in stackNodes ==> n.children == old(n.children)
// The children pointers that have been changed form a stack. That is, the active child of stackNodes[k]
// points to stackNodes[k-1], with the end case pointing to null.
invariant 0 < |stackNodes| ==>
stackNodes[0].children[stackNodes[0].childrenVisited] == null
invariant forall k :: 0 < k < |stackNodes| ==>
stackNodes[k].children[stackNodes[k].childrenVisited] == stackNodes[k-1]
// We also need to keep track of what the original values of the children pointers had been. Here, we
// have that the active child of stackNodes[k] used to be stackNodes[k+1], with the end case pointing
// to t.
invariant forall k :: 0 <= k < |stackNodes|-1 ==>
old(stackNodes[k].children)[stackNodes[k].childrenVisited] == stackNodes[k+1]
invariant 0 < |stackNodes| ==>
old(stackNodes[|stackNodes|-1].children)[stackNodes[|stackNodes|-1].childrenVisited] == t
decreases * // leave termination checking for a later refinement
{
if t.childrenVisited == |t.children| {
// pop
t.childrenVisited := 0;
if p == null { break; }
t, p, p.children := p, p.children[p.childrenVisited], p.children[p.childrenVisited := t];
stackNodes := stackNodes[..|stackNodes| - 1];
t.childrenVisited := t.childrenVisited + 1;
} else if t.children[t.childrenVisited] == null || t.children[t.childrenVisited].marked {
// just advance to next child
t.childrenVisited := t.childrenVisited + 1;
} else {
// push
stackNodes := stackNodes + [t];
t, p, t.children := t.children[t.childrenVisited], t, t.children[t.childrenVisited := p];
t.marked := true;
// To prove that this "if" branch maintains the invariant "t !in stackNodes" will require
// some more properties about the loop. Therefore, we just assume the property here and
// prove it in a separate refinement.
assume t !in stackNodes;
}
}
// From the loop invariant, it now follows that all children pointers have been restored,
// and so have all .childrenVisited fields. Thus, the last postcondition (the one that says
// the structure of the graph has not been changed) has been established.
// Eventually, we also need to prove that exactly the right nodes have been marked,
// but let's just assume those properties for now and prove them in later refinements:
assume root.marked && forall n :: n in S && n.marked ==>
forall ch :: ch in n.children && ch != null ==> ch.marked;
assume forall n :: n in S && n.marked ==> old(Reachable(root, n, S));
}
}
abstract module M1 refines M0 {
// In this superposition, we start reasoning about the marks. In particular, we prove that the method
// marks all reachable nodes.
method SchorrWaite...
{
...;
while ...
// The loop keeps marking nodes: The root is always marked. All children of any marked non-active
// node are marked. Active nodes are marked, and the first .childrenVisited of every active node
// are marked.
invariant root.marked
invariant forall n :: n in S && n.marked && n !in stackNodes && n != t ==>
forall ch :: ch in n.children && ch != null ==> ch.marked
invariant forall n :: n in stackNodes || n == t ==>
n.marked &&
forall j :: 0 <= j < n.childrenVisited ==> n.children[j] == null || n.children[j].marked
{
if ... { // pop
} else if ... { // next child
} else { // push
...;
// With the new loop invariants, we know that all active nodes are marked. Since the new value
// of "t" was not marked at the beginning of this iteration, we can now prove that the invariant
// "t !in stackNodes" is maintained, so we'll refine the assume from above with an assert.
assert ...;
}
}
// The new loop invariants give us a lower bound on which nodes are marked. Hence, we can now
// discharge the "everything reachable is marked" postcondition, whose proof we postponed above
// by supplying an assume statement. Here, we refine that assume statement into an assert.
assert ...;
assume ...;
}
}
abstract module M2 refines M1 {
// In this superposition, we prove that only reachable nodes are marked. Essentially, we want
// to add a loop invariant that says t is reachable from root, because then the loop invariant
// that marked nodes are reachable follows. More precisely, we need to say that the node
// referenced by t is *in the initial state* reachable from root--because as we change
// children pointers during the course of the algorithm, what is reachable at some point in
// time may be different from what was reachable initially.
// To do our proof, which involves establishing reachability between various nodes, we need
// some additional bookkeeping. In particular, we keep track of the path from root to t,
// and we associate with every marked node the path to it from root. The former is easily
// maintained in a local ghost variable; the latter is most easily represented as a ghost
// field in each node (an alternative would be to have a local variable that is a map from
// nodes to paths). So, we add a field declaration to the Node class:
class Node {
ghost var pathFromRoot: Path
}
method SchorrWaite...
{
ghost var path := Path.Empty;
root.pathFromRoot := path;
...;
while ...
// There's a subtle complication when we speak of old(ReachableVia(... P ...)) for a path
// P. The expression old(...) says to evaluate the expression "..." in the pre-state of
// the method. So, old(ReachableVia(...)) says to evaluate ReachableVia(...) in the pre-
// state of the method. But in order for the "old(...)" expression to be well-formed,
// the subexpressions of the "..." must only refer to values that existed in the pre-state
// of the method. This incurs the proof obligation that any objects referenced by the
// parameters of ReachableVia(...) must have been allocated in the pre-state of the
// method. The proof obligation is easy to establish for root, t, and S (since root and
// S were given as in-parameters to the method, and we have "t in S"). But what about
// the path argument to ReachableVia? Since datatype values of type Path contain object
// references, we need to make sure we can deal with the proof obligation for the path
// argument. For this reason, we add invariants that say that "path" and the .pathFromRoot
// field of all marked nodes contain values that make sense in the pre-state.
invariant !fresh(path) && old(ReachableVia(root, path, t, S))
invariant forall n :: n in S && n.marked ==> var pth := n.pathFromRoot;
!fresh(pth) && old(ReachableVia(root, pth, n, S))
invariant forall n :: n in S && n.marked ==> old(Reachable(root, n, S))
{
if ... {
// pop
...;
path := t.pathFromRoot;
} else if ... {
// advance to next child
} else {
// push
path := Path.Extend(path, t);
...;
t.pathFromRoot := path;
}
}
// In M0 above, we placed two assume statements here. In M1, we refined the first of these
// into an assert. We repeat that assert here:
assert ...;
// And now we we refine the second assume into an assert, proving that only reachable nodes
// have been marked.
assert ...;
}
}
module M3 refines M2 {
// In this final superposition, we prove termination of the loop.
method SchorrWaite...
decreases true // say that the method is now one that is proved to terminate (note, the value
// 'true' is arbitrary; we just need to supply _some_ decreases clause so that
// the 'decreases *' from above is not inherited)
{
// The loop variant is a lexicographic triple, consisting of (0) the set of unmarked
// nodes, (1) the (length of the) stackNodes sequence, and (2) the number children of
// the current node that are still to be investigated. We introduce a ghost variable
// to keep track of the set of unmarked nodes.
ghost var unmarkedNodes := S - {root};
...;
while ...
invariant forall n :: n in S && !n.marked ==> n in unmarkedNodes
decreases unmarkedNodes, stackNodes, |t.children| - t.childrenVisited
{
if ... { // pop
} else if ... { // next child
} else { // push
...;
unmarkedNodes := unmarkedNodes - {t};
}
}
}
}
|