Test/dafny2/MajorityVote.dfy


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360

// RUN: %dafny /compile:0 /dprint:"%t.dprint" "%s" > "%t"
// RUN: %diff "%s.expect" "%t"

// Rustan Leino, June 2012.
// This file verifies an algorithm, due to Boyer and Moore, that finds the majority choice
// among a sequence of votes, see http://www.cs.utexas.edu/~moore/best-ideas/mjrty/.
// Actually, this algorithm is a slight variation on theirs, but the general idea for why
// it is correct is the same.  In the Boyer and Moore algorithm, the loop counter is advanced
// by exactly 1 each iteration, which means that there may or may not be a "current leader".
// In my program below, I had instead written the loop invariant to say there is always a
// "current leader", which requires the loop index sometimes to skip a value.
//
// This file has two versions of the algorithm.  In the first version, the given sequence
// of votes is assumed to have a (strict) majority choice, meaning that strictly more than
// 50% of the votes are for one candidate.  It is convenient to have a name for the majority
// choice, in order to talk about it in specifications.  The easiest way to do this in
// Dafny is probably to introduce a ghost parameter with the given properties.  That's what
// the algorithm does, see parameter K.  The postcondition is thus to output the value of
// K, which is done in the non-ghost out-parameter k.
// The proof of the algorithm requires two lemmas.  These lemmas are proved automatically
// by Dafny's induction tactic.
//
// In the second version of the program, the main method does not assume there is a majority
// choice.  Rather, it eseentially uses the first algorithm and then checks if what it
// returns really is a majority choice.  To do this, the specification of the first algorithm
// needs to be changed slightly to accommodate the possibility that there is no majority
// choice.  That change in specification is also reflected in the loop invariant.  Moreover,
// the algorithm itself now needs to extra 'if' statements to see if the entire sequence
// has been searched through.  (This extra 'if' is essentially already handled by Boyer and
// Moore's algorithm, because it increments the loop index by 1 each iteration and therefore
// already has a special case for the case of running out of sequence elements without a
// current leader.)
// The calling harness, DetermineElection, somewhat existentially comes up with the majority
// choice, if there is such a choice, and then passes in that choice as the ghost parameter K
// to the main algorithm.  Neat, huh?

// Language comment:
// The "(==)" that sits after some type parameters in this program says that the actual
// type argument must support equality.

// Advanced remark:
// There is a subtle situation in the verification of DetermineElection.  Suppose the type
// parameter Candidate denotes some type whose instances depend on which object are
// allocated.  For example, if Candidate is some class type, then more candidates can come
// into being by object allocations (using "new").  What does the quantification of
// candidates "c" in the postcondition of DetermineElection now mean--all candidates that
// existed in the pre-state or (the possibly larger set of) all candidates that exist in the
// post-state?  (It means the latter.)  And if there does not exist a candidate in majority
// in the pre-state, could there be a (newly created) candidate in majority in the post-state?
// This will require some proof.  The simplest argument seems to be that even if more candidates
// are created during the course of DetermineElection, such candidates cannot possibly
// be in majority in the sequence "a", since "a" can only contain candidates that were already
// created in the pre-state.  This property is easily specified by adding a postcondition
// to the Count function.  Alternatively, one could have added the antecedent "c in a" or
// "old(allocated(c))" to the "forall c" quantification in the postcondition of DetermineElection.

// About reading the proofs:
// Dafny proves the FindWinner algorithm from the given loop invariants and the two lemmas
// Lemma_Unique and Lemma_Split.  In showing this proof to some colleagues, they found they
// were not as quick as Dafny in constructing the proof from these ingredients.  For a human
// to understand the situation better, it helps to take smaller (and more) steps in the proof.
// At the end of this file, Nadia Polikarpova has written two versions of FindWinner that does
// that, using Dafny's support for calculational proofs.

function method Count<T(==)>(a: seq<T>, s: int, t: int, x: T): int
  requires 0 <= s <= t <= |a|;
  ensures Count(a, s, t, x) == 0 || x in a;
{
  if s == t then 0 else
  Count(a, s, t-1, x) + if a[t-1] == x then 1 else 0
}

predicate HasMajority<T(==)>(a: seq<T>, s: int, t: int, x: T)
  requires 0 <= s <= t <= |a|;
{
  2 * Count(a, s, t, x) > t - s
}

// Here is the first version of the algorithm, the one that assumes there is a majority choice.

method FindWinner<Candidate(==)>(a: seq<Candidate>, ghost K: Candidate) returns (k: Candidate)
  requires HasMajority(a, 0, |a|, K); // K has a (strict) majority of the votes
  ensures k == K;  // find K
{
  k := a[0];
  var n, c, s := 1, 1, 0;
  while (n < |a|)
    invariant 0 <= s <= n <= |a|;
    invariant 2 * Count(a, s, |a|, K) > |a| - s;  // K has majority among a[s..]
    invariant 2 * Count(a, s, n, k) > n - s;  // k has majority among a[s..n]
    invariant c == Count(a, s, n, k);
  {
    if (a[n] == k) {
      n, c := n + 1, c + 1;
    } else if (2 * c > n + 1 - s) {
      n := n + 1;
    } else {
      n := n + 1;
      // We have 2*Count(a, s, n, k) == n-s, and thus the following lemma
      // lets us conclude 2*Count(a, s, n, K) <= n-s.
      Lemma_Unique(a, s, n, K, k);
      // We also have 2*Count(a, s, |a|, K) > |a|-s, and the following lemma
      // tells us Count(a, s, |a|, K) == Count(a, s, n, K) + Count(a, n, |a|, K),
      // and thus we can conclude 2*Count(a, n, |a|, K) > |a|-n.
      Lemma_Split(a, s, n, |a|, K);
      k, n, c, s := a[n], n + 1, 1, n;
    }
  }
  Lemma_Unique(a, s, |a|, K, k);  // both k and K have a majority, so K == k
}

// ------------------------------------------------------------------------------

// Here is the second version of the program, the one that also computes whether or not
// there is a majority choice.

method DetermineElection<Candidate(==)>(a: seq<Candidate>) returns (hasWinner: bool, cand: Candidate)
  ensures hasWinner ==> 2 * Count(a, 0, |a|, cand) > |a|;
  ensures !hasWinner ==> forall c :: 2 * Count(a, 0, |a|, c) <= |a|;
{
  ghost var b := exists c :: 2 * Count(a, 0, |a|, c) > |a|;
  ghost var w :| b ==> 2 * Count(a, 0, |a|, w) > |a|;
  cand := SearchForWinner(a, b, w);
  return 2 * Count(a, 0, |a|, cand) > |a|, cand;
}

// The difference between SearchForWinner for FindWinner above are the occurrences of the
// antecedent "hasWinner ==>" and the two checks for no-more-votes that may result in a "return"
// statement.

method SearchForWinner<Candidate(==)>(a: seq<Candidate>, ghost hasWinner: bool, ghost K: Candidate) returns (k: Candidate)
  requires hasWinner ==> 2 * Count(a, 0, |a|, K) > |a|;  // K has a (strict) majority of the votes
  ensures hasWinner ==> k == K;  // find K
{
  if (|a| == 0) { return; }
  k := a[0];
  var n, c, s := 1, 1, 0;
  while (n < |a|)
    invariant 0 <= s <= n <= |a|;
    invariant hasWinner ==> 2 * Count(a, s, |a|, K) > |a| - s;  // K has majority among a[s..]
    invariant 2 * Count(a, s, n, k) > n - s;  // k has majority among a[s..n]
    invariant c == Count(a, s, n, k);
  {
    if (a[n] == k) {
      n, c := n + 1, c + 1;
    } else if (2 * c > n + 1 - s) {
      n := n + 1;
    } else {
      n := n + 1;
      // We have 2*Count(a, s, n, k) == n-s, and thus the following lemma
      // lets us conclude 2*Count(a, s, n, K) <= n-s.
      Lemma_Unique(a, s, n, K, k);
      // We also have 2*Count(a, s, |a|, K) > |a|-s, and the following lemma
      // tells us Count(a, s, |a|, K) == Count(a, s, n, K) + Count(a, n, |a|, K),
      // and thus we can conclude 2*Count(a, n, |a|, K) > |a|-n.
      Lemma_Split(a, s, n, |a|, K);
      if (|a| == n) { return; }
      k, n, c, s := a[n], n + 1, 1, n;
    }
  }
  Lemma_Unique(a, s, |a|, K, k);  // both k and K have a majority, so K == k
}

// ------------------------------------------------------------------------------

// Here are two lemmas about Count that are used in the methods above.

ghost method Lemma_Split<T>(a: seq<T>, s: int, t: int, u: int, x: T)
  requires 0 <= s <= t <= u <= |a|;
  ensures Count(a, s, t, x) + Count(a, t, u, x) == Count(a, s, u, x);
{
  /* The postcondition of this method is proved automatically via Dafny's
     induction tactic.  But if a manual proof had to be provided, it would
     look like this:
  if (s != t) {
    Lemma_Split(a, s, t-1, u, x);
  }
  */
}

ghost method Lemma_Unique<T>(a: seq<T>, s: int, t: int, x: T, y: T)
  requires 0 <= s <= t <= |a|;
  ensures x != y ==> Count(a, s, t, x) + Count(a, s, t, y) <= t - s;
{
  /* The postcondition of this method is proved automatically via Dafny's
     induction tactic.  But if a manual proof had to be provided, it would
     look like this:
  if (s != t) {
    Lemma_Unique(a, s, t-1, x, y);
  }
  */
}

// ------------------------------------------------------------------------------

// This version uses more calculations with integer formulas
method FindWinner'<Candidate(==)>(a: seq<Candidate>, ghost K: Candidate) returns (k: Candidate)
  requires HasMajority(a, 0, |a|, K); // K has a (strict) majority of the votes
  ensures k == K;  // find K
{
  k := a[0]; // Current candidate: the first element
  var lo, up, c := 0, 1, 1; // Window: [0..1], number of occurrences of k in the window: 1
  while (up < |a|)
    invariant 0 <= lo < up <= |a|; // (0)
    invariant HasMajority(a, lo, |a|, K); // (1) K has majority among a[lo..]
    invariant HasMajority(a, lo, up, k); // (2) k has majority among a[lo..up] (in the current window)
    invariant c == Count(a, lo, up, k); // (3)
  {
    if (a[up] == k) {
      // One more occurrence of k
      up, c := up + 1, c + 1;
    } else if (2 * c > up + 1 - lo) {
      // An occurrence of another value, but k still has the majority
      up := up + 1;
    } else {
      // An occurrence of another value and k just lost the majority.
      // Prove that k has exactly 50% in the future window a[lo..up + 1]:
      calc /* k has 50% among a[lo..up + 1] */ {
        true;
        // negation of the previous branch condition;
        2 * c <= up + 1 - lo;
        // loop invariant (3)
        2 * Count(a, lo, up, k) <= up + 1 - lo;
        calc {
          true;
          // loop invariant (2)
          HasMajority(a, lo, up, k);
          // def. HasMajority
          2 * Count(a, lo, up, k) > up - lo;
          2 * Count(a, lo, up, k) >= up + 1 - lo;
        }
        2 * Count(a, lo, up, k) == up + 1 - lo;
      }
      up := up + 1;
      assert 2 * Count(a, lo, up, k) == up - lo; // k has exactly 50% in the current window a[lo..up]
      
      // We are going to start a new window a[up..up + 1] and choose a new candidate,
      // so invariants (2) and (3) will be easy to re-establish.
      // To re-establish (1) we have to prove that K has majority among a[up..], as up will become the new lo.
      // The main idea is that we had enough K's in a[lo..], and there cannot be too many in a[lo..up].
      calc /* K has majority among a[up..] */ {
        2 * Count(a, up, |a|, K);
        { Lemma_Split(a, lo, up, |a|, K); }
        2 * Count(a, lo, |a|, K) - 2 * Count(a, lo, up, K);
        > { assert HasMajority(a, lo, |a|, K); } // loop invariant (1)
        |a| - lo - 2 * Count(a, lo, up, K);
        >= 
        { if (k == K) {
            calc {
              2 * Count(a, lo, up, K);              
              2 * Count(a, lo, up, k);
              { assert 2 * Count(a, lo, up, k) == up - lo; } // k has 50% among a[lo..up]
              up - lo;
            }
          } else {
            calc {
              2 * Count(a, lo, up, K);
              <= { Lemma_Unique(a, lo, up, k, K); }
              2 * ((up - lo) - Count(a, lo, up, k));
              { assert 2 * Count(a, lo, up, k) == up - lo; } // k has 50% among a[lo..up]
              up - lo;
            }
          }
          assert 2 * Count(a, lo, up, K) <= up - lo;
        }
        |a| - lo - (up - lo);
        |a| - up;
      }
      assert HasMajority(a, up, |a|, K);
      
      k, lo, up, c := a[up], up, up + 1, 1;
      assert HasMajority(a, lo, |a|, K);	
    }
  }
  Lemma_Unique(a, lo, |a|, K, k);  // both k and K have a majority among a[lo..], so K == k
}

// This version uses more calculations with boolean formulas
method FindWinner''<Candidate(==)>(a: seq<Candidate>, ghost K: Candidate) returns (k: Candidate)
  requires HasMajority(a, 0, |a|, K); // K has a (strict) majority of the votes
  ensures k == K;  // find K
{
  k := a[0]; // Current candidate: the first element
  var lo, up, c := 0, 1, 1; // Window: [0..1], number of occurrences of k in the window: 1
  while (up < |a|)
    invariant 0 <= lo < up <= |a|; // (0)
    invariant HasMajority(a, lo, |a|, K); // (1) K has majority among a[lo..]
    invariant HasMajority(a, lo, up, k); // (2) k has majority among a[lo..up] (in the current window)
    invariant c == Count(a, lo, up, k); // (3)
  {
    if (a[up] == k) {
      // One more occurrence of k
      up, c := up + 1, c + 1;
    } else if (2 * c > up + 1 - lo) {
      // An occurrence of another value, but k still has the majority
      up := up + 1;
    } else {
      // An occurrence of another value and k just lost the majority.
      // Prove that k has exactly 50% in the future window a[lo..up + 1]:
      calc /* k has 50% among a[lo..up + 1] */ {
        true;
        // negation of the previous branch condition;
        2 * c <= up + 1 - lo;
        // loop invariant (3)
        2 * Count(a, lo, up, k) <= up + 1 - lo;
        calc {
          true;
          // loop invariant (2)
          HasMajority(a, lo, up, k);
          // def. HasMajority
          2 * Count(a, lo, up, k) > up - lo;
          2 * Count(a, lo, up, k) >= up + 1 - lo;
        }
        2 * Count(a, lo, up, k) == up + 1 - lo;
      }
      up := up + 1;
      assert 2 * Count(a, lo, up, k) == up - lo; // k has exactly 50% in the current window a[lo..up]
      
      // We are going to start a new window a[up..up + 1] and choose a new candidate,
      // so invariants (2) and (3) will be easy to re-establish.
      // To re-establish (1) we have to prove that K has majority among a[up..], as up will become the new lo.
      // The main idea is that we had enough K's in a[lo..], and there cannot be too many in a[lo..up].      
      calc /* K has majority among a[up..] */ {
        true;
        // loop invariant (1)
        HasMajority(a, lo, |a|, K);
        2 * Count(a, lo, |a|, K) > |a| - lo;
        { Lemma_Split(a, lo, up, |a|, K); }
        2 * Count(a, lo, up, K) + 2 * Count(a, up, |a|, K) > |a| - lo;
        ==> 
        { if (k == K) {
            calc {
              2 * Count(a, lo, up, K);
              2 * Count(a, lo, up, k);
              { assert 2 * Count(a, lo, up, k) == up - lo; } // k has 50% among a[lo..up]
              up - lo;
            }
          } else {
            calc {
              true;
              { Lemma_Unique(a, lo, up, k, K); }
              Count(a, lo, up, K) + Count(a, lo, up, k) <= up - lo;
              2 * Count(a, lo, up, K) + 2 * Count(a, lo, up, k) <= 2 * (up - lo);
              { assert 2 * Count(a, lo, up, k) == up - lo; } // k has 50% among a[lo..up]
              2 * Count(a, lo, up, K) <= up - lo;
            }
          }
          assert 2 * Count(a, lo, up, K) <= up - lo;
        }
        // subtract off Count(a, lo, up, K) from the LHS and subtract off the larger amount up - lo from the RHS
        2 * Count(a, up, |a|, K) > (|a| - lo) - (up - lo);
        2 * Count(a, up, |a|, K) > |a| - up;
        HasMajority(a, up, |a|, K);
      }	
      k, lo, up, c := a[up], up, up + 1, 1;
      assert HasMajority(a, lo, |a|, K);	
    }
  }
  Lemma_Unique(a, lo, |a|, K, k);  // both k and K have a majority among a[lo..], so K == k
}