Dafny: added another version of the majority finding algorithm to the test suite

author: Unknown <leino@LEINO6.redmond.corp.microsoft.com> 2012-06-12 17:41:30 -0700
committer: Unknown <leino@LEINO6.redmond.corp.microsoft.com> 2012-06-12 17:41:30 -0700
commit: b2eb7236b06ea3102b8cf86fc8f41c555a089614 (patch)
tree: 1d7850c0e60cafb048fd2b79ffa085a4751b16b1
parent: f2eb93519ffd02b775d427d2909cebd0690dc090 (diff)
2 files changed, 116 insertions, 8 deletions
diff --git a/Test/dafny2/Answer b/Test/dafny2/Answer
index b0bbfe05..93524dfa 100644
--- a/Test/dafny2/Answer
+++ b/Test/dafny2/Answer
@@ -41,7 +41,7 @@ Dafny program verifier finished with 22 verified, 0 errors
 
 -------------------- MajorityVote.dfy --------------------
 
-Dafny program verifier finished with 7 verified, 0 errors
+Dafny program verifier finished with 11 verified, 0 errors
 
 -------------------- SegmentSum.dfy --------------------
 
diff --git a/Test/dafny2/MajorityVote.dfy b/Test/dafny2/MajorityVote.dfy
index 0adba718..a7512486 100644
--- a/Test/dafny2/MajorityVote.dfy
+++ b/Test/dafny2/MajorityVote.dfy
@@ -1,3 +1,62 @@
+// Rustan Leino, June 2012.
+// This file verifies an algorithm, due to Boyer and Moore, that finds the majority choice
+// among a sequence of votes, see http://www.cs.utexas.edu/~moore/best-ideas/mjrty/.
+// Actually, this algorithm is a slight variation on theirs, but the general idea for why
+// it is correct is the same.  In the Boyer and Moore algorithm, the loop counter is advanced
+// by exactly 1 each iteration, which means that there may or may not be a "current leader".
+// In my program below, I had instead written the loop invariant to say there is always a
+// "current leader", which requires the loop index sometimes to skip a value.
+//
+// This file has two versions of the algorithm.  In the first version, the given sequence
+// of votes is assumed to have a (strict) majority choice, meaning that strictly more than
+// 50% of the votes are for one candidate.  It is convenient to have a name for the majority
+// choice, in order to talk about it in specifications.  The easiest way to do this in
+// Dafny is probably to introduce a ghost parameter with the given properties.  That's what
+// the algorithm does, see parameter K.  The postcondition is thus to output the value of
+// K, which is done in the non-ghost out-parameter k.
+// The proof of the algorithm requires two lemmas.  These lemmas are proved automatically
+// by Dafny's induction tactic.
+//
+// In the second version of the program, the main method does not assume there is a majority
+// choice.  Rather, it eseentially uses the first algorithm and then checks if what it
+// returns really is a majority choice.  To do this, the specification of the first algorithm
+// needs to be changed slightly to accommodate the possibility that there is no majority
+// choice.  That change in specification is also reflected in the loop invariant.  Moreover,
+// the algorithm itself now needs to extra 'if' statements to see if the entire sequence
+// has been searched through.  (This extra 'if' is essentially already handled by Boyer and
+// Moore's algorithm, because it increments the loop index by 1 each iteration and therefore
+// already has a special case for the case of running out of sequence elements without a
+// current leader.)
+// The calling harness, DetermineElection, somewhat existentially comes up with the majority
+// choice, if there is such a choice, and then passes in that choice as the ghost parameter K
+// to the main algorithm.  Neat, huh?
+
+// Advanced remark:
+// There is a subtle situation in the verification of DetermineElection.  Suppose the type
+// parameter Candidate denotes some type whose instances depend on which object are
+// allocated.  For example, if Candidate is some class type, then more candidates can come
+// into being by object allocations (using "new").  What does the quantification of
+// candidates "c" in the postcondition of DetermineElection now mean--all candidates that
+// existed in the pre-state or (the possibly larger set of) all candidates that exist in the
+// post-state?  (It means the latter.)  And if there does not exist a candidate in majority
+// in the pre-state, could there be a (newly created) candidate in majority in the post-state?
+// This will require some proof.  The simplest argument seems to be that even if more candidates
+// are created during the course of DetermineElection, such candidates cannot possibly
+// be in majority in the sequence "a", since "a" can only contain candidates that were already
+// created in the pre-state.  This property is easily specified by adding a postcondition
+// to the Count function.  Alternatively, one could have added the antecedent "c in a" or
+// "old(allocated(c))" to the "forall c" quantification in the postcondition of DetermineElection.
+
+function method Count<T>(a: seq<T>, s: int, t: int, x: T): int
+  requires 0 <= s <= t <= |a|;
+  ensures Count(a, s, t, x) == 0 || x in a;
+{
+  if s == t then 0 else
+  Count(a, s, t-1, x) + if a[t-1] == x then 1 else 0
+}
+
+// Here is the first version of the algorithm, the one that assumes there is a majority choice.
+
 method FindWinner<Candidate>(a: seq<Candidate>, ghost K: Candidate) returns (k: Candidate)
   requires 2 * Count(a, 0, |a|, K) > |a|;  // K has a (strict) majority of the votes
   ensures k == K;  // find K
@@ -29,6 +88,62 @@ method FindWinner<Candidate>(a: seq<Candidate>, ghost K: Candidate) returns (k:
   Lemma_Unique(a, s, |a|, K, k);  // both k and K have a majority, so K == k
 }
 
+// ------------------------------------------------------------------------------
+
+// Here is the second version of the program, the one that also computes whether or not
+// there is a majority choice.
+
+method DetermineElection<Candidate>(a: seq<Candidate>) returns (hasWinner: bool, cand: Candidate)
+  ensures hasWinner ==> 2 * Count(a, 0, |a|, cand) > |a|;
+  ensures !hasWinner ==> forall c :: 2 * Count(a, 0, |a|, c) <= |a|;
+{
+  ghost var b := exists c :: 2 * Count(a, 0, |a|, c) > |a|;
+  ghost var w :| b ==> 2 * Count(a, 0, |a|, w) > |a|;
+  cand := SearchForWinner(a, b, w);
+  return 2 * Count(a, 0, |a|, cand) > |a|, cand;
+}
+
+// The difference between SearchForWinner for FindWinner above are the occurrences of the
+// antecedent "hasWinner ==>" and the two checks for no-more-votes that may result in a "return"
+// statement.
+
+method SearchForWinner<Candidate>(a: seq<Candidate>, ghost hasWinner: bool, ghost K: Candidate) returns (k: Candidate)
+  requires hasWinner ==> 2 * Count(a, 0, |a|, K) > |a|;  // K has a (strict) majority of the votes
+  ensures hasWinner ==> k == K;  // find K
+{
+  if (|a| == 0) { return; }
+  k := a[0];
+  var n, c, s := 1, 1, 0;
+  while (n < |a|)
+    invariant 0 <= s <= n <= |a|;
+    invariant hasWinner ==> 2 * Count(a, s, |a|, K) > |a| - s;  // K has majority among a[s..]
+    invariant 2 * Count(a, s, n, k) > n - s;  // k has majority among a[s..n]
+    invariant c == Count(a, s, n, k);
+  {
+    if (a[n] == k) {
+      n, c := n + 1, c + 1;
+    } else if (2 * c > n + 1 - s) {
+      n := n + 1;
+    } else {
+      n := n + 1;
+      // We have 2*Count(a, s, n, k) == n-s, and thus the following lemma
+      // lets us conclude 2*Count(a, s, n, K) <= n-s.
+      Lemma_Unique(a, s, n, K, k);
+      // We also have 2*Count(a, s, |a|, K) > |a|-s, and the following lemma
+      // tells us Count(a, s, |a|, K) == Count(a, s, n, K) + Count(a, n, |a|, K),
+      // and thus we can conclude 2*Count(a, n, |a|, K) > |a|-n.
+      Lemma_Split(a, s, n, |a|, K);
+      if (|a| == n) { return; }
+      k, n, c, s := a[n], n + 1, 1, n;
+    }
+  }
+  Lemma_Unique(a, s, |a|, K, k);  // both k and K have a majority, so K == k
+}
+
+// ------------------------------------------------------------------------------
+
+// Here are two lemmas about Count that are used in the methods above.
+
 ghost method Lemma_Split<T>(a: seq<T>, s: int, t: int, u: int, x: T)
   requires 0 <= s <= t <= u <= |a|;
   ensures Count(a, s, t, x) + Count(a, t, u, x) == Count(a, s, u, x);
@@ -54,10 +169,3 @@ ghost method Lemma_Unique<T>(a: seq<T>, s: int, t: int, x: T, y: T)
   }
   */
 }
-
-function Count<T>(a: seq<T>, s: int, t: int, x: T): int
-  requires 0 <= s <= t <= |a|;
-{
-  if s == t then 0 else
-  Count(a, s, t-1, x) + if a[t-1] == x then 1 else 0
-}
author	Unknown <leino@LEINO6.redmond.corp.microsoft.com>	2012-06-12 17:41:30 -0700
committer	Unknown <leino@LEINO6.redmond.corp.microsoft.com>	2012-06-12 17:41:30 -0700
commit	b2eb7236b06ea3102b8cf86fc8f41c555a089614 (patch)
tree	1d7850c0e60cafb048fd2b79ffa085a4751b16b1
parent	f2eb93519ffd02b775d427d2909cebd0690dc090 (diff)