path: root/Test/runTests.py
Diffstat (limited to 'Test/runTests.py')
-rw-r--r--  Test/runTests.py  567
1 file changed, 567 insertions(+), 0 deletions(-)
diff --git a/Test/runTests.py b/Test/runTests.py
new file mode 100644
index 00000000..8caa65d1
--- /dev/null
+++ b/Test/runTests.py
@@ -0,0 +1,567 @@
+import os
+import re
+import sys
+import csv
+import shutil
+import argparse
+import operator
+import platform
+from math import floor, ceil
+from enum import Enum
+from time import time, strftime
+from collections import defaultdict
+from multiprocessing import Pool, Manager
+from subprocess import Popen, call, PIPE, TimeoutExpired
+
+# C:/Python34/python.exe runTests.py --compiler "c:/MSR/dafny/Binaries/Dafny.exe" --flags "/useBaseNameForFileName /compile:1 /nologo" --difftool "C:\Program Files (x86)\Meld\Meld.exe" -j4 --flags "/dprelude preludes\AlmostAllTriggers.bpl" dafny0\SeqFromArray.dfy
+
+# c:/Python34/python.exe runTests.py --compare ../TestStable/results/SequenceAxioms/2015-06-06-00-54-52--PrettyPrinted.report.csv ../TestStable/results/SequenceAxioms/*.csv
+
+VERBOSITY = None
+KILLED = False
+ANSI = False
+
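+# Try to enable ANSI colors. On native Windows consoles, colorama converts
+# ANSI escape sequences into Win32 console calls; tendo.ansiterm is a fallback.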
+try:
+ import colorama
+ no_native_ansi = os.name == 'nt' and os.environ.get("TERM") in [None, "cygwin"]
+ tty = all(hasattr(stream, 'isatty') and stream.isatty() for stream in (sys.stdout, sys.stderr))
+ colorama.init(strip=no_native_ansi, convert=no_native_ansi and tty)
+ ANSI = True
+except ImportError:
+ try:
+ import tendo.ansiterm
+ ANSI = True
+ except ImportError:
+ pass
+
+class Defaults:
+ EXCLUDED_FILES = ["^flycheck_"]
+ EXCLUDED_FOLDERS = ["Inputs", "Output", "sandbox", "desktop"]
+ DAFNY_BIN = os.path.realpath(os.path.join(os.path.dirname(__file__), "../Binaries/Dafny.exe"))
+ COMPILER = [DAFNY_BIN]
+ FLAGS = ["/useBaseNameForFileName", "/compile:1", "/nologo", "/timeLimit:300"]
+ EXTENSIONS = [".dfy", ".transcript"]
+
+class Colors:
+ RED = '\033[91m'
+ GREEN = '\033[92m'
+ YELLOW = '\033[93m'
+ BRIGHT = '\033[1m'
+ DIM = '\033[2m'
+ RESET = '\033[0m'
+
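+# Message levels: 'index' is checked against VERBOSITY to decide whether a
+# message is printed; 'elide' suppresses the "[NAME]" header tag for a level.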
+class Debug(Enum):
+ ERROR = (-1, Colors.RED)
+ WARNING = (-1, Colors.YELLOW)
+ REPORT = (0, Colors.RESET, True)
+ INFO = (1, Colors.RESET, True)
+ DEBUG = (2, Colors.RESET)
+ TRACE = (3, Colors.RESET)
+
+ def __init__(self, index, color, elide=False):
+ self.index = index
+ self.color = color
+ self.elide = elide
+
+def wrap_color(string, color, silent=False):
+ if silent:
+ return " " * len(string)
+ elif ANSI:
+ return color + string + Colors.RESET
+ else:
+ return string
+
+def debug(level, *args, **kwargs):
+ kwargs["file"] = sys.stderr
+ kwargs["flush"] = True
+
+ headers = kwargs.pop("headers", [])
+ if isinstance(headers, Enum):
+ headers = [headers]
+
+ silentheaders = kwargs.pop("silentheaders", False)
+
+    if level and level.index <= VERBOSITY:
+        headers = [level] + headers
+        headers = tuple(wrap_color("{: <8}".format("[" + h.name + "]"), h.color, silent=silentheaders)
+                        for h in headers if not h.elide)
+        print(*(headers + args), **kwargs)
+
+class TestStatus(Enum):
+ PENDING = (0, Colors.RESET)
+ PASSED = (1, Colors.GREEN)
+ FAILED = (2, Colors.RED)
+ UNKNOWN = (3, Colors.RED)
+ TIMEOUT = (4, Colors.RED)
+
+ def __init__(self, index, color):
+ self.index = index
+ self.color = color
+ self.elide = False
+
+class Test:
+ COLUMNS = ["name", "status", "start", "end", "duration", "returncodes", "suite_time", "njobs", "proc_info", "source_path", "temp_directory", "cmds", "expected", "output"]
+
+ def __init__(self, name, source_path, cmds, timeout, compiler_id = 0):
+ self.name = name
+ self.source_path = Test.uncygdrive(source_path)
+ self.expect_path = Test.source_to_expect_path(self.source_path)
+ self.source_directory, self.fname = os.path.split(self.source_path)
+ self.temp_directory = os.path.join(self.source_directory, "Output")
+ self.temp_output_path = os.path.join(self.temp_directory, self.fname + ".tmp")
+
+ self.output = None
+ self.expected = Test.read_normalize(self.expect_path)
+
+ self.cmds = cmds
+ self.timeout = timeout
+ self.compiler_id = compiler_id
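+        # Expand lit-style placeholders: %s = source file, %S = source directory,
+        # %t = per-test temp output file, %T = the Output directory.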
+ self.cmds = [cmd.replace("%s", self.source_path) for cmd in self.cmds]
+ self.cmds = [cmd.replace("%S", self.source_directory) for cmd in self.cmds]
+ self.cmds = [cmd.replace("%t", self.temp_output_path) for cmd in self.cmds]
+ self.cmds = [cmd.replace("%T", self.temp_directory) for cmd in self.cmds]
+
+ self.status = TestStatus.PENDING
+ self.proc_info = platform.processor()
+
+ self.time, self.suite_time = None, None
+ self.njobs, self.returncodes = None, []
+ self.start, self.end, self.duration = None, None, None
+
+ @staticmethod
+ def source_to_expect_path(source):
+ return source + ".expect"
+
+ @staticmethod
+ def uncygdrive(path):
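+        # Rewrite Cygwin-style "/cygdrive/c/..." paths as "c:/..." so that
+        # native Windows binaries accept them.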
+ return re.sub("^/cygdrive/([a-zA-Z])/", r"\1:/", path)
+
+ @staticmethod
+ def read_normalize(path):
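+        # Read a file as bytes, normalizing CRLF and CR line endings to LF so
+        # that output comparisons are independent of the platform's EOL style.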
+ try:
+ with open(path, mode="rb") as reader:
+ return reader.read().replace(b'\r\n', b'\n').replace(b'\r', b'\n')
+        except FileNotFoundError:
+            debug(Debug.WARNING, "{} not found".format(path))
+            return b""  # bytes, to match the normal return type above
+
+ @staticmethod
+ def build_report(tests, name):
+ now = strftime("%Y-%m-%d-%H-%M-%S")
+ if name:
+ directory, fname = os.path.split(name)
+ name = os.path.join(directory, now + "--" + fname)
+ else:
+ name = now
+
+ with open(name + ".csv", mode='w', newline='') as writer:
+ csv_writer = csv.DictWriter(writer, Test.COLUMNS, dialect='excel')
+ csv_writer.writeheader()
+ for test in tests:
+ test.serialize(csv_writer)
+
+ @staticmethod
+ def load_report(path):
+ results = []
+ with open(path) as csvfile:
+            for row in csv.DictReader(csvfile):
+ results.append(Test.deserialize(row))
+ return results
+
+ @staticmethod
+ def mean_duration(results, margin):
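+        # Compute a mean completion time; durations beyond Tukey-style fences
+        # (margin * IQR past the quartiles) are dropped from a trimmed mean.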
+ durations = sorted(result.duration for result in results
+ if result.status in (TestStatus.PASSED, TestStatus.FAILED))
+ if len(durations) >= 15:
+ lq = durations[floor(0.25 * len(durations))]
+            hq = durations[ceil(0.75 * len(durations))]
+ iqr = hq - lq
+ filtered = [d for d in durations if (lq - margin * iqr) <= d <= (hq + margin * iqr)]
+ if filtered:
+ avg = sum(durations) / len(durations)
+ trimmed_avg = sum(filtered) / len(filtered)
+ outliers_count = len(durations) - len(filtered)
+ msg = "mean completion time: {:.2f}s".format(avg)
+ if outliers_count > 0:
+ msg += "; ignoring {} outliers: {:.2f}s".format(outliers_count, trimmed_avg)
+ return " ({})".format(msg)
+ return ""
+
+ @staticmethod
+ def summarize(results):
+ debug(Debug.INFO, "\nTesting complete ({} test(s))".format(len(results)))
+
+ if results:
+ grouped = defaultdict(list)
+ for test in results:
+ grouped[test.status].append(test)
+
+ for status, tests in sorted(grouped.items(), key=lambda x: x[0].index):
+ if tests:
+ debug(Debug.REPORT, "{} of {}".format(len(tests), len(results)), headers=status)
+ if status != TestStatus.PASSED:
+ for test in tests:
+ debug(Debug.REPORT, "* " + test.name, headers=status, silentheaders=True)
+
+ debug(Debug.REPORT)
+
+ failing = [t for t in results if t.status != TestStatus.PASSED]
+ if failing:
+ with open("failing.lst", mode='w') as writer:
+ for t in failing:
+ writer.write("{}\n".format(t.name))
+ debug(Debug.REPORT, "Some tests failed: use [runTests.py failing.lst] to rerun the failing tests")
+
+ debug(Debug.REPORT, "Testing took {:.2f}s on {} thread(s){}".format(
+ results[0].suite_time, results[0].njobs, Test.mean_duration(results, 1.5)))
+
+
+ def run(self):
+ debug(Debug.DEBUG, "Starting {}".format(self.name))
+ os.makedirs(self.temp_directory, exist_ok=True)
+ # os.chdir(self.source_directory)
+
+ stdout, stderr = b'', b''
+ self.start = time()
+
+ try:
+ for cmd in self.cmds:
+ debug(Debug.DEBUG, "> {}".format(cmd))
+ try:
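+                # Each cmd is a complete shell command line (placeholders were
+                # expanded in __init__), hence shell=True.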
+ proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True)
+ _stdout, _stderr = proc.communicate(timeout=self.timeout)
+ stdout, stderr = stdout + _stdout, stderr + _stderr
+ self.returncodes.append(proc.returncode)
+ except FileNotFoundError:
+ debug(Debug.ERROR, "Program '{}' not found".format(cmd))
+ self.status = TestStatus.UNKNOWN
+ return
+            except TimeoutExpired:
+                proc.kill()  # communicate() leaves the child running on timeout
+                self.status = TestStatus.TIMEOUT
+                self.end = self.start + self.timeout
+                self.duration = self.timeout
+                return
+
+ self.end = time()
+ self.duration = self.end - self.start
+
+ stdout, stderr = stdout.strip(), stderr.strip()
+ if stdout != b"":
+ debug(Debug.TRACE, "Writing the output of {} to {}".format(self.name, self.temp_output_path))
+ with open(self.temp_output_path, mode='ab') as writer:
+ writer.write(stdout)
+ if stderr != b"":
+ debug(Debug.INFO, stderr.decode("utf-8"))
+
+ self.update_status()
+ except TimeoutExpired:
+ self.status = TestStatus.TIMEOUT
+ except KeyboardInterrupt:
+ raise
+
+ def update_status(self):
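+        # A test passes iff its normalized output is byte-for-byte identical
+        # to the contents of the corresponding .expect file.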
+ self.output = Test.read_normalize(self.temp_output_path)
+ self.status = TestStatus.PASSED if self.expected == self.output else TestStatus.FAILED
+
+ def report(self, tid, running, alltests):
+ running = [alltests[rid].fname for rid in running]
+ running = "; oldest: {}".format(running[0]) if running else ""
+
+ fstring = "[{:5.2f}s] {} ({}{})"
+ progress = "{}/{}".format(tid, len(alltests))
+ message = fstring.format(self.duration, wrap_color(self.name, Colors.BRIGHT),
+ wrap_color(progress, Colors.BRIGHT), running)
+
+ debug(Debug.INFO, message, headers=self.status)
+
+ @staticmethod
+ def write_bytes(base_directory, relative_path, extension, contents):
+ with open(os.path.join(base_directory, relative_path + extension), mode='wb') as writer:
+ writer.write(contents)
+
+ def serialize(self, csv_writer):
+ csv_writer.writerow({col: getattr(self, col) for col in Test.COLUMNS})
+
+ @classmethod
+ def deserialize(cls, row):
+ test = cls.__new__(cls)
+ for col, val in row.items():
+ setattr(test, col, val)
+ test.duration = float(test.duration)
+ test.status = next(x for x in TestStatus if str(x) == test.status)
+ return test
+
+def setup_parser():
+ parser = argparse.ArgumentParser(description='Run the Dafny test suite.')
+
+ parser.add_argument('path', type=str, action='store', nargs='+',
+                        help='Input files or folders. Folders are searched for test files. Lists of files can also be specified by passing a .lst file (for an example of such a file, look at failing.lst after running failing tests).')
+
+ parser.add_argument('--compiler', type=str, action='append', default=None,
+ help='Dafny executable. Default: {}'.format(Defaults.DAFNY_BIN))
+
+ parser.add_argument('--base-flags', type=str, action='append', default=None,
+                        help='Arguments to pass to dafny. Multiple --base-flags arguments are concatenated. Default: {}'.format(Defaults.FLAGS))
+
+ parser.add_argument('--flags', '-f', type=str, action='append', default=[],
+ help='Additional arguments to pass to dafny. Useful to override some of the defaults found in --base-flags.')
+
+ parser.add_argument('--njobs', '-j', action='store', type=int, default=None,
+ help='Number of test workers.')
+
+ parser.add_argument('--exclude', action='append', type=str, default=[],
+ help='Excluded directories. {} are automatically added.'.format(Defaults.EXCLUDED_FOLDERS))
+
+ parser.add_argument('--verbosity', action='store', type=int, default=1,
+ help='Set verbosity level. 0: Minimal; 1: Some info; 2: More info; 3: Trace.')
+
+ parser.add_argument('-v', action='store_const', default=1, dest="verbosity", const=2,
+ help='Short for --verbosity 2.')
+
+ parser.add_argument('-vv', action='store_const', default=1, dest="verbosity", const=3,
+ help='Short for --verbosity 3.')
+
+ parser.add_argument('--report', '-r', action='store', type=str, default=None,
+ help='Give an explicit name to the report file. Defaults to the current date and time.')
+
+ parser.add_argument('--timeout', action='store', type=float, default=15*60.0,
+                        help='Prover timeout, in seconds. Default: 900.')
+
+ parser.add_argument('--compare', action='store_true',
+ help="Compare two previously generated reports.")
+
+ parser.add_argument('--time-all', action='store_true',
+ help="When comparing, include all timings.")
+
+ parser.add_argument('--diff', '-d', action='store_true',
+ help="Don't run tests; show differences between outputs and .expect files, optionally overwritting .expect files.")
+
+ parser.add_argument('--accept', '-a', action='store_true',
+ help="Don't run tests; copy outputs to .expect files.")
+
+ parser.add_argument('--open', '-o', action='store_true',
+ help="Don't run tests; open one file.")
+
+ parser.add_argument('--difftool', action='store', type=str, default="diff",
+ help='Diff program. Default: diff.')
+
+ return parser
+
+def run_one_internal(test, test_id, args, running):
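+    # 'running' is a Manager-backed list of in-flight test ids, shared across
+    # workers; report() uses it to show the longest-running test.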
+ global KILLED
+ global VERBOSITY
+ VERBOSITY = args.verbosity
+
+ if not KILLED:
+ try:
+ running.append(test_id)
+ test.run()
+ except KeyboardInterrupt:
+            # There's no reliable way to handle this cleanly on Windows: if one
+            # of the workers dies, it gets respawned. The reliable approach is
+            # to ignore further work once a kill signal has been received.
+ KILLED = True
+ except Exception as e:
+ debug(Debug.ERROR, "[{}] {}".format(test.name, e))
+ test.status = TestStatus.UNKNOWN
+ finally:
+ running.remove(test_id)
+
+ return test
+
+def run_one(args):
+ return run_one_internal(*args)
+
+def get_server_path(compiler):
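+    # Derive the DafnyServer.exe path from the Dafny.exe path. Returns None
+    # when the compiler command does not mention Dafny.exe.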
+ REGEXP = r"\bDafny.exe\b.*"
+ if re.search(REGEXP, compiler):
+ return re.sub(REGEXP, "DafnyServer.exe", compiler)
+ else:
+ return None
+
+def substitute_binaries(cmd, compiler):
+    cmd = cmd.replace("%dafny", compiler)
+    server = get_server_path(compiler)
+    if server is not None:  # no %server substitution when the compiler isn't Dafny.exe
+        cmd = cmd.replace("%server", server)
+    return cmd
+
+def read_one_test(fname, compiler_cmds, timeout):
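+    # Collect the lit-style "RUN:" header of a test file. Scanning stops at the
+    # first non-matching line; "%diff" RUN lines also end the scan, because the
+    # harness compares %t against the .expect file itself. An illustrative
+    # header (the exact flags vary per test):
+    #   // RUN: %dafny /compile:1 "%s" > "%t"
+    #   // RUN: %diff "%s.expect" "%t"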
+ for cid, compiler_cmd in enumerate(compiler_cmds):
+ source_path = os.path.realpath(fname)
+ with open(source_path, mode='r') as reader:
+ cmds = []
+ for line in reader:
+ line = line.strip()
+ match = re.match("^[/# ]*RUN: *(?!%diff)([^ ].*)$", line)
+ if match:
+ debug(Debug.TRACE, "Found RUN spec: {}".format(line))
+ cmds.append(substitute_binaries(match.groups()[0], compiler_cmd))
+ else:
+ break
+ if cmds:
+ yield Test(fname, source_path, cmds, timeout, cid)
+ else:
+ debug(Debug.WARNING, "Test file {} has no RUN specification".format(fname))
+
+
+def find_one(fname, compiler_cmds, timeout):
+ _, name = os.path.split(fname)
+ _, ext = os.path.splitext(name)
+ if ext in Defaults.EXTENSIONS and not any(re.search(pattern, name, re.IGNORECASE) for pattern in Defaults.EXCLUDED_FILES):
+ if os.path.exists(fname):
+ debug(Debug.TRACE, "Found test file: {}".format(fname))
+ yield from read_one_test(fname, compiler_cmds, timeout)
+ else:
+ debug(Debug.ERROR, "Test file {} not found".format(fname))
+ else:
+ debug(Debug.TRACE, "Ignoring {}".format(fname))
+
+
+def expand_lsts(paths):
+ for path in paths:
+ _, ext = os.path.splitext(path)
+ if ext == ".lst": #lst files are only read if explicitly listed on the CLI
+ debug(Debug.INFO, "Loading tests from {}".format(path))
+ with open(path) as reader:
+ for line in reader:
+ _path = line.strip()
+ yield _path
+ else:
+ yield path
+
+def find_tests(paths, compiler_cmds, excluded, timeout):
+ for path in expand_lsts(paths):
+ if os.path.isdir(path):
+ debug(Debug.TRACE, "Searching for tests in {}".format(path))
+ for base, dirnames, fnames in os.walk(path):
+ dirnames[:] = [d for d in dirnames if d not in excluded]
+ for fname in fnames:
+ yield from find_one(os.path.join(base, fname), compiler_cmds, timeout)
+ else:
+ yield from find_one(path, compiler_cmds, timeout)
+
+def run_tests(args):
+ if args.compiler is None:
+ args.compiler = Defaults.COMPILER
+ if args.base_flags is None:
+ args.base_flags = Defaults.FLAGS
+
+ for compiler in args.compiler:
+ server = get_server_path(compiler)
+ if not os.path.exists(compiler):
+ debug(Debug.ERROR, "Compiler not found: {}".format(compiler))
+ return
+        if server is None or not os.path.exists(server):
+ debug(Debug.WARNING, "Server not found")
+
+ tests = list(find_tests(args.path, [compiler + ' ' + " ".join(args.base_flags + args.flags)
+ for compiler in args.compiler],
+ args.exclude + Defaults.EXCLUDED_FOLDERS, args.timeout))
+ tests.sort(key=operator.attrgetter("name"))
+
+ args.njobs = max(1, min(args.njobs or os.cpu_count() or 1, len(tests)))
+ debug(Debug.INFO, "\nRunning {} test(s) on {} testing thread(s), timeout is {:.2f}s, started at {}".format(len(tests), args.njobs, args.timeout, strftime("%H:%M:%S")))
+
+ try:
+ pool = Pool(args.njobs)
+
+ results = []
+ start = time()
+ with Manager() as manager:
+ running = manager.list()
+ payloads = [(t, tid, args, running) for (tid, t) in enumerate(tests)]
+ for tid, test in enumerate(pool.imap_unordered(run_one, payloads, 1)):
+ test.report(tid + 1, running, tests)
+ results.append(test)
+ pool.close()
+ pool.join()
+ suite_time = time() - start
+
+ for t in results:
+ t.njobs = args.njobs
+ t.suite_time = suite_time
+
+ Test.summarize(results)
+ Test.build_report(results, args.report)
+ except KeyboardInterrupt:
+ try:
+ pool.terminate()
+ pool.join()
+ except (FileNotFoundError, EOFError, ConnectionAbortedError):
+ pass
+ debug(Debug.ERROR, "Testing interrupted")
+
+
+def diff(paths, force_accept, difftool):
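+    # Interactive accept loop: show each output/.expect difference with the
+    # configured difftool, then (unless --accept was passed) ask whether to
+    # overwrite the .expect file with the latest output.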
+ for path in expand_lsts(paths):
+ if not os.path.exists(path):
+ debug(Debug.ERROR, "Not found: {}".format(path))
+ else:
+ test = Test(None, path, [], None)
+ accept = force_accept
+
+ if not accept:
+ call([difftool, test.expect_path, test.temp_output_path])
+ accept = input("Accept this change? (y/N) ") == "y"
+
+ if accept:
+ debug(Debug.INFO, path, "accepted.")
+ shutil.copy(test.temp_output_path, test.expect_path)
+ else:
+ debug(Debug.INFO, path, "not accepted.")
+
+def compare_results(globs, time_all):
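+    # Write compare.csv: the first report acts as the baseline, and every other
+    # report's durations are shown as percentage changes relative to it, plus a
+    # synthetic $$TOTAL$$ row summing all non-timeout durations.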
+ from glob import glob
+ paths = [path for g in globs for path in glob(g)]
+ reports = {path: Test.load_report(path) for path in paths}
+ resultsets = {path: {test.name: (test.status, test.duration) for test in report}
+ for path, report in reports.items()}
+
+ all_tests = set(name for resultset in resultsets.values() for name in resultset.keys())
+
+ reference = resultsets[paths[0]]
+ for path, resultset in resultsets.items():
+ resultset["$$TOTAL$$"] = None, sum(v[1] for v in resultset.values() if v[1] and v[0] != TestStatus.TIMEOUT)
+
+ with open("compare.csv", mode='w', newline='') as writer:
+ csv_writer = csv.writer(writer, dialect='excel')
+ csv_writer.writerow(["Name"] + [os.path.split(path)[1].lstrip("0123456789-") for path in paths])
+
+ for name in sorted(all_tests) + ["$$TOTAL$$"]:
+            ref_status, ref_duration = reference.get(name, (TestStatus.UNKNOWN, None))
+
+ row = []
+ row.append(name)
+ row.append(ref_duration)
+ for path in paths[1:]:
+ res = resultsets[path].get(name)
+ test_status, test_duration = res if res else (TestStatus.UNKNOWN, None)
+                if res is not None and ref_duration and (test_status == ref_status or time_all):
+ result = "{:.2%}".format((test_duration - ref_duration) / ref_duration)
+ else:
+ result = test_status.name + "?!"
+ row.append(result)
+
+ csv_writer.writerow(row)
+
+def main():
+ global VERBOSITY
+ parser = setup_parser()
+ args = parser.parse_args()
+ VERBOSITY = args.verbosity
+
+ if os.name != 'nt' and os.environ.get("TERM") == "cygwin":
+ debug(Debug.WARNING, "If you run into issues, try using Windows' Python instead of Cygwin's")
+
+ if args.diff or args.accept:
+ diff(args.path, args.accept, args.difftool)
+ elif args.open:
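+        # os.startfile is Windows-only; --open is unsupported elsewhere.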
+ os.startfile(args.path[0])
+ elif args.compare:
+ compare_results(args.path, args.time_all)
+ else:
+ run_tests(args)
+
+if __name__ == '__main__':
+ main()