#!/usr/bin/python2

# Copyright 2014 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Provenance: recovered from the patch that created
# tools/compare_codereview.py.
#   Commit:     517c1e2921bc20b8600061aa21a77a8810db597e
#   From:       commit-bot@chromium.org
#   Date:       Wed, 22 Jan 2014 22:57:19 +0000
#   Subject:    Chromium Codereview Comparison Script.
#   Motivation: This should speed up the DEPS roll procedure.
#   BUG=skia:2039
#   R=robertphillips@google.com, borenet@google.com
#   Author:     halcanary@google.com
#   Review URL: https://codereview.chromium.org/143503003
#   git-svn-id: http://skia.googlecode.com/svn/trunk@13144
#               2bbb7eff-a529-9590-31e7-b0007b416f81
#
# NOTE(review): the text this was recovered from had its line breaks and
# runs of whitespace collapsed and everything that looked like an HTML
# tag removed.  Whitespace inside string literals (format strings, the
# indent spacer, the usage text) and the spots marked NOTE(review) below
# should be confirmed against the upstream file.

"""Skia's Chromium Codereview Comparison Script.

This script takes two Codereview URLs, looks at the trybot results for
the two codereviews and compares the results.

Usage:
  compare_codereview.py CONTROL_URL ROLL_URL
"""

import collections
import os
import re
import sys

try:  # Python 2 module names, as the script was originally written.
    import HTMLParser
    import urllib2
except ImportError:  # Python 3 renamed both modules.
    import html.parser as HTMLParser
    import urllib.request as urllib2


def _fetch_text(url):
    """Download a URL and return its body as text.

    Args:
        url: (string) the address to fetch.

    Returns:
        The response body as a str.

    Raises:
        urllib2.URLError: the request failed.
        ValueError: the URL is malformed (e.g. has no scheme).
    """
    response = urllib2.urlopen(url)
    try:
        body = response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()
    if not isinstance(body, str):
        # Python 3 returns bytes, but HTMLParser.feed() needs text.
        body = body.decode('utf-8', 'replace')
    return body


class CodeReviewHTMLParser(HTMLParser.HTMLParser):
    """Parses a CodeReview web page.

    Use the CodeReviewHTMLParser.parse static function to make use of
    this class.

    This uses the HTMLParser class because it's the best thing in
    Python's standard library.  We need a little more power than a
    regex.  (Search for "You can't parse [X]HTML with regex." for more
    information.)
    """
    # pylint: disable=I0011,R0904

    # namedtuples are like lightweight structs in Python.  The low
    # overhead of a tuple, but the ease of use of an object.
    Status = collections.namedtuple('Status', ['status', 'url'])

    @staticmethod
    def parse(url):
        """Parses a CodeReview web page.

        Args:
            url (string), a codereview URL like this:
                'https://codereview.chromium.org/?????????'.

        Returns:
            A dictionary; the keys are bot_name strings, the values
            are CodeReviewHTMLParser.Status objects.  Returns None
            (after writing a message to stderr) if the page could not
            be fetched.
        """
        try:
            page = _fetch_text(url)
        except (urllib2.URLError, ValueError):
            sys.stderr.write('Error getting %s\n' % url)
            return None
        parser = CodeReviewHTMLParser()
        parser.feed(page)
        parser.close()
        return parser.statuses

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        # id of the most recent div whose id starts with 'tryjobdiv';
        # None until one has been seen.
        self._id = None
        # Attributes captured from the anchor currently being read.
        self._status = None
        self._href = None
        # Text accumulated inside the current anchor (the bot name).
        self._anchor_data = ''
        self._currently_parsing_trybotdiv = False
        # statuses is a dictionary of CodeReviewHTMLParser.Status
        self.statuses = {}

    def handle_starttag(self, tag, attrs):
        """Overrides the HTMLParser method to implement functionality.

        Args:
            tag: the tag name, lower-cased by HTMLParser.
            attrs: list of (name, value) pairs for the tag's
                attributes; value is None for a valueless attribute.
        """
        attrs = dict(attrs)
        if tag == 'div':
            # We are looking for a div whose id starts with 'tryjobdiv'.
            # "or ''" covers both a missing and a valueless attribute.
            id_attr = attrs.get('id') or ''
            if id_attr.startswith('tryjobdiv'):
                self._id = id_attr
        if (self._id and tag == 'a'
                and 'build-result' in (attrs.get('class') or '').split()):
            # Once a tryjob div has been seen, every anchor carrying
            # the 'build-result' class describes one bot.
            # NOTE(review): the body of this branch was lost in the
            # recovered text.  It is reconstructed from the fields
            # handle_endtag() consumes; in particular, reading the
            # result from a 'status' attribute is an assumption.
            self._status = attrs.get('status')
            self._href = attrs.get('href')
            self._currently_parsing_trybotdiv = True

    def handle_data(self, data):
        """Overrides the HTMLParser method to implement functionality.

        NOTE(review): this method was entirely missing from the
        recovered text; it is reconstructed from how handle_endtag()
        uses self._anchor_data.

        Args:
            data: a run of text found between tags.
        """
        if self._currently_parsing_trybotdiv:
            # Save the text inside the anchor; it is the bot name.
            self._anchor_data += data

    def handle_endtag(self, tag):
        """Overrides the HTMLParser method to implement functionality.

        Args:
            tag: the name of the closing tag, lower-cased.
        """
        if tag != 'a':
            return
        if self._status:
            # We take the accumulated self._anchor_data and save it as
            # the bot name.
            bot = self._anchor_data.strip()
            if bot:
                # Add to accumulating dictionary.
                self.statuses[bot] = CodeReviewHTMLParser.Status(
                    status=self._status, url=self._href)
        # Reset state to search for the next bot.
        self._currently_parsing_trybotdiv = False
        self._anchor_data = ''
        self._status = None
        self._href = None


class BuilderHTMLParser(HTMLParser.HTMLParser):
    """Parses Trybot web pages.

    Use the BuilderHTMLParser.parse static function to make use of
    this class.

    This uses the HTMLParser class because it's the best thing in
    Python's standard library.  We need a little more power than a
    regex.  (Search for "You can't parse [X]HTML with regex." for more
    information.)
    """
    # pylint: disable=I0011,R0904

    Result = collections.namedtuple('Result', ['text', 'url'])

    @staticmethod
    def parse(url):
        """Parses a Trybot web page.

        Args:
            url (string), a trybot result URL.

        Returns:
            An array of BuilderHTMLParser.Results, each a description
            of failure results, along with an optional url.  Returns
            an empty list (after writing a message to stderr) if the
            page could not be fetched.
        """
        try:
            page = _fetch_text(url)
        except (urllib2.URLError, ValueError):
            sys.stderr.write('Error getting %s\n' % url)
            return []
        parser = BuilderHTMLParser()
        parser.feed(page)
        parser.close()
        return parser.failure_results

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.failure_results = []
        # Current <li> nesting depth.
        self._li_level = 0
        # Text collected for the failure currently being read.
        self._li_data = ''
        # True while inside an <li> that contains a failure-result div.
        self._current_failure = False
        # Last link ending in '/logs/stdio' seen for the current failure.
        self._failure_results_url = ''

    def handle_starttag(self, tag, attrs):
        """Overrides the HTMLParser method to implement functionality.

        Args:
            tag: the tag name, lower-cased by HTMLParser.
            attrs: list of (name, value) pairs for the tag's
                attributes; value is None for a valueless attribute.
        """
        attrs = dict(attrs)
        if tag == 'li':
            # <li> tags can be nested.  So we have to count the
            # nest-level for backing out.
            self._li_level += 1
            return
        if tag == 'div' and attrs.get('class') == 'failure result':
            # A div with class "failure result" somewhere inside a
            # list item marks that item as a failure; from here until
            # the outermost </li> we want the text.
            if self._li_level > 0:
                self._current_failure = True  # Tells us to keep text.
            return

        if tag == 'a' and self._current_failure:
            # An anchor may lack href (or have it valueless); treat
            # that as an empty link instead of crashing.
            href = attrs.get('href') or ''
            # Sometimes we want to keep the stdio url.  We always
            # return it, just in case.
            if href.endswith('/logs/stdio'):
                self._failure_results_url = href

    def handle_data(self, data):
        """Overrides the HTMLParser method to implement functionality.

        Args:
            data: a run of text found between tags.
        """
        if self._current_failure:
            self._li_data += data

    def handle_endtag(self, tag):
        """Overrides the HTMLParser method to implement functionality.

        When the outermost <li> closes and it contained a failure, the
        collected text is tidied up and appended to
        self.failure_results.

        Args:
            tag: the name of the closing tag, lower-cased.
        """
        if tag != 'li':
            return
        # Clamp at zero so a stray </li> cannot push the depth negative
        # and hide every failure that follows it.
        self._li_level = max(0, self._li_level - 1)
        if self._li_level:
            return
        if self._current_failure:
            text = self._li_data.strip()
            words = text.split()
            if words:
                # Sometimes, it repeats the same thing multiple times;
                # fold a repeated leading word into one occurrence.
                first = words[0]
                escaped = re.escape(first)
                text = re.sub(r'^%s(\s+%s)+' % (escaped, escaped),
                              lambda _match: first, text)
            # Remove some extra unnecessary text.
            text = re.sub(r'unexpected flaky.*', '', text)
            text = re.sub(r'\bpreamble\b', '', text)
            text = re.sub(r'\bstdio\b', '', text)
            self.failure_results.append(
                BuilderHTMLParser.Result(text, self._failure_results_url))
        # Reset the state.
        self._current_failure = False
        self._li_data = ''
        self._failure_results_url = ''


def printer(indent, string):
    """Print indented, wrapped text to sys.stdout.

    Args:
        indent: (int) indentation level; each level adds one spacer
            before the line and removes two columns from the width.
        string: text to print; it is split on newlines and each line
            is word-wrapped.  Continuation lines get one extra spacer.
    """
    def wrap_to(line, columns):
        """Wrap a line to the given number of columns, return a list
        of strings.  A single word longer than columns is not split.
        """
        wrapped = []
        current = ''
        for word in line.split():
            if not current:
                current = word
            elif len(current) + 1 + len(word) > columns:
                wrapped.append(current)
                current = word
            else:
                current += ' ' + word
        if current:
            wrapped.append(current)
        return wrapped

    out = sys.stdout
    # NOTE(review): the width budget below drops two columns per indent
    # level while the spacer seen in the recovered text is one space;
    # the recovery collapsed whitespace, so confirm the spacer width.
    spacer = ' '
    width = 68 - (2 * indent)
    for line in string.split('\n'):
        for i, wrapped_line in enumerate(wrap_to(line, width)):
            out.write(spacer * indent)
            if i > 0:
                out.write(spacer)
            out.write(wrapped_line)
            out.write('\n')
    out.flush()


def main(control_url, roll_url, verbosity=1):
    """Compare two Codereview URLs.

    Only bots present on both codereviews are reported.  Nothing is
    printed if either page cannot be fetched or has no trybot results.

    Args:
        control_url, roll_url: (strings) URL of the format
            https://codereview.chromium.org/?????????

        verbosity: (int) verbose level.  0, 1, or 2.  Level 1 adds the
            summary table; level 2 also lists bots whose roll passed.
    """
    # pylint: disable=I0011,R0914,R0912
    control = CodeReviewHTMLParser.parse(control_url)
    roll = CodeReviewHTMLParser.parse(roll_url)
    if not (control and roll):
        return

    control_name = '[control %s]' % control_url.split('/')[-1]
    roll_name = '[roll %s]' % roll_url.split('/')[-1]
    all_bots = set(control) & set(roll)  # Set intersection.

    out = sys.stdout
    if verbosity > 0:
        # Print out summary of all of the bots.
        out.write('%11s %11s %4s %s\n\n' %
                  ('CONTROL', 'ROLL', 'DIFF', 'BOT'))
        for bot in sorted(all_bots):
            control_status = control[bot].status
            roll_status = roll[bot].status
            if control_status != roll_status:
                diff = '****'
            elif control_status != 'success':
                # Same result on both sides, but not a success.
                diff = '....'
            else:
                diff = ''
            out.write('%11s %11s %4s %s\n' % (
                control_status, roll_status, diff, bot))
        out.write('\n')
        out.flush()

    for bot in sorted(all_bots):
        if roll[bot].status == 'success':
            if verbosity > 1:
                printer(0, '==%s==' % bot)
                printer(1, 'OK')
            continue
        printer(0, '==%s==' % bot)

        for (status, name, url) in (
                (control[bot].status, control_name, control[bot].url),
                (roll[bot].status, roll_name, roll[bot].url)):
            printer(1, name)
            if status != 'failure':
                printer(2, status)
                continue
            for result in BuilderHTMLParser.parse(url):
                # Put each .html name on its own line.
                formatted_result = re.sub(
                    r'(\S*\.html) ', '\n__\\g<1>\n', result.text)
                printer(2, formatted_result)
                if ('compile' in result.text
                        or '...and more' in result.text):
                    # Point at the log: the bot URL with its last path
                    # component replaced by the log link.
                    printer(3, re.sub('/[^/]*$', '/', url) + result.url)
        out.write('\n')


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.stderr.write(__doc__ + '\n')
        sys.exit(1)
    main(sys.argv[1], sys.argv[2],
         int(os.environ.get('COMPARE_CODEREVIEW_VERBOSITY', 1)))