diff options
authorGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2014-01-22 22:57:19 +0000
committerGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2014-01-22 22:57:19 +0000
commit517c1e2921bc20b8600061aa21a77a8810db597e (patch)
parentf1d63aa80dc951417ad34dd00914c8150112e42a (diff)
Chromium Codereview Comparison Script.
This script takes two Codereview URLs, looks at the trybot results for the two codereviews and compares the results. Motivation: This should speed up the DEPS roll procedure. BUG=skia:2039 R=robertphillips@google.com, borenet@google.com Author: halcanary@google.com Review URL: https://codereview.chromium.org/143503003 git-svn-id: http://skia.googlecode.com/svn/trunk@13144 2bbb7eff-a529-9590-31e7-b0007b416f81
1 files changed, 387 insertions, 0 deletions
diff --git a/tools/compare_codereview.py b/tools/compare_codereview.py
new file mode 100755
index 0000000000..16f3a0198d
--- /dev/null
+++ b/tools/compare_codereview.py
@@ -0,0 +1,387 @@
+# Copyright 2014 Google Inc.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Skia's Chromium Codereview Comparison Script.
+This script takes two Codereview URLs, looks at the trybot results for
+the two codereviews and compares the results.
+Usage:
+ compare_codereview.py CONTROL_URL ROLL_URL
+"""
+import collections
+import os
+import re
+import sys
+import urllib2
+import HTMLParser
class CodeReviewHTMLParser(HTMLParser.HTMLParser):
    """Collects trybot statuses from a Codereview web page.

    Use the CodeReviewHTMLParser.parse static function to make use of
    this class.

    Once a <div id="tryjobdiv*"> has been seen, every
    <a class="build-result" status="..." href="..."> link is recorded:
    the link text becomes the bot name, and the status/href attributes
    become the CodeReviewHTMLParser.Status stored under that name.

    This uses the HTMLParser class because it's the best thing in
    Python's standard library.  We need a little more power than a
    regex.  [Search for "You can't parse [X]HTML with regex." for more
    information.]
    """
    # pylint: disable=I0011,R0904

    # namedtuples are like lightweight structs in Python.  The low
    # overhead of a tuple, but the ease of use of an object.
    Status = collections.namedtuple('Status', ['status', 'url'])

    @staticmethod
    def parse(url):
        """Fetch and parse a CodeReview web page.

        Args:
            url (string), a codereview URL like this:
                'https://codereview.chromium.org/?????????'.

        Returns:
            A dictionary; the keys are bot_name strings, the values
            are CodeReviewHTMLParser.Status objects.  Returns None
            (after writing a message to stderr) if the page could not
            be fetched.
        """
        parser = CodeReviewHTMLParser()
        try:
            response = urllib2.urlopen(url)
            try:
                parser.feed(response.read())
            finally:
                # Release the connection even if reading/parsing fails.
                response.close()
        except urllib2.URLError:
            sys.stderr.write('Error getting %s\n' % url)
            return None
        parser.close()
        return parser.statuses

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        # id of the most recent <div id="tryjobdiv*">, or None.
        self._id = None
        # status/href attributes of the anchor currently being read.
        self._status = None
        self._href = None
        # Text accumulated inside the anchor currently being read.
        self._anchor_data = ''
        # True between a build-result <a> and its matching </a>.
        self._currently_parsing_trybotdiv = False
        # statuses is a dictionary of CodeReviewHTMLParser.Status
        self.statuses = {}

    def handle_starttag(self, tag, attrs):
        """Overrides the HTMLParser method to implement functionality.

        Remembers when a <div id="tryjobdiv*"> has been opened and
        starts capturing when a <a class="build-result"> follows it.

        Args:
            tag: (string) lower-cased tag name.
            attrs: list of (name, value) pairs for the tag's
                attributes; a valueless attribute has value None.
        """
        attrs = dict(attrs)
        if tag == 'div':
            # We are looking for <div id="tryjobdiv*">.  "or ''" guards
            # against a valueless attribute, which is reported as None.
            id_attr = attrs.get('id') or ''
            if id_attr.startswith('tryjobdiv'):
                # NOTE: self._id is never cleared, so every later
                # build-result link on the page is accepted.
                self._id = id_attr
            return
        if not (self._id and tag == 'a'):
            return
        if 'build-result' not in (attrs.get('class') or '').split():
            return
        # We are inside a <div id="tryjobdiv*"> and found a link of the
        # form <a class="build-result" href="*">.  Save the
        # (non-standard) status attribute and the URL, and start
        # saving anchor data from a clean slate.
        self._status = attrs.get('status')
        self._href = attrs.get('href')
        self._anchor_data = ''
        self._currently_parsing_trybotdiv = True

    def handle_data(self, data):
        """Overrides the HTMLParser method to implement functionality.

        Accumulates the text found between <a class="build-result">
        and </a>.  Assumes <a> tags aren't nested.

        Args:
            data: (string) a chunk of text from the document.
        """
        if self._currently_parsing_trybotdiv:
            self._anchor_data += data

    def handle_endtag(self, tag):
        """Overrides the HTMLParser method to implement functionality.

        On the </a> that closes a build-result link, stores the
        accumulated text as a bot name (if both the name and the
        status are non-empty) and resets the capture state.

        Args:
            tag: (string) lower-cased tag name.
        """
        if tag != 'a' or not self._currently_parsing_trybotdiv:
            return
        # We take the accumulated self._anchor_data and save it as
        # the bot name.
        bot = self._anchor_data.strip()
        if bot and self._status:
            # Add to accumulating dictionary.
            self.statuses[bot] = CodeReviewHTMLParser.Status(
                status=self._status, url=self._href)
        # Reset state to search for the next bot.  This happens even
        # when the link carried no status, so that its text cannot
        # leak into the name of the next bot.
        self._currently_parsing_trybotdiv = False
        self._anchor_data = ''
        self._status = None
        self._href = None
class BuilderHTMLParser(HTMLParser.HTMLParser):
    """Extracts failure descriptions from a Trybot web page.

    Use the BuilderHTMLParser.parse static function to make use of
    this class.

    A failure is a top-level <li> element that contains, at any <li>
    nesting depth, a <div class="failure result">.  All text from that
    div up to the close of the top-level <li> is collected, cleaned up
    and stored as a BuilderHTMLParser.Result.

    This uses the HTMLParser class because it's the best thing in
    Python's standard library.  We need a little more power than a
    regex.  [Search for "You can't parse [X]HTML with regex." for more
    information.]
    """
    # pylint: disable=I0011,R0904

    Result = collections.namedtuple('Result', ['text', 'url'])

    @staticmethod
    def parse(url):
        """Fetch and parse a Trybot web page.

        Args:
            url (string), a trybot result URL.

        Returns:
            An array of BuilderHTMLParser.Results, each a description
            of failure results, along with an optional url.  Returns
            an empty array (after writing a message to stderr) if the
            page could not be fetched.
        """
        parser = BuilderHTMLParser()
        try:
            response = urllib2.urlopen(url)
            try:
                parser.feed(response.read())
            finally:
                # Release the connection even if reading/parsing fails.
                response.close()
        except urllib2.URLError:
            sys.stderr.write('Error getting %s\n' % url)
            return []
        parser.close()
        return parser.failure_results

    @staticmethod
    def _clean_failure_text(text):
        """Return the collected failure text with known noise removed."""
        result = text.strip()
        words = result.split()
        if words:
            # Sometimes, it repeats the same thing multiple times;
            # collapse a leading run of the first word into one copy.
            # The word is escaped and re-inserted via a function so
            # that regex metacharacters and backslashes in it are
            # treated literally.
            first = words[0]
            escaped = re.escape(first)
            result = re.sub(r'^%s(\s+%s)+' % (escaped, escaped),
                            lambda _match: first, result)
        # Remove some extra unnecessary text.
        result = re.sub(r'unexpected flaky.*', '', result)
        result = re.sub(r'\bpreamble\b', '', result)
        result = re.sub(r'\bstdio\b', '', result)
        return result

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        # Accumulated BuilderHTMLParser.Result objects.
        self.failure_results = []
        # Not read anywhere in this class; kept for compatibility.
        self._current_failure_result = None
        self._divlevel = None
        # Current <li> nesting depth.
        self._li_level = 0
        # Text collected for the failure currently being read.
        self._li_data = ''
        # True while inside a top-level <li> that holds a failure div.
        self._current_failure = False
        # Last href ending in '/logs/stdio' seen in the current failure.
        self._failure_results_url = ''

    def handle_starttag(self, tag, attrs):
        """Overrides the HTMLParser method to implement functionality.

        Tracks <li> nesting, notices <div class="failure result">, and
        remembers stdio log links inside a failure.

        Args:
            tag: (string) lower-cased tag name.
            attrs: list of (name, value) pairs for the tag's
                attributes; a valueless attribute has value None.
        """
        attrs = dict(attrs)
        if tag == 'li':
            # <li> tags can be nested.  So we have to count the
            # nest-level for backing out.
            self._li_level += 1
            return
        if tag == 'div' and attrs.get('class') == 'failure result':
            # We care about this sort of thing:
            #   <li>
            #     <li>
            #       <li>
            #         <div class="failure result">...</div>
            #       </li>
            #     </li>
            #     We want this text here.
            #   </li>
            if self._li_level > 0:
                self._current_failure = True  # Tells us to keep text.
            return
        if tag == 'a' and self._current_failure:
            # An <a> without href (or with a valueless one) yields
            # None; treat that as "no URL" instead of crashing.
            href = attrs.get('href') or ''
            # Sometimes we want to keep the stdio url.  We always
            # return it, just in case.
            if href.endswith('/logs/stdio'):
                self._failure_results_url = href

    def handle_data(self, data):
        """Overrides the HTMLParser method to implement functionality.

        Accumulates text while a failure is being read.

        Args:
            data: (string) a chunk of text from the document.
        """
        if self._current_failure:
            self._li_data += data

    def handle_endtag(self, tag):
        """Overrides the HTMLParser method to implement functionality.

        When the outermost <li> closes, stores the failure collected
        inside it (if any) and resets the per-failure state.

        Args:
            tag: (string) lower-cased tag name.
        """
        if tag != 'li' or self._li_level <= 0:
            # Ignoring an unmatched </li> keeps the depth counter from
            # going negative, which would stop it ever reaching zero
            # again at the close of a later top-level <li>.
            return
        self._li_level -= 1
        if self._li_level != 0:
            return
        if self._current_failure:
            self.failure_results.append(BuilderHTMLParser.Result(
                self._clean_failure_text(self._li_data),
                self._failure_results_url))
        # Reset the state.
        self._current_failure = False
        self._li_data = ''
        self._failure_results_url = ''
def printer(indent, string, width=68, out=None):
    """Print indented, wrapped text.

    Each '\\n'-separated line of string is word-wrapped independently.
    Every output line is prefixed with indent spacers; continuation
    lines produced by wrapping get one additional spacer.  Lines that
    contain no words produce no output.

    Args:
        indent: (int) indentation level.
        string: (string) text to print; may contain newlines.
        width: (int) nominal line width; the text is wrapped to
            width - 2 * indent columns.
        out: file-like object to write to; defaults to sys.stdout,
            looked up at call time.
    """
    def wrap_to(line, columns):
        """Wrap a line to the given number of columns, return a list
        of strings.  A word longer than columns gets a line of its own.
        """
        wrapped = []
        current = ''
        for word in line.split():
            if not current:
                current = word
            elif len(current) + 1 + len(word) > columns:
                wrapped.append(current)
                current = word
            else:
                current += ' ' + word
        if current:
            wrapped.append(current)
        return wrapped

    if out is None:
        out = sys.stdout
    spacer = ' '
    columns = width - (2 * indent)
    for line in string.split('\n'):
        for i, wrapped_line in enumerate(wrap_to(line, columns)):
            # Continuation lines are indented one spacer deeper.
            depth = indent + 1 if i > 0 else indent
            out.write(spacer * depth + wrapped_line + '\n')
    out.flush()
def main(control_url, roll_url, verbosity=1):
    """Compare the trybot results of two Codereview URLs.

    Only bots that appear on both pages are considered.  For every
    such bot whose roll status is not 'success', the status of both
    the control and the roll is printed; for a status of 'failure' the
    bot's result page is fetched and its failure descriptions are
    printed as well.

    Args:
        control_url, roll_url: (strings) URL of the format
            https://codereview.chromium.org/?????????
        verbosity: (int) verbose level.  0, 1, or 2.  Above 0, a
            summary table of all bots is printed first; above 1, bots
            whose roll succeeded are listed as OK.

    Returns:
        None.  Returns early, printing nothing, if either page could
        not be fetched or lists no bots.
    """
    # pylint: disable=I0011,R0914,R0912
    control = CodeReviewHTMLParser.parse(control_url)
    roll = CodeReviewHTMLParser.parse(roll_url)
    if not (control and roll):
        return

    # Label each side with the last path component of its URL; the
    # rstrip keeps a trailing '/' from producing an empty label.
    control_name = '[control %s]' % control_url.rstrip('/').split('/')[-1]
    roll_name = '[roll %s]' % roll_url.rstrip('/').split('/')[-1]

    all_bots = sorted(set(control) & set(roll))  # Set intersection.
    out = sys.stdout

    if verbosity > 0:
        # Print out summary of all of the bots.
        out.write('%11s %11s %4s %s\n\n' %
                  ('CONTROL', 'ROLL', 'DIFF', 'BOT'))
        for bot in all_bots:
            control_status = control[bot].status
            roll_status = roll[bot].status
            if control_status != roll_status:
                diff = '****'
            elif control_status != 'success':
                # Same status on both sides, but not a success.
                diff = '....'
            else:
                diff = ''
            out.write('%11s %11s %4s %s\n' % (
                control_status, roll_status, diff, bot))
        out.write('\n')
        out.flush()

    for bot in all_bots:
        if roll[bot].status == 'success':
            if verbosity > 1:
                printer(0, '==%s==' % bot)
                printer(1, 'OK')
            continue
        printer(0, '==%s==' % bot)
        for (status, name, url) in (
                (control[bot].status, control_name, control[bot].url),
                (roll[bot].status, roll_name, roll[bot].url)):
            printer(1, name)
            if status != 'failure':
                printer(2, status)
                continue
            # A result link without an href leaves url as None; there
            # is nothing to fetch in that case.
            results = BuilderHTMLParser.parse(url) if url else []
            for result in results:
                # Put each "*.html" name on its own line, prefixed
                # with "__".
                formatted_result = re.sub(
                    r'(\S*\.html) ', r'\n__\g<1>\n', result.text)
                printer(2, formatted_result)
                if ('compile' in result.text
                        or '...and more' in result.text):
                    # result.url is appended to the directory part of
                    # the bot's result URL.
                    printer(3, re.sub('/[^/]*$', '/', url) + result.url)
        out.write('\n')
if __name__ == '__main__':
    # Script entry point: expects CONTROL_URL and ROLL_URL as the
    # first two command-line arguments.
    if len(sys.argv) < 3:
        # Show the module docstring as the usage message.
        sys.stderr.write('%s\n' % __doc__)
        # sys.exit rather than the exit() builtin, which is only
        # installed by the site module.
        sys.exit(1)
    # The verbosity level comes from the environment; it defaults to 1.
    main(sys.argv[1], sys.argv[2],
         int(os.environ.get('COMPARE_CODEREVIEW_VERBOSITY', 1)))