aboutsummaryrefslogtreecommitdiffhomepage
path: root/gm/rebaseline_server/download_actuals.py
blob: 0e60289573ad2a5fa9b424c2c2f519f6c0e6c237 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
#!/usr/bin/python

"""
Copyright 2014 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Download actual GM results for a particular builder.
"""

# System-level imports
import httplib
import logging
import optparse
import os
import posixpath
import re
import urllib2

# Must fix up PYTHONPATH before importing from within Skia
import rs_fixpypath  # pylint: disable=W0611

# Imports from within Skia
from py.utils import gs_utils
from py.utils import url_utils
import buildbot_globals
import gm_json


# Google Storage bucket holding the bots' JSON summaries of actual GM results;
# the bucket name is looked up from the shared buildbot configuration.
GM_SUMMARIES_BUCKET = buildbot_globals.Get('gm_summaries_bucket')
# Public HTTP endpoint for reading that bucket's contents.
DEFAULT_ACTUALS_BASE_URL = (
    'http://storage.googleapis.com/%s' % GM_SUMMARIES_BUCKET)
# Filename of the JSON summary within each builder's directory.
DEFAULT_JSON_FILENAME = 'actual-results.json'


class Download(object):
  """Downloads actual GM result images for a single builder."""

  def __init__(self, actuals_base_url=DEFAULT_ACTUALS_BASE_URL,
               json_filename=DEFAULT_JSON_FILENAME,
               gm_actuals_root_url=gm_json.GM_ACTUALS_ROOT_HTTP_URL):
    """
    Args:
      actuals_base_url: URL pointing at the root directory
          containing all actual-results.json files, e.g.,
          http://domain.name/path/to/dir  OR
          file:///absolute/path/to/localdir
      json_filename: The JSON filename to read from within each directory.
      gm_actuals_root_url: Base URL under which the actually-generated-by-bots
          GM images are stored.
    """
    self._actuals_base_url = actuals_base_url
    self._json_filename = json_filename
    self._gm_actuals_root_url = gm_actuals_root_url
    # Compiled once up front; splits an image filename into (test, config).
    self._image_filename_re = re.compile(gm_json.IMAGE_FILENAME_PATTERN)

  def fetch(self, builder_name, dest_dir):
    """ Downloads actual GM results for a particular builder.

    Args:
      builder_name: which builder to download results of
      dest_dir: path to directory where the image files will be written;
                if the directory does not exist yet, it will be created

    TODO(epoger): Display progress info.  Right now, it can take a long time
    to download all of the results, and there is no indication of progress.

    TODO(epoger): Download multiple images in parallel to speed things up.
    """
    summary_url = posixpath.join(self._actuals_base_url, builder_name,
                                 self._json_filename)
    summary = gm_json.LoadFromString(urllib2.urlopen(summary_url).read())
    actuals = summary[gm_json.JSONKEY_ACTUALRESULTS]
    # Iterate result types and image names in sorted order so that downloads
    # happen in a deterministic sequence.
    for result_type in sorted(actuals):
      images = actuals[result_type]
      if not images:
        continue
      for image_name in sorted(images):
        test, config = self._image_filename_re.match(image_name).groups()
        hash_type, hash_digest = images[image_name]
        image_url = gm_json.CreateGmActualUrl(
            test_name=test, hash_type=hash_type, hash_digest=hash_digest,
            gm_actuals_root_url=self._gm_actuals_root_url)
        url_utils.copy_contents(
            source_url=image_url,
            dest_path=os.path.join(dest_dir, config, test + '.png'),
            create_subdirs_if_needed=True)


def get_builders_list(summaries_bucket=GM_SUMMARIES_BUCKET):
  """ Returns the list of builders we have actual results for.

  Args:
    summaries_bucket: Google Cloud Storage bucket containing the summary
        JSON files
  """
  # Bug fix: the body previously ignored the summaries_bucket parameter and
  # always listed GM_SUMMARIES_BUCKET, so passing a different bucket had no
  # effect (TipOfTreeActuals.get_builders relies on this parameter).
  dirs, _ = gs_utils.GSUtils().list_bucket_contents(bucket=summaries_bucket)
  return dirs


class ActualLocation(object):
  """Value object identifying one JSON summary file in Google Storage."""

  def __init__(self, bucket, path, generation):
    # bucket: Google Storage bucket name.
    # path: object path within the bucket.
    # generation: object generation string, or None when unspecified.
    self.bucket, self.path, self.generation = bucket, path, generation


class TipOfTreeActuals(object):
  """Locates the most recent actuals uploaded by the waterfall bots."""

  def __init__(self, summaries_bucket=GM_SUMMARIES_BUCKET,
               json_filename=DEFAULT_JSON_FILENAME):
    """
    Args:
      summaries_bucket: URL pointing at the root directory
          containing all actual-results.json files, e.g.,
          http://domain.name/path/to/dir  OR
          file:///absolute/path/to/localdir
      json_filename: The JSON filename to read from within each directory.
    """
    self._summaries_bucket = summaries_bucket
    self._json_filename = json_filename

  def description(self):
    """Returns a human-readable description of this actuals source."""
    return 'gm_summaries_bucket %s' % (self._summaries_bucket,)

  def get_builders(self):
    """ Returns the list of builders we have actual results for.
    {builder:string -> ActualLocation}
    """
    # Tip-of-tree summaries have no pinned generation, hence None.
    return dict(
        (builder,
         ActualLocation(self._summaries_bucket,
                        "%s/%s" % (builder, self._json_filename),
                        None))
        for builder in get_builders_list(self._summaries_bucket))


class RietveldIssueActuals(object):
  """Locates actuals uploaded by the tryjobs of a rietveld issue."""

  def __init__(self, issue, json_filename=DEFAULT_JSON_FILENAME):
    """
    Args:
      issue: The rietveld issue from which to obtain actuals.
      json_filename: The JSON filename to read from within each directory.
    """
    self._issue = issue
    self._json_filename = json_filename

  def description(self):
    """Returns a human-readable description of this actuals source."""
    return 'rietveld issue %s' % (self._issue,)

  def get_builders(self):
    """ Returns the actuals for the given rietveld issue's tryjobs.
    {builder:string -> ActualLocation}

    e.g.
    {'Test-Android-Xoom-Tegra2-Arm7-Release': (
        'chromium-skia-gm-summaries',
        'Test-Android-Xoom-Tegra2-Arm7-Release-Trybot/actual-results.json',
        '1415041165535000')}
    """
    result = dict()
    # Matches the "Created: gs://bucket/path#generation" line emitted by the
    # Upload GM Results step; groups are (bucket, path, generation).
    # Raw string so \d is a regex escape, not a (deprecated) string escape.
    json_filename_re = re.compile(
        r'Created: gs://([^/]+)/((?:[^/]+/)+%s)#(\d+)'
        % re.escape(self._json_filename))
    codereview_api_url = 'https://codereview.chromium.org/api'
    upload_gm_step_url = '/steps/Upload GM Results/logs/stdio'

    logging.info('Fetching issue %s ...' % (self._issue,))
    json_issue_url = '%s/%s' % (codereview_api_url, self._issue)
    json_issue_data = urllib2.urlopen(json_issue_url).read()
    issue_dict = gm_json.LoadFromString(json_issue_data)

    patchsets = issue_dict.get("patchsets", [])
    # Bug fix: guard BEFORE indexing. The old code did patchsets[-1] first,
    # so an issue with zero patchsets raised IndexError instead of logging
    # the warning below.
    if not patchsets:
      logging.warning('No patchsets for rietveld issue %s.' % (self._issue,))
      return result
    # Only the most recent patchset's tryjobs are considered.
    patchset = patchsets[-1]

    logging.info('Fetching issue %s patch %s...' % (self._issue, patchset))
    json_patchset_url = '%s/%s/%s' % (codereview_api_url, self._issue, patchset)
    json_patchset_data = urllib2.urlopen(json_patchset_url).read()
    patchset_dict = gm_json.LoadFromString(json_patchset_data)

    # try_job_results is ordered reverse chronologically, so the first result
    # seen for a builder is the most recent one; later duplicates are skipped.
    try_job_results = patchset_dict.get('try_job_results', [])
    for try_job_result in try_job_results:
      try_builder = try_job_result.get('builder', '<bad builder>')
      if not try_builder.endswith('-Trybot'):
        logging.warning('Builder %s is not a trybot?' % (try_builder,))
        continue
      builder = try_builder[:-len('-Trybot')]
      if builder in result:
        continue

      logging.info('Fetching issue %s patch %s try %s...' %
                  (self._issue, patchset, try_builder))
      # NOTE(review): a missing 'url' key yields '<bad url>' (not None), so
      # the "has not started" branch only fires when the key is present with
      # a None value -- confirm this is the rietveld API's actual contract.
      build_url = try_job_result.get('url', '<bad url>')
      if build_url is None:
        logging.warning('Builder %s has not started.' % (try_builder,))
        continue
      gm_upload_output_url = build_url + urllib2.quote(upload_gm_step_url)
      logging.info('Fetching %s ...' % (gm_upload_output_url,))

      # Tryjobs might not produce the step, but don't let that fail everything.
      gm_upload_output = None
      try:
        gm_upload_output = urllib2.urlopen(gm_upload_output_url).read()
      except (urllib2.HTTPError, urllib2.URLError, httplib.HTTPException) as e:
        logging.warning(e)
      except Exception:
        logging.exception('Error opening %s .' % (gm_upload_output_url,))
      if not gm_upload_output:
        logging.warning('Could not fetch %s .' % (gm_upload_output_url,))
        continue

      json_filename_match = json_filename_re.search(gm_upload_output)
      if json_filename_match:
        logging.info('Found issue %s patch %s try %s result gs://%s/%s#%s .' %
                    (self._issue, patchset, builder,
                    json_filename_match.group(1),
                    json_filename_match.group(2),
                    json_filename_match.group(3)))
        result[builder] = ActualLocation(json_filename_match.group(1),
                                         json_filename_match.group(2),
                                         json_filename_match.group(3))
      else:
        logging.warning('Did not find %s for issue %s patch %s try %s.' %
                      (self._json_filename, self._issue, patchset, try_builder))

    return result


def main():
  """Command-line entry point.

  Parses options, then either prints the available builders
  (--list-builders) or downloads all actual GM images for the requested
  builder into the requested directory.

  Raises:
    Exception: if a required option was not set, or extra positional
        arguments were supplied.
  """
  parser = optparse.OptionParser()
  required_params = []
  parser.add_option('--actuals-base-url',
                    action='store', type='string',
                    default=DEFAULT_ACTUALS_BASE_URL,
                    help=('Base URL from which to read files containing JSON '
                          'summaries of actual GM results; defaults to '
                          '"%default".'))
  required_params.append('builder')
  # TODO(epoger): Before https://codereview.chromium.org/309653005 , when this
  # tool downloaded the JSON summaries from skia-autogen, it had the ability
  # to get results as of a specific revision number.  We should add similar
  # functionality when retrieving the summaries from Google Storage.
  parser.add_option('--builder',
                    action='store', type='string',
                    help=('REQUIRED: Which builder to download results for. '
                          'To see a list of builders, run with the '
                          '--list-builders option set.'))
  required_params.append('dest_dir')
  parser.add_option('--dest-dir',
                    action='store', type='string',
                    help=('REQUIRED: Directory where all images should be '
                          'written. If this directory does not exist yet, it '
                          'will be created.'))
  parser.add_option('--json-filename',
                    action='store', type='string',
                    default=DEFAULT_JSON_FILENAME,
                    help=('JSON summary filename to read for each builder; '
                          'defaults to "%default".'))
  parser.add_option('--list-builders', action='store_true',
                    help=('List all available builders.'))
  (params, remaining_args) = parser.parse_args()

  if params.list_builders:
    # Parenthesized so this is valid as both a py2 print statement and a
    # py3 function call; output is identical either way.
    print('\n'.join(get_builders_list()))
    return

  # Make sure all required options were set,
  # and that there were no items left over in the command line.
  for required_param in required_params:
    if not getattr(params, required_param):
      raise Exception('required option \'%s\' was not set' % required_param)
  # Bug fix: this previously used "len(remaining_args) is not 0", which
  # compares identity rather than value and only works by accident of
  # CPython's small-int caching; test truthiness instead.
  if remaining_args:
    raise Exception('extra items specified in the command line: %s' %
                    remaining_args)

  downloader = Download(actuals_base_url=params.actuals_base_url)
  downloader.fetch(builder_name=params.builder,
                   dest_dir=params.dest_dir)


if __name__ == '__main__':
  main()