aboutsummaryrefslogtreecommitdiffhomepage
path: root/gm/rebaseline_server/imagediffdb.py
blob: 89f9fef319a4045909553c7c840bbc19fb1004f0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
#!/usr/bin/python

"""
Copyright 2013 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Calulate differences between image pairs, and store them in a database.
"""

# System-level imports
import contextlib
import json
import logging
import os
import re
import shutil
import tempfile
import urllib

# Must fix up PYTHONPATH before importing from within Skia
import fix_pythonpath  # pylint: disable=W0611

# Imports from within Skia
import find_run_binary

SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')

DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]')

RGBDIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Keys used within DiffRecord dictionary representations.
# NOTE: Keep these in sync with static/constants.js
KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'
KEY__DIFFERENCES__NUM_DIFF_PIXELS = 'numDifferingPixels'
KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference'


class DiffRecord(object):
  """ Record of differences between two images. """

  def __init__(self, storage_root,
               expected_image_url, expected_image_locator,
               actual_image_url, actual_image_locator,
               expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
               actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
               image_suffix=DEFAULT_IMAGE_SUFFIX):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.

    Args:
      storage_root: root directory on local disk within which we store all
          images
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
      expected_images_subdir: the subdirectory expected images are stored in.
      actual_images_subdir: the subdirectory actual images are stored in.
      image_suffix: the suffix of images.
    """
    expected_image_locator = _sanitize_locator(expected_image_locator)
    actual_image_locator = _sanitize_locator(actual_image_locator)

    # Download the expected/actual images, if we don't have them already.
    # TODO(rmistry): Add a parameter that just tries to use already-present
    # image files rather than downloading them.
    expected_image_file = os.path.join(
        storage_root, expected_images_subdir,
        str(expected_image_locator) + image_suffix)
    actual_image_file = os.path.join(
        storage_root, actual_images_subdir,
        str(actual_image_locator) + image_suffix)
    try:
      _download_file(expected_image_file, expected_image_url)
    except Exception:
      logging.exception('unable to download expected_image_url %s to file %s' %
                        (expected_image_url, expected_image_file))
      raise
    try:
      _download_file(actual_image_file, actual_image_url)
    except Exception:
      logging.exception('unable to download actual_image_url %s to file %s' %
                        (actual_image_url, actual_image_file))
      raise

    # Get all diff images and values from skpdiff binary.
    skpdiff_output_dir = tempfile.mkdtemp()
    try:
      skpdiff_summary_file = os.path.join(skpdiff_output_dir,
                                          'skpdiff-output.json')
      skpdiff_rgbdiff_dir = os.path.join(skpdiff_output_dir, 'rgbDiff')
      skpdiff_whitediff_dir = os.path.join(skpdiff_output_dir, 'whiteDiff')
      expected_img = os.path.join(storage_root, expected_images_subdir,
                                  str(expected_image_locator) + image_suffix)
      actual_img = os.path.join(storage_root, actual_images_subdir,
                                str(actual_image_locator) + image_suffix)

      # TODO: Call skpdiff ONCE for all image pairs, instead of calling it
      # repeatedly.  This will allow us to parallelize a lot more work.
      find_run_binary.run_command(
          [SKPDIFF_BINARY, '-p', expected_img, actual_img,
           '--jsonp', 'false',
           '--output', skpdiff_summary_file,
           '--differs', 'perceptual', 'different_pixels',
           '--rgbDiffDir', skpdiff_rgbdiff_dir,
           '--whiteDiffDir', skpdiff_whitediff_dir,
           ])

      # Get information out of the skpdiff_summary_file.
      with contextlib.closing(open(skpdiff_summary_file)) as fp:
        data = json.load(fp)

      # For now, we can assume there is only one record in the output summary,
      # since we passed skpdiff only one pair of images.
      record = data['records'][0]
      self._width = record['width']
      self._height = record['height']
      # TODO: make max_diff_per_channel a tuple instead of a list, because the
      # structure is meaningful (first element is red, second is green, etc.)
      # See http://stackoverflow.com/a/626871
      self._max_diff_per_channel = [
          record['maxRedDiff'], record['maxGreenDiff'], record['maxBlueDiff']]
      rgb_diff_path = record['rgbDiffPath']
      white_diff_path = record['whiteDiffPath']
      per_differ_stats = record['diffs']
      for stats in per_differ_stats:
        differ_name = stats['differName']
        if differ_name == 'different_pixels':
          self._num_pixels_differing = stats['pointsOfInterest']
        elif differ_name == 'perceptual':
          perceptual_similarity = stats['result']

      # skpdiff returns the perceptual similarity; convert it to get the
      # perceptual difference percentage.
      # skpdiff outputs -1 if the images are different sizes. Treat any
      # output that does not lie in [0, 1] as having 0% perceptual
      # similarity.
      if not 0 <= perceptual_similarity <= 1:
        perceptual_similarity = 0
      self._perceptual_difference = 100 - (perceptual_similarity * 100)

      # Store the rgbdiff and whitediff images generated above.
      diff_image_locator = _get_difference_locator(
          expected_image_locator=expected_image_locator,
          actual_image_locator=actual_image_locator)
      basename = str(diff_image_locator) + image_suffix
      _mkdir_unless_exists(os.path.join(storage_root, RGBDIFFS_SUBDIR))
      _mkdir_unless_exists(os.path.join(storage_root, WHITEDIFFS_SUBDIR))
      # TODO: Modify skpdiff's behavior so we can tell it exactly where to
      # write the image files into, rather than having to move them around
      # after skpdiff writes them out.
      shutil.copyfile(rgb_diff_path,
                      os.path.join(storage_root, RGBDIFFS_SUBDIR, basename))
      shutil.copyfile(white_diff_path,
                      os.path.join(storage_root, WHITEDIFFS_SUBDIR, basename))

    finally:
      shutil.rmtree(skpdiff_output_dir)

  # TODO(epoger): Use properties instead of getters throughout.
  # See http://stackoverflow.com/a/6618176
  def get_num_pixels_differing(self):
    """Returns the absolute number of pixels that differ."""
    return self._num_pixels_differing

  def get_percent_pixels_differing(self):
    """Returns the percentage of pixels that differ, as a float between
    0 and 100 (inclusive)."""
    return ((float(self._num_pixels_differing) * 100) /
            (self._width * self._height))

  def get_perceptual_difference(self):
    """Returns the perceptual difference percentage."""
    return self._perceptual_difference

  def get_max_diff_per_channel(self):
    """Returns the maximum difference between the expected and actual images
    for each R/G/B channel, as a list."""
    return self._max_diff_per_channel

  def as_dict(self):
    """Returns a dictionary representation of this DiffRecord, as needed when
    constructing the JSON representation."""
    return {
        KEY__DIFFERENCES__NUM_DIFF_PIXELS: self._num_pixels_differing,
        KEY__DIFFERENCES__PERCENT_DIFF_PIXELS:
            self.get_percent_pixels_differing(),
        KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel,
        KEY__DIFFERENCES__PERCEPTUAL_DIFF: self._perceptual_difference,
    }


class ImageDiffDB(object):
  """ Calculates differences between image pairs, maintaining a database of
  them for download."""

  def __init__(self, storage_root):
    """
    Args:
      storage_root: string; root path within the DB will store all of its stuff
    """
    self._storage_root = storage_root

    # Dictionary of DiffRecords, keyed by (expected_image_locator,
    # actual_image_locator) tuples.
    self._diff_dict = {}

  @property
  def storage_root(self):
    return self._storage_root

  def add_image_pair(self,
                     expected_image_url, expected_image_locator,
                     actual_image_url, actual_image_locator):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.
    When we do that, we should probably add a new method that will block
    until all of the images have been downloaded and processed.  Otherwise,
    we won't know when it's safe to start calling get_diff_record().
    jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
    thread-pool/worker queue at a higher level that just uses ImageDiffDB?

    Args:
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    expected_image_locator = _sanitize_locator(expected_image_locator)
    actual_image_locator = _sanitize_locator(actual_image_locator)
    key = (expected_image_locator, actual_image_locator)
    if not key in self._diff_dict:
      try:
        new_diff_record = DiffRecord(
            self._storage_root,
            expected_image_url=expected_image_url,
            expected_image_locator=expected_image_locator,
            actual_image_url=actual_image_url,
            actual_image_locator=actual_image_locator)
      except Exception:
        # If we can't create a real DiffRecord for this (expected, actual) pair,
        # store None and the UI will show whatever information we DO have.
        # Fixes http://skbug.com/2368 .
        logging.exception(
            'got exception while creating a DiffRecord for '
            'expected_image_url=%s , actual_image_url=%s; returning None' % (
                expected_image_url, actual_image_url))
        new_diff_record = None
      self._diff_dict[key] = new_diff_record

  def get_diff_record(self, expected_image_locator, actual_image_locator):
    """Returns the DiffRecord for this image pair.

    Raises a KeyError if we don't have a DiffRecord for this image pair.
    """
    key = (_sanitize_locator(expected_image_locator),
           _sanitize_locator(actual_image_locator))
    return self._diff_dict[key]


# Utility functions

def _download_file(local_filepath, url):
  """Download a file from url to local_filepath, unless it is already there.

  Args:
    local_filepath: path on local disk where the image should be stored
    url: URL from which we can download the image if we don't have it yet
  """
  if not os.path.exists(local_filepath):
    _mkdir_unless_exists(os.path.dirname(local_filepath))
    with contextlib.closing(urllib.urlopen(url)) as url_handle:
      with open(local_filepath, 'wb') as file_handle:
        shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)


def _mkdir_unless_exists(path):
  """Unless path refers to an already-existing directory, create it.

  Args:
    path: path on local disk
  """
  if not os.path.isdir(path):
    os.makedirs(path)


def _sanitize_locator(locator):
  """Returns a sanitized version of a locator (one in which we know none of the
  characters will have special meaning in filenames).

  Args:
    locator: string, or something that can be represented as a string
  """
  return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))


def _get_difference_locator(expected_image_locator, actual_image_locator):
  """Returns the locator string used to look up the diffs between expected_image
  and actual_image.

  We must keep this function in sync with getImageDiffRelativeUrl() in
  static/loader.js

  Args:
    expected_image_locator: locator string pointing at expected image
    actual_image_locator: locator string pointing at actual image

  Returns: already-sanitized locator where the diffs between expected and
      actual images can be found
  """
  return "%s-vs-%s" % (_sanitize_locator(expected_image_locator),
                       _sanitize_locator(actual_image_locator))