aboutsummaryrefslogtreecommitdiffhomepage
path: root/infra/cifuzz/fuzz_target.py
blob: 28dd80baa81f50fc69fb2ce4d1b4974338b9d83f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A module to handle running a fuzz target for a specified amount of time."""
import logging
import os
import posixpath
import re
import stat
import subprocess
import sys
import tempfile
import time
import urllib.error
import urllib.request
import zipfile

# pylint: disable=wrong-import-position
# pylint: disable=import-error
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import utils

# TODO: Turn default logging to WARNING when CIFuzz is stable.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG)

LIBFUZZER_OPTIONS = '-seed=1337 -len_control=0'

# Location of google cloud storage for latest OSS-Fuzz builds.
GCS_BASE_URL = 'https://storage.googleapis.com/'

# Location of cluster fuzz builds on GCS.
CLUSTERFUZZ_BUILDS = 'clusterfuzz-builds'

# The get request for the latest version of a project's build.
VERSION_STRING = '{project_name}-{sanitizer}-latest.version'

# The name to store the latest OSS-Fuzz build at.
BUILD_ARCHIVE_NAME = 'oss_fuzz_latest.zip'

# Zip file name containing the corpus.
CORPUS_ZIP_NAME = 'public.zip'

# The number of reproduce attempts for a crash.
REPRODUCE_ATTEMPTS = 10

# Seconds on top of duration until a timeout error is raised.
BUFFER_TIME = 10

# Log message for is_crash_reportable if it can't check if crash repros
# on OSS-Fuzz build.
COULD_NOT_TEST_ON_OSS_FUZZ_MESSAGE = (
    'Crash is reproducible. Could not run OSS-Fuzz build of '
    'target to determine if this pull request introduced crash. '
    'Assuming this pull request introduced crash.')


class ReproduceError(Exception):
  """Error for when we can't attempt to reproduce a crash."""


class FuzzTarget:
  """A class to manage a single fuzz target.

  Attributes:
    target_name: The name of the fuzz target.
    duration: The length of time in seconds that the target should run.
    target_path: The location of the fuzz target binary.
    out_dir: The location of where output artifacts are stored.
    project_name: The name of the relevant OSS-Fuzz project.
  """

  #pylint: disable=too-many-arguments
  def __init__(self,
               target_path,
               duration,
               out_dir,
               project_name=None,
               sanitizer='address'):
    """Represents a single fuzz target.

    Note: project_name should be none when the fuzzer being run is not
    associated with a specific OSS-Fuzz project.

    Args:
      target_path: The location of the fuzz target binary.
      duration: The length of time  in seconds the target should run.
      out_dir: The location of where the output from crashes should be stored.
      project_name: The name of the relevant OSS-Fuzz project.
    """
    self.target_name = os.path.basename(target_path)
    self.duration = int(duration)
    self.target_path = target_path
    self.out_dir = out_dir
    self.project_name = project_name
    self.sanitizer = sanitizer

  def fuzz(self):
    """Starts the fuzz target run for the length of time specified by duration.

    Returns:
      (test_case, stack trace, time in seconds) on crash or
      (None, None, time in seconds) on timeout or error.
    """
    logging.info('Fuzzer %s, started.', self.target_name)
    docker_container = utils.get_container_name()
    command = ['docker', 'run', '--rm', '--privileged']
    if docker_container:
      command += [
          '--volumes-from', docker_container, '-e', 'OUT=' + self.out_dir
      ]
    else:
      command += ['-v', '%s:%s' % (self.out_dir, '/out')]

    command += [
        '-e', 'FUZZING_ENGINE=libfuzzer', '-e', 'SANITIZER=' + self.sanitizer,
        '-e', 'RUN_FUZZER_MODE=interactive', 'gcr.io/oss-fuzz-base/base-runner',
        'bash', '-c'
    ]

    run_fuzzer_command = 'run_fuzzer {fuzz_target} {options}'.format(
        fuzz_target=self.target_name,
        options=LIBFUZZER_OPTIONS + ' -max_total_time=' + str(self.duration))

    # If corpus can be downloaded use it for fuzzing.
    latest_corpus_path = self.download_latest_corpus()
    if latest_corpus_path:
      run_fuzzer_command = run_fuzzer_command + ' ' + latest_corpus_path
    command.append(run_fuzzer_command)

    logging.info('Running command: %s', ' '.join(command))
    process = subprocess.Popen(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)

    try:
      _, stderr = process.communicate(timeout=self.duration + BUFFER_TIME)
    except subprocess.TimeoutExpired:
      logging.error('Fuzzer %s timed out, ending fuzzing.', self.target_name)
      return None, None

    # Libfuzzer timeout was reached.
    if not process.returncode:
      logging.info('Fuzzer %s finished with no crashes discovered.',
                   self.target_name)
      return None, None

    # Crash was discovered.
    logging.info('Fuzzer %s, ended before timeout.', self.target_name)
    test_case = self.get_test_case(stderr)
    if not test_case:
      logging.error(b'No test case found in stack trace: %s.', stderr)
      return None, None
    if self.is_crash_reportable(test_case):
      return test_case, stderr
    return None, None

  def is_reproducible(self, test_case, target_path):
    """Checks if the test case reproduces.

      Args:
        test_case: The path to the test case to be tested.
        target_path: The path to the fuzz target to be tested

      Returns:
        True if crash is reproducible and we were able to run the
        binary.

      Raises:
        ReproduceError if we can't attempt to reproduce the crash.
    """

    if not os.path.exists(target_path):
      raise ReproduceError('Target %s not found.' % target_path)

    os.chmod(target_path, stat.S_IRWXO)

    target_dirname = os.path.dirname(target_path)
    command = ['docker', 'run', '--rm', '--privileged']
    container = utils.get_container_name()
    if container:
      command += [
          '--volumes-from', container, '-e', 'OUT=' + target_dirname, '-e',
          'TESTCASE=' + test_case
      ]
    else:
      command += [
          '-v',
          '%s:/out' % target_dirname, '-v',
          '%s:/testcase' % test_case
      ]

    command += [
        '-t', 'gcr.io/oss-fuzz-base/base-runner', 'reproduce', self.target_name,
        '-runs=100'
    ]

    logging.info('Running reproduce command: %s.', ' '.join(command))
    for _ in range(REPRODUCE_ATTEMPTS):
      _, _, returncode = utils.execute(command)
      if returncode != 0:
        logging.info('Reproduce command returned: %s. Reproducible on %s.',
                     returncode, target_path)

        return True

    logging.info('Reproduce command returned 0. Not reproducible on %s.',
                 target_path)
    return False

  def is_crash_reportable(self, test_case):
    """Returns True if a crash is reportable. This means the crash is
    reproducible but not reproducible on a build from OSS-Fuzz (meaning the
    crash was introduced by this PR).

    NOTE: If no project is specified the crash is assumed introduced
    by the pull request if it is reproducible.

    Args:
      test_case: The path to the test_case that triggered the crash.

    Returns:
      True if the crash was introduced by the current pull request.

    Raises:
      ReproduceError if we can't attempt to reproduce the crash on the PR build.
    """
    if not os.path.exists(test_case):
      raise ReproduceError('Test case %s not found.' % test_case)

    try:
      reproducible_on_pr_build = self.is_reproducible(test_case,
                                                      self.target_path)
    except ReproduceError as error:
      logging.error('Could not run target when checking for reproducibility.'
                    'Please file an issue:'
                    'https://github.com/google/oss-fuzz/issues/new.')
      raise error

    if not self.project_name:
      return reproducible_on_pr_build

    if not reproducible_on_pr_build:
      logging.info(
          'Failed to reproduce the crash using the obtained test case.')
      return False

    oss_fuzz_build_dir = self.download_oss_fuzz_build()
    if not oss_fuzz_build_dir:
      # Crash is reproducible on PR build and we can't test on OSS-Fuzz build.
      logging.info(COULD_NOT_TEST_ON_OSS_FUZZ_MESSAGE)
      return True

    oss_fuzz_target_path = os.path.join(oss_fuzz_build_dir, self.target_name)
    try:
      reproducible_on_oss_fuzz_build = self.is_reproducible(
          test_case, oss_fuzz_target_path)
    except ReproduceError:
      # This happens if the project has OSS-Fuzz builds, but the fuzz target
      # is not in it (e.g. because the fuzz target is new).
      logging.info(COULD_NOT_TEST_ON_OSS_FUZZ_MESSAGE)
      return True

    if not reproducible_on_oss_fuzz_build:
      logging.info('The crash is reproducible. The crash doesn\'t reproduce '
                   'on old builds. This pull request probably introduced the '
                   'crash.')
      return True

    logging.info('The crash is reproducible without the current pull request.')
    return False

  def get_test_case(self, error_bytes):
    """Gets the file from a fuzzer run stack trace.

    Args:
      error_bytes: The bytes containing the output from the fuzzer.

    Returns:
      The path to the test case or None if not found.
    """
    match = re.search(rb'\bTest unit written to \.\/([^\s]+)', error_bytes)
    if match:
      return os.path.join(self.out_dir, match.group(1).decode('utf-8'))
    return None

  def get_lastest_build_version(self):
    """Gets the latest OSS-Fuzz build version for a projects' fuzzers.

    Returns:
      A string with the latest build version or None.
    """
    if not self.project_name:
      return None

    version = VERSION_STRING.format(project_name=self.project_name,
                                    sanitizer=self.sanitizer)
    version_url = url_join(GCS_BASE_URL, CLUSTERFUZZ_BUILDS, self.project_name,
                           version)
    try:
      response = urllib.request.urlopen(version_url)
    except urllib.error.HTTPError:
      logging.error('Error getting latest build version for %s with url %s.',
                    self.project_name, version_url)
      return None
    return response.read().decode()

  def download_oss_fuzz_build(self):
    """Downloads the latest OSS-Fuzz build from GCS.

    Returns:
      A path to where the OSS-Fuzz build is located, or None.
    """
    if not os.path.exists(self.out_dir):
      logging.error('Out directory %s does not exist.', self.out_dir)
      return None
    if not self.project_name:
      return None

    build_dir = os.path.join(self.out_dir, 'oss_fuzz_latest', self.project_name)
    if os.path.exists(os.path.join(build_dir, self.target_name)):
      return build_dir
    os.makedirs(build_dir, exist_ok=True)
    latest_build_str = self.get_lastest_build_version()
    if not latest_build_str:
      return None

    oss_fuzz_build_url = url_join(GCS_BASE_URL, CLUSTERFUZZ_BUILDS,
                                  self.project_name, latest_build_str)
    return download_and_unpack_zip(oss_fuzz_build_url, build_dir)

  def download_latest_corpus(self):
    """Downloads the latest OSS-Fuzz corpus for the target from google cloud.

    Returns:
      The local path to to corpus or None if download failed.
    """
    if not self.project_name:
      return None
    if not os.path.exists(self.out_dir):
      logging.error('Out directory %s does not exist.', self.out_dir)
      return None

    corpus_dir = os.path.join(self.out_dir, 'backup_corpus', self.target_name)
    os.makedirs(corpus_dir, exist_ok=True)
    project_qualified_fuzz_target_name = self.target_name
    qualified_name_prefix = '%s_' % self.project_name
    if not self.target_name.startswith(qualified_name_prefix):
      project_qualified_fuzz_target_name = qualified_name_prefix + \
      self.target_name
    corpus_url = url_join(
        GCS_BASE_URL,
        '{0}-backup.clusterfuzz-external.appspot.com/corpus/libFuzzer/'.format(
            self.project_name), project_qualified_fuzz_target_name,
        CORPUS_ZIP_NAME)
    return download_and_unpack_zip(corpus_url, corpus_dir)


def download_url(url, filename, num_retries=3):
  """Downloads the file located at |url|, using HTTP to |filename|.

  Args:
    url: A url to a file to download.
    filename: The path the file should be downloaded to.
    num_retries: The number of times to retry the download on
       ConnectionResetError.

  Returns:
    True on success.
  """
  sleep_time = 1

  for _ in range(num_retries):
    try:
      urllib.request.urlretrieve(url, filename)
      return True
    except urllib.error.HTTPError:
      # In these cases, retrying probably wont work since the error probably
      # means there is nothing at the URL to download.
      logging.error('Unable to download from: %s.', url)
      return False
    except ConnectionResetError:
      # These errors are more likely to be transient. Retry.
      pass
    time.sleep(sleep_time)

  logging.error('Failed to download %s, %d times.', url, num_retries)

  return False


def download_and_unpack_zip(url, out_dir):
  """Downloads and unpacks a zip file from an http url.

  Args:
    url: A url to the zip file to be downloaded and unpacked.
    out_dir: The path where the zip file should be extracted to.

  Returns:
    A path to the extracted file or None on failure.
  """
  if not os.path.exists(out_dir):
    logging.error('Out directory %s does not exist.', out_dir)
    return None

  # Gives the temporary zip file a unique identifier in the case that
  # that download_and_unpack_zip is done in parallel.
  with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file:
    result = download_url(url, tmp_file.name)
    if not result:
      return None

    try:
      with zipfile.ZipFile(tmp_file.name, 'r') as zip_file:
        zip_file.extractall(out_dir)
    except zipfile.BadZipFile:
      logging.error('Error unpacking zip from %s. Bad Zipfile.', url)
      return None
  return out_dir


def url_join(*url_parts):
  """Joins URLs together using the posix join method.

  Args:
    url_parts: Sections of a URL to be joined.

  Returns:
    Joined URL.
  """
  return posixpath.join(*url_parts)