Use libClusterFuzz for CIFuzz fuzzing and pruning. (#6375)

Part of #6326.
author: Oliver Chang <oliverchang@users.noreply.github.com> 2021-09-13 12:16:13 +1000
committer: GitHub <noreply@github.com> 2021-09-13 12:16:13 +1000
commit: fd34d343c317c45ca1d6a34989db7656097117cb (patch)
tree: e1e32bfc526a3fa9b284d3dab76d2be0086fe870
parent: ab6233dee9fe051e037f95da9127a138a4d34025 (diff)
7 files changed, 67 insertions, 112 deletions
diff --git a/.github/workflows/infra_tests.yml b/.github/workflows/infra_tests.yml
index 63d58581..53c1ad54 100644
--- a/.github/workflows/infra_tests.yml
+++ b/.github/workflows/infra_tests.yml
@@ -31,6 +31,7 @@ jobs:
           sudo env "PATH=$PATH" python -m pip install --upgrade pip
           sudo env "PATH=$PATH" pip install -r infra/ci/requirements.txt
           sudo env "PATH=$PATH" pip install -r infra/build/functions/requirements.txt
+          sudo env "PATH=$PATH" pip install -r infra/cifuzz/requirements.txt
 
       - uses: google-github-actions/setup-gcloud@master
         with:
diff --git a/infra/cifuzz/fuzz_target.py b/infra/cifuzz/fuzz_target.py
index 7fd43a69..85486b0b 100644
--- a/infra/cifuzz/fuzz_target.py
+++ b/infra/cifuzz/fuzz_target.py
@@ -15,17 +15,19 @@
 import collections
 import logging
 import os
-import re
 import shutil
 import stat
-import subprocess
 import sys
 
 import base_runner_utils
+import config_utils
 # pylint: disable=wrong-import-position,import-error
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import utils
 
+import clusterfuzz.environment
+import clusterfuzz.fuzz
+
 logging.basicConfig(
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
     level=logging.DEBUG)
@@ -111,68 +113,58 @@ class FuzzTarget:  # pylint: disable=too-many-instance-attributes
   def prune(self):
     """Prunes the corpus and returns the result."""
     self._download_corpus()
-    prune_options = [
-        '-merge=1', self.pruned_corpus_path, self.latest_corpus_path
-    ]
-    result = self.fuzz(use_corpus=False, extra_libfuzzer_options=prune_options)
-    return FuzzResult(result.testcase, result.stacktrace,
-                      self.pruned_corpus_path)
-
-  def fuzz(self, use_corpus=True, extra_libfuzzer_options=None):
+    with clusterfuzz.environment.Environment(config_utils.DEFAULT_ENGINE,
+                                             self.config.sanitizer,
+                                             self.target_path,
+                                             interactive=True):
+      engine_impl = clusterfuzz.fuzz.get_engine(config_utils.DEFAULT_ENGINE)
+      result = engine_impl.minimize_corpus(self.target_path, [],
+                                           [self.latest_corpus_path],
+                                           self.pruned_corpus_path,
+                                           self.workspace.artifacts,
+                                           self.duration)
+
+    return FuzzResult(None, result.logs, self.pruned_corpus_path)
+
+  def fuzz(self):
     """Starts the fuzz target run for the length of time specified by duration.
 
     Returns:
       FuzzResult namedtuple with stacktrace and testcase if applicable.
     """
     logging.info('Running fuzzer: %s.', self.target_name)
-    if extra_libfuzzer_options is None:
-      extra_libfuzzer_options = []
-    env = base_runner_utils.get_env(self.config, self.workspace)
-    # TODO(metzman): Is this needed?
-    env['RUN_FUZZER_MODE'] = 'interactive'
-
-    if use_corpus:
-      # If corpus can be downloaded, use it for fuzzing.
-      self._download_corpus()
-      env['CORPUS_DIR'] = self.latest_corpus_path
-
-    options = LIBFUZZER_OPTIONS.copy() + [
-        f'-max_total_time={self.duration}',
-        # Make sure libFuzzer artifact files don't pollute $OUT.
-        f'-artifact_prefix={self.workspace.artifacts}/'
-    ] + extra_libfuzzer_options
-    command = ['run_fuzzer', self.target_name] + options
-
-    logging.info('Running command: %s', command)
-    process = subprocess.Popen(command,
-                               env=env,
-                               stdout=subprocess.PIPE,
-                               stderr=subprocess.PIPE)
 
-    try:
-      _, stderr = process.communicate(timeout=self.duration + BUFFER_TIME)
-    except subprocess.TimeoutExpired:
-      logging.error('Fuzzer %s timed out, ending fuzzing.', self.target_name)
-      return FuzzResult(None, None, self.latest_corpus_path)
+    self._download_corpus()
+    corpus_path = self.latest_corpus_path
+
+    logging.info('Starting fuzzing')
+    with clusterfuzz.environment.Environment(config_utils.DEFAULT_ENGINE,
+                                             self.config.sanitizer,
+                                             self.target_path,
+                                             interactive=True) as env:
+      engine_impl = clusterfuzz.fuzz.get_engine(config_utils.DEFAULT_ENGINE)
+      options = engine_impl.prepare(corpus_path, env.target_path, env.build_dir)
+      options.merge_back_new_testcases = False
+      options.analyze_dictionary = False
+      options.arguments.extend(LIBFUZZER_OPTIONS)
+
+      result = engine_impl.fuzz(self.target_path, options,
+                                self.workspace.artifacts, self.duration)
 
     # Libfuzzer timeout was reached.
-    if not process.returncode:
+    if not result.crashes:
       logging.info('Fuzzer %s finished with no crashes discovered.',
                    self.target_name)
       return FuzzResult(None, None, self.latest_corpus_path)
 
-    # Crash was discovered.
-    logging.info('Fuzzer %s, ended before timeout.', self.target_name)
-    testcase = get_testcase(stderr)
-    if not testcase:
-      logging.error(b'No testcase found in stacktrace: %s.', stderr)
-      return FuzzResult(None, None, self.latest_corpus_path)
+    # Only report first crash.
+    crash = result.crashes[0]
+    logging.info('Fuzzer: %s. Detected bug:\n%s', self.target_name,
+                 crash.stacktrace)
 
-    utils.binary_print(b'Fuzzer: %s. Detected bug:\n%s' %
-                       (self.target_name.encode(), stderr))
-    if self.is_crash_reportable(testcase):
+    if self.is_crash_reportable(crash.input_path):
       # We found a bug in the fuzz target and we will report it.
-      return FuzzResult(testcase, stderr, self.latest_corpus_path)
+      return FuzzResult(crash.input_path, result.logs, self.latest_corpus_path)
 
     # We found a bug but we won't report it.
     return FuzzResult(None, None, self.latest_corpus_path)
@@ -303,18 +295,3 @@ class FuzzTarget:  # pylint: disable=too-many-instance-attributes
     logging.info('The crash is not reproducible on previous build. '
                  'Code change (pr/commit) introduced crash.')
     return True
-
-
-def get_testcase(stderr_bytes):
-  """Gets the file from a fuzzer run stacktrace.
-
-  Args:
-    stderr_bytes: The bytes containing the output from the fuzzer.
-
-  Returns:
-    The path to the testcase or None if not found.
-  """
-  match = re.search(rb'\bTest unit written to (.+)', stderr_bytes)
-  if match:
-    return match.group(1).decode('utf-8')
-  return None
diff --git a/infra/cifuzz/fuzz_target_test.py b/infra/cifuzz/fuzz_target_test.py
index 25835b72..3eafdc49 100644
--- a/infra/cifuzz/fuzz_target_test.py
+++ b/infra/cifuzz/fuzz_target_test.py
@@ -136,30 +136,6 @@ class IsReproducibleTest(fake_filesystem_unittest.TestCase):
       self.assertFalse(result)
 
 
-class GetTestCaseTest(unittest.TestCase):
-  """Tests get_testcase."""
-
-  def test_valid_error_string(self):
-    """Tests that get_testcase returns the correct testcase give an error."""
-    testcase_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-                                 'test_data', 'example_crash_fuzzer_output.txt')
-    with open(testcase_path, 'rb') as test_fuzz_output:
-      parsed_testcase = fuzz_target.get_testcase(test_fuzz_output.read())
-    self.assertEqual(parsed_testcase,
-                     './crash-ad6700613693ef977ff3a8c8f4dae239c3dde6f5')
-
-  def test_invalid_error_string(self):
-    """Tests that get_testcase returns None with a bad error string."""
-    self.assertIsNone(fuzz_target.get_testcase(b''))
-    self.assertIsNone(fuzz_target.get_testcase(b' Example crash string.'))
-
-  def test_encoding(self):
-    """Tests that get_testcase accepts bytes and returns a string."""
-    fuzzer_output = b'\x8fTest unit written to ./crash-1'
-    result = fuzz_target.get_testcase(fuzzer_output)
-    self.assertTrue(isinstance(result, str))
-
-
 class IsCrashReportableTest(fake_filesystem_unittest.TestCase):
   """Tests the is_crash_reportable method of FuzzTarget."""
 
diff --git a/infra/cifuzz/requirements.txt b/infra/cifuzz/requirements.txt
index 9d84d358..3944ad5d 100644
--- a/infra/cifuzz/requirements.txt
+++ b/infra/cifuzz/requirements.txt
@@ -1 +1,2 @@
+clusterfuzz==2.5.4.post5
 requests==2.25.1
diff --git a/infra/cifuzz/run_fuzzers_test.py b/infra/cifuzz/run_fuzzers_test.py
index 21b10dcc..42bf0ea6 100644
--- a/infra/cifuzz/run_fuzzers_test.py
+++ b/infra/cifuzz/run_fuzzers_test.py
@@ -255,7 +255,7 @@ class CiFuzzTargetRunnerTest(fake_filesystem_unittest.TestCase):
     runner.initialize()
     testcase = os.path.join(workspace, 'testcase')
     self.fs.create_file(testcase)
-    stacktrace = b'stacktrace'
+    stacktrace = 'stacktrace'
     corpus_dir = 'corpus'
     self.fs.create_dir(corpus_dir)
     mock_run_fuzz_target.return_value = fuzz_target.FuzzResult(
@@ -272,7 +272,7 @@ class CiFuzzTargetRunnerTest(fake_filesystem_unittest.TestCase):
 class BatchFuzzTargetRunnerTest(fake_filesystem_unittest.TestCase):
   """Tests that BatchFuzzTargetRunnerTest works as intended."""
   WORKSPACE = 'workspace'
-  STACKTRACE = b'stacktrace'
+  STACKTRACE = 'stacktrace'
   CORPUS_DIR = 'corpus'
 
   def setUp(self):
diff --git a/infra/cifuzz/stack_parser.py b/infra/cifuzz/stack_parser.py
index 69c44bc2..b53f875f 100644
--- a/infra/cifuzz/stack_parser.py
+++ b/infra/cifuzz/stack_parser.py
@@ -18,30 +18,30 @@ import logging
 # From clusterfuzz: src/python/crash_analysis/crash_analyzer.py
 # Used to get the beginning of the stacktrace.
 STACKTRACE_TOOL_MARKERS = [
-    b'AddressSanitizer',
-    b'ASAN:',
-    b'CFI: Most likely a control flow integrity violation;',
-    b'ERROR: libFuzzer',
-    b'KASAN:',
-    b'LeakSanitizer',
-    b'MemorySanitizer',
-    b'ThreadSanitizer',
-    b'UndefinedBehaviorSanitizer',
-    b'UndefinedSanitizer',
+    'AddressSanitizer',
+    'ASAN:',
+    'CFI: Most likely a control flow integrity violation;',
+    'ERROR: libFuzzer',
+    'KASAN:',
+    'LeakSanitizer',
+    'MemorySanitizer',
+    'ThreadSanitizer',
+    'UndefinedBehaviorSanitizer',
+    'UndefinedSanitizer',
 ]
 
 # From clusterfuzz: src/python/crash_analysis/crash_analyzer.py
 # Used to get the end of the stacktrace.
 STACKTRACE_END_MARKERS = [
-    b'ABORTING',
-    b'END MEMORY TOOL REPORT',
-    b'End of process memory map.',
-    b'END_KASAN_OUTPUT',
-    b'SUMMARY:',
-    b'Shadow byte and word',
-    b'[end of stack trace]',
-    b'\nExiting',
-    b'minidump has been written',
+    'ABORTING',
+    'END MEMORY TOOL REPORT',
+    'End of process memory map.',
+    'END_KASAN_OUTPUT',
+    'SUMMARY:',
+    'Shadow byte and word',
+    '[end of stack trace]',
+    '\nExiting',
+    'minidump has been written',
 ]
 
 
@@ -82,5 +82,5 @@ def parse_fuzzer_output(fuzzer_output, parsed_output_file_path):
   summary_str = fuzzer_output[begin_stack:end_stack]
 
   # Write sections of fuzzer output to specific files.
-  with open(parsed_output_file_path, 'ab') as summary_handle:
+  with open(parsed_output_file_path, 'a') as summary_handle:
     summary_handle.write(summary_str)
diff --git a/infra/cifuzz/stack_parser_test.py b/infra/cifuzz/stack_parser_test.py
index 2c1b223a..5a631b42 100644
--- a/infra/cifuzz/stack_parser_test.py
+++ b/infra/cifuzz/stack_parser_test.py
@@ -46,7 +46,7 @@ class ParseOutputTest(fake_filesystem_unittest.TestCase):
     # Read the fuzzer output from disk.
     fuzzer_output_path = os.path.join(TEST_DATA_PATH, fuzzer_output_file)
     self.fs.add_real_file(fuzzer_output_path)
-    with open(fuzzer_output_path, 'rb') as fuzzer_output_handle:
+    with open(fuzzer_output_path, 'r') as fuzzer_output_handle:
       fuzzer_output = fuzzer_output_handle.read()
     bug_summary_path = '/bug-summary.txt'
     with mock.patch('logging.info') as mock_info:
@@ -68,7 +68,7 @@ class ParseOutputTest(fake_filesystem_unittest.TestCase):
     """Checks that no files are created when an invalid input was given."""
     artifact_path = '/bug-summary.txt'
     with mock.patch('logging.error') as mock_error:
-      stack_parser.parse_fuzzer_output(b'not a valid output_string',
+      stack_parser.parse_fuzzer_output('not a valid output_string',
                                        artifact_path)
       assert mock_error.call_count
     self.assertFalse(os.path.exists(artifact_path))
author	Oliver Chang <oliverchang@users.noreply.github.com>	2021-09-13 12:16:13 +1000
committer	GitHub <noreply@github.com>	2021-09-13 12:16:13 +1000
commit	fd34d343c317c45ca1d6a34989db7656097117cb (patch)
tree	e1e32bfc526a3fa9b284d3dab76d2be0086fe870
parent	ab6233dee9fe051e037f95da9127a138a4d34025 (diff)