[CIFuzz] Don't assume fuzzer output is ASCII (#4537)

[CIFuzz] Don't assume fuzzer output is ASCII. This is wrong because the output can be arbitrary. Instead, change the code that deals with the output to deal with bytes. The test case path, which is derived from the output, can be decoded as UTF-8 since it will be a Unix path.
author: jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com> 2020-10-19 10:10:22 -0700
committer: GitHub <noreply@github.com> 2020-10-19 10:10:22 -0700
commit: 8daba1a1a1e6d064e04a760f8c9c788938a3ce83 (patch)
tree: 2dfdf1a2f93f392f762ebbe4cc84a116b1d30ba8 /infra/cifuzz
parent: bca4ff4ee843da1b1d28866e2798a3e28d4657a6 (diff)
4 files changed, 41 insertions, 36 deletions
diff --git a/infra/cifuzz/cifuzz.py b/infra/cifuzz/cifuzz.py
index e20dc45a..11a2485b 100644
--- a/infra/cifuzz/cifuzz.py
+++ b/infra/cifuzz/cifuzz.py
@@ -40,30 +40,30 @@ import utils
 # From clusterfuzz: src/python/crash_analysis/crash_analyzer.py
 # Used to get the beginning of the stack trace.
 STACKTRACE_TOOL_MARKERS = [
-    'AddressSanitizer',
-    'ASAN:',
-    'CFI: Most likely a control flow integrity violation;',
-    'ERROR: libFuzzer',
-    'KASAN:',
-    'LeakSanitizer',
-    'MemorySanitizer',
-    'ThreadSanitizer',
-    'UndefinedBehaviorSanitizer',
-    'UndefinedSanitizer',
+    b'AddressSanitizer',
+    b'ASAN:',
+    b'CFI: Most likely a control flow integrity violation;',
+    b'ERROR: libFuzzer',
+    b'KASAN:',
+    b'LeakSanitizer',
+    b'MemorySanitizer',
+    b'ThreadSanitizer',
+    b'UndefinedBehaviorSanitizer',
+    b'UndefinedSanitizer',
 ]
 
 # From clusterfuzz: src/python/crash_analysis/crash_analyzer.py
 # Used to get the end of the stack trace.
 STACKTRACE_END_MARKERS = [
-    'ABORTING',
-    'END MEMORY TOOL REPORT',
-    'End of process memory map.',
-    'END_KASAN_OUTPUT',
-    'SUMMARY:',
-    'Shadow byte and word',
-    '[end of stack trace]',
-    '\nExiting',
-    'minidump has been written',
+    b'ABORTING',
+    b'END MEMORY TOOL REPORT',
+    b'End of process memory map.',
+    b'END_KASAN_OUTPUT',
+    b'SUMMARY:',
+    b'Shadow byte and word',
+    b'[end of stack trace]',
+    b'\nExiting',
+    b'minidump has been written',
 ]
 
 #  Default fuzz configuration.
@@ -254,7 +254,7 @@ def run_fuzzers(fuzz_seconds, workspace, project_name, sanitizer='address'):
     if not test_case or not stack_trace:
       logging.info('Fuzzer %s, finished running.', target.target_name)
     else:
-      logging.info('Fuzzer %s, detected error: %s.', target.target_name,
+      logging.info(b'Fuzzer %s, detected error: %s.', target.target_name,
                    stack_trace)
       shutil.move(test_case, os.path.join(artifacts_dir, 'test_case'))
       parse_fuzzer_output(stack_trace, artifacts_dir)
@@ -512,5 +512,5 @@ def parse_fuzzer_output(fuzzer_output, out_dir):
 
   # Write sections of fuzzer output to specific files.
   summary_file_path = os.path.join(out_dir, 'bug_summary.txt')
-  with open(summary_file_path, 'a') as summary_handle:
+  with open(summary_file_path, 'ab') as summary_handle:
     summary_handle.write(summary_str)
diff --git a/infra/cifuzz/cifuzz_test.py b/infra/cifuzz/cifuzz_test.py
index abc24469..1de18e8a 100644
--- a/infra/cifuzz/cifuzz_test.py
+++ b/infra/cifuzz/cifuzz_test.py
@@ -278,7 +278,7 @@ class ParseOutputUnitTest(unittest.TestCase):
                                     'example_crash_fuzzer_output.txt')
     test_summary_path = os.path.join(TEST_FILES_PATH, 'bug_summary_example.txt')
     with tempfile.TemporaryDirectory() as tmp_dir:
-      with open(test_output_path, 'r') as test_fuzz_output:
+      with open(test_output_path, 'rb') as test_fuzz_output:
         cifuzz.parse_fuzzer_output(test_fuzz_output.read(), tmp_dir)
       result_files = ['bug_summary.txt']
       self.assertCountEqual(os.listdir(tmp_dir), result_files)
@@ -293,7 +293,7 @@ class ParseOutputUnitTest(unittest.TestCase):
   def test_parse_invalid_output(self):
     """Checks that no files are created when an invalid input was given."""
     with tempfile.TemporaryDirectory() as tmp_dir:
-      cifuzz.parse_fuzzer_output('not a valid output_string', tmp_dir)
+      cifuzz.parse_fuzzer_output(b'not a valid output_string', tmp_dir)
       self.assertEqual(len(os.listdir(tmp_dir)), 0)
 
 
diff --git a/infra/cifuzz/fuzz_target.py b/infra/cifuzz/fuzz_target.py
index d469d855..28dd80ba 100644
--- a/infra/cifuzz/fuzz_target.py
+++ b/infra/cifuzz/fuzz_target.py
@@ -145,7 +145,7 @@ class FuzzTarget:
                                stderr=subprocess.PIPE)
 
     try:
-      _, err = process.communicate(timeout=self.duration + BUFFER_TIME)
+      _, stderr = process.communicate(timeout=self.duration + BUFFER_TIME)
     except subprocess.TimeoutExpired:
       logging.error('Fuzzer %s timed out, ending fuzzing.', self.target_name)
       return None, None
@@ -158,13 +158,12 @@ class FuzzTarget:
 
     # Crash was discovered.
     logging.info('Fuzzer %s, ended before timeout.', self.target_name)
-    err_str = err.decode('ascii')
-    test_case = self.get_test_case(err_str)
+    test_case = self.get_test_case(stderr)
     if not test_case:
-      logging.error('No test case found in stack trace: %s.', err_str)
+      logging.error(b'No test case found in stack trace: %s.', stderr)
       return None, None
     if self.is_crash_reportable(test_case):
-      return test_case, err_str
+      return test_case, stderr
     return None, None
 
   def is_reproducible(self, test_case, target_path):
@@ -282,18 +281,18 @@ class FuzzTarget:
     logging.info('The crash is reproducible without the current pull request.')
     return False
 
-  def get_test_case(self, error_string):
+  def get_test_case(self, error_bytes):
     """Gets the file from a fuzzer run stack trace.
 
     Args:
-      error_string: The stack trace string containing the error.
+      error_bytes: The bytes containing the output from the fuzzer.
 
     Returns:
-      The error test case or None if not found.
+      The path to the test case or None if not found.
     """
-    match = re.search(r'\bTest unit written to \.\/([^\s]+)', error_string)
+    match = re.search(rb'\bTest unit written to \.\/([^\s]+)', error_bytes)
     if match:
-      return os.path.join(self.out_dir, match.group(1))
+      return os.path.join(self.out_dir, match.group(1).decode('utf-8'))
     return None
 
   def get_lastest_build_version(self):
diff --git a/infra/cifuzz/fuzz_target_test.py b/infra/cifuzz/fuzz_target_test.py
index 00422659..b4600f01 100644
--- a/infra/cifuzz/fuzz_target_test.py
+++ b/infra/cifuzz/fuzz_target_test.py
@@ -125,7 +125,7 @@ class GetTestCaseUnitTest(unittest.TestCase):
     test_case_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   'test_files',
                                   'example_crash_fuzzer_output.txt')
-    with open(test_case_path, 'r') as test_fuzz_output:
+    with open(test_case_path, 'rb') as test_fuzz_output:
       parsed_test_case = self.test_target.get_test_case(test_fuzz_output.read())
     self.assertEqual(
         parsed_test_case,
@@ -133,8 +133,14 @@ class GetTestCaseUnitTest(unittest.TestCase):
 
   def test_invalid_error_string(self):
     """Tests that get_test_case returns None with a bad error string."""
-    self.assertIsNone(self.test_target.get_test_case(''))
-    self.assertIsNone(self.test_target.get_test_case(' Example crash string.'))
+    self.assertIsNone(self.test_target.get_test_case(b''))
+    self.assertIsNone(self.test_target.get_test_case(b' Example crash string.'))
+
+  def test_encoding(self):
+    """Tests that get_test_case accepts bytes and returns a string."""
+    fuzzer_output = b'\x8fTest unit written to ./crash-1'
+    result = self.test_target.get_test_case(fuzzer_output)
+    self.assertTrue(isinstance(result, str))
 
 
 class DownloadLatestCorpusUnitTest(unittest.TestCase):
author	jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com>	2020-10-19 10:10:22 -0700
committer	GitHub <noreply@github.com>	2020-10-19 10:10:22 -0700
commit	8daba1a1a1e6d064e04a760f8c9c788938a3ce83 (patch)
tree	2dfdf1a2f93f392f762ebbe4cc84a116b1d30ba8 /infra/cifuzz
parent	bca4ff4ee843da1b1d28866e2798a3e28d4657a6 (diff)