diff options
author | 2022-04-06 15:31:34 +0100 | |
---|---|---|
committer | 2022-04-06 10:31:34 -0400 | |
commit | 0cb820e5af64f279f66d0c0d3c93b7437ecabe91 (patch) | |
tree | db17b5d39d7ef7c1fc899676fff3b4742c0c8e78 /infra/base-images | |
parent | 46121987d0510526e8112cff934bd745e6967fe7 (diff) |
infra: add Python coverage support (#7298)
* infra: add Python coverage support
* update python coverage helper script
* nits
* switch from commands to python
* pin coverage package
* switch to single quote strings throughout
* nit
* fix style
* fix style
* fix ci
* fix ci
Diffstat (limited to 'infra/base-images')
-rw-r--r-- | infra/base-images/base-builder/Dockerfile | 2 | ||||
-rwxr-xr-x | infra/base-images/base-builder/compile | 4 | ||||
-rwxr-xr-x | infra/base-images/base-builder/compile_python_fuzzer | 68 | ||||
-rwxr-xr-x | infra/base-images/base-builder/install_python.sh | 2 | ||||
-rw-r--r-- | infra/base-images/base-builder/python_coverage_helper.py | 118 | ||||
-rwxr-xr-x | infra/base-images/base-runner/Dockerfile | 5 | ||||
-rwxr-xr-x | infra/base-images/base-runner/coverage | 32 | ||||
-rwxr-xr-x | infra/base-images/base-runner/python_coverage_runner_help.py | 83 |
8 files changed, 310 insertions, 4 deletions
diff --git a/infra/base-images/base-builder/Dockerfile b/infra/base-images/base-builder/Dockerfile index afaf860e..276300df 100644 --- a/infra/base-images/base-builder/Dockerfile +++ b/infra/base-images/base-builder/Dockerfile @@ -148,6 +148,8 @@ RUN precompile_honggfuzz COPY cargo compile compile_afl compile_dataflow compile_libfuzzer compile_honggfuzz \ compile_go_fuzzer \ compile_native_go_fuzzer \ + compile_python_fuzzer \ + python_coverage_helper.py \ debug_afl srcmap \ write_labels.py bazel_build_fuzz_tests \ # Go, java, and swift installation scripts. diff --git a/infra/base-images/base-builder/compile b/infra/base-images/base-builder/compile index 3c7736c6..9995b67e 100755 --- a/infra/base-images/base-builder/compile +++ b/infra/base-images/base-builder/compile @@ -42,8 +42,8 @@ if [ "$FUZZING_LANGUAGE" = "python" ]; then echo "ERROR: Python projects can be fuzzed with libFuzzer engine only." exit 1 fi - if [ "$SANITIZER" != "address" ] && [ "$SANITIZER" != "undefined" ]; then - echo "ERROR: Python projects can be fuzzed with AddressSanitizer or UndefinedBehaviorSanitizer only." + if [ "$SANITIZER" != "address" ] && [ "$SANITIZER" != "undefined" ] && [ "$SANITIZER" != "coverage" ]; then + echo "ERROR: Python projects can be fuzzed with AddressSanitizer or UndefinedBehaviorSanitizer or Coverage only." exit 1 fi if [ "$ARCHITECTURE" != "x86_64" ]; then diff --git a/infra/base-images/base-builder/compile_python_fuzzer b/infra/base-images/base-builder/compile_python_fuzzer new file mode 100755 index 00000000..36c3626a --- /dev/null +++ b/infra/base-images/base-builder/compile_python_fuzzer @@ -0,0 +1,68 @@ +#!/bin/bash -eu +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +fuzzer_path=$1 +fuzzer_basename=$(basename -s .py $fuzzer_path) +fuzzer_package=${fuzzer_basename}.pkg + +PYFUZZ_WORKPATH=$SRC/pyfuzzworkdir/ +FUZZ_WORKPATH=$PYFUZZ_WORKPATH/$fuzzer_basename + +# In coverage mode prepend coverage logic to the fuzzer source +if [[ $SANITIZER = *coverage* ]]; then + cat <<EOF >> coverage_wrapper.py +###### Coverage stub +import atexit +import coverage +cov = coverage.coverage(data_file='.coverage', cover_pylib=True) +cov.start() +# Register an exist handler that will print coverage +def exit_handler(): + cov.stop() + cov.save() +atexit.register(exit_handler) +####### End of coverage stub +EOF + + # Prepend stub and create tmp file + cat coverage_wrapper.py $fuzzer_path > tmp_fuzzer_coverage.py + + # Overwrite existing fuzzer with new fuzzer that has stub + mv tmp_fuzzer_coverage.py $fuzzer_path +fi + +rm -rf $PYFUZZ_WORKPATH +mkdir $PYFUZZ_WORKPATH $FUZZ_WORKPATH + +pyinstaller --distpath $OUT --workpath=$FUZZ_WORKPATH --onefile --name $fuzzer_package $fuzzer_path + +# In coverage mode save source files of dependencies in pyinstalled binary +if [[ $SANITIZER = *coverage* ]]; then + rm -rf /medio/ + python3 /usr/local/bin/python_coverage_helper.py $FUZZ_WORKPATH "/medio" + zip -r $fuzzer_package.deps.zip /medio + mv $fuzzer_package.deps.zip $OUT/ +fi + +# Create execution wrapper. +echo "#!/bin/sh +# LLVMFuzzerTestOneInput for fuzzer detection. +this_dir=\$(dirname \"\$0\") +LD_PRELOAD=\$this_dir/sanitizer_with_fuzzer.so \ +ASAN_OPTIONS=\$ASAN_OPTIONS:symbolize=1:external_symbolizer_path=\$this_dir/llvm-symbolizer:detect_leaks=0 \ +\$this_dir/$fuzzer_package \$@" > $OUT/$fuzzer_basename +chmod +x $OUT/$fuzzer_basename diff --git a/infra/base-images/base-builder/install_python.sh b/infra/base-images/base-builder/install_python.sh index 4d05f1ae..adb53040 100755 --- a/infra/base-images/base-builder/install_python.sh +++ b/infra/base-images/base-builder/install_python.sh @@ -17,5 +17,5 @@ echo "ATHERIS INSTALL" unset CFLAGS CXXFLAGS -pip3 install -v --no-cache-dir "atheris>=2.0.6" "pyinstaller==4.1" +pip3 install -v --no-cache-dir "atheris>=2.0.6" "pyinstaller==4.1" "coverage==6.3.2" rm -rf /tmp/* diff --git a/infra/base-images/base-builder/python_coverage_helper.py b/infra/base-images/base-builder/python_coverage_helper.py new file mode 100644 index 00000000..93c917f1 --- /dev/null +++ b/infra/base-images/base-builder/python_coverage_helper.py @@ -0,0 +1,118 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Extracts file paths to copy files from pyinstaller-generated executables""" +import os +import sys +import shutil +import zipfile + + +# Finds all *.toc files in ./workpath and reads these files in order to +# identify Python files associated with a pyinstaller packaged executable. +# Copies all of the Python files to a temporary directory (/medio) following +# the original directory structure. +def get_all_files_from_toc(toc_file, file_path_set): + """ + Extract filepaths from a .toc file and add to file_path_set + """ + with open(toc_file, 'rb') as toc_file_fd: + for line in toc_file_fd: + try: + line = line.decode() + except: # pylint:disable=bare-except + continue + if '.py' not in line: + continue + + split_line = line.split(' ') + for word in split_line: + word = word.replace('\'', '').replace(',', '').replace('\n', '') + if '.py' not in word: + continue + # Check if .egg is in the path and if so we need to split it + if os.path.isfile(word): + file_path_set.add(word) + elif '.egg' in word: # check if this is an egg + egg_path_split = word.split('.egg') + if len(egg_path_split) != 2: + continue + egg_path = egg_path_split[0] + '.egg' + + print('Unzipping contents of %s' % egg_path) + + # We have an egg. This needs to be unzipped and then replaced + # with the unzipped data. + tmp_dir_name = 'zipdcontents' + if os.path.isdir(tmp_dir_name): + shutil.rmtree(tmp_dir_name) + + # unzip egg and replace path with unzipped content + with zipfile.ZipFile(egg_path, 'r') as zip_f: + zip_f.extractall(tmp_dir_name) + os.remove(egg_path) + shutil.copytree(tmp_dir_name, egg_path) + + # Now the lines should be accessible, so check again + if os.path.isfile(word): + file_path_set.add(word) + + +def create_file_structure_from_tocs(work_path, out_path): + """ + Extract the Python files that are added as paths in the output of + a pyinstaller operation. The files are determined by reading through + all of the *.toc files in the workpath of pyinstaller. + + The files will be copied into the out_path using a similar file path + as they originally are. If any archive (.egg) files are present in the + .toc files, then unzip the archives and substitute the archive for the + unzipped content, i.e. we will extract the archives and collect the source + files. + """ + print('Extracts files from the pyinstaller workpath') + file_path_set = set() + for path1 in os.listdir(work_path): + full_path = os.path.join(work_path, path1) + if not os.path.isdir(full_path): + continue + + # We have a directory + for path2 in os.listdir(full_path): + if not '.toc' in path2: + continue + full_toc_file = os.path.join(full_path, path2) + get_all_files_from_toc(full_toc_file, file_path_set) + + for file_path in file_path_set: + relative_src = file_path[1:] if file_path[0] == '/' else file_path + dst_path = os.path.join(out_path, relative_src) + os.makedirs(os.path.dirname(dst_path), exist_ok=True) + shutil.copy(file_path, dst_path) + + +def main(): + """ + Main handler. + """ + if len(sys.argv) != 3: + print('Use: python3 python_coverage_helper.py pyinstaller_workpath ' + 'destination_for_output') + sys.exit(1) + work_path = sys.argv[1] + out_path = sys.argv[2] + create_file_structure_from_tocs(work_path, out_path) + + +if __name__ == '__main__': + main() diff --git a/infra/base-images/base-runner/Dockerfile b/infra/base-images/base-runner/Dockerfile index ca1c69bd..b666bef9 100755 --- a/infra/base-images/base-runner/Dockerfile +++ b/infra/base-images/base-runner/Dockerfile @@ -46,6 +46,7 @@ RUN apt-get update && apt-get install -y \ python3 \ python3-pip \ python3-setuptools \ + rsync \ unzip \ wget \ zip --no-install-recommends @@ -56,7 +57,8 @@ RUN git clone https://chromium.googlesource.com/chromium/src/tools/code_coverage git checkout edba4873b5e8a390e977a64c522db2df18a8b27d && \ pip3 install wheel && \ pip3 install -r requirements.txt && \ - pip3 install MarkupSafe==0.23 + pip3 install MarkupSafe==0.23 && \ + pip3 install coverage # Default environment options for various sanitizers. # Note that these match the settings used in ClusterFuzz and @@ -119,4 +121,5 @@ COPY bad_build_check \ targets_list \ test_all.py \ test_one.py \ + python_coverage_runner_help.py \ /usr/local/bin/ diff --git a/infra/base-images/base-runner/coverage b/infra/base-images/base-runner/coverage index 0d10d149..15532fda 100755 --- a/infra/base-images/base-runner/coverage +++ b/infra/base-images/base-runner/coverage @@ -151,6 +151,14 @@ function run_go_fuzz_target { $SYSGOPATH/bin/gocovsum $DUMPS_DIR/$target.profdata > $FUZZER_STATS_DIR/$target.json } +function run_python_fuzz_target { + local target=$1 + local zipped_sources="$DUMPS_DIR/$target.deps.zip" + local corpus_real="$CORPUS_DIR/${target}" + $OUT/$target.pkg $corpus_real -atheris_runs=$(ls -la $corpus_real | wc -l) + mv .coverage $OUT/.coverage_$target +} + function run_java_fuzz_target { local target=$1 @@ -224,6 +232,9 @@ for fuzz_target in $FUZZ_TARGETS; do grep "FUZZ_CORPUS_DIR" $fuzz_target > /dev/null 2>&1 || continue fi run_go_fuzz_target $fuzz_target & + elif [[ $FUZZING_LANGUAGE == "python" ]]; then + echo "Entering python fuzzing" + run_python_fuzz_target $fuzz_target elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then # Continue if not a fuzz target. if [[ $FUZZING_ENGINE != "none" ]]; then @@ -271,6 +282,27 @@ if [[ $FUZZING_LANGUAGE == "go" ]]; then mv merged.data $REPORT_ROOT_DIR/heap.prof #TODO some proxy for go tool pprof -http=127.0.0.1:8001 $DUMPS_DIR/cpu.prof echo "Finished generating code coverage report for Go fuzz targets." +elif [[ $FUZZING_LANGUAGE == "python" ]]; then + # Extract source files from all dependency zip folders + mkdir -p /pythoncovmergedfiles/medio + PYCOVDIR=/pycovdir/ + mkdir $PYCOVDIR + for fuzzer in $FUZZ_TARGETS; do + unzip $OUT/$fuzzer.deps.zip + rsync -r ./medio /pythoncovmergedfiles/medio + rm -rf ./medio + + # Translate paths in unzipped folders to paths that we can use + mv $OUT/.coverage_$fuzzer .coverage + python3 /usr/local/bin/python_coverage_runner_help.py translate /pythoncovmergedfiles/medio + cp .new_coverage $PYCOVDIR/.coverage_$fuzzer + done + + # Combine coverage + cd $PYCOVDIR + python3 /usr/local/bin/python_coverage_runner_help.py combine .coverage_* + python3 /usr/local/bin/python_coverage_runner_help.py html + mv htmlcov $REPORT_ROOT_DIR/ elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then # From this point on the script does not tolerate any errors. diff --git a/infra/base-images/base-runner/python_coverage_runner_help.py b/infra/base-images/base-runner/python_coverage_runner_help.py new file mode 100755 index 00000000..e22e84b3 --- /dev/null +++ b/infra/base-images/base-runner/python_coverage_runner_help.py @@ -0,0 +1,83 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Helper to manage coverage.py related operations. Does two main +things: (1) pass commands into the coverage.py library and (2) +translate .coverage created from a pyinstaller executable into +paths that match local files. This is needed for html report creation. +""" +import os +import re +import sys +from coverage.cmdline import main as coverage_main +from coverage.data import CoverageData + + +def translate_lines(cov_data, new_cov_data, all_file_paths): + """ + Translate lines in a .coverage file created by coverage.py such that + the file paths points to local files instead. This is needed when collecting + coverage from executables created by pyinstaller. + """ + for pyinstaller_file_path in cov_data.measured_files(): + stripped_py_file_path = pyinstaller_file_path + if stripped_py_file_path.startswith('/tmp/_MEI'): + stripped_py_file_path = '/'.join(stripped_py_file_path.split('/')[3:]) + if stripped_py_file_path.startswith('/out/'): + stripped_py_file_path = stripped_py_file_path.replace('/out/', '') + + # Check if this file exists in our file paths: + for local_file_path in all_file_paths: + if local_file_path.endswith(stripped_py_file_path): + print('Found matching: %s' % (local_file_path)) + new_cov_data.add_lines( + {local_file_path: cov_data.lines(pyinstaller_file_path)}) + + +def translate_coverage(all_file_paths): + """ + Translate pyinstaller-generated file paths in .coverage (produced by + coverage.py) into local file paths. Place result in .new_coverage. + """ + covdata_pre_translation = CoverageData('.coverage') + covdata_post_translation = CoverageData('.new_coverage') + + covdata_pre_translation.read() + translate_lines(covdata_pre_translation, covdata_post_translation, + all_file_paths) + covdata_post_translation.write() + + +def main(): + """ + Main handler. + """ + if sys.argv[1] == 'translate': + print('Translating the coverage') + files_path = sys.argv[2] + all_file_paths = list() + for root, _, files in os.walk(files_path): + for relative_file_path in files: + abs_file_path = os.path.abspath(os.path.join(root, relative_file_path)) + all_file_paths.append(abs_file_path) + print('Done with path walk') + translate_coverage(all_file_paths) + else: + # Pass commands into coverage package + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(coverage_main()) + + +if __name__ == '__main__': + main() |