aboutsummaryrefslogtreecommitdiffhomepage
path: root/infra/base-images
diff options
context:
space:
mode:
author: DavidKorczynski <david@adalogics.com> 2022-04-06 15:31:34 +0100
committer: GitHub <noreply@github.com> 2022-04-06 10:31:34 -0400
commit: 0cb820e5af64f279f66d0c0d3c93b7437ecabe91 (patch)
tree: db17b5d39d7ef7c1fc899676fff3b4742c0c8e78 /infra/base-images
parent: 46121987d0510526e8112cff934bd745e6967fe7 (diff)
infra: add Python coverage support (#7298)
* infra: add Python coverage support * update python coverage helper script * nits * switch from commands to python * pin coverage package * switch to single quote strings throughout * nit * fix style * fix style * fix ci * fix ci
Diffstat (limited to 'infra/base-images')
-rw-r--r-- infra/base-images/base-builder/Dockerfile | 2
-rwxr-xr-x infra/base-images/base-builder/compile | 4
-rwxr-xr-x infra/base-images/base-builder/compile_python_fuzzer | 68
-rwxr-xr-x infra/base-images/base-builder/install_python.sh | 2
-rw-r--r-- infra/base-images/base-builder/python_coverage_helper.py | 118
-rwxr-xr-x infra/base-images/base-runner/Dockerfile | 5
-rwxr-xr-x infra/base-images/base-runner/coverage | 32
-rwxr-xr-x infra/base-images/base-runner/python_coverage_runner_help.py | 83
8 files changed, 310 insertions, 4 deletions
diff --git a/infra/base-images/base-builder/Dockerfile b/infra/base-images/base-builder/Dockerfile
index afaf860e..276300df 100644
--- a/infra/base-images/base-builder/Dockerfile
+++ b/infra/base-images/base-builder/Dockerfile
@@ -148,6 +148,8 @@ RUN precompile_honggfuzz
COPY cargo compile compile_afl compile_dataflow compile_libfuzzer compile_honggfuzz \
compile_go_fuzzer \
compile_native_go_fuzzer \
+ compile_python_fuzzer \
+ python_coverage_helper.py \
debug_afl srcmap \
write_labels.py bazel_build_fuzz_tests \
# Go, java, and swift installation scripts.
diff --git a/infra/base-images/base-builder/compile b/infra/base-images/base-builder/compile
index 3c7736c6..9995b67e 100755
--- a/infra/base-images/base-builder/compile
+++ b/infra/base-images/base-builder/compile
@@ -42,8 +42,8 @@ if [ "$FUZZING_LANGUAGE" = "python" ]; then
echo "ERROR: Python projects can be fuzzed with libFuzzer engine only."
exit 1
fi
- if [ "$SANITIZER" != "address" ] && [ "$SANITIZER" != "undefined" ]; then
- echo "ERROR: Python projects can be fuzzed with AddressSanitizer or UndefinedBehaviorSanitizer only."
+ if [ "$SANITIZER" != "address" ] && [ "$SANITIZER" != "undefined" ] && [ "$SANITIZER" != "coverage" ]; then
+ echo "ERROR: Python projects can be fuzzed with AddressSanitizer or UndefinedBehaviorSanitizer or Coverage only."
exit 1
fi
if [ "$ARCHITECTURE" != "x86_64" ]; then
diff --git a/infra/base-images/base-builder/compile_python_fuzzer b/infra/base-images/base-builder/compile_python_fuzzer
new file mode 100755
index 00000000..36c3626a
--- /dev/null
+++ b/infra/base-images/base-builder/compile_python_fuzzer
@@ -0,0 +1,68 @@
+#!/bin/bash -eu
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+# Usage: compile_python_fuzzer <path/to/fuzzer.py>
+#
+# Packages the given Python fuzzer with pyinstaller into
+# $OUT/<basename>.pkg and writes a shell wrapper $OUT/<basename> that runs
+# the package with sanitizer_with_fuzzer.so preloaded. When $SANITIZER
+# contains "coverage", a coverage.py stub is first prepended to the fuzzer
+# source (the source file is overwritten), and after packaging the Python
+# sources found via python_coverage_helper.py are zipped into
+# $OUT/<basename>.pkg.deps.zip.
+
+fuzzer_path=$1
+fuzzer_basename=$(basename -s .py "$fuzzer_path")
+fuzzer_package=${fuzzer_basename}.pkg
+
+PYFUZZ_WORKPATH=$SRC/pyfuzzworkdir/
+FUZZ_WORKPATH=$PYFUZZ_WORKPATH/$fuzzer_basename
+
+# In coverage mode prepend coverage logic to the fuzzer source
+if [[ $SANITIZER = *coverage* ]]; then
+  # Truncate (">") instead of appending (">>"): if this script is invoked
+  # again from the same directory, appending would stack one more copy of
+  # the stub into coverage_wrapper.py and thus into the next fuzzer.
+  cat <<EOF > coverage_wrapper.py
+###### Coverage stub
+import atexit
+import coverage
+cov = coverage.coverage(data_file='.coverage', cover_pylib=True)
+cov.start()
+# Register an exist handler that will print coverage
+def exit_handler():
+    cov.stop()
+    cov.save()
+atexit.register(exit_handler)
+####### End of coverage stub
+EOF
+
+  # Prepend stub and create tmp file
+  cat coverage_wrapper.py "$fuzzer_path" > tmp_fuzzer_coverage.py
+
+  # Overwrite existing fuzzer with new fuzzer that has stub
+  mv tmp_fuzzer_coverage.py "$fuzzer_path"
+fi
+
+# Start from an empty pyinstaller work directory for this fuzzer.
+rm -rf "$PYFUZZ_WORKPATH"
+mkdir "$PYFUZZ_WORKPATH" "$FUZZ_WORKPATH"
+
+pyinstaller --distpath "$OUT" --workpath="$FUZZ_WORKPATH" --onefile --name "$fuzzer_package" "$fuzzer_path"
+
+# In coverage mode save source files of dependencies in pyinstalled binary
+if [[ $SANITIZER = *coverage* ]]; then
+  rm -rf /medio/
+  python3 /usr/local/bin/python_coverage_helper.py "$FUZZ_WORKPATH" "/medio"
+  zip -r "$fuzzer_package.deps.zip" /medio
+  mv "$fuzzer_package.deps.zip" "$OUT/"
+fi
+
+# Create execution wrapper.
+echo "#!/bin/sh
+# LLVMFuzzerTestOneInput for fuzzer detection.
+this_dir=\$(dirname \"\$0\")
+LD_PRELOAD=\$this_dir/sanitizer_with_fuzzer.so \
+ASAN_OPTIONS=\$ASAN_OPTIONS:symbolize=1:external_symbolizer_path=\$this_dir/llvm-symbolizer:detect_leaks=0 \
+\$this_dir/$fuzzer_package \$@" > "$OUT/$fuzzer_basename"
+chmod +x "$OUT/$fuzzer_basename"
diff --git a/infra/base-images/base-builder/install_python.sh b/infra/base-images/base-builder/install_python.sh
index 4d05f1ae..adb53040 100755
--- a/infra/base-images/base-builder/install_python.sh
+++ b/infra/base-images/base-builder/install_python.sh
@@ -17,5 +17,5 @@
echo "ATHERIS INSTALL"
unset CFLAGS CXXFLAGS
-pip3 install -v --no-cache-dir "atheris>=2.0.6" "pyinstaller==4.1"
+pip3 install -v --no-cache-dir "atheris>=2.0.6" "pyinstaller==4.1" "coverage==6.3.2"
rm -rf /tmp/*
diff --git a/infra/base-images/base-builder/python_coverage_helper.py b/infra/base-images/base-builder/python_coverage_helper.py
new file mode 100644
index 00000000..93c917f1
--- /dev/null
+++ b/infra/base-images/base-builder/python_coverage_helper.py
@@ -0,0 +1,118 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Extracts file paths to copy files from pyinstaller-generated executables"""
+import os
+import sys
+import shutil
+import zipfile
+
+
+# Finds all *.toc files in ./workpath and reads these files in order to
+# identify Python files associated with a pyinstaller packaged executable.
+# Copies all of the Python files to a temporary directory (/medio) following
+# the original directory structure.
+def get_all_files_from_toc(toc_file, file_path_set):
+  """Collects Python file paths referenced by a pyinstaller .toc file.
+
+  The .toc file is read line by line as bytes; lines that cannot be decoded
+  are skipped. Every space-separated word containing '.py' is stripped of
+  single quotes, commas and newlines and, if it names an existing file, is
+  added to file_path_set. A word that points inside a zipped .egg is handled
+  by replacing the .egg archive on disk with a directory holding its
+  extracted contents, after which the word is checked again.
+
+  Args:
+    toc_file: Path of the .toc file to scan.
+    file_path_set: Set that discovered file paths are added to (mutated in
+      place).
+  """
+  # Scratch directory (relative to the current working directory) that egg
+  # contents are extracted into before they replace the archive.
+  tmp_dir_name = 'zipdcontents'
+
+  with open(toc_file, 'rb') as toc_file_fd:
+    for line in toc_file_fd:
+      try:
+        line = line.decode()
+      except UnicodeDecodeError:
+        # Not decodable text, so it cannot hold a usable path entry.
+        continue
+      if '.py' not in line:
+        continue
+
+      for word in line.split(' '):
+        word = word.replace('\'', '').replace(',', '').replace('\n', '')
+        if '.py' not in word:
+          continue
+        if os.path.isfile(word):
+          file_path_set.add(word)
+          continue
+        if '.egg' not in word:
+          continue
+
+        # The path may point inside a zipped egg; split off the archive path.
+        egg_path_split = word.split('.egg')
+        if len(egg_path_split) != 2:
+          continue
+        egg_path = egg_path_split[0] + '.egg'
+
+        # Only unpack real archives. If the egg was already replaced by a
+        # directory (while handling an earlier word) or is not a zip file,
+        # opening it with zipfile would raise, so skip it instead.
+        if not (os.path.isfile(egg_path) and zipfile.is_zipfile(egg_path)):
+          continue
+
+        print('Unzipping contents of %s' % egg_path)
+
+        if os.path.isdir(tmp_dir_name):
+          shutil.rmtree(tmp_dir_name)
+
+        # Unzip the egg and replace the archive with the unzipped content.
+        with zipfile.ZipFile(egg_path, 'r') as zip_f:
+          zip_f.extractall(tmp_dir_name)
+        os.remove(egg_path)
+        # Move rather than copy so no scratch directory is left behind.
+        shutil.move(tmp_dir_name, egg_path)
+
+        # Now the file should be accessible, so check again.
+        if os.path.isfile(word):
+          file_path_set.add(word)
+
+
+def create_file_structure_from_tocs(work_path, out_path):
+  """Copies the Python files listed in pyinstaller .toc files to out_path.
+
+  Looks one level below work_path: for each sub-directory, every entry whose
+  name contains '.toc' is handed to get_all_files_from_toc, which gathers
+  file paths (unpacking .egg archives where needed). Each gathered file is
+  then copied to out_path joined with its original path minus one leading
+  '/', so the original directory layout is reproduced under out_path.
+
+  Args:
+    work_path: The pyinstaller work directory to scan.
+    out_path: Directory under which the files are copied.
+  """
+  print('Extracts files from the pyinstaller workpath')
+  collected = set()
+  for entry in os.listdir(work_path):
+    sub_dir = os.path.join(work_path, entry)
+    if os.path.isdir(sub_dir):
+      toc_names = [name for name in os.listdir(sub_dir) if '.toc' in name]
+      for toc_name in toc_names:
+        get_all_files_from_toc(os.path.join(sub_dir, toc_name), collected)
+
+  for src_path in collected:
+    # Make the path relative so that os.path.join keeps out_path as prefix.
+    if src_path[0] == '/':
+      relative_src = src_path[1:]
+    else:
+      relative_src = src_path
+    target = os.path.join(out_path, relative_src)
+    os.makedirs(os.path.dirname(target), exist_ok=True)
+    shutil.copy(src_path, target)
+
+
+def main():
+  """Command-line entry point.
+
+  Expects exactly two arguments: the pyinstaller work path and the output
+  directory. Prints a usage line and exits with status 1 otherwise.
+  """
+  args = sys.argv[1:]
+  if len(args) != 2:
+    print('Use: python3 python_coverage_helper.py pyinstaller_workpath '
+          'destination_for_output')
+    sys.exit(1)
+  work_path, out_path = args
+  create_file_structure_from_tocs(work_path, out_path)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/infra/base-images/base-runner/Dockerfile b/infra/base-images/base-runner/Dockerfile
index ca1c69bd..b666bef9 100755
--- a/infra/base-images/base-runner/Dockerfile
+++ b/infra/base-images/base-runner/Dockerfile
@@ -46,6 +46,7 @@ RUN apt-get update && apt-get install -y \
python3 \
python3-pip \
python3-setuptools \
+ rsync \
unzip \
wget \
zip --no-install-recommends
@@ -56,7 +57,8 @@ RUN git clone https://chromium.googlesource.com/chromium/src/tools/code_coverage
git checkout edba4873b5e8a390e977a64c522db2df18a8b27d && \
pip3 install wheel && \
pip3 install -r requirements.txt && \
- pip3 install MarkupSafe==0.23
+ pip3 install MarkupSafe==0.23 && \
+ pip3 install coverage
# Default environment options for various sanitizers.
# Note that these match the settings used in ClusterFuzz and
@@ -119,4 +121,5 @@ COPY bad_build_check \
targets_list \
test_all.py \
test_one.py \
+ python_coverage_runner_help.py \
/usr/local/bin/
diff --git a/infra/base-images/base-runner/coverage b/infra/base-images/base-runner/coverage
index 0d10d149..15532fda 100755
--- a/infra/base-images/base-runner/coverage
+++ b/infra/base-images/base-runner/coverage
@@ -151,6 +151,14 @@ function run_go_fuzz_target {
$SYSGOPATH/bin/gocovsum $DUMPS_DIR/$target.profdata > $FUZZER_STATS_DIR/$target.json
}
+# Runs the packaged Python fuzz target $OUT/<target>.pkg over its corpus
+# directory $CORPUS_DIR/<target> and moves the .coverage file found in the
+# current directory afterwards to $OUT/.coverage_<target>.
+#   $1: name of the fuzz target.
+function run_python_fuzz_target {
+ local target=$1
+ # NOTE(review): zipped_sources is assigned but not used in this function.
+ local zipped_sources="$DUMPS_DIR/$target.deps.zip"
+ local corpus_real="$CORPUS_DIR/${target}"
+ # -atheris_runs is set to the line count of `ls -la`, which also counts the
+ # "total", "." and ".." lines, so it is slightly larger than the number of
+ # corpus files.
+ $OUT/$target.pkg $corpus_real -atheris_runs=$(ls -la $corpus_real | wc -l)
+ mv .coverage $OUT/.coverage_$target
+}
+
function run_java_fuzz_target {
local target=$1
@@ -224,6 +232,9 @@ for fuzz_target in $FUZZ_TARGETS; do
grep "FUZZ_CORPUS_DIR" $fuzz_target > /dev/null 2>&1 || continue
fi
run_go_fuzz_target $fuzz_target &
+ elif [[ $FUZZING_LANGUAGE == "python" ]]; then
+ echo "Entering python fuzzing"
+ run_python_fuzz_target $fuzz_target
elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then
# Continue if not a fuzz target.
if [[ $FUZZING_ENGINE != "none" ]]; then
@@ -271,6 +282,27 @@ if [[ $FUZZING_LANGUAGE == "go" ]]; then
mv merged.data $REPORT_ROOT_DIR/heap.prof
#TODO some proxy for go tool pprof -http=127.0.0.1:8001 $DUMPS_DIR/cpu.prof
echo "Finished generating code coverage report for Go fuzz targets."
+elif [[ $FUZZING_LANGUAGE == "python" ]]; then
+ # Extract source files from all dependency zip folders
+ mkdir -p /pythoncovmergedfiles/medio
+ PYCOVDIR=/pycovdir/
+ mkdir $PYCOVDIR
+ for fuzzer in $FUZZ_TARGETS; do
+ unzip $OUT/$fuzzer.deps.zip
+ rsync -r ./medio /pythoncovmergedfiles/medio
+ rm -rf ./medio
+
+ # Translate paths in unzipped folders to paths that we can use
+ mv $OUT/.coverage_$fuzzer .coverage
+ python3 /usr/local/bin/python_coverage_runner_help.py translate /pythoncovmergedfiles/medio
+ cp .new_coverage $PYCOVDIR/.coverage_$fuzzer
+ done
+
+ # Combine coverage
+ cd $PYCOVDIR
+ python3 /usr/local/bin/python_coverage_runner_help.py combine .coverage_*
+ python3 /usr/local/bin/python_coverage_runner_help.py html
+ mv htmlcov $REPORT_ROOT_DIR/
elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then
# From this point on the script does not tolerate any errors.
diff --git a/infra/base-images/base-runner/python_coverage_runner_help.py b/infra/base-images/base-runner/python_coverage_runner_help.py
new file mode 100755
index 00000000..e22e84b3
--- /dev/null
+++ b/infra/base-images/base-runner/python_coverage_runner_help.py
@@ -0,0 +1,83 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Helper to manage coverage.py related operations. Does two main
+things: (1) pass commands into the coverage.py library and (2)
+translate .coverage created from a pyinstaller executable into
+paths that match local files. This is needed for html report creation.
+"""
+import os
+import re
+import sys
+from coverage.cmdline import main as coverage_main
+from coverage.data import CoverageData
+
+
+def translate_lines(cov_data, new_cov_data, all_file_paths):
+  """Copies line data from cov_data to new_cov_data under local file paths.
+
+  Each measured file path in cov_data is reduced to a relative suffix: for
+  paths starting with '/tmp/_MEI' the first two path components are dropped,
+  and a leading '/out/' is removed. Every entry of all_file_paths that ends
+  with that suffix on a path-component boundary receives the measured lines.
+
+  Args:
+    cov_data: Source coverage data; must provide measured_files() and
+      lines(path).
+    new_cov_data: Destination coverage data; must provide add_lines(dict).
+    all_file_paths: Iterable of local file paths to match against.
+  """
+  for pyinstaller_file_path in cov_data.measured_files():
+    stripped_py_file_path = pyinstaller_file_path
+    if stripped_py_file_path.startswith('/tmp/_MEI'):
+      stripped_py_file_path = '/'.join(stripped_py_file_path.split('/')[3:])
+    if stripped_py_file_path.startswith('/out/'):
+      # Drop only the leading '/out/'; str.replace would also delete any
+      # later '/out/' inside the path and glue its neighbours together.
+      stripped_py_file_path = stripped_py_file_path[len('/out/'):]
+
+    # Nothing left to match on; an empty suffix would match every file.
+    relative_suffix = stripped_py_file_path.lstrip('/')
+    if not relative_suffix:
+      continue
+    # Anchor the suffix at a '/' so that 'pkg/mod.py' cannot match
+    # '.../otherpkg/mod.py'.
+    component_suffix = '/' + relative_suffix
+
+    # Check if this file exists in our file paths:
+    for local_file_path in all_file_paths:
+      if (local_file_path == relative_suffix or
+          local_file_path.endswith(component_suffix)):
+        print('Found matching: %s' % (local_file_path))
+        new_cov_data.add_lines(
+            {local_file_path: cov_data.lines(pyinstaller_file_path)})
+
+
+def translate_coverage(all_file_paths):
+  """Rewrites '.coverage' into '.new_coverage' using local file paths.
+
+  Reads the coverage.py data file '.coverage', maps its measured file paths
+  onto the entries of all_file_paths via translate_lines, and writes the
+  result to '.new_coverage'.
+
+  Args:
+    all_file_paths: Local file paths that measured files are matched against.
+  """
+  source_data = CoverageData('.coverage')
+  translated_data = CoverageData('.new_coverage')
+  source_data.read()
+
+  translate_lines(source_data, translated_data, all_file_paths)
+  translated_data.write()
+
+
+def main():
+  """Command-line entry point.
+
+  'translate <dir>' collects the absolute path of every file below <dir> and
+  runs translate_coverage on them. Any other invocation (including one
+  without arguments) is forwarded to coverage.py's command-line entry point,
+  and its return value becomes the exit status.
+  """
+  if len(sys.argv) > 1 and sys.argv[1] == 'translate':
+    # Fail with a usage line rather than an IndexError traceback when the
+    # directory argument is missing.
+    if len(sys.argv) < 3:
+      print('Use: python3 python_coverage_runner_help.py translate '
+            'source_files_dir')
+      sys.exit(1)
+    print('Translating the coverage')
+    files_path = sys.argv[2]
+    all_file_paths = []
+    for root, _, files in os.walk(files_path):
+      for relative_file_path in files:
+        abs_file_path = os.path.abspath(os.path.join(root, relative_file_path))
+        all_file_paths.append(abs_file_path)
+    print('Done with path walk')
+    translate_coverage(all_file_paths)
+  else:
+    # Pass commands into coverage package
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(coverage_main())
+
+
+if __name__ == '__main__':
+  main()