diff options
author | jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com> | 2022-07-12 15:51:03 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-12 15:51:03 +0200 |
commit | 0975cbfc0aa085ffaca8a5a5150f098db86d7c3e (patch) | |
tree | 8640c8fac686665c9f425698d80c8d6b87dec877 | |
parent | 6e39f856ab445c9cb36dffe10963249bb3947c60 (diff) |
Remove build support for dataflow sanitizer. (#7972)
Related: #7964
-rw-r--r-- | infra/base-images/base-builder/Dockerfile | 7 | ||||
-rwxr-xr-x | infra/base-images/base-builder/compile | 10 | ||||
-rwxr-xr-x | infra/base-images/base-builder/compile_dataflow | 32 | ||||
-rwxr-xr-x | infra/base-images/base-clang/checkout_build_install_llvm.sh | 12 | ||||
-rwxr-xr-x | infra/base-images/base-runner/Dockerfile | 2 | ||||
-rwxr-xr-x | infra/base-images/base-runner/collect_dft | 67 | ||||
-rwxr-xr-x | infra/base-images/base-runner/coverage | 10 | ||||
-rwxr-xr-x | infra/base-images/base-runner/dataflow_tracer.py | 156 |
8 files changed, 1 insertions, 295 deletions
diff --git a/infra/base-images/base-builder/Dockerfile b/infra/base-images/base-builder/Dockerfile index df23f03b..093e3595 100644 --- a/infra/base-images/base-builder/Dockerfile +++ b/infra/base-images/base-builder/Dockerfile @@ -79,8 +79,6 @@ ENV SANITIZER_FLAGS_undefined "-fsanitize=array-bounds,bool,builtin,enum,float-d ENV SANITIZER_FLAGS_memory "-fsanitize=memory -fsanitize-memory-track-origins" -ENV SANITIZER_FLAGS_dataflow "-fsanitize=dataflow" - ENV SANITIZER_FLAGS_thread "-fsanitize=thread" ENV SANITIZER_FLAGS_introspector "-O0 -flto -fno-inline-functions -fuse-ld=gold -Wno-unused-command-line-argument" @@ -102,9 +100,6 @@ ENV COVERAGE_FLAGS="-fsanitize=fuzzer-no-link" # messages which are treated as errors by some projects. ENV COVERAGE_FLAGS_coverage "-fprofile-instr-generate -fcoverage-mapping -pthread -Wl,--no-as-needed -Wl,-ldl -Wl,-lm -Wno-unused-command-line-argument" -# Coverage isntrumentation flags for dataflow builds. -ENV COVERAGE_FLAGS_dataflow="-fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp" - # Default sanitizer, fuzzing engine and architecture to use. ENV SANITIZER="address" ENV FUZZING_ENGINE="libfuzzer" @@ -145,7 +140,7 @@ RUN precompile_afl COPY precompile_honggfuzz /usr/local/bin/ RUN precompile_honggfuzz -COPY cargo compile compile_afl compile_dataflow compile_libfuzzer compile_honggfuzz \ +COPY cargo compile compile_afl compile_libfuzzer compile_honggfuzz \ compile_go_fuzzer \ compile_native_go_fuzzer \ compile_python_fuzzer \ diff --git a/infra/base-images/base-builder/compile b/infra/base-images/base-builder/compile index 1cd367af..bf4defad 100755 --- a/infra/base-images/base-builder/compile +++ b/infra/base-images/base-builder/compile @@ -22,11 +22,6 @@ if [ -n "${OLD_LLVMPASS-}" ]; then export SANITIZER_FLAGS_introspector=$(echo $SANITIZER_FLAGS_introspector | sed -r 's/-O0/-flegacy-pass-manager/') fi -if [ "$SANITIZER" = "dataflow" ] && [ "$FUZZING_ENGINE" != "dataflow" ]; then - echo "ERROR: 'dataflow' sanitizer can be used with 'dataflow' engine only." - exit 1 -fi - if [ "$FUZZING_LANGUAGE" = "jvm" ]; then if [ "$FUZZING_ENGINE" != "libfuzzer" ]; then echo "ERROR: JVM projects can be fuzzed with libFuzzer engine only." @@ -245,8 +240,3 @@ if [ "$SANITIZER" = "introspector" ]; then cp -rf $SRC/inspector $OUT/inspector fi - -if [[ "$FUZZING_ENGINE" = "dataflow" ]]; then - # Remove seed corpus as it can be huge but is not needed for a dataflow build. - rm -f $OUT/*.zip -fi diff --git a/infra/base-images/base-builder/compile_dataflow b/infra/base-images/base-builder/compile_dataflow deleted file mode 100755 index bf0a425f..00000000 --- a/infra/base-images/base-builder/compile_dataflow +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -eu -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -################################################################################ - -export LIB_FUZZING_ENGINE="/usr/lib/DataFlow*.o" -echo -n "Compiling DataFlow to $LIB_FUZZING_ENGINE... " -mkdir -p $WORK/libfuzzer -pushd $WORK/libfuzzer > /dev/null - -$CXX $CXXFLAGS -fno-sanitize=all $SANITIZER_FLAGS -std=c++11 -O2 -c \ - $SRC/libfuzzer/dataflow/DataFlow.cpp -$CXX $CXXFLAGS -fno-sanitize=all -fPIC -std=c++11 -O2 -c \ - $SRC/libfuzzer/dataflow/DataFlowCallbacks.cpp - -cp $WORK/libfuzzer/DataFlow*.o /usr/lib/ - -popd > /dev/null -rm -rf $WORK/libfuzzer -echo " done." diff --git a/infra/base-images/base-clang/checkout_build_install_llvm.sh b/infra/base-images/base-clang/checkout_build_install_llvm.sh index 75c85121..f2147cfb 100755 --- a/infra/base-images/base-clang/checkout_build_install_llvm.sh +++ b/infra/base-images/base-clang/checkout_build_install_llvm.sh @@ -192,18 +192,6 @@ ninja -j $NPROC cxx ninja install-cxx rm -rf $WORK/msan -# DataFlowSanitizer instrumented libraries. -mkdir -p $WORK/dfsan -cd $WORK/dfsan - -cmake_libcxx $CMAKE_EXTRA_ARGS \ - -DLLVM_USE_SANITIZER=DataFlow \ - -DCMAKE_INSTALL_PREFIX=/usr/dfsan/ - -ninja -j $NPROC cxx cxxabi -ninja install-cxx install-cxxabi -rm -rf $WORK/dfsan - # libFuzzer sources. cp -r $LLVM_SRC/compiler-rt/lib/fuzzer $SRC/libfuzzer diff --git a/infra/base-images/base-runner/Dockerfile b/infra/base-images/base-runner/Dockerfile index b666bef9..73c3d568 100755 --- a/infra/base-images/base-runner/Dockerfile +++ b/infra/base-images/base-runner/Dockerfile @@ -107,10 +107,8 @@ RUN wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.cli/0.8.7/org.jaco # Do this last to make developing these files easier/faster due to caching. COPY bad_build_check \ - collect_dft \ coverage \ coverage_helper \ - dataflow_tracer.py \ download_corpus \ jacoco_report_converter.py \ rcfilt \ diff --git a/infra/base-images/base-runner/collect_dft b/infra/base-images/base-runner/collect_dft deleted file mode 100755 index 3f6c6899..00000000 --- a/infra/base-images/base-runner/collect_dft +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash -u -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -################################################################################ -cd $OUT - -if (( $# > 0 )); then - FUZZ_TARGETS="$@" -else - FUZZ_TARGETS="$(find . -maxdepth 1 -type f -executable -printf '%P\n')" -fi - -# Timeout for running a single fuzz target. -if [ -z "$COLLECT_DFT_TIMEOUT" ]; then - COLLECT_DFT_TIMEOUT=1h -fi - -# Number of CPUs available, this is needed for running targets in parallel. -NPROC=$(nproc) - -function run_one_target { - local target=$1 - local corpus="/corpus/${target}" - local traces="$OUT/${target}_dft" - - # Put the logs in $OUT as well for debugging purposes. - local log="$OUT/${target}_dft.log" - - rm -rf $traces && mkdir -p $traces - - timeout $COLLECT_DFT_TIMEOUT dataflow_tracer.py $OUT/$target $corpus $traces &> $log - if (( $? != 0 )); then - echo "Error occured while collecting data flow traces for $target:" - cat $log - fi -} - -# Run each fuzz target, write data flow traces into corresponding dir in $OUT. -for fuzz_target in $FUZZ_TARGETS; do - # Skip binaries that do not seem to be fuzz targets. - grep "LLVMFuzzerTestOneInput" $fuzz_target > /dev/null 2>&1 || continue - - echo "Running $fuzz_target" - run_one_target $fuzz_target & - - # Do not spawn more processes than the number of CPUs available. - n_child_proc=$(jobs -rp | wc -l) - while [ "$n_child_proc" -eq "$NPROC" ]; do - sleep 4 - n_child_proc=$(jobs -rp | wc -l) - done -done - -# Wait for background processes to finish. -wait diff --git a/infra/base-images/base-runner/coverage b/infra/base-images/base-runner/coverage index fa2292ca..1659ee94 100755 --- a/infra/base-images/base-runner/coverage +++ b/infra/base-images/base-runner/coverage @@ -120,16 +120,6 @@ function run_fuzz_target { # For introspector. llvm-cov show -instr-profile=$profdata_file -object=$target -line-coverage-gt=0 $shared_libraries $BRANCH_COV_ARGS $LLVM_COV_COMMON_ARGS > ${TEXTCOV_REPORT_DIR}/$target.covreport - - if [ -n "${FULL_SUMMARY_PER_TARGET-}" ]; then - # This is needed for dataflow strategy analysis, can be removed later. See - # - https://github.com/google/oss-fuzz/pull/3306 - # - https://github.com/google/oss-fuzz/issues/1632 - # Intentionally writing these to the logs dir in order to hide the dumps - # from the ClusterFuzz cron job. - llvm-cov export -instr-profile=$profdata_file -object=$target \ - $shared_libraries $LLVM_COV_COMMON_ARGS > $LOGS_DIR/$target.json - fi } function run_go_fuzz_target { diff --git a/infra/base-images/base-runner/dataflow_tracer.py b/infra/base-images/base-runner/dataflow_tracer.py deleted file mode 100755 index 92a48fa4..00000000 --- a/infra/base-images/base-runner/dataflow_tracer.py +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2020 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -################################################################################ -"""Script for collecting dataflow traces using DFSan compiled binary. The script -imitates `CollectDataFlow` function from libFuzzer but provides some flexibility -for skipping long and/or slow corpus elements. - -Follow https://github.com/google/oss-fuzz/issues/1632 for more details.""" -import hashlib -import os -import subprocess -import sys - -# pylint: skip-file - -# See https://github.com/google/oss-fuzz/pull/5024#discussion_r561313003 for why -# we are disabling pylint for this file (we can't do it in .pylintrc, probably -# because of weirdness with this file's package, so we do it here). - -# These can be controlled by the runner in order to change the values without -# rebuilding OSS-Fuzz base images. -FILE_SIZE_LIMIT = int(os.getenv('DFT_FILE_SIZE_LIMIT', 32 * 1024)) -MIN_TIMEOUT = float(os.getenv('DFT_MIN_TIMEOUT', 1.0)) -TIMEOUT_RANGE = float(os.getenv('DFT_TIMEOUT_RANGE', 3.0)) - -DFSAN_OPTIONS = 'fast16labels=1:warn_unimplemented=0' - - -def _error(msg): - sys.stderr.write(msg + '\n') - - -def _list_dir(dirpath): - for root, _, files in os.walk(dirpath): - for f in files: - yield os.path.join(root, f) - - -def _sha1(filepath): - h = hashlib.sha1() - with open(filepath, 'rb') as f: - h.update(f.read()) - return h.hexdigest() - - -def _run(cmd, timeout=None): - result = None - try: - result = subprocess.run(cmd, - timeout=timeout, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - if result.returncode: - _error('{command} finished with non-zero code: {code}'.format( - command=str(cmd), code=result.returncode)) - - except subprocess.TimeoutExpired: - raise - except Exception as e: - _error('Exception: ' + str(e)) - - return result - - -def _timeout(size): - # Dynamic timeout value (proportional to file size) to discard slow units. - timeout = MIN_TIMEOUT - timeout += size * TIMEOUT_RANGE / FILE_SIZE_LIMIT - return timeout - - -def collect_traces(binary, corpus_dir, dft_dir): - stats = { - 'total': 0, - 'traced': 0, - 'long': 0, - 'slow': 0, - 'failed': 0, - } - - files_and_sizes = {} - for f in _list_dir(corpus_dir): - stats['total'] += 1 - size = os.path.getsize(f) - if size > FILE_SIZE_LIMIT: - stats['long'] += 1 - print('Skipping large file ({size}b): {path}'.format(size=size, path=f)) - continue - files_and_sizes[f] = size - - for f in sorted(files_and_sizes, key=files_and_sizes.get): - output_path = os.path.join(dft_dir, _sha1(f)) - try: - result = _run([binary, f, output_path], timeout=_timeout(size)) - if result.returncode: - stats['failed'] += 1 - else: - stats['traced'] += 1 - - except subprocess.TimeoutExpired as e: - _error('Slow input: ' + str(e)) - stats['slow'] += 1 - - return stats - - -def dump_functions(binary, dft_dir): - result = _run([binary]) - if not result or result.returncode: - return False - - with open(os.path.join(dft_dir, 'functions.txt'), 'wb') as f: - f.write(result.stdout) - - return True - - -def main(): - if len(sys.argv) < 4: - _error('Usage: {0} <binary> <corpus_dir> <dft_dir>'.format(sys.argv[0])) - sys.exit(1) - - binary = sys.argv[1] - corpus_dir = sys.argv[2] - dft_dir = sys.argv[3] - - os.environ['DFSAN_OPTIONS'] = DFSAN_OPTIONS - - if not dump_functions(binary, dft_dir): - _error('Failed to dump functions. Something is wrong.') - sys.exit(1) - - stats = collect_traces(binary, corpus_dir, dft_dir) - for k, v in stats.items(): - print('{0}: {1}'.format(k, v)) - - # Checksum that we didn't lose track of any of the inputs. - assert stats['total'] * 2 == sum(v for v in stats.values()) - sys.exit(0) - - -if __name__ == "__main__": - main() |