aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com>2022-07-12 15:51:03 +0200
committerGravatar GitHub <noreply@github.com>2022-07-12 15:51:03 +0200
commit0975cbfc0aa085ffaca8a5a5150f098db86d7c3e (patch)
tree8640c8fac686665c9f425698d80c8d6b87dec877
parent6e39f856ab445c9cb36dffe10963249bb3947c60 (diff)
Remove build support for dataflow sanitizer. (#7972)
Related: #7964
-rw-r--r--infra/base-images/base-builder/Dockerfile7
-rwxr-xr-xinfra/base-images/base-builder/compile10
-rwxr-xr-xinfra/base-images/base-builder/compile_dataflow32
-rwxr-xr-xinfra/base-images/base-clang/checkout_build_install_llvm.sh12
-rwxr-xr-xinfra/base-images/base-runner/Dockerfile2
-rwxr-xr-xinfra/base-images/base-runner/collect_dft67
-rwxr-xr-xinfra/base-images/base-runner/coverage10
-rwxr-xr-xinfra/base-images/base-runner/dataflow_tracer.py156
8 files changed, 1 insertions, 295 deletions
diff --git a/infra/base-images/base-builder/Dockerfile b/infra/base-images/base-builder/Dockerfile
index df23f03b..093e3595 100644
--- a/infra/base-images/base-builder/Dockerfile
+++ b/infra/base-images/base-builder/Dockerfile
@@ -79,8 +79,6 @@ ENV SANITIZER_FLAGS_undefined "-fsanitize=array-bounds,bool,builtin,enum,float-d
ENV SANITIZER_FLAGS_memory "-fsanitize=memory -fsanitize-memory-track-origins"
-ENV SANITIZER_FLAGS_dataflow "-fsanitize=dataflow"
-
ENV SANITIZER_FLAGS_thread "-fsanitize=thread"
ENV SANITIZER_FLAGS_introspector "-O0 -flto -fno-inline-functions -fuse-ld=gold -Wno-unused-command-line-argument"
@@ -102,9 +100,6 @@ ENV COVERAGE_FLAGS="-fsanitize=fuzzer-no-link"
# messages which are treated as errors by some projects.
ENV COVERAGE_FLAGS_coverage "-fprofile-instr-generate -fcoverage-mapping -pthread -Wl,--no-as-needed -Wl,-ldl -Wl,-lm -Wno-unused-command-line-argument"
-# Coverage isntrumentation flags for dataflow builds.
-ENV COVERAGE_FLAGS_dataflow="-fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp"
-
# Default sanitizer, fuzzing engine and architecture to use.
ENV SANITIZER="address"
ENV FUZZING_ENGINE="libfuzzer"
@@ -145,7 +140,7 @@ RUN precompile_afl
COPY precompile_honggfuzz /usr/local/bin/
RUN precompile_honggfuzz
-COPY cargo compile compile_afl compile_dataflow compile_libfuzzer compile_honggfuzz \
+COPY cargo compile compile_afl compile_libfuzzer compile_honggfuzz \
compile_go_fuzzer \
compile_native_go_fuzzer \
compile_python_fuzzer \
diff --git a/infra/base-images/base-builder/compile b/infra/base-images/base-builder/compile
index 1cd367af..bf4defad 100755
--- a/infra/base-images/base-builder/compile
+++ b/infra/base-images/base-builder/compile
@@ -22,11 +22,6 @@ if [ -n "${OLD_LLVMPASS-}" ]; then
export SANITIZER_FLAGS_introspector=$(echo $SANITIZER_FLAGS_introspector | sed -r 's/-O0/-flegacy-pass-manager/')
fi
-if [ "$SANITIZER" = "dataflow" ] && [ "$FUZZING_ENGINE" != "dataflow" ]; then
- echo "ERROR: 'dataflow' sanitizer can be used with 'dataflow' engine only."
- exit 1
-fi
-
if [ "$FUZZING_LANGUAGE" = "jvm" ]; then
if [ "$FUZZING_ENGINE" != "libfuzzer" ]; then
echo "ERROR: JVM projects can be fuzzed with libFuzzer engine only."
@@ -245,8 +240,3 @@ if [ "$SANITIZER" = "introspector" ]; then
cp -rf $SRC/inspector $OUT/inspector
fi
-
-if [[ "$FUZZING_ENGINE" = "dataflow" ]]; then
- # Remove seed corpus as it can be huge but is not needed for a dataflow build.
- rm -f $OUT/*.zip
-fi
diff --git a/infra/base-images/base-builder/compile_dataflow b/infra/base-images/base-builder/compile_dataflow
deleted file mode 100755
index bf0a425f..00000000
--- a/infra/base-images/base-builder/compile_dataflow
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash -eu
-# Copyright 2019 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-################################################################################
-
-export LIB_FUZZING_ENGINE="/usr/lib/DataFlow*.o"
-echo -n "Compiling DataFlow to $LIB_FUZZING_ENGINE... "
-mkdir -p $WORK/libfuzzer
-pushd $WORK/libfuzzer > /dev/null
-
-$CXX $CXXFLAGS -fno-sanitize=all $SANITIZER_FLAGS -std=c++11 -O2 -c \
- $SRC/libfuzzer/dataflow/DataFlow.cpp
-$CXX $CXXFLAGS -fno-sanitize=all -fPIC -std=c++11 -O2 -c \
- $SRC/libfuzzer/dataflow/DataFlowCallbacks.cpp
-
-cp $WORK/libfuzzer/DataFlow*.o /usr/lib/
-
-popd > /dev/null
-rm -rf $WORK/libfuzzer
-echo " done."
diff --git a/infra/base-images/base-clang/checkout_build_install_llvm.sh b/infra/base-images/base-clang/checkout_build_install_llvm.sh
index 75c85121..f2147cfb 100755
--- a/infra/base-images/base-clang/checkout_build_install_llvm.sh
+++ b/infra/base-images/base-clang/checkout_build_install_llvm.sh
@@ -192,18 +192,6 @@ ninja -j $NPROC cxx
ninja install-cxx
rm -rf $WORK/msan
-# DataFlowSanitizer instrumented libraries.
-mkdir -p $WORK/dfsan
-cd $WORK/dfsan
-
-cmake_libcxx $CMAKE_EXTRA_ARGS \
- -DLLVM_USE_SANITIZER=DataFlow \
- -DCMAKE_INSTALL_PREFIX=/usr/dfsan/
-
-ninja -j $NPROC cxx cxxabi
-ninja install-cxx install-cxxabi
-rm -rf $WORK/dfsan
-
# libFuzzer sources.
cp -r $LLVM_SRC/compiler-rt/lib/fuzzer $SRC/libfuzzer
diff --git a/infra/base-images/base-runner/Dockerfile b/infra/base-images/base-runner/Dockerfile
index b666bef9..73c3d568 100755
--- a/infra/base-images/base-runner/Dockerfile
+++ b/infra/base-images/base-runner/Dockerfile
@@ -107,10 +107,8 @@ RUN wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.cli/0.8.7/org.jaco
# Do this last to make developing these files easier/faster due to caching.
COPY bad_build_check \
- collect_dft \
coverage \
coverage_helper \
- dataflow_tracer.py \
download_corpus \
jacoco_report_converter.py \
rcfilt \
diff --git a/infra/base-images/base-runner/collect_dft b/infra/base-images/base-runner/collect_dft
deleted file mode 100755
index 3f6c6899..00000000
--- a/infra/base-images/base-runner/collect_dft
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/bash -u
-# Copyright 2020 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-################################################################################
-cd $OUT
-
-if (( $# > 0 )); then
- FUZZ_TARGETS="$@"
-else
- FUZZ_TARGETS="$(find . -maxdepth 1 -type f -executable -printf '%P\n')"
-fi
-
-# Timeout for running a single fuzz target.
-if [ -z "$COLLECT_DFT_TIMEOUT" ]; then
- COLLECT_DFT_TIMEOUT=1h
-fi
-
-# Number of CPUs available, this is needed for running targets in parallel.
-NPROC=$(nproc)
-
-function run_one_target {
- local target=$1
- local corpus="/corpus/${target}"
- local traces="$OUT/${target}_dft"
-
- # Put the logs in $OUT as well for debugging purposes.
- local log="$OUT/${target}_dft.log"
-
- rm -rf $traces && mkdir -p $traces
-
- timeout $COLLECT_DFT_TIMEOUT dataflow_tracer.py $OUT/$target $corpus $traces &> $log
- if (( $? != 0 )); then
- echo "Error occured while collecting data flow traces for $target:"
- cat $log
- fi
-}
-
-# Run each fuzz target, write data flow traces into corresponding dir in $OUT.
-for fuzz_target in $FUZZ_TARGETS; do
- # Skip binaries that do not seem to be fuzz targets.
- grep "LLVMFuzzerTestOneInput" $fuzz_target > /dev/null 2>&1 || continue
-
- echo "Running $fuzz_target"
- run_one_target $fuzz_target &
-
- # Do not spawn more processes than the number of CPUs available.
- n_child_proc=$(jobs -rp | wc -l)
- while [ "$n_child_proc" -eq "$NPROC" ]; do
- sleep 4
- n_child_proc=$(jobs -rp | wc -l)
- done
-done
-
-# Wait for background processes to finish.
-wait
diff --git a/infra/base-images/base-runner/coverage b/infra/base-images/base-runner/coverage
index fa2292ca..1659ee94 100755
--- a/infra/base-images/base-runner/coverage
+++ b/infra/base-images/base-runner/coverage
@@ -120,16 +120,6 @@ function run_fuzz_target {
# For introspector.
llvm-cov show -instr-profile=$profdata_file -object=$target -line-coverage-gt=0 $shared_libraries $BRANCH_COV_ARGS $LLVM_COV_COMMON_ARGS > ${TEXTCOV_REPORT_DIR}/$target.covreport
-
- if [ -n "${FULL_SUMMARY_PER_TARGET-}" ]; then
- # This is needed for dataflow strategy analysis, can be removed later. See
- # - https://github.com/google/oss-fuzz/pull/3306
- # - https://github.com/google/oss-fuzz/issues/1632
- # Intentionally writing these to the logs dir in order to hide the dumps
- # from the ClusterFuzz cron job.
- llvm-cov export -instr-profile=$profdata_file -object=$target \
- $shared_libraries $LLVM_COV_COMMON_ARGS > $LOGS_DIR/$target.json
- fi
}
function run_go_fuzz_target {
diff --git a/infra/base-images/base-runner/dataflow_tracer.py b/infra/base-images/base-runner/dataflow_tracer.py
deleted file mode 100755
index 92a48fa4..00000000
--- a/infra/base-images/base-runner/dataflow_tracer.py
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2020 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-################################################################################
-"""Script for collecting dataflow traces using DFSan compiled binary. The script
-imitates `CollectDataFlow` function from libFuzzer but provides some flexibility
-for skipping long and/or slow corpus elements.
-
-Follow https://github.com/google/oss-fuzz/issues/1632 for more details."""
-import hashlib
-import os
-import subprocess
-import sys
-
-# pylint: skip-file
-
-# See https://github.com/google/oss-fuzz/pull/5024#discussion_r561313003 for why
-# we are disabling pylint for this file (we can't do it in .pylintrc, probably
-# because of weirdness with this file's package, so we do it here).
-
-# These can be controlled by the runner in order to change the values without
-# rebuilding OSS-Fuzz base images.
-FILE_SIZE_LIMIT = int(os.getenv('DFT_FILE_SIZE_LIMIT', 32 * 1024))
-MIN_TIMEOUT = float(os.getenv('DFT_MIN_TIMEOUT', 1.0))
-TIMEOUT_RANGE = float(os.getenv('DFT_TIMEOUT_RANGE', 3.0))
-
-DFSAN_OPTIONS = 'fast16labels=1:warn_unimplemented=0'
-
-
-def _error(msg):
- sys.stderr.write(msg + '\n')
-
-
-def _list_dir(dirpath):
- for root, _, files in os.walk(dirpath):
- for f in files:
- yield os.path.join(root, f)
-
-
-def _sha1(filepath):
- h = hashlib.sha1()
- with open(filepath, 'rb') as f:
- h.update(f.read())
- return h.hexdigest()
-
-
-def _run(cmd, timeout=None):
- result = None
- try:
- result = subprocess.run(cmd,
- timeout=timeout,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- if result.returncode:
- _error('{command} finished with non-zero code: {code}'.format(
- command=str(cmd), code=result.returncode))
-
- except subprocess.TimeoutExpired:
- raise
- except Exception as e:
- _error('Exception: ' + str(e))
-
- return result
-
-
-def _timeout(size):
- # Dynamic timeout value (proportional to file size) to discard slow units.
- timeout = MIN_TIMEOUT
- timeout += size * TIMEOUT_RANGE / FILE_SIZE_LIMIT
- return timeout
-
-
-def collect_traces(binary, corpus_dir, dft_dir):
- stats = {
- 'total': 0,
- 'traced': 0,
- 'long': 0,
- 'slow': 0,
- 'failed': 0,
- }
-
- files_and_sizes = {}
- for f in _list_dir(corpus_dir):
- stats['total'] += 1
- size = os.path.getsize(f)
- if size > FILE_SIZE_LIMIT:
- stats['long'] += 1
- print('Skipping large file ({size}b): {path}'.format(size=size, path=f))
- continue
- files_and_sizes[f] = size
-
- for f in sorted(files_and_sizes, key=files_and_sizes.get):
- output_path = os.path.join(dft_dir, _sha1(f))
- try:
- result = _run([binary, f, output_path], timeout=_timeout(size))
- if result.returncode:
- stats['failed'] += 1
- else:
- stats['traced'] += 1
-
- except subprocess.TimeoutExpired as e:
- _error('Slow input: ' + str(e))
- stats['slow'] += 1
-
- return stats
-
-
-def dump_functions(binary, dft_dir):
- result = _run([binary])
- if not result or result.returncode:
- return False
-
- with open(os.path.join(dft_dir, 'functions.txt'), 'wb') as f:
- f.write(result.stdout)
-
- return True
-
-
-def main():
- if len(sys.argv) < 4:
- _error('Usage: {0} <binary> <corpus_dir> <dft_dir>'.format(sys.argv[0]))
- sys.exit(1)
-
- binary = sys.argv[1]
- corpus_dir = sys.argv[2]
- dft_dir = sys.argv[3]
-
- os.environ['DFSAN_OPTIONS'] = DFSAN_OPTIONS
-
- if not dump_functions(binary, dft_dir):
- _error('Failed to dump functions. Something is wrong.')
- sys.exit(1)
-
- stats = collect_traces(binary, corpus_dir, dft_dir)
- for k, v in stats.items():
- print('{0}: {1}'.format(k, v))
-
- # Checksum that we didn't lose track of any of the inputs.
- assert stats['total'] * 2 == sum(v for v in stats.values())
- sys.exit(0)
-
-
-if __name__ == "__main__":
- main()