diff options
Diffstat (limited to 'projects')
-rw-r--r-- | projects/bs4/Dockerfile | 27 | ||||
-rw-r--r-- | projects/bs4/bs4_fuzzer.py | 66 | ||||
-rw-r--r-- | projects/bs4/build.sh | 33 | ||||
-rw-r--r-- | projects/bs4/project.yaml | 12 |
4 files changed, 138 insertions, 0 deletions
diff --git a/projects/bs4/Dockerfile b/projects/bs4/Dockerfile new file mode 100644 index 00000000..dc5c9696 --- /dev/null +++ b/projects/bs4/Dockerfile @@ -0,0 +1,27 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +FROM gcr.io/oss-fuzz-base/base-builder + +RUN apt install -y bzr python-lxml python-html5lib +RUN pip3 install 2to3 soupsieve html5lib lxml +RUN bzr branch lp:beautifulsoup +WORKDIR beautifulsoup +# Beautifulsoup is written in python2, with a script to automatically convert it to python3. +RUN yes | ./convert-py3k +WORKDIR py3k + +COPY build.sh bs4_fuzzer.py $SRC/ diff --git a/projects/bs4/bs4_fuzzer.py b/projects/bs4/bs4_fuzzer.py new file mode 100644 index 00000000..11942617 --- /dev/null +++ b/projects/bs4/bs4_fuzzer.py @@ -0,0 +1,66 @@ +#!/usr/bin/python3 + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import sys +import warnings +import atheris + +from bs4 import BeautifulSoup + + +try: + import HTMLParser + HTMLParseError = HTMLParser.HTMLParseError +except ImportError: + # HTMLParseError was removed in Python 3.5. Since it can never be + # raised on 3.5 or later, we can just define our own class as a placeholder. + + class HTMLParseError(Exception): + pass + + +def TestOneInput(data): + """TestOneInput gets random data from the fuzzer, and throws it at bs4.""" + if len(data) < 1: + return + + parsers = ['lxml-xml', 'html5lib', 'html.parser', 'lxml'] + try: + idx = int(data[0]) % len(parsers) + except ValueError: + return + + try: + soup = BeautifulSoup(data[1:], features=parsers[idx]) + except HTMLParseError: + return + except ValueError: + return + + list(soup.find_all(True)) + soup.prettify() + + +def main(): + logging.disable(logging.CRITICAL) + warnings.filterwarnings('ignore') + atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/projects/bs4/build.sh b/projects/bs4/build.sh new file mode 100644 index 00000000..111be464 --- /dev/null +++ b/projects/bs4/build.sh @@ -0,0 +1,33 @@ +#!/bin/bash -eu +# Copyright 2020 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + + +# Build fuzzers in $OUT. 
+for fuzzer in $(find $SRC -name '*_fuzzer.py'); do + fuzzer_basename=$(basename -s .py $fuzzer) + fuzzer_package=${fuzzer_basename}.pkg + pyinstaller --distpath $OUT --onefile --name $fuzzer_package $fuzzer + + # Create execution wrapper. + echo "#!/bin/sh +# LLVMFuzzerTestOneInput for fuzzer detection. +this_dir=\$(dirname \"\$0\") +LD_PRELOAD=\$this_dir/sanitizer_with_fuzzer.so \ +ASAN_OPTIONS=\$ASAN_OPTIONS:symbolize=1:external_symbolizer_path=\$this_dir/llvm-symbolizer:detect_leaks=0 \ +\$this_dir/$fuzzer_package \$@" > $OUT/$fuzzer_basename + chmod u+x $OUT/$fuzzer_basename +done diff --git a/projects/bs4/project.yaml b/projects/bs4/project.yaml new file mode 100644 index 00000000..01ff4517 --- /dev/null +++ b/projects/bs4/project.yaml @@ -0,0 +1,12 @@ +homepage: "https://www.crummy.com/software/BeautifulSoup/" +main_repo: "https://code.launchpad.net/~leonardr/beautifulsoup/bs4" +language: python +primary_contact: "security-tps@google.com" +auto_ccs: + - "jvoisin@google.com" + - "ipudney@google.com" +fuzzing_engines: + - libfuzzer +sanitizers: + - address + - undefined |