diff options
author | 2020-05-06 13:23:14 +1000 | |
---|---|---|
committer | 2020-05-05 20:23:14 -0700 | |
commit | e731fac3db61c3eb18b9d66e64cec4e622f561eb (patch) | |
tree | 485c6904d210995c15f158d91688542abd4e68e1 /projects/wuffs | |
parent | b16fcfc903ffe522e89996faea9388baed6c5d7e (diff) |
[wuffs] Skip dupe files when building seed corpus (#3763)
Updates google/oss-fuzz#22035
Diffstat (limited to 'projects/wuffs')
-rwxr-xr-x | projects/wuffs/build.sh | 18 |
1 files changed, 17 insertions, 1 deletions
```diff
diff --git a/projects/wuffs/build.sh b/projects/wuffs/build.sh
index 6177a3b7..d51af38c 100755
--- a/projects/wuffs/build.sh
+++ b/projects/wuffs/build.sh
@@ -32,8 +32,24 @@ for f in fuzz/c/std/*_fuzzer.c; do
   # Make the optional "gzip_fuzzer_seed_corpus.zip" archive. This means
   # extracting the "foo/bar/*.gz" out of the matching "gzip: foo/bar/*.gz"
   # lines in fuzz/c/std/seed_corpora.txt.
+  #
+  # The seed_corpora.txt lines can contain multiple entries, combining
+  # independent corpora. A naive "zip --junk-paths" of all those files can fail
+  # if there are duplicate file names, which can easily happen if the file name
+  # is a hash of its contents and the contents are a (trivial) minimal
+  # reproducer. We use a de-duplication step of copying all of those files into
+  # a single directory. Doing that in a single "cp" or "mv" call can fail with
+  # "will not overwrite just-created 'foo/etc' with 'bar/etc'", so we make
+  # multiple calls, each copying one file at a time. Later duplicates overwrite
+  # earlier duplicates. It's OK if the contents aren't identical. The result is
+  # still a valid uber-corpus of seed files.
   seeds=$(sed -n -e "/^$b:/s/^$b: *//p" fuzz/c/std/seed_corpora.txt)
   if [ -n "$seeds" ]; then
-    zip --junk-paths $OUT/${b}_fuzzer_seed_corpus.zip $seeds
+    mkdir ${b}_fuzzer_seed_corpus
+    for s in $seeds; do
+      cp $s ${b}_fuzzer_seed_corpus
+    done
+    zip --junk-paths --recurse-paths $OUT/${b}_fuzzer_seed_corpus.zip ${b}_fuzzer_seed_corpus
+    rm -rf ${b}_fuzzer_seed_corpus
   fi
 done
```